agent examples

SentienceDEV · SentienceDEV · commit 8263270f07e7 · 2026-02-14T21:19:36.000-08:00
diff --git a/examples/agent/README.md b/examples/agent/README.md
@@ -0,0 +1,5 @@
+Predicate agent examples.
+
+- `predicate_browser_agent_minimal.py`: minimal `PredicateBrowserAgent` usage.
+- `predicate_browser_agent_custom_prompt.py`: customize the compact prompt builder.
+
diff --git a/examples/agent/predicate_browser_agent_custom_prompt.py b/examples/agent/predicate_browser_agent_custom_prompt.py
@@ -0,0 +1,117 @@
+"""
+Example: PredicateBrowserAgent with compact prompt customization.
+
+This shows how to override the compact prompt used for action proposal.
+
+Usage:
+  python examples/agent/predicate_browser_agent_custom_prompt.py
+"""
+
+import asyncio
+import os
+
+from predicate import AsyncSentienceBrowser, PredicateBrowserAgent, PredicateBrowserAgentConfig
+from predicate.agent_runtime import AgentRuntime
+from predicate.llm_provider import LLMProvider, LLMResponse
+from predicate.models import Snapshot
+from predicate.runtime_agent import RuntimeStep
+from predicate.tracing import JsonlTraceSink, Tracer
+
+
+class RecordingProvider(LLMProvider):
+    """
+    Example provider that records the prompts it receives.
+
+    Swap this for OpenAIProvider / AnthropicProvider / DeepInfraProvider / LocalLLMProvider in real usage.
+    """
+
+    def __init__(self, action: str = "FINISH()"):
+        super().__init__(model="recording-provider")
+        self._action = action
+        self.last_system: str | None = None
+        self.last_user: str | None = None
+
+    def generate(self, system_prompt: str, user_prompt: str, **kwargs) -> LLMResponse:
+        _ = kwargs
+        self.last_system = system_prompt
+        self.last_user = user_prompt
+        return LLMResponse(content=self._action, model_name=self.model_name)
+
+    def supports_json_mode(self) -> bool:
+        return False
+
+    @property
+    def model_name(self) -> str:
+        return "recording-provider"
+
+
+def compact_prompt_builder(
+    task_goal: str,
+    step_goal: str,
+    dom_context: str,
+    snap: Snapshot,
+    history_summary: str,
+) -> tuple[str, str]:
+    _ = snap
+    system = (
+        "You are a web automation executor.\n"
+        "Return ONLY ONE action in this format:\n"
+        "- CLICK(id)\n"
+        '- TYPE(id, "text")\n'
+        "- PRESS('key')\n"
+        "- FINISH()\n"
+        "No prose."
+    )
+    # Optional: aggressively control token usage by truncating DOM context.
+    dom_context = dom_context[:4000]
+    user = (
+        f"TASK GOAL:\n{task_goal}\n\n"
+        + (f"RECENT STEPS:\n{history_summary}\n\n" if history_summary else "")
+        + f"STEP GOAL:\n{step_goal}\n\n"
+        f"DOM CONTEXT:\n{dom_context}\n"
+    )
+    return system, user
+
+
+async def main() -> None:
+    run_id = "predicate-browser-agent-custom-prompt"
+    tracer = Tracer(run_id=run_id, sink=JsonlTraceSink(f"traces/{run_id}.jsonl"))
+
+    api_key = os.environ.get("PREDICATE_API_KEY") or os.environ.get("SENTIENCE_API_KEY")
+
+    async with AsyncSentienceBrowser(api_key=api_key, headless=False) as browser:
+        page = await browser.new_page()
+        await page.goto("https://example.com")
+        await page.wait_for_load_state("networkidle")
+
+        runtime = await AgentRuntime.from_sentience_browser(
+            browser=browser, page=page, tracer=tracer
+        )
+
+        executor = RecordingProvider(action="FINISH()")
+
+        agent = PredicateBrowserAgent(
+            runtime=runtime,
+            executor=executor,
+            config=PredicateBrowserAgentConfig(
+                history_last_n=2,
+                compact_prompt_builder=compact_prompt_builder,
+            ),
+        )
+
+        out = await agent.step(
+            task_goal="Open example.com",
+            step=RuntimeStep(goal="Take no action; just finish"),
+        )
+        print(f"step ok: {out.ok}")
+        print("--- prompt preview (system) ---")
+        print((executor.last_system or "")[:300])
+        print("--- prompt preview (user) ---")
+        print((executor.last_user or "")[:300])
+
+    tracer.close()
+
+
+if __name__ == "__main__":  # run the demo only when executed as a script
+    asyncio.run(main())
+
diff --git a/examples/agent/predicate_browser_agent_minimal.py b/examples/agent/predicate_browser_agent_minimal.py
@@ -0,0 +1,101 @@
+"""
+Example: PredicateBrowserAgent minimal demo.
+
+PredicateBrowserAgent is a higher-level, browser-use-like wrapper over:
+AgentRuntime + RuntimeAgent (snapshot-first action proposal + execution + verification).
+
+Usage:
+  python examples/agent/predicate_browser_agent_minimal.py
+"""
+
+import asyncio
+import os
+
+from predicate import AsyncSentienceBrowser, PredicateBrowserAgent, PredicateBrowserAgentConfig
+from predicate.agent_runtime import AgentRuntime
+from predicate.llm_provider import LLMProvider, LLMResponse
+from predicate.runtime_agent import RuntimeStep, StepVerification
+from predicate.tracing import JsonlTraceSink, Tracer
+from predicate.verification import exists, url_contains
+
+
+class FixedActionProvider(LLMProvider):
+    """Tiny in-process provider for examples/tests."""
+
+    def __init__(self, action: str):
+        super().__init__(model="fixed-action")
+        self._action = action
+
+    def generate(self, system_prompt: str, user_prompt: str, **kwargs) -> LLMResponse:
+        _ = system_prompt, user_prompt, kwargs
+        return LLMResponse(content=self._action, model_name=self.model_name)
+
+    def supports_json_mode(self) -> bool:
+        return False
+
+    @property
+    def model_name(self) -> str:
+        return "fixed-action"
+
+
+async def main() -> None:
+    run_id = "predicate-browser-agent-minimal"
+    tracer = Tracer(run_id=run_id, sink=JsonlTraceSink(f"traces/{run_id}.jsonl"))
+
+    api_key = os.environ.get("PREDICATE_API_KEY") or os.environ.get("SENTIENCE_API_KEY")
+
+    async with AsyncSentienceBrowser(api_key=api_key, headless=False) as browser:
+        page = await browser.new_page()
+        await page.goto("https://example.com")
+        await page.wait_for_load_state("networkidle")
+
+        runtime = await AgentRuntime.from_sentience_browser(
+            browser=browser, page=page, tracer=tracer
+        )
+
+        # For a "real" run, swap this for OpenAIProvider / AnthropicProvider / DeepInfraProvider / LocalLLMProvider.
+        executor = FixedActionProvider("FINISH()")
+
+        agent = PredicateBrowserAgent(
+            runtime=runtime,
+            executor=executor,
+            config=PredicateBrowserAgentConfig(
+                # Keep a tiny, bounded LLM-facing step history (0 disables history entirely).
+                history_last_n=2,
+            ),
+        )
+
+        steps = [
+            RuntimeStep(
+                goal="Verify Example Domain is loaded",
+                verifications=[
+                    StepVerification(
+                        predicate=url_contains("example.com"),
+                        label="url_contains_example",
+                        required=True,
+                        eventually=True,
+                        timeout_s=5.0,
+                    ),
+                    StepVerification(
+                        predicate=exists("role=heading"),
+                        label="has_heading",
+                        required=True,
+                        eventually=True,
+                        timeout_s=5.0,
+                    ),
+                ],
+                max_snapshot_attempts=2,
+                snapshot_limit_base=60,
+            )
+        ]
+
+        ok = await agent.run(task_goal="Open example.com and verify", steps=steps)
+        print(f"run ok: {ok}")
+
+    tracer.close()
+    print(f"trace written to traces/{run_id}.jsonl")
+
+
+if __name__ == "__main__":  # run the demo only when executed as a script
+    asyncio.run(main())
+
diff --git a/predicate/runtime_agent.py b/predicate/runtime_agent.py
@@ -21,7 +21,7 @@
 from .llm_interaction_handler import LLMInteractionHandler
 from .llm_provider import LLMProvider
 from .models import BBox, Snapshot, StepHookContext
-from .verification import AssertContext, AssertOutcome, Predicate
+from .verification import Predicate
 
 
 @dataclass(frozen=True)
@@ -55,6 +55,13 @@ class RuntimeStep:
     max_vision_executor_attempts: int = 1
 
 
+@dataclass(frozen=True)
+class ActOnceResult:  # immutable outcome returned by RuntimeAgent.act_once_result
+    action: str  # action string that was proposed and executed
+    snap: Snapshot  # snapshot captured before the action was proposed
+    used_vision: bool  # True when the vision executor produced the action
+
+
 class RuntimeAgent:
     """
     A thin orchestration layer over AgentRuntime:
@@ -164,6 +171,128 @@ async def run_step(
                 ),
             )
 
+    async def act_once(
+        self,
+        *,
+        task_goal: str,
+        step: RuntimeStep,
+        allow_vision_fallback: bool = True,
+        history_summary: str = "",
+        compact_prompt_builder: Callable[
+            [str, str, str, Snapshot, str], tuple[str, str]
+        ]
+        | None = None,
+        dom_context_postprocessor: Callable[[str], str] | None = None,
+    ) -> str:
+        """
+        Execute exactly one action for a step without owning step lifecycle.
+
+        This helper is designed for orchestration layers (e.g. WebBench) that already
+        call `runtime.begin_step(...)` / `runtime.emit_step_end(...)` and want to
+        reuse RuntimeAgent's snapshot-first action proposal + execution logic without:
+        - double-counting step budgets
+        - emitting duplicate step_start/step_end events
+
+        Returns:
+            Action string (e.g. "CLICK(123)", "TYPE(5, \"foo\")", "PRESS(\"Enter\")", "FINISH()")
+        """
+        res = await self.act_once_result(
+            task_goal=task_goal,
+            step=step,
+            allow_vision_fallback=allow_vision_fallback,
+            history_summary=history_summary,
+            compact_prompt_builder=compact_prompt_builder,
+            dom_context_postprocessor=dom_context_postprocessor,
+        )
+        return res.action
+
+    async def act_once_with_snapshot(
+        self,
+        *,
+        task_goal: str,
+        step: RuntimeStep,
+        allow_vision_fallback: bool = True,
+        history_summary: str = "",
+        compact_prompt_builder: Callable[
+            [str, str, str, Snapshot, str], tuple[str, str]
+        ]
+        | None = None,
+        dom_context_postprocessor: Callable[[str], str] | None = None,
+    ) -> tuple[str, Snapshot]:
+        """
+        Like `act_once`, but also returns the pre-action snapshot used for proposal.
+        """
+        res = await self.act_once_result(
+            task_goal=task_goal,
+            step=step,
+            allow_vision_fallback=allow_vision_fallback,
+            history_summary=history_summary,
+            compact_prompt_builder=compact_prompt_builder,
+            dom_context_postprocessor=dom_context_postprocessor,
+        )
+        return res.action, res.snap
+
+    async def act_once_result(
+        self,
+        *,
+        task_goal: str,
+        step: RuntimeStep,
+        allow_vision_fallback: bool = True,
+        history_summary: str = "",
+        compact_prompt_builder: Callable[
+            [str, str, str, Snapshot, str], tuple[str, str]
+        ]
+        | None = None,
+        dom_context_postprocessor: Callable[[str], str] | None = None,
+    ) -> ActOnceResult:
+        """
+        Like `act_once`, but returns action + proposal snapshot + whether vision was used.
+        """
+        snap = await self._snapshot_with_ramp(step=step)
+
+        # Optional short-circuit to vision (bounded by caller).
+        if allow_vision_fallback and await self._should_short_circuit_to_vision(step=step, snap=snap):
+            if self.vision_executor and self.vision_executor.supports_vision():
+                url = await self._get_url_for_prompt()
+                image_b64 = await self._screenshot_base64_png()
+                system_prompt, user_prompt = self._vision_executor_prompts(
+                    task_goal=task_goal,
+                    step=step,
+                    url=url,
+                    snap=snap,
+                )
+                resp = self.vision_executor.generate_with_image(
+                    system_prompt,
+                    user_prompt,
+                    image_b64,
+                    temperature=0.0,
+                )
+                action = self._extract_action_from_text(resp.content)
+                await self._execute_action(action=action, snap=snap)
+                return ActOnceResult(action=action, snap=snap, used_vision=True)
+
+        # Structured snapshot-first proposal.
+        dom_context = self._structured_llm.build_context(snap, step.goal)
+        if dom_context_postprocessor is not None:
+            dom_context = dom_context_postprocessor(dom_context)
+
+        if compact_prompt_builder is not None:
+            system_prompt, user_prompt = compact_prompt_builder(
+                task_goal, step.goal, dom_context, snap, history_summary or ""
+            )
+            resp = self.executor.generate(system_prompt, user_prompt, temperature=0.0)
+            action = self._structured_llm.extract_action(resp.content)
+        else:
+            combined_goal = task_goal
+            if history_summary:
+                combined_goal = f"{task_goal}\n\nRECENT STEPS:\n{history_summary}"
+            combined_goal = f"{combined_goal}\n\nSTEP: {step.goal}"
+            resp = self._structured_llm.query_llm(dom_context, combined_goal)
+            action = self._structured_llm.extract_action(resp.content)
+
+        await self._execute_action(action=action, snap=snap)
+        return ActOnceResult(action=action, snap=snap, used_vision=False)
+
     async def _run_hook(
         self,
         hook: Callable[[StepHookContext], Any] | None,
diff --git a/tests/unit/test_runtime_agent_act_once.py b/tests/unit/test_runtime_agent_act_once.py