Skip to content

Commit 8263270

Browse files
author
SentienceDEV
committed
agent examples
1 parent f268f24 commit 8263270

File tree

5 files changed

+486
-1
lines changed

5 files changed

+486
-1
lines changed

examples/agent/README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Predicate agent examples.
2+
3+
- `predicate_browser_agent_minimal.py`: minimal `PredicateBrowserAgent` usage.
4+
- `predicate_browser_agent_custom_prompt.py`: customize the compact prompt builder.
5+
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
"""
2+
Example: PredicateBrowserAgent with compact prompt customization.
3+
4+
This shows how to override the compact prompt used for action proposal.
5+
6+
Usage:
7+
python examples/agent/predicate_browser_agent_custom_prompt.py
8+
"""
9+
10+
import asyncio
11+
import os
12+
13+
from predicate import AsyncSentienceBrowser, PredicateBrowserAgent, PredicateBrowserAgentConfig
14+
from predicate.agent_runtime import AgentRuntime
15+
from predicate.llm_provider import LLMProvider, LLMResponse
16+
from predicate.models import Snapshot
17+
from predicate.runtime_agent import RuntimeStep
18+
from predicate.tracing import JsonlTraceSink, Tracer
19+
20+
21+
class RecordingProvider(LLMProvider):
    """
    Demo provider that answers with a canned action and remembers its inputs.

    Every `generate` call stores the system and user prompts it was handed on
    `last_system` / `last_user`, so the caller can inspect what the agent sent.

    In real usage, replace this with OpenAIProvider / AnthropicProvider /
    DeepInfraProvider / LocalLLMProvider.
    """

    def __init__(self, action: str = "FINISH()"):
        super().__init__(model="recording-provider")
        # Prompts seen by the most recent generate() call; None until the first call.
        self.last_system: str | None = None
        self.last_user: str | None = None
        # Action text returned verbatim from every generate() call.
        self._action = action

    @property
    def model_name(self) -> str:
        return "recording-provider"

    def supports_json_mode(self) -> bool:
        return False

    def generate(self, system_prompt: str, user_prompt: str, **kwargs) -> LLMResponse:
        # Extra generation options are accepted for interface compatibility only.
        del kwargs
        self.last_system, self.last_user = system_prompt, user_prompt
        return LLMResponse(content=self._action, model_name=self.model_name)
46+
47+
48+
def compact_prompt_builder(
    task_goal: str,
    step_goal: str,
    dom_context: str,
    snap: Snapshot,
    history_summary: str,
) -> tuple[str, str]:
    """
    Build the (system, user) prompt pair for a single action proposal.

    Args:
        task_goal: Overall task description, placed under "TASK GOAL".
        step_goal: Goal of the current step, placed under "STEP GOAL".
        dom_context: Text rendering of the page; only the first 4000
            characters are kept.
        snap: Snapshot the context was built from (not used by this builder).
        history_summary: Summary of recent steps; the "RECENT STEPS" section
            is omitted when this is empty.

    Returns:
        A `(system_prompt, user_prompt)` tuple.
    """
    del snap  # part of the builder signature, intentionally unused here

    system = "\n".join(
        [
            "You are a web automation executor.",
            "Return ONLY ONE action in this format:",
            "- CLICK(id)",
            '- TYPE(id, "text")',
            "- PRESS('key')",
            "- FINISH()",
            "No prose.",
        ]
    )

    # Optional: aggressively control token usage by truncating DOM context.
    trimmed_dom = dom_context[:4000]

    sections = [f"TASK GOAL:\n{task_goal}\n\n"]
    if history_summary:
        sections.append(f"RECENT STEPS:\n{history_summary}\n\n")
    sections.append(f"STEP GOAL:\n{step_goal}\n\n")
    sections.append(f"DOM CONTEXT:\n{trimmed_dom}\n")

    return system, "".join(sections)
74+
75+
76+
async def main() -> None:
    """
    Run one agent step on example.com with the custom compact prompt builder.

    Opens a non-headless browser, navigates to https://example.com, runs a
    single `PredicateBrowserAgent.step` with `RecordingProvider` as the
    executor, and prints the first 300 characters of the system and user
    prompts the provider received. The tracer is configured with a JSONL sink
    at `traces/<run_id>.jsonl` and is closed even if the run fails.
    """
    run_id = "predicate-browser-agent-custom-prompt"
    tracer = Tracer(run_id=run_id, sink=JsonlTraceSink(f"traces/{run_id}.jsonl"))

    # Either environment variable may carry the key; None if neither is set.
    api_key = os.environ.get("PREDICATE_API_KEY") or os.environ.get("SENTIENCE_API_KEY")

    try:
        async with AsyncSentienceBrowser(api_key=api_key, headless=False) as browser:
            page = await browser.new_page()
            await page.goto("https://example.com")
            await page.wait_for_load_state("networkidle")

            runtime = await AgentRuntime.from_sentience_browser(
                browser=browser, page=page, tracer=tracer
            )

            executor = RecordingProvider(action="FINISH()")

            agent = PredicateBrowserAgent(
                runtime=runtime,
                executor=executor,
                config=PredicateBrowserAgentConfig(
                    history_last_n=2,
                    compact_prompt_builder=compact_prompt_builder,
                ),
            )

            out = await agent.step(
                task_goal="Open example.com",
                step=RuntimeStep(goal="Take no action; just finish"),
            )
            print(f"step ok: {out.ok}")
            print("--- prompt preview (system) ---")
            print((executor.last_system or "")[:300])
            print("--- prompt preview (user) ---")
            print((executor.last_user or "")[:300])
    finally:
        # Close the tracer on every exit path so a failed run does not leak the sink.
        tracer.close()


if __name__ == "__main__":
    asyncio.run(main())
117+
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
"""
2+
Example: PredicateBrowserAgent minimal demo.
3+
4+
PredicateBrowserAgent is a higher-level, browser-use-like wrapper over:
5+
AgentRuntime + RuntimeAgent (snapshot-first action proposal + execution + verification).
6+
7+
Usage:
8+
python examples/agent/predicate_browser_agent_minimal.py
9+
"""
10+
11+
import asyncio
12+
import os
13+
14+
from predicate import AsyncSentienceBrowser, PredicateBrowserAgent, PredicateBrowserAgentConfig
15+
from predicate.agent_runtime import AgentRuntime
16+
from predicate.llm_provider import LLMProvider, LLMResponse
17+
from predicate.runtime_agent import RuntimeStep, StepVerification
18+
from predicate.tracing import JsonlTraceSink, Tracer
19+
from predicate.verification import exists, url_contains
20+
21+
22+
class FixedActionProvider(LLMProvider):
    """
    Tiny in-process provider for examples/tests.

    Ignores the prompts it is given and always answers with the action string
    supplied at construction time.
    """

    def __init__(self, action: str):
        super().__init__(model="fixed-action")
        # Action text returned verbatim from every generate() call.
        self._action = action

    @property
    def model_name(self) -> str:
        return "fixed-action"

    def supports_json_mode(self) -> bool:
        return False

    def generate(self, system_prompt: str, user_prompt: str, **kwargs) -> LLMResponse:
        # Inputs are part of the provider interface but play no role in the reply.
        del system_prompt, user_prompt, kwargs
        return LLMResponse(content=self._action, model_name=self.model_name)
39+
40+
41+
async def main() -> None:
    """
    Run a one-step PredicateBrowserAgent demo against example.com.

    Opens a non-headless browser, navigates to https://example.com, and runs a
    single step whose verifications require the URL to contain "example.com"
    and a heading to exist. `FixedActionProvider` always proposes "FINISH()".
    The tracer is configured with a JSONL sink at `traces/<run_id>.jsonl` and
    is closed even if the run fails.
    """
    run_id = "predicate-browser-agent-minimal"
    tracer = Tracer(run_id=run_id, sink=JsonlTraceSink(f"traces/{run_id}.jsonl"))

    # Either environment variable may carry the key; None if neither is set.
    api_key = os.environ.get("PREDICATE_API_KEY") or os.environ.get("SENTIENCE_API_KEY")

    try:
        async with AsyncSentienceBrowser(api_key=api_key, headless=False) as browser:
            page = await browser.new_page()
            await page.goto("https://example.com")
            await page.wait_for_load_state("networkidle")

            runtime = await AgentRuntime.from_sentience_browser(
                browser=browser, page=page, tracer=tracer
            )

            # For a "real" run, swap this for OpenAIProvider / AnthropicProvider /
            # DeepInfraProvider / LocalLLMProvider.
            executor = FixedActionProvider("FINISH()")

            agent = PredicateBrowserAgent(
                runtime=runtime,
                executor=executor,
                config=PredicateBrowserAgentConfig(
                    # Keep a tiny, bounded LLM-facing step history
                    # (0 disables history entirely).
                    history_last_n=2,
                ),
            )

            steps = [
                RuntimeStep(
                    goal="Verify Example Domain is loaded",
                    verifications=[
                        StepVerification(
                            predicate=url_contains("example.com"),
                            label="url_contains_example",
                            required=True,
                            eventually=True,
                            timeout_s=5.0,
                        ),
                        StepVerification(
                            predicate=exists("role=heading"),
                            label="has_heading",
                            required=True,
                            eventually=True,
                            timeout_s=5.0,
                        ),
                    ],
                    max_snapshot_attempts=2,
                    snapshot_limit_base=60,
                )
            ]

            ok = await agent.run(task_goal="Open example.com and verify", steps=steps)
            print(f"run ok: {ok}")
    finally:
        # Close the tracer on every exit path so a failed run does not leak the sink.
        tracer.close()

    # Only reached when the run completed without raising.
    print(f"trace written to traces/{run_id}.jsonl")


if __name__ == "__main__":
    asyncio.run(main())
101+

predicate/runtime_agent.py

Lines changed: 130 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
from .llm_interaction_handler import LLMInteractionHandler
2222
from .llm_provider import LLMProvider
2323
from .models import BBox, Snapshot, StepHookContext
24-
from .verification import AssertContext, AssertOutcome, Predicate
24+
from .verification import Predicate
2525

2626

2727
@dataclass(frozen=True)
@@ -55,6 +55,13 @@ class RuntimeStep:
5555
max_vision_executor_attempts: int = 1
5656

5757

58+
@dataclass(frozen=True)
class ActOnceResult:
    """Immutable outcome of `RuntimeAgent.act_once_result`."""

    # Action string that was proposed and executed.
    action: str
    # Snapshot taken before the action and used to propose it.
    snap: Snapshot
    # True when the action came from the vision executor path.
    used_vision: bool
63+
64+
5865
class RuntimeAgent:
5966
"""
6067
A thin orchestration layer over AgentRuntime:
@@ -164,6 +171,128 @@ async def run_step(
164171
),
165172
)
166173

174+
async def act_once(
    self,
    *,
    task_goal: str,
    step: RuntimeStep,
    allow_vision_fallback: bool = True,
    history_summary: str = "",
    compact_prompt_builder: Callable[
        [str, str, str, Snapshot, str], tuple[str, str]
    ]
    | None = None,
    dom_context_postprocessor: Callable[[str], str] | None = None,
) -> str:
    """
    Propose and execute a single action for `step`, returning only the action.

    Intended for orchestration layers (e.g. WebBench) that manage the step
    lifecycle themselves via `runtime.begin_step(...)` /
    `runtime.emit_step_end(...)` and only want RuntimeAgent's snapshot-first
    proposal + execution logic, without:
    - double-counting step budgets
    - emitting duplicate step_start/step_end events

    All arguments are forwarded unchanged to `act_once_result`.

    Returns:
        The action string, e.g. CLICK(123), TYPE(5, "foo"), PRESS("Enter")
        or FINISH().
    """
    return (
        await self.act_once_result(
            task_goal=task_goal,
            step=step,
            allow_vision_fallback=allow_vision_fallback,
            history_summary=history_summary,
            compact_prompt_builder=compact_prompt_builder,
            dom_context_postprocessor=dom_context_postprocessor,
        )
    ).action
208+
209+
async def act_once_with_snapshot(
    self,
    *,
    task_goal: str,
    step: RuntimeStep,
    allow_vision_fallback: bool = True,
    history_summary: str = "",
    compact_prompt_builder: Callable[
        [str, str, str, Snapshot, str], tuple[str, str]
    ]
    | None = None,
    dom_context_postprocessor: Callable[[str], str] | None = None,
) -> tuple[str, Snapshot]:
    """
    Same as `act_once`, additionally returning the pre-action snapshot.

    All arguments are forwarded unchanged to `act_once_result`.

    Returns:
        `(action, snap)` where `snap` is the snapshot the proposal was based on.
    """
    outcome = await self.act_once_result(
        task_goal=task_goal,
        step=step,
        allow_vision_fallback=allow_vision_fallback,
        history_summary=history_summary,
        compact_prompt_builder=compact_prompt_builder,
        dom_context_postprocessor=dom_context_postprocessor,
    )
    action, proposal_snapshot = outcome.action, outcome.snap
    return action, proposal_snapshot
234+
235+
async def act_once_result(
    self,
    *,
    task_goal: str,
    step: RuntimeStep,
    allow_vision_fallback: bool = True,
    history_summary: str = "",
    compact_prompt_builder: Callable[
        [str, str, str, Snapshot, str], tuple[str, str]
    ]
    | None = None,
    dom_context_postprocessor: Callable[[str], str] | None = None,
) -> ActOnceResult:
    """
    Propose and execute a single action, reporting how it was produced.

    Takes a snapshot, asks either the vision executor or the structured
    executor for an action, executes that action against the snapshot, and
    returns the details.

    Args:
        task_goal: Overall task description given to the model.
        step: Step whose goal drives the snapshot and the proposal.
        allow_vision_fallback: When False, the vision path is never taken.
        history_summary: Optional recent-steps text. It is passed to
            `compact_prompt_builder` when one is given, otherwise appended to
            the goal under "RECENT STEPS". Not used on the vision path.
        compact_prompt_builder: Optional callable invoked as
            `(task_goal, step.goal, dom_context, snap, history_summary)` that
            returns `(system_prompt, user_prompt)` for the executor. Not used
            on the vision path.
        dom_context_postprocessor: Optional callable applied to the DOM
            context string before prompting. Not used on the vision path.

    Returns:
        `ActOnceResult` with the executed action, the snapshot used for the
        proposal, and whether the vision executor produced the action.
    """
    snap = await self._snapshot_with_ramp(step=step)

    # Optional short-circuit to vision (bounded by caller). The chain keeps the
    # evaluation order: caller opt-in, then the short-circuit check, then
    # whether a vision-capable executor is configured.
    take_vision_path = (
        allow_vision_fallback
        and await self._should_short_circuit_to_vision(step=step, snap=snap)
        and self.vision_executor
        and self.vision_executor.supports_vision()
    )
    if take_vision_path:
        url = await self._get_url_for_prompt()
        image_b64 = await self._screenshot_base64_png()
        vision_system, vision_user = self._vision_executor_prompts(
            task_goal=task_goal,
            step=step,
            url=url,
            snap=snap,
        )
        vision_resp = self.vision_executor.generate_with_image(
            vision_system,
            vision_user,
            image_b64,
            temperature=0.0,
        )
        vision_action = self._extract_action_from_text(vision_resp.content)
        await self._execute_action(action=vision_action, snap=snap)
        return ActOnceResult(action=vision_action, snap=snap, used_vision=True)

    # Structured snapshot-first proposal.
    dom_context = self._structured_llm.build_context(snap, step.goal)
    if dom_context_postprocessor is not None:
        dom_context = dom_context_postprocessor(dom_context)

    if compact_prompt_builder is None:
        # Default prompt: fold the optional history and the step goal into one goal string.
        history_block = (
            f"\n\nRECENT STEPS:\n{history_summary}" if history_summary else ""
        )
        combined_goal = f"{task_goal}{history_block}\n\nSTEP: {step.goal}"
        llm_resp = self._structured_llm.query_llm(dom_context, combined_goal)
    else:
        system_prompt, user_prompt = compact_prompt_builder(
            task_goal, step.goal, dom_context, snap, history_summary or ""
        )
        llm_resp = self.executor.generate(system_prompt, user_prompt, temperature=0.0)

    action = self._structured_llm.extract_action(llm_resp.content)
    await self._execute_action(action=action, snap=snap)
    return ActOnceResult(action=action, snap=snap, used_vision=False)
295+
167296
async def _run_hook(
168297
self,
169298
hook: Callable[[StepHookContext], Any] | None,

0 commit comments

Comments
 (0)