Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
144 commits
Select commit Hold shift + click to select a range
4bdae5a
feat: Minimal agent instrumentation for AuthBridge OTEL (Approach A)
Ladas Feb 13, 2026
f80ba0f
feat: Add Starlette OTEL instrumentation for traceparent extraction
Ladas Feb 14, 2026
4cd3104
feat: add sandbox_agent with per-context workspace isolation
Ladas Feb 15, 2026
aa3dd18
fix: use a2a-sdk[http-server] for starlette/sse deps
Ladas Feb 15, 2026
5838a52
feat: add web_fetch tool with domain allowlist from sources.json
Ladas Feb 17, 2026
0bf5a38
feat: Emit LangGraph events as valid JSON for ext_proc parsing
Ladas Feb 17, 2026
2e4cdaa
fix: add MemorySaver checkpointer for multi-turn memory
Ladas Feb 18, 2026
6d83a8f
fix: address security review — interpreter bypass, HITL interrupt, TT…
Ladas Feb 25, 2026
ac9fbce
feat: add C19 workspace cleanup and C20 sub-agent spawning tools
Ladas Feb 25, 2026
14d8719
fix: harden interpreter bypass, path traversal, and approval checks
Ladas Feb 25, 2026
9822f63
feat: wire AsyncPostgresSaver for persistent session checkpointing
Ladas Feb 25, 2026
9f31312
feat: use A2A SDK DatabaseTaskStore for generic session persistence
Ladas Feb 25, 2026
92fc74c
refactor: rename agent from Sandbox Assistant to Sandbox Legion
Ladas Feb 25, 2026
7cf09ba
fix: correct DatabaseTaskStore import path
Ladas Feb 25, 2026
1649027
chore: update uv.lock after adding postgresql dependencies
Ladas Feb 25, 2026
bdb9e49
fix: lazy-init AsyncPostgresSaver with asyncpg pool
Ladas Feb 25, 2026
517cc45
fix: disable SSL for in-cluster postgres connections
Ladas Feb 25, 2026
36519ea
fix: use psycopg_pool for AsyncPostgresSaver (not asyncpg)
Ladas Feb 25, 2026
36cfc18
fix: use from_conn_string context manager for AsyncPostgresSaver
Ladas Feb 25, 2026
123d18c
fix: extract only text from tool-calling model responses
Ladas Feb 26, 2026
ec6fe43
feat: concurrency locks, interpreter bypass, TOFU verification
Ladas Feb 26, 2026
6d28be7
feat(sandbox): wire LangGraphSerializer into agent streaming loop
Ladas Feb 27, 2026
a74359c
feat(sandbox): emit LLM thinking with tool calls + aggregate multi-ta…
Ladas Feb 28, 2026
66ee018
fix(sandbox): add pool_recycle + pool_pre_ping to prevent stale DB co…
Ladas Feb 28, 2026
2e2590b
fix(sandbox): switch TaskStore from asyncpg to psycopg driver
Ladas Mar 1, 2026
048f0de
fix(sandbox): handle LLM 429/quota errors gracefully in SSE stream
Ladas Mar 1, 2026
e489461
fix(sandbox): add CACHE_BUST arg to Dockerfile for fresh builds
Ladas Mar 1, 2026
b83a366
debug: add agent.py line count check to Dockerfile build
Ladas Mar 2, 2026
dd84219
fix(sandbox): OCP arbitrary UID compatibility
Ladas Mar 2, 2026
b9bdc5c
feat(sandbox): wire multi-mode delegate tool into agent
Ladas Mar 2, 2026
939981e
feat(sandbox): add plan-execute-reflect reasoning loop
Ladas Mar 4, 2026
1d40073
feat(sandbox): add loop_id to all reasoning loop events for UI rendering
Ladas Mar 5, 2026
3772845
feat(sandbox): planner prompts for RCA reports and delegation
Ladas Mar 5, 2026
4a6d5be
feat(sandbox): skill loading + child session DB records
Ladas Mar 6, 2026
e742462
docs: add TODO for Session N skill_pack_loader integration
Ladas Mar 6, 2026
699966d
feat(sandbox): declare all tools as skills in agent card
Ladas Mar 6, 2026
716b513
fix(sandbox): revert to single skill, add dynamic scan TODO
Ladas Mar 6, 2026
5f4b512
feat(sandbox): dynamically scan workspace skills into agent card
Ladas Mar 6, 2026
4eee409
fix(sandbox): add missing os import for dynamic skill scanning
Ladas Mar 6, 2026
6f3f9b0
feat(sandbox): clone skill repos at startup for agent card + invocation
Ladas Mar 6, 2026
d9f1d9c
fix(sandbox): use upstream kagenti repo, support @branch, rm stale clone
Ladas Mar 6, 2026
eaf19db
fix(sandbox): scan SKILL.md files by directory, extract description
Ladas Mar 6, 2026
8cdcdca
fix(sandbox): search shared workspace root for skills, support SKILL.md
Ladas Mar 7, 2026
dc525f2
fix(sandbox): install gh CLI, fix delegation, improve prompts (Sessio…
Ladas Mar 7, 2026
a476b9e
feat(sandbox): text-based tool call parser for vLLM compat (Session L+3)
Ladas Mar 7, 2026
90bffff
fix(sandbox): instruct agent to clone repo before gh commands (Sessio…
Ladas Mar 7, 2026
bbaf7ef
fix(sandbox): set origin remote to upstream repo for gh CLI (Session …
Ladas Mar 7, 2026
3f84dc2
fix(sandbox): handle tuple/InvalidToolCall in event serializer (Sessi…
Ladas Mar 7, 2026
e5a63cf
feat(sandbox): add grep+glob tools, fix tuple error, single tool per …
Ladas Mar 7, 2026
0eb583d
fix(sandbox): crash-proof ToolNode + multi tool call support (Session…
Ladas Mar 7, 2026
377da2c
fix(sandbox): compound command permissions + rate-limit retry (Sessio…
Ladas Mar 8, 2026
d2cda9c
fix(sandbox): tools→reflector edge + duplicate prevention (Session R)
Ladas Mar 8, 2026
1762cab
fix(sandbox): add missing git subcommands to allow list (Session R)
Ladas Mar 8, 2026
f1b6a38
fix(sandbox): revert tools→reflector, restore tools→executor edge (Se…
Ladas Mar 8, 2026
f8d1d9b
feat(sandbox): fast-path planner + tool dedup + LiteLLM metadata (Ses…
Ladas Mar 8, 2026
40e84ad
fix(sandbox): parse Llama 4 tool format + never skip reflection (Sess…
Ladas Mar 8, 2026
43e567d
feat: token emission in SSE events + request_id tracking + recursion …
Ladas Mar 8, 2026
1dc08cd
fix(sandbox): shell tool docstring includes workspace path
Ladas Mar 8, 2026
231e857
fix(sandbox): revert f-string docstring on shell tool
Ladas Mar 8, 2026
29850d1
feat: typed event schema + serializer refactor + unit tests
Ladas Mar 8, 2026
38eed6a
fix: reporter_node detects bare decision keywords from reflector
Ladas Mar 9, 2026
add2f90
feat: emit tool_call events for text-parsed tools + reasoning field
Ladas Mar 9, 2026
d8cbe0c
fix: executor prompt enforces tool calling API usage
Ladas Mar 9, 2026
a7c68e6
fix: catch CancelledError, log every graph event for crash diagnosis
Ladas Mar 9, 2026
78c5ca2
fix: agent continues processing on client disconnect
Ladas Mar 9, 2026
be08f6f
fix: parse /shell and bash code blocks as tool calls, clarify prompt
Ladas Mar 9, 2026
4ea981b
revert: remove slash-command parser hack
Ladas Mar 9, 2026
d015770
fix: force tool calling with tool_choice=any
Ladas Mar 9, 2026
952fef9
feat: increase default budget — 40 iterations, 10 tools/step, 1M tokens
Ladas Mar 9, 2026
1ddf88b
feat: budget 100 iterations, hitl at 50
Ladas Mar 9, 2026
eae7ed6
feat: reflector stall detection — force done after 3 no-progress iter…
Ladas Mar 9, 2026
2b8fbe7
feat: planner gets tool call history on replan
Ladas Mar 9, 2026
2d58c86
fix: replan decision should go back to planner, not reporter
Ladas Mar 9, 2026
b8992b2
fix: improve stall detection, executor reliability, configurable budget
Ladas Mar 9, 2026
a08cf37
fix: escape curly braces in executor prompt to prevent format() error
Ladas Mar 9, 2026
622ab48
fix: use _safe_format for prompt templates to prevent agent crashes
Ladas Mar 9, 2026
40bee51
feat: add SERIALIZE and A2A_EMIT pipeline logging
Ladas Mar 9, 2026
2cc4031
feat: shield graph execution from client disconnect cancellation
Ladas Mar 9, 2026
4926c33
fix: include original plan with step status in replan context
Ladas Mar 10, 2026
558d98f
fix: reset stall detection after replan boundary
Ladas Mar 10, 2026
e7b344d
fix: reflector no longer forces done based on step count
Ladas Mar 10, 2026
891c8c3
fix: planner prompt defaults to proper multi-step planning
Ladas Mar 10, 2026
fa80b53
fix: filter dedup sentinel from reporter to prevent final answer leak
Ladas Mar 10, 2026
5454548
feat: router entry node + structured plan persistence across turns
Ladas Mar 10, 2026
8a86bb7
fix: reflector sees actual tool error instead of dedup sentinel
Ladas Mar 10, 2026
b512098
fix: allow export/curl/wget, enable outbound, fix HITL interrupt prop…
Ladas Mar 10, 2026
1be3345
fix: auto-approve all shell commands, remove web_fetch domain check
Ladas Mar 10, 2026
1be0259
fix: handle __interrupt__ graph events (HITL) without crashing
Ladas Mar 10, 2026
0045be7
fix: shell(*:*) wildcard prefix now matches all commands
Ladas Mar 10, 2026
6575673
fix: planner prompt remove broken export GH_TOKEN, reporter shows fai…
Ladas Mar 10, 2026
27b96d9
fix: break replan loop + add prompt visibility to events
Ladas Mar 10, 2026
a744e02
feat: prompt visibility + no-tool executor stall breaker
Ladas Mar 10, 2026
51b5d51
fix: replan loop — max replan limit, state tracking, reflector context
Ladas Mar 10, 2026
c8bb72e
feat: micro-reflection executor — one tool call at a time
Ladas Mar 10, 2026
eeac280
fix: skip lost+found in workspace cleanup (EBS ext4 metadata)
Ladas Mar 10, 2026
9b467bc
fix: don't stall-fail executor after tool errors with micro-reflection
Ladas Mar 10, 2026
134f072
fix: remove force-done overrides — let budget handle termination
Ladas Mar 10, 2026
c5e2543
fix: scope dedup to current plan iteration only
Ladas Mar 10, 2026
6ee5afd
fix: route reflector continue→executor, replan→planner
Ladas Mar 10, 2026
1d0af4a
fix: rename continue→execute in reflector routing
Ladas Mar 10, 2026
aad7ca1
docs: add mermaid graph diagram to agent code
Ladas Mar 10, 2026
39a62b8
fix: add LLM timeout (120s) and retry (3x) to ChatOpenAI
Ladas Mar 10, 2026
2e14a4d
feat: configurable LLM timeout and retries via budget
Ladas Mar 10, 2026
6e5d0dd
fix: persist background graph events after SSE consumer cancellation
Ladas Mar 10, 2026
2f2418b
feat(agent): add micro_reasoning events and full prompt data
Ladas Mar 11, 2026
1f10955
fix(agent): populate empty micro-reasoning with tool call summary
Ladas Mar 11, 2026
4d53186
fix(agent): preserve backend metadata during A2A task save
Ladas Mar 11, 2026
d0a55a8
fix(agent): add _system_prompt, _prompt_messages, model to SandboxState
Ladas Mar 11, 2026
c5164a7
feat(agent): always emit micro_reasoning, add call_id and status to t…
Ladas Mar 11, 2026
6bf25a1
feat(agent): increase prompt truncation to 50KB for full visibility
Ladas Mar 11, 2026
60712bf
fix(agent): unique step index per node invocation
Ladas Mar 11, 2026
5990d16
feat(agent): wire budget.add_tokens() in all reasoning nodes
Ladas Mar 11, 2026
4c0b2b9
feat(agent): budget_update events + general exceeded check in reflector
Ladas Mar 11, 2026
d59c328
feat(agent): add plan_step and iteration to executor events
Ladas Mar 11, 2026
7199dc5
fix(agent): truncate tool output, window executor messages, reflector…
Ladas Mar 11, 2026
913a9c5
fix(agent): reflector sees complete tool call pairs (args + result)
Ladas Mar 11, 2026
b1c57b4
fix(agent): token-based executor windowing and subagent tool filtering
Ladas Mar 11, 2026
a6649fd
fix(agent): prompt preview includes tool call arguments
Ladas Mar 11, 2026
1825d51
fix(agent): bump default max_iterations to 200
Ladas Mar 11, 2026
ca51925
fix(agent): revert max_iterations to 100, keep recursion_limit at 2000
Ladas Mar 11, 2026
a625887
fix(agent): reflector sees remaining steps, prevents premature "done"
Ladas Mar 11, 2026
b028da6
fix(agent): override reflector "done" when plan steps remain
Ladas Mar 11, 2026
2bff904
fix(agent): executor passes current_step in return dict for serializer
Ladas Mar 11, 2026
7124a25
fix(agent): enforce step boundary — executor must not jump to next step
Ladas Mar 11, 2026
7855485
feat(agent): add step_selector node between planner and executor
Ladas Mar 11, 2026
ac1e1f1
feat(agent): step_selector uses LLM to write focused executor brief
Ladas Mar 11, 2026
859f6cd
fix(agent): set recursion_limit default to 300
Ladas Mar 11, 2026
5a3d0b4
fix(agent): restore tool_choice=any — Llama 4 Scout fabricates output…
Ladas Mar 11, 2026
193f77d
feat(agent): configurable tool_choice via SANDBOX_FORCE_TOOL_CHOICE e…
Ladas Mar 11, 2026
d945fd1
feat(agent): text tool parsing controlled by SANDBOX_TEXT_TOOL_PARSIN…
Ladas Mar 11, 2026
5667ea9
fix(agent): reflector assessment echo and executor step propagation
Ladas Mar 11, 2026
09c84be
feat(agent): debug prompts controlled by SANDBOX_DEBUG_PROMPTS env var
Ladas Mar 11, 2026
7fcd9cd
fix(agent): move _DEBUG_PROMPTS after os import (NameError crash)
Ladas Mar 11, 2026
0f73f06
feat(agent): emit step_selector events for UI visibility
Ladas Mar 11, 2026
55b6fb0
fix(agent): add prompt context to early-termination events + gh CLI h…
Ladas Mar 11, 2026
0e11913
fix(agent): always run LLM in reporter — no single-step shortcut
Ladas Mar 11, 2026
1047703
fix(agent): add _budget_summary to SandboxState for budget_update events
Ladas Mar 11, 2026
7e64695
fix(agent): don't stall-detect when executor hits tool call limit
Ladas Mar 11, 2026
834937a
feat(agent): enforce token budget via LiteLLM as single source of truth
Ladas Mar 11, 2026
0d456f5
fix(agent): remove stall detector — let reflector LLM decide
Ladas Mar 12, 2026
5e1ff07
feat(agent): use LLM Budget Proxy for token budget enforcement
Ladas Mar 12, 2026
deee92c
fix: add jq to sandbox agent base image
Ladas Mar 12, 2026
65c7e57
fix(agent): reporter produces real summary on step limit instead of g…
Ladas Mar 12, 2026
31e30b5
fix(agent): remove token budget from local exceeded check
Ladas Mar 12, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions a2a/sandbox_agent/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Base image: Debian bookworm (slim) with CPython 3.12.
FROM python:3.12-slim-bookworm
# Release identifier; exported into the runtime environment further down.
ARG RELEASE_VERSION="main"

# System tooling for sandboxed execution: git, curl and jq from Debian,
# followed by the GitHub CLI (gh) from GitHub's own signed apt repository.
# The apt package lists are removed after each install round.
RUN apt-get update \
    && apt-get install -y --no-install-recommends git curl jq \
    && rm -rf /var/lib/apt/lists/* \
    && curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg \
        -o /usr/share/keyrings/githubcli-archive-keyring.gpg \
    && echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" \
        > /etc/apt/sources.list.d/github-cli.list \
    && apt-get update \
    && apt-get install -y --no-install-recommends gh \
    && rm -rf /var/lib/apt/lists/*

# uv installs the project's dependencies and also launches the server.
RUN pip install --no-cache-dir uv

WORKDIR /app
# Declared ahead of the dependency install: passing a different value makes
# the RUN steps that follow miss the build cache.
ARG CACHE_BUST
COPY . .
# Install exactly what the lockfile pins (--locked fails if it is out of date).
RUN uv sync --no-cache --locked --link-mode copy

ENV PRODUCTION_MODE=True \
    RELEASE_VERSION=${RELEASE_VERSION}

# Create /workspace, hand both trees to UID 1001 in group 0, and make them
# group-writable so OCP arbitrary UIDs (which share that group) can still
# write to /app and /workspace.
RUN mkdir -p /workspace \
    && chown -R 1001:0 /app /workspace \
    && chmod -R g+w /app /workspace
USER 1001

# Start the "server" entry point without re-syncing the environment.
CMD ["uv", "run", "--no-sync", "server"]
1 change: 1 addition & 0 deletions a2a/sandbox_agent/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Sandbox Agent
36 changes: 36 additions & 0 deletions a2a/sandbox_agent/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Packaging metadata for the sandbox agent.
[project]
name = "sandbox-agent"
version = "0.0.1"
description = "LangGraph agent with sandboxed shell execution and per-context workspace isolation."
authors = []
readme = "README.md"
license = { text = "Apache" }
requires-python = ">=3.11"
dependencies = [
    # A2A SDK with its HTTP-server and PostgreSQL extras
    "a2a-sdk[http-server,postgresql]>=0.2.16",

    # LangGraph / LangChain stack
    "langgraph>=0.2.55",
    "langchain-community>=0.3.9",
    "langchain-openai>=0.3.7",
    "langgraph-checkpoint-postgres>=2.0.0",

    # PostgreSQL drivers
    "asyncpg>=0.30.0",
    "psycopg[binary]>=3.1.0",

    # Settings
    "pydantic-settings>=2.8.1",

    # OpenTelemetry export and Starlette instrumentation
    "opentelemetry-exporter-otlp",
    "opentelemetry-instrumentation-starlette",

    # HTTP client and ASGI serving
    "httpx>=0.27.0",
    "uvicorn>=0.40.0",
    "starlette>=0.52.1",
]

# Console script: `server` resolves to sandbox_agent.agent:run.
[project.scripts]
server = "sandbox_agent.agent:run"

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

# Development-only tooling (not part of the runtime dependency list).
[dependency-groups]
dev = [
    "pytest>=9.0.2",
    "pytest-asyncio>=1.3.0",
]
20 changes: 20 additions & 0 deletions a2a/sandbox_agent/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
  "_comment": "Agent sandbox operation settings. Operations not in allow or deny go through HITL.",
  "context_workspace": "/workspace/${CONTEXT_ID}",
  "permissions": {
    "allow": [
      "shell(*:*)",
      "network(outbound:*)",
      "file(read:${WORKSPACE}/**)",
      "file(write:${WORKSPACE}/**)",
      "file(delete:${WORKSPACE}/**)"
    ],
    "deny": [
      "shell(rm -rf /:*)",
      "shell(rm -rf /*:*)",
      "shell(sudo:*)",
      "shell(chmod 777:*)",
      "shell(nc:*)",
      "shell(ncat:*)",
      "file(read:/etc/shadow:*)",
      "file(write:/etc/**:*)",
      "file(read:/proc/**:*)",
      "shell(mount:*)",
      "shell(umount:*)",
      "shell(chroot:*)",
      "shell(nsenter:*)"
    ]
  }
}
32 changes: 32 additions & 0 deletions a2a/sandbox_agent/sources.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
{
  "_comment": "Declares what this agent can access and install. Baked into agent image.",
  "agent_type": "python-data-agent",
  "package_managers": {
    "pip": {
      "enabled": true,
      "registries": [
        {
          "name": "pypi",
          "url": "https://pypi.org/simple/",
          "trusted": true
        }
      ],
      "max_install_size_mb": 500,
      "blocked_packages": [
        "subprocess32",
        "pyautogui"
      ]
    },
    "conda": {
      "enabled": false
    },
    "npm": {
      "enabled": false
    }
  },
  "web_access": {
    "enabled": true,
    "allowed_domains": [
      "github.com",
      "api.github.com",
      "raw.githubusercontent.com",
      "pypi.org",
      "huggingface.co",
      "docs.python.org"
    ],
    "blocked_domains": [
      "*.internal",
      "metadata.google.internal"
    ]
  },
  "git": {
    "enabled": true,
    "allowed_remotes": [
      "https://github.com/*",
      "https://gitlab.com/*"
    ],
    "max_clone_size_mb": 1000
  },
  "runtime": {
    "languages": [
      "python3.12",
      "bash"
    ],
    "interpreters": {
      "python": "/usr/local/bin/python3",
      "bash": "/bin/bash"
    },
    "max_execution_time_seconds": 300,
    "max_memory_mb": 2048
  }
}
Empty file.
Loading