Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Fixed

- Fixed unit tests that still referenced the removed `create_changelog_subagent` by migrating them to `create_docs_research_subagent` expectations and `/agents` output assertions.
- Fixed duplicate agent launches when issue labels are added, removed, and re-added by checking if DAIV has already reacted to the issue before processing label events.
- Fixed sandbox archive layout to avoid adding the repository root folder; repository contents are now archived at the top level (while still excluding `.git`).
- Fixed handling of empty GitHub repositories when reading config files; the client now gracefully returns `None` instead of raising an exception when attempting to read files from empty repositories.
Expand Down
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ compilemessages:
uv run django-admin compilemessages

integration-tests:
uv run pytest --reuse-db tests/integration_tests --no-cov --log-level=INFO -k test_skill_activated -n 4
uv run pytest --reuse-db tests/integration_tests --no-cov --log-level=INFO -m subagents

swebench:
uv run evals/swebench.py --dataset-path "SWE-bench/SWE-bench_Lite" --dataset-split "dev" --output-path predictions.json --num-samples 1
Expand All @@ -62,4 +62,4 @@ docs-serve:
uv run --only-group=docs mkdocs serve -o -a localhost:4000 -w docs/

langsmith-fetch:
uv run langsmith-fetch traces --project-uuid 00d1a04e-0087-4813-9a18-5995cd5bee5c --limit 1 ./my-traces
uv run langsmith-fetch traces --project-uuid 00d1a04e-0087-4813-9a18-5995cd5bee5c --limit 4 ./my-traces
4 changes: 4 additions & 0 deletions daiv/automation/agent/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ class DAIVAgentSettings(BaseSettings):
default=ModelName.CLAUDE_HAIKU_4_5,
description="Model for the explore subagent, a fast model with capabilities to call tools.",
)
DOCS_RESEARCH_MODEL_NAME: ModelName | str = Field(
default=ModelName.GPT_5_1_CODEX_MINI,
description="Model for the docs research subagent, a fast model with capabilities to call tools.",
)


settings = DAIVAgentSettings()
5 changes: 3 additions & 2 deletions daiv/automation/agent/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,12 @@ class ModelName(StrEnum):

# OpenAI models
GPT_4_1_MINI = "openrouter:openai/gpt-4.1-mini"
GPT_4_1 = "openrouter:openai/gpt-4.1"
GPT_5_1_CODEX_MINI = "openrouter:openai/gpt-5.1-codex-mini"
GPT_5_2 = "openrouter:openai/gpt-5.2"
GPT_5_2_CODEX = "openrouter:openai/gpt-5.2-codex"
GPT_5_3_CODEX = "openrouter:openai/gpt-5.3-codex"

# z-ai models
Z_AI_GLM_4_7 = "openrouter:z-ai/glm-4.7"
Z_AI_GLM_5 = "openrouter:z-ai/glm-5"

# minimax models
Expand Down
11 changes: 6 additions & 5 deletions daiv/automation/agent/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
from automation.agent.middlewares.web_search import WebSearchMiddleware
from automation.agent.prompts import DAIV_SYSTEM_PROMPT, WRITE_TODOS_SYSTEM_PROMPT
from automation.agent.subagents import (
create_changelog_subagent,
create_docs_research_subagent,
create_explore_subagent,
create_general_purpose_subagent,
)
Expand Down Expand Up @@ -161,12 +161,13 @@ async def create_daiv_agent(
web_search_enabled=web_search_enabled,
web_fetch_enabled=_web_fetch_enabled,
),
create_explore_subagent(backend, ctx),
create_changelog_subagent(
model, backend, ctx, sandbox_enabled=_sandbox_enabled, web_search_enabled=web_search_enabled
),
create_explore_subagent(backend),
]

if _web_fetch_enabled:
        # Only create the docs research subagent when web fetch is enabled,
        # since the subagent depends on the web fetch tool.
subagents.append(create_docs_research_subagent(backend))

agent_conditional_middlewares = []

if web_search_enabled:
Expand Down
72 changes: 66 additions & 6 deletions daiv/automation/agent/middlewares/web_fetch.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

import hashlib
import ipaddress
import logging
from typing import TYPE_CHECKING, Annotated
from urllib.parse import urljoin, urlparse, urlunparse
Expand All @@ -19,6 +20,8 @@
if TYPE_CHECKING:
from collections.abc import Awaitable, Callable

from pydantic import SecretStr

logger = logging.getLogger("daiv.tools")

WEB_FETCH_NAME = "web_fetch"
Expand Down Expand Up @@ -54,27 +57,74 @@
"""


def _get_auth_headers_for_url(url: str) -> dict[str, str]:
    """
    Return authentication headers configured for the given URL's domain.

    Matching is exact: a configured domain of ``context7.com`` matches
    only ``context7.com`` and not ``api.context7.com`` or ``notcontext7.com``.

    Returns an empty dict when no headers are configured for the hostname.
    """
    hostname = urlparse(url).hostname or ""
    # Exact hostname matching means at most one configured entry can apply,
    # so a direct lookup replaces the previous scan/length-sort/merge logic,
    # which was dead code under exact matching.
    headers = settings.WEB_FETCH_AUTH_HEADERS.get(hostname, {})
    return {key: value.get_secret_value() for key, value in headers.items()}


def _upgrade_http_to_https(url: str) -> str:
parsed = urlparse(url)
if parsed.scheme == "http":
return urlunparse(("https", parsed.netloc, parsed.path, parsed.params, parsed.query, parsed.fragment))
return url


def _is_private_or_local(hostname: str) -> bool:
"""
Check if a hostname is a private/local IP address or localhost.
"""
# Check hostname literals first
if hostname.lower() in {"localhost", "localhost.localdomain"}:
return True

try:
ip = ipaddress.ip_address(hostname)
return ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_reserved or ip.is_multicast
except ValueError:
# Not a valid IP address, could be a hostname
# Check for localhost-like patterns
return hostname.lower().endswith(".local") or hostname.lower().endswith(".localhost")


def _is_valid_http_url(url: str) -> bool:
parsed = urlparse(url)
return parsed.scheme in {"http", "https"} and bool(parsed.netloc)


async def _fetch_url_text(url: str, *, timeout_seconds: int, proxy_url: str | None) -> tuple[str, str, str]:
async def _fetch_url_text(
url: str, *, timeout_seconds: int, proxy_url: str | None, extra_headers: dict[str, str] | None = None
) -> tuple[str, str, str]:
"""
Returns (final_url, content_type, page_raw).
"""
from httpx import AsyncClient, HTTPError

# SSRF protection: block private/local addresses
parsed = urlparse(url)
hostname = parsed.hostname or ""
if _is_private_or_local(hostname):
raise ValueError(f"Requests to private/local addresses are blocked: {url}")

request_headers = {"User-Agent": USER_AGENT, **(extra_headers or {})}

async with AsyncClient(proxy=proxy_url, follow_redirects=False) as client:
try:
response = await client.get(url, headers={"User-Agent": USER_AGENT}, timeout=timeout_seconds)
response = await client.get(url, headers=request_headers, timeout=timeout_seconds)
except HTTPError as e:
raise ValueError(f"Failed to fetch {url}: {e!r}") from e

Expand All @@ -86,7 +136,9 @@ async def _fetch_url_text(url: str, *, timeout_seconds: int, proxy_url: str | No
raise RuntimeError(f"<redirect_url>{redirect_url}</redirect_url>")

# Same-host redirects are fine to follow automatically (e.g., path normalization).
return await _fetch_url_text(redirect_url, timeout_seconds=timeout_seconds, proxy_url=proxy_url)
return await _fetch_url_text(
redirect_url, timeout_seconds=timeout_seconds, proxy_url=proxy_url, extra_headers=extra_headers
)

if response.status_code >= 400:
raise ValueError(f"Failed to fetch {url} - status code {response.status_code}")
Expand All @@ -111,8 +163,12 @@ async def _fetch_markdown_for_url(url: str) -> str:
"""
Fetch the URL and return markdown content.
"""
auth_headers = _get_auth_headers_for_url(url)
final_url, content_type, page_raw = await _fetch_url_text(
url, timeout_seconds=settings.WEB_FETCH_TIMEOUT_SECONDS, proxy_url=settings.WEB_FETCH_PROXY_URL
url,
timeout_seconds=settings.WEB_FETCH_TIMEOUT_SECONDS,
proxy_url=settings.WEB_FETCH_PROXY_URL,
extra_headers=auth_headers or None,
)

is_html = "<html" in page_raw[:200].lower() or ("text/html" in content_type) or not content_type
Expand Down Expand Up @@ -187,5 +243,9 @@ def __init__(self) -> None:
async def awrap_model_call(
self, request: ModelRequest, handler: Callable[[ModelRequest], Awaitable[ModelResponse]]
) -> ModelResponse:
request = request.override(system_prompt=request.system_prompt + "\n\n" + WEB_FETCH_SYSTEM_PROMPT)
return await handler(request)
system_prompt = ""
if request.system_prompt:
system_prompt = request.system_prompt + "\n\n"
system_prompt += WEB_FETCH_SYSTEM_PROMPT

return await handler(request.override(system_prompt=system_prompt))
Loading
Loading