Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ repos:
args: ["--branch", "main"]

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.14.11
rev: v0.15.0
hooks:
- id: ruff
name: Run the ruff linter
Expand All @@ -16,6 +16,6 @@ repos:
name: Run the ruff formatter

- repo: https://github.com/tox-dev/pyproject-fmt
rev: v2.11.1
rev: v2.14.2
hooks:
- id: pyproject-fmt
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Fixed repeated generation of GitHub CLI installation tokens by caching the token in the agent session state for subsequent `gh` tool calls.
- Fixed skill tool to properly return a `Command` object for state updates instead of returning messages directly.
- Fixed `daiv-auto` label to work as a trigger label that both launches the agent and enables auto-approval mode, eliminating the need to add two separate labels.
- Fixed agent post-run failures when git push returns authentication/permission errors by handling push permission failures gracefully in git middleware and adding regression tests.

### Removed

Expand Down
6 changes: 3 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ help:
@echo " make lint-fix - Fix linting and formatting issues"
@echo " make lint-typing - Run type checking with ty"
@echo " make lock - Update uv lock"
@echo " make evals - Run evals"
@echo " make integration-tests - Run integration tests"

test:
LANGCHAIN_TRACING_V2=false uv run pytest -s tests/unit_tests
Expand Down Expand Up @@ -42,8 +42,8 @@ makemessages:
compilemessages:
uv run django-admin compilemessages

evals:
LANGSMITH_TEST_SUITE="DAIV: PR Describer" uv run pytest --reuse-db evals --no-cov --log-level=INFO -k test_pr_describer -n 2
integration-tests:
LANGSMITH_TEST_SUITE="DAIV: Diff to Metadata" uv run pytest --reuse-db tests/integration_tests --no-cov --log-level=INFO -k test_diff_to_metadata -n 2

swebench:
uv run evals/swebench.py --dataset-path "SWE-bench/SWE-bench_Lite" --dataset-split "dev" --output-path predictions.json --num-samples 1
Expand Down
6 changes: 5 additions & 1 deletion daiv/automation/agent/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class DAIVAgentSettings(BaseSettings):
description="Thinking level to be used for tasks. Set as `None` to disable thinking.",
)
MAX_MODEL_NAME: ModelName | str = Field(
default=ModelName.CLAUDE_OPUS_4_5,
default=ModelName.CLAUDE_OPUS_4_6,
description=(
"Model for tasks when daiv-max label is present, a multi-modal (image and text) model with "
"capabilities to call tools."
Expand All @@ -37,6 +37,10 @@ class DAIVAgentSettings(BaseSettings):
"Thinking level to be used for tasks when daiv-max label is present. Set as `None` to disable thinking."
),
)
EXPLORE_MODEL_NAME: ModelName | str = Field(
default=ModelName.CLAUDE_HAIKU_4_5,
description="Model for the explore subagent, a fast model with capabilities to call tools.",
)


settings = DAIVAgentSettings()
14 changes: 6 additions & 8 deletions daiv/automation/agent/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,15 @@
BUILTIN_SKILLS_PATH = PROJECT_DIR / "automation" / "agent" / "skills"

# Path where the skills are stored in repository.
DAIV_SKILLS_PATH = ".daiv/skills"
CURSOR_SKILLS_PATH = ".cursor/skills"
CLAUDE_CODER_SKILLS_PATH = ".claude/skills"
CLAUDE_CODE_SKILLS_PATH = ".claude/skills"
AGENTS_SKILLS_PATH = ".agents/skills"

# Paths where the skills are stored in repository.
SKILLS_SOURCES = [DAIV_SKILLS_PATH, CURSOR_SKILLS_PATH, CLAUDE_CODER_SKILLS_PATH, AGENTS_SKILLS_PATH]
SKILLS_SOURCES = [CURSOR_SKILLS_PATH, CLAUDE_CODE_SKILLS_PATH, AGENTS_SKILLS_PATH]

# Path where the memory is stored in repository.
DAIV_MEMORY_PATH = ".daiv/AGENTS.md"
AGENTS_MEMORY_PATH = ".agents/AGENTS.md"


class ModelName(StrEnum):
Expand All @@ -24,18 +23,17 @@ class ModelName(StrEnum):

You can also use `anthropic`, `google` or `openai` model providers directly to use any model that is supported
by Anthropic, Google or OpenAI.

Only models that have been tested and are working well are listed here for the sake of convenience.
"""

# Anthropic models
CLAUDE_OPUS_4_5 = "openrouter:anthropic/claude-opus-4.5"
CLAUDE_OPUS_4_6 = "openrouter:anthropic/claude-opus-4.6"
CLAUDE_SONNET_4_5 = "openrouter:anthropic/claude-sonnet-4.5"
CLAUDE_HAIKU_4_5 = "openrouter:anthropic/claude-haiku-4.5"

# OpenAI models
GPT_4_1_MINI = "openrouter:openai/gpt-4.1-mini"
GPT_5_1_CODEX_MINI = "openrouter:openai/gpt-5.1-codex-mini"
GPT_5_1_CODEX = "openrouter:openai/gpt-5.1-codex"
GPT_5_1_CODEX_MAX = "openrouter:openai/gpt-5.1-codex-max"
GPT_5_2 = "openrouter:openai/gpt-5.2"
GPT_5_2_CODEX = "openrouter:openai/gpt-5.2-codex"

Expand Down
18 changes: 18 additions & 0 deletions daiv/automation/agent/diff_to_metadata/conf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict

from automation.agent.constants import ModelName


class DiffToMetadataSettings(BaseSettings):
    """Settings for the diff-to-metadata agent, overridable via `DIFF_TO_METADATA_*` env vars."""

    # `env_parse_none_str="None"` lets an env var set a field to None by passing the literal string "None".
    model_config = SettingsConfigDict(env_prefix="DIFF_TO_METADATA_", env_parse_none_str="None")

    # Primary model used to transform a diff into metadata.
    MODEL_NAME: ModelName | str = Field(
        default=ModelName.CLAUDE_HAIKU_4_5, description="Model name to be used to transform a diff into metadata."
    )
    # Model tried when the primary model fails.
    FALLBACK_MODEL_NAME: ModelName | str = Field(
        default=ModelName.GPT_4_1_MINI, description="Fallback model name to be used when the primary model fails."
    )


# Module-level singleton consumed by the diff-to-metadata graph.
settings = DiffToMetadataSettings()
167 changes: 167 additions & 0 deletions daiv/automation/agent/diff_to_metadata/graph.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
from __future__ import annotations

from pathlib import Path
from typing import TYPE_CHECKING, Any, cast

from django.utils import timezone

from deepagents.backends import FilesystemBackend
from deepagents.graph import create_agent
from deepagents.middleware.memory import MemoryMiddleware
from langchain.agents.middleware import ModelFallbackMiddleware, dynamic_prompt
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableLambda, RunnableParallel
from prompt_toolkit import HTML, PromptSession

from automation.agent import BaseAgent
from automation.agent.constants import AGENTS_MEMORY_PATH, ModelName
from automation.agent.middlewares.prompt_cache import AnthropicPromptCachingMiddleware
from codebase.base import Scope
from codebase.context import RuntimeCtx, set_runtime_ctx
from codebase.utils import redact_diff_content

from .conf import settings
from .prompts import human_commit_message, human_pr_metadata, system
from .schemas import CommitMetadata, PullRequestMetadata

if TYPE_CHECKING:
from collections.abc import Sequence

from langchain.agents.middleware.types import ModelRequest
from langchain_core.runnables import Runnable


@dynamic_prompt
def dynamic_system_prompt(request: ModelRequest) -> str:
    """
    Build the system prompt for the changes metadata agent.

    Renders the shared `system` template with the current date and, when the request
    already carries a system prompt, prepends it separated by a blank line.
    """
    rendered = cast("str", system.format(current_date_time=timezone.now().strftime("%d %B, %Y")).content)
    if request.system_prompt:
        return f"{request.system_prompt}\n\n{rendered}"
    return rendered


def create_diff_to_metadata_graph(
    model_names: Sequence[ModelName | str] = (settings.MODEL_NAME, settings.FALLBACK_MODEL_NAME),
    *,
    ctx: RuntimeCtx,
    include_pr_metadata: bool = True,
    include_commit_message: bool = True,
) -> Runnable:
    """
    Create a graph to describe changes to feed into a pull request and optionally a commit message.

    Args:
        model_names: Model identifiers tried in order — the first is the primary model, the rest
            are registered as fallbacks via `ModelFallbackMiddleware`.
        ctx: The runtime context (repository working dir and config) the agents run against.
        include_pr_metadata: Include the PR metadata branch in the parallel graph.
        include_commit_message: Include the commit message branch in the parallel graph.

    Returns:
        The diff-to-metadata graph: input selector | parallel agents | output selector.

    Raises:
        ValueError: If both `include_pr_metadata` and `include_commit_message` are False.
    """
    # `assert` is stripped under `python -O`; validate the flags explicitly instead.
    if not (include_pr_metadata or include_commit_message):
        raise ValueError("At least one of include_pr_metadata or include_commit_message must be True")

    agent_path = Path(ctx.repo.working_dir)

    backend = FilesystemBackend(root_dir=agent_path.parent, virtual_mode=True)

    model = BaseAgent.get_model(model=model_names[0])
    fallback_models = [BaseAgent.get_model(model=model_name) for model_name in model_names[1:]]

    middleware = [
        # Loads repo-level instructions (context file + AGENTS.md) into the agent's memory.
        MemoryMiddleware(
            backend=backend,
            sources=[f"/{agent_path.name}/{ctx.config.context_file_name}", f"/{agent_path.name}/{AGENTS_MEMORY_PATH}"],
        ),
        AnthropicPromptCachingMiddleware(),
        dynamic_system_prompt,
    ]

    if fallback_models:
        middleware.append(ModelFallbackMiddleware(fallback_models[0], *fallback_models[1:]))

    graphs: dict[str, Runnable] = {}

    if include_pr_metadata:
        graphs["pr_metadata"] = (
            ChatPromptTemplate.from_messages([human_pr_metadata]).partial(extra_context="")
            | create_agent(
                model=model,
                tools=[],  # No tools are needed for this agent, it only uses the memory and the system prompt
                middleware=middleware,
                response_format=PullRequestMetadata,
                context_schema=RuntimeCtx,
            )
        ).with_config(run_name="PRMetadata")

    if include_commit_message:
        graphs["commit_message"] = (
            ChatPromptTemplate.from_messages([human_commit_message])
            | create_agent(
                model=model,
                tools=[],  # No tools are needed for this agent, it only uses the memory and the system prompt
                middleware=middleware,
                response_format=CommitMetadata,
                context_schema=RuntimeCtx,
            )
        ).with_config(run_name="CommitMessage")

    def _input_selector(x: dict[str, Any]) -> dict[str, str]:
        # Fan the shared "diff" input out to per-branch keys, honoring branch-specific overrides.
        input_data = {}
        if include_pr_metadata:
            input_data["pr_metadata_diff"] = x.get("pr_metadata_diff", x.get("diff", ""))
        if include_commit_message:
            input_data["commit_message_diff"] = x.get("commit_message_diff", x.get("diff", ""))
        return input_data

    def _output_selector(x: dict[str, Any]) -> dict[str, PullRequestMetadata | CommitMetadata]:
        # Unwrap each branch's structured response into a flat result dict; branches that
        # produced no output are simply omitted.
        output: dict[str, PullRequestMetadata | CommitMetadata] = {}
        if include_pr_metadata and "pr_metadata" in x:
            output["pr_metadata"] = x["pr_metadata"]["structured_response"]
        if include_commit_message and "commit_message" in x:
            output["commit_message"] = x["commit_message"]["structured_response"]
        return output

    run_name = "DiffToMetadata"
    return (RunnableLambda(_input_selector) | RunnableParallel(graphs) | RunnableLambda(_output_selector)).with_config(
        run_name=run_name,
        tags=[run_name],
        metadata={"include_pr_metadata": include_pr_metadata, "include_commit_message": include_commit_message},
    )


async def main():
    """Interactive REPL: paste a diff and print the generated PR metadata / commit message."""
    prompt_session = PromptSession(
        message=HTML('<style fg="#ffffff">></style> '),
        complete_while_typing=True,  # Show completions as you type
        complete_in_thread=True,  # Async completion prevents menu freezing
        mouse_support=False,
        enable_open_in_editor=True,  # Allow Ctrl+X Ctrl+E to open external editor
        enable_history_search=True,
        wrap_lines=True,
        reserve_space_for_menu=7,  # Reserve space for completion menu to show 5-6 results
    )
    async with set_runtime_ctx(repo_id="srtab/daiv", scope=Scope.GLOBAL, ref="main") as ctx:
        graph = create_diff_to_metadata_graph(ctx=ctx, model_names=[ModelName.CLAUDE_HAIKU_4_5])
        while True:
            raw_diff = await prompt_session.prompt_async()
            # Strip content matching the repo's omit patterns before handing the diff to the agent.
            sanitized_diff = redact_diff_content(raw_diff, ctx.config.omit_content_patterns)
            result = await graph.ainvoke(
                {"diff": sanitized_diff}, context=ctx, config={"configurable": {"thread_id": "1"}}
            )
            if not result:
                continue
            for key in ("pr_metadata", "commit_message"):
                if key in result:
                    print(result[key].model_dump_json(indent=2))  # noqa: T201


if __name__ == "__main__":
    import asyncio

    import django

    # Initialize Django (settings + app registry) before running the agent loop —
    # this module uses Django utilities (e.g. `django.utils.timezone`) at runtime.
    django.setup()
    asyncio.run(main())
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,16 @@
- Use a sensible default:
- branch: <type>/<short-kebab-summary> where type ∈ {feat, fix, chore, docs, refactor, test}
- commit_message: Conventional Commits style "<type>: <short summary>" (subject only)
5) Output MUST match the requested structured format exactly (no extra keys).
""",
5) Output MUST match the requested structured format exactly (no extra keys).""",
"mustache",
)

human = HumanMessagePromptTemplate.from_template(
"""Generate PR metadata from the repo instructions and code changes.
human_pr_metadata = HumanMessagePromptTemplate.from_template(
"""Generate PR metadata from the memory and code changes.

Diff hunks (unified diff; may include multiple files):
~~~diff
{{diff}}
{{pr_metadata_diff}}
~~~

{{#extra_context}}
Expand Down Expand Up @@ -56,7 +55,26 @@
- branch:
- If memory defines a naming convention, follow it.
- Otherwise use: "<type>/<kebab-case-summary>".
- Keep it lowercase, ascii, no spaces, avoid > 50 chars.
""",
- Keep it lowercase, ascii, no spaces, avoid > 50 chars.""",
"mustache",
)


human_commit_message = HumanMessagePromptTemplate.from_template(
"""Generate a commit message from the memory and code changes.

Diff hunks (unified diff; may include multiple files):
~~~diff
{{commit_message_diff}}
~~~

Output requirements:
- Return a single JSON object with EXACTLY this key:
- commit_message

Field rules:
- commit_message:
- If memory defines a format, follow it.
- Otherwise use: "<type>: <summary>" (Conventional Commits), single line.""",
"mustache",
)
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,11 @@
from pydantic import BaseModel, Field


class CommitMetadata(BaseModel):
commit_message: str


class PullRequestMetadata(BaseModel):
title: str = Field()
title: str
branch: str = Field(pattern=r"[a-z0-9-_/]")
description: str = Field()
commit_message: str = Field()
description: str
Loading
Loading