Skip to content
58 changes: 58 additions & 0 deletions scripts/verify_fast_path_logging.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
"""Verification script for fast-path observability logging.

This script validates that the coding loop contains:
1. Fast-path detection logic (iteration == 1 check) in the approve block
2. Fast-path logging with 'fast_path' tag for observability
"""

import re
import sys
from pathlib import Path


def test_fast_path_logging_exists():
    """Check that the approve branch of the coding loop logs fast-path successes.

    Reads ``swe_af/execution/coding_loop.py`` (resolved relative to this
    script), isolates the ``if action == "approve":`` block that follows the
    ``# --- 4. BRANCH ON ACTION ---`` marker, and verifies that the block
    contains both an ``if iteration == 1:`` condition and a ``tags=[...]``
    list that includes the ``fast_path`` tag.

    Raises:
        FileNotFoundError: If coding_loop.py is not at the expected path.
        AssertionError: If the approve block, the iteration check, or the
            ``fast_path`` tag cannot be found.
    """
    coding_loop_path = Path(__file__).parent.parent / "swe_af" / "execution" / "coding_loop.py"

    if not coding_loop_path.exists():
        raise FileNotFoundError(f"Could not find coding_loop.py at {coding_loop_path}")

    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    content = coding_loop_path.read_text(encoding="utf-8")

    # Capture the body of the first 'if action == "approve":' after the section
    # marker, up to the next 'if action ==' that is preceded by a newline and
    # eight whitespace characters.
    branch_section_pattern = r'# --- 4\. BRANCH ON ACTION ---.*?if action == "approve":(.*?)(?=\n\s{8}if action ==)'
    approve_match = re.search(branch_section_pattern, content, re.DOTALL)

    if not approve_match:
        raise AssertionError("Could not find 'if action == \"approve\":' block in coding_loop.py")

    approve_block = approve_match.group(1)

    # Check for iteration == 1 condition
    iteration_check_pattern = r'if iteration == 1:'
    if not re.search(iteration_check_pattern, approve_block):
        raise AssertionError(
            "Fast-path detection missing: 'if iteration == 1:' condition not found in approve block"
        )

    # Check for fast_path tag in logging (the tags list must sit on one line,
    # since '.' does not cross newlines here)
    fast_path_tag_pattern = r'tags=\[.*?["\']fast_path["\'].*?\]'
    if not re.search(fast_path_tag_pattern, approve_block):
        raise AssertionError(
            "Fast-path logging missing: 'fast_path' tag not found in logging call within approve block"
        )

    print("✓ Fast-path detection logic present (iteration == 1 check)")
    print("✓ Fast-path logging with 'fast_path' tag present")
    print("\nAll verification checks passed!")


if __name__ == "__main__":
    # Translate the outcome of the check into the process exit status:
    # 0 when it completes, 1 when it raises one of the expected failures.
    try:
        test_fast_path_logging_exists()
    except (AssertionError, FileNotFoundError) as failure:
        print(f"✗ Verification failed: {failure}", file=sys.stderr)
        sys.exit(1)
    else:
        sys.exit(0)
49 changes: 49 additions & 0 deletions scripts/verify_model_defaults.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#!/usr/bin/env python3
"""Verification script for model default assignments.

This script validates that _RUNTIME_BASE_MODELS['claude_code'] assigns:
- 'haiku' to exactly 4 models: qa_synthesizer_model, git_model, merger_model, retry_advisor_model
- 'sonnet' to all other 12 models
"""

from swe_af.execution.schemas import _RUNTIME_BASE_MODELS


def verify_model_defaults() -> None:
    """Verify model assignments in ``_RUNTIME_BASE_MODELS['claude_code']``.

    Checks that each of the four expected roles maps to ``'haiku'`` and each
    of the twelve remaining expected roles maps to ``'sonnet'``, printing one
    line per verified role.

    The checks use explicit ``raise`` statements rather than ``assert`` so
    they still run when Python is started with ``-O``.

    Raises:
        AssertionError: If a role is missing, maps to an unexpected model, or
            the expected role sets do not add up to 16 distinct names.
    """
    claude_models = _RUNTIME_BASE_MODELS["claude_code"]

    # Expected assignments
    expected_haiku = {"qa_synthesizer_model", "git_model", "merger_model", "retry_advisor_model"}
    expected_sonnet = {
        "pm_model", "architect_model", "tech_lead_model", "sprint_planner_model",
        "coder_model", "qa_model", "code_reviewer_model", "replan_model",
        "issue_writer_model", "issue_advisor_model", "verifier_model",
        "integration_tester_model",
    }

    def _check_roles(roles, model):
        """Raise AssertionError unless every role in *roles* maps to *model*."""
        # Sorted so the output and the first reported failure are deterministic.
        for role in sorted(roles):
            if role not in claude_models:
                raise AssertionError(f"Missing role: {role}")
            actual = claude_models[role]
            if actual != model:
                raise AssertionError(f"{role} should be {model!r}, got {actual!r}")
            print(f" ✓ {role}: {actual}")

    # Verify haiku assignments
    print("Verifying haiku model assignments...")
    _check_roles(expected_haiku, "haiku")

    # Verify sonnet assignments
    print("\nVerifying sonnet model assignments...")
    _check_roles(expected_sonnet, "sonnet")

    # Verify total count; an overlap between the two sets would shrink the union.
    all_expected = expected_haiku | expected_sonnet
    if len(all_expected) != 16:
        raise AssertionError(f"Expected 16 total models, got {len(all_expected)}")
    print(
        f"\n✓ All {len(all_expected)} models verified: "
        f"{len(expected_haiku)} haiku + {len(expected_sonnet)} sonnet"
    )


if __name__ == "__main__":
    # A failed check raises out of verify_model_defaults(), so the banner is
    # printed only when the call returns normally.
    verify_model_defaults()
    success_banner = "\n✓ Verification passed!"
    print(success_banner)
136 changes: 136 additions & 0 deletions scripts/verify_turn_budgets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
"""Verification script for turn budget right-sizing.

Tests that all 17 execution agent functions in execution_agents.py have the
correct role-specific max_turns values per the architecture specification.
"""

import re
import subprocess
import sys
from pathlib import Path


def test_all_turn_budgets_match_specification():
    """Verify all 17 agent functions have correct max_turns values.

    For each expected agent function, locates its ``async def`` header in
    ``swe_af/reasoners/execution_agents.py`` and reads the first literal
    ``max_turns=<int>`` that appears before the next top-level definition.
    Bounding the search to that span keeps a function without a literal
    budget from being credited with the value of a later function.

    Raises:
        FileNotFoundError: If execution_agents.py is not at the expected path.
        AssertionError: Listing every function whose budget is missing or
            differs from the specification.
    """
    # Expected turn budgets per architecture specification
    expected = {
        'run_retry_advisor': 20,
        'run_issue_advisor': 20,
        'run_replanner': 30,
        'run_issue_writer': 20,
        'run_verifier': 20,
        'run_git_init': 10,
        'run_workspace_setup': 10,
        'run_merger': 10,
        'run_integration_tester': 10,
        'run_workspace_cleanup': 10,
        'run_coder': 50,
        'run_qa': 20,
        'run_code_reviewer': 20,
        'run_qa_synthesizer': 10,
        'generate_fix_issues': 20,
        'run_repo_finalize': 10,
        'run_github_pr': 10,
    }

    # Read the execution_agents.py file
    file_path = Path(__file__).parent.parent / 'swe_af' / 'reasoners' / 'execution_agents.py'
    content = file_path.read_text(encoding='utf-8')

    # Start of any column-0 decorator, function, or class: marks where the
    # previous top-level definition ends.
    next_definition = re.compile(
        r'^(?:@\w|(?:async[ \t]+)?def[ \t]+\w+[ \t]*\(|class[ \t]+\w+)',
        re.MULTILINE,
    )
    budget = re.compile(r'max_turns=(\d+)')

    errors = []
    for func_name, expected_turns in expected.items():
        header = re.search(rf'async def {re.escape(func_name)}\(', content)
        if header is None:
            errors.append(f'{func_name}: async function definition not found')
            continue

        # Limit the search to this function's own source span.
        boundary = next_definition.search(content, header.end())
        span_end = boundary.start() if boundary else len(content)
        match = budget.search(content, header.end(), span_end)

        if not match:
            errors.append(f'{func_name}: max_turns assignment not found in AgentAIConfig')
            continue

        actual_turns = int(match.group(1))
        if actual_turns != expected_turns:
            errors.append(
                f'{func_name}: expected max_turns={expected_turns}, got {actual_turns}'
            )

    # Assert no errors
    if errors:
        error_msg = '\n'.join(f' - {e}' for e in errors)
        raise AssertionError(f'Turn budget verification failed:\n{error_msg}')

    print(f'✓ All {len(expected)} turn budgets match specification')


def test_no_default_agent_max_turns_usage():
    """Verify DEFAULT_AGENT_MAX_TURNS is not used in agent configs (except import).

    Scans ``swe_af/reasoners/execution_agents.py`` for lines containing
    ``DEFAULT_AGENT_MAX_TURNS`` and ignores those that belong to an import
    statement. The scan is done in Python rather than by shelling out to
    ``grep``, so a missing file or missing ``grep`` binary is no longer
    mistaken for "no matches", and the import is recognised wherever it sits
    in the file.

    Raises:
        FileNotFoundError: If execution_agents.py is not at the expected path.
        SyntaxError: If execution_agents.py cannot be parsed.
        AssertionError: Listing each non-import line that still mentions
            DEFAULT_AGENT_MAX_TURNS.
    """
    import ast  # local import: only this check needs it

    file_path = Path(__file__).parent.parent / 'swe_af' / 'reasoners' / 'execution_agents.py'
    source = file_path.read_text(encoding='utf-8')

    # Collect every line number covered by an import statement, including the
    # continuation lines of a parenthesised multi-line import.
    import_lines = set()
    for node in ast.walk(ast.parse(source, filename=str(file_path))):
        if isinstance(node, (ast.Import, ast.ImportFrom)):
            last_line = getattr(node, 'end_lineno', None) or node.lineno
            import_lines.update(range(node.lineno, last_line + 1))

    # Split on '\n' only so line numbers agree with the ones reported by ast.
    lines = [
        f'{lineno}:{text}'
        for lineno, text in enumerate(source.split('\n'), start=1)
        if 'DEFAULT_AGENT_MAX_TURNS' in text and lineno not in import_lines
    ]

    if lines:
        error_msg = '\n'.join(f' Line {line}' for line in lines)
        raise AssertionError(
            f'DEFAULT_AGENT_MAX_TURNS still used in agent configs:\n{error_msg}'
        )

    print('✓ No DEFAULT_AGENT_MAX_TURNS usage in agent configs')


def test_all_17_functions_checked():
    """Confirm the budget table below lists exactly 17 agent functions.

    Raises:
        AssertionError: If the number of entries in the table is not 17.
    """
    budgets = {
        'run_retry_advisor': 20,
        'run_issue_advisor': 20,
        'run_replanner': 30,
        'run_issue_writer': 20,
        'run_verifier': 20,
        'run_git_init': 10,
        'run_workspace_setup': 10,
        'run_merger': 10,
        'run_integration_tester': 10,
        'run_workspace_cleanup': 10,
        'run_coder': 50,
        'run_qa': 20,
        'run_code_reviewer': 20,
        'run_qa_synthesizer': 10,
        'generate_fix_issues': 20,
        'run_repo_finalize': 10,
        'run_github_pr': 10,
    }
    required = 17
    found = len(budgets)

    # Happy path first: report and leave.
    if found == required:
        print(f'✓ Checking exactly {required} functions as specified')
        return

    raise AssertionError(
        f'Expected to check {required} functions, but only checking {found}'
    )


if __name__ == '__main__':
    # Run the three checks in order; the first AssertionError stops the run
    # and becomes exit status 1, otherwise the script exits with 0.
    try:
        test_all_17_functions_checked()
        test_all_turn_budgets_match_specification()
        test_no_default_agent_max_turns_usage()
    except AssertionError as failure:
        print(f'\n❌ Verification failed:\n{failure}')
        sys.exit(1)
    else:
        print('\n✅ All verification tests passed!')
        sys.exit(0)
7 changes: 7 additions & 0 deletions swe_af/execution/coding_loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -704,6 +704,13 @@ async def run_coding_loop(
f"Coding loop APPROVED: {issue_name} after {iteration} iteration(s)",
tags=["coding_loop", "complete", issue_name],
)
# Fast-path detection for observability
if iteration == 1:
if note_fn:
note_fn(
f"Fast-path success: {issue_name} approved on first iteration",
tags=["coding_loop", "fast_path", issue_name],
)
return IssueResult(
issue_name=issue_name,
outcome=IssueOutcome.COMPLETED,
Expand Down
3 changes: 3 additions & 0 deletions swe_af/execution/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,9 @@ class QASynthesisResult(BaseModel):
"claude_code": {
**{field: "sonnet" for field in ALL_MODEL_FIELDS},
"qa_synthesizer_model": "haiku",
"git_model": "haiku",
"merger_model": "haiku",
"retry_advisor_model": "haiku",
},
"open_code": {
**{field: "minimax/minimax-m2.5" for field in ALL_MODEL_FIELDS},
Expand Down
Loading
Loading