diff --git a/src/agents/repository_analysis_agent/agent.py b/src/agents/repository_analysis_agent/agent.py index acadc95..c3d9781 100644 --- a/src/agents/repository_analysis_agent/agent.py +++ b/src/agents/repository_analysis_agent/agent.py @@ -38,6 +38,10 @@ async def execute(self, **kwargs) -> AgentResult: await analyze_pr_history(state, request.max_prs) await analyze_contributing_guidelines(state) + # Only generate recommendations if we have basic repository data + if not state.repository_features.language: + raise ValueError("Unable to determine repository language - cannot generate appropriate rules") + state.recommendations = _default_recommendations(state) validate_recommendations(state) response = summarize_analysis(state, request) diff --git a/src/agents/repository_analysis_agent/nodes.py b/src/agents/repository_analysis_agent/nodes.py index edf2cb8..0286f42 100644 --- a/src/agents/repository_analysis_agent/nodes.py +++ b/src/agents/repository_analysis_agent/nodes.py @@ -8,6 +8,7 @@ from __future__ import annotations +import logging import textwrap from typing import Any @@ -32,6 +33,9 @@ async def analyze_repository_structure(state: RepositoryAnalysisState) -> None: installation_id = state.installation_id repo_data = await github_client.get_repository(repo, installation_id=installation_id) + if not repo_data: + raise ValueError(f"Could not fetch repository data for {repo}") + workflows = await github_client.list_directory_any_auth( repo_full_name=repo, path=".github/workflows", installation_id=installation_id ) @@ -42,7 +46,7 @@ async def analyze_repository_structure(state: RepositoryAnalysisState) -> None: has_codeowners=bool(await github_client.get_file_content(repo, ".github/CODEOWNERS", installation_id)), has_workflows=bool(workflows), workflow_count=len(workflows or []), - language=(repo_data or {}).get("language"), + language=repo_data.get("language"), contributor_count=len(contributors), pr_count=0, ) @@ -54,8 +58,14 @@ async def analyze_pr_history(state: RepositoryAnalysisState, max_prs: int) -> No installation_id = state.installation_id prs = await github_client.list_pull_requests(repo, installation_id=installation_id, state="all", per_page=max_prs) + if prs is None: + # If PR listing fails, continue with empty samples rather than failing + state.pr_samples = [] + state.repository_features.pr_count = 0 + return + samples: list[PullRequestSample] = [] - for pr in prs or []: + for pr in prs: samples.append( PullRequestSample( number=pr.get("number", 0), @@ -215,19 +225,28 @@ def _default_recommendations( Currently, validators like `author_team_is` and `file_patterns` operate independently. """ + logger = logging.getLogger(__name__) + recommendations: list[RuleRecommendation] = [] # Get language-specific patterns based on repository analysis - source_patterns, test_patterns = _get_language_specific_patterns(state.repository_features.language) + language = state.repository_features.language + source_patterns, test_patterns = _get_language_specific_patterns(language) + + logger.info( + f"Generating recommendations for {state.repository_full_name}: language={language}, pr_count={state.repository_features.pr_count}" + ) # Analyze PR history for bad habits pr_issues = _analyze_pr_bad_habits(state) # Require tests when source code changes. # This is especially important if we detect missing tests in PR history - test_reasoning = f"Default guardrail for code changes without tests. Patterns adapted for {state.repository_features.language or 'multi-language'} repository." + test_reasoning = f"Repository analysis for {state.repository_full_name}. Language: {language or 'unknown'}. Patterns adapted for {language or 'multi-language'} repository." if pr_issues.get("missing_tests", 0) > 0: test_reasoning += f" Detected {pr_issues['missing_tests']} recent PRs without test files." + if state.contributing_analysis.content and state.contributing_analysis.requires_tests: + test_reasoning += " Contributing guidelines explicitly require tests." # Build YAML rule with proper indentation # parameters: is at column 0, source_patterns: at column 2, list items at column 4 @@ -239,17 +258,23 @@ def _default_recommendations( severity: medium event_types: - pull_request -parameters: + parameters: source_patterns: {source_patterns_yaml} test_patterns: {test_patterns_yaml} """ + confidence = 0.74 + if pr_issues.get("missing_tests", 0) > 0: + confidence = 0.85 + if state.contributing_analysis.content and state.contributing_analysis.requires_tests: + confidence = min(0.95, confidence + 0.1) + recommendations.append( RuleRecommendation( yaml_rule=yaml_content.strip(), - confidence=0.74 if pr_issues.get("missing_tests", 0) == 0 else 0.85, + confidence=confidence, reasoning=test_reasoning, strategy_used="hybrid", ) @@ -257,37 +282,60 @@ def _default_recommendations( # Require description in PR body. # Increase confidence if we detect short titles in PR history (indicator of missing context) - desc_reasoning = "Encourage context for reviewers; lightweight default." + desc_reasoning = f"Repository analysis for {state.repository_full_name}." if pr_issues.get("short_titles", 0) > 0: desc_reasoning += f" Detected {pr_issues['short_titles']} PRs with very short titles (likely missing context)." + else: + desc_reasoning += " Encourages context for reviewers; lightweight default." + + desc_confidence = 0.68 + if pr_issues.get("short_titles", 0) > 0: + desc_confidence = 0.80 recommendations.append( RuleRecommendation( yaml_rule=textwrap.dedent( """ description: "Ensure PRs include context" - enabled: true +enabled: true severity: low - event_types: - - pull_request - parameters: +event_types: + - pull_request + parameters: min_description_length: 50 """ ).strip(), - confidence=0.68 if pr_issues.get("short_titles", 0) == 0 else 0.80, + confidence=desc_confidence, reasoning=desc_reasoning, strategy_used="static", ) ) - # If contributing guidelines require tests, increase confidence - if state.contributing_analysis.content is not None and state.contributing_analysis.requires_tests: - # Find the test rule and boost its confidence - for rec in recommendations: - if "tests" in rec.yaml_rule.lower(): - rec.confidence = min(0.95, rec.confidence + 0.1) - rec.reasoning += " Contributing guidelines explicitly require tests." + # Add a repository-specific rule if we detect specific patterns + if state.repository_features.has_workflows: + workflow_rule = textwrap.dedent( + """ + description: "Protect CI/CD workflows" +enabled: true + severity: high +event_types: + - pull_request + parameters: + file_patterns: + - ".github/workflows/**" + """ + ).strip() + + recommendations.append( + RuleRecommendation( + yaml_rule=workflow_rule, + confidence=0.90, + reasoning=f"Repository {state.repository_full_name} has {state.repository_features.workflow_count} workflows that should be protected.", + strategy_used="static", + ) + ) + logger.info(f"Generated {len(recommendations)} recommendations for {state.repository_full_name}") return recommendations diff --git a/src/agents/repository_analysis_agent/prompts.py b/src/agents/repository_analysis_agent/prompts.py index 94bfe4a..65c83e2 100644 --- a/src/agents/repository_analysis_agent/prompts.py +++ b/src/agents/repository_analysis_agent/prompts.py @@ -83,14 +83,8 @@ severity: "medium" event_types: - pull_request -conditions: - - type: "condition_type" - parameters: - key: "value" -actions: - - type: "action_type" - parameters: - key: "value" +parameters: + key: "value" ``` Make sure the rule is functional and follows best practices. diff --git a/src/api/recommendations.py b/src/api/recommendations.py index 1a0ef22..ef401ba 100644 --- a/src/api/recommendations.py +++ b/src/api/recommendations.py @@ -48,7 +48,9 @@ async def recommend_rules( if not request.repository_full_name or "/" not in request.repository_full_name: raise HTTPException(status_code=400, detail="Invalid repository name format. Expected 'owner/repo'") - cache_key = f"repo_analysis:{request.repository_full_name}" + # Include authentication context in cache key to ensure different access levels get different results + auth_context = request.installation_id or request.user_token or "anonymous" + cache_key = f"repo_analysis:{request.repository_full_name}:{auth_context}" cached_result = await get_cache(cache_key) if cached_result: @@ -57,6 +59,7 @@ async def recommend_rules( "cache_hit", operation="repository_analysis", subject_ids=[request.repository_full_name], + auth_context=auth_context, cached=True, ) return RepositoryAnalysisResponse(**cached_result) @@ -85,6 +88,8 @@ async def recommend_rules( decision="failed", error=result.message, ) + # Clear any cached results for this repository to ensure fresh analysis on retry + await set_cache(cache_key, None, ttl=1) # Use 1 second TTL to effectively clear cache raise HTTPException(status_code=500, detail=result.message) analysis_response = result.data.get("analysis_response") @@ -161,6 +166,18 @@ async def proceed_with_pr(request: ProceedWithPullRequestRequest) -> ProceedWith branch=request.branch_name, existing_sha=existing_branch_sha, ) + # Verify the branch points to the correct base + if existing_branch_sha != base_sha: + log_structured( + logger, + "branch_sha_mismatch", + operation="proceed_with_pr", + subject_ids=[repo], + branch=request.branch_name, + existing_sha=existing_branch_sha, + expected_sha=base_sha, + warning="Branch exists but points to different SHA than base branch", + ) else: # Create new branch created_ref = await github_client.create_git_ref(repo, request.branch_name, base_sha, **auth_ctx) @@ -182,6 +199,15 @@ async def proceed_with_pr(request: ProceedWithPullRequestRequest) -> ProceedWith "The branch may already exist or you may not have permission to create branches." ), ) + log_structured( + logger, + "branch_created", + operation="proceed_with_pr", + subject_ids=[repo], + branch=request.branch_name, + base_branch=base_branch, + new_sha=created_ref.get("object", {}).get("sha"), + ) file_result = await github_client.create_or_update_file( repo_full_name=repo, @@ -209,6 +235,17 @@ async def proceed_with_pr(request: ProceedWithPullRequestRequest) -> ProceedWith ), ) + commit_sha = (file_result.get("commit") or {}).get("sha") + log_structured( + logger, + "file_created", + operation="proceed_with_pr", + subject_ids=[repo], + branch=request.branch_name, + file_path=request.file_path, + commit_sha=commit_sha, + ) + pr = await github_client.create_pull_request( repo_full_name=repo, title=request.pr_title, @@ -237,16 +274,62 @@ async def proceed_with_pr(request: ProceedWithPullRequestRequest) -> ProceedWith ) pr_url = pr.get("html_url", "") - if not pr_url: + pr_number = pr.get("number") + if not pr_url or not pr_number: log_structured( logger, - "pr_url_missing", + "pr_creation_incomplete", operation="proceed_with_pr", subject_ids=[repo], pr_data=pr, - error="PR created but html_url is missing", + pr_url=pr_url, + pr_number=pr_number, + error="PR creation response missing required fields", + ) + raise HTTPException(status_code=500, detail="PR was created but response is incomplete") + + # Validate the PR URL is a proper GitHub URL format + if not pr_url.startswith("https://github.com/") or "/pull/" not in pr_url: + log_structured( + logger, + "pr_url_invalid", + operation="proceed_with_pr", + subject_ids=[repo], + pr_url=pr_url, + pr_number=pr_number, + error="PR URL is not a valid GitHub pull request URL", + ) + raise HTTPException(status_code=500, detail="PR was created but returned invalid URL format") + + # Validate PR number is reasonable + if not isinstance(pr_number, int) or pr_number <= 0: + log_structured( + logger, + "pr_number_invalid", + operation="proceed_with_pr", + subject_ids=[repo], + pr_url=pr_url, + pr_number=pr_number, + error="PR number is invalid", + ) + raise HTTPException(status_code=500, detail="PR was created but returned invalid PR number") + + # Double-check URL format one more time + expected_url_pattern = f"https://github.com/{repo}/pull/{pr_number}" + if pr_url != expected_url_pattern: + log_structured( + logger, + "pr_url_mismatch", + operation="proceed_with_pr", + subject_ids=[repo], + expected_url=expected_url_pattern, + actual_url=pr_url, + pr_number=pr_number, + error="PR URL doesn't match expected pattern", + ) + raise HTTPException( + status_code=500, detail=f"PR URL mismatch: expected {expected_url_pattern} but got {pr_url}" ) - raise HTTPException(status_code=500, detail="PR was created but URL is missing") log_structured( logger, @@ -255,11 +338,12 @@ async def proceed_with_pr(request: ProceedWithPullRequestRequest) -> ProceedWith subject_ids=[repo], decision="success", branch=request.branch_name, - pr_number=pr.get("number"), + pr_number=pr_number, + pr_url=pr_url, ) return ProceedWithPullRequestResponse( - pull_request_url=pr.get("html_url", ""), + pull_request_url=pr_url, branch_name=request.branch_name, base_branch=base_branch, file_path=request.file_path,