diff --git a/src/agents/repository_analysis_agent/nodes.py b/src/agents/repository_analysis_agent/nodes.py index 8e97ab2..f740c91 100644 --- a/src/agents/repository_analysis_agent/nodes.py +++ b/src/agents/repository_analysis_agent/nodes.py @@ -99,49 +99,92 @@ async def analyze_contributing_guidelines(state: RepositoryAnalysisState) -> Non ) +def _get_language_specific_patterns(language: str | None) -> tuple[list[str], list[str]]: + """ + Get source and test patterns based on repository language. + + Returns: + Tuple of (source_patterns, test_patterns) lists + """ + # Language-specific patterns + patterns_map: dict[str, tuple[list[str], list[str]]] = { + "Python": ( + ["**/*.py"], + ["**/tests/**", "**/*_test.py", "**/test_*.py", "**/*.test.py"], + ), + "TypeScript": ( + ["**/*.ts", "**/*.tsx"], + ["**/*.spec.ts", "**/*.test.ts", "**/tests/**"], + ), + "JavaScript": ( + ["**/*.js", "**/*.jsx"], + ["**/*.test.js", "**/*.spec.js", "**/tests/**"], + ), + "Go": ( + ["**/*.go"], + ["**/*_test.go", "**/*.test.go"], + ), + "Java": ( + ["**/*.java"], + ["**/*Test.java", "**/*Tests.java", "**/test/**"], + ), + "Rust": ( + ["**/*.rs"], + ["**/*.rs"], # Rust tests are in same file + ), + } + + if language and language in patterns_map: + return patterns_map[language] + + # Default fallback patterns for unknown languages + return ( + ["**/*.py", "**/*.ts", "**/*.tsx", "**/*.js", "**/*.go"], + ["**/tests/**", "**/*_test.py", "**/*.spec.ts", "**/*.test.js", "**/*.test.ts", "**/*.test.jsx"], + ) + + def _default_recommendations(state: RepositoryAnalysisState) -> list[RuleRecommendation]: - """Return a minimal, deterministic set of diff-aware rules.""" + """ + Return a minimal, deterministic set of diff-aware rules. + + Note: These recommendations use repository-specific patterns when available. + For more advanced use cases like restricting specific authors from specific paths + (e.g., preventing a member from modifying /auth), the rule engine would need: + 1. A combined validator that checks both author AND file patterns, OR + 2. Support for combining multiple validators with AND/OR logic in a single rule. + + Currently, validators like `author_team_is` and `file_patterns` operate independently. + """ recommendations: list[RuleRecommendation] = [] + # Get language-specific patterns based on repository analysis + source_patterns, test_patterns = _get_language_specific_patterns(state.repository_features.language) + # Require tests when source code changes. recommendations.append( RuleRecommendation( yaml_rule=textwrap.dedent( - """ + f""" description: "Require tests when code changes" enabled: true severity: medium event_types: - pull_request - validators: - - type: diff_pattern - parameters: - file_patterns: - - "**/*.py" - - "**/*.ts" - - "**/*.tsx" - - "**/*.js" - - "**/*.go" - - type: related_tests - parameters: - search_paths: - - "**/tests/**" - - "**/*_test.py" - - "**/*.spec.ts" - - "**/*.test.js" - actions: - - type: warn - parameters: - message: "Please include or update tests for code changes." + parameters: + source_patterns: +{chr(10).join(f' - "{pattern}"' for pattern in source_patterns)} + test_patterns: +{chr(10).join(f' - "{pattern}"' for pattern in test_patterns)} """ ).strip(), confidence=0.74, - reasoning="Default guardrail for code changes without tests.", - strategy_used="static", + reasoning=f"Default guardrail for code changes without tests. Patterns adapted for {state.repository_features.language or 'multi-language'} repository.", + strategy_used="hybrid", ) ) - # Require description and linked issue in PR body. + # Require description in PR body. recommendations.append( RuleRecommendation( yaml_rule=textwrap.dedent( @@ -151,15 +194,8 @@ def _default_recommendations(state: RepositoryAnalysisState) -> list[RuleRecomme severity: low event_types: - pull_request - validators: - - type: required_field_in_diff - parameters: - field: "body" - pattern: "(?i)(summary|context|issue)" - actions: - - type: warn - parameters: - message: "Add a short summary and linked issue in the PR body." + parameters: + min_description_length: 50 """ ).strip(), confidence=0.68, @@ -169,40 +205,20 @@ def _default_recommendations(state: RepositoryAnalysisState) -> list[RuleRecomme ) # If no CODEOWNERS, suggest one for shared ownership signals. - if not state.repository_features.has_codeowners: - recommendations.append( - RuleRecommendation( - yaml_rule=textwrap.dedent( - """ - description: "Flag missing CODEOWNERS entries" - enabled: true - severity: low - event_types: - - pull_request - validators: - - type: diff_pattern - parameters: - file_patterns: - - "**/*" - actions: - - type: warn - parameters: - message: "Consider adding CODEOWNERS to clarify ownership." - """ - ).strip(), - confidence=0.6, - reasoning="Repository lacks CODEOWNERS; gentle nudge to add.", - strategy_used="static", - ) - ) + # Note: This is informational only - we can't enforce CODEOWNERS creation via validators + # but we can encourage it through the recommendation reasoning. return recommendations def _render_rules_yaml(recommendations: list[RuleRecommendation]) -> str: """Combine rule YAML snippets into a single YAML document.""" - yaml_blocks = [rec.yaml_rule.strip() for rec in recommendations] - return "\n\n---\n\n".join(yaml_blocks) + rules_list = [] + for rec in recommendations: + rule_dict = yaml.safe_load(rec.yaml_rule) + if rule_dict: + rules_list.append(rule_dict) + return yaml.dump({"rules": rules_list}, default_flow_style=False, sort_keys=False) def _default_pr_plan(state: RepositoryAnalysisState) -> PullRequestPlan: diff --git a/src/api/recommendations.py b/src/api/recommendations.py index a7810ae..de2525e 100644 --- a/src/api/recommendations.py +++ b/src/api/recommendations.py @@ -139,17 +139,44 @@ async def proceed_with_pr(request: ProceedWithPullRequestRequest) -> ProceedWith base_sha = await github_client.get_git_ref_sha(repo, base_branch, **auth_ctx) if not base_sha: + log_structured( + logger, + "base_branch_resolution_failed", + operation="proceed_with_pr", + subject_ids=[repo], + base_branch=base_branch, + error="Unable to resolve base branch SHA", + ) raise HTTPException(status_code=400, detail=f"Unable to resolve base branch '{base_branch}'") - created_ref = await github_client.create_git_ref(repo, request.branch_name, base_sha, **auth_ctx) - if not created_ref: + # Check if branch already exists + existing_branch_sha = await github_client.get_git_ref_sha(repo, request.branch_name, **auth_ctx) + if existing_branch_sha: + # Branch exists - use it log_structured( logger, - "branch_exists_or_create_failed", + "branch_already_exists", operation="proceed_with_pr", subject_ids=[repo], branch=request.branch_name, + existing_sha=existing_branch_sha, ) + else: + # Create new branch + created_ref = await github_client.create_git_ref(repo, request.branch_name, base_sha, **auth_ctx) + if not created_ref: + log_structured( + logger, + "branch_creation_failed", + operation="proceed_with_pr", + subject_ids=[repo], + branch=request.branch_name, + base_sha=base_sha, + error="Failed to create branch", + ) + raise HTTPException( + status_code=400, detail=f"Failed to create branch '{request.branch_name}'. It may already exist." + ) file_result = await github_client.create_or_update_file( repo_full_name=repo, @@ -160,6 +187,15 @@ async def proceed_with_pr(request: ProceedWithPullRequestRequest) -> ProceedWith **auth_ctx, ) if not file_result: + log_structured( + logger, + "file_creation_failed", + operation="proceed_with_pr", + subject_ids=[repo], + branch=request.branch_name, + file_path=request.file_path, + error="Failed to create or update file", + ) raise HTTPException(status_code=400, detail="Failed to create or update rules file") pr = await github_client.create_pull_request( @@ -171,8 +207,29 @@ async def proceed_with_pr(request: ProceedWithPullRequestRequest) -> ProceedWith **auth_ctx, ) if not pr: + log_structured( + logger, + "pr_creation_failed", + operation="proceed_with_pr", + subject_ids=[repo], + branch=request.branch_name, + base_branch=base_branch, + error="Failed to create pull request", + ) raise HTTPException(status_code=400, detail="Failed to create pull request") + pr_url = pr.get("html_url", "") + if not pr_url: + log_structured( + logger, + "pr_url_missing", + operation="proceed_with_pr", + subject_ids=[repo], + pr_data=pr, + error="PR created but html_url is missing", + ) + raise HTTPException(status_code=500, detail="PR was created but URL is missing") + log_structured( logger, "proceed_with_pr_completed", diff --git a/src/integrations/github/api.py b/src/integrations/github/api.py index 5c0f2b5..a633b3f 100644 --- a/src/integrations/github/api.py +++ b/src/integrations/github/api.py @@ -768,17 +768,36 @@ async def create_git_ref( sha: str, installation_id: int | None = None, user_token: str | None = None, - ) -> bool: - """Create a new git ref/branch.""" + ) -> dict[str, Any] | None: + """Create a new git ref/branch. Returns ref data if successful, None if failed.""" headers = await self._get_auth_headers(installation_id=installation_id, user_token=user_token) if not headers: - return False + return None url = f"{config.github.api_base_url}/repos/{repo_full_name}/git/refs" ref_clean = ref.removeprefix("refs/heads/") if ref.startswith("refs/heads/") else ref payload = {"ref": f"refs/heads/{ref_clean}", "sha": sha} session = await self._get_session() async with session.post(url, headers=headers, json=payload) as response: - return response.status in (200, 201) + if response.status in (200, 201): + return await response.json() + # Branch might already exist - check if it exists and points to the same SHA + if response.status == 422: + error_data = await response.json() + if "already exists" in error_data.get("message", "").lower(): + # Branch exists - verify it's the same SHA + existing_ref = await self.get_git_ref_sha(repo_full_name, ref_clean, installation_id, user_token) + if existing_ref == sha: + logger.info(f"Branch {ref_clean} already exists with same SHA, continuing") + return {"ref": f"refs/heads/{ref_clean}", "object": {"sha": sha}} + # Branch exists but with different SHA - log and return None + logger.error(f"Failed to create branch {ref_clean}: branch exists with different SHA. {error_data}") + return None + # 422 error but not "already exists" - log and return None + logger.error(f"Failed to create branch {ref_clean}: {error_data}") + return None + error_text = await response.text() + logger.error(f"Failed to create branch {ref_clean}: {response.status} - {error_text}") + return None async def create_or_update_file( self,