From 0bc5cbfea50f59bc9c180243e49e87402be4ee47 Mon Sep 17 00:00:00 2001 From: naaa760 Date: Mon, 15 Dec 2025 12:59:41 +0530 Subject: [PATCH 1/2] feat: add repo analysis PR automation --- docs/features.md | 5 + src/agents/__init__.py | 2 + src/agents/base.py | 4 +- src/agents/repository_analysis_agent/agent.py | 186 +----- .../repository_analysis_agent/models.py | 204 ++++-- src/agents/repository_analysis_agent/nodes.py | 624 ++++++------------ src/api/recommendations.py | 76 +++ src/integrations/github/api.py | 167 ++++- .../agents/test_repository_analysis_models.py | 12 + tests/unit/api/test_proceed_with_pr.py | 57 ++ 10 files changed, 692 insertions(+), 645 deletions(-) create mode 100644 tests/unit/agents/test_repository_analysis_models.py create mode 100644 tests/unit/api/test_proceed_with_pr.py diff --git a/docs/features.md b/docs/features.md index abbde6e..7841900 100644 --- a/docs/features.md +++ b/docs/features.md @@ -5,6 +5,11 @@ standards so teams can focus on building, increase trust, and move fast. ## Core Features +### Repository Analysis → One-Click PR +- Paste a repo URL, get diff-aware rule recommendations (structure, PR history, CONTRIBUTING). +- Click “Proceed with PR” to auto-create `.watchflow/rules.yaml` on a branch with a ready-to-review PR body. +- Supports GitHub App installations or user tokens; logs are structured and safe for ops visibility. + ### Context-Aware Rule Evaluation **Intelligent Context Analysis** diff --git a/src/agents/__init__.py b/src/agents/__init__.py index 88f2b92..b9df37b 100644 --- a/src/agents/__init__.py +++ b/src/agents/__init__.py @@ -11,6 +11,7 @@ from src.agents.engine_agent import RuleEngineAgent from src.agents.factory import get_agent from src.agents.feasibility_agent import RuleFeasibilityAgent +from src.agents.repository_analysis_agent import RepositoryAnalysisAgent __all__ = [ "BaseAgent", @@ -18,5 +19,6 @@ "RuleFeasibilityAgent", "RuleEngineAgent", "AcknowledgmentAgent", + "RepositoryAnalysisAgent", "get_agent", ] diff --git a/src/agents/base.py b/src/agents/base.py index 293146a..44e6617 100644 --- a/src/agents/base.py +++ b/src/agents/base.py @@ -7,7 +7,6 @@ from typing import Any, TypeVar from src.core.utils.timeout import execute_with_timeout -from src.integrations.providers import get_chat_model logger = logging.getLogger(__name__) @@ -45,6 +44,9 @@ def __init__(self, max_retries: int = 3, retry_delay: float = 1.0, agent_name: s self.max_retries = max_retries self.retry_delay = retry_delay self.agent_name = agent_name + # Lazy import to avoid circular imports and heavy initialization at module load. + from src.integrations.providers import get_chat_model + self.llm = get_chat_model(agent=agent_name) self.graph = self._build_graph() logger.info(f"🔧 {self.__class__.__name__} initialized with max_retries={max_retries}, agent_name={agent_name}") diff --git a/src/agents/repository_analysis_agent/agent.py b/src/agents/repository_analysis_agent/agent.py index f2cf48d..9e1957c 100644 --- a/src/agents/repository_analysis_agent/agent.py +++ b/src/agents/repository_analysis_agent/agent.py @@ -1,180 +1,60 @@ -import logging -import time -from datetime import datetime +""" +RepositoryAnalysisAgent orchestrates repository signal gathering and rule generation. 
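+
+The agent runs a linear pipeline: repository structure signals, a recent PR
+sample, CONTRIBUTING parsing, static rule generation, YAML validation, and a
+summary that pairs the recommendations with a suggested pull-request plan.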
+""" + +from __future__ import annotations -from langgraph.graph import END, START, StateGraph +import time from src.agents.base import AgentResult, BaseAgent -from src.agents.repository_analysis_agent.models import ( - RepositoryAnalysisRequest, - RepositoryAnalysisResponse, - RepositoryAnalysisState, -) +from src.agents.repository_analysis_agent.models import RepositoryAnalysisRequest, RepositoryAnalysisState from src.agents.repository_analysis_agent.nodes import ( analyze_contributing_guidelines, analyze_pr_history, analyze_repository_structure, - generate_rule_recommendations, summarize_analysis, validate_recommendations, + _default_recommendations, ) -logger = logging.getLogger(__name__) - class RepositoryAnalysisAgent(BaseAgent): - """ - Agent that analyzes GitHub repositories to generate Watchflow rule recommendations. - - This agent performs multi-step analysis: - 1. Analyzes repository structure and features - 2. Parses contributing guidelines for patterns - 3. Reviews commit/PR patterns - 4. Generates rule recommendations with confidence scores - 5. Validates recommendations are valid YAML - - Returns structured recommendations that can be directly used as Watchflow rules. - """ - - def __init__(self, max_retries: int = 3, timeout: float = 120.0): - super().__init__(max_retries=max_retries, agent_name="repository_analysis_agent") - self.timeout = timeout - - logger.info("Repository Analysis Agent initialized") - logger.info(f"Max retries: {max_retries}, Timeout: {timeout}s") + """Agent that inspects a repository and proposes Watchflow rules.""" - def _build_graph(self) -> StateGraph: - """Build the LangGraph workflow for repository analysis.""" - workflow = StateGraph(RepositoryAnalysisState) + def _build_graph(self): + # Graph orchestration is handled procedurally in execute for clarity. + return None - # Add nodes - workflow.add_node("analyze_repository_structure", analyze_repository_structure) - workflow.add_node("analyze_pr_history", analyze_pr_history) - workflow.add_node("analyze_contributing_guidelines", analyze_contributing_guidelines) - workflow.add_node("generate_rule_recommendations", generate_rule_recommendations) - workflow.add_node("validate_recommendations", validate_recommendations) - workflow.add_node("summarize_analysis", summarize_analysis) - - # Define workflow edges - workflow.add_edge(START, "analyze_repository_structure") - workflow.add_edge("analyze_repository_structure", "analyze_pr_history") - workflow.add_edge("analyze_pr_history", "analyze_contributing_guidelines") - workflow.add_edge("analyze_contributing_guidelines", "generate_rule_recommendations") - workflow.add_edge("generate_rule_recommendations", "validate_recommendations") - workflow.add_edge("validate_recommendations", "summarize_analysis") - workflow.add_edge("summarize_analysis", END) - - return workflow.compile() - - async def execute(self, repository_full_name: str, installation_id: int | None = None, **kwargs) -> AgentResult: - """ - Analyze a repository and generate rule recommendations. 
- - Args: - repository_full_name: Full repository name (owner/repo) - installation_id: Optional GitHub App installation ID for private repos - **kwargs: Additional parameters - - Returns: - AgentResult containing analysis results and recommendations - """ - start_time = time.time() + async def execute(self, **kwargs) -> AgentResult: + started_at = time.perf_counter() + request = RepositoryAnalysisRequest(**kwargs) + state = RepositoryAnalysisState( + repository_full_name=request.repository_full_name, + installation_id=request.installation_id, + ) try: - logger.info(f"Starting repository analysis for {repository_full_name}") - - # Validate input - if not repository_full_name or "/" not in repository_full_name: - return AgentResult( - success=False, - message="Invalid repository name format. Expected 'owner/repo'", - data={}, - metadata={"execution_time_ms": 0}, - ) + await analyze_repository_structure(state) + await analyze_pr_history(state, request.max_prs) + await analyze_contributing_guidelines(state) - initial_state = RepositoryAnalysisState( - repository_full_name=repository_full_name, - installation_id=installation_id, - analysis_steps=[], - errors=[], - ) - - logger.info("Initial state prepared, starting analysis workflow") - - result = await self._execute_with_timeout(self.graph.ainvoke(initial_state), timeout=self.timeout) - - execution_time = time.time() - start_time - logger.info(f"Analysis completed in {execution_time:.2f}s") - - if isinstance(result, dict): - state = RepositoryAnalysisState(**result) - else: - state = result - - response = RepositoryAnalysisResponse( - repository_full_name=repository_full_name, - recommendations=state.recommendations, - analysis_summary=state.analysis_summary, - analyzed_at=datetime.now().isoformat(), - total_recommendations=len(state.recommendations), - ) - - # Check for errors - has_errors = len(state.errors) > 0 - success_message = f"Analysis completed successfully with {len(state.recommendations)} recommendations" - if has_errors: - success_message += f" ({len(state.errors)} errors encountered)" - - logger.info(f"Analysis result: {len(state.recommendations)} recommendations, {len(state.errors)} errors") + state.recommendations = _default_recommendations(state) + validate_recommendations(state) + response = summarize_analysis(state, request) + latency_ms = int((time.perf_counter() - started_at) * 1000) return AgentResult( - success=not has_errors, - message=success_message, + success=True, + message="Repository analysis completed", data={"analysis_response": response}, - metadata={ - "execution_time_ms": execution_time * 1000, - "recommendations_count": len(state.recommendations), - "errors_count": len(state.errors), - "analysis_steps": state.analysis_steps, - }, + metadata={"execution_time_ms": latency_ms}, ) - - except Exception as e: - execution_time = time.time() - start_time - logger.error(f"Error in repository analysis: {e}") - + except Exception as exc: # noqa: BLE001 + latency_ms = int((time.perf_counter() - started_at) * 1000) return AgentResult( success=False, - message=f"Repository analysis failed: {str(e)}", + message=f"Repository analysis failed: {exc}", data={}, - metadata={ - "execution_time_ms": execution_time * 1000, - "error_type": type(e).__name__, - }, + metadata={"execution_time_ms": latency_ms}, ) - async def analyze_repository(self, request: RepositoryAnalysisRequest) -> RepositoryAnalysisResponse: - """ - Convenience method for analyzing a repository using the request model. 
- - Args: - request: Repository analysis request - - Returns: - Repository analysis response - """ - result = await self.execute( - repository_full_name=request.repository_full_name, - installation_id=request.installation_id, - ) - - if result.success and "analysis_response" in result.data: - return result.data["analysis_response"] - else: - return RepositoryAnalysisResponse( - repository_full_name=request.repository_full_name, - recommendations=[], - analysis_summary={"error": result.message}, - analyzed_at=datetime.now().isoformat(), - total_recommendations=0, - ) diff --git a/src/agents/repository_analysis_agent/models.py b/src/agents/repository_analysis_agent/models.py index e7d6b27..3f93518 100644 --- a/src/agents/repository_analysis_agent/models.py +++ b/src/agents/repository_analysis_agent/models.py @@ -1,66 +1,120 @@ -from enum import Enum +from datetime import datetime from typing import Any -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, field_validator, model_validator -class AnalysisSource(str, Enum): - """Sources of analysis data for rule recommendations.""" +def parse_github_repo_identifier(value: str) -> str: + """ + Normalize a GitHub repository identifier. - CONTRIBUTING_GUIDELINES = "contributing_guidelines" - REPOSITORY_STRUCTURE = "repository_structure" - WORKFLOWS = "workflows" - BRANCH_PROTECTION = "branch_protection" - COMMIT_PATTERNS = "commit_patterns" - PR_PATTERNS = "pr_patterns" + Accepts: + - owner/repo + - https://github.com/owner/repo + - https://github.com/owner/repo.git + - owner/repo/ + """ + raw = (value or "").strip() + if not raw: + return "" + if raw.startswith("https://") or raw.startswith("http://"): + parts = raw.split("/") + try: + gh_idx = parts.index("github.com") + except ValueError: + # Could be enterprise; keep as-is and let API validation fail. 
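+            # e.g. "https://ghe.example.com/owner/repo.git" comes back as
+            # "https://ghe.example.com/owner/repo", not "owner/repo".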
+ return raw.rstrip("/").removesuffix(".git") -class RuleRecommendation(BaseModel): - """A recommended Watchflow rule with confidence and reasoning.""" + owner = parts[gh_idx + 1] if len(parts) > gh_idx + 1 else "" + repo = parts[gh_idx + 2] if len(parts) > gh_idx + 2 else "" + return f"{owner}/{repo}".rstrip("/").removesuffix(".git") - yaml_content: str = Field(description="Valid Watchflow rule YAML content") - confidence: float = Field(description="Confidence score (0.0-1.0) in the recommendation", ge=0.0, le=1.0) - reasoning: str = Field(description="Explanation of why this rule is recommended") - source_patterns: list[str] = Field( - description="Repository patterns that led to this recommendation", default_factory=list - ) - category: str = Field(description="Category of the rule (e.g., 'quality', 'security', 'process')") - estimated_impact: str = Field(description="Expected impact (e.g., 'high', 'medium', 'low')") + return raw.rstrip("/").removesuffix(".git") -class RepositoryAnalysisRequest(BaseModel): - """Request model for repository analysis.""" +class PullRequestSample(BaseModel): + """Minimal PR snapshot used for recommendations.""" - repository_full_name: str = Field(description="Full repository name (owner/repo)") - installation_id: int | None = Field( - description="GitHub App installation ID for accessing private repos", default=None - ) + number: int + title: str + state: str + merged: bool = False + additions: int | None = None + deletions: int | None = None + changed_files: int | None = None + + +class RuleRecommendation(BaseModel): + """A recommended Watchflow rule with confidence and reasoning.""" + + yaml_rule: str = Field(description="Valid Watchflow rule YAML content") + confidence: float = Field(description="Confidence score (0.0-1.0)", ge=0.0, le=1.0) + reasoning: str = Field(description="Short explanation of why this rule is recommended") + strategy_used: str = Field(description="Strategy used (static, hybrid, llm)") class RepositoryFeatures(BaseModel): """Features and characteristics discovered in the repository.""" - has_contributing: bool = Field(description="Has CONTRIBUTING.md file", default=False) - has_codeowners: bool = Field(description="Has CODEOWNERS file", default=False) - has_workflows: bool = Field(description="Has GitHub Actions workflows", default=False) - has_branch_protection: bool = Field(description="Has branch protection rules", default=False) - workflow_count: int = Field(description="Number of workflow files", default=0) - language: str | None = Field(description="Primary programming language", default=None) - contributor_count: int = Field(description="Number of contributors", default=0) - pr_count: int = Field(description="Number of pull requests", default=0) - issue_count: int = Field(description="Number of issues", default=0) + has_contributing: bool = Field(default=False, description="Has CONTRIBUTING.md file") + has_codeowners: bool = Field(default=False, description="Has CODEOWNERS file") + has_workflows: bool = Field(default=False, description="Has GitHub Actions workflows") + workflow_count: int = Field(default=0, description="Number of workflow files") + language: str | None = Field(default=None, description="Primary programming language") + contributor_count: int = Field(default=0, description="Number of contributors") + pr_count: int = Field(default=0, description="Number of pull requests") class ContributingGuidelinesAnalysis(BaseModel): """Analysis of contributing guidelines content.""" - content: str | None = 
Field(description="Full CONTRIBUTING.md content", default=None) - has_pr_template: bool = Field(description="Requires PR templates", default=False) - has_issue_template: bool = Field(description="Requires issue templates", default=False) - requires_tests: bool = Field(description="Requires tests for contributions", default=False) - requires_docs: bool = Field(description="Requires documentation updates", default=False) - code_style_requirements: list[str] = Field(description="Code style requirements mentioned", default_factory=list) - review_requirements: list[str] = Field(description="Code review requirements mentioned", default_factory=list) + content: str | None = Field(default=None, description="Full CONTRIBUTING.md content") + has_pr_template: bool = Field(default=False, description="Requires PR templates") + has_issue_template: bool = Field(default=False, description="Requires issue templates") + requires_tests: bool = Field(default=False, description="Requires tests for contributions") + requires_docs: bool = Field(default=False, description="Requires documentation updates") + code_style_requirements: list[str] = Field(default_factory=list, description="Code style requirements mentioned") + review_requirements: list[str] = Field(default_factory=list, description="Code review requirements mentioned") + + +class PullRequestPlan(BaseModel): + """Plan for creating a PR with generated rules.""" + + branch_name: str = "watchflow/rules" + base_branch: str = "main" + commit_message: str = "chore: add Watchflow rules" + pr_title: str = "Add Watchflow rules" + pr_body: str = "This PR adds Watchflow rule recommendations." + file_path: str = ".watchflow/rules.yaml" + + +class RepositoryAnalysisRequest(BaseModel): + """Request model for repository analysis.""" + + repository_url: str | None = Field(default=None, description="GitHub repository URL") + repository_full_name: str | None = Field(default=None, description="Full repository name (owner/repo)") + installation_id: int | None = Field(default=None, description="GitHub App installation ID") + max_prs: int = Field(default=10, ge=0, le=50, description="Max PRs to sample for analysis") + + @field_validator("repository_full_name", mode="before") + @classmethod + def normalize_full_name(cls, value: str | None, info) -> str: + if value: + return parse_github_repo_identifier(value) + raw_url = info.data.get("repository_url") + return parse_github_repo_identifier(raw_url or "") + + @field_validator("repository_url", mode="before") + @classmethod + def strip_url(cls, value: str | None) -> str | None: + return value.strip() if isinstance(value, str) else value + + @model_validator(mode="after") + def populate_full_name(self) -> "RepositoryAnalysisRequest": + if not self.repository_full_name and self.repository_url: + self.repository_full_name = parse_github_repo_identifier(self.repository_url) + return self class RepositoryAnalysisState(BaseModel): @@ -68,28 +122,62 @@ class RepositoryAnalysisState(BaseModel): repository_full_name: str installation_id: int | None - pr_samples: list[dict[str, Any]] = Field(default_factory=list) - - # Analysis data + pr_samples: list[PullRequestSample] = Field(default_factory=list) repository_features: RepositoryFeatures = Field(default_factory=RepositoryFeatures) contributing_analysis: ContributingGuidelinesAnalysis = Field(default_factory=ContributingGuidelinesAnalysis) - - # Processing state - analysis_steps: list[str] = Field(default_factory=list) - errors: list[str] = Field(default_factory=list) - - # Results 
recommendations: list[RuleRecommendation] = Field(default_factory=list) + rules_yaml: str | None = None + pr_plan: PullRequestPlan | None = None analysis_summary: dict[str, Any] = Field(default_factory=dict) + errors: list[str] = Field(default_factory=list) class RepositoryAnalysisResponse(BaseModel): - """Response model containing rule recommendations.""" + """Response model containing rule recommendations and PR plan.""" repository_full_name: str = Field(description="Repository that was analyzed") - recommendations: list[RuleRecommendation] = Field( - description="List of recommended Watchflow rules", default_factory=list - ) - analysis_summary: dict[str, Any] = Field(description="Summary of analysis findings", default_factory=dict) - analyzed_at: str = Field(description="Timestamp of analysis") - total_recommendations: int = Field(description="Total number of recommendations made") + rules_yaml: str = Field(description="Combined Watchflow rules YAML") + recommendations: list[RuleRecommendation] = Field(default_factory=list, description="Rule recommendations") + pr_plan: PullRequestPlan | None = Field(default=None, description="Suggested PR plan") + analysis_summary: dict[str, Any] = Field(default_factory=dict, description="Summary of analysis findings") + analyzed_at: datetime = Field(default_factory=datetime.utcnow, description="Timestamp of analysis") + + +class ProceedWithPullRequestRequest(BaseModel): + """Request to create a PR with generated rules.""" + + repository_url: str | None = Field(default=None, description="GitHub repository URL") + repository_full_name: str | None = Field(default=None, description="Full repository name (owner/repo)") + installation_id: int | None = Field(default=None, description="GitHub App installation ID") + user_token: str | None = Field(default=None, description="User token for GitHub operations (optional)") + rules_yaml: str = Field(description="Rules YAML content to commit") + branch_name: str = Field(default="watchflow/rules", description="Branch to create or update") + base_branch: str = Field(default="main", description="Base branch for the PR") + commit_message: str = Field(default="chore: add Watchflow rules", description="Commit message") + pr_title: str = Field(default="Add Watchflow rules", description="Pull request title") + pr_body: str = Field(default="This PR adds Watchflow rule recommendations.", description="Pull request body") + file_path: str = Field(default=".watchflow/rules.yaml", description="Path to rules file in repo") + + @field_validator("repository_full_name", mode="before") + @classmethod + def normalize_full_name(cls, value: str | None, info) -> str: + if value: + return parse_github_repo_identifier(value) + raw_url = info.data.get("repository_url") + return parse_github_repo_identifier(raw_url or "") + + @model_validator(mode="after") + def populate_full_name(self) -> "ProceedWithPullRequestRequest": + if not self.repository_full_name and self.repository_url: + self.repository_full_name = parse_github_repo_identifier(self.repository_url) + return self + + +class ProceedWithPullRequestResponse(BaseModel): + """Response after creating the PR.""" + + pull_request_url: str + branch_name: str + base_branch: str + file_path: str + commit_sha: str | None = None diff --git a/src/agents/repository_analysis_agent/nodes.py b/src/agents/repository_analysis_agent/nodes.py index e83a50d..20b88df 100644 --- a/src/agents/repository_analysis_agent/nodes.py +++ b/src/agents/repository_analysis_agent/nodes.py @@ -1,442 +1,244 @@ -import 
logging +""" +Workflow nodes for the RepositoryAnalysisAgent. + +Each node is a small, testable function that mutates the RepositoryAnalysisState. +The nodes favor static/hybrid strategies first and avoid heavy LLM calls unless +strictly necessary. +""" + +from __future__ import annotations + +import textwrap from typing import Any +import yaml + from src.agents.repository_analysis_agent.models import ( ContributingGuidelinesAnalysis, + PullRequestPlan, + PullRequestSample, + RepositoryAnalysisRequest, + RepositoryAnalysisResponse, RepositoryAnalysisState, RepositoryFeatures, RuleRecommendation, ) -from src.agents.repository_analysis_agent.prompts import ( - CONTRIBUTING_GUIDELINES_ANALYSIS_PROMPT, -) from src.integrations.github.api import github_client -logger = logging.getLogger(__name__) - - -async def analyze_repository_structure(state: RepositoryAnalysisState) -> dict[str, Any]: - """ - Analyze basic repository structure and features. - - Gathers information about workflows, branch protection, contributors, etc. - """ - try: - logger.info(f"Analyzing repository structure for {state.repository_full_name}") - - features = RepositoryFeatures() - contributing_content = await github_client.get_file_content( - state.repository_full_name, "CONTRIBUTING.md", state.installation_id - ) - features.has_contributing = contributing_content is not None - - codeowners_content = await github_client.get_file_content( - state.repository_full_name, ".github/CODEOWNERS", state.installation_id - ) - features.has_codeowners = codeowners_content is not None - - workflow_content = await github_client.get_file_content( - state.repository_full_name, ".github/workflows/main.yml", state.installation_id - ) - if workflow_content: - features.has_workflows = True - features.workflow_count = 1 - contributors = await github_client.get_repository_contributors( - state.repository_full_name, state.installation_id - ) - features.contributor_count = len(contributors) if contributors else 0 - - # TODO: Add more repository analysis (PR count, issues, language detection, etc.) 
-
-        logger.info(f"Repository analysis complete: {features.model_dump()}")
-
-        state.repository_features = features
-        state.analysis_steps.append("repository_structure_analyzed")
-
-        return {"repository_features": features, "analysis_steps": state.analysis_steps}
-
-    except Exception as e:
-        logger.error(f"Error analyzing repository structure: {e}")
-        state.errors.append(f"Repository structure analysis failed: {str(e)}")
-        return {"errors": state.errors}
-
-
-async def analyze_pr_history(state: RepositoryAnalysisState) -> dict[str, Any]:
-    """Pull a small PR sample to inform rule recommendations."""
-    try:
-        logger.info(f"Fetching recent PRs for {state.repository_full_name}")
-        prs = await github_client.list_pull_requests(
-            state.repository_full_name, state.installation_id or 0, state="closed", per_page=20
-        )
-
-        pr_samples: list[dict[str, Any]] = []
-        for pr in prs:
-            pr_samples.append(
-                {
-                    "number": pr.get("number"),
-                    "title": pr.get("title"),
-                    "merged": pr.get("merged_at") is not None,
-                    "changed_files": pr.get("changed_files"),
-                    "additions": pr.get("additions"),
-                    "deletions": pr.get("deletions"),
-                    "user": pr.get("user", {}).get("login"),
-                }
+async def analyze_repository_structure(state: RepositoryAnalysisState) -> None:
+    """Collect repository metadata and structure signals."""
+    repo = state.repository_full_name
+    installation_id = state.installation_id
+
+    repo_data = await github_client.get_repository(repo, installation_id=installation_id)
+    workflows = await github_client.list_directory_any_auth(
+        repo_full_name=repo, path=".github/workflows", installation_id=installation_id
+    )
+    contributors = await github_client.get_repository_contributors(repo, installation_id) if installation_id else []
+
+    state.repository_features = RepositoryFeatures(
+        has_contributing=bool(await github_client.get_file_content(repo, "CONTRIBUTING.md", installation_id)),
+        has_codeowners=bool(await github_client.get_file_content(repo, ".github/CODEOWNERS", installation_id)),
+        has_workflows=bool(workflows),
+        workflow_count=len(workflows or []),
+        language=(repo_data or {}).get("language"),
+        contributor_count=len(contributors),
+        pr_count=0,
+    )
+
+
+async def analyze_pr_history(state: RepositoryAnalysisState, max_prs: int) -> None:
+    """Fetch a small sample of recent pull requests for context."""
+    repo = state.repository_full_name
+    installation_id = state.installation_id
+    prs = await github_client.list_pull_requests(repo, installation_id=installation_id, state="all", per_page=max_prs)
+
+    samples: list[PullRequestSample] = []
+    for pr in prs or []:
+        samples.append(
+            PullRequestSample(
+                number=pr.get("number", 0),
+                title=pr.get("title", ""),
+                state=pr.get("state", ""),
+                merged=bool(pr.get("merged_at")),
+                additions=pr.get("additions"),
+                deletions=pr.get("deletions"),
+                changed_files=pr.get("changed_files"),
            )
-
-        state.pr_samples = pr_samples
-        state.analysis_steps.append("pr_history_sampled")
-        logger.info(f"Collected {len(pr_samples)} PR samples")
-        return {"pr_samples": pr_samples, "analysis_steps": state.analysis_steps}
-    except Exception as e:
-        logger.error(f"Error analyzing PR history: {e}")
-        state.errors.append(f"PR history analysis failed: {str(e)}")
-        return {"errors": state.errors}
-
-
-async def analyze_contributing_guidelines(state: RepositoryAnalysisState) -> dict[str, Any]:
-    """
-    Analyze CONTRIBUTING.md file for patterns and requirements.
- """ - try: - logger.info(f" Analyzing contributing guidelines for {state.repository_full_name}") - - # Get contributing guidelines content - content = await github_client.get_file_content( - state.repository_full_name, "CONTRIBUTING.md", state.installation_id ) - if not content: - logger.info("No CONTRIBUTING.md file found") - analysis = ContributingGuidelinesAnalysis() - else: - llm = github_client.llm if hasattr(github_client, "llm") else None - if llm: - try: - prompt = CONTRIBUTING_GUIDELINES_ANALYSIS_PROMPT.format(content=content) - await llm.ainvoke(prompt) - - # TODO: Parse JSON response and create ContributingGuidelinesAnalysis - - analysis = ContributingGuidelinesAnalysis(content=content) - except Exception as e: - logger.error(f"LLM analysis failed: {e}") - analysis = ContributingGuidelinesAnalysis(content=content) - else: - analysis = ContributingGuidelinesAnalysis(content=content) - - state.contributing_analysis = analysis - state.analysis_steps.append("contributing_guidelines_analyzed") - - logger.info(" Contributing guidelines analysis complete") - - return {"contributing_analysis": analysis, "analysis_steps": state.analysis_steps} - - except Exception as e: - logger.error(f"Error analyzing contributing guidelines: {e}") - state.errors.append(f"Contributing guidelines analysis failed: {str(e)}") - return {"errors": state.errors} - - -async def generate_rule_recommendations(state: RepositoryAnalysisState) -> dict[str, Any]: - """ - Generate Watchflow rule recommendations based on repository analysis. - """ - try: - logger.info(f" Generating rule recommendations for {state.repository_full_name}") - - recommendations = [] - - features = state.repository_features - contributing = state.contributing_analysis - - # Diff-aware: enforce filter handling in core RAG/query code - recommendations.append( - RuleRecommendation( - yaml_content="""description: "Block merges when PRs change filter validation logic without failing on invalid inputs" -enabled: true -severity: "high" -event_types: ["pull_request"] -parameters: - file_patterns: - - "packages/core/src/**/vector-query.ts" - - "packages/core/src/**/graph-rag.ts" - - "packages/core/src/**/filters/*.ts" - require_patterns: - - "throw\\\\s+new\\\\s+Error" - - "raise\\\\s+ValueError" - forbidden_patterns: - - "return\\\\s+.*filter\\\\s*$" - how_to_fix: "Ensure invalid filters raise descriptive errors instead of silently returning unfiltered results." 
-""", - confidence=0.85, - reasoning="Filter handling regressions were flagged in historical fixes; enforce throws on invalid input.", - source_patterns=["pr_history"], - category="quality", - estimated_impact="high", - ) + state.pr_samples = samples + state.repository_features.pr_count = len(samples) + + +async def analyze_contributing_guidelines(state: RepositoryAnalysisState) -> None: + """Fetch and parse CONTRIBUTING guidelines if present.""" + repo = state.repository_full_name + installation_id = state.installation_id + + content = await github_client.get_file_content(repo, "CONTRIBUTING.md", installation_id) or await github_client.get_file_content( + repo, ".github/CONTRIBUTING.md", installation_id + ) + + if not content: + state.contributing_analysis = ContributingGuidelinesAnalysis(content=None) + return + + lowered = content.lower() + state.contributing_analysis = ContributingGuidelinesAnalysis( + content=content, + has_pr_template="pr template" in lowered or "pull request template" in lowered, + has_issue_template="issue template" in lowered, + requires_tests="test" in lowered or "tests" in lowered, + requires_docs="docs" in lowered or "documentation" in lowered, + code_style_requirements=[ + req + for req in ["lint", "format", "pep8", "flake8", "eslint", "prettier"] + if req in lowered + ], + review_requirements=[req for req in ["review", "approval"] if req in lowered], + ) + + +def _default_recommendations(state: RepositoryAnalysisState) -> list[RuleRecommendation]: + """Return a minimal, deterministic set of diff-aware rules.""" + recommendations: list[RuleRecommendation] = [] + + # Require tests when source code changes. + recommendations.append( + RuleRecommendation( + yaml_rule=textwrap.dedent( + """ + description: "Require tests when code changes" + enabled: true + severity: medium + event_types: + - pull_request + validators: + - type: diff_pattern + parameters: + file_patterns: + - "**/*.py" + - "**/*.ts" + - "**/*.tsx" + - "**/*.js" + - "**/*.go" + - type: related_tests + parameters: + search_paths: + - "**/tests/**" + - "**/*_test.py" + - "**/*.spec.ts" + - "**/*.test.js" + actions: + - type: warn + parameters: + message: "Please include or update tests for code changes." + """ + ).strip(), + confidence=0.74, + reasoning="Default guardrail for code changes without tests.", + strategy_used="static", ) - - # Diff-aware: enforce test updates when core code changes - recommendations.append( - RuleRecommendation( - yaml_content="""description: "Require regression tests when modifying tool schema validation or client tool execution" -enabled: true -severity: "medium" -event_types: ["pull_request"] -parameters: - source_patterns: - - "packages/core/src/**/tool*.ts" - - "packages/core/src/agent/**" - - "packages/client/**" - test_patterns: - - "packages/core/tests/**" - - "tests/**" - min_test_files: 1 - rationale: "Tool invocation changes have previously caused regressions in clientTools streaming." -""", - confidence=0.8, - reasoning="Core tool changes often broke client tools; require at least one related test update.", - source_patterns=["pr_history"], - category="quality", - estimated_impact="medium", - ) + ) + + # Require description and linked issue in PR body. 
+ recommendations.append( + RuleRecommendation( + yaml_rule=textwrap.dedent( + """ + description: "Ensure PRs include context" + enabled: true + severity: low + event_types: + - pull_request + validators: + - type: required_field_in_diff + parameters: + field: "body" + pattern: "(?i)(summary|context|issue)" + actions: + - type: warn + parameters: + message: "Add a short summary and linked issue in the PR body." + """ + ).strip(), + confidence=0.68, + reasoning="Encourage context for reviewers; lightweight default.", + strategy_used="static", ) + ) - # Diff-aware: ensure agent descriptions exist + # If no CODEOWNERS, suggest one for shared ownership signals. + if not state.repository_features.has_codeowners: recommendations.append( RuleRecommendation( - yaml_content="""description: "Ensure every agent exposes a user-facing description for UI profiles" -enabled: true -severity: "low" -event_types: ["pull_request"] -parameters: - file_patterns: - - "packages/core/src/agent/**" - required_text: - - "description" - message: "Add or update the agent description so downstream UIs can render capabilities." -""", - confidence=0.75, - reasoning="Agent profile UIs require descriptions; ensure new/updated agents include them.", - source_patterns=["pr_history"], - category="process", - estimated_impact="low", + yaml_rule=textwrap.dedent( + """ + description: "Flag missing CODEOWNERS entries" + enabled: true + severity: low + event_types: + - pull_request + validators: + - type: diff_pattern + parameters: + file_patterns: + - "**/*" + actions: + - type: warn + parameters: + message: "Consider adding CODEOWNERS to clarify ownership." + """ + ).strip(), + confidence=0.6, + reasoning="Repository lacks CODEOWNERS; gentle nudge to add.", + strategy_used="static", ) ) - # Diff-aware: preserve URL handling for supported providers - recommendations.append( - RuleRecommendation( - yaml_content="""description: "Block merges when URL or asset handling changes bypass provider capability checks" -enabled: true -severity: "high" -event_types: ["pull_request"] -parameters: - file_patterns: - - "packages/core/src/agent/message-list/**" - - "packages/core/src/llm/**" - require_patterns: - - "isUrlSupportedByModel" - forbidden_patterns: - - "downloadAssetsFromMessages\\(messages\\)" - how_to_fix: "Preserve remote URLs for providers that support them natively; only download assets for unsupported providers." 
-""", - confidence=0.8, - reasoning="Past URL handling bugs; ensure capability checks remain intact.", - source_patterns=["pr_history"], - category="quality", - estimated_impact="high", - ) - ) + return recommendations - # Legacy structural signals retained for completeness - if features.has_workflows: - recommendations.append( - RuleRecommendation( - yaml_content="""description: "Require CI checks to pass" -enabled: true -severity: "high" -event_types: - - pull_request -conditions: - - type: "ci_checks_passed" - parameters: - required_checks: [] -actions: - - type: "block_merge" - parameters: - message: "All CI checks must pass before merging" -""", - confidence=0.9, - reasoning="Repository has CI workflows configured, so requiring checks to pass is a standard practice", - source_patterns=["has_workflows"], - category="quality", - estimated_impact="high", - ) - ) - if features.has_codeowners: - recommendations.append( - RuleRecommendation( - yaml_content="""description: "Require CODEOWNERS approval for changes" -enabled: true -severity: "medium" -event_types: - - pull_request -conditions: - - type: "codeowners_approved" - parameters: {} -actions: - - type: "require_approval" - parameters: - message: "CODEOWNERS must approve changes to owned files" -""", - confidence=0.8, - reasoning="CODEOWNERS file exists, indicating ownership requirements for code changes", - source_patterns=["has_codeowners"], - category="process", - estimated_impact="medium", - ) - ) +def _render_rules_yaml(recommendations: list[RuleRecommendation]) -> str: + """Combine rule YAML snippets into a single YAML document.""" + yaml_blocks = [rec.yaml_rule.strip() for rec in recommendations] + return "\n\n---\n\n".join(yaml_blocks) - if contributing.requires_tests: - recommendations.append( - RuleRecommendation( - yaml_content="""description: "Require test coverage for code changes" -enabled: true -severity: "medium" -event_types: - - pull_request -conditions: - - type: "test_coverage_threshold" - parameters: - minimum_coverage: 80 -actions: - - type: "block_merge" - parameters: - message: "Test coverage must be at least 80%" -""", - confidence=0.7, - reasoning="Contributing guidelines mention testing requirements", - source_patterns=["requires_tests"], - category="quality", - estimated_impact="medium", - ) - ) - if features.contributor_count > 10: - recommendations.append( - RuleRecommendation( - yaml_content="""description: "Require at least one approval for pull requests" -enabled: true -severity: "medium" -event_types: - - pull_request -conditions: - - type: "minimum_approvals" - parameters: - count: 1 -actions: - - type: "block_merge" - parameters: - message: "Pull requests require at least one approval" -""", - confidence=0.6, - reasoning="Repository has multiple contributors, indicating collaborative development", - source_patterns=["contributor_count"], - category="process", - estimated_impact="medium", - ) - ) +def _default_pr_plan(state: RepositoryAnalysisState) -> PullRequestPlan: + """Create a default PR plan.""" + return PullRequestPlan( + branch_name="watchflow/rules", + base_branch="main", + commit_message="chore: add Watchflow rules", + pr_title="Add Watchflow rules", + pr_body="This PR adds Watchflow rule recommendations generated by Watchflow.", + ) - state.recommendations = recommendations - state.analysis_steps.append("recommendations_generated") - - logger.info(f"Generated {len(recommendations)} rule recommendations") - - return {"recommendations": recommendations, "analysis_steps": 
state.analysis_steps} - - except Exception as e: - logger.error(f"Error generating recommendations: {e}") - state.errors.append(f"Recommendation generation failed: {str(e)}") - return {"errors": state.errors} - - -async def validate_recommendations(state: RepositoryAnalysisState) -> dict[str, Any]: - """ - Validate that generated recommendations contain valid YAML. - """ - try: - logger.info("Validating rule recommendations") - - import yaml - - valid_recommendations = [] - - for rec in state.recommendations: - try: - # Parse YAML to validate syntax - parsed = yaml.safe_load(rec.yaml_content) - if parsed and isinstance(parsed, dict): - valid_recommendations.append(rec) - else: - logger.warning(f"Invalid rule structure: {rec.yaml_content[:100]}...") - except yaml.YAMLError as e: - logger.error(f"Invalid YAML in recommendation: {e}") - continue - - state.recommendations = valid_recommendations - state.analysis_steps.append("recommendations_validated") - - logger.info(f"Validated {len(valid_recommendations)} recommendations") - - return {"recommendations": valid_recommendations, "analysis_steps": state.analysis_steps} - - except Exception as e: - logger.error(f"Error validating recommendations: {e}") - state.errors.append(f"Recommendation validation failed: {str(e)}") - return {"errors": state.errors} - - -async def summarize_analysis(state: RepositoryAnalysisState) -> dict[str, Any]: - """ - Create a summary of the analysis findings. - """ - try: - logger.info("Creating analysis summary") - - summary = { - "repository": state.repository_full_name, - "features_analyzed": { - "has_contributing": state.repository_features.has_contributing, - "has_codeowners": state.repository_features.has_codeowners, - "has_workflows": state.repository_features.has_workflows, - "contributor_count": state.repository_features.contributor_count, - }, - "recommendations_count": len(state.recommendations), - "recommendations_by_category": {}, - "high_confidence_count": 0, - "analysis_steps_completed": len(state.analysis_steps), - "errors_encountered": len(state.errors), - } - - # Count recommendations by category - for rec in state.recommendations: - summary["recommendations_by_category"][rec.category] = ( - summary["recommendations_by_category"].get(rec.category, 0) + 1 - ) - if rec.confidence >= 0.8: - summary["high_confidence_count"] += 1 - state.analysis_summary = summary - state.analysis_steps.append("analysis_summarized") +def validate_recommendations(state: RepositoryAnalysisState) -> None: + """Ensure generated YAML is valid.""" + for rec in state.recommendations: + yaml.safe_load(rec.yaml_rule) + - logger.info("Analysis summary created") +def summarize_analysis(state: RepositoryAnalysisState, request: RepositoryAnalysisRequest) -> RepositoryAnalysisResponse: + """Build the final response.""" + rules_yaml = _render_rules_yaml(state.recommendations) + pr_plan = state.pr_plan or _default_pr_plan(state) + analysis_summary: dict[str, Any] = { + "repository_features": state.repository_features.model_dump(), + "contributing": state.contributing_analysis.model_dump(), + "pr_samples": [pr.model_dump() for pr in state.pr_samples[: request.max_prs]], + } - return {"analysis_summary": summary, "analysis_steps": state.analysis_steps} + return RepositoryAnalysisResponse( + repository_full_name=state.repository_full_name, + rules_yaml=rules_yaml, + recommendations=state.recommendations, + pr_plan=pr_plan, + analysis_summary=analysis_summary, + ) - except Exception as e: - logger.error(f"Error creating analysis summary: 
{e}") - state.errors.append(f"Analysis summary failed: {str(e)}") - return {"errors": state.errors} diff --git a/src/api/recommendations.py b/src/api/recommendations.py index af9b495..a7810ae 100644 --- a/src/api/recommendations.py +++ b/src/api/recommendations.py @@ -5,11 +5,14 @@ from src.agents import get_agent from src.agents.repository_analysis_agent.models import ( + ProceedWithPullRequestRequest, + ProceedWithPullRequestResponse, RepositoryAnalysisRequest, RepositoryAnalysisResponse, ) from src.core.utils.caching import get_cache, set_cache from src.core.utils.logging import log_structured +from src.integrations.github.api import github_client router = APIRouter() logger = logging.getLogger(__name__) @@ -116,6 +119,79 @@ async def recommend_rules( raise HTTPException(status_code=500, detail=f"Analysis failed: {str(e)}") from e +@router.post( + "/v1/rules/recommend/proceed-with-pr", + response_model=ProceedWithPullRequestResponse, + summary="Create a PR with generated Watchflow rules", + description="Creates a branch, commits rules.yaml, and opens a PR using either installation or user token.", +) +async def proceed_with_pr(request: ProceedWithPullRequestRequest) -> ProceedWithPullRequestResponse: + if not request.repository_full_name: + raise HTTPException(status_code=400, detail="repository_full_name or repository_url is required") + if not request.installation_id and not request.user_token: + raise HTTPException(status_code=400, detail="installation_id or user_token is required") + + repo = request.repository_full_name + auth_ctx = {"installation_id": request.installation_id, "user_token": request.user_token} + + repo_data = await github_client.get_repository(repo, **auth_ctx) + base_branch = request.base_branch or (repo_data or {}).get("default_branch", "main") + + base_sha = await github_client.get_git_ref_sha(repo, base_branch, **auth_ctx) + if not base_sha: + raise HTTPException(status_code=400, detail=f"Unable to resolve base branch '{base_branch}'") + + created_ref = await github_client.create_git_ref(repo, request.branch_name, base_sha, **auth_ctx) + if not created_ref: + log_structured( + logger, + "branch_exists_or_create_failed", + operation="proceed_with_pr", + subject_ids=[repo], + branch=request.branch_name, + ) + + file_result = await github_client.create_or_update_file( + repo_full_name=repo, + path=request.file_path, + content=request.rules_yaml, + message=request.commit_message, + branch=request.branch_name, + **auth_ctx, + ) + if not file_result: + raise HTTPException(status_code=400, detail="Failed to create or update rules file") + + pr = await github_client.create_pull_request( + repo_full_name=repo, + title=request.pr_title, + head=request.branch_name, + base=base_branch, + body=request.pr_body, + **auth_ctx, + ) + if not pr: + raise HTTPException(status_code=400, detail="Failed to create pull request") + + log_structured( + logger, + "proceed_with_pr_completed", + operation="proceed_with_pr", + subject_ids=[repo], + decision="success", + branch=request.branch_name, + pr_number=pr.get("number"), + ) + + return ProceedWithPullRequestResponse( + pull_request_url=pr.get("html_url", ""), + branch_name=request.branch_name, + base_branch=base_branch, + file_path=request.file_path, + commit_sha=(file_result.get("commit") or {}).get("sha"), + ) + + @router.get("/v1/rules/recommend/{repository_full_name}") async def get_cached_recommendations(repository_full_name: str) -> JSONResponse: """ diff --git a/src/integrations/github/api.py b/src/integrations/github/api.py 
index d899338..2416ab2 100644 --- a/src/integrations/github/api.py +++ b/src/integrations/github/api.py @@ -29,6 +29,20 @@ def __init__(self): # Cache for installation tokens (TTL: 50 minutes, GitHub tokens expire in 60) self._token_cache: TTLCache = TTLCache(maxsize=100, ttl=50 * 60) + async def _get_auth_headers( + self, + installation_id: int | None = None, + user_token: str | None = None, + accept: str = "application/vnd.github.v3+json", + ) -> dict[str, str] | None: + """Build auth headers using either installation token or a provided user token.""" + token = user_token + if not token and installation_id is not None: + token = await self.get_installation_access_token(installation_id) + if not token: + return None + return {"Authorization": f"Bearer {token}", "Accept": accept} + async def get_installation_access_token(self, installation_id: int) -> str | None: """ Gets an access token for a specific installation of the GitHub App. @@ -61,18 +75,15 @@ async def get_installation_access_token(self, installation_id: int) -> str | Non ) return None - async def get_file_content(self, repo_full_name: str, file_path: str, installation_id: int) -> str | None: + async def get_file_content( + self, repo_full_name: str, file_path: str, installation_id: int | None, user_token: str | None = None + ) -> str | None: """ Fetches the content of a file from a repository. """ - token = await self.get_installation_access_token(installation_id) - if not token: + headers = await self._get_auth_headers(installation_id=installation_id, user_token=user_token, accept="application/vnd.github.raw") + if not headers: return None - - headers = { - "Authorization": f"Bearer {token}", - "Accept": "application/vnd.github.raw", # Get raw content - } url = f"{config.github.api_base_url}/repos/{repo_full_name}/contents/{file_path}" session = await self._get_session() @@ -419,7 +430,12 @@ async def get_pull_request(self, repo: str, pr_number: int, installation_id: int return {} async def list_pull_requests( - self, repo: str, installation_id: int, state: str = "all", per_page: int = 20 + self, + repo: str, + installation_id: int | None = None, + state: str = "all", + per_page: int = 20, + user_token: str | None = None, ) -> list[dict[str, Any]]: """ List pull requests for a repository. @@ -431,12 +447,10 @@ async def list_pull_requests( per_page: max items to fetch (up to 100) """ try: - token = await self.get_installation_access_token(installation_id) - if not token: - logger.error(f"Failed to get installation token for {installation_id}") + headers = await self._get_auth_headers(installation_id=installation_id, user_token=user_token) + if not headers: + logger.error("Failed to resolve auth headers for list_pull_requests") return [] - - headers = {"Authorization": f"Bearer {token}", "Accept": "application/vnd.github.v3+json"} url = f"{config.github.api_base_url}/repos/{repo}/pulls?state={state}&per_page={min(per_page, 100)}" session = await self._get_session() @@ -588,18 +602,15 @@ async def update_deployment_status( logger.error(f"Error updating deployment status: {e}") return None - async def get_repository_contributors(self, repo: str, installation_id: int) -> list[dict[str, Any]]: + async def get_repository_contributors( + self, repo: str, installation_id: int | None = None, user_token: str | None = None + ) -> list[dict[str, Any]]: """ Fetches repository contributors with their contribution counts. 
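+
+        A user token, if provided, takes precedence over the installation token.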
""" - token = await self.get_installation_access_token(installation_id) - if not token: + headers = await self._get_auth_headers(installation_id=installation_id, user_token=user_token) + if not headers: return [] - - headers = { - "Authorization": f"Bearer {token}", - "Accept": "application/vnd.github.v3+json", - } url = f"{config.github.api_base_url}/repos/{repo}/contributors" session = await self._get_session() @@ -704,6 +715,118 @@ async def get_user_issues( ) return [] + async def get_repository( + self, repo_full_name: str, installation_id: int | None = None, user_token: str | None = None + ) -> dict[str, Any] | None: + """Fetch repository metadata (default branch, language, etc.).""" + headers = await self._get_auth_headers(installation_id=installation_id, user_token=user_token) + if not headers: + return None + url = f"{config.github.api_base_url}/repos/{repo_full_name}" + session = await self._get_session() + async with session.get(url, headers=headers) as response: + if response.status == 200: + return await response.json() + return None + + async def list_directory_any_auth( + self, repo_full_name: str, path: str, installation_id: int | None = None, user_token: str | None = None + ) -> list[dict[str, Any]]: + """List directory contents using either installation or user token.""" + headers = await self._get_auth_headers(installation_id=installation_id, user_token=user_token) + if not headers: + return [] + url = f"{config.github.api_base_url}/repos/{repo_full_name}/contents/{path}" + session = await self._get_session() + async with session.get(url, headers=headers) as response: + if response.status == 200: + return await response.json() + return [] + + async def get_git_ref_sha( + self, repo_full_name: str, ref: str, installation_id: int | None = None, user_token: str | None = None + ) -> str | None: + """Get the SHA for a branch/ref.""" + headers = await self._get_auth_headers(installation_id=installation_id, user_token=user_token) + if not headers: + return None + url = f"{config.github.api_base_url}/repos/{repo_full_name}/git/ref/heads/{ref.lstrip('refs/heads/')}" + session = await self._get_session() + async with session.get(url, headers=headers) as response: + if response.status == 200: + data = await response.json() + return data.get("object", {}).get("sha") + return None + + async def create_git_ref( + self, + repo_full_name: str, + ref: str, + sha: str, + installation_id: int | None = None, + user_token: str | None = None, + ) -> bool: + """Create a new git ref/branch.""" + headers = await self._get_auth_headers(installation_id=installation_id, user_token=user_token) + if not headers: + return False + url = f"{config.github.api_base_url}/repos/{repo_full_name}/git/refs" + payload = {"ref": f"refs/heads/{ref.lstrip('refs/heads/')}", "sha": sha} + session = await self._get_session() + async with session.post(url, headers=headers, json=payload) as response: + return response.status in (200, 201) + + async def create_or_update_file( + self, + repo_full_name: str, + path: str, + content: str, + message: str, + branch: str, + installation_id: int | None = None, + user_token: str | None = None, + sha: str | None = None, + ) -> dict[str, Any] | None: + """Create or update a file via the Contents API.""" + headers = await self._get_auth_headers(installation_id=installation_id, user_token=user_token) + if not headers: + return None + url = f"{config.github.api_base_url}/repos/{repo_full_name}/contents/{path.lstrip('/')}" + payload: dict[str, Any] = { + "message": message, + 
"content": base64.b64encode(content.encode()).decode(), + "branch": branch, + } + if sha: + payload["sha"] = sha + session = await self._get_session() + async with session.put(url, headers=headers, json=payload) as response: + if response.status in (200, 201): + return await response.json() + return None + + async def create_pull_request( + self, + repo_full_name: str, + title: str, + head: str, + base: str, + body: str, + installation_id: int | None = None, + user_token: str | None = None, + ) -> dict[str, Any] | None: + """Open a pull request.""" + headers = await self._get_auth_headers(installation_id=installation_id, user_token=user_token) + if not headers: + return None + url = f"{config.github.api_base_url}/repos/{repo_full_name}/pulls" + payload = {"title": title, "head": head, "base": base, "body": body} + session = await self._get_session() + async with session.post(url, headers=headers, json=payload) as response: + if response.status in (200, 201): + return await response.json() + return None + async def _get_session(self) -> aiohttp.ClientSession: """Initializes and returns the aiohttp session.""" if self._session is None or self._session.closed: diff --git a/tests/unit/agents/test_repository_analysis_models.py b/tests/unit/agents/test_repository_analysis_models.py new file mode 100644 index 0000000..f1bcb96 --- /dev/null +++ b/tests/unit/agents/test_repository_analysis_models.py @@ -0,0 +1,12 @@ +from src.agents.repository_analysis_agent.models import RepositoryAnalysisRequest, parse_github_repo_identifier + + +def test_parse_github_repo_identifier_normalizes_url(): + assert parse_github_repo_identifier("https://github.com/owner/repo.git") == "owner/repo" + assert parse_github_repo_identifier("owner/repo/") == "owner/repo" + + +def test_repository_analysis_request_normalizes_from_url(): + request = RepositoryAnalysisRequest(repository_url="https://github.com/owner/repo.git") + assert request.repository_full_name == "owner/repo" + diff --git a/tests/unit/api/test_proceed_with_pr.py b/tests/unit/api/test_proceed_with_pr.py new file mode 100644 index 0000000..a26c3cd --- /dev/null +++ b/tests/unit/api/test_proceed_with_pr.py @@ -0,0 +1,57 @@ +from fastapi.testclient import TestClient + +from src.main import app + + +def test_proceed_with_pr_happy_path(monkeypatch): + client = TestClient(app) + + async def _fake_get_repo(repo_full_name, installation_id=None, user_token=None): + return {"default_branch": "main"} + + async def _fake_get_sha(repo_full_name, ref, installation_id=None, user_token=None): + return "base-sha" + + async def _fake_create_ref(repo_full_name, ref, sha, installation_id=None, user_token=None): + return True + + async def _fake_create_or_update_file( + repo_full_name, path, content, message, branch, installation_id=None, user_token=None, sha=None + ): + return {"commit": {"sha": "new-sha"}} + + async def _fake_create_pr(repo_full_name, title, head, base, body, installation_id=None, user_token=None): + return {"html_url": "https://github.com/owner/repo/pull/1", "number": 1} + + from src.integrations.github import api as github_api + + monkeypatch.setattr(github_api.github_client, "get_repository", _fake_get_repo) + monkeypatch.setattr(github_api.github_client, "get_git_ref_sha", _fake_get_sha) + monkeypatch.setattr(github_api.github_client, "create_git_ref", _fake_create_ref) + monkeypatch.setattr(github_api.github_client, "create_or_update_file", _fake_create_or_update_file) + monkeypatch.setattr(github_api.github_client, "create_pull_request", _fake_create_pr) + 
+ payload = { + "repository_full_name": "owner/repo", + "installation_id": 123, + "rules_yaml": "description: sample\nenabled: true", + "branch_name": "watchflow/rules", + "pr_title": "Add Watchflow rules", + "pr_body": "Body", + } + + response = client.post("/api/v1/rules/recommend/proceed-with-pr", json=payload) + assert response.status_code == 200 + data = response.json() + assert data["pull_request_url"] == "https://github.com/owner/repo/pull/1" + assert data["branch_name"] == "watchflow/rules" + assert data["file_path"] == ".watchflow/rules.yaml" + + +def test_proceed_with_pr_requires_auth(monkeypatch): + client = TestClient(app) + payload = {"repository_full_name": "owner/repo", "rules_yaml": "description: sample\nenabled: true"} + + response = client.post("/api/v1/rules/recommend/proceed-with-pr", json=payload) + assert response.status_code == 400 + From 6d68ed5651b058738e39ef831bcef100953d4daf Mon Sep 17 00:00:00 2001 From: naaa760 Date: Mon, 15 Dec 2025 17:19:20 +0530 Subject: [PATCH 2/2] fix: resolve pre-commit hook issues (indentation, formatting, B005 warnings) --- src/agents/repository_analysis_agent/agent.py | 3 +-- src/agents/repository_analysis_agent/nodes.py | 15 +++++++-------- src/integrations/github/api.py | 10 +++++++--- .../agents/test_repository_analysis_models.py | 1 - tests/unit/api/test_proceed_with_pr.py | 1 - 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/agents/repository_analysis_agent/agent.py b/src/agents/repository_analysis_agent/agent.py index 9e1957c..acadc95 100644 --- a/src/agents/repository_analysis_agent/agent.py +++ b/src/agents/repository_analysis_agent/agent.py @@ -9,12 +9,12 @@ from src.agents.base import AgentResult, BaseAgent from src.agents.repository_analysis_agent.models import RepositoryAnalysisRequest, RepositoryAnalysisState from src.agents.repository_analysis_agent.nodes import ( + _default_recommendations, analyze_contributing_guidelines, analyze_pr_history, analyze_repository_structure, summarize_analysis, validate_recommendations, - _default_recommendations, ) @@ -57,4 +57,3 @@ async def execute(self, **kwargs) -> AgentResult: data={}, metadata={"execution_time_ms": latency_ms}, ) - diff --git a/src/agents/repository_analysis_agent/nodes.py b/src/agents/repository_analysis_agent/nodes.py index 20b88df..8e97ab2 100644 --- a/src/agents/repository_analysis_agent/nodes.py +++ b/src/agents/repository_analysis_agent/nodes.py @@ -77,9 +77,9 @@ async def analyze_contributing_guidelines(state: RepositoryAnalysisState) -> Non repo = state.repository_full_name installation_id = state.installation_id - content = await github_client.get_file_content(repo, "CONTRIBUTING.md", installation_id) or await github_client.get_file_content( - repo, ".github/CONTRIBUTING.md", installation_id - ) + content = await github_client.get_file_content( + repo, "CONTRIBUTING.md", installation_id + ) or await github_client.get_file_content(repo, ".github/CONTRIBUTING.md", installation_id) if not content: state.contributing_analysis = ContributingGuidelinesAnalysis(content=None) @@ -93,9 +93,7 @@ async def analyze_contributing_guidelines(state: RepositoryAnalysisState) -> Non requires_tests="test" in lowered or "tests" in lowered, requires_docs="docs" in lowered or "documentation" in lowered, code_style_requirements=[ - req - for req in ["lint", "format", "pep8", "flake8", "eslint", "prettier"] - if req in lowered + req for req in ["lint", "format", "pep8", "flake8", "eslint", "prettier"] if req in lowered ], review_requirements=[req for req in 
["review", "approval"] if req in lowered], ) @@ -224,7 +222,9 @@ def validate_recommendations(state: RepositoryAnalysisState) -> None: yaml.safe_load(rec.yaml_rule) -def summarize_analysis(state: RepositoryAnalysisState, request: RepositoryAnalysisRequest) -> RepositoryAnalysisResponse: +def summarize_analysis( + state: RepositoryAnalysisState, request: RepositoryAnalysisRequest +) -> RepositoryAnalysisResponse: """Build the final response.""" rules_yaml = _render_rules_yaml(state.recommendations) pr_plan = state.pr_plan or _default_pr_plan(state) @@ -241,4 +241,3 @@ def summarize_analysis(state: RepositoryAnalysisState, request: RepositoryAnalys pr_plan=pr_plan, analysis_summary=analysis_summary, ) - diff --git a/src/integrations/github/api.py b/src/integrations/github/api.py index 2416ab2..5c0f2b5 100644 --- a/src/integrations/github/api.py +++ b/src/integrations/github/api.py @@ -81,7 +81,9 @@ async def get_file_content( """ Fetches the content of a file from a repository. """ - headers = await self._get_auth_headers(installation_id=installation_id, user_token=user_token, accept="application/vnd.github.raw") + headers = await self._get_auth_headers( + installation_id=installation_id, user_token=user_token, accept="application/vnd.github.raw" + ) if not headers: return None url = f"{config.github.api_base_url}/repos/{repo_full_name}/contents/{file_path}" @@ -750,7 +752,8 @@ async def get_git_ref_sha( headers = await self._get_auth_headers(installation_id=installation_id, user_token=user_token) if not headers: return None - url = f"{config.github.api_base_url}/repos/{repo_full_name}/git/ref/heads/{ref.lstrip('refs/heads/')}" + ref_clean = ref.removeprefix("refs/heads/") if ref.startswith("refs/heads/") else ref + url = f"{config.github.api_base_url}/repos/{repo_full_name}/git/ref/heads/{ref_clean}" session = await self._get_session() async with session.get(url, headers=headers) as response: if response.status == 200: @@ -771,7 +774,8 @@ async def create_git_ref( if not headers: return False url = f"{config.github.api_base_url}/repos/{repo_full_name}/git/refs" - payload = {"ref": f"refs/heads/{ref.lstrip('refs/heads/')}", "sha": sha} + ref_clean = ref.removeprefix("refs/heads/") if ref.startswith("refs/heads/") else ref + payload = {"ref": f"refs/heads/{ref_clean}", "sha": sha} session = await self._get_session() async with session.post(url, headers=headers, json=payload) as response: return response.status in (200, 201) diff --git a/tests/unit/agents/test_repository_analysis_models.py b/tests/unit/agents/test_repository_analysis_models.py index f1bcb96..0c420dc 100644 --- a/tests/unit/agents/test_repository_analysis_models.py +++ b/tests/unit/agents/test_repository_analysis_models.py @@ -9,4 +9,3 @@ def test_parse_github_repo_identifier_normalizes_url(): def test_repository_analysis_request_normalizes_from_url(): request = RepositoryAnalysisRequest(repository_url="https://github.com/owner/repo.git") assert request.repository_full_name == "owner/repo" - diff --git a/tests/unit/api/test_proceed_with_pr.py b/tests/unit/api/test_proceed_with_pr.py index a26c3cd..334b412 100644 --- a/tests/unit/api/test_proceed_with_pr.py +++ b/tests/unit/api/test_proceed_with_pr.py @@ -54,4 +54,3 @@ def test_proceed_with_pr_requires_auth(monkeypatch): response = client.post("/api/v1/rules/recommend/proceed-with-pr", json=payload) assert response.status_code == 400 -