Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/agents/repository_analysis_agent/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ class RepositoryAnalysisRequest(BaseModel):
repository_url: str | None = Field(default=None, description="GitHub repository URL")
repository_full_name: str | None = Field(default=None, description="Full repository name (owner/repo)")
installation_id: int | None = Field(default=None, description="GitHub App installation ID")
user_token: str | None = Field(default=None, description="User token for GitHub operations (optional)")

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

For security, sensitive values like tokens should be stored in Pydantic's SecretStr type to prevent them from being exposed in logs or other string representations of the model. You will need to add SecretStr to your pydantic imports.

Additionally, this new user_token field is not currently being used in the agent's workflow. For it to be effective, it needs to be passed down through RepositoryAnalysisState to the various github_client API calls. This involves:

  1. Adding user_token to RepositoryAnalysisState.
  2. Initializing it in RepositoryAnalysisAgent.execute.
  3. Passing it to github_client methods in the analysis nodes.

When accessing the token value from the SecretStr field, you'll need to use .get_secret_value().

Suggested change
- user_token: str | None = Field(default=None, description="User token for GitHub operations (optional)")
+ user_token: SecretStr | None = Field(default=None, description="User token for GitHub operations (optional)")

max_prs: int = Field(default=10, ge=0, le=50, description="Max PRs to sample for analysis")

@field_validator("repository_full_name", mode="before")
Expand Down
31 changes: 12 additions & 19 deletions src/agents/repository_analysis_agent/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from __future__ import annotations

import logging
- import textwrap
from typing import Any

import yaml
Expand Down Expand Up @@ -258,7 +257,7 @@ def _default_recommendations(
severity: medium
event_types:
- pull_request
- parameters:
+ parameters:
source_patterns:
{source_patterns_yaml}
test_patterns:
Expand Down Expand Up @@ -294,17 +293,14 @@ def _default_recommendations(

recommendations.append(
RuleRecommendation(
- yaml_rule=textwrap.dedent(
- """
- description: "Ensure PRs include context"
+ yaml_rule="""description: "Ensure PRs include context"
  enabled: true
- severity: low
+ severity: low
  event_types:
  - pull_request
- parameters:
- min_description_length: 50
- """
- ).strip(),
+ parameters:
+ min_description_length: 50
+ """.strip(),
Comment on lines +296 to +303

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

While the YAML string is now correctly formatted, generating YAML by hardcoding multiline strings can be fragile and hard to maintain. A more robust approach is to define rules as Python dictionaries and then serialize them to YAML. This eliminates the risk of manual indentation or syntax errors.

This would also simplify the overall flow, as you are currently creating a YAML string, which is then parsed back into a dictionary in _render_rules_yaml and validate_recommendations.

Consider this alternative approach:

import yaml

rule_dict = {
    "description": "Ensure PRs include context",
    "enabled": True,
    "severity": "low",
    "event_types": ["pull_request"],
    "parameters": {
        "min_description_length": 50,
    },
}

recommendations.append(
    RuleRecommendation(
        yaml_rule=yaml.dump(rule_dict, sort_keys=False),
        confidence=desc_confidence,
        reasoning=desc_reasoning,
        strategy_used="static",
    )
)

This would likely require adjusting RuleRecommendation or how it's used, but it would make the code more robust and easier to manage in the long term.

confidence=desc_confidence,
reasoning=desc_reasoning,
strategy_used="static",
Expand All @@ -313,18 +309,15 @@ def _default_recommendations(

# Add a repository-specific rule if we detect specific patterns
if state.repository_features.has_workflows:
- workflow_rule = textwrap.dedent(
- """
- description: "Protect CI/CD workflows"
+ workflow_rule = """description: "Protect CI/CD workflows"
  enabled: true
- severity: high
+ severity: high
  event_types:
  - pull_request
- parameters:
- file_patterns:
- - ".github/workflows/**"
- """
- ).strip()
+ parameters:
+ file_patterns:
+ - ".github/workflows/**"
+ """.strip()

recommendations.append(
RuleRecommendation(
Expand Down