Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions .github/branch-protection-policy.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
{
"branch": "main",
"required_approving_review_count": 1,
"required_status_checks": [
"applitools-core",
"pr-agent",
"deep-agent",
"audit-pr-evidence",
"backend",
"backend-postgres",
"coverage",
"CodeQL",
"codecov-analytics",
"Analyze (actions)",
"Analyze (javascript-typescript)",
"Analyze (python)",
"CodeRabbit",
"dependency-review",
"compose-smoke",
"frontend",
"label",
"codacy-equivalent-zero",
"sonar-branch-zero",
"Seer Code Review",
"SonarCloud Code Analysis"
],
"strict": true,
"require_linear_history": true,
"require_conversation_resolution": false
}
39 changes: 39 additions & 0 deletions .github/workflows/strict21-preflight.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
name: strict-21 Preflight

on:
workflow_dispatch:
pull_request:
branches:
- main

permissions:
contents: read

jobs:
strict21-preflight:
name: strict-21-preflight
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4

- name: Run strict-21 preflight
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
STAMP: ${{ github.event.pull_request.number || github.run_id }}
run: |
set -euo pipefail
mkdir -p .tmp/strict21-preflight
python3 scripts/strict21_preflight.py \
--repo "${GITHUB_REPOSITORY}" \
--branch main \
--ref "${GITHUB_SHA}" \
--out-json ".tmp/strict21-preflight/preflight.json" \
--out-md ".tmp/strict21-preflight/preflight.md"
cat .tmp/strict21-preflight/preflight.md

- name: Upload strict-21 artifact
uses: actions/upload-artifact@v4
with:
name: strict21-preflight
path: .tmp/strict21-preflight
301 changes: 301 additions & 0 deletions scripts/strict21_preflight.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,301 @@
#!/usr/bin/env python3
from __future__ import annotations

import argparse
import json
import os
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
from urllib.parse import urlparse
from urllib.error import HTTPError, URLError
from urllib.request import Request, urlopen

PERMISSION_HTTP_CODES = {401, 403, 404}
DEFAULT_CANONICAL_CONTEXTS = [
"applitools-core",
"pr-agent",
"deep-agent",
"audit-pr-evidence",
"backend",
"backend-postgres",
"coverage",
"CodeQL",
"codecov-analytics",
"Analyze (actions)",
"Analyze (javascript-typescript)",
"Analyze (python)",
"CodeRabbit",
"dependency-review",
"compose-smoke",
"frontend",
"label",
"codacy-equivalent-zero",
"sonar-branch-zero",
"Seer Code Review",
"SonarCloud Code Analysis",
]
Comment on lines +15 to +38

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Action required

1. Contexts don't match repo 🐞 Bug ✓ Correctness

The preflight enforces a large hard-coded canonical context list and fails when any are missing from
branch protection or emitted checks. In this repo, the documented required check is only verify
and the workflows shown do not define most canonical contexts, so strict21-preflight is very likely
to mark PRs non_compliant and block merges once required.
Agent Prompt
## Issue description
The strict-21 preflight enforces a hard-coded canonical contexts list that appears inconsistent with the repo’s documented/defined CI checks (e.g., `verify`). This likely causes `non_compliant` results and blocks PRs.

## Issue Context
- The script defaults to `DEFAULT_CANONICAL_CONTEXTS` and fails when any context is missing.
- Repo documentation and workflows indicate `verify` is the required check today.

## Fix Focus Areas
- scripts/strict21_preflight.py[15-38]
- scripts/strict21_preflight.py[93-97]
- scripts/strict21_preflight.py[99-128]
- .github/workflows/strict21-preflight.yml[20-32]
- .github/BRANCH_PROTECTION.md[5-12]
- .github/workflows/verify.yml[13-17]

ⓘ Copy this prompt and use it to remediate the issue with your preferred AI generation tools



@dataclass
class PreflightResult:
status: str
findings: list[str]
missing_in_branch_protection: list[str]
missing_in_check_runs: list[str]
ref_sha: str | None
http_status: int | None = None
http_error: str | None = None


def _parse_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(
description="Strict-21 preflight: compare canonical contexts against branch protection and emitted check-runs."
)
parser.add_argument("--repo", required=True, help="GitHub repository in owner/repo format")
parser.add_argument("--ref", default="main", help="Ref (branch/tag/SHA) used for emitted check context inventory")
parser.add_argument("--branch", default="main", help="Branch used for branch-protection context inventory")
parser.add_argument("--api-base", default="https://api.github.com", help="GitHub API base URL")
parser.add_argument(
"--canonical-contexts",
default="",
help="Optional comma-separated canonical context names; defaults to built-in strict-21 list.",
)
parser.add_argument("--out-json", required=True, help="Output JSON path")
parser.add_argument("--out-md", required=True, help="Output markdown path")
return parser.parse_args()


def _classify_http_status(code: int) -> str:
return "inconclusive_permissions" if code in PERMISSION_HTTP_CODES else "api_error"

Comment on lines +70 to +72

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Action required

2. Preflight fails open on 403/404 🐞 Bug ⛨ Security

API permission/availability failures (including HTTP 401/403/404) are classified as
inconclusive_permissions and the script exits 0, so the workflow can succeed without having
validated compliance. This undermines the intended “preflight gate”.
Agent Prompt
## Issue description
The preflight gate succeeds (exit 0) when it cannot validate compliance due to missing token or certain HTTP errors (401/403/404). This is a fail-open pattern for a protection gate.

## Issue Context
- HTTP 401/403/404 map to `inconclusive_permissions`.
- `main()` returns failure only for `non_compliant` or `api_error`.

## Fix Focus Areas
- scripts/strict21_preflight.py[15-16]
- scripts/strict21_preflight.py[70-72]
- scripts/strict21_preflight.py[189-197]
- scripts/strict21_preflight.py[295-297]

ⓘ Copy this prompt and use it to remediate the issue with your preferred AI generation tools


def _api_get(api_base: str, repo: str, path: str, token: str) -> dict[str, Any]:
url = f"{api_base.rstrip('/')}/repos/{repo}/{path.lstrip('/')}"
parsed = urlparse(url)
if parsed.scheme not in {"http", "https"} or not parsed.netloc:
raise ValueError(f"Unsupported API URL: {url!r}")
req = Request(
url,
headers={
"Accept": "application/vnd.github+json",
"Authorization": f"Bearer {token}",
"X-GitHub-Api-Version": "2022-11-28",
"User-Agent": "reframe-strict21-preflight",
},
method="GET",
)
Comment on lines +79 to +88

Check failure

Code scanning / CodeQL

Partial server-side request forgery Critical

Part of the URL of this request depends on a
user-provided value
.

Copilot Autofix

AI about 18 hours ago

In general, to fix partial SSRF when part of a URL is user-controlled, you either (a) restrict the user to selecting from a set of known-good base URLs, or (b) strictly validate the supplied URL to ensure it targets only allowed hosts and protocols and does not point to internal or loopback addresses.

For this script, the best fix with minimal functional impact is to validate args.api_base before it is used in _api_get. We can introduce a small helper that parses and enforces that the host is one of a small whitelist of allowed GitHub API hosts (for example, api.github.com and api.github.com-equivalents that might be needed like github.com APIs, or optionally GitHub Enterprise patterns if you want to support them explicitly). Because we must not assume more about the environment than we see, a conservative approach is to:

  1. Parse api_base with urlparse.
  2. Ensure the scheme is https (and optionally allow http only if you truly need it; current code allows both, but tightening to https is safer).
  3. Ensure the hostname is one of a small allowed set, e.g., api.github.com.

This helper will be called in main() to normalize/validate args.api_base before passing it to _run_preflight, so that _api_get only ever receives a vetted base URL. This preserves existing behavior for normal usage (default https://api.github.com) while preventing redirection to attacker-chosen hosts.

Concretely:

  • Add a new function, for example _normalize_api_base(raw: str) -> str, near _api_get, that applies these checks and returns a clean base URL string.
  • In main(), after parsing args and before calling _run_preflight, replace uses of args.api_base with a validated api_base variable produced by _normalize_api_base.
  • Adjust _run_preflight to accept and use this validated api_base instead of the raw, unvalidated args.api_base.

No new imports are required; urlparse is already imported and used.


Suggested changeset 1
scripts/strict21_preflight.py

Autofix patch

Autofix patch
Run the following command in your local git repository to apply this patch
cat << 'EOF' | git apply
diff --git a/scripts/strict21_preflight.py b/scripts/strict21_preflight.py
--- a/scripts/strict21_preflight.py
+++ b/scripts/strict21_preflight.py
@@ -71,6 +71,26 @@
     return "inconclusive_permissions" if code in PERMISSION_HTTP_CODES else "api_error"
 
 
+def _normalize_api_base(raw: str) -> str:
+    """
+    Validate and normalize the GitHub API base URL.
+
+    This ensures that the base URL uses HTTPS and targets an expected GitHub API host,
+    preventing arbitrary redirection to attacker-controlled endpoints.
+    """
+    parsed = urlparse(raw)
+    if parsed.scheme not in {"https"}:
+        raise ValueError(f"Unsupported API scheme in {raw!r}; only 'https' is allowed")
+    hostname = parsed.hostname or ""
+    # Restrict to the public GitHub API host. Extend this list explicitly if needed.
+    allowed_hosts = {"api.github.com"}
+    if hostname not in allowed_hosts:
+        raise ValueError(f"Unsupported API host in {raw!r}")
+    # Reconstruct a normalized base without path/query/fragment
+    netloc = parsed.netloc
+    return f"{parsed.scheme}://{netloc}"
+
+
 def _api_get(api_base: str, repo: str, path: str, token: str) -> dict[str, Any]:
     url = f"{api_base.rstrip('/')}/repos/{repo}/{path.lstrip('/')}"
     parsed = urlparse(url)
@@ -228,15 +248,16 @@
     args: argparse.Namespace,
     canonical: list[str],
     token: str,
+    api_base: str,
 ) -> tuple[PreflightResult, list[str], list[str]]:
     try:
-        protection = _api_get(args.api_base, args.repo, f"branches/{args.branch}/protection", token)
-        ref_payload = _api_get(args.api_base, args.repo, f"commits/{args.ref}", token)
+        protection = _api_get(api_base, args.repo, f"branches/{args.branch}/protection", token)
+        ref_payload = _api_get(api_base, args.repo, f"commits/{args.ref}", token)
         ref_sha = str(ref_payload.get("sha") or "").strip() or None
         if ref_sha is None:
             raise RuntimeError(f"Unable to resolve SHA for ref {args.ref!r}")
-        check_runs = _api_get(args.api_base, args.repo, f"commits/{ref_sha}/check-runs?per_page=100", token)
-        status_payload = _api_get(args.api_base, args.repo, f"commits/{ref_sha}/status", token)
+        check_runs = _api_get(api_base, args.repo, f"commits/{ref_sha}/check-runs?per_page=100", token)
+        status_payload = _api_get(api_base, args.repo, f"commits/{ref_sha}/status", token)
 
         branch_required_checks = sorted((protection.get("required_status_checks") or {}).get("contexts") or [])
         emitted_contexts = _collect_emitted_contexts(check_runs, status_payload)
@@ -264,10 +280,12 @@
     if not token:
         result, branch_required_checks, emitted_contexts = _missing_token_result()
     else:
+        api_base = _normalize_api_base(args.api_base)
         result, branch_required_checks, emitted_contexts = _run_preflight(
             args=args,
             canonical=canonical,
             token=token,
+            api_base=api_base,
         )
 
     payload = {
EOF
@@ -71,6 +71,26 @@
return "inconclusive_permissions" if code in PERMISSION_HTTP_CODES else "api_error"


def _normalize_api_base(raw: str) -> str:
"""
Validate and normalize the GitHub API base URL.

This ensures that the base URL uses HTTPS and targets an expected GitHub API host,
preventing arbitrary redirection to attacker-controlled endpoints.
"""
parsed = urlparse(raw)
if parsed.scheme not in {"https"}:
raise ValueError(f"Unsupported API scheme in {raw!r}; only 'https' is allowed")
hostname = parsed.hostname or ""
# Restrict to the public GitHub API host. Extend this list explicitly if needed.
allowed_hosts = {"api.github.com"}
if hostname not in allowed_hosts:
raise ValueError(f"Unsupported API host in {raw!r}")
# Reconstruct a normalized base without path/query/fragment
netloc = parsed.netloc
return f"{parsed.scheme}://{netloc}"


def _api_get(api_base: str, repo: str, path: str, token: str) -> dict[str, Any]:
url = f"{api_base.rstrip('/')}/repos/{repo}/{path.lstrip('/')}"
parsed = urlparse(url)
@@ -228,15 +248,16 @@
args: argparse.Namespace,
canonical: list[str],
token: str,
api_base: str,
) -> tuple[PreflightResult, list[str], list[str]]:
try:
protection = _api_get(args.api_base, args.repo, f"branches/{args.branch}/protection", token)
ref_payload = _api_get(args.api_base, args.repo, f"commits/{args.ref}", token)
protection = _api_get(api_base, args.repo, f"branches/{args.branch}/protection", token)
ref_payload = _api_get(api_base, args.repo, f"commits/{args.ref}", token)
ref_sha = str(ref_payload.get("sha") or "").strip() or None
if ref_sha is None:
raise RuntimeError(f"Unable to resolve SHA for ref {args.ref!r}")
check_runs = _api_get(args.api_base, args.repo, f"commits/{ref_sha}/check-runs?per_page=100", token)
status_payload = _api_get(args.api_base, args.repo, f"commits/{ref_sha}/status", token)
check_runs = _api_get(api_base, args.repo, f"commits/{ref_sha}/check-runs?per_page=100", token)
status_payload = _api_get(api_base, args.repo, f"commits/{ref_sha}/status", token)

branch_required_checks = sorted((protection.get("required_status_checks") or {}).get("contexts") or [])
emitted_contexts = _collect_emitted_contexts(check_runs, status_payload)
@@ -264,10 +280,12 @@
if not token:
result, branch_required_checks, emitted_contexts = _missing_token_result()
else:
api_base = _normalize_api_base(args.api_base)
result, branch_required_checks, emitted_contexts = _run_preflight(
args=args,
canonical=canonical,
token=token,
api_base=api_base,
)

payload = {
Copilot is powered by AI and may make mistakes. Always verify output.
with urlopen(req, timeout=30) as resp: # nosec B310 - URL scheme and host are validated above
return json.loads(resp.read().decode("utf-8"))


def _canonical_contexts(raw: str) -> list[str]:
if not raw.strip():
return list(DEFAULT_CANONICAL_CONTEXTS)
return [item.strip() for item in raw.split(",") if item.strip()]


def evaluate_contexts(
*,
canonical_contexts: list[str],
branch_required_checks: list[str],
emitted_contexts: list[str],
ref_sha: str | None,
) -> PreflightResult:
missing_in_branch = [ctx for ctx in canonical_contexts if ctx not in branch_required_checks]
missing_in_emitted = [ctx for ctx in canonical_contexts if ctx not in emitted_contexts]

findings: list[str] = []
if missing_in_branch:
findings.append(
"Canonical contexts missing from branch protection: "
+ ", ".join(missing_in_branch)
)
if missing_in_emitted:
findings.append(
"Canonical contexts missing from emitted checks on ref: "
+ ", ".join(missing_in_emitted)
)

status = "compliant" if not findings else "non_compliant"
return PreflightResult(
status=status,
findings=findings,
missing_in_branch_protection=missing_in_branch,
missing_in_check_runs=missing_in_emitted,
ref_sha=ref_sha,
)


def _collect_emitted_contexts(check_runs: dict[str, Any], status_payload: dict[str, Any]) -> list[str]:
contexts: set[str] = set(_collect_named_values(check_runs.get("check_runs") or [], "name"))
contexts.update(_collect_named_values(status_payload.get("statuses") or [], "context"))
return sorted(contexts)


def _collect_named_values(items: list[dict[str, Any]], field: str) -> list[str]:
values: list[str] = []
for item in items:
value = str(item.get(field) or "").strip()
if value:
values.append(value)
return values


def _render_markdown(payload: dict[str, Any]) -> str:
lines = [
"# strict-21 Preflight",
"",
f"- Status: `{payload['status']}`",
f"- Repo: `{payload['repo']}`",
f"- Branch policy target: `{payload['branch']}`",
f"- Ref target: `{payload['ref']}`",
f"- Resolved ref SHA: `{payload.get('ref_sha') or 'unknown'}`",
f"- Timestamp (UTC): `{payload['timestamp_utc']}`",
"",
"## Findings",
]

findings = payload.get("findings") or []
if findings:
lines.extend(f"- {item}" for item in findings)
else:
lines.append("- None")

lines.extend(
[
"",
"## Missing contexts",
"",
f"- branch protection missing: `{len(payload.get('missing_in_branch_protection') or [])}`",
f"- emitted checks missing: `{len(payload.get('missing_in_check_runs') or [])}`",
]
)

if payload.get("http_status") is not None:
lines.extend(
[
"",
"## API details",
f"- HTTP status: `{payload['http_status']}`",
f"- Message: `{payload.get('http_error') or ''}`",
]
)

return "\n".join(lines) + "\n"


def _missing_token_result() -> tuple[PreflightResult, list[str], list[str]]:
result = PreflightResult(
status="inconclusive_permissions",
findings=["GitHub token missing; strict-21 preflight cannot query branch protection/check-runs."],
missing_in_branch_protection=[],
missing_in_check_runs=[],
ref_sha=None,
)
return result, [], []


def _http_error_result(exc: HTTPError) -> tuple[PreflightResult, list[str], list[str]]:
message = exc.read().decode("utf-8", errors="replace")[:1000]
result = PreflightResult(
status=_classify_http_status(exc.code),
findings=[f"GitHub API request failed (HTTP {exc.code}) while running strict-21 preflight."],
missing_in_branch_protection=[],
missing_in_check_runs=[],
ref_sha=None,
http_status=exc.code,
http_error=message,
)
return result, [], []


def _url_error_result(exc: URLError) -> tuple[PreflightResult, list[str], list[str]]:
result = PreflightResult(
status="api_error",
findings=["Network error while requesting GitHub API for strict-21 preflight."],
missing_in_branch_protection=[],
missing_in_check_runs=[],
ref_sha=None,
http_error=str(exc.reason),
)
return result, [], []


def _run_preflight(
*,
args: argparse.Namespace,
canonical: list[str],
token: str,
) -> tuple[PreflightResult, list[str], list[str]]:
try:
protection = _api_get(args.api_base, args.repo, f"branches/{args.branch}/protection", token)
ref_payload = _api_get(args.api_base, args.repo, f"commits/{args.ref}", token)
ref_sha = str(ref_payload.get("sha") or "").strip() or None
if ref_sha is None:
raise RuntimeError(f"Unable to resolve SHA for ref {args.ref!r}")
check_runs = _api_get(args.api_base, args.repo, f"commits/{ref_sha}/check-runs?per_page=100", token)
status_payload = _api_get(args.api_base, args.repo, f"commits/{ref_sha}/status", token)

branch_required_checks = sorted((protection.get("required_status_checks") or {}).get("contexts") or [])
emitted_contexts = _collect_emitted_contexts(check_runs, status_payload)
result = evaluate_contexts(
canonical_contexts=canonical,
branch_required_checks=branch_required_checks,
emitted_contexts=emitted_contexts,
ref_sha=ref_sha,
)
return result, branch_required_checks, emitted_contexts
except HTTPError as exc:
return _http_error_result(exc)
except URLError as exc:
return _url_error_result(exc)


def main() -> int:
args = _parse_args()
out_json = Path(args.out_json)
out_md = Path(args.out_md)
canonical = _canonical_contexts(args.canonical_contexts)

token = (os.environ.get("GITHUB_TOKEN") or "").strip() or (os.environ.get("GH_TOKEN") or "").strip()
now = datetime.now(timezone.utc).isoformat()
if not token:
result, branch_required_checks, emitted_contexts = _missing_token_result()
else:
result, branch_required_checks, emitted_contexts = _run_preflight(
args=args,
canonical=canonical,
token=token,
)

payload = {
"status": result.status,
"repo": args.repo,
"branch": args.branch,
"ref": args.ref,
"ref_sha": result.ref_sha,
"timestamp_utc": now,
"canonical_contexts": canonical,
"branch_protection_required_checks": branch_required_checks,
"emitted_contexts": emitted_contexts,
"missing_in_branch_protection": result.missing_in_branch_protection,
"missing_in_check_runs": result.missing_in_check_runs,
"findings": result.findings,
"http_status": result.http_status,
"http_error": result.http_error,
}

out_json.parent.mkdir(parents=True, exist_ok=True)

Check failure

Code scanning / CodeQL

Uncontrolled data used in path expression High

This path depends on a
user-provided value
.
out_md.parent.mkdir(parents=True, exist_ok=True)

Check failure

Code scanning / CodeQL

Uncontrolled data used in path expression High

This path depends on a
user-provided value
.

Copilot Autofix

AI about 18 hours ago

In general, to fix uncontrolled path usage you normalize the user-provided path and ensure it stays within a trusted base directory, or otherwise constrain it via an allow‑list or safe filename transformation. Here, we only need to write result files; we can safely constrain both out_json and out_md to reside under a base output directory (e.g. the current working directory) by resolving them and verifying they don’t escape that base.

The best fix with minimal behavior change is:

  • Define a small helper, e.g. _safe_subpath(base: Path, target: Path) -> Path, that:
    • Resolves base to an absolute path (e.g. base.resolve()).
    • Resolves target as if under that base (e.g. (base / target).resolve()).
    • Checks that the resolved target is within the base using Path.is_relative_to (Py3.9+) or a relative_to try/except fallback.
    • Raises a ValueError if the target escapes the base.
  • In main(), instead of using Path(args.out_json) and Path(args.out_md) directly, compute them via this helper with a base of Path.cwd() (or another appropriate base, but we must not assume more than what we see); e.g.:
    • base_output = Path.cwd().resolve()
    • out_json = _safe_subpath(base_output, Path(args.out_json))
    • out_md = _safe_subpath(base_output, Path(args.out_md))
  • Leave the later calls (out_json.parent.mkdir(...), write_text(...)) unchanged; they now operate on validated paths.

This keeps functionality (the user can still specify nested paths under the working directory) while preventing directory traversal or writing outside the intended workspace.

Suggested changeset 1
scripts/strict21_preflight.py

Autofix patch

Autofix patch
Run the following command in your local git repository to apply this patch
cat << 'EOF' | git apply
diff --git a/scripts/strict21_preflight.py b/scripts/strict21_preflight.py
--- a/scripts/strict21_preflight.py
+++ b/scripts/strict21_preflight.py
@@ -67,6 +67,26 @@
     return parser.parse_args()
 
 
+def _safe_subpath(base: Path, target: Path) -> Path:
+    """
+    Return a resolved path for ``target`` under ``base``, ensuring it does not escape ``base``.
+    """
+    base_resolved = base.resolve()
+    candidate = (base_resolved / target).resolve()
+    try:
+        # Python 3.9+ provides is_relative_to; fall back to relative_to for older versions.
+        is_relative = candidate.is_relative_to(base_resolved)  # type: ignore[attr-defined]
+    except AttributeError:
+        try:
+            candidate.relative_to(base_resolved)
+            is_relative = True
+        except ValueError:
+            is_relative = False
+    if not is_relative:
+        raise ValueError(f"Output path {candidate} escapes base directory {base_resolved}")
+    return candidate
+
+
 def _classify_http_status(code: int) -> str:
     return "inconclusive_permissions" if code in PERMISSION_HTTP_CODES else "api_error"
 
@@ -255,8 +275,9 @@
 
 def main() -> int:
     args = _parse_args()
-    out_json = Path(args.out_json)
-    out_md = Path(args.out_md)
+    base_output = Path.cwd()
+    out_json = _safe_subpath(base_output, Path(args.out_json))
+    out_md = _safe_subpath(base_output, Path(args.out_md))
     canonical = _canonical_contexts(args.canonical_contexts)
 
     token = (os.environ.get("GITHUB_TOKEN") or "").strip() or (os.environ.get("GH_TOKEN") or "").strip()
EOF
@@ -67,6 +67,26 @@
return parser.parse_args()


def _safe_subpath(base: Path, target: Path) -> Path:
"""
Return a resolved path for ``target`` under ``base``, ensuring it does not escape ``base``.
"""
base_resolved = base.resolve()
candidate = (base_resolved / target).resolve()
try:
# Python 3.9+ provides is_relative_to; fall back to relative_to for older versions.
is_relative = candidate.is_relative_to(base_resolved) # type: ignore[attr-defined]
except AttributeError:
try:
candidate.relative_to(base_resolved)
is_relative = True
except ValueError:
is_relative = False
if not is_relative:
raise ValueError(f"Output path {candidate} escapes base directory {base_resolved}")
return candidate


def _classify_http_status(code: int) -> str:
return "inconclusive_permissions" if code in PERMISSION_HTTP_CODES else "api_error"

@@ -255,8 +275,9 @@

def main() -> int:
args = _parse_args()
out_json = Path(args.out_json)
out_md = Path(args.out_md)
base_output = Path.cwd()
out_json = _safe_subpath(base_output, Path(args.out_json))
out_md = _safe_subpath(base_output, Path(args.out_md))
canonical = _canonical_contexts(args.canonical_contexts)

token = (os.environ.get("GITHUB_TOKEN") or "").strip() or (os.environ.get("GH_TOKEN") or "").strip()
Copilot is powered by AI and may make mistakes. Always verify output.
out_json.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\n", encoding="utf-8")

Check failure

Code scanning / CodeQL

Uncontrolled data used in path expression High

This path depends on a
user-provided value
.

Copilot Autofix

AI about 18 hours ago

In general, to fix uncontrolled path usage for CLI-provided output paths, we should normalize the path and then either (a) restrict it to lie within a safe root directory, or (b) otherwise sanitize it so that dangerous constructs like .. or absolute paths are rejected. For a reporting/preflight script, a common pattern is to write all outputs under a known working directory (for example, the current directory or a configurable base directory) and ensure user-specified paths cannot escape that root.

For this specific script, the minimal, functionality-preserving fix is to introduce a safe output root (e.g., the current working directory) and validate that --out-json and --out-md resolve to paths within that root after normalization. We can use Path.resolve() to normalize and obtain absolute paths, and then check that each resolved path has the root as one of its parents (or is equal to the root). If a user attempts to supply a path outside the root (/etc/passwd, ../../secret), we raise a clear error instead of writing. This keeps the intended ability to choose output filenames and subdirectories but defends against directory traversal and writes to unexpected locations.

Concretely, in main() in scripts/strict21_preflight.py, around lines 257–260 and before we use out_json and out_md, we will:

  1. Define a root directory, such as output_root = Path.cwd().resolve().
  2. Convert the arguments to paths and resolve them: out_json = Path(args.out_json).resolve(), out_md = Path(args.out_md).resolve().
  3. Verify output_root is a parent of both resolved paths (or equal). If not, raise ValueError with an explanatory message.
  4. Use the validated out_json and out_md as before (mkdir, write).

This change only affects the handling of the two output arguments and does not modify imports or other functionality.

Suggested changeset 1
scripts/strict21_preflight.py

Autofix patch

Autofix patch
Run the following command in your local git repository to apply this patch
cat << 'EOF' | git apply
diff --git a/scripts/strict21_preflight.py b/scripts/strict21_preflight.py
--- a/scripts/strict21_preflight.py
+++ b/scripts/strict21_preflight.py
@@ -255,8 +255,13 @@
 
 def main() -> int:
     args = _parse_args()
-    out_json = Path(args.out_json)
-    out_md = Path(args.out_md)
+    output_root = Path.cwd().resolve()
+    out_json = Path(args.out_json).resolve()
+    out_md = Path(args.out_md).resolve()
+    if output_root not in (out_json, *out_json.parents):
+        raise ValueError(f"Output JSON path must be under {output_root}, got {out_json}")
+    if output_root not in (out_md, *out_md.parents):
+        raise ValueError(f"Output markdown path must be under {output_root}, got {out_md}")
     canonical = _canonical_contexts(args.canonical_contexts)
 
     token = (os.environ.get("GITHUB_TOKEN") or "").strip() or (os.environ.get("GH_TOKEN") or "").strip()
EOF
@@ -255,8 +255,13 @@

def main() -> int:
args = _parse_args()
out_json = Path(args.out_json)
out_md = Path(args.out_md)
output_root = Path.cwd().resolve()
out_json = Path(args.out_json).resolve()
out_md = Path(args.out_md).resolve()
if output_root not in (out_json, *out_json.parents):
raise ValueError(f"Output JSON path must be under {output_root}, got {out_json}")
if output_root not in (out_md, *out_md.parents):
raise ValueError(f"Output markdown path must be under {output_root}, got {out_md}")
canonical = _canonical_contexts(args.canonical_contexts)

token = (os.environ.get("GITHUB_TOKEN") or "").strip() or (os.environ.get("GH_TOKEN") or "").strip()
Copilot is powered by AI and may make mistakes. Always verify output.
out_md.write_text(_render_markdown(payload), encoding="utf-8")

Check failure

Code scanning / CodeQL

Uncontrolled data used in path expression High

This path depends on a
user-provided value
.

Copilot Autofix

AI about 18 hours ago

In general, to fix uncontrolled path usage for files derived from user input, you should constrain where files can be written and/or normalize and validate the path before use. A common pattern is to define a fixed “output root” directory, resolve user-provided paths relative to that root, normalize via Path.resolve() or os.path.realpath(), and then ensure the final path remains within the root. If it does not, fail with an error instead of writing the file. This prevents ../ traversal and disallows redirecting output to unintended locations.

For this script, the minimal fix without changing observable behavior too much is:

  • Introduce an output root directory (e.g., current working directory by default).
  • Resolve out_json and out_md relative to that root.
  • Normalize them with Path.resolve(strict=False) (or resolve()), and then check that their resolved paths are within the root directory using Path.is_relative_to() if available, or an equivalent check with relative_to() in a try/except.
  • If the check fails, raise a clear exception instead of writing.

Since we must only modify the shown code in scripts/strict21_preflight.py, we can:

  1. Compute a base_dir = Path.cwd().resolve() in main().
  2. Replace the simple Path(args.out_json) / Path(args.out_md) with validated helper calls like _safe_output_path(base_dir, args.out_json) and _safe_output_path(base_dir, args.out_md).
  3. Add a small helper function _safe_output_path(base_dir: Path, user_path: str) -> Path above main() that:
    • Creates a Path(user_path).
    • If it is absolute, uses it as-is but still validates it against base_dir (or, alternatively, disallow absolute paths).
    • Joins with base_dir if it is relative.
    • Calls .resolve() and checks that the resolved path is under base_dir. If not, raises ValueError.

This retains the ability to specify file names relative to the current directory and prevents directory traversal outside the output root. No new imports are needed; Path is already imported.


Suggested changeset 1
scripts/strict21_preflight.py

Autofix patch

Autofix patch
Run the following command in your local git repository to apply this patch
cat << 'EOF' | git apply
diff --git a/scripts/strict21_preflight.py b/scripts/strict21_preflight.py
--- a/scripts/strict21_preflight.py
+++ b/scripts/strict21_preflight.py
@@ -90,6 +90,26 @@
         return json.loads(resp.read().decode("utf-8"))
 
 
+def _safe_output_path(base_dir: Path, user_path: str) -> Path:
+    """
+    Construct a normalized output path under ``base_dir`` from a user-provided
+    path. Raises ValueError if the resulting path would escape ``base_dir``.
+    """
+    raw_path = Path(user_path)
+
+    if not raw_path.is_absolute():
+        candidate = (base_dir / raw_path).resolve()
+    else:
+        candidate = raw_path.resolve()
+
+    try:
+        candidate.relative_to(base_dir)
+    except ValueError:
+        raise ValueError(f"Output path {candidate!r} is outside allowed directory {base_dir!r}")
+
+    return candidate
+
+
 def _canonical_contexts(raw: str) -> list[str]:
     if not raw.strip():
         return list(DEFAULT_CANONICAL_CONTEXTS)
@@ -255,8 +275,9 @@
 
 def main() -> int:
     args = _parse_args()
-    out_json = Path(args.out_json)
-    out_md = Path(args.out_md)
+    base_dir = Path.cwd().resolve()
+    out_json = _safe_output_path(base_dir, args.out_json)
+    out_md = _safe_output_path(base_dir, args.out_md)
     canonical = _canonical_contexts(args.canonical_contexts)
 
     token = (os.environ.get("GITHUB_TOKEN") or "").strip() or (os.environ.get("GH_TOKEN") or "").strip()
EOF
@@ -90,6 +90,26 @@
return json.loads(resp.read().decode("utf-8"))


def _safe_output_path(base_dir: Path, user_path: str) -> Path:
"""
Construct a normalized output path under ``base_dir`` from a user-provided
path. Raises ValueError if the resulting path would escape ``base_dir``.
"""
raw_path = Path(user_path)

if not raw_path.is_absolute():
candidate = (base_dir / raw_path).resolve()
else:
candidate = raw_path.resolve()

try:
candidate.relative_to(base_dir)
except ValueError:
raise ValueError(f"Output path {candidate!r} is outside allowed directory {base_dir!r}")

return candidate


def _canonical_contexts(raw: str) -> list[str]:
if not raw.strip():
return list(DEFAULT_CANONICAL_CONTEXTS)
@@ -255,8 +275,9 @@

def main() -> int:
args = _parse_args()
out_json = Path(args.out_json)
out_md = Path(args.out_md)
base_dir = Path.cwd().resolve()
out_json = _safe_output_path(base_dir, args.out_json)
out_md = _safe_output_path(base_dir, args.out_md)
canonical = _canonical_contexts(args.canonical_contexts)

token = (os.environ.get("GITHUB_TOKEN") or "").strip() or (os.environ.get("GH_TOKEN") or "").strip()
Copilot is powered by AI and may make mistakes. Always verify output.

if result.status in {"non_compliant", "api_error"}:
return 1
return 0


if __name__ == "__main__":
raise SystemExit(main())