From 21e4859727a0f3728f104d47361af7e8d82691df Mon Sep 17 00:00:00 2001 From: Test User Date: Sat, 14 Feb 2026 22:24:05 -0700 Subject: [PATCH 1/6] feat(gates): add dependency installation pre-flight check Add _ensure_dependencies_installed() to auto-install missing dependencies before running test gates. Prevents false failures when dependencies aren't installed yet. - Check Python: requirements.txt exists but no .venv/venv directory - Check Node: package.json exists but no node_modules directory - Install using uv (Python) or npm (Node) with 5-minute timeout - New parameter: auto_install_deps (default=True) on gates.run() - Return ERROR gate check if installation fails Tests: 6 new tests in TestDependencyPreFlight covering auto-install, skip cases, and failure handling. Part of #385 (Step 1/5: Dependency installation pre-flight) --- codeframe/core/gates.py | 140 +++++++++++++ ...26-02-09-react-integration-tests-review.md | 129 ++++++++++++ .../2026-02-11-detect-success-fix-review.md | 105 ++++++++++ .../2026-02-12-api-key-propagation-review.md | 186 ++++++++++++++++++ tests/core/test_gates_observability.py | 117 +++++++++++ 5 files changed, 677 insertions(+) create mode 100644 docs/code-review/2026-02-09-react-integration-tests-review.md create mode 100644 docs/code-review/2026-02-11-detect-success-fix-review.md create mode 100644 docs/code-review/2026-02-12-api-key-propagation-review.md diff --git a/codeframe/core/gates.py b/codeframe/core/gates.py index 2c37d74e..a4a474f6 100644 --- a/codeframe/core/gates.py +++ b/codeframe/core/gates.py @@ -151,10 +151,116 @@ def get_errors_by_file(self) -> dict[str, list[str]]: return by_file +def _ensure_dependencies_installed( + repo_path: Path, + auto_install: bool = True, +) -> tuple[bool, str]: + """Ensure project dependencies are installed before running test gates. + + Checks for missing dependencies and optionally installs them: + - Python: If requirements.txt exists but no .venv/ or venv/ directory + - Node.js: If package.json exists but no node_modules/ directory + + Args: + repo_path: Path to the repository root + auto_install: Whether to auto-install missing dependencies (default: True) + + Returns: + Tuple of (success: bool, message: str) + - success=True means deps installed or already present + - success=False means installation failed + """ + messages = [] + + # Check Python dependencies + requirements_txt = repo_path / "requirements.txt" + venv_dirs = [repo_path / ".venv", repo_path / "venv"] + has_venv = any(d.exists() and d.is_dir() for d in venv_dirs) + + if requirements_txt.exists() and not has_venv: + if not auto_install: + messages.append("Python dependencies not installed (auto-install disabled)") + else: + # Try to install using uv first, fallback to pip + uv_bin = shutil.which("uv") + pip_bin = shutil.which("pip") + + if uv_bin: + try: + result = subprocess.run( + ["uv", "pip", "install", "-r", str(requirements_txt)], + cwd=repo_path, + capture_output=True, + text=True, + timeout=300, # 5 minutes + ) + if result.returncode == 0: + messages.append(f"Installed Python dependencies via uv: {requirements_txt.name}") + else: + return False, f"Failed to install Python dependencies: {result.stderr}" + except Exception as e: + return False, f"Error installing Python dependencies: {e}" + elif pip_bin: + try: + result = subprocess.run( + ["pip", "install", "-r", str(requirements_txt)], + cwd=repo_path, + capture_output=True, + text=True, + timeout=300, + ) + if result.returncode == 0: + messages.append(f"Installed Python dependencies via pip: {requirements_txt.name}") + else: + return False, f"Failed to install Python dependencies: {result.stderr}" + except Exception as e: + return False, f"Error installing Python dependencies: {e}" + else: + messages.append("Python dependencies needed but no package manager found (uv/pip)") + elif requirements_txt.exists() and has_venv: + messages.append("Python dependencies already installed (venv exists)") + + # Check Node.js dependencies + package_json = repo_path / "package.json" + node_modules = repo_path / "node_modules" + + if package_json.exists() and not node_modules.exists(): + if not auto_install: + messages.append("Node dependencies not installed (auto-install disabled)") + else: + npm_bin = shutil.which("npm") + + if npm_bin: + try: + result = subprocess.run( + ["npm", "install"], + cwd=repo_path, + capture_output=True, + text=True, + timeout=300, # 5 minutes + ) + if result.returncode == 0: + messages.append("Installed Node dependencies via npm") + else: + return False, f"Failed to install Node dependencies: {result.stderr}" + except Exception as e: + return False, f"Error installing Node dependencies: {e}" + else: + messages.append("Node dependencies needed but npm not found") + elif package_json.exists() and node_modules.exists(): + messages.append("Node dependencies already installed (node_modules exists)") + + # Success if we get here + if not messages: + return True, "No dependency checks needed" + return True, " | ".join(messages) + + def run( workspace: Workspace, gates: Optional[list[str]] = None, verbose: bool = False, + auto_install_deps: bool = True, ) -> GateResult: """Run verification gates. @@ -162,6 +268,7 @@ def run( workspace: Target workspace gates: Specific gates to run (None = all available) verbose: Whether to capture full output + auto_install_deps: Whether to auto-install missing dependencies before test gates (default: True) Returns: GateResult with all check results @@ -179,6 +286,39 @@ def run( print_event=True, ) + # PRE-FLIGHT: Ensure dependencies are installed before running test gates + dep_success, dep_message = _ensure_dependencies_installed( + workspace.repo_path, + auto_install=auto_install_deps, + ) + if verbose: + print(f"[gates] Dependency check: {dep_message}") + + # If dependency installation fails, create an ERROR check and return early + if not dep_success: + check = GateCheck( + name="dependency-check", + status=GateStatus.ERROR, + output=f"Dependency installation failed: {dep_message}", + ) + result = GateResult( + passed=False, + checks=[check], + started_at=started_at, + completed_at=_utc_now(), + ) + events.emit_for_workspace( + workspace, + events.EventType.GATES_COMPLETED, + { + "passed": False, + "summary": "Dependency installation failed", + "checks": [{"name": check.name, "status": check.status.value}], + }, + print_event=True, + ) + return result + checks: list[GateCheck] = [] repo_path = workspace.repo_path diff --git a/docs/code-review/2026-02-09-react-integration-tests-review.md b/docs/code-review/2026-02-09-react-integration-tests-review.md new file mode 100644 index 00000000..6b537a4c --- /dev/null +++ b/docs/code-review/2026-02-09-react-integration-tests-review.md @@ -0,0 +1,129 @@ +# Code Review Report: ReactAgent CLI Integration Tests + +**Date:** 2026-02-09 +**Reviewer:** Code Review Agent +**Component:** TestReactAgentIntegration (issue #368) +**Files Reviewed:** tests/cli/test_v2_cli_integration.py +**Ready for Production:** Yes (with minor improvements noted) + +## Executive Summary + +This PR adds 3 CLI integration tests for ReactAgent runtime parameters (verbose, dry-run, streaming). The tests follow existing file patterns, pass reliably, and correctly use MockProvider. Two minor issues identified: the `mock_llm` return value is unused (inconsistent with existing tests that capture `provider`), and the `ws` variable is unused in `test_react_verbose_mode` and `test_react_dry_run`. + +**Critical Issues:** 0 +**Major Issues:** 0 +**Minor Issues:** 2 +**Positive Findings:** 5 + +--- + +## Review Context + +**Code Type:** Test code (CLI integration tests) +**Risk Level:** Low — test-only, no production code modified +**Business Constraints:** Test quality — must actually validate acceptance criteria from issue #368 + +### Review Focus Areas + +- ✅ Test Quality — Do assertions cover the acceptance criteria? +- ✅ Pattern Consistency — Follow existing conventions in test file +- ✅ Reliability — Deterministic, no flaky timing issues +- ❌ OWASP Web/LLM/ML — Not applicable (test code) +- ❌ Zero Trust — Not applicable (test code) + +--- + +## Priority 3 Issues - Minor 📝 + +### 1. `mock_llm` return value discarded — inconsistent with existing pattern + +**Location:** `test_v2_cli_integration.py:979, 1000, 1020` +**Severity:** Minor (Nitpick) +**Category:** Pattern Consistency + +**Problem:** +Existing tests in `TestAIAgentExecution` capture the return value as `provider = mock_llm([...])` and assert `provider.call_count >= 1` to verify the LLM was actually called. The new tests call `mock_llm([...])` without capturing the return, so there's no verification that the MockProvider was invoked. + +**Current Code:** +```python +mock_llm([MOCK_REACT_COMPLETION]) # return value discarded +``` + +**Suggested Fix:** +```python +provider = mock_llm([MOCK_REACT_COMPLETION]) +# ... invoke CLI ... +assert provider.call_count >= 1 +``` + +**Impact:** Without this, a test could pass even if the code path silently skipped the LLM call entirely (e.g., due to early return). This is a defense-in-depth assertion. + +### 2. Unused `ws` variable in two tests + +**Location:** `test_v2_cli_integration.py:981, 1002` +**Severity:** Minor (Nitpick) +**Category:** Code Quality + +**Problem:** +In `test_react_verbose_mode` and `test_react_dry_run`, the `ws` variable is created via `create_or_load_workspace()` but only used to list tasks. The workspace object itself is never referenced again. This is a minor inconsistency — in `test_react_streaming_output_log`, `ws` is correctly reused for `runtime.list_runs(ws, ...)`. + +**Note:** This matches the existing pattern in `TestAIAgentExecution` where `ws` is also created just to call `tasks.list_tasks()`, so this is consistent with the file. No change required — flagging only for awareness. + +--- + +## Positive Findings ✨ + +### Excellent Practices + +- **Correct adaptation of traycer plan:** The original plan incorrectly assumed dry-run would raise ValueError, but PR #367 added full dry_run support to ReactAgent. The test correctly verifies success instead. + +- **MockProvider text-only response pattern:** Using a text-only response (`MOCK_REACT_COMPLETION`) correctly leverages ReactAgent's loop termination logic — no tool calls = completion. This avoids complex mock setup. + +- **Streaming test verifies full pipeline:** `test_react_streaming_output_log` validates both the output.log creation AND that `cf work follow` can read it — testing the real integration between RunOutputLogger and the follow command. + +### Good Architectural Decisions + +- **Avoided fragile threading:** The streaming test uses "execute then follow" rather than threading, which avoids flaky timing issues. Real-time streaming is already covered by `test_work_follow.py`. + +- **Consistent with file conventions:** New tests use the same `workspace_with_ready_tasks` fixture, `mock_llm` pattern, `runner.invoke()` style, and error message format as existing tests. + +### Organization +- **Clear section numbering:** Renumbered PR commands section from 16→17 to make room for the new section 16. +- **Issue reference in class docstring:** Links to GitHub issue #368 for traceability. + +--- + +## Testing Recommendations + +### Edge Cases to Consider (Future) + +- [ ] Test `--verbose --dry-run --engine react` (all three flags combined) +- [ ] Test `--engine react` with an invalid task ID (error path) +- [ ] Test that verbose mode output goes to both stdout AND output.log (verifying `_verbose_print` dual-write) + +--- + +## Action Items Summary + +### Immediate (Before Merge) +None required — all issues are minor/nitpick level. + +### Short-term (Optional Improvements) +1. Capture `provider = mock_llm(...)` return value and assert `call_count >= 1` for defense-in-depth +2. Consider adding combined flags test (`--verbose --dry-run --engine react`) + +--- + +## Conclusion + +Clean, focused PR that adds 3 well-structured integration tests covering the acceptance criteria from issue #368. Tests pass reliably (3/3), follow existing file patterns, and correctly use MockProvider. The adaptation from the traycer plan (dry-run success vs failure) shows good understanding of the current codebase state. + +**Recommendation:** Approve for merge. Minor improvements optional. + +--- + +## Metrics + +- **Lines of Code Reviewed:** 103 additions, 1 deletion +- **Functions/Methods Reviewed:** 3 test methods + 1 constant +- **Security Patterns Checked:** 0 (not applicable to test code) diff --git a/docs/code-review/2026-02-11-detect-success-fix-review.md b/docs/code-review/2026-02-11-detect-success-fix-review.md new file mode 100644 index 00000000..2a82938c --- /dev/null +++ b/docs/code-review/2026-02-11-detect-success-fix-review.md @@ -0,0 +1,105 @@ +# Code Review Report: _detect_success() False Positive Fix + +**Date:** 2026-02-11 +**Reviewer:** Code Review Agent +**Component:** GoldenPathRunner success detection + CLI exit codes +**Files Reviewed:** `tests/e2e/cli/golden_path_runner.py`, `codeframe/cli/app.py`, `tests/e2e/cli/test_detect_success.py`, `tests/cli/test_work_exit_codes.py` +**Ready for Production:** Yes + +## Executive Summary + +Bug fix for `_detect_success()` returning false positives when CLI output has no matching patterns. The fix changes the default from "assume success" to "assume failure" (conservative/fail-safe). Additionally, CLI now returns exit code 1 for BLOCKED/FAILED agent states. One minor issue (debug log skipped on exit) was found and fixed during review. + +**Critical Issues:** 0 +**Major Issues:** 0 +**Minor Issues:** 1 (fixed during review) +**Positive Findings:** 3 + +--- + +## Review Context + +**Code Type:** Test infrastructure + CLI behavior +**Risk Level:** Low-Medium +**Business Constraints:** Test reliability — false positives mask real failures + +### Review Focus Areas + +- ✅ Reliability — Logic correctness, pattern matching behavior +- ✅ Correctness — Exit code placement, code flow impact +- ✅ Test coverage — Edge case completeness +- ❌ OWASP Web/LLM/ML — Not applicable (test infrastructure code) +- ❌ Zero Trust — Not applicable (no auth/access control) +- ❌ Performance — Not applicable (not a hot path) + +--- + +## Priority 1 Issues - Critical + +None. + +--- + +## Priority 2 Issues - Major + +None. + +--- + +## Priority 3 Issues - Minor + +### Debug log path skipped on BLOCKED/FAILED exit (FIXED) +**Location:** `codeframe/cli/app.py:2066-2073` +**Severity:** Minor +**Category:** Reliability + +**Problem:** Initial implementation placed `raise typer.Exit(1)` inside the BLOCKED/FAILED branches, which skipped the debug log location output below. + +**Fix Applied:** Moved `typer.Exit(1)` after the debug log section using a single `if state.status in (BLOCKED, FAILED)` check. Committed as a follow-up fix. + +--- + +## Positive Findings + +### TDD Approach +Tests were written first, confirmed failing (3 failures matching the 3 bugs), then the fix was applied. This is excellent practice for bug fixes. + +### Conservative Default (Fail-Safe Principle) +Changing the default from "assume success" to "assume failure" is the correct defensive approach. False failures are visible and fixable; false successes silently mask problems. + +### Failure-First Pattern Matching +Reordering to check failure patterns before success patterns ensures that mixed output (both success and failure markers) correctly returns `False`. This prevents masking errors that occur after a partial success message. + +--- + +## Testing Recommendations + +All testing needs are met: + +- [x] Empty output → False (core bug scenario) +- [x] No matching patterns → False +- [x] Explicit success → True +- [x] Each failure pattern → False +- [x] Non-zero exit code → False (overrides patterns) +- [x] Mixed success + failure → False +- [x] CLI exit code 0 for COMPLETED +- [x] CLI exit code 1 for FAILED +- [x] CLI exit code 1 for BLOCKED + +--- + +## Future Considerations + +### Pre-existing: No `else` clause for unknown AgentStatus +The if/elif chain for COMPLETED/BLOCKED/FAILED has no `else`. If a new status (e.g., TIMEOUT) is added, the CLI would silently return exit 0. Not introduced by this PR. + +### Case-sensitive pattern matching +`_detect_success()` uses case-sensitive `in` checks. The CLI prints "Task completed successfully!" with specific casing. If output format changes, patterns need updating. + +--- + +## Conclusion + +Clean, focused bug fix with comprehensive tests. The core change (1 line: `return exit_code == 0` → `return False`) directly addresses the reported issue. The CLI exit code improvement is a natural companion fix. One minor issue found during review was fixed immediately. + +**Recommendation:** Deploy. diff --git a/docs/code-review/2026-02-12-api-key-propagation-review.md b/docs/code-review/2026-02-12-api-key-propagation-review.md new file mode 100644 index 00000000..c0e1311c --- /dev/null +++ b/docs/code-review/2026-02-12-api-key-propagation-review.md @@ -0,0 +1,186 @@ +# Code Review Report: API Key Propagation Fix (#376) + +**Date:** 2026-02-12 +**Reviewer:** Code Review Agent +**Component:** CLI API Key Validation & Subprocess Propagation +**PR:** #383 +**Files Reviewed:** `codeframe/cli/validators.py`, `codeframe/cli/app.py`, `codeframe/core/tasks.py`, `tests/core/test_cli_validators.py`, `tests/e2e/cli/golden_path_runner.py`, `tests/e2e/cli/test_detect_success.py` +**Ready for Production:** Yes + +## Executive Summary + +This PR adds fail-fast API key validation to CLI commands that require LLM access, preventing silent fallbacks and cryptic errors. The implementation is well-structured, follows existing project patterns, and correctly handles the Python exception hierarchy (`json.JSONDecodeError` subclassing `ValueError`). No security vulnerabilities found. Two minor UX and test coverage gaps noted. + +**Critical Issues:** 0 +**Major Issues:** 0 +**Minor Issues:** 2 +**Positive Findings:** 5 + +--- + +## Review Context + +**Code Type:** CLI input validation, credential handling, error propagation +**Risk Level:** Medium — handles API credentials, modifies exit code behavior +**Business Constraints:** Bug fix for silent failures — reliability is paramount + +### Review Focus Areas + +- ✅ **Credential Handling Security** — API key validation, .env parsing, no accidental key leakage +- ✅ **Reliability / Error Propagation** — Exception hierarchy, exit codes, state mutation ordering +- ✅ **Test Completeness** — Coverage of critical paths, edge cases, test isolation +- ❌ OWASP Web Top 10 — Not applicable (no HTTP endpoints changed) +- ❌ OWASP LLM Top 10 — Not applicable (no LLM prompt changes) +- ❌ Performance — Not applicable (validation is O(1)) + +--- + +## Priority 1 Issues - Critical ⛔ + +None. + +--- + +## Priority 2 Issues - Major ⚠️ + +None. + +--- + +## Priority 3 Issues - Minor 📝 + +### 1. `work_retry` validates API key before status checks +**Location:** `codeframe/cli/app.py:2494` +**Severity:** Minor +**Category:** UX / Error Message Ordering + +**Problem:** +`require_anthropic_api_key()` runs before the task status checks (DONE, IN_PROGRESS). If a user runs `cf work retry ` without an API key, they see "ANTHROPIC_API_KEY is not set" instead of the more helpful "Task is already completed." + +**Current Code:** +```python +task = matching[0] + +# Validate API key before any state modifications +from codeframe.cli.validators import require_anthropic_api_key +require_anthropic_api_key() + +# Reset task to READY if it's FAILED or BLOCKED +if task.status in (TaskStatus.FAILED, TaskStatus.BLOCKED): + ... +elif task.status == TaskStatus.DONE: + console.print("[yellow]Task is already completed[/yellow]") + raise typer.Exit(0) +``` + +**Suggested Fix:** +Move validation after the non-retryable status checks but before the state-modifying reset: + +```python +task = matching[0] + +if task.status == TaskStatus.IN_PROGRESS: + console.print("[yellow]Task is currently running[/yellow]") + raise typer.Exit(1) +elif task.status == TaskStatus.DONE: + console.print("[yellow]Task is already completed[/yellow]") + raise typer.Exit(0) + +# Validate API key before state modifications (only reached for retryable statuses) +from codeframe.cli.validators import require_anthropic_api_key +require_anthropic_api_key() + +if task.status in (TaskStatus.FAILED, TaskStatus.BLOCKED): + runtime.reset_blocked_run(workspace, task.id) + ... +``` + +**Verdict:** Non-blocking. The current behavior is technically correct — the user does need an API key to retry. Just not the most helpful error for this edge case. + +--- + +### 2. Missing integration tests for `work_retry` and `batch_run` validation +**Location:** `tests/core/test_cli_validators.py` +**Severity:** Minor +**Category:** Test Coverage + +**Problem:** +Integration tests cover `tasks generate` and `work start --execute`, but `work retry` and `batch run` validation are untested at the CLI integration level. + +**Recommendation:** +Consider adding in a follow-up PR: +- `TestWorkRetryValidation::test_work_retry_without_key_exits` +- `TestBatchRunValidation::test_batch_run_without_key_exits` + +These are harder to set up (require failed/ready tasks in the fixture) and the validation code is identical (`require_anthropic_api_key()`), so the risk of regression is low. + +--- + +## Positive Findings ✨ + +### Excellent Practices + +- **Validate before mutate:** `work_start` validates the API key *before* `start_task_run()`, preventing tasks from being left stuck in IN_PROGRESS — this is the correct "measure twice, cut once" pattern. + +- **Exception hierarchy awareness:** The `json.JSONDecodeError` → `ValueError` → `Exception` ordering in `tasks.py` correctly handles Python's exception subclassing. This was validated by the CI failure that caught the original issue. + +- **Test isolation:** `Path.home()` is mocked in `test_raises_exit_when_key_missing_everywhere` to prevent developer `~/.env` files from interfering with test results. Good defensive testing. + +### Good Architectural Decisions + +- **Centralized validation:** Single `require_anthropic_api_key()` function prevents validation logic from being duplicated across 4 CLI commands. + +- **Bypass flags respected:** `--no-llm` and `--stub` correctly skip API key validation, maintaining existing non-LLM workflows. + +### Security Wins + +- **No key leakage:** The API key value is never logged, printed, or included in error messages. Only the environment variable name appears in error output. + +- **Subprocess env isolation:** `_build_env()` creates a copy of `os.environ` — modifications to the returned dict don't affect the parent process. + +--- + +## Testing Recommendations + +### Existing Coverage (All Passing) +- [x] Unit tests for `require_anthropic_api_key()` (4 tests) +- [x] CLI integration for `tasks generate` validation (2 tests) +- [x] CLI integration for `work start` validation (2 tests) +- [x] `_build_env()` env loading (5 tests) +- [x] `_detect_success()` with non-zero exit code (1 test) +- [x] Regression: `test_generate_llm_returns_invalid_json_falls_back` (JSONDecodeError handling) + +### Suggested Future Tests +- [ ] `work retry` without API key exits non-zero +- [ ] `batch run` without API key exits non-zero +- [ ] `work start --execute` with missing key does NOT create a run record + +--- + +## Action Items Summary + +### Immediate (Before Production) +None required — PR is production-ready. + +### Short-term (Next Sprint) +1. Consider reordering `work_retry` validation after status checks (Minor #1) +2. Add integration tests for `work retry` and `batch run` paths (Minor #2) + +### Long-term (Backlog) +1. Consider a custom exception class (e.g., `ConfigurationError`) to replace bare `ValueError` for config issues, avoiding conflicts with Python stdlib exceptions that subclass `ValueError`. + +--- + +## Conclusion + +This PR correctly fixes the silent API key failure problem across all LLM-dependent CLI commands. The implementation is clean, well-tested (24 new tests), and follows the project's established patterns. The exception hierarchy fix (`JSONDecodeError` before `ValueError`) demonstrates good understanding of Python's exception model. Two minor issues noted are non-blocking and can be addressed in follow-up work. + +**Recommendation:** Deploy as-is. Minor improvements can follow. + +--- + +## Metrics +- **Lines of Code Changed:** 340 additions, 3 deletions +- **Files Modified:** 6 (2 new, 4 modified) +- **New Tests:** 24 +- **Security Patterns Checked:** Credential handling, key leakage, subprocess isolation diff --git a/tests/core/test_gates_observability.py b/tests/core/test_gates_observability.py index 035c027d..eccb6185 100644 --- a/tests/core/test_gates_observability.py +++ b/tests/core/test_gates_observability.py @@ -297,6 +297,123 @@ def test_linter_config_has_autofix_cmd(self): def test_linter_config_autofix_cmd_defaults_none(self): """autofix_cmd defaults to None when not specified.""" + + +class TestDependencyPreFlight: + """Tests for dependency installation pre-flight checks.""" + + @patch("codeframe.core.gates.subprocess.run") + @patch("codeframe.core.gates.shutil.which") + def test_python_deps_missing_auto_installs(self, mock_which, mock_run, tmp_path): + """When requirements.txt exists but no venv, auto-install deps if enabled.""" + from codeframe.core.gates import _ensure_dependencies_installed + + # Create requirements.txt + (tmp_path / "requirements.txt").write_text("pytest==7.0.0\n") + + # uv is available + mock_which.side_effect = lambda cmd: "/usr/bin/uv" if cmd == "uv" else None + + # Mock successful installation + mock_run.return_value = subprocess.CompletedProcess( + args=["uv", "pip", "install", "-r", "requirements.txt"], + returncode=0, + stdout="Successfully installed pytest-7.0.0", + stderr="", + ) + + success, message = _ensure_dependencies_installed(tmp_path, auto_install=True) + + assert success is True + assert "installed" in message.lower() or "success" in message.lower() + mock_run.assert_called_once() + + @patch("codeframe.core.gates.subprocess.run") + @patch("codeframe.core.gates.shutil.which") + def test_node_deps_missing_auto_installs(self, mock_which, mock_run, tmp_path): + """When package.json exists but no node_modules, auto-install deps.""" + from codeframe.core.gates import _ensure_dependencies_installed + + # Create package.json + (tmp_path / "package.json").write_text('{"name": "test", "dependencies": {"jest": "^29.0.0"}}') + + # npm is available + mock_which.side_effect = lambda cmd: "/usr/bin/npm" if cmd == "npm" else None + + # Mock successful installation + mock_run.return_value = subprocess.CompletedProcess( + args=["npm", "install"], + returncode=0, + stdout="added 100 packages", + stderr="", + ) + + success, message = _ensure_dependencies_installed(tmp_path, auto_install=True) + + assert success is True + mock_run.assert_called_once() + + def test_venv_exists_skips_python_install(self, tmp_path): + """When .venv exists, skip Python dependency installation.""" + from codeframe.core.gates import _ensure_dependencies_installed + + # Create requirements.txt and .venv directory + (tmp_path / "requirements.txt").write_text("pytest==7.0.0\n") + (tmp_path / ".venv").mkdir() + + success, message = _ensure_dependencies_installed(tmp_path, auto_install=True) + + assert success is True + assert "skip" in message.lower() or "already" in message.lower() + + def test_node_modules_exists_skips_npm_install(self, tmp_path): + """When node_modules exists, skip npm install.""" + from codeframe.core.gates import _ensure_dependencies_installed + + # Create package.json and node_modules directory + (tmp_path / "package.json").write_text('{"name": "test"}') + (tmp_path / "node_modules").mkdir() + + success, message = _ensure_dependencies_installed(tmp_path, auto_install=True) + + assert success is True + + def test_auto_install_disabled_skips_installation(self, tmp_path): + """When auto_install=False, skip installation and return skip message.""" + from codeframe.core.gates import _ensure_dependencies_installed + + # Create requirements.txt without venv + (tmp_path / "requirements.txt").write_text("pytest==7.0.0\n") + + success, message = _ensure_dependencies_installed(tmp_path, auto_install=False) + + assert success is True + assert "disabled" in message.lower() or "skip" in message.lower() + + @patch("codeframe.core.gates.subprocess.run") + @patch("codeframe.core.gates.shutil.which") + def test_installation_failure_returns_false(self, mock_which, mock_run, tmp_path): + """When dependency installation fails, return (False, error_message).""" + from codeframe.core.gates import _ensure_dependencies_installed + + # Create requirements.txt + (tmp_path / "requirements.txt").write_text("nonexistent-package==99.99.99\n") + + # uv is available + mock_which.side_effect = lambda cmd: "/usr/bin/uv" if cmd == "uv" else None + + # Mock failed installation + mock_run.return_value = subprocess.CompletedProcess( + args=["uv", "pip", "install", "-r", "requirements.txt"], + returncode=1, + stdout="", + stderr="ERROR: Could not find a version that satisfies the requirement", + ) + + success, message = _ensure_dependencies_installed(tmp_path, auto_install=True) + + assert success is False + assert "error" in message.lower() or "fail" in message.lower() cfg = LinterConfig( name="test", extensions={".py"}, From de6bd024f47ddf4e69b4a5c02bcb00bfa60a92ca Mon Sep 17 00:00:00 2001 From: Test User Date: Sat, 14 Feb 2026 22:26:01 -0700 Subject: [PATCH 2/6] feat(gates): add TypeScript type-check gate (tsc) Add TypeScript type checking gate with structured error parsing. Auto-detected for projects with tsconfig.json. - Prefer "type-check" script in package.json if exists - Fallback to npx tsc --noEmit - Parse tsc output: file(line,col): error TSxxxx: message - Structured errors enable intelligent fix generation - 120-second timeout (matches mypy) Tests: 6 new tests in TestTypeScriptTypeCheckGate covering script detection, fallback, error parsing, and auto-detection. Part of #385 (Step 2/5: TypeScript type-check gate) --- codeframe/core/gates.py | 115 ++++++++++++++++++++++- tests/core/test_gates_observability.py | 125 +++++++++++++++++++++++++ 2 files changed, 239 insertions(+), 1 deletion(-) diff --git a/codeframe/core/gates.py b/codeframe/core/gates.py index a4a474f6..917fa48a 100644 --- a/codeframe/core/gates.py +++ b/codeframe/core/gates.py @@ -25,6 +25,7 @@ def _utc_now() -> datetime: _RUFF_ERROR_PATTERN = re.compile(r'^(.+?):(\d+):(\d+): ([A-Z]+\d+) (.+)$') +_TSC_ERROR_PATTERN = re.compile(r'^(.+?)\((\d+),(\d+)\): error (TS\d+): (.+)$') def _parse_ruff_errors(output: str) -> list[dict[str, Any]]: @@ -52,6 +53,31 @@ def _parse_ruff_errors(output: str) -> list[dict[str, Any]]: return errors +def _parse_tsc_errors(output: str) -> list[dict[str, Any]]: + """Parse TypeScript compiler output into structured error dicts. + + Parses lines matching the pattern: src/file.ts(10,5): error TS2339: Property does not exist + + Args: + output: Raw tsc stdout/stderr output. + + Returns: + List of dicts with keys: file, line, col, code, message. + """ + errors = [] + for line in output.splitlines(): + match = _TSC_ERROR_PATTERN.match(line.strip()) + if match: + errors.append({ + "file": match.group(1), + "line": int(match.group(2)), + "col": int(match.group(3)), + "code": match.group(4), + "message": match.group(5), + }) + return errors + + class GateStatus(str, Enum): """Status of a gate check.""" @@ -327,7 +353,7 @@ def run( gates = _detect_available_gates(repo_path) # Known gate names - known_gates = {"pytest", "ruff", "mypy", "npm-test", "npm-lint"} + known_gates = {"pytest", "ruff", "mypy", "npm-test", "npm-lint", "tsc"} # Run each gate for gate_name in gates: @@ -341,6 +367,8 @@ def run( check = _run_npm_test(repo_path, verbose) elif gate_name == "npm-lint": check = _run_npm_lint(repo_path, verbose) + elif gate_name == "tsc": + check = _run_tsc(repo_path, verbose) else: # Unknown gate: FAILED if explicitly requested, SKIPPED if auto-detected if gates_explicitly_provided: @@ -420,6 +448,10 @@ def _detect_available_gates(repo_path: Path) -> list[str]: except Exception: pass + # TypeScript: tsc type checking + if (repo_path / "tsconfig.json").exists(): + gates.append("tsc") + return gates @@ -698,6 +730,87 @@ def _run_npm_lint(repo_path: Path, verbose: bool = False) -> GateCheck: ) +def _run_tsc(repo_path: Path, verbose: bool = False) -> GateCheck: + """Run TypeScript type checking (tsc --noEmit). + + Checks for type-check script in package.json first, otherwise uses npx tsc --noEmit. + """ + import json + import time + + start = time.time() + + # Check if tsconfig.json exists + tsconfig_path = repo_path / "tsconfig.json" + if not tsconfig_path.exists(): + return GateCheck( + name="tsc", + status=GateStatus.SKIPPED, + output="tsconfig.json not found", + ) + + # Check if npx is available + if not shutil.which("npx"): + return GateCheck( + name="tsc", + status=GateStatus.SKIPPED, + output="npx not found", + ) + + # Determine command: prefer "type-check" script in package.json + package_json_path = repo_path / "package.json" + cmd = ["npx", "tsc", "--noEmit"] # Default fallback + + if package_json_path.exists(): + try: + package_data = json.loads(package_json_path.read_text()) + scripts = package_data.get("scripts", {}) + if "type-check" in scripts: + cmd = ["npm", "run", "type-check"] + except Exception: + pass # Fallback to npx tsc --noEmit + + try: + result = subprocess.run( + cmd, + cwd=repo_path, + capture_output=True, + text=True, + timeout=120, # 2 minutes, same as mypy + ) + + duration_ms = int((time.time() - start) * 1000) + + output = result.stdout + if result.stderr: + output += "\n" + result.stderr + + # Parse TypeScript errors into structured format + detailed_errors = _parse_tsc_errors(output) if result.returncode != 0 else None + + return GateCheck( + name="tsc", + status=GateStatus.PASSED if result.returncode == 0 else GateStatus.FAILED, + exit_code=result.returncode, + output=output[:2000] if not verbose else output, + duration_ms=duration_ms, + detailed_errors=detailed_errors, + ) + + except subprocess.TimeoutExpired: + return GateCheck( + name="tsc", + status=GateStatus.ERROR, + output="Timeout after 120 seconds", + ) + except Exception as e: + return GateCheck( + name="tsc", + status=GateStatus.ERROR, + output=str(e), + ) + + # --------------------------------------------------------------------------- # Per-file lint gate (language-aware) # --------------------------------------------------------------------------- diff --git a/tests/core/test_gates_observability.py b/tests/core/test_gates_observability.py index eccb6185..046f590f 100644 --- a/tests/core/test_gates_observability.py +++ b/tests/core/test_gates_observability.py @@ -414,6 +414,131 @@ def test_installation_failure_returns_false(self, mock_which, mock_run, tmp_path assert success is False assert "error" in message.lower() or "fail" in message.lower() + + +class TestTypeScriptTypeCheckGate: + """Tests for TypeScript type-check gate (tsc).""" + + @patch("codeframe.core.gates.subprocess.run") + @patch("codeframe.core.gates.shutil.which") + def test_tsc_gate_runs_type_check_script(self, mock_which, mock_run, tmp_path): + """When package.json has type-check script, use it instead of tsc directly.""" + from codeframe.core.gates import _run_tsc + + # Create tsconfig.json and package.json with type-check script + (tmp_path / "tsconfig.json").write_text('{"compilerOptions": {"strict": true}}') + (tmp_path / "package.json").write_text('{"scripts": {"type-check": "tsc --noEmit"}}') + + # npm is available + mock_which.side_effect = lambda cmd: f"/usr/bin/{cmd}" + + # Mock successful type check + mock_run.return_value = subprocess.CompletedProcess( + args=["npm", "run", "type-check"], + returncode=0, + stdout="", + stderr="", + ) + + check = _run_tsc(tmp_path, verbose=False) + + assert check.name == "tsc" + assert check.status == GateStatus.PASSED + # Verify it used npm run type-check, not npx tsc + mock_run.assert_called_once() + assert "npm" in mock_run.call_args[0][0] + + @patch("codeframe.core.gates.subprocess.run") + @patch("codeframe.core.gates.shutil.which") + def test_tsc_gate_fallback_to_npx(self, mock_which, mock_run, tmp_path): + """When no type-check script, fallback to npx tsc --noEmit.""" + from codeframe.core.gates import _run_tsc + + # Create tsconfig.json without type-check script + (tmp_path / "tsconfig.json").write_text('{"compilerOptions": {}}') + (tmp_path / "package.json").write_text('{"name": "test"}') + + # npx is available + mock_which.side_effect = lambda cmd: f"/usr/bin/{cmd}" + + # Mock successful type check + mock_run.return_value = subprocess.CompletedProcess( + args=["npx", "tsc", "--noEmit"], + returncode=0, + stdout="", + stderr="", + ) + + check = _run_tsc(tmp_path, verbose=False) + + assert check.status == GateStatus.PASSED + # Verify it used npx tsc --noEmit + assert "npx" in mock_run.call_args[0][0] + assert "--noEmit" in mock_run.call_args[0][0] + + @patch("codeframe.core.gates.subprocess.run") + @patch("codeframe.core.gates.shutil.which") + def test_tsc_gate_detects_type_errors(self, mock_which, mock_run, tmp_path): + """Type errors from tsc should return FAILED status with structured errors.""" + from codeframe.core.gates import _run_tsc + + (tmp_path / "tsconfig.json").write_text('{}') + + mock_which.side_effect = lambda cmd: f"/usr/bin/{cmd}" + + # Mock type error output + mock_run.return_value = subprocess.CompletedProcess( + args=["npx", "tsc", "--noEmit"], + returncode=2, + stdout=( + "src/api.ts(15,10): error TS2339: Property 'completed' does not exist on type 'CreateTodoRequest'.\n" + "src/api.ts(20,5): error TS2322: Type 'string' is not assignable to type 'number'.\n" + ), + stderr="", + ) + + check = _run_tsc(tmp_path, verbose=False) + + assert check.status == GateStatus.FAILED + assert check.exit_code == 2 + assert "TS2339" in check.output + # Check for structured errors + assert check.detailed_errors is not None + assert len(check.detailed_errors) == 2 + assert check.detailed_errors[0]["code"] == "TS2339" + assert check.detailed_errors[0]["file"] == "src/api.ts" + assert check.detailed_errors[0]["line"] == 15 + + def test_tsc_gate_skipped_no_tsconfig(self, tmp_path): + """Without tsconfig.json, tsc gate is SKIPPED.""" + from codeframe.core.gates import _run_tsc + + check = _run_tsc(tmp_path, verbose=False) + + assert check.status == GateStatus.SKIPPED + assert "tsconfig.json" in check.output.lower() + + @patch("codeframe.core.gates.shutil.which", return_value=None) + def test_tsc_gate_skipped_no_npx(self, mock_which, tmp_path): + """Without npx available, tsc gate is SKIPPED.""" + from codeframe.core.gates import _run_tsc + + (tmp_path / "tsconfig.json").write_text('{}') + + check = _run_tsc(tmp_path, verbose=False) + + assert check.status == GateStatus.SKIPPED + assert "npx" in check.output.lower() + + def test_tsc_gate_auto_detected(self, tmp_path): + """tsc gate is auto-detected when tsconfig.json exists.""" + from codeframe.core.gates import _detect_available_gates + + (tmp_path / "tsconfig.json").write_text('{}') + + gates = _detect_available_gates(tmp_path) + + assert "tsc" in gates cfg = LinterConfig( name="test", extensions={".py"}, From d17b2cda7d47ef052d1c0169c5e0ec91181c232a Mon Sep 17 00:00:00 2001 From: Test User Date: Sat, 14 Feb 2026 22:27:13 -0700 Subject: [PATCH 3/6] feat(gates): harden pytest gate with exit code analysis MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Improve pytest gate to properly detect collection and import errors instead of treating them as silent skips. Exit code handling: - 0: All tests passed → PASSED - 1: Tests ran but failed → FAILED - 2/3/4: Collection/internal/usage errors → FAILED - 5: No tests collected → check output: - "no tests ran" + clean output → PASSED (acceptable empty suite) - "ERROR" / "ImportError" in output → FAILED (collection error) This catches the init_db() signature mismatch bug from #385 where collection errors were being ignored. Tests: 5 new tests in TestPytestGateHardening covering all exit code scenarios and error pattern detection. Part of #385 (Step 3/5: Harden pytest gate) --- codeframe/core/gates.py | 37 +++++++- tests/core/test_gates_observability.py | 119 +++++++++++++++++++++++++ 2 files changed, 155 insertions(+), 1 deletion(-) diff --git a/codeframe/core/gates.py b/codeframe/core/gates.py index 917fa48a..4584af5c 100644 --- a/codeframe/core/gates.py +++ b/codeframe/core/gates.py @@ -494,9 +494,44 @@ def _run_pytest(repo_path: Path, verbose: bool = False) -> GateCheck: if len(output) > 10000: output = output[:5000] + "\n...[truncated]...\n" + output[-5000:] + # Determine status based on exit code and output patterns + # pytest exit codes: + # 0 = all tests passed + # 1 = tests were collected and run but some failed + # 2 = test execution was interrupted by the user + # 3 = internal error happened while executing tests + # 4 = pytest command line usage error (often import errors) + # 5 = no tests were collected + status = GateStatus.PASSED + + if result.returncode == 0: + status = GateStatus.PASSED + elif result.returncode == 1: + # Tests ran but failed - this is a legitimate FAILED + status = GateStatus.FAILED + elif result.returncode in [2, 3, 4]: + # Collection errors, internal errors, usage errors - all FAILED + status = GateStatus.FAILED + elif result.returncode == 5: + # Exit code 5: no tests collected + # Check if this is a clean "no tests" or an error during collection + output_lower = output.lower() + if "error" in output_lower or "importerror" in output_lower or "modulenotfounderror" in output_lower: + # Collection error disguised as "no tests collected" + status = GateStatus.FAILED + elif "no tests ran" in output_lower or "collected 0 items" in output_lower: + # Legitimately empty test suite - acceptable + status = GateStatus.PASSED + else: + # Unclear, default to FAILED to be safe + status = GateStatus.FAILED + else: + # Unknown exit code - treat as FAILED + status = GateStatus.FAILED + return GateCheck( name="pytest", - status=GateStatus.PASSED if result.returncode == 0 else GateStatus.FAILED, + status=status, exit_code=result.returncode, output=output if verbose else _summarize_pytest_output(output), duration_ms=duration_ms, diff --git a/tests/core/test_gates_observability.py b/tests/core/test_gates_observability.py index 046f590f..5a413450 100644 --- a/tests/core/test_gates_observability.py +++ b/tests/core/test_gates_observability.py @@ -539,6 +539,125 @@ def test_tsc_gate_auto_detected(self, tmp_path): gates = _detect_available_gates(tmp_path) assert "tsc" in gates + + +class TestPytestGateHardening: + """Tests for hardened pytest gate with collection error detection.""" + + @patch("codeframe.core.gates.subprocess.run") + @patch("codeframe.core.gates.shutil.which") + def test_pytest_collection_error_fails_gate(self, mock_which, mock_run, tmp_path): + """pytest collection errors (exit code 2/3/4) should FAIL the gate.""" + from codeframe.core.gates import _run_pytest + + mock_which.side_effect = lambda cmd: f"/usr/bin/{cmd}" + + # Mock collection error (exit code 4 = usage error, often import issues) + mock_run.return_value = subprocess.CompletedProcess( + args=["pytest", "-v", "--tb=short"], + returncode=4, + stdout=( + "ERROR: not found: /path/to/tests/test_api.py::test_init_db\n" + "ERROR: file or directory not found: tests/test_api.py\n" + ), + stderr="ERROR collecting tests/test_api.py", + ) + + check = _run_pytest(tmp_path, verbose=False) + + assert check.status == GateStatus.FAILED + assert check.exit_code == 4 + assert "ERROR" in check.output + + @patch("codeframe.core.gates.subprocess.run") + @patch("codeframe.core.gates.shutil.which") + def test_pytest_import_error_during_collection_fails(self, mock_which, mock_run, tmp_path): + """ImportError during collection (exit code 2) should FAIL the gate.""" + from codeframe.core.gates import _run_pytest + + mock_which.side_effect = lambda cmd: f"/usr/bin/{cmd}" + + # Mock import error during collection + mock_run.return_value = subprocess.CompletedProcess( + args=["pytest", "-v", "--tb=short"], + returncode=2, + stdout=( + "ImportError while importing test module 'tests/test_db.py'.\n" + "ERROR: ModuleNotFoundError: No module named 'sqlalchemy'\n" + ), + stderr="", + ) + + check = _run_pytest(tmp_path, verbose=False) + + assert check.status == GateStatus.FAILED + assert "ImportError" in check.output or "ModuleNotFoundError" in check.output + + @patch("codeframe.core.gates.subprocess.run") + @patch("codeframe.core.gates.shutil.which") + def test_pytest_no_tests_collected_passes(self, mock_which, mock_run, tmp_path): + """Exit code 5 with 'no tests collected' should PASS (acceptable empty suite).""" + from codeframe.core.gates import _run_pytest + + mock_which.side_effect = lambda cmd: f"/usr/bin/{cmd}" + + # Mock no tests collected (exit code 5, but clean) + mock_run.return_value = subprocess.CompletedProcess( + args=["pytest", "-v", "--tb=short"], + returncode=5, + stdout="collected 0 items\n\nno tests ran in 0.01s\n", + stderr="", + ) + + check = _run_pytest(tmp_path, verbose=False) + + assert check.status == GateStatus.PASSED + assert "no tests ran" in check.output.lower() + + @patch("codeframe.core.gates.subprocess.run") + @patch("codeframe.core.gates.shutil.which") + def test_pytest_exit_code_5_with_error_fails(self, mock_which, mock_run, tmp_path): + """Exit code 5 with ERROR messages should FAIL (collection error, not empty suite).""" + from codeframe.core.gates import _run_pytest + + mock_which.side_effect = lambda cmd: f"/usr/bin/{cmd}" + + # Mock exit code 5 but with collection errors + mock_run.return_value = subprocess.CompletedProcess( + args=["pytest", "-v", "--tb=short"], + returncode=5, + stdout=( + "ERROR collecting tests/test_api.py\n" + "collected 0 items\n" + ), + stderr="ImportError: cannot import name 'app' from 'main'", + ) + + check = _run_pytest(tmp_path, verbose=False) + + assert check.status == GateStatus.FAILED + assert "ERROR" in check.output or "ImportError" in check.output + + @patch("codeframe.core.gates.subprocess.run") + @patch("codeframe.core.gates.shutil.which") + def test_pytest_exit_code_1_test_failures(self, mock_which, mock_run, tmp_path): + """Exit code 1 (tests failed) should FAIL the gate.""" + from codeframe.core.gates import _run_pytest + + mock_which.side_effect = lambda cmd: f"/usr/bin/{cmd}" + + # Mock test failures + mock_run.return_value = subprocess.CompletedProcess( + args=["pytest", "-v", "--tb=short"], + returncode=1, + stdout="tests/test_api.py::test_foo FAILED\n\n1 failed, 0 passed in 0.5s\n", + stderr="", + ) + + check = _run_pytest(tmp_path, verbose=False) + + assert check.status == GateStatus.FAILED + assert check.exit_code == 1 cfg = LinterConfig( name="test", extensions={".py"}, From ef6faf8060f9a2ab2a1fb590045a6668a8ea95cc Mon Sep 17 00:00:00 2001 From: Test User Date: Sat, 14 Feb 2026 22:28:43 -0700 Subject: [PATCH 4/6] feat(gates): add build verification gates Add python-build and npm-build gates to catch build/import errors. python-build gate: - Auto-detects entry points: main.py, app.py, api.py, __main__.py - Runs smoke test: import - Uses uv run python if available - 60-second timeout - Catches import errors and missing dependencies npm-build gate: - Auto-detects build script in package.json - Runs npm run build - 5-minute timeout (builds can be slow) - Catches TypeScript errors, webpack failures, etc. These gates run during final verification and catch cross-file inconsistencies that per-file linting misses. Tests: 8 new tests in TestBuildVerificationGates covering success, failure, skips, and auto-detection. Part of #385 (Step 4/5: Build verification gates) --- codeframe/core/gates.py | 182 ++++++++++++++++++++++++- tests/core/test_gates_observability.py | 142 +++++++++++++++++++ 2 files changed, 323 insertions(+), 1 deletion(-) diff --git a/codeframe/core/gates.py b/codeframe/core/gates.py index 4584af5c..1081570d 100644 --- a/codeframe/core/gates.py +++ b/codeframe/core/gates.py @@ -353,7 +353,7 @@ def run( gates = _detect_available_gates(repo_path) # Known gate names - known_gates = {"pytest", "ruff", "mypy", "npm-test", "npm-lint", "tsc"} + known_gates = {"pytest", "ruff", "mypy", "npm-test", "npm-lint", "tsc", "python-build", "npm-build"} # Run each gate for gate_name in gates: @@ -369,6 +369,10 @@ def run( check = _run_npm_lint(repo_path, verbose) elif gate_name == "tsc": check = _run_tsc(repo_path, verbose) + elif gate_name == "python-build": + check = _run_python_build(repo_path, verbose) + elif gate_name == "npm-build": + check = _run_npm_build(repo_path, verbose) else: # Unknown gate: FAILED if explicitly requested, SKIPPED if auto-detected if gates_explicitly_provided: @@ -452,6 +456,23 @@ def _detect_available_gates(repo_path: Path) -> list[str]: if (repo_path / "tsconfig.json").exists(): gates.append("tsc") + # Python build verification (smoke test import) + for entry_point in ["main.py", "app.py", "api.py", "__main__.py"]: + if (repo_path / entry_point).exists(): + gates.append("python-build") + break + + # Node.js build verification + if package_json.exists(): + try: + import json + pkg = json.loads(package_json.read_text()) + scripts = pkg.get("scripts", {}) + if "build" in scripts: + gates.append("npm-build") + except Exception: + pass + return gates @@ -765,6 +786,165 @@ def _run_npm_lint(repo_path: Path, verbose: bool = False) -> GateCheck: ) +def _run_python_build(repo_path: Path, verbose: bool = False) -> GateCheck: + """Run Python build verification via smoke test import. + + Attempts to import common entry points to verify the project builds/imports correctly. + Checks for: main.py, app.py, api.py, __main__.py + """ + import time + + start = time.time() + + # Detect entry points + entry_points = [] + for candidate in ["main", "app", "api", "__main__"]: + if (repo_path / f"{candidate}.py").exists(): + entry_points.append(candidate) + + if not entry_points: + return GateCheck( + name="python-build", + status=GateStatus.SKIPPED, + output="No entry point files found (main.py, app.py, api.py, __main__.py)", + ) + + # Check if python is available + python_cmd = "python" + if shutil.which("uv"): + python_cmd = "uv run python" + elif not shutil.which("python"): + return GateCheck( + name="python-build", + status=GateStatus.SKIPPED, + output="python not found", + ) + + # Try importing the first entry point + entry_point = entry_points[0] + + try: + # Try to import the module (smoke test) + cmd = python_cmd.split() + ["-c", f"import {entry_point}"] + + result = subprocess.run( + cmd, + cwd=repo_path, + capture_output=True, + text=True, + timeout=60, # 1 minute for import check + ) + + duration_ms = int((time.time() - start) * 1000) + + output = result.stdout + if result.stderr: + output += "\n" + result.stderr + + return GateCheck( + name="python-build", + status=GateStatus.PASSED if result.returncode == 0 else GateStatus.FAILED, + exit_code=result.returncode, + output=output[:2000] if not verbose else output, + duration_ms=duration_ms, + ) + + except subprocess.TimeoutExpired: + return GateCheck( + name="python-build", + status=GateStatus.ERROR, + output="Timeout after 60 seconds", + ) + except Exception as e: + return GateCheck( + name="python-build", + status=GateStatus.ERROR, + output=str(e), + ) + + +def _run_npm_build(repo_path: Path, verbose: bool = False) -> GateCheck: + """Run npm build if build script exists in package.json.""" + import json + import time + + start = time.time() + + # Check if package.json exists + package_json = repo_path / "package.json" + if not package_json.exists(): + return GateCheck( + name="npm-build", + status=GateStatus.SKIPPED, + output="package.json not found", + ) + + # Check if build script exists + try: + pkg = json.loads(package_json.read_text()) + scripts = pkg.get("scripts", {}) + if "build" not in scripts: + return GateCheck( + name="npm-build", + status=GateStatus.SKIPPED, + output="No build script in package.json", + ) + except Exception as e: + return GateCheck( + name="npm-build", + status=GateStatus.ERROR, + output=f"Failed to parse package.json: {e}", + ) + + # Check if npm is available + if not shutil.which("npm"): + return GateCheck( + name="npm-build", + status=GateStatus.SKIPPED, + output="npm not found", + ) + + try: + result = subprocess.run( + ["npm", "run", "build"], + cwd=repo_path, + capture_output=True, + text=True, + timeout=300, # 5 minutes for builds + ) + + duration_ms = int((time.time() - start) * 1000) + + output = result.stdout + if result.stderr: + output += "\n" + result.stderr + + # Truncate long build output + if len(output) > 5000 and not verbose: + output = output[:2500] + "\n...[truncated]...\n" + output[-2500:] + + return GateCheck( + name="npm-build", + status=GateStatus.PASSED if result.returncode == 0 else GateStatus.FAILED, + exit_code=result.returncode, + output=output, + duration_ms=duration_ms, + ) + + except subprocess.TimeoutExpired: + return GateCheck( + name="npm-build", + status=GateStatus.ERROR, + output="Timeout after 5 minutes", + ) + except Exception as e: + return GateCheck( + name="npm-build", + status=GateStatus.ERROR, + output=str(e), + ) + + def _run_tsc(repo_path: Path, verbose: bool = False) -> GateCheck: """Run TypeScript type checking (tsc --noEmit). diff --git a/tests/core/test_gates_observability.py b/tests/core/test_gates_observability.py index 5a413450..5bb54cfb 100644 --- a/tests/core/test_gates_observability.py +++ b/tests/core/test_gates_observability.py @@ -658,6 +658,148 @@ def test_pytest_exit_code_1_test_failures(self, mock_which, mock_run, tmp_path): assert check.status == GateStatus.FAILED assert check.exit_code == 1 + + +class TestBuildVerificationGates: + """Tests for build verification gates (python-build, npm-build).""" + + @patch("codeframe.core.gates.subprocess.run") + @patch("codeframe.core.gates.shutil.which") + def test_python_build_gate_smoke_test_success(self, mock_which, mock_run, tmp_path): + """python-build gate runs smoke test by importing common entry points.""" + from codeframe.core.gates import _run_python_build + + # Create a simple main.py + (tmp_path / "main.py").write_text("app = 'test'\n") + + mock_which.side_effect = lambda cmd: f"/usr/bin/{cmd}" + + # Mock successful import + mock_run.return_value = subprocess.CompletedProcess( + args=["python", "-c", "from main import app"], + returncode=0, + stdout="", + stderr="", + ) + + check = _run_python_build(tmp_path, verbose=False) + + assert check.name == "python-build" + assert check.status == GateStatus.PASSED + + @patch("codeframe.core.gates.subprocess.run") + @patch("codeframe.core.gates.shutil.which") + def test_python_build_gate_import_error_fails(self, mock_which, mock_run, tmp_path): + """python-build gate FAILS when import smoke test fails.""" + from codeframe.core.gates import _run_python_build + + (tmp_path / "main.py").write_text("import nonexistent_module\n") + + mock_which.side_effect = lambda cmd: f"/usr/bin/{cmd}" + + # Mock import error + mock_run.return_value = subprocess.CompletedProcess( + args=["python", "-c", "from main import app"], + returncode=1, + stdout="", + stderr="ModuleNotFoundError: No module named 'nonexistent_module'", + ) + + check = _run_python_build(tmp_path, verbose=False) + + assert check.status == GateStatus.FAILED + assert "ModuleNotFoundError" in check.output + + def test_python_build_gate_skipped_no_entry_points(self, tmp_path): + """python-build gate SKIPPED when no entry points found.""" + from codeframe.core.gates import _run_python_build + + # Empty directory, no entry points + check = _run_python_build(tmp_path, verbose=False) + + assert check.status == GateStatus.SKIPPED + assert "entry point" in check.output.lower() + + @patch("codeframe.core.gates.subprocess.run") + @patch("codeframe.core.gates.shutil.which") + def test_npm_build_gate_success(self, mock_which, mock_run, tmp_path): + """npm-build gate runs npm run build when build script exists.""" + from codeframe.core.gates import _run_npm_build + + # Create package.json with build script + (tmp_path / "package.json").write_text('{"scripts": {"build": "tsc && webpack"}}') + + mock_which.side_effect = lambda cmd: f"/usr/bin/{cmd}" + + # Mock successful build + mock_run.return_value = subprocess.CompletedProcess( + args=["npm", "run", "build"], + returncode=0, + stdout="Build completed successfully", + stderr="", + ) + + check = _run_npm_build(tmp_path, verbose=False) + + assert check.name == "npm-build" + assert check.status == GateStatus.PASSED + + @patch("codeframe.core.gates.subprocess.run") + @patch("codeframe.core.gates.shutil.which") + def test_npm_build_gate_build_failure(self, mock_which, mock_run, tmp_path): + """npm-build gate FAILS when build fails.""" + from codeframe.core.gates import _run_npm_build + + (tmp_path / "package.json").write_text('{"scripts": {"build": "tsc"}}') + + mock_which.side_effect = lambda cmd: f"/usr/bin/{cmd}" + + # Mock build failure + mock_run.return_value = subprocess.CompletedProcess( + args=["npm", "run", "build"], + returncode=1, + stdout="", + stderr="error TS2322: Type 'string' is not assignable to type 'number'", + ) + + check = _run_npm_build(tmp_path, verbose=False) + + assert check.status == GateStatus.FAILED + assert "TS2322" in check.output + + def test_npm_build_gate_skipped_no_build_script(self, tmp_path): + """npm-build gate SKIPPED when package.json has no build script.""" + from codeframe.core.gates import _run_npm_build + + (tmp_path / "package.json").write_text('{"name": "test"}') + + check = _run_npm_build(tmp_path, verbose=False) + + assert check.status == GateStatus.SKIPPED + assert "build" in check.output.lower() + + def test_npm_build_gate_skipped_no_package_json(self, tmp_path): + """npm-build gate SKIPPED when no package.json exists.""" + from codeframe.core.gates import _run_npm_build + + check = _run_npm_build(tmp_path, verbose=False) + + assert check.status == GateStatus.SKIPPED + + def test_build_gates_auto_detected(self, tmp_path): + """Build gates are auto-detected based on project files.""" + from codeframe.core.gates import _detect_available_gates + + # Python project + (tmp_path / "main.py").write_text("app = 'test'\n") + gates = _detect_available_gates(tmp_path) + assert "python-build" in gates + + # Node project with build script + (tmp_path / "main.py").unlink() + (tmp_path / "package.json").write_text('{"scripts": {"build": "tsc"}}') + gates = _detect_available_gates(tmp_path) + assert "npm-build" in gates cfg = LinterConfig( name="test", extensions={".py"}, From ac8ecdd5effba0c58628d9bfa42b7959ef4d14d2 Mon Sep 17 00:00:00 2001 From: Test User Date: Sat, 14 Feb 2026 22:31:14 -0700 Subject: [PATCH 5/6] feat(conductor): add batch-level validation with full gate sweep MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After all tasks in a batch complete, run full gate sweep to catch cross-task inconsistencies that per-task gates miss. Integration: - New event: BATCH_VALIDATION_FAILED - New function: _run_batch_level_validation() in conductor - Integrated into 4 batch completion paths: - _execute_serial - _execute_parallel - _execute_retries - resume_batch Behavior: - Runs after all tasks COMPLETED - Executes all auto-detected gates (pytest, ruff, tsc, build, etc.) - If gates fail, changes batch status: COMPLETED → PARTIAL - Emits BATCH_VALIDATION_FAILED event with failed gate details - Prints validation errors for visibility This catches bugs like: - Double `/api/api/` prefix (integration test failure) - Cross-file import errors - Build errors that only appear when all tasks combined - Test failures from task ordering issues Part of #385 (Step 5/5: Batch-level validation) --- codeframe/core/conductor.py | 89 +++++++++++++++++++++++++++++++++++++ codeframe/core/events.py | 1 + 2 files changed, 90 insertions(+) diff --git a/codeframe/core/conductor.py b/codeframe/core/conductor.py index d7c30db9..a738a1fe 100644 --- a/codeframe/core/conductor.py +++ b/codeframe/core/conductor.py @@ -973,6 +973,17 @@ def _execute_serial_resume( if final_completed == total: batch.status = BatchStatus.COMPLETED event_type = events.EventType.BATCH_COMPLETED + + # Run batch-level validation (full gate sweep) + validation_passed, validation_error = _run_batch_level_validation(workspace, batch) + + if not validation_passed: + # Gates failed - change status to PARTIAL (tasks done, integration broken) + batch.status = BatchStatus.PARTIAL + event_type = events.EventType.BATCH_PARTIAL + print(f"\n⚠️ Batch marked PARTIAL due to failed batch-level gates") + print(f"Validation error: {validation_error}") + elif final_completed == 0 and (final_failed > 0 or final_blocked > 0): batch.status = BatchStatus.FAILED event_type = events.EventType.BATCH_FAILED @@ -1109,6 +1120,17 @@ def _execute_retries( if final_completed == total: batch.status = BatchStatus.COMPLETED event_type = events.EventType.BATCH_COMPLETED + + # Run batch-level validation (full gate sweep) + validation_passed, validation_error = _run_batch_level_validation(workspace, batch) + + if not validation_passed: + # Gates failed - change status to PARTIAL (tasks done, integration broken) + batch.status = BatchStatus.PARTIAL + event_type = events.EventType.BATCH_PARTIAL + print(f"\n⚠️ Batch marked PARTIAL due to failed batch-level gates") + print(f"Validation error: {validation_error}") + elif final_completed == 0 and (final_failed > 0 or final_blocked > 0): batch.status = BatchStatus.FAILED event_type = events.EventType.BATCH_FAILED @@ -1144,6 +1166,51 @@ def _execute_retries( print(f"\n⚠ {final_failed} task(s) still failing after {max_retries} retries") +def _run_batch_level_validation(workspace: Workspace, batch: BatchRun) -> tuple[bool, Optional[str]]: + """Run full gate sweep after all tasks complete to catch cross-task inconsistencies. + + Args: + workspace: The workspace + batch: The batch run that just completed + + Returns: + Tuple of (passed: bool, failure_summary: Optional[str]) + - passed=True means all gates passed + - passed=False means gates failed, with summary of failures + """ + from codeframe.core import gates + + print("\n[Conductor] Running batch-level validation (full gate sweep)...") + + # Run all auto-detected gates against the full workspace + result = gates.run(workspace, gates=None, verbose=False, auto_install_deps=True) + + if result.passed: + print("[Conductor] ✓ Batch-level validation passed") + return True, None + else: + # Extract failure summary + failure_summary = result.get_error_summary() + if not failure_summary: + failure_summary = "Gates failed (see individual gate outputs)" + + print(f"[Conductor] ✗ Batch-level validation failed:\n{failure_summary}") + + # Emit batch validation failed event + events.emit_for_workspace( + workspace, + events.EventType.BATCH_VALIDATION_FAILED, + { + "batch_id": batch.id, + "failed_gates": [check.name for check in result.checks if check.status == gates.GateStatus.FAILED], + "summary": failure_summary[:500], # Truncate for event payload + }, + print_event=True, + ) + + return False, failure_summary + + def _execute_serial( workspace: Workspace, batch: BatchRun, @@ -1247,6 +1314,17 @@ def _execute_serial( if completed_count == total: batch.status = BatchStatus.COMPLETED event_type = events.EventType.BATCH_COMPLETED + + # Run batch-level validation (full gate sweep) + validation_passed, validation_error = _run_batch_level_validation(workspace, batch) + + if not validation_passed: + # Gates failed - change status to PARTIAL (tasks done, integration broken) + batch.status = BatchStatus.PARTIAL + event_type = events.EventType.BATCH_PARTIAL + print(f"\n⚠️ Batch marked PARTIAL due to failed batch-level gates") + print(f"Validation error: {validation_error}") + elif completed_count == 0 and (failed_count > 0 or blocked_count > 0): batch.status = BatchStatus.FAILED event_type = events.EventType.BATCH_FAILED @@ -1376,6 +1454,17 @@ def _execute_parallel( if completed_count == total: batch.status = BatchStatus.COMPLETED event_type = events.EventType.BATCH_COMPLETED + + # Run batch-level validation (full gate sweep) + validation_passed, validation_error = _run_batch_level_validation(workspace, batch) + + if not validation_passed: + # Gates failed - change status to PARTIAL (tasks done, integration broken) + batch.status = BatchStatus.PARTIAL + event_type = events.EventType.BATCH_PARTIAL + print(f"\n⚠️ Batch marked PARTIAL due to failed batch-level gates") + print(f"Validation error: {validation_error}") + elif completed_count == 0 and (failed_count > 0 or blocked_count > 0): batch.status = BatchStatus.FAILED event_type = events.EventType.BATCH_FAILED diff --git a/codeframe/core/events.py b/codeframe/core/events.py index da841e38..ccf66e53 100644 --- a/codeframe/core/events.py +++ b/codeframe/core/events.py @@ -101,6 +101,7 @@ class EventType: BATCH_PARTIAL = "BATCH_PARTIAL" BATCH_FAILED = "BATCH_FAILED" BATCH_CANCELLED = "BATCH_CANCELLED" + BATCH_VALIDATION_FAILED = "BATCH_VALIDATION_FAILED" @dataclass From 61edefaa204f71feef0dbb4c5f91ab7bc00d8193 Mon Sep 17 00:00:00 2001 From: Test User Date: Sat, 14 Feb 2026 22:32:44 -0700 Subject: [PATCH 6/6] fix: remove extraneous f-string prefixes (ruff F541) --- codeframe/core/conductor.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/codeframe/core/conductor.py b/codeframe/core/conductor.py index a738a1fe..b6e1ff5a 100644 --- a/codeframe/core/conductor.py +++ b/codeframe/core/conductor.py @@ -981,7 +981,7 @@ def _execute_serial_resume( # Gates failed - change status to PARTIAL (tasks done, integration broken) batch.status = BatchStatus.PARTIAL event_type = events.EventType.BATCH_PARTIAL - print(f"\n⚠️ Batch marked PARTIAL due to failed batch-level gates") + print("\n⚠️ Batch marked PARTIAL due to failed batch-level gates") print(f"Validation error: {validation_error}") elif final_completed == 0 and (final_failed > 0 or final_blocked > 0): @@ -1128,7 +1128,7 @@ def _execute_retries( # Gates failed - change status to PARTIAL (tasks done, integration broken) batch.status = BatchStatus.PARTIAL event_type = events.EventType.BATCH_PARTIAL - print(f"\n⚠️ Batch marked PARTIAL due to failed batch-level gates") + print("\n⚠️ Batch marked PARTIAL due to failed batch-level gates") print(f"Validation error: {validation_error}") elif final_completed == 0 and (final_failed > 0 or final_blocked > 0): @@ -1322,7 +1322,7 @@ def _execute_serial( # Gates failed - change status to PARTIAL (tasks done, integration broken) batch.status = BatchStatus.PARTIAL event_type = events.EventType.BATCH_PARTIAL - print(f"\n⚠️ Batch marked PARTIAL due to failed batch-level gates") + print("\n⚠️ Batch marked PARTIAL due to failed batch-level gates") print(f"Validation error: {validation_error}") elif completed_count == 0 and (failed_count > 0 or blocked_count > 0): @@ -1462,7 +1462,7 @@ def _execute_parallel( # Gates failed - change status to PARTIAL (tasks done, integration broken) batch.status = BatchStatus.PARTIAL event_type = events.EventType.BATCH_PARTIAL - print(f"\n⚠️ Batch marked PARTIAL due to failed batch-level gates") + print("\n⚠️ Batch marked PARTIAL due to failed batch-level gates") print(f"Validation error: {validation_error}") elif completed_count == 0 and (failed_count > 0 or blocked_count > 0):