From d934e79beb8e9613abbb750397cbb743cd56a681 Mon Sep 17 00:00:00 2001 From: Robert Mohid Date: Fri, 13 Feb 2026 13:56:40 -0500 Subject: [PATCH] feat: add mypy command with grouped error output (80% token reduction) Add rtk mypy module that parses mypy type checker output, groups errors by file and error code, and produces compact summaries. Includes discovery registry pattern, hook rewrites, and 11 new tests (324 total). Co-Authored-By: Claude Opus 4.6 --- .claude/hooks/rtk-rewrite.sh | 4 + CLAUDE.md | 8 + hooks/rtk-rewrite.sh | 4 + specs/001-mypy-cmd/checklists/requirements.md | 37 ++ specs/001-mypy-cmd/plan.md | 203 +++++++++ specs/001-mypy-cmd/research.md | 34 ++ specs/001-mypy-cmd/spec.md | 106 +++++ specs/001-mypy-cmd/tasks.md | 153 +++++++ src/discover/registry.rs | 34 ++ src/main.rs | 12 + src/mypy_cmd.rs | 389 ++++++++++++++++++ src/utils.rs | 1 - 12 files changed, 984 insertions(+), 1 deletion(-) create mode 100644 specs/001-mypy-cmd/checklists/requirements.md create mode 100644 specs/001-mypy-cmd/plan.md create mode 100644 specs/001-mypy-cmd/research.md create mode 100644 specs/001-mypy-cmd/spec.md create mode 100644 specs/001-mypy-cmd/tasks.md create mode 100644 src/mypy_cmd.rs diff --git a/.claude/hooks/rtk-rewrite.sh b/.claude/hooks/rtk-rewrite.sh index 6574b01..2a9564f 100755 --- a/.claude/hooks/rtk-rewrite.sh +++ b/.claude/hooks/rtk-rewrite.sh @@ -170,6 +170,10 @@ elif echo "$MATCH_CMD" | grep -qE '^pip[[:space:]]+(list|outdated|install|show)( REWRITTEN="${ENV_PREFIX}$(echo "$CMD_BODY" | sed 's/^pip /rtk pip /')" elif echo "$MATCH_CMD" | grep -qE '^uv[[:space:]]+pip[[:space:]]+(list|outdated|install|show)([[:space:]]|$)'; then REWRITTEN="${ENV_PREFIX}$(echo "$CMD_BODY" | sed 's/^uv pip /rtk pip /')" +elif echo "$MATCH_CMD" | grep -qE '^mypy([[:space:]]|$)'; then + REWRITTEN="${ENV_PREFIX}$(echo "$CMD_BODY" | sed 's/^mypy/rtk mypy/')" +elif echo "$MATCH_CMD" | grep -qE '^python[[:space:]]+-m[[:space:]]+mypy([[:space:]]|$)'; then + 
REWRITTEN="${ENV_PREFIX}$(echo "$CMD_BODY" | sed 's/^python -m mypy/rtk mypy/')" # --- Go tooling --- elif echo "$MATCH_CMD" | grep -qE '^go[[:space:]]+test([[:space:]]|$)'; then diff --git a/CLAUDE.md b/CLAUDE.md index 4954f04..249e70e 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -219,6 +219,7 @@ rtk gain --history | grep proxy | pnpm_cmd.rs | pnpm package manager | Compact dependency trees (70-90% reduction) | | ruff_cmd.rs | Ruff linter/formatter | JSON for check, text for format (80%+ reduction) | | pytest_cmd.rs | Pytest test runner | State machine text parser (90%+ reduction) | +| mypy_cmd.rs | Mypy type checker | Group by file/error code (80% reduction) | | pip_cmd.rs | pip/uv package manager | JSON parsing, auto-detect uv (70-85% reduction) | | go_cmd.rs | Go commands | NDJSON for test, text for build/vet (80-90% reduction) | | golangci_cmd.rs | golangci-lint | JSON parsing, group by rule (85% reduction) | @@ -312,3 +313,10 @@ GitHub Actions workflow (.github/workflows/release.yml): - DEB/RPM package generation - Automated releases on version tags (v*) - Checksums for binary verification + +## Active Technologies +- Rust 2021 edition + regex (1), lazy_static (1.4), anyhow (1.0) -- all already in Cargo.toml (001-mypy-cmd) +- SQLite via rusqlite (existing tracking.rs) (001-mypy-cmd) + +## Recent Changes +- 001-mypy-cmd: Added Rust 2021 edition + regex (1), lazy_static (1.4), anyhow (1.0) -- all already in Cargo.toml diff --git a/hooks/rtk-rewrite.sh b/hooks/rtk-rewrite.sh index 6574b01..2a9564f 100644 --- a/hooks/rtk-rewrite.sh +++ b/hooks/rtk-rewrite.sh @@ -170,6 +170,10 @@ elif echo "$MATCH_CMD" | grep -qE '^pip[[:space:]]+(list|outdated|install|show)( REWRITTEN="${ENV_PREFIX}$(echo "$CMD_BODY" | sed 's/^pip /rtk pip /')" elif echo "$MATCH_CMD" | grep -qE '^uv[[:space:]]+pip[[:space:]]+(list|outdated|install|show)([[:space:]]|$)'; then REWRITTEN="${ENV_PREFIX}$(echo "$CMD_BODY" | sed 's/^uv pip /rtk pip /')" +elif echo "$MATCH_CMD" | grep -qE 
'^mypy([[:space:]]|$)'; then + REWRITTEN="${ENV_PREFIX}$(echo "$CMD_BODY" | sed 's/^mypy/rtk mypy/')" +elif echo "$MATCH_CMD" | grep -qE '^python[[:space:]]+-m[[:space:]]+mypy([[:space:]]|$)'; then + REWRITTEN="${ENV_PREFIX}$(echo "$CMD_BODY" | sed 's/^python -m mypy/rtk mypy/')" # --- Go tooling --- elif echo "$MATCH_CMD" | grep -qE '^go[[:space:]]+test([[:space:]]|$)'; then diff --git a/specs/001-mypy-cmd/checklists/requirements.md b/specs/001-mypy-cmd/checklists/requirements.md new file mode 100644 index 0000000..13ee98b --- /dev/null +++ b/specs/001-mypy-cmd/checklists/requirements.md @@ -0,0 +1,37 @@ +# Specification Quality Checklist: RTK Mypy Command + +**Purpose**: Validate specification completeness and quality before proceeding to planning +**Created**: 2026-02-13 +**Feature**: [spec.md](../spec.md) + +## Content Quality + +- [x] No implementation details (languages, frameworks, APIs) +- [x] Focused on user value and business needs +- [x] Written for non-technical stakeholders +- [x] All mandatory sections completed + +## Requirement Completeness + +- [x] No [NEEDS CLARIFICATION] markers remain +- [x] Requirements are testable and unambiguous +- [x] Success criteria are measurable +- [x] Success criteria are technology-agnostic (no implementation details) +- [x] All acceptance scenarios are defined +- [x] Edge cases are identified +- [x] Scope is clearly bounded +- [x] Dependencies and assumptions identified + +## Feature Readiness + +- [x] All functional requirements have clear acceptance criteria +- [x] User scenarios cover primary flows +- [x] Feature meets measurable outcomes defined in Success Criteria +- [x] No implementation details leak into specification + +## Notes + +- All items pass. Spec is ready for `/speckit.plan`. +- FR-001 through FR-009 cover the core filter function (testable with unit tests on raw strings). +- FR-010 through FR-013 cover the command execution wrapper (testable with integration patterns). 
+- FR-014 and FR-015 cover discovery and hook integration (testable with registry unit tests and hook script assertions). diff --git a/specs/001-mypy-cmd/plan.md b/specs/001-mypy-cmd/plan.md new file mode 100644 index 0000000..38eb8d8 --- /dev/null +++ b/specs/001-mypy-cmd/plan.md @@ -0,0 +1,203 @@ +# Implementation Plan: RTK Mypy Command + +**Branch**: `001-mypy-cmd` | **Date**: 2026-02-13 | **Spec**: [spec.md](spec.md) +**Input**: Feature specification from `/specs/001-mypy-cmd/spec.md` + +## Summary + +Add `rtk mypy` command that filters and compresses mypy type-checker output by parsing errors, grouping by file (most errors first), displaying a summary header with total counts and top error codes, and preserving every individual error. Follows the exact same structural pattern as `tsc_cmd.rs`. Includes discover registry pattern, auto-rewrite hook entry, and comprehensive TDD tests. + +## Technical Context + +**Language/Version**: Rust 2021 edition +**Primary Dependencies**: regex (1), lazy_static (1.4), anyhow (1.0) -- all already in Cargo.toml +**Storage**: SQLite via rusqlite (existing tracking.rs) +**Testing**: cargo test with embedded `#[cfg(test)] mod tests` +**Target Platform**: macOS, Linux (cross-platform CLI) +**Project Type**: Single Rust binary (existing) +**Performance Goals**: N/A (output filtering is string processing on small inputs) +**Constraints**: No new dependencies. Must follow existing module patterns exactly. +**Scale/Scope**: 6 files modified/created total + +## Constitution Check + +*No constitution file exists. 
Skipping gate check.* + +## Project Structure + +### Documentation (this feature) + +```text +specs/001-mypy-cmd/ +├── spec.md +├── plan.md # This file +├── research.md # Phase 0 output +├── checklists/ +│ └── requirements.md # Already created +└── tasks.md # Phase 2 output (created by /speckit.tasks) +``` + +### Source Code (repository root) + +```text +src/ +├── mypy_cmd.rs # NEW: mypy command module (filter + run) +├── main.rs # MODIFY: add Mypy variant to Commands enum + match arm +├── discover/ +│ └── registry.rs # MODIFY: add mypy + python3 -m mypy patterns and rules + +.claude/hooks/ +└── rtk-rewrite.sh # MODIFY: add mypy rewrite patterns + +hooks/ +└── rtk-rewrite.sh # MODIFY: mirror of .claude/hooks/rtk-rewrite.sh +``` + +**Structure Decision**: This is a leaf module addition following the exact pattern of `tsc_cmd.rs`, `ruff_cmd.rs`, and `pytest_cmd.rs`. No new directories, no new dependencies, no architectural changes. + +## Design + +### mypy_cmd.rs -- Core Module + +**Pattern**: Mirrors `tsc_cmd.rs` exactly. 
+ +**Public API**: +``` +pub fn run(args: &[String], verbose: u8) -> Result<()> +``` + +**Internal filter function** (unit-testable): +``` +fn filter_mypy_output(output: &str) -> String +``` + +**Mypy output format** (the input we parse): +``` +src/module.py:12: error: Incompatible return value type [return-value] +src/module.py:12:5: error: Incompatible return value type [return-value] +src/module.py:15: note: Expected "int" +src/other.py:8: error: Name "foo" is not defined [name-defined] +Found 3 errors in 2 files (checked 10 source files) +``` + +**Key patterns**: +- Error line regex: `^(.+?):(\d+)(?::(\d+))?: (error|warning|note): (.+?)(?:\s+\[(.+)\])?$` +- Continuation: `note:` lines attach to the preceding error +- File-less errors: Lines matching `error:` without a file path prefix (e.g., mypy config errors) -- display verbatim at top +- Summary line: `Found N errors in M files` -- replaced by our header + +**RTK output format** (what we produce): +``` +mypy: 3 errors in 2 files +======================================= +Top codes: return-value (1x), name-defined (1x) + +src/module.py (2 errors) + L12: [return-value] Incompatible return value type + Expected "int" + L15: [some-code] Another error + +src/other.py (1 error) + L8: [name-defined] Name "foo" is not defined +``` + +**Command execution flow**: +1. Try `mypy` directly via `Command::new("mypy")` +2. If not found, try `python3 -m mypy` as fallback (same pattern as pytest_cmd.rs) +3. Forward all user args +4. Capture stdout + stderr, combine +5. Strip ANSI codes via `utils::strip_ansi()` +6. Filter through `filter_mypy_output()` +7. Track via `tracking::TimedExecution` +8. 
Exit with mypy's exit code via `std::process::exit()` + +### main.rs -- Wiring + +Add to `Commands` enum (alphabetical placement near other Python tools): +``` +Mypy { + #[arg(trailing_var_arg = true, allow_hyphen_values = true)] + args: Vec<String>, +} +``` + +Add match arm: +``` +Commands::Mypy { args } => { + mypy_cmd::run(&args, cli.verbose)?; +} +``` + +Add module declaration: +``` +mod mypy_cmd; +``` + +### discover/registry.rs -- Discovery + +Append pattern to the end of the `PATTERNS` array (after the existing `wget` pattern), keeping its index aligned with the new rule in `RULES`: +``` +r"^(python3?\s+-m\s+)?mypy(\s|$)" +``` + +Add corresponding rule: +``` +RtkRule { + rtk_cmd: "rtk mypy", + category: "Build", + savings_pct: 80.0, + subcmd_savings: &[], + subcmd_status: &[], +} +``` + +### rtk-rewrite.sh -- Hook (both locations) + +Add after the ruff rewrite block in the "Python tooling" section: +```bash +elif echo "$MATCH_CMD" | grep -qE '^mypy([[:space:]]|$)'; then + REWRITTEN="${ENV_PREFIX}$(echo "$CMD_BODY" | sed 's/^mypy/rtk mypy/')" +elif echo "$MATCH_CMD" | grep -qE '^python[[:space:]]+-m[[:space:]]+mypy([[:space:]]|$)'; then + REWRITTEN="${ENV_PREFIX}$(echo "$CMD_BODY" | sed 's/^python -m mypy/rtk mypy/')" +``` + +## File Change Summary + +| File | Action | Lines (est.) | Risk | +|------|--------|-------------|------| +| `src/mypy_cmd.rs` | CREATE | ~200 | Low -- follows tsc_cmd.rs pattern | +| `src/main.rs` | MODIFY | +6 (mod decl, enum variant, match arm) | Low | +| `src/discover/registry.rs` | MODIFY | +10 (pattern + rule + test) | Low | +| `.claude/hooks/rtk-rewrite.sh` | MODIFY | +4 (two elif blocks) | Low | +| `hooks/rtk-rewrite.sh` | MODIFY | +4 (mirror) | Low | + +**Total**: 1 new file, 4 modified files (plus a CLAUDE.md documentation update, 6 files overall). ~224 new lines. + +## Testing Strategy + +All tests follow TDD (Red-Green-Refactor) per project conventions. 
+ +**Unit tests in mypy_cmd.rs** (embedded `#[cfg(test)] mod tests`): + +| Test | Validates | +|------|-----------| +| `test_filter_mypy_errors_grouped_by_file` | FR-001, FR-003, FR-004: Multi-file errors grouped correctly | +| `test_filter_mypy_with_column_numbers` | FR-002: Extended format `file:line:col:` parsed | +| `test_filter_mypy_top_codes_summary` | FR-005: Top codes shown when 2+ distinct codes | +| `test_filter_mypy_single_code_no_summary` | FR-005: Top codes omitted with 1 code | +| `test_filter_mypy_every_error_shown` | FR-006: No error messages collapsed | +| `test_filter_mypy_note_continuation` | FR-007: note: lines preserved as context | +| `test_filter_mypy_fileless_errors` | FR-008: Config errors shown verbatim at top | +| `test_filter_mypy_no_errors` | FR-013: Success message for clean output | +| `test_filter_mypy_no_file_limit` | All files shown (mirrors tsc test) | + +**Unit tests in discover/registry.rs** (added to existing test module): + +| Test | Validates | +|------|-----------| +| `test_classify_mypy` | FR-014: `mypy src/` classified as Supported | +| `test_classify_python_m_mypy` | FR-014: `python3 -m mypy` classified as Supported | + +## Complexity Tracking + +No constitution violations. No complexity justification needed. diff --git a/specs/001-mypy-cmd/research.md b/specs/001-mypy-cmd/research.md new file mode 100644 index 0000000..d51be44 --- /dev/null +++ b/specs/001-mypy-cmd/research.md @@ -0,0 +1,34 @@ +# Research: RTK Mypy Command + +**Date**: 2026-02-13 +**Status**: Complete + +## Findings + +### Mypy Output Format Stability + +- **Decision**: Parse the standard `file:line: severity: message [code]` format. +- **Rationale**: This format has been stable since mypy 0.9 (2020). The optional column format (`file:line:col:`) was added later but is backwards-compatible. Both formats coexist in modern mypy (1.x). 
+- **Alternatives considered**: Parsing mypy's `--output json` flag was considered but rejected -- it would require RTK to inject flags, which conflicts with the argument passthrough design (FR-010). The text format is sufficient and avoids modifying user intent. + +### Command Discovery (mypy vs python -m mypy) + +- **Decision**: Try `mypy` directly first, fall back to `python3 -m mypy`. +- **Rationale**: This is the same pattern used by `pytest_cmd.rs` (lines 18-25). Users who install mypy via pip have it on PATH. Users in virtualenvs or poetry/pipx may only have it via `python -m mypy`. +- **Alternatives considered**: Only supporting `mypy` directly was considered but would miss users who haven't activated their virtualenv. + +### ANSI Stripping + +- **Decision**: Reuse existing `utils::strip_ansi()` (src/utils.rs:47). +- **Rationale**: Mypy colorizes output when stdout is a TTY. Since RTK captures via `Command::output()` (not a TTY), mypy typically does not emit ANSI. However, users may have `MYPY_FORCE_COLOR=1` or `--color-output` set, so stripping is a safety measure. +- **Alternatives considered**: None -- the utility already exists. + +### Error Code Format + +- **Decision**: Display error codes in bracket format `[error-code]` matching mypy's native format. +- **Rationale**: Mypy uses bracketed codes like `[return-value]`, `[name-defined]`, `[assignment]`. This differs from tsc which uses `TS2322` style. Using brackets preserves the code as-is for easy copy-paste into mypy configuration (`# type: ignore[error-code]`). +- **Alternatives considered**: Stripping brackets was considered but reduces utility for suppression comments. + +## No Unresolved Unknowns + +All technical decisions are resolved. No NEEDS CLARIFICATION items remain. 
diff --git a/specs/001-mypy-cmd/spec.md b/specs/001-mypy-cmd/spec.md new file mode 100644 index 0000000..6f6a359 --- /dev/null +++ b/specs/001-mypy-cmd/spec.md @@ -0,0 +1,106 @@ +# Feature Specification: RTK Mypy Command + +**Feature Branch**: `001-mypy-cmd` +**Created**: 2026-02-13 +**Status**: Draft +**Input**: User description: "Add a mypy command module to RTK that filters and compresses mypy type checker output, grouping errors by file and error code, with token savings of 75-85%. Structurally similar to tsc_cmd.rs. Includes registry/hook updates and TDD." + +## User Scenarios & Testing *(mandatory)* + +### User Story 1 - Filter Mypy Error Output (Priority: P1) + +A developer runs `rtk mypy` on a Python project. Mypy produces verbose type-checking output with errors scattered across many files. RTK parses this output, groups errors by file (sorted by error count descending), shows a summary header with total error/file counts and top error codes, and displays every individual error with file, line number, error code, and message. The output is 75-85% smaller than raw mypy output while preserving all actionable information. + +**Why this priority**: This is the core value proposition. Without error filtering and grouping, the command has no reason to exist. + +**Independent Test**: Can be fully tested by passing raw mypy output strings to the filter function and asserting the structured output contains correct groupings, counts, and every error message. + +**Acceptance Scenarios**: + +1. **Given** mypy output with errors in multiple files, **When** the user runs `rtk mypy`, **Then** output shows a summary header ("mypy: N errors in M files"), errors grouped by file with per-file counts, and every error's line number, error code, and message. +2. **Given** mypy output with zero errors, **When** the user runs `rtk mypy`, **Then** output shows a success message ("mypy: No issues found"). +3. 
**Given** mypy output with errors using a single repeated error code, **When** the filter runs, **Then** the "Top codes" summary line is omitted (only shown when 2+ distinct codes exist). + +--- + +### User Story 2 - Transparent Mypy Invocation (Priority: P1) + +A developer runs `rtk mypy src/` or `rtk mypy --strict --config-file mypy.ini` and all arguments are forwarded to the underlying mypy command. RTK locates and runs mypy, captures its output, filters it, tracks token savings, and preserves mypy's exit code so CI/CD pipelines can gate on type-check results. + +**Why this priority**: Argument passthrough and exit code preservation are required for the command to be usable in real workflows. Without them, users cannot replace `mypy` with `rtk mypy`. + +**Independent Test**: Can be tested by verifying that the command construction passes all user-provided arguments to mypy, and that the process exit code matches mypy's exit code. + +**Acceptance Scenarios**: + +1. **Given** the user runs `rtk mypy src/ --strict`, **When** RTK executes, **Then** mypy is invoked with arguments `src/ --strict`. +2. **Given** mypy exits with code 1 (errors found), **When** RTK finishes, **Then** RTK also exits with code 1. +3. **Given** mypy is not installed, **When** RTK tries to run it, **Then** an error message tells the user how to install mypy (e.g., "pip install mypy"). + +--- + +### User Story 3 - Discovery and Hook Integration (Priority: P2) + +When a developer uses `mypy` or `python3 -m mypy` in a Claude Code session, the RTK auto-rewrite hook transparently rewrites the command to `rtk mypy`. The RTK discover module classifies `mypy` commands as "Supported" and estimates token savings. This ensures the user gets token savings without changing their habits. + +**Why this priority**: Hooks and discovery are force multipliers -- they make the mypy command discoverable and automatic. But they only add value after the core filtering (P1) works. 
+ +**Independent Test**: Can be tested by running the classify_command function against `mypy` and `python3 -m mypy` inputs and asserting they return Supported with the correct RTK equivalent and savings estimate. + +**Acceptance Scenarios**: + +1. **Given** a command `mypy src/`, **When** the discover registry classifies it, **Then** it returns Supported with `rtk_equivalent: "rtk mypy"`, category "Build", and estimated savings 80%. +2. **Given** a command `python3 -m mypy --strict`, **When** the discover registry classifies it, **Then** it returns Supported with the same RTK equivalent. +3. **Given** the auto-rewrite hook receives `mypy src/ --strict`, **When** it processes the command, **Then** it outputs a JSON rewrite to `rtk mypy src/ --strict`. + +--- + +### Edge Cases + +- What happens when mypy produces note-level output (not errors)? Notes are informational context lines that follow errors. They should be preserved as continuation lines under the parent error, not treated as standalone errors. +- What happens when mypy output contains color/ANSI codes? ANSI escape sequences must be stripped before parsing. +- What happens when mypy produces "error:" lines without a file reference (e.g., configuration errors, import errors)? These should be displayed verbatim at the top of the output, before any grouped file errors. +- What happens when mypy output contains column numbers (e.g., `file.py:10:5: error:`)? The column number should be parsed but not displayed (line number is sufficient for navigation). + +## Requirements *(mandatory)* + +### Functional Requirements + +- **FR-001**: System MUST parse mypy error lines in the format `file.py:LINE: error: MESSAGE [error-code]` and extract file, line number, error code, and message. +- **FR-002**: System MUST also parse the extended format `file.py:LINE:COL: error: MESSAGE [error-code]` (with column number). +- **FR-003**: System MUST group parsed errors by file, sorted by error count (most errors first). 
+- **FR-004**: System MUST display a summary header: "mypy: N errors in M files". +- **FR-005**: System MUST display a "Top codes" line when 2 or more distinct error codes are present, showing up to 5 codes with occurrence counts, sorted by frequency. +- **FR-006**: System MUST display every individual error with line number, error code, and message (no collapsing or truncation of error messages beyond a reasonable line length limit). +- **FR-007**: System MUST preserve continuation/note lines (indented or starting with "note:") as context under their parent error. +- **FR-008**: System MUST display file-less errors (configuration errors, import failures) verbatim before grouped output. +- **FR-009**: System MUST strip ANSI escape sequences from mypy output before parsing. +- **FR-010**: System MUST forward all user-provided arguments to the mypy command unchanged. +- **FR-011**: System MUST preserve mypy's exit code as the process exit code. +- **FR-012**: System MUST track token savings (input vs output size) in the RTK tracking database. +- **FR-013**: System MUST show a success message when mypy reports no errors. +- **FR-014**: The discover registry MUST classify `mypy` and `python3 -m mypy` commands as Supported. +- **FR-015**: The auto-rewrite hook MUST rewrite `mypy` and `python3 -m mypy` commands to `rtk mypy`. + +### Key Entities + +- **Mypy Error**: A single type-checking diagnostic with file path, line number, optional column, severity (error/note/warning), optional error code, and message text. +- **File Group**: A collection of mypy errors sharing the same file path, with a count of errors in that file. +- **Error Code Summary**: An aggregate count of how many times each error code appears across all files. + +## Assumptions + +- Mypy is installed and available on the user's PATH (either as `mypy` directly or via `python3 -m mypy`). RTK does not install or manage mypy. 
+- Mypy's output format is stable across versions 0.9+ (the `file:line: severity: message [code]` format has been consistent since mypy 0.9). +- The discover registry and hook patterns follow the same conventions as existing commands (ruff, pytest, pip). +- Token savings are estimated at 80% for the discover registry, based on the structural similarity to tsc (83%) adjusted for mypy's typically shorter output lines. + +## Success Criteria *(mandatory)* + +### Measurable Outcomes + +- **SC-001**: Running `rtk mypy` on a project with 50+ type errors produces output that is 75-85% smaller than raw mypy output while preserving every individual error message, file path, line number, and error code. +- **SC-002**: All user-provided mypy arguments are forwarded correctly, including flags like `--strict`, `--config-file`, path arguments, and `--ignore-missing-imports`. +- **SC-003**: The RTK process exit code matches mypy's exit code in all cases (0 for success, 1 for errors, 2 for fatal errors). +- **SC-004**: The discover registry correctly classifies both `mypy` and `python3 -m mypy` commands as Supported with the correct RTK equivalent. +- **SC-005**: The module includes comprehensive tests following the project's TDD patterns, with test coverage for: error parsing, file grouping, success output, continuation lines, ANSI stripping, file-less errors, and edge cases. diff --git a/specs/001-mypy-cmd/tasks.md b/specs/001-mypy-cmd/tasks.md new file mode 100644 index 0000000..1cc02c7 --- /dev/null +++ b/specs/001-mypy-cmd/tasks.md @@ -0,0 +1,153 @@ +# Tasks: RTK Mypy Command + +**Input**: Design documents from `/specs/001-mypy-cmd/` +**Prerequisites**: plan.md, spec.md, research.md + +**Tests**: Included (TDD explicitly requested in spec and CLAUDE.md conventions). + +**Organization**: Tasks grouped by user story. US1 and US2 are both P1 but US1 (filter) must precede US2 (invocation) since the run function calls the filter. US3 (discovery/hooks) is P2. 
+ +## Format: `[ID] [P?] [Story] Description` + +- **[P]**: Can run in parallel (different files or independent functions, no dependencies) +- **[Story]**: Which user story this task belongs to (e.g., US1, US2, US3) +- Include exact file paths in descriptions + +--- + +## Phase 1: Setup (Wiring) + +**Purpose**: Register the mypy module and command in the CLI so that `cargo build` compiles the new module. + +- [x] T001 Add `mod mypy_cmd;` declaration in `src/main.rs` (alphabetically, after `mod ls;`) +- [x] T002 Add `Mypy` variant to `Commands` enum in `src/main.rs` with `#[arg(trailing_var_arg = true, allow_hyphen_values = true)] args: Vec<String>` +- [x] T003 Add `Commands::Mypy { args }` match arm in `src/main.rs` dispatching to `mypy_cmd::run(&args, cli.verbose)?` +- [x] T004 Create stub `src/mypy_cmd.rs` with `pub fn run(args: &[String], verbose: u8) -> Result<()> { todo!() }` and empty test module + +**Checkpoint**: `cargo check` passes with the new module wired in (run function is a stub). + +--- + +## Phase 2: User Story 1 - Filter Mypy Error Output (Priority: P1) + +**Goal**: Parse raw mypy output and produce grouped, compact output with summary header, top error codes, and every error preserved. + +**Independent Test**: Pass raw mypy output strings to `filter_mypy_output()` and assert structured output. 
+ +### Tests for User Story 1 + +> **Write these tests FIRST, ensure they FAIL before implementation (TDD Red phase)** + +- [x] T005 [P] [US1] Write `test_filter_mypy_errors_grouped_by_file` in `src/mypy_cmd.rs` -- multi-file input, assert summary header "mypy: N errors in M files", assert file grouping with per-file counts, assert files sorted by error count descending (FR-001, FR-003, FR-004) +- [x] T006 [P] [US1] Write `test_filter_mypy_with_column_numbers` in `src/mypy_cmd.rs` -- input with `file.py:10:5: error:` format, assert line number extracted and error parsed correctly (FR-002) +- [x] T007 [P] [US1] Write `test_filter_mypy_top_codes_summary` in `src/mypy_cmd.rs` -- input with 3+ distinct error codes, assert "Top codes:" line shows up to 5 codes sorted by frequency (FR-005) +- [x] T008 [P] [US1] Write `test_filter_mypy_single_code_no_summary` in `src/mypy_cmd.rs` -- input with only one error code repeated, assert no "Top codes:" line (FR-005) +- [x] T009 [P] [US1] Write `test_filter_mypy_every_error_shown` in `src/mypy_cmd.rs` -- 3 errors in same file, assert each error message appears individually with line number and code (FR-006) +- [x] T010 [P] [US1] Write `test_filter_mypy_note_continuation` in `src/mypy_cmd.rs` -- error followed by note: line, assert note preserved as indented context under parent error (FR-007) +- [x] T011 [P] [US1] Write `test_filter_mypy_fileless_errors` in `src/mypy_cmd.rs` -- input with config/import errors (no file prefix), assert displayed verbatim before grouped output (FR-008) +- [x] T012 [P] [US1] Write `test_filter_mypy_no_errors` in `src/mypy_cmd.rs` -- input with "Success: no issues found", assert output is "mypy: No issues found" (FR-013) +- [x] T013 [P] [US1] Write `test_filter_mypy_no_file_limit` in `src/mypy_cmd.rs` -- 15 files with errors, assert all 15 files appear in output + +**Checkpoint**: All 9 tests exist and FAIL (`cargo test mypy_cmd` shows 9 failures). 
+ +### Implementation for User Story 1 + +- [x] T014 [US1] Implement `filter_mypy_output()` in `src/mypy_cmd.rs` -- regex parsing, MypyError struct, file grouping by HashMap, error code counting, formatted output generation (FR-001 through FR-009, FR-013) + +**Checkpoint**: All 9 filter tests PASS (`cargo test mypy_cmd` shows 9 passing). + +--- + +## Phase 3: User Story 2 - Transparent Mypy Invocation (Priority: P1) + +**Goal**: Wire `pub fn run()` to execute mypy, capture output, filter it, track savings, and preserve exit code. + +**Independent Test**: Verified by `cargo check` (type-safe command construction) and manual invocation. + +**Depends on**: Phase 2 (filter function must exist for run to call it) + +### Implementation for User Story 2 + +- [x] T015 [US2] Implement `pub fn run()` in `src/mypy_cmd.rs` -- try `mypy` then fallback to `python3 -m mypy`, forward all args, capture stdout+stderr, strip ANSI via `utils::strip_ansi()`, call `filter_mypy_output()`, track via `tracking::TimedExecution`, exit with mypy's exit code (FR-010, FR-011, FR-012) + +**Checkpoint**: `cargo build` succeeds. If mypy is installed: `cargo run -- mypy --version` works. Full pre-commit gate passes: `cargo fmt --all --check && cargo clippy --all-targets && cargo test`. + +--- + +## Phase 4: User Story 3 - Discovery and Hook Integration (Priority: P2) + +**Goal**: Auto-rewrite `mypy` commands to `rtk mypy` and classify them in the discover registry. + +**Independent Test**: Registry unit tests + hook script grep assertions. + +**Depends on**: Phase 1 (module must exist) but NOT on Phase 2/3. 
+ +### Tests for User Story 3 + +> **Write tests FIRST (TDD Red phase)** + +- [x] T016 [P] [US3] Write `test_classify_mypy` in `src/discover/registry.rs` -- assert `classify_command("mypy src/")` returns `Supported { rtk_equivalent: "rtk mypy", category: "Build", estimated_savings_pct: 80.0 }` (FR-014) +- [x] T017 [P] [US3] Write `test_classify_python_m_mypy` in `src/discover/registry.rs` -- assert `classify_command("python3 -m mypy --strict")` returns `Supported` with same fields (FR-014) + +**Checkpoint**: Both tests FAIL. + +### Implementation for User Story 3 + +- [x] T018 [US3] Add mypy pattern `r"^(python3?\s+-m\s+)?mypy(\s|$)"` to `PATTERNS` array in `src/discover/registry.rs` +- [x] T019 [US3] Add corresponding `RtkRule` to `RULES` array in `src/discover/registry.rs` -- `rtk_cmd: "rtk mypy"`, `category: "Build"`, `savings_pct: 80.0` +- [x] T020 [P] [US3] Add mypy rewrite patterns to `.claude/hooks/rtk-rewrite.sh` in the Python tooling section (after ruff block): `mypy` and `python -m mypy` rewrites (FR-015) +- [x] T021 [P] [US3] Mirror the same hook changes in `hooks/rtk-rewrite.sh` (FR-015) + +**Checkpoint**: Registry tests PASS. `cargo test registry` shows all existing + 2 new tests passing. Hook patterns grep-verifiable. + +--- + +## Phase 5: Polish & Verification + +**Purpose**: Full verification, documentation, and pre-commit gate. + +- [x] T022 Run full pre-commit gate: `cargo fmt --all --check && cargo clippy --all-targets && cargo test` +- [x] T023 Update CLAUDE.md architecture table to add mypy_cmd.rs entry with description and token strategy +- [x] T024 Verify `PATTERNS.len() == RULES.len()` assertion still passes in registry (existing test `test_patterns_rules_length_match`) + +**Checkpoint**: All tests pass, clippy clean, CLAUDE.md updated. Feature complete. 
+ +--- + +## Dependencies & Execution Order + +### Phase Dependencies + +- **Phase 1 (Setup)**: No dependencies -- start immediately +- **Phase 2 (US1 Filter)**: Depends on Phase 1 (module must compile) +- **Phase 3 (US2 Invocation)**: Depends on Phase 2 (run calls filter) +- **Phase 4 (US3 Discovery)**: Depends on Phase 1 only (registry/hook are independent of filter implementation) +- **Phase 5 (Polish)**: Depends on all previous phases + +### User Story Dependencies + +- **US1 (Filter)**: Standalone after setup -- core value +- **US2 (Invocation)**: Depends on US1 (run function calls filter_mypy_output) +- **US3 (Discovery/Hooks)**: Independent of US1/US2 (registry + hooks don't import mypy_cmd) + +### Parallel Opportunities + +**Within Phase 2 (US1 Tests)**: +All 9 test tasks (T005-T013) are [P] -- they write to the same file but to independent test functions. Can be written in a single batch. + +**Within Phase 4 (US3)**: +- T016 + T017 (registry tests) are [P] with T020 + T021 (hook changes) -- different files +- T018 + T019 must be sequential (same array in same file) + +**Cross-phase**: +- Phase 4 (US3) can run in parallel with Phase 2 (US1) after Phase 1 completes -- different files entirely + +--- + +## Notes + +- All tasks in a single file should be done sequentially to avoid conflicts +- TDD is mandatory per project CLAUDE.md -- write tests first, verify they fail, then implement +- The filter function is pure (no I/O) making it fully unit-testable +- The run function follows the exact pattern of tsc_cmd.rs and pytest_cmd.rs +- Registry pattern+rule arrays must stay aligned (existing test enforces this) diff --git a/src/discover/registry.rs b/src/discover/registry.rs index 7ef375c..0f97b95 100644 --- a/src/discover/registry.rs +++ b/src/discover/registry.rs @@ -70,6 +70,7 @@ const PATTERNS: &[&str] = &[ r"^kubectl\s+(get|logs)", r"^curl\s+", r"^wget\s+", + r"^(python3?\s+-m\s+)?mypy(\s|$)", ]; const RULES: &[RtkRule] = &[ @@ -225,6 +226,13 @@ const 
RULES: &[RtkRule] = &[ subcmd_savings: &[], subcmd_status: &[], }, + RtkRule { + rtk_cmd: "rtk mypy", + category: "Build", + savings_pct: 80.0, + subcmd_savings: &[], + subcmd_status: &[], + }, ]; /// Commands to ignore (shell builtins, trivial, already rtk). @@ -732,4 +740,30 @@ mod tests { let cmd = "cat <<'EOF'\nhello && world\nEOF"; assert_eq!(split_command_chain(cmd), vec![cmd]); } + + #[test] + fn test_classify_mypy() { + assert_eq!( + classify_command("mypy src/"), + Classification::Supported { + rtk_equivalent: "rtk mypy", + category: "Build", + estimated_savings_pct: 80.0, + status: RtkStatus::Existing, + } + ); + } + + #[test] + fn test_classify_python_m_mypy() { + assert_eq!( + classify_command("python3 -m mypy --strict"), + Classification::Supported { + rtk_equivalent: "rtk mypy", + category: "Build", + estimated_savings_pct: 80.0, + status: RtkStatus::Existing, + } + ); + } } diff --git a/src/main.rs b/src/main.rs index 22b07cb..94bd5dc 100644 --- a/src/main.rs +++ b/src/main.rs @@ -24,6 +24,7 @@ mod lint_cmd; mod local_llm; mod log_cmd; mod ls; +mod mypy_cmd; mod next_cmd; mod npm_cmd; mod parser; @@ -488,6 +489,13 @@ enum Commands { args: Vec, }, + /// Mypy type checker with grouped error output + Mypy { + /// Mypy arguments + #[arg(trailing_var_arg = true, allow_hyphen_values = true)] + args: Vec, + }, + /// Pip package manager with compact output (auto-detects uv) Pip { /// Pip arguments (e.g., list, outdated, install) @@ -1314,6 +1322,10 @@ fn main() -> Result<()> { pytest_cmd::run(&args, cli.verbose)?; } + Commands::Mypy { args } => { + mypy_cmd::run(&args, cli.verbose)?; + } + Commands::Pip { args } => { pip_cmd::run(&args, cli.verbose)?; } diff --git a/src/mypy_cmd.rs b/src/mypy_cmd.rs new file mode 100644 index 0000000..c141ddc --- /dev/null +++ b/src/mypy_cmd.rs @@ -0,0 +1,389 @@ +use crate::tracking; +use crate::utils::{strip_ansi, truncate}; +use anyhow::{Context, Result}; +use regex::Regex; +use std::collections::HashMap; +use 
std::process::Command; + +pub fn run(args: &[String], verbose: u8) -> Result<()> { + let timer = tracking::TimedExecution::start(); + + let mut cmd = if which_command("mypy").is_some() { + Command::new("mypy") + } else { + let mut c = Command::new("python3"); + c.arg("-m").arg("mypy"); + c + }; + + for arg in args { + cmd.arg(arg); + } + + if verbose > 0 { + eprintln!("Running: mypy {}", args.join(" ")); + } + + let output = cmd + .output() + .context("Failed to run mypy. Is it installed? Try: pip install mypy")?; + + let stdout = String::from_utf8_lossy(&output.stdout); + let stderr = String::from_utf8_lossy(&output.stderr); + let raw = format!("{}\n{}", stdout, stderr); + let clean = strip_ansi(&raw); + + let filtered = filter_mypy_output(&clean); + + println!("{}", filtered); + + timer.track( + &format!("mypy {}", args.join(" ")), + &format!("rtk mypy {}", args.join(" ")), + &raw, + &filtered, + ); + + std::process::exit(output.status.code().unwrap_or(1)); +} + +fn which_command(cmd: &str) -> Option { + Command::new("which") + .arg(cmd) + .output() + .ok() + .filter(|o| o.status.success()) + .and_then(|o| String::from_utf8(o.stdout).ok()) + .map(|s| s.trim().to_string()) + .filter(|s| !s.is_empty()) +} + +struct MypyError { + file: String, + line: usize, + code: String, + message: String, + context_lines: Vec, +} + +fn filter_mypy_output(output: &str) -> String { + lazy_static::lazy_static! 
{ + // file.py:12: error: Message [error-code] + // file.py:12:5: error: Message [error-code] + static ref MYPY_DIAG: Regex = Regex::new( + r"^(.+?):(\d+)(?::\d+)?: (error|warning|note): (.+?)(?:\s+\[(.+)\])?$" + ).unwrap(); + } + + let lines: Vec<&str> = output.lines().collect(); + let mut errors: Vec = Vec::new(); + let mut fileless_lines: Vec = Vec::new(); + let mut i = 0; + + while i < lines.len() { + let line = lines[i]; + + // Skip mypy's own summary line + if line.starts_with("Found ") && line.contains(" error") { + i += 1; + continue; + } + // Skip "Success: no issues found" + if line.starts_with("Success:") { + i += 1; + continue; + } + + if let Some(caps) = MYPY_DIAG.captures(line) { + let severity = &caps[3]; + let file = caps[1].to_string(); + let line_num: usize = caps[2].parse().unwrap_or(0); + let message = caps[4].to_string(); + let code = caps + .get(5) + .map(|m| m.as_str().to_string()) + .unwrap_or_default(); + + if severity == "note" { + // Attach note to preceding error if same file and line + if let Some(last) = errors.last_mut() { + if last.file == file { + last.context_lines.push(message); + i += 1; + continue; + } + } + // Standalone note with no parent -- display as fileless + fileless_lines.push(line.to_string()); + i += 1; + continue; + } + + let mut err = MypyError { + file, + line: line_num, + code, + message, + context_lines: Vec::new(), + }; + + // Capture continuation note lines + i += 1; + while i < lines.len() { + if let Some(next_caps) = MYPY_DIAG.captures(lines[i]) { + if &next_caps[3] == "note" && next_caps[1] == err.file { + let note_msg = next_caps[4].to_string(); + err.context_lines.push(note_msg); + i += 1; + continue; + } + } + break; + } + + errors.push(err); + } else if line.contains("error:") && !line.trim().is_empty() { + // File-less error (config errors, import errors) + fileless_lines.push(line.to_string()); + i += 1; + } else { + i += 1; + } + } + + // No errors at all + if errors.is_empty() && 
fileless_lines.is_empty() { + if output.contains("Success: no issues found") || output.contains("no issues found") { + return "mypy: No issues found".to_string(); + } + return "mypy: No issues found".to_string(); + } + + // Group by file + let mut by_file: HashMap> = HashMap::new(); + for err in &errors { + by_file.entry(err.file.clone()).or_default().push(err); + } + + // Count by error code + let mut by_code: HashMap = HashMap::new(); + for err in &errors { + if !err.code.is_empty() { + *by_code.entry(err.code.clone()).or_insert(0) += 1; + } + } + + let mut result = String::new(); + + // File-less errors first + for line in &fileless_lines { + result.push_str(line); + result.push('\n'); + } + if !fileless_lines.is_empty() && !errors.is_empty() { + result.push('\n'); + } + + if !errors.is_empty() { + result.push_str(&format!( + "mypy: {} errors in {} files\n", + errors.len(), + by_file.len() + )); + result.push_str("═══════════════════════════════════════\n"); + + // Top error codes summary (only when 2+ distinct codes) + let mut code_counts: Vec<_> = by_code.iter().collect(); + code_counts.sort_by(|a, b| b.1.cmp(a.1)); + + if code_counts.len() > 1 { + let codes_str: Vec = code_counts + .iter() + .take(5) + .map(|(code, count)| format!("{} ({}x)", code, count)) + .collect(); + result.push_str(&format!("Top codes: {}\n\n", codes_str.join(", "))); + } + + // Files sorted by error count (most errors first) + let mut files_sorted: Vec<_> = by_file.iter().collect(); + files_sorted.sort_by(|a, b| b.1.len().cmp(&a.1.len())); + + for (file, file_errors) in &files_sorted { + result.push_str(&format!("{} ({} errors)\n", file, file_errors.len())); + + for err in *file_errors { + if err.code.is_empty() { + result.push_str(&format!( + " L{}: {}\n", + err.line, + truncate(&err.message, 120) + )); + } else { + result.push_str(&format!( + " L{}: [{}] {}\n", + err.line, + err.code, + truncate(&err.message, 120) + )); + } + for ctx in &err.context_lines { + 
result.push_str(&format!(" {}\n", truncate(ctx, 120))); + } + } + result.push('\n'); + } + } + + result.trim().to_string() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_filter_mypy_errors_grouped_by_file() { + let output = "\ +src/server/auth.py:12: error: Incompatible return value type (got \"str\", expected \"int\") [return-value] +src/server/auth.py:15: error: Argument 1 has incompatible type \"int\"; expected \"str\" [arg-type] +src/models/user.py:8: error: Name \"foo\" is not defined [name-defined] +src/models/user.py:10: error: Incompatible types in assignment [assignment] +src/models/user.py:20: error: Missing return statement [return] +Found 5 errors in 2 files (checked 10 source files) +"; + let result = filter_mypy_output(output); + assert!(result.contains("mypy: 5 errors in 2 files")); + // user.py has 3 errors, auth.py has 2 -- user.py should come first + let user_pos = result.find("user.py").unwrap(); + let auth_pos = result.find("auth.py").unwrap(); + assert!( + user_pos < auth_pos, + "user.py (3 errors) should appear before auth.py (2 errors)" + ); + assert!(result.contains("user.py (3 errors)")); + assert!(result.contains("auth.py (2 errors)")); + } + + #[test] + fn test_filter_mypy_with_column_numbers() { + let output = "\ +src/api.py:10:5: error: Incompatible return value type [return-value] +"; + let result = filter_mypy_output(output); + assert!(result.contains("L10:")); + assert!(result.contains("[return-value]")); + assert!(result.contains("Incompatible return value type")); + } + + #[test] + fn test_filter_mypy_top_codes_summary() { + let output = "\ +a.py:1: error: Error one [return-value] +a.py:2: error: Error two [return-value] +a.py:3: error: Error three [return-value] +b.py:1: error: Error four [name-defined] +c.py:1: error: Error five [arg-type] +Found 5 errors in 3 files +"; + let result = filter_mypy_output(output); + assert!(result.contains("Top codes:")); + assert!(result.contains("return-value (3x)")); + 
assert!(result.contains("name-defined (1x)")); + assert!(result.contains("arg-type (1x)")); + } + + #[test] + fn test_filter_mypy_single_code_no_summary() { + let output = "\ +a.py:1: error: Error one [return-value] +a.py:2: error: Error two [return-value] +b.py:1: error: Error three [return-value] +Found 3 errors in 2 files +"; + let result = filter_mypy_output(output); + assert!( + !result.contains("Top codes:"), + "Top codes should not appear with only one distinct code" + ); + } + + #[test] + fn test_filter_mypy_every_error_shown() { + let output = "\ +src/api.py:10: error: Type \"str\" not assignable to \"int\" [assignment] +src/api.py:20: error: Missing return statement [return] +src/api.py:30: error: Name \"bar\" is not defined [name-defined] +"; + let result = filter_mypy_output(output); + assert!(result.contains("Type \"str\" not assignable to \"int\"")); + assert!(result.contains("Missing return statement")); + assert!(result.contains("Name \"bar\" is not defined")); + assert!(result.contains("L10:")); + assert!(result.contains("L20:")); + assert!(result.contains("L30:")); + } + + #[test] + fn test_filter_mypy_note_continuation() { + let output = "\ +src/app.py:10: error: Incompatible types in assignment [assignment] +src/app.py:10: note: Expected type \"int\" +src/app.py:10: note: Got type \"str\" +src/app.py:20: error: Missing return statement [return] +"; + let result = filter_mypy_output(output); + assert!(result.contains("Incompatible types in assignment")); + assert!(result.contains("Expected type \"int\"")); + assert!(result.contains("Got type \"str\"")); + assert!(result.contains("L10:")); + assert!(result.contains("L20:")); + } + + #[test] + fn test_filter_mypy_fileless_errors() { + let output = "\ +mypy: error: No module named 'nonexistent' +src/api.py:10: error: Name \"foo\" is not defined [name-defined] +Found 1 error in 1 file +"; + let result = filter_mypy_output(output); + // File-less error should appear verbatim before grouped output + 
assert!(result.contains("mypy: error: No module named 'nonexistent'")); + assert!(result.contains("api.py (1 error")); + let fileless_pos = result.find("No module named").unwrap(); + let grouped_pos = result.find("api.py").unwrap(); + assert!( + fileless_pos < grouped_pos, + "File-less errors should appear before grouped file errors" + ); + } + + #[test] + fn test_filter_mypy_no_errors() { + let output = "Success: no issues found in 5 source files\n"; + let result = filter_mypy_output(output); + assert_eq!(result, "mypy: No issues found"); + } + + #[test] + fn test_filter_mypy_no_file_limit() { + let mut output = String::new(); + for i in 1..=15 { + output.push_str(&format!( + "src/file{}.py:{}: error: Error in file {}. [assignment]\n", + i, i, i + )); + } + output.push_str("Found 15 errors in 15 files\n"); + let result = filter_mypy_output(&output); + assert!(result.contains("15 errors in 15 files")); + for i in 1..=15 { + assert!( + result.contains(&format!("file{}.py", i)), + "file{}.py missing from output", + i + ); + } + } +} diff --git a/src/utils.rs b/src/utils.rs index dbf9c91..6ea0698 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -395,5 +395,4 @@ mod tests { let result = truncate(cjk, 6); assert!(result.ends_with("...")); } - }