diff --git a/.claude/hooks/rtk-rewrite.sh b/.claude/hooks/rtk-rewrite.sh index 6574b01..2a9564f 100755 --- a/.claude/hooks/rtk-rewrite.sh +++ b/.claude/hooks/rtk-rewrite.sh @@ -170,6 +170,10 @@ elif echo "$MATCH_CMD" | grep -qE '^pip[[:space:]]+(list|outdated|install|show)( REWRITTEN="${ENV_PREFIX}$(echo "$CMD_BODY" | sed 's/^pip /rtk pip /')" elif echo "$MATCH_CMD" | grep -qE '^uv[[:space:]]+pip[[:space:]]+(list|outdated|install|show)([[:space:]]|$)'; then REWRITTEN="${ENV_PREFIX}$(echo "$CMD_BODY" | sed 's/^uv pip /rtk pip /')" +elif echo "$MATCH_CMD" | grep -qE '^mypy([[:space:]]|$)'; then + REWRITTEN="${ENV_PREFIX}$(echo "$CMD_BODY" | sed 's/^mypy/rtk mypy/')" +elif echo "$MATCH_CMD" | grep -qE '^python3?[[:space:]]+-m[[:space:]]+mypy([[:space:]]|$)'; then + REWRITTEN="${ENV_PREFIX}$(echo "$CMD_BODY" | sed -E 's/^python3?[[:space:]]+-m[[:space:]]+mypy/rtk mypy/')" # --- Go tooling --- elif echo "$MATCH_CMD" | grep -qE '^go[[:space:]]+test([[:space:]]|$)'; then diff --git a/CLAUDE.md b/CLAUDE.md index 4954f04..249e70e 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -219,6 +219,7 @@ rtk gain --history | grep proxy | pnpm_cmd.rs | pnpm package manager | Compact dependency trees (70-90% reduction) | | ruff_cmd.rs | Ruff linter/formatter | JSON for check, text for format (80%+ reduction) | | pytest_cmd.rs | Pytest test runner | State machine text parser (90%+ reduction) | +| mypy_cmd.rs | Mypy type checker | Group by file/error code (80% reduction) | | pip_cmd.rs | pip/uv package manager | JSON parsing, auto-detect uv (70-85% reduction) | | go_cmd.rs | Go commands | NDJSON for test, text for build/vet (80-90% reduction) | | golangci_cmd.rs | golangci-lint | JSON parsing, group by rule (85% reduction) | @@ -312,3 +313,10 @@ GitHub Actions workflow (.github/workflows/release.yml): - DEB/RPM package generation - Automated releases on version tags (v*) - Checksums for binary verification + +## Active Technologies +- Rust 2021 edition + regex (1), lazy_static (1.4), anyhow (1.0) -- all already in 
Cargo.toml (001-mypy-cmd) +- SQLite via rusqlite (existing tracking.rs) (001-mypy-cmd) + +## Recent Changes +- 001-mypy-cmd: Added Rust 2021 edition + regex (1), lazy_static (1.4), anyhow (1.0) -- all already in Cargo.toml diff --git a/hooks/rtk-rewrite.sh b/hooks/rtk-rewrite.sh index 6574b01..2a9564f 100644 --- a/hooks/rtk-rewrite.sh +++ b/hooks/rtk-rewrite.sh @@ -170,6 +170,10 @@ elif echo "$MATCH_CMD" | grep -qE '^pip[[:space:]]+(list|outdated|install|show)( REWRITTEN="${ENV_PREFIX}$(echo "$CMD_BODY" | sed 's/^pip /rtk pip /')" elif echo "$MATCH_CMD" | grep -qE '^uv[[:space:]]+pip[[:space:]]+(list|outdated|install|show)([[:space:]]|$)'; then REWRITTEN="${ENV_PREFIX}$(echo "$CMD_BODY" | sed 's/^uv pip /rtk pip /')" +elif echo "$MATCH_CMD" | grep -qE '^mypy([[:space:]]|$)'; then + REWRITTEN="${ENV_PREFIX}$(echo "$CMD_BODY" | sed 's/^mypy/rtk mypy/')" +elif echo "$MATCH_CMD" | grep -qE '^python3?[[:space:]]+-m[[:space:]]+mypy([[:space:]]|$)'; then + REWRITTEN="${ENV_PREFIX}$(echo "$CMD_BODY" | sed -E 's/^python3?[[:space:]]+-m[[:space:]]+mypy/rtk mypy/')" # --- Go tooling --- elif echo "$MATCH_CMD" | grep -qE '^go[[:space:]]+test([[:space:]]|$)'; then diff --git a/specs/001-mypy-cmd/checklists/requirements.md b/specs/001-mypy-cmd/checklists/requirements.md new file mode 100644 index 0000000..13ee98b --- /dev/null +++ b/specs/001-mypy-cmd/checklists/requirements.md @@ -0,0 +1,37 @@ +# Specification Quality Checklist: RTK Mypy Command + +**Purpose**: Validate specification completeness and quality before proceeding to planning +**Created**: 2026-02-13 +**Feature**: [spec.md](../spec.md) + +## Content Quality + +- [x] No implementation details (languages, frameworks, APIs) +- [x] Focused on user value and business needs +- [x] Written for non-technical stakeholders +- [x] All mandatory sections completed + +## Requirement Completeness + +- [x] No [NEEDS CLARIFICATION] markers remain +- [x] Requirements are testable and unambiguous +- [x] Success criteria are measurable +- [x] Success criteria are 
technology-agnostic (no implementation details) +- [x] All acceptance scenarios are defined +- [x] Edge cases are identified +- [x] Scope is clearly bounded +- [x] Dependencies and assumptions identified + +## Feature Readiness + +- [x] All functional requirements have clear acceptance criteria +- [x] User scenarios cover primary flows +- [x] Feature meets measurable outcomes defined in Success Criteria +- [x] No implementation details leak into specification + +## Notes + +- All items pass. Spec is ready for `/speckit.plan`. +- FR-001 through FR-009 cover the core filter function (testable with unit tests on raw strings). +- FR-010 through FR-013 cover the command execution wrapper (testable with integration patterns). +- FR-014 and FR-015 cover discovery and hook integration (testable with registry unit tests and hook script assertions). diff --git a/specs/001-mypy-cmd/plan.md b/specs/001-mypy-cmd/plan.md new file mode 100644 index 0000000..38eb8d8 --- /dev/null +++ b/specs/001-mypy-cmd/plan.md @@ -0,0 +1,203 @@ +# Implementation Plan: RTK Mypy Command + +**Branch**: `001-mypy-cmd` | **Date**: 2026-02-13 | **Spec**: [spec.md](spec.md) +**Input**: Feature specification from `/specs/001-mypy-cmd/spec.md` + +## Summary + +Add `rtk mypy` command that filters and compresses mypy type-checker output by parsing errors, grouping by file (most errors first), displaying a summary header with total counts and top error codes, and preserving every individual error. Follows the exact same structural pattern as `tsc_cmd.rs`. Includes discover registry pattern, auto-rewrite hook entry, and comprehensive TDD tests. 
+ +## Technical Context + +**Language/Version**: Rust 2021 edition +**Primary Dependencies**: regex (1), lazy_static (1.4), anyhow (1.0) -- all already in Cargo.toml +**Storage**: SQLite via rusqlite (existing tracking.rs) +**Testing**: cargo test with embedded `#[cfg(test)] mod tests` +**Target Platform**: macOS, Linux (cross-platform CLI) +**Project Type**: Single Rust binary (existing) +**Performance Goals**: N/A (output filtering is string processing on small inputs) +**Constraints**: No new dependencies. Must follow existing module patterns exactly. +**Scale/Scope**: 6 files modified/created total + +## Constitution Check + +*No constitution file exists. Skipping gate check.* + +## Project Structure + +### Documentation (this feature) + +```text +specs/001-mypy-cmd/ +├── spec.md +├── plan.md # This file +├── research.md # Phase 0 output +├── checklists/ +│ └── requirements.md # Already created +└── tasks.md # Phase 2 output (created by /speckit.tasks) +``` + +### Source Code (repository root) + +```text +src/ +├── mypy_cmd.rs # NEW: mypy command module (filter + run) +├── main.rs # MODIFY: add Mypy variant to Commands enum + match arm +├── discover/ +│ └── registry.rs # MODIFY: add mypy + python3 -m mypy patterns and rules + +.claude/hooks/ +└── rtk-rewrite.sh # MODIFY: add mypy rewrite patterns + +hooks/ +└── rtk-rewrite.sh # MODIFY: mirror of .claude/hooks/rtk-rewrite.sh +``` + +**Structure Decision**: This is a leaf module addition following the exact pattern of `tsc_cmd.rs`, `ruff_cmd.rs`, and `pytest_cmd.rs`. No new directories, no new dependencies, no architectural changes. + +## Design + +### mypy_cmd.rs -- Core Module + +**Pattern**: Mirrors `tsc_cmd.rs` exactly. 
+ +**Public API**: +``` +pub fn run(args: &[String], verbose: u8) -> Result<()> +``` + +**Internal filter function** (unit-testable): +``` +fn filter_mypy_output(output: &str) -> String +``` + +**Mypy output format** (the input we parse): +``` +src/module.py:12: error: Incompatible return value type [return-value] +src/module.py:12:5: error: Incompatible return value type [return-value] +src/module.py:15: note: Expected "int" +src/other.py:8: error: Name "foo" is not defined [name-defined] +Found 3 errors in 2 files (checked 10 source files) +``` + +**Key patterns**: +- Error line regex: `^(.+?):(\d+)(?::(\d+))?: (error|warning|note): (.+?)(?:\s+\[(.+)\])?$` +- Continuation: `note:` lines attach to the preceding error +- File-less errors: Lines matching `error:` without a file path prefix (e.g., mypy config errors) -- display verbatim at top +- Summary line: `Found N errors in M files` -- replaced by our header + +**RTK output format** (what we produce): +``` +mypy: 3 errors in 2 files +======================================= +Top codes: return-value (1x), name-defined (1x) + +src/module.py (2 errors) + L12: [return-value] Incompatible return value type + Expected "int" + L15: [some-code] Another error + +src/other.py (1 error) + L8: [name-defined] Name "foo" is not defined +``` + +**Command execution flow**: +1. Try `mypy` directly via `Command::new("mypy")` +2. If not found, try `python3 -m mypy` as fallback (same pattern as pytest_cmd.rs) +3. Forward all user args +4. Capture stdout + stderr, combine +5. Strip ANSI codes via `utils::strip_ansi()` +6. Filter through `filter_mypy_output()` +7. Track via `tracking::TimedExecution` +8. 
Exit with mypy's exit code via `std::process::exit()` + +### main.rs -- Wiring + +Add to `Commands` enum (alphabetical placement near other Python tools): +``` +Mypy { + #[arg(trailing_var_arg = true, allow_hyphen_values = true)] + args: Vec<String>, +} +``` + +Add match arm: +``` +Commands::Mypy { args } => { + mypy_cmd::run(&args, cli.verbose)?; +} +``` + +Add module declaration: +``` +mod mypy_cmd; +``` + +### discover/registry.rs -- Discovery + +Add pattern after the existing Python tool patterns (after ruff, before docker): +``` +r"^(python3?\s+-m\s+)?mypy(\s|$)" +``` + +Add corresponding rule: +``` +RtkRule { + rtk_cmd: "rtk mypy", + category: "Build", + savings_pct: 80.0, + subcmd_savings: &[], + subcmd_status: &[], +} +``` + +### rtk-rewrite.sh -- Hook (both locations) + +Add after the ruff rewrite block in the "Python tooling" section: +```bash +elif echo "$MATCH_CMD" | grep -qE '^mypy([[:space:]]|$)'; then + REWRITTEN="${ENV_PREFIX}$(echo "$CMD_BODY" | sed 's/^mypy/rtk mypy/')" +elif echo "$MATCH_CMD" | grep -qE '^python[[:space:]]+-m[[:space:]]+mypy([[:space:]]|$)'; then + REWRITTEN="${ENV_PREFIX}$(echo "$CMD_BODY" | sed 's/^python -m mypy/rtk mypy/')" +``` + +## File Change Summary + +| File | Action | Lines (est.) | Risk | +|------|--------|-------------|------| +| `src/mypy_cmd.rs` | CREATE | ~200 | Low -- follows tsc_cmd.rs pattern | +| `src/main.rs` | MODIFY | +6 (mod decl, enum variant, match arm) | Low | +| `src/discover/registry.rs` | MODIFY | +10 (pattern + rule + test) | Low | +| `.claude/hooks/rtk-rewrite.sh` | MODIFY | +4 (two elif blocks) | Low | +| `hooks/rtk-rewrite.sh` | MODIFY | +4 (mirror) | Low | + +**Total**: 1 new file, 4 modified files. ~224 new lines. + +## Testing Strategy + +All tests follow TDD (Red-Green-Refactor) per project conventions. 
+ +**Unit tests in mypy_cmd.rs** (embedded `#[cfg(test)] mod tests`): + +| Test | Validates | +|------|-----------| +| `test_filter_mypy_errors_grouped_by_file` | FR-001, FR-003, FR-004: Multi-file errors grouped correctly | +| `test_filter_mypy_with_column_numbers` | FR-002: Extended format `file:line:col:` parsed | +| `test_filter_mypy_top_codes_summary` | FR-005: Top codes shown when 2+ distinct codes | +| `test_filter_mypy_single_code_no_summary` | FR-005: Top codes omitted with 1 code | +| `test_filter_mypy_every_error_shown` | FR-006: No error messages collapsed | +| `test_filter_mypy_note_continuation` | FR-007: note: lines preserved as context | +| `test_filter_mypy_fileless_errors` | FR-008: Config errors shown verbatim at top | +| `test_filter_mypy_no_errors` | FR-013: Success message for clean output | +| `test_filter_mypy_no_file_limit` | All files shown (mirrors tsc test) | + +**Unit tests in discover/registry.rs** (added to existing test module): + +| Test | Validates | +|------|-----------| +| `test_classify_mypy` | FR-014: `mypy src/` classified as Supported | +| `test_classify_python_m_mypy` | FR-014: `python3 -m mypy` classified as Supported | + +## Complexity Tracking + +No constitution violations. No complexity justification needed. diff --git a/specs/001-mypy-cmd/research.md b/specs/001-mypy-cmd/research.md new file mode 100644 index 0000000..d51be44 --- /dev/null +++ b/specs/001-mypy-cmd/research.md @@ -0,0 +1,34 @@ +# Research: RTK Mypy Command + +**Date**: 2026-02-13 +**Status**: Complete + +## Findings + +### Mypy Output Format Stability + +- **Decision**: Parse the standard `file:line: severity: message [code]` format. +- **Rationale**: This format has been stable since mypy 0.9 (2020). The optional column format (`file:line:col:`) was added later but is backwards-compatible. Both formats coexist in modern mypy (1.x). 
+- **Alternatives considered**: Parsing mypy's `--output json` flag was considered but rejected -- it would require RTK to inject flags, which conflicts with the argument passthrough design (FR-010). The text format is sufficient and avoids modifying user intent. + +### Command Discovery (mypy vs python -m mypy) + +- **Decision**: Try `mypy` directly first, fall back to `python3 -m mypy`. +- **Rationale**: This is the same pattern used by `pytest_cmd.rs` (lines 18-25). Users who install mypy via pip have it on PATH. Users in virtualenvs or poetry/pipx may only have it via `python -m mypy`. +- **Alternatives considered**: Only supporting `mypy` directly was considered but would miss users who haven't activated their virtualenv. + +### ANSI Stripping + +- **Decision**: Reuse existing `utils::strip_ansi()` (src/utils.rs:47). +- **Rationale**: Mypy colorizes output when stdout is a TTY. Since RTK captures via `Command::output()` (not a TTY), mypy typically does not emit ANSI. However, users may have `MYPY_FORCE_COLOR=1` or `--color-output` set, so stripping is a safety measure. +- **Alternatives considered**: None -- the utility already exists. + +### Error Code Format + +- **Decision**: Display error codes in bracket format `[error-code]` matching mypy's native format. +- **Rationale**: Mypy uses bracketed codes like `[return-value]`, `[name-defined]`, `[assignment]`. This differs from tsc which uses `TS2322` style. Using brackets preserves the code as-is for easy copy-paste into mypy configuration (`# type: ignore[error-code]`). +- **Alternatives considered**: Stripping brackets was considered but reduces utility for suppression comments. + +## No Unresolved Unknowns + +All technical decisions are resolved. No NEEDS CLARIFICATION items remain. 
diff --git a/specs/001-mypy-cmd/spec.md b/specs/001-mypy-cmd/spec.md new file mode 100644 index 0000000..6f6a359 --- /dev/null +++ b/specs/001-mypy-cmd/spec.md @@ -0,0 +1,106 @@ +# Feature Specification: RTK Mypy Command + +**Feature Branch**: `001-mypy-cmd` +**Created**: 2026-02-13 +**Status**: Draft +**Input**: User description: "Add a mypy command module to RTK that filters and compresses mypy type checker output, grouping errors by file and error code, with token savings of 75-85%. Structurally similar to tsc_cmd.rs. Includes registry/hook updates and TDD." + +## User Scenarios & Testing *(mandatory)* + +### User Story 1 - Filter Mypy Error Output (Priority: P1) + +A developer runs `rtk mypy` on a Python project. Mypy produces verbose type-checking output with errors scattered across many files. RTK parses this output, groups errors by file (sorted by error count descending), shows a summary header with total error/file counts and top error codes, and displays every individual error with file, line number, error code, and message. The output is 75-85% smaller than raw mypy output while preserving all actionable information. + +**Why this priority**: This is the core value proposition. Without error filtering and grouping, the command has no reason to exist. + +**Independent Test**: Can be fully tested by passing raw mypy output strings to the filter function and asserting the structured output contains correct groupings, counts, and every error message. + +**Acceptance Scenarios**: + +1. **Given** mypy output with errors in multiple files, **When** the user runs `rtk mypy`, **Then** output shows a summary header ("mypy: N errors in M files"), errors grouped by file with per-file counts, and every error's line number, error code, and message. +2. **Given** mypy output with zero errors, **When** the user runs `rtk mypy`, **Then** output shows a success message ("mypy: No issues found"). +3. 
**Given** mypy output with errors using a single repeated error code, **When** the filter runs, **Then** the "Top codes" summary line is omitted (only shown when 2+ distinct codes exist). + +--- + +### User Story 2 - Transparent Mypy Invocation (Priority: P1) + +A developer runs `rtk mypy src/` or `rtk mypy --strict --config-file mypy.ini` and all arguments are forwarded to the underlying mypy command. RTK locates and runs mypy, captures its output, filters it, tracks token savings, and preserves mypy's exit code so CI/CD pipelines can gate on type-check results. + +**Why this priority**: Argument passthrough and exit code preservation are required for the command to be usable in real workflows. Without them, users cannot replace `mypy` with `rtk mypy`. + +**Independent Test**: Can be tested by verifying that the command construction passes all user-provided arguments to mypy, and that the process exit code matches mypy's exit code. + +**Acceptance Scenarios**: + +1. **Given** the user runs `rtk mypy src/ --strict`, **When** RTK executes, **Then** mypy is invoked with arguments `src/ --strict`. +2. **Given** mypy exits with code 1 (errors found), **When** RTK finishes, **Then** RTK also exits with code 1. +3. **Given** mypy is not installed, **When** RTK tries to run it, **Then** an error message tells the user how to install mypy (e.g., "pip install mypy"). + +--- + +### User Story 3 - Discovery and Hook Integration (Priority: P2) + +When a developer uses `mypy` or `python3 -m mypy` in a Claude Code session, the RTK auto-rewrite hook transparently rewrites the command to `rtk mypy`. The RTK discover module classifies `mypy` commands as "Supported" and estimates token savings. This ensures the user gets token savings without changing their habits. + +**Why this priority**: Hooks and discovery are force multipliers -- they make the mypy command discoverable and automatic. But they only add value after the core filtering (P1) works. 
+ +**Independent Test**: Can be tested by running the classify_command function against `mypy` and `python3 -m mypy` inputs and asserting they return Supported with the correct RTK equivalent and savings estimate. + +**Acceptance Scenarios**: + +1. **Given** a command `mypy src/`, **When** the discover registry classifies it, **Then** it returns Supported with `rtk_equivalent: "rtk mypy"`, category "Build", and estimated savings 80%. +2. **Given** a command `python3 -m mypy --strict`, **When** the discover registry classifies it, **Then** it returns Supported with the same RTK equivalent. +3. **Given** the auto-rewrite hook receives `mypy src/ --strict`, **When** it processes the command, **Then** it outputs a JSON rewrite to `rtk mypy src/ --strict`. + +--- + +### Edge Cases + +- What happens when mypy produces note-level output (not errors)? Notes are informational context lines that follow errors. They should be preserved as continuation lines under the parent error, not treated as standalone errors. +- What happens when mypy output contains color/ANSI codes? ANSI escape sequences must be stripped before parsing. +- What happens when mypy produces "error:" lines without a file reference (e.g., configuration errors, import errors)? These should be displayed verbatim at the top of the output, before any grouped file errors. +- What happens when mypy output contains column numbers (e.g., `file.py:10:5: error:`)? The column number should be parsed but not displayed (line number is sufficient for navigation). + +## Requirements *(mandatory)* + +### Functional Requirements + +- **FR-001**: System MUST parse mypy error lines in the format `file.py:LINE: error: MESSAGE [error-code]` and extract file, line number, error code, and message. +- **FR-002**: System MUST also parse the extended format `file.py:LINE:COL: error: MESSAGE [error-code]` (with column number). +- **FR-003**: System MUST group parsed errors by file, sorted by error count (most errors first). 
+- **FR-004**: System MUST display a summary header: "mypy: N errors in M files". +- **FR-005**: System MUST display a "Top codes" line when 2 or more distinct error codes are present, showing up to 5 codes with occurrence counts, sorted by frequency. +- **FR-006**: System MUST display every individual error with line number, error code, and message (no collapsing or truncation of error messages beyond a reasonable line length limit). +- **FR-007**: System MUST preserve continuation/note lines (indented or starting with "note:") as context under their parent error. +- **FR-008**: System MUST display file-less errors (configuration errors, import failures) verbatim before grouped output. +- **FR-009**: System MUST strip ANSI escape sequences from mypy output before parsing. +- **FR-010**: System MUST forward all user-provided arguments to the mypy command unchanged. +- **FR-011**: System MUST preserve mypy's exit code as the process exit code. +- **FR-012**: System MUST track token savings (input vs output size) in the RTK tracking database. +- **FR-013**: System MUST show a success message when mypy reports no errors. +- **FR-014**: The discover registry MUST classify `mypy` and `python3 -m mypy` commands as Supported. +- **FR-015**: The auto-rewrite hook MUST rewrite `mypy` and `python3 -m mypy` commands to `rtk mypy`. + +### Key Entities + +- **Mypy Error**: A single type-checking diagnostic with file path, line number, optional column, severity (error/note/warning), optional error code, and message text. +- **File Group**: A collection of mypy errors sharing the same file path, with a count of errors in that file. +- **Error Code Summary**: An aggregate count of how many times each error code appears across all files. + +## Assumptions + +- Mypy is installed and available on the user's PATH (either as `mypy` directly or via `python3 -m mypy`). RTK does not install or manage mypy. 
+- Mypy's output format is stable across versions 0.9+ (the `file:line: severity: message [code]` format has been consistent since mypy 0.9). +- The discover registry and hook patterns follow the same conventions as existing commands (ruff, pytest, pip). +- Token savings are estimated at 80% for the discover registry, based on the structural similarity to tsc (83%) adjusted for mypy's typically shorter output lines. + +## Success Criteria *(mandatory)* + +### Measurable Outcomes + +- **SC-001**: Running `rtk mypy` on a project with 50+ type errors produces output that is 75-85% smaller than raw mypy output while preserving every individual error message, file path, line number, and error code. +- **SC-002**: All user-provided mypy arguments are forwarded correctly, including flags like `--strict`, `--config-file`, path arguments, and `--ignore-missing-imports`. +- **SC-003**: The RTK process exit code matches mypy's exit code in all cases (0 for success, 1 for errors, 2 for fatal errors). +- **SC-004**: The discover registry correctly classifies both `mypy` and `python3 -m mypy` commands as Supported with the correct RTK equivalent. +- **SC-005**: The module includes comprehensive tests following the project's TDD patterns, with test coverage for: error parsing, file grouping, success output, continuation lines, ANSI stripping, file-less errors, and edge cases. diff --git a/specs/001-mypy-cmd/tasks.md b/specs/001-mypy-cmd/tasks.md new file mode 100644 index 0000000..1cc02c7 --- /dev/null +++ b/specs/001-mypy-cmd/tasks.md @@ -0,0 +1,153 @@ +# Tasks: RTK Mypy Command + +**Input**: Design documents from `/specs/001-mypy-cmd/` +**Prerequisites**: plan.md, spec.md, research.md + +**Tests**: Included (TDD explicitly requested in spec and CLAUDE.md conventions). + +**Organization**: Tasks grouped by user story. US1 and US2 are both P1 but US1 (filter) must precede US2 (invocation) since the run function calls the filter. US3 (discovery/hooks) is P2. 
+ +## Format: `[ID] [P?] [Story] Description` + +- **[P]**: Can run in parallel (different files, no dependencies) +- **[Story]**: Which user story this task belongs to (e.g., US1, US2, US3) +- Include exact file paths in descriptions + +--- + +## Phase 1: Setup (Wiring) + +**Purpose**: Register the mypy module and command in the CLI so that `cargo build` compiles the new module. + +- [x] T001 Add `mod mypy_cmd;` declaration in `src/main.rs` (after `mod local_llm;`) +- [x] T002 Add `Mypy` variant to `Commands` enum in `src/main.rs` with `#[arg(trailing_var_arg = true, allow_hyphen_values = true)] args: Vec<String>` +- [x] T003 Add `Commands::Mypy { args }` match arm in `src/main.rs` dispatching to `mypy_cmd::run(&args, cli.verbose)?` +- [x] T004 Create stub `src/mypy_cmd.rs` with `pub fn run(args: &[String], verbose: u8) -> Result<()> { todo!() }` and empty test module + +**Checkpoint**: `cargo check` passes with the new module wired in (run function is a stub). + +--- + +## Phase 2: User Story 1 - Filter Mypy Error Output (Priority: P1) + +**Goal**: Parse raw mypy output and produce grouped, compact output with summary header, top error codes, and every error preserved. + +**Independent Test**: Pass raw mypy output strings to `filter_mypy_output()` and assert structured output. 
+ +### Tests for User Story 1 + +> **Write these tests FIRST, ensure they FAIL before implementation (TDD Red phase)** + +- [x] T005 [P] [US1] Write `test_filter_mypy_errors_grouped_by_file` in `src/mypy_cmd.rs` -- multi-file input, assert summary header "mypy: N errors in M files", assert file grouping with per-file counts, assert files sorted by error count descending (FR-001, FR-003, FR-004) +- [x] T006 [P] [US1] Write `test_filter_mypy_with_column_numbers` in `src/mypy_cmd.rs` -- input with `file.py:10:5: error:` format, assert line number extracted and error parsed correctly (FR-002) +- [x] T007 [P] [US1] Write `test_filter_mypy_top_codes_summary` in `src/mypy_cmd.rs` -- input with 3+ distinct error codes, assert "Top codes:" line shows up to 5 codes sorted by frequency (FR-005) +- [x] T008 [P] [US1] Write `test_filter_mypy_single_code_no_summary` in `src/mypy_cmd.rs` -- input with only one error code repeated, assert no "Top codes:" line (FR-005) +- [x] T009 [P] [US1] Write `test_filter_mypy_every_error_shown` in `src/mypy_cmd.rs` -- 3 errors in same file, assert each error message appears individually with line number and code (FR-006) +- [x] T010 [P] [US1] Write `test_filter_mypy_note_continuation` in `src/mypy_cmd.rs` -- error followed by note: line, assert note preserved as indented context under parent error (FR-007) +- [x] T011 [P] [US1] Write `test_filter_mypy_fileless_errors` in `src/mypy_cmd.rs` -- input with config/import errors (no file prefix), assert displayed verbatim before grouped output (FR-008) +- [x] T012 [P] [US1] Write `test_filter_mypy_no_errors` in `src/mypy_cmd.rs` -- input with "Success: no issues found", assert output is "mypy: No issues found" (FR-013) +- [x] T013 [P] [US1] Write `test_filter_mypy_no_file_limit` in `src/mypy_cmd.rs` -- 15 files with errors, assert all 15 files appear in output + +**Checkpoint**: All 9 tests exist and FAIL (`cargo test mypy_cmd` shows 9 failures). 
+ +### Implementation for User Story 1 + +- [x] T014 [US1] Implement `filter_mypy_output()` in `src/mypy_cmd.rs` -- regex parsing, MypyError struct, file grouping by HashMap, error code counting, formatted output generation (FR-001 through FR-009, FR-013) + +**Checkpoint**: All 9 filter tests PASS (`cargo test mypy_cmd` shows 9 passing). + +--- + +## Phase 3: User Story 2 - Transparent Mypy Invocation (Priority: P1) + +**Goal**: Wire `pub fn run()` to execute mypy, capture output, filter it, track savings, and preserve exit code. + +**Independent Test**: Verified by `cargo check` (type-safe command construction) and manual invocation. + +**Depends on**: Phase 2 (filter function must exist for run to call it) + +### Implementation for User Story 2 + +- [x] T015 [US2] Implement `pub fn run()` in `src/mypy_cmd.rs` -- try `mypy` then fallback to `python3 -m mypy`, forward all args, capture stdout+stderr, strip ANSI via `utils::strip_ansi()`, call `filter_mypy_output()`, track via `tracking::TimedExecution`, exit with mypy's exit code (FR-010, FR-011, FR-012) + +**Checkpoint**: `cargo build` succeeds. If mypy is installed: `cargo run -- mypy --version` works. Full pre-commit gate passes: `cargo fmt --all --check && cargo clippy --all-targets && cargo test`. + +--- + +## Phase 4: User Story 3 - Discovery and Hook Integration (Priority: P2) + +**Goal**: Auto-rewrite `mypy` commands to `rtk mypy` and classify them in the discover registry. + +**Independent Test**: Registry unit tests + hook script grep assertions. + +**Depends on**: Phase 1 (module must exist) but NOT on Phase 2/3. 
+ +### Tests for User Story 3 + +> **Write tests FIRST (TDD Red phase)** + +- [x] T016 [P] [US3] Write `test_classify_mypy` in `src/discover/registry.rs` -- assert `classify_command("mypy src/")` returns `Supported { rtk_equivalent: "rtk mypy", category: "Build", estimated_savings_pct: 80.0 }` (FR-014) +- [x] T017 [P] [US3] Write `test_classify_python_m_mypy` in `src/discover/registry.rs` -- assert `classify_command("python3 -m mypy --strict")` returns `Supported` with same fields (FR-014) + +**Checkpoint**: Both tests FAIL. + +### Implementation for User Story 3 + +- [x] T018 [US3] Add mypy pattern `r"^(python3?\s+-m\s+)?mypy(\s|$)"` to `PATTERNS` array in `src/discover/registry.rs` +- [x] T019 [US3] Add corresponding `RtkRule` to `RULES` array in `src/discover/registry.rs` -- `rtk_cmd: "rtk mypy"`, `category: "Build"`, `savings_pct: 80.0` +- [x] T020 [P] [US3] Add mypy rewrite patterns to `.claude/hooks/rtk-rewrite.sh` in the Python tooling section (after ruff block): `mypy` and `python -m mypy` rewrites (FR-015) +- [x] T021 [P] [US3] Mirror the same hook changes in `hooks/rtk-rewrite.sh` (FR-015) + +**Checkpoint**: Registry tests PASS. `cargo test registry` shows all existing + 2 new tests passing. Hook patterns grep-verifiable. + +--- + +## Phase 5: Polish & Verification + +**Purpose**: Full verification, documentation, and pre-commit gate. + +- [x] T022 Run full pre-commit gate: `cargo fmt --all --check && cargo clippy --all-targets && cargo test` +- [x] T023 Update CLAUDE.md architecture table to add mypy_cmd.rs entry with description and token strategy +- [x] T024 Verify `PATTERNS.len() == RULES.len()` assertion still passes in registry (existing test `test_patterns_rules_length_match`) + +**Checkpoint**: All tests pass, clippy clean, CLAUDE.md updated. Feature complete. 
+ +--- + +## Dependencies & Execution Order + +### Phase Dependencies + +- **Phase 1 (Setup)**: No dependencies -- start immediately +- **Phase 2 (US1 Filter)**: Depends on Phase 1 (module must compile) +- **Phase 3 (US2 Invocation)**: Depends on Phase 2 (run calls filter) +- **Phase 4 (US3 Discovery)**: Depends on Phase 1 only (registry/hook are independent of filter implementation) +- **Phase 5 (Polish)**: Depends on all previous phases + +### User Story Dependencies + +- **US1 (Filter)**: Standalone after setup -- core value +- **US2 (Invocation)**: Depends on US1 (run function calls filter_mypy_output) +- **US3 (Discovery/Hooks)**: Independent of US1/US2 (registry + hooks don't import mypy_cmd) + +### Parallel Opportunities + +**Within Phase 2 (US1 Tests)**: +All 9 test tasks (T005-T013) are [P] -- they write to the same file but to independent test functions. Can be written in a single batch. + +**Within Phase 4 (US3)**: +- T016 + T017 (registry tests) are [P] with T020 + T021 (hook changes) -- different files +- T018 + T019 must be sequential (same array in same file) + +**Cross-phase**: +- Phase 4 (US3) can run in parallel with Phase 2 (US1) after Phase 1 completes -- different files entirely + +--- + +## Notes + +- All tasks in a single file should be done sequentially to avoid conflicts +- TDD is mandatory per project CLAUDE.md -- write tests first, verify they fail, then implement +- The filter function is pure (no I/O) making it fully unit-testable +- The run function follows the exact pattern of tsc_cmd.rs and pytest_cmd.rs +- Registry pattern+rule arrays must stay aligned (existing test enforces this) diff --git a/src/discover/registry.rs b/src/discover/registry.rs index 7ef375c..0f97b95 100644 --- a/src/discover/registry.rs +++ b/src/discover/registry.rs @@ -70,6 +70,7 @@ const PATTERNS: &[&str] = &[ r"^kubectl\s+(get|logs)", r"^curl\s+", r"^wget\s+", + r"^(python3?\s+-m\s+)?mypy(\s|$)", ]; const RULES: &[RtkRule] = &[ @@ -225,6 +226,13 @@ const 
RULES: &[RtkRule] = &[
         subcmd_savings: &[],
         subcmd_status: &[],
     },
+    RtkRule {
+        rtk_cmd: "rtk mypy",
+        category: "Build",
+        savings_pct: 80.0,
+        subcmd_savings: &[],
+        subcmd_status: &[],
+    },
 ];
 
 /// Commands to ignore (shell builtins, trivial, already rtk).
@@ -732,4 +740,30 @@ mod tests {
         let cmd = "cat <<'EOF'\nhello && world\nEOF";
         assert_eq!(split_command_chain(cmd), vec![cmd]);
     }
+
+    #[test]
+    fn test_classify_mypy() {
+        assert_eq!(
+            classify_command("mypy src/"),
+            Classification::Supported {
+                rtk_equivalent: "rtk mypy",
+                category: "Build",
+                estimated_savings_pct: 80.0,
+                status: RtkStatus::Existing,
+            }
+        );
+    }
+
+    #[test]
+    fn test_classify_python_m_mypy() {
+        assert_eq!(
+            classify_command("python3 -m mypy --strict"),
+            Classification::Supported {
+                rtk_equivalent: "rtk mypy",
+                category: "Build",
+                estimated_savings_pct: 80.0,
+                status: RtkStatus::Existing,
+            }
+        );
+    }
 }
diff --git a/src/main.rs b/src/main.rs
index 22b07cb..94bd5dc 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -24,6 +24,7 @@ mod lint_cmd;
 mod local_llm;
 mod log_cmd;
 mod ls;
+mod mypy_cmd;
 mod next_cmd;
 mod npm_cmd;
 mod parser;
@@ -488,6 +489,13 @@ enum Commands {
         args: Vec<String>,
     },
 
+    /// Mypy type checker with grouped error output
+    Mypy {
+        /// Mypy arguments
+        #[arg(trailing_var_arg = true, allow_hyphen_values = true)]
+        args: Vec<String>,
+    },
+
     /// Pip package manager with compact output (auto-detects uv)
     Pip {
         /// Pip arguments (e.g., list, outdated, install)
@@ -1314,6 +1322,10 @@ fn main() -> Result<()> {
             pytest_cmd::run(&args, cli.verbose)?;
         }
 
+        Commands::Mypy { args } => {
+            mypy_cmd::run(&args, cli.verbose)?;
+        }
+
         Commands::Pip { args } => {
             pip_cmd::run(&args, cli.verbose)?;
         }
diff --git a/src/mypy_cmd.rs b/src/mypy_cmd.rs
new file mode 100644
index 0000000..c141ddc
--- /dev/null
+++ b/src/mypy_cmd.rs
@@ -0,0 +1,441 @@
+//! `rtk mypy`: runs mypy and prints its diagnostics grouped by file and error code.
+
+use crate::tracking;
+use crate::utils::{strip_ansi, truncate};
+use anyhow::{Context, Result};
+use regex::Regex;
+use std::collections::HashMap;
+use std::process::Command;
+
+/// Runs mypy with `args`, prints the condensed report, and exits with mypy's status code.
+///
+/// Uses the `mypy` executable when `which` finds it, otherwise falls back to
+/// `python3 -m mypy`. Stdout and stderr are concatenated, passed through `strip_ansi`, and
+/// condensed by [`filter_mypy_output`]; the raw and filtered text are handed to `tracking`.
+///
+/// # Errors
+///
+/// Returns an error if the mypy process cannot be spawned. Otherwise this function does not
+/// return: it exits the process with mypy's exit code (1 when no code is available).
+pub fn run(args: &[String], verbose: u8) -> Result<()> {
+    let timer = tracking::TimedExecution::start();
+
+    let mut cmd = if which_command("mypy").is_some() {
+        Command::new("mypy")
+    } else {
+        let mut c = Command::new("python3");
+        c.arg("-m").arg("mypy");
+        c
+    };
+    cmd.args(args);
+
+    if verbose > 0 {
+        eprintln!("Running: mypy {}", args.join(" "));
+    }
+
+    let output = cmd
+        .output()
+        .context("Failed to run mypy. Is it installed? Try: pip install mypy")?;
+
+    let stdout = String::from_utf8_lossy(&output.stdout);
+    let stderr = String::from_utf8_lossy(&output.stderr);
+    let raw = format!("{}\n{}", stdout, stderr);
+    let clean = strip_ansi(&raw);
+
+    let filtered = filter_mypy_output(&clean);
+
+    println!("{}", filtered);
+
+    timer.track(
+        &format!("mypy {}", args.join(" ")),
+        &format!("rtk mypy {}", args.join(" ")),
+        &raw,
+        &filtered,
+    );
+
+    std::process::exit(output.status.code().unwrap_or(1));
+}
+
+/// Returns the trimmed stdout of `which cmd`, or `None` if the lookup fails or prints nothing.
+fn which_command(cmd: &str) -> Option<String> {
+    Command::new("which")
+        .arg(cmd)
+        .output()
+        .ok()
+        .filter(|o| o.status.success())
+        .and_then(|o| String::from_utf8(o.stdout).ok())
+        .map(|s| s.trim().to_string())
+        .filter(|s| !s.is_empty())
+}
+
+/// One mypy `error`/`warning` diagnostic plus the `note` messages attached to it.
+struct MypyError {
+    file: String,
+    line: usize,
+    /// Error code from the trailing `[code]`; empty when mypy printed none.
+    code: String,
+    message: String,
+    /// Messages of the `note:` lines attached to this diagnostic.
+    context_lines: Vec<String>,
+}
+
+/// Condenses raw mypy output into a report grouped by file.
+///
+/// Diagnostics that carry a file and line are grouped per file (most errors first, ties by
+/// file name) with an optional "Top codes" summary. Lines without a file that contain
+/// `error:` and notes with no matching parent are printed verbatim ahead of the groups.
+/// mypy's own `Found ...` / `Success: ...` summary lines are dropped.
+///
+/// When nothing is recognized, the result is `"mypy: No issues found"` only if the output is
+/// blank or contains mypy's "no issues found" marker; any other text (for example a traceback
+/// or `No module named mypy`) is returned trimmed so a failure is never reported as success.
+fn filter_mypy_output(output: &str) -> String {
+    lazy_static::lazy_static! {
+        // file.py:12: error: Message [error-code]
+        // file.py:12:5: error: Message [error-code]
+        static ref MYPY_DIAG: Regex = Regex::new(
+            r"^(.+?):(\d+)(?::\d+)?: (error|warning|note): (.+?)(?:\s+\[(.+)\])?$"
+        ).unwrap();
+    }
+
+    let lines: Vec<&str> = output.lines().collect();
+    let mut errors: Vec<MypyError> = Vec::new();
+    let mut fileless_lines: Vec<String> = Vec::new();
+    let mut i = 0;
+
+    while i < lines.len() {
+        let line = lines[i];
+
+        // Skip mypy's own summary line
+        if line.starts_with("Found ") && line.contains(" error") {
+            i += 1;
+            continue;
+        }
+        // Skip "Success: no issues found"
+        if line.starts_with("Success:") {
+            i += 1;
+            continue;
+        }
+
+        if let Some(caps) = MYPY_DIAG.captures(line) {
+            let severity = &caps[3];
+            let file = caps[1].to_string();
+            let line_num: usize = caps[2].parse().unwrap_or(0);
+            let message = caps[4].to_string();
+            let code = caps
+                .get(5)
+                .map(|m| m.as_str().to_string())
+                .unwrap_or_default();
+
+            if severity == "note" {
+                // Attach the note to the preceding diagnostic when it is in the same file
+                // (only the file is compared, not the line number).
+                if let Some(last) = errors.last_mut() {
+                    if last.file == file {
+                        last.context_lines.push(message);
+                        i += 1;
+                        continue;
+                    }
+                }
+                // Standalone note with no parent -- display as fileless
+                fileless_lines.push(line.to_string());
+                i += 1;
+                continue;
+            }
+
+            let mut err = MypyError {
+                file,
+                line: line_num,
+                code,
+                message,
+                context_lines: Vec::new(),
+            };
+
+            // Capture continuation note lines
+            i += 1;
+            while i < lines.len() {
+                if let Some(next_caps) = MYPY_DIAG.captures(lines[i]) {
+                    if &next_caps[3] == "note" && next_caps[1] == err.file {
+                        let note_msg = next_caps[4].to_string();
+                        err.context_lines.push(note_msg);
+                        i += 1;
+                        continue;
+                    }
+                }
+                break;
+            }
+
+            errors.push(err);
+        } else if line.contains("error:") {
+            // File-less error (config errors, import errors)
+            fileless_lines.push(line.to_string());
+            i += 1;
+        } else {
+            i += 1;
+        }
+    }
+
+    // Nothing recognized: only claim success for blank output or mypy's success marker.
+    if errors.is_empty() && fileless_lines.is_empty() {
+        let unparsed = output.trim();
+        if unparsed.is_empty() || output.contains("no issues found") {
+            return "mypy: No issues found".to_string();
+        }
+        // Unrecognized text (traceback, usage, "No module named mypy"): show it as-is.
+        return unparsed.to_string();
+    }
+
+    // Group by file
+    let mut by_file: HashMap<String, Vec<&MypyError>> = HashMap::new();
+    for err in &errors {
+        by_file.entry(err.file.clone()).or_default().push(err);
+    }
+
+    // Count by error code
+    let mut by_code: HashMap<String, usize> = HashMap::new();
+    for err in &errors {
+        if !err.code.is_empty() {
+            *by_code.entry(err.code.clone()).or_insert(0) += 1;
+        }
+    }
+
+    let mut result = String::new();
+
+    // File-less errors first
+    for line in &fileless_lines {
+        result.push_str(line);
+        result.push('\n');
+    }
+    if !fileless_lines.is_empty() && !errors.is_empty() {
+        result.push('\n');
+    }
+
+    if !errors.is_empty() {
+        result.push_str(&format!(
+            "mypy: {} errors in {} files\n",
+            errors.len(),
+            by_file.len()
+        ));
+        result.push_str("═══════════════════════════════════════\n");
+
+        // Top error codes summary (only when 2+ distinct codes).
+        // HashMap iteration order is unspecified, so break count ties by code name.
+        let mut code_counts: Vec<_> = by_code.iter().collect();
+        code_counts.sort_by(|a, b| b.1.cmp(a.1).then_with(|| a.0.cmp(b.0)));
+
+        if code_counts.len() > 1 {
+            let codes_str: Vec<String> = code_counts
+                .iter()
+                .take(5)
+                .map(|(code, count)| format!("{} ({}x)", code, count))
+                .collect();
+            result.push_str(&format!("Top codes: {}\n\n", codes_str.join(", ")));
+        }
+
+        // Files sorted by error count (most errors first), ties by file name so the
+        // report is identical from run to run.
+        let mut files_sorted: Vec<_> = by_file.iter().collect();
+        files_sorted.sort_by(|a, b| b.1.len().cmp(&a.1.len()).then_with(|| a.0.cmp(b.0)));
+
+        for (file, file_errors) in &files_sorted {
+            result.push_str(&format!("{} ({} errors)\n", file, file_errors.len()));
+
+            for err in *file_errors {
+                if err.code.is_empty() {
+                    result.push_str(&format!(
+                        " L{}: {}\n",
+                        err.line,
+                        truncate(&err.message, 120)
+                    ));
+                } else {
+                    result.push_str(&format!(
+                        " L{}: [{}] {}\n",
+                        err.line,
+                        err.code,
+                        truncate(&err.message, 120)
+                    ));
+                }
+                for ctx in &err.context_lines {
+                    result.push_str(&format!(" {}\n", truncate(ctx, 120)));
+                }
+            }
+            result.push('\n');
+        }
+    }
+
+    result.trim().to_string()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_filter_mypy_errors_grouped_by_file() {
+        let output = "\
+src/server/auth.py:12: error: Incompatible return value type (got \"str\", expected \"int\") [return-value]
+src/server/auth.py:15: error: Argument 1 has incompatible type \"int\"; expected \"str\" [arg-type]
+src/models/user.py:8: error: Name \"foo\" is not defined [name-defined]
+src/models/user.py:10: error: Incompatible types in assignment [assignment]
+src/models/user.py:20: error: Missing return statement [return]
+Found 5 errors in 2 files (checked 10 source files)
+";
+        let result = filter_mypy_output(output);
+        assert!(result.contains("mypy: 5 errors in 2 files"));
+        // user.py has 3 errors, auth.py has 2 -- user.py should come first
+        let user_pos = result.find("user.py").unwrap();
+        let auth_pos = result.find("auth.py").unwrap();
+        assert!(
+            user_pos < auth_pos,
+            "user.py (3 errors) should appear before auth.py (2 errors)"
+        );
+        assert!(result.contains("user.py (3 errors)"));
+        assert!(result.contains("auth.py (2 errors)"));
+    }
+
+    #[test]
+    fn test_filter_mypy_with_column_numbers() {
+        let output = "\
+src/api.py:10:5: error: Incompatible return value type [return-value]
+";
+        let result = filter_mypy_output(output);
+        assert!(result.contains("L10:"));
+        assert!(result.contains("[return-value]"));
+        assert!(result.contains("Incompatible return value type"));
+    }
+
+    #[test]
+    fn test_filter_mypy_top_codes_summary() {
+        let output = "\
+a.py:1: error: Error one [return-value]
+a.py:2: error: Error two [return-value]
+a.py:3: error: Error three [return-value]
+b.py:1: error: Error four [name-defined]
+c.py:1: error: Error five [arg-type]
+Found 5 errors in 3 files
+";
+        let result = filter_mypy_output(output);
+        assert!(result.contains("Top codes:"));
+        assert!(result.contains("return-value (3x)"));
+        assert!(result.contains("name-defined (1x)"));
+        assert!(result.contains("arg-type (1x)"));
+    }
+
+    #[test]
+    fn test_filter_mypy_single_code_no_summary() {
+        let output = "\
+a.py:1: error: Error one [return-value]
+a.py:2: error: Error two [return-value]
+b.py:1: error: Error three [return-value]
+Found 3 errors in 2 files
+";
+        let result = filter_mypy_output(output);
+        assert!(
+            !result.contains("Top codes:"),
+            "Top codes should not appear with only one distinct code"
+        );
+    }
+
+    #[test]
+    fn test_filter_mypy_every_error_shown() {
+        let output = "\
+src/api.py:10: error: Type \"str\" not assignable to \"int\" [assignment]
+src/api.py:20: error: Missing return statement [return]
+src/api.py:30: error: Name \"bar\" is not defined [name-defined]
+";
+        let result = filter_mypy_output(output);
+        assert!(result.contains("Type \"str\" not assignable to \"int\""));
+        assert!(result.contains("Missing return statement"));
+        assert!(result.contains("Name \"bar\" is not defined"));
+        assert!(result.contains("L10:"));
+        assert!(result.contains("L20:"));
+        assert!(result.contains("L30:"));
+    }
+
+    #[test]
+    fn test_filter_mypy_note_continuation() {
+        let output = "\
+src/app.py:10: error: Incompatible types in assignment [assignment]
+src/app.py:10: note: Expected type \"int\"
+src/app.py:10: note: Got type \"str\"
+src/app.py:20: error: Missing return statement [return]
+";
+        let result = filter_mypy_output(output);
+        assert!(result.contains("Incompatible types in assignment"));
+        assert!(result.contains("Expected type \"int\""));
+        assert!(result.contains("Got type \"str\""));
+        assert!(result.contains("L10:"));
+        assert!(result.contains("L20:"));
+    }
+
+    #[test]
+    fn test_filter_mypy_fileless_errors() {
+        let output = "\
+mypy: error: No module named 'nonexistent'
+src/api.py:10: error: Name \"foo\" is not defined [name-defined]
+Found 1 error in 1 file
+";
+        let result = filter_mypy_output(output);
+        // File-less error should appear verbatim before grouped output
+        assert!(result.contains("mypy: error: No module named 'nonexistent'"));
+        assert!(result.contains("api.py (1 error"));
+        let fileless_pos = result.find("No module named").unwrap();
+        let grouped_pos = result.find("api.py").unwrap();
+        assert!(
+            fileless_pos < grouped_pos,
+            "File-less errors should appear before grouped file errors"
+        );
+    }
+
+    #[test]
+    fn test_filter_mypy_no_errors() {
+        let output = "Success: no issues found in 5 source files\n";
+        let result = filter_mypy_output(output);
+        assert_eq!(result, "mypy: No issues found");
+    }
+
+    #[test]
+    fn test_filter_mypy_no_file_limit() {
+        let mut output = String::new();
+        for i in 1..=15 {
+            output.push_str(&format!(
+                "src/file{}.py:{}: error: Error in file {}. [assignment]\n",
+                i, i, i
+            ));
+        }
+        output.push_str("Found 15 errors in 15 files\n");
+        let result = filter_mypy_output(&output);
+        assert!(result.contains("15 errors in 15 files"));
+        for i in 1..=15 {
+            assert!(
+                result.contains(&format!("file{}.py", i)),
+                "file{}.py missing from output",
+                i
+            );
+        }
+    }
+
+    #[test]
+    fn test_filter_mypy_ties_sorted_by_name() {
+        let output = "\
+b.py:1: error: Error one [assignment]
+a.py:1: error: Error two [arg-type]
+";
+        let result = filter_mypy_output(output);
+        // Equal counts: files and codes are ordered alphabetically
+        assert!(result.find("a.py").unwrap() < result.find("b.py").unwrap());
+        assert!(result.contains("Top codes: arg-type (1x), assignment (1x)"));
+    }
+
+    #[test]
+    fn test_filter_mypy_unrecognized_output_passthrough() {
+        let output = "/usr/bin/python3: No module named mypy\n";
+        let result = filter_mypy_output(output);
+        assert_eq!(result, "/usr/bin/python3: No module named mypy");
+    }
+
+    #[test]
+    fn test_filter_mypy_empty_output() {
+        assert_eq!(filter_mypy_output("\n"), "mypy: No issues found");
+    }
+}
diff --git a/src/utils.rs b/src/utils.rs
index dbf9c91..6ea0698 100644
--- a/src/utils.rs
+++ b/src/utils.rs
@@ -395,5 +395,4 @@ mod tests {
         let result = truncate(cjk, 6);
         assert!(result.ends_with("..."));
     }
-
 }