From 9c5ec4bad5c1a6ddff03f6bbdad7816ec8e664b6 Mon Sep 17 00:00:00 2001 From: heAdz0r Date: Sat, 14 Feb 2026 23:07:35 +0300 Subject: [PATCH 1/3] feat: add rtk rgai command for semantic code search Rust-native semantic search that scores files and lines by term relevance, symbol definitions, and path matching. No external dependencies (no grepai/embeddings required). Features: - Natural-language multi-word queries: rtk rgai "auth token refresh" - File scoring with symbol definition boost (+2.5) and comment penalty - Stop word removal + basic stemming for better recall - Compact and JSON output modes - File type filtering (--file-type ts/py/rust/etc.) - gitignore-aware traversal via `ignore` crate - Binary and large file skipping - Backward-compat: trailing path token auto-detection Includes 8 unit tests (5 in rgai_cmd, 3 for arg normalization). --- src/main.rs | 111 +++++++ src/rgai_cmd.rs | 789 ++++++++++++++++++++++++++++++++++++++++++++++++ src/utils.rs | 1 - 3 files changed, 900 insertions(+), 1 deletion(-) create mode 100644 src/rgai_cmd.rs diff --git a/src/main.rs b/src/main.rs index 22b07cb..c989c6c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -34,6 +34,7 @@ mod prettier_cmd; mod prisma_cmd; mod pytest_cmd; mod read; +mod rgai_cmd; // semantic search command (grepai-style intent matching) mod ruff_cmd; mod runner; mod summary; @@ -249,6 +250,34 @@ enum Commands { extra_args: Vec, }, + /// Rust-native semantic search (grepai-style intent matching) + Rgai { + /// Natural-language query + #[arg(required = true, num_args = 1..)] + query: Vec, + /// Path to search in + #[arg(short, long, default_value = ".")] + path: String, + /// Max files to show + #[arg(short, long, default_value = "8")] + max: usize, + /// Context lines around each match + #[arg(short = 'c', long, default_value = "1")] + context: usize, + /// Filter by file type (e.g., ts, py, rust) + #[arg(short = 't', long)] + file_type: Option, + /// Skip files larger than N KB + #[arg(long, default_value = 
"512")] + max_file_kb: usize, + /// Output machine-readable JSON + #[arg(long)] + json: bool, + /// Compact output (fewer lines per hit) + #[arg(long)] + compact: bool, + }, + /// Initialize rtk instructions in CLAUDE.md Init { /// Add to global ~/.claude/CLAUDE.md instead of local @@ -1028,6 +1057,31 @@ fn main() -> Result<()> { )?; } + Commands::Rgai { + query, + path, + max, + context, + file_type, + max_file_kb, + json, + compact, + } => { + // Backward-compat: rtk rgai "query words" ./src -> path="./src" + let (query, path) = normalize_rgai_args(query, path); + rgai_cmd::run( + &query, + &path, + max, + context, + file_type.as_deref(), + max_file_kb, + json, + compact, + cli.verbose, + )?; + } + Commands::Init { global, show, @@ -1388,3 +1442,60 @@ fn main() -> Result<()> { Ok(()) } + +/// Normalize rgai positional args: detect trailing path token in query words. +fn normalize_rgai_args(mut query_parts: Vec, mut path: String) -> (String, String) { + if path == "." && query_parts.len() > 1 { + if let Some(last) = query_parts.last().cloned() { + if looks_like_path_token(&last) { + path = last; + query_parts.pop(); + } + } + } + let query = query_parts.join(" "); + (query, path) +} + +fn looks_like_path_token(token: &str) -> bool { + token == "." + || token == ".." 
+ || token.starts_with("./") + || token.starts_with('/') + || token.contains('/') +} + +#[cfg(test)] +mod rgai_arg_tests { + use super::*; + + #[test] + fn normalize_rgai_keeps_multiword_query() { + let (query, path) = normalize_rgai_args( + vec!["token".to_string(), "refresh".to_string()], + ".".to_string(), + ); + assert_eq!(query, "token refresh"); + assert_eq!(path, "."); + } + + #[test] + fn normalize_rgai_supports_old_positional_path() { + let (query, path) = normalize_rgai_args( + vec!["auth".to_string(), "flow".to_string(), "./src".to_string()], + ".".to_string(), + ); + assert_eq!(query, "auth flow"); + assert_eq!(path, "./src"); + } + + #[test] + fn normalize_rgai_does_not_treat_plain_word_as_path() { + let (query, path) = normalize_rgai_args( + vec!["domain".to_string(), "model".to_string()], + ".".to_string(), + ); + assert_eq!(query, "domain model"); + assert_eq!(path, "."); + } +} diff --git a/src/rgai_cmd.rs b/src/rgai_cmd.rs new file mode 100644 index 0000000..d51d723 --- /dev/null +++ b/src/rgai_cmd.rs @@ -0,0 +1,789 @@ +use crate::tracking; +use anyhow::{bail, Result}; +use ignore::WalkBuilder; +use lazy_static::lazy_static; +use regex::Regex; +use serde_json::json; +use std::collections::HashSet; +use std::fs; +use std::path::{Path, PathBuf}; + +const MAX_SNIPPETS_PER_FILE: usize = 2; +const MAX_SNIPPET_LINE_LEN: usize = 140; +const MIN_FILE_SCORE: f64 = 2.4; + +const STOP_WORDS: &[&str] = &[ + "a", "an", "and", "are", "as", "at", "be", "by", "code", "file", "find", "for", "from", "how", + "in", "is", "it", "of", "on", "or", "search", "show", "that", "the", "this", "to", "use", + "using", "what", "when", "where", "with", "why", +]; + +lazy_static! 
{ + static ref SYMBOL_DEF_RE: Regex = Regex::new( + r"^\s*(?:pub\s+)?(?:async\s+)?(?:fn|def|class|struct|enum|trait|interface|impl|type)\s+[A-Za-z_][A-Za-z0-9_]*" + ) + .expect("valid symbol regex"); +} + +#[derive(Debug, Clone)] +struct QueryModel { + phrase: String, + terms: Vec, +} + +#[derive(Debug, Clone)] +struct LineCandidate { + line_idx: usize, + score: f64, + matched_terms: Vec, +} + +#[derive(Debug, Clone)] +struct Snippet { + lines: Vec<(usize, String)>, + matched_terms: Vec, +} + +#[derive(Debug, Clone)] +struct SearchHit { + path: String, + score: f64, + matched_lines: usize, + snippets: Vec, +} + +#[derive(Debug, Default)] +struct SearchOutcome { + scanned_files: usize, + skipped_large: usize, + skipped_binary: usize, + hits: Vec, + raw_output: String, +} + +pub fn run( + query: &str, + path: &str, + max_results: usize, + context_lines: usize, + file_type: Option<&str>, + max_file_kb: usize, + json_output: bool, + compact: bool, + verbose: u8, +) -> Result<()> { + let timer = tracking::TimedExecution::start(); + + let query = query.trim(); + if query.is_empty() { + bail!("query cannot be empty"); + } + + let root = Path::new(path); + if !root.exists() { + bail!("path does not exist: {}", path); + } + + let query_model = build_query_model(query); + if verbose > 0 { + eprintln!( + "rgai: '{}' in {} (terms: {})", + query, + path, + query_model.terms.join(", ") + ); + } + + let max_file_bytes = max_file_kb.saturating_mul(1024).max(1024); + let effective_context = if compact { 0 } else { context_lines }; + let snippets_per_file = if compact { 1 } else { MAX_SNIPPETS_PER_FILE }; + let outcome = search_project( + &query_model, + root, + effective_context, + snippets_per_file, + file_type, + max_file_bytes, + verbose, + )?; + + let mut rendered = String::new(); + if outcome.hits.is_empty() { + if json_output { + rendered = serde_json::to_string_pretty(&json!({ + "query": query, + "path": path, + "total_hits": 0, + "scanned_files": outcome.scanned_files, + 
"skipped_large": outcome.skipped_large, + "skipped_binary": outcome.skipped_binary, + "hits": [] + }))?; + rendered.push('\n'); + } else { + rendered.push_str(&format!("🧠 0 for '{}'\n", query)); + } + print!("{}", rendered); + timer.track( + &format!("grepai search '{}' {}", query, path), + "rtk rgai", + &outcome.raw_output, + &rendered, + ); + return Ok(()); + } + + if json_output { + let hits_json: Vec<_> = outcome + .hits + .iter() + .take(max_results) + .map(|hit| { + let snippets: Vec<_> = hit + .snippets + .iter() + .map(|snippet| { + let lines: Vec<_> = snippet + .lines + .iter() + .map(|(line_no, text)| json!({ "line": line_no, "text": text })) + .collect(); + json!({ + "lines": lines, + "matched_terms": snippet.matched_terms, + }) + }) + .collect(); + json!({ + "path": hit.path, + "score": hit.score, + "matched_lines": hit.matched_lines, + "snippets": snippets, + }) + }) + .collect(); + + rendered = serde_json::to_string_pretty(&json!({ + "query": query, + "path": path, + "total_hits": outcome.hits.len(), + "shown_hits": max_results.min(outcome.hits.len()), + "scanned_files": outcome.scanned_files, + "skipped_large": outcome.skipped_large, + "skipped_binary": outcome.skipped_binary, + "hits": hits_json + }))?; + rendered.push('\n'); + print!("{}", rendered); + timer.track( + &format!("grepai search '{}' {}", query, path), + "rtk rgai", + &outcome.raw_output, + &rendered, + ); + return Ok(()); + } + + rendered.push_str(&format!( + "🧠 {}F for '{}' (scan {}F)\n", + outcome.hits.len(), + query, + outcome.scanned_files + )); + rendered.push('\n'); + + for hit in outcome.hits.iter().take(max_results) { + rendered.push_str(&format!( + "πŸ“„ {} [{:.1}]\n", + compact_path(&hit.path), + hit.score + )); + + for snippet in &hit.snippets { + for (line_no, line) in &snippet.lines { + rendered.push_str(&format!(" {:>4}: {}\n", line_no, line)); + } + + if !compact && !snippet.matched_terms.is_empty() { + rendered.push_str(&format!(" ~ {}\n", snippet.matched_terms.join(", 
"))); + } + rendered.push('\n'); + } + + let shown_lines = hit.snippets.len(); + if hit.matched_lines > shown_lines { + rendered.push_str(&format!( + " +{} more lines\n\n", + hit.matched_lines - shown_lines + )); + } + } + + if outcome.hits.len() > max_results { + rendered.push_str(&format!("... +{}F\n", outcome.hits.len() - max_results)); + } + + if verbose > 0 { + rendered.push_str(&format!( + "\nscan stats: skipped {} large, {} binary\n", + outcome.skipped_large, outcome.skipped_binary + )); + } + + print!("{}", rendered); + timer.track( + &format!("grepai search '{}' {}", query, path), + "rtk rgai", + &outcome.raw_output, + &rendered, + ); + + Ok(()) +} + +fn search_project( + query: &QueryModel, + root: &Path, + context_lines: usize, + snippets_per_file: usize, + file_type: Option<&str>, + max_file_bytes: usize, + _verbose: u8, +) -> Result { + let mut outcome = SearchOutcome::default(); + + let walker = WalkBuilder::new(root) + .hidden(true) + .git_ignore(true) + .git_global(true) + .git_exclude(true) + .build(); + + for entry in walker { + let entry = match entry { + Ok(e) => e, + Err(_) => continue, + }; + + if !entry + .file_type() + .as_ref() + .map(|ft| ft.is_file()) + .unwrap_or(false) + { + continue; + } + + let full_path = entry.path(); + if !is_supported_text_file(full_path) { + continue; + } + + if let Some(ft) = file_type { + if !matches_file_type(full_path, ft) { + continue; + } + } + + let metadata = match fs::metadata(full_path) { + Ok(m) => m, + Err(_) => continue, + }; + outcome.scanned_files += 1; + + if metadata.len() > max_file_bytes as u64 { + outcome.skipped_large += 1; + continue; + } + + let bytes = match fs::read(full_path) { + Ok(b) => b, + Err(_) => continue, + }; + + if looks_binary(&bytes) { + outcome.skipped_binary += 1; + continue; + } + + let content = String::from_utf8_lossy(&bytes).to_string(); + let display_path = compact_display_path(full_path, root); + if let Some(hit) = analyze_file( + &display_path, + &content, + query, + 
context_lines, + snippets_per_file, + ) { + outcome.hits.push(hit); + } + } + + outcome.hits.sort_by(|a, b| { + b.score + .total_cmp(&a.score) + .then_with(|| a.path.to_lowercase().cmp(&b.path.to_lowercase())) + }); + + outcome.raw_output = build_raw_output(&outcome.hits); + Ok(outcome) +} + +fn analyze_file( + path: &str, + content: &str, + query: &QueryModel, + context_lines: usize, + snippets_per_file: usize, +) -> Option { + let mut candidates = Vec::new(); + for (idx, line) in content.lines().enumerate() { + if let Some(candidate) = score_line(idx, line, query) { + candidates.push(candidate); + } + } + + let path_score = score_path(path, query); + if candidates.is_empty() && path_score < MIN_FILE_SCORE { + return None; + } + + candidates.sort_by(|a, b| { + b.score + .total_cmp(&a.score) + .then_with(|| a.line_idx.cmp(&b.line_idx)) + }); + + let mut selected = Vec::new(); + let overlap_window = (context_lines * 2 + 1) as isize; + for cand in candidates.iter().cloned() { + let overlaps = selected.iter().any(|existing: &LineCandidate| { + let delta = existing.line_idx as isize - cand.line_idx as isize; + delta.abs() <= overlap_window + }); + if overlaps { + continue; + } + selected.push(cand); + if selected.len() >= snippets_per_file { + break; + } + } + + if selected.is_empty() { + return None; + } + + let lines: Vec<&str> = content.lines().collect(); + let mut snippets = Vec::new(); + for cand in &selected { + snippets.push(build_snippet(&lines, cand, context_lines)); + } + + let mut file_score = path_score + (candidates.len() as f64).ln_1p(); + for (idx, cand) in selected.iter().enumerate() { + let weight = match idx { + 0 => 1.0, + 1 => 0.45, + _ => 0.25, + }; + file_score += cand.score * weight; + } + + if file_score < MIN_FILE_SCORE { + return None; + } + + Some(SearchHit { + path: path.to_string(), + score: file_score, + matched_lines: candidates.len(), + snippets, + }) +} + +fn build_snippet(lines: &[&str], candidate: &LineCandidate, context_lines: usize) 
-> Snippet { + if lines.is_empty() { + return Snippet { + lines: vec![(candidate.line_idx + 1, String::new())], + matched_terms: candidate.matched_terms.clone(), + }; + } + + let start = candidate.line_idx.saturating_sub(context_lines); + let end = (candidate.line_idx + context_lines + 1).min(lines.len()); + let mut rendered_lines = Vec::new(); + + for (idx, line) in lines.iter().enumerate().take(end).skip(start) { + let cleaned = line.trim(); + if cleaned.is_empty() { + continue; + } + rendered_lines.push((idx + 1, truncate_chars(cleaned, MAX_SNIPPET_LINE_LEN))); + } + + if rendered_lines.is_empty() { + rendered_lines.push((candidate.line_idx + 1, String::new())); + } + + Snippet { + lines: rendered_lines, + matched_terms: candidate.matched_terms.clone(), + } +} + +fn build_raw_output(hits: &[SearchHit]) -> String { + let mut raw = String::new(); + for hit in hits.iter().take(60) { + for snippet in &hit.snippets { + for (line_no, line) in &snippet.lines { + raw.push_str(&format!("{}:{}:{}\n", hit.path, line_no, line)); + } + } + } + raw +} + +fn score_line(line_idx: usize, line: &str, query: &QueryModel) -> Option { + let trimmed = line.trim(); + if trimmed.is_empty() { + return None; + } + + let lower = trimmed.to_lowercase(); + let mut score = 0.0; + let mut matched_terms = Vec::new(); + + if query.phrase.len() >= 3 && lower.contains(&query.phrase) { + score += 6.0; + } + + for term in &query.terms { + if lower.contains(term) { + score += if term.len() >= 5 { 1.7 } else { 1.4 }; + matched_terms.push(term.clone()); + } + } + + let unique_matches = dedup_terms(matched_terms); + if unique_matches.is_empty() { + return None; + } + + if unique_matches.len() > 1 { + score += 1.2; + } + + if is_symbol_definition(trimmed) { + score += 2.5; + } + + if is_comment_line(trimmed) { + score *= 0.7; + } + + if trimmed.chars().count() > 220 { + score *= 0.9; + } + + if score < 1.2 { + return None; + } + + Some(LineCandidate { + line_idx, + score, + matched_terms: 
unique_matches, + }) +} + +fn score_path(path: &str, query: &QueryModel) -> f64 { + let lower = path.to_lowercase(); + let mut score = 0.0; + + if query.phrase.len() >= 3 && lower.contains(&query.phrase) { + score += 3.5; + } + + for term in &query.terms { + if lower.contains(term) { + score += 1.2; + } + } + + score +} + +fn build_query_model(query: &str) -> QueryModel { + let phrase = query.trim().to_lowercase(); + let mut terms = Vec::new(); + let mut seen = HashSet::new(); + + for token in split_terms(&phrase) { + if token.len() < 2 || STOP_WORDS.contains(&token.as_str()) { + continue; + } + push_unique(&mut terms, &mut seen, &token); + + let stemmed = stem_token(&token); + if stemmed != token && stemmed.len() >= 2 { + push_unique(&mut terms, &mut seen, &stemmed); + } + } + + if terms.is_empty() && !phrase.is_empty() { + terms.push(phrase.clone()); + } + + QueryModel { phrase, terms } +} + +fn split_terms(input: &str) -> Vec { + let mut tokens = Vec::new(); + let mut current = String::new(); + + for ch in input.chars() { + if ch.is_alphanumeric() || ch == '_' { + current.extend(ch.to_lowercase()); + } else if !current.is_empty() { + tokens.push(std::mem::take(&mut current)); + } + } + + if !current.is_empty() { + tokens.push(current); + } + + tokens +} + +fn stem_token(token: &str) -> String { + if !token.is_ascii() { + return token.to_string(); + } + + let suffixes = ["ingly", "edly", "ing", "ed", "es", "s"]; + for suffix in suffixes { + if token.len() > suffix.len() + 2 && token.ends_with(suffix) { + return token[..token.len() - suffix.len()].to_string(); + } + } + token.to_string() +} + +fn push_unique(out: &mut Vec, seen: &mut HashSet, item: &str) { + if seen.insert(item.to_string()) { + out.push(item.to_string()); + } +} + +fn dedup_terms(input: Vec) -> Vec { + let mut out = Vec::new(); + let mut seen = HashSet::new(); + for item in input { + if seen.insert(item.clone()) { + out.push(item); + } + } + out +} + +fn is_symbol_definition(line: &str) -> bool { 
+ SYMBOL_DEF_RE.is_match(line) +} + +fn is_comment_line(line: &str) -> bool { + let trimmed = line.trim_start(); + trimmed.starts_with("//") + || trimmed.starts_with('#') + || trimmed.starts_with('*') + || trimmed.starts_with("/*") + || trimmed.starts_with("--") +} + +fn looks_binary(bytes: &[u8]) -> bool { + bytes.iter().take(4096).any(|b| *b == 0) +} + +fn is_supported_text_file(path: &Path) -> bool { + let ext = path + .extension() + .and_then(|e| e.to_str()) + .map(|e| e.to_ascii_lowercase()) + .unwrap_or_default(); + + !matches!( + ext.as_str(), + "png" + | "jpg" + | "jpeg" + | "gif" + | "webp" + | "ico" + | "pdf" + | "zip" + | "gz" + | "tar" + | "7z" + | "mp3" + | "mp4" + | "mov" + | "db" + | "sqlite" + | "woff" + | "woff2" + | "ttf" + | "otf" + | "lock" + | "jar" + | "class" + | "wasm" + ) +} + +fn matches_file_type(path: &Path, file_type: &str) -> bool { + let wanted = file_type.trim_start_matches('.').to_ascii_lowercase(); + if wanted.is_empty() { + return true; + } + + let ext = path + .extension() + .and_then(|e| e.to_str()) + .map(|e| e.to_ascii_lowercase()) + .unwrap_or_default(); + + match wanted.as_str() { + "rust" | "rs" => ext == "rs", + "python" | "py" => ext == "py", + "javascript" | "js" => matches!(ext.as_str(), "js" | "jsx" | "mjs" | "cjs"), + "typescript" | "ts" => matches!(ext.as_str(), "ts" | "tsx"), + "go" => ext == "go", + "java" => ext == "java", + "c" => matches!(ext.as_str(), "c" | "h"), + "cpp" | "c++" => matches!(ext.as_str(), "cc" | "cpp" | "cxx" | "hpp" | "hh" | "hxx"), + "markdown" | "md" => matches!(ext.as_str(), "md" | "mdx"), + "json" => ext == "json", + other => ext == other, + } +} + +fn compact_display_path(path: &Path, root: &Path) -> String { + let rel = match path.strip_prefix(root) { + Ok(r) => r.to_path_buf(), + Err(_) => { + if let Ok(cwd) = std::env::current_dir() { + match path.strip_prefix(cwd) { + Ok(r) => r.to_path_buf(), + Err(_) => PathBuf::from(path), + } + } else { + PathBuf::from(path) + } + } + }; + 
rel.to_string_lossy().trim_start_matches("./").to_string() +} + +fn compact_path(path: &str) -> String { + if path.len() <= 58 { + return path.to_string(); + } + + let parts: Vec<&str> = path.split('/').collect(); + if parts.len() <= 3 { + return path.to_string(); + } + + format!( + "{}/.../{}/{}", + parts[0], + parts[parts.len() - 2], + parts[parts.len() - 1] + ) +} + +fn truncate_chars(input: &str, max_len: usize) -> String { + if input.chars().count() <= max_len { + return input.to_string(); + } + if max_len <= 3 { + return "...".to_string(); + } + let clipped: String = input.chars().take(max_len - 3).collect(); + format!("{clipped}...") +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::tempdir; + + #[test] + fn build_query_model_removes_stop_words() { + let model = build_query_model("how to find auth token refresh"); + assert!(model.terms.contains(&"auth".to_string())); + assert!(model.terms.contains(&"token".to_string())); + assert!(model.terms.contains(&"refresh".to_string())); + assert!(!model.terms.contains(&"how".to_string())); + assert!(!model.terms.contains(&"find".to_string())); + } + + #[test] + fn score_line_prefers_symbol_definitions() { + let query = build_query_model("refresh token"); + let line = "pub fn refresh_token(session: &Session) -> Result {"; + let cand = score_line(10, line, &query).expect("line should match"); + assert!(cand.score > 3.0); + assert!(cand.matched_terms.contains(&"refresh".to_string())); + assert!(cand.matched_terms.contains(&"token".to_string())); + } + + #[test] + fn search_project_finds_most_relevant_file() { + let dir = tempdir().unwrap(); + let root = dir.path(); + fs::create_dir_all(root.join("src")).unwrap(); + fs::write( + root.join("src/auth.rs"), + r#" +pub struct Session {} + +pub fn refresh_token(session: &Session) -> String { + format!("new-token-{}", 1) +} +"#, + ) + .unwrap(); + fs::write( + root.join("src/logger.rs"), + r#" +pub fn log_info(msg: &str) { + println!("{}", msg); +} +"#, + ) + 
.unwrap(); + + let query = build_query_model("refresh token session"); + let outcome = search_project(&query, root, 0, 2, None, 256 * 1024, 0).unwrap(); + + assert!(!outcome.hits.is_empty()); + assert_eq!(outcome.hits[0].path, "src/auth.rs"); + } + + #[test] + fn matches_file_type_aliases() { + let p = Path::new("src/app.tsx"); + assert!(matches_file_type(p, "ts")); + assert!(matches_file_type(p, "typescript")); + assert!(!matches_file_type(p, "rust")); + } + + #[test] + fn truncate_chars_handles_unicode() { + let s = "ΠŸΡ€ΠΈΠ²Π΅Ρ‚ это длинная строка для тСста"; + let truncated = truncate_chars(s, 10); + assert!(truncated.chars().count() <= 10); + } +} diff --git a/src/utils.rs b/src/utils.rs index dbf9c91..6ea0698 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -395,5 +395,4 @@ mod tests { let result = truncate(cjk, 6); assert!(result.ends_with("...")); } - } From f82f3b7a0099851e880ab6d0345b08d01a4d0d1b Mon Sep 17 00:00:00 2001 From: heAdz0r Date: Sat, 14 Feb 2026 22:56:34 +0300 Subject: [PATCH 2/3] refactor(init): add upsert_rtk_block for idempotent CLAUDE.md management MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace naive append-or-skip logic in run_claude_md_mode with upsert_rtk_block() that handles all 4 cases: - Added: no existing block β†’ append - Updated: stale block β†’ replace in-place - Unchanged: current block β†’ no-op - Malformed: opening marker without closing β†’ warn safely Includes 4 unit tests covering each case. 
--- src/init.rs | 159 +++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 152 insertions(+), 7 deletions(-) diff --git a/src/init.rs b/src/init.rs index 482f9f8..961e4ac 100644 --- a/src/init.rs +++ b/src/init.rs @@ -767,15 +767,48 @@ fn run_claude_md_mode(global: bool, verbose: u8) -> Result<()> { if path.exists() { let existing = fs::read_to_string(&path)?; + // upsert_rtk_block handles all 4 cases: add, update, unchanged, malformed + let (new_content, action) = upsert_rtk_block(&existing, RTK_INSTRUCTIONS); - if existing.contains(""; + + if let Some(start) = content.find(start_marker) { + if let Some(relative_end) = content[start..].find(end_marker) { + let end = start + relative_end; + let end_pos = end + end_marker.len(); + let current_block = content[start..end_pos].trim(); + let desired_block = block.trim(); + + if current_block == desired_block { + return (content.to_string(), RtkBlockUpsert::Unchanged); + } + + // Replace stale block with desired block + let before = content[..start].trim_end(); + let after = content[end_pos..].trim_start(); + + let result = match (before.is_empty(), after.is_empty()) { + (true, true) => desired_block.to_string(), + (true, false) => format!("{desired_block}\n\n{after}"), + (false, true) => format!("{before}\n\n{desired_block}"), + (false, false) => format!("{before}\n\n{desired_block}\n\n{after}"), + }; + + return (result, RtkBlockUpsert::Updated); + } + + // Opening marker without closing marker β€” malformed + return (content.to_string(), RtkBlockUpsert::Malformed); + } + + // No existing block β€” append + let trimmed = content.trim(); + if trimmed.is_empty() { + (block.to_string(), RtkBlockUpsert::Added) + } else { + ( + format!("{trimmed}\n\n{}", block.trim()), + RtkBlockUpsert::Added, + ) + } +} + /// Patch CLAUDE.md: add @RTK.md, migrate if old block exists fn patch_claude_md(path: &Path, verbose: u8) -> Result { let mut content = if path.exists() { @@ -1103,6 +1199,55 @@ More content"#; 
assert!(RTK_INSTRUCTIONS.len() > 4000); } + // --- upsert_rtk_block tests --- + + #[test] + fn test_upsert_rtk_block_appends_when_missing() { + let input = "# Team instructions"; + let (content, action) = upsert_rtk_block(input, RTK_INSTRUCTIONS); + assert_eq!(action, RtkBlockUpsert::Added); + assert!(content.contains("# Team instructions")); + assert!(content.contains(" +OLD RTK CONTENT + + +More notes +"#; + + let (content, action) = upsert_rtk_block(input, RTK_INSTRUCTIONS); + assert_eq!(action, RtkBlockUpsert::Updated); + assert!(!content.contains("OLD RTK CONTENT")); + assert!(content.contains("rtk cargo test")); // from current RTK_INSTRUCTIONS + assert!(content.contains("# Team instructions")); + assert!(content.contains("More notes")); + } + + #[test] + fn test_upsert_rtk_block_noop_when_already_current() { + let input = format!( + "# Team instructions\n\n{}\n\nMore notes\n", + RTK_INSTRUCTIONS + ); + let (content, action) = upsert_rtk_block(&input, RTK_INSTRUCTIONS); + assert_eq!(action, RtkBlockUpsert::Unchanged); + assert_eq!(content, input); + } + + #[test] + fn test_upsert_rtk_block_detects_malformed_block() { + let input = "\npartial"; + let (content, action) = upsert_rtk_block(input, RTK_INSTRUCTIONS); + assert_eq!(action, RtkBlockUpsert::Malformed); + assert_eq!(content, input); + } + #[test] fn test_init_is_idempotent() { let temp = TempDir::new().unwrap(); From 3f13d96e43a984e91364eb0fe22ad864c6abbb15 Mon Sep 17 00:00:00 2001 From: heAdz0r Date: Sat, 14 Feb 2026 23:15:22 +0300 Subject: [PATCH 3/3] feat(docs,hooks): enforce rgai-first search policy across docs and hooks Search priority (mandatory): rgai > rg > grep. 
Hook changes: - Add rewrite rules: grepai/rgai search -> rtk rgai (Tier 1) - Split rg and grep into separate rules (Tier 2/3) - Source-of-truth comment for hook sync - Test infrastructure: HOOK env override, script-relative path Doc updates (README, INSTALL, TROUBLESHOOTING, awareness template): - Add search priority section - Update command tables with rtk rgai examples - Add search ladder (rgai -> grep -> proxy) - Remove unverifiable benchmark table Template updates (init.rs): - RTK_INSTRUCTIONS: add rtk rgai to Files & Search section - show_config: display search priority hint - Tests: assert rtk rgai in top-level commands list Test fixes: - Fix pre-existing find/tree/wget test expectations (hook already rewrites them on master, tests incorrectly expected no rewrite) - Add 7 new hook tests for rgai/grepai rewrite rules --- .claude/hooks/rtk-rewrite.sh | 15 ++++++++++-- INSTALL.md | 13 +++++++++- README.md | 21 +++++++++++++++- docs/TROUBLESHOOTING.md | 9 +++++++ hooks/rtk-awareness.md | 9 +++++++ hooks/rtk-rewrite.sh | 15 ++++++++++-- hooks/test-rtk-rewrite.sh | 46 +++++++++++++++++++++++++++++------- src/init.rs | 13 ++++++---- 8 files changed, 123 insertions(+), 18 deletions(-) diff --git a/.claude/hooks/rtk-rewrite.sh b/.claude/hooks/rtk-rewrite.sh index 6574b01..a62f23b 100755 --- a/.claude/hooks/rtk-rewrite.sh +++ b/.claude/hooks/rtk-rewrite.sh @@ -2,6 +2,7 @@ # RTK auto-rewrite hook for Claude Code PreToolUse:Bash # Transparently rewrites raw commands to their rtk equivalents. # Outputs JSON with updatedInput to modify the command before execution. +# Source of truth: hooks/rtk-rewrite.sh (keep .claude/hooks copy in sync) # Guards: skip silently if dependencies missing if ! command -v rtk &>/dev/null || ! 
command -v jq &>/dev/null; then @@ -88,10 +89,20 @@ elif echo "$MATCH_CMD" | grep -qE '^cargo[[:space:]]+fmt([[:space:]]|$)'; then REWRITTEN="${ENV_PREFIX}$(echo "$CMD_BODY" | sed 's/^cargo fmt/rtk cargo fmt/')" # --- File operations --- +# Search priority (mandatory): rgai > rg > grep +# Tier 1: semantic intent search (grepai/rgai) -> rtk rgai +# Tier 2: exact search via ripgrep -> rtk grep (rtk grep runs rg -> grep fallback internally) +# Tier 3: exact search via grep -> rtk grep +elif echo "$MATCH_CMD" | grep -qE '^(grepai|rgai)[[:space:]]+search([[:space:]]|$)'; then + REWRITTEN="${ENV_PREFIX}$(echo "$CMD_BODY" | sed -E 's/^(grepai|rgai)[[:space:]]+search[[:space:]]+/rtk rgai /')" +elif echo "$MATCH_CMD" | grep -qE '^rgai[[:space:]]+'; then + REWRITTEN="${ENV_PREFIX}$(echo "$CMD_BODY" | sed -E 's/^rgai[[:space:]]+/rtk rgai /')" elif echo "$MATCH_CMD" | grep -qE '^cat[[:space:]]+'; then REWRITTEN="${ENV_PREFIX}$(echo "$CMD_BODY" | sed 's/^cat /rtk read /')" -elif echo "$MATCH_CMD" | grep -qE '^(rg|grep)[[:space:]]+'; then - REWRITTEN="${ENV_PREFIX}$(echo "$CMD_BODY" | sed -E 's/^(rg|grep) /rtk grep /')" +elif echo "$MATCH_CMD" | grep -qE '^rg[[:space:]]+'; then + REWRITTEN="${ENV_PREFIX}$(echo "$CMD_BODY" | sed -E 's/^rg /rtk grep /')" +elif echo "$MATCH_CMD" | grep -qE '^grep[[:space:]]+'; then + REWRITTEN="${ENV_PREFIX}$(echo "$CMD_BODY" | sed -E 's/^grep /rtk grep /')" elif echo "$MATCH_CMD" | grep -qE '^ls([[:space:]]|$)'; then REWRITTEN="${ENV_PREFIX}$(echo "$CMD_BODY" | sed 's/^ls/rtk ls/')" elif echo "$MATCH_CMD" | grep -qE '^tree([[:space:]]|$)'; then diff --git a/INSTALL.md b/INSTALL.md index 55b32fd..6cbf2d7 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -69,6 +69,15 @@ rtk gain # MUST show token savings, not "command not found" ## Project Initialization +### Search Priority + +**Search priority: rgai > rg > grep.** + +- Use `rtk rgai` first for semantic/intention-based discovery. +- Use `rtk grep` for exact/regex matching. 
+- `rtk grep` internally follows `rg -> grep` backend fallback. +- For fully raw output, use `rtk proxy `. + ### Recommended: Global Hook-First Setup **Best for: All projects, automatic RTK usage** @@ -229,7 +238,8 @@ cp ~/.claude/settings.json.bak ~/.claude/settings.json ```bash rtk ls . # Compact tree view rtk read file.rs # Optimized reading -rtk grep "pattern" . # Grouped search results +rtk rgai "query" # Semantic code search +rtk grep "pattern" . # Exact/regex search (internal rg -> grep fallback) ``` ### Git @@ -332,6 +342,7 @@ Before each session: - [ ] Verify RTK is installed: `rtk --version` - [ ] If not installed β†’ follow "Install from fork" - [ ] If project not initialized β†’ `rtk init` +- [ ] Search: use `rtk rgai` first, then `rtk grep` for exact/regex - [ ] Use `rtk` for ALL git/pnpm/test/vitest commands - [ ] Check savings: `rtk gain` diff --git a/README.md b/README.md index 910e6b5..3826ffb 100644 --- a/README.md +++ b/README.md @@ -48,6 +48,15 @@ With rtk: **~45,000 tokens** β†’ **70% reduction** > Estimates based on medium-sized TypeScript/Rust projects. Actual savings vary by project size. +## Search Priority + +**Search priority: rgai > rg > grep.** + +- Use `rtk rgai` first for semantic/intention-based discovery. +- Use `rtk grep` for exact/regex matching. +- `rtk grep` internally follows `rg -> grep` backend fallback. +- For fully raw output, use `rtk proxy `. + ## Installation ### ⚠️ Pre-Installation Check (REQUIRED) @@ -130,7 +139,8 @@ rtk read file.rs # Smart file reading rtk read file.rs -l aggressive # Signatures only (strips bodies) rtk smart file.rs # 2-line heuristic code summary rtk find "*.rs" . # Compact find results -rtk grep "pattern" . # Grouped search results +rtk rgai "auth token refresh" # Semantic code search +rtk grep "pattern" . 
# Exact/regex search (internal rg -> grep fallback) ``` ### Git @@ -400,6 +410,11 @@ The most effective way to use rtk is with the **auto-rewrite hook** for Claude C **Result**: 100% rtk adoption across all conversations and subagents, zero token overhead in Claude's context. +**Search ladder**: +- `rtk rgai` for semantic discovery +- `rtk grep` for exact/regex follow-up (`rg -> grep` fallback) +- `rtk proxy ...` when you need fully raw behavior + ### What Are Hooks? **For Beginners**: @@ -491,12 +506,16 @@ The hook is included in this repository at `.claude/hooks/rtk-rewrite.sh`. To us ### Commands Rewritten +Search rewrite order is strict and deterministic: `rgai > rg > grep`. + | Raw Command | Rewritten To | |-------------|-------------| | `git status/diff/log/add/commit/push/pull/branch/fetch/stash` | `rtk git ...` | | `gh pr/issue/run` | `rtk gh ...` | | `cargo test/build/clippy` | `rtk cargo ...` | | `cat ` | `rtk read ` | +| `grepai/rgai search ` | `rtk rgai ` | +| `rgai ` | `rtk rgai ` | | `rg/grep ` | `rtk grep ` | | `ls` | `rtk ls` | | `vitest/pnpm test` | `rtk vitest run` | diff --git a/docs/TROUBLESHOOTING.md b/docs/TROUBLESHOOTING.md index 64d4576..01d8b2f 100644 --- a/docs/TROUBLESHOOTING.md +++ b/docs/TROUBLESHOOTING.md @@ -165,6 +165,15 @@ Then add to `~/.claude/settings.json` (replace `~` with full path): **Note**: Use absolute path in `settings.json`, not `~/.claude/...` +### Search Priority + +**Search priority: rgai > rg > grep.** + +- Use `rtk rgai` first for semantic/intention-based discovery. +- Use `rtk grep` for exact/regex matching. +- `rtk grep` internally follows `rg -> grep` backend fallback. +- If you need fully raw output for debugging, use `rtk proxy `. 
+ --- ## Problem: "command not found: rtk" after installation diff --git a/hooks/rtk-awareness.md b/hooks/rtk-awareness.md index 0eaf3d5..639758b 100644 --- a/hooks/rtk-awareness.md +++ b/hooks/rtk-awareness.md @@ -25,5 +25,14 @@ which rtk # Verify correct binary All other commands are automatically rewritten by the Claude Code hook. Example: `git status` → `rtk git status` (transparent, 0 tokens overhead) +Example: `grepai search "auth token refresh"` → `rtk rgai "auth token refresh"` + +## Semantic Search + +```bash +rtk rgai "auth token refresh" # Intent-aware code search +rtk rgai auth token refresh --compact # Unquoted multi-word query +rtk rgai "auth token refresh" --json # Machine-readable output +``` Refer to CLAUDE.md for full command reference. diff --git a/hooks/rtk-rewrite.sh b/hooks/rtk-rewrite.sh index 6574b01..a62f23b 100644 --- a/hooks/rtk-rewrite.sh +++ b/hooks/rtk-rewrite.sh @@ -2,6 +2,7 @@ # RTK auto-rewrite hook for Claude Code PreToolUse:Bash # Transparently rewrites raw commands to their rtk equivalents. # Outputs JSON with updatedInput to modify the command before execution. +# Source of truth: hooks/rtk-rewrite.sh (keep .claude/hooks copy in sync) # Guards: skip silently if dependencies missing if ! command -v rtk &>/dev/null || !
command -v jq &>/dev/null; then @@ -88,10 +89,20 @@ elif echo "$MATCH_CMD" | grep -qE '^cargo[[:space:]]+fmt([[:space:]]|$)'; then REWRITTEN="${ENV_PREFIX}$(echo "$CMD_BODY" | sed 's/^cargo fmt/rtk cargo fmt/')" # --- File operations --- +# Search priority (mandatory): rgai > rg > grep +# Tier 1: semantic intent search (grepai/rgai) -> rtk rgai +# Tier 2: exact search via ripgrep -> rtk grep (rtk grep runs rg -> grep fallback internally) +# Tier 3: exact search via grep -> rtk grep +elif echo "$MATCH_CMD" | grep -qE '^(grepai|rgai)[[:space:]]+search([[:space:]]|$)'; then + REWRITTEN="${ENV_PREFIX}$(echo "$CMD_BODY" | sed -E 's/^(grepai|rgai)[[:space:]]+search[[:space:]]+/rtk rgai /')" +elif echo "$MATCH_CMD" | grep -qE '^rgai[[:space:]]+'; then + REWRITTEN="${ENV_PREFIX}$(echo "$CMD_BODY" | sed -E 's/^rgai[[:space:]]+/rtk rgai /')" elif echo "$MATCH_CMD" | grep -qE '^cat[[:space:]]+'; then REWRITTEN="${ENV_PREFIX}$(echo "$CMD_BODY" | sed 's/^cat /rtk read /')" -elif echo "$MATCH_CMD" | grep -qE '^(rg|grep)[[:space:]]+'; then - REWRITTEN="${ENV_PREFIX}$(echo "$CMD_BODY" | sed -E 's/^(rg|grep) /rtk grep /')" +elif echo "$MATCH_CMD" | grep -qE '^rg[[:space:]]+'; then + REWRITTEN="${ENV_PREFIX}$(echo "$CMD_BODY" | sed -E 's/^rg /rtk grep /')" +elif echo "$MATCH_CMD" | grep -qE '^grep[[:space:]]+'; then + REWRITTEN="${ENV_PREFIX}$(echo "$CMD_BODY" | sed -E 's/^grep /rtk grep /')" elif echo "$MATCH_CMD" | grep -qE '^ls([[:space:]]|$)'; then REWRITTEN="${ENV_PREFIX}$(echo "$CMD_BODY" | sed 's/^ls/rtk ls/')" elif echo "$MATCH_CMD" | grep -qE '^tree([[:space:]]|$)'; then diff --git a/hooks/test-rtk-rewrite.sh b/hooks/test-rtk-rewrite.sh index 2a68ff8..61b5b86 100755 --- a/hooks/test-rtk-rewrite.sh +++ b/hooks/test-rtk-rewrite.sh @@ -2,9 +2,11 @@ # Test suite for rtk-rewrite.sh # Feeds mock JSON through the hook and verifies the rewritten commands. 
# -# Usage: bash ~/.claude/hooks/test-rtk-rewrite.sh +# Usage: bash hooks/test-rtk-rewrite.sh +# Override hook path: HOOK=/path/to/rtk-rewrite.sh bash hooks/test-rtk-rewrite.sh -HOOK="$HOME/.claude/hooks/rtk-rewrite.sh" +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +HOOK="${HOOK:-$SCRIPT_DIR/rtk-rewrite.sh}" PASS=0 FAIL=0 TOTAL=0 @@ -109,6 +111,22 @@ test_rewrite "rg pattern src/" \ "rg pattern src/" \ "rtk grep pattern src/" +test_rewrite "grepai search query" \ + "grepai search auth middleware" \ + "rtk rgai auth middleware" + +test_rewrite "grepai search with flags" \ + "grepai search \"error handler\" --json --compact" \ + "rtk rgai \"error handler\" --json --compact" + +test_rewrite "rgai search query (priority over rg/grep)" \ + "rgai search auth middleware --compact" \ + "rtk rgai auth middleware --compact" + +test_rewrite "plain rgai" \ + "rgai auth middleware --json" \ + "rtk rgai auth middleware --json" + test_rewrite "cargo test" \ "cargo test" \ "rtk cargo test" @@ -149,6 +167,18 @@ test_rewrite "env + docker compose" \ "COMPOSE_PROJECT_NAME=test docker compose up -d" \ "COMPOSE_PROJECT_NAME=test rtk docker compose up -d" +test_rewrite "env + grepai search" \ + "NODE_ENV=test grepai search token refresh --json" \ + "NODE_ENV=test rtk rgai token refresh --json" + +test_rewrite "env + rg exact search" \ + "RG_IGNORE_DOT=1 rg token src/" \ + "RG_IGNORE_DOT=1 rtk grep token src/" + +test_rewrite "env + grep exact search" \ + "LC_ALL=C grep -rn token src/" \ + "LC_ALL=C rtk grep -rn token src/" + echo "" # ---- SECTION 3: New patterns ---- @@ -193,17 +223,17 @@ test_rewrite "docker exec -it db psql" \ "docker exec -it db psql" \ "rtk docker exec -it db psql" -test_rewrite "find (NOT rewritten β€” different arg format)" \ +test_rewrite "find with native args" \ "find . -name '*.ts'" \ - "" + "rtk find . 
-name '*.ts'" -test_rewrite "tree (NOT rewritten β€” different arg format)" \ +test_rewrite "tree with path arg" \ "tree src/" \ - "" + "rtk tree src/" -test_rewrite "wget (NOT rewritten β€” different arg format)" \ +test_rewrite "wget URL" \ "wget https://example.com/file" \ - "" + "rtk wget https://example.com/file" test_rewrite "gh api repos/owner/repo" \ "gh api repos/owner/repo" \ diff --git a/src/init.rs b/src/init.rs index 961e4ac..653dad0 100644 --- a/src/init.rs +++ b/src/init.rs @@ -102,11 +102,12 @@ rtk npx # Compact npx command output rtk prisma # Prisma without ASCII art (88%) ``` -### Files & Search (60-75% savings) +### Files & Search (60-85% savings) ```bash rtk ls # Tree format, compact (65%) rtk read # Code reading with filtering (60%) -rtk grep # Search grouped by file (75%) +rtk rgai # Semantic search ranked by relevance (85%) +rtk grep # Exact/regex search (internal rg -> grep fallback) rtk find # Find grouped by directory (70%) ``` @@ -155,7 +156,7 @@ rtk init --global # Add RTK to ~/.claude/CLAUDE.md | Git | status, log, diff, add, commit | 59-80% | | GitHub | gh pr, gh run, gh issue | 26-87% | | Package Managers | pnpm, npm, npx | 70-90% | -| Files | ls, read, grep, find | 60-75% | +| Files | ls, read, grep, rgai, find | 60-85% | | Infrastructure | docker, kubectl | 85% | | Network | curl, wget | 65-70% | @@ -1081,7 +1082,9 @@ pub fn show_config() -> Result<()> { println!("βšͺ settings.json: not found"); } - println!("\nUsage:"); + println!("\nSearch priority: rgai > rg > grep."); + println!(" Use rtk rgai first; use rtk grep for exact/regex.\n"); + println!("Usage:"); println!(" rtk init # Full injection into local CLAUDE.md"); println!(" rtk init -g # Hook + RTK.md + @RTK.md + settings.json (recommended)"); println!(" rtk init -g --auto-patch # Same as above but no prompt"); @@ -1116,6 +1119,7 @@ mod tests { "rtk git", "rtk docker", "rtk kubectl", + "rtk rgai", ] { assert!( RTK_INSTRUCTIONS.contains(cmd), @@ -1195,6 +1199,7 @@ More 
content"#; // Just verify RTK_INSTRUCTIONS constant has the right content assert!(RTK_INSTRUCTIONS.contains("")); assert!(RTK_INSTRUCTIONS.len() > 4000); }