From 0ca42b71173e6fca3186ff3678d4b961a6c6a812 Mon Sep 17 00:00:00 2001 From: Tim Waugh Date: Sat, 22 Nov 2025 21:05:47 +0000 Subject: [PATCH] fix(rag): use config top_k instead of hardcoded value Phase 2 RAG search was using hardcoded top_k=10 instead of reading from config.rag.top_k (which defaults to 20). This meant RAG was only fetching 10 chunks when the user configured 20, reducing the diversity of candidate pages shown to the LLM for integration decisions. Changes: - Pass config to Phase2Screen from LogsqueakApp - Store config in Phase2Screen instance - Read config.rag.top_k in RAG worker (fallback to 10 for tests) - Add Any to typing imports Result: RAG now respects user's top_k configuration, returning more diverse candidate pages for better integration suggestions. Assisted-by: Claude Code --- src/logsqueak/tui/app.py | 1 + src/logsqueak/tui/screens/content_editing.py | 11 ++++-- tests/ui/test_phase2_rag_blocking.py | 37 ++++++++++++++++++++ 3 files changed, 47 insertions(+), 2 deletions(-) diff --git a/src/logsqueak/tui/app.py b/src/logsqueak/tui/app.py index 25b2a22..e2a9875 100644 --- a/src/logsqueak/tui/app.py +++ b/src/logsqueak/tui/app.py @@ -505,6 +505,7 @@ def transition_to_phase2(self, selected_blocks: List[BlockState]) -> None: graph_paths=graph_paths, llm_client=self.llm_client, rag_search=self.rag_search, + config=self.config, auto_start_workers=True, name="phase2", ) diff --git a/src/logsqueak/tui/screens/content_editing.py b/src/logsqueak/tui/screens/content_editing.py index 0f1d711..b4531f0 100644 --- a/src/logsqueak/tui/screens/content_editing.py +++ b/src/logsqueak/tui/screens/content_editing.py @@ -4,7 +4,7 @@ Users can see LLM-suggested rewordings, manually edit content, and review RAG search results. """ -from typing import Optional, Dict +from typing import Optional, Dict, Any import asyncio from textual.app import ComposeResult from textual.screen import Screen @@ -122,6 +122,7 @@ def __init__( graph_paths: GraphPaths, llm_client: Optional[LLMClient] = None, rag_search: Optional[RAGSearch] = None, + config: Optional[Any] = None, auto_start_workers: bool = True, **kwargs ): @@ -134,6 +135,7 @@ def __init__( graph_paths: GraphPaths instance for loading page contents llm_client: LLM client instance (None for testing) rag_search: RAG search service instance (None for testing) + config: Application config (for RAG top_k, etc.) auto_start_workers: Whether to auto-start background workers (default True) """ super().__init__(**kwargs) @@ -143,6 +145,7 @@ def __init__( self.graph_paths = graph_paths self.llm_client = llm_client self.rag_search = rag_search + self.config = config self.auto_start_workers = auto_start_workers # Map block_id to EditedContent for quick lookup @@ -894,11 +897,15 @@ async def _rag_search_worker(self) -> None: } # Find candidate chunks (returns hierarchical chunks + frontmatter from ChromaDB) + top_k = 10 # Default fallback for tests + if self.config and hasattr(self.config, 'rag') and hasattr(self.config.rag, 'top_k'): + top_k = self.config.rag.top_k + rag_results = await self.rag_search.find_candidates( edited_content=self.edited_content, original_contexts=original_contexts, graph_paths=self.graph_paths, - top_k=10 # TODO: Get from config + top_k=top_k ) # Unpack results: chunks per block and page frontmatter diff --git a/tests/ui/test_phase2_rag_blocking.py b/tests/ui/test_phase2_rag_blocking.py index 33fd067..77fab61 100644 --- a/tests/ui/test_phase2_rag_blocking.py +++ b/tests/ui/test_phase2_rag_blocking.py @@ -327,3 +327,40 @@ async def test_rag_search_error_shows_message(sample_blocks, sample_edited_conte # Error message should be displayed assert screen.rag_search_state == BackgroundTaskState.FAILED assert screen.rag_search_error is not None + + +def test_rag_config_top_k_extraction(): + """Test that top_k is correctly extracted from config.""" + from logsqueak.models.config import Config, LogseqConfig, LLMConfig, RAGConfig + from pathlib import Path + import tempfile + + with tempfile.TemporaryDirectory() as tmpdir: + # Create config with custom top_k + config = Config( + logseq=LogseqConfig(graph_path=tmpdir), + llm=LLMConfig(endpoint="http://fake", model="fake", api_key="fake"), + rag=RAGConfig(top_k=25) + ) + + # Verify the config has the right value + assert config.rag.top_k == 25 + + # Simulate the extraction logic used in _rag_search_worker + top_k = 10 # Default fallback + if config and hasattr(config, 'rag') and hasattr(config.rag, 'top_k'): + top_k = config.rag.top_k + + assert top_k == 25 + + +def test_rag_default_top_k_without_config(): + """Test that top_k defaults to 10 when config not provided.""" + config = None + + # Simulate the extraction logic used in _rag_search_worker + top_k = 10 # Default fallback + if config and hasattr(config, 'rag') and hasattr(config.rag, 'top_k'): + top_k = config.rag.top_k + + assert top_k == 10