From 0ca42b71173e6fca3186ff3678d4b961a6c6a812 Mon Sep 17 00:00:00 2001
From: Tim Waugh <twaugh@redhat.com>
Date: Sat, 22 Nov 2025 21:05:47 +0000
Subject: [PATCH] fix(rag): use config top_k instead of hardcoded value

Phase 2 RAG search was using hardcoded top_k=10 instead of reading
from config.rag.top_k (which defaults to 20).

This meant RAG was only fetching 10 chunks when the user configured
20, reducing the diversity of candidate pages shown to the LLM for
integration decisions.

Changes:
- Pass config to Phase2Screen from LogsqueakApp
- Store config in Phase2Screen instance
- Read config.rag.top_k in RAG worker (fallback to 10 for tests)
- Add Any to typing imports

Result: RAG now respects user's top_k configuration, returning more
diverse candidate pages for better integration suggestions.

Assisted-by: Claude Code
---
 src/logsqueak/tui/app.py                     |  1 +
 src/logsqueak/tui/screens/content_editing.py | 11 ++++--
 tests/ui/test_phase2_rag_blocking.py         | 37 ++++++++++++++++++++
 3 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/src/logsqueak/tui/app.py b/src/logsqueak/tui/app.py
index 25b2a22..e2a9875 100644
--- a/src/logsqueak/tui/app.py
+++ b/src/logsqueak/tui/app.py
@@ -505,6 +505,7 @@ def transition_to_phase2(self, selected_blocks: List[BlockState]) -> None:
             graph_paths=graph_paths,
             llm_client=self.llm_client,
             rag_search=self.rag_search,
+            config=self.config,
             auto_start_workers=True,
             name="phase2",
         )
diff --git a/src/logsqueak/tui/screens/content_editing.py b/src/logsqueak/tui/screens/content_editing.py
index 0f1d711..b4531f0 100644
--- a/src/logsqueak/tui/screens/content_editing.py
+++ b/src/logsqueak/tui/screens/content_editing.py
@@ -4,7 +4,7 @@
 Users can see LLM-suggested rewordings, manually edit content, and review RAG search results.
 """
 
-from typing import Optional, Dict
+from typing import Optional, Dict, Any
 import asyncio
 from textual.app import ComposeResult
 from textual.screen import Screen
@@ -122,6 +122,7 @@ def __init__(
         graph_paths: GraphPaths,
         llm_client: Optional[LLMClient] = None,
         rag_search: Optional[RAGSearch] = None,
+        config: Optional[Any] = None,
         auto_start_workers: bool = True,
         **kwargs
     ):
@@ -134,6 +135,7 @@ def __init__(
             graph_paths: GraphPaths instance for loading page contents
             llm_client: LLM client instance (None for testing)
             rag_search: RAG search service instance (None for testing)
+            config: Application config (for RAG top_k, etc.)
             auto_start_workers: Whether to auto-start background workers (default True)
         """
         super().__init__(**kwargs)
@@ -143,6 +145,7 @@ def __init__(
         self.graph_paths = graph_paths
         self.llm_client = llm_client
         self.rag_search = rag_search
+        self.config = config
         self.auto_start_workers = auto_start_workers
 
         # Map block_id to EditedContent for quick lookup
@@ -894,11 +897,15 @@ async def _rag_search_worker(self) -> None:
             }
 
             # Find candidate chunks (returns hierarchical chunks + frontmatter from ChromaDB)
+            top_k = 10  # Default fallback for tests
+            if self.config and hasattr(self.config, 'rag') and hasattr(self.config.rag, 'top_k'):
+                top_k = self.config.rag.top_k
+
             rag_results = await self.rag_search.find_candidates(
                 edited_content=self.edited_content,
                 original_contexts=original_contexts,
                 graph_paths=self.graph_paths,
-                top_k=10  # TODO: Get from config
+                top_k=top_k
             )
 
             # Unpack results: chunks per block and page frontmatter
diff --git a/tests/ui/test_phase2_rag_blocking.py b/tests/ui/test_phase2_rag_blocking.py
index 33fd067..77fab61 100644
--- a/tests/ui/test_phase2_rag_blocking.py
+++ b/tests/ui/test_phase2_rag_blocking.py
@@ -327,3 +327,40 @@ async def test_rag_search_error_shows_message(sample_blocks, sample_edited_conte
         # Error message should be displayed
         assert screen.rag_search_state == BackgroundTaskState.FAILED
         assert screen.rag_search_error is not None
+
+
+def test_rag_config_top_k_extraction():
+    """Test that top_k is correctly extracted from config."""
+    from logsqueak.models.config import Config, LogseqConfig, LLMConfig, RAGConfig
+    from pathlib import Path
+    import tempfile
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        # Create config with custom top_k
+        config = Config(
+            logseq=LogseqConfig(graph_path=tmpdir),
+            llm=LLMConfig(endpoint="http://fake", model="fake", api_key="fake"),
+            rag=RAGConfig(top_k=25)
+        )
+
+        # Verify the config has the right value
+        assert config.rag.top_k == 25
+
+        # Simulate the extraction logic used in _rag_search_worker
+        top_k = 10  # Default fallback
+        if config and hasattr(config, 'rag') and hasattr(config.rag, 'top_k'):
+            top_k = config.rag.top_k
+
+        assert top_k == 25
+
+
+def test_rag_default_top_k_without_config():
+    """Test that top_k defaults to 10 when config not provided."""
+    config = None
+
+    # Simulate the extraction logic used in _rag_search_worker
+    top_k = 10  # Default fallback
+    if config and hasattr(config, 'rag') and hasattr(config.rag, 'top_k'):
+        top_k = config.rag.top_k
+
+    assert top_k == 10