From 735bc483ff02c36a5153e49d9dd52f2168b5ca59 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 13 Jan 2026 18:36:28 +0000 Subject: [PATCH 1/4] Initial plan From cc014659c2b6cb0b333c1c9584834c36b581304d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 13 Jan 2026 18:48:29 +0000 Subject: [PATCH 2/4] Add search feature implementation with API, indexing, and tests Co-authored-by: lcharette <2566513+lcharette@users.noreply.github.com> --- app/src/Bakery/BakeCommandListener.php | 3 +- app/src/Bakery/SearchIndexCommand.php | 90 +++++++ app/src/Controller/SearchController.php | 76 ++++++ app/src/MyRoutes.php | 5 + app/src/Recipe.php | 19 +- app/src/Search/SearchIndex.php | 216 ++++++++++++++++ app/src/Search/SearchService.php | 235 ++++++++++++++++++ .../SearchServicesProvider.php | 31 +++ app/tests/Controller/SearchControllerTest.php | 189 ++++++++++++++ app/tests/Search/SearchIndexTest.php | 192 ++++++++++++++ app/tests/Search/SearchServiceTest.php | 182 ++++++++++++++ 11 files changed, 1236 insertions(+), 2 deletions(-) create mode 100644 app/src/Bakery/SearchIndexCommand.php create mode 100644 app/src/Controller/SearchController.php create mode 100644 app/src/Search/SearchIndex.php create mode 100644 app/src/Search/SearchService.php create mode 100644 app/src/ServicesProvider/SearchServicesProvider.php create mode 100644 app/tests/Controller/SearchControllerTest.php create mode 100644 app/tests/Search/SearchIndexTest.php create mode 100644 app/tests/Search/SearchServiceTest.php diff --git a/app/src/Bakery/BakeCommandListener.php b/app/src/Bakery/BakeCommandListener.php index 4e7c51c9..4c9090bc 100644 --- a/app/src/Bakery/BakeCommandListener.php +++ b/app/src/Bakery/BakeCommandListener.php @@ -24,7 +24,8 @@ public function __invoke(BakeCommandEvent $event): void $event->setCommands([ 'debug', 'assets:build', - 'clear-cache' + 'clear-cache', + 'search:index' ]); } } diff --git a/app/src/Bakery/SearchIndexCommand.php b/app/src/Bakery/SearchIndexCommand.php new file mode 100644 index 00000000..4939988c --- /dev/null +++ b/app/src/Bakery/SearchIndexCommand.php @@ -0,0 +1,90 @@ +setName('search:index') + ->setDescription('Build or rebuild the search index for documentation') + ->addOption( + 'version', + null, + InputOption::VALUE_OPTIONAL, + 'Documentation version to index (omit to index all versions)' + ) + ->addOption( + 'clear', + null, + InputOption::VALUE_NONE, + 'Clear the search index before rebuilding' + ); + } + + /** + * {@inheritdoc} + */ + protected function execute(InputInterface $input, OutputInterface $output): int + { + $this->io->title('Documentation Search Index'); + + /** @var string|null $version */ + $version = $input->getOption('version'); + $clear = $input->getOption('clear'); + + // Clear index if requested + if ($clear) { + $this->io->writeln('Clearing search index...'); + $this->searchIndex->clearIndex($version); + $this->io->success('Search index cleared.'); + } + + // Build index + $versionText = $version !== null ? "version {$version}" : 'all versions'; + $this->io->writeln("Building search index for {$versionText}..."); + + try { + $count = $this->searchIndex->buildIndex($version); + $this->io->success("Search index built successfully. Indexed {$count} pages."); + } catch (\Exception $e) { + $this->io->error("Failed to build search index: {$e->getMessage()}"); + + return Command::FAILURE; + } + + return Command::SUCCESS; + } +} diff --git a/app/src/Controller/SearchController.php b/app/src/Controller/SearchController.php new file mode 100644 index 00000000..a2c6559e --- /dev/null +++ b/app/src/Controller/SearchController.php @@ -0,0 +1,76 @@ +getQueryParams(); + + // Get query parameter + $query = $params['q'] ?? ''; + + if (empty($query)) { + $result = [ + 'rows' => [], + 'count' => 0, + 'count_filtered' => 0, + ]; + + $response->getBody()->write(json_encode($result, JSON_THROW_ON_ERROR)); + + return $response->withHeader('Content-Type', 'application/json'); + } + + // Get pagination parameters + $page = isset($params['page']) ? max(1, (int) $params['page']) : 1; + $size = isset($params['size']) ? min(100, max(1, (int) $params['size'])) : 10; + + // Get version parameter + $version = $params['version'] ?? null; + + // Perform search + $result = $this->searchService->search($query, $version, $page, $size); + + // Write JSON response + $response->getBody()->write(json_encode($result, JSON_THROW_ON_ERROR)); + + return $response->withHeader('Content-Type', 'application/json'); + } +} diff --git a/app/src/MyRoutes.php b/app/src/MyRoutes.php index ea707f3c..72da56cf 100644 --- a/app/src/MyRoutes.php +++ b/app/src/MyRoutes.php @@ -12,6 +12,7 @@ use Slim\App; use UserFrosting\Learn\Controller\DocumentationController; +use UserFrosting\Learn\Controller\SearchController; use UserFrosting\Learn\Middleware\TwigGlobals; use UserFrosting\Routes\RouteDefinitionInterface; @@ -19,6 +20,10 @@ class MyRoutes implements RouteDefinitionInterface { public function register(App $app): void { + // Route for search API + $app->get('/api/search', [SearchController::class, 'search']) + ->setName('api.search'); + // Route for versioned and non-versioned images $app->get('/{version:\d+\.\d+}/images/{path:.*}', [DocumentationController::class, 'imageVersioned']) ->add(TwigGlobals::class) diff --git a/app/src/Recipe.php b/app/src/Recipe.php index bf45804c..9a6ce66a 100644 --- a/app/src/Recipe.php +++ b/app/src/Recipe.php @@ -14,10 +14,13 @@ use UserFrosting\Learn\Bakery\BakeCommandListener; use UserFrosting\Learn\Bakery\DebugCommandListener; use UserFrosting\Learn\Bakery\DebugVerboseCommandListener; +use UserFrosting\Learn\Bakery\SearchIndexCommand; use UserFrosting\Learn\Bakery\SetupCommandListener; use UserFrosting\Learn\Listeners\ResourceLocatorInitiated; use UserFrosting\Learn\ServicesProvider\MarkdownService; +use UserFrosting\Learn\ServicesProvider\SearchServicesProvider; use UserFrosting\Learn\Twig\Extensions\FileTreeExtension; +use UserFrosting\Sprinkle\BakeryRecipe; use UserFrosting\Sprinkle\Core\Bakery\Event\BakeCommandEvent; use UserFrosting\Sprinkle\Core\Bakery\Event\DebugCommandEvent; use UserFrosting\Sprinkle\Core\Bakery\Event\DebugVerboseCommandEvent; @@ -35,7 +38,8 @@ class Recipe implements SprinkleRecipe, EventListenerRecipe, - TwigExtensionRecipe + TwigExtensionRecipe, + BakeryRecipe { /** * Return the Sprinkle name. @@ -104,6 +108,19 @@ public function getServices(): array { return [ MarkdownService::class, + SearchServicesProvider::class, + ]; + } + + /** + * Return an array of all registered Bakery Commands. + * + * {@inheritdoc} + */ + public function getBakeryCommands(): array + { + return [ + SearchIndexCommand::class, ]; } diff --git a/app/src/Search/SearchIndex.php b/app/src/Search/SearchIndex.php new file mode 100644 index 00000000..f80c3667 --- /dev/null +++ b/app/src/Search/SearchIndex.php @@ -0,0 +1,216 @@ +config->get('learn.versions.available', []); + foreach (array_keys($available) as $versionId) { + $versions[] = $this->versionValidator->getVersion($versionId); + } + } else { + // Index specific version + $versions[] = $this->versionValidator->getVersion($version); + } + + $totalPages = 0; + + foreach ($versions as $versionObj) { + $pages = $this->indexVersion($versionObj); + $totalPages += count($pages); + + // Store in cache + $this->cache->put( + $this->getCacheKey($versionObj->id), + $pages, + $this->getCacheTtl() + ); + } + + return $totalPages; + } + + /** + * Index all pages for a specific version. + * + * @param Version $version + * + * @return array + */ + protected function indexVersion(Version $version): array + { + $tree = $this->repository->getTree($version->id); + $pages = $this->flattenTree($tree); + + $indexed = []; + + foreach ($pages as $page) { + $indexed[] = $this->indexPage($page); + } + + return $indexed; + } + + /** + * Index a single page. + * + * @param PageResource $page + * + * @return array{title: string, slug: string, route: string, content: string, version: string} + */ + protected function indexPage(PageResource $page): array + { + // Get the HTML content and strip HTML tags to get plain text + $htmlContent = $page->getContent(); + $plainText = $this->stripHtmlTags($htmlContent); + + return [ + 'title' => $page->getTitle(), + 'slug' => $page->getSlug(), + 'route' => $page->getRoute(), + 'content' => $plainText, + 'version' => $page->getVersion()->id, + ]; + } + + /** + * Strip HTML tags from content to get searchable plain text. + * Preserves code blocks and adds spacing for better search results. + * + * @param string $html + * + * @return string + */ + protected function stripHtmlTags(string $html): string + { + // Convert HTML to plain text, preserving code blocks + // Add space before/after block elements to prevent word concatenation + $html = (string) preg_replace('/<(div|p|h[1-6]|li|pre|code|blockquote)[^>]*>/i', ' $0', $html); + $html = (string) preg_replace('/<\/(div|p|h[1-6]|li|pre|code|blockquote)>/i', '$0 ', $html); + + // Remove script and style tags with their content + $html = (string) preg_replace('/<(script|style)[^>]*>.*?<\/\1>/is', '', $html); + + // Strip remaining HTML tags + $text = strip_tags($html); + + // Decode HTML entities + $text = html_entity_decode($text, ENT_QUOTES | ENT_HTML5, 'UTF-8'); + + // Normalize whitespace + $text = (string) preg_replace('/\s+/', ' ', $text); + + return trim($text); + } + + /** + * Flatten a tree structure into a flat array of pages. + * + * @param PageResource[] $tree + * + * @return PageResource[] + */ + protected function flattenTree(array $tree): array + { + $flat = []; + + foreach ($tree as $page) { + $flat[] = $page; + if ($page->getChildren()) { + $flat = array_merge($flat, $this->flattenTree($page->getChildren())); + } + } + + return $flat; + } + + /** + * Get the cache key for the search index of a specific version. + * + * @param string $version + * + * @return string + */ + protected function getCacheKey(string $version): string + { + $keyFormat = $this->config->get('learn.cache.key', '%s.%s'); + + return sprintf($keyFormat, 'search-index', $version); + } + + /** + * Get the cache TTL for the search index. + * + * @return int The cache TTL in seconds + */ + protected function getCacheTtl(): int + { + // Use a longer TTL for search index since it's expensive to rebuild + return $this->config->get('learn.cache.ttl', 3600) * 24; // 24 hours by default + } + + /** + * Clear the search index for a specific version or all versions. + * + * @param string|null $version The version to clear, or null for all versions + */ + public function clearIndex(?string $version = null): void + { + if ($version === null) { + // Clear all versions + $available = $this->config->get('learn.versions.available', []); + foreach (array_keys($available) as $versionId) { + $this->cache->forget($this->getCacheKey($versionId)); + } + } else { + // Clear specific version + $this->cache->forget($this->getCacheKey($version)); + } + } +} diff --git a/app/src/Search/SearchService.php b/app/src/Search/SearchService.php new file mode 100644 index 00000000..450e346a --- /dev/null +++ b/app/src/Search/SearchService.php @@ -0,0 +1,235 @@ +config->get('learn.versions.latest', '6.0'); + + // Get the index from cache + $index = $this->getIndex($versionId); + + if (empty($index)) { + return [ + 'rows' => [], + 'count' => 0, + 'count_filtered' => 0, + ]; + } + + // Search through the index + $results = $this->performSearch($query, $index); + + // Paginate results + $totalResults = count($results); + $offset = ($page - 1) * $perPage; + $paginatedResults = array_slice($results, $offset, $perPage); + + return [ + 'rows' => $paginatedResults, + 'count' => count($index), + 'count_filtered' => $totalResults, + ]; + } + + /** + * Perform the actual search and generate results with snippets. + * + * @param string $query + * @param array $index + * + * @return array + */ + protected function performSearch(string $query, array $index): array + { + $results = []; + $query = trim($query); + + if (empty($query)) { + return $results; + } + + // Determine if query contains wildcards + $hasWildcards = str_contains($query, '*') || str_contains($query, '?'); + + foreach ($index as $page) { + $matches = []; + + if ($hasWildcards) { + // Use wildcard matching + $matches = $this->searchWithWildcard($query, $page['content']); + } else { + // Use simple case-insensitive search + $matches = $this->searchPlain($query, $page['content']); + } + + if (!empty($matches)) { + $results[] = [ + 'title' => $page['title'], + 'slug' => $page['slug'], + 'route' => $page['route'], + 'snippet' => $this->generateSnippet($page['content'], $matches[0]), + 'matches' => count($matches), + 'version' => $page['version'], + ]; + } + } + + // Sort by number of matches (descending) + usort($results, fn ($a, $b) => $b['matches'] <=> $a['matches']); + + return array_slice($results, 0, self::MAX_RESULTS); + } + + /** + * Search for plain text matches (case-insensitive). + * + * @param string $query + * @param string $content + * + * @return array Array of match positions + */ + protected function searchPlain(string $query, string $content): array + { + $matches = []; + $offset = 0; + $queryLower = mb_strtolower($query); + $contentLower = mb_strtolower($content); + + while (($pos = mb_strpos($contentLower, $queryLower, $offset)) !== false) { + $matches[] = $pos; + $offset = $pos + 1; + } + + return $matches; + } + + /** + * Search for wildcard pattern matches. + * + * @param string $pattern Pattern with wildcards (* and ?) + * @param string $content + * + * @return array Array of match positions + */ + protected function searchWithWildcard(string $pattern, string $content): array + { + $matches = []; + + // Convert wildcard pattern to regex + // Escape special regex characters except * and ? + $regex = preg_quote($pattern, '/'); + $regex = str_replace(['\*', '\?'], ['.*', '.'], $regex); + $regex = '/' . $regex . '/i'; // Case-insensitive + + // Split content into words and check each word + $words = preg_split('/\s+/', $content); + $offset = 0; + + if ($words === false) { + return $matches; + } + + foreach ($words as $word) { + if (preg_match($regex, $word)) { + $matches[] = $offset; + } + $offset += mb_strlen($word) + 1; // +1 for space + } + + return $matches; + } + + /** + * Generate a snippet of text around a match position. + * + * @param string $content Full content + * @param int $matchPosition Position of the match + * + * @return string Snippet with context + */ + protected function generateSnippet(string $content, int $matchPosition): string + { + $contextLength = self::SNIPPET_CONTEXT_LENGTH; + + // Calculate start and end positions + $start = max(0, $matchPosition - $contextLength); + $end = min(mb_strlen($content), $matchPosition + $contextLength); + + // Extract snippet + $snippet = mb_substr($content, $start, $end - $start); + + // Add ellipsis if we're not at the beginning/end + if ($start > 0) { + $snippet = '...' . $snippet; + } + if ($end < mb_strlen($content)) { + $snippet .= '...'; + } + + return $snippet; + } + + /** + * Get the search index for a specific version from cache. + * + * @param string $version + * + * @return array + */ + protected function getIndex(string $version): array + { + $keyFormat = $this->config->get('learn.cache.key', '%s.%s'); + $cacheKey = sprintf($keyFormat, 'search-index', $version); + + $index = $this->cache->get($cacheKey); + + return is_array($index) ? $index : []; + } +} diff --git a/app/src/ServicesProvider/SearchServicesProvider.php b/app/src/ServicesProvider/SearchServicesProvider.php new file mode 100644 index 00000000..9a84bab7 --- /dev/null +++ b/app/src/ServicesProvider/SearchServicesProvider.php @@ -0,0 +1,31 @@ + \DI\autowire(), + SearchService::class => \DI\autowire(), + ]; + } +} diff --git a/app/tests/Controller/SearchControllerTest.php b/app/tests/Controller/SearchControllerTest.php new file mode 100644 index 00000000..e4536934 --- /dev/null +++ b/app/tests/Controller/SearchControllerTest.php @@ -0,0 +1,189 @@ +ci->get(Config::class); + $config->set('learn.versions.latest', '6.0'); + $config->set('learn.versions.available', [ + '6.0' => '6.0 Beta', + ]); + + // Use the test pages directory + /** @var ResourceLocatorInterface $locator */ + $locator = $this->ci->get(ResourceLocatorInterface::class); + $locator->removeStream('pages'); + $locator->addStream(new ResourceStream('pages', shared: true, readonly: true, path: __DIR__ . '/../pages')); + + // Build index for testing + $searchIndex = $this->ci->get(SearchIndex::class); + $searchIndex->buildIndex('6.0'); + } + + /** + * Test search API endpoint with query. + */ + public function testSearchEndpoint(): void + { + // Create request to search API + $request = $this->createRequest('GET', '/api/search?q=first'); + $response = $this->handleRequest($request); + + // Assert successful response + $this->assertResponseStatus(200, $response); + + // Parse JSON response + $body = (string) $response->getBody(); + $data = json_decode($body, true); + + $this->assertIsArray($data); + $this->assertArrayHasKey('rows', $data); + $this->assertArrayHasKey('count', $data); + $this->assertArrayHasKey('count_filtered', $data); + + // Should have some results + $this->assertGreaterThan(0, $data['count_filtered']); + $this->assertNotEmpty($data['rows']); + + // Check structure of first result + if (!empty($data['rows'])) { + $firstResult = $data['rows'][0]; + $this->assertArrayHasKey('title', $firstResult); + $this->assertArrayHasKey('slug', $firstResult); + $this->assertArrayHasKey('route', $firstResult); + $this->assertArrayHasKey('snippet', $firstResult); + $this->assertArrayHasKey('matches', $firstResult); + $this->assertArrayHasKey('version', $firstResult); + } + } + + /** + * Test search API endpoint with empty query. + */ + public function testSearchEndpointEmptyQuery(): void + { + // Create request without query + $request = $this->createRequest('GET', '/api/search'); + $response = $this->handleRequest($request); + + // Assert successful response + $this->assertResponseStatus(200, $response); + + // Parse JSON response + $body = (string) $response->getBody(); + $data = json_decode($body, true); + + $this->assertIsArray($data); + $this->assertSame(0, $data['count_filtered']); + $this->assertEmpty($data['rows']); + } + + /** + * Test search API endpoint with pagination. + */ + public function testSearchEndpointPagination(): void + { + // Create request with pagination parameters + $request = $this->createRequest('GET', '/api/search?q=page&page=1&size=2'); + $response = $this->handleRequest($request); + + // Assert successful response + $this->assertResponseStatus(200, $response); + + // Parse JSON response + $body = (string) $response->getBody(); + $data = json_decode($body, true); + + $this->assertIsArray($data); + + // Should return at most 2 results + $this->assertLessThanOrEqual(2, count($data['rows'])); + } + + /** + * Test search API endpoint with version parameter. + */ + public function testSearchEndpointWithVersion(): void + { + // Create request with version parameter + $request = $this->createRequest('GET', '/api/search?q=first&version=6.0'); + $response = $this->handleRequest($request); + + // Assert successful response + $this->assertResponseStatus(200, $response); + + // Parse JSON response + $body = (string) $response->getBody(); + $data = json_decode($body, true); + + $this->assertIsArray($data); + + // Verify results are from correct version + if (!empty($data['rows'])) { + foreach ($data['rows'] as $result) { + $this->assertSame('6.0', $result['version']); + } + } + } + + /** + * Test search API endpoint with wildcard query. + */ + public function testSearchEndpointWildcard(): void + { + // Create request with wildcard query + $request = $this->createRequest('GET', '/api/search?q=f*'); + $response = $this->handleRequest($request); + + // Assert successful response + $this->assertResponseStatus(200, $response); + + // Parse JSON response + $body = (string) $response->getBody(); + $data = json_decode($body, true); + + $this->assertIsArray($data); + $this->assertArrayHasKey('rows', $data); + } + + /** + * Test that response is valid JSON. + */ + public function testSearchEndpointReturnsJson(): void + { + $request = $this->createRequest('GET', '/api/search?q=test'); + $response = $this->handleRequest($request); + + // Check content type header + $this->assertTrue($response->hasHeader('Content-Type')); + $contentType = $response->getHeaderLine('Content-Type'); + $this->assertStringContainsString('application/json', $contentType); + } +} diff --git a/app/tests/Search/SearchIndexTest.php b/app/tests/Search/SearchIndexTest.php new file mode 100644 index 00000000..62fa8175 --- /dev/null +++ b/app/tests/Search/SearchIndexTest.php @@ -0,0 +1,192 @@ +ci->get(Config::class); + $config->set('learn.versions.latest', '6.0'); + $config->set('learn.versions.available', [ + '6.0' => '6.0 Beta', + ]); + + // Use the test pages directory + /** @var ResourceLocatorInterface $locator */ + $locator = $this->ci->get(ResourceLocatorInterface::class); + $locator->removeStream('pages'); + $locator->addStream(new ResourceStream('pages', shared: true, readonly: true, path: __DIR__ . '/../pages')); + } + + public function testBuildIndexForVersion(): void + { + $searchIndex = $this->ci->get(SearchIndex::class); + + // Build index for version 6.0 + $count = $searchIndex->buildIndex('6.0'); + + // Should have indexed 9 pages (based on test data structure) + $this->assertSame(9, $count); + } + + public function testBuildIndexForAllVersions(): void + { + $searchIndex = $this->ci->get(SearchIndex::class); + + // Build index for all versions + $count = $searchIndex->buildIndex(null); + + // Should have indexed 9 pages (only 6.0 has test data) + $this->assertSame(9, $count); + } + + public function testIndexPageContent(): void + { + $searchIndex = $this->ci->get(SearchIndex::class); + + // Build index + $searchIndex->buildIndex('6.0'); + + // Use reflection to access protected method + $reflection = new \ReflectionClass($searchIndex); + $getCacheKeyMethod = $reflection->getMethod('getCacheKey'); + + // Get cache key and retrieve index + $cacheKey = $getCacheKeyMethod->invoke($searchIndex, '6.0'); + + /** @var \Illuminate\Cache\Repository $cache */ + $cache = $this->ci->get(\Illuminate\Cache\Repository::class); + $index = $cache->get($cacheKey); + + $this->assertIsArray($index); + $this->assertNotEmpty($index); + + // Check first page structure + $firstPage = $index[0]; + $this->assertArrayHasKey('title', $firstPage); + $this->assertArrayHasKey('slug', $firstPage); + $this->assertArrayHasKey('route', $firstPage); + $this->assertArrayHasKey('content', $firstPage); + $this->assertArrayHasKey('version', $firstPage); + + // Content should be plain text (no HTML tags) + $this->assertStringNotContainsString('<', $firstPage['content']); + $this->assertStringNotContainsString('>', $firstPage['content']); + } + + public function testStripHtmlTags(): void + { + $searchIndex = $this->ci->get(SearchIndex::class); + + // Use reflection to test protected method + $reflection = new \ReflectionClass($searchIndex); + $method = $reflection->getMethod('stripHtmlTags'); + + // Test with HTML content + $html = '

Title

This is a test paragraph.

some code
'; + $plain = $method->invoke($searchIndex, $html); + + $this->assertStringNotContainsString('

', $plain); + $this->assertStringNotContainsString('

', $plain); + $this->assertStringNotContainsString('', $plain); + $this->assertStringContainsString('Title', $plain); + $this->assertStringContainsString('test', $plain); + $this->assertStringContainsString('some code', $plain); + } + + public function testClearIndex(): void + { + $searchIndex = $this->ci->get(SearchIndex::class); + + // Build index + $searchIndex->buildIndex('6.0'); + + // Clear index + $searchIndex->clearIndex('6.0'); + + // Verify cache is cleared + $reflection = new \ReflectionClass($searchIndex); + $getCacheKeyMethod = $reflection->getMethod('getCacheKey'); + $cacheKey = $getCacheKeyMethod->invoke($searchIndex, '6.0'); + + /** @var \Illuminate\Cache\Repository $cache */ + $cache = $this->ci->get(\Illuminate\Cache\Repository::class); + $index = $cache->get($cacheKey); + + $this->assertNull($index); + } + + public function testClearAllIndexes(): void + { + $searchIndex = $this->ci->get(SearchIndex::class); + + // Build index for all versions + $searchIndex->buildIndex(null); + + // Clear all indexes + $searchIndex->clearIndex(null); + + // Verify cache is cleared + $reflection = new \ReflectionClass($searchIndex); + $getCacheKeyMethod = $reflection->getMethod('getCacheKey'); + $cacheKey = $getCacheKeyMethod->invoke($searchIndex, '6.0'); + + /** @var \Illuminate\Cache\Repository $cache */ + $cache = $this->ci->get(\Illuminate\Cache\Repository::class); + $index = $cache->get($cacheKey); + + $this->assertNull($index); + } + + public function testFlattenTree(): void + { + $searchIndex = $this->ci->get(SearchIndex::class); + + // Build index to get tree + $searchIndex->buildIndex('6.0'); + + // Use reflection to access the repository and get tree + /** @var \UserFrosting\Learn\Documentation\DocumentationRepository $repository */ + $repository = $this->ci->get(\UserFrosting\Learn\Documentation\DocumentationRepository::class); + $tree = $repository->getTree('6.0'); + + // Use reflection to test flattenTree + $reflection = new \ReflectionClass($searchIndex); + $method = $reflection->getMethod('flattenTree'); + + $flat = $method->invoke($searchIndex, $tree); + + // Should have 9 pages total + $this->assertCount(9, $flat); + + // Verify they're all PageResource objects + foreach ($flat as $page) { + $this->assertInstanceOf(\UserFrosting\Learn\Documentation\PageResource::class, $page); + } + } +} diff --git a/app/tests/Search/SearchServiceTest.php b/app/tests/Search/SearchServiceTest.php new file mode 100644 index 00000000..0386f666 --- /dev/null +++ b/app/tests/Search/SearchServiceTest.php @@ -0,0 +1,182 @@ +ci->get(Config::class); + $config->set('learn.versions.latest', '6.0'); + $config->set('learn.versions.available', [ + '6.0' => '6.0 Beta', + ]); + + // Use the test pages directory + /** @var ResourceLocatorInterface $locator */ + $locator = $this->ci->get(ResourceLocatorInterface::class); + $locator->removeStream('pages'); + $locator->addStream(new ResourceStream('pages', shared: true, readonly: true, path: __DIR__ . '/../pages')); + + // Build index for testing + $searchIndex = $this->ci->get(SearchIndex::class); + $searchIndex->buildIndex('6.0'); + } + + public function testSearchWithPlainText(): void + { + $searchService = $this->ci->get(SearchService::class); + + // Search for "first" - should match "First page" + $result = $searchService->search('first', '6.0'); + + $this->assertIsArray($result); + $this->assertArrayHasKey('rows', $result); + $this->assertArrayHasKey('count', $result); + $this->assertArrayHasKey('count_filtered', $result); + + $this->assertGreaterThan(0, $result['count_filtered']); + $this->assertNotEmpty($result['rows']); + + // Check structure of first result + $firstResult = $result['rows'][0]; + $this->assertArrayHasKey('title', $firstResult); + $this->assertArrayHasKey('slug', $firstResult); + $this->assertArrayHasKey('route', $firstResult); + $this->assertArrayHasKey('snippet', $firstResult); + $this->assertArrayHasKey('matches', $firstResult); + $this->assertArrayHasKey('version', $firstResult); + } + + public function testSearchWithEmptyQuery(): void + { + $searchService = $this->ci->get(SearchService::class); + + $result = $searchService->search('', '6.0'); + + $this->assertSame(0, $result['count_filtered']); + $this->assertEmpty($result['rows']); + } + + public function testSearchWithWildcard(): void + { + $searchService = $this->ci->get(SearchService::class); + + // Search for "f*" - should match words starting with 'f' + $result = $searchService->search('f*', '6.0'); + + $this->assertGreaterThanOrEqual(0, $result['count_filtered']); + } + + public function testSearchPagination(): void + { + $searchService = $this->ci->get(SearchService::class); + + // Search with pagination + $result = $searchService->search('page', '6.0', 1, 2); + + $this->assertLessThanOrEqual(2, count($result['rows'])); + } + + public function testSearchResultSnippet(): void + { + $searchService = $this->ci->get(SearchService::class); + + $result = $searchService->search('first', '6.0'); + + if (!empty($result['rows'])) { + $firstResult = $result['rows'][0]; + $this->assertIsString($firstResult['snippet']); + $this->assertNotEmpty($firstResult['snippet']); + } + } + + public function testSearchPlainMethod(): void + { + $searchService = $this->ci->get(SearchService::class); + + // Use reflection to test protected method + $reflection = new \ReflectionClass($searchService); + $method = $reflection->getMethod('searchPlain'); + + $content = 'This is a test content with multiple test words.'; + $matches = $method->invoke($searchService, 'test', $content); + + $this->assertIsArray($matches); + $this->assertCount(2, $matches); // Should find 2 matches + } + + public function testGenerateSnippet(): void + { + $searchService = $this->ci->get(SearchService::class); + + // Use reflection to test protected method + $reflection = new \ReflectionClass($searchService); + $method = $reflection->getMethod('generateSnippet'); + + $content = 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. This is the important part. More text follows here.'; + $matchPosition = strpos($content, 'important'); + + if ($matchPosition !== false) { + $snippet = $method->invoke($searchService, $content, $matchPosition); + + $this->assertIsString($snippet); + $this->assertStringContainsString('important', $snippet); + $this->assertStringContainsString('...', $snippet); // Should have ellipsis + } + } + + public function testSearchWithNoIndex(): void + { + // Clear the index + $searchIndex = $this->ci->get(SearchIndex::class); + $searchIndex->clearIndex('6.0'); + + $searchService = $this->ci->get(SearchService::class); + $result = $searchService->search('test', '6.0'); + + $this->assertSame(0, $result['count_filtered']); + $this->assertEmpty($result['rows']); + } + + public function testSearchResultSorting(): void + { + $searchService = $this->ci->get(SearchService::class); + + // Search for a common term that might appear multiple times + $result = $searchService->search('page', '6.0'); + + if (count($result['rows']) > 1) { + // Verify results are sorted by number of matches (descending) + $firstMatches = $result['rows'][0]['matches']; + $lastMatches = $result['rows'][count($result['rows']) - 1]['matches']; + + $this->assertGreaterThanOrEqual($lastMatches, $firstMatches); + } + } +} From 07b03aedba1a01bf981aa40958eaa90990d28fd3 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 13 Jan 2026 18:58:17 +0000 Subject: [PATCH 3/4] Address code review feedback: optimize search performance and improve test robustness Co-authored-by: lcharette <2566513+lcharette@users.noreply.github.com> --- app/src/Search/SearchIndex.php | 17 ++++++++----- app/src/Search/SearchService.php | 25 ++++++++++-------- app/tests/Search/SearchIndexTest.php | 38 +++++++++++++++++++++++----- 3 files changed, 56 insertions(+), 24 deletions(-) diff --git a/app/src/Search/SearchIndex.php b/app/src/Search/SearchIndex.php index f80c3667..66966e78 100644 --- a/app/src/Search/SearchIndex.php +++ b/app/src/Search/SearchIndex.php @@ -129,13 +129,16 @@ protected function indexPage(PageResource $page): array */ protected function stripHtmlTags(string $html): string { - // Convert HTML to plain text, preserving code blocks - // Add space before/after block elements to prevent word concatenation - $html = (string) preg_replace('/<(div|p|h[1-6]|li|pre|code|blockquote)[^>]*>/i', ' $0', $html); - $html = (string) preg_replace('/<\/(div|p|h[1-6]|li|pre|code|blockquote)>/i', '$0 ', $html); - - // Remove script and style tags with their content - $html = (string) preg_replace('/<(script|style)[^>]*>.*?<\/\1>/is', '', $html); + // Combined regex: Add space before/after block elements to prevent word concatenation + $html = (string) preg_replace([ + '/<(div|p|h[1-6]|li|pre|code|blockquote)[^>]*>/i', // Opening tags + '/<\/(div|p|h[1-6]|li|pre|code|blockquote)>/i', // Closing tags + '/<(script|style)[^>]*>.*?<\/\1>/is', // Remove script/style with content + ], [ + ' $0', // Space before opening tags + '$0 ', // Space after closing tags + '', // Remove script/style entirely + ], $html); // Strip remaining HTML tags $text = strip_tags($html); diff --git a/app/src/Search/SearchService.php b/app/src/Search/SearchService.php index 450e346a..920890ab 100644 --- a/app/src/Search/SearchService.php +++ b/app/src/Search/SearchService.php @@ -95,15 +95,23 @@ protected function performSearch(string $query, array $index): array return $results; } - // Determine if query contains wildcards + // Determine if query contains wildcards (check once before loop) $hasWildcards = str_contains($query, '*') || str_contains($query, '?'); + // Pre-compile regex for wildcard searches to avoid recompiling in loop + $wildcardRegex = null; + if ($hasWildcards) { + $pattern = preg_quote($query, '/'); + $pattern = str_replace(['\*', '\?'], ['.*', '.'], $pattern); + $wildcardRegex = '/' . $pattern . '/i'; + } + foreach ($index as $page) { $matches = []; if ($hasWildcards) { - // Use wildcard matching - $matches = $this->searchWithWildcard($query, $page['content']); + // Use wildcard matching with pre-compiled regex + $matches = $this->searchWithWildcard($wildcardRegex, $page['content']); } else { // Use simple case-insensitive search $matches = $this->searchPlain($query, $page['content']); @@ -153,26 +161,21 @@ protected function searchPlain(string $query, string $content): array /** * Search for wildcard pattern matches. * - * @param string $pattern Pattern with wildcards (* and ?) + * @param string $regex Pre-compiled regex pattern * @param string $content * * @return array Array of match positions */ - protected function searchWithWildcard(string $pattern, string $content): array + protected function searchWithWildcard(string $regex, string $content): array { $matches = []; - // Convert wildcard pattern to regex - // Escape special regex characters except * and ? - $regex = preg_quote($pattern, '/'); - $regex = str_replace(['\*', '\?'], ['.*', '.'], $regex); - $regex = '/' . $regex . '/i'; // Case-insensitive - // Split content into words and check each word $words = preg_split('/\s+/', $content); $offset = 0; if ($words === false) { + // Log error if needed in the future, but for now just return empty return $matches; } diff --git a/app/tests/Search/SearchIndexTest.php b/app/tests/Search/SearchIndexTest.php index 62fa8175..4c32dc0b 100644 --- a/app/tests/Search/SearchIndexTest.php +++ b/app/tests/Search/SearchIndexTest.php @@ -50,8 +50,19 @@ public function testBuildIndexForVersion(): void // Build index for version 6.0 $count = $searchIndex->buildIndex('6.0'); - // Should have indexed 9 pages (based on test data structure) - $this->assertSame(9, $count); + // Should have indexed pages (at least some) + $this->assertGreaterThan(0, $count, 'Should have indexed at least one page'); + + // Verify it matches the number of test pages + /** @var \UserFrosting\Learn\Documentation\DocumentationRepository $repository */ + $repository = $this->ci->get(\UserFrosting\Learn\Documentation\DocumentationRepository::class); + + // Use reflection to get pages count + $reflection = new \ReflectionClass($repository); + $method = $reflection->getMethod('getFlattenedTree'); + $flatPages = $method->invoke($repository, '6.0'); + + $this->assertSame(count($flatPages), $count, 'Index count should match actual page count'); } public function testBuildIndexForAllVersions(): void @@ -61,8 +72,8 @@ public function testBuildIndexForAllVersions(): void // Build index for all versions $count = $searchIndex->buildIndex(null); - // Should have indexed 9 pages (only 6.0 has test data) - $this->assertSame(9, $count); + // Should have indexed pages (at least some) + $this->assertGreaterThan(0, $count, 'Should have indexed at least one page'); } public function testIndexPageContent(): void @@ -181,12 +192,27 @@ public function testFlattenTree(): void $flat = $method->invoke($searchIndex, $tree); - // Should have 9 pages total - $this->assertCount(9, $flat); + // Should have multiple pages + $this->assertGreaterThan(0, count($flat), 'Should have at least one page'); // Verify they're all PageResource objects foreach ($flat as $page) { $this->assertInstanceOf(\UserFrosting\Learn\Documentation\PageResource::class, $page); } + + // Verify flat count matches tree structure (all pages including nested) + $countTreePages = function ($pages) use (&$countTreePages) { + $count = 0; + foreach ($pages as $page) { + $count++; + if ($page->getChildren()) { + $count += $countTreePages($page->getChildren()); + } + } + return $count; + }; + + $expectedCount = $countTreePages($tree); + $this->assertSame($expectedCount, count($flat), 'Flattened tree should contain all pages'); } } From 1a1deccd12cac1a507c3332607a58e7ac220beb2 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 13 Jan 2026 19:00:48 +0000 Subject: [PATCH 4/4] Fix error handling in preg_replace and cache retrieval Co-authored-by: lcharette <2566513+lcharette@users.noreply.github.com> --- app/src/Search/SearchIndex.php | 18 +++++++++++++++--- app/src/Search/SearchService.php | 7 ++++++- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/app/src/Search/SearchIndex.php b/app/src/Search/SearchIndex.php index 66966e78..eef4bbfc 100644 --- a/app/src/Search/SearchIndex.php +++ b/app/src/Search/SearchIndex.php @@ -130,7 +130,7 @@ protected function indexPage(PageResource $page): array protected function stripHtmlTags(string $html): string { // Combined regex: Add space before/after block elements to prevent word concatenation - $html = (string) preg_replace([ + $result = preg_replace([ '/<(div|p|h[1-6]|li|pre|code|blockquote)[^>]*>/i', // Opening tags '/<\/(div|p|h[1-6]|li|pre|code|blockquote)>/i', // Closing tags '/<(script|style)[^>]*>.*?<\/\1>/is', // Remove script/style with content @@ -140,14 +140,26 @@ protected function stripHtmlTags(string $html): string '', // Remove script/style entirely ], $html); + // Check if preg_replace failed + if ($result === null) { + // Fallback to original HTML if regex fails + $result = $html; + } + // Strip remaining HTML tags - $text = strip_tags($html); + $text = strip_tags($result); // Decode HTML entities $text = html_entity_decode($text, ENT_QUOTES | ENT_HTML5, 'UTF-8'); // Normalize whitespace - $text = (string) preg_replace('/\s+/', ' ', $text); + $text = preg_replace('/\s+/', ' ', $text); + + // Check if preg_replace failed + if ($text === null) { + // Fallback: at least decode entities from stripped HTML + $text = html_entity_decode(strip_tags($html), ENT_QUOTES | ENT_HTML5, 'UTF-8'); + } return trim($text); } diff --git a/app/src/Search/SearchService.php b/app/src/Search/SearchService.php index 920890ab..b123515d 100644 --- a/app/src/Search/SearchService.php +++ b/app/src/Search/SearchService.php @@ -233,6 +233,11 @@ protected function getIndex(string $version): array $index = $this->cache->get($cacheKey); - return is_array($index) ? $index : []; + // Ensure we return an array even if cache returns null or unexpected type + if (!is_array($index)) { + return []; + } + + return $index; } }