/** One ranked hit returned by the AI search endpoint. */
interface AISearchResult {
  id: string;
  title: string;
  url: string;
  excerpt: string;
  relevanceScore: number;
  metadata: {
    description?: string;
    parent?: string;
    nav_order?: number;
    tags?: string[];
  };
  matchedContent: string;
  sections: Array<{
    id: string;
    title: string;
    level: number;
    content: string;
    url: string;
  }>;
  /** Section chosen as the best match for the query; absent when no section qualified. */
  matchedSection?: {
    id: string;
    title: string;
    level: number;
    content: string;
    url: string;
  };
}

/** Number of results returned when the request does not specify a usable `limit`. */
const DEFAULT_RESULT_LIMIT = 10;

/** Upper bound on `limit` so a single request cannot ask for an unbounded response. */
const MAX_RESULT_LIMIT = 100;

/**
 * Turns the untrusted `limit` field of the request body into a safe result count.
 *
 * Numeric strings are accepted. Anything that is not a finite number falls back to
 * {@link DEFAULT_RESULT_LIMIT}; fractions are floored and the value is clamped to
 * `[0, MAX_RESULT_LIMIT]`.
 */
function normalizeLimit(raw: unknown): number {
  const value = typeof raw === 'string' && raw.trim() !== '' ? Number(raw) : raw;
  if (typeof value !== 'number' || !Number.isFinite(value)) {
    return DEFAULT_RESULT_LIMIT;
  }
  return Math.min(MAX_RESULT_LIMIT, Math.max(0, Math.floor(value)));
}

/**
 * POST handler for the AI search route.
 *
 * Expects a JSON body of the form `{ query: string, limit?: number }` and responds with
 * `{ results, totalFound, query, timestamp }`.
 *
 * Status codes:
 * - 400 when the body is not valid JSON or `query` is missing, not a string, or blank
 * - 404 when the document index contains no documents
 * - 500 for any other failure (the error is logged, not returned to the client)
 */
export async function POST(req: Request) {
  try {
    // A malformed body is a client error, so it is reported as 400 rather than
    // falling through to the generic 500 handler below.
    let body: unknown;
    try {
      body = await req.json();
    } catch {
      return NextResponse.json(
        { error: 'Request body must be valid JSON' },
        { status: 400 }
      );
    }

    // Valid JSON can still be `null`, a number, etc.; treat those as an empty object.
    const { query, limit } = (typeof body === 'object' && body !== null ? body : {}) as {
      query?: unknown;
      limit?: unknown;
    };

    if (typeof query !== 'string' || query.trim() === '') {
      return NextResponse.json(
        { error: 'Query is required and must be a string' },
        { status: 400 }
      );
    }

    // Load or create document index
    let index = documentIndexer.loadIndex();
    if (!index || documentIndexer.needsUpdate()) {
      console.log('Index not found or needs update, creating new index...');
      index = await documentIndexer.indexDocuments();
    }

    if (!index || index.documents.length === 0) {
      return NextResponse.json(
        { error: 'No documents found in index' },
        { status: 404 }
      );
    }

    // Use hybrid search (AI + keyword)
    const searchResults = await performHybridSearch(
      query.trim(),
      index.documents,
      normalizeLimit(limit)
    );

    return NextResponse.json({
      results: searchResults,
      totalFound: searchResults.length,
      query,
      timestamp: new Date().toISOString()
    });
  } catch (error) {
    console.error('AI Search API error:', error);
    return NextResponse.json(
      { error: 'Failed to perform AI search' },
      { status: 500 }
    );
  }
}

/**
 * Runs the keyword scorer and the AI-term scorer over `documents`, merges their scores,
 * and returns the `limit` highest-scoring documents.
 *
 * @param query     Raw user query.
 * @param documents Indexed document chunks to search.
 * @param limit     Maximum number of results; negative values are treated as 0.
 * @returns Results ordered by descending `relevanceScore`. When a section of the
 *          document matches well, the result URL points at that section.
 */
async function performHybridSearch(
  query: string,
  documents: DocumentChunk[],
  limit: number
): Promise<AISearchResult[]> {
  // Extract search terms from query: punctuation becomes whitespace and
  // words of one or two characters are dropped.
  const searchTerms = query
    .toLowerCase()
    .replace(/[^\w\s-]/g, ' ')
    .split(/\s+/)
    .filter(word => word.length > 2);

  // Get AI-generated search terms
  const aiTerms = await generateSearchTerms(query);

  // Direct terms first, then AI terms, without duplicates.
  const allSearchTerms = Array.from(new Set([...searchTerms, ...aiTerms]));

  const keywordResults = scoreDocumentsByKeywords(query, searchTerms, documents);
  const aiResults = scoreDocumentsByAI(query, allSearchTerms, documents);
  const combinedResults = combineSearchResults(keywordResults, aiResults);

  return combinedResults
    .sort((a, b) => b.relevanceScore - a.relevanceScore)
    // A negative end index would make slice() count from the end of the array.
    .slice(0, Math.max(0, limit))
    .map(doc => {
      // `matchedSection` is an optional field, so "no match" is represented as
      // undefined rather than null.
      const matchedSection =
        findBestMatchingSection(query, allSearchTerms, doc.sections) ?? undefined;

      return {
        id: doc.id,
        title: doc.title,
        url: matchedSection ? matchedSection.url : doc.url,
        excerpt: doc.excerpt,
        relevanceScore: doc.relevanceScore,
        metadata: doc.metadata,
        matchedContent: doc.matchedContent,
        sections: doc.sections,
        matchedSection
      };
    });
}
/**
 * Counts non-overlapping occurrences of the literal string `needle` in `haystack`.
 * Plain string matching is used so that characters such as `+`, `(` or `.` coming from
 * user input are never interpreted as regular-expression syntax.
 */
function countOccurrences(haystack: string, needle: string): number {
  return needle.length === 0 ? 0 : haystack.split(needle).length - 1;
}

/**
 * Picks the section that best matches the query, or `null` when none is good enough.
 *
 * Scoring per section:
 * - +10 if the title contains the whole query
 * - +5 for every search term found in the title
 * - +3 for every search term found in the content
 * - +1 for every occurrence in the content of each query word longer than two characters
 * - heading level adjustment: H1 -2, H2 +3, H3 +2
 *
 * The winning section must score above 2, or above 5 when it is an H1. On a tie the
 * earliest section wins.
 *
 * The return type stays `any | null`, as in the original declaration, so existing
 * callers that assign the result to differently-typed fields keep compiling.
 */
function findBestMatchingSection(
  query: string,
  searchTerms: string[],
  sections: Array<{
    id: string;
    title: string;
    level: number;
    content: string;
    url: string;
  }>
): any | null {
  if (!sections || sections.length === 0) return null;

  // Everything that depends only on the query is computed once, outside the loop.
  const queryLower = query.toLowerCase();
  const termsLower = searchTerms.map(term => term.toLowerCase());
  const queryWords = queryLower.split(/\s+/).filter(word => word.length > 2);

  let bestSection: (typeof sections)[number] | null = null;
  let bestScore = 0;

  for (const section of sections) {
    let score = 0;
    const titleLower = section.title.toLowerCase();
    const contentLower = section.content.toLowerCase();

    // Title matching (highest weight)
    if (titleLower.includes(queryLower)) {
      score += 10;
    }

    for (const term of termsLower) {
      if (titleLower.includes(term)) score += 5;
      if (contentLower.includes(term)) score += 3;
    }

    // Word frequency in content
    for (const word of queryWords) {
      score += countOccurrences(contentLower, word);
    }

    // H1 headings are usually too general; prefer H2 and H3 for specific topics.
    if (section.level === 1) {
      score -= 2;
    } else if (section.level === 2) {
      score += 3;
    } else if (section.level === 3) {
      score += 2;
    }

    if (score > bestScore) {
      bestScore = score;
      bestSection = section;
    }
  }

  // Require a higher score for H1 headings since they are usually too general.
  const minScore = bestSection !== null && bestSection.level === 1 ? 5 : 2;
  return bestScore > minScore ? bestSection : null;
}

// Synonym mapping for better search results
const SYNONYM_MAP: Record<string, string[]> = {
  'caching': ['cache', 'node cache', 'python cache', 'memory', 'storage', 'performance'],
  'cache': ['caching', 'node cache', 'python cache', 'memory', 'storage', 'performance'],
  'storage': ['cache', 'caching', 'database', 'persistence', 'data'],
  'database': ['storage', 'data', 'persistence', 'resilientdb'],
  'consensus': ['agreement', 'pbft', 'consensus management', 'voting'],
  'network': ['communication', 'network communication', 'messaging', 'replica'],
  'transaction': ['txn', 'transaction execution', 'execution', 'processing'],
  'client': ['client interaction', 'kv client', 'utxo client', 'contract client'],
  'configuration': ['config', 'setup', 'resdbconfig', 'settings'],
  'checkpointing': ['checkpoint', 'recovery', 'backup', 'snapshot'],
  'graphql': ['resilientdb graphql', 'api', 'query', 'mutation'],
  'orm': ['resdb orm', 'object relational mapping', 'database orm'],
  'vault': ['resvault', 'secure storage', 'encryption'],
  'lens': ['reslens', 'monitoring', 'observability', 'metrics'],
  'cli': ['rescli', 'command line', 'terminal', 'tools']
};

/**
 * Expands a query into its lowercase words plus synonyms from {@link SYNONYM_MAP}.
 *
 * Result order: the query words, then synonyms of words that are exact map keys, then
 * synonyms of keys that contain a word or are contained in one. Duplicates are removed.
 *
 * @returns An empty array for an empty or whitespace-only query.
 */
function expandSearchTerms(query: string): string[] {
  // split() yields '' for leading/trailing whitespace; an empty term would
  // partial-match every key below and drag in the whole synonym table.
  const terms = query
    .toLowerCase()
    .split(/\s+/)
    .filter(term => term.length > 0);

  // Add original terms
  const expandedTerms = new Set<string>(terms);

  // Add synonyms for exact key matches. The own-property check keeps words such as
  // "constructor" from resolving to members inherited from Object.prototype.
  for (const term of terms) {
    if (Object.prototype.hasOwnProperty.call(SYNONYM_MAP, term)) {
      SYNONYM_MAP[term].forEach(synonym => expandedTerms.add(synonym));
    }
  }

  // Add partial matches (e.g., "caching" should match "node cache")
  for (const term of terms) {
    for (const [key, synonyms] of Object.entries(SYNONYM_MAP)) {
      if (key.includes(term) || term.includes(key)) {
        synonyms.forEach(synonym => expandedTerms.add(synonym));
      }
    }
  }

  return Array.from(expandedTerms);
}
/**
 * Asks the DeepSeek chat API for additional search terms and merges them with the
 * locally expanded terms from `expandSearchTerms`.
 *
 * Locally expanded terms come first, followed by the AI suggestions; the merged list is
 * de-duplicated and capped at 12 entries. If the API key is not configured, the request
 * fails, or the response has an unexpected shape, the locally expanded terms are
 * returned on their own.
 */
async function generateSearchTerms(query: string): Promise<string[]> {
  // First, expand with synonyms. Computed once so the fallback path can reuse it.
  const expandedTerms = expandSearchTerms(query);

  const apiKey = process.env.DEEPSEEK_API_KEY;
  if (!apiKey) {
    // Without a key the request could only fail; skip the network round trip.
    console.warn('DEEPSEEK_API_KEY is not set; using locally expanded search terms only');
    return expandedTerms;
  }

  try {
    const response = await fetch('https://api.deepseek.com/v1/chat/completions', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Authorization': `Bearer ${apiKey}`,
      },
      body: JSON.stringify({
        model: 'deepseek-chat',
        messages: [
          {
            role: 'system',
            content: `You are a search term generator for ResilientDB documentation. Given a user query, generate 5-8 relevant search terms that would help find related documentation content.

Focus on:
- Technical terms and concepts
- Related features and components
- Synonyms and alternative terms
- Specific ResilientDB terminology
- Include terms like "node cache", "python cache", "resilientdb graphql", etc.

Return only the search terms, one per line, without any other text.`
          },
          {
            role: 'user',
            content: `Generate search terms for: "${query}"`
          }
        ],
        max_tokens: 200,
        temperature: 0.3
      }),
    });

    if (!response.ok) {
      throw new Error('Failed to generate search terms');
    }

    // The response is external data: read the field defensively instead of
    // assuming `choices` exists and is non-empty.
    const data = (await response.json()) as {
      choices?: Array<{ message?: { content?: unknown } }>;
    } | null;
    const content = data?.choices?.[0]?.message?.content;

    const aiTerms =
      typeof content === 'string'
        ? content
            .split('\n')
            // Drop a leading list marker ("- ", "* ", "1. ", "2) "); a term that
            // keeps its marker can never match document text.
            .map(line => line.trim().replace(/^(?:[-*•]|\d+[.)])\s+/, '').trim())
            .filter(term => term.length > 0)
        : [];

    // Combine AI terms with expanded terms
    const uniqueTerms = Array.from(new Set([...expandedTerms, ...aiTerms]));

    return uniqueTerms.slice(0, 12); // Increased limit for better coverage
  } catch (error) {
    console.error('Error generating search terms:', error);
    // Fallback to expanded terms
    return expandedTerms;
  }
}

/** Returns the match plus up to 50 characters of context on either side. */
function excerptAround(content: string, matchStart: number, matchLength: number): string {
  return content.substring(
    Math.max(0, matchStart - 50),
    Math.min(content.length, matchStart + matchLength + 50)
  );
}

/**
 * Finds the first non-empty keyword that contains `termLower` or is contained in it.
 * Empty keywords are skipped because every string "contains" the empty string.
 * NOTE(review): keywords are compared as-is, which presumably relies on the indexer
 * storing them in lowercase — confirm against the indexer.
 */
function findMatchingKeyword(keywords: string[], termLower: string): string | undefined {
  return keywords.find(
    keyword => keyword.length > 0 && (keyword.includes(termLower) || termLower.includes(keyword))
  );
}

/**
 * Scores documents against the user's own search terms.
 *
 * Per term: keyword match +20, exact title match +25 (otherwise title contains +15),
 * URL contains +18, content contains +8. Whole-query phrase: in title +30, in content
 * +12. Documents scoring 0 are dropped.
 *
 * `matchedContent` is the first piece of evidence found (keyword, title, URL or a
 * content excerpt); an exact title or phrase-in-title match overrides it with the
 * title, and the document excerpt is used when nothing specific matched.
 *
 * Empty terms are ignored, and the query is trimmed before phrase matching.
 */
function scoreDocumentsByKeywords(
  query: string,
  searchTerms: string[],
  documents: DocumentChunk[]
): (DocumentChunk & { relevanceScore: number; matchedContent: string })[] {
  const queryLower = query.trim().toLowerCase();
  const termsLower = searchTerms
    .map(term => term.toLowerCase())
    .filter(term => term.length > 0);

  const scoredDocs = documents.map(doc => {
    let score = 0;
    let matchedContent = '';

    const contentLower = doc.content.toLowerCase();
    const titleLower = doc.title.toLowerCase();
    const urlLower = doc.url.toLowerCase();

    for (const termLower of termsLower) {
      // Keyword matching (highest priority)
      const matchedKeyword = findMatchingKeyword(doc.keywords, termLower);
      if (matchedKeyword !== undefined) {
        score += 20;
        if (!matchedContent) {
          matchedContent = `Keyword match: ${matchedKeyword}`;
        }
      }

      // Exact title match
      if (titleLower === termLower) {
        score += 25;
        matchedContent = doc.title;
      } else if (titleLower.includes(termLower)) {
        score += 15;
        if (!matchedContent) matchedContent = doc.title;
      }

      // URL/path matching (e.g., "node cache" in URL)
      if (urlLower.includes(termLower)) {
        score += 18;
        if (!matchedContent) matchedContent = `URL match: ${doc.url}`;
      }

      // Content matching
      const termIndex = contentLower.indexOf(termLower);
      if (termIndex !== -1) {
        score += 8;
        if (!matchedContent) {
          matchedContent = excerptAround(doc.content, termIndex, termLower.length);
        }
      }
    }

    if (queryLower.length > 0) {
      // Exact phrase matching in title
      if (titleLower.includes(queryLower)) {
        score += 30;
        matchedContent = doc.title;
      }

      // Exact phrase matching in content
      const phraseIndex = contentLower.indexOf(queryLower);
      if (phraseIndex !== -1) {
        score += 12;
        if (!matchedContent) {
          matchedContent = excerptAround(doc.content, phraseIndex, queryLower.length);
        }
      }
    }

    // Use excerpt if no specific content matched
    if (!matchedContent) {
      matchedContent = doc.excerpt;
    }

    return {
      ...doc,
      relevanceScore: score,
      matchedContent: matchedContent.trim()
    };
  });

  // Filter out documents that matched nothing
  return scoredDocs.filter(doc => doc.relevanceScore > 0);
}

/**
 * Scores documents against the combined (direct + AI-generated) term list.
 *
 * Terms whose first position in `searchTerms` is at or beyond `directTermCount` are
 * treated as AI-generated and weighted higher: title 12 vs 8, URL 15 vs 10, content
 * 6 vs 4, keyword 10 vs 6. Whole-query phrase: in title +20, in content +10. Documents
 * scoring 0 are dropped.
 *
 * @param directTermCount Number of leading entries in `searchTerms` assumed to come
 *                        directly from the user's query. Defaults to 3.
 */
function scoreDocumentsByAI(
  query: string,
  searchTerms: string[],
  documents: DocumentChunk[],
  directTermCount: number = 3
): (DocumentChunk & { relevanceScore: number; matchedContent: string })[] {
  const queryLower = query.trim().toLowerCase();

  // Classify each term once instead of once per document.
  const weightedTerms = searchTerms
    .map(term => ({
      termLower: term.toLowerCase(),
      isAiTerm: searchTerms.indexOf(term) >= directTermCount
    }))
    .filter(entry => entry.termLower.length > 0);

  const scoredDocs = documents.map(doc => {
    let score = 0;
    let matchedContent = '';

    const contentLower = doc.content.toLowerCase();
    const titleLower = doc.title.toLowerCase();
    const urlLower = doc.url.toLowerCase();

    for (const { termLower, isAiTerm } of weightedTerms) {
      // Title matching
      if (titleLower.includes(termLower)) {
        score += isAiTerm ? 12 : 8;
        if (!matchedContent) matchedContent = doc.title;
      }

      // URL matching
      if (urlLower.includes(termLower)) {
        score += isAiTerm ? 15 : 10;
        if (!matchedContent) matchedContent = `URL match: ${doc.url}`;
      }

      // Content matching
      const termIndex = contentLower.indexOf(termLower);
      if (termIndex !== -1) {
        score += isAiTerm ? 6 : 4;
        if (!matchedContent) {
          matchedContent = excerptAround(doc.content, termIndex, termLower.length);
        }
      }

      // Keyword matching
      const matchedKeyword = findMatchingKeyword(doc.keywords, termLower);
      if (matchedKeyword !== undefined) {
        score += isAiTerm ? 10 : 6;
        if (!matchedContent) {
          matchedContent = `Semantic match: ${matchedKeyword}`;
        }
      }
    }

    if (queryLower.length > 0) {
      // Exact phrase matching
      if (titleLower.includes(queryLower)) {
        score += 20;
        matchedContent = doc.title;
      }

      const phraseIndex = contentLower.indexOf(queryLower);
      if (phraseIndex !== -1) {
        score += 10;
        if (!matchedContent) {
          matchedContent = excerptAround(doc.content, phraseIndex, queryLower.length);
        }
      }
    }

    // Use excerpt if no specific content matched
    if (!matchedContent) {
      matchedContent = doc.excerpt;
    }

    return {
      ...doc,
      relevanceScore: score,
      matchedContent: matchedContent.trim()
    };
  });

  return scoredDocs.filter(doc => doc.relevanceScore > 0);
}
/**
 * Merges keyword-scored and AI-scored results into one list keyed by document id.
 *
 * Each score is multiplied by its weight. A document present in both lists gets the sum
 * of its two weighted scores and keeps whichever `matchedContent` is longer; a document
 * present in only one list keeps that list's weighted score. The input arrays and their
 * elements are not modified.
 *
 * @param keywordResults Documents scored by the keyword scorer.
 * @param aiResults      Documents scored by the AI-term scorer.
 * @param weights        Multipliers for each list. Defaults to 0.6 (keyword) / 0.4 (AI).
 * @returns Combined documents in first-seen order (keyword results first). Not sorted.
 */
function combineSearchResults<
  T extends { id: string; relevanceScore: number; matchedContent: string }
>(
  keywordResults: T[],
  aiResults: T[],
  weights: { keyword: number; ai: number } = { keyword: 0.6, ai: 0.4 }
): T[] {
  // Typed map: an untyped `new Map()` would be Map<any, any> and hide mistakes below.
  const combinedById = new Map<string, T>();

  // Store copies so the weighted scores never leak back into the caller's objects.
  for (const doc of keywordResults) {
    combinedById.set(doc.id, {
      ...doc,
      relevanceScore: doc.relevanceScore * weights.keyword
    });
  }

  for (const doc of aiResults) {
    const weightedScore = doc.relevanceScore * weights.ai;
    const existing = combinedById.get(doc.id);

    if (existing === undefined) {
      combinedById.set(doc.id, { ...doc, relevanceScore: weightedScore });
      continue;
    }

    // Combine scores
    existing.relevanceScore += weightedScore;
    // Prefer the longer matched content
    if (doc.matchedContent.length > existing.matchedContent.length) {
      existing.matchedContent = doc.matchedContent;
    }
  }

  return Array.from(combinedById.values());
}
Reference specific documentation sections when helpful + +Guidelines: +- Be accurate and factual in your responses +- Use markdown formatting for better readability +- Provide detailed explanations for technical concepts +- Include relevant examples from the documentation +- If you're unsure about something, say so rather than guessing +- Focus on helping users understand ResilientDB better + +Format your responses with: +- **Bold** for emphasis +- \`code blocks\` for technical terms and code +- Bullet points for lists +- > for important notes or tips +- Clear headings for different sections + +Always aim to be helpful, accurate, and educational.`; const response = await fetch('https://api.deepseek.com/v1/chat/completions', { method: 'POST', diff --git a/app/global.css b/app/global.css index 38f9f54..c56a478 100644 --- a/app/global.css +++ b/app/global.css @@ -218,4 +218,61 @@ opacity: 0.8; pointer-events: none; } + + /* Custom Pagefind Search Styles */ + .pagefind-search { + position: relative; + z-index: 10000; + } + + .pagefind-search-overlay { + position: fixed; + top: 0; + left: 0; + right: 0; + bottom: 0; + background: rgba(0, 0, 0, 0.8); + backdrop-filter: blur(8px); + z-index: 10000; + display: flex; + align-items: flex-start; + justify-content: center; + padding-top: 10vh; + } + + .pagefind-search-container { + background: rgba(255, 255, 255, 0.1); + border: 1px solid rgba(255, 255, 255, 0.2); + border-radius: 12px; + backdrop-filter: blur(10px); + min-width: 600px; + max-width: 90vw; + max-height: 80vh; + overflow: hidden; + } + + .pagefind-search-input { + background: transparent; + border: none; + outline: none; + color: rgba(255, 255, 255, 0.9); + font-size: 16px; + flex: 1; + min-width: 0; + } + + .pagefind-search-result { + padding: 16px 20px; + border-bottom: 1px solid rgba(255, 255, 255, 0.05); + cursor: pointer; + transition: background-color 0.2s ease; + } + + .pagefind-search-result:hover { + background: rgba(255, 255, 255, 0.1); + } + + 
.pagefind-search-result:last-child { + border-bottom: none; + } } diff --git a/app/search/page.tsx b/app/search/page.tsx new file mode 100644 index 0000000..6438ab1 --- /dev/null +++ b/app/search/page.tsx @@ -0,0 +1,25 @@ +'use client' + +import { PagefindSearch } from '../../components/SearchBar/PagefindSearch'; + +export default function SearchPage() { + return ( +
+
+

+ Search Documentation +

+ +
+
+ ); +} diff --git a/components/FloatingAssistant.tsx b/components/FloatingAssistant.tsx index b14d72d..0d64ebd 100644 --- a/components/FloatingAssistant.tsx +++ b/components/FloatingAssistant.tsx @@ -47,7 +47,7 @@ export function FloatingAssistant() { const [isAskingQuestion, setIsAskingQuestion] = useState(false); const [debounceTimer, setDebounceTimer] = useState(null); const [lastSelection, setLastSelection] = useState(''); - const [highlightMode, setHighlightMode] = useState(true); + const [highlightMode, setHighlightMode] = useState(false); const [showSettings, setShowSettings] = useState(false); // Function to check if element is within a code editor @@ -185,6 +185,17 @@ Please: setQuestion(''); }; + const handleKeyDown = (e: React.KeyboardEvent) => { + if (e.key === 'Enter' && !e.shiftKey) { + e.preventDefault(); + if (question.trim()) { + setIsAskingQuestion(true); + getExplanation(selectedText || '', question); + setQuestion(''); + } + } + }; + return ( <>
) => setQuestion(e.target.value)} + onKeyDown={handleKeyDown} style={{ flex: 1 }} disabled={isLoading} autosize diff --git a/components/MantineNavBar/MantineNavBar.tsx b/components/MantineNavBar/MantineNavBar.tsx index 09bc07b..83ebb0d 100644 --- a/components/MantineNavBar/MantineNavBar.tsx +++ b/components/MantineNavBar/MantineNavBar.tsx @@ -20,7 +20,7 @@ export const MantineNavBar = () => { return ( <> - +
diff --git a/components/SearchBar/PagefindSearch.tsx b/components/SearchBar/PagefindSearch.tsx new file mode 100644 index 0000000..705a835 --- /dev/null +++ b/components/SearchBar/PagefindSearch.tsx @@ -0,0 +1,681 @@ +'use client' + +import { useState, useEffect, useRef } from 'react'; +import { useRouter } from 'next/navigation'; + +interface SearchResult { + id: string; + data: () => Promise<{ + url: string; + meta: { + title: string; + description?: string; + }; + excerpt: string; + }>; +} + +interface PagefindSearchProps { + onClose?: () => void; +} + +export function PagefindSearch({ onClose }: PagefindSearchProps) { + const [query, setQuery] = useState(''); + const [results, setResults] = useState([]); + const [isLoading, setIsLoading] = useState(false); + const [isOpen, setIsOpen] = useState(false); + const [pagefind, setPagefind] = useState(null); + const [mode, setMode] = useState<'search' | 'agent'>('search'); + const [agentResults, setAgentResults] = useState([]); + const [isAgentLoading, setIsAgentLoading] = useState(false); + const inputRef = useRef(null); + const resultsRef = useRef(null); + const router = useRouter(); + + // Load Pagefind + useEffect(() => { + async function loadPagefind() { + if (typeof window !== 'undefined' && typeof (window as any).pagefind === 'undefined') { + try { + (window as any).pagefind = await import( + // @ts-expect-error pagefind.js generated after build + /* webpackIgnore: true */ '/_pagefind/pagefind.js' + ); + setPagefind((window as any).pagefind); + } catch (e) { + console.log('Pagefind not available in development'); + // Mock pagefind for development + (window as any).pagefind = { + search: () => ({ results: [] }), + debouncedSearch: () => ({ results: [] }) + }; + setPagefind((window as any).pagefind); + } + } else if (typeof window !== 'undefined') { + setPagefind((window as any).pagefind); + } + } + loadPagefind(); + }, []); + + // Highlight search terms in text + const highlightSearchTerms = (text: string, 
query: string): string => { + if (!query.trim()) return text; + + const terms = query.toLowerCase().split(/\s+/).filter(term => term.length > 2); + let highlightedText = text; + + terms.forEach(term => { + const regex = new RegExp(`(${term})`, 'gi'); + highlightedText = highlightedText.replace(regex, '$1'); + }); + + return highlightedText; + }; + + // Handle search + const handleSearch = async (searchQuery: string) => { + if (!pagefind || !searchQuery.trim()) { + setResults([]); + return; + } + + setIsLoading(true); + try { + const search = await pagefind.debouncedSearch(searchQuery); + const results = search.results || []; + + // Add highlighting to results + const highlightedResults = results.map((result: any) => ({ + ...result, + highlightedExcerpt: highlightSearchTerms(result.excerpt || '', searchQuery) + })); + + setResults(highlightedResults); + } catch (error) { + console.error('Search error:', error); + setResults([]); + } finally { + setIsLoading(false); + } + }; + + // Handle AI Agent search + const handleAgentSearch = async (searchQuery: string) => { + if (!searchQuery.trim()) return; + + setIsAgentLoading(true); + setAgentResults([]); + + try { + const response = await fetch('/api/ai-search', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + query: searchQuery, + limit: 10 + }), + }); + + if (!response.ok) throw new Error('Failed to perform AI agent search'); + + const data = await response.json(); + const results = data.results || []; + + // Add highlighting to agent results + const highlightedResults = results.map((result: any) => ({ + ...result, + highlightedExcerpt: highlightSearchTerms(result.excerpt || '', searchQuery) + })); + + setAgentResults(highlightedResults); + } catch (error) { + console.error('AI Agent search error:', error); + setAgentResults([]); + } finally { + setIsAgentLoading(false); + } + }; + + // Handle Enter key and search button for agent mode + const handleSubmit = (e?: 
React.FormEvent) => { + if (e) e.preventDefault(); + if (query.trim() && mode === 'agent') { + handleAgentSearch(query); + } + }; + + // Handle input change with debouncing (only for search mode) + useEffect(() => { + const timer = setTimeout(() => { + if (query && mode === 'search') { + handleSearch(query); + } else if (mode === 'search') { + setResults([]); + } + }, 300); + + return () => clearTimeout(timer); + }, [query, pagefind, mode]); + + // Handle keyboard shortcuts + useEffect(() => { + const handleKeyDown = (e: KeyboardEvent) => { + if ((e.metaKey || e.ctrlKey) && e.key === 'k') { + e.preventDefault(); + setIsOpen(true); + setTimeout(() => inputRef.current?.focus(), 100); + } + if ((e.metaKey || e.ctrlKey) && e.key === 'j') { + e.preventDefault(); + setMode(mode === 'search' ? 'agent' : 'search'); + } + if (e.key === 'Escape') { + setIsOpen(false); + setQuery(''); + setResults([]); + setAgentResults([]); + onClose?.(); + } + }; + + document.addEventListener('keydown', handleKeyDown); + return () => document.removeEventListener('keydown', handleKeyDown); + }, [onClose, mode]); + + // Clean URL by removing .html extension and fixing paths + const cleanUrl = (url: string): string => { + // Remove .html extension + let cleanUrl = url.replace(/\.html$/, ''); + + // Handle index pages - convert /docs/index to /docs + if (cleanUrl.endsWith('/index')) { + cleanUrl = cleanUrl.replace(/\/index$/, ''); + } + + // Ensure URL starts with / if it doesn't already + if (!cleanUrl.startsWith('/')) { + cleanUrl = '/' + cleanUrl; + } + + return cleanUrl; + }; + + // Handle result click + const handleResultClick = async (result: SearchResult) => { + try { + const data = await result.data(); + const cleanPath = cleanUrl(data.url); + router.push(cleanPath); + setIsOpen(false); + setQuery(''); + setResults([]); + onClose?.(); + } catch (error) { + console.error('Error navigating to result:', error); + } + }; + + if (!isOpen) { + return ( + + ); + } + + return ( +
{ + if (e.target === e.currentTarget) { + setIsOpen(false); + setQuery(''); + setResults([]); + onClose?.(); + } + }} + > +
+ {/* Search Input */} +
+ {/* Mode Toggle */} +
+ + +
+ +
+ {mode === 'search' ? ( + + + + ) : ( + + + + )} + setQuery(e.target.value)} + onKeyDown={(e) => { + if (e.key === 'Enter' && mode === 'agent') { + e.preventDefault(); + handleSubmit(); + } + }} + style={{ + background: 'transparent', + border: 'none', + outline: 'none', + color: 'rgba(255, 255, 255, 0.9)', + fontSize: 16, + flex: 1, + minWidth: 0, + }} + /> + {mode === 'agent' && query.trim() && !isAgentLoading && ( + + )} + {(isLoading || isAgentLoading) && ( +
+ {mode === 'search' ? 'Searching...' : 'Our agent is searching...'} +
+ )} +
+ + {/* Keyboard shortcuts hint */} +
+ ⌘K to open + ⌘J to switch mode + {mode === 'agent' && Enter to ask our agent} + Esc to close +
+
+ + {/* Results */} +
+ {mode === 'search' ? ( + // Search Results + results.length > 0 ? ( + results.map((result, index) => ( + handleResultClick(result)} + /> + )) + ) : query && !isLoading ? ( +
+ No results found for "{query}" +
+ ) : !query ? ( +
+ Start typing to search... +
+ ) : null + ) : ( + // Agent Results + agentResults.length > 0 ? ( + agentResults.map((result, index) => ( +
{ + router.push(result.url); + setIsOpen(false); // Auto-close search bar + setQuery(''); + setAgentResults([]); + }} + style={{ + padding: '16px 20px', + borderBottom: '1px solid rgba(255, 255, 255, 0.05)', + cursor: 'pointer', + transition: 'background-color 0.2s ease', + }} + onMouseEnter={(e) => { + e.currentTarget.style.background = 'rgba(255, 255, 255, 0.1)'; + }} + onMouseLeave={(e) => { + e.currentTarget.style.background = 'transparent'; + }} + > +
+ {result.title} + {result.matchedSection && ( + + → {result.matchedSection.title} + + )} +
+
+
+
+ + + + Ask Our Agent + + Beta + +
+ + {result.url} + + Score: {result.relevanceScore.toFixed(1)} + {result.matchedSection && ( + <> + + 📍 Section match + + )} +
+
+ )) + ) : query && !isAgentLoading ? ( +
+ No results found for "{query}" +
+ ) : !query ? ( +
+ Start typing to ask our agent... +
+ ) : null + )} +
+
+
+ ); +} + +function SearchResultItem({ result, onClick }: { result: SearchResult; onClick: () => void }) { + const [data, setData] = useState(null); + + useEffect(() => { + async function fetchData() { + try { + const resultData = await result.data(); + setData(resultData); + } catch (error) { + console.error('Error fetching result data:', error); + } + } + fetchData(); + }, [result]); + + if (!data) { + return ( +
+ Loading... +
+ ); + } + + return ( +
{ + e.currentTarget.style.background = 'rgba(255, 255, 255, 0.1)'; + }} + onMouseLeave={(e) => { + e.currentTarget.style.background = 'transparent'; + }} + > +
+ {data.meta.title} +
+ {data.meta.description && ( +
+ {data.meta.description} +
+ )} +
+
+ {data.url.replace(/\.html$/, '').replace(/\/index$/, '')} +
+
+ ); +} diff --git a/components/SearchBar/SearchBar.tsx b/components/SearchBar/SearchBar.tsx index 7195ec2..ca26cbb 100644 --- a/components/SearchBar/SearchBar.tsx +++ b/components/SearchBar/SearchBar.tsx @@ -1,47 +1,7 @@ -/* -* Licensed to the Apache Software Foundation (ASF) under one -* or more contributor license agreements. See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. The ASF licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an -* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -* KIND, either express or implied. See the License for the -* specific language governing permissions and limitations -* under the License. -*/ - 'use client' -import { Button, Group, Kbd, Text } from '@mantine/core'; -import { IconSearch } from '@tabler/icons-react'; +import { PagefindSearch } from './PagefindSearch'; export function SearchBar() { - const handleSearch = () => { - // This triggers Nextra's built-in search using keyboard shortcut - const event = new KeyboardEvent('keydown', { - key: 'k', - metaKey: true, // For Mac - ctrlKey: true, // For Windows/Linux - }); - document.dispatchEvent(event); - }; - - return ( - - ); + return ; } \ No newline at end of file diff --git a/components/landing/Header.tsx b/components/landing/Header.tsx index c85ff7f..00afefd 100644 --- a/components/landing/Header.tsx +++ b/components/landing/Header.tsx @@ -5,7 +5,8 @@ import { SearchBar } from '../SearchBar/SearchBar'; export default function Header() { return (
-
); diff --git a/components/landing/PulsingCircle.tsx b/components/landing/PulsingCircle.tsx index 126cd31..7ab3e7f 100644 --- a/components/landing/PulsingCircle.tsx +++ b/components/landing/PulsingCircle.tsx @@ -51,3 +51,4 @@ export default function PulsingCircle() { + diff --git a/content/resilientdb/chapters/01_client_interaction.md b/content/resilientdb/chapters/01_client_interaction.md index 61ab86f..448d104 100644 --- a/content/resilientdb/chapters/01_client_interaction.md +++ b/content/resilientdb/chapters/01_client_interaction.md @@ -1,25 +1,6 @@ - - --- layout: default -title: 'Client Interaction' +title: 'Chapter 1: Client Interaction' parent: 'ResilientDB' nav_order: 1 --- diff --git a/content/resilientdb/chapters/02_network_communication.md b/content/resilientdb/chapters/02_network_communication.md index af46b47..a0d3db2 100644 --- a/content/resilientdb/chapters/02_network_communication.md +++ b/content/resilientdb/chapters/02_network_communication.md @@ -1,25 +1,6 @@ - - --- layout: default -title: 'Network Communication' +title: 'Chapter 2: Network Communication' parent: 'ResilientDB' nav_order: 2 --- diff --git a/content/resilientdb/chapters/03_consensus_management.md b/content/resilientdb/chapters/03_consensus_management.md index 179fb7a..97bfc76 100644 --- a/content/resilientdb/chapters/03_consensus_management.md +++ b/content/resilientdb/chapters/03_consensus_management.md @@ -1,25 +1,6 @@ - - --- layout: default -title: 'Consensus Management' +title: 'Chapter 3: Consensus Management' parent: 'ResilientDB' nav_order: 3 --- diff --git a/content/resilientdb/chapters/04_message_transaction_collection.md b/content/resilientdb/chapters/04_message_transaction_collection.md index 08f95d2..4723635 100644 --- a/content/resilientdb/chapters/04_message_transaction_collection.md +++ b/content/resilientdb/chapters/04_message_transaction_collection.md @@ -1,25 +1,6 @@ - - --- layout: default -title: 'Message/Transaction Collection' +title: 'Chapter 4: 
Message/Transaction Collection' parent: 'ResilientDB' nav_order: 4 --- diff --git a/content/resilientdb/chapters/05_transaction_execution.md b/content/resilientdb/chapters/05_transaction_execution.md index 594ff85..6a7f7e1 100644 --- a/content/resilientdb/chapters/05_transaction_execution.md +++ b/content/resilientdb/chapters/05_transaction_execution.md @@ -1,25 +1,6 @@ - - --- layout: default -title: 'Transaction Execution' +title: 'Chapter 5: Transaction Execution' parent: 'ResilientDB' nav_order: 5 --- diff --git a/content/resilientdb/chapters/06_storage_layer.md b/content/resilientdb/chapters/06_storage_layer.md index 4730e63..f09eb1b 100644 --- a/content/resilientdb/chapters/06_storage_layer.md +++ b/content/resilientdb/chapters/06_storage_layer.md @@ -1,25 +1,6 @@ - - --- layout: default -title: 'Storage Layer' +title: 'Chapter 6: Storage Layer' parent: 'ResilientDB' nav_order: 6 --- diff --git a/content/resilientdb/chapters/07_checkpointing.md b/content/resilientdb/chapters/07_checkpointing.md index 697b8c1..abd304b 100644 --- a/content/resilientdb/chapters/07_checkpointing.md +++ b/content/resilientdb/chapters/07_checkpointing.md @@ -1,25 +1,6 @@ - - --- layout: default -title: 'Checkpointing & Recovery' +title: 'Chapter 7: Checkpointing & Recovery' parent: 'ResilientDB' nav_order: 7 --- diff --git a/content/resilientdb/chapters/08_resilientdb_configuration.md b/content/resilientdb/chapters/08_resilientdb_configuration.md index 20fe1e6..26bec7f 100644 --- a/content/resilientdb/chapters/08_resilientdb_configuration.md +++ b/content/resilientdb/chapters/08_resilientdb_configuration.md @@ -1,25 +1,6 @@ - - --- layout: default -title: 'ResilientDB Configuration' +title: 'Chapter 8: ResilientDB Configuration' parent: 'ResilientDB' nav_order: 8 --- diff --git a/lib/documentIndexer.ts b/lib/documentIndexer.ts new file mode 100644 index 0000000..4914dfb --- /dev/null +++ b/lib/documentIndexer.ts @@ -0,0 +1,480 @@ +/* +* Licensed to the Apache Software Foundation 
/** A heading-delimited section of a document, addressable by anchor. */
export interface DocumentSection {
  id: string;
  title: string;
  level: number;
  content: string;
  url: string;
}

/** One searchable chunk of a document plus the document-level data shared by all its chunks. */
export interface DocumentChunk {
  id: string;
  title: string;
  url: string;
  content: string;
  excerpt: string;
  sections: DocumentSection[];
  keywords: string[];
  metadata: {
    description?: string;
    parent?: string;
    nav_order?: number;
    tags?: string[];
  };
  chunkIndex: number;
  totalChunks: number;
}

/** The persisted search index. */
export interface DocumentIndex {
  documents: DocumentChunk[];
  lastUpdated: string;
  totalDocuments: number;
}

/** Front matter as parsed from YAML; every value is untrusted until narrowed. */
type FrontMatter = Record<string, unknown>;

/**
 * Builds, persists and loads a keyword/section index over the Markdown/MDX
 * files below `<cwd>/content`.
 */
class DocumentIndexer {
  /** An index older than this is reported as stale by `needsUpdate()`. */
  private static readonly MAX_INDEX_AGE_MS = 60 * 60 * 1000;

  /** Upper bound, in characters, for the content kept for an H1 section. */
  private static readonly H1_CONTENT_LIMIT = 500;

  /** Sections whose content is not longer than this are dropped as noise. */
  private static readonly MIN_SECTION_LENGTH = 10;

  /** Built once instead of on every `isStopWord` call. */
  private static readonly STOP_WORDS: ReadonlySet<string> = new Set([
    'the', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by',
    'from', 'up', 'about', 'into', 'through', 'during', 'before', 'after',
    'above', 'below', 'between', 'among', 'this', 'that', 'these', 'those',
    'i', 'you', 'he', 'she', 'it', 'we', 'they', 'me', 'him', 'her', 'us', 'them',
    'my', 'your', 'his', 'its', 'our', 'their', 'mine', 'yours', 'ours', 'theirs',
    'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had',
    'do', 'does', 'did', 'will', 'would', 'could', 'should', 'may', 'might', 'must',
    'can', 'shall', 'a', 'an', 'some', 'any', 'all', 'both', 'each', 'every', 'other',
    'another', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too',
    'very', 'just', 'now', 'here', 'there', 'where', 'when', 'why', 'how', 'what',
    'which', 'who', 'whom', 'whose', 'whether', 'if', 'because', 'as', 'until',
    'while', 'whereas', 'although', 'though', 'unless', 'since', 'once', 'whenever',
    'wherever', 'however', 'therefore', 'thus', 'hence', 'moreover', 'furthermore',
    'nevertheless', 'nonetheless', 'meanwhile', 'consequently', 'accordingly',
  ]);

  private readonly contentDir: string;
  private readonly indexFile: string;

  constructor() {
    this.contentDir = path.join(process.cwd(), 'content');
    // NOTE(review): the file is written gzip-compressed despite its `.json`
    // extension, and it lives under `public/`, which is presumably served
    // statically — confirm both are intended.
    this.indexFile = path.join(process.cwd(), 'public', 'document-index.json');
  }

  /** Returns `value` when it is a string, otherwise `undefined`. */
  private static asString(value: unknown): string | undefined {
    return typeof value === 'string' ? value : undefined;
  }

  /** Returns `value` when it is a finite number, otherwise `undefined`. */
  private static asNumber(value: unknown): number | undefined {
    return typeof value === 'number' && Number.isFinite(value) ? value : undefined;
  }

  /** Returns the string members of `value` when it is an array, otherwise `[]`. */
  private static asStringArray(value: unknown): string[] {
    return Array.isArray(value)
      ? value.filter((item): item is string => typeof item === 'string')
      : [];
  }

  /** Structural check applied to parsed index files before they are trusted. */
  private static isDocumentIndex(value: unknown): value is DocumentIndex {
    if (typeof value !== 'object' || value === null) return false;
    const candidate = value as Record<string, unknown>;
    return Array.isArray(candidate.documents) && typeof candidate.lastUpdated === 'string';
  }

  /** Lower-cases `text`, blanks out punctuation (keeping `-` and `_`) and splits on whitespace. */
  private static tokenize(text: string): string[] {
    return text
      .toLowerCase()
      .replace(/[^\w\s-]/g, ' ')
      .split(/\s+/);
  }

  /**
   * Collects search keywords for a document.
   *
   * Sources: title and URL words longer than 2 characters, the 10 most
   * frequent non-stop-words (longer than 3 characters) among the first 500
   * characters of `content`, the front-matter `tags`, and the `parent`.
   */
  private extractKeywords(
    title: string,
    content: string,
    url: string,
    metadata: FrontMatter
  ): string[] {
    const keywords = new Set<string>();

    for (const source of [title, url]) {
      for (const word of DocumentIndexer.tokenize(source)) {
        if (word.length > 2) keywords.add(word);
      }
    }

    // Only the head of the content is scanned, to bound the work per document.
    const wordCounts = new Map<string, number>();
    for (const word of DocumentIndexer.tokenize(content.substring(0, 500))) {
      if (word.length > 3 && !this.isStopWord(word)) {
        wordCounts.set(word, (wordCounts.get(word) ?? 0) + 1);
      }
    }
    Array.from(wordCounts.entries())
      .sort((a, b) => b[1] - a[1])
      .slice(0, 10)
      .forEach(([word]) => keywords.add(word));

    // Front matter is user-authored YAML: `tags` may be missing or not a list.
    for (const tag of DocumentIndexer.asStringArray(metadata.tags)) {
      keywords.add(tag.toLowerCase());
    }

    const parent = DocumentIndexer.asString(metadata.parent);
    if (parent) {
      keywords.add(parent.toLowerCase());
    }

    return Array.from(keywords);
  }

  /** Whether `word` (already lower-cased) is a common English stop word. */
  private isStopWord(word: string): boolean {
    return DocumentIndexer.STOP_WORDS.has(word);
  }

  /**
   * Produces a heading anchor following the github-slugger algorithm:
   * lower-case, drop punctuation, turn every whitespace character into `-`
   * (no collapsing or trimming), and suffix repeats with `-1`, `-2`, ….
   *
   * NOTE(review): this assumes the site renders heading ids with
   * github-slugger (it is present in the lockfile) — confirm against the MDX
   * pipeline. The character class is an approximation of that library's
   * removal regex.
   */
  private static uniqueSlug(title: string, seen: Map<string, number>): string {
    const base = title
      .toLowerCase()
      .replace(/[^\p{L}\p{M}\p{N}\p{Pc}\s-]/gu, '')
      .replace(/\s/g, '-');

    let slug = base;
    let counter = seen.get(base) ?? 0;
    while (seen.has(slug)) {
      counter += 1;
      slug = `${base}-${counter}`;
    }
    seen.set(base, counter);
    seen.set(slug, 0);
    return slug;
  }

  /** First paragraph of `body` (lines up to the first blank line), capped at `H1_CONTENT_LIMIT`. */
  private static firstParagraph(body: string[]): string {
    const paragraph: string[] = [];
    for (const line of body) {
      if (line.trim() === '') {
        if (paragraph.length > 0) break; // blank line after text ends the paragraph
        continue; // skip blank lines between the heading and its first paragraph
      }
      paragraph.push(line);
    }
    return paragraph.join(' ').trim().substring(0, DocumentIndexer.H1_CONTENT_LIMIT);
  }

  /**
   * Splits raw Markdown into one section per heading.
   *
   * Lines inside fenced code blocks are never treated as headings, so shell
   * comments such as `# install` do not create bogus sections. An H1 keeps
   * only its first paragraph (at most 500 characters); every other level
   * keeps all non-blank lines up to the next heading. Sections with 10 or
   * fewer characters of content are omitted.
   *
   * @param content raw Markdown without front matter
   * @param baseUrl page URL the `#anchor` is appended to
   */
  private extractSections(content: string, baseUrl: string): DocumentSection[] {
    // Split on CRLF as well: `.` does not match `\r`, so with a plain `\n`
    // split no heading would ever be detected in CRLF files.
    const lines = content.split(/\r?\n/);

    const headings: Array<{
      lineIndex: number;
      level: number;
      title: string;
      id: string;
    }> = [];
    const seenSlugs = new Map<string, number>();
    let openFence = '';

    // First pass: locate headings outside fenced code.
    for (let i = 0; i < lines.length; i++) {
      const fenceMatch = lines[i].match(/^\s*(`{3,}|~{3,})/);
      if (fenceMatch) {
        const marker = fenceMatch[1][0];
        if (openFence === '') {
          openFence = marker;
        } else if (openFence === marker) {
          openFence = '';
        }
        continue;
      }
      if (openFence !== '') continue;

      const headerMatch = lines[i].match(/^(#{1,6})\s+(.+)$/);
      if (!headerMatch) continue;

      const title = headerMatch[2].trim();
      headings.push({
        lineIndex: i,
        level: headerMatch[1].length,
        title,
        id: DocumentIndexer.uniqueSlug(title, seenSlugs),
      });
    }

    // Second pass: the body of a heading runs up to the next heading.
    const sections: DocumentSection[] = [];
    headings.forEach((heading, position) => {
      const next = headings[position + 1];
      const body = lines.slice(heading.lineIndex + 1, next ? next.lineIndex : lines.length);

      const sectionContent =
        heading.level === 1
          ? DocumentIndexer.firstParagraph(body)
          : body
              .filter(line => line.trim() !== '')
              .join(' ')
              .trim();

      if (sectionContent.length > DocumentIndexer.MIN_SECTION_LENGTH) {
        sections.push({
          id: heading.id,
          title: heading.title,
          level: heading.level,
          content: sectionContent,
          url: `${baseUrl}#${heading.id}`,
        });
      }
    });

    return sections;
  }

  /**
   * Reduces Markdown (front matter already removed) to plain searchable text:
   * drops fenced and inline code, heading markers, emphasis markers, and
   * link/image syntax (keeping the link text / alt text), then flattens
   * newlines to single spaces.
   */
  private cleanMarkdown(content: string): string {
    return content
      .replace(/```[\s\S]*?```/g, '') // fenced code blocks
      .replace(/`[^`]+`/g, '') // inline code
      .replace(/^#{1,6}\s+/gm, '') // heading markers, at line start only
      .replace(/\*\*([^*]+)\*\*/g, '$1') // bold
      .replace(/\*([^*]+)\*/g, '$1') // italic
      .replace(/!\[([^\]]*)\]\([^)]+\)/g, '$1') // images, keep alt text
      .replace(/\[([^\]]+)\]\([^)]+\)/g, '$1') // links, keep text
      .replace(/\n+/g, ' ')
      .trim();
  }

  /**
   * Reads a file and returns its cleaned text content, or `''` when the file
   * cannot be read or parsed.
   */
  private extractContent(filePath: string): string {
    try {
      const { content } = matter(fs.readFileSync(filePath, 'utf-8'));
      return this.cleanMarkdown(content);
    } catch (error) {
      console.error(`Error reading file ${filePath}:`, error);
      return '';
    }
  }

  /** Reads a file and returns its front matter, or `{}` when it cannot be read or parsed. */
  private extractMetadata(filePath: string): FrontMatter {
    try {
      const { data } = matter(fs.readFileSync(filePath, 'utf-8'));
      return data;
    } catch (error) {
      console.error(`Error reading metadata from ${filePath}:`, error);
      return {};
    }
  }

  /**
   * Maps a content file to its site URL: path relative to the content
   * directory, forward slashes, no extension, and no trailing `index`
   * segment (`guide/index.mdx` → `/docs/guide`, root `index.md` → `/docs`).
   */
  private generateUrl(filePath: string): string {
    const route = path
      .relative(this.contentDir, filePath)
      .replace(/\\/g, '/') // normalise first so the `index` rule also applies on Windows
      .replace(/\.(mdx|md)$/, '')
      .replace(/(^|\/)index$/, '');

    return route ? `/docs/${route}` : '/docs';
  }

  /**
   * Splits `content` into chunks of at most `maxChunkSize` characters,
   * breaking only at whitespace that follows `.`, `!` or `?`. Sentence
   * punctuation is preserved. A single sentence longer than `maxChunkSize`
   * is kept whole rather than cut mid-sentence.
   */
  private chunkContent(content: string, maxChunkSize: number = 1000): string[] {
    if (content.length <= maxChunkSize) {
      return [content];
    }

    const chunks: string[] = [];
    let current = '';

    for (const sentence of content.split(/(?<=[.!?])\s+/)) {
      if (!sentence) continue;
      if (current && current.length + 1 + sentence.length > maxChunkSize) {
        chunks.push(current);
        current = sentence;
      } else {
        current = current ? `${current} ${sentence}` : sentence;
      }
    }

    if (current) {
      chunks.push(current);
    }

    return chunks;
  }

  /**
   * Recursively lists `.md` / `.mdx` files below `dir`. Entries are visited
   * in sorted order so the index is built deterministically; unreadable
   * entries are logged and skipped without hiding their siblings.
   */
  private findMarkdownFiles(dir: string): string[] {
    let entries: string[];
    try {
      entries = fs.readdirSync(dir).sort();
    } catch (error) {
      console.error(`Error reading directory ${dir}:`, error);
      return [];
    }

    const files: string[] = [];
    for (const entry of entries) {
      const fullPath = path.join(dir, entry);
      try {
        if (fs.statSync(fullPath).isDirectory()) {
          files.push(...this.findMarkdownFiles(fullPath));
        } else if (/\.(mdx|md)$/.test(entry)) {
          files.push(fullPath);
        }
      } catch (error) {
        console.error(`Error reading directory ${dir}:`, error);
      }
    }

    return files;
  }

  /**
   * Derives a chunk-id prefix from the page URL so that files sharing a
   * basename in different folders (`index.mdx`, `01_intro.md`, …) do not
   * produce colliding ids.
   */
  private static documentId(url: string, fallback: string): string {
    const id = url
      .replace(/^\/docs\/?/, '')
      .replace(/[^\w-]+/g, '-')
      .replace(/^-+|-+$/g, '');
    return id || fallback;
  }

  /**
   * Indexes every Markdown/MDX file under the content directory, writes the
   * result to the index file (best effort) and returns it. A file that
   * fails to parse is logged and skipped; files with an empty body are
   * ignored.
   */
  public async indexDocuments(): Promise<DocumentIndex> {
    console.log('Starting document indexing...');

    const files = this.findMarkdownFiles(this.contentDir);
    const documents: DocumentChunk[] = [];

    for (const filePath of files) {
      try {
        // Read and parse once; body and front matter come from the same parse.
        const parsed = matter(fs.readFileSync(filePath, 'utf-8'));
        const rawContent: string = parsed.content;
        if (!rawContent.trim()) continue;

        const metadata: FrontMatter = parsed.data ?? {};
        const url = this.generateUrl(filePath);
        const baseName = path.basename(filePath, path.extname(filePath));
        const title = DocumentIndexer.asString(metadata.title) || baseName;

        // Sections need the raw Markdown (headings intact); search text is cleaned.
        const sections = this.extractSections(rawContent, url);
        const content = this.cleanMarkdown(rawContent);
        const chunks = this.chunkContent(content);
        const keywords = this.extractKeywords(title, content, url, metadata);
        const idPrefix = DocumentIndexer.documentId(url, baseName);

        chunks.forEach((chunk, index) => {
          documents.push({
            id: `${idPrefix}-${index}`,
            title,
            url,
            content: chunk,
            excerpt: chunk.substring(0, 200) + (chunk.length > 200 ? '...' : ''),
            sections,
            keywords,
            metadata: {
              description: DocumentIndexer.asString(metadata.description),
              parent: DocumentIndexer.asString(metadata.parent),
              nav_order: DocumentIndexer.asNumber(metadata.nav_order),
              tags: DocumentIndexer.asStringArray(metadata.tags),
            },
            chunkIndex: index,
            totalChunks: chunks.length,
          });
        });

        console.log(`Indexed: ${url} (${chunks.length} chunks, ${sections.length} sections)`);
      } catch (error) {
        console.error(`Error indexing ${filePath}:`, error);
      }
    }

    const index: DocumentIndex = {
      documents,
      lastUpdated: new Date().toISOString(),
      totalDocuments: documents.length,
    };

    this.saveIndex(index);

    console.log(`Document indexing complete. Indexed ${documents.length} chunks from ${files.length} files.`);
    return index;
  }

  /**
   * Writes the index as gzip-compressed JSON. Failures are logged, not
   * thrown, so the freshly built in-memory index can still be used when the
   * target directory is not writable.
   */
  private saveIndex(index: DocumentIndex): void {
    try {
      fs.mkdirSync(path.dirname(this.indexFile), { recursive: true });

      const jsonData = JSON.stringify(index);
      const compressed = gzipSync(jsonData);

      fs.writeFileSync(this.indexFile, compressed);

      const originalSize = Buffer.byteLength(jsonData, 'utf8');
      const compressedSize = compressed.length;
      const compressionRatio = ((originalSize - compressedSize) / originalSize * 100).toFixed(1);

      console.log(`Index saved to ${this.indexFile}`);
      console.log(`Compression: ${(originalSize / 1024).toFixed(1)}KB → ${(compressedSize / 1024).toFixed(1)}KB (${compressionRatio}% reduction)`);
    } catch (error) {
      console.error('Error saving index:', error);
    }
  }

  /**
   * Loads the index from disk.
   *
   * Accepts both the gzip format written by `saveIndex` and the older plain
   * JSON format, told apart by the gzip magic bytes.
   *
   * @returns the index, or `null` when the file is missing, unreadable,
   *          corrupt, or does not have the expected shape
   */
  public loadIndex(): DocumentIndex | null {
    try {
      if (!fs.existsSync(this.indexFile)) {
        return null;
      }

      const raw = fs.readFileSync(this.indexFile);
      const isGzip = raw.length >= 2 && raw[0] === 0x1f && raw[1] === 0x8b;
      const json = (isGzip ? gunzipSync(raw) : raw).toString('utf-8');

      const parsed: unknown = JSON.parse(json);
      if (!DocumentIndexer.isDocumentIndex(parsed)) {
        console.error('Error loading index:', new Error('unexpected index file shape'));
        return null;
      }
      return parsed;
    } catch (error) {
      console.error('Error loading index:', error);
      return null;
    }
  }

  /**
   * Whether the index should be rebuilt: it is missing or unloadable, its
   * `lastUpdated` timestamp cannot be parsed, or it is more than one hour old.
   */
  public needsUpdate(): boolean {
    const index = this.loadIndex();
    if (!index) return true;

    const indexedAt = Date.parse(index.lastUpdated);
    // An unparsable timestamp yields NaN, and every comparison with NaN is
    // false — without this guard such an index would never be refreshed.
    if (Number.isNaN(indexedAt)) return true;

    return Date.now() - indexedAt > DocumentIndexer.MAX_INDEX_AGE_MS;
  }
}

export const documentIndexer = new DocumentIndexer();
-*/ - /// /// /// diff --git a/next.config.mjs b/next.config.mjs index 710f6e4..3e7896c 100644 --- a/next.config.mjs +++ b/next.config.mjs @@ -43,7 +43,6 @@ export default withNextra( optimizePackageImports: ['@mantine/core', '@mantine/hooks'], }, // Performance optimizations - swcMinify: true, compiler: { removeConsole: process.env.NODE_ENV === 'production', }, diff --git a/package-lock.json b/package-lock.json index 9af80c4..aac238b 100644 --- a/package-lock.json +++ b/package-lock.json @@ -28,6 +28,7 @@ "@uiw/react-codemirror": "^4.23.12", "ai": "^4.3.16", "framer-motion": "12.23.14", + "gray-matter": "^4.0.3", "internmap": "^2.0.3", "next": "15.3.2", "next-auth": "^4.24.11", @@ -65,7 +66,7 @@ "eslint-plugin-react": "^7.37.5", "jest": "^29.7.0", "jest-environment-jsdom": "^29.7.0", - "pagefind": "^1.3.0", + "pagefind": "^1.4.0", "postcss": "^8.5.3", "postcss-import": "^16.1.0", "postcss-preset-mantine": "1.17.0", @@ -76,6 +77,7 @@ "stylelint": "^16.19.1", "stylelint-config-standard-scss": "^14.0.0", "ts-jest": "^29.3.2", + "tsx": "^4.20.5", "typescript": "5.8.3", "typescript-eslint": "^8.32.0" } @@ -4885,9 +4887,9 @@ } }, "node_modules/@pagefind/darwin-arm64": { - "version": "1.3.0", - "resolved": "https://registry.npmjs.org/@pagefind/darwin-arm64/-/darwin-arm64-1.3.0.tgz", - "integrity": "sha512-365BEGl6ChOsauRjyVpBjXybflXAOvoMROw3TucAROHIcdBvXk9/2AmEvGFU0r75+vdQI4LJdJdpH4Y6Yqaj4A==", + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/@pagefind/darwin-arm64/-/darwin-arm64-1.4.0.tgz", + "integrity": "sha512-2vMqkbv3lbx1Awea90gTaBsvpzgRs7MuSgKDxW0m9oV1GPZCZbZBJg/qL83GIUEN2BFlY46dtUZi54pwH+/pTQ==", "cpu": [ "arm64" ], @@ -4899,9 +4901,9 @@ ] }, "node_modules/@pagefind/darwin-x64": { - "version": "1.3.0", - "resolved": "https://registry.npmjs.org/@pagefind/darwin-x64/-/darwin-x64-1.3.0.tgz", - "integrity": "sha512-zlGHA23uuXmS8z3XxEGmbHpWDxXfPZ47QS06tGUq0HDcZjXjXHeLG+cboOy828QIV5FXsm9MjfkP5e4ZNbOkow==", + "version": "1.4.0", + "resolved": 
"https://registry.npmjs.org/@pagefind/darwin-x64/-/darwin-x64-1.4.0.tgz", + "integrity": "sha512-e7JPIS6L9/cJfow+/IAqknsGqEPjJnVXGjpGm25bnq+NPdoD3c/7fAwr1OXkG4Ocjx6ZGSCijXEV4ryMcH2E3A==", "cpu": [ "x64" ], @@ -4912,10 +4914,24 @@ "darwin" ] }, + "node_modules/@pagefind/freebsd-x64": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/@pagefind/freebsd-x64/-/freebsd-x64-1.4.0.tgz", + "integrity": "sha512-WcJVypXSZ+9HpiqZjFXMUobfFfZZ6NzIYtkhQ9eOhZrQpeY5uQFqNWLCk7w9RkMUwBv1HAMDW3YJQl/8OqsV0Q==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ] + }, "node_modules/@pagefind/linux-arm64": { - "version": "1.3.0", - "resolved": "https://registry.npmjs.org/@pagefind/linux-arm64/-/linux-arm64-1.3.0.tgz", - "integrity": "sha512-8lsxNAiBRUk72JvetSBXs4WRpYrQrVJXjlRRnOL6UCdBN9Nlsz0t7hWstRk36+JqHpGWOKYiuHLzGYqYAqoOnQ==", + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/@pagefind/linux-arm64/-/linux-arm64-1.4.0.tgz", + "integrity": "sha512-PIt8dkqt4W06KGmQjONw7EZbhDF+uXI7i0XtRLN1vjCUxM9vGPdtJc2mUyVPevjomrGz5M86M8bqTr6cgDp1Uw==", "cpu": [ "arm64" ], @@ -4927,9 +4943,9 @@ ] }, "node_modules/@pagefind/linux-x64": { - "version": "1.3.0", - "resolved": "https://registry.npmjs.org/@pagefind/linux-x64/-/linux-x64-1.3.0.tgz", - "integrity": "sha512-hAvqdPJv7A20Ucb6FQGE6jhjqy+vZ6pf+s2tFMNtMBG+fzcdc91uTw7aP/1Vo5plD0dAOHwdxfkyw0ugal4kcQ==", + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/@pagefind/linux-x64/-/linux-x64-1.4.0.tgz", + "integrity": "sha512-z4oddcWwQ0UHrTHR8psLnVlz6USGJ/eOlDPTDYZ4cI8TK8PgwRUPQZp9D2iJPNIPcS6Qx/E4TebjuGJOyK8Mmg==", "cpu": [ "x64" ], @@ -4941,9 +4957,9 @@ ] }, "node_modules/@pagefind/windows-x64": { - "version": "1.3.0", - "resolved": "https://registry.npmjs.org/@pagefind/windows-x64/-/windows-x64-1.3.0.tgz", - "integrity": "sha512-BR1bIRWOMqkf8IoU576YDhij1Wd/Zf2kX/kCI0b2qzCKC8wcc2GQJaaRMCpzvCCrmliO4vtJ6RITp/AnoYUUmQ==", + "version": "1.4.0", + "resolved": 
"https://registry.npmjs.org/@pagefind/windows-x64/-/windows-x64-1.4.0.tgz", + "integrity": "sha512-NkT+YAdgS2FPCn8mIA9bQhiBs+xmniMGq1LFPDhcFn0+2yIUEiIG06t7bsZlhdjknEQRTSdT7YitP6fC5qwP0g==", "cpu": [ "x64" ], @@ -11498,7 +11514,6 @@ "version": "4.0.1", "resolved": "https://registry.npmjs.org/esprima/-/esprima-4.0.1.tgz", "integrity": "sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A==", - "dev": true, "license": "BSD-2-Clause", "bin": { "esparse": "bin/esparse.js", @@ -11838,6 +11853,18 @@ "integrity": "sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==", "license": "MIT" }, + "node_modules/extend-shallow": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", + "integrity": "sha512-zCnTtlxNoAiDc3gqY2aYAWFx7XWWiasuF2K8Me5WbN8otHKTUKBwjPtNpRs/rbUZm7KxWAaNj7P1a/p52GbVug==", + "license": "MIT", + "dependencies": { + "is-extendable": "^0.1.0" + }, + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/fast-content-type-parse": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/fast-content-type-parse/-/fast-content-type-parse-3.0.0.tgz", @@ -12522,6 +12549,19 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/get-tsconfig": { + "version": "4.10.1", + "resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.10.1.tgz", + "integrity": "sha512-auHyJ4AgMz7vgS8Hp3N6HXSmlMdUyhSUrfBF16w153rxtLIEOE+HGqaBppczZvnHLqQJfiHotCYpNhl0lUROFQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "resolve-pkg-maps": "^1.0.0" + }, + "funding": { + "url": "https://github.com/privatenumber/get-tsconfig?sponsor=1" + } + }, "node_modules/github-slugger": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/github-slugger/-/github-slugger-2.0.0.tgz", @@ -12690,6 +12730,43 @@ "dev": true, "license": "MIT" }, + "node_modules/gray-matter": { + "version": "4.0.3", + "resolved": 
"https://registry.npmjs.org/gray-matter/-/gray-matter-4.0.3.tgz", + "integrity": "sha512-5v6yZd4JK3eMI3FqqCouswVqwugaA9r4dNZB1wwcmrD02QkV5H0y7XBQW8QwQqEaZY1pM9aqORSORhJRdNK44Q==", + "license": "MIT", + "dependencies": { + "js-yaml": "^3.13.1", + "kind-of": "^6.0.2", + "section-matter": "^1.0.0", + "strip-bom-string": "^1.0.0" + }, + "engines": { + "node": ">=6.0" + } + }, + "node_modules/gray-matter/node_modules/argparse": { + "version": "1.0.10", + "resolved": "https://registry.npmjs.org/argparse/-/argparse-1.0.10.tgz", + "integrity": "sha512-o5Roy6tNG4SL/FOkCAN6RzjiakZS25RLYFrcMttJqbdd8BWrnA+fGz57iN5Pb06pvBGvl5gQ0B48dJlslXvoTg==", + "license": "MIT", + "dependencies": { + "sprintf-js": "~1.0.2" + } + }, + "node_modules/gray-matter/node_modules/js-yaml": { + "version": "3.14.1", + "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-3.14.1.tgz", + "integrity": "sha512-okMH7OXXJ7YrN9Ok3/SXrnu4iX9yOk+25nqX4imS2npuvTYDmo/QEZoqwZkYaIDk3jVvBOTOIEgEhaLOynBS9g==", + "license": "MIT", + "dependencies": { + "argparse": "^1.0.7", + "esprima": "^4.0.0" + }, + "bin": { + "js-yaml": "bin/js-yaml.js" + } + }, "node_modules/gzip-size": { "version": "6.0.0", "resolved": "https://registry.npmjs.org/gzip-size/-/gzip-size-6.0.0.tgz", @@ -13904,6 +13981,15 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/is-extendable": { + "version": "0.1.1", + "resolved": "https://registry.npmjs.org/is-extendable/-/is-extendable-0.1.1.tgz", + "integrity": "sha512-5BMULNob1vgFX6EjQw5izWDxrecWK9AM72rugNr0TFldMOi0fj6Jk+zeKIt0xGj4cEfQIJth4w3OKWOJ4f+AFw==", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/is-extglob": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", @@ -15680,7 +15766,6 @@ "version": "6.0.3", "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-6.0.3.tgz", "integrity": "sha512-dcS1ul+9tmeD95T+x28/ehLgd9mENa3LsvDTtzm3vyBEO7RPptvAD+t44WVXaUjTBRcrpFeFlC8WCruUR456hw==", - "dev": 
true, "license": "MIT", "engines": { "node": ">=0.10.0" @@ -18863,20 +18948,21 @@ "license": "MIT" }, "node_modules/pagefind": { - "version": "1.3.0", - "resolved": "https://registry.npmjs.org/pagefind/-/pagefind-1.3.0.tgz", - "integrity": "sha512-8KPLGT5g9s+olKMRTU9LFekLizkVIu9tes90O1/aigJ0T5LmyPqTzGJrETnSw3meSYg58YH7JTzhTTW/3z6VAw==", + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/pagefind/-/pagefind-1.4.0.tgz", + "integrity": "sha512-z2kY1mQlL4J8q5EIsQkLzQjilovKzfNVhX8De6oyE6uHpfFtyBaqUpcl/XzJC/4fjD8vBDyh1zolimIcVrCn9g==", "dev": true, "license": "MIT", "bin": { "pagefind": "lib/runner/bin.cjs" }, "optionalDependencies": { - "@pagefind/darwin-arm64": "1.3.0", - "@pagefind/darwin-x64": "1.3.0", - "@pagefind/linux-arm64": "1.3.0", - "@pagefind/linux-x64": "1.3.0", - "@pagefind/windows-x64": "1.3.0" + "@pagefind/darwin-arm64": "1.4.0", + "@pagefind/darwin-x64": "1.4.0", + "@pagefind/freebsd-x64": "1.4.0", + "@pagefind/linux-arm64": "1.4.0", + "@pagefind/linux-x64": "1.4.0", + "@pagefind/windows-x64": "1.4.0" } }, "node_modules/pako": { @@ -21103,6 +21189,16 @@ "node": ">=4" } }, + "node_modules/resolve-pkg-maps": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz", + "integrity": "sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==", + "dev": true, + "license": "MIT", + "funding": { + "url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1" + } + }, "node_modules/resolve-url-loader": { "version": "5.0.0", "resolved": "https://registry.npmjs.org/resolve-url-loader/-/resolve-url-loader-5.0.0.tgz", @@ -21528,6 +21624,19 @@ "compute-scroll-into-view": "^3.0.2" } }, + "node_modules/section-matter": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/section-matter/-/section-matter-1.0.0.tgz", + "integrity": "sha512-vfD3pmTzGpufjScBh50YHKzEu2lxBWhVEHsNGoEXmCmn2hKGfeNLYMzCJpe8cD7gqX7TJluOVpBkAequ6dgMmA==", + "license": "MIT", 
+ "dependencies": { + "extend-shallow": "^2.0.1", + "kind-of": "^6.0.0" + }, + "engines": { + "node": ">=4" + } + }, "node_modules/secure-json-parse": { "version": "2.7.0", "resolved": "https://registry.npmjs.org/secure-json-parse/-/secure-json-parse-2.7.0.tgz", @@ -22022,7 +22131,6 @@ "version": "1.0.3", "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.0.3.tgz", "integrity": "sha512-D9cPgkvLlV3t3IzL0D0YLvGA9Ahk4PcvVwUbN0dSGr1aP0Nrt4AEnTUbuGvquEC0mA64Gqt1fzirlRs5ibXx8g==", - "dev": true, "license": "BSD-3-Clause" }, "node_modules/stack-utils": { @@ -22367,6 +22475,15 @@ "node": ">=8" } }, + "node_modules/strip-bom-string": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/strip-bom-string/-/strip-bom-string-1.0.0.tgz", + "integrity": "sha512-uCC2VHvQRYu+lMh4My/sFNmF2klFymLX1wHJeXnbEJERpV/ZsVuonzerjfrGpIGF7LBVa1O7i9kjiWvJiFck8g==", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/strip-final-newline": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/strip-final-newline/-/strip-final-newline-2.0.0.tgz", @@ -23394,6 +23511,26 @@ "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", "license": "0BSD" }, + "node_modules/tsx": { + "version": "4.20.5", + "resolved": "https://registry.npmjs.org/tsx/-/tsx-4.20.5.tgz", + "integrity": "sha512-+wKjMNU9w/EaQayHXb7WA7ZaHY6hN8WgfvHNQ3t1PnU91/7O8TcTnIhCDYTZwnt8JsO9IBqZ30Ln1r7pPF52Aw==", + "dev": true, + "license": "MIT", + "dependencies": { + "esbuild": "~0.25.0", + "get-tsconfig": "^4.7.5" + }, + "bin": { + "tsx": "dist/cli.mjs" + }, + "engines": { + "node": ">=18.0.0" + }, + "optionalDependencies": { + "fsevents": "~2.3.3" + } + }, "node_modules/tty-browserify": { "version": "0.0.1", "resolved": "https://registry.npmjs.org/tty-browserify/-/tty-browserify-0.0.1.tgz", diff --git a/package.json b/package.json index 9a430aa..b57d491 100644 --- a/package.json +++ b/package.json @@ -5,8 +5,9 @@ 
"scripts": { "dev": "next dev", "build": "next build", - "postbuild": "pagefind --site .next/server/app --output-path public/_pagefind", + "postbuild": "pagefind --site .next/server/app --output-path public/_pagefind && npx tsx scripts/build-index.ts", "build:pagefind": "pagefind --site .next/server/app --output-path public/_pagefind", + "build:index": "npx tsx scripts/build-index.ts", "analyze": "ANALYZE=true next build", "start": "next start", "typecheck": "tsc --noEmit", @@ -42,6 +43,7 @@ "@uiw/react-codemirror": "^4.23.12", "ai": "^4.3.16", "framer-motion": "12.23.14", + "gray-matter": "^4.0.3", "internmap": "^2.0.3", "next": "15.3.2", "next-auth": "^4.24.11", @@ -79,7 +81,7 @@ "eslint-plugin-react": "^7.37.5", "jest": "^29.7.0", "jest-environment-jsdom": "^29.7.0", - "pagefind": "^1.3.0", + "pagefind": "^1.4.0", "postcss": "^8.5.3", "postcss-import": "^16.1.0", "postcss-preset-mantine": "1.17.0", @@ -90,6 +92,7 @@ "stylelint": "^16.19.1", "stylelint-config-standard-scss": "^14.0.0", "ts-jest": "^29.3.2", + "tsx": "^4.20.5", "typescript": "5.8.3", "typescript-eslint": "^8.32.0" }, diff --git a/scripts/build-index.js b/scripts/build-index.js new file mode 100644 index 0000000..5df845b --- /dev/null +++ b/scripts/build-index.js @@ -0,0 +1,47 @@ +#!/usr/bin/env node + +/* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. 
// Thin Node launcher: runs the TypeScript index builder (build-index.ts,
// located next to this file) through tsx and mirrors its exit status.
const { execSync } = require('child_process');
const path = require('path');

const scriptPath = path.join(__dirname, 'build-index.ts');

try {
  // The path is quoted so a checkout directory containing spaces still works.
  execSync(`npx tsx "${scriptPath}"`, { stdio: 'inherit' });
} catch (error) {
  // The child has already printed its own diagnostics via the inherited
  // stdio; propagate its exit code instead of dumping a second stack trace.
  process.exit(error && typeof error.status === 'number' ? error.status : 1);
}

// The index is built entirely by the child process above. The buildIndex()
// function that used to follow referenced `documentIndexer`, which this file
// never imports, so every run ended in a ReferenceError and exit code 1 even
// after a successful build.
import { documentIndexer } from '../lib/documentIndexer';

/**
 * Command-line entry point: rebuilds the document search index and prints a
 * short summary. Any failure is reported and ends the process with exit
 * code 1.
 */
async function buildIndex() {
  console.log('🚀 Starting document index build...');

  try {
    const { totalDocuments, lastUpdated } = await documentIndexer.indexDocuments();

    const summary = [
      '✅ Document index built successfully!',
      `📊 Indexed ${totalDocuments} document chunks`,
      `📅 Last updated: ${lastUpdated}`,
      `💾 Index saved to: public/document-index.json`,
    ];
    for (const line of summary) {
      console.log(line);
    }
  } catch (failure) {
    console.error('❌ Error building document index:', failure);
    process.exit(1);
  }
}

buildIndex();