/**
 * Find Orphaned Files Script
 *
 * Provenance: body of scripts/find-orphaned-files.js as added by patch
 * 0ed9b6ff8117702c4f45f1f55b253abcf7ca8013 ("Add `find-orphaned-files.js`
 * script to identify orphaned files for maintenance"). The same patch
 * registers the script in package.json:
 *
 *   "find-orphaned-files": "node scripts/find-orphaned-files.js"
 *
 * This script identifies orphaned documentation files in the docs/ directory.
 *
 * Orphaned files are defined as:
 * 1. Non-partial files that are NOT referenced in any sidebar AND NOT in an
 *    autogenerated directory
 *    OR
 * 2. Partial files that are NOT referenced by any other documentation file
 *    or sidebar AND NOT in an autogenerated directory
 *
 * Partial files are identified by:
 * - Files in directories named "partials"
 * - Files with names containing "-partial" or "_partial"
 * - Files starting with underscore "_"
 *
 * Reference detection includes:
 * - Sidebar configuration (string doc ids, type: 'doc' / 'ref', 'category'
 *   with a doc link, category shorthand objects)
 * - Autogenerated directories (type: 'autogenerated')
 * - Import statements (import X from './file.mdx' or '@site/docs/file.mdx')
 * - Markdown links and images ([text](./file.md))
 * - FloatingHoverModal usages inside documentation files
 *
 * Usage:
 *   node scripts/find-orphaned-files.js
 *   OR
 *   yarn find-orphaned-files
 *
 * Output:
 * - Generates a report in orphaned-files-proposed-for-deletion.md
 * - Console output shows summary statistics and list of orphaned files
 *
 * Features:
 * - Handles Docusaurus numeric prefix convention (01-file.mdx referenced as file)
 * - Detects autogenerated directories from sidebar configuration
 * - Tracks both import statements and markdown links
 * - Differentiates between partial and non-partial files
 */

import fs from 'fs';
import path from 'path';
import { fileURLToPath, pathToFileURL } from 'url';

const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// Constants
const PROJECT_ROOT = path.resolve(__dirname, '..');
const DOCS_ROOT = path.join(PROJECT_ROOT, 'docs');
const SIDEBARS_FILE = path.join(PROJECT_ROOT, 'sidebars.js');
const OUTPUT_FILE = path.join(PROJECT_ROOT, 'orphaned-files-proposed-for-deletion.md');

// Matches a trailing .md / .mdx extension (same case-insensitivity as the directory scan).
const MARKDOWN_EXTENSION = /\.mdx?$/i;
// Module-specifier prefix that is mapped onto DOCS_ROOT when resolving references.
const SITE_DOCS_ALIAS = '@site/docs/';
// A leading "NN-" on a path segment, only when something follows it.
const NUMERIC_PREFIX = /^\d+-(?=.)/;

// import X from '…', import { X } from '…', import * as X from '…', import X, { Y } from '…'
const IMPORT_PATTERN =
  /import\s+(?:[\w$]+\s*,\s*)?(?:\{[^}]*\}|\*\s+as\s+[\w$]+|[\w$]+)\s+from\s+['"]([^'"]+\.mdx?)['"]/g;
// [text](target) and ![alt](target)
const LINK_PATTERN = /!?\[[^\]]*\]\(([^)]+)\)/g;
// Opening <FloatingHoverModal …> tag.
// NOTE(review): the original pattern was damaged in transit (only `/]*>/g` survived), so the
// attribute it captured is unknown; every quoted .md/.mdx attribute value in the tag is treated
// as a reference instead. Confirm against the component's real props.
const FLOATING_MODAL_TAG_PATTERN = /<FloatingHoverModal\b[^>]*>/g;
const QUOTED_VALUE_PATTERN = /["']([^"']+)["']/g;

// Storage (module-level state filled in by the steps of main())
const allFiles = [];
const partialFiles = [];
const nonPartialFiles = [];
const filesReferencedInSidebars = new Set();
const filesReferencedInDocs = new Set();
const filesImportedInDocs = new Set();
const autogeneratedDirs = new Set();

// Build-generated files mapping: generated file -> source directory
// When a generated file is referenced, all files in its source directory are considered referenced
const buildGeneratedFiles = {
  '/partials/_glossary-partial': '/partials/glossary', // glossary source files
};

/**
 * Determines if a file path represents a partial file.
 *
 * @param {string} filePath - Absolute path of the file.
 * @returns {boolean} True when any directory between DOCS_ROOT and the file is named
 *   "partials", or the file name contains "-partial" / "_partial" or starts with "_".
 */
function isPartialFile(filePath) {
  const fileName = path.basename(filePath);
  // Split on the platform separator so nested "partials" directories are found on Windows too.
  const directories = path.relative(DOCS_ROOT, path.dirname(filePath)).split(path.sep);

  return (
    directories.includes('partials') ||
    fileName.includes('-partial') ||
    fileName.includes('_partial') ||
    fileName.startsWith('_')
  );
}

/**
 * Get normalized path for comparison (relative to DOCS_ROOT, no extension, forward
 * slashes, leading "/").
 *
 * Paths outside DOCS_ROOT keep their full location, so a sibling directory such as
 * "docs-old" is not mistaken for something inside docs/.
 *
 * @param {string} filePath - Absolute file path.
 * @returns {string} Normalized path, e.g. "/guides/01-intro".
 */
function getNormalizedPath(filePath) {
  const relative = path.relative(DOCS_ROOT, filePath);
  const escapesDocs =
    !path.isAbsolute(filePath) ||
    relative === '..' ||
    relative.startsWith(`..${path.sep}`) ||
    path.isAbsolute(relative);
  const base = escapesDocs ? filePath : relative;
  const normalized = base.replace(MARKDOWN_EXTENSION, '').replace(/\\/g, '/');

  return normalized.startsWith('/') ? normalized : `/${normalized}`;
}

/**
 * Recursively scan a directory for markdown files and record them in allFiles,
 * partialFiles and nonPartialFiles.
 *
 * @param {string} dir - Absolute directory to scan.
 * @throws Propagates fs errors (e.g. when the directory does not exist).
 */
function scanDirectory(dir) {
  for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
    const fullPath = path.join(dir, entry.name);

    if (entry.isDirectory()) {
      // Skip node_modules and hidden directories
      if (entry.name !== 'node_modules' && !entry.name.startsWith('.')) {
        scanDirectory(fullPath);
      }
      continue;
    }

    if (!entry.isFile() || !MARKDOWN_EXTENSION.test(entry.name)) {
      continue;
    }

    const normalizedPath = getNormalizedPath(fullPath);
    const isPartial = isPartialFile(fullPath);
    allFiles.push({ absolutePath: fullPath, normalizedPath, isPartial });
    (isPartial ? partialFiles : nonPartialFiles).push(normalizedPath);
  }
}

/** Prefix a sidebar doc id with "/" so it compares against normalized file paths. */
function toRootedPath(id) {
  return id.startsWith('/') ? id : `/${id}`;
}

/**
 * Normalize an autogenerated `dirName` to a rooted path without a trailing slash.
 * "." (the whole docs folder) becomes "/".
 */
function normalizeSidebarDir(dirName) {
  const rooted = path.posix.normalize(`/${dirName.replace(/\\/g, '/')}`);
  return rooted.length > 1 && rooted.endsWith('/') ? rooted.slice(0, -1) : rooted;
}

/**
 * Process a sidebar item (or list of items) and record the docs it references in
 * filesReferencedInSidebars / autogeneratedDirs.
 *
 * Handles string doc-id shorthand, 'doc' and 'ref' items, categories (items plus a doc
 * link), autogenerated items, and type-less objects whose values are item lists
 * (category shorthand, and the `sdkSidebar` wrapper the original special-cased).
 *
 * @param {unknown} item - Sidebar item, array of items, or shorthand object.
 */
function processSidebarItem(item) {
  if (typeof item === 'string') {
    // Shorthand: a bare string is a doc id.
    filesReferencedInSidebars.add(toRootedPath(item));
    return;
  }

  if (Array.isArray(item)) {
    item.forEach(processSidebarItem);
    return;
  }

  if (!item || typeof item !== 'object') {
    return;
  }

  switch (item.type) {
    case 'doc':
    case 'ref':
      if (typeof item.id === 'string' && item.id) {
        filesReferencedInSidebars.add(toRootedPath(item.id));
      }
      break;

    case 'category':
      if (Array.isArray(item.items)) {
        item.items.forEach(processSidebarItem);
      }
      if (item.link && item.link.type === 'doc' && item.link.id) {
        filesReferencedInSidebars.add(toRootedPath(item.link.id));
      }
      break;

    case 'autogenerated':
      if (typeof item.dirName === 'string' && item.dirName) {
        autogeneratedDirs.add(normalizeSidebarDir(item.dirName));
      }
      break;

    case 'link':
    case 'html':
      // These don't reference local files
      break;

    case undefined:
      // Type-less object: treat array/object values as nested item lists.
      for (const value of Object.values(item)) {
        if (value && typeof value === 'object') {
          processSidebarItem(value);
        }
      }
      break;

    default:
      // Unknown types carry no references we know how to read
      break;
  }
}

/**
 * Load sidebars.js and extract every file reference from it.
 *
 * @returns {Promise<void>}
 * @throws {Error} When the sidebar module cannot be loaded (original error in `cause`).
 */
async function extractSidebarReferences() {
  // pathToFileURL produces a valid file URL on every platform and escapes special
  // characters; the query string keeps the original cache-busting behavior.
  const sidebarsUrl = pathToFileURL(SIDEBARS_FILE);
  sidebarsUrl.searchParams.set('update', String(Date.now()));

  let sidebarsModule;
  try {
    sidebarsModule = await import(sidebarsUrl.href);
  } catch (error) {
    throw new Error(`Error extracting sidebar references from ${SIDEBARS_FILE}`, { cause: error });
  }

  const sidebars = sidebarsModule.default ?? sidebarsModule;
  for (const sidebarConfig of Object.values(sidebars)) {
    processSidebarItem(sidebarConfig);
  }
}

/**
 * Resolve a link/import target to a normalized docs path.
 *
 * - "./x.md" / "../x.md": relative to the referencing file
 * - "@site/docs/x.md": relative to DOCS_ROOT
 * - "/x.md": already rooted at the docs root
 * - anything else: assumed to be relative to the docs root
 *
 * @param {string} linkPath - Target as written in the source file.
 * @param {string} sourceFilePath - Absolute path of the referencing file.
 * @returns {string} Normalized path (leading "/", no extension).
 */
function resolveRelativePath(linkPath, sourceFilePath) {
  if (linkPath.startsWith('./') || linkPath.startsWith('../')) {
    return getNormalizedPath(path.resolve(path.dirname(sourceFilePath), linkPath));
  }

  // Keep the alias's trailing "/" so the remainder is already rooted.
  const fromDocsRoot = linkPath.startsWith(SITE_DOCS_ALIAS)
    ? linkPath.slice(SITE_DOCS_ALIAS.length - 1)
    : linkPath;
  const withoutExtension = fromDocsRoot.replace(MARKDOWN_EXTENSION, '');

  return withoutExtension.startsWith('/') ? withoutExtension : `/${withoutExtension}`;
}

/**
 * All normalized paths a target may refer to.
 *
 * A bare target ("sibling.md") is ambiguous: the original script read it as
 * docs-root-relative, while plain Markdown semantics resolve it relative to the
 * referencing file. Both candidates are returned so a file is never proposed for
 * deletion because of that ambiguity.
 */
function resolveReferenceCandidates(target, sourceFilePath) {
  const candidates = [resolveRelativePath(target, sourceFilePath)];
  const isExplicit = ['./', '../', '/', '@'].some((prefix) => target.startsWith(prefix));
  if (!isExplicit) {
    candidates.push(resolveRelativePath(`./${target}`, sourceFilePath));
  }
  return candidates;
}

/** Strip an optional link title, anchor and query string from a raw link target. */
function cleanLinkTarget(rawTarget) {
  const [target = ''] = rawTarget.trim().split(/\s+/, 1);
  return target.split(/[#?]/, 1)[0];
}

/**
 * True when a cleaned target points at a local .md/.mdx file: no URL scheme, and no
 * "@" alias other than "@site/docs/".
 */
function isLocalMarkdownTarget(target) {
  if (!MARKDOWN_EXTENSION.test(target) || target.includes(':')) {
    return false;
  }
  return !target.startsWith('@') || target.startsWith(SITE_DOCS_ALIAS);
}

/**
 * Extract references from markdown/MDX source text and record them in
 * filesReferencedInDocs (and filesImportedInDocs for imports).
 *
 * A file's references to itself are ignored: they do not make it reachable.
 *
 * @param {string} content - File contents.
 * @param {string} filePath - Absolute path the content was read from.
 */
function extractReferencesFromContent(content, filePath) {
  const sourcePath = getNormalizedPath(filePath);

  const record = (target, { imported = false } = {}) => {
    for (const resolved of resolveReferenceCandidates(target, filePath)) {
      if (resolved === sourcePath) {
        continue;
      }
      filesReferencedInDocs.add(resolved);
      if (imported) {
        filesImportedInDocs.add(resolved);
      }
    }
  };

  // Import statements: only relative specifiers and the @site/docs alias point into docs/.
  for (const [, specifier] of content.matchAll(IMPORT_PATTERN)) {
    const pointsIntoDocs =
      specifier.startsWith('./') ||
      specifier.startsWith('../') ||
      specifier.startsWith(SITE_DOCS_ALIAS);
    if (pointsIntoDocs) {
      record(specifier, { imported: true });
    }
  }

  // Markdown links and images
  for (const [, rawTarget] of content.matchAll(LINK_PATTERN)) {
    const target = cleanLinkTarget(rawTarget);
    if (isLocalMarkdownTarget(target)) {
      record(target);
    }
  }

  // FloatingHoverModal usages
  for (const [tag] of content.matchAll(FLOATING_MODAL_TAG_PATTERN)) {
    for (const [, value] of tag.matchAll(QUOTED_VALUE_PATTERN)) {
      const target = cleanLinkTarget(value);
      if (isLocalMarkdownTarget(target)) {
        record(target);
      }
    }
  }
}

/**
 * Extract references from a markdown file on disk.
 *
 * Best effort: a file that cannot be read or parsed is reported on stderr and skipped
 * so one bad file does not abort the whole scan.
 *
 * @param {string} filePath - Absolute path of the file to read.
 */
function extractReferencesFromFile(filePath) {
  try {
    extractReferencesFromContent(fs.readFileSync(filePath, 'utf8'), filePath);
  } catch (error) {
    console.error(`Error extracting references from ${filePath}:`, error.message);
  }
}

/**
 * Check if a file is in an autogenerated directory.
 *
 * @param {string} normalizedPath - Normalized docs path of the file.
 * @returns {boolean}
 */
function isInAutogeneratedDir(normalizedPath) {
  for (const autoDir of autogeneratedDirs) {
    // "/" means the whole docs folder is autogenerated.
    if (autoDir === '/' || normalizedPath === autoDir || normalizedPath.startsWith(`${autoDir}/`)) {
      return true;
    }
  }
  return false;
}

/** Compare one path segment, tolerating a numeric "NN-" prefix on either side. */
function segmentsMatch(filePart, sidebarPart) {
  if (filePart === sidebarPart) {
    return true;
  }
  const bareFilePart = filePart.replace(NUMERIC_PREFIX, '');
  const bareSidebarPart = sidebarPart.replace(NUMERIC_PREFIX, '');
  return (
    bareFilePart === bareSidebarPart ||
    bareFilePart === sidebarPart ||
    filePart === bareSidebarPart
  );
}

/**
 * Check if two paths match, accounting for Docusaurus numeric prefix convention.
 * Docusaurus allows referencing files without their numeric prefixes.
 * E.g., "arbitrum-bridge/quickstart" matches "arbitrum-bridge/01-quickstart".
 *
 * @param {string} sidebarPath - Path derived from a sidebar doc id.
 * @param {string} filePath - Normalized path of a file on disk.
 * @returns {boolean}
 */
function pathsMatch(sidebarPath, filePath) {
  if (sidebarPath === filePath) {
    return true;
  }

  const fileParts = filePath.split('/');
  const sidebarParts = sidebarPath.split('/');

  return (
    fileParts.length === sidebarParts.length &&
    fileParts.every((filePart, index) => segmentsMatch(filePart, sidebarParts[index]))
  );
}

/**
 * Check if a file is referenced in sidebars (accounting for numeric prefixes).
 *
 * @param {string} filePath - Normalized path of the file.
 * @returns {boolean}
 */
function isReferencedInSidebar(filePath) {
  for (const sidebarPath of filesReferencedInSidebars) {
    if (pathsMatch(sidebarPath, filePath)) {
      return true;
    }
  }
  return false;
}

/**
 * Identify orphaned files among allFiles.
 *
 * @returns {Array<{path: string, absolutePath: string, reason: string, isPartial: boolean}>}
 *   Orphaned files sorted by normalized path.
 */
function identifyOrphanedFiles() {
  const orphanedFiles = [];

  for (const { normalizedPath, isPartial, absolutePath } of allFiles) {
    // Anything listed in a sidebar or living in an autogenerated directory is reachable.
    if (isInAutogeneratedDir(normalizedPath) || isReferencedInSidebar(normalizedPath)) {
      continue;
    }

    const referencedInDocs =
      filesReferencedInDocs.has(normalizedPath) || filesImportedInDocs.has(normalizedPath);

    // Rule 2: partials are also kept alive by references from other documentation files.
    if (isPartial && referencedInDocs) {
      continue;
    }

    orphanedFiles.push({
      path: normalizedPath,
      absolutePath,
      // Rule 1 applies to non-partials, rule 2 to partials.
      reason: isPartial
        ? 'Partial file not referenced in any documentation file or sidebar'
        : 'Not referenced in any sidebar or autogenerated directory',
      isPartial,
    });
  }

  return orphanedFiles.sort((a, b) => a.path.localeCompare(b.path));
}

/** Path relative to the project root, with forward slashes, for display. */
function toDisplayPath(absolutePath) {
  return path.relative(PROJECT_ROOT, absolutePath).split(path.sep).join('/');
}

/** Escape "|" so a value cannot break out of its markdown table cell. */
function escapeTableCell(value) {
  return String(value).replace(/\|/g, '\\|');
}

/**
 * Generate the markdown report.
 *
 * @param {Array<{path: string, absolutePath: string, reason: string}>} orphanedFiles
 *   Orphaned files; the array is not modified.
 * @returns {string} Markdown document with one table row per orphaned file, sorted by path.
 */
function generateReport(orphanedFiles) {
  const rows =
    orphanedFiles.length === 0
      ? ['| No orphaned files found | N/A |']
      : [...orphanedFiles]
          .sort((a, b) => a.path.localeCompare(b.path))
          .map(
            (file) =>
              `| ${escapeTableCell(toDisplayPath(file.absolutePath))} | ${escapeTableCell(file.reason)} |`
          );

  // NOTE(review): the paired blank lines before and after the table are reproduced as
  // received; they may originally have held markup that was lost in transit - confirm.
  return [
    '## Orphaned files proposed for deletion',
    '',
    '',
    '| File path | Reason for deletion |',
    '|:----------|:--------------------|',
    ...rows,
    '',
    '',
  ].join('\n');
}

/**
 * Main execution function: scan docs/, collect references, write the report and print
 * a summary.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log('🔍 Scanning for orphaned files in documentation...\n');

  // Step 1: Scan all markdown files
  console.log('📁 Scanning docs directory...');
  scanDirectory(DOCS_ROOT);
  console.log(`   Found ${allFiles.length} total files`);
  console.log(`   - ${partialFiles.length} partial files`);
  console.log(`   - ${nonPartialFiles.length} non-partial files\n`);

  // Step 2: Extract sidebar references
  console.log('📋 Analyzing sidebar configuration...');
  await extractSidebarReferences();
  console.log(`   Found ${filesReferencedInSidebars.size} files referenced in sidebars`);
  console.log(`   Found ${autogeneratedDirs.size} autogenerated directories`);
  for (const dir of autogeneratedDirs) {
    console.log(`     - ${dir}`);
  }
  console.log('');

  // Step 3: Extract references from all documentation files
  console.log('🔗 Extracting references from documentation files...');
  for (const file of allFiles) {
    extractReferencesFromFile(file.absolutePath);
  }
  console.log(`   Found ${filesReferencedInDocs.size} files referenced in docs`);
  console.log(`   Found ${filesImportedInDocs.size} files imported in docs\n`);

  // Step 3.5: Mark source files as referenced for build-generated files
  console.log('🔨 Checking build-generated files...');
  let buildSourceFilesCount = 0;
  for (const [generatedFile, sourceDir] of Object.entries(buildGeneratedFiles)) {
    const generatedIsReferenced =
      filesReferencedInDocs.has(generatedFile) || filesImportedInDocs.has(generatedFile);
    if (!generatedIsReferenced) {
      continue;
    }

    console.log(`   Generated file referenced: ${generatedFile}`);
    console.log(`   Marking source files in ${sourceDir} as referenced`);

    // Mark all files in the source directory as referenced
    for (const { normalizedPath } of allFiles) {
      if (normalizedPath === sourceDir || normalizedPath.startsWith(`${sourceDir}/`)) {
        filesReferencedInDocs.add(normalizedPath);
        buildSourceFilesCount++;
      }
    }
  }
  if (buildSourceFilesCount > 0) {
    console.log(`   Marked ${buildSourceFilesCount} build source files as referenced\n`);
  } else {
    console.log('   No build-generated files referenced\n');
  }

  // Step 4: Identify orphaned files
  console.log('🔎 Identifying orphaned files...');
  const orphanedFiles = identifyOrphanedFiles();
  console.log(`   Found ${orphanedFiles.length} orphaned files\n`);

  // Step 5: Generate report
  console.log('📝 Generating report...');
  const reportLocation = path.relative(PROJECT_ROOT, OUTPUT_FILE);
  fs.writeFileSync(OUTPUT_FILE, generateReport(orphanedFiles), 'utf8');
  console.log(`   Report written to: ${reportLocation}\n`);

  // Step 6: Display summary
  if (orphanedFiles.length > 0) {
    console.log('⚠️ Orphaned files found:');
    console.log('');
    for (const file of orphanedFiles) {
      console.log(`   - ${toDisplayPath(file.absolutePath)}`);
      console.log(`     Reason: ${file.reason}`);
    }
    console.log('');
    console.log(`Review the full report at: ${reportLocation}`);
  } else {
    console.log('✅ No orphaned files found!');
  }

  console.log('\n✨ Done!');
}

// Execute main function
main().catch((error) => {
  console.error('❌ Error:', error);
  process.exit(1);
});