From 05ffa1a6c3247a41d2eb67660a1a3f5efca08b0d Mon Sep 17 00:00:00 2001 From: AnanthuNarashimman Date: Thu, 25 Dec 2025 16:08:47 +0530 Subject: [PATCH] fix(core): handle escaped quotes in CSV parser Fixes incorrect parsing of CSV fields containing double quotes. Includes unit tests. --- .../core/src/tools/read-data-file.test.ts | 271 ++++++++++++++++++ packages/core/src/tools/read-data-file.ts | 224 +++++++++------ 2 files changed, 400 insertions(+), 95 deletions(-) create mode 100644 packages/core/src/tools/read-data-file.test.ts diff --git a/packages/core/src/tools/read-data-file.test.ts b/packages/core/src/tools/read-data-file.test.ts new file mode 100644 index 0000000..205e46c --- /dev/null +++ b/packages/core/src/tools/read-data-file.test.ts @@ -0,0 +1,271 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, beforeEach, afterEach, vi } from "vitest"; +import type { ReadDataFileToolParams } from "./read-data-file.js"; +import { ReadDataFileTool } from "./read-data-file.js"; +import path from "node:path"; +import os from "node:os"; +import fs from "node:fs"; +import fsp from "node:fs/promises"; +import type { Config } from "../config/config.js"; +import { FileDiscoveryService } from "../services/fileDiscoveryService.js"; +import { StandardFileSystemService } from "../services/fileSystemService.js"; +import { createMockWorkspaceContext } from "../test-utils/mockWorkspaceContext.js"; + +vi.mock("../telemetry/loggers.js", () => ({ + logFileOperation: vi.fn(), +})); + +describe("ReadDataFileTool - parseCSVLine Tests", () => { + let tempRootDir: string; + let tool: ReadDataFileTool; + const abortSignal = new AbortController().signal; + + beforeEach(async () => { + tempRootDir = await fsp.mkdtemp( + path.join(os.tmpdir(), "read-data-file-parsecsv-test-"), + ); + + const mockConfigInstance = { + getFileService: () => new FileDiscoveryService(tempRootDir), + getFileSystemService: () => new StandardFileSystemService(), + getTargetDir: () => tempRootDir, + getWorkspaceContext: () => createMockWorkspaceContext(tempRootDir), + } as unknown as Config; + tool = new ReadDataFileTool(mockConfigInstance); + }); + + afterEach(async () => { + if (fs.existsSync(tempRootDir)) { + await fsp.rm(tempRootDir, { recursive: true, force: true }); + } + }); + + describe("parseCSVLine - Basic Parsing", () => { + it("should parse simple CSV with no quotes", async () => { + const csvContent = "name,age,city\nJohn,30,NYC\nJane,25,LA"; + const csvPath = path.join(tempRootDir, "simple.csv"); + await fsp.writeFile(csvPath, csvContent, "utf-8"); + + const params: ReadDataFileToolParams = { absolute_path: csvPath }; + const invocation = tool.build(params); + if (typeof invocation === "string") throw new Error(invocation); + + const result = await invocation.execute(abortSignal); + expect(result.error).toBeUndefined(); + expect(result.llmContent).toContain('"name": "John"'); + expect(result.llmContent).toContain('"age": "30"'); + expect(result.llmContent).toContain('"city": "NYC"'); + }); + + it("should parse CSV with empty fields", async () => { + const csvContent = "name,age,city\nJohn,,NYC\n,25,LA\n,,"; + const csvPath = path.join(tempRootDir, "empty-fields.csv"); + await fsp.writeFile(csvPath, csvContent, "utf-8"); + + const params: ReadDataFileToolParams = { absolute_path: csvPath }; + const invocation = tool.build(params); + if (typeof invocation === "string") throw new Error(invocation); + + const result = await invocation.execute(abortSignal); + expect(result.error).toBeUndefined(); + expect(result.llmContent).toContain('"name": "John"'); + expect(result.llmContent).toContain('"age": ""'); + }); + }); + + describe("parseCSVLine - Quoted Fields", () => { + it("should parse quoted fields with commas inside", async () => { + const csvContent = + 'name,address,phone\nJohn,"123 Main St, Apt 4",555-1234'; + const csvPath = path.join(tempRootDir, "quoted-commas.csv"); + await fsp.writeFile(csvPath, csvContent, "utf-8"); + + const params: ReadDataFileToolParams = { absolute_path: csvPath }; + const invocation = tool.build(params); + if (typeof invocation === "string") throw new Error(invocation); + + const result = await invocation.execute(abortSignal); + expect(result.error).toBeUndefined(); + expect(result.llmContent).toContain('"address": "123 Main St, Apt 4"'); + }); + + it("should handle mixed quoted and unquoted fields", async () => { + const csvContent = + 'id,name,description\n1,John,"Software Engineer, Senior"\n2,Jane,Manager'; + const csvPath = path.join(tempRootDir, "mixed.csv"); + await fsp.writeFile(csvPath, csvContent, "utf-8"); + + const params: ReadDataFileToolParams = { absolute_path: csvPath }; + const invocation = tool.build(params); + if (typeof invocation === "string") throw new Error(invocation); + + const result = await invocation.execute(abortSignal); + expect(result.error).toBeUndefined(); + expect(result.llmContent).toContain( + '"description": "Software Engineer, Senior"', + ); + expect(result.llmContent).toContain('"description": "Manager"'); + }); + }); + + describe('parseCSVLine - ESCAPED QUOTES FIX ("") → (")', () => { + it('should handle escaped quotes ("") inside quoted fields', async () => { + const csvContent = + 'name,quote,author\nJohn,"He said ""Hello World""",Smith'; + const csvPath = path.join(tempRootDir, "escaped-quotes.csv"); + await fsp.writeFile(csvPath, csvContent, "utf-8"); + + const params: ReadDataFileToolParams = { absolute_path: csvPath }; + const invocation = tool.build(params); + if (typeof invocation === "string") throw new Error(invocation); + + const result = await invocation.execute(abortSignal); + expect(result.error).toBeUndefined(); + // The escaped quotes ("") should become single quotes (") + // In JSON output, quotes are escaped as \" + expect(result.llmContent).toContain('He said \\"Hello World\\"'); + }); + + it("should handle multiple escaped quotes in a single field", async () => { + const csvContent = 'id,text\n1,"The ""quick"" brown ""fox"" jumps"'; + const csvPath = path.join(tempRootDir, "multiple-escaped.csv"); + await fsp.writeFile(csvPath, csvContent, "utf-8"); + + const params: ReadDataFileToolParams = { absolute_path: csvPath }; + const invocation = tool.build(params); + if (typeof invocation === "string") throw new Error(invocation); + + const result = await invocation.execute(abortSignal); + expect(result.error).toBeUndefined(); + // In JSON output, quotes are escaped as \" + expect(result.llmContent).toContain( + 'The \\"quick\\" brown \\"fox\\" jumps', + ); + }); + + it("should handle escaped quotes at field boundaries", async () => { + const csvContent = + 'col1,col2,col3\na,"""Start",b\nc,"End""",d\ne,"""Both""",f'; + const csvPath = path.join(tempRootDir, "boundary-quotes.csv"); + await fsp.writeFile(csvPath, csvContent, "utf-8"); + + const params: ReadDataFileToolParams = { absolute_path: csvPath }; + const invocation = tool.build(params); + if (typeof invocation === "string") throw new Error(invocation); + + const result = await invocation.execute(abortSignal); + expect(result.error).toBeUndefined(); + // """Start" → "Start + // "End""" → End" + // """Both""" → "Both" + // In JSON output, quotes are escaped as \" + expect(result.llmContent).toContain('\\"Start'); + expect(result.llmContent).toContain('End\\"'); + expect(result.llmContent).toContain('\\"Both\\"'); + }); + + it("should handle consecutive escaped quotes", async () => { + const csvContent = 'col1,col2\na,""""\nb,""""""\nc,""""""""'; + const csvPath = path.join(tempRootDir, "consecutive.csv"); + await fsp.writeFile(csvPath, csvContent, "utf-8"); + + const params: ReadDataFileToolParams = { absolute_path: csvPath }; + const invocation = tool.build(params); + if (typeof invocation === "string") throw new Error(invocation); + + const result = await invocation.execute(abortSignal); + expect(result.error).toBeUndefined(); + // """" → " + // """""" → "" + // """""""" → """ + }); + + it("should handle escaped quotes combined with commas", async () => { + const csvContent = + 'id,description\n1,"Item with ""quotes"", commas, and text"'; + const csvPath = path.join(tempRootDir, "quotes-and-commas.csv"); + await fsp.writeFile(csvPath, csvContent, "utf-8"); + + const params: ReadDataFileToolParams = { absolute_path: csvPath }; + const invocation = tool.build(params); + if (typeof invocation === "string") throw new Error(invocation); + + const result = await invocation.execute(abortSignal); + expect(result.error).toBeUndefined(); + // In JSON output, quotes are escaped as \" + expect(result.llmContent).toContain( + 'Item with \\"quotes\\", commas, and text', + ); + }); + + it("should handle real-world example with SQL-like content", async () => { + const csvContent = + 'query,description\n"SELECT * FROM users WHERE name = ""John""","Fetch user ""John""\'s data"'; + const csvPath = path.join(tempRootDir, "sql-example.csv"); + await fsp.writeFile(csvPath, csvContent, "utf-8"); + + const params: ReadDataFileToolParams = { absolute_path: csvPath }; + const invocation = tool.build(params); + if (typeof invocation === "string") throw new Error(invocation); + + const result = await invocation.execute(abortSignal); + expect(result.error).toBeUndefined(); + // In JSON output, quotes are escaped as \" + expect(result.llmContent).toContain( + 'SELECT * FROM users WHERE name = \\"John\\"', + ); + expect(result.llmContent).toContain('Fetch user \\"John\\"'); + }); + }); + + describe("parseCSVLine - Edge Cases", () => { + it("should handle empty CSV file", async () => { + const csvContent = ""; + const csvPath = path.join(tempRootDir, "empty.csv"); + await fsp.writeFile(csvPath, csvContent, "utf-8"); + + const params: ReadDataFileToolParams = { absolute_path: csvPath }; + const invocation = tool.build(params); + if (typeof invocation === "string") throw new Error(invocation); + + const result = await invocation.execute(abortSignal); + expect(result.error).toBeUndefined(); + // Empty file now returns 0 rows instead of special message + expect(result.llmContent).toContain("0 rows"); + }); + + it("should handle CSV with only headers", async () => { + const csvContent = "name,age,city"; + const csvPath = path.join(tempRootDir, "headers-only.csv"); + await fsp.writeFile(csvPath, csvContent, "utf-8"); + + const params: ReadDataFileToolParams = { absolute_path: csvPath }; + const invocation = tool.build(params); + if (typeof invocation === "string") throw new Error(invocation); + + const result = await invocation.execute(abortSignal); + expect(result.error).toBeUndefined(); + expect(result.llmContent).toContain("0 rows"); + }); + + it("should handle single column CSV", async () => { + const csvContent = "name\nJohn\nJane\nBob"; + const csvPath = path.join(tempRootDir, "single-column.csv"); + await fsp.writeFile(csvPath, csvContent, "utf-8"); + + const params: ReadDataFileToolParams = { absolute_path: csvPath }; + const invocation = tool.build(params); + if (typeof invocation === "string") throw new Error(invocation); + + const result = await invocation.execute(abortSignal); + expect(result.error).toBeUndefined(); + expect(result.llmContent).toContain("1 columns"); + expect(result.llmContent).toContain("3 rows"); + }); + }); +}); diff --git a/packages/core/src/tools/read-data-file.ts b/packages/core/src/tools/read-data-file.ts index 422d247..1990279 100644 --- a/packages/core/src/tools/read-data-file.ts +++ b/packages/core/src/tools/read-data-file.ts @@ -4,15 +4,15 @@ * SPDX-License-Identifier: Apache-2.0 */ -import path from 'node:path'; -import fs from 'node:fs'; -import { makeRelative, shortenPath } from '../utils/paths.js'; -import type { ToolInvocation, ToolLocation, ToolResult } from './tools.js'; -import { BaseDeclarativeTool, BaseToolInvocation, Kind } from './tools.js'; -import { ToolNames } from './tool-names.js'; -import type { Config } from '../config/config.js'; -import { ToolErrorType } from './tool-error.js'; -import { generateWorkspacePathError } from './workspace-error-helper.js'; +import path from "node:path"; +import fs from "node:fs"; +import { makeRelative, shortenPath } from "../utils/paths.js"; +import type { ToolInvocation, ToolLocation, ToolResult } from "./tools.js"; +import { BaseDeclarativeTool, BaseToolInvocation, Kind } from "./tools.js"; +import { ToolNames } from "./tool-names.js"; +import type { Config } from "../config/config.js"; +import { ToolErrorType } from "./tool-error.js"; +import { generateWorkspacePathError } from "./workspace-error-helper.js"; /** * Parameters for the ReadDataFile tool @@ -70,30 +70,42 @@ class ReadDataFileToolInvocation extends BaseToolInvocation< * Parse CSV file into structured data with comprehensive analysis */ private async parseCSV(content: string): Promise { - const lines = content.trim().split('\n'); + const lines = content.trim().split("\n"); if (lines.length === 0) { return { - fileType: 'CSV', + fileType: "CSV", data: [], - summary: 'Empty CSV file', + summary: "Empty CSV file", rowCount: 0, }; } - // Simple CSV parser (handles basic cases, not production-grade) + // Simple CSV parser (Fixed to handle escaped quotes) const parseCSVLine = (line: string): string[] => { const result: string[] = []; - let current = ''; + let current = ""; let inQuotes = false; for (let i = 0; i < line.length; i++) { const char = line[i]; + const nextChar = line[i + 1]; // Look ahead + if (char === '"') { - inQuotes = !inQuotes; - } else if (char === ',' && !inQuotes) { + if (inQuotes && nextChar === '"') { + // CASE: Escaped quote ("") inside a quoted field. + // Treat as a literal quote and skip the next character. + current += '"'; + i++; + } else { + // CASE: Normal quote boundary (start or end of field) + inQuotes = !inQuotes; + } + } else if (char === "," && !inQuotes) { + // CASE: Comma outside of quotes -> New field result.push(current.trim()); - current = ''; + current = ""; } else { + // CASE: Normal character current += char; } } @@ -103,7 +115,7 @@ class ReadDataFileToolInvocation extends BaseToolInvocation< const headers = parseCSVLine(lines[0]); const totalRows = lines.length - 1; - + // Parse rows, limit display if max_rows is set const allDataRows = lines.slice(1); const displayMaxRows = this.params.max_rows || 100; // Default to 100 for display @@ -113,20 +125,23 @@ class ReadDataFileToolInvocation extends BaseToolInvocation< const values = parseCSVLine(line); const row: Record = {}; headers.forEach((header, index) => { - row[header] = values[index] || ''; + row[header] = values[index] || ""; }); return row; }); // Data to display (limited if max_rows is set) - const displayData = displayMaxRows ? allData.slice(0, displayMaxRows) : allData; + const displayData = displayMaxRows + ? allData.slice(0, displayMaxRows) + : allData; - const summaryText = displayMaxRows && totalRows > displayMaxRows - ? `CSV file with ${headers.length} columns and ${totalRows} rows (showing first ${displayMaxRows} rows)` - : `CSV file with ${headers.length} columns and ${totalRows} rows`; + const summaryText = + displayMaxRows && totalRows > displayMaxRows + ? `CSV file with ${headers.length} columns and ${totalRows} rows (showing first ${displayMaxRows} rows)` + : `CSV file with ${headers.length} columns and ${totalRows} rows`; return { - fileType: 'CSV', + fileType: "CSV", data: displayData, summary: summaryText, rowCount: totalRows, @@ -146,21 +161,22 @@ class ReadDataFileToolInvocation extends BaseToolInvocation< const rowCount = isArray ? data.length : undefined; let columns: string[] | undefined; - if (isArray && data.length > 0 && typeof data[0] === 'object') { + if (isArray && data.length > 0 && typeof data[0] === "object") { columns = Object.keys(data[0]); } const displayMaxRows = this.params.max_rows || 100; // Default to 100 for display - const limitedData = isArray && displayMaxRows ? data.slice(0, displayMaxRows) : data; + const limitedData = + isArray && displayMaxRows ? data.slice(0, displayMaxRows) : data; const summaryText = isArray ? displayMaxRows && rowCount && rowCount > displayMaxRows - ? `JSON array with ${rowCount} items${columns ? ` and ${columns.length} fields` : ''} (showing first ${displayMaxRows} items)` - : `JSON array with ${rowCount} items${columns ? ` and ${columns.length} fields` : ''}` - : 'JSON object'; + ? `JSON array with ${rowCount} items${columns ? ` and ${columns.length} fields` : ""} (showing first ${displayMaxRows} items)` + : `JSON array with ${rowCount} items${columns ? ` and ${columns.length} fields` : ""}` + : "JSON object"; return { - fileType: 'JSON', + fileType: "JSON", data: limitedData, summary: summaryText, rowCount, @@ -179,12 +195,12 @@ class ReadDataFileToolInvocation extends BaseToolInvocation< * Parse TXT file (treat as plain text with line-by-line analysis) */ private async parseTXT(content: string): Promise { - const lines = content.split('\n'); + const lines = content.split("\n"); const maxRows = this.params.max_rows || 100; const limitedLines = lines.slice(0, maxRows); return { - fileType: 'TXT', + fileType: "TXT", data: limitedLines, summary: `Text file with ${lines.length} lines (showing first ${limitedLines.length} lines)`, rowCount: lines.length, @@ -197,43 +213,49 @@ class ReadDataFileToolInvocation extends BaseToolInvocation< private async parseXLSX(filePath: string): Promise { try { // Dynamic import to handle optional dependency - use default export - const { default: XLSX } = await import('xlsx'); - + const { default: XLSX } = await import("xlsx"); + const workbook = XLSX.readFile(filePath); const sheetNames = workbook.SheetNames; - + if (sheetNames.length === 0) { return { - fileType: 'XLSX', + fileType: "XLSX", data: [], - summary: 'Empty Excel file with no sheets', + summary: "Empty Excel file with no sheets", sheets: [], }; } const maxRows = this.params.max_rows || 100; - + // Parse all sheets and collect their data const allSheetsData: Record = {}; let totalRows = 0; let firstSheetColumns: string[] = []; - + for (const sheetName of sheetNames) { const worksheet = workbook.Sheets[sheetName]; - + // Convert to JSON with proper options const jsonData = XLSX.utils.sheet_to_json(worksheet, { raw: false, // Format numbers and dates - defval: '', // Default value for empty cells + defval: "", // Default value for empty cells }); - + allSheetsData[sheetName] = jsonData; totalRows += jsonData.length; - + // Get column names from first sheet's first row - if (sheetName === sheetNames[0] && jsonData.length > 0 && - typeof jsonData[0] === 'object' && jsonData[0] !== null) { - firstSheetColumns = Object.keys(jsonData[0] as Record); + if ( + sheetName === sheetNames[0] && + jsonData.length > 0 && + typeof jsonData[0] === "object" && + jsonData[0] !== null + ) { + firstSheetColumns = Object.keys( + jsonData[0] as Record, + ); } } @@ -243,12 +265,12 @@ class ReadDataFileToolInvocation extends BaseToolInvocation< const limitedData = firstSheetData.slice(0, maxRows); // Create a summary of all sheets - const sheetsSummary = sheetNames.map(name => - `"${name}" (${allSheetsData[name]?.length || 0} rows)` - ).join(', '); + const sheetsSummary = sheetNames + .map((name) => `"${name}" (${allSheetsData[name]?.length || 0} rows)`) + .join(", "); return { - fileType: 'XLSX', + fileType: "XLSX", data: { // Primary data from first sheet (limited) firstSheet: limitedData, @@ -256,8 +278,8 @@ class ReadDataFileToolInvocation extends BaseToolInvocation< allSheets: Object.fromEntries( Object.entries(allSheetsData).map(([name, data]) => [ name, - data.slice(0, maxRows) - ]) + data.slice(0, maxRows), + ]), ), }, summary: `Excel file with ${sheetNames.length} sheet(s): ${sheetsSummary}. Total ${totalRows} rows across all sheets. First sheet "${firstSheetName}" has ${firstSheetData.length} rows and ${firstSheetColumns.length} columns (showing first ${limitedData.length} rows).`, @@ -267,17 +289,19 @@ class ReadDataFileToolInvocation extends BaseToolInvocation< sheets: sheetNames, }; } catch (error) { - if ((error as NodeJS.ErrnoException).code === 'MODULE_NOT_FOUND' || - (error as Error).message?.includes('Cannot find module')) { + if ( + (error as NodeJS.ErrnoException).code === "MODULE_NOT_FOUND" || + (error as Error).message?.includes("Cannot find module") + ) { return { - fileType: 'XLSX', + fileType: "XLSX", data: null, summary: 'XLSX parsing requires the "xlsx" library. Please install it with: npm install xlsx', }; } throw new Error( - `Failed to parse XLSX file: ${error instanceof Error ? error.message : String(error)}` + `Failed to parse XLSX file: ${error instanceof Error ? error.message : String(error)}`, ); } } @@ -288,14 +312,14 @@ class ReadDataFileToolInvocation extends BaseToolInvocation< private async parseDOCX(filePath: string): Promise { try { // Dynamic import to handle optional dependency - use default export - const { default: mammoth } = await import('mammoth'); - + const { default: mammoth } = await import("mammoth"); + const result = await mammoth.extractRawText({ path: filePath }); const text = result.value; - + // Split into paragraphs const paragraphs = text - .split('\n') + .split("\n") .map((p) => p.trim()) .filter((p) => p.length > 0); @@ -303,23 +327,25 @@ class ReadDataFileToolInvocation extends BaseToolInvocation< const limitedParagraphs = paragraphs.slice(0, maxRows); return { - fileType: 'DOCX', + fileType: "DOCX", data: limitedParagraphs, summary: `Word document with ${paragraphs.length} paragraphs (showing first ${limitedParagraphs.length})`, rowCount: paragraphs.length, }; } catch (error) { - if ((error as NodeJS.ErrnoException).code === 'MODULE_NOT_FOUND' || - (error as Error).message?.includes('Cannot find module')) { + if ( + (error as NodeJS.ErrnoException).code === "MODULE_NOT_FOUND" || + (error as Error).message?.includes("Cannot find module") + ) { return { - fileType: 'DOCX', + fileType: "DOCX", data: null, summary: 'DOCX parsing requires the "mammoth" library. Please install it with: npm install mammoth', }; } throw new Error( - `Failed to parse DOCX file: ${error instanceof Error ? error.message : String(error)}` + `Failed to parse DOCX file: ${error instanceof Error ? error.message : String(error)}`, ); } } @@ -332,7 +358,7 @@ class ReadDataFileToolInvocation extends BaseToolInvocation< if (!fs.existsSync(filePath)) { return { llmContent: `File not found: ${filePath}`, - returnDisplay: 'File not found', + returnDisplay: "File not found", error: { message: `File not found: ${filePath}`, type: ToolErrorType.FILE_NOT_FOUND, @@ -345,7 +371,7 @@ class ReadDataFileToolInvocation extends BaseToolInvocation< if (stats.isDirectory()) { return { llmContent: `Path is a directory, not a file: ${filePath}`, - returnDisplay: 'Path is a directory', + returnDisplay: "Path is a directory", error: { message: `Path is a directory: ${filePath}`, type: ToolErrorType.TARGET_IS_DIRECTORY, @@ -361,35 +387,35 @@ class ReadDataFileToolInvocation extends BaseToolInvocation< // Parse based on file type switch (ext) { - case '.csv': { - const content = await fs.promises.readFile(filePath, 'utf-8'); + case ".csv": { + const content = await fs.promises.readFile(filePath, "utf-8"); result = await this.parseCSV(content); break; } - case '.json': { - const content = await fs.promises.readFile(filePath, 'utf-8'); + case ".json": { + const content = await fs.promises.readFile(filePath, "utf-8"); result = await this.parseJSON(content); break; } - case '.txt': { - const content = await fs.promises.readFile(filePath, 'utf-8'); + case ".txt": { + const content = await fs.promises.readFile(filePath, "utf-8"); result = await this.parseTXT(content); break; } - case '.xlsx': - case '.xls': { + case ".xlsx": + case ".xls": { result = await this.parseXLSX(filePath); break; } - case '.docx': - case '.doc': { + case ".docx": + case ".doc": { result = await this.parseDOCX(filePath); break; } - case '.pdf': { + case ".pdf": { return { llmContent: `PDF files are already supported by the read_file tool. Please use read_file instead for: ${relativePath}`, - returnDisplay: 'Use read_file for PDF files', + returnDisplay: "Use read_file for PDF files", }; } default: { @@ -416,13 +442,13 @@ class ReadDataFileToolInvocation extends BaseToolInvocation< - **Type**: ${result.fileType} - **File Size**: ${fileSizeMB} MB - **Summary**: ${result.summary} -${result.rowCount !== undefined ? `- **Total Rows**: ${result.rowCount}` : ''} -${result.fullDataCount !== undefined && result.fullDataCount !== result.rowCount ? `- **Full Dataset Rows**: ${result.fullDataCount}` : ''} -${result.columnCount !== undefined ? `- **Columns**: ${result.columnCount}` : ''} -${result.columns ? `- **Column Names**: ${result.columns.join(', ')}` : ''} -${result.sheets ? `- **Sheets**: ${result.sheets.join(', ')}` : ''} +${result.rowCount !== undefined ? `- **Total Rows**: ${result.rowCount}` : ""} +${result.fullDataCount !== undefined && result.fullDataCount !== result.rowCount ? `- **Full Dataset Rows**: ${result.fullDataCount}` : ""} +${result.columnCount !== undefined ? `- **Columns**: ${result.columnCount}` : ""} +${result.columns ? `- **Column Names**: ${result.columns.join(", ")}` : ""} +${result.sheets ? `- **Sheets**: ${result.sheets.join(", ")}` : ""} -## Sample Data (First ${Array.isArray(result.data) ? Math.min(result.data.length, 100) : 'N/A'} rows) +## Sample Data (First ${Array.isArray(result.data) ? Math.min(result.data.length, 100) : "N/A"} rows) **Note:** This is a sample of the data. For complete analysis, write a Python script to read and analyze the entire file. @@ -467,7 +493,7 @@ export class ReadDataFileTool extends BaseDeclarativeTool< constructor(private config: Config) { super( ReadDataFileTool.Name, - 'ReadDataFile', + "ReadDataFile", `Reads and parses structured data files (CSV, JSON, TXT, XLSX, DOCX, DOC) and returns the parsed data in a structured format. Use this tool to read data files and extract their content. For analysis, write a Python script to process the complete dataset.`, Kind.Read, { @@ -475,16 +501,16 @@ export class ReadDataFileTool extends BaseDeclarativeTool< absolute_path: { description: "The absolute path to the data file to read and parse (e.g., '/home/user/project/data.csv'). Supported file types: .csv, .json, .txt, .xlsx, .xls, .docx, .doc. Relative paths are not supported.", - type: 'string', + type: "string", }, max_rows: { description: - 'Optional: Maximum number of rows/items to DISPLAY in the output (default: 100). This only controls how many sample rows are shown. Set higher for more sample data or lower for less.', - type: 'number', + "Optional: Maximum number of rows/items to DISPLAY in the output (default: 100). This only controls how many sample rows are shown. Set higher for more sample data or lower for less.", + type: "number", }, }, - required: ['absolute_path'], - type: 'object', + required: ["absolute_path"], + type: "object", }, ); } @@ -494,7 +520,7 @@ export class ReadDataFileTool extends BaseDeclarativeTool< ): string | null { const filePath = params.absolute_path; - if (filePath.trim() === '') { + if (filePath.trim() === "") { return "The 'absolute_path' parameter must be non-empty."; } @@ -510,13 +536,21 @@ export class ReadDataFileTool extends BaseDeclarativeTool< // Validate file extension const ext = path.extname(filePath).toLowerCase(); - const supportedExtensions = ['.csv', '.json', '.txt', '.xlsx', '.xls', '.docx', '.doc']; + const supportedExtensions = [ + ".csv", + ".json", + ".txt", + ".xlsx", + ".xls", + ".docx", + ".doc", + ]; if (!supportedExtensions.includes(ext)) { - return `Unsupported file type: ${ext}. Supported types: ${supportedExtensions.join(', ')}`; + return `Unsupported file type: ${ext}. Supported types: ${supportedExtensions.join(", ")}`; } if (params.max_rows !== undefined && params.max_rows <= 0) { - return 'max_rows must be a positive number'; + return "max_rows must be a positive number"; } const fileService = this.config.getFileService();