Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
271 changes: 271 additions & 0 deletions packages/core/src/tools/read-data-file.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,271 @@
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/

import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
import type { ReadDataFileToolParams } from "./read-data-file.js";
import { ReadDataFileTool } from "./read-data-file.js";
import path from "node:path";
import os from "node:os";
import fs from "node:fs";
import fsp from "node:fs/promises";
import type { Config } from "../config/config.js";
import { FileDiscoveryService } from "../services/fileDiscoveryService.js";
import { StandardFileSystemService } from "../services/fileSystemService.js";
import { createMockWorkspaceContext } from "../test-utils/mockWorkspaceContext.js";

// Swap the telemetry logger module for a stub whose `logFileOperation` is a
// bare mock function (presumably so running the tool in these tests does not
// emit real telemetry — confirm against the tool implementation).
vi.mock("../telemetry/loggers.js", () => {
  const logFileOperation = vi.fn();
  return { logFileOperation };
});

/**
 * CSV parsing tests for ReadDataFileTool.
 *
 * The suite titles target `parseCSVLine`, but the tests only reach it
 * indirectly: each one writes a CSV fixture into a fresh temp directory, runs
 * the tool on it through `build()` / `execute()`, and inspects the resulting
 * `llmContent`.
 */
describe("ReadDataFileTool - parseCSVLine Tests", () => {
  let tempRootDir: string;
  let tool: ReadDataFileTool;
  const abortSignal = new AbortController().signal;

  /**
   * Writes `content` to `fileName` inside the per-test temp directory, runs
   * the tool against that file and returns the execution result.
   *
   * Fails the calling test if the tool rejects the parameters (`build`
   * returning a string) or if the result carries an `error`.
   *
   * @param fileName Name of the fixture file to create under `tempRootDir`.
   * @param content Raw CSV text written as UTF-8.
   * @returns The result object produced by the tool invocation.
   */
  async function readCsv(fileName: string, content: string) {
    const csvPath = path.join(tempRootDir, fileName);
    await fsp.writeFile(csvPath, content, "utf-8");

    const params: ReadDataFileToolParams = { absolute_path: csvPath };
    const invocation = tool.build(params);
    if (typeof invocation === "string") throw new Error(invocation);

    const result = await invocation.execute(abortSignal);
    expect(result.error).toBeUndefined();
    return result;
  }

  beforeEach(async () => {
    // Every test gets its own temp directory that also acts as the workspace root.
    tempRootDir = await fsp.mkdtemp(
      path.join(os.tmpdir(), "read-data-file-parsecsv-test-"),
    );

    // Only the Config members listed here are provided; the cast hides the rest.
    const mockConfigInstance = {
      getFileService: () => new FileDiscoveryService(tempRootDir),
      getFileSystemService: () => new StandardFileSystemService(),
      getTargetDir: () => tempRootDir,
      getWorkspaceContext: () => createMockWorkspaceContext(tempRootDir),
    } as unknown as Config;
    tool = new ReadDataFileTool(mockConfigInstance);
  });

  afterEach(async () => {
    if (fs.existsSync(tempRootDir)) {
      await fsp.rm(tempRootDir, { recursive: true, force: true });
    }
  });

  describe("parseCSVLine - Basic Parsing", () => {
    it("should parse simple CSV with no quotes", async () => {
      const result = await readCsv(
        "simple.csv",
        "name,age,city\nJohn,30,NYC\nJane,25,LA",
      );

      expect(result.llmContent).toContain('"name": "John"');
      expect(result.llmContent).toContain('"age": "30"');
      expect(result.llmContent).toContain('"city": "NYC"');
    });

    it("should parse CSV with empty fields", async () => {
      const result = await readCsv(
        "empty-fields.csv",
        "name,age,city\nJohn,,NYC\n,25,LA\n,,",
      );

      expect(result.llmContent).toContain('"name": "John"');
      expect(result.llmContent).toContain('"age": ""');
    });
  });

  describe("parseCSVLine - Quoted Fields", () => {
    it("should parse quoted fields with commas inside", async () => {
      const result = await readCsv(
        "quoted-commas.csv",
        'name,address,phone\nJohn,"123 Main St, Apt 4",555-1234',
      );

      expect(result.llmContent).toContain('"address": "123 Main St, Apt 4"');
    });

    it("should handle mixed quoted and unquoted fields", async () => {
      const result = await readCsv(
        "mixed.csv",
        'id,name,description\n1,John,"Software Engineer, Senior"\n2,Jane,Manager',
      );

      expect(result.llmContent).toContain(
        '"description": "Software Engineer, Senior"',
      );
      expect(result.llmContent).toContain('"description": "Manager"');
    });
  });

  describe('parseCSVLine - ESCAPED QUOTES FIX ("") → (")', () => {
    it('should handle escaped quotes ("") inside quoted fields', async () => {
      const result = await readCsv(
        "escaped-quotes.csv",
        'name,quote,author\nJohn,"He said ""Hello World""",Smith',
      );

      // The escaped quotes ("") should become single quotes (").
      // In JSON output, quotes are escaped as \"
      expect(result.llmContent).toContain('He said \\"Hello World\\"');
    });

    it("should handle multiple escaped quotes in a single field", async () => {
      const result = await readCsv(
        "multiple-escaped.csv",
        'id,text\n1,"The ""quick"" brown ""fox"" jumps"',
      );

      // In JSON output, quotes are escaped as \"
      expect(result.llmContent).toContain(
        'The \\"quick\\" brown \\"fox\\" jumps',
      );
    });

    it("should handle escaped quotes at field boundaries", async () => {
      const result = await readCsv(
        "boundary-quotes.csv",
        'col1,col2,col3\na,"""Start",b\nc,"End""",d\ne,"""Both""",f',
      );

      // """Start" → "Start
      // "End""" → End"
      // """Both""" → "Both"
      // In JSON output, quotes are escaped as \"
      expect(result.llmContent).toContain('\\"Start');
      expect(result.llmContent).toContain('End\\"');
      expect(result.llmContent).toContain('\\"Both\\"');
    });

    it("should handle consecutive escaped quotes", async () => {
      const result = await readCsv(
        "consecutive.csv",
        'col1,col2\na,""""\nb,""""""\nc,""""""""',
      );

      // Each pattern includes the key and the closing quote of the JSON string,
      // so a shorter run of quotes cannot satisfy the assertion for a longer one.
      // In JSON output, quotes are escaped as \"
      // """" → "
      expect(result.llmContent).toContain('"col2": "\\""');
      // """""" → ""
      expect(result.llmContent).toContain('"col2": "\\"\\""');
      // """""""" → """
      expect(result.llmContent).toContain('"col2": "\\"\\"\\""');
    });

    it("should handle escaped quotes combined with commas", async () => {
      const result = await readCsv(
        "quotes-and-commas.csv",
        'id,description\n1,"Item with ""quotes"", commas, and text"',
      );

      // In JSON output, quotes are escaped as \"
      expect(result.llmContent).toContain(
        'Item with \\"quotes\\", commas, and text',
      );
    });

    it("should handle real-world example with SQL-like content", async () => {
      const result = await readCsv(
        "sql-example.csv",
        'query,description\n"SELECT * FROM users WHERE name = ""John""","Fetch user ""John""\'s data"',
      );

      // In JSON output, quotes are escaped as \"
      expect(result.llmContent).toContain(
        'SELECT * FROM users WHERE name = \\"John\\"',
      );
      expect(result.llmContent).toContain('Fetch user \\"John\\"');
    });
  });

  describe("parseCSVLine - Edge Cases", () => {
    it("should handle empty CSV file", async () => {
      const result = await readCsv("empty.csv", "");

      // Empty file now returns 0 rows instead of special message
      expect(result.llmContent).toContain("0 rows");
    });

    it("should handle CSV with only headers", async () => {
      const result = await readCsv("headers-only.csv", "name,age,city");

      expect(result.llmContent).toContain("0 rows");
    });

    it("should handle single column CSV", async () => {
      const result = await readCsv(
        "single-column.csv",
        "name\nJohn\nJane\nBob",
      );

      expect(result.llmContent).toContain("1 columns");
      expect(result.llmContent).toContain("3 rows");
    });
  });
});
Loading