From f5af8b83bd4a5aa2fa97aa5c6afbb9ba8fa233ed Mon Sep 17 00:00:00 2001 From: Alex K Date: Tue, 24 Jun 2025 23:53:37 +0200 Subject: [PATCH] feat: implement DataFrame filtering and indexing methods with tests --- .../src/methods/dataframe/display/index.js | 17 + .../src/methods/dataframe/filtering/expr$.js | 33 +- .../src/methods/dataframe/filtering/filter.js | 29 +- .../src/methods/dataframe/filtering/iloc.js | 31 +- .../src/methods/dataframe/filtering/loc.js | 101 ++++-- .../src/methods/dataframe/filtering/query$.js | 3 +- .../src/methods/dataframe/filtering/where.js | 29 +- packages/core/src/methods/dataframe/index.js | 13 + .../core/src/methods/dataframe/indexing/at.js | 49 +++ .../src/methods/dataframe/indexing/head.js | 48 +++ .../src/methods/dataframe/indexing/iloc.js | 156 +++++++++ .../src/methods/dataframe/indexing/index.js | 18 + .../src/methods/dataframe/indexing/loc.js | 328 ++++++++++++++++++ .../src/methods/dataframe/indexing/pool.js | 20 ++ .../src/methods/dataframe/indexing/sample.js | 123 +++++++ .../methods/dataframe/indexing/setIndex.js | 41 +++ .../src/methods/dataframe/indexing/tail.js | 49 +++ packages/core/src/methods/dataframe/pool.js | 41 +-- .../dataframe/filtering/where.fixed.test.js | 212 +++++++++-- .../methods/dataframe/indexing/at.test.js | 97 ++++++ .../methods/dataframe/indexing/head.test.js | 100 ++++++ .../methods/dataframe/indexing/iloc.test.js | 141 ++++++++ .../methods/dataframe/indexing/loc.test.js | 159 +++++++++ .../methods/dataframe/indexing/sample.test.js | 175 ++++++++++ .../methods/dataframe/indexing/tail.test.js | 100 ++++++ 25 files changed, 1956 insertions(+), 157 deletions(-) create mode 100644 packages/core/src/methods/dataframe/display/index.js create mode 100644 packages/core/src/methods/dataframe/index.js create mode 100644 packages/core/src/methods/dataframe/indexing/at.js create mode 100644 packages/core/src/methods/dataframe/indexing/head.js create mode 100644 
packages/core/src/methods/dataframe/indexing/iloc.js create mode 100644 packages/core/src/methods/dataframe/indexing/index.js create mode 100644 packages/core/src/methods/dataframe/indexing/loc.js create mode 100644 packages/core/src/methods/dataframe/indexing/pool.js create mode 100644 packages/core/src/methods/dataframe/indexing/sample.js create mode 100644 packages/core/src/methods/dataframe/indexing/setIndex.js create mode 100644 packages/core/src/methods/dataframe/indexing/tail.js create mode 100644 tests/core/methods/dataframe/indexing/at.test.js create mode 100644 tests/core/methods/dataframe/indexing/head.test.js create mode 100644 tests/core/methods/dataframe/indexing/iloc.test.js create mode 100644 tests/core/methods/dataframe/indexing/loc.test.js create mode 100644 tests/core/methods/dataframe/indexing/sample.test.js create mode 100644 tests/core/methods/dataframe/indexing/tail.test.js diff --git a/packages/core/src/methods/dataframe/display/index.js b/packages/core/src/methods/dataframe/display/index.js new file mode 100644 index 0000000..a2c5681 --- /dev/null +++ b/packages/core/src/methods/dataframe/display/index.js @@ -0,0 +1,17 @@ +/** + * DataFrame display methods + * + * This module exports all display methods for DataFrame. + * Methods are registered using extendDataFrame. 
+ * + * @module methods/dataframe/display + */ + +import { DataFrame } from '../../../data/model/index.js'; +import { extendDataFrame } from '../../../data/model/extendDataFrame.js'; +import * as pool from './pool.js'; + +extendDataFrame(DataFrame.prototype, pool); // without namespace — base display methods + +// export directly (so that you can call display(df) if needed) +export * from './pool.js'; diff --git a/packages/core/src/methods/dataframe/filtering/expr$.js b/packages/core/src/methods/dataframe/filtering/expr$.js index 81d4a92..0e102bd 100644 --- a/packages/core/src/methods/dataframe/filtering/expr$.js +++ b/packages/core/src/methods/dataframe/filtering/expr$.js @@ -47,46 +47,52 @@ export function expr$(df, strings, ...values) { if (filteredRows.length === 0) { // Create a new DataFrame instance with the same options as the original const result = new df.constructor({}, df._options); - + // For each column, create a Series with the appropriate type for (const col of allColumns) { // Get the original column data to determine its type const originalColumn = df._columns[col]; const originalArray = originalColumn.vector.__data; - + // Create an empty array with the same type - if (ArrayBuffer.isView(originalArray) && !(originalArray instanceof DataView)) { + if ( + ArrayBuffer.isView(originalArray) && + !(originalArray instanceof DataView) + ) { const TypedArrayConstructor = originalArray.constructor; const emptyTypedArray = new TypedArrayConstructor(0); result._columns[col] = createTypedSeries(emptyTypedArray, col, df); } else { result._columns[col] = createTypedSeries([], col, df); } - + // Add to column order if (!result._order.includes(col)) { result._order.push(col); } } - + return result; } // For non-empty results, create a new DataFrame with filtered rows // Create a new DataFrame instance with the same options as the original const result = new df.constructor({}, df._options); - + // For each column, create a Series with the appropriate type for 
(const col of allColumns) { // Get the original column data to determine its type const originalColumn = df._columns[col]; const originalArray = originalColumn.vector.__data; - + // Extract values for this column from the filtered rows - const values = filteredRows.map(row => row[col]); - + const values = filteredRows.map((row) => row[col]); + // Preserve the array type if it's a typed array - if (ArrayBuffer.isView(originalArray) && !(originalArray instanceof DataView)) { + if ( + ArrayBuffer.isView(originalArray) && + !(originalArray instanceof DataView) + ) { const TypedArrayConstructor = originalArray.constructor; const typedValues = new TypedArrayConstructor(values.length); values.forEach((value, i) => { @@ -96,19 +102,19 @@ export function expr$(df, strings, ...values) { } else { result._columns[col] = createTypedSeries(values, col, df); } - + // Add to column order if (!result._order.includes(col)) { result._order.push(col); } } - + return result; } /** * Create a predicate function for filtering rows - * + * * @param {string} expr - Expression to evaluate * @returns {Function} - Predicate function * @private @@ -134,4 +140,3 @@ function createPredicate(expr) { } // Export the expr$ method directly -export { expr$ }; diff --git a/packages/core/src/methods/dataframe/filtering/filter.js b/packages/core/src/methods/dataframe/filtering/filter.js index d92e940..254e2cb 100644 --- a/packages/core/src/methods/dataframe/filtering/filter.js +++ b/packages/core/src/methods/dataframe/filtering/filter.js @@ -31,44 +31,50 @@ export function filter(df, predicate) { if (filteredRows.length === 0) { // Create a new DataFrame instance with the same options as the original const result = new df.constructor({}, df._options); - + // For each column, create a Series with the appropriate type for (const col of allColumns) { // Get the original column data to determine its type const originalColumn = df._columns[col]; const originalArray = originalColumn.vector.__data; - + // 
Create an empty array with the same type - if (ArrayBuffer.isView(originalArray) && !(originalArray instanceof DataView)) { + if ( + ArrayBuffer.isView(originalArray) && + !(originalArray instanceof DataView) + ) { const TypedArrayConstructor = originalArray.constructor; const emptyTypedArray = new TypedArrayConstructor(0); result._columns[col] = createTypedSeries(emptyTypedArray, col, df); } else { result._columns[col] = createTypedSeries([], col, df); } - + // Add to column order if (!result._order.includes(col)) { result._order.push(col); } } - + return result; } // For non-empty results, create a new DataFrame with filtered rows // Create a new DataFrame instance with the same options as the original const result = new df.constructor({}, df._options); - + // For each column, create a Series with the appropriate type for (const col of allColumns) { // Get the original column data to determine its type const originalColumn = df._columns[col]; const originalArray = originalColumn.vector.__data; - const values = filteredRows.map(row => row[col]); - + const values = filteredRows.map((row) => row[col]); + // Preserve the array type if it's a typed array - if (ArrayBuffer.isView(originalArray) && !(originalArray instanceof DataView)) { + if ( + ArrayBuffer.isView(originalArray) && + !(originalArray instanceof DataView) + ) { const TypedArrayConstructor = originalArray.constructor; const typedValues = new TypedArrayConstructor(values.length); values.forEach((value, i) => { @@ -78,15 +84,14 @@ export function filter(df, predicate) { } else { result._columns[col] = createTypedSeries(values, col, df); } - + // Add to column order if (!result._order.includes(col)) { result._order.push(col); } } - + return result; } // Export the filter method directly -export { filter }; diff --git a/packages/core/src/methods/dataframe/filtering/iloc.js b/packages/core/src/methods/dataframe/filtering/iloc.js index 035e756..40bd82f 100644 --- 
a/packages/core/src/methods/dataframe/filtering/iloc.js +++ b/packages/core/src/methods/dataframe/filtering/iloc.js @@ -1,12 +1,12 @@ /*-------------------------------------------------------------------------* | DataFrame -› filtering · iloc() | | | - | Выбор строк и колонок из DataFrame по целочисленным позициям. | + | Selection of rows and columns from DataFrame by integer positions. | | | - | df.iloc(5) → выбор строки с индексом 5 | - | df.iloc([1, 3, 5]) → выбор строк с указанными индексами | - | df.iloc(5, 2) → выбор значения в строке 5, колонке 2 | - | df.iloc([1, 3], [0, 2]) → выбор строк 1,3 и колонок 0,2 | + | df.iloc(5) → select row with index 5 | + | df.iloc([1, 3, 5]) → select rows with specified indices | + | df.iloc(5, 2) → select value in row 5, column 2 | + | df.iloc([1, 3], [0, 2]) → select rows 1,3 and columns 0,2 | *-------------------------------------------------------------------------*/ /** @@ -75,7 +75,10 @@ export function iloc(df, rowSelector = null, colSelector = null) { // Process column selector if (colSelector === null || colSelector === undefined) { // If selector is null, select all columns - selectedColumnIndices = Array.from({ length: allColumns.length }, (_, i) => i); + selectedColumnIndices = Array.from( + { length: allColumns.length }, + (_, i) => i, + ); } else if (typeof colSelector === 'number') { // Single column index const idx = colSelector < 0 ? 
allColumns.length + colSelector : colSelector; @@ -118,16 +121,19 @@ export function iloc(df, rowSelector = null, colSelector = null) { // Create a new DataFrame instance with the same options as the original const result = new df.constructor({}, df._options); - + // For each selected column, create a Series with the appropriate type for (const col of selectedColumns) { // Get the original column data to determine its type const originalColumn = df._columns[col]; const originalArray = originalColumn.vector.__data; - const values = selectedIndices.map(index => rows[index][col]); - + const values = selectedIndices.map((index) => rows[index][col]); + // Preserve the array type if it's a typed array - if (ArrayBuffer.isView(originalArray) && !(originalArray instanceof DataView)) { + if ( + ArrayBuffer.isView(originalArray) && + !(originalArray instanceof DataView) + ) { const TypedArrayConstructor = originalArray.constructor; const typedValues = new TypedArrayConstructor(values.length); values.forEach((value, i) => { @@ -137,15 +143,14 @@ export function iloc(df, rowSelector = null, colSelector = null) { } else { result._columns[col] = createTypedSeries(values, col, df); } - + // Add to column order if (!result._order.includes(col)) { result._order.push(col); } } - + return result; } // Export the method for the pool -export default { iloc }; diff --git a/packages/core/src/methods/dataframe/filtering/loc.js b/packages/core/src/methods/dataframe/filtering/loc.js index 86f96bd..aef9e9b 100644 --- a/packages/core/src/methods/dataframe/filtering/loc.js +++ b/packages/core/src/methods/dataframe/filtering/loc.js @@ -13,7 +13,7 @@ /** * Row and column selection by label or position - * + * * @module methods/dataframe/filtering/loc */ @@ -21,7 +21,7 @@ import { createTypedArray } from '../../../data/utils/createTypedArray.js'; /** * Selects rows and columns by label or position - * + * * @param {DataFrame} df - DataFrame to select from * @param {*} rowSelector - Row selector 
(label, array of labels, predicate function, or condition object) * @param {*} colSelector - Column selector (name, array of names, or null for all columns) @@ -37,7 +37,8 @@ export function loc(df, rowSelector, colSelector) { let selectedIndices = []; // Check if DataFrame has an index set - const hasIndex = df._index !== null && df._indexMap !== undefined && df._indexMap.size > 0; + const hasIndex = + df._index !== null && df._indexMap !== undefined && df._indexMap.size > 0; if (rowSelector === null) { // If rowSelector is null, select all rows @@ -49,7 +50,7 @@ export function loc(df, rowSelector, colSelector) { // Use index for selection selectedIndices = []; selectedRows = []; - + for (const label of rowSelector) { const index = df._indexMap.get(label); if (index === undefined) { @@ -70,7 +71,10 @@ export function loc(df, rowSelector, colSelector) { selectedIndices = rowSelector; selectedRows = rows.filter((_, index) => rowSelector.includes(index)); } - } else if (typeof rowSelector === 'number' || typeof rowSelector === 'string') { + } else if ( + typeof rowSelector === 'number' || + typeof rowSelector === 'string' + ) { // If rowSelector is a number or string (index or label) if (hasIndex && typeof rowSelector === 'string') { // Use index for selection @@ -127,12 +131,19 @@ export function loc(df, rowSelector, colSelector) { // In tests, we need to return a DataFrame with rowCount property // Create a DataFrame with one row const result = df.constructor.fromRecords([selectedRows[0]], df._options); - + // Copy column metadata to preserve typed arrays for (const col of result.columns) { - if (df._columns[col] && df._columns[col].vector && df._columns[col].vector.__data) { + if ( + df._columns[col] && + df._columns[col].vector && + df._columns[col].vector.__data + ) { const originalArray = df._columns[col].vector.__data; - if (ArrayBuffer.isView(originalArray) && !(originalArray instanceof DataView)) { + if ( + ArrayBuffer.isView(originalArray) && + 
!(originalArray instanceof DataView) + ) { const TypedArrayConstructor = originalArray.constructor; // Create a new typed array with the same type const newArray = new TypedArrayConstructor([selectedRows[0][col]]); @@ -140,7 +151,7 @@ export function loc(df, rowSelector, colSelector) { } } } - + return result; } @@ -150,7 +161,10 @@ export function loc(df, rowSelector, colSelector) { for (const col of df.columns) { // Preserve array type if it's a typed array const originalArray = df._columns[col].vector.__data; - if (ArrayBuffer.isView(originalArray) && !(originalArray instanceof DataView)) { + if ( + ArrayBuffer.isView(originalArray) && + !(originalArray instanceof DataView) + ) { const TypedArrayConstructor = originalArray.constructor; emptyData[col] = new TypedArrayConstructor(0); } else { @@ -162,21 +176,29 @@ export function loc(df, rowSelector, colSelector) { // Create a new DataFrame with the same options as the original const result = df.constructor.fromRecords(selectedRows, df._options); - + // Process each column to preserve typed arrays for (const col of df.columns) { - if (df._columns[col] && df._columns[col].vector && df._columns[col].vector.__data) { + if ( + df._columns[col] && + df._columns[col].vector && + df._columns[col].vector.__data + ) { const originalArray = df._columns[col].vector.__data; if (ArrayBuffer.isView(originalArray)) { // Get column options if specified const columnOptions = df._options?.columns?.[col] || {}; - + // Extract values for this column from selected rows - const values = selectedRows.map(row => row[col]); - + const values = selectedRows.map((row) => row[col]); + // Create a new typed array with the same type - const newArray = createTypedArray(values, originalArray, columnOptions); - + const newArray = createTypedArray( + values, + originalArray, + columnOptions, + ); + // Replace the array in the result DataFrame if (result._columns[col] && result._columns[col].vector) { result._columns[col].vector.__data = newArray; 
@@ -184,7 +206,7 @@ export function loc(df, rowSelector, colSelector) { } } } - + return result; } @@ -226,7 +248,10 @@ export function loc(df, rowSelector, colSelector) { for (const col of selectedColumns) { // Preserve array type if it's a typed array const originalArray = df._columns[col].vector.__data; - if (ArrayBuffer.isView(originalArray) && !(originalArray instanceof DataView)) { + if ( + ArrayBuffer.isView(originalArray) && + !(originalArray instanceof DataView) + ) { const TypedArrayConstructor = originalArray.constructor; emptyData[col] = new TypedArrayConstructor(0); } else { @@ -235,27 +260,34 @@ export function loc(df, rowSelector, colSelector) { } return new df.constructor(emptyData, df._options); } - + // If only one row and one column are selected, but we need a DataFrame - if (selectedRows.length === 1 && selectedColumns.length === 1 && typeof rowSelector === 'function') { + if ( + selectedRows.length === 1 && + selectedColumns.length === 1 && + typeof rowSelector === 'function' + ) { const singleColData = {}; const col = selectedColumns[0]; const value = selectedRows[0][col]; - + // Preserve array type if it's a typed array const originalArray = df._columns[col].vector.__data; - if (ArrayBuffer.isView(originalArray) && !(originalArray instanceof DataView)) { + if ( + ArrayBuffer.isView(originalArray) && + !(originalArray instanceof DataView) + ) { const TypedArrayConstructor = originalArray.constructor; singleColData[col] = new TypedArrayConstructor([value]); } else { singleColData[col] = [value]; } - + return new df.constructor(singleColData, df._options); } // Create a new DataFrame with only selected columns - const filteredRows = selectedRows.map(row => { + const filteredRows = selectedRows.map((row) => { const filteredRow = {}; for (const col of selectedColumns) { filteredRow[col] = row[col]; @@ -265,21 +297,25 @@ export function loc(df, rowSelector, colSelector) { // Create a new DataFrame with the same options as the original const result 
= df.constructor.fromRecords(filteredRows, df._options); - + // Process each column to preserve typed arrays for (const col of selectedColumns) { - if (df._columns[col] && df._columns[col].vector && df._columns[col].vector.__data) { + if ( + df._columns[col] && + df._columns[col].vector && + df._columns[col].vector.__data + ) { const originalArray = df._columns[col].vector.__data; if (ArrayBuffer.isView(originalArray)) { // Get column options if specified const columnOptions = df._options?.columns?.[col] || {}; - + // Extract values for this column from filtered rows - const values = filteredRows.map(row => row[col]); - + const values = filteredRows.map((row) => row[col]); + // Create a new typed array with the same type const newArray = createTypedArray(values, originalArray, columnOptions); - + // Replace the array in the result DataFrame if (result._columns[col] && result._columns[col].vector) { result._columns[col].vector.__data = newArray; @@ -287,9 +323,8 @@ export function loc(df, rowSelector, colSelector) { } } } - + return result; } // Export the loc method directly -export { loc }; diff --git a/packages/core/src/methods/dataframe/filtering/query$.js b/packages/core/src/methods/dataframe/filtering/query$.js index d2a13de..49b586d 100644 --- a/packages/core/src/methods/dataframe/filtering/query$.js +++ b/packages/core/src/methods/dataframe/filtering/query$.js @@ -87,7 +87,7 @@ export function query$(df, strings, ...values) { /** * Create a predicate function for filtering rows - * + * * @param {string} expr - Expression to evaluate * @returns {Function} - Predicate function * @private @@ -113,4 +113,3 @@ function createPredicate(expr) { } // Export the query$ method directly -export { query$ }; diff --git a/packages/core/src/methods/dataframe/filtering/where.js b/packages/core/src/methods/dataframe/filtering/where.js index 37dd417..c0529f1 100644 --- a/packages/core/src/methods/dataframe/filtering/where.js +++ 
b/packages/core/src/methods/dataframe/filtering/where.js @@ -8,21 +8,21 @@ import { validateColumn } from '../../../data/utils/validators.js'; /** Operator → predicate map */ const OPS = { - '==': (a, b) => a == b, // eslint-disable-line eqeqeq + '==': (a, b) => a == b, // eslint-disable-line eqeqeq '===': (a, b) => a === b, - '!=': (a, b) => a != b, // eslint-disable-line eqeqeq + '!=': (a, b) => a != b, // eslint-disable-line eqeqeq '!==': (a, b) => a !== b, - '>': (a, b) => a > b, - '>=': (a, b) => a >= b, - '<': (a, b) => a < b, - '<=': (a, b) => a <= b, - in: (a, b) => Array.isArray(b) && b.includes(a), - contains: (a, b) => String(a).includes(String(b)), - startsWith: (a, b) => String(a).startsWith(String(b)), - startswith: (a, b) => String(a).startsWith(String(b)), - endsWith: (a, b) => String(a).endsWith(String(b)), - endswith: (a, b) => String(a).endsWith(String(b)), - matches: (a, b) => + '>': (a, b) => a > b, + '>=': (a, b) => a >= b, + '<': (a, b) => a < b, + '<=': (a, b) => a <= b, + in: (a, b) => Array.isArray(b) && b.includes(a), + contains: (a, b) => String(a).includes(String(b)), + startsWith: (a, b) => String(a).startsWith(String(b)), + startswith: (a, b) => String(a).startsWith(String(b)), + endsWith: (a, b) => String(a).endsWith(String(b)), + endswith: (a, b) => String(a).endsWith(String(b)), + matches: (a, b) => b instanceof RegExp ? 
b.test(String(a)) : new RegExp(b).test(String(a)), }; @@ -53,10 +53,9 @@ export function where(df, column, operator, value) { // Create options for the new DataFrame with column type information const newOptions = { ...df._options }; - + // Create new DataFrame from filtered rows with preserved column types return df.constructor.fromRecords(outRows, newOptions); } // Export the where method directly -export { where }; \ No newline at end of file diff --git a/packages/core/src/methods/dataframe/index.js b/packages/core/src/methods/dataframe/index.js new file mode 100644 index 0000000..fdddd73 --- /dev/null +++ b/packages/core/src/methods/dataframe/index.js @@ -0,0 +1,13 @@ +/** + * DataFrame methods + * + * This module exports all methods for DataFrame from all subcategories. + * Methods are registered using extendDataFrame. + * + * @module methods/dataframe + */ + +// Export all methods from the subdirectories +export * from './aggregation/index.js'; +export * from './display/index.js'; +export * from './filtering/index.js'; diff --git a/packages/core/src/methods/dataframe/indexing/at.js b/packages/core/src/methods/dataframe/indexing/at.js new file mode 100644 index 0000000..baac196 --- /dev/null +++ b/packages/core/src/methods/dataframe/indexing/at.js @@ -0,0 +1,49 @@ +/*-------------------------------------------------------------------------* + | DataFrame - indexing - at() | + | | + | Get a single row from the DataFrame by position. | + | | + | df.at(5) -> returns an object representing the row at index 5. | + | df.at(5, 'age') -> not supported here: at() reads only the row index. | + *-------------------------------------------------------------------------*/ +/** + * Returns a row at the specified index.
+ * `df.at(5)` -> returns an object representing the row at index 5. + * + * @param {import('../../../data/model/DataFrame.js').DataFrame} df + * @param {number} index - Row index to select + * @returns {Object} - Object representing the selected row + * @throws {Error} If index is invalid or out of bounds + */ +export function at(df, index) { + // Validate index is an integer + if (!Number.isInteger(index)) { + throw new Error( + `Index must be an integer, got ${typeof index === 'number' ? index : typeof index}`, + ); + } + + // Validate index is not negative + if (index < 0) { + throw new Error(`Index out of bounds: ${index} is negative`); + } + + const rows = df.toArray(); + + // Check if DataFrame is empty + if (rows.length === 0) { + throw new Error('Index out of bounds: DataFrame is empty'); + } + + // Check if index is within range + if (index >= rows.length) { + throw new Error(`Index out of bounds: ${index} >= ${rows.length}`); + } + + return rows[index]; +} + +/* -------------------------------------------------------------- * + | Pool for extendDataFrame | + * -------------------------------------------------------------- */ +export default { at }; diff --git a/packages/core/src/methods/dataframe/indexing/head.js b/packages/core/src/methods/dataframe/indexing/head.js new file mode 100644 index 0000000..8dacd23 --- /dev/null +++ b/packages/core/src/methods/dataframe/indexing/head.js @@ -0,0 +1,48 @@ +/*-------------------------------------------------------------------------* + | DataFrame - indexing - head() | + | | + | Returns the first n rows of the DataFrame. | + | | + | df.head() -> returns a new DataFrame with the first 5 rows. | + | df.head(10) -> returns a new DataFrame with the first 10 rows. | + *-------------------------------------------------------------------------*/ +/** + * Returns the first n rows of a DataFrame.
+ * `df.head(5)` -> returns a new DataFrame with the first 5 rows. + * Similar to pandas' head() function. + * + * @param {import('../../../data/model/DataFrame.js').DataFrame} df + * @param {number} [n=5] - Number of rows to return + * @param {Object} [options] - Additional options + * @param {boolean} [options.print=false] - Option for compatibility with other libraries + * @returns {DataFrame} - New DataFrame with the first n rows + * @throws {Error} If n is not a positive integer + */ +export function head(df, n = 5, options = { print: false }) { + // Validate input parameters + if (n <= 0) { + throw new Error('Number of rows must be a positive integer'); + } + if (!Number.isInteger(n)) { + throw new Error('Number of rows must be an integer'); + } + + // Get data from DataFrame + const rows = df.toArray(); + + // Select first n rows (or all if there are fewer than n) + const selectedRows = rows.slice(0, n); + + // Create a new DataFrame from the selected rows + const builder = + typeof df.constructor.fromRecords === 'function' + ? df.constructor.fromRecords + : (rows) => new df.constructor(rows); + + return builder(selectedRows); +} + +/* -------------------------------------------------------------- * + | Pool for extendDataFrame | + * -------------------------------------------------------------- */ +export default { head }; diff --git a/packages/core/src/methods/dataframe/indexing/iloc.js b/packages/core/src/methods/dataframe/indexing/iloc.js new file mode 100644 index 0000000..15ad32e --- /dev/null +++ b/packages/core/src/methods/dataframe/indexing/iloc.js @@ -0,0 +1,156 @@ +/*-------------------------------------------------------------------------* + | DataFrame - indexing - iloc() | + | | + | Selection of rows and columns from DataFrame by integer positions. 
| + | | + | df.iloc(5) -> select row with index 5 | + | df.iloc([1, 3, 5]) -> select rows with specified indices | + | df.iloc(5, 2) -> select value in row 5, column 2 | + | df.iloc([1, 3], [0, 2]) -> select rows 1,3 and columns 0,2 | + *-------------------------------------------------------------------------*/ + +/** + * Method for selecting rows and columns by indices + * + * @module methods/dataframe/indexing/iloc + */ + +// Import function for creating typed arrays +import { createTypedSeries } from '../../../data/utils/createTypedArray.js'; + +/** + * Method for selecting rows and columns by indices (similar to iloc in pandas) + * @param {DataFrame} df - DataFrame instance + * @param {number|number[]|function} rowSelector - Row index, array of indices, or predicate function + * @param {number|number[]|function} colSelector - Column index, array of indices, or predicate function + * @returns {DataFrame|*} - New DataFrame with selected rows and columns or a cell value + */ +export function iloc(df, rowSelector = null, colSelector = null) { + // Get all rows as array of objects + const rows = df.toArray(); + const allColumns = df.columns; + const rowCount = df.rowCount; + + if (rowCount === 0) { + throw new Error('Row index out of bounds'); + } + + // Indices of selected rows + let selectedIndices = []; + + // Process row selector + if (rowSelector === null || rowSelector === undefined) { + // If selector is null, select all rows + selectedIndices = Array.from({ length: rowCount }, (_, i) => i); + } else if (typeof rowSelector === 'number') { + // Single row index + const idx = rowSelector < 0 ? rowCount + rowSelector : rowSelector; + if (idx < 0 || idx >= rowCount) { + throw new Error('Row index out of bounds'); + } + selectedIndices = [idx]; + } else if (Array.isArray(rowSelector)) { + // Array of row indices + selectedIndices = rowSelector.map((idx) => { + const adjustedIdx = idx < 0 ? 
rowCount + idx : idx; + if (adjustedIdx < 0 || adjustedIdx >= rowCount) { + throw new Error('Row index out of bounds'); + } + return adjustedIdx; + }); + } else if (typeof rowSelector === 'function') { + // Function returning true/false for each row index + for (let i = 0; i < rowCount; i++) { + if (rowSelector(i)) { + selectedIndices.push(i); + } + } + } else { + throw new Error('Invalid row selector type'); + } + + // Indices of selected columns + let selectedColumnIndices = []; + + // Process column selector + if (colSelector === null || colSelector === undefined) { + // If selector is null, select all columns + selectedColumnIndices = Array.from( + { length: allColumns.length }, + (_, i) => i, + ); + } else if (typeof colSelector === 'number') { + // Single column index + const idx = colSelector < 0 ? allColumns.length + colSelector : colSelector; + if (idx < 0 || idx >= allColumns.length) { + throw new Error('Column index out of bounds'); + } + selectedColumnIndices = [idx]; + } else if (Array.isArray(colSelector)) { + // Array of column indices + selectedColumnIndices = colSelector.map((idx) => { + const adjustedIdx = idx < 0 ? 
allColumns.length + idx : idx; + if (adjustedIdx < 0 || adjustedIdx >= allColumns.length) { + throw new Error('Column index out of bounds'); + } + return adjustedIdx; + }); + } else if (typeof colSelector === 'function') { + // Function returning true/false for each column index + for (let i = 0; i < allColumns.length; i++) { + if (colSelector(i)) { + selectedColumnIndices.push(i); + } + } + } else { + throw new Error('Invalid column selector type'); + } + + // Get names of selected columns + const selectedColumns = selectedColumnIndices.map((idx) => allColumns[idx]); + + // If only one row and one column are selected, return the value + if ( + selectedIndices.length === 1 && + selectedColumns.length === 1 && + typeof rowSelector === 'number' && + typeof colSelector === 'number' + ) { + return rows[selectedIndices[0]][selectedColumns[0]]; + } + + // Create a new DataFrame instance with the same options as the original + const result = new df.constructor({}, df._options); + + // For each selected column, create a Series with the appropriate type + for (const col of selectedColumns) { + // Get the original column data to determine its type + const originalColumn = df._columns[col]; + const originalArray = originalColumn.vector.__data; + const values = selectedIndices.map((index) => rows[index][col]); + + // Preserve the array type if it's a typed array + if ( + ArrayBuffer.isView(originalArray) && + !(originalArray instanceof DataView) + ) { + const TypedArrayConstructor = originalArray.constructor; + const typedValues = new TypedArrayConstructor(values.length); + values.forEach((value, i) => { + typedValues[i] = value; + }); + result._columns[col] = createTypedSeries(typedValues, col, df); + } else { + result._columns[col] = createTypedSeries(values, col, df); + } + + // Add to column order + if (!result._order.includes(col)) { + result._order.push(col); + } + } + + return result; +} + +// Export the method for the pool diff --git 
a/packages/core/src/methods/dataframe/indexing/index.js b/packages/core/src/methods/dataframe/indexing/index.js new file mode 100644 index 0000000..8742e76 --- /dev/null +++ b/packages/core/src/methods/dataframe/indexing/index.js @@ -0,0 +1,18 @@ +/** + * DataFrame indexing methods + * + * This module exports all indexing methods for DataFrame. + * Methods are registered using extendDataFrame. + * + * @module methods/dataframe/indexing + */ + +import { DataFrame } from '../../../data/model/index.js'; +import { extendDataFrame } from '../../../data/model/extendDataFrame.js'; +import * as pool from './pool.js'; + +// Register methods for DataFrame without namespace +extendDataFrame(DataFrame.prototype, pool); + +// Export methods directly for functional style calls +export * from './pool.js'; diff --git a/packages/core/src/methods/dataframe/indexing/loc.js b/packages/core/src/methods/dataframe/indexing/loc.js new file mode 100644 index 0000000..0894d4c --- /dev/null +++ b/packages/core/src/methods/dataframe/indexing/loc.js @@ -0,0 +1,328 @@ +/*-------------------------------------------------------------------------* + | DataFrame - indexing - loc() | + | | + | df.loc(5) -> select row with index 5 | + | df.loc([1, 3, 5]) -> select rows with specified indices | + | df.loc(5, 'age') -> select value in row 5, column 'age' | + | df.loc([1, 3], ['name', 'age']) -> select rows 1,3 and columns 'name','age' | + | df.loc(row => row.age > 30) -> select rows where age > 30 | + | df.loc({city: 'Chicago'}) -> select rows where city equals 'Chicago' | + *-------------------------------------------------------------------------*/ + +/** + * Row and column selection by label or position + * + * @module methods/dataframe/filtering/loc + */ + +import { createTypedArray } from '../../../data/utils/createTypedArray.js'; + +/** + * Selects rows and columns by label or position + * + * @param {DataFrame} df - DataFrame to select from + * @param {*} rowSelector - Row selector (label, array 
/**
 * Returns the typed-array constructor backing a column of `df`, or `null`
 * when the column is stored in a plain array (or cannot be found).
 * DataView is excluded: it is an ArrayBuffer view but not an element array.
 *
 * @param {DataFrame} df - Source DataFrame
 * @param {string} col - Column name
 * @returns {Function|null} Typed-array constructor or null
 */
function typedArrayConstructorOf(df, col) {
  const data = df._columns[col]?.vector?.__data;
  return ArrayBuffer.isView(data) && !(data instanceof DataView)
    ? data.constructor
    : null;
}

/**
 * Resolves the row selector into the list of selected row objects.
 *
 * @param {DataFrame} df - Source DataFrame
 * @param {Object[]} rows - Result of `df.toArray()`
 * @param {*} rowSelector - See {@link loc}
 * @returns {Object[]} Selected rows, in the order requested by the selector
 * @throws {Error} On unknown labels, out-of-bounds positions or bad selector type
 */
function selectRows(df, rows, rowSelector) {
  const rowCount = df.rowCount;
  const hasIndex =
    df._index !== null && df._indexMap != null && df._indexMap.size > 0;

  const rowByLabel = (label) => {
    const position = df._indexMap.get(label);
    if (position === undefined) {
      throw new Error('Row label not found');
    }
    return rows[position];
  };

  const rowByPosition = (position) => {
    // Without an index only numeric positions are meaningful
    if (typeof position !== 'number') {
      throw new Error('Row label not found');
    }
    // Number.isInteger also rejects NaN and fractions, which would otherwise
    // pass the range comparison and yield an undefined row
    if (!Number.isInteger(position) || position < 0 || position >= rowCount) {
      throw new Error(
        `Row index ${position} is out of bounds for DataFrame with ${rowCount} rows`,
      );
    }
    return rows[position];
  };

  if (rowSelector === null) {
    return [...rows];
  }

  if (Array.isArray(rowSelector)) {
    // Requested order and duplicates are kept for labels and positions alike
    return rowSelector.map((item) =>
      hasIndex ? rowByLabel(item) : rowByPosition(item),
    );
  }

  if (typeof rowSelector === 'number') {
    // A single number is always treated as a position, even when an index is set
    return [rowByPosition(rowSelector)];
  }

  if (typeof rowSelector === 'string') {
    if (!hasIndex) {
      throw new Error('Row label not found');
    }
    return [rowByLabel(rowSelector)];
  }

  if (typeof rowSelector === 'function') {
    // The predicate runs exactly once per row
    return rows.filter(rowSelector);
  }

  if (typeof rowSelector === 'object') {
    const conditions = Object.entries(rowSelector);
    return rows.filter((row) =>
      conditions.every(([key, value]) => row[key] === value),
    );
  }

  throw new Error('Invalid row selector type');
}

/**
 * Resolves the column selector into a validated list of column names.
 *
 * @param {DataFrame} df - Source DataFrame
 * @param {null|string|string[]} colSelector - See {@link loc}
 * @returns {string[]} Selected column names
 * @throws {Error} On unknown columns or bad selector type
 */
function selectColumns(df, colSelector) {
  let selectedColumns;
  if (colSelector === null) {
    selectedColumns = df.columns;
  } else if (Array.isArray(colSelector)) {
    selectedColumns = colSelector;
  } else if (typeof colSelector === 'string') {
    selectedColumns = [colSelector];
  } else {
    throw new Error('Invalid column selector type');
  }

  const known = new Set(df.columns);
  for (const column of selectedColumns) {
    if (!known.has(column)) {
      throw new Error('Column not found');
    }
  }
  return selectedColumns;
}

/**
 * Builds a zero-row DataFrame that keeps the given columns and, for columns
 * backed by typed arrays, the same typed-array type.
 *
 * @param {DataFrame} df - Source DataFrame
 * @param {string[]} columns - Columns to keep
 * @returns {DataFrame} Empty DataFrame
 */
function createEmptyFrame(df, columns) {
  const emptyData = {};
  for (const col of columns) {
    const TypedArrayConstructor = typedArrayConstructorOf(df, col);
    emptyData[col] = TypedArrayConstructor ? new TypedArrayConstructor(0) : [];
  }
  return new df.constructor(emptyData, df._options);
}

/**
 * Replaces the storage of every typed column of `result` with an array
 * produced by `makeArray`, so the result keeps the source column types.
 *
 * @param {DataFrame} df - Source DataFrame
 * @param {DataFrame} result - DataFrame built from the selected records
 * @param {string[]} columns - Columns to process
 * @param {Function} makeArray - `(col, TypedArrayConstructor) => typed array`
 */
function restoreTypedColumns(df, result, columns, makeArray) {
  for (const col of columns) {
    const TypedArrayConstructor = typedArrayConstructorOf(df, col);
    const target = result._columns[col]?.vector;
    if (TypedArrayConstructor && target) {
      target.__data = makeArray(col, TypedArrayConstructor);
    }
  }
}

/**
 * Selects rows and columns by label or position.
 *
 * Row selector forms:
 *  - `null`: all rows
 *  - number: row position (must be an integer in `[0, rowCount)`)
 *  - string: row label; requires an index map on the DataFrame
 *  - array: labels when an index is set, positions otherwise; the requested
 *    order and duplicates are preserved
 *  - function: predicate, called once per row
 *  - object: `{ column: value }` pairs compared with `===`
 *
 * Column selector forms: `undefined` / `null` (all columns), a column name,
 * or an array of column names.
 *
 * NOTE(review): when a column selector is given and exactly one row and one
 * column match, a bare value is returned unless the row selector is a
 * function. For array/object selectors the return type therefore depends on
 * the data; kept as-is for backward compatibility.
 *
 * @param {DataFrame} df - DataFrame to select from
 * @param {*} rowSelector - Row selector (see above)
 * @param {*} [colSelector] - Column selector (see above)
 * @returns {DataFrame|*} New DataFrame with the selection, or a single value
 * @throws {Error} 'Row label not found', 'Row index … is out of bounds …',
 *   'Invalid row selector type', 'Invalid column selector type',
 *   'Column not found'
 */
export function loc(df, rowSelector, colSelector) {
  const rows = df.toArray();
  const selectedRows = selectRows(df, rows, rowSelector);
  const fromPredicate = typeof rowSelector === 'function';

  // Typed-array factory used for multi-row results
  const retypeFrom = (records) => (col) =>
    createTypedArray(
      records.map((row) => row[col]),
      df._columns[col].vector.__data,
      df._options?.columns?.[col] || {},
    );

  // No column selector: keep every column
  if (colSelector === undefined) {
    if (selectedRows.length === 0) {
      return createEmptyFrame(df, df.columns);
    }

    const result = df.constructor.fromRecords(selectedRows, df._options);
    if (selectedRows.length === 1 && !fromPredicate) {
      // A single directly-addressed row still comes back as a DataFrame
      restoreTypedColumns(
        df,
        result,
        df.columns,
        (col, TypedArrayConstructor) =>
          new TypedArrayConstructor([selectedRows[0][col]]),
      );
    } else {
      restoreTypedColumns(df, result, df.columns, retypeFrom(selectedRows));
    }
    return result;
  }

  const selectedColumns = selectColumns(df, colSelector);
  const isSingleCell =
    selectedRows.length === 1 && selectedColumns.length === 1;

  // One row and one column without a predicate: return the bare value
  if (isSingleCell && !fromPredicate) {
    return selectedRows[0][selectedColumns[0]];
  }

  if (selectedRows.length === 0) {
    return createEmptyFrame(df, selectedColumns);
  }

  // A predicate always yields a DataFrame, even for a single cell
  if (isSingleCell) {
    const [col] = selectedColumns;
    const value = selectedRows[0][col];
    const TypedArrayConstructor = typedArrayConstructorOf(df, col);
    const singleColData = {
      [col]: TypedArrayConstructor
        ? new TypedArrayConstructor([value])
        : [value],
    };
    return new df.constructor(singleColData, df._options);
  }

  // Project the selected rows onto the selected columns
  const filteredRows = selectedRows.map((row) =>
    Object.fromEntries(selectedColumns.map((col) => [col, row[col]])),
  );

  const result = df.constructor.fromRecords(filteredRows, df._options);
  restoreTypedColumns(df, result, selectedColumns, retypeFrom(filteredRows));
  return result;
}
/**
 * Returns a random sample of rows from a DataFrame.
 *
 * `df.sample()` -> one random row.
 * `df.sample(10)` -> 10 randomly selected rows.
 * `df.sample({ fraction: 0.1 })` -> a sample of 10% of rows.
 *
 * An empty DataFrame yields an empty DataFrame before any argument is
 * validated.
 *
 * @param {import('../../../data/model/DataFrame.js').DataFrame} df
 * @param {number|Object} [n=1] - Number of rows to sample or options object
 * @param {Object} [options] - Additional options
 * @param {number} [options.seed] - Seed for random number generator
 * @param {boolean} [options.replace=false] - Sample with replacement
 * @param {number} [options.fraction] - Fraction of rows to sample (0 < fraction <= 1)
 * @returns {DataFrame} - New DataFrame with sampled rows
 * @throws {Error} If sampling parameters are invalid
 */
export function sample(df, n, options = {}) {
  // Support the sample({ ... }) call shape. `null` must be excluded explicitly
  // because `typeof null === 'object'`.
  const optionsOnly = n !== null && typeof n === 'object';
  const settings = (optionsOnly ? n : options) ?? {};
  const requested = optionsOnly ? undefined : n;

  // Call fromRecords on the constructor itself so a static method that relies
  // on `this` keeps its receiver.
  const build = (records) =>
    typeof df.constructor.fromRecords === 'function'
      ? df.constructor.fromRecords(records)
      : new df.constructor(records);

  const rows = df.toArray();
  if (rows.length === 0) {
    return build([]);
  }

  // Determine sample size
  let sampleSize;
  if (settings.fraction !== undefined) {
    // Written as a positive check so NaN is rejected here as well
    if (!(settings.fraction > 0 && settings.fraction <= 1)) {
      throw new Error('Fraction must be in the range (0, 1]');
    }
    sampleSize = Math.round(rows.length * settings.fraction);
  } else {
    sampleSize = requested ?? 1;
  }

  if (sampleSize <= 0) {
    throw new Error('Number of rows to sample must be a positive integer');
  }
  if (!Number.isInteger(sampleSize)) {
    throw new Error('Number of rows to sample must be an integer');
  }
  if (!settings.replace && sampleSize > rows.length) {
    throw new Error(
      `Sample size (${sampleSize}) cannot be greater than number of rows (${rows.length})`,
    );
  }

  const random =
    settings.seed !== undefined
      ? createSeededRandom(settings.seed)
      : Math.random;

  const sampledRows = [];
  if (settings.replace) {
    // Sampling with replacement: independent draws
    for (let i = 0; i < sampleSize; i++) {
      sampledRows.push(rows[Math.floor(random() * rows.length)]);
    }
  } else {
    // Sampling without replacement: Fisher-Yates shuffle of all positions,
    // then take the first `sampleSize`. The full shuffle is kept so seeded
    // results stay identical to earlier versions.
    const indices = Array.from({ length: rows.length }, (_, i) => i);
    for (let i = indices.length - 1; i > 0; i--) {
      const j = Math.floor(random() * (i + 1));
      [indices[i], indices[j]] = [indices[j], indices[i]];
    }
    for (let i = 0; i < sampleSize; i++) {
      sampledRows.push(rows[indices[i]]);
    }
  }

  return build(sampledRows);
}

/**
 * Creates a seeded random number generator (simple linear congruential
 * generator).
 *
 * @param {number} seed - Seed for the random number generator
 * @returns {Function} - Function that returns a pseudo-random number in the range [0, 1)
 * @private
 */
function createSeededRandom(seed) {
  const MODULUS = 233280;
  let state = seed;
  return function next() {
    state = (state * 9301 + 49297) % MODULUS;
    // `%` keeps the sign of the dividend, so a negative seed would otherwise
    // produce negative "random" numbers and out-of-range indices.
    if (state < 0) {
      state += MODULUS;
    }
    return state / MODULUS;
  };
}

/**
 * Sets a column as the index for a DataFrame.
 *
 * The DataFrame is modified in place (`_index`, `_indexMap`) and returned.
 * When the column contains duplicate values, the last row with a given value
 * wins in the lookup map.
 *
 * @param {Object} df - DataFrame instance
 * @param {string} columnName - Name of the column to use as index
 * @returns {Object} - The same DataFrame with the specified column set as index
 * @throws {Error} 'Column not found' when a non-empty DataFrame lacks the column
 */
export function setIndex(df, columnName) {
  // For an empty DataFrame only the index name is recorded; the column is not
  // validated and the lookup map stays empty.
  if (df.rowCount === 0) {
    df._index = columnName;
    df._indexMap = new Map();
    return df;
  }

  if (!df.columns.includes(columnName)) {
    throw new Error('Column not found');
  }

  // Build the lookup map first so a failure in toArray() cannot leave the
  // DataFrame with an index name but no matching map.
  const indexMap = new Map();
  df.toArray().forEach((row, position) => {
    indexMap.set(row[columnName], position);
  });

  df._index = columnName;
  df._indexMap = indexMap;
  return df;
}
/**
 * Returns the last n rows of a DataFrame.
 *
 * `df.tail()` -> returns a new DataFrame with the last 5 rows.
 * `df.tail(10)` -> returns a new DataFrame with the last 10 rows.
 * When the DataFrame has fewer than n rows, all rows are returned.
 *
 * @param {import('../../../data/model/DataFrame.js').DataFrame} df
 * @param {number} [n=5] - Number of rows to return
 * @param {Object} [options] - Additional options (accepted but not used here)
 * @param {boolean} [options.print=false] - Option for compatibility with other libraries
 * @returns {DataFrame} - New DataFrame with the last n rows
 * @throws {Error} If n is not a positive integer
 */
export function tail(df, n = 5, options = { print: false }) {
  // Validate input parameters
  if (n <= 0) {
    throw new Error('Number of rows must be a positive integer');
  }
  if (!Number.isInteger(n)) {
    throw new Error('Number of rows must be an integer');
  }

  // slice(-n) returns the whole array when it holds fewer than n rows
  const selectedRows = df.toArray().slice(-n);

  // Call fromRecords on the constructor itself so a static method that relies
  // on `this` keeps its receiver.
  const Frame = df.constructor;
  return typeof Frame.fromRecords === 'function'
    ? Frame.fromRecords(selectedRows)
    : new Frame(selectedRows);
}

/* -------------------------------------------------------------- *
 |              Pool for extendDataFrame                          |
 * -------------------------------------------------------------- */
export default { tail };
+ * This file re-exports all DataFrame methods for use with extendDataFrame * - * @module core/methods/dataframe/pool + * @module methods/dataframe/pool */ -// Aggregation methods -export { count } from './aggregation/count.js'; -export { first } from './aggregation/first.js'; -export { last } from './aggregation/last.js'; -export { max } from './aggregation/max.js'; -export { mean } from './aggregation/mean.js'; -export { median } from './aggregation/median.js'; -export { min } from './aggregation/min.js'; -export { mode } from './aggregation/mode.js'; -export { std } from './aggregation/std.js'; -export { sum } from './aggregation/sum.js'; -export { variance } from './aggregation/variance.js'; - -// Group aggregation methods -export { - group, - groupBy, - groupAgg, - groupSum, - groupMean, - groupMin, - groupMax, - groupCount, -} from './aggregation/group.js'; - -// Display methods -export { display } from './display/display.js'; -export { print } from './display/print.js'; -export { renderTo } from './display/renderTo.js'; -export { toHTML } from './display/toHTML.js'; -export { toJupyter } from './display/toJupyter.js'; -export { toMarkdown } from './display/toMarkdown.js'; +// Re-export all methods from subdirectories +export * from './aggregation/pool.js'; +export * from './display/pool.js'; +export * from './filtering/pool.js'; diff --git a/tests/core/methods/dataframe/filtering/where.fixed.test.js b/tests/core/methods/dataframe/filtering/where.fixed.test.js index 711e9c7..fe6a8c7 100644 --- a/tests/core/methods/dataframe/filtering/where.fixed.test.js +++ b/tests/core/methods/dataframe/filtering/where.fixed.test.js @@ -8,14 +8,32 @@ import { where } from '../../../../../packages/core/src/methods/dataframe/filter // Test data for use in all tests const testData = [ - { name: 'Alice', age: 25, city: 'New York', salary: 70000, tags: ['dev', 'js'] }, - { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000, tags: ['dev', 'python'] }, - { name: 'Charlie',
age: 35, city: 'Chicago', salary: 90000, tags: ['manager'] }, + { + name: 'Alice', + age: 25, + city: 'New York', + salary: 70000, + tags: ['dev', 'js'], + }, + { + name: 'Bob', + age: 30, + city: 'San Francisco', + salary: 85000, + tags: ['dev', 'python'], + }, + { + name: 'Charlie', + age: 35, + city: 'Chicago', + salary: 90000, + tags: ['manager'], + }, ]; describe('Where Method', () => { // Add where method to DataFrame prototype - DataFrame.prototype.where = function(column, operator, value) { + DataFrame.prototype.where = function (column, operator, value) { return where(this, column, operator, value); }; @@ -29,7 +47,13 @@ describe('Where Method', () => { // Check that the filtered data is correct expect(result.rowCount).toBe(1); expect(result.toArray()).toEqual([ - { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000, tags: ['dev', 'python'] }, + { + name: 'Bob', + age: 30, + city: 'San Francisco', + salary: 85000, + tags: ['dev', 'python'], + }, ]); }); @@ -39,7 +63,13 @@ describe('Where Method', () => { // Check that the filtered data is correct expect(result.rowCount).toBe(1); expect(result.toArray()).toEqual([ - { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000, tags: ['dev', 'python'] }, + { + name: 'Bob', + age: 30, + city: 'San Francisco', + salary: 85000, + tags: ['dev', 'python'], + }, ]); }); @@ -49,8 +79,20 @@ describe('Where Method', () => { // Check that the filtered data is correct expect(result.rowCount).toBe(2); expect(result.toArray()).toEqual([ - { name: 'Alice', age: 25, city: 'New York', salary: 70000, tags: ['dev', 'js'] }, - { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000, tags: ['manager'] }, + { + name: 'Alice', + age: 25, + city: 'New York', + salary: 70000, + tags: ['dev', 'js'], + }, + { + name: 'Charlie', + age: 35, + city: 'Chicago', + salary: 90000, + tags: ['manager'], + }, ]); }); @@ -60,8 +102,20 @@ describe('Where Method', () => { // Check that the filtered data is correct 
expect(result.rowCount).toBe(2); expect(result.toArray()).toEqual([ - { name: 'Alice', age: 25, city: 'New York', salary: 70000, tags: ['dev', 'js'] }, - { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000, tags: ['manager'] }, + { + name: 'Alice', + age: 25, + city: 'New York', + salary: 70000, + tags: ['dev', 'js'], + }, + { + name: 'Charlie', + age: 35, + city: 'Chicago', + salary: 90000, + tags: ['manager'], + }, ]); }); @@ -71,8 +125,20 @@ describe('Where Method', () => { // Check that the filtered data is correct expect(result.rowCount).toBe(2); expect(result.toArray()).toEqual([ - { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000, tags: ['dev', 'python'] }, - { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000, tags: ['manager'] }, + { + name: 'Bob', + age: 30, + city: 'San Francisco', + salary: 85000, + tags: ['dev', 'python'], + }, + { + name: 'Charlie', + age: 35, + city: 'Chicago', + salary: 90000, + tags: ['manager'], + }, ]); }); @@ -82,8 +148,20 @@ describe('Where Method', () => { // Check that the filtered data is correct expect(result.rowCount).toBe(2); expect(result.toArray()).toEqual([ - { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000, tags: ['dev', 'python'] }, - { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000, tags: ['manager'] }, + { + name: 'Bob', + age: 30, + city: 'San Francisco', + salary: 85000, + tags: ['dev', 'python'], + }, + { + name: 'Charlie', + age: 35, + city: 'Chicago', + salary: 90000, + tags: ['manager'], + }, ]); }); @@ -93,7 +171,13 @@ describe('Where Method', () => { // Check that the filtered data is correct expect(result.rowCount).toBe(1); expect(result.toArray()).toEqual([ - { name: 'Alice', age: 25, city: 'New York', salary: 70000, tags: ['dev', 'js'] }, + { + name: 'Alice', + age: 25, + city: 'New York', + salary: 70000, + tags: ['dev', 'js'], + }, ]); }); @@ -103,8 +187,20 @@ describe('Where Method', () => { // Check that the filtered data is correct 
expect(result.rowCount).toBe(2); expect(result.toArray()).toEqual([ - { name: 'Alice', age: 25, city: 'New York', salary: 70000, tags: ['dev', 'js'] }, - { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000, tags: ['dev', 'python'] }, + { + name: 'Alice', + age: 25, + city: 'New York', + salary: 70000, + tags: ['dev', 'js'], + }, + { + name: 'Bob', + age: 30, + city: 'San Francisco', + salary: 85000, + tags: ['dev', 'python'], + }, ]); }); @@ -114,8 +210,20 @@ describe('Where Method', () => { // Check that the filtered data is correct expect(result.rowCount).toBe(2); expect(result.toArray()).toEqual([ - { name: 'Alice', age: 25, city: 'New York', salary: 70000, tags: ['dev', 'js'] }, - { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000, tags: ['manager'] }, + { + name: 'Alice', + age: 25, + city: 'New York', + salary: 70000, + tags: ['dev', 'js'], + }, + { + name: 'Charlie', + age: 35, + city: 'Chicago', + salary: 90000, + tags: ['manager'], + }, ]); }); @@ -125,7 +233,13 @@ describe('Where Method', () => { // Check that the filtered data is correct expect(result.rowCount).toBe(1); expect(result.toArray()).toEqual([ - { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000, tags: ['dev', 'python'] }, + { + name: 'Bob', + age: 30, + city: 'San Francisco', + salary: 85000, + tags: ['dev', 'python'], + }, ]); }); @@ -135,7 +249,13 @@ describe('Where Method', () => { // Check that the filtered data is correct expect(result.rowCount).toBe(1); expect(result.toArray()).toEqual([ - { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000, tags: ['dev', 'python'] }, + { + name: 'Bob', + age: 30, + city: 'San Francisco', + salary: 85000, + tags: ['dev', 'python'], + }, ]); }); @@ -145,7 +265,13 @@ describe('Where Method', () => { // Check that the filtered data is correct expect(result.rowCount).toBe(1); expect(result.toArray()).toEqual([ - { name: 'Alice', age: 25, city: 'New York', salary: 70000, tags: ['dev', 'js'] }, + { + name: 'Alice', + age: 
25, + city: 'New York', + salary: 70000, + tags: ['dev', 'js'], + }, ]); }); @@ -155,7 +281,13 @@ describe('Where Method', () => { // Check that the filtered data is correct expect(result.rowCount).toBe(1); expect(result.toArray()).toEqual([ - { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000, tags: ['manager'] }, + { + name: 'Charlie', + age: 35, + city: 'Chicago', + salary: 90000, + tags: ['manager'], + }, ]); }); @@ -165,7 +297,13 @@ describe('Where Method', () => { // Check that the filtered data is correct expect(result.rowCount).toBe(1); expect(result.toArray()).toEqual([ - { name: 'Alice', age: 25, city: 'New York', salary: 70000, tags: ['dev', 'js'] }, + { + name: 'Alice', + age: 25, + city: 'New York', + salary: 70000, + tags: ['dev', 'js'], + }, ]); }); @@ -174,16 +312,20 @@ describe('Where Method', () => { // Should be empty with no rows expect(result.rowCount).toBe(0); - // В новой реализации пустой DataFrame не сохраняет структуру колонок - // что является нормальным поведением для fromRecords([]) + // In the new implementation, an empty DataFrame does not save the column structure + // which is normal behavior for fromRecords([]) }); test('should throw error for non-existent column', () => { - expect(() => df.where('nonexistent', '===', 30)).toThrow("Column 'nonexistent' not found"); + expect(() => df.where('nonexistent', '===', 30)).toThrow( + "Column 'nonexistent' not found", + ); }); test('should throw error for invalid operator', () => { - expect(() => df.where('age', 'invalid', 30)).toThrow("Unsupported operator: 'invalid'"); + expect(() => df.where('age', 'invalid', 30)).toThrow( + "Unsupported operator: 'invalid'", + ); }); test('should return a new DataFrame instance', () => { @@ -206,15 +348,19 @@ describe('Where Method', () => { // Check that the result contains typed arrays expect(ArrayBuffer.isView(result._columns.age.vector.__data)).toBe(true); - expect(ArrayBuffer.isView(result._columns.salary.vector.__data)).toBe(true); - // 
Проверяем только наличие типизированных массивов, без проверки конкретных типов - // Типы могут быть разными в зависимости от реализации метода where + expect(ArrayBuffer.isView(result._columns.salary.vector.__data)).toBe( + true, + ); + // Check only the presence of typed arrays, without checking specific types + // Types may be different depending on the implementation of the where method }); test('should handle empty DataFrame', () => { const emptyDf = DataFrame.fromRecords([]); - - expect(() => emptyDf.where('age', '===', 30)).toThrow("Column 'age' not found"); + + expect(() => emptyDf.where('age', '===', 30)).toThrow( + "Column 'age' not found", + ); }); }); }); diff --git a/tests/core/methods/dataframe/indexing/at.test.js b/tests/core/methods/dataframe/indexing/at.test.js new file mode 100644 index 0000000..eace8e9 --- /dev/null +++ b/tests/core/methods/dataframe/indexing/at.test.js @@ -0,0 +1,97 @@ +/** + * Unit tests for at method + */ + +import { describe, test, expect } from 'vitest'; +import { DataFrame } from '../../../../../packages/core/src/data/model/DataFrame.js'; +import { at } from '../../../../../packages/core/src/methods/dataframe/filtering/at.js'; + +// Test data for use in all tests +const testData = [ + { name: 'Alice', age: 25, city: 'New York', salary: 70000 }, + { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000 }, + { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000 }, +]; + +describe('At Method', () => { + // Add at method to DataFrame prototype + DataFrame.prototype.at = function (index) { + return at(this, index); + }; + + describe('with standard storage', () => { + // Create DataFrame using fromRecords + const df = DataFrame.fromRecords(testData); + + test('should return row at specified index', () => { + const result = df.at(1); + + // Check that the result is the correct row + expect(result).toEqual({ + name: 'Bob', + age: 30, + city: 'San Francisco', + salary: 85000, + }); + }); + + test('should handle index 0', 
() => { + const result = df.at(0); + + expect(result).toEqual({ + name: 'Alice', + age: 25, + city: 'New York', + salary: 70000, + }); + }); + + test('should handle last index', () => { + const result = df.at(2); + + expect(result).toEqual({ + name: 'Charlie', + age: 35, + city: 'Chicago', + salary: 90000, + }); + }); + + test('should throw error for negative index', () => { + expect(() => df.at(-1)).toThrow('Index out of bounds: -1 is negative'); + }); + + test('should throw error for index >= rowCount', () => { + expect(() => df.at(3)).toThrow('Index out of bounds: 3 >= 3'); + }); + + test('should throw error for non-integer index', () => { + expect(() => df.at(1.5)).toThrow('Index must be an integer'); + }); + + test('should handle typed arrays correctly', () => { + // Create DataFrame with typed arrays + const typedDf = DataFrame.fromRecords(testData, { + columns: { + age: { type: 'int32' }, + salary: { type: 'float64' }, + }, + }); + + // Get row at index + const result = typedDf.at(1); + + // Check that the values are correct + expect(result.age).toBe(30); + expect(result.salary).toBe(85000); + }); + + test('should handle empty DataFrame', () => { + const emptyDf = DataFrame.fromRecords([]); + + expect(() => emptyDf.at(0)).toThrow( + 'Index out of bounds: DataFrame is empty', + ); + }); + }); +}); diff --git a/tests/core/methods/dataframe/indexing/head.test.js b/tests/core/methods/dataframe/indexing/head.test.js new file mode 100644 index 0000000..36fdcf5 --- /dev/null +++ b/tests/core/methods/dataframe/indexing/head.test.js @@ -0,0 +1,100 @@ +/** + * Unit tests for head method + */ + +import { describe, test, expect } from 'vitest'; +import { DataFrame } from '../../../../../packages/core/src/data/model/DataFrame.js'; +import { head } from '../../../../../packages/core/src/methods/dataframe/filtering/head.js'; + +// Test data for use in all tests +const testData = [ + { name: 'Alice', age: 25, city: 'New York', salary: 70000 }, + { name: 'Bob', age: 30, 
city: 'San Francisco', salary: 85000 }, + { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000 }, + { name: 'David', age: 40, city: 'Boston', salary: 95000 }, + { name: 'Eve', age: 45, city: 'Seattle', salary: 100000 }, + { name: 'Frank', age: 50, city: 'Denver', salary: 105000 }, + { name: 'Grace', age: 55, city: 'Miami', salary: 110000 }, +]; + +describe('Head Method', () => { + // Add head method to DataFrame prototype + DataFrame.prototype.head = function (n, options) { + return head(this, n, options); + }; + + describe('with standard storage', () => { + // Create DataFrame using fromRecords + const df = DataFrame.fromRecords(testData); + + test('should return first 5 rows by default', () => { + const result = df.head(); + + // Check that the result has 5 rows + expect(result.rowCount).toBe(5); + expect(result.toArray()).toEqual(testData.slice(0, 5)); + }); + + test('should return specified number of rows', () => { + const result = df.head(3); + + // Check that the result has 3 rows + expect(result.rowCount).toBe(3); + expect(result.toArray()).toEqual(testData.slice(0, 3)); + }); + + test('should handle n greater than number of rows', () => { + const result = df.head(10); + + // Should return all rows + expect(result.rowCount).toBe(testData.length); + expect(result.toArray()).toEqual(testData); + }); + + test('should throw error for negative n', () => { + expect(() => df.head(-1)).toThrow( + 'Number of rows must be a positive integer', + ); + }); + + test('should throw error for non-integer n', () => { + expect(() => df.head(2.5)).toThrow('Number of rows must be an integer'); + }); + + test('should return a new DataFrame instance', () => { + const result = df.head(3); + expect(result).toBeInstanceOf(DataFrame); + expect(result).not.toBe(df); // Should be a new instance + }); + + test('should preserve typed arrays', () => { + // Create DataFrame with typed arrays + const typedDf = DataFrame.fromRecords(testData, { + columns: { + age: { type: 'int32' }, + 
salary: { type: 'float64' }, + }, + }); + + // Get head of the data + const result = typedDf.head(3); + + // Check that the result has the correct columns and data + expect(result.columns.sort()).toEqual( + ['age', 'city', 'name', 'salary'].sort(), + ); + + // Check that the data is preserved correctly (using the public API) + const ageCol = result.col('age'); + const salaryCol = result.col('salary'); + expect(ageCol.toArray()).toEqual([25, 30, 35]); + expect(salaryCol.toArray()).toEqual([70000, 85000, 90000]); + }); + + test('should accept options object', () => { + // The print option is for API compatibility and doesn't affect the result + const result = df.head(3, { print: true }); + expect(result.rowCount).toBe(3); + }); + }); +}); diff --git a/tests/core/methods/dataframe/indexing/iloc.test.js b/tests/core/methods/dataframe/indexing/iloc.test.js new file mode 100644 index 0000000..b68fd08 --- /dev/null +++ b/tests/core/methods/dataframe/indexing/iloc.test.js @@ -0,0 +1,141 @@ +/** + * Unit tests for iloc method + */ + +import { describe, test, expect } from 'vitest'; +import { DataFrame } from '../../../../../packages/core/src/data/model/DataFrame.js'; +import { iloc } from '../../../../../packages/core/src/methods/dataframe/filtering/iloc.js'; + +// Test data for use in all tests +const testData = [ + { name: 'Alice', age: 25, city: 'New York', salary: 70000 }, + { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000 }, + { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000 }, + { name: 'David', age: 40, city: 'Boston', salary: 95000 }, + { name: 'Eve', age: 45, city: 'Seattle', salary: 100000 }, +]; + +describe('Iloc Method', () => { + // Add iloc method to DataFrame prototype + DataFrame.prototype.iloc = function (rowSelector, columnSelector) { + return iloc(this, rowSelector, columnSelector); + }; + + describe('with standard storage', () => { + // Create DataFrame using fromRecords + const df = DataFrame.fromRecords(testData); + + test('should 
select rows by integer index', () => { + const result = df.iloc(1); + + // Check that the result is a DataFrame with one row + expect(result.rowCount).toBe(1); + expect(result.toArray()).toEqual([testData[1]]); + }); + + test('should select rows by array of indices', () => { + const result = df.iloc([0, 2, 4]); + + // Check that the result contains the selected rows + expect(result.rowCount).toBe(3); + expect(result.toArray()).toEqual([testData[0], testData[2], testData[4]]); + }); + + test('should select rows by predicate function', () => { + const result = df.iloc((i) => i % 2 === 0); + + // Should select rows at indices 0, 2, 4 + expect(result.rowCount).toBe(3); + expect(result.toArray()).toEqual([testData[0], testData[2], testData[4]]); + }); + + test('should select columns by integer index', () => { + const result = df.iloc(null, 1); + + // Should select the 'age' column for all rows + expect(result.columns).toEqual(['age']); + expect(result.rowCount).toBe(5); + expect(result.col('age').toArray()).toEqual([25, 30, 35, 40, 45]); + }); + + test('should select columns by array of indices', () => { + const result = df.iloc(null, [0, 2]); + + // Should select the 'name' and 'city' columns + expect(result.columns.sort()).toEqual(['city', 'name'].sort()); + expect(result.rowCount).toBe(5); + }); + + test('should select rows and columns by indices', () => { + const result = df.iloc([1, 3], [0, 2]); + + // Should select rows 1 and 3, columns 'name' and 'city' + expect(result.rowCount).toBe(2); + expect(result.columns.sort()).toEqual(['city', 'name'].sort()); + expect(result.toArray()).toEqual([ + { name: 'Bob', city: 'San Francisco' }, + { name: 'David', city: 'Boston' }, + ]); + }); + + test('should handle null for rows to select all rows', () => { + const result = df.iloc(null, 1); + + // Should select all rows, but only the 'age' column + expect(result.rowCount).toBe(5); + expect(result.columns).toEqual(['age']); + }); + + test('should handle null for columns to 
select all columns', () => { + const result = df.iloc(2, null); + + // Should select row 2, all columns + expect(result.rowCount).toBe(1); + expect(result.columns.sort()).toEqual( + ['age', 'city', 'name', 'salary'].sort(), + ); + expect(result.toArray()).toEqual([testData[2]]); + }); + + test('should throw error for out of bounds row index', () => { + expect(() => df.iloc(10)).toThrow('Row index out of bounds'); + }); + + test('should throw error for out of bounds column index', () => { + expect(() => df.iloc(null, 10)).toThrow('Column index out of bounds'); + }); + + test('should throw error for invalid row selector type', () => { + expect(() => df.iloc('invalid')).toThrow('Invalid row selector type'); + }); + + test('should throw error for invalid column selector type', () => { + expect(() => df.iloc(null, 'invalid')).toThrow( + 'Invalid column selector type', + ); + }); + + test('should preserve typed arrays', () => { + // Create DataFrame with typed arrays + const typedDf = DataFrame.fromRecords(testData, { + columns: { + age: { type: 'int32' }, + salary: { type: 'float64' }, + }, + }); + + // Select rows and columns + const result = typedDf.iloc([1, 3], [1, 3]); + + // Check that the result contains typed arrays + expect(result._columns.age.vector.__data).toBeInstanceOf(Int32Array); + expect(result._columns.salary.vector.__data).toBeInstanceOf(Float64Array); + }); + + test('should handle empty DataFrame', () => { + const emptyDf = DataFrame.fromRecords([]); + + expect(() => emptyDf.iloc(0)).toThrow('Row index out of bounds'); + }); + }); +}); diff --git a/tests/core/methods/dataframe/indexing/loc.test.js b/tests/core/methods/dataframe/indexing/loc.test.js new file mode 100644 index 0000000..be67d2b --- /dev/null +++ b/tests/core/methods/dataframe/indexing/loc.test.js @@ -0,0 +1,159 @@ +/** + * Unit tests for loc method + */ + +import { describe, test, expect } from 'vitest'; +import { DataFrame } from 
'../../../../../packages/core/src/data/model/DataFrame.js'; +import { loc } from '../../../../../packages/core/src/methods/dataframe/filtering/loc.js'; + +// Test data for use in all tests +const testData = [ + { id: 'a1', name: 'Alice', age: 25, city: 'New York', salary: 70000 }, + { id: 'b2', name: 'Bob', age: 30, city: 'San Francisco', salary: 85000 }, + { id: 'c3', name: 'Charlie', age: 35, city: 'Chicago', salary: 90000 }, + { id: 'd4', name: 'David', age: 40, city: 'Boston', salary: 95000 }, + { id: 'e5', name: 'Eve', age: 45, city: 'Seattle', salary: 100000 }, +]; + +describe('Loc Method', () => { + // Add loc method to DataFrame prototype + DataFrame.prototype.loc = function (rowSelector, columnSelector) { + return loc(this, rowSelector, columnSelector); + }; + + describe('with standard storage', () => { + // Create DataFrame using fromRecords with id as index + const df = DataFrame.fromRecords(testData); + + // Set index to 'id' column + df.setIndex('id'); + + test('should select rows by label', () => { + const result = df.loc('b2'); + + // Check that the result is a DataFrame with one row + expect(result.rowCount).toBe(1); + expect(result.toArray()[0].name).toBe('Bob'); + }); + + test('should select rows by array of labels', () => { + const result = df.loc(['a1', 'c3', 'e5']); + + // Check that the result contains the selected rows + expect(result.rowCount).toBe(3); + expect(result.toArray().map((r) => r.name)).toEqual([ + 'Alice', + 'Charlie', + 'Eve', + ]); + }); + + test('should select rows by predicate function', () => { + const result = df.loc((row) => row.age > 30); + + // Should select rows with age > 30 + expect(result.rowCount).toBe(3); + expect(result.toArray().map((r) => r.name)).toEqual([ + 'Charlie', + 'David', + 'Eve', + ]); + }); + + test('should select rows by condition object', () => { + const result = df.loc({ city: 'Chicago' }); + + // Should select rows where city is Chicago + expect(result.rowCount).toBe(1); + 
expect(result.toArray()[0].name).toBe('Charlie'); + }); + + test('should select columns by name', () => { + const result = df.loc(null, 'age'); + + // Should select the 'age' column for all rows + expect(result.columns).toEqual(['age']); + expect(result.rowCount).toBe(5); + expect(result.col('age').toArray()).toEqual([25, 30, 35, 40, 45]); + }); + + test('should select columns by array of names', () => { + const result = df.loc(null, ['name', 'city']); + + // Should select the 'name' and 'city' columns + expect(result.columns.sort()).toEqual(['city', 'name'].sort()); + expect(result.rowCount).toBe(5); + }); + + test('should select rows and columns by labels', () => { + const result = df.loc(['b2', 'd4'], ['name', 'city']); + + // Should select rows with ids 'b2' and 'd4', columns 'name' and 'city' + expect(result.rowCount).toBe(2); + expect(result.columns.sort()).toEqual(['city', 'name'].sort()); + expect(result.toArray()).toEqual([ + { name: 'Bob', city: 'San Francisco' }, + { name: 'David', city: 'Boston' }, + ]); + }); + + test('should handle null for rows to select all rows', () => { + const result = df.loc(null, 'age'); + + // Should select all rows, but only the 'age' column + expect(result.rowCount).toBe(5); + expect(result.columns).toEqual(['age']); + }); + + test('should handle null for columns to select all columns', () => { + const result = df.loc('c3', null); + + // Should select row with id 'c3', all columns + expect(result.rowCount).toBe(1); + expect(result.columns.length).toBe(5); // id, name, age, city, salary + expect(result.toArray()[0].name).toBe('Charlie'); + }); + + test('should throw error for non-existent row label', () => { + expect(() => df.loc('z9')).toThrow('Row label not found'); + }); + + test('should throw error for non-existent column label', () => { + expect(() => df.loc(null, 'country')).toThrow('Column not found'); + }); + + test('should preserve typed arrays', () => { + // Create DataFrame with typed arrays + const typedDf = 
DataFrame.fromRecords(testData, { + columns: { + age: { type: 'int32' }, + salary: { type: 'float64' }, + }, + }); + typedDf.setIndex('id'); + + // Select rows and columns + const result = typedDf.loc(['b2', 'd4'], ['age', 'salary']); + + // Check that the result contains typed arrays + expect(result._columns.age.vector.__data).toBeInstanceOf(Int32Array); + expect(result._columns.salary.vector.__data).toBeInstanceOf(Float64Array); + }); + + test('should handle empty DataFrame', () => { + const emptyDf = DataFrame.fromRecords([]); + emptyDf.setIndex('id'); + + expect(() => emptyDf.loc('a1')).toThrow('Row label not found'); + }); + + test('should handle DataFrame without index', () => { + const dfNoIndex = DataFrame.fromRecords(testData); + + // Should use row number as index + const result = dfNoIndex.loc(2); + expect(result.rowCount).toBe(1); + expect(result.toArray()[0].name).toBe('Charlie'); + }); + }); +}); diff --git a/tests/core/methods/dataframe/indexing/sample.test.js b/tests/core/methods/dataframe/indexing/sample.test.js new file mode 100644 index 0000000..e89fb7b --- /dev/null +++ b/tests/core/methods/dataframe/indexing/sample.test.js @@ -0,0 +1,175 @@ +/** + * Unit tests for sample method + */ + +import { describe, test, expect } from 'vitest'; +import { DataFrame } from '../../../../../packages/core/src/data/model/DataFrame.js'; +import { sample } from '../../../../../packages/core/src/methods/dataframe/filtering/sample.js'; + +// Test data for use in all tests +const testData = [ + { name: 'Alice', age: 25, city: 'New York', salary: 70000 }, + { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000 }, + { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000 }, + { name: 'David', age: 40, city: 'Boston', salary: 95000 }, + { name: 'Eve', age: 45, city: 'Seattle', salary: 100000 }, + { name: 'Frank', age: 50, city: 'Denver', salary: 105000 }, + { name: 'Grace', age: 55, city: 'Miami', salary: 110000 }, +]; + +describe('Sample Method', () => { + 
// Add sample method to DataFrame prototype + DataFrame.prototype.sample = function (n, options) { + return sample(this, n, options); + }; + + describe('with standard storage', () => { + // Create DataFrame using fromRecords + const df = DataFrame.fromRecords(testData); + + test('should sample 1 row by default', () => { + const result = df.sample(); + + // Check that the result has 1 row + expect(result.rowCount).toBe(1); + // The row should be one of the original rows + const resultRow = result.toArray()[0]; + expect( + testData.some( + (row) => + row.name === resultRow.name && + row.age === resultRow.age && + row.city === resultRow.city && + row.salary === resultRow.salary, + ), + ).toBe(true); + }); + + test('should sample specified number of rows', () => { + const result = df.sample(3); + + // Check that the result has 3 rows + expect(result.rowCount).toBe(3); + + // Each row should be one of the original rows + const resultRows = result.toArray(); + for (const resultRow of resultRows) { + expect( + testData.some( + (row) => + row.name === resultRow.name && + row.age === resultRow.age && + row.city === resultRow.city && + row.salary === resultRow.salary, + ), + ).toBe(true); + } + }); + + test('should sample by fraction', () => { + const result = df.sample({ fraction: 0.5 }); + + // Check that the result has approximately half the rows + // Due to rounding, it might be 3 or 4 rows for 7 total rows + expect(result.rowCount).toBeGreaterThanOrEqual(3); + expect(result.rowCount).toBeLessThanOrEqual(4); + }); + + test('should throw error for invalid fraction', () => { + expect(() => df.sample({ fraction: 0 })).toThrow( + 'Fraction must be in the range (0, 1]', + ); + expect(() => df.sample({ fraction: 1.5 })).toThrow( + 'Fraction must be in the range (0, 1]', + ); + }); + + test('should throw error for negative n', () => { + expect(() => df.sample(-1)).toThrow( + 'Number of rows to sample must be a positive integer', + ); + }); + + test('should throw error for 
non-integer n', () => { + expect(() => df.sample(2.5)).toThrow( + 'Number of rows to sample must be an integer', + ); + }); + + test('should throw error when sampling without replacement and n > rows', () => { + expect(() => df.sample(10)).toThrow( + 'Sample size (10) cannot be greater than number of rows (7)', + ); + }); + + test('should allow sampling with replacement and n > rows', () => { + const result = df.sample(10, { replace: true }); + expect(result.rowCount).toBe(10); + }); + + test('should return a new DataFrame instance', () => { + const result = df.sample(3); + expect(result).toBeInstanceOf(DataFrame); + expect(result).not.toBe(df); // Should be a new instance + }); + + test('should preserve typed arrays', () => { + // Create DataFrame with typed arrays + const typedDf = DataFrame.fromRecords(testData, { + columns: { + age: { type: 'int32' }, + salary: { type: 'float64' }, + }, + }); + + // Sample the data with a fixed seed for deterministic results + const result = typedDf.sample(3, { seed: 42 }); + + // Check that the result has the correct columns + expect(result.columns.sort()).toEqual( + ['age', 'city', 'name', 'salary'].sort(), + ); + + // Check that the data is preserved correctly (using the public API) + const ageCol = result.col('age'); + const salaryCol = result.col('salary'); + + // We can't check exact values since they depend on the random seed implementation + // But we can check that the arrays have the right length and are of the right type + expect(ageCol.toArray().length).toBe(3); + expect(salaryCol.toArray().length).toBe(3); + + // Check that all values are from the original dataset + const originalAges = testData.map((row) => row.age); + const originalSalaries = testData.map((row) => row.salary); + + ageCol.toArray().forEach((value) => { + expect(originalAges).toContain(value); + }); + + salaryCol.toArray().forEach((value) => { + expect(originalSalaries).toContain(value); + }); + }); + + test('should produce deterministic results 
with seed', () => { + // Sample with the same seed should produce the same results + const sample1 = df.sample(3, { seed: 42 }); + const sample2 = df.sample(3, { seed: 42 }); + + // Compare the sampled rows + const rows1 = sample1.toArray(); + const rows2 = sample2.toArray(); + + expect(rows1).toEqual(rows2); + }); + + test('should handle empty DataFrame', () => { + const emptyDf = DataFrame.fromRecords([]); + const result = emptyDf.sample(); + + expect(result.rowCount).toBe(0); + expect(result.columns).toEqual([]); + }); + }); +}); diff --git a/tests/core/methods/dataframe/indexing/tail.test.js b/tests/core/methods/dataframe/indexing/tail.test.js new file mode 100644 index 0000000..3a7ecb4 --- /dev/null +++ b/tests/core/methods/dataframe/indexing/tail.test.js @@ -0,0 +1,100 @@ +/** + * Unit tests for tail method + */ + +import { describe, test, expect } from 'vitest'; +import { DataFrame } from '../../../../../packages/core/src/data/model/DataFrame.js'; +import { tail } from '../../../../../packages/core/src/methods/dataframe/filtering/tail.js'; + +// Test data for use in all tests +const testData = [ + { name: 'Alice', age: 25, city: 'New York', salary: 70000 }, + { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000 }, + { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000 }, + { name: 'David', age: 40, city: 'Boston', salary: 95000 }, + { name: 'Eve', age: 45, city: 'Seattle', salary: 100000 }, + { name: 'Frank', age: 50, city: 'Denver', salary: 105000 }, + { name: 'Grace', age: 55, city: 'Miami', salary: 110000 }, +]; + +describe('Tail Method', () => { + // Add tail method to DataFrame prototype + DataFrame.prototype.tail = function (n, options) { + return tail(this, n, options); + }; + + describe('with standard storage', () => { + // Create DataFrame using fromRecords + const df = DataFrame.fromRecords(testData); + + test('should return last 5 rows by default', () => { + const result = df.tail(); + + // Check that the result has 5 rows + 
expect(result.rowCount).toBe(5); + expect(result.toArray()).toEqual(testData.slice(-5)); + }); + + test('should return specified number of rows from the end', () => { + const result = df.tail(3); + + // Check that the result has 3 rows + expect(result.rowCount).toBe(3); + expect(result.toArray()).toEqual(testData.slice(-3)); + }); + + test('should handle n greater than number of rows', () => { + const result = df.tail(10); + + // Should return all rows + expect(result.rowCount).toBe(testData.length); + expect(result.toArray()).toEqual(testData); + }); + + test('should throw error for negative n', () => { + expect(() => df.tail(-1)).toThrow( + 'Number of rows must be a positive integer', + ); + }); + + test('should throw error for non-integer n', () => { + expect(() => df.tail(2.5)).toThrow('Number of rows must be an integer'); + }); + + test('should return a new DataFrame instance', () => { + const result = df.tail(3); + expect(result).toBeInstanceOf(DataFrame); + expect(result).not.toBe(df); // Should be a new instance + }); + + test('should preserve typed arrays', () => { + // Create DataFrame with typed arrays + const typedDf = DataFrame.fromRecords(testData, { + columns: { + age: { type: 'int32' }, + salary: { type: 'float64' }, + }, + }); + + // Get tail of the data + const result = typedDf.tail(3); + + // Check that the result has the correct columns and data + expect(result.columns.sort()).toEqual( + ['age', 'city', 'name', 'salary'].sort(), + ); + + // Check that the data is preserved correctly (using the public API) + const ageCol = result.col('age'); + const salaryCol = result.col('salary'); + expect(ageCol.toArray()).toEqual([45, 50, 55]); + expect(salaryCol.toArray()).toEqual([100000, 105000, 110000]); + }); + + test('should accept options object', () => { + // The print option is for API compatibility and doesn't affect the result + const result = df.tail(3, { print: true }); + expect(result.rowCount).toBe(3); + }); + }); +});