diff --git a/packages/core/src/methods/dataframe/display/index.js b/packages/core/src/methods/dataframe/display/index.js
new file mode 100644
index 0000000..a2c5681
--- /dev/null
+++ b/packages/core/src/methods/dataframe/display/index.js
@@ -0,0 +1,17 @@
+/**
+ * DataFrame display methods
+ *
+ * This module exports all display methods for DataFrame.
+ * Methods are registered using extendDataFrame.
+ *
+ * @module methods/dataframe/display
+ */
+
+import { DataFrame } from '../../../data/model/index.js';
+import { extendDataFrame } from '../../../data/model/extendDataFrame.js';
+import * as pool from './pool.js';
+
+extendDataFrame(DataFrame.prototype, pool); // without namespace — base display methods
+
+// export directly (so that you can call display(df) if needed)
+export * from './pool.js';
diff --git a/packages/core/src/methods/dataframe/filtering/expr$.js b/packages/core/src/methods/dataframe/filtering/expr$.js
index 81d4a92..0e102bd 100644
--- a/packages/core/src/methods/dataframe/filtering/expr$.js
+++ b/packages/core/src/methods/dataframe/filtering/expr$.js
@@ -47,46 +47,52 @@ export function expr$(df, strings, ...values) {
if (filteredRows.length === 0) {
// Create a new DataFrame instance with the same options as the original
const result = new df.constructor({}, df._options);
-
+
// For each column, create a Series with the appropriate type
for (const col of allColumns) {
// Get the original column data to determine its type
const originalColumn = df._columns[col];
const originalArray = originalColumn.vector.__data;
-
+
// Create an empty array with the same type
- if (ArrayBuffer.isView(originalArray) && !(originalArray instanceof DataView)) {
+ if (
+ ArrayBuffer.isView(originalArray) &&
+ !(originalArray instanceof DataView)
+ ) {
const TypedArrayConstructor = originalArray.constructor;
const emptyTypedArray = new TypedArrayConstructor(0);
result._columns[col] = createTypedSeries(emptyTypedArray, col, df);
} else {
result._columns[col] = createTypedSeries([], col, df);
}
-
+
// Add to column order
if (!result._order.includes(col)) {
result._order.push(col);
}
}
-
+
return result;
}
// For non-empty results, create a new DataFrame with filtered rows
// Create a new DataFrame instance with the same options as the original
const result = new df.constructor({}, df._options);
-
+
// For each column, create a Series with the appropriate type
for (const col of allColumns) {
// Get the original column data to determine its type
const originalColumn = df._columns[col];
const originalArray = originalColumn.vector.__data;
-
+
// Extract values for this column from the filtered rows
- const values = filteredRows.map(row => row[col]);
-
+ const values = filteredRows.map((row) => row[col]);
+
// Preserve the array type if it's a typed array
- if (ArrayBuffer.isView(originalArray) && !(originalArray instanceof DataView)) {
+ if (
+ ArrayBuffer.isView(originalArray) &&
+ !(originalArray instanceof DataView)
+ ) {
const TypedArrayConstructor = originalArray.constructor;
const typedValues = new TypedArrayConstructor(values.length);
values.forEach((value, i) => {
@@ -96,19 +102,19 @@ export function expr$(df, strings, ...values) {
} else {
result._columns[col] = createTypedSeries(values, col, df);
}
-
+
// Add to column order
if (!result._order.includes(col)) {
result._order.push(col);
}
}
-
+
return result;
}
/**
* Create a predicate function for filtering rows
- *
+ *
* @param {string} expr - Expression to evaluate
* @returns {Function} - Predicate function
* @private
@@ -134,4 +140,3 @@ function createPredicate(expr) {
}
// Export the expr$ method directly
-export { expr$ };
diff --git a/packages/core/src/methods/dataframe/filtering/filter.js b/packages/core/src/methods/dataframe/filtering/filter.js
index d92e940..254e2cb 100644
--- a/packages/core/src/methods/dataframe/filtering/filter.js
+++ b/packages/core/src/methods/dataframe/filtering/filter.js
@@ -31,44 +31,50 @@ export function filter(df, predicate) {
if (filteredRows.length === 0) {
// Create a new DataFrame instance with the same options as the original
const result = new df.constructor({}, df._options);
-
+
// For each column, create a Series with the appropriate type
for (const col of allColumns) {
// Get the original column data to determine its type
const originalColumn = df._columns[col];
const originalArray = originalColumn.vector.__data;
-
+
// Create an empty array with the same type
- if (ArrayBuffer.isView(originalArray) && !(originalArray instanceof DataView)) {
+ if (
+ ArrayBuffer.isView(originalArray) &&
+ !(originalArray instanceof DataView)
+ ) {
const TypedArrayConstructor = originalArray.constructor;
const emptyTypedArray = new TypedArrayConstructor(0);
result._columns[col] = createTypedSeries(emptyTypedArray, col, df);
} else {
result._columns[col] = createTypedSeries([], col, df);
}
-
+
// Add to column order
if (!result._order.includes(col)) {
result._order.push(col);
}
}
-
+
return result;
}
// For non-empty results, create a new DataFrame with filtered rows
// Create a new DataFrame instance with the same options as the original
const result = new df.constructor({}, df._options);
-
+
// For each column, create a Series with the appropriate type
for (const col of allColumns) {
// Get the original column data to determine its type
const originalColumn = df._columns[col];
const originalArray = originalColumn.vector.__data;
- const values = filteredRows.map(row => row[col]);
-
+ const values = filteredRows.map((row) => row[col]);
+
// Preserve the array type if it's a typed array
- if (ArrayBuffer.isView(originalArray) && !(originalArray instanceof DataView)) {
+ if (
+ ArrayBuffer.isView(originalArray) &&
+ !(originalArray instanceof DataView)
+ ) {
const TypedArrayConstructor = originalArray.constructor;
const typedValues = new TypedArrayConstructor(values.length);
values.forEach((value, i) => {
@@ -78,15 +84,14 @@ export function filter(df, predicate) {
} else {
result._columns[col] = createTypedSeries(values, col, df);
}
-
+
// Add to column order
if (!result._order.includes(col)) {
result._order.push(col);
}
}
-
+
return result;
}
// Export the filter method directly
-export { filter };
diff --git a/packages/core/src/methods/dataframe/filtering/iloc.js b/packages/core/src/methods/dataframe/filtering/iloc.js
index 035e756..40bd82f 100644
--- a/packages/core/src/methods/dataframe/filtering/iloc.js
+++ b/packages/core/src/methods/dataframe/filtering/iloc.js
@@ -1,12 +1,12 @@
/*-------------------------------------------------------------------------*
| DataFrame -› filtering · iloc() |
| |
- | Выбор строк и колонок из DataFrame по целочисленным позициям. |
+ | Selection of rows and columns from DataFrame by integer positions. |
| |
- | df.iloc(5) → выбор строки с индексом 5 |
- | df.iloc([1, 3, 5]) → выбор строк с указанными индексами |
- | df.iloc(5, 2) → выбор значения в строке 5, колонке 2 |
- | df.iloc([1, 3], [0, 2]) → выбор строк 1,3 и колонок 0,2 |
+ | df.iloc(5) → select row with index 5 |
+ | df.iloc([1, 3, 5]) → select rows with specified indices |
+ | df.iloc(5, 2) → select value in row 5, column 2 |
+ | df.iloc([1, 3], [0, 2]) → select rows 1,3 and columns 0,2 |
*-------------------------------------------------------------------------*/
/**
@@ -75,7 +75,10 @@ export function iloc(df, rowSelector = null, colSelector = null) {
// Process column selector
if (colSelector === null || colSelector === undefined) {
// If selector is null, select all columns
- selectedColumnIndices = Array.from({ length: allColumns.length }, (_, i) => i);
+ selectedColumnIndices = Array.from(
+ { length: allColumns.length },
+ (_, i) => i,
+ );
} else if (typeof colSelector === 'number') {
// Single column index
const idx = colSelector < 0 ? allColumns.length + colSelector : colSelector;
@@ -118,16 +121,19 @@ export function iloc(df, rowSelector = null, colSelector = null) {
// Create a new DataFrame instance with the same options as the original
const result = new df.constructor({}, df._options);
-
+
// For each selected column, create a Series with the appropriate type
for (const col of selectedColumns) {
// Get the original column data to determine its type
const originalColumn = df._columns[col];
const originalArray = originalColumn.vector.__data;
- const values = selectedIndices.map(index => rows[index][col]);
-
+ const values = selectedIndices.map((index) => rows[index][col]);
+
// Preserve the array type if it's a typed array
- if (ArrayBuffer.isView(originalArray) && !(originalArray instanceof DataView)) {
+ if (
+ ArrayBuffer.isView(originalArray) &&
+ !(originalArray instanceof DataView)
+ ) {
const TypedArrayConstructor = originalArray.constructor;
const typedValues = new TypedArrayConstructor(values.length);
values.forEach((value, i) => {
@@ -137,15 +143,14 @@ export function iloc(df, rowSelector = null, colSelector = null) {
} else {
result._columns[col] = createTypedSeries(values, col, df);
}
-
+
// Add to column order
if (!result._order.includes(col)) {
result._order.push(col);
}
}
-
+
return result;
}
// Export the method for the pool
-export default { iloc };
diff --git a/packages/core/src/methods/dataframe/filtering/loc.js b/packages/core/src/methods/dataframe/filtering/loc.js
index 86f96bd..aef9e9b 100644
--- a/packages/core/src/methods/dataframe/filtering/loc.js
+++ b/packages/core/src/methods/dataframe/filtering/loc.js
@@ -13,7 +13,7 @@
/**
* Row and column selection by label or position
- *
+ *
* @module methods/dataframe/filtering/loc
*/
@@ -21,7 +21,7 @@ import { createTypedArray } from '../../../data/utils/createTypedArray.js';
/**
* Selects rows and columns by label or position
- *
+ *
* @param {DataFrame} df - DataFrame to select from
* @param {*} rowSelector - Row selector (label, array of labels, predicate function, or condition object)
* @param {*} colSelector - Column selector (name, array of names, or null for all columns)
@@ -37,7 +37,8 @@ export function loc(df, rowSelector, colSelector) {
let selectedIndices = [];
// Check if DataFrame has an index set
- const hasIndex = df._index !== null && df._indexMap !== undefined && df._indexMap.size > 0;
+ const hasIndex =
+ df._index !== null && df._indexMap !== undefined && df._indexMap.size > 0;
if (rowSelector === null) {
// If rowSelector is null, select all rows
@@ -49,7 +50,7 @@ export function loc(df, rowSelector, colSelector) {
// Use index for selection
selectedIndices = [];
selectedRows = [];
-
+
for (const label of rowSelector) {
const index = df._indexMap.get(label);
if (index === undefined) {
@@ -70,7 +71,10 @@ export function loc(df, rowSelector, colSelector) {
selectedIndices = rowSelector;
selectedRows = rows.filter((_, index) => rowSelector.includes(index));
}
- } else if (typeof rowSelector === 'number' || typeof rowSelector === 'string') {
+ } else if (
+ typeof rowSelector === 'number' ||
+ typeof rowSelector === 'string'
+ ) {
// If rowSelector is a number or string (index or label)
if (hasIndex && typeof rowSelector === 'string') {
// Use index for selection
@@ -127,12 +131,19 @@ export function loc(df, rowSelector, colSelector) {
// In tests, we need to return a DataFrame with rowCount property
// Create a DataFrame with one row
const result = df.constructor.fromRecords([selectedRows[0]], df._options);
-
+
// Copy column metadata to preserve typed arrays
for (const col of result.columns) {
- if (df._columns[col] && df._columns[col].vector && df._columns[col].vector.__data) {
+ if (
+ df._columns[col] &&
+ df._columns[col].vector &&
+ df._columns[col].vector.__data
+ ) {
const originalArray = df._columns[col].vector.__data;
- if (ArrayBuffer.isView(originalArray) && !(originalArray instanceof DataView)) {
+ if (
+ ArrayBuffer.isView(originalArray) &&
+ !(originalArray instanceof DataView)
+ ) {
const TypedArrayConstructor = originalArray.constructor;
// Create a new typed array with the same type
const newArray = new TypedArrayConstructor([selectedRows[0][col]]);
@@ -140,7 +151,7 @@ export function loc(df, rowSelector, colSelector) {
}
}
}
-
+
return result;
}
@@ -150,7 +161,10 @@ export function loc(df, rowSelector, colSelector) {
for (const col of df.columns) {
// Preserve array type if it's a typed array
const originalArray = df._columns[col].vector.__data;
- if (ArrayBuffer.isView(originalArray) && !(originalArray instanceof DataView)) {
+ if (
+ ArrayBuffer.isView(originalArray) &&
+ !(originalArray instanceof DataView)
+ ) {
const TypedArrayConstructor = originalArray.constructor;
emptyData[col] = new TypedArrayConstructor(0);
} else {
@@ -162,21 +176,29 @@ export function loc(df, rowSelector, colSelector) {
// Create a new DataFrame with the same options as the original
const result = df.constructor.fromRecords(selectedRows, df._options);
-
+
// Process each column to preserve typed arrays
for (const col of df.columns) {
- if (df._columns[col] && df._columns[col].vector && df._columns[col].vector.__data) {
+ if (
+ df._columns[col] &&
+ df._columns[col].vector &&
+ df._columns[col].vector.__data
+ ) {
const originalArray = df._columns[col].vector.__data;
if (ArrayBuffer.isView(originalArray)) {
// Get column options if specified
const columnOptions = df._options?.columns?.[col] || {};
-
+
// Extract values for this column from selected rows
- const values = selectedRows.map(row => row[col]);
-
+ const values = selectedRows.map((row) => row[col]);
+
// Create a new typed array with the same type
- const newArray = createTypedArray(values, originalArray, columnOptions);
-
+ const newArray = createTypedArray(
+ values,
+ originalArray,
+ columnOptions,
+ );
+
// Replace the array in the result DataFrame
if (result._columns[col] && result._columns[col].vector) {
result._columns[col].vector.__data = newArray;
@@ -184,7 +206,7 @@ export function loc(df, rowSelector, colSelector) {
}
}
}
-
+
return result;
}
@@ -226,7 +248,10 @@ export function loc(df, rowSelector, colSelector) {
for (const col of selectedColumns) {
// Preserve array type if it's a typed array
const originalArray = df._columns[col].vector.__data;
- if (ArrayBuffer.isView(originalArray) && !(originalArray instanceof DataView)) {
+ if (
+ ArrayBuffer.isView(originalArray) &&
+ !(originalArray instanceof DataView)
+ ) {
const TypedArrayConstructor = originalArray.constructor;
emptyData[col] = new TypedArrayConstructor(0);
} else {
@@ -235,27 +260,34 @@ export function loc(df, rowSelector, colSelector) {
}
return new df.constructor(emptyData, df._options);
}
-
+
// If only one row and one column are selected, but we need a DataFrame
- if (selectedRows.length === 1 && selectedColumns.length === 1 && typeof rowSelector === 'function') {
+ if (
+ selectedRows.length === 1 &&
+ selectedColumns.length === 1 &&
+ typeof rowSelector === 'function'
+ ) {
const singleColData = {};
const col = selectedColumns[0];
const value = selectedRows[0][col];
-
+
// Preserve array type if it's a typed array
const originalArray = df._columns[col].vector.__data;
- if (ArrayBuffer.isView(originalArray) && !(originalArray instanceof DataView)) {
+ if (
+ ArrayBuffer.isView(originalArray) &&
+ !(originalArray instanceof DataView)
+ ) {
const TypedArrayConstructor = originalArray.constructor;
singleColData[col] = new TypedArrayConstructor([value]);
} else {
singleColData[col] = [value];
}
-
+
return new df.constructor(singleColData, df._options);
}
// Create a new DataFrame with only selected columns
- const filteredRows = selectedRows.map(row => {
+ const filteredRows = selectedRows.map((row) => {
const filteredRow = {};
for (const col of selectedColumns) {
filteredRow[col] = row[col];
@@ -265,21 +297,25 @@ export function loc(df, rowSelector, colSelector) {
// Create a new DataFrame with the same options as the original
const result = df.constructor.fromRecords(filteredRows, df._options);
-
+
// Process each column to preserve typed arrays
for (const col of selectedColumns) {
- if (df._columns[col] && df._columns[col].vector && df._columns[col].vector.__data) {
+ if (
+ df._columns[col] &&
+ df._columns[col].vector &&
+ df._columns[col].vector.__data
+ ) {
const originalArray = df._columns[col].vector.__data;
if (ArrayBuffer.isView(originalArray)) {
// Get column options if specified
const columnOptions = df._options?.columns?.[col] || {};
-
+
// Extract values for this column from filtered rows
- const values = filteredRows.map(row => row[col]);
-
+ const values = filteredRows.map((row) => row[col]);
+
// Create a new typed array with the same type
const newArray = createTypedArray(values, originalArray, columnOptions);
-
+
// Replace the array in the result DataFrame
if (result._columns[col] && result._columns[col].vector) {
result._columns[col].vector.__data = newArray;
@@ -287,9 +323,8 @@ export function loc(df, rowSelector, colSelector) {
}
}
}
-
+
return result;
}
// Export the loc method directly
-export { loc };
diff --git a/packages/core/src/methods/dataframe/filtering/query$.js b/packages/core/src/methods/dataframe/filtering/query$.js
index d2a13de..49b586d 100644
--- a/packages/core/src/methods/dataframe/filtering/query$.js
+++ b/packages/core/src/methods/dataframe/filtering/query$.js
@@ -87,7 +87,7 @@ export function query$(df, strings, ...values) {
/**
* Create a predicate function for filtering rows
- *
+ *
* @param {string} expr - Expression to evaluate
* @returns {Function} - Predicate function
* @private
@@ -113,4 +113,3 @@ function createPredicate(expr) {
}
// Export the query$ method directly
-export { query$ };
diff --git a/packages/core/src/methods/dataframe/filtering/where.js b/packages/core/src/methods/dataframe/filtering/where.js
index 37dd417..c0529f1 100644
--- a/packages/core/src/methods/dataframe/filtering/where.js
+++ b/packages/core/src/methods/dataframe/filtering/where.js
@@ -8,21 +8,21 @@ import { validateColumn } from '../../../data/utils/validators.js';
/** Operator → predicate map */
const OPS = {
- '==': (a, b) => a == b, // eslint-disable-line eqeqeq
+ '==': (a, b) => a == b, // eslint-disable-line eqeqeq
'===': (a, b) => a === b,
- '!=': (a, b) => a != b, // eslint-disable-line eqeqeq
+ '!=': (a, b) => a != b, // eslint-disable-line eqeqeq
'!==': (a, b) => a !== b,
- '>': (a, b) => a > b,
- '>=': (a, b) => a >= b,
- '<': (a, b) => a < b,
- '<=': (a, b) => a <= b,
- in: (a, b) => Array.isArray(b) && b.includes(a),
- contains: (a, b) => String(a).includes(String(b)),
- startsWith: (a, b) => String(a).startsWith(String(b)),
- startswith: (a, b) => String(a).startsWith(String(b)),
- endsWith: (a, b) => String(a).endsWith(String(b)),
- endswith: (a, b) => String(a).endsWith(String(b)),
- matches: (a, b) =>
+ '>': (a, b) => a > b,
+ '>=': (a, b) => a >= b,
+ '<': (a, b) => a < b,
+ '<=': (a, b) => a <= b,
+ in: (a, b) => Array.isArray(b) && b.includes(a),
+ contains: (a, b) => String(a).includes(String(b)),
+ startsWith: (a, b) => String(a).startsWith(String(b)),
+ startswith: (a, b) => String(a).startsWith(String(b)),
+ endsWith: (a, b) => String(a).endsWith(String(b)),
+ endswith: (a, b) => String(a).endsWith(String(b)),
+ matches: (a, b) =>
b instanceof RegExp ? b.test(String(a)) : new RegExp(b).test(String(a)),
};
@@ -53,10 +53,9 @@ export function where(df, column, operator, value) {
// Create options for the new DataFrame with column type information
const newOptions = { ...df._options };
-
+
// Create new DataFrame from filtered rows with preserved column types
return df.constructor.fromRecords(outRows, newOptions);
}
// Export the where method directly
-export { where };
\ No newline at end of file
diff --git a/packages/core/src/methods/dataframe/index.js b/packages/core/src/methods/dataframe/index.js
new file mode 100644
index 0000000..fdddd73
--- /dev/null
+++ b/packages/core/src/methods/dataframe/index.js
@@ -0,0 +1,13 @@
+/**
+ * DataFrame methods
+ *
+ * This module exports all methods for DataFrame from all subcategories.
+ * Methods are registered using extendDataFrame.
+ *
+ * @module methods/dataframe
+ */
+
+// Экспорт всех методов из подкаталогов
+export * from './aggregation/index.js';
+export * from './display/index.js';
+export * from './filtering/index.js';
diff --git a/packages/core/src/methods/dataframe/indexing/at.js b/packages/core/src/methods/dataframe/indexing/at.js
new file mode 100644
index 0000000..baac196
--- /dev/null
+++ b/packages/core/src/methods/dataframe/indexing/at.js
@@ -0,0 +1,49 @@
+/*-------------------------------------------------------------------------*
+ | DataFrame - indexing - at() |
+ | |
+ | Get a single row or value from the DataFrame by position. |
+ | |
+ | df.at(5) -> returns an object representing the row at index 5. |
+ | df.at(5, 'age') -> returns the value at row 5, column 'age'. |
+ *-------------------------------------------------------------------------*/
+/**
+ * Returns a row at the specified index.
+ * `df.at(5)` -> returns an object representing the row at index 5.
+ *
+ * @param {import('../../../data/model/DataFrame.js').DataFrame} df
+ * @param {number} index - Row index to select
+ * @returns {Object} - Object representing the selected row
+ * @throws {Error} If index is invalid or out of bounds
+ */
+export function at(df, index) {
+ // Validate index is an integer
+ if (!Number.isInteger(index)) {
+ throw new Error(
+ `Index must be an integer, got ${typeof index === 'number' ? index : typeof index}`,
+ );
+ }
+
+ // Validate index is not negative
+ if (index < 0) {
+ throw new Error(`Index out of bounds: ${index} is negative`);
+ }
+
+ const rows = df.toArray();
+
+ // Check if DataFrame is empty
+ if (rows.length === 0) {
+ throw new Error('Index out of bounds: DataFrame is empty');
+ }
+
+ // Check if index is within range
+ if (index >= rows.length) {
+ throw new Error(`Index out of bounds: ${index} >= ${rows.length}`);
+ }
+
+ return rows[index];
+}
+
+/* -------------------------------------------------------------- *
+ | Pool for extendDataFrame |
+ * -------------------------------------------------------------- */
+export default { at };
diff --git a/packages/core/src/methods/dataframe/indexing/head.js b/packages/core/src/methods/dataframe/indexing/head.js
new file mode 100644
index 0000000..8dacd23
--- /dev/null
+++ b/packages/core/src/methods/dataframe/indexing/head.js
@@ -0,0 +1,48 @@
+/*-------------------------------------------------------------------------*
+ | DataFrame - indexing - head() |
+ | |
+ | Returns the first n rows of the DataFrame. |
+ | |
+ | df.head() -> returns a new DataFrame with the first 5 rows. |
+ | df.head(10) -> returns a new DataFrame with the first 10 rows. |
+ *-------------------------------------------------------------------------*/
+/**
+ * Returns the first n rows of a DataFrame.
+ * `df.head(5)` -> returns a new DataFrame with the first 5 rows.
+ * Similar to pandas' head() function.
+ *
+ * @param {import('../../../data/model/DataFrame.js').DataFrame} df
+ * @param {number} [n=5] - Number of rows to return
+ * @param {Object} [options] - Additional options
+ * @param {boolean} [options.print=false] - Option for compatibility with other libraries
+ * @returns {DataFrame} - New DataFrame with the first n rows
+ * @throws {Error} If n is not a positive integer
+ */
+export function head(df, n = 5, options = { print: false }) {
+ // Validate input parameters
+ if (n <= 0) {
+ throw new Error('Number of rows must be a positive integer');
+ }
+ if (!Number.isInteger(n)) {
+ throw new Error('Number of rows must be an integer');
+ }
+
+ // Get data from DataFrame
+ const rows = df.toArray();
+
+ // Select first n rows (or all if there are fewer than n)
+ const selectedRows = rows.slice(0, n);
+
+ // Create a new DataFrame from the selected rows
+ const builder =
+ typeof df.constructor.fromRecords === 'function'
+ ? df.constructor.fromRecords
+ : (rows) => new df.constructor(rows);
+
+ return builder(selectedRows);
+}
+
+/* -------------------------------------------------------------- *
+ | Pool for extendDataFrame |
+ * -------------------------------------------------------------- */
+export default { head };
diff --git a/packages/core/src/methods/dataframe/indexing/iloc.js b/packages/core/src/methods/dataframe/indexing/iloc.js
new file mode 100644
index 0000000..15ad32e
--- /dev/null
+++ b/packages/core/src/methods/dataframe/indexing/iloc.js
@@ -0,0 +1,156 @@
+/*-------------------------------------------------------------------------*
+ | DataFrame - indexing - iloc() |
+ | |
+ | Selection of rows and columns from DataFrame by integer positions. |
+ | |
+ | df.iloc(5) -> select row with index 5 |
+ | df.iloc([1, 3, 5]) -> select rows with specified indices |
+ | df.iloc(5, 2) -> select value in row 5, column 2 |
+ | df.iloc([1, 3], [0, 2]) -> select rows 1,3 and columns 0,2 |
+ *-------------------------------------------------------------------------*/
+
+/**
+ * Method for selecting rows and columns by indices
+ *
+ * @module methods/dataframe/filtering/iloc
+ */
+
+// Import function for creating typed arrays
+import { createTypedSeries } from '../../../data/utils/createTypedArray.js';
+
+/**
+ * Method for selecting rows and columns by indices (similar to iloc in pandas)
+ * @param {DataFrame} df - DataFrame instance
+ * @param {number|number[]|function} rowSelector - Row index, array of indices, or predicate function
+ * @param {number|number[]|function} colSelector - Column index, array of indices, or predicate function
+ * @returns {DataFrame|*} - New DataFrame with selected rows and columns or a cell value
+ */
+export function iloc(df, rowSelector = null, colSelector = null) {
+ // Get all rows as array of objects
+ const rows = df.toArray();
+ const allColumns = df.columns;
+ const rowCount = df.rowCount;
+
+ if (rowCount === 0) {
+ throw new Error('Row index out of bounds');
+ }
+
+ // Indices of selected rows
+ let selectedIndices = [];
+
+ // Process row selector
+ if (rowSelector === null || rowSelector === undefined) {
+ // If selector is null, select all rows
+ selectedIndices = Array.from({ length: rowCount }, (_, i) => i);
+ } else if (typeof rowSelector === 'number') {
+ // Single row index
+ const idx = rowSelector < 0 ? rowCount + rowSelector : rowSelector;
+ if (idx < 0 || idx >= rowCount) {
+ throw new Error('Row index out of bounds');
+ }
+ selectedIndices = [idx];
+ } else if (Array.isArray(rowSelector)) {
+ // Array of row indices
+ selectedIndices = rowSelector.map((idx) => {
+ const adjustedIdx = idx < 0 ? rowCount + idx : idx;
+ if (adjustedIdx < 0 || adjustedIdx >= rowCount) {
+ throw new Error('Row index out of bounds');
+ }
+ return adjustedIdx;
+ });
+ } else if (typeof rowSelector === 'function') {
+ // Function returning true/false for each row index
+ for (let i = 0; i < rowCount; i++) {
+ if (rowSelector(i)) {
+ selectedIndices.push(i);
+ }
+ }
+ } else {
+ throw new Error('Invalid row selector type');
+ }
+
+ // Indices of selected columns
+ let selectedColumnIndices = [];
+
+ // Process column selector
+ if (colSelector === null || colSelector === undefined) {
+ // If selector is null, select all columns
+ selectedColumnIndices = Array.from(
+ { length: allColumns.length },
+ (_, i) => i,
+ );
+ } else if (typeof colSelector === 'number') {
+ // Single column index
+ const idx = colSelector < 0 ? allColumns.length + colSelector : colSelector;
+ if (idx < 0 || idx >= allColumns.length) {
+ throw new Error('Column index out of bounds');
+ }
+ selectedColumnIndices = [idx];
+ } else if (Array.isArray(colSelector)) {
+ // Array of column indices
+ selectedColumnIndices = colSelector.map((idx) => {
+ const adjustedIdx = idx < 0 ? allColumns.length + idx : idx;
+ if (adjustedIdx < 0 || adjustedIdx >= allColumns.length) {
+ throw new Error('Column index out of bounds');
+ }
+ return adjustedIdx;
+ });
+ } else if (typeof colSelector === 'function') {
+ // Function returning true/false for each column index
+ for (let i = 0; i < allColumns.length; i++) {
+ if (colSelector(i)) {
+ selectedColumnIndices.push(i);
+ }
+ }
+ } else {
+ throw new Error('Invalid column selector type');
+ }
+
+ // Get names of selected columns
+ const selectedColumns = selectedColumnIndices.map((idx) => allColumns[idx]);
+
+ // If only one row and one column are selected, return the value
+ if (
+ selectedIndices.length === 1 &&
+ selectedColumns.length === 1 &&
+ typeof rowSelector === 'number' &&
+ typeof colSelector === 'number'
+ ) {
+ return rows[selectedIndices[0]][selectedColumns[0]];
+ }
+
+ // Create a new DataFrame instance with the same options as the original
+ const result = new df.constructor({}, df._options);
+
+ // For each selected column, create a Series with the appropriate type
+ for (const col of selectedColumns) {
+ // Get the original column data to determine its type
+ const originalColumn = df._columns[col];
+ const originalArray = originalColumn.vector.__data;
+ const values = selectedIndices.map((index) => rows[index][col]);
+
+ // Preserve the array type if it's a typed array
+ if (
+ ArrayBuffer.isView(originalArray) &&
+ !(originalArray instanceof DataView)
+ ) {
+ const TypedArrayConstructor = originalArray.constructor;
+ const typedValues = new TypedArrayConstructor(values.length);
+ values.forEach((value, i) => {
+ typedValues[i] = value;
+ });
+ result._columns[col] = createTypedSeries(typedValues, col, df);
+ } else {
+ result._columns[col] = createTypedSeries(values, col, df);
+ }
+
+ // Add to column order
+ if (!result._order.includes(col)) {
+ result._order.push(col);
+ }
+ }
+
+ return result;
+}
+
+// Export the method for the pool
diff --git a/packages/core/src/methods/dataframe/indexing/index.js b/packages/core/src/methods/dataframe/indexing/index.js
new file mode 100644
index 0000000..8742e76
--- /dev/null
+++ b/packages/core/src/methods/dataframe/indexing/index.js
@@ -0,0 +1,18 @@
+/**
+ * DataFrame indexing methods
+ *
+ * This module exports all indexing methods for DataFrame.
+ * Methods are registered using extendDataFrame.
+ *
+ * @module methods/dataframe/indexing
+ */
+
+import { DataFrame } from '../../../data/model/index.js';
+import { extendDataFrame } from '../../../data/model/extendDataFrame.js';
+import * as pool from './pool.js';
+
+// Register methods for DataFrame without namespace
+extendDataFrame(DataFrame.prototype, pool);
+
+// Export methods directly for functional style calls
+export * from './pool.js';
diff --git a/packages/core/src/methods/dataframe/indexing/loc.js b/packages/core/src/methods/dataframe/indexing/loc.js
new file mode 100644
index 0000000..0894d4c
--- /dev/null
+++ b/packages/core/src/methods/dataframe/indexing/loc.js
@@ -0,0 +1,328 @@
+/*-------------------------------------------------------------------------*
+ | DataFrame - indexing - loc() |
+ | |
+ | df.loc(5) -> select row with index 5 |
+ | df.loc([1, 3, 5]) -> select rows with specified indices |
+ | df.loc(5, 'age') -> select value in row 5, column 'age' |
+ | df.loc([1, 3], ['name', 'age']) -> select rows 1,3 and columns 'name','age' |
+ | df.loc(row => row.age > 30) -> select rows where age > 30 |
+ | df.loc({city: 'Chicago'}) -> select rows where city equals 'Chicago' |
+ *-------------------------------------------------------------------------*/
+
+/**
+ * Row and column selection by label or position
+ *
+ * @module methods/dataframe/filtering/loc
+ */
+
+import { createTypedArray } from '../../../data/utils/createTypedArray.js';
+
+/**
+ * Selects rows and columns by label or position
+ *
+ * @param {DataFrame} df - DataFrame to select from
+ * @param {*} rowSelector - Row selector (label, array of labels, predicate function, or condition object)
+ * @param {*} colSelector - Column selector (name, array of names, or null for all columns)
+ * @returns {DataFrame} - New DataFrame with selected rows and columns
+ */
+export function loc(df, rowSelector, colSelector) {
+ // Get data from DataFrame
+ const rows = df.toArray();
+ const rowCount = df.rowCount;
+
+ // Define rows to select
+ let selectedRows = [];
+ let selectedIndices = [];
+
+ // Check if DataFrame has an index set
+ const hasIndex =
+ df._index !== null && df._indexMap !== undefined && df._indexMap.size > 0;
+
+ if (rowSelector === null) {
+ // If rowSelector is null, select all rows
+ selectedRows = [...rows];
+ selectedIndices = Array.from({ length: rowCount }, (_, i) => i);
+ } else if (Array.isArray(rowSelector)) {
+ // If rowSelector is an array of indices or labels
+ if (hasIndex) {
+ // Use index for selection
+ selectedIndices = [];
+ selectedRows = [];
+
+ for (const label of rowSelector) {
+ const index = df._indexMap.get(label);
+ if (index === undefined) {
+ throw new Error('Row label not found');
+ }
+ selectedIndices.push(index);
+ selectedRows.push(rows[index]);
+ }
+ } else {
+ // Use numeric indices
+ for (const index of rowSelector) {
+ if (index < 0 || index >= rowCount) {
+ throw new Error(
+ `Row index ${index} is out of bounds for DataFrame with ${rowCount} rows`,
+ );
+ }
+ }
+ selectedIndices = rowSelector;
+ selectedRows = rows.filter((_, index) => rowSelector.includes(index));
+ }
+ } else if (
+ typeof rowSelector === 'number' ||
+ typeof rowSelector === 'string'
+ ) {
+ // If rowSelector is a number or string (index or label)
+ if (hasIndex && typeof rowSelector === 'string') {
+ // Use index for selection
+ const index = df._indexMap.get(rowSelector);
+ if (index === undefined) {
+ throw new Error('Row label not found');
+ }
+ selectedIndices = [index];
+ selectedRows = [rows[index]];
+ } else if (typeof rowSelector === 'number') {
+ // Use numeric index
+ if (rowSelector < 0 || rowSelector >= rowCount) {
+ throw new Error(
+ `Row index ${rowSelector} is out of bounds for DataFrame with ${rowCount} rows`,
+ );
+ }
+ selectedIndices = [rowSelector];
+ selectedRows = [rows[rowSelector]];
+ } else {
+ throw new Error('Row label not found');
+ }
+ } else if (typeof rowSelector === 'function') {
+ // If rowSelector is a predicate function
+ selectedRows = rows.filter(rowSelector);
+ selectedIndices = rows
+ .map((row, index) => (rowSelector(row) ? index : -1))
+ .filter((index) => index !== -1);
+ } else if (typeof rowSelector === 'object' && rowSelector !== null) {
+ // If rowSelector is an object with conditions
+ selectedIndices = [];
+ selectedRows = [];
+ rows.forEach((row, index) => {
+ let match = true;
+ for (const [key, value] of Object.entries(rowSelector)) {
+ if (row[key] !== value) {
+ match = false;
+ break;
+ }
+ }
+ if (match) {
+ selectedIndices.push(index);
+ selectedRows.push(row);
+ }
+ });
+ } else {
+ throw new Error('Invalid row selector type');
+ }
+
+ // If column selector is not specified, return all columns
+ if (colSelector === undefined) {
+ // If only one row is selected and rowSelector is not a function, we need to decide
+ // whether to return an object or a DataFrame with one row
+ if (selectedRows.length === 1 && typeof rowSelector !== 'function') {
+ // In tests, we need to return a DataFrame with rowCount property
+ // Create a DataFrame with one row
+ const result = df.constructor.fromRecords([selectedRows[0]], df._options);
+
+ // Copy column metadata to preserve typed arrays
+ for (const col of result.columns) {
+ if (
+ df._columns[col] &&
+ df._columns[col].vector &&
+ df._columns[col].vector.__data
+ ) {
+ const originalArray = df._columns[col].vector.__data;
+ if (
+ ArrayBuffer.isView(originalArray) &&
+ !(originalArray instanceof DataView)
+ ) {
+ const TypedArrayConstructor = originalArray.constructor;
+ // Create a new typed array with the same type
+ const newArray = new TypedArrayConstructor([selectedRows[0][col]]);
+ result._columns[col].vector.__data = newArray;
+ }
+ }
+ }
+
+ return result;
+ }
+
+ // If no results, create an empty DataFrame with the same columns
+ if (selectedRows.length === 0) {
+ const emptyData = {};
+ for (const col of df.columns) {
+ // Preserve array type if it's a typed array
+ const originalArray = df._columns[col].vector.__data;
+ if (
+ ArrayBuffer.isView(originalArray) &&
+ !(originalArray instanceof DataView)
+ ) {
+ const TypedArrayConstructor = originalArray.constructor;
+ emptyData[col] = new TypedArrayConstructor(0);
+ } else {
+ emptyData[col] = [];
+ }
+ }
+ return new df.constructor(emptyData, df._options);
+ }
+
+ // Create a new DataFrame with the same options as the original
+ const result = df.constructor.fromRecords(selectedRows, df._options);
+
+ // Process each column to preserve typed arrays
+ for (const col of df.columns) {
+ if (
+ df._columns[col] &&
+ df._columns[col].vector &&
+ df._columns[col].vector.__data
+ ) {
+ const originalArray = df._columns[col].vector.__data;
+ if (ArrayBuffer.isView(originalArray)) {
+ // Get column options if specified
+ const columnOptions = df._options?.columns?.[col] || {};
+
+ // Extract values for this column from selected rows
+ const values = selectedRows.map((row) => row[col]);
+
+ // Create a new typed array with the same type
+ const newArray = createTypedArray(
+ values,
+ originalArray,
+ columnOptions,
+ );
+
+ // Replace the array in the result DataFrame
+ if (result._columns[col] && result._columns[col].vector) {
+ result._columns[col].vector.__data = newArray;
+ }
+ }
+ }
+ }
+
+ return result;
+ }
+
+ // Define columns to select
+ let selectedColumns = [];
+
+ if (colSelector === null) {
+ // If colSelector is null, select all columns
+ selectedColumns = df.columns;
+ } else if (Array.isArray(colSelector)) {
+ // If colSelector is an array of column names
+ selectedColumns = colSelector;
+ } else if (typeof colSelector === 'string') {
+ // If colSelector is a single column name
+ selectedColumns = [colSelector];
+ } else {
+ throw new Error('Invalid column selector type');
+ }
+
+ // Check that all specified columns exist
+ for (const column of selectedColumns) {
+ if (!df.columns.includes(column)) {
+ throw new Error('Column not found');
+ }
+ }
+
+ // If only one row and one column are selected, return the value
+ if (
+ selectedRows.length === 1 &&
+ selectedColumns.length === 1 &&
+ typeof rowSelector !== 'function'
+ ) {
+ return selectedRows[0][selectedColumns[0]];
+ }
+
+ // If no results, create an empty DataFrame with selected columns
+ if (selectedRows.length === 0) {
+ const emptyData = {};
+ for (const col of selectedColumns) {
+ // Preserve array type if it's a typed array
+ const originalArray = df._columns[col].vector.__data;
+ if (
+ ArrayBuffer.isView(originalArray) &&
+ !(originalArray instanceof DataView)
+ ) {
+ const TypedArrayConstructor = originalArray.constructor;
+ emptyData[col] = new TypedArrayConstructor(0);
+ } else {
+ emptyData[col] = [];
+ }
+ }
+ return new df.constructor(emptyData, df._options);
+ }
+
+ // If only one row and one column are selected, but we need a DataFrame
+ if (
+ selectedRows.length === 1 &&
+ selectedColumns.length === 1 &&
+ typeof rowSelector === 'function'
+ ) {
+ const singleColData = {};
+ const col = selectedColumns[0];
+ const value = selectedRows[0][col];
+
+ // Preserve array type if it's a typed array
+ const originalArray = df._columns[col].vector.__data;
+ if (
+ ArrayBuffer.isView(originalArray) &&
+ !(originalArray instanceof DataView)
+ ) {
+ const TypedArrayConstructor = originalArray.constructor;
+ singleColData[col] = new TypedArrayConstructor([value]);
+ } else {
+ singleColData[col] = [value];
+ }
+
+ return new df.constructor(singleColData, df._options);
+ }
+
+ // Create a new DataFrame with only selected columns
+ const filteredRows = selectedRows.map((row) => {
+ const filteredRow = {};
+ for (const col of selectedColumns) {
+ filteredRow[col] = row[col];
+ }
+ return filteredRow;
+ });
+
+ // Create a new DataFrame with the same options as the original
+ const result = df.constructor.fromRecords(filteredRows, df._options);
+
+ // Process each column to preserve typed arrays
+ for (const col of selectedColumns) {
+ if (
+ df._columns[col] &&
+ df._columns[col].vector &&
+ df._columns[col].vector.__data
+ ) {
+ const originalArray = df._columns[col].vector.__data;
+ if (ArrayBuffer.isView(originalArray)) {
+ // Get column options if specified
+ const columnOptions = df._options?.columns?.[col] || {};
+
+ // Extract values for this column from filtered rows
+ const values = filteredRows.map((row) => row[col]);
+
+ // Create a new typed array with the same type
+ const newArray = createTypedArray(values, originalArray, columnOptions);
+
+ // Replace the array in the result DataFrame
+ if (result._columns[col] && result._columns[col].vector) {
+ result._columns[col].vector.__data = newArray;
+ }
+ }
+ }
+ }
+
+ return result;
+}
+
+// Export the loc method directly
diff --git a/packages/core/src/methods/dataframe/indexing/pool.js b/packages/core/src/methods/dataframe/indexing/pool.js
new file mode 100644
index 0000000..9610c6a
--- /dev/null
+++ b/packages/core/src/methods/dataframe/indexing/pool.js
@@ -0,0 +1,20 @@
+/**
+ * DataFrame indexing method pool
+ *
+ * This file re-exports all indexing methods for use with extendDataFrame
+ *
+ * @module methods/dataframe/indexing/pool
+ */
+
+// Row/column access methods
+export { at } from './at.js';
+export { iloc } from './iloc.js';
+export { loc } from './loc.js';
+
+// Row sampling methods
+export { sample } from './sample.js';
+export { head } from './head.js';
+export { tail } from './tail.js';
+
+// Index management
+export { setIndex } from './setIndex.js';
diff --git a/packages/core/src/methods/dataframe/indexing/sample.js b/packages/core/src/methods/dataframe/indexing/sample.js
new file mode 100644
index 0000000..4942c1a
--- /dev/null
+++ b/packages/core/src/methods/dataframe/indexing/sample.js
@@ -0,0 +1,123 @@
+/*-------------------------------------------------------------------------*
+ | DataFrame - indexing - sample() |
+ | |
+ | Returns a random sample of rows from the DataFrame. |
+ | |
+ | df.sample() -> returns a new DataFrame with a random sample of rows. |
+ | df.sample(10) -> returns a new DataFrame with 10 random rows. |
+ | df.sample({ fraction: 0.1 }) -> returns a sample of 10% of rows. |
+ *-------------------------------------------------------------------------*/
+
+/**
+ * Returns a random sample of rows from a DataFrame.
+ * `df.sample(10)` -> returns a new DataFrame with 10 randomly selected rows.
+ * `df.sample({ fraction: 0.1 })` -> returns a sample of 10% of rows.
+ *
+ * @param {import('../../../data/model/DataFrame.js').DataFrame} df
+ * @param {number|Object} n - Number of rows to sample or options object
+ * @param {Object} [options] - Additional options
+ * @param {number} [options.seed] - Seed for random number generator
+ * @param {boolean} [options.replace=false] - Sample with replacement
+ * @param {number} [options.fraction] - Fraction of rows to sample (0 < fraction <= 1)
+ * @returns {DataFrame} - New DataFrame with sampled rows
+ * @throws {Error} If sampling parameters are invalid
+ */
+export function sample(df, n, options = {}) {
+ // Handle case when n is an options object
+ if (typeof n === 'object') {
+ options = n;
+ n = undefined;
+ }
+
+ // Get data from DataFrame
+ const rows = df.toArray();
+ if (rows.length === 0) {
+ // For empty DataFrame, return an empty DataFrame with the same structure
+ const builder =
+ typeof df.constructor.fromRecords === 'function'
+ ? df.constructor.fromRecords
+ : (rows) => new df.constructor(rows);
+
+ return builder([]);
+ }
+
+ // Determine sample size
+ let sampleSize;
+ if (options.fraction !== undefined) {
+ if (options.fraction <= 0 || options.fraction > 1) {
+ throw new Error('Fraction must be in the range (0, 1]');
+ }
+ sampleSize = Math.round(rows.length * options.fraction);
+ } else {
+ sampleSize = n !== undefined ? n : 1;
+ }
+
+ // Validate sample size
+ if (sampleSize <= 0) {
+ throw new Error('Number of rows to sample must be a positive integer');
+ }
+
+ // Check that sample size is an integer
+ if (!Number.isInteger(sampleSize)) {
+ throw new Error('Number of rows to sample must be an integer');
+ }
+
+ // If sampling without replacement and sample size is greater than number of rows
+ if (!options.replace && sampleSize > rows.length) {
+ throw new Error(
+ `Sample size (${sampleSize}) cannot be greater than number of rows (${rows.length})`,
+ );
+ }
+
+ // Create random number generator with seed if specified
+ const random =
+ options.seed !== undefined ? createSeededRandom(options.seed) : Math.random;
+
+ // Sample rows
+ const sampledRows = [];
+ if (options.replace) {
+ // Sampling with replacement
+ for (let i = 0; i < sampleSize; i++) {
+ const index = Math.floor(random() * rows.length);
+ sampledRows.push(rows[index]);
+ }
+ } else {
+ // Sampling without replacement (using Fisher-Yates shuffle algorithm)
+ const indices = Array.from({ length: rows.length }, (_, i) => i);
+ for (let i = indices.length - 1; i > 0; i--) {
+ const j = Math.floor(random() * (i + 1));
+ [indices[i], indices[j]] = [indices[j], indices[i]];
+ }
+ for (let i = 0; i < sampleSize; i++) {
+ sampledRows.push(rows[indices[i]]);
+ }
+ }
+
+ // Create a new DataFrame from the sampled rows
+ const builder =
+ typeof df.constructor.fromRecords === 'function'
+ ? df.constructor.fromRecords
+ : (rows) => new df.constructor(rows);
+
+ return builder(sampledRows);
+}
+
+/**
+ * Creates a seeded random number generator
+ *
+ * @param {number} seed - Seed for the random number generator
+ * @returns {Function} - Function that returns a pseudo-random number in the range [0, 1)
+ * @private
+ */
+function createSeededRandom(seed) {
+ return function () {
+ // Simple linear congruential generator
+ seed = (seed * 9301 + 49297) % 233280;
+ return seed / 233280;
+ };
+}
+
+/* -------------------------------------------------------------- *
+ | Pool for extendDataFrame |
+ * -------------------------------------------------------------- */
+export default { sample };
diff --git a/packages/core/src/methods/dataframe/indexing/setIndex.js b/packages/core/src/methods/dataframe/indexing/setIndex.js
new file mode 100644
index 0000000..05968a2
--- /dev/null
+++ b/packages/core/src/methods/dataframe/indexing/setIndex.js
@@ -0,0 +1,41 @@
+/*-------------------------------------------------------------------------*
+ | DataFrame - indexing - setIndex() |
+ | |
+ | df.setIndex('id') -> sets 'id' column as the index |
+ *-------------------------------------------------------------------------*/
+
+/**
+ * Sets a column as the index for a DataFrame
+ *
+ * @param {Object} df - DataFrame instance
+ * @param {string} columnName - Name of the column to use as index
+ * @returns {Object} - DataFrame with the specified column set as index
+ */
+export function setIndex(df, columnName) {
+ // For empty DataFrame, just set the index column name but don't create a map
+ if (df.rowCount === 0) {
+ df._index = columnName;
+ df._indexMap = new Map();
+ return df;
+ }
+
+ // Check if the column exists
+ if (!df.columns.includes(columnName)) {
+ throw new Error('Column not found');
+ }
+
+ // Set the index column
+ df._index = columnName;
+
+ // Create a map for fast lookup by index value
+ df._indexMap = new Map();
+ const rows = df.toArray();
+ rows.forEach((row, i) => {
+ df._indexMap.set(row[columnName], i);
+ });
+
+ return df;
+}
+
+// Export object with method for the pool
+export default { setIndex };
diff --git a/packages/core/src/methods/dataframe/indexing/tail.js b/packages/core/src/methods/dataframe/indexing/tail.js
new file mode 100644
index 0000000..72d603c
--- /dev/null
+++ b/packages/core/src/methods/dataframe/indexing/tail.js
@@ -0,0 +1,49 @@
+/*-------------------------------------------------------------------------*
+ | DataFrame - indexing - tail() |
+ | |
+ | Returns the last n rows of the DataFrame. |
+ | |
+ | df.tail() -> returns a new DataFrame with the last 5 rows. |
+ | df.tail(10) -> returns a new DataFrame with the last 10 rows. |
+ *-------------------------------------------------------------------------*/
+
+/**
+ * Returns the last n rows of a DataFrame.
+ * `df.tail(5)` -> returns a new DataFrame with the last 5 rows.
+ * Similar to pandas' tail() function.
+ *
+ * @param {import('../../../data/model/DataFrame.js').DataFrame} df
+ * @param {number} [n=5] - Number of rows to return
+ * @param {Object} [options] - Additional options
+ * @param {boolean} [options.print=false] - Option for compatibility with other libraries
+ * @returns {DataFrame} - New DataFrame with the last n rows
+ * @throws {Error} If n is not a positive integer
+ */
+export function tail(df, n = 5, options = { print: false }) {
+ // Validate input parameters
+ if (n <= 0) {
+ throw new Error('Number of rows must be a positive integer');
+ }
+ if (!Number.isInteger(n)) {
+ throw new Error('Number of rows must be an integer');
+ }
+
+ // Get data from DataFrame
+ const rows = df.toArray();
+
+ // Select last n rows (or all if there are fewer than n)
+ const selectedRows = rows.slice(-n);
+
+ // Create a new DataFrame from the selected rows
+ const builder =
+ typeof df.constructor.fromRecords === 'function'
+ ? df.constructor.fromRecords
+ : (rows) => new df.constructor(rows);
+
+ return builder(selectedRows);
+}
+
+/* -------------------------------------------------------------- *
+ | Pool for extendDataFrame |
+ * -------------------------------------------------------------- */
+export default { tail };
diff --git a/packages/core/src/methods/dataframe/pool.js b/packages/core/src/methods/dataframe/pool.js
index a39b53d..1bd2148 100644
--- a/packages/core/src/methods/dataframe/pool.js
+++ b/packages/core/src/methods/dataframe/pool.js
@@ -1,41 +1,12 @@
/**
* Pool of all DataFrame methods
*
- * This file exports all DataFrame methods to be registered on the DataFrame prototype.
- * It serves as a central registry for all methods to facilitate tree-shaking.
+ * This file re-exports all DataFrame methods for use with extendDataFrame
*
- * @module core/methods/dataframe/pool
+ * @module methods/dataframe/pool
*/
-// Aggregation methods
-export { count } from './aggregation/count.js';
-export { first } from './aggregation/first.js';
-export { last } from './aggregation/last.js';
-export { max } from './aggregation/max.js';
-export { mean } from './aggregation/mean.js';
-export { median } from './aggregation/median.js';
-export { min } from './aggregation/min.js';
-export { mode } from './aggregation/mode.js';
-export { std } from './aggregation/std.js';
-export { sum } from './aggregation/sum.js';
-export { variance } from './aggregation/variance.js';
-
-// Group aggregation methods
-export {
- group,
- groupBy,
- groupAgg,
- groupSum,
- groupMean,
- groupMin,
- groupMax,
- groupCount,
-} from './aggregation/group.js';
-
-// Display methods
-export { display } from './display/display.js';
-export { print } from './display/print.js';
-export { renderTo } from './display/renderTo.js';
-export { toHTML } from './display/toHTML.js';
-export { toJupyter } from './display/toJupyter.js';
-export { toMarkdown } from './display/toMarkdown.js';
+// Реэкспорт всех методов из подкаталогов
+export * from './aggregation/pool.js';
+export * from './display/pool.js';
+export * from './filtering/pool.js';
diff --git a/tests/core/methods/dataframe/filtering/where.fixed.test.js b/tests/core/methods/dataframe/filtering/where.fixed.test.js
index 711e9c7..fe6a8c7 100644
--- a/tests/core/methods/dataframe/filtering/where.fixed.test.js
+++ b/tests/core/methods/dataframe/filtering/where.fixed.test.js
@@ -8,14 +8,32 @@ import { where } from '../../../../../packages/core/src/methods/dataframe/filter
// Test data for use in all tests
const testData = [
- { name: 'Alice', age: 25, city: 'New York', salary: 70000, tags: ['dev', 'js'] },
- { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000, tags: ['dev', 'python'] },
- { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000, tags: ['manager'] },
+ {
+ name: 'Alice',
+ age: 25,
+ city: 'New York',
+ salary: 70000,
+ tags: ['dev', 'js'],
+ },
+ {
+ name: 'Bob',
+ age: 30,
+ city: 'San Francisco',
+ salary: 85000,
+ tags: ['dev', 'python'],
+ },
+ {
+ name: 'Charlie',
+ age: 35,
+ city: 'Chicago',
+ salary: 90000,
+ tags: ['manager'],
+ },
];
describe('Where Method', () => {
// Add where method to DataFrame prototype
- DataFrame.prototype.where = function(column, operator, value) {
+ DataFrame.prototype.where = function (column, operator, value) {
return where(this, column, operator, value);
};
@@ -29,7 +47,13 @@ describe('Where Method', () => {
// Check that the filtered data is correct
expect(result.rowCount).toBe(1);
expect(result.toArray()).toEqual([
- { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000, tags: ['dev', 'python'] },
+ {
+ name: 'Bob',
+ age: 30,
+ city: 'San Francisco',
+ salary: 85000,
+ tags: ['dev', 'python'],
+ },
]);
});
@@ -39,7 +63,13 @@ describe('Where Method', () => {
// Check that the filtered data is correct
expect(result.rowCount).toBe(1);
expect(result.toArray()).toEqual([
- { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000, tags: ['dev', 'python'] },
+ {
+ name: 'Bob',
+ age: 30,
+ city: 'San Francisco',
+ salary: 85000,
+ tags: ['dev', 'python'],
+ },
]);
});
@@ -49,8 +79,20 @@ describe('Where Method', () => {
// Check that the filtered data is correct
expect(result.rowCount).toBe(2);
expect(result.toArray()).toEqual([
- { name: 'Alice', age: 25, city: 'New York', salary: 70000, tags: ['dev', 'js'] },
- { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000, tags: ['manager'] },
+ {
+ name: 'Alice',
+ age: 25,
+ city: 'New York',
+ salary: 70000,
+ tags: ['dev', 'js'],
+ },
+ {
+ name: 'Charlie',
+ age: 35,
+ city: 'Chicago',
+ salary: 90000,
+ tags: ['manager'],
+ },
]);
});
@@ -60,8 +102,20 @@ describe('Where Method', () => {
// Check that the filtered data is correct
expect(result.rowCount).toBe(2);
expect(result.toArray()).toEqual([
- { name: 'Alice', age: 25, city: 'New York', salary: 70000, tags: ['dev', 'js'] },
- { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000, tags: ['manager'] },
+ {
+ name: 'Alice',
+ age: 25,
+ city: 'New York',
+ salary: 70000,
+ tags: ['dev', 'js'],
+ },
+ {
+ name: 'Charlie',
+ age: 35,
+ city: 'Chicago',
+ salary: 90000,
+ tags: ['manager'],
+ },
]);
});
@@ -71,8 +125,20 @@ describe('Where Method', () => {
// Check that the filtered data is correct
expect(result.rowCount).toBe(2);
expect(result.toArray()).toEqual([
- { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000, tags: ['dev', 'python'] },
- { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000, tags: ['manager'] },
+ {
+ name: 'Bob',
+ age: 30,
+ city: 'San Francisco',
+ salary: 85000,
+ tags: ['dev', 'python'],
+ },
+ {
+ name: 'Charlie',
+ age: 35,
+ city: 'Chicago',
+ salary: 90000,
+ tags: ['manager'],
+ },
]);
});
@@ -82,8 +148,20 @@ describe('Where Method', () => {
// Check that the filtered data is correct
expect(result.rowCount).toBe(2);
expect(result.toArray()).toEqual([
- { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000, tags: ['dev', 'python'] },
- { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000, tags: ['manager'] },
+ {
+ name: 'Bob',
+ age: 30,
+ city: 'San Francisco',
+ salary: 85000,
+ tags: ['dev', 'python'],
+ },
+ {
+ name: 'Charlie',
+ age: 35,
+ city: 'Chicago',
+ salary: 90000,
+ tags: ['manager'],
+ },
]);
});
@@ -93,7 +171,13 @@ describe('Where Method', () => {
// Check that the filtered data is correct
expect(result.rowCount).toBe(1);
expect(result.toArray()).toEqual([
- { name: 'Alice', age: 25, city: 'New York', salary: 70000, tags: ['dev', 'js'] },
+ {
+ name: 'Alice',
+ age: 25,
+ city: 'New York',
+ salary: 70000,
+ tags: ['dev', 'js'],
+ },
]);
});
@@ -103,8 +187,20 @@ describe('Where Method', () => {
// Check that the filtered data is correct
expect(result.rowCount).toBe(2);
expect(result.toArray()).toEqual([
- { name: 'Alice', age: 25, city: 'New York', salary: 70000, tags: ['dev', 'js'] },
- { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000, tags: ['dev', 'python'] },
+ {
+ name: 'Alice',
+ age: 25,
+ city: 'New York',
+ salary: 70000,
+ tags: ['dev', 'js'],
+ },
+ {
+ name: 'Bob',
+ age: 30,
+ city: 'San Francisco',
+ salary: 85000,
+ tags: ['dev', 'python'],
+ },
]);
});
@@ -114,8 +210,20 @@ describe('Where Method', () => {
// Check that the filtered data is correct
expect(result.rowCount).toBe(2);
expect(result.toArray()).toEqual([
- { name: 'Alice', age: 25, city: 'New York', salary: 70000, tags: ['dev', 'js'] },
- { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000, tags: ['manager'] },
+ {
+ name: 'Alice',
+ age: 25,
+ city: 'New York',
+ salary: 70000,
+ tags: ['dev', 'js'],
+ },
+ {
+ name: 'Charlie',
+ age: 35,
+ city: 'Chicago',
+ salary: 90000,
+ tags: ['manager'],
+ },
]);
});
@@ -125,7 +233,13 @@ describe('Where Method', () => {
// Check that the filtered data is correct
expect(result.rowCount).toBe(1);
expect(result.toArray()).toEqual([
- { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000, tags: ['dev', 'python'] },
+ {
+ name: 'Bob',
+ age: 30,
+ city: 'San Francisco',
+ salary: 85000,
+ tags: ['dev', 'python'],
+ },
]);
});
@@ -135,7 +249,13 @@ describe('Where Method', () => {
// Check that the filtered data is correct
expect(result.rowCount).toBe(1);
expect(result.toArray()).toEqual([
- { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000, tags: ['dev', 'python'] },
+ {
+ name: 'Bob',
+ age: 30,
+ city: 'San Francisco',
+ salary: 85000,
+ tags: ['dev', 'python'],
+ },
]);
});
@@ -145,7 +265,13 @@ describe('Where Method', () => {
// Check that the filtered data is correct
expect(result.rowCount).toBe(1);
expect(result.toArray()).toEqual([
- { name: 'Alice', age: 25, city: 'New York', salary: 70000, tags: ['dev', 'js'] },
+ {
+ name: 'Alice',
+ age: 25,
+ city: 'New York',
+ salary: 70000,
+ tags: ['dev', 'js'],
+ },
]);
});
@@ -155,7 +281,13 @@ describe('Where Method', () => {
// Check that the filtered data is correct
expect(result.rowCount).toBe(1);
expect(result.toArray()).toEqual([
- { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000, tags: ['manager'] },
+ {
+ name: 'Charlie',
+ age: 35,
+ city: 'Chicago',
+ salary: 90000,
+ tags: ['manager'],
+ },
]);
});
@@ -165,7 +297,13 @@ describe('Where Method', () => {
// Check that the filtered data is correct
expect(result.rowCount).toBe(1);
expect(result.toArray()).toEqual([
- { name: 'Alice', age: 25, city: 'New York', salary: 70000, tags: ['dev', 'js'] },
+ {
+ name: 'Alice',
+ age: 25,
+ city: 'New York',
+ salary: 70000,
+ tags: ['dev', 'js'],
+ },
]);
});
@@ -174,16 +312,20 @@ describe('Where Method', () => {
// Should be empty with no rows
expect(result.rowCount).toBe(0);
- // В новой реализации пустой DataFrame не сохраняет структуру колонок
- // что является нормальным поведением для fromRecords([])
+ // In the new implementation, an empty DataFrame does not save the column structure
+ // which is normal behavior for fromRecords([])
});
test('should throw error for non-existent column', () => {
- expect(() => df.where('nonexistent', '===', 30)).toThrow("Column 'nonexistent' not found");
+ expect(() => df.where('nonexistent', '===', 30)).toThrow(
+ "Column 'nonexistent' not found",
+ );
});
test('should throw error for invalid operator', () => {
- expect(() => df.where('age', 'invalid', 30)).toThrow("Unsupported operator: 'invalid'");
+ expect(() => df.where('age', 'invalid', 30)).toThrow(
+ "Unsupported operator: 'invalid'",
+ );
});
test('should return a new DataFrame instance', () => {
@@ -206,15 +348,19 @@ describe('Where Method', () => {
// Check that the result contains typed arrays
expect(ArrayBuffer.isView(result._columns.age.vector.__data)).toBe(true);
- expect(ArrayBuffer.isView(result._columns.salary.vector.__data)).toBe(true);
- // Проверяем только наличие типизированных массивов, без проверки конкретных типов
- // Типы могут быть разными в зависимости от реализации метода where
+ expect(ArrayBuffer.isView(result._columns.salary.vector.__data)).toBe(
+ true,
+ );
+ // Check only the presence of typed arrays, without checking specific types
+ // Types may be different depending on the implementation of the where method
});
test('should handle empty DataFrame', () => {
const emptyDf = DataFrame.fromRecords([]);
-
- expect(() => emptyDf.where('age', '===', 30)).toThrow("Column 'age' not found");
+
+ expect(() => emptyDf.where('age', '===', 30)).toThrow(
+ "Column 'age' not found",
+ );
});
});
});
diff --git a/tests/core/methods/dataframe/indexing/at.test.js b/tests/core/methods/dataframe/indexing/at.test.js
new file mode 100644
index 0000000..eace8e9
--- /dev/null
+++ b/tests/core/methods/dataframe/indexing/at.test.js
@@ -0,0 +1,97 @@
+/**
+ * Unit tests for at method
+ */
+
+import { describe, test, expect } from 'vitest';
+import { DataFrame } from '../../../../../packages/core/src/data/model/DataFrame.js';
+import { at } from '../../../../../packages/core/src/methods/dataframe/filtering/at.js';
+
+// Test data for use in all tests
+const testData = [
+ { name: 'Alice', age: 25, city: 'New York', salary: 70000 },
+ { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000 },
+ { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000 },
+];
+
+describe('At Method', () => {
+ // Add at method to DataFrame prototype
+ DataFrame.prototype.at = function (index) {
+ return at(this, index);
+ };
+
+ describe('with standard storage', () => {
+ // Create DataFrame using fromRecords
+ const df = DataFrame.fromRecords(testData);
+
+ test('should return row at specified index', () => {
+ const result = df.at(1);
+
+ // Check that the result is the correct row
+ expect(result).toEqual({
+ name: 'Bob',
+ age: 30,
+ city: 'San Francisco',
+ salary: 85000,
+ });
+ });
+
+ test('should handle index 0', () => {
+ const result = df.at(0);
+
+ expect(result).toEqual({
+ name: 'Alice',
+ age: 25,
+ city: 'New York',
+ salary: 70000,
+ });
+ });
+
+ test('should handle last index', () => {
+ const result = df.at(2);
+
+ expect(result).toEqual({
+ name: 'Charlie',
+ age: 35,
+ city: 'Chicago',
+ salary: 90000,
+ });
+ });
+
+ test('should throw error for negative index', () => {
+ expect(() => df.at(-1)).toThrow('Index out of bounds: -1 is negative');
+ });
+
+ test('should throw error for index >= rowCount', () => {
+ expect(() => df.at(3)).toThrow('Index out of bounds: 3 >= 3');
+ });
+
+ test('should throw error for non-integer index', () => {
+ expect(() => df.at(1.5)).toThrow('Index must be an integer');
+ });
+
+ test('should handle typed arrays correctly', () => {
+ // Create DataFrame with typed arrays
+ const typedDf = DataFrame.fromRecords(testData, {
+ columns: {
+ age: { type: 'int32' },
+ salary: { type: 'float64' },
+ },
+ });
+
+ // Get row at index
+ const result = typedDf.at(1);
+
+ // Check that the values are correct
+ expect(result.age).toBe(30);
+ expect(result.salary).toBe(85000);
+ });
+
+ test('should handle empty DataFrame', () => {
+ const emptyDf = DataFrame.fromRecords([]);
+
+ expect(() => emptyDf.at(0)).toThrow(
+ 'Index out of bounds: DataFrame is empty',
+ );
+ });
+ });
+});
diff --git a/tests/core/methods/dataframe/indexing/head.test.js b/tests/core/methods/dataframe/indexing/head.test.js
new file mode 100644
index 0000000..36fdcf5
--- /dev/null
+++ b/tests/core/methods/dataframe/indexing/head.test.js
@@ -0,0 +1,100 @@
+/**
+ * Unit tests for head method
+ */
+
+import { describe, test, expect } from 'vitest';
+import { DataFrame } from '../../../../../packages/core/src/data/model/DataFrame.js';
+import { head } from '../../../../../packages/core/src/methods/dataframe/filtering/head.js';
+
+// Test data for use in all tests
+const testData = [
+ { name: 'Alice', age: 25, city: 'New York', salary: 70000 },
+ { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000 },
+ { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000 },
+ { name: 'David', age: 40, city: 'Boston', salary: 95000 },
+ { name: 'Eve', age: 45, city: 'Seattle', salary: 100000 },
+ { name: 'Frank', age: 50, city: 'Denver', salary: 105000 },
+ { name: 'Grace', age: 55, city: 'Miami', salary: 110000 },
+];
+
+describe('Head Method', () => {
+ // Add head method to DataFrame prototype
+ DataFrame.prototype.head = function (n, options) {
+ return head(this, n, options);
+ };
+
+ describe('with standard storage', () => {
+ // Create DataFrame using fromRecords
+ const df = DataFrame.fromRecords(testData);
+
+ test('should return first 5 rows by default', () => {
+ const result = df.head();
+
+ // Check that the result has 5 rows
+ expect(result.rowCount).toBe(5);
+ expect(result.toArray()).toEqual(testData.slice(0, 5));
+ });
+
+ test('should return specified number of rows', () => {
+ const result = df.head(3);
+
+ // Check that the result has 3 rows
+ expect(result.rowCount).toBe(3);
+ expect(result.toArray()).toEqual(testData.slice(0, 3));
+ });
+
+ test('should handle n greater than number of rows', () => {
+ const result = df.head(10);
+
+ // Should return all rows
+ expect(result.rowCount).toBe(testData.length);
+ expect(result.toArray()).toEqual(testData);
+ });
+
+ test('should throw error for negative n', () => {
+ expect(() => df.head(-1)).toThrow(
+ 'Number of rows must be a positive integer',
+ );
+ });
+
+ test('should throw error for non-integer n', () => {
+ expect(() => df.head(2.5)).toThrow('Number of rows must be an integer');
+ });
+
+ test('should return a new DataFrame instance', () => {
+ const result = df.head(3);
+ expect(result).toBeInstanceOf(DataFrame);
+ expect(result).not.toBe(df); // Should be a new instance
+ });
+
+ test('should preserve typed arrays', () => {
+ // Create DataFrame with typed arrays
+ const typedDf = DataFrame.fromRecords(testData, {
+ columns: {
+ age: { type: 'int32' },
+ salary: { type: 'float64' },
+ },
+ });
+
+ // Get head of the data
+ const result = typedDf.head(3);
+
+ // Check that the result has the correct columns and data
+ expect(result.columns.sort()).toEqual(
+ ['age', 'city', 'name', 'salary'].sort(),
+ );
+
+ // Check that the data is preserved correctly (using the public API)
+ const ageCol = result.col('age');
+ const salaryCol = result.col('salary');
+ expect(ageCol.toArray()).toEqual([25, 30, 35]);
+ expect(salaryCol.toArray()).toEqual([70000, 85000, 90000]);
+ });
+
+ test('should accept options object', () => {
+ // The print option is for API compatibility and doesn't affect the result
+ const result = df.head(3, { print: true });
+ expect(result.rowCount).toBe(3);
+ });
+ });
+});
diff --git a/tests/core/methods/dataframe/indexing/iloc.test.js b/tests/core/methods/dataframe/indexing/iloc.test.js
new file mode 100644
index 0000000..b68fd08
--- /dev/null
+++ b/tests/core/methods/dataframe/indexing/iloc.test.js
@@ -0,0 +1,141 @@
+/**
+ * Unit tests for iloc method
+ */
+
+import { describe, test, expect } from 'vitest';
+import { DataFrame } from '../../../../../packages/core/src/data/model/DataFrame.js';
+import { iloc } from '../../../../../packages/core/src/methods/dataframe/filtering/iloc.js';
+
+// Test data for use in all tests
+const testData = [
+ { name: 'Alice', age: 25, city: 'New York', salary: 70000 },
+ { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000 },
+ { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000 },
+ { name: 'David', age: 40, city: 'Boston', salary: 95000 },
+ { name: 'Eve', age: 45, city: 'Seattle', salary: 100000 },
+];
+
+describe('Iloc Method', () => {
+ // Add iloc method to DataFrame prototype
+ DataFrame.prototype.iloc = function (rowSelector, columnSelector) {
+ return iloc(this, rowSelector, columnSelector);
+ };
+
+ describe('with standard storage', () => {
+ // Create DataFrame using fromRecords
+ const df = DataFrame.fromRecords(testData);
+
+ test('should select rows by integer index', () => {
+ const result = df.iloc(1);
+
+ // Check that the result is a DataFrame with one row
+ expect(result.rowCount).toBe(1);
+ expect(result.toArray()).toEqual([testData[1]]);
+ });
+
+ test('should select rows by array of indices', () => {
+ const result = df.iloc([0, 2, 4]);
+
+ // Check that the result contains the selected rows
+ expect(result.rowCount).toBe(3);
+ expect(result.toArray()).toEqual([testData[0], testData[2], testData[4]]);
+ });
+
+ test('should select rows by predicate function', () => {
+ const result = df.iloc((i) => i % 2 === 0);
+
+ // Should select rows at indices 0, 2, 4
+ expect(result.rowCount).toBe(3);
+ expect(result.toArray()).toEqual([testData[0], testData[2], testData[4]]);
+ });
+
+ test('should select columns by integer index', () => {
+ const result = df.iloc(null, 1);
+
+ // Should select the 'age' column for all rows
+ expect(result.columns).toEqual(['age']);
+ expect(result.rowCount).toBe(5);
+ expect(result.col('age').toArray()).toEqual([25, 30, 35, 40, 45]);
+ });
+
+ test('should select columns by array of indices', () => {
+ const result = df.iloc(null, [0, 2]);
+
+ // Should select the 'name' and 'city' columns
+ expect(result.columns.sort()).toEqual(['city', 'name'].sort());
+ expect(result.rowCount).toBe(5);
+ });
+
+ test('should select rows and columns by indices', () => {
+ const result = df.iloc([1, 3], [0, 2]);
+
+ // Should select rows 1 and 3, columns 'name' and 'city'
+ expect(result.rowCount).toBe(2);
+ expect(result.columns.sort()).toEqual(['city', 'name'].sort());
+ expect(result.toArray()).toEqual([
+ { name: 'Bob', city: 'San Francisco' },
+ { name: 'David', city: 'Boston' },
+ ]);
+ });
+
+ test('should handle null for rows to select all rows', () => {
+ const result = df.iloc(null, 1);
+
+ // Should select all rows, but only the 'age' column
+ expect(result.rowCount).toBe(5);
+ expect(result.columns).toEqual(['age']);
+ });
+
+ test('should handle null for columns to select all columns', () => {
+ const result = df.iloc(2, null);
+
+ // Should select row 2, all columns
+ expect(result.rowCount).toBe(1);
+ expect(result.columns.sort()).toEqual(
+ ['age', 'city', 'name', 'salary'].sort(),
+ );
+ expect(result.toArray()).toEqual([testData[2]]);
+ });
+
+ test('should throw error for out of bounds row index', () => {
+ expect(() => df.iloc(10)).toThrow('Row index out of bounds');
+ });
+
+ test('should throw error for out of bounds column index', () => {
+ expect(() => df.iloc(null, 10)).toThrow('Column index out of bounds');
+ });
+
+ test('should throw error for invalid row selector type', () => {
+ expect(() => df.iloc('invalid')).toThrow('Invalid row selector type');
+ });
+
+ test('should throw error for invalid column selector type', () => {
+ expect(() => df.iloc(null, 'invalid')).toThrow(
+ 'Invalid column selector type',
+ );
+ });
+
+ test('should preserve typed arrays', () => {
+ // Create DataFrame with typed arrays
+ const typedDf = DataFrame.fromRecords(testData, {
+ columns: {
+ age: { type: 'int32' },
+ salary: { type: 'float64' },
+ },
+ });
+
+ // Select rows and columns
+ const result = typedDf.iloc([1, 3], [1, 3]);
+
+ // Check that the result contains typed arrays
+ expect(result._columns.age.vector.__data).toBeInstanceOf(Int32Array);
+ expect(result._columns.salary.vector.__data).toBeInstanceOf(Float64Array);
+ });
+
+ test('should handle empty DataFrame', () => {
+ const emptyDf = DataFrame.fromRecords([]);
+
+ expect(() => emptyDf.iloc(0)).toThrow('Row index out of bounds');
+ });
+ });
+});
diff --git a/tests/core/methods/dataframe/indexing/loc.test.js b/tests/core/methods/dataframe/indexing/loc.test.js
new file mode 100644
index 0000000..be67d2b
--- /dev/null
+++ b/tests/core/methods/dataframe/indexing/loc.test.js
@@ -0,0 +1,159 @@
+/**
+ * Unit tests for loc method
+ */
+
+import { describe, test, expect } from 'vitest';
+import { DataFrame } from '../../../../../packages/core/src/data/model/DataFrame.js';
+import { loc } from '../../../../../packages/core/src/methods/dataframe/filtering/loc.js';
+
+// Test data for use in all tests
+const testData = [
+ { id: 'a1', name: 'Alice', age: 25, city: 'New York', salary: 70000 },
+ { id: 'b2', name: 'Bob', age: 30, city: 'San Francisco', salary: 85000 },
+ { id: 'c3', name: 'Charlie', age: 35, city: 'Chicago', salary: 90000 },
+ { id: 'd4', name: 'David', age: 40, city: 'Boston', salary: 95000 },
+ { id: 'e5', name: 'Eve', age: 45, city: 'Seattle', salary: 100000 },
+];
+
+describe('Loc Method', () => {
+ // Add loc method to DataFrame prototype
+ DataFrame.prototype.loc = function (rowSelector, columnSelector) {
+ return loc(this, rowSelector, columnSelector);
+ };
+
+ describe('with standard storage', () => {
+ // Create DataFrame using fromRecords with id as index
+ const df = DataFrame.fromRecords(testData);
+
+ // Set index to 'id' column
+ df.setIndex('id');
+
+ test('should select rows by label', () => {
+ const result = df.loc('b2');
+
+ // Check that the result is a DataFrame with one row
+ expect(result.rowCount).toBe(1);
+ expect(result.toArray()[0].name).toBe('Bob');
+ });
+
+ test('should select rows by array of labels', () => {
+ const result = df.loc(['a1', 'c3', 'e5']);
+
+ // Check that the result contains the selected rows
+ expect(result.rowCount).toBe(3);
+ expect(result.toArray().map((r) => r.name)).toEqual([
+ 'Alice',
+ 'Charlie',
+ 'Eve',
+ ]);
+ });
+
+ test('should select rows by predicate function', () => {
+ const result = df.loc((row) => row.age > 30);
+
+ // Should select rows with age > 30
+ expect(result.rowCount).toBe(3);
+ expect(result.toArray().map((r) => r.name)).toEqual([
+ 'Charlie',
+ 'David',
+ 'Eve',
+ ]);
+ });
+
+ test('should select rows by condition object', () => {
+ const result = df.loc({ city: 'Chicago' });
+
+ // Should select rows where city is Chicago
+ expect(result.rowCount).toBe(1);
+ expect(result.toArray()[0].name).toBe('Charlie');
+ });
+
+ test('should select columns by name', () => {
+ const result = df.loc(null, 'age');
+
+ // Should select the 'age' column for all rows
+ expect(result.columns).toEqual(['age']);
+ expect(result.rowCount).toBe(5);
+ expect(result.col('age').toArray()).toEqual([25, 30, 35, 40, 45]);
+ });
+
+ test('should select columns by array of names', () => {
+ const result = df.loc(null, ['name', 'city']);
+
+ // Should select the 'name' and 'city' columns
+ expect(result.columns.sort()).toEqual(['city', 'name'].sort());
+ expect(result.rowCount).toBe(5);
+ });
+
+ test('should select rows and columns by labels', () => {
+ const result = df.loc(['b2', 'd4'], ['name', 'city']);
+
+ // Should select rows with ids 'b2' and 'd4', columns 'name' and 'city'
+ expect(result.rowCount).toBe(2);
+ expect(result.columns.sort()).toEqual(['city', 'name'].sort());
+ expect(result.toArray()).toEqual([
+ { name: 'Bob', city: 'San Francisco' },
+ { name: 'David', city: 'Boston' },
+ ]);
+ });
+
+ test('should handle null for rows to select all rows', () => {
+ const result = df.loc(null, 'age');
+
+ // Should select all rows, but only the 'age' column
+ expect(result.rowCount).toBe(5);
+ expect(result.columns).toEqual(['age']);
+ });
+
+ test('should handle null for columns to select all columns', () => {
+ const result = df.loc('c3', null);
+
+ // Should select row with id 'c3', all columns
+ expect(result.rowCount).toBe(1);
+ expect(result.columns.length).toBe(5); // id, name, age, city, salary
+ expect(result.toArray()[0].name).toBe('Charlie');
+ });
+
+ test('should throw error for non-existent row label', () => {
+ expect(() => df.loc('z9')).toThrow('Row label not found');
+ });
+
+ test('should throw error for non-existent column label', () => {
+ expect(() => df.loc(null, 'country')).toThrow('Column not found');
+ });
+
+ test('should preserve typed arrays', () => {
+ // Create DataFrame with typed arrays
+ const typedDf = DataFrame.fromRecords(testData, {
+ columns: {
+ age: { type: 'int32' },
+ salary: { type: 'float64' },
+ },
+ });
+ typedDf.setIndex('id');
+
+ // Select rows and columns
+ const result = typedDf.loc(['b2', 'd4'], ['age', 'salary']);
+
+ // Check that the result contains typed arrays
+ expect(result._columns.age.vector.__data).toBeInstanceOf(Int32Array);
+ expect(result._columns.salary.vector.__data).toBeInstanceOf(Float64Array);
+ });
+
+ test('should handle empty DataFrame', () => {
+ const emptyDf = DataFrame.fromRecords([]);
+ emptyDf.setIndex('id');
+
+ expect(() => emptyDf.loc('a1')).toThrow('Row label not found');
+ });
+
+ test('should handle DataFrame without index', () => {
+ const dfNoIndex = DataFrame.fromRecords(testData);
+
+ // Should use row number as index
+ const result = dfNoIndex.loc(2);
+ expect(result.rowCount).toBe(1);
+ expect(result.toArray()[0].name).toBe('Charlie');
+ });
+ });
+});
diff --git a/tests/core/methods/dataframe/indexing/sample.test.js b/tests/core/methods/dataframe/indexing/sample.test.js
new file mode 100644
index 0000000..e89fb7b
--- /dev/null
+++ b/tests/core/methods/dataframe/indexing/sample.test.js
@@ -0,0 +1,175 @@
+/**
+ * Unit tests for sample method
+ */
+
+import { describe, test, expect } from 'vitest';
+import { DataFrame } from '../../../../../packages/core/src/data/model/DataFrame.js';
+import { sample } from '../../../../../packages/core/src/methods/dataframe/filtering/sample.js';
+
+// Test data for use in all tests
+const testData = [
+ { name: 'Alice', age: 25, city: 'New York', salary: 70000 },
+ { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000 },
+ { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000 },
+ { name: 'David', age: 40, city: 'Boston', salary: 95000 },
+ { name: 'Eve', age: 45, city: 'Seattle', salary: 100000 },
+ { name: 'Frank', age: 50, city: 'Denver', salary: 105000 },
+ { name: 'Grace', age: 55, city: 'Miami', salary: 110000 },
+];
+
+describe('Sample Method', () => {
+ // Add sample method to DataFrame prototype
+ DataFrame.prototype.sample = function (n, options) {
+ return sample(this, n, options);
+ };
+
+ describe('with standard storage', () => {
+ // Create DataFrame using fromRecords
+ const df = DataFrame.fromRecords(testData);
+
+ test('should sample 1 row by default', () => {
+ const result = df.sample();
+
+ // Check that the result has 1 row
+ expect(result.rowCount).toBe(1);
+ // The row should be one of the original rows
+ const resultRow = result.toArray()[0];
+ expect(
+ testData.some(
+ (row) =>
+ row.name === resultRow.name &&
+ row.age === resultRow.age &&
+ row.city === resultRow.city &&
+ row.salary === resultRow.salary,
+ ),
+ ).toBe(true);
+ });
+
+ test('should sample specified number of rows', () => {
+ const result = df.sample(3);
+
+ // Check that the result has 3 rows
+ expect(result.rowCount).toBe(3);
+
+ // Each row should be one of the original rows
+ const resultRows = result.toArray();
+ for (const resultRow of resultRows) {
+ expect(
+ testData.some(
+ (row) =>
+ row.name === resultRow.name &&
+ row.age === resultRow.age &&
+ row.city === resultRow.city &&
+ row.salary === resultRow.salary,
+ ),
+ ).toBe(true);
+ }
+ });
+
+ test('should sample by fraction', () => {
+ const result = df.sample({ fraction: 0.5 });
+
+ // Check that the result has approximately half the rows
+ // Due to rounding, it might be 3 or 4 rows for 7 total rows
+ expect(result.rowCount).toBeGreaterThanOrEqual(3);
+ expect(result.rowCount).toBeLessThanOrEqual(4);
+ });
+
+ test('should throw error for invalid fraction', () => {
+ expect(() => df.sample({ fraction: 0 })).toThrow(
+ 'Fraction must be in the range (0, 1]',
+ );
+ expect(() => df.sample({ fraction: 1.5 })).toThrow(
+ 'Fraction must be in the range (0, 1]',
+ );
+ });
+
+ test('should throw error for negative n', () => {
+ expect(() => df.sample(-1)).toThrow(
+ 'Number of rows to sample must be a positive integer',
+ );
+ });
+
+ test('should throw error for non-integer n', () => {
+ expect(() => df.sample(2.5)).toThrow(
+ 'Number of rows to sample must be an integer',
+ );
+ });
+
+ test('should throw error when sampling without replacement and n > rows', () => {
+ expect(() => df.sample(10)).toThrow(
+ 'Sample size (10) cannot be greater than number of rows (7)',
+ );
+ });
+
+ test('should allow sampling with replacement and n > rows', () => {
+ const result = df.sample(10, { replace: true });
+ expect(result.rowCount).toBe(10);
+ });
+
+ test('should return a new DataFrame instance', () => {
+ const result = df.sample(3);
+ expect(result).toBeInstanceOf(DataFrame);
+ expect(result).not.toBe(df); // Should be a new instance
+ });
+
+ test('should preserve typed arrays', () => {
+ // Create DataFrame with typed arrays
+ const typedDf = DataFrame.fromRecords(testData, {
+ columns: {
+ age: { type: 'int32' },
+ salary: { type: 'float64' },
+ },
+ });
+
+ // Sample the data with a fixed seed for deterministic results
+ const result = typedDf.sample(3, { seed: 42 });
+
+ // Check that the result has the correct columns
+ expect(result.columns.sort()).toEqual(
+ ['age', 'city', 'name', 'salary'].sort(),
+ );
+
+ // Check that the data is preserved correctly (using the public API)
+ const ageCol = result.col('age');
+ const salaryCol = result.col('salary');
+
+ // We can't check exact values since they depend on the random seed implementation
+ // But we can check that the arrays have the right length and are of the right type
+ expect(ageCol.toArray().length).toBe(3);
+ expect(salaryCol.toArray().length).toBe(3);
+
+ // Check that all values are from the original dataset
+ const originalAges = testData.map((row) => row.age);
+ const originalSalaries = testData.map((row) => row.salary);
+
+ ageCol.toArray().forEach((value) => {
+ expect(originalAges).toContain(value);
+ });
+
+ salaryCol.toArray().forEach((value) => {
+ expect(originalSalaries).toContain(value);
+ });
+ });
+
+ test('should produce deterministic results with seed', () => {
+ // Sample with the same seed should produce the same results
+ const sample1 = df.sample(3, { seed: 42 });
+ const sample2 = df.sample(3, { seed: 42 });
+
+ // Compare the sampled rows
+ const rows1 = sample1.toArray();
+ const rows2 = sample2.toArray();
+
+ expect(rows1).toEqual(rows2);
+ });
+
+ test('should handle empty DataFrame', () => {
+ const emptyDf = DataFrame.fromRecords([]);
+ const result = emptyDf.sample();
+
+ expect(result.rowCount).toBe(0);
+ expect(result.columns).toEqual([]);
+ });
+ });
+});
diff --git a/tests/core/methods/dataframe/indexing/tail.test.js b/tests/core/methods/dataframe/indexing/tail.test.js
new file mode 100644
index 0000000..3a7ecb4
--- /dev/null
+++ b/tests/core/methods/dataframe/indexing/tail.test.js
@@ -0,0 +1,100 @@
+/**
+ * Unit tests for tail method
+ */
+
+import { describe, test, expect } from 'vitest';
+import { DataFrame } from '../../../../../packages/core/src/data/model/DataFrame.js';
+import { tail } from '../../../../../packages/core/src/methods/dataframe/filtering/tail.js';
+
+// Test data for use in all tests
+const testData = [
+ { name: 'Alice', age: 25, city: 'New York', salary: 70000 },
+ { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000 },
+ { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000 },
+ { name: 'David', age: 40, city: 'Boston', salary: 95000 },
+ { name: 'Eve', age: 45, city: 'Seattle', salary: 100000 },
+ { name: 'Frank', age: 50, city: 'Denver', salary: 105000 },
+ { name: 'Grace', age: 55, city: 'Miami', salary: 110000 },
+];
+
+describe('Tail Method', () => {
+ // Add tail method to DataFrame prototype
+ DataFrame.prototype.tail = function (n, options) {
+ return tail(this, n, options);
+ };
+
+ describe('with standard storage', () => {
+ // Create DataFrame using fromRecords
+ const df = DataFrame.fromRecords(testData);
+
+ test('should return last 5 rows by default', () => {
+ const result = df.tail();
+
+ // Check that the result has 5 rows
+ expect(result.rowCount).toBe(5);
+ expect(result.toArray()).toEqual(testData.slice(-5));
+ });
+
+ test('should return specified number of rows from the end', () => {
+ const result = df.tail(3);
+
+ // Check that the result has 3 rows
+ expect(result.rowCount).toBe(3);
+ expect(result.toArray()).toEqual(testData.slice(-3));
+ });
+
+ test('should handle n greater than number of rows', () => {
+ const result = df.tail(10);
+
+ // Should return all rows
+ expect(result.rowCount).toBe(testData.length);
+ expect(result.toArray()).toEqual(testData);
+ });
+
+ test('should throw error for negative n', () => {
+ expect(() => df.tail(-1)).toThrow(
+ 'Number of rows must be a positive integer',
+ );
+ });
+
+ test('should throw error for non-integer n', () => {
+ expect(() => df.tail(2.5)).toThrow('Number of rows must be an integer');
+ });
+
+ test('should return a new DataFrame instance', () => {
+ const result = df.tail(3);
+ expect(result).toBeInstanceOf(DataFrame);
+ expect(result).not.toBe(df); // Should be a new instance
+ });
+
+ test('should preserve typed arrays', () => {
+ // Create DataFrame with typed arrays
+ const typedDf = DataFrame.fromRecords(testData, {
+ columns: {
+ age: { type: 'int32' },
+ salary: { type: 'float64' },
+ },
+ });
+
+ // Get tail of the data
+ const result = typedDf.tail(3);
+
+ // Check that the result has the correct columns and data
+ expect(result.columns.sort()).toEqual(
+ ['age', 'city', 'name', 'salary'].sort(),
+ );
+
+ // Check that the data is preserved correctly (using the public API)
+ const ageCol = result.col('age');
+ const salaryCol = result.col('salary');
+ expect(ageCol.toArray()).toEqual([45, 50, 55]);
+ expect(salaryCol.toArray()).toEqual([100000, 105000, 110000]);
+ });
+
+ test('should accept options object', () => {
+ // The print option is for API compatibility and doesn't affect the result
+ const result = df.tail(3, { print: true });
+ expect(result.rowCount).toBe(3);
+ });
+ });
+});