diff --git a/_duckdb-stubs/__init__.pyi b/_duckdb-stubs/__init__.pyi index 67830ad3..bcd74113 100644 --- a/_duckdb-stubs/__init__.pyi +++ b/_duckdb-stubs/__init__.pyi @@ -95,6 +95,8 @@ __all__: list[str] = [ "execute", "executemany", "extract_statements", + "to_arrow_reader", + "to_arrow_table", "fetch_arrow_table", "fetch_df", "fetch_df_chunk", @@ -194,7 +196,11 @@ class DuckDBPyConnection: def __exit__(self, exc_type: object, exc: object, traceback: object) -> None: ... def append(self, table_name: str, df: pandas.DataFrame, *, by_name: bool = False) -> DuckDBPyConnection: ... def array_type(self, type: sqltypes.DuckDBPyType, size: pytyping.SupportsInt) -> sqltypes.DuckDBPyType: ... - def arrow(self, rows_per_batch: pytyping.SupportsInt = 1000000) -> pyarrow.lib.RecordBatchReader: ... + def arrow(self, rows_per_batch: pytyping.SupportsInt = 1000000) -> pyarrow.lib.RecordBatchReader: + """Alias of to_arrow_reader(). We recommend using to_arrow_reader() instead.""" + ... + def to_arrow_reader(self, batch_size: pytyping.SupportsInt = 1000000) -> pyarrow.lib.RecordBatchReader: ... + def to_arrow_table(self, batch_size: pytyping.SupportsInt = 1000000) -> pyarrow.lib.Table: ... def begin(self) -> DuckDBPyConnection: ... def checkpoint(self) -> DuckDBPyConnection: ... def close(self) -> None: ... @@ -222,12 +228,16 @@ class DuckDBPyConnection: def execute(self, query: Statement | str, parameters: object = None) -> DuckDBPyConnection: ... def executemany(self, query: Statement | str, parameters: object = None) -> DuckDBPyConnection: ... def extract_statements(self, query: str) -> list[Statement]: ... - def fetch_arrow_table(self, rows_per_batch: pytyping.SupportsInt = 1000000) -> pyarrow.lib.Table: ... + def fetch_arrow_table(self, rows_per_batch: pytyping.SupportsInt = 1000000) -> pyarrow.lib.Table: + """Deprecated: use to_arrow_table() instead.""" + ... def fetch_df(self, *, date_as_object: bool = False) -> pandas.DataFrame: ... def fetch_df_chunk( self, vectors_per_chunk: pytyping.SupportsInt = 1, *, date_as_object: bool = False ) -> pandas.DataFrame: ... - def fetch_record_batch(self, rows_per_batch: pytyping.SupportsInt = 1000000) -> pyarrow.lib.RecordBatchReader: ... + def fetch_record_batch(self, rows_per_batch: pytyping.SupportsInt = 1000000) -> pyarrow.lib.RecordBatchReader: + """Deprecated: use to_arrow_reader() instead.""" + ... def fetchall(self) -> list[tuple[pytyping.Any, ...]]: ... def fetchdf(self, *, date_as_object: bool = False) -> pandas.DataFrame: ... def fetchmany(self, size: pytyping.SupportsInt = 1) -> list[tuple[pytyping.Any, ...]]: ... @@ -487,7 +497,11 @@ class DuckDBPyRelation: def arg_min( self, arg_column: str, value_column: str, groups: str = "", window_spec: str = "", projected_columns: str = "" ) -> DuckDBPyRelation: ... - def arrow(self, batch_size: pytyping.SupportsInt = 1000000) -> pyarrow.lib.RecordBatchReader: ... + def arrow(self, batch_size: pytyping.SupportsInt = 1000000) -> pyarrow.lib.RecordBatchReader: + """Alias of to_arrow_reader(). We recommend using to_arrow_reader() instead.""" + ... + def to_arrow_reader(self, batch_size: pytyping.SupportsInt = 1000000) -> pyarrow.lib.RecordBatchReader: ... + def to_arrow_table(self, batch_size: pytyping.SupportsInt = 1000000) -> pyarrow.lib.Table: ... def avg( self, column: str, groups: str = "", window_spec: str = "", projected_columns: str = "" ) -> DuckDBPyRelation: ... 
@@ -533,12 +547,18 @@ class DuckDBPyRelation: def favg( self, column: str, groups: str = "", window_spec: str = "", projected_columns: str = "" ) -> DuckDBPyRelation: ... - def fetch_arrow_reader(self, batch_size: pytyping.SupportsInt = 1000000) -> pyarrow.lib.RecordBatchReader: ... - def fetch_arrow_table(self, batch_size: pytyping.SupportsInt = 1000000) -> pyarrow.lib.Table: ... + def fetch_arrow_reader(self, batch_size: pytyping.SupportsInt = 1000000) -> pyarrow.lib.RecordBatchReader: + """Deprecated: use to_arrow_reader() instead.""" + ... + def fetch_arrow_table(self, batch_size: pytyping.SupportsInt = 1000000) -> pyarrow.lib.Table: + """Deprecated: use to_arrow_table() instead.""" + ... def fetch_df_chunk( self, vectors_per_chunk: pytyping.SupportsInt = 1, *, date_as_object: bool = False ) -> pandas.DataFrame: ... - def fetch_record_batch(self, rows_per_batch: pytyping.SupportsInt = 1000000) -> pyarrow.lib.RecordBatchReader: ... + def fetch_record_batch(self, rows_per_batch: pytyping.SupportsInt = 1000000) -> pyarrow.lib.RecordBatchReader: + """Deprecated: use to_arrow_reader() instead.""" + ... def fetchall(self) -> list[tuple[pytyping.Any, ...]]: ... def fetchdf(self, *, date_as_object: bool = False) -> pandas.DataFrame: ... def fetchmany(self, size: pytyping.SupportsInt = 1) -> list[tuple[pytyping.Any, ...]]: ... @@ -656,7 +676,6 @@ class DuckDBPyRelation: def query(self, virtual_table_name: str, sql_query: str) -> DuckDBPyRelation: ... def rank(self, window_spec: str, projected_columns: str = "") -> DuckDBPyRelation: ... def rank_dense(self, window_spec: str, projected_columns: str = "") -> DuckDBPyRelation: ... - def record_batch(self, batch_size: pytyping.SupportsInt = 1000000) -> pyarrow.RecordBatchReader: ... def row_number(self, window_spec: str, projected_columns: str = "") -> DuckDBPyRelation: ... def select(self, *args: str | Expression, groups: str = "") -> DuckDBPyRelation: ... def select_dtypes(self, types: pytyping.List[sqltypes.DuckDBPyType | str]) -> DuckDBPyRelation: ... @@ -692,7 +711,6 @@ class DuckDBPyRelation: self, column: str, groups: str = "", window_spec: str = "", projected_columns: str = "" ) -> DuckDBPyRelation: ... def tf(self) -> dict[str, tensorflow.Tensor]: ... - def to_arrow_table(self, batch_size: pytyping.SupportsInt = 1000000) -> pyarrow.lib.Table: ... def to_csv( self, file_name: str, @@ -1067,9 +1085,18 @@ def array_type( @pytyping.overload def arrow( rows_per_batch: pytyping.SupportsInt = 1000000, *, connection: DuckDBPyConnection | None = None -) -> pyarrow.lib.RecordBatchReader: ... +) -> pyarrow.lib.RecordBatchReader: + """Alias of to_arrow_reader(). We recommend using to_arrow_reader() instead.""" + ... + @pytyping.overload def arrow(arrow_object: pytyping.Any, *, connection: DuckDBPyConnection | None = None) -> DuckDBPyRelation: ... +def to_arrow_reader( + batch_size: pytyping.SupportsInt = 1000000, *, connection: DuckDBPyConnection | None = None +) -> pyarrow.lib.RecordBatchReader: ... +def to_arrow_table( + batch_size: pytyping.SupportsInt = 1000000, *, connection: DuckDBPyConnection | None = None +) -> pyarrow.lib.Table: ... def begin(*, connection: DuckDBPyConnection | None = None) -> DuckDBPyConnection: ... def checkpoint(*, connection: DuckDBPyConnection | None = None) -> DuckDBPyConnection: ... def close(*, connection: DuckDBPyConnection | None = None) -> None: ... @@ -1128,7 +1155,10 @@ def executemany( def extract_statements(query: str, *, connection: DuckDBPyConnection | None = None) -> list[Statement]: ... 
 def fetch_arrow_table(
     rows_per_batch: pytyping.SupportsInt = 1000000, *, connection: DuckDBPyConnection | None = None
-) -> pyarrow.lib.Table: ...
+) -> pyarrow.lib.Table:
+    """Deprecated: use to_arrow_table() instead."""
+    ...
+
 def fetch_df(*, date_as_object: bool = False, connection: DuckDBPyConnection | None = None) -> pandas.DataFrame: ...
 def fetch_df_chunk(
     vectors_per_chunk: pytyping.SupportsInt = 1,
@@ -1138,7 +1168,10 @@ def fetch_df_chunk(
 ) -> pandas.DataFrame: ...
 def fetch_record_batch(
     rows_per_batch: pytyping.SupportsInt = 1000000, *, connection: DuckDBPyConnection | None = None
-) -> pyarrow.lib.RecordBatchReader: ...
+) -> pyarrow.lib.RecordBatchReader:
+    """Deprecated: use to_arrow_reader() instead."""
+    ...
+
 def fetchall(*, connection: DuckDBPyConnection | None = None) -> list[tuple[pytyping.Any, ...]]: ...
 def fetchdf(*, date_as_object: bool = False, connection: DuckDBPyConnection | None = None) -> pandas.DataFrame: ...
 def fetchmany(
diff --git a/duckdb/__init__.py b/duckdb/__init__.py
index a9ca7773..d17c530f 100644
--- a/duckdb/__init__.py
+++ b/duckdb/__init__.py
@@ -143,6 +143,8 @@
     table_function,
     tf,
     threadsafety,
+    to_arrow_reader,
+    to_arrow_table,
     token_type,
     tokenize,
     torch,
@@ -374,6 +376,8 @@
     "tf",
     "threadsafety",
     "threadsafety",
+    "to_arrow_reader",
+    "to_arrow_table",
     "token_type",
     "tokenize",
     "torch",
diff --git a/duckdb/polars_io.py b/duckdb/polars_io.py
index a7ed84ff..ad74b848 100644
--- a/duckdb/polars_io.py
+++ b/duckdb/polars_io.py
@@ -270,10 +270,7 @@ def source_generator(
         # Try to pushdown filter, if one exists
         if duck_predicate is not None:
             relation_final = relation_final.filter(duck_predicate)
-        if batch_size is None:
-            results = relation_final.fetch_arrow_reader()
-        else:
-            results = relation_final.fetch_arrow_reader(batch_size)
+        results = relation_final.to_arrow_reader() if batch_size is None else relation_final.to_arrow_reader(batch_size)
         for record_batch in iter(results.read_next_batch, None):
             if predicate is not None and duck_predicate is None:
diff --git a/external/duckdb b/external/duckdb
index a1cac11e..3482c107 160000
--- a/external/duckdb
+++ b/external/duckdb
@@ -1 +1 @@
-Subproject commit a1cac11ecf71f867f107aa8d13d9f1bec16183df
+Subproject commit 3482c107a36d6294ab81f81901059834d8f12168
diff --git a/scripts/connection_methods.json b/scripts/connection_methods.json
index 3b02a9b1..56398af0 100644
--- a/scripts/connection_methods.json
+++ b/scripts/connection_methods.json
@@ -395,7 +395,7 @@
         "return": "polars.DataFrame"
     },
     {
-        "name": "fetch_arrow_table",
+        "name": "to_arrow_table",
         "function": "FetchArrow",
         "docs": "Fetch a result as Arrow table following execute()",
         "args": [
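For reference, the renamed entry points mirror the old `fetch_*` ones one-for-one. A minimal sketch of the new API from the Python side, assuming an in-memory database with pyarrow installed (the table name `t` is illustrative):

```python
import duckdb

con = duckdb.connect()
con.execute("CREATE TABLE t AS SELECT range AS a FROM range(3000)")

# Materialize the full result as a pyarrow.Table ...
table = con.execute("SELECT a FROM t").to_arrow_table()
assert table.num_rows == 3000

# ... or stream it through a pyarrow.RecordBatchReader in 1024-row batches.
reader = con.execute("SELECT a FROM t").to_arrow_reader(1024)
assert len(reader.read_next_batch()) == 1024
```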
diff --git a/src/duckdb_py/duckdb_python.cpp b/src/duckdb_py/duckdb_python.cpp
index fedbec5f..eea21519 100644
--- a/src/duckdb_py/duckdb_python.cpp
+++ b/src/duckdb_py/duckdb_python.cpp
@@ -446,31 +446,45 @@ static void InitializeConnectionMethods(py::module_ &m) {
         "Fetch a result as Polars DataFrame following execute()", py::arg("rows_per_batch") = 1000000, py::kw_only(),
         py::arg("lazy") = false, py::arg("connection") = py::none());
     m.def(
-        "fetch_arrow_table",
-        [](idx_t rows_per_batch, shared_ptr<DuckDBPyConnection> conn = nullptr) {
-            if (!conn) {
-                conn = DuckDBPyConnection::DefaultConnection();
-            }
-            return conn->FetchArrow(rows_per_batch);
+        "to_arrow_table",
+        [](idx_t batch_size, shared_ptr<DuckDBPyConnection> conn = nullptr) {
+            if (!conn) {
+                conn = DuckDBPyConnection::DefaultConnection();
+            }
+            return conn->FetchArrow(batch_size);
         },
-        "Fetch a result as Arrow table following execute()", py::arg("rows_per_batch") = 1000000, py::kw_only(),
+        "Fetch a result as Arrow table following execute()", py::arg("batch_size") = 1000000, py::kw_only(),
         py::arg("connection") = py::none());
     m.def(
-        "fetch_record_batch",
-        [](const idx_t rows_per_batch, shared_ptr<DuckDBPyConnection> conn = nullptr) {
-            if (!conn) {
-                conn = DuckDBPyConnection::DefaultConnection();
-            }
-            return conn->FetchRecordBatchReader(rows_per_batch);
+        "to_arrow_reader",
+        [](idx_t batch_size, shared_ptr<DuckDBPyConnection> conn = nullptr) {
+            if (!conn) {
+                conn = DuckDBPyConnection::DefaultConnection();
+            }
+            return conn->FetchRecordBatchReader(batch_size);
         },
-        "Fetch an Arrow RecordBatchReader following execute()", py::arg("rows_per_batch") = 1000000, py::kw_only(),
+        "Fetch an Arrow RecordBatchReader following execute()", py::arg("batch_size") = 1000000, py::kw_only(),
         py::arg("connection") = py::none());
     m.def(
-        "arrow",
+        "fetch_arrow_table",
+        [](idx_t rows_per_batch, shared_ptr<DuckDBPyConnection> conn = nullptr) {
+            if (!conn) {
+                conn = DuckDBPyConnection::DefaultConnection();
+            }
+            PyErr_WarnEx(PyExc_DeprecationWarning, "fetch_arrow_table() is deprecated, use to_arrow_table() instead.",
+                         0);
+            return conn->FetchArrow(rows_per_batch);
+        },
+        "Fetch a result as Arrow table following execute()", py::arg("rows_per_batch") = 1000000, py::kw_only(),
+        py::arg("connection") = py::none());
+    m.def(
+        "fetch_record_batch",
         [](const idx_t rows_per_batch, shared_ptr<DuckDBPyConnection> conn = nullptr) {
             if (!conn) {
                 conn = DuckDBPyConnection::DefaultConnection();
             }
+            PyErr_WarnEx(PyExc_DeprecationWarning, "fetch_record_batch() is deprecated, use to_arrow_reader() instead.",
+                         0);
             return conn->FetchRecordBatchReader(rows_per_batch);
         },
         "Fetch an Arrow RecordBatchReader following execute()", py::arg("rows_per_batch") = 1000000, py::kw_only(),
@@ -957,14 +971,14 @@ static void InitializeConnectionMethods(py::module_ &m) {
     // We define these "wrapper" methods manually because they are overloaded
     m.def(
         "arrow",
-        [](idx_t rows_per_batch, shared_ptr<DuckDBPyConnection> conn) -> duckdb::pyarrow::Table {
-            if (!conn) {
-                conn = DuckDBPyConnection::DefaultConnection();
-            }
-            return conn->FetchArrow(rows_per_batch);
+        [](idx_t rows_per_batch, shared_ptr<DuckDBPyConnection> conn) -> duckdb::pyarrow::RecordBatchReader {
+            if (!conn) {
+                conn = DuckDBPyConnection::DefaultConnection();
+            }
+            return conn->FetchRecordBatchReader(rows_per_batch);
         },
-        "Fetch a result as Arrow table following execute()", py::arg("rows_per_batch") = 1000000, py::kw_only(),
-        py::arg("connection") = py::none());
+        "Alias of to_arrow_reader(). We recommend using to_arrow_reader() instead.",
+        py::arg("rows_per_batch") = 1000000, py::kw_only(), py::arg("connection") = py::none());
     m.def(
         "arrow",
         [](py::object &arrow_object, shared_ptr<DuckDBPyConnection> conn) -> unique_ptr<DuckDBPyRelation> {
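The deprecated module-level names remain callable but now warn through `PyErr_WarnEx`. Roughly what a caller observes (a sketch matching the new deprecation tests further down):

```python
import warnings

import duckdb

duckdb.execute("SELECT 1")
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    duckdb.fetch_arrow_table()  # still returns a pyarrow.Table, but warns
assert any(issubclass(w.category, DeprecationWarning) for w in caught)
```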
diff --git a/src/duckdb_py/pyconnection.cpp b/src/duckdb_py/pyconnection.cpp
index 75d1b612..c786421f 100644
--- a/src/duckdb_py/pyconnection.cpp
+++ b/src/duckdb_py/pyconnection.cpp
@@ -203,11 +203,28 @@ static void InitializeConnectionMethods(py::class_<DuckDBPyRelation> &m) {
              py::arg("date_as_object") = false)
         .def("fetch_df_chunk", &DuckDBPyRelation::FetchDFChunk, "Execute and fetch a chunk of the rows",
              py::arg("vectors_per_chunk") = 1, py::kw_only(), py::arg("date_as_object") = false)
-        .def("arrow", &DuckDBPyRelation::ToRecordBatch,
-             "Execute and return an Arrow Record Batch Reader that yields all rows", py::arg("batch_size") = 1000000)
-        .def("fetch_arrow_table", &DuckDBPyRelation::ToArrowTable, "Execute and fetch all rows as an Arrow Table",
-             py::arg("batch_size") = 1000000)
+        .def("to_arrow_table", &DuckDBPyRelation::ToArrowTable, "Execute and fetch all rows as an Arrow Table",
+             py::arg("batch_size") = 1000000)
+        .def("to_arrow_reader", &DuckDBPyRelation::ToRecordBatch,
+             "Execute and return an Arrow Record Batch Reader that yields all rows", py::arg("batch_size") = 1000000)
+        .def("arrow", &DuckDBPyRelation::ToRecordBatch,
+             "Alias of to_arrow_reader(). We recommend using to_arrow_reader() instead.",
+             py::arg("batch_size") = 1000000)
+        .def(
+            "fetch_arrow_table",
+            [](pybind11::object &self, idx_t batch_size) {
+                PyErr_WarnEx(PyExc_DeprecationWarning,
+                             "fetch_arrow_table() is deprecated, use to_arrow_table() instead.", 0);
+                return self.attr("to_arrow_table")(batch_size);
+            },
+            "Execute and fetch all rows as an Arrow Table", py::arg("batch_size") = 1000000)
         .def("pl", &DuckDBPyRelation::ToPolars, "Execute and fetch all rows as a Polars DataFrame",
              py::arg("batch_size") = 1000000, py::kw_only(), py::arg("lazy") = false)
         .def("torch", &DuckDBPyRelation::FetchPyTorch, "Fetch a result as dict of PyTorch Tensors")
@@ -79,18 +88,25 @@ static void InitializeConsumers(py::class_<DuckDBPyRelation> &m) {
     )";
     m.def("__arrow_c_stream__", &DuckDBPyRelation::ToArrowCapsule, capsule_docs,
           py::arg("requested_schema") = py::none());
-    m.def("fetch_record_batch", &DuckDBPyRelation::ToRecordBatch,
-          "Execute and return an Arrow Record Batch Reader that yields all rows", py::arg("rows_per_batch") = 1000000)
-        .def("fetch_arrow_reader", &DuckDBPyRelation::ToRecordBatch,
-             "Execute and return an Arrow Record Batch Reader that yields all rows", py::arg("batch_size") = 1000000)
+    m.def(
+         "fetch_record_batch",
+         [](pybind11::object &self, idx_t rows_per_batch) {
+             PyErr_WarnEx(PyExc_DeprecationWarning,
+                          "fetch_record_batch() is deprecated, use to_arrow_reader() instead.", 0);
+             return self.attr("to_arrow_reader")(rows_per_batch);
+         },
+         "Execute and return an Arrow Record Batch Reader that yields all rows", py::arg("rows_per_batch") = 1000000)
        .def(
-            "record_batch",
-            [](pybind11::object &self, idx_t rows_per_batch) {
+            "fetch_arrow_reader",
+            [](pybind11::object &self, idx_t batch_size) {
                 PyErr_WarnEx(PyExc_DeprecationWarning,
-                             "record_batch() is deprecated, use fetch_record_batch() instead.", 0);
-                return self.attr("fetch_record_batch")(rows_per_batch);
+                             "fetch_arrow_reader() is deprecated, use to_arrow_reader() instead.", 0);
+                if (PyErr_Occurred()) {
+                    throw py::error_already_set();
+                }
+                return self.attr("to_arrow_reader")(batch_size);
             },
-            py::arg("batch_size") = 1000000);
+            "Execute and return an Arrow Record Batch Reader that yields all rows", py::arg("batch_size") = 1000000);
 }
 
 static void InitializeAggregates(py::class_<DuckDBPyRelation> &m) {
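Note the behavioral change hiding in the alias: `arrow()` now returns a `RecordBatchReader` on both the module and the relation (at module level it previously returned a `Table`), and it stays warning-free as a soft alias. A sketch:

```python
import duckdb

rel = duckdb.sql("SELECT 42 AS a")
reader = rel.arrow()       # alias of to_arrow_reader(); no DeprecationWarning
table = reader.read_all()  # materialize explicitly when a Table is needed
assert table.to_pylist() == [{"a": 42}]
```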
diff --git a/tests/fast/api/test_dbapi_fetch.py b/tests/fast/api/test_dbapi_fetch.py
index 97ff6fe6..c6d3ccaa 100644
--- a/tests/fast/api/test_dbapi_fetch.py
+++ b/tests/fast/api/test_dbapi_fetch.py
@@ -42,11 +42,11 @@ def test_multiple_fetch_arrow(self, duckdb_cursor):
         pytest.importorskip("pyarrow")
         con = duckdb.connect()
         c = con.execute("SELECT 42::BIGINT AS a")
-        table = c.fetch_arrow_table()
+        table = c.to_arrow_table()
         df = table.to_pandas()
         pd.testing.assert_frame_equal(df, pd.DataFrame.from_dict({"a": [42]}))
-        assert c.fetch_arrow_table() is None
-        assert c.fetch_arrow_table() is None
+        assert c.to_arrow_table() is None
+        assert c.to_arrow_table() is None
 
     def test_multiple_close(self, duckdb_cursor):
         con = duckdb.connect()
diff --git a/tests/fast/api/test_duckdb_connection.py b/tests/fast/api/test_duckdb_connection.py
index efcc2203..9bca8288 100644
--- a/tests/fast/api/test_duckdb_connection.py
+++ b/tests/fast/api/test_duckdb_connection.py
@@ -54,7 +54,7 @@ def test_default_connection_from_connect(self):
     def test_arrow(self):
         pytest.importorskip("pyarrow")
         duckdb.execute("select [1,2,3]")
-        duckdb.fetch_arrow_table()
+        duckdb.to_arrow_table()
 
     def test_begin_commit(self):
         duckdb.begin()
@@ -176,8 +176,8 @@ def test_pystatement(self):
         assert duckdb.table("tbl").fetchall() == [(21,), (22,), (23,)]
         duckdb.execute("drop table tbl")
 
-    def test_fetch_arrow_table(self):
-        # Needed for 'fetch_arrow_table'
+    def test_arrow_table(self):
+        # Needed for 'to_arrow_table'
         pytest.importorskip("pyarrow")
 
         duckdb.execute("Create Table test (a integer)")
@@ -195,7 +195,7 @@ def test_fetch_arrow_table(self):
 
         result_df = duckdb.execute(sql).df()
 
-        arrow_table = duckdb.execute(sql).fetch_arrow_table()
+        arrow_table = duckdb.execute(sql).to_arrow_table()
         arrow_df = arrow_table.to_pandas()
 
         assert result_df["repetitions"].sum() == arrow_df["repetitions"].sum()
@@ -220,12 +220,12 @@ def test_fetch_df_chunk(self):
         duckdb.execute("DROP TABLE t")
 
     def test_fetch_record_batch(self):
-        # Needed for 'fetch_arrow_table'
+        # Needed for 'to_arrow_reader'
         pytest.importorskip("pyarrow")
         duckdb.execute("CREATE table t as select range a from range(3000);")
 
         duckdb.execute("SELECT a FROM t")
-        record_batch_reader = duckdb.fetch_record_batch(1024)
+        record_batch_reader = duckdb.to_arrow_reader(1024)
         chunk = record_batch_reader.read_all()
         assert len(chunk) == 3000
 
@@ -299,7 +299,7 @@ def test_unregister_problematic_behavior(self, duckdb_cursor):
         assert duckdb_cursor.execute("select * from vw").fetchone() == (0,)
 
         # Create a registered object called 'vw'
-        arrow_result = duckdb_cursor.execute("select 42").fetch_arrow_table()
+        arrow_result = duckdb_cursor.execute("select 42").to_arrow_table()
 
         with pytest.raises(duckdb.CatalogException, match='View with name "vw" already exists'):
             duckdb_cursor.register("vw", arrow_result)
diff --git a/tests/fast/api/test_native_tz.py b/tests/fast/api/test_native_tz.py
index 61b9ba24..18f7b7e7 100644
--- a/tests/fast/api/test_native_tz.py
+++ b/tests/fast/api/test_native_tz.py
@@ -78,14 +78,14 @@ def test_pandas_timestamp_time(self, duckdb_cursor):
     )
     def test_arrow_timestamp_timezone(self, duckdb_cursor):
         duckdb_cursor.execute("SET timezone='America/Los_Angeles';")
-        table = duckdb_cursor.execute(f"select TimeRecStart as tz from '{filename}'").fetch_arrow_table()
+        table = duckdb_cursor.execute(f"select TimeRecStart as tz from '{filename}'").to_arrow_table()
         res = 
table.to_pandas() assert get_tz_string(res.dtypes["tz"].tz) == "America/Los_Angeles" assert res["tz"][0].hour == 14 assert res["tz"][0].minute == 52 duckdb_cursor.execute("SET timezone='UTC';") - res = duckdb_cursor.execute(f"select TimeRecStart as tz from '{filename}'").fetch_arrow_table().to_pandas() + res = duckdb_cursor.execute(f"select TimeRecStart as tz from '{filename}'").to_arrow_table().to_pandas() assert get_tz_string(res.dtypes["tz"].tz) == "UTC" assert res["tz"][0].hour == 21 assert res["tz"][0].minute == 52 @@ -93,13 +93,11 @@ def test_arrow_timestamp_timezone(self, duckdb_cursor): def test_arrow_timestamp_time(self, duckdb_cursor): duckdb_cursor.execute("SET timezone='America/Los_Angeles';") res1 = ( - duckdb_cursor.execute(f"select TimeRecStart::TIMETZ as tz from '{filename}'") - .fetch_arrow_table() - .to_pandas() + duckdb_cursor.execute(f"select TimeRecStart::TIMETZ as tz from '{filename}'").to_arrow_table().to_pandas() ) res2 = ( duckdb_cursor.execute(f"select TimeRecStart::TIMETZ::TIME as tz from '{filename}'") - .fetch_arrow_table() + .to_arrow_table() .to_pandas() ) assert res1["tz"][0].hour == 14 @@ -109,13 +107,11 @@ def test_arrow_timestamp_time(self, duckdb_cursor): duckdb_cursor.execute("SET timezone='UTC';") res1 = ( - duckdb_cursor.execute(f"select TimeRecStart::TIMETZ as tz from '{filename}'") - .fetch_arrow_table() - .to_pandas() + duckdb_cursor.execute(f"select TimeRecStart::TIMETZ as tz from '{filename}'").to_arrow_table().to_pandas() ) res2 = ( duckdb_cursor.execute(f"select TimeRecStart::TIMETZ::TIME as tz from '{filename}'") - .fetch_arrow_table() + .to_arrow_table() .to_pandas() ) assert res1["tz"][0].hour == 21 diff --git a/tests/fast/api/test_streaming_result.py b/tests/fast/api/test_streaming_result.py index 4003f20f..9cba78b1 100644 --- a/tests/fast/api/test_streaming_result.py +++ b/tests/fast/api/test_streaming_result.py @@ -41,7 +41,7 @@ def test_record_batch_reader(self, duckdb_cursor): pytest.importorskip("pyarrow.dataset") # record batch reader res = duckdb_cursor.sql("SELECT * FROM range(100000) t(i)") - reader = res.fetch_arrow_reader(batch_size=16_384) + reader = res.to_arrow_reader(batch_size=16_384) result = [] for batch in reader: result += batch.to_pydict()["i"] @@ -52,7 +52,7 @@ def test_record_batch_reader(self, duckdb_cursor): "SELECT CASE WHEN i < 10000 THEN i ELSE concat('hello', i::VARCHAR)::INT END FROM range(100000) t(i)" ) with pytest.raises(duckdb.ConversionException, match="Could not convert string 'hello10000' to INT32"): - reader = res.fetch_arrow_reader(batch_size=16_384) + reader = res.to_arrow_reader(batch_size=16_384) def test_9801(self, duckdb_cursor): duckdb_cursor.execute("CREATE TABLE test(id INTEGER , name VARCHAR NOT NULL);") diff --git a/tests/fast/arrow/test_2426.py b/tests/fast/arrow/test_2426.py index 6f76613f..f631ec1a 100644 --- a/tests/fast/arrow/test_2426.py +++ b/tests/fast/arrow/test_2426.py @@ -31,7 +31,7 @@ def test_2426(self, duckdb_cursor): result_df = con.execute(sql).df() - arrow_table = con.execute(sql).fetch_arrow_table() + arrow_table = con.execute(sql).to_arrow_table() arrow_df = arrow_table.to_pandas() assert result_df["repetitions"].sum() == arrow_df["repetitions"].sum() diff --git a/tests/fast/arrow/test_6584.py b/tests/fast/arrow/test_6584.py index feadc6d7..ed7d9a47 100644 --- a/tests/fast/arrow/test_6584.py +++ b/tests/fast/arrow/test_6584.py @@ -9,7 +9,7 @@ def f(cur, i, data): cur.execute(f"create table t_{i} as select * from data") - return cur.execute(f"select * from 
t_{i}").fetch_arrow_table() + return cur.execute(f"select * from t_{i}").to_arrow_table() def test_6584(): diff --git a/tests/fast/arrow/test_6796.py b/tests/fast/arrow/test_6796.py index a9e877d5..13286de2 100644 --- a/tests/fast/arrow/test_6796.py +++ b/tests/fast/arrow/test_6796.py @@ -19,7 +19,7 @@ def test_6796(): # fetching directly into Pandas works res_df = conn.execute(query).fetch_df() - res_arrow = conn.execute(query).fetch_arrow_table() # noqa: F841 + res_arrow = conn.execute(query).to_arrow_table() # noqa: F841 df_arrow_table = pyarrow.Table.from_pandas(res_df) # noqa: F841 diff --git a/tests/fast/arrow/test_9443.py b/tests/fast/arrow/test_9443.py index fe5a2ce1..66c8c0be 100644 --- a/tests/fast/arrow/test_9443.py +++ b/tests/fast/arrow/test_9443.py @@ -23,4 +23,4 @@ def test_9443(self, tmp_path, duckdb_cursor): sql = f'SELECT * FROM "{temp_file}"' duckdb_cursor.execute(sql) - duckdb_cursor.fetch_record_batch() + duckdb_cursor.to_arrow_reader() diff --git a/tests/fast/arrow/test_arrow_binary_view.py b/tests/fast/arrow/test_arrow_binary_view.py index 4e161ac3..10580d8e 100644 --- a/tests/fast/arrow/test_arrow_binary_view.py +++ b/tests/fast/arrow/test_arrow_binary_view.py @@ -11,10 +11,10 @@ def test_arrow_binary_view(self, duckdb_cursor): tab = pa.table({"x": pa.array([b"abc", b"thisisaverybigbinaryyaymorethanfifteen", None], pa.binary_view())}) assert con.execute("FROM tab").fetchall() == [(b"abc",), (b"thisisaverybigbinaryyaymorethanfifteen",), (None,)] # By default we won't export a view - assert not con.execute("FROM tab").fetch_arrow_table().equals(tab) + assert not con.execute("FROM tab").to_arrow_table().equals(tab) # We do the binary view from 1.4 onwards con.execute("SET arrow_output_version = 1.4") - assert con.execute("FROM tab").fetch_arrow_table().equals(tab) + assert con.execute("FROM tab").to_arrow_table().equals(tab) assert con.execute("FROM tab where x = 'thisisaverybigbinaryyaymorethanfifteen'").fetchall() == [ (b"thisisaverybigbinaryyaymorethanfifteen",) diff --git a/tests/fast/arrow/test_arrow_decimal256.py b/tests/fast/arrow/test_arrow_decimal256.py index d687ec8a..b7fd1a03 100644 --- a/tests/fast/arrow/test_arrow_decimal256.py +++ b/tests/fast/arrow/test_arrow_decimal256.py @@ -17,4 +17,4 @@ def test_decimal_256_throws(self, duckdb_cursor): with pytest.raises( duckdb.NotImplementedException, match="Unsupported Internal Arrow Type for Decimal d:12,4,256" ): - conn.execute("select * from pa_decimal256;").fetch_arrow_table().to_pylist() + conn.execute("select * from pa_decimal256;").to_arrow_table().to_pylist() diff --git a/tests/fast/arrow/test_arrow_decimal_32_64.py b/tests/fast/arrow/test_arrow_decimal_32_64.py index 301d890f..3ae4dc98 100644 --- a/tests/fast/arrow/test_arrow_decimal_32_64.py +++ b/tests/fast/arrow/test_arrow_decimal_32_64.py @@ -33,7 +33,7 @@ def test_decimal_32(self, duckdb_cursor): ] # Test write - arrow_table = duckdb_cursor.execute("FROM decimal_32").fetch_arrow_table() + arrow_table = duckdb_cursor.execute("FROM decimal_32").to_arrow_table() assert arrow_table.equals(decimal_32) @@ -64,5 +64,5 @@ def test_decimal_64(self, duckdb_cursor): ).fetchall() == [(2,)] # Test write - arrow_table = duckdb_cursor.execute("FROM decimal_64").fetch_arrow_table() + arrow_table = duckdb_cursor.execute("FROM decimal_64").to_arrow_table() assert arrow_table.equals(decimal_64) diff --git a/tests/fast/arrow/test_arrow_deprecation.py b/tests/fast/arrow/test_arrow_deprecation.py new file mode 100644 index 00000000..bd041712 --- /dev/null +++ 
b/tests/fast/arrow/test_arrow_deprecation.py @@ -0,0 +1,137 @@ +import warnings + +import pytest + +import duckdb + + +class TestArrowDeprecation: + @pytest.fixture(autouse=True) + def setup(self, duckdb_cursor): + self.con = duckdb_cursor + self.con.execute("CREATE TABLE t AS SELECT 1 AS a") + + def test_relation_fetch_arrow_table_deprecated(self): + rel = self.con.table("t") + with pytest.warns( + DeprecationWarning, match="fetch_arrow_table\\(\\) is deprecated, use to_arrow_table\\(\\) instead" + ): + rel.fetch_arrow_table() + + def test_relation_fetch_record_batch_deprecated(self): + rel = self.con.table("t") + with pytest.warns( + DeprecationWarning, match="fetch_record_batch\\(\\) is deprecated, use to_arrow_reader\\(\\) instead" + ): + rel.fetch_record_batch() + + def test_relation_fetch_arrow_reader_deprecated(self): + rel = self.con.table("t") + with pytest.warns( + DeprecationWarning, match="fetch_arrow_reader\\(\\) is deprecated, use to_arrow_reader\\(\\) instead" + ): + rel.fetch_arrow_reader() + + def test_connection_fetch_arrow_table_deprecated(self): + self.con.execute("SELECT 1") + with pytest.warns( + DeprecationWarning, match="fetch_arrow_table\\(\\) is deprecated, use to_arrow_table\\(\\) instead" + ): + self.con.fetch_arrow_table() + + def test_connection_fetch_record_batch_deprecated(self): + self.con.execute("SELECT 1") + with pytest.warns( + DeprecationWarning, match="fetch_record_batch\\(\\) is deprecated, use to_arrow_reader\\(\\) instead" + ): + self.con.fetch_record_batch() + + def test_module_fetch_arrow_table_deprecated(self): + duckdb.execute("SELECT 1") + with pytest.warns( + DeprecationWarning, match="fetch_arrow_table\\(\\) is deprecated, use to_arrow_table\\(\\) instead" + ): + duckdb.fetch_arrow_table() + + def test_module_fetch_record_batch_deprecated(self): + duckdb.execute("SELECT 1") + with pytest.warns( + DeprecationWarning, match="fetch_record_batch\\(\\) is deprecated, use to_arrow_reader\\(\\) instead" + ): + duckdb.fetch_record_batch() + + def test_relation_to_arrow_table_works(self): + rel = self.con.table("t") + with warnings.catch_warnings(): + warnings.simplefilter("error") + result = rel.to_arrow_table() + assert result.num_rows == 1 + + def test_relation_to_arrow_reader_works(self): + rel = self.con.table("t") + with warnings.catch_warnings(): + warnings.simplefilter("error") + reader = rel.to_arrow_reader() + assert reader.read_all().num_rows == 1 + + def test_relation_arrow_no_warning(self): + """relation.arrow() should NOT emit a deprecation warning (soft deprecated).""" + rel = self.con.table("t") + with warnings.catch_warnings(): + warnings.simplefilter("error") + reader = rel.arrow() + assert reader.read_all().num_rows == 1 + + def test_connection_to_arrow_table_works(self): + self.con.execute("SELECT 1") + with warnings.catch_warnings(): + warnings.simplefilter("error") + result = self.con.to_arrow_table() + assert result.num_rows == 1 + + def test_connection_to_arrow_reader_works(self): + self.con.execute("SELECT 1") + with warnings.catch_warnings(): + warnings.simplefilter("error") + reader = self.con.to_arrow_reader() + assert reader.read_all().num_rows == 1 + + def test_connection_arrow_no_warning(self): + """connection.arrow() should NOT emit a deprecation warning (soft deprecated).""" + self.con.execute("SELECT 1") + with warnings.catch_warnings(): + warnings.simplefilter("error") + reader = self.con.arrow() + assert reader.read_all().num_rows == 1 + + def test_module_to_arrow_table_works(self): + duckdb.execute("SELECT 1") + 
with warnings.catch_warnings(): + warnings.simplefilter("error") + result = duckdb.to_arrow_table() + assert result.num_rows == 1 + + def test_module_to_arrow_reader_works(self): + duckdb.execute("SELECT 1") + with warnings.catch_warnings(): + warnings.simplefilter("error") + reader = duckdb.to_arrow_reader() + assert reader.read_all().num_rows == 1 + + def test_module_arrow_no_warning(self): + """duckdb.arrow(rows_per_batch) should NOT emit a deprecation warning (soft deprecated).""" + duckdb.execute("SELECT 1") + with warnings.catch_warnings(): + warnings.simplefilter("error") + result = duckdb.arrow() + assert result.read_all().num_rows == 1 + + def test_from_arrow_not_deprecated(self): + """duckdb.arrow(arrow_object) should NOT emit a deprecation warning.""" + import pyarrow as pa + + table = pa.table({"a": [1, 2, 3]}) + with warnings.catch_warnings(): + warnings.simplefilter("error") + rel = duckdb.arrow(table) + assert rel.fetchall() == [(1,), (2,), (3,)] diff --git a/tests/fast/arrow/test_arrow_extensions.py b/tests/fast/arrow/test_arrow_extensions.py index f79c32c4..3a73d266 100644 --- a/tests/fast/arrow/test_arrow_extensions.py +++ b/tests/fast/arrow/test_arrow_extensions.py @@ -21,7 +21,7 @@ def test_uuid(self): arrow_table = pa.Table.from_arrays([storage_array], names=["uuid_col"]) - duck_arrow = duckdb_cursor.execute("FROM arrow_table").fetch_arrow_table() + duck_arrow = duckdb_cursor.execute("FROM arrow_table").to_arrow_table() assert duck_arrow.equals(arrow_table) @@ -29,7 +29,7 @@ def test_uuid_from_duck(self): duckdb_cursor = duckdb.connect() duckdb_cursor.execute("SET arrow_lossless_conversion = true") - arrow_table = duckdb_cursor.execute("select uuid from test_all_types()").fetch_arrow_table() + arrow_table = duckdb_cursor.execute("select uuid from test_all_types()").to_arrow_table() assert arrow_table.to_pylist() == [ {"uuid": UUID("00000000-0000-0000-0000-000000000000")}, @@ -45,7 +45,7 @@ def test_uuid_from_duck(self): arrow_table = duckdb_cursor.execute( "select '00000000-0000-0000-0000-000000000100'::UUID as uuid" - ).fetch_arrow_table() + ).to_arrow_table() assert arrow_table.to_pylist() == [{"uuid": UUID("00000000-0000-0000-0000-000000000100")}] assert duckdb_cursor.execute("FROM arrow_table").fetchall() == [(UUID("00000000-0000-0000-0000-000000000100"),)] @@ -61,7 +61,7 @@ def test_json(self, duckdb_cursor): arrow_table = pa.Table.from_arrays([storage_array], names=["json_col"]) duckdb_cursor.execute("SET arrow_lossless_conversion = true") - duck_arrow = duckdb_cursor.execute("FROM arrow_table").fetch_arrow_table() + duck_arrow = duckdb_cursor.execute("FROM arrow_table").to_arrow_table() assert duck_arrow.equals(arrow_table) @@ -69,7 +69,7 @@ def test_uuid_no_def(self): duckdb_cursor = duckdb.connect() duckdb_cursor.execute("SET arrow_lossless_conversion = true") - res_arrow = duckdb_cursor.execute("select uuid from test_all_types()").fetch_arrow_table() + res_arrow = duckdb_cursor.execute("select uuid from test_all_types()").to_arrow_table() res_duck = duckdb_cursor.execute("from res_arrow").fetchall() assert res_duck == [ (UUID("00000000-0000-0000-0000-000000000000"),), @@ -79,7 +79,7 @@ def test_uuid_no_def(self): def test_uuid_no_def_lossless(self): duckdb_cursor = duckdb.connect() - res_arrow = duckdb_cursor.execute("select uuid from test_all_types()").fetch_arrow_table() + res_arrow = duckdb_cursor.execute("select uuid from test_all_types()").to_arrow_table() assert res_arrow.to_pylist() == [ {"uuid": "00000000-0000-0000-0000-000000000000"}, {"uuid": 
"ffffffff-ffff-ffff-ffff-ffffffffffff"}, @@ -97,7 +97,7 @@ def test_uuid_no_def_stream(self): duckdb_cursor = duckdb.connect() duckdb_cursor.execute("SET arrow_lossless_conversion = true") - res_arrow = duckdb_cursor.execute("select uuid from test_all_types()").fetch_record_batch() + res_arrow = duckdb_cursor.execute("select uuid from test_all_types()").to_arrow_reader() res_duck = duckdb.execute("from res_arrow").fetchall() assert res_duck == [ (UUID("00000000-0000-0000-0000-000000000000"),), @@ -136,7 +136,7 @@ def __arrow_ext_deserialize__(cls, storage_type, serialized) -> object: arrow_table = pa.Table.from_arrays([storage_array, age_array], names=["pedro_pedro_pedro", "age"]) - duck_arrow = duckdb_cursor.execute("FROM arrow_table").fetch_arrow_table() + duck_arrow = duckdb_cursor.execute("FROM arrow_table").to_arrow_table() assert duckdb_cursor.execute("FROM duck_arrow").fetchall() == [(b"pedro", 29)] def test_hugeint(self): @@ -153,11 +153,11 @@ def test_hugeint(self): assert con.execute("FROM arrow_table").fetchall() == [(-1,)] - assert con.execute("FROM arrow_table").fetch_arrow_table().equals(arrow_table) + assert con.execute("FROM arrow_table").to_arrow_table().equals(arrow_table) con.execute("SET arrow_lossless_conversion = false") - assert not con.execute("FROM arrow_table").fetch_arrow_table().equals(arrow_table) + assert not con.execute("FROM arrow_table").to_arrow_table().equals(arrow_table) def test_uhugeint(self, duckdb_cursor): storage_array = pa.array([b"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"], pa.binary(16)) @@ -171,11 +171,11 @@ def test_uhugeint(self, duckdb_cursor): def test_bit(self): con = duckdb.connect() - res_blob = con.execute("SELECT '0101011'::BIT str FROM range(5) tbl(i)").fetch_arrow_table() + res_blob = con.execute("SELECT '0101011'::BIT str FROM range(5) tbl(i)").to_arrow_table() con.execute("SET arrow_lossless_conversion = true") - res_bit = con.execute("SELECT '0101011'::BIT str FROM range(5) tbl(i)").fetch_arrow_table() + res_bit = con.execute("SELECT '0101011'::BIT str FROM range(5) tbl(i)").to_arrow_table() assert con.execute("FROM res_blob").fetchall() == [ (b"\x01\xab",), @@ -195,11 +195,11 @@ def test_bit(self): def test_timetz(self): con = duckdb.connect() - res_time = con.execute("SELECT '02:30:00+04'::TIMETZ str FROM range(1) tbl(i)").fetch_arrow_table() + res_time = con.execute("SELECT '02:30:00+04'::TIMETZ str FROM range(1) tbl(i)").to_arrow_table() con.execute("SET arrow_lossless_conversion = true") - res_tz = con.execute("SELECT '02:30:00+04'::TIMETZ str FROM range(1) tbl(i)").fetch_arrow_table() + res_tz = con.execute("SELECT '02:30:00+04'::TIMETZ str FROM range(1) tbl(i)").to_arrow_table() assert con.execute("FROM res_time").fetchall() == [(datetime.time(2, 30),)] assert con.execute("FROM res_tz").fetchall() == [ @@ -210,7 +210,7 @@ def test_bignum(self): con = duckdb.connect() res_bignum = con.execute( "SELECT '179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368'::bignum a FROM range(1) tbl(i)" # noqa: E501 - ).fetch_arrow_table() + ).to_arrow_table() assert res_bignum.column("a").type.type_name == "bignum" assert res_bignum.column("a").type.vendor_name == "DuckDB" @@ -226,7 +226,7 @@ def test_nested_types_with_extensions(self): arrow_table = 
duckdb_cursor.execute( "select map {uuid(): 1::uhugeint, uuid(): 2::uhugeint} as li" - ).fetch_arrow_table() + ).to_arrow_table() assert arrow_table.schema[0].type.key_type.extension_name == "arrow.uuid" assert arrow_table.schema[0].type.item_type.extension_name == "arrow.opaque" @@ -267,7 +267,7 @@ def test_boolean(self): bool8_array = pa.ExtensionArray.from_storage(pa.bool8(), storage_array) arrow_table = pa.Table.from_arrays([bool8_array], names=["bool8"]) assert con.execute("FROM arrow_table").fetchall() == [(True,), (False,), (True,), (True,), (None,)] - result_table = con.execute("FROM arrow_table").fetch_arrow_table() + result_table = con.execute("FROM arrow_table").to_arrow_table() res_storage_array = pa.array([1, 0, 1, 1, None], pa.int8()) res_bool8_array = pa.ExtensionArray.from_storage(pa.bool8(), res_storage_array) @@ -290,7 +290,7 @@ def test_accept_malformed_complex_json(self, duckdb_cursor): schema=schema, ) - tbl = duckdb_cursor.sql("""SELECT geometry as wkt FROM geo_table;""").fetch_arrow_table() + tbl = duckdb_cursor.sql("""SELECT geometry as wkt FROM geo_table;""").to_arrow_table() assert pa.types.is_binary(tbl.schema[0].type) field = pa.field( @@ -307,7 +307,7 @@ def test_accept_malformed_complex_json(self, duckdb_cursor): schema=schema, ) with pytest.raises(duckdb.SerializationException, match="Failed to parse JSON string"): - tbl = duckdb_cursor.sql("""SELECT geometry as wkt FROM geo_table;""").fetch_arrow_table() + tbl = duckdb_cursor.sql("""SELECT geometry as wkt FROM geo_table;""").to_arrow_table() field = pa.field( "geometry", @@ -322,7 +322,7 @@ def test_accept_malformed_complex_json(self, duckdb_cursor): [pa.array([], pa.binary())], schema=schema, ) - tbl = duckdb_cursor.sql("""SELECT geometry as wkt FROM geo_table;""").fetch_arrow_table() + tbl = duckdb_cursor.sql("""SELECT geometry as wkt FROM geo_table;""").to_arrow_table() assert pa.types.is_binary(tbl.schema[0].type) field = pa.field( diff --git a/tests/fast/arrow/test_arrow_fetch.py b/tests/fast/arrow/test_arrow_fetch.py index ba5d13a4..cf1ff46b 100644 --- a/tests/fast/arrow/test_arrow_fetch.py +++ b/tests/fast/arrow/test_arrow_fetch.py @@ -14,7 +14,7 @@ def check_equal(duckdb_conn): true_result = duckdb_conn.execute("SELECT * from test").fetchall() duck_tbl = duckdb_conn.table("test") - duck_from_arrow = duckdb_conn.from_arrow(duck_tbl.fetch_arrow_table()) + duck_from_arrow = duckdb_conn.from_arrow(duck_tbl.to_arrow_table()) duck_from_arrow.create("testarrow") arrow_result = duckdb_conn.execute("SELECT * from testarrow").fetchall() assert arrow_result == true_result @@ -86,7 +86,7 @@ def test_to_arrow_chunk_size(self, duckdb_cursor): duckdb_cursor = duckdb.connect() duckdb_cursor.execute("CREATE table t as select range a from range(3000);") relation = duckdb_cursor.table("t") - arrow_tbl = relation.fetch_arrow_table() + arrow_tbl = relation.to_arrow_table() assert arrow_tbl["a"].num_chunks == 1 - arrow_tbl = relation.fetch_arrow_table(2048) + arrow_tbl = relation.to_arrow_table(2048) assert arrow_tbl["a"].num_chunks == 2 diff --git a/tests/fast/arrow/test_arrow_fetch_recordbatch.py b/tests/fast/arrow/test_arrow_fetch_recordbatch.py index a5804c87..d060659f 100644 --- a/tests/fast/arrow/test_arrow_fetch_recordbatch.py +++ b/tests/fast/arrow/test_arrow_fetch_recordbatch.py @@ -12,7 +12,7 @@ def test_record_batch_next_batch_numeric(self, duckdb_cursor): duckdb_cursor_check = duckdb.connect() duckdb_cursor.execute("CREATE table t as select range a from range(3000);") query = duckdb_cursor.execute("SELECT a 
FROM t") - record_batch_reader = query.fetch_record_batch(1024) + record_batch_reader = query.to_arrow_reader(1024) assert record_batch_reader.schema.names == ["a"] chunk = record_batch_reader.read_next_batch() assert len(chunk) == 1024 @@ -24,7 +24,7 @@ def test_record_batch_next_batch_numeric(self, duckdb_cursor): chunk = record_batch_reader.read_next_batch() # Check if we are producing the correct thing query = duckdb_cursor.execute("SELECT a FROM t") - record_batch_reader = query.fetch_record_batch(1024) + record_batch_reader = query.to_arrow_reader(1024) res = duckdb_cursor_check.execute("select * from record_batch_reader").fetchall() correct = duckdb_cursor.execute("select * from t").fetchall() @@ -38,7 +38,7 @@ def test_record_batch_next_batch_bool(self, duckdb_cursor): "CREATE table t as SELECT CASE WHEN i % 2 = 0 THEN true ELSE false END AS a from range(3000) as tbl(i);" ) query = duckdb_cursor.execute("SELECT a FROM t") - record_batch_reader = query.fetch_record_batch(1024) + record_batch_reader = query.to_arrow_reader(1024) assert record_batch_reader.schema.names == ["a"] chunk = record_batch_reader.read_next_batch() assert len(chunk) == 1024 @@ -51,7 +51,7 @@ def test_record_batch_next_batch_bool(self, duckdb_cursor): # Check if we are producing the correct thing query = duckdb_cursor.execute("SELECT a FROM t") - record_batch_reader = query.fetch_record_batch(1024) + record_batch_reader = query.to_arrow_reader(1024) res = duckdb_cursor_check.execute("select * from record_batch_reader").fetchall() correct = duckdb_cursor.execute("select * from t").fetchall() @@ -63,7 +63,7 @@ def test_record_batch_next_batch_varchar(self, duckdb_cursor): duckdb_cursor_check = duckdb.connect() duckdb_cursor.execute("CREATE table t as select range::varchar a from range(3000);") query = duckdb_cursor.execute("SELECT a FROM t") - record_batch_reader = query.fetch_record_batch(1024) + record_batch_reader = query.to_arrow_reader(1024) assert record_batch_reader.schema.names == ["a"] chunk = record_batch_reader.read_next_batch() assert len(chunk) == 1024 @@ -76,7 +76,7 @@ def test_record_batch_next_batch_varchar(self, duckdb_cursor): # Check if we are producing the correct thing query = duckdb_cursor.execute("SELECT a FROM t") - record_batch_reader = query.fetch_record_batch(1024) + record_batch_reader = query.to_arrow_reader(1024) res = duckdb_cursor_check.execute("select * from record_batch_reader").fetchall() correct = duckdb_cursor.execute("select * from t").fetchall() @@ -90,7 +90,7 @@ def test_record_batch_next_batch_struct(self, duckdb_cursor): "CREATE table t as select {'x': i, 'y': i::varchar, 'z': i+1} as a from range(3000) as tbl(i);" ) query = duckdb_cursor.execute("SELECT a FROM t") - record_batch_reader = query.fetch_record_batch(1024) + record_batch_reader = query.to_arrow_reader(1024) assert record_batch_reader.schema.names == ["a"] chunk = record_batch_reader.read_next_batch() assert len(chunk) == 1024 @@ -103,7 +103,7 @@ def test_record_batch_next_batch_struct(self, duckdb_cursor): # Check if we are producing the correct thing query = duckdb_cursor.execute("SELECT a FROM t") - record_batch_reader = query.fetch_record_batch(1024) + record_batch_reader = query.to_arrow_reader(1024) res = duckdb_cursor_check.execute("select * from record_batch_reader").fetchall() correct = duckdb_cursor.execute("select * from t").fetchall() @@ -115,7 +115,7 @@ def test_record_batch_next_batch_list(self, duckdb_cursor): duckdb_cursor_check = duckdb.connect() duckdb_cursor.execute("CREATE table t as select 
[i,i+1] as a from range(3000) as tbl(i);") query = duckdb_cursor.execute("SELECT a FROM t") - record_batch_reader = query.fetch_record_batch(1024) + record_batch_reader = query.to_arrow_reader(1024) assert record_batch_reader.schema.names == ["a"] chunk = record_batch_reader.read_next_batch() assert len(chunk) == 1024 @@ -128,7 +128,7 @@ def test_record_batch_next_batch_list(self, duckdb_cursor): # Check if we are producing the correct thing query = duckdb_cursor.execute("SELECT a FROM t") - record_batch_reader = query.fetch_record_batch(1024) + record_batch_reader = query.to_arrow_reader(1024) res = duckdb_cursor_check.execute("select * from record_batch_reader").fetchall() correct = duckdb_cursor.execute("select * from t").fetchall() @@ -141,7 +141,7 @@ def test_record_batch_next_batch_map(self, duckdb_cursor): duckdb_cursor_check = duckdb.connect() duckdb_cursor.execute("CREATE table t as select map([i], [i+1]) as a from range(3000) as tbl(i);") query = duckdb_cursor.execute("SELECT a FROM t") - record_batch_reader = query.fetch_record_batch(1024) + record_batch_reader = query.to_arrow_reader(1024) assert record_batch_reader.schema.names == ["a"] chunk = record_batch_reader.read_next_batch() assert len(chunk) == 1024 @@ -154,7 +154,7 @@ def test_record_batch_next_batch_map(self, duckdb_cursor): # Check if we are producing the correct thing query = duckdb_cursor.execute("SELECT a FROM t") - record_batch_reader = query.fetch_record_batch(1024) + record_batch_reader = query.to_arrow_reader(1024) res = duckdb_cursor_check.execute("select * from record_batch_reader").fetchall() correct = duckdb_cursor.execute("select * from t").fetchall() @@ -169,7 +169,7 @@ def test_record_batch_next_batch_with_null(self, duckdb_cursor): "CREATE table t as SELECT CASE WHEN i % 2 = 0 THEN i ELSE NULL END AS a from range(3000) as tbl(i);" ) query = duckdb_cursor.execute("SELECT a FROM t") - record_batch_reader = query.fetch_record_batch(1024) + record_batch_reader = query.to_arrow_reader(1024) assert record_batch_reader.schema.names == ["a"] chunk = record_batch_reader.read_next_batch() assert len(chunk) == 1024 @@ -182,7 +182,7 @@ def test_record_batch_next_batch_with_null(self, duckdb_cursor): # Check if we are producing the correct thing query = duckdb_cursor.execute("SELECT a FROM t") - record_batch_reader = query.fetch_record_batch(1024) + record_batch_reader = query.to_arrow_reader(1024) res = duckdb_cursor_check.execute("select * from record_batch_reader").fetchall() correct = duckdb_cursor.execute("select * from t").fetchall() @@ -193,7 +193,7 @@ def test_record_batch_read_default(self, duckdb_cursor): duckdb_cursor = duckdb.connect() duckdb_cursor.execute("CREATE table t as select range a from range(3000);") query = duckdb_cursor.execute("SELECT a FROM t") - record_batch_reader = query.fetch_record_batch() + record_batch_reader = query.to_arrow_reader() chunk = record_batch_reader.read_next_batch() assert len(chunk) == 3000 @@ -201,7 +201,7 @@ def test_record_batch_next_batch_multiple_vectors_per_chunk(self, duckdb_cursor) duckdb_cursor = duckdb.connect() duckdb_cursor.execute("CREATE table t as select range a from range(5000);") query = duckdb_cursor.execute("SELECT a FROM t") - record_batch_reader = query.fetch_record_batch(2048) + record_batch_reader = query.to_arrow_reader(2048) chunk = record_batch_reader.read_next_batch() assert len(chunk) == 2048 chunk = record_batch_reader.read_next_batch() @@ -212,12 +212,12 @@ def test_record_batch_next_batch_multiple_vectors_per_chunk(self, duckdb_cursor) 
chunk = record_batch_reader.read_next_batch() query = duckdb_cursor.execute("SELECT a FROM t") - record_batch_reader = query.fetch_record_batch(1) + record_batch_reader = query.to_arrow_reader(1) chunk = record_batch_reader.read_next_batch() assert len(chunk) == 1 query = duckdb_cursor.execute("SELECT a FROM t") - record_batch_reader = query.fetch_record_batch(2000) + record_batch_reader = query.to_arrow_reader(2000) chunk = record_batch_reader.read_next_batch() assert len(chunk) == 2000 @@ -226,15 +226,15 @@ def test_record_batch_next_batch_multiple_vectors_per_chunk_error(self, duckdb_c duckdb_cursor.execute("CREATE table t as select range a from range(5000);") query = duckdb_cursor.execute("SELECT a FROM t") with pytest.raises(RuntimeError, match="Approximate Batch Size of Record Batch MUST be higher than 0"): - query.fetch_record_batch(0) + query.to_arrow_reader(0) with pytest.raises(TypeError, match="incompatible function arguments"): - query.fetch_record_batch(-1) + query.to_arrow_reader(-1) def test_record_batch_reader_from_relation(self, duckdb_cursor): duckdb_cursor = duckdb.connect() duckdb_cursor.execute("CREATE table t as select range a from range(3000);") relation = duckdb_cursor.table("t") - record_batch_reader = relation.fetch_record_batch() + record_batch_reader = relation.to_arrow_reader() chunk = record_batch_reader.read_next_batch() assert len(chunk) == 3000 @@ -242,7 +242,7 @@ def test_record_coverage(self, duckdb_cursor): duckdb_cursor = duckdb.connect() duckdb_cursor.execute("CREATE table t as select range a from range(2048);") query = duckdb_cursor.execute("SELECT a FROM t") - record_batch_reader = query.fetch_record_batch(1024) + record_batch_reader = query.to_arrow_reader(1024) chunk = record_batch_reader.read_all() assert len(chunk) == 2048 @@ -268,7 +268,7 @@ def test_many_list_batches(self): # Because this produces multiple chunks, this caused a segfault before # because we changed some data in the first batch fetch - batch_iter = conn.execute(query).fetch_record_batch(chunk_size) + batch_iter = conn.execute(query).to_arrow_reader(chunk_size) for batch in batch_iter: del batch @@ -278,7 +278,7 @@ def test_many_chunk_sizes(self): query = duckdb_cursor.execute(f"CREATE table t as select range a from range({object_size});") for i in [1, 2, 4, 8, 16, 32, 33, 77, 999, 999999]: query = duckdb_cursor.execute("SELECT a FROM t") - record_batch_reader = query.fetch_record_batch(i) + record_batch_reader = query.to_arrow_reader(i) num_loops = int(object_size / i) for _j in range(num_loops): assert record_batch_reader.schema.names == ["a"] diff --git a/tests/fast/arrow/test_arrow_run_end_encoding.py b/tests/fast/arrow/test_arrow_run_end_encoding.py index 40d9131a..6451f348 100644 --- a/tests/fast/arrow/test_arrow_run_end_encoding.py +++ b/tests/fast/arrow/test_arrow_run_end_encoding.py @@ -56,7 +56,7 @@ def test_arrow_run_end_encoding_numerics(self, duckdb_cursor, query, run_length, size = 127 query = query.format(run_length, value_type, size) rel = duckdb_cursor.sql(query) - array = rel.fetch_arrow_table()["ree"] + array = rel.to_arrow_table()["ree"] expected = rel.fetchall() encoded_array = pc.run_end_encode(array) @@ -128,7 +128,7 @@ def test_arrow_run_end_encoding(self, duckdb_cursor, dbtype, val1, val2, filter) expected = duckdb_cursor.query(f"select {projection} from ree_tbl where {filter}").fetchall() # Create an Arrow Table from the table - arrow_conversion = rel.fetch_arrow_table() + arrow_conversion = rel.to_arrow_table() arrays = { "ree": arrow_conversion["ree"], 
"a": arrow_conversion["a"], @@ -157,7 +157,7 @@ def test_arrow_run_end_encoding(self, duckdb_cursor, dbtype, val1, val2, filter) def test_arrow_ree_empty_table(self, duckdb_cursor): duckdb_cursor.query("create table tbl (ree integer)") rel = duckdb_cursor.table("tbl") - array = rel.fetch_arrow_table()["ree"] + array = rel.to_arrow_table()["ree"] expected = rel.fetchall() encoded_array = pc.run_end_encode(array) @@ -194,7 +194,7 @@ def test_arrow_ree_projections(self, duckdb_cursor, projection): ) # Fetch the result as an Arrow Table - result = duckdb_cursor.table("tbl").fetch_arrow_table() + result = duckdb_cursor.table("tbl").to_arrow_table() # Turn 'ree' into a run-end-encoded array and reconstruct a table from it arrays = { @@ -225,7 +225,7 @@ def test_arrow_ree_projections(self, duckdb_cursor, projection): f""" select {projection} from arrow_tbl """ - ).fetch_arrow_table() + ).to_arrow_table() # Verify correctness by fetching from the original table and the constructed result expected = duckdb_cursor.query(f"select {projection} from tbl").fetchall() @@ -249,7 +249,7 @@ def test_arrow_ree_list(self, duckdb_cursor, create_list): """ select * from tbl """ - ).fetch_arrow_table() + ).to_arrow_table() columns = unstructured.columns # Run-encode the first column ('ree') @@ -273,7 +273,7 @@ def test_arrow_ree_list(self, duckdb_cursor, create_list): structured = pa.chunked_array(structured_chunks) arrow_tbl = pa.Table.from_arrays([structured], names=["ree"]) - result = duckdb_cursor.query("select * from arrow_tbl").fetch_arrow_table() + result = duckdb_cursor.query("select * from arrow_tbl").to_arrow_table() assert arrow_tbl.to_pylist() == result.to_pylist() def test_arrow_ree_struct(self, duckdb_cursor): @@ -294,7 +294,7 @@ def test_arrow_ree_struct(self, duckdb_cursor): """ select * from tbl """ - ).fetch_arrow_table() + ).to_arrow_table() columns = unstructured.columns # Run-encode the first column ('ree') @@ -309,7 +309,7 @@ def test_arrow_ree_struct(self, duckdb_cursor): structured = pa.chunked_array(structured_chunks) arrow_tbl = pa.Table.from_arrays([structured], names=["ree"]) # noqa: F841 - result = duckdb_cursor.query("select * from arrow_tbl").fetch_arrow_table() # noqa: F841 + result = duckdb_cursor.query("select * from arrow_tbl").to_arrow_table() # noqa: F841 expected = duckdb_cursor.query("select {'ree': ree, 'a': a, 'b': b, 'c': c} as s from tbl").fetchall() actual = duckdb_cursor.query("select * from result").fetchall() @@ -336,7 +336,7 @@ def test_arrow_ree_union(self, duckdb_cursor): """ select * from tbl """ - ).fetch_arrow_table() + ).to_arrow_table() columns = unstructured.columns # Run-encode the first column ('ree') @@ -358,7 +358,7 @@ def test_arrow_ree_union(self, duckdb_cursor): structured = pa.chunked_array(structured_chunks) arrow_tbl = pa.Table.from_arrays([structured], names=["ree"]) # noqa: F841 - result = duckdb_cursor.query("select * from arrow_tbl").fetch_arrow_table() # noqa: F841 + result = duckdb_cursor.query("select * from arrow_tbl").to_arrow_table() # noqa: F841 # Recreate the same result set expected = [] @@ -392,7 +392,7 @@ def test_arrow_ree_map(self, duckdb_cursor): """ select * from tbl """ - ).fetch_arrow_table() + ).to_arrow_table() columns = unstructured.columns # Run-encode the first column ('ree') @@ -418,7 +418,7 @@ def test_arrow_ree_map(self, duckdb_cursor): structured = pa.chunked_array(structured_chunks) arrow_tbl = pa.Table.from_arrays([structured], names=["ree"]) - result = duckdb_cursor.query("select * from 
arrow_tbl").fetch_arrow_table() + result = duckdb_cursor.query("select * from arrow_tbl").to_arrow_table() # Verify that the resulting scan is the same as the input assert result.to_pylist() == arrow_tbl.to_pylist() @@ -440,7 +440,7 @@ def test_arrow_ree_dictionary(self, duckdb_cursor): """ select * from tbl """ - ).fetch_arrow_table() + ).to_arrow_table() columns = unstructured.columns # Run-encode the first column ('ree') @@ -458,7 +458,7 @@ def test_arrow_ree_dictionary(self, duckdb_cursor): structured = pa.chunked_array(structured_chunks) arrow_tbl = pa.Table.from_arrays([structured], names=["ree"]) - result = duckdb_cursor.query("select * from arrow_tbl").fetch_arrow_table() + result = duckdb_cursor.query("select * from arrow_tbl").to_arrow_table() # Verify that the resulting scan is the same as the input assert result.to_pylist() == arrow_tbl.to_pylist() diff --git a/tests/fast/arrow/test_arrow_string_view.py b/tests/fast/arrow/test_arrow_string_view.py index 9ed9bece..abba336e 100644 --- a/tests/fast/arrow/test_arrow_string_view.py +++ b/tests/fast/arrow/test_arrow_string_view.py @@ -13,7 +13,7 @@ def RoundTripStringView(query, array): con = duckdb.connect() con.execute("SET produce_arrow_string_view=True") - arrow_tbl = con.execute(query).fetch_arrow_table() + arrow_tbl = con.execute(query).to_arrow_table() # Assert that we spit the same as the defined array arrow_tbl[0].validate(full=True) assert arrow_tbl[0].combine_chunks().tolist() == array.tolist() @@ -27,14 +27,14 @@ def RoundTripStringView(query, array): # Create a table using the schema and the array gt_table = pa.Table.from_arrays([array], schema=schema) # noqa: F841 - arrow_table = con.execute("select * from gt_table").fetch_arrow_table() # noqa: F841 + arrow_table = con.execute("select * from gt_table").to_arrow_table() # noqa: F841 assert arrow_tbl[0].combine_chunks().tolist() == array.tolist() def RoundTripDuckDBInternal(query): con = duckdb.connect() con.execute("SET produce_arrow_string_view=True") - arrow_tbl = con.execute(query).fetch_arrow_table() + arrow_tbl = con.execute(query).to_arrow_table() arrow_tbl.validate(full=True) res = con.execute(query).fetchall() from_arrow_res = con.execute("FROM arrow_tbl order by str").fetchall() diff --git a/tests/fast/arrow/test_arrow_types.py b/tests/fast/arrow/test_arrow_types.py index be03009c..5f884f6a 100644 --- a/tests/fast/arrow/test_arrow_types.py +++ b/tests/fast/arrow/test_arrow_types.py @@ -12,7 +12,7 @@ def test_null_type(self, duckdb_cursor): inputs = [pa.array([None, None, None], type=pa.null())] arrow_table = pa.Table.from_arrays(inputs, schema=schema) duckdb_cursor.register("testarrow", arrow_table) - rel = duckdb.from_arrow(arrow_table).fetch_arrow_table() + rel = duckdb.from_arrow(arrow_table).to_arrow_table() # We turn it to an array of int32 nulls schema = pa.schema([("data", pa.int32())]) inputs = [pa.array([None, None, None], type=pa.null())] diff --git a/tests/fast/arrow/test_arrow_union.py b/tests/fast/arrow/test_arrow_union.py index 784a5433..1c89c388 100644 --- a/tests/fast/arrow/test_arrow_union.py +++ b/tests/fast/arrow/test_arrow_union.py @@ -36,7 +36,7 @@ def test_unions_with_struct(duckdb_cursor): ) rel = duckdb_cursor.table("tbl") - arrow = rel.fetch_arrow_table() # noqa: F841 + arrow = rel.to_arrow_table() # noqa: F841 duckdb_cursor.execute("create table other as select * from arrow") rel2 = duckdb_cursor.table("other") @@ -45,4 +45,4 @@ def test_unions_with_struct(duckdb_cursor): def run(conn, query): - return 
conn.sql(query).fetch_arrow_table().columns[0][0] + return conn.sql(query).to_arrow_table().columns[0][0] diff --git a/tests/fast/arrow/test_arrow_version_format.py b/tests/fast/arrow/test_arrow_version_format.py index d2864b15..7e5e37c3 100644 --- a/tests/fast/arrow/test_arrow_version_format.py +++ b/tests/fast/arrow/test_arrow_version_format.py @@ -20,7 +20,7 @@ def test_decimal_v1_5(self, duckdb_cursor): ], pa.schema([("data", pa.decimal32(5, 2))]), ) - col_type = duckdb_cursor.execute("FROM decimal_32").fetch_arrow_table().schema.field("data").type + col_type = duckdb_cursor.execute("FROM decimal_32").to_arrow_table().schema.field("data").type assert col_type.bit_width == 32 assert pa.types.is_decimal(col_type) @@ -33,12 +33,12 @@ def test_decimal_v1_5(self, duckdb_cursor): ], pa.schema([("data", pa.decimal64(16, 3))]), ) - col_type = duckdb_cursor.execute("FROM decimal_64").fetch_arrow_table().schema.field("data").type + col_type = duckdb_cursor.execute("FROM decimal_64").to_arrow_table().schema.field("data").type assert col_type.bit_width == 64 assert pa.types.is_decimal(col_type) for version in ["1.0", "1.1", "1.2", "1.3", "1.4"]: duckdb_cursor.execute(f"SET arrow_output_version = {version}") - result = duckdb_cursor.execute("FROM decimal_32").fetch_arrow_table() + result = duckdb_cursor.execute("FROM decimal_32").to_arrow_table() col_type = result.schema.field("data").type assert col_type.bit_width == 128 assert pa.types.is_decimal(col_type) @@ -46,7 +46,7 @@ def test_decimal_v1_5(self, duckdb_cursor): "data": [Decimal("100.20"), Decimal("110.21"), Decimal("31.20"), Decimal("500.20")] } - result = duckdb_cursor.execute("FROM decimal_64").fetch_arrow_table() + result = duckdb_cursor.execute("FROM decimal_64").to_arrow_table() col_type = result.schema.field("data").type assert col_type.bit_width == 128 assert pa.types.is_decimal(col_type) @@ -64,31 +64,31 @@ def test_view_v1_4(self, duckdb_cursor): duckdb_cursor.execute("SET arrow_output_version = 1.5") duckdb_cursor.execute("SET produce_arrow_string_view=True") duckdb_cursor.execute("SET arrow_output_list_view=True") - col_type = duckdb_cursor.execute("SELECT 'string' as data ").fetch_arrow_table().schema.field("data").type + col_type = duckdb_cursor.execute("SELECT 'string' as data ").to_arrow_table().schema.field("data").type assert pa.types.is_string_view(col_type) - col_type = duckdb_cursor.execute("SELECT ['string'] as data ").fetch_arrow_table().schema.field("data").type + col_type = duckdb_cursor.execute("SELECT ['string'] as data ").to_arrow_table().schema.field("data").type assert pa.types.is_list_view(col_type) for version in ["1.0", "1.1", "1.2", "1.3"]: duckdb_cursor.execute(f"SET arrow_output_version = {version}") - col_type = duckdb_cursor.execute("SELECT 'string' as data ").fetch_arrow_table().schema.field("data").type + col_type = duckdb_cursor.execute("SELECT 'string' as data ").to_arrow_table().schema.field("data").type assert not pa.types.is_string_view(col_type) - col_type = duckdb_cursor.execute("SELECT ['string'] as data ").fetch_arrow_table().schema.field("data").type + col_type = duckdb_cursor.execute("SELECT ['string'] as data ").to_arrow_table().schema.field("data").type assert not pa.types.is_list_view(col_type) for version in ["1.4", "1.5"]: duckdb_cursor.execute(f"SET arrow_output_version = {version}") - col_type = duckdb_cursor.execute("SELECT 'string' as data ").fetch_arrow_table().schema.field("data").type + col_type = duckdb_cursor.execute("SELECT 'string' as data 
").to_arrow_table().schema.field("data").type assert pa.types.is_string_view(col_type) - col_type = duckdb_cursor.execute("SELECT ['string'] as data ").fetch_arrow_table().schema.field("data").type + col_type = duckdb_cursor.execute("SELECT ['string'] as data ").to_arrow_table().schema.field("data").type assert pa.types.is_list_view(col_type) duckdb_cursor.execute("SET produce_arrow_string_view=False") duckdb_cursor.execute("SET arrow_output_list_view=False") for version in ["1.4", "1.5"]: duckdb_cursor.execute(f"SET arrow_output_version = {version}") - col_type = duckdb_cursor.execute("SELECT 'string' as data ").fetch_arrow_table().schema.field("data").type + col_type = duckdb_cursor.execute("SELECT 'string' as data ").to_arrow_table().schema.field("data").type assert not pa.types.is_string_view(col_type) - col_type = duckdb_cursor.execute("SELECT ['string'] as data ").fetch_arrow_table().schema.field("data").type + col_type = duckdb_cursor.execute("SELECT ['string'] as data ").to_arrow_table().schema.field("data").type assert not pa.types.is_list_view(col_type) diff --git a/tests/fast/arrow/test_buffer_size_option.py b/tests/fast/arrow/test_buffer_size_option.py index c63adfcc..6f5c41af 100644 --- a/tests/fast/arrow/test_buffer_size_option.py +++ b/tests/fast/arrow/test_buffer_size_option.py @@ -11,23 +11,23 @@ def test_arrow_buffer_size(self): con = duckdb.connect() # All small string - res = con.query("select 'bla'").fetch_arrow_table() + res = con.query("select 'bla'").to_arrow_table() assert res[0][0].type == pa.string() - res = con.query("select 'bla'").fetch_record_batch() + res = con.query("select 'bla'").to_arrow_reader() assert res.schema[0].type == pa.string() # All Large String con.execute("SET arrow_large_buffer_size=True") - res = con.query("select 'bla'").fetch_arrow_table() + res = con.query("select 'bla'").to_arrow_table() assert res[0][0].type == pa.large_string() - res = con.query("select 'bla'").fetch_record_batch() + res = con.query("select 'bla'").to_arrow_reader() assert res.schema[0].type == pa.large_string() # All small string again con.execute("SET arrow_large_buffer_size=False") - res = con.query("select 'bla'").fetch_arrow_table() + res = con.query("select 'bla'").to_arrow_table() assert res[0][0].type == pa.string() - res = con.query("select 'bla'").fetch_record_batch() + res = con.query("select 'bla'").to_arrow_reader() assert res.schema[0].type == pa.string() def test_arrow_buffer_size_udf(self): @@ -37,12 +37,12 @@ def just_return(x): con = duckdb.connect() con.create_function("just_return", just_return, [VARCHAR], VARCHAR, type="arrow") - res = con.query("select just_return('bla')").fetch_arrow_table() + res = con.query("select just_return('bla')").to_arrow_table() assert res[0][0].type == pa.string() # All Large String con.execute("SET arrow_large_buffer_size=True") - res = con.query("select just_return('bla')").fetch_arrow_table() + res = con.query("select just_return('bla')").to_arrow_table() assert res[0][0].type == pa.large_string() diff --git a/tests/fast/arrow/test_dataset.py b/tests/fast/arrow/test_dataset.py index 36e29110..595c384a 100644 --- a/tests/fast/arrow/test_dataset.py +++ b/tests/fast/arrow/test_dataset.py @@ -77,7 +77,7 @@ def test_parallel_dataset_roundtrip(self, duckdb_cursor): duckdb_conn.register("dataset", userdata_parquet_dataset) query = duckdb_conn.execute("SELECT * FROM dataset order by id") - record_batch_reader = query.fetch_record_batch(2048) + record_batch_reader = query.to_arrow_reader(2048) arrow_table = 
record_batch_reader.read_all() # noqa: F841 # reorder since order of rows isn't deterministic @@ -94,7 +94,7 @@ def test_ducktyping(self, duckdb_cursor): duckdb_conn = duckdb.connect() dataset = CustomDataset() # noqa: F841 query = duckdb_conn.execute("SELECT b FROM dataset WHERE a < 5") - record_batch_reader = query.fetch_record_batch(2048) + record_batch_reader = query.to_arrow_reader(2048) arrow_table = record_batch_reader.read_all() assert arrow_table.equals(CustomDataset.DATA[:5].select(["b"])) diff --git a/tests/fast/arrow/test_date.py b/tests/fast/arrow/test_date.py index 20cf9f0f..951b85ef 100644 --- a/tests/fast/arrow/test_date.py +++ b/tests/fast/arrow/test_date.py @@ -15,7 +15,7 @@ def test_date_types(self, duckdb_cursor): data = (pa.array([1000 * 60 * 60 * 24], type=pa.date64()), pa.array([1], type=pa.date32())) arrow_table = pa.Table.from_arrays([data[0], data[1]], ["a", "b"]) - rel = duckdb.from_arrow(arrow_table).fetch_arrow_table() + rel = duckdb.from_arrow(arrow_table).to_arrow_table() assert rel["a"] == arrow_table["b"] assert rel["b"] == arrow_table["b"] @@ -24,7 +24,7 @@ def test_date_null(self, duckdb_cursor): return data = (pa.array([None], type=pa.date64()), pa.array([None], type=pa.date32())) arrow_table = pa.Table.from_arrays([data[0], data[1]], ["a", "b"]) - rel = duckdb.from_arrow(arrow_table).fetch_arrow_table() + rel = duckdb.from_arrow(arrow_table).to_arrow_table() assert rel["a"] == arrow_table["b"] assert rel["b"] == arrow_table["b"] @@ -38,6 +38,6 @@ def test_max_date(self, duckdb_cursor): pa.array([2147483647], type=pa.date32()), ) arrow_table = pa.Table.from_arrays([data[0], data[1]], ["a", "b"]) - rel = duckdb.from_arrow(arrow_table).fetch_arrow_table() + rel = duckdb.from_arrow(arrow_table).to_arrow_table() assert rel["a"] == result["a"] assert rel["b"] == result["b"] diff --git a/tests/fast/arrow/test_dictionary_arrow.py b/tests/fast/arrow/test_dictionary_arrow.py index 32c348a3..7a6a5be7 100644 --- a/tests/fast/arrow/test_dictionary_arrow.py +++ b/tests/fast/arrow/test_dictionary_arrow.py @@ -127,7 +127,7 @@ def test_dictionary_roundtrip(self, query, element, duckdb_cursor, count): query = query.format(element, count) original_rel = duckdb_cursor.sql(query) expected = original_rel.fetchall() - arrow_res = original_rel.fetch_arrow_table() # noqa: F841 + arrow_res = original_rel.to_arrow_table() # noqa: F841 roundtrip_rel = duckdb_cursor.sql("select * from arrow_res") actual = roundtrip_rel.fetchall() diff --git a/tests/fast/arrow/test_filter_pushdown.py b/tests/fast/arrow/test_filter_pushdown.py index ad63f547..7938585a 100644 --- a/tests/fast/arrow/test_filter_pushdown.py +++ b/tests/fast/arrow/test_filter_pushdown.py @@ -24,7 +24,7 @@ def create_pyarrow_pandas(rel): def create_pyarrow_table(rel): - return rel.fetch_arrow_table() + return rel.to_arrow_table() def create_pyarrow_dataset(rel): @@ -553,7 +553,7 @@ def test_9371(self, duckdb_cursor, tmp_path): df.to_parquet(str(file_path)) my_arrow_dataset = pa_ds.dataset(str(file_path)) - res = duckdb_cursor.execute("SELECT * FROM my_arrow_dataset WHERE ts = ?", parameters=[dt]).fetch_arrow_table() + res = duckdb_cursor.execute("SELECT * FROM my_arrow_dataset WHERE ts = ?", parameters=[dt]).to_arrow_table() output = duckdb_cursor.sql("select * from res").fetchall() expected = [(1, dt), (2, dt), (3, dt)] assert output == expected @@ -704,7 +704,7 @@ def test_filter_pushdown_2145(self, duckdb_cursor, tmp_path, create_table): duckdb_cursor.execute(f"copy (select * from df2) to '{data2.as_posix()}'") 
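# --- Illustrative aside, not part of the patch: a minimal sketch of the streaming
# --- API the dataset tests above now exercise, assuming the to_arrow_reader(batch_size)
# --- signature added in this PR; the query and sizes here are made up for illustration.
import duckdb

con = duckdb.connect()
# to_arrow_reader() streams the result as a pyarrow.RecordBatchReader instead of
# materializing a full pyarrow.Table the way to_arrow_table() does.
reader = con.execute("SELECT * FROM range(10000) t(i)").to_arrow_reader(2048)
batches = list(reader)
# Each batch holds at most batch_size rows; together they cover the full result.
assert all(batch.num_rows <= 2048 for batch in batches)
assert sum(batch.num_rows for batch in batches) == 10000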
glob_pattern = tmp_path / "data*.parquet" - table = duckdb_cursor.read_parquet(glob_pattern.as_posix()).fetch_arrow_table() + table = duckdb_cursor.read_parquet(glob_pattern.as_posix()).to_arrow_table() output_df = duckdb.arrow(table).filter("date > '2019-01-01'").df() expected_df = duckdb.from_parquet(glob_pattern.as_posix()).filter("date > '2019-01-01'").df() @@ -869,7 +869,7 @@ def test_filter_pushdown_not_supported(self): con.execute( "CREATE TABLE T as SELECT i::integer a, i::varchar b, i::uhugeint c, i::integer d FROM range(5) tbl(i)" ) - arrow_tbl = con.execute("FROM T").fetch_arrow_table() + arrow_tbl = con.execute("FROM T").to_arrow_table() # No projection just unsupported filter assert con.execute("from arrow_tbl where c == 3").fetchall() == [(3, "3", 3, 3)] @@ -893,7 +893,7 @@ def test_filter_pushdown_not_supported(self): "CREATE TABLE T_2 as SELECT i::integer a, i::varchar b, i::uhugeint c, i::integer d , i::uhugeint e, i::smallint f, i::uhugeint g FROM range(50) tbl(i)" # noqa: E501 ) - arrow_tbl = con.execute("FROM T_2").fetch_arrow_table() + arrow_tbl = con.execute("FROM T_2").to_arrow_table() assert con.execute( "select a, b from arrow_tbl where a > 2 and c < 40 and b == '28' and g > 15 and e < 30" @@ -905,8 +905,8 @@ def test_join_filter_pushdown(self, duckdb_cursor): duckdb_conn.execute("CREATE TABLE build as select (random()*9999)::INT b from range(20);") duck_probe = duckdb_conn.table("probe") duck_build = duckdb_conn.table("build") - duck_probe_arrow = duck_probe.fetch_arrow_table() - duck_build_arrow = duck_build.fetch_arrow_table() + duck_probe_arrow = duck_probe.to_arrow_table() + duck_build_arrow = duck_build.to_arrow_table() duckdb_conn.register("duck_probe_arrow", duck_probe_arrow) duckdb_conn.register("duck_build_arrow", duck_build_arrow) assert duckdb_conn.execute("SELECT count(*) from duck_probe_arrow, duck_build_arrow where a=b").fetchall() == [ @@ -917,7 +917,7 @@ def test_in_filter_pushdown(self, duckdb_cursor): duckdb_conn = duckdb.connect() duckdb_conn.execute("CREATE TABLE probe as select range a from range(1000);") duck_probe = duckdb_conn.table("probe") - duck_probe_arrow = duck_probe.fetch_arrow_table() + duck_probe_arrow = duck_probe.to_arrow_table() duckdb_conn.register("duck_probe_arrow", duck_probe_arrow) assert duckdb_conn.execute("SELECT * from duck_probe_arrow where a = any([1,999])").fetchall() == [(1,), (999,)] @@ -980,7 +980,7 @@ def assert_equal_results(con, arrow_table, query) -> None: arrow_res = con.sql(query.format(table="arrow_table")).fetchall() assert len(duckdb_res) == len(arrow_res) - arrow_table = duckdb_cursor.table("test").fetch_arrow_table() + arrow_table = duckdb_cursor.table("test").to_arrow_table() assert_equal_results(duckdb_cursor, arrow_table, "select * from {table} where a > 'NaN'::FLOAT") assert_equal_results(duckdb_cursor, arrow_table, "select * from {table} where a >= 'NaN'::FLOAT") assert_equal_results(duckdb_cursor, arrow_table, "select * from {table} where a < 'NaN'::FLOAT") diff --git a/tests/fast/arrow/test_integration.py b/tests/fast/arrow/test_integration.py index 1ec3a603..858d9f9c 100644 --- a/tests/fast/arrow/test_integration.py +++ b/tests/fast/arrow/test_integration.py @@ -19,10 +19,10 @@ def test_parquet_roundtrip(self, duckdb_cursor): userdata_parquet_table = pq.read_table(parquet_filename) userdata_parquet_table.validate(full=True) - rel_from_arrow = duckdb.arrow(userdata_parquet_table).project(cols).fetch_arrow_table() + rel_from_arrow = 
duckdb.arrow(userdata_parquet_table).project(cols).to_arrow_table() rel_from_arrow.validate(full=True) - rel_from_duckdb = duckdb_cursor.from_parquet(parquet_filename).project(cols).fetch_arrow_table() + rel_from_duckdb = duckdb_cursor.from_parquet(parquet_filename).project(cols).to_arrow_table() rel_from_duckdb.validate(full=True) # batched version, let's use various values for batch size @@ -30,7 +30,7 @@ def test_parquet_roundtrip(self, duckdb_cursor): userdata_parquet_table2 = pa.Table.from_batches(userdata_parquet_table.to_batches(i)) assert userdata_parquet_table.equals(userdata_parquet_table2, check_metadata=True) - rel_from_arrow2 = duckdb.arrow(userdata_parquet_table2).project(cols).fetch_arrow_table() + rel_from_arrow2 = duckdb.arrow(userdata_parquet_table2).project(cols).to_arrow_table() rel_from_arrow2.validate(full=True) assert rel_from_arrow.equals(rel_from_arrow2, check_metadata=True) @@ -42,10 +42,10 @@ def test_unsigned_roundtrip(self, duckdb_cursor): unsigned_parquet_table = pq.read_table(parquet_filename) unsigned_parquet_table.validate(full=True) - rel_from_arrow = duckdb.arrow(unsigned_parquet_table).project(cols).fetch_arrow_table() + rel_from_arrow = duckdb.arrow(unsigned_parquet_table).project(cols).to_arrow_table() rel_from_arrow.validate(full=True) - rel_from_duckdb = duckdb_cursor.from_parquet(parquet_filename).project(cols).fetch_arrow_table() + rel_from_duckdb = duckdb_cursor.from_parquet(parquet_filename).project(cols).to_arrow_table() rel_from_duckdb.validate(full=True) assert rel_from_arrow.equals(rel_from_duckdb, check_metadata=True) @@ -53,7 +53,7 @@ def test_unsigned_roundtrip(self, duckdb_cursor): duckdb_cursor.execute( "select NULL c_null, (c % 4 = 0)::bool c_bool, (c%128)::tinyint c_tinyint, c::smallint*1000::INT c_smallint, c::integer*100000 c_integer, c::bigint*1000000000000 c_bigint, c::float c_float, c::double c_double, 'c_' || c::string c_string from (select case when range % 2 == 0 then range else null end as c from range(-10000, 10000)) sq" # noqa: E501 ) - arrow_result = duckdb_cursor.fetch_arrow_table() + arrow_result = duckdb_cursor.to_arrow_table() arrow_result.validate(full=True) arrow_result.combine_chunks() arrow_result.validate(full=True) @@ -72,7 +72,7 @@ def test_decimals_roundtrip(self, duckdb_cursor): duck_tbl = duckdb_cursor.table("test") - duck_from_arrow = duckdb_cursor.from_arrow(duck_tbl.fetch_arrow_table()) + duck_from_arrow = duckdb_cursor.from_arrow(duck_tbl.to_arrow_table()) duck_from_arrow.create("testarrow") @@ -114,7 +114,7 @@ def test_intervals_roundtrip(self, duckdb_cursor): data = pa.array(arr, pa.month_day_nano_interval()) arrow_tbl = pa.Table.from_arrays([data], ["a"]) duckdb_cursor.from_arrow(arrow_tbl).create("intervaltbl") - duck_arrow_tbl = duckdb_cursor.table("intervaltbl").fetch_arrow_table()["a"] + duck_arrow_tbl = duckdb_cursor.table("intervaltbl").to_arrow_table()["a"] assert duck_arrow_tbl[0].value == expected_value @@ -122,7 +122,7 @@ def test_intervals_roundtrip(self, duckdb_cursor): duckdb_cursor.execute("CREATE TABLE test (a INTERVAL)") duckdb_cursor.execute("INSERT INTO test VALUES (INTERVAL 1 YEAR + INTERVAL 1 DAY + INTERVAL 1 SECOND)") expected_value = pa.MonthDayNano([12, 1, 1000000000]) - duck_tbl_arrow = duckdb_cursor.table("test").fetch_arrow_table()["a"] + duck_tbl_arrow = duckdb_cursor.table("test").to_arrow_table()["a"] assert duck_tbl_arrow[0].value.months == expected_value.months assert duck_tbl_arrow[0].value.days == expected_value.days assert duck_tbl_arrow[0].value.nanoseconds ==
expected_value.nanoseconds @@ -144,7 +144,7 @@ def test_null_intervals_roundtrip(self, duckdb_cursor): data = pa.array(arr, pa.month_day_nano_interval()) arrow_tbl = pa.Table.from_arrays([data], ["a"]) duckdb_cursor.from_arrow(arrow_tbl).create("intervalnulltbl") - duckdb_tbl_arrow = duckdb_cursor.table("intervalnulltbl").fetch_arrow_table()["a"] + duckdb_tbl_arrow = duckdb_cursor.table("intervalnulltbl").to_arrow_table()["a"] assert duckdb_tbl_arrow[0].value is None assert duckdb_tbl_arrow[1].value == expected_value @@ -158,7 +158,7 @@ def test_nested_interval_roundtrip(self, duckdb_cursor): dict_array = pa.DictionaryArray.from_arrays(indices, dictionary) arrow_table = pa.Table.from_arrays([dict_array], ["a"]) duckdb_cursor.from_arrow(arrow_table).create("dictionarytbl") - duckdb_tbl_arrow = duckdb_cursor.table("dictionarytbl").fetch_arrow_table()["a"] + duckdb_tbl_arrow = duckdb_cursor.table("dictionarytbl").to_arrow_table()["a"] assert duckdb_tbl_arrow[0].value == first_value assert duckdb_tbl_arrow[1].value == second_value @@ -172,7 +172,7 @@ def test_nested_interval_roundtrip(self, duckdb_cursor): # List query = duckdb_cursor.sql( "SELECT a from (select list_value(INTERVAL 3 MONTHS, INTERVAL 5 DAYS, INTERVAL 10 SECONDS, NULL) as a) as t" - ).fetch_arrow_table()["a"] + ).to_arrow_table()["a"] assert query[0][0].value == pa.MonthDayNano([3, 0, 0]) assert query[0][1].value == pa.MonthDayNano([0, 5, 0]) assert query[0][2].value == pa.MonthDayNano([0, 0, 10000000000]) @@ -181,7 +181,7 @@ def test_nested_interval_roundtrip(self, duckdb_cursor): # Struct query = "SELECT a from (SELECT STRUCT_PACK(a := INTERVAL 1 MONTHS, b := INTERVAL 10 DAYS, c:= INTERVAL 20 SECONDS) as a) as t" # noqa: E501 true_answer = duckdb_cursor.sql(query).fetchall() - from_arrow = duckdb_cursor.from_arrow(duckdb_cursor.sql(query).fetch_arrow_table()).fetchall() + from_arrow = duckdb_cursor.from_arrow(duckdb_cursor.sql(query).to_arrow_table()).fetchall() assert true_answer[0][0]["a"] == from_arrow[0][0]["a"] assert true_answer[0][0]["b"] == from_arrow[0][0]["b"] assert true_answer[0][0]["c"] == from_arrow[0][0]["c"] @@ -193,7 +193,7 @@ def test_min_max_interval_roundtrip(self, duckdb_cursor): arrow_tbl = pa.Table.from_arrays([data], ["a"]) duckdb_cursor.from_arrow(arrow_tbl).create("intervalminmaxtbl") - duck_arrow_tbl = duckdb_cursor.table("intervalminmaxtbl").fetch_arrow_table()["a"] + duck_arrow_tbl = duckdb_cursor.table("intervalminmaxtbl").to_arrow_table()["a"] assert duck_arrow_tbl[0].value == pa.MonthDayNano([0, 0, 0]) assert duck_arrow_tbl[1].value == pa.MonthDayNano([2147483647, 2147483647, 9223372036854775000]) @@ -211,7 +211,7 @@ def test_duplicate_column_names(self, duckdb_cursor): df_b table2 ON table1.join_key = table2.join_key """ - ).fetch_arrow_table() + ).to_arrow_table() assert res.schema.names == ["join_key", "col_a", "join_key", "col_a"] def test_strings_roundtrip(self, duckdb_cursor): @@ -227,7 +227,7 @@ def test_strings_roundtrip(self, duckdb_cursor): duck_tbl = duckdb_cursor.table("test") - duck_from_arrow = duckdb_cursor.from_arrow(duck_tbl.fetch_arrow_table()) + duck_from_arrow = duckdb_cursor.from_arrow(duck_tbl.to_arrow_table()) duck_from_arrow.create("testarrow") diff --git a/tests/fast/arrow/test_interval.py b/tests/fast/arrow/test_interval.py index 5426f39d..80d22e66 100644 --- a/tests/fast/arrow/test_interval.py +++ b/tests/fast/arrow/test_interval.py @@ -24,7 +24,7 @@ def test_duration_types(self, duckdb_cursor): pa.array([1], pa.duration("s")), ) arrow_table = 
pa.Table.from_arrays([data[0], data[1], data[2], data[3]], ["a", "b", "c", "d"]) - rel = duckdb.from_arrow(arrow_table).fetch_arrow_table() + rel = duckdb.from_arrow(arrow_table).to_arrow_table() assert rel["a"] == expected_arrow["a"] assert rel["b"] == expected_arrow["a"] assert rel["c"] == expected_arrow["a"] @@ -41,7 +41,7 @@ def test_duration_null(self, duckdb_cursor): pa.array([None], pa.duration("s")), ) arrow_table = pa.Table.from_arrays([data[0], data[1], data[2], data[3]], ["a", "b", "c", "d"]) - rel = duckdb.from_arrow(arrow_table).fetch_arrow_table() + rel = duckdb.from_arrow(arrow_table).to_arrow_table() assert rel["a"] == expected_arrow["a"] assert rel["b"] == expected_arrow["a"] assert rel["c"] == expected_arrow["a"] @@ -56,4 +56,4 @@ def test_duration_overflow(self, duckdb_cursor): arrow_table = pa.Table.from_arrays([data], ["a"]) with pytest.raises(duckdb.ConversionException, match="Could not convert Interval to Microsecond"): - duckdb.from_arrow(arrow_table).fetch_arrow_table() + duckdb.from_arrow(arrow_table).to_arrow_table() diff --git a/tests/fast/arrow/test_large_offsets.py b/tests/fast/arrow/test_large_offsets.py index 45b078b8..f74b9955 100644 --- a/tests/fast/arrow/test_large_offsets.py +++ b/tests/fast/arrow/test_large_offsets.py @@ -18,11 +18,11 @@ def test_large_lists(self, duckdb_cursor): match="Arrow Appender: The maximum combined list offset for regular list buffers is 2147483647 but " "the offset of 2147481000 exceeds this", ): - duckdb_cursor.sql("SELECT col FROM tbl").fetch_arrow_table() + duckdb_cursor.sql("SELECT col FROM tbl").to_arrow_table() tbl2 = pa.Table.from_pydict({"col": ary.cast(pa.large_list(pa.uint8()))}) # noqa: F841 duckdb_cursor.sql("set arrow_large_buffer_size = true") - res2 = duckdb_cursor.sql("SELECT col FROM tbl2").fetch_arrow_table() + res2 = duckdb_cursor.sql("SELECT col FROM tbl2").to_arrow_table() res2.validate() @pytest.mark.skip(reason="CI does not have enough memory to validate this") @@ -35,8 +35,8 @@ def test_large_maps(self, duckdb_cursor): match="Arrow Appender: The maximum combined list offset for regular list buffers is 2147483647 but the " "offset of 2147481000 exceeds this", ): - duckdb_cursor.sql("select map(col, col) from tbl").fetch_arrow_table() + duckdb_cursor.sql("select map(col, col) from tbl").to_arrow_table() duckdb_cursor.sql("set arrow_large_buffer_size = true") - arrow_map_large = duckdb_cursor.sql("select map(col, col) from tbl").fetch_arrow_table() + arrow_map_large = duckdb_cursor.sql("select map(col, col) from tbl").to_arrow_table() arrow_map_large.validate() diff --git a/tests/fast/arrow/test_nested_arrow.py b/tests/fast/arrow/test_nested_arrow.py index 10fbfae0..0530ee15 100644 --- a/tests/fast/arrow/test_nested_arrow.py +++ b/tests/fast/arrow/test_nested_arrow.py @@ -10,13 +10,13 @@ def compare_results(duckdb_cursor, query): true_answer = duckdb_cursor.query(query).fetchall() - produced_arrow = duckdb_cursor.query(query).fetch_arrow_table() + produced_arrow = duckdb_cursor.query(query).to_arrow_table() from_arrow = duckdb_cursor.from_arrow(produced_arrow).fetchall() assert true_answer == from_arrow def arrow_to_pandas(duckdb_cursor, query): - return duckdb_cursor.query(query).fetch_arrow_table().to_pandas()["a"].values.tolist() + return duckdb_cursor.query(query).to_arrow_table().to_pandas()["a"].values.tolist() def get_use_list_view_options(): @@ -31,23 +31,19 @@ class TestArrowNested: def test_lists_basic(self, duckdb_cursor): # Test Constant List query = ( - duckdb_cursor.query("SELECT a from 
(select list_value(3,5,10) as a) as t") - .fetch_arrow_table()["a"] - .to_numpy() + duckdb_cursor.query("SELECT a from (select list_value(3,5,10) as a) as t").to_arrow_table()["a"].to_numpy() ) assert query[0][0] == 3 assert query[0][1] == 5 assert query[0][2] == 10 # Empty List - query = duckdb_cursor.query("SELECT a from (select list_value() as a) as t").fetch_arrow_table()["a"].to_numpy() + query = duckdb_cursor.query("SELECT a from (select list_value() as a) as t").to_arrow_table()["a"].to_numpy() assert len(query[0]) == 0 # Test Constant List With Null query = ( - duckdb_cursor.query("SELECT a from (select list_value(3,NULL) as a) as t") - .fetch_arrow_table()["a"] - .to_numpy() + duckdb_cursor.query("SELECT a from (select list_value(3,NULL) as a) as t").to_arrow_table()["a"].to_numpy() ) assert query[0][0] == 3 assert np.isnan(query[0][1]) diff --git a/tests/fast/arrow/test_projection_pushdown.py b/tests/fast/arrow/test_projection_pushdown.py index fbd258e0..7a792b9a 100644 --- a/tests/fast/arrow/test_projection_pushdown.py +++ b/tests/fast/arrow/test_projection_pushdown.py @@ -21,7 +21,7 @@ def test_projection_pushdown_no_filter(self, duckdb_cursor): """ ) duck_tbl = duckdb_cursor.table("test") - arrow_table = duck_tbl.fetch_arrow_table() + arrow_table = duck_tbl.to_arrow_table() assert duckdb_cursor.execute("SELECT sum(c) FROM arrow_table").fetchall() == [(333,)] # RecordBatch does not use projection pushdown, test that this also still works diff --git a/tests/fast/arrow/test_time.py b/tests/fast/arrow/test_time.py index 943b5065..1f359bd4 100644 --- a/tests/fast/arrow/test_time.py +++ b/tests/fast/arrow/test_time.py @@ -20,7 +20,7 @@ def test_time_types(self, duckdb_cursor): pa.array([1000000000], pa.time64("ns")), ) arrow_table = pa.Table.from_arrays([data[0], data[1], data[2], data[3]], ["a", "b", "c", "d"]) - rel = duckdb.from_arrow(arrow_table).fetch_arrow_table() + rel = duckdb.from_arrow(arrow_table).to_arrow_table() assert rel["a"] == arrow_table["c"] assert rel["b"] == arrow_table["c"] assert rel["c"] == arrow_table["c"] @@ -36,7 +36,7 @@ def test_time_null(self, duckdb_cursor): pa.array([None], pa.time64("ns")), ) arrow_table = pa.Table.from_arrays([data[0], data[1], data[2], data[3]], ["a", "b", "c", "d"]) - rel = duckdb.from_arrow(arrow_table).fetch_arrow_table() + rel = duckdb.from_arrow(arrow_table).to_arrow_table() assert rel["a"] == arrow_table["c"] assert rel["b"] == arrow_table["c"] assert rel["c"] == arrow_table["c"] @@ -50,7 +50,7 @@ def test_max_times(self, duckdb_cursor): # Max Sec data = pa.array([2147483647], type=pa.time32("s")) arrow_table = pa.Table.from_arrays([data], ["a"]) - rel = duckdb.from_arrow(arrow_table).fetch_arrow_table() + rel = duckdb.from_arrow(arrow_table).to_arrow_table() assert rel["a"] == result["a"] # Max MSec @@ -58,7 +58,7 @@ def test_max_times(self, duckdb_cursor): result = pa.Table.from_arrays([data], ["a"]) data = pa.array([2147483647], type=pa.time32("ms")) arrow_table = pa.Table.from_arrays([data], ["a"]) - rel = duckdb.from_arrow(arrow_table).fetch_arrow_table() + rel = duckdb.from_arrow(arrow_table).to_arrow_table() assert rel["a"] == result["a"] # Max NSec @@ -66,7 +66,7 @@ def test_max_times(self, duckdb_cursor): result = pa.Table.from_arrays([data], ["a"]) data = pa.array([9223372036854774000], type=pa.time64("ns")) arrow_table = pa.Table.from_arrays([data], ["a"]) - rel = duckdb.from_arrow(arrow_table).fetch_arrow_table() + rel = duckdb.from_arrow(arrow_table).to_arrow_table() print(rel["a"]) print(result["a"]) diff --git 
a/tests/fast/arrow/test_timestamp_timezone.py b/tests/fast/arrow/test_timestamp_timezone.py index 27ddf3ac..cec7c20d 100644 --- a/tests/fast/arrow/test_timestamp_timezone.py +++ b/tests/fast/arrow/test_timestamp_timezone.py @@ -45,7 +45,7 @@ def test_timestamp_tz_to_arrow(self, duckdb_cursor): for timezone in timezones: con.execute("SET TimeZone = '" + timezone + "'") arrow_table = generate_table(current_time, precision, timezone) - res = con.from_arrow(arrow_table).fetch_arrow_table() + res = con.from_arrow(arrow_table).to_arrow_table() assert res[0].type == pa.timestamp("us", tz=timezone) assert res == generate_table(current_time, "us", timezone) @@ -54,7 +54,7 @@ def test_timestamp_tz_with_null(self, duckdb_cursor): con.execute("create table t (i timestamptz)") con.execute("insert into t values (NULL),('2021-11-15 02:30:00'::timestamptz)") rel = con.table("t") - arrow_tbl = rel.fetch_arrow_table() + arrow_tbl = rel.to_arrow_table() con.register("t2", arrow_tbl) assert con.execute("select * from t").fetchall() == con.execute("select * from t2").fetchall() @@ -64,7 +64,7 @@ def test_timestamp_stream(self, duckdb_cursor): con.execute("create table t (i timestamptz)") con.execute("insert into t values (NULL),('2021-11-15 02:30:00'::timestamptz)") rel = con.table("t") - arrow_tbl = rel.fetch_record_batch().read_all() + arrow_tbl = rel.to_arrow_reader().read_all() con.register("t2", arrow_tbl) assert con.execute("select * from t").fetchall() == con.execute("select * from t2").fetchall() diff --git a/tests/fast/arrow/test_timestamps.py b/tests/fast/arrow/test_timestamps.py index b00b7982..0733a6d0 100644 --- a/tests/fast/arrow/test_timestamps.py +++ b/tests/fast/arrow/test_timestamps.py @@ -21,7 +21,7 @@ def test_timestamp_types(self, duckdb_cursor): pa.array([datetime.datetime.now()], pa.timestamp("s")), ) arrow_table = pa.Table.from_arrays([data[0], data[1], data[2], data[3]], ["a", "b", "c", "d"]) - rel = duckdb.from_arrow(arrow_table).fetch_arrow_table() + rel = duckdb.from_arrow(arrow_table).to_arrow_table() assert rel["a"] == arrow_table["a"] assert rel["b"] == arrow_table["b"] assert rel["c"] == arrow_table["c"] @@ -37,7 +37,7 @@ def test_timestamp_nulls(self, duckdb_cursor): pa.array([None], pa.timestamp("s")), ) arrow_table = pa.Table.from_arrays([data[0], data[1], data[2], data[3]], ["a", "b", "c", "d"]) - rel = duckdb.from_arrow(arrow_table).fetch_arrow_table() + rel = duckdb.from_arrow(arrow_table).to_arrow_table() assert rel["a"] == arrow_table["a"] assert rel["b"] == arrow_table["b"] assert rel["c"] == arrow_table["c"] @@ -52,7 +52,7 @@ def test_timestamp_overflow(self, duckdb_cursor): pa.array([9223372036854775807], pa.timestamp("us")), ) arrow_table = pa.Table.from_arrays([data[0], data[1], data[2]], ["a", "b", "c"]) - arrow_from_duck = duckdb.from_arrow(arrow_table).fetch_arrow_table() + arrow_from_duck = duckdb.from_arrow(arrow_table).to_arrow_table() assert arrow_from_duck["a"] == arrow_table["a"] assert arrow_from_duck["b"] == arrow_table["b"] assert arrow_from_duck["c"] == arrow_table["c"] diff --git a/tests/fast/arrow/test_tpch.py b/tests/fast/arrow/test_tpch.py index cb6024cf..1c52c262 100644 --- a/tests/fast/arrow/test_tpch.py +++ b/tests/fast/arrow/test_tpch.py @@ -47,7 +47,7 @@ def test_tpch_arrow(self, duckdb_cursor): for tpch_table in tpch_tables: duck_tbl = duckdb_conn.table(tpch_table) - arrow_tables.append(duck_tbl.fetch_arrow_table()) + arrow_tables.append(duck_tbl.to_arrow_table()) duck_arrow_table = duckdb_conn.from_arrow(arrow_tables[-1]) 
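# --- Illustrative aside, not part of the patch: the Arrow round-trip pattern the
# --- TPC-H tests above rely on, written against the renamed API. A minimal sketch;
# --- the table name and data here are hypothetical.
import duckdb

con = duckdb.connect()
con.execute("CREATE TABLE t AS SELECT range AS i FROM range(5)")
# Materialize the DuckDB table as a pyarrow.Table, then scan it back through DuckDB.
arrow_tbl = con.table("t").to_arrow_table()
roundtrip = con.from_arrow(arrow_tbl)
assert roundtrip.fetchall() == con.table("t").fetchall()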
duckdb_conn.execute("DROP TABLE " + tpch_table) duck_arrow_table.create(tpch_table) @@ -77,7 +77,7 @@ def test_tpch_arrow_01(self, duckdb_cursor): for tpch_table in tpch_tables: duck_tbl = duckdb_conn.table(tpch_table) - arrow_tables.append(duck_tbl.fetch_arrow_table()) + arrow_tables.append(duck_tbl.to_arrow_table()) duck_arrow_table = duckdb_conn.from_arrow(arrow_tables[-1]) duckdb_conn.execute("DROP TABLE " + tpch_table) duck_arrow_table.create(tpch_table) @@ -105,7 +105,7 @@ def test_tpch_arrow_batch(self, duckdb_cursor): for tpch_table in tpch_tables: duck_tbl = duckdb_conn.table(tpch_table) - arrow_tables.append(pyarrow.Table.from_batches(duck_tbl.fetch_arrow_table().to_batches(10))) + arrow_tables.append(pyarrow.Table.from_batches(duck_tbl.to_arrow_table().to_batches(10))) duck_arrow_table = duckdb_conn.from_arrow(arrow_tables[-1]) duckdb_conn.execute("DROP TABLE " + tpch_table) duck_arrow_table.create(tpch_table) diff --git a/tests/fast/arrow/test_unregister.py b/tests/fast/arrow/test_unregister.py index 0aceaea1..c2ca8e37 100644 --- a/tests/fast/arrow/test_unregister.py +++ b/tests/fast/arrow/test_unregister.py @@ -17,10 +17,10 @@ def test_arrow_unregister1(self, duckdb_cursor): connection = duckdb.connect(":memory:") connection.register("arrow_table", arrow_table_obj) - connection.execute("SELECT * FROM arrow_table;").fetch_arrow_table() + connection.execute("SELECT * FROM arrow_table;").to_arrow_table() connection.unregister("arrow_table") with pytest.raises(duckdb.CatalogException, match="Table with name arrow_table does not exist"): - connection.execute("SELECT * FROM arrow_table;").fetch_arrow_table() + connection.execute("SELECT * FROM arrow_table;").to_arrow_table() with pytest.raises(duckdb.CatalogException, match="View with name arrow_table does not exist"): connection.execute("DROP VIEW arrow_table;") connection.execute("DROP VIEW IF EXISTS arrow_table;") @@ -39,7 +39,7 @@ def test_arrow_unregister2(self, duckdb_cursor): connection = duckdb.connect(db) assert len(connection.execute("PRAGMA show_tables;").fetchall()) == 0 with pytest.raises(duckdb.CatalogException, match="Table with name arrow_table does not exist"): - connection.execute("SELECT * FROM arrow_table;").fetch_arrow_table() + connection.execute("SELECT * FROM arrow_table;").to_arrow_table() connection.close() del arrow_table_obj gc.collect() @@ -47,5 +47,5 @@ def test_arrow_unregister2(self, duckdb_cursor): connection = duckdb.connect(db) assert len(connection.execute("PRAGMA show_tables;").fetchall()) == 0 with pytest.raises(duckdb.CatalogException, match="Table with name arrow_table does not exist"): - connection.execute("SELECT * FROM arrow_table;").fetch_arrow_table() + connection.execute("SELECT * FROM arrow_table;").to_arrow_table() connection.close() diff --git a/tests/fast/relational_api/test_rapi_close.py b/tests/fast/relational_api/test_rapi_close.py index 969e2792..a5ed16cf 100644 --- a/tests/fast/relational_api/test_rapi_close.py +++ b/tests/fast/relational_api/test_rapi_close.py @@ -27,7 +27,7 @@ def test_close_conn_rel(self, duckdb_cursor): with pytest.raises(duckdb.ConnectionException, match="Connection has already been closed"): rel.arg_min("", "") with pytest.raises(duckdb.ConnectionException, match="Connection has already been closed"): - rel.fetch_arrow_table() + rel.to_arrow_table() with pytest.raises(duckdb.ConnectionException, match="Connection has already been closed"): rel.avg("") with pytest.raises(duckdb.ConnectionException, match="Connection has already been closed"): diff --git 
a/tests/fast/test_all_types.py b/tests/fast/test_all_types.py index c4ba0e55..07dc5f70 100644 --- a/tests/fast/test_all_types.py +++ b/tests/fast/test_all_types.py @@ -551,16 +551,16 @@ def test_arrow(self, cur_type): conn = duckdb.connect() if cur_type in replacement_values: - arrow_table = conn.execute("select " + replacement_values[cur_type]).fetch_arrow_table() + arrow_table = conn.execute("select " + replacement_values[cur_type]).to_arrow_table() else: - arrow_table = conn.execute(f'select "{cur_type}" from test_all_types()').fetch_arrow_table() + arrow_table = conn.execute(f'select "{cur_type}" from test_all_types()').to_arrow_table() if cur_type in enum_types: - round_trip_arrow_table = conn.execute("select * from arrow_table").fetch_arrow_table() + round_trip_arrow_table = conn.execute("select * from arrow_table").to_arrow_table() result_arrow = conn.execute("select * from arrow_table").fetchall() result_roundtrip = conn.execute("select * from round_trip_arrow_table").fetchall() assert recursive_equality(result_arrow, result_roundtrip) else: - round_trip_arrow_table = conn.execute("select * from arrow_table").fetch_arrow_table() + round_trip_arrow_table = conn.execute("select * from arrow_table").to_arrow_table() assert arrow_table.equals(round_trip_arrow_table, check_metadata=True) @pytest.mark.parametrize("cur_type", all_types) diff --git a/tests/fast/test_multithread.py b/tests/fast/test_multithread.py index ccf809c5..fec0ed12 100644 --- a/tests/fast/test_multithread.py +++ b/tests/fast/test_multithread.py @@ -176,11 +176,11 @@ def fetchdf_chunk_query(duckdb_conn, queue): queue.put(False) -def fetch_arrow_query(duckdb_conn, queue): +def arrow_table_query(duckdb_conn, queue): # Get a new connection duckdb_conn = duckdb.connect() try: - duckdb_conn.execute("select i from (values (42), (84), (NULL), (128)) tbl(i)").fetch_arrow_table() + duckdb_conn.execute("select i from (values (42), (84), (NULL), (128)) tbl(i)").to_arrow_table() queue.put(True) except Exception: queue.put(False) @@ -190,7 +190,7 @@ def fetch_record_batch_query(duckdb_conn, queue): # Get a new connection duckdb_conn = duckdb.connect() try: - duckdb_conn.execute("select i from (values (42), (84), (NULL), (128)) tbl(i)").fetch_record_batch() + duckdb_conn.execute("select i from (values (42), (84), (NULL), (128)) tbl(i)").to_arrow_reader() queue.put(True) except Exception: queue.put(False) @@ -406,7 +406,7 @@ def test_fetchdfchunk(self, duckdb_cursor): def test_fetcharrow(self, duckdb_cursor): pytest.importorskip("pyarrow") - duck_threads = DuckDBThreaded(10, fetch_arrow_query) + duck_threads = DuckDBThreaded(10, arrow_table_query) duck_threads.multithread_test() def test_fetch_record_batch(self, duckdb_cursor): diff --git a/tests/fast/test_replacement_scan.py b/tests/fast/test_replacement_scan.py index 2b195ab9..cafee85d 100644 --- a/tests/fast/test_replacement_scan.py +++ b/tests/fast/test_replacement_scan.py @@ -25,30 +25,26 @@ def using_sql(con, to_scan, object_name): # Fetch methods -def fetch_polars(rel): +def polars_from_rel(rel): return rel.pl() -def fetch_df(rel): +def df_from_rel(rel): return rel.df() -def fetch_arrow(rel): - return rel.fetch_arrow_table() +def arrow_table_from_rel(rel): + return rel.to_arrow_table() -def fetch_arrow_table(rel): - return rel.fetch_arrow_table() - - -def fetch_arrow_record_batch(rel: duckdb.DuckDBPyRelation): +def arrow_reader_from_rel(rel: duckdb.DuckDBPyRelation): # Note: this has to be executed first, otherwise we'll create a deadlock # Because it will try to execute the
input at the same time as executing the relation # On the same connection (that's the core of the issue) - return rel.execute().fetch_record_batch() + return rel.execute().to_arrow_reader() -def fetch_relation(rel): +def rel_from_rel(rel): return rel @@ -94,7 +90,7 @@ def test_parquet_replacement(self): @pytest.mark.parametrize("get_relation", [using_table, using_sql]) @pytest.mark.parametrize( "fetch_method", - [fetch_polars, fetch_df, fetch_arrow, fetch_arrow_table, fetch_arrow_record_batch, fetch_relation], + [polars_from_rel, df_from_rel, arrow_table_from_rel, arrow_reader_from_rel, rel_from_rel], ) @pytest.mark.parametrize("object_name", ["tbl", "table", "select", "update"]) def test_table_replacement_scans(self, duckdb_cursor, get_relation, fetch_method, object_name): diff --git a/tests/fast/test_result.py b/tests/fast/test_result.py index 4210a437..9180f8dc 100644 --- a/tests/fast/test_result.py +++ b/tests/fast/test_result.py @@ -21,9 +21,9 @@ def test_result_closed(self, duckdb_cursor): with pytest.raises(duckdb.InvalidInputException, match="result closed"): res.fetchnumpy() with pytest.raises(duckdb.InvalidInputException, match="There is no query result"): - res.fetch_arrow_table() + res.to_arrow_table() with pytest.raises(duckdb.InvalidInputException, match="There is no query result"): - res.fetch_arrow_reader(1) + res.to_arrow_reader(1) def test_result_describe_types(self, duckdb_cursor): connection = duckdb.connect("") diff --git a/tests/fast/test_runtime_error.py b/tests/fast/test_runtime_error.py index 44910a13..62bf7589 100644 --- a/tests/fast/test_runtime_error.py +++ b/tests/fast/test_runtime_error.py @@ -31,7 +31,7 @@ def test_arrow_error(self): con = duckdb.connect() con.execute("create table tbl as select 'hello' i") with pytest.raises(duckdb.ConversionException): - con.execute("select i::int from tbl").fetch_arrow_table() + con.execute("select i::int from tbl").to_arrow_table() def test_register_error(self): con = duckdb.connect() @@ -43,23 +43,23 @@ def test_arrow_fetch_table_error(self): pytest.importorskip("pyarrow") con = duckdb.connect() - arrow_object = con.execute("select 1").fetch_arrow_table() + arrow_object = con.execute("select 1").to_arrow_table() arrow_relation = con.from_arrow(arrow_object) res = arrow_relation.execute() res.close() with pytest.raises(duckdb.InvalidInputException, match="There is no query result"): - res.fetch_arrow_table() + res.to_arrow_table() def test_arrow_record_batch_reader_error(self): pytest.importorskip("pyarrow") con = duckdb.connect() - arrow_object = con.execute("select 1").fetch_arrow_table() + arrow_object = con.execute("select 1").to_arrow_table() arrow_relation = con.from_arrow(arrow_object) res = arrow_relation.execute() res.close() with pytest.raises(duckdb.ProgrammingError, match="There is no query result"): - res.fetch_arrow_reader(1) + res.to_arrow_reader(1) def test_relation_cache_fetchall(self): conn = duckdb.connect() @@ -185,7 +185,7 @@ def test_missing_result_from_conn_exceptions(self): conn.fetch_df_chunk() with no_result_set(): - conn.fetch_arrow_table() + conn.to_arrow_table() with no_result_set(): - conn.fetch_record_batch() + conn.to_arrow_reader() diff --git a/tests/fast/types/test_time_ns.py b/tests/fast/types/test_time_ns.py index c27f2958..5a27f612 100644 --- a/tests/fast/types/test_time_ns.py +++ b/tests/fast/types/test_time_ns.py @@ -44,7 +44,7 @@ def test_time_ns_arrow_roundtrip(duckdb_cursor): pa = pytest.importorskip("pyarrow") # Get a time_ns in an arrow table - arrow_table = 
duckdb_cursor.execute("SELECT TIME_NS '12:34:56.123456789' AS time_ns_col").fetch_arrow_table() + arrow_table = duckdb_cursor.execute("SELECT TIME_NS '12:34:56.123456789' AS time_ns_col").to_arrow_table() value = arrow_table.column("time_ns_col")[0] assert isinstance(value, pa.lib.Time64Scalar) diff --git a/tests/fast/udf/test_scalar_arrow.py b/tests/fast/udf/test_scalar_arrow.py index e3f18344..3d64ec51 100644 --- a/tests/fast/udf/test_scalar_arrow.py +++ b/tests/fast/udf/test_scalar_arrow.py @@ -149,7 +149,7 @@ def return_struct(col): """ select {'a': 5, 'b': 'test', 'c': [5,3,2]} """ - ).fetch_arrow_table() + ).to_arrow_table() con = duckdb.connect() struct_type = con.struct_type({"a": BIGINT, "b": VARCHAR, "c": con.list_type(BIGINT)})