Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 18 additions & 2 deletions arkouda/pandas/extension/_arkouda_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,11 +111,13 @@ def __init__(

@classmethod
def _from_sequence(cls, scalars, dtype=None, copy=False):
from arkouda.numpy.numeric import cast as ak_cast
from arkouda.numpy.pdarrayclass import pdarray
from arkouda.numpy.pdarraycreation import array as ak_array
from arkouda.pandas.categorical import Categorical

from ._dtypes import ArkoudaBigintDtype

# normalize dtype input
if (
dtype is not None
and (
Expand All @@ -130,7 +132,21 @@ def _from_sequence(cls, scalars, dtype=None, copy=False):
if dtype is not None and hasattr(dtype, "numpy_dtype"):
dtype = dtype.numpy_dtype

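# Arkouda Categorical: build the array from its integer codes (cast to dtype if one was given).
# Any other input, e.g. a numpy array whose dtype can be preserved, falls through to ak_array below.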
if isinstance(scalars, Categorical):
codes = scalars.codes

# Some implementations might return an ArkoudaArray here
if isinstance(codes, ArkoudaArray):
codes = codes._data

if not isinstance(codes, pdarray):
raise TypeError(f"Categorical.codes expected pdarray, got {type(codes).__name__}")

if dtype is not None:
codes = ak_cast(codes, dtype)

return cls(codes)

return cls(ak_array(scalars, dtype=dtype, copy=copy))

def __getitem__(self, key: Any) -> Any:
Expand Down
24 changes: 24 additions & 0 deletions tests/pandas/extension/arkouda_categorical_extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,30 @@ def test_categorical_isna_and_isnull(self):
assert np.array_equal(out_isna, expected)
assert np.array_equal(out_isnull, expected)

def test_pd_array_with_dtype_on_ak_categorical_should_not_iterate(self):
    """
    Reproducer for #5335: ``pd.array`` on an Arkouda-backed Categorical fails when a dtype is given.

    The call is expected to succeed (without iterating the Categorical) and to
    produce an ``ak_int64`` array holding the categorical codes.
    """
    categorical = Categorical(ak.array(["a", "a", "b"]))
    expected_codes = categorical.codes.to_ndarray()

    converted = pd.array(categorical, dtype="ak_int64")

    assert len(converted) == len(expected_codes)

    # Export the result with the first conversion method it offers,
    # falling back to a plain numpy coercion when it offers neither.
    for exporter_name in ("to_ndarray", "to_numpy"):
        if hasattr(converted, exporter_name):
            actual_codes = getattr(converted, exporter_name)()
            break
    else:
        actual_codes = np.asarray(converted)

    assert np.array_equal(actual_codes, expected_codes)


class TestArkoudaCategoricalAsType:
def test_categorical_array_astype_category_stays_extension(
Expand Down