diff --git a/arkouda/pandas/extension/_arkouda_array.py b/arkouda/pandas/extension/_arkouda_array.py
index a04248827a2..3ff5549f7a0 100644
--- a/arkouda/pandas/extension/_arkouda_array.py
+++ b/arkouda/pandas/extension/_arkouda_array.py
@@ -111,11 +111,13 @@ def __init__(
 
     @classmethod
     def _from_sequence(cls, scalars, dtype=None, copy=False):
+        from arkouda.numpy.numeric import cast as ak_cast
+        from arkouda.numpy.pdarrayclass import pdarray
         from arkouda.numpy.pdarraycreation import array as ak_array
+        from arkouda.pandas.categorical import Categorical
 
         from ._dtypes import ArkoudaBigintDtype
 
-        # normalize dtype input
         if (
             dtype is not None
             and (
@@ -130,7 +132,26 @@ def _from_sequence(cls, scalars, dtype=None, copy=False):
         if dtype is not None and hasattr(dtype, "numpy_dtype"):
             dtype = dtype.numpy_dtype
 
-        # If scalars is already a numpy array, we can preserve its dtype
+        # Use the Categorical's integer codes directly instead of iterating it.
+        if isinstance(scalars, Categorical):
+            codes = scalars.codes
+
+            # Some implementations might return an ArkoudaArray here
+            if isinstance(codes, ArkoudaArray):
+                codes = codes._data
+
+            if not isinstance(codes, pdarray):
+                raise TypeError(f"Categorical.codes expected pdarray, got {type(codes).__name__}")
+
+            if dtype is not None:
+                # NOTE(review): assumes ak_cast returns a new array, so no extra copy - confirm
+                codes = ak_cast(codes, dtype)
+            elif copy:
+                # Honor copy=True: do not alias the Categorical's own codes array.
+                codes = ak_array(codes, copy=True)
+
+            return cls(codes)
+
         return cls(ak_array(scalars, dtype=dtype, copy=copy))
 
     def __getitem__(self, key: Any) -> Any:
diff --git a/tests/pandas/extension/arkouda_categorical_extension.py b/tests/pandas/extension/arkouda_categorical_extension.py
index a4b45db13d6..900cfc62e31 100644
--- a/tests/pandas/extension/arkouda_categorical_extension.py
+++ b/tests/pandas/extension/arkouda_categorical_extension.py
@@ -185,6 +185,30 @@ def test_categorical_isna_and_isnull(self):
         assert np.array_equal(out_isna, expected)
         assert np.array_equal(out_isnull, expected)
 
+    def test_pd_array_with_dtype_on_ak_categorical_should_not_iterate(self):
+        """
+        Reproducer for: #5335 pd.array on an Arkouda-backed Categorical fails when dtype is provided.
+
+        Expected: should succeed (without iterating the Categorical) and produce an ak_int64
+        array representing the categorical codes.
+        """
+        cat = Categorical(ak.array(["a", "a", "b"]))
+
+        expected = cat.codes.to_ndarray()
+
+        result = pd.array(cat, dtype="ak_int64")
+
+        assert len(result) == len(expected)
+
+        if hasattr(result, "to_ndarray"):
+            got = result.to_ndarray()
+        elif hasattr(result, "to_numpy"):
+            got = result.to_numpy()
+        else:
+            got = np.asarray(result)
+
+        assert np.array_equal(got, expected)
+
 
 class TestArkoudaCategoricalAsType:
     def test_categorical_array_astype_category_stays_extension(