diff --git a/numpy/__init__.cython-30.pxd b/numpy/__init__.cython-30.pxd index 1c557b1ebb9f..b6b16cac2e13 100644 --- a/numpy/__init__.cython-30.pxd +++ b/numpy/__init__.cython-30.pxd @@ -7,6 +7,8 @@ # Author: Dag Sverre Seljebotn # +cimport cython + from cpython.ref cimport Py_INCREF from cpython.object cimport PyObject, PyTypeObject, PyObject_TypeCheck cimport libc.stdio as stdio @@ -1221,6 +1223,7 @@ cdef extern from "numpy/ndarraytypes.h": ctypedef struct npy_string_allocator: pass + @cython.extended_buffer_regex("numpy", r"numpy.dtypes.StringDType:[\da-f]+") ctypedef struct npy_packed_static_string: pass diff --git a/numpy/_core/_internal.py b/numpy/_core/_internal.py index 45d0a07855a1..e9112aeeda7a 100644 --- a/numpy/_core/_internal.py +++ b/numpy/_core/_internal.py @@ -627,6 +627,10 @@ def _view_is_safe(oldtype, newtype): 'X': 'function pointers', } +_time_unit_map = ("Y", "M", "W", "", "D", "h", "m", "s", "ms", + "us", "ns", "ps", "fs", "as", "generic") + + class _Stream: def __init__(self, s): self.s = s @@ -721,6 +725,31 @@ def __dtype_from_pep3118(stream, is_subdtype): if stream.consume('T{'): value, align = __dtype_from_pep3118( stream, is_subdtype=True) + elif stream.consume("[numpy$"): + # TODO: Clearly, we would need a registration and a C callback + # probably, i.e. a slot that is called based on the name. + dtype_str = stream.consume_until("]") + module, name, *params = dtype_str.split(":", 2) + numpy_byteorder = {'@': '=', '^': '='}.get( + stream.byteorder, stream.byteorder) + if module == "numpy.dtypes": + if name == "TimeDelta64DType": + unit, num = [int(i, 16) for i in params[0].split(":")] + unit = _time_unit_map[unit] + value = dtype(f"{numpy_byteorder}m8[{num}{unit}]") + elif name == "StringDType": + # Just stores the Python object (ignores any other state). + # (We could always do this, but I think it is nice to avoid + # objects when possible..) + import ctypes + value = ctypes.cast(int(params[0], 16), ctypes.py_object).value + if type(value) != StringDType: + raise SystemError("Critical error, dtype not a StringDType.") + else: + raise NotImplementedError(f"Unknown NumPy dtype: {module}:{name}") + align = value.alignment + else: + raise NotImplementedError(f"Unknown NumPy dtype: {module}:{name}") elif stream.next in type_map_chars: if stream.next == 'Z': typechar = stream.advance(2) diff --git a/numpy/_core/src/multiarray/buffer.c b/numpy/_core/src/multiarray/buffer.c index fcff3ad6ca74..7e6079daf97d 100644 --- a/numpy/_core/src/multiarray/buffer.c +++ b/numpy/_core/src/multiarray/buffer.c @@ -18,6 +18,8 @@ #include "arrayobject.h" #include "scalartypes.h" #include "dtypemeta.h" +#include "descriptor.h" +#include "_datetime.h" /************************************************************************* **************** Implement Buffer Protocol **************************** @@ -420,6 +422,21 @@ _buffer_format_string(PyArray_Descr *descr, _tmp_string_t *str, if (_append_str(str, buf) < 0) return -1; break; } + case NPY_TIMEDELTA: { + char buf[128]; + PyArray_DatetimeMetaData *meta = get_datetime_metadata_from_dtype(descr); + PyOS_snprintf(buf, sizeof(buf), "[numpy$numpy.dtypes:TimeDelta64DType:%x:%x]", + meta->base, meta->num); + if (_append_str(str, buf) < 0) return -1; + break; + } + case NPY_VSTRING: { + char buf[128]; + PyOS_snprintf(buf, sizeof(buf), "[numpy$numpy.dtypes:StringDType:%zx]", + (Py_uintptr_t)descr); + if (_append_str(str, buf) < 0) return -1; + break; + } default: if (PyDataType_ISLEGACY(descr)) { PyErr_Format(PyExc_ValueError, diff --git a/numpy/_core/src/multiarray/ctors.c b/numpy/_core/src/multiarray/ctors.c index 383aa200e21d..fe0a82ac8559 100644 --- a/numpy/_core/src/multiarray/ctors.c +++ b/numpy/_core/src/multiarray/ctors.c @@ -1806,26 +1806,27 @@ PyArray_CheckFromAny(PyObject *op, PyArray_Descr *descr, int min_depth, * Internal version of PyArray_CheckFromAny that accepts a dtypemeta. Borrows * references to the descriptor and dtype. */ - NPY_NO_EXPORT PyObject * PyArray_CheckFromAny_int(PyObject *op, PyArray_Descr *in_descr, PyArray_DTypeMeta *in_DType, int min_depth, int max_depth, int requires, PyObject *context) { PyObject *obj; + Py_XINCREF(in_descr); /* take ownership as we may replace it */ if (requires & NPY_ARRAY_NOTSWAPPED) { if (!in_descr && PyArray_Check(op)) { in_descr = PyArray_DESCR((PyArrayObject *)op); - Py_INCREF(in_descr); } - if (in_descr) { - PyArray_DESCR_REPLACE_CANONICAL(in_descr); + PyArray_DESCR_REPLACE_CANONICAL(in_descr); + if (in_descr == NULL) { + return NULL; } } int was_scalar; obj = PyArray_FromAny_int(op, in_descr, in_DType, min_depth, max_depth, requires, context, &was_scalar); + Py_XDECREF(in_descr); if (obj == NULL) { return NULL; } diff --git a/numpy/_core/src/multiarray/item_selection.c b/numpy/_core/src/multiarray/item_selection.c index d02e420c36a1..205df1412581 100644 --- a/numpy/_core/src/multiarray/item_selection.c +++ b/numpy/_core/src/multiarray/item_selection.c @@ -2127,7 +2127,6 @@ PyArray_SearchSorted(PyArrayObject *op1, PyObject *op2, if (dtype == NULL) { return NULL; } - /* refs to dtype we own = 1 */ /* Look for binary search function */ if (perm) { @@ -2138,26 +2137,20 @@ PyArray_SearchSorted(PyArrayObject *op1, PyObject *op2, } if (binsearch == NULL && argbinsearch == NULL) { PyErr_SetString(PyExc_TypeError, "compare not supported for type"); - /* refs to dtype we own = 1 */ Py_DECREF(dtype); - /* refs to dtype we own = 0 */ return NULL; } - /* need ap2 as contiguous array and of right type */ - /* refs to dtype we own = 1 */ - Py_INCREF(dtype); - /* refs to dtype we own = 2 */ + /* need ap2 as contiguous array and of right dtype (steals and may be replace it) */ ap2 = (PyArrayObject *)PyArray_CheckFromAny(op2, dtype, 0, 0, NPY_ARRAY_CARRAY_RO | NPY_ARRAY_NOTSWAPPED, NULL); - /* refs to dtype we own = 1, array creation steals one even on failure */ if (ap2 == NULL) { - Py_DECREF(dtype); - /* refs to dtype we own = 0 */ return NULL; } + /* dtype was stolen, replace it in case the array creation replaced it. */ + dtype = (PyArray_Descr *)Py_NewRef(PyArray_DESCR(ap2)); /* * If the needle (ap2) is larger than the haystack (op1) we copy the @@ -2166,9 +2159,9 @@ PyArray_SearchSorted(PyArrayObject *op1, PyObject *op2, if (PyArray_SIZE(ap2) > PyArray_SIZE(op1)) { ap1_flags |= NPY_ARRAY_CARRAY_RO; } + /* dtype is stolen, after this we have no reference */ ap1 = (PyArrayObject *)PyArray_CheckFromAny((PyObject *)op1, dtype, 1, 1, ap1_flags, NULL); - /* refs to dtype we own = 0, array creation steals one even on failure */ if (ap1 == NULL) { goto fail; } diff --git a/numpy/_core/tests/examples/cython/checks.pyx b/numpy/_core/tests/examples/cython/checks.pyx index 6cdfd787c838..879b9ac1e5a2 100644 --- a/numpy/_core/tests/examples/cython/checks.pyx +++ b/numpy/_core/tests/examples/cython/checks.pyx @@ -4,6 +4,9 @@ Functions in this module give python-space wrappers for cython functions exposed in numpy/__init__.pxd, so they can be tested in test_cython.py """ +from libc.stdint cimport uintptr_t +from cpython.ref cimport Py_INCREF + cimport numpy as cnp cnp.import_array() @@ -358,6 +361,38 @@ def npystring_allocators_other_types(arr1, arr2): return ret +cdef cnp.PyArray_StringDTypeObject *string_dtype_from_format(char *fmt): + # This helper should be part of the numpy.pyd of course (but needs to be + # in it's own `.pyd` for NumPy backwards compatibility). + cdef cnp.PyArray_StringDTypeObject descr + cdef uintptr_t ptr + if not fmt.startswith(b"[numpy$numpy.dtypes:StringDType:"): + raise NotImplementedError("not able to parse this format (yet)") + fmt = fmt + len(b"[numpy$numpy.dtypes:StringDType:") + len_fmt = len(fmt) + assert fmt[len_fmt-1] == "]" + ptr = int(fmt[:len_fmt-1], 16) + return ptr + + +def npystring_write_memview(cnp.npy_packed_static_string[:] mview): + cdef char *string = "Hello world, hello Pythonistas" + cdef size_t size = len(string) + cdef size_t i + + cdef cnp.PyArray_StringDTypeObject *descr = string_dtype_from_format(mview.format) + allocator = cnp.NpyString_acquire_allocator(descr) + + # copy string->packed_string, the pointer to the underlying array buffer + for i in range(mview.shape[0]): + ret = cnp.NpyString_pack(allocator, &mview[i], string, size) + if ret < 0: + break + + cnp.NpyString_release_allocator(allocator) + return 1 + + def check_npy_uintp_type_enum(): # Regression test for gh-27890: cnp.NPY_UINTP was not defined. # Cython would fail to compile this before gh-27890 was fixed. diff --git a/numpy/_core/tests/test_cython.py b/numpy/_core/tests/test_cython.py index 53c3d92f8a14..4b3d0999165f 100644 --- a/numpy/_core/tests/test_cython.py +++ b/numpy/_core/tests/test_cython.py @@ -329,6 +329,14 @@ def test_npystring_multiple_allocators(install_temp): assert arr1[-1] is None assert arr2[0] == "test this" +def test_pystring_pack_mview(install_temp): + """Test basic memoryview interface""" + import checks + + dt = np.dtypes.StringDType(na_object=None) + arr = np.array(['abcd', 'b', 'c'], dtype=dt) + checks.npystring_write_memview(arr) + assert (arr == "Hello world, hello Pythonistas").all() def test_npystring_allocators_other_dtype(install_temp): """Check that allocators for non-StringDType arrays is NULL.""" diff --git a/numpy/_core/tests/test_regression.py b/numpy/_core/tests/test_regression.py index 826bc60a4735..e1de6aca1899 100644 --- a/numpy/_core/tests/test_regression.py +++ b/numpy/_core/tests/test_regression.py @@ -2652,3 +2652,9 @@ def test_sort_overlap(self): inp = np.linspace(0, size, num=size, dtype=np.intc) out = np.sort(inp) assert_equal(inp, out) + + def test_searchsorted_structured(self): + # gh-28190 + x = np.array([(0, 1.)], dtype=[('time', '