seberg · seberg · Mar 3, 2025 · Mar 4, 2025 · Mar 16, 2025
diff --git a/numpy/__init__.cython-30.pxd b/numpy/__init__.cython-30.pxd
@@ -7,6 +7,8 @@
 # Author: Dag Sverre Seljebotn
 #
 
+cimport cython
+
 from cpython.ref cimport Py_INCREF
 from cpython.object cimport PyObject, PyTypeObject, PyObject_TypeCheck
 cimport libc.stdio as stdio
@@ -1221,6 +1223,7 @@ cdef extern from "numpy/ndarraytypes.h":
     ctypedef struct npy_string_allocator:
         pass
 
+    @cython.extended_buffer_regex("numpy", r"numpy.dtypes.StringDType:[\da-f]+")
     ctypedef struct npy_packed_static_string:
         pass
 

diff --git a/numpy/_core/_internal.py b/numpy/_core/_internal.py
@@ -627,6 +627,10 @@ def _view_is_safe(oldtype, newtype):
     'X': 'function pointers',
 }
 
+# Unit strings for timedelta64, indexed by the first hexadecimal field of a
+# "[numpy$numpy.dtypes:TimeDelta64DType:<unit>:<num>]" buffer format (the C
+# exporter writes `meta->base` into that field).  Index 3 is only a
+# placeholder ("<invalid>").
+# NOTE(review): presumably mirrors the NPY_DATETIMEUNIT enum order -- confirm.
+_time_unit_map = ("Y", "M", "W", "<invalid>", "D", "h", "m", "s", "ms",
+                  "us", "ns", "ps", "fs", "as", "generic")
+
+
 class _Stream:
     def __init__(self, s):
         self.s = s
@@ -721,6 +725,31 @@ def __dtype_from_pep3118(stream, is_subdtype):
         if stream.consume('T{'):
             value, align = __dtype_from_pep3118(
                 stream, is_subdtype=True)
+        elif stream.consume("[numpy$"):
+            # TODO: Clearly, we would need a registration and a C callback
+            #       probably, i.e. a slot that is called based on the name.
+            dtype_str = stream.consume_until("]")
+            module, name, *params = dtype_str.split(":", 2)
+            numpy_byteorder = {'@': '=', '^': '='}.get(
+                stream.byteorder, stream.byteorder)
+            if module == "numpy.dtypes":
+                if name == "TimeDelta64DType":
+                    unit, num = [int(i, 16) for i in params[0].split(":")]
+                    unit = _time_unit_map[unit]
+                    value = dtype(f"{numpy_byteorder}m8[{num}{unit}]")
+                elif name == "StringDType":
+                    # Just stores the Python object (ignores any other state).
+                    # (We could always do this, but I think it is nice to avoid
+                    # objects when possible..)
+                    import ctypes
+                    value = ctypes.cast(int(params[0], 16), ctypes.py_object).value
+                    if type(value) != StringDType:
+                        raise SystemError("Critical error, dtype not a StringDType.")
+                else:
+                    raise NotImplementedError(f"Unknown NumPy dtype: {module}:{name}")
+                align = value.alignment
+            else:
+                raise NotImplementedError(f"Unknown NumPy dtype: {module}:{name}")
         elif stream.next in type_map_chars:
             if stream.next == 'Z':
                 typechar = stream.advance(2)

diff --git a/numpy/_core/src/multiarray/buffer.c b/numpy/_core/src/multiarray/buffer.c
@@ -18,6 +18,8 @@
 #include "arrayobject.h"
 #include "scalartypes.h"
 #include "dtypemeta.h"
+#include "descriptor.h"
+#include "_datetime.h"
 
 /*************************************************************************
  ****************   Implement Buffer Protocol ****************************
@@ -420,6 +422,21 @@ _buffer_format_string(PyArray_Descr *descr, _tmp_string_t *str,
             if (_append_str(str, buf) < 0) return -1;
             break;
         }
+        case NPY_TIMEDELTA: {
+            char buf[128];
+            PyArray_DatetimeMetaData *meta = get_datetime_metadata_from_dtype(descr);
+            PyOS_snprintf(buf, sizeof(buf), "[numpy$numpy.dtypes:TimeDelta64DType:%x:%x]",
+                          meta->base, meta->num);
+            if (_append_str(str, buf) < 0) return -1;
+            break;
+        }
+        case NPY_VSTRING: {
+            char buf[128];
+            PyOS_snprintf(buf, sizeof(buf), "[numpy$numpy.dtypes:StringDType:%zx]",
+                          (Py_uintptr_t)descr);
+            if (_append_str(str, buf) < 0) return -1;
+            break;
+        }
         default:
             if (PyDataType_ISLEGACY(descr)) {
                 PyErr_Format(PyExc_ValueError,

diff --git a/numpy/_core/src/multiarray/ctors.c b/numpy/_core/src/multiarray/ctors.c
@@ -1806,26 +1806,27 @@ PyArray_CheckFromAny(PyObject *op, PyArray_Descr *descr, int min_depth,
  * Internal version of PyArray_CheckFromAny that accepts a dtypemeta. Borrows
  * references to the descriptor and dtype.
  */
-
 NPY_NO_EXPORT PyObject *
 PyArray_CheckFromAny_int(PyObject *op, PyArray_Descr *in_descr,
                          PyArray_DTypeMeta *in_DType, int min_depth,
                          int max_depth, int requires, PyObject *context)
 {
     PyObject *obj;
+    Py_XINCREF(in_descr);  /* take ownership as we may replace it */
     if (requires & NPY_ARRAY_NOTSWAPPED) {
         if (!in_descr && PyArray_Check(op)) {
             in_descr = PyArray_DESCR((PyArrayObject *)op);
-            Py_INCREF(in_descr);
         }
-        if (in_descr) {
-            PyArray_DESCR_REPLACE_CANONICAL(in_descr);
+        PyArray_DESCR_REPLACE_CANONICAL(in_descr);
+        if (in_descr == NULL) {
+            return NULL;
         }
     }
 
     int was_scalar;
     obj = PyArray_FromAny_int(op, in_descr, in_DType, min_depth,
                               max_depth, requires, context, &was_scalar);
+    Py_XDECREF(in_descr);
     if (obj == NULL) {
         return NULL;
     }

diff --git a/numpy/_core/src/multiarray/item_selection.c b/numpy/_core/src/multiarray/item_selection.c
@@ -2127,7 +2127,6 @@ PyArray_SearchSorted(PyArrayObject *op1, PyObject *op2,
     if (dtype == NULL) {
         return NULL;
     }
-    /* refs to dtype we own = 1 */
 
     /* Look for binary search function */
     if (perm) {
@@ -2138,26 +2137,20 @@ PyArray_SearchSorted(PyArrayObject *op1, PyObject *op2,
     }
     if (binsearch == NULL && argbinsearch == NULL) {
         PyErr_SetString(PyExc_TypeError, "compare not supported for type");
-        /* refs to dtype we own = 1 */
         Py_DECREF(dtype);
-        /* refs to dtype we own = 0 */
         return NULL;
     }
 
-    /* need ap2 as contiguous array and of right type */
-    /* refs to dtype we own = 1 */
-    Py_INCREF(dtype);
-    /* refs to dtype we own = 2 */
+    /* need ap2 as contiguous array and of right dtype (steals dtype and may replace it) */
     ap2 = (PyArrayObject *)PyArray_CheckFromAny(op2, dtype,
                                 0, 0,
                                 NPY_ARRAY_CARRAY_RO | NPY_ARRAY_NOTSWAPPED,
                                 NULL);
-    /* refs to dtype we own = 1, array creation steals one even on failure */
     if (ap2 == NULL) {
-        Py_DECREF(dtype);
-        /* refs to dtype we own = 0 */
         return NULL;
     }
+    /* dtype was stolen, replace it in case the array creation replaced it. */
+    dtype = (PyArray_Descr *)Py_NewRef(PyArray_DESCR(ap2));
 
     /*
      * If the needle (ap2) is larger than the haystack (op1) we copy the
@@ -2166,9 +2159,9 @@ PyArray_SearchSorted(PyArrayObject *op1, PyObject *op2,
     if (PyArray_SIZE(ap2) > PyArray_SIZE(op1)) {
         ap1_flags |= NPY_ARRAY_CARRAY_RO;
     }
+    /* dtype is stolen, after this we have no reference */
     ap1 = (PyArrayObject *)PyArray_CheckFromAny((PyObject *)op1, dtype,
                                 1, 1, ap1_flags, NULL);
-    /* refs to dtype we own = 0, array creation steals one even on failure */
     if (ap1 == NULL) {
         goto fail;
     }

diff --git a/numpy/_core/tests/examples/cython/checks.pyx b/numpy/_core/tests/examples/cython/checks.pyx
@@ -4,6 +4,9 @@
 Functions in this module give python-space wrappers for cython functions
 exposed in numpy/__init__.pxd, so they can be tested in test_cython.py
 """
+from libc.stdint cimport uintptr_t
+from cpython.ref cimport Py_INCREF
+
 cimport numpy as cnp
 cnp.import_array()
 
@@ -358,6 +361,38 @@ def npystring_allocators_other_types(arr1, arr2):
     return ret
 
 
+cdef cnp.PyArray_StringDTypeObject *string_dtype_from_format(char *fmt):
+    # Recover the StringDType descriptor pointer from a buffer format of the
+    # form "[numpy$numpy.dtypes:StringDType:<hex>]", where <hex> is the
+    # descriptor's address written in hexadecimal.  The address is cast back
+    # to a pointer as-is; no reference to the descriptor is taken here.
+    #
+    # Raises NotImplementedError for any format without that exact prefix.
+    #
+    # This helper should be part of the numpy.pyd of course (but needs to be
+    # in its own `.pyd` for NumPy backwards compatibility).
+    cdef uintptr_t ptr
+    if not fmt.startswith(b"[numpy$numpy.dtypes:StringDType:"):
+        raise NotImplementedError("not able to parse this format (yet)")
+    # Step past the fixed prefix; what remains is "<hex>]".
+    fmt = fmt + len(b"[numpy$numpy.dtypes:StringDType:")
+    len_fmt = len(fmt)
+    assert fmt[len_fmt-1] == "]"
+    # Drop the closing bracket and parse the address.
+    ptr = int(fmt[:len_fmt-1], 16)
+    return <cnp.PyArray_StringDTypeObject *>ptr
+
+
+def npystring_write_memview(cnp.npy_packed_static_string[:] mview):
+    """Overwrite every element of `mview` with one fixed test string.
+
+    The StringDType descriptor is recovered from `mview.format` and its
+    allocator is held while packing.  The allocator is always released
+    before returning or raising.
+
+    Returns 1 on success; raises MemoryError if `NpyString_pack` reports
+    a failure (elements before the failing one have already been written).
+    """
+    cdef char *string = "Hello world, hello Pythonistas"
+    cdef size_t size = len(string)
+    cdef size_t i
+
+    cdef cnp.PyArray_StringDTypeObject *descr = string_dtype_from_format(mview.format)
+    allocator = cnp.NpyString_acquire_allocator(descr)
+
+    # `ret` stays 0 for an empty view, so the check after the loop is safe.
+    ret = 0
+    # copy string->packed_string, the pointer to the underlying array buffer
+    for i in range(mview.shape[0]):
+        ret = cnp.NpyString_pack(allocator, &mview[i], string, size)
+        if ret < 0:
+            break
+
+    # Release first so that the error path does not keep the allocator held.
+    cnp.NpyString_release_allocator(allocator)
+    if ret < 0:
+        raise MemoryError("NpyString_pack failed to store the string")
+    return 1
+
+
 def check_npy_uintp_type_enum():
     # Regression test for gh-27890: cnp.NPY_UINTP was not defined.
     # Cython would fail to compile this before gh-27890 was fixed.

diff --git a/numpy/_core/tests/test_cython.py b/numpy/_core/tests/test_cython.py
@@ -329,6 +329,14 @@ def test_npystring_multiple_allocators(install_temp):
     assert arr1[-1] is None
     assert arr2[0] == "test this"
 
+def test_pystring_pack_mview(install_temp):
+    """Write into a StringDType array through the memoryview helper."""
+    import checks
+
+    expected = "Hello world, hello Pythonistas"
+    string_dtype = np.dtypes.StringDType(na_object=None)
+    target = np.array(['abcd', 'b', 'c'], dtype=string_dtype)
+    checks.npystring_write_memview(target)
+    assert (target == expected).all()
 
 def test_npystring_allocators_other_dtype(install_temp):
     """Check that allocators for non-StringDType arrays is NULL."""

diff --git a/numpy/_core/tests/test_regression.py b/numpy/_core/tests/test_regression.py
@@ -2652,3 +2652,9 @@ def test_sort_overlap(self):
         inp = np.linspace(0, size, num=size, dtype=np.intc)
         out = np.sort(inp)
         assert_equal(inp, out)
+
+    def test_searchsorted_structured(self):
+        # gh-28190: searchsorted with a structured haystack and needle.
+        # Each array builds its dtype from its own (equal) field list.
+        x = np.array([(0, 1.)], dtype=[('time', '<i8'), ('value', '<f8')])
+        y = np.array((0, 0.), dtype=[('time', '<i8'), ('value', '<f8')])
+        # (0, 0.) orders before (0, 1.), so the insertion index must be 0.
+        assert_equal(x.searchsorted(y), 0)