diff --git a/cuda_core/cuda/core/experimental/_layout.pxd b/cuda_core/cuda/core/experimental/_layout.pxd index 2d96a2cc83..ff83449e5d 100644 --- a/cuda_core/cuda/core/experimental/_layout.pxd +++ b/cuda_core/cuda/core/experimental/_layout.pxd @@ -297,9 +297,7 @@ cdef class _StridedLayout: raise ValueError( f"Allocation size for a layout that maps elements " f"to negative memory offsets is ambiguous. " - f"The layout's min_offset is {min_offset}. " - f"To create a supported layout with the same shape " - f"please use _StridedLayout.to_dense()." + f"The layout's min_offset is {min_offset}." ) if max_offset < min_offset: return 0 diff --git a/cuda_core/cuda/core/experimental/_layout.pyx b/cuda_core/cuda/core/experimental/_layout.pyx index 26117f840a..b1ff975dc9 100644 --- a/cuda_core/cuda/core/experimental/_layout.pyx +++ b/cuda_core/cuda/core/experimental/_layout.pyx @@ -48,8 +48,8 @@ cdef class _StridedLayout: def __init__( self : _StridedLayout, - shape : tuple[int], - strides : tuple[int] | None, + shape : tuple[int, ...], + strides : tuple[int, ...] | None, itemsize : int, divide_strides : bool = False ) -> None: @@ -455,7 +455,7 @@ cdef class _StridedLayout: a_view = StridedMemoryView(a, -1) # get the original layout of ``a`` and convert it to a dense layout # to avoid overallocating memory (e.g. if the ``a`` was sliced) - layout = a_view.layout.to_dense() + layout = a_view._layout.to_dense() # get the required size in bytes to fit the tensor required_size = layout.required_size_in_bytes() # allocate the memory on the device @@ -669,12 +669,12 @@ cdef class _StridedLayout: # Viewing (5, 6) float array as (5, 3) complex64 array. 
a = numpy.ones((5, 6), dtype=numpy.float32) float_view = StridedMemoryView(a, -1) - layout = float_view.layout + layout = float_view._layout assert layout.shape == (5, 6) assert layout.itemsize == 4 complex_view = float_view.view(layout.repacked(8), numpy.complex64) - assert complex_view.layout.shape == (5, 3) - assert complex_view.layout.itemsize == 8 + assert complex_view._layout.shape == (5, 3) + assert complex_view._layout.itemsize == 8 b = numpy.from_dlpack(complex_view) assert b.shape == (5, 3) """ diff --git a/cuda_core/cuda/core/experimental/_memoryview.pyx b/cuda_core/cuda/core/experimental/_memoryview.pyx index cbfc790866..6c995b7ef0 100644 --- a/cuda_core/cuda/core/experimental/_memoryview.pyx +++ b/cuda_core/cuda/core/experimental/_memoryview.pyx @@ -28,7 +28,8 @@ cdef class StridedMemoryView: 1. Using the :obj:`args_viewable_as_strided_memory` decorator (recommended) 2. Explicit construction relying on DLPack or CUDA Array Interface, see below. - 3. From :obj:`~_memory.Buffer` and a :obj:`_StridedLayout` (see :meth:`from_buffer` classmethod) + 3. From :obj:`~_memory.Buffer` and shape and strides tuples (see + :meth:`from_buffer` classmethod) ``StridedMemoryView(obj, stream_ptr)`` can be used to create a view from objects supporting either DLPack (up to v1.0) or CUDA Array Interface @@ -160,22 +161,20 @@ cdef class StridedMemoryView: @classmethod def from_buffer( - cls, buffer : Buffer, layout : _StridedLayout, + cls, + buffer : Buffer, + shape : tuple[int, ...], + strides : tuple[int, ...] | None = None, + *, + itemsize : int | None = None, dtype : numpy.dtype | None = None, is_readonly : bool = False ) -> StridedMemoryView: """ - Creates a :obj:`StridedMemoryView` instance from a :obj:`~_memory.Buffer` and a :obj:`_StridedLayout`. + Creates a :obj:`StridedMemoryView` instance from a :obj:`~_memory.Buffer` and shape and strides tuples.
The Buffer can be either allocation coming from a :obj:`MemoryResource` or an external allocation wrapped in a :obj:`~_memory.Buffer` object with ``Buffer.from_handle(ptr, size, owner=...)``. - .. hint:: - When allocating the memory for a given layout, the required allocation size - can be obtained with the :meth:`_StridedLayout.required_size_in_bytes` method. - It is best to use the :meth:`_StridedLayout.to_dense` method - first to make sure the layout is contiguous, to avoid overallocating memory - for layouts with gaps. - .. caution:: When creating a :obj:`StridedMemoryView` from a :obj:`~_memory.Buffer`, no synchronization is performed. It is the user's responsibility to ensure @@ -185,19 +184,33 @@ cdef class StridedMemoryView: ---------- buffer : :obj:`~_memory.Buffer` The buffer to create the view from. - layout : :obj:`_StridedLayout` + shape : :obj:`tuple` + The shape of the view, i.e. the number of elements along each + dimension. + strides : :obj:`tuple` The layout describing the shape, strides and itemsize of the elements in the buffer. - dtype : :obj:`numpy.dtype`, optional + dtype : :obj:`numpy.dtype` Optional dtype. If specified, the dtype's itemsize must match the layout's itemsize. - To view the buffer with a different itemsize, please use :meth:`_StridedLayout.repacked` - first to transform the layout to the desired itemsize. is_readonly : bool, optional Whether the mark the view as readonly.
""" cdef StridedMemoryView view = StridedMemoryView.__new__(cls) - view_buffer_strided(view, buffer, layout, dtype, is_readonly) + if itemsize is None and dtype is None: + raise ValueError("Either itemsize or dtype must be specified") + if itemsize is not None and dtype is not None and itemsize != dtype.itemsize: + raise ValueError( + f"itemsize ({itemsize}) does not match dtype.itemsize ({dtype.itemsize})" + ) + # (itemsize is None XOR dtype is None) OR they are equal + view_buffer_strided( + view, + buffer, + _StridedLayout(shape=shape, strides=strides, itemsize=getattr(dtype, "itemsize", itemsize)), + dtype, + is_readonly, + ) return view def __dealloc__(self): @@ -245,14 +258,6 @@ cdef class StridedMemoryView: The copy can be performed between following memory spaces: host-to-device, device-to-host, device-to-device (on the same device). - The following conditions must be met: - * Both views must have compatible shapes, i.e. the shapes must be equal - or the source view's shape must be broadcastable to the target view's shape - (see :meth:`_StridedLayout.broadcast_to`). - * Both views must have the same :attr:`dtype` (or :attr:`_StridedLayout.itemsize` - if :attr:`dtype` is not specified). - * The destination's layout must be unique (see :meth:`_StridedLayout.is_unique`). - Parameters ---------- other : StridedMemoryView @@ -260,7 +265,7 @@ cdef class StridedMemoryView: stream : Stream | None, optional The stream to schedule the copy on. allocator : MemoryResource | None, optional - If temporary buffers are needed, the specifed memory resources + If temporary buffers are needed, the specified memory resources will be used to allocate the memory. If not specified, default resources will be used. blocking : bool | None, optional @@ -289,7 +294,7 @@ cdef class StridedMemoryView: raise NotImplementedError("Sorry, not supported: copy_to") @property - def layout(self) -> _StridedLayout: + def _layout(self) -> _StridedLayout: """ The layout of the tensor. 
For StridedMemoryView created from DLPack or CAI, the layout is inferred from the tensor object's metadata. @@ -325,7 +330,7 @@ cdef class StridedMemoryView: return (f"StridedMemoryView(ptr={self.ptr},\n" + f" shape={self.shape},\n" + f" strides={self.strides},\n" - + f" itemsize={self.layout.itemsize},\n" + + f" itemsize={self._layout.itemsize},\n" + f" dtype={get_simple_repr(self.dtype)},\n" + f" device_id={self.device_id},\n" + f" is_device_accessible={self.is_device_accessible},\n" @@ -677,8 +682,7 @@ cdef inline int view_buffer_strided( if dtype.itemsize != layout.itemsize: raise ValueError( f"The dtype's itemsize ({dtype.itemsize}) does not match the layout's " - f"itemsize ({layout.itemsize}). Please use :meth:`_StridedLayout.repacked` " - f"to transform the layout to the desired itemsize." + f"itemsize ({layout.itemsize})." ) # Check the layout's offset range [min_offset, max_offset] fits # within the [0, buffer.size - 1] range. diff --git a/cuda_core/cuda/core/experimental/utils.py b/cuda_core/cuda/core/experimental/utils.py index b8dc55b478..32f62918f6 100644 --- a/cuda_core/cuda/core/experimental/utils.py +++ b/cuda_core/cuda/core/experimental/utils.py @@ -2,7 +2,6 @@ # # SPDX-License-Identifier: Apache-2.0 -from cuda.core.experimental._layout import _StridedLayout # noqa: F401 from cuda.core.experimental._memoryview import ( StridedMemoryView, # noqa: F401 args_viewable_as_strided_memory, # noqa: F401 diff --git a/cuda_core/tests/test_utils.py b/cuda_core/tests/test_utils.py index 4adff9d9c5..7769de0bef 100644 --- a/cuda_core/tests/test_utils.py +++ b/cuda_core/tests/test_utils.py @@ -16,7 +16,8 @@ import numpy as np import pytest from cuda.core.experimental import Device -from cuda.core.experimental.utils import StridedMemoryView, _StridedLayout, args_viewable_as_strided_memory +from cuda.core.experimental._layout import _StridedLayout +from cuda.core.experimental.utils import StridedMemoryView, args_viewable_as_strided_memory def 
test_cast_to_3_tuple_success(): @@ -234,44 +235,60 @@ def _dense_strides(shape, stride_order): return tuple(strides) -@pytest.mark.parametrize("shape", [tuple(), (2, 3), (10, 10), (10, 13, 11)]) -@pytest.mark.parametrize("itemsize", [1, 4]) +@pytest.mark.parametrize("shape", [tuple(), (2, 3), (10, 10), (10, 13, 11)], ids=str) +@pytest.mark.parametrize("dtype", [np.dtype(np.int8), np.dtype(np.uint32)], ids=str) @pytest.mark.parametrize("stride_order", ["C", "F"]) @pytest.mark.parametrize("readonly", [True, False]) -def test_from_buffer(shape, itemsize, stride_order, readonly): +def test_from_buffer(shape, dtype, stride_order, readonly): dev = Device() dev.set_current() - layout = _StridedLayout.dense(shape=shape, itemsize=itemsize, stride_order=stride_order) + layout = _StridedLayout.dense(shape=shape, itemsize=dtype.itemsize, stride_order=stride_order) required_size = layout.required_size_in_bytes() - assert required_size == math.prod(shape) * itemsize + assert required_size == math.prod(shape) * dtype.itemsize buffer = dev.memory_resource.allocate(required_size) - view = StridedMemoryView.from_buffer(buffer, layout, is_readonly=readonly) + view = StridedMemoryView.from_buffer(buffer, shape=shape, strides=layout.strides, dtype=dtype, is_readonly=readonly) assert view.exporting_obj is buffer - assert view.layout is layout + assert view._layout == layout assert view.ptr == int(buffer.handle) assert view.shape == shape assert view.strides == _dense_strides(shape, stride_order) - assert view.dtype is None + assert view.dtype == dtype assert view.device_id == dev.device_id assert view.is_device_accessible assert view.readonly == readonly +@pytest.mark.parametrize( + ("dtype", "itemsize", "msg"), + [ + (np.dtype("int16"), 1, "itemsize .+ does not match dtype.itemsize .+"), + (None, None, "itemsize or dtype must be specified"), + ], +) +def test_from_buffer_incompatible_dtype_and_itemsize(dtype, itemsize, msg): + layout = _StridedLayout.dense((5,), 2) + device = Device() 
+ device.set_current() + buffer = device.memory_resource.allocate(layout.required_size_in_bytes()) + with pytest.raises(ValueError, match=msg): + StridedMemoryView.from_buffer(buffer, (5,), dtype=dtype, itemsize=itemsize) + + @pytest.mark.parametrize("stride_order", ["C", "F"]) def test_from_buffer_sliced(stride_order): layout = _StridedLayout.dense((5, 7), 2, stride_order=stride_order) device = Device() device.set_current() buffer = device.memory_resource.allocate(layout.required_size_in_bytes()) - view = StridedMemoryView.from_buffer(buffer, layout) + view = StridedMemoryView.from_buffer(buffer, (5, 7), dtype=np.dtype(np.int16)) assert view.shape == (5, 7) assert int(buffer.handle) == view.ptr sliced_view = view.view(layout[:-2, 3:]) assert sliced_view.shape == (3, 4) expected_offset = 3 if stride_order == "C" else 3 * 5 - assert sliced_view.layout.slice_offset == expected_offset - assert sliced_view.layout.slice_offset_in_bytes == expected_offset * 2 + assert sliced_view._layout.slice_offset == expected_offset + assert sliced_view._layout.slice_offset_in_bytes == expected_offset * 2 assert sliced_view.ptr == view.ptr + expected_offset * 2 assert int(buffer.handle) + expected_offset * 2 == sliced_view.ptr @@ -282,7 +299,12 @@ def test_from_buffer_too_small(): d.set_current() buffer = d.memory_resource.allocate(20) with pytest.raises(ValueError, match="Expected at least 40 bytes, got 20 bytes."): - StridedMemoryView.from_buffer(buffer, layout) + StridedMemoryView.from_buffer( + buffer, + shape=layout.shape, + strides=layout.strides, + dtype=np.dtype("int16"), + ) def test_from_buffer_disallowed_negative_offset(): @@ -290,8 +312,13 @@ def test_from_buffer_disallowed_negative_offset(): d = Device() d.set_current() buffer = d.memory_resource.allocate(20) - with pytest.raises(ValueError, match="please use _StridedLayout.to_dense()."): - StridedMemoryView.from_buffer(buffer, layout) + with pytest.raises(ValueError): + StridedMemoryView.from_buffer( + buffer, + 
shape=layout.shape, + strides=layout.strides, + dtype=np.dtype("uint8"), + ) class _EnforceCAIView: @@ -331,7 +358,7 @@ def test_view_sliced_external(shape, slices, stride_order, view_as): pytest.skip("CuPy is not installed") a = cp.arange(math.prod(shape), dtype=cp.int32).reshape(shape, order=stride_order) view = StridedMemoryView.from_cuda_array_interface(_EnforceCAIView(a), -1) - layout = view.layout + layout = view._layout assert layout.is_dense assert layout.required_size_in_bytes() == a.nbytes assert view.ptr == _get_ptr(a) @@ -344,11 +371,11 @@ def test_view_sliced_external(shape, slices, stride_order, view_as): assert 0 <= sliced_layout.required_size_in_bytes() <= a.nbytes assert not sliced_layout.is_dense - assert sliced_view.layout is sliced_layout + assert sliced_view._layout is sliced_layout assert view.dtype == sliced_view.dtype - assert sliced_view.layout.itemsize == a_sliced.itemsize == layout.itemsize + assert sliced_view._layout.itemsize == a_sliced.itemsize == layout.itemsize assert sliced_view.shape == a_sliced.shape - assert sliced_view.layout.strides_in_bytes == a_sliced.strides + assert sliced_view._layout.strides_in_bytes == a_sliced.strides @pytest.mark.parametrize( @@ -369,7 +396,7 @@ def test_view_sliced_external_negative_offset(stride_order, view_as): a = cp.arange(math.prod(shape), dtype=cp.int32).reshape(shape, order=stride_order) a = a[::-1] view = StridedMemoryView.from_cuda_array_interface(_EnforceCAIView(a), -1) - layout = view.layout + layout = view._layout assert not layout.is_dense assert layout.strides == (-1,) assert view.ptr == _get_ptr(a) @@ -381,8 +408,8 @@ def test_view_sliced_external_negative_offset(stride_order, view_as): assert sliced_view.ptr == view.ptr - 3 * a.itemsize assert not sliced_layout.is_dense - assert sliced_view.layout is sliced_layout + assert sliced_view._layout is sliced_layout assert view.dtype == sliced_view.dtype - assert sliced_view.layout.itemsize == a_sliced.itemsize == layout.itemsize + assert 
sliced_view._layout.itemsize == a_sliced.itemsize == layout.itemsize assert sliced_view.shape == a_sliced.shape - assert sliced_view.layout.strides_in_bytes == a_sliced.strides + assert sliced_view._layout.strides_in_bytes == a_sliced.strides