4 changes: 1 addition & 3 deletions cuda_core/cuda/core/experimental/_layout.pxd
@@ -297,9 +297,7 @@ cdef class _StridedLayout:
raise ValueError(
f"Allocation size for a layout that maps elements "
f"to negative memory offsets is ambiguous. "
f"The layout's min_offset is {min_offset}. "
f"To create a supported layout with the same shape "
f"please use _StridedLayout.to_dense()."
f"The layout's min_offset is {min_offset}."
)
if max_offset < min_offset:
return 0
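The check above is why the size computation refuses layouts whose strides map elements to negative offsets: the base pointer no longer marks the start of the allocation, so the required size cannot be derived from the layout alone. A minimal pure-Python sketch of the rule (the helper name and the zero-extent shortcut are illustrative, not the actual Cython implementation):

```python
def required_size_in_bytes(shape, strides, itemsize):
    """Pure-Python sketch of the offset-range rule enforced above."""
    if any(extent == 0 for extent in shape):
        return 0  # an empty tensor needs no storage
    # Offsets (in elements) of the lowest and highest addressable element.
    min_offset = sum((n - 1) * s for n, s in zip(shape, strides) if s < 0)
    max_offset = sum((n - 1) * s for n, s in zip(shape, strides) if s > 0)
    if min_offset < 0:
        raise ValueError(
            "Allocation size for a layout that maps elements to negative "
            f"memory offsets is ambiguous. The layout's min_offset is {min_offset}."
        )
    return (max_offset + 1) * itemsize
```

For example, the layout exercised by `test_from_buffer_disallowed_negative_offset` further down, shape `(5, 4)` with strides `(-4, 1)` and itemsize 1, has `min_offset == -16` and is rejected.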
12 changes: 6 additions & 6 deletions cuda_core/cuda/core/experimental/_layout.pyx
@@ -48,8 +48,8 @@ cdef class _StridedLayout:

def __init__(
self : _StridedLayout,
shape : tuple[int],
strides : tuple[int] | None,
shape : tuple[int, ...],
strides : tuple[int, ...] | None,
itemsize : int,
divide_strides : bool = False
) -> None:
@@ -455,7 +455,7 @@ cdef class _StridedLayout:
a_view = StridedMemoryView(a, -1)
# get the original layout of ``a`` and convert it to a dense layout
# to avoid overallocating memory (e.g. if ``a`` was sliced)
layout = a_view.layout.to_dense()
layout = a_view._layout.to_dense()
# get the required size in bytes to fit the tensor
required_size = layout.required_size_in_bytes()
# allocate the memory on the device
@@ -669,12 +669,12 @@ cdef class _StridedLayout:
# Viewing (5, 6) float array as (5, 3) complex64 array.
a = numpy.ones((5, 6), dtype=numpy.float32)
float_view = StridedMemoryView(a, -1)
layout = float_view.layout
layout = float_view._layout
assert layout.shape == (5, 6)
assert layout.itemsize == 4
complex_view = float_view.view(layout.repacked(8), numpy.complex64)
assert complex_view.layout.shape == (5, 3)
assert complex_view.layout.itemsize == 8
assert complex_view._layout.shape == (5, 3)
assert complex_view._layout.itemsize == 8
b = numpy.from_dlpack(complex_view)
assert b.shape == (5, 3)
"""
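The second docstring above views a `(5, 6)` float32 array as a `(5, 3)` complex64 array through `repacked(8)`. The arithmetic is the same as NumPy's dtype reinterpretation: growing the itemsize from 4 to 8 bytes halves the extent of the innermost dense dimension while the total byte count stays fixed. A NumPy-only analogue (no `_StridedLayout` involved) for reference:

```python
import numpy as np

# NumPy analogue of the repacked(8) example above: reinterpreting 4-byte
# elements as 8-byte elements halves the innermost extent; bytes are unchanged.
a = np.ones((5, 6), dtype=np.float32)   # 5 * 6 * 4 = 120 bytes
b = a.view(np.complex64)                # 5 * 3 * 8 = 120 bytes, same buffer
assert b.shape == (5, 3)
assert a.nbytes == b.nbytes == 120
```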
60 changes: 32 additions & 28 deletions cuda_core/cuda/core/experimental/_memoryview.pyx
@@ -28,7 +28,8 @@ cdef class StridedMemoryView:

1. Using the :obj:`args_viewable_as_strided_memory` decorator (recommended)
2. Explicit construction relying on DLPack or CUDA Array Interface, see below.
3. From :obj:`~_memory.Buffer` and a :obj:`_StridedLayout` (see :meth:`from_buffer` classmethod)
3. From :obj:`~_memory.Buffer` and shape and strides tuples (see
:meth:`from_buffer` classmethod)

``StridedMemoryView(obj, stream_ptr)`` can be used to create a view from
objects supporting either DLPack (up to v1.0) or CUDA Array Interface
@@ -160,22 +161,20 @@

@classmethod
def from_buffer(
cls, buffer : Buffer, layout : _StridedLayout,
cls,
buffer : Buffer,
shape : tuple[int, ...],
strides : tuple[int, ...] | None = None,
*,
itemsize : int | None = None,
dtype : numpy.dtype | None = None,
is_readonly : bool = False
) -> StridedMemoryView:
"""
Creates a :obj:`StridedMemoryView` instance from a :obj:`~_memory.Buffer` and a :obj:`_StridedLayout`.
Creates a :obj:`StridedMemoryView` instance from a :obj:`~_memory.Buffer` and explicit shape and strides tuples.
The buffer can be either an allocation obtained from a :obj:`MemoryResource` or an external allocation
wrapped in a :obj:`~_memory.Buffer` object with ``Buffer.from_handle(ptr, size, owner=...)``.

.. hint::
When allocating the memory for a given layout, the required allocation size
can be obtained with the :meth:`_StridedLayout.required_size_in_bytes` method.
It is best to use the :meth:`_StridedLayout.to_dense` method
first to make sure the layout is contiguous, to avoid overallocating memory
for layouts with gaps.

.. caution::
When creating a :obj:`StridedMemoryView` from a :obj:`~_memory.Buffer`,
no synchronization is performed. It is the user's responsibility to ensure
@@ -185,19 +184,33 @@
----------
buffer : :obj:`~_memory.Buffer`
The buffer to create the view from.
layout : :obj:`_StridedLayout`
shape : :obj:`tuple`
The shape of the view, i.e. the number of elements along each dimension of the buffer.
strides : :obj:`tuple`, optional
The strides of the view, given in number of elements (not bytes).
If not specified, a dense layout is assumed.
itemsize : int, optional
The size in bytes of a single element. Either ``itemsize`` or ``dtype`` must be specified.
dtype : :obj:`numpy.dtype`, optional
dtype : :obj:`numpy.dtype`
The dtype of the elements. Either ``dtype`` or ``itemsize`` must be specified;
if both are given, ``dtype.itemsize`` must match ``itemsize``.
To view the buffer with a different itemsize, please use :meth:`_StridedLayout.repacked`
first to transform the layout to the desired itemsize.
is_readonly : bool, optional
Whether to mark the view as readonly.
"""
cdef StridedMemoryView view = StridedMemoryView.__new__(cls)
view_buffer_strided(view, buffer, layout, dtype, is_readonly)
if itemsize is None and dtype is None:
raise ValueError("Either itemsize or dtype must be specified")
if itemsize is not None and dtype is not None and itemsize != dtype.itemsize:
raise ValueError(
f"itemsize ({itemsize}) does not match dtype.itemsize ({dtype.itemsize})"
)
# (itemsize is None XOR dtype is None) OR they are equal
view_buffer_strided(
view,
buffer,
_StridedLayout(shape=shape, strides=strides, itemsize=getattr(dtype, "itemsize", itemsize)),
dtype,
is_readonly,
)
return view

def __dealloc__(self):
@@ -245,22 +258,14 @@ cdef class StridedMemoryView:
The copy can be performed between the following memory spaces:
host-to-device, device-to-host, device-to-device (on the same device).

The following conditions must be met:
* Both views must have compatible shapes, i.e. the shapes must be equal
or the source view's shape must be broadcastable to the target view's shape
(see :meth:`_StridedLayout.broadcast_to`).
* Both views must have the same :attr:`dtype` (or :attr:`_StridedLayout.itemsize`
if :attr:`dtype` is not specified).
* The destination's layout must be unique (see :meth:`_StridedLayout.is_unique`).

Parameters
----------
other : StridedMemoryView
The view to copy data from.
stream : Stream | None, optional
The stream to schedule the copy on.
allocator : MemoryResource | None, optional
If temporary buffers are needed, the specifed memory resources
If temporary buffers are needed, the specified memory resources
will be used to allocate the memory. If not specified, default
resources will be used.
blocking : bool | None, optional
@@ -289,7 +294,7 @@
raise NotImplementedError("Sorry, not supported: copy_to")

@property
def layout(self) -> _StridedLayout:
def _layout(self) -> _StridedLayout:
"""
The layout of the tensor. For StridedMemoryView created from DLPack or CAI,
the layout is inferred from the tensor object's metadata.
@@ -325,7 +330,7 @@
return (f"StridedMemoryView(ptr={self.ptr},\n"
+ f" shape={self.shape},\n"
+ f" strides={self.strides},\n"
+ f" itemsize={self.layout.itemsize},\n"
+ f" itemsize={self._layout.itemsize},\n"
+ f" dtype={get_simple_repr(self.dtype)},\n"
+ f" device_id={self.device_id},\n"
+ f" is_device_accessible={self.is_device_accessible},\n"
@@ -677,8 +682,7 @@ cdef inline int view_buffer_strided(
if dtype.itemsize != layout.itemsize:
raise ValueError(
f"The dtype's itemsize ({dtype.itemsize}) does not match the layout's "
f"itemsize ({layout.itemsize}). Please use :meth:`_StridedLayout.repacked` "
f"to transform the layout to the desired itemsize."
f"itemsize ({layout.itemsize})."
)
# Check the layout's offset range [min_offset, max_offset] fits
# within the [0, buffer.size - 1] range.
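Taken together, the new `from_buffer` signature lets callers describe the view with plain shape/strides tuples instead of constructing a `_StridedLayout` first. A sketch of the updated call, modelled on `test_from_buffer` below (the allocation route via `Device().memory_resource` follows the tests; other setups may obtain the `Buffer` differently, e.g. via `Buffer.from_handle`):

```python
import math

import numpy as np
from cuda.core.experimental import Device
from cuda.core.experimental.utils import StridedMemoryView

dev = Device()
dev.set_current()

shape = (10, 13)
dtype = np.dtype(np.float32)

# Allocate exactly enough bytes for a dense tensor of this shape and dtype.
buffer = dev.memory_resource.allocate(math.prod(shape) * dtype.itemsize)

# strides is omitted here, which is assumed to imply a dense layout; pass
# strides explicitly (in elements, not bytes) for non-contiguous views, or
# itemsize= instead of dtype= when no dtype object is at hand.
view = StridedMemoryView.from_buffer(buffer, shape=shape, dtype=dtype)
assert view.shape == shape
assert view.dtype == dtype
assert view.ptr == int(buffer.handle)
```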
1 change: 0 additions & 1 deletion cuda_core/cuda/core/experimental/utils.py
@@ -2,7 +2,6 @@
#
# SPDX-License-Identifier: Apache-2.0

from cuda.core.experimental._layout import _StridedLayout # noqa: F401
from cuda.core.experimental._memoryview import (
StridedMemoryView, # noqa: F401
args_viewable_as_strided_memory, # noqa: F401
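With the re-export removed, `_StridedLayout` is no longer part of the public `utils` namespace; the tests below switch to importing it from the private module, e.g.:

```python
# The private layout class now comes from its defining module, while the
# public helpers stay in cuda.core.experimental.utils.
from cuda.core.experimental._layout import _StridedLayout
from cuda.core.experimental.utils import StridedMemoryView, args_viewable_as_strided_memory
```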
73 changes: 50 additions & 23 deletions cuda_core/tests/test_utils.py
@@ -16,7 +16,8 @@
import numpy as np
import pytest
from cuda.core.experimental import Device
from cuda.core.experimental.utils import StridedMemoryView, _StridedLayout, args_viewable_as_strided_memory
from cuda.core.experimental._layout import _StridedLayout
from cuda.core.experimental.utils import StridedMemoryView, args_viewable_as_strided_memory


def test_cast_to_3_tuple_success():
@@ -234,44 +235,60 @@ def _dense_strides(shape, stride_order):
return tuple(strides)


@pytest.mark.parametrize("shape", [tuple(), (2, 3), (10, 10), (10, 13, 11)])
@pytest.mark.parametrize("itemsize", [1, 4])
@pytest.mark.parametrize("shape", [tuple(), (2, 3), (10, 10), (10, 13, 11)], ids=str)
@pytest.mark.parametrize("dtype", [np.dtype(np.int8), np.dtype(np.uint32)], ids=str)
@pytest.mark.parametrize("stride_order", ["C", "F"])
@pytest.mark.parametrize("readonly", [True, False])
def test_from_buffer(shape, itemsize, stride_order, readonly):
def test_from_buffer(shape, dtype, stride_order, readonly):
dev = Device()
dev.set_current()
layout = _StridedLayout.dense(shape=shape, itemsize=itemsize, stride_order=stride_order)
layout = _StridedLayout.dense(shape=shape, itemsize=dtype.itemsize, stride_order=stride_order)
required_size = layout.required_size_in_bytes()
assert required_size == math.prod(shape) * itemsize
assert required_size == math.prod(shape) * dtype.itemsize
buffer = dev.memory_resource.allocate(required_size)
view = StridedMemoryView.from_buffer(buffer, layout, is_readonly=readonly)
view = StridedMemoryView.from_buffer(buffer, shape=shape, strides=layout.strides, dtype=dtype, is_readonly=readonly)
assert view.exporting_obj is buffer
assert view.layout is layout
assert view._layout == layout
assert view.ptr == int(buffer.handle)
assert view.shape == shape
assert view.strides == _dense_strides(shape, stride_order)
assert view.dtype is None
assert view.dtype == dtype
assert view.device_id == dev.device_id
assert view.is_device_accessible
assert view.readonly == readonly


@pytest.mark.parametrize(
("dtype", "itemsize", "msg"),
[
(np.dtype("int16"), 1, "itemsize .+ does not match dtype.itemsize .+"),
(None, None, "itemsize or dtype must be specified"),
],
)
def test_from_buffer_incompatible_dtype_and_itemsize(dtype, itemsize, msg):
layout = _StridedLayout.dense((5,), 2)
device = Device()
device.set_current()
buffer = device.memory_resource.allocate(layout.required_size_in_bytes())
with pytest.raises(ValueError, match=msg):
StridedMemoryView.from_buffer(buffer, (5,), dtype=dtype, itemsize=itemsize)


@pytest.mark.parametrize("stride_order", ["C", "F"])
def test_from_buffer_sliced(stride_order):
layout = _StridedLayout.dense((5, 7), 2, stride_order=stride_order)
device = Device()
device.set_current()
buffer = device.memory_resource.allocate(layout.required_size_in_bytes())
view = StridedMemoryView.from_buffer(buffer, layout)
view = StridedMemoryView.from_buffer(buffer, (5, 7), dtype=np.dtype(np.int16))
assert view.shape == (5, 7)
assert int(buffer.handle) == view.ptr

sliced_view = view.view(layout[:-2, 3:])
assert sliced_view.shape == (3, 4)
expected_offset = 3 if stride_order == "C" else 3 * 5
assert sliced_view.layout.slice_offset == expected_offset
assert sliced_view.layout.slice_offset_in_bytes == expected_offset * 2
assert sliced_view._layout.slice_offset == expected_offset
assert sliced_view._layout.slice_offset_in_bytes == expected_offset * 2
assert sliced_view.ptr == view.ptr + expected_offset * 2
assert int(buffer.handle) + expected_offset * 2 == sliced_view.ptr

@@ -282,16 +299,26 @@ def test_from_buffer_too_small():
d.set_current()
buffer = d.memory_resource.allocate(20)
with pytest.raises(ValueError, match="Expected at least 40 bytes, got 20 bytes."):
StridedMemoryView.from_buffer(buffer, layout)
StridedMemoryView.from_buffer(
buffer,
shape=layout.shape,
strides=layout.strides,
dtype=np.dtype("int16"),
)


def test_from_buffer_disallowed_negative_offset():
layout = _StridedLayout((5, 4), (-4, 1), 1)
d = Device()
d.set_current()
buffer = d.memory_resource.allocate(20)
with pytest.raises(ValueError, match="please use _StridedLayout.to_dense()."):
StridedMemoryView.from_buffer(buffer, layout)
with pytest.raises(ValueError):
StridedMemoryView.from_buffer(
buffer,
shape=layout.shape,
strides=layout.strides,
dtype=np.dtype("uint8"),
)


class _EnforceCAIView:
@@ -331,7 +358,7 @@ def test_view_sliced_external(shape, slices, stride_order, view_as):
pytest.skip("CuPy is not installed")
a = cp.arange(math.prod(shape), dtype=cp.int32).reshape(shape, order=stride_order)
view = StridedMemoryView.from_cuda_array_interface(_EnforceCAIView(a), -1)
layout = view.layout
layout = view._layout
assert layout.is_dense
assert layout.required_size_in_bytes() == a.nbytes
assert view.ptr == _get_ptr(a)
@@ -344,11 +371,11 @@

assert 0 <= sliced_layout.required_size_in_bytes() <= a.nbytes
assert not sliced_layout.is_dense
assert sliced_view.layout is sliced_layout
assert sliced_view._layout is sliced_layout
assert view.dtype == sliced_view.dtype
assert sliced_view.layout.itemsize == a_sliced.itemsize == layout.itemsize
assert sliced_view._layout.itemsize == a_sliced.itemsize == layout.itemsize
assert sliced_view.shape == a_sliced.shape
assert sliced_view.layout.strides_in_bytes == a_sliced.strides
assert sliced_view._layout.strides_in_bytes == a_sliced.strides


@pytest.mark.parametrize(
Expand All @@ -369,7 +396,7 @@ def test_view_sliced_external_negative_offset(stride_order, view_as):
a = cp.arange(math.prod(shape), dtype=cp.int32).reshape(shape, order=stride_order)
a = a[::-1]
view = StridedMemoryView.from_cuda_array_interface(_EnforceCAIView(a), -1)
layout = view.layout
layout = view._layout
assert not layout.is_dense
assert layout.strides == (-1,)
assert view.ptr == _get_ptr(a)
Expand All @@ -381,8 +408,8 @@ def test_view_sliced_external_negative_offset(stride_order, view_as):
assert sliced_view.ptr == view.ptr - 3 * a.itemsize

assert not sliced_layout.is_dense
assert sliced_view.layout is sliced_layout
assert sliced_view._layout is sliced_layout
assert view.dtype == sliced_view.dtype
assert sliced_view.layout.itemsize == a_sliced.itemsize == layout.itemsize
assert sliced_view._layout.itemsize == a_sliced.itemsize == layout.itemsize
assert sliced_view.shape == a_sliced.shape
assert sliced_view.layout.strides_in_bytes == a_sliced.strides
assert sliced_view._layout.strides_in_bytes == a_sliced.strides