4 changes: 1 addition & 3 deletions cuda_core/cuda/core/experimental/_layout.pxd
@@ -297,9 +297,7 @@ cdef class _StridedLayout:
raise ValueError(
f"Allocation size for a layout that maps elements "
f"to negative memory offsets is ambiguous. "
f"The layout's min_offset is {min_offset}. "
f"To create a supported layout with the same shape "
f"please use _StridedLayout.to_dense()."
f"The layout's min_offset is {min_offset}."
)
if max_offset < min_offset:
return 0
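The check above is why the size computation refuses layouts whose strides map elements to negative offsets: the base pointer no longer marks the start of the allocation, so the required size cannot be derived from the layout alone. A minimal pure-Python sketch of the rule (the helper name and the zero-extent shortcut are illustrative, not the actual Cython implementation):

```python
def required_size_in_bytes(shape, strides, itemsize):
    """Pure-Python sketch of the offset-range rule enforced above."""
    if any(extent == 0 for extent in shape):
        return 0  # an empty tensor needs no storage
    # Offsets (in elements) of the lowest and highest addressable element.
    min_offset = sum((n - 1) * s for n, s in zip(shape, strides) if s < 0)
    max_offset = sum((n - 1) * s for n, s in zip(shape, strides) if s > 0)
    if min_offset < 0:
        raise ValueError(
            "Allocation size for a layout that maps elements to negative "
            f"memory offsets is ambiguous. The layout's min_offset is {min_offset}."
        )
    return (max_offset + 1) * itemsize
```

For example, the layout exercised by `test_from_buffer_disallowed_negative_offset` further down, shape `(5, 4)` with strides `(-4, 1)` and itemsize 1, has `min_offset == -16` and is rejected.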
12 changes: 6 additions & 6 deletions cuda_core/cuda/core/experimental/_layout.pyx
@@ -48,8 +48,8 @@ cdef class _StridedLayout:

def __init__(
self : _StridedLayout,
shape : tuple[int],
strides : tuple[int] | None,
shape : tuple[int, ...],
strides : tuple[int, ...] | None,
itemsize : int,
divide_strides : bool = False
) -> None:
@@ -455,7 +455,7 @@ cdef class _StridedLayout:
a_view = StridedMemoryView(a, -1)
# get the original layout of ``a`` and convert it to a dense layout
# to avoid overallocating memory (e.g. if ``a`` was sliced)
layout = a_view.layout.to_dense()
layout = a_view._layout.to_dense()
# get the required size in bytes to fit the tensor
required_size = layout.required_size_in_bytes()
# allocate the memory on the device
@@ -669,12 +669,12 @@ cdef class _StridedLayout:
# Viewing (5, 6) float array as (5, 3) complex64 array.
a = numpy.ones((5, 6), dtype=numpy.float32)
float_view = StridedMemoryView(a, -1)
layout = float_view.layout
layout = float_view._layout
assert layout.shape == (5, 6)
assert layout.itemsize == 4
complex_view = float_view.view(layout.repacked(8), numpy.complex64)
assert complex_view.layout.shape == (5, 3)
assert complex_view.layout.itemsize == 8
assert complex_view._layout.shape == (5, 3)
assert complex_view._layout.itemsize == 8
b = numpy.from_dlpack(complex_view)
assert b.shape == (5, 3)
"""
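The second docstring above views a `(5, 6)` float32 array as a `(5, 3)` complex64 array through `repacked(8)`. The arithmetic is the same as NumPy's dtype reinterpretation: growing the itemsize from 4 to 8 bytes halves the extent of the innermost dense dimension while the total byte count stays fixed. A NumPy-only analogue (no `_StridedLayout` involved) for reference:

```python
import numpy as np

# NumPy analogue of the repacked(8) example above: reinterpreting 4-byte
# elements as 8-byte elements halves the innermost extent; bytes are unchanged.
a = np.ones((5, 6), dtype=np.float32)   # 5 * 6 * 4 = 120 bytes
b = a.view(np.complex64)                # 5 * 3 * 8 = 120 bytes, same buffer
assert b.shape == (5, 3)
assert a.nbytes == b.nbytes == 120
```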
60 changes: 32 additions & 28 deletions cuda_core/cuda/core/experimental/_memoryview.pyx
@@ -28,7 +28,8 @@ cdef class StridedMemoryView:

1. Using the :obj:`args_viewable_as_strided_memory` decorator (recommended)
2. Explicit construction relying on DLPack or CUDA Array Interface, see below.
3. From :obj:`~_memory.Buffer` and a :obj:`_StridedLayout` (see :meth:`from_buffer` classmethod)
3. From :obj:`~_memory.Buffer` and shape and strides tuples (see
:meth:`from_buffer` classmethod)

``StridedMemoryView(obj, stream_ptr)`` can be used to create a view from
objects supporting either DLPack (up to v1.0) or CUDA Array Interface
@@ -160,22 +161,20 @@

@classmethod
def from_buffer(
cls, buffer : Buffer, layout : _StridedLayout,
cls,
buffer : Buffer,
shape : tuple[int, ...],
strides : tuple[int, ...] | None = None,
*,
itemsize : int | None = None,
dtype : numpy.dtype | None = None,
is_readonly : bool = False
) -> StridedMemoryView:
"""
Creates a :obj:`StridedMemoryView` instance from a :obj:`~_memory.Buffer` and a :obj:`_StridedLayout`.
Creates a :obj:`StridedMemoryView` instance from a :obj:`~_memory.Buffer` and explicit shape and strides tuples.
The buffer can be either an allocation obtained from a :obj:`MemoryResource` or an external allocation
wrapped in a :obj:`~_memory.Buffer` object with ``Buffer.from_handle(ptr, size, owner=...)``.

.. hint::
When allocating the memory for a given layout, the required allocation size
can be obtained with the :meth:`_StridedLayout.required_size_in_bytes` method.
It is best to use the :meth:`_StridedLayout.to_dense` method
first to make sure the layout is contiguous, to avoid overallocating memory
for layouts with gaps.

.. caution::
When creating a :obj:`StridedMemoryView` from a :obj:`~_memory.Buffer`,
no synchronization is performed. It is the user's responsibility to ensure
@@ -185,19 +184,33 @@
----------
buffer : :obj:`~_memory.Buffer`
The buffer to create the view from.
layout : :obj:`_StridedLayout`
shape : :obj:`tuple`
The shape of the view, i.e. the number of elements along each dimension of the buffer.
strides : :obj:`tuple`, optional
The strides of the view, given in number of elements (not bytes).
If not specified, a dense layout is assumed.
itemsize : int, optional
The size in bytes of a single element. Either ``itemsize`` or ``dtype`` must be specified.
dtype : :obj:`numpy.dtype`, optional
dtype : :obj:`numpy.dtype`
The dtype of the elements. Either ``dtype`` or ``itemsize`` must be specified;
if both are given, ``dtype.itemsize`` must match ``itemsize``.
To view the buffer with a different itemsize, please use :meth:`_StridedLayout.repacked`
first to transform the layout to the desired itemsize.
is_readonly : bool, optional
Whether to mark the view as readonly.
"""
cdef StridedMemoryView view = StridedMemoryView.__new__(cls)
view_buffer_strided(view, buffer, layout, dtype, is_readonly)
if itemsize is None and dtype is None:
raise ValueError("Either itemsize or dtype must be specified")
if itemsize is not None and dtype is not None and itemsize != dtype.itemsize:
raise ValueError(
f"itemsize ({itemsize}) does not match dtype.itemsize ({dtype.itemsize})"
)
# (itemsize is None XOR dtype is None) OR they are equal
view_buffer_strided(
view,
buffer,
_StridedLayout(shape=shape, strides=strides, itemsize=getattr(dtype, "itemsize", itemsize)),
dtype,
is_readonly,
)
return view

def __dealloc__(self):
@@ -245,22 +258,14 @@ cdef class StridedMemoryView:
The copy can be performed between the following memory spaces:
host-to-device, device-to-host, device-to-device (on the same device).

The following conditions must be met:
* Both views must have compatible shapes, i.e. the shapes must be equal
or the source view's shape must be broadcastable to the target view's shape
(see :meth:`_StridedLayout.broadcast_to`).
* Both views must have the same :attr:`dtype` (or :attr:`_StridedLayout.itemsize`
if :attr:`dtype` is not specified).
* The destination's layout must be unique (see :meth:`_StridedLayout.is_unique`).

Parameters
----------
other : StridedMemoryView
The view to copy data from.
stream : Stream | None, optional
The stream to schedule the copy on.
allocator : MemoryResource | None, optional
If temporary buffers are needed, the specifed memory resources
If temporary buffers are needed, the specified memory resources
will be used to allocate the memory. If not specified, default
resources will be used.
blocking : bool | None, optional
@@ -289,7 +294,7 @@
raise NotImplementedError("Sorry, not supported: copy_to")

@property
def layout(self) -> _StridedLayout:
def _layout(self) -> _StridedLayout:
"""
The layout of the tensor. For StridedMemoryView created from DLPack or CAI,
the layout is inferred from the tensor object's metadata.
@@ -325,7 +330,7 @@
return (f"StridedMemoryView(ptr={self.ptr},\n"
+ f" shape={self.shape},\n"
+ f" strides={self.strides},\n"
+ f" itemsize={self.layout.itemsize},\n"
+ f" itemsize={self._layout.itemsize},\n"
+ f" dtype={get_simple_repr(self.dtype)},\n"
+ f" device_id={self.device_id},\n"
+ f" is_device_accessible={self.is_device_accessible},\n"
@@ -677,8 +682,7 @@ cdef inline int view_buffer_strided(
if dtype.itemsize != layout.itemsize:
raise ValueError(
f"The dtype's itemsize ({dtype.itemsize}) does not match the layout's "
f"itemsize ({layout.itemsize}). Please use :meth:`_StridedLayout.repacked` "
f"to transform the layout to the desired itemsize."
f"itemsize ({layout.itemsize})."
)
# Check the layout's offset range [min_offset, max_offset] fits
# within the [0, buffer.size - 1] range.
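Taken together, the new `from_buffer` signature lets callers describe the view with plain shape/strides tuples instead of constructing a `_StridedLayout` first. A sketch of the updated call, modelled on `test_from_buffer` below (the allocation route via `Device().memory_resource` follows the tests; other setups may obtain the `Buffer` differently, e.g. via `Buffer.from_handle`):

```python
import math

import numpy as np
from cuda.core.experimental import Device
from cuda.core.experimental.utils import StridedMemoryView

dev = Device()
dev.set_current()

shape = (10, 13)
dtype = np.dtype(np.float32)

# Allocate exactly enough bytes for a dense tensor of this shape and dtype.
buffer = dev.memory_resource.allocate(math.prod(shape) * dtype.itemsize)

# strides is omitted here, which is assumed to imply a dense layout; pass
# strides explicitly (in elements, not bytes) for non-contiguous views, or
# itemsize= instead of dtype= when no dtype object is at hand.
view = StridedMemoryView.from_buffer(buffer, shape=shape, dtype=dtype)
assert view.shape == shape
assert view.dtype == dtype
assert view.ptr == int(buffer.handle)
```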
1 change: 0 additions & 1 deletion cuda_core/cuda/core/experimental/utils.py
@@ -2,7 +2,6 @@
#
# SPDX-License-Identifier: Apache-2.0

from cuda.core.experimental._layout import _StridedLayout # noqa: F401
from cuda.core.experimental._memoryview import (
StridedMemoryView, # noqa: F401
args_viewable_as_strided_memory, # noqa: F401
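With the re-export removed, `_StridedLayout` is no longer part of the public `utils` namespace; the tests below switch to importing it from the private module, e.g.:

```python
# The private layout class now comes from its defining module, while the
# public helpers stay in cuda.core.experimental.utils.
from cuda.core.experimental._layout import _StridedLayout
from cuda.core.experimental.utils import StridedMemoryView, args_viewable_as_strided_memory
```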
73 changes: 50 additions & 23 deletions cuda_core/tests/test_utils.py
@@ -16,7 +16,8 @@
import numpy as np
import pytest
from cuda.core.experimental import Device
from cuda.core.experimental.utils import StridedMemoryView, _StridedLayout, args_viewable_as_strided_memory
from cuda.core.experimental._layout import _StridedLayout
from cuda.core.experimental.utils import StridedMemoryView, args_viewable_as_strided_memory


def test_cast_to_3_tuple_success():
@@ -234,44 +235,60 @@ def _dense_strides(shape, stride_order):
return tuple(strides)


@pytest.mark.parametrize("shape", [tuple(), (2, 3), (10, 10), (10, 13, 11)])
@pytest.mark.parametrize("itemsize", [1, 4])
@pytest.mark.parametrize("shape", [tuple(), (2, 3), (10, 10), (10, 13, 11)], ids=str)
@pytest.mark.parametrize("dtype", [np.dtype(np.int8), np.dtype(np.uint32)], ids=str)
@pytest.mark.parametrize("stride_order", ["C", "F"])
@pytest.mark.parametrize("readonly", [True, False])
def test_from_buffer(shape, itemsize, stride_order, readonly):
def test_from_buffer(shape, dtype, stride_order, readonly):
dev = Device()
dev.set_current()
layout = _StridedLayout.dense(shape=shape, itemsize=itemsize, stride_order=stride_order)
layout = _StridedLayout.dense(shape=shape, itemsize=dtype.itemsize, stride_order=stride_order)
required_size = layout.required_size_in_bytes()
assert required_size == math.prod(shape) * itemsize
assert required_size == math.prod(shape) * dtype.itemsize
buffer = dev.memory_resource.allocate(required_size)
view = StridedMemoryView.from_buffer(buffer, layout, is_readonly=readonly)
view = StridedMemoryView.from_buffer(buffer, shape=shape, strides=layout.strides, dtype=dtype, is_readonly=readonly)
assert view.exporting_obj is buffer
assert view.layout is layout
assert view._layout == layout
assert view.ptr == int(buffer.handle)
assert view.shape == shape
assert view.strides == _dense_strides(shape, stride_order)
assert view.dtype is None
assert view.dtype == dtype
assert view.device_id == dev.device_id
assert view.is_device_accessible
assert view.readonly == readonly


@pytest.mark.parametrize(
("dtype", "itemsize", "msg"),
[
(np.dtype("int16"), 1, "itemsize .+ does not match dtype.itemsize .+"),
(None, None, "itemsize or dtype must be specified"),
],
)
def test_from_buffer_incompatible_dtype_and_itemsize(dtype, itemsize, msg):
layout = _StridedLayout.dense((5,), 2)
device = Device()
device.set_current()
buffer = device.memory_resource.allocate(layout.required_size_in_bytes())
with pytest.raises(ValueError, match=msg):
StridedMemoryView.from_buffer(buffer, (5,), dtype=dtype, itemsize=itemsize)


@pytest.mark.parametrize("stride_order", ["C", "F"])
def test_from_buffer_sliced(stride_order):
layout = _StridedLayout.dense((5, 7), 2, stride_order=stride_order)
device = Device()
device.set_current()
buffer = device.memory_resource.allocate(layout.required_size_in_bytes())
view = StridedMemoryView.from_buffer(buffer, layout)
view = StridedMemoryView.from_buffer(buffer, (5, 7), dtype=np.dtype(np.int16))
assert view.shape == (5, 7)
assert int(buffer.handle) == view.ptr

sliced_view = view.view(layout[:-2, 3:])
assert sliced_view.shape == (3, 4)
expected_offset = 3 if stride_order == "C" else 3 * 5
assert sliced_view.layout.slice_offset == expected_offset
assert sliced_view.layout.slice_offset_in_bytes == expected_offset * 2
assert sliced_view._layout.slice_offset == expected_offset
assert sliced_view._layout.slice_offset_in_bytes == expected_offset * 2
assert sliced_view.ptr == view.ptr + expected_offset * 2
assert int(buffer.handle) + expected_offset * 2 == sliced_view.ptr

@@ -282,16 +299,26 @@ def test_from_buffer_too_small():
d.set_current()
buffer = d.memory_resource.allocate(20)
with pytest.raises(ValueError, match="Expected at least 40 bytes, got 20 bytes."):
StridedMemoryView.from_buffer(buffer, layout)
StridedMemoryView.from_buffer(
buffer,
shape=layout.shape,
strides=layout.strides,
dtype=np.dtype("int16"),
)


def test_from_buffer_disallowed_negative_offset():
layout = _StridedLayout((5, 4), (-4, 1), 1)
d = Device()
d.set_current()
buffer = d.memory_resource.allocate(20)
with pytest.raises(ValueError, match="please use _StridedLayout.to_dense()."):
StridedMemoryView.from_buffer(buffer, layout)
with pytest.raises(ValueError):
StridedMemoryView.from_buffer(
buffer,
shape=layout.shape,
strides=layout.strides,
dtype=np.dtype("uint8"),
)


class _EnforceCAIView:
@@ -331,7 +358,7 @@ def test_view_sliced_external(shape, slices, stride_order, view_as):
pytest.skip("CuPy is not installed")
a = cp.arange(math.prod(shape), dtype=cp.int32).reshape(shape, order=stride_order)
view = StridedMemoryView.from_cuda_array_interface(_EnforceCAIView(a), -1)
layout = view.layout
layout = view._layout
assert layout.is_dense
assert layout.required_size_in_bytes() == a.nbytes
assert view.ptr == _get_ptr(a)
@@ -344,11 +371,11 @@

assert 0 <= sliced_layout.required_size_in_bytes() <= a.nbytes
assert not sliced_layout.is_dense
assert sliced_view.layout is sliced_layout
assert sliced_view._layout is sliced_layout
assert view.dtype == sliced_view.dtype
assert sliced_view.layout.itemsize == a_sliced.itemsize == layout.itemsize
assert sliced_view._layout.itemsize == a_sliced.itemsize == layout.itemsize
assert sliced_view.shape == a_sliced.shape
assert sliced_view.layout.strides_in_bytes == a_sliced.strides
assert sliced_view._layout.strides_in_bytes == a_sliced.strides


@pytest.mark.parametrize(
Expand All @@ -369,7 +396,7 @@ def test_view_sliced_external_negative_offset(stride_order, view_as):
a = cp.arange(math.prod(shape), dtype=cp.int32).reshape(shape, order=stride_order)
a = a[::-1]
view = StridedMemoryView.from_cuda_array_interface(_EnforceCAIView(a), -1)
layout = view.layout
layout = view._layout
assert not layout.is_dense
assert layout.strides == (-1,)
assert view.ptr == _get_ptr(a)
Expand All @@ -381,8 +408,8 @@ def test_view_sliced_external_negative_offset(stride_order, view_as):
assert sliced_view.ptr == view.ptr - 3 * a.itemsize

assert not sliced_layout.is_dense
assert sliced_view.layout is sliced_layout
assert sliced_view._layout is sliced_layout
assert view.dtype == sliced_view.dtype
assert sliced_view.layout.itemsize == a_sliced.itemsize == layout.itemsize
assert sliced_view._layout.itemsize == a_sliced.itemsize == layout.itemsize
assert sliced_view.shape == a_sliced.shape
assert sliced_view.layout.strides_in_bytes == a_sliced.strides
assert sliced_view._layout.strides_in_bytes == a_sliced.strides