Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ on:
branches:
- "pull-request/[0-9]+"
- "main"
- "release/*"

jobs:
ci-vars:
Expand Down
14 changes: 4 additions & 10 deletions cuda_core/cuda/core/_layout.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,8 @@ cdef class _StridedLayout:
# ==============================

cdef inline int _init(_StridedLayout self, BaseLayout& base, int itemsize, bint divide_strides=False) except -1 nogil:
_validate_itemsize(itemsize)
if itemsize <= 0:
raise ValueError("itemsize must be positive")

if base.strides != NULL and divide_strides:
_divide_strides(base, itemsize)
Expand All @@ -123,7 +124,8 @@ cdef class _StridedLayout:
return 0

cdef inline stride_t _init_dense(_StridedLayout self, BaseLayout& base, int itemsize, OrderFlag order_flag, axis_vec_t* stride_order=NULL) except -1 nogil:
_validate_itemsize(itemsize)
if itemsize <= 0:
raise ValueError("itemsize must be positive")

cdef stride_t volume
if order_flag == ORDER_C:
Expand Down Expand Up @@ -643,14 +645,6 @@ cdef inline bint _normalize_axis(integer_t& axis, integer_t extent) except -1 no
return True


cdef inline int _validate_itemsize(int itemsize) except -1 nogil:
    # Check that ``itemsize`` is a positive power of two.
    # Returns 0 when the value is acceptable; otherwise raises ValueError
    # (propagated to the caller through the ``except -1`` error return).
    if itemsize < 1:
        raise ValueError("itemsize must be positive")
    # A power of two has a single bit set, so clearing the lowest set bit
    # must leave nothing behind.
    if (itemsize & (itemsize - 1)) != 0:
        raise ValueError("itemsize must be a power of two")
    return 0


cdef inline bint _is_unique(BaseLayout& base, axis_vec_t& stride_order) except -1 nogil:
if base.strides == NULL:
return True
Expand Down
29 changes: 14 additions & 15 deletions cuda_core/cuda/core/_layout.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ cdef class _StridedLayout:
Otherwise, the strides are assumed to be implicitly C-contiguous and the resulting
layout's :attr:`strides` will be None.
itemsize : int
The number of bytes per single element (dtype size). Must be a power of two.
The number of bytes per single element (dtype size).
divide_strides : bool, optional
If True, the provided :attr:`strides` will be divided by the :attr:`itemsize`.

Expand All @@ -40,7 +40,7 @@ cdef class _StridedLayout:
Attributes
----------
itemsize : int
The number of bytes per single element (dtype size). Must be a power of two.
The number of bytes per single element (dtype size).
slice_offset : int
The offset (as a number of elements, not bytes) of the element at
index ``(0,) * ndim``. See also :attr:`slice_offset_in_bytes`.
Expand Down Expand Up @@ -636,7 +636,6 @@ cdef class _StridedLayout:
In either case, the ``volume * itemsize`` of the layout remains the same.

The conversion is subject to the following constraints:
* The old and new itemsizes must be powers of two.
* The extent at ``axis`` must be a positive integer.
* The stride at ``axis`` must be 1.

Expand Down Expand Up @@ -1214,10 +1213,10 @@ cdef inline int64_t gcd(int64_t a, int64_t b) except? -1 nogil:

cdef inline int pack_extents(BaseLayout& out_layout, stride_t& out_slice_offset, BaseLayout& in_layout, stride_t slice_offset, int itemsize, int new_itemsize, intptr_t data_ptr, bint keep_dim, int axis) except -1 nogil:
cdef int ndim = in_layout.ndim
if new_itemsize <= 0 or new_itemsize & (new_itemsize - 1):
raise ValueError(f"new itemsize must be a power of two, got {new_itemsize}.")
if itemsize <= 0 or itemsize & (itemsize - 1):
raise ValueError(f"itemsize must be a power of two, got {itemsize}.")
if new_itemsize <= 0:
raise ValueError(f"new itemsize must be greater than zero, got {new_itemsize}.")
if itemsize <= 0:
raise ValueError(f"itemsize must be greater than zero, got {itemsize}.")
if new_itemsize <= itemsize:
if new_itemsize == itemsize:
return 1
Expand Down Expand Up @@ -1270,10 +1269,10 @@ cdef inline int unpack_extents(BaseLayout &out_layout, BaseLayout &in_layout, in
cdef int ndim = in_layout.ndim
if not _normalize_axis(axis, ndim):
raise ValueError(f"Invalid axis: {axis} out of range for {ndim}D tensor")
if new_itemsize <= 0 or new_itemsize & (new_itemsize - 1):
raise ValueError(f"new itemsize must be a power of two, got {new_itemsize}.")
if itemsize <= 0 or itemsize & (itemsize - 1):
raise ValueError(f"itemsize must be a power of two, got {itemsize}.")
if new_itemsize <= 0:
raise ValueError(f"new itemsize must be greater than zero, got {new_itemsize}.")
if itemsize <= 0:
raise ValueError(f"itemsize must be greater than zero, got {itemsize}.")
if new_itemsize >= itemsize:
if new_itemsize == itemsize:
return 1
Expand Down Expand Up @@ -1301,10 +1300,10 @@ cdef inline int unpack_extents(BaseLayout &out_layout, BaseLayout &in_layout, in

cdef inline int max_compatible_itemsize(BaseLayout& layout, stride_t slice_offset, int itemsize, int max_itemsize, intptr_t data_ptr, int axis) except? -1 nogil:
cdef int ndim = layout.ndim
if max_itemsize <= 0 or max_itemsize & (max_itemsize - 1):
raise ValueError(f"max_itemsize must be a power of two, got {max_itemsize}.")
if itemsize <= 0 or itemsize & (itemsize - 1):
raise ValueError(f"itemsize must be a power of two, got {itemsize}.")
if max_itemsize <= 0:
raise ValueError(f"max_itemsize must be greater than zero, got {max_itemsize}.")
if itemsize <= 0:
raise ValueError(f"itemsize must be greater than zero, got {itemsize}.")
if not _normalize_axis(axis, ndim):
raise ValueError(f"Invalid axis: {axis} out of range for {ndim}D tensor")
if max_itemsize < itemsize:
Expand Down
65 changes: 35 additions & 30 deletions cuda_core/cuda/core/_memoryview.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -137,22 +137,22 @@ cdef class StridedMemoryView:

@classmethod
def from_dlpack(cls, obj: object, stream_ptr: int | None=None) -> StridedMemoryView:
    """Create a view of ``obj`` by handing it to ``view_as_dlpack``.

    Parameters
    ----------
    obj : object
        The object to view; forwarded unchanged to ``view_as_dlpack``.
    stream_ptr : int, optional
        Forwarded unchanged to ``view_as_dlpack``.

    Returns
    -------
    StridedMemoryView
        A new instance of ``cls`` filled in by ``view_as_dlpack``.
    """
    # Allocate with __new__ so that __init__ is not run. The earlier form
    # built the instance with ``cls()`` inside a warnings-suppression block;
    # declaring and constructing ``buf`` exactly once avoids both the
    # duplicate declaration and the discarded extra instance.
    cdef StridedMemoryView buf = StridedMemoryView.__new__(cls)
    view_as_dlpack(obj, stream_ptr, buf)
    return buf

@classmethod
def from_cuda_array_interface(cls, obj: object, stream_ptr: int | None=None) -> StridedMemoryView:
    """Create a view of ``obj`` by handing it to ``view_as_cai``.

    Parameters
    ----------
    obj : object
        The object to view; forwarded unchanged to ``view_as_cai``.
    stream_ptr : int, optional
        Forwarded unchanged to ``view_as_cai``.

    Returns
    -------
    StridedMemoryView
        A new instance of ``cls`` filled in by ``view_as_cai``.
    """
    # Allocate with __new__ so that __init__ is not run. The earlier form
    # built the instance with ``cls()`` inside a warnings-suppression block;
    # declaring and constructing ``buf`` exactly once avoids both the
    # duplicate declaration and the discarded extra instance.
    cdef StridedMemoryView buf = StridedMemoryView.__new__(cls)
    view_as_cai(obj, stream_ptr, buf)
    return buf

@classmethod
def from_array_interface(cls, obj: object) -> StridedMemoryView:
    """Create a view of ``obj`` by handing it to ``view_as_array_interface``.

    Parameters
    ----------
    obj : object
        The object to view; forwarded unchanged to
        ``view_as_array_interface``.

    Returns
    -------
    StridedMemoryView
        A new instance of ``cls`` filled in by ``view_as_array_interface``.
    """
    # __new__ allocates the instance without running __init__.
    cdef StridedMemoryView fresh_view = StridedMemoryView.__new__(cls)
    view_as_array_interface(obj, fresh_view)
    return fresh_view

@classmethod
def from_any_interface(cls, obj: object, stream_ptr: int | None = None) -> StridedMemoryView:
if check_has_dlpack(obj):
Expand Down Expand Up @@ -365,8 +365,7 @@ cdef class StridedMemoryView:
if self.dl_tensor != NULL:
self._dtype = dtype_dlpack_to_numpy(&self.dl_tensor.dtype)
elif self.metadata is not None:
# TODO: this only works for built-in numeric types
self._dtype = _typestr2dtype[self.metadata["typestr"]]
self._dtype = _typestr2dtype(self.metadata["typestr"])
return self._dtype


Expand Down Expand Up @@ -486,25 +485,14 @@ cdef StridedMemoryView view_as_dlpack(obj, stream_ptr, view=None):
return buf


_builtin_numeric_dtypes = [
numpy.dtype("uint8"),
numpy.dtype("uint16"),
numpy.dtype("uint32"),
numpy.dtype("uint64"),
numpy.dtype("int8"),
numpy.dtype("int16"),
numpy.dtype("int32"),
numpy.dtype("int64"),
numpy.dtype("float16"),
numpy.dtype("float32"),
numpy.dtype("float64"),
numpy.dtype("complex64"),
numpy.dtype("complex128"),
numpy.dtype("bool"),
]
# Doing it once to avoid repeated overhead
_typestr2dtype = {dtype.str: dtype for dtype in _builtin_numeric_dtypes}
_typestr2itemsize = {dtype.str: dtype.itemsize for dtype in _builtin_numeric_dtypes}
# maxsize=128 is the documented default of a bare ``@functools.lru_cache``.
@functools.lru_cache(maxsize=128)
def _typestr2dtype(str typestr):
    """Return the ``numpy.dtype`` constructed from ``typestr``.

    Results are memoized, so repeated lookups of the same string do not
    call the ``numpy.dtype`` constructor again.
    """
    resolved = numpy.dtype(typestr)
    return resolved


@functools.lru_cache
def _typestr2itemsize(str typestr):
    """Return the ``itemsize`` of the dtype that ``typestr`` resolves to.

    The dtype is obtained from ``_typestr2dtype``; the result is memoized
    per ``typestr``.
    """
    resolved = _typestr2dtype(typestr)
    return resolved.itemsize


cdef object dtype_dlpack_to_numpy(DLDataType* dtype):
Expand Down Expand Up @@ -609,6 +597,23 @@ cpdef StridedMemoryView view_as_cai(obj, stream_ptr, view=None):
return buf


cpdef StridedMemoryView view_as_array_interface(obj, view=None):
    """Fill a :obj:`StridedMemoryView` from ``obj.__array_interface__``.

    Parameters
    ----------
    obj : object
        Object whose ``__array_interface__`` attribute is a dict.
    view : StridedMemoryView, optional
        Instance to populate. When None, a new one is created.

    Returns
    -------
    StridedMemoryView
        ``view`` when it was supplied, otherwise the newly created instance.

    Raises
    ------
    BufferError
        If the interface ``"version"`` is below 3, or if a ``"mask"`` entry
        is present and not None.
    """
    cdef dict iface = obj.__array_interface__
    cdef StridedMemoryView result

    # Reject unsupported interfaces before any instance is created.
    if iface["version"] < 3:
        raise BufferError("only NumPy Array Interface v3 or above is supported")
    if iface.get("mask") is not None:
        raise BufferError("mask is not supported")

    if view is None:
        result = StridedMemoryView()
    else:
        result = view

    result.exporting_obj = obj
    result.metadata = iface
    result.dl_tensor = NULL
    # The "data" entry is unpacked as a (pointer, read-only flag) pair.
    ptr, readonly = iface["data"]
    result.ptr = ptr
    result.readonly = readonly
    result.is_device_accessible = False
    # The device id is whatever cuCtxGetDevice reports at call time.
    result.device_id = handle_return(driver.cuCtxGetDevice())
    return result


def args_viewable_as_strided_memory(tuple arg_indices):
"""
Decorator to create proxy objects to :obj:`StridedMemoryView` for the
Expand Down Expand Up @@ -664,7 +669,7 @@ cdef _StridedLayout layout_from_cai(object metadata):
cdef _StridedLayout layout = _StridedLayout.__new__(_StridedLayout)
cdef object shape = metadata["shape"]
cdef object strides = metadata.get("strides")
cdef int itemsize = _typestr2itemsize[metadata["typestr"]]
cdef int itemsize = _typestr2itemsize(metadata["typestr"])
layout.init_from_tuple(shape, strides, itemsize, True)
return layout

Expand Down
2 changes: 1 addition & 1 deletion cuda_core/cuda/core/_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
#
# SPDX-License-Identifier: Apache-2.0

__version__ = "0.5.0"
__version__ = "0.5.1"
4 changes: 4 additions & 0 deletions cuda_core/docs/nv-versions.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
"version": "latest",
"url": "https://nvidia.github.io/cuda-python/cuda-core/latest/"
},
{
"version": "0.5.1",
"url": "https://nvidia.github.io/cuda-python/cuda-core/0.5.1/"
},
{
"version": "0.5.0",
"url": "https://nvidia.github.io/cuda-python/cuda-core/0.5.0/"
Expand Down
18 changes: 3 additions & 15 deletions cuda_core/pixi.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion cuda_core/pixi.toml
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ cu12 = { features = ["cu12", "test", "cython-tests"], solve-group = "cu12" }
# TODO: check if these can be extracted from pyproject.toml
[package]
name = "cuda-core"
version = "0.5.0"
version = "0.5.1"

[package.build]
backend = { name = "pixi-build-python", version = "*" }
Expand Down
2 changes: 1 addition & 1 deletion cuda_core/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def init_cuda():
driver.cuDevicePrimaryCtxSetFlags(device.device_id, driver.CUctx_flags.CU_CTX_SCHED_BLOCKING_SYNC)
)

yield
yield device
_ = _device_unset_current()


Expand Down
Loading
Loading