Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions cuda_core/cuda/core/_cpp/resource_handles.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ struct StreamBox {
};
} // namespace

StreamHandle create_stream_handle(ContextHandle h_ctx, unsigned int flags, int priority) {
StreamHandle create_stream_handle(const ContextHandle& h_ctx, unsigned int flags, int priority) {
GILReleaseGuard gil;
CUstream stream;
if (CUDA_SUCCESS != (err = p_cuStreamCreateWithPriority(&stream, flags, priority))) {
Expand Down Expand Up @@ -301,7 +301,7 @@ struct EventBox {
};
} // namespace

EventHandle create_event_handle(ContextHandle h_ctx, unsigned int flags) {
EventHandle create_event_handle(const ContextHandle& h_ctx, unsigned int flags) {
GILReleaseGuard gil;
CUevent event;
if (CUDA_SUCCESS != (err = p_cuEventCreate(&event, flags))) {
Expand Down Expand Up @@ -449,11 +449,11 @@ StreamHandle deallocation_stream(const DevicePtrHandle& h) noexcept {
return get_box(h)->h_stream;
}

void set_deallocation_stream(const DevicePtrHandle& h, StreamHandle h_stream) noexcept {
get_box(h)->h_stream = std::move(h_stream);
void set_deallocation_stream(const DevicePtrHandle& h, const StreamHandle& h_stream) noexcept {
get_box(h)->h_stream = h_stream;
}

DevicePtrHandle deviceptr_alloc_from_pool(size_t size, MemoryPoolHandle h_pool, StreamHandle h_stream) {
DevicePtrHandle deviceptr_alloc_from_pool(size_t size, const MemoryPoolHandle& h_pool, const StreamHandle& h_stream) {
GILReleaseGuard gil;
CUdeviceptr ptr;
if (CUDA_SUCCESS != (err = p_cuMemAllocFromPoolAsync(&ptr, size, *h_pool, as_cu(h_stream)))) {
Expand All @@ -471,7 +471,7 @@ DevicePtrHandle deviceptr_alloc_from_pool(size_t size, MemoryPoolHandle h_pool,
return DevicePtrHandle(box, &box->resource);
}

DevicePtrHandle deviceptr_alloc_async(size_t size, StreamHandle h_stream) {
DevicePtrHandle deviceptr_alloc_async(size_t size, const StreamHandle& h_stream) {
GILReleaseGuard gil;
CUdeviceptr ptr;
if (CUDA_SUCCESS != (err = p_cuMemAllocAsync(&ptr, size, as_cu(h_stream)))) {
Expand Down Expand Up @@ -612,7 +612,7 @@ struct ExportDataKeyHash {
static std::mutex ipc_ptr_cache_mutex;
static std::unordered_map<ExportDataKey, std::weak_ptr<DevicePtrBox>, ExportDataKeyHash> ipc_ptr_cache;

DevicePtrHandle deviceptr_import_ipc(MemoryPoolHandle h_pool, const void* export_data, StreamHandle h_stream) {
DevicePtrHandle deviceptr_import_ipc(const MemoryPoolHandle& h_pool, const void* export_data, const StreamHandle& h_stream) {
auto data = const_cast<CUmemPoolPtrExportData*>(
reinterpret_cast<const CUmemPoolPtrExportData*>(export_data));

Expand Down
16 changes: 8 additions & 8 deletions cuda_core/cuda/core/_cpp/resource_handles.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ ContextHandle get_current_context();
// The stream structurally depends on the provided context handle.
// When the last reference is released, cuStreamDestroy is called automatically.
// Returns empty handle on error (caller must check).
StreamHandle create_stream_handle(ContextHandle h_ctx, unsigned int flags, int priority);
StreamHandle create_stream_handle(const ContextHandle& h_ctx, unsigned int flags, int priority);

// Create a non-owning stream handle (references existing stream).
// Use for borrowed streams (from foreign code) or built-in streams.
Expand Down Expand Up @@ -122,7 +122,7 @@ StreamHandle get_per_thread_stream();
// The event structurally depends on the provided context handle.
// When the last reference is released, cuEventDestroy is called automatically.
// Returns empty handle on error (caller must check).
EventHandle create_event_handle(ContextHandle h_ctx, unsigned int flags);
EventHandle create_event_handle(const ContextHandle& h_ctx, unsigned int flags);

// Create an owning event handle without context dependency.
// Use for temporary events that are created and destroyed in the same scope.
Expand Down Expand Up @@ -173,13 +173,13 @@ using DevicePtrHandle = std::shared_ptr<const CUdeviceptr>;
// Returns empty handle on error (caller must check).
DevicePtrHandle deviceptr_alloc_from_pool(
size_t size,
MemoryPoolHandle h_pool,
StreamHandle h_stream);
const MemoryPoolHandle& h_pool,
const StreamHandle& h_stream);

// Allocate device memory asynchronously via cuMemAllocAsync.
// When the last reference is released, cuMemFreeAsync is called on the stored stream.
// Returns empty handle on error (caller must check).
DevicePtrHandle deviceptr_alloc_async(size_t size, StreamHandle h_stream);
DevicePtrHandle deviceptr_alloc_async(size_t size, const StreamHandle& h_stream);

// Allocate device memory synchronously via cuMemAlloc.
// When the last reference is released, cuMemFree is called.
Expand Down Expand Up @@ -207,16 +207,16 @@ DevicePtrHandle deviceptr_create_with_owner(CUdeviceptr ptr, PyObject* owner);
// Note: Does not yet implement reference counting for nvbug 5570902.
// On error, returns empty handle and sets thread-local error (use get_last_error()).
DevicePtrHandle deviceptr_import_ipc(
MemoryPoolHandle h_pool,
const MemoryPoolHandle& h_pool,
const void* export_data,
StreamHandle h_stream);
const StreamHandle& h_stream);

// Access the deallocation stream for a device pointer handle (read-only).
// For non-owning handles, the stream is not used but can still be accessed.
StreamHandle deallocation_stream(const DevicePtrHandle& h) noexcept;

// Set the deallocation stream for a device pointer handle.
void set_deallocation_stream(const DevicePtrHandle& h, StreamHandle h_stream) noexcept;
void set_deallocation_stream(const DevicePtrHandle& h, const StreamHandle& h_stream) noexcept;

// ============================================================================
// Overloaded helper functions to extract raw resources from handles
Expand Down
4 changes: 2 additions & 2 deletions cuda_core/cuda/core/_memory/_managed_memory_resource.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -90,11 +90,11 @@ cdef class ManagedMemoryResource(_MemPool):
opts_base._location = cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE

opts_base._type = cydriver.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_MANAGED

super().__init__(device_id, opts_base)
ELSE:
raise RuntimeError("ManagedMemoryResource requires CUDA 13.0 or later")

super().__init__(device_id, opts_base)

@property
def is_device_accessible(self) -> bool:
"""Return True. This memory resource provides device-accessible buffers."""
Expand Down
46 changes: 23 additions & 23 deletions cuda_core/cuda/core/_resource_handles.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -56,41 +56,41 @@ cdef cydriver.CUresult peek_last_error() noexcept nogil
cdef void clear_last_error() noexcept nogil

# Context handles
cdef ContextHandle create_context_handle_ref(cydriver.CUcontext ctx) nogil except+
cdef ContextHandle get_primary_context(int device_id) nogil except+
cdef ContextHandle get_current_context() nogil except+
cdef ContextHandle create_context_handle_ref(cydriver.CUcontext ctx) except+ nogil
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you include in the PR description why nogil except+ is different compared to except+ nogil?

cdef ContextHandle get_primary_context(int device_id) except+ nogil
cdef ContextHandle get_current_context() except+ nogil

# Stream handles
cdef StreamHandle create_stream_handle(
ContextHandle h_ctx, unsigned int flags, int priority) nogil except+
cdef StreamHandle create_stream_handle_ref(cydriver.CUstream stream) nogil except+
cdef StreamHandle create_stream_handle_with_owner(cydriver.CUstream stream, object owner) nogil except+
cdef StreamHandle get_legacy_stream() nogil except+
cdef StreamHandle get_per_thread_stream() nogil except+
const ContextHandle& h_ctx, unsigned int flags, int priority) except+ nogil
cdef StreamHandle create_stream_handle_ref(cydriver.CUstream stream) except+ nogil
cdef StreamHandle create_stream_handle_with_owner(cydriver.CUstream stream, object owner) except+ nogil
cdef StreamHandle get_legacy_stream() except+ nogil
cdef StreamHandle get_per_thread_stream() except+ nogil

# Event handles
cdef EventHandle create_event_handle(ContextHandle h_ctx, unsigned int flags) nogil except+
cdef EventHandle create_event_handle_noctx(unsigned int flags) nogil except+
cdef EventHandle create_event_handle(const ContextHandle& h_ctx, unsigned int flags) except+ nogil
cdef EventHandle create_event_handle_noctx(unsigned int flags) except+ nogil
cdef EventHandle create_event_handle_ipc(
const cydriver.CUipcEventHandle& ipc_handle) nogil except+
const cydriver.CUipcEventHandle& ipc_handle) except+ nogil

# Memory pool handles
cdef MemoryPoolHandle create_mempool_handle(
const cydriver.CUmemPoolProps& props) nogil except+
cdef MemoryPoolHandle create_mempool_handle_ref(cydriver.CUmemoryPool pool) nogil except+
cdef MemoryPoolHandle get_device_mempool(int device_id) nogil except+
const cydriver.CUmemPoolProps& props) except+ nogil
cdef MemoryPoolHandle create_mempool_handle_ref(cydriver.CUmemoryPool pool) except+ nogil
cdef MemoryPoolHandle get_device_mempool(int device_id) except+ nogil
cdef MemoryPoolHandle create_mempool_handle_ipc(
int fd, cydriver.CUmemAllocationHandleType handle_type) nogil except+
int fd, cydriver.CUmemAllocationHandleType handle_type) except+ nogil

# Device pointer handles
cdef DevicePtrHandle deviceptr_alloc_from_pool(
size_t size, MemoryPoolHandle h_pool, StreamHandle h_stream) nogil except+
cdef DevicePtrHandle deviceptr_alloc_async(size_t size, StreamHandle h_stream) nogil except+
cdef DevicePtrHandle deviceptr_alloc(size_t size) nogil except+
cdef DevicePtrHandle deviceptr_alloc_host(size_t size) nogil except+
cdef DevicePtrHandle deviceptr_create_ref(cydriver.CUdeviceptr ptr) nogil except+
cdef DevicePtrHandle deviceptr_create_with_owner(cydriver.CUdeviceptr ptr, object owner) nogil except+
size_t size, const MemoryPoolHandle& h_pool, const StreamHandle& h_stream) except+ nogil
cdef DevicePtrHandle deviceptr_alloc_async(size_t size, const StreamHandle& h_stream) except+ nogil
cdef DevicePtrHandle deviceptr_alloc(size_t size) except+ nogil
cdef DevicePtrHandle deviceptr_alloc_host(size_t size) except+ nogil
cdef DevicePtrHandle deviceptr_create_ref(cydriver.CUdeviceptr ptr) except+ nogil
cdef DevicePtrHandle deviceptr_create_with_owner(cydriver.CUdeviceptr ptr, object owner) except+ nogil
cdef DevicePtrHandle deviceptr_import_ipc(
MemoryPoolHandle h_pool, const void* export_data, StreamHandle h_stream) nogil except+
const MemoryPoolHandle& h_pool, const void* export_data, const StreamHandle& h_stream) except+ nogil
cdef StreamHandle deallocation_stream(const DevicePtrHandle& h) noexcept nogil
cdef void set_deallocation_stream(const DevicePtrHandle& h, StreamHandle h_stream) noexcept nogil
cdef void set_deallocation_stream(const DevicePtrHandle& h, const StreamHandle& h_stream) noexcept nogil
46 changes: 23 additions & 23 deletions cuda_core/cuda/core/_resource_handles.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -40,56 +40,56 @@ cdef extern from "_cpp/resource_handles.hpp" namespace "cuda_core":

# Context handles
ContextHandle create_context_handle_ref "cuda_core::create_context_handle_ref" (
cydriver.CUcontext ctx) nogil except+
cydriver.CUcontext ctx) except+ nogil
ContextHandle get_primary_context "cuda_core::get_primary_context" (
int device_id) nogil except+
ContextHandle get_current_context "cuda_core::get_current_context" () nogil except+
int device_id) except+ nogil
ContextHandle get_current_context "cuda_core::get_current_context" () except+ nogil

# Stream handles
StreamHandle create_stream_handle "cuda_core::create_stream_handle" (
ContextHandle h_ctx, unsigned int flags, int priority) nogil except+
const ContextHandle& h_ctx, unsigned int flags, int priority) except+ nogil
StreamHandle create_stream_handle_ref "cuda_core::create_stream_handle_ref" (
cydriver.CUstream stream) nogil except+
cydriver.CUstream stream) except+ nogil
StreamHandle create_stream_handle_with_owner "cuda_core::create_stream_handle_with_owner" (
cydriver.CUstream stream, object owner) nogil except+
StreamHandle get_legacy_stream "cuda_core::get_legacy_stream" () nogil except+
StreamHandle get_per_thread_stream "cuda_core::get_per_thread_stream" () nogil except+
cydriver.CUstream stream, object owner) except+ nogil
StreamHandle get_legacy_stream "cuda_core::get_legacy_stream" () except+ nogil
StreamHandle get_per_thread_stream "cuda_core::get_per_thread_stream" () except+ nogil

# Event handles (note: _create_event_handle* are internal due to C++ overloading)
EventHandle create_event_handle "cuda_core::create_event_handle" (
ContextHandle h_ctx, unsigned int flags) nogil except+
const ContextHandle& h_ctx, unsigned int flags) except+ nogil
EventHandle create_event_handle_noctx "cuda_core::create_event_handle_noctx" (
unsigned int flags) nogil except+
unsigned int flags) except+ nogil
EventHandle create_event_handle_ipc "cuda_core::create_event_handle_ipc" (
const cydriver.CUipcEventHandle& ipc_handle) nogil except+
const cydriver.CUipcEventHandle& ipc_handle) except+ nogil

# Memory pool handles
MemoryPoolHandle create_mempool_handle "cuda_core::create_mempool_handle" (
const cydriver.CUmemPoolProps& props) nogil except+
const cydriver.CUmemPoolProps& props) except+ nogil
MemoryPoolHandle create_mempool_handle_ref "cuda_core::create_mempool_handle_ref" (
cydriver.CUmemoryPool pool) nogil except+
cydriver.CUmemoryPool pool) except+ nogil
MemoryPoolHandle get_device_mempool "cuda_core::get_device_mempool" (
int device_id) nogil except+
int device_id) except+ nogil
MemoryPoolHandle create_mempool_handle_ipc "cuda_core::create_mempool_handle_ipc" (
int fd, cydriver.CUmemAllocationHandleType handle_type) nogil except+
int fd, cydriver.CUmemAllocationHandleType handle_type) except+ nogil

# Device pointer handles
DevicePtrHandle deviceptr_alloc_from_pool "cuda_core::deviceptr_alloc_from_pool" (
size_t size, MemoryPoolHandle h_pool, StreamHandle h_stream) nogil except+
size_t size, const MemoryPoolHandle& h_pool, const StreamHandle& h_stream) except+ nogil
DevicePtrHandle deviceptr_alloc_async "cuda_core::deviceptr_alloc_async" (
size_t size, StreamHandle h_stream) nogil except+
DevicePtrHandle deviceptr_alloc "cuda_core::deviceptr_alloc" (size_t size) nogil except+
DevicePtrHandle deviceptr_alloc_host "cuda_core::deviceptr_alloc_host" (size_t size) nogil except+
size_t size, const StreamHandle& h_stream) except+ nogil
DevicePtrHandle deviceptr_alloc "cuda_core::deviceptr_alloc" (size_t size) except+ nogil
DevicePtrHandle deviceptr_alloc_host "cuda_core::deviceptr_alloc_host" (size_t size) except+ nogil
DevicePtrHandle deviceptr_create_ref "cuda_core::deviceptr_create_ref" (
cydriver.CUdeviceptr ptr) nogil except+
cydriver.CUdeviceptr ptr) except+ nogil
DevicePtrHandle deviceptr_create_with_owner "cuda_core::deviceptr_create_with_owner" (
cydriver.CUdeviceptr ptr, object owner) nogil except+
cydriver.CUdeviceptr ptr, object owner) except+ nogil
DevicePtrHandle deviceptr_import_ipc "cuda_core::deviceptr_import_ipc" (
MemoryPoolHandle h_pool, const void* export_data, StreamHandle h_stream) nogil except+
const MemoryPoolHandle& h_pool, const void* export_data, const StreamHandle& h_stream) except+ nogil
StreamHandle deallocation_stream "cuda_core::deallocation_stream" (
const DevicePtrHandle& h) noexcept nogil
void set_deallocation_stream "cuda_core::set_deallocation_stream" (
const DevicePtrHandle& h, StreamHandle h_stream) noexcept nogil
const DevicePtrHandle& h, const StreamHandle& h_stream) noexcept nogil


# =============================================================================
Expand Down
Loading