diff --git a/cuda_core/cuda/core/_cpp/resource_handles.cpp b/cuda_core/cuda/core/_cpp/resource_handles.cpp index 724ea97169..5bd77c11b5 100644 --- a/cuda_core/cuda/core/_cpp/resource_handles.cpp +++ b/cuda_core/cuda/core/_cpp/resource_handles.cpp @@ -234,7 +234,7 @@ struct StreamBox { }; } // namespace -StreamHandle create_stream_handle(ContextHandle h_ctx, unsigned int flags, int priority) { +StreamHandle create_stream_handle(const ContextHandle& h_ctx, unsigned int flags, int priority) { GILReleaseGuard gil; CUstream stream; if (CUDA_SUCCESS != (err = p_cuStreamCreateWithPriority(&stream, flags, priority))) { @@ -301,7 +301,7 @@ struct EventBox { }; } // namespace -EventHandle create_event_handle(ContextHandle h_ctx, unsigned int flags) { +EventHandle create_event_handle(const ContextHandle& h_ctx, unsigned int flags) { GILReleaseGuard gil; CUevent event; if (CUDA_SUCCESS != (err = p_cuEventCreate(&event, flags))) { @@ -449,11 +449,11 @@ StreamHandle deallocation_stream(const DevicePtrHandle& h) noexcept { return get_box(h)->h_stream; } -void set_deallocation_stream(const DevicePtrHandle& h, StreamHandle h_stream) noexcept { - get_box(h)->h_stream = std::move(h_stream); +void set_deallocation_stream(const DevicePtrHandle& h, const StreamHandle& h_stream) noexcept { + get_box(h)->h_stream = h_stream; } -DevicePtrHandle deviceptr_alloc_from_pool(size_t size, MemoryPoolHandle h_pool, StreamHandle h_stream) { +DevicePtrHandle deviceptr_alloc_from_pool(size_t size, const MemoryPoolHandle& h_pool, const StreamHandle& h_stream) { GILReleaseGuard gil; CUdeviceptr ptr; if (CUDA_SUCCESS != (err = p_cuMemAllocFromPoolAsync(&ptr, size, *h_pool, as_cu(h_stream)))) { @@ -471,7 +471,7 @@ DevicePtrHandle deviceptr_alloc_from_pool(size_t size, MemoryPoolHandle h_pool, return DevicePtrHandle(box, &box->resource); } -DevicePtrHandle deviceptr_alloc_async(size_t size, StreamHandle h_stream) { +DevicePtrHandle deviceptr_alloc_async(size_t size, const StreamHandle& h_stream) { GILReleaseGuard gil; CUdeviceptr ptr; if (CUDA_SUCCESS != (err = p_cuMemAllocAsync(&ptr, size, as_cu(h_stream)))) { @@ -612,7 +612,7 @@ struct ExportDataKeyHash { static std::mutex ipc_ptr_cache_mutex; static std::unordered_map, ExportDataKeyHash> ipc_ptr_cache; -DevicePtrHandle deviceptr_import_ipc(MemoryPoolHandle h_pool, const void* export_data, StreamHandle h_stream) { +DevicePtrHandle deviceptr_import_ipc(const MemoryPoolHandle& h_pool, const void* export_data, const StreamHandle& h_stream) { auto data = const_cast( reinterpret_cast(export_data)); diff --git a/cuda_core/cuda/core/_cpp/resource_handles.hpp b/cuda_core/cuda/core/_cpp/resource_handles.hpp index 4a6d9bb241..06216f31cc 100644 --- a/cuda_core/cuda/core/_cpp/resource_handles.hpp +++ b/cuda_core/cuda/core/_cpp/resource_handles.hpp @@ -93,7 +93,7 @@ ContextHandle get_current_context(); // The stream structurally depends on the provided context handle. // When the last reference is released, cuStreamDestroy is called automatically. // Returns empty handle on error (caller must check). -StreamHandle create_stream_handle(ContextHandle h_ctx, unsigned int flags, int priority); +StreamHandle create_stream_handle(const ContextHandle& h_ctx, unsigned int flags, int priority); // Create a non-owning stream handle (references existing stream). // Use for borrowed streams (from foreign code) or built-in streams. @@ -122,7 +122,7 @@ StreamHandle get_per_thread_stream(); // The event structurally depends on the provided context handle. // When the last reference is released, cuEventDestroy is called automatically. // Returns empty handle on error (caller must check). -EventHandle create_event_handle(ContextHandle h_ctx, unsigned int flags); +EventHandle create_event_handle(const ContextHandle& h_ctx, unsigned int flags); // Create an owning event handle without context dependency. // Use for temporary events that are created and destroyed in the same scope. @@ -173,13 +173,13 @@ using DevicePtrHandle = std::shared_ptr; // Returns empty handle on error (caller must check). DevicePtrHandle deviceptr_alloc_from_pool( size_t size, - MemoryPoolHandle h_pool, - StreamHandle h_stream); + const MemoryPoolHandle& h_pool, + const StreamHandle& h_stream); // Allocate device memory asynchronously via cuMemAllocAsync. // When the last reference is released, cuMemFreeAsync is called on the stored stream. // Returns empty handle on error (caller must check). -DevicePtrHandle deviceptr_alloc_async(size_t size, StreamHandle h_stream); +DevicePtrHandle deviceptr_alloc_async(size_t size, const StreamHandle& h_stream); // Allocate device memory synchronously via cuMemAlloc. // When the last reference is released, cuMemFree is called. @@ -207,16 +207,16 @@ DevicePtrHandle deviceptr_create_with_owner(CUdeviceptr ptr, PyObject* owner); // Note: Does not yet implement reference counting for nvbug 5570902. // On error, returns empty handle and sets thread-local error (use get_last_error()). DevicePtrHandle deviceptr_import_ipc( - MemoryPoolHandle h_pool, + const MemoryPoolHandle& h_pool, const void* export_data, - StreamHandle h_stream); + const StreamHandle& h_stream); // Access the deallocation stream for a device pointer handle (read-only). // For non-owning handles, the stream is not used but can still be accessed. StreamHandle deallocation_stream(const DevicePtrHandle& h) noexcept; // Set the deallocation stream for a device pointer handle. -void set_deallocation_stream(const DevicePtrHandle& h, StreamHandle h_stream) noexcept; +void set_deallocation_stream(const DevicePtrHandle& h, const StreamHandle& h_stream) noexcept; // ============================================================================ // Overloaded helper functions to extract raw resources from handles diff --git a/cuda_core/cuda/core/_memory/_managed_memory_resource.pyx b/cuda_core/cuda/core/_memory/_managed_memory_resource.pyx index 1b8b03f8f2..509d9ca38d 100644 --- a/cuda_core/cuda/core/_memory/_managed_memory_resource.pyx +++ b/cuda_core/cuda/core/_memory/_managed_memory_resource.pyx @@ -90,11 +90,11 @@ cdef class ManagedMemoryResource(_MemPool): opts_base._location = cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE opts_base._type = cydriver.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_MANAGED + + super().__init__(device_id, opts_base) ELSE: raise RuntimeError("ManagedMemoryResource requires CUDA 13.0 or later") - super().__init__(device_id, opts_base) - @property def is_device_accessible(self) -> bool: """Return True. This memory resource provides device-accessible buffers.""" diff --git a/cuda_core/cuda/core/_resource_handles.pxd b/cuda_core/cuda/core/_resource_handles.pxd index 7a634f3a82..1ab8a87edb 100644 --- a/cuda_core/cuda/core/_resource_handles.pxd +++ b/cuda_core/cuda/core/_resource_handles.pxd @@ -56,41 +56,41 @@ cdef cydriver.CUresult peek_last_error() noexcept nogil cdef void clear_last_error() noexcept nogil # Context handles -cdef ContextHandle create_context_handle_ref(cydriver.CUcontext ctx) nogil except+ -cdef ContextHandle get_primary_context(int device_id) nogil except+ -cdef ContextHandle get_current_context() nogil except+ +cdef ContextHandle create_context_handle_ref(cydriver.CUcontext ctx) except+ nogil +cdef ContextHandle get_primary_context(int device_id) except+ nogil +cdef ContextHandle get_current_context() except+ nogil # Stream handles cdef StreamHandle create_stream_handle( - ContextHandle h_ctx, unsigned int flags, int priority) nogil except+ -cdef StreamHandle create_stream_handle_ref(cydriver.CUstream stream) nogil except+ -cdef StreamHandle create_stream_handle_with_owner(cydriver.CUstream stream, object owner) nogil except+ -cdef StreamHandle get_legacy_stream() nogil except+ -cdef StreamHandle get_per_thread_stream() nogil except+ + const ContextHandle& h_ctx, unsigned int flags, int priority) except+ nogil +cdef StreamHandle create_stream_handle_ref(cydriver.CUstream stream) except+ nogil +cdef StreamHandle create_stream_handle_with_owner(cydriver.CUstream stream, object owner) except+ nogil +cdef StreamHandle get_legacy_stream() except+ nogil +cdef StreamHandle get_per_thread_stream() except+ nogil # Event handles -cdef EventHandle create_event_handle(ContextHandle h_ctx, unsigned int flags) nogil except+ -cdef EventHandle create_event_handle_noctx(unsigned int flags) nogil except+ +cdef EventHandle create_event_handle(const ContextHandle& h_ctx, unsigned int flags) except+ nogil +cdef EventHandle create_event_handle_noctx(unsigned int flags) except+ nogil cdef EventHandle create_event_handle_ipc( - const cydriver.CUipcEventHandle& ipc_handle) nogil except+ + const cydriver.CUipcEventHandle& ipc_handle) except+ nogil # Memory pool handles cdef MemoryPoolHandle create_mempool_handle( - const cydriver.CUmemPoolProps& props) nogil except+ -cdef MemoryPoolHandle create_mempool_handle_ref(cydriver.CUmemoryPool pool) nogil except+ -cdef MemoryPoolHandle get_device_mempool(int device_id) nogil except+ + const cydriver.CUmemPoolProps& props) except+ nogil +cdef MemoryPoolHandle create_mempool_handle_ref(cydriver.CUmemoryPool pool) except+ nogil +cdef MemoryPoolHandle get_device_mempool(int device_id) except+ nogil cdef MemoryPoolHandle create_mempool_handle_ipc( - int fd, cydriver.CUmemAllocationHandleType handle_type) nogil except+ + int fd, cydriver.CUmemAllocationHandleType handle_type) except+ nogil # Device pointer handles cdef DevicePtrHandle deviceptr_alloc_from_pool( - size_t size, MemoryPoolHandle h_pool, StreamHandle h_stream) nogil except+ -cdef DevicePtrHandle deviceptr_alloc_async(size_t size, StreamHandle h_stream) nogil except+ -cdef DevicePtrHandle deviceptr_alloc(size_t size) nogil except+ -cdef DevicePtrHandle deviceptr_alloc_host(size_t size) nogil except+ -cdef DevicePtrHandle deviceptr_create_ref(cydriver.CUdeviceptr ptr) nogil except+ -cdef DevicePtrHandle deviceptr_create_with_owner(cydriver.CUdeviceptr ptr, object owner) nogil except+ + size_t size, const MemoryPoolHandle& h_pool, const StreamHandle& h_stream) except+ nogil +cdef DevicePtrHandle deviceptr_alloc_async(size_t size, const StreamHandle& h_stream) except+ nogil +cdef DevicePtrHandle deviceptr_alloc(size_t size) except+ nogil +cdef DevicePtrHandle deviceptr_alloc_host(size_t size) except+ nogil +cdef DevicePtrHandle deviceptr_create_ref(cydriver.CUdeviceptr ptr) except+ nogil +cdef DevicePtrHandle deviceptr_create_with_owner(cydriver.CUdeviceptr ptr, object owner) except+ nogil cdef DevicePtrHandle deviceptr_import_ipc( - MemoryPoolHandle h_pool, const void* export_data, StreamHandle h_stream) nogil except+ + const MemoryPoolHandle& h_pool, const void* export_data, const StreamHandle& h_stream) except+ nogil cdef StreamHandle deallocation_stream(const DevicePtrHandle& h) noexcept nogil -cdef void set_deallocation_stream(const DevicePtrHandle& h, StreamHandle h_stream) noexcept nogil +cdef void set_deallocation_stream(const DevicePtrHandle& h, const StreamHandle& h_stream) noexcept nogil diff --git a/cuda_core/cuda/core/_resource_handles.pyx b/cuda_core/cuda/core/_resource_handles.pyx index 7989cd1bb0..6d874e426d 100644 --- a/cuda_core/cuda/core/_resource_handles.pyx +++ b/cuda_core/cuda/core/_resource_handles.pyx @@ -40,56 +40,56 @@ cdef extern from "_cpp/resource_handles.hpp" namespace "cuda_core": # Context handles ContextHandle create_context_handle_ref "cuda_core::create_context_handle_ref" ( - cydriver.CUcontext ctx) nogil except+ + cydriver.CUcontext ctx) except+ nogil ContextHandle get_primary_context "cuda_core::get_primary_context" ( - int device_id) nogil except+ - ContextHandle get_current_context "cuda_core::get_current_context" () nogil except+ + int device_id) except+ nogil + ContextHandle get_current_context "cuda_core::get_current_context" () except+ nogil # Stream handles StreamHandle create_stream_handle "cuda_core::create_stream_handle" ( - ContextHandle h_ctx, unsigned int flags, int priority) nogil except+ + const ContextHandle& h_ctx, unsigned int flags, int priority) except+ nogil StreamHandle create_stream_handle_ref "cuda_core::create_stream_handle_ref" ( - cydriver.CUstream stream) nogil except+ + cydriver.CUstream stream) except+ nogil StreamHandle create_stream_handle_with_owner "cuda_core::create_stream_handle_with_owner" ( - cydriver.CUstream stream, object owner) nogil except+ - StreamHandle get_legacy_stream "cuda_core::get_legacy_stream" () nogil except+ - StreamHandle get_per_thread_stream "cuda_core::get_per_thread_stream" () nogil except+ + cydriver.CUstream stream, object owner) except+ nogil + StreamHandle get_legacy_stream "cuda_core::get_legacy_stream" () except+ nogil + StreamHandle get_per_thread_stream "cuda_core::get_per_thread_stream" () except+ nogil # Event handles (note: _create_event_handle* are internal due to C++ overloading) EventHandle create_event_handle "cuda_core::create_event_handle" ( - ContextHandle h_ctx, unsigned int flags) nogil except+ + const ContextHandle& h_ctx, unsigned int flags) except+ nogil EventHandle create_event_handle_noctx "cuda_core::create_event_handle_noctx" ( - unsigned int flags) nogil except+ + unsigned int flags) except+ nogil EventHandle create_event_handle_ipc "cuda_core::create_event_handle_ipc" ( - const cydriver.CUipcEventHandle& ipc_handle) nogil except+ + const cydriver.CUipcEventHandle& ipc_handle) except+ nogil # Memory pool handles MemoryPoolHandle create_mempool_handle "cuda_core::create_mempool_handle" ( - const cydriver.CUmemPoolProps& props) nogil except+ + const cydriver.CUmemPoolProps& props) except+ nogil MemoryPoolHandle create_mempool_handle_ref "cuda_core::create_mempool_handle_ref" ( - cydriver.CUmemoryPool pool) nogil except+ + cydriver.CUmemoryPool pool) except+ nogil MemoryPoolHandle get_device_mempool "cuda_core::get_device_mempool" ( - int device_id) nogil except+ + int device_id) except+ nogil MemoryPoolHandle create_mempool_handle_ipc "cuda_core::create_mempool_handle_ipc" ( - int fd, cydriver.CUmemAllocationHandleType handle_type) nogil except+ + int fd, cydriver.CUmemAllocationHandleType handle_type) except+ nogil # Device pointer handles DevicePtrHandle deviceptr_alloc_from_pool "cuda_core::deviceptr_alloc_from_pool" ( - size_t size, MemoryPoolHandle h_pool, StreamHandle h_stream) nogil except+ + size_t size, const MemoryPoolHandle& h_pool, const StreamHandle& h_stream) except+ nogil DevicePtrHandle deviceptr_alloc_async "cuda_core::deviceptr_alloc_async" ( - size_t size, StreamHandle h_stream) nogil except+ - DevicePtrHandle deviceptr_alloc "cuda_core::deviceptr_alloc" (size_t size) nogil except+ - DevicePtrHandle deviceptr_alloc_host "cuda_core::deviceptr_alloc_host" (size_t size) nogil except+ + size_t size, const StreamHandle& h_stream) except+ nogil + DevicePtrHandle deviceptr_alloc "cuda_core::deviceptr_alloc" (size_t size) except+ nogil + DevicePtrHandle deviceptr_alloc_host "cuda_core::deviceptr_alloc_host" (size_t size) except+ nogil DevicePtrHandle deviceptr_create_ref "cuda_core::deviceptr_create_ref" ( - cydriver.CUdeviceptr ptr) nogil except+ + cydriver.CUdeviceptr ptr) except+ nogil DevicePtrHandle deviceptr_create_with_owner "cuda_core::deviceptr_create_with_owner" ( - cydriver.CUdeviceptr ptr, object owner) nogil except+ + cydriver.CUdeviceptr ptr, object owner) except+ nogil DevicePtrHandle deviceptr_import_ipc "cuda_core::deviceptr_import_ipc" ( - MemoryPoolHandle h_pool, const void* export_data, StreamHandle h_stream) nogil except+ + const MemoryPoolHandle& h_pool, const void* export_data, const StreamHandle& h_stream) except+ nogil StreamHandle deallocation_stream "cuda_core::deallocation_stream" ( const DevicePtrHandle& h) noexcept nogil void set_deallocation_stream "cuda_core::set_deallocation_stream" ( - const DevicePtrHandle& h, StreamHandle h_stream) noexcept nogil + const DevicePtrHandle& h, const StreamHandle& h_stream) noexcept nogil # =============================================================================