From 6b729f725f26f3f5ef9fc8afbf3f1d6508e361a5 Mon Sep 17 00:00:00 2001 From: talubik Date: Thu, 6 Nov 2025 07:42:07 +0300 Subject: [PATCH 1/5] add thread and warps writer in name of device --- CMakeLists.txt | 2 +- lib/CL/devices/vortex/pocl-vortex.c | 21 ++++++++++++---- lib/kernel/vortex/CMakeLists.txt | 2 +- lib/kernel/vortex/atomics.c | 38 +++++++++++++++++++++++++++++ 4 files changed, 56 insertions(+), 7 deletions(-) create mode 100644 lib/kernel/vortex/atomics.c diff --git a/CMakeLists.txt b/CMakeLists.txt index cd7fb0c62..7a352c2a2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1855,7 +1855,7 @@ if (ENABLE_VORTEX) message(FATAL_ERROR "should set 'VORTEX_PREFIX' option") endif() set(BUILD_VORTEX 1) - set(VORTEX_DEVICE_EXTENSIONS "cl_khr_byte_addressable_store cl_khr_int64 cl_khr_fp64") + set(VORTEX_DEVICE_EXTENSIONS "cl_khr_byte_addressable_store cl_khr_int64 cl_khr_fp64 cl_khr_int64_base_atomics cl_khr_int64_extended_atomics") set(VORTEX_DEVICE_CL_VERSION_MAJOR 1) set(VORTEX_DEVICE_CL_VERSION_MINOR 2) set(VORTEX_DEVICE_CL_VERSION "120") diff --git a/lib/CL/devices/vortex/pocl-vortex.c b/lib/CL/devices/vortex/pocl-vortex.c index 1759f4a91..08e62f5e3 100644 --- a/lib/CL/devices/vortex/pocl-vortex.c +++ b/lib/CL/devices/vortex/pocl-vortex.c @@ -161,7 +161,6 @@ pocl_vortex_init (unsigned j, cl_device_id dev, const char* parameters) } dev->vendor = "Vortex Group"; - dev->long_name = "Vortex OpenGPU"; dev->short_name = "Vortex"; dev->vendor_id = 0; dev->type = CL_DEVICE_TYPE_GPU; @@ -236,6 +235,15 @@ pocl_vortex_init (unsigned j, cl_device_id dev, const char* parameters) free(dd); return CL_DEVICE_NOT_FOUND; } + char* long_name = (char *)malloc(64 * sizeof(char)); + if (long_name == NULL) + { + vx_dev_close(vx_device); + free(dd); + return CL_OUT_OF_HOST_MEMORY; + } + snprintf(long_name, 64 * sizeof(char), "Vortex OpenGPU W%luT%lu", num_warps, num_threads); + dev->long_name = long_name; uint64_t max_work_group_size = num_warps * num_threads; @@ -283,8 +291,10 @@ cl_int pocl_vortex_uninit (unsigned j, cl_device_id dev) { int pocl_vortex_init_context (cl_device_id dev, cl_context context) { vortex_device_data_t *dd = (vortex_device_data_t *)dev->data; - if (NULL == dd) - return CL_SUCCESS; + if (dd == NULL){ + pocl_vortex_init(0,dev,NULL); + dd = (vortex_device_data_t *)dev->data; + } dd->ctx_refcount++; @@ -308,7 +318,6 @@ int pocl_vortex_post_build_program (cl_program program, cl_uint device_i) { cl_device_id dev = program->devices[device_i]; vortex_device_data_t *ddata = (vortex_device_data_t *)dev->data; vortex_program_data_t *pdata = NULL; - POCL_LOCK (ddata->compile_lock); do { @@ -578,6 +587,7 @@ void pocl_vortex_run (void *data, _cl_command_node *cmd) { // release previous kernel buffer if (dd->vx_kernel_buffer != NULL) { + vx_dump_perf(dd->vx_device, stdout); vx_mem_free(dd->vx_kernel_buffer); dd->vx_kernel_buffer = NULL; } @@ -755,7 +765,8 @@ void pocl_vortex_submit (_cl_command_node *node, cl_command_queue cq) { void pocl_vortex_flush (cl_device_id dev, cl_command_queue cq) { vortex_device_data_t *dd = (vortex_device_data_t *)dev->data; - + if(dd == NULL) + return; POCL_LOCK (dd->cq_lock); vortex_command_scheduler (dd); POCL_UNLOCK (dd->cq_lock); diff --git a/lib/kernel/vortex/CMakeLists.txt b/lib/kernel/vortex/CMakeLists.txt index e0e781128..61542c38b 100644 --- a/lib/kernel/vortex/CMakeLists.txt +++ b/lib/kernel/vortex/CMakeLists.txt @@ -41,7 +41,7 @@ foreach(FILE printf.c printf_base.c list(REMOVE_ITEM KERNEL_SOURCES "${FILE}") endforeach() -foreach(FILE workitems.c printf.c barrier.c) +foreach(FILE workitems.c printf.c barrier.c atomics.c) list(REMOVE_ITEM KERNEL_SOURCES "${FILE}") list(APPEND KERNEL_SOURCES "vortex/${FILE}") endforeach() diff --git a/lib/kernel/vortex/atomics.c b/lib/kernel/vortex/atomics.c new file mode 100644 index 000000000..02f846997 --- /dev/null +++ b/lib/kernel/vortex/atomics.c @@ -0,0 +1,38 @@ +static inline int _vx_atomic_add_asm(volatile void *addr, int value) +{ + int old_value; + __asm__ volatile( + "amoadd.w %0, %2, (%1)" + : "=r"(old_value) + : "r"(addr), "r"(value) + : "memory"); + return old_value; +} + + +int _Z14_cl_atomic_addPU8CLglobalVii(volatile void *ptr, int val) +{ + return _vx_atomic_add_asm(ptr, val); +} + + +int _Z14_cl_atomic_addPU8CLglobalVjj(volatile void *ptr, unsigned int val) +{ + return _vx_atomic_add_asm(ptr, (int)val); +} + + +int _Z14_cl_atomic_addPU7CLlocalVii(volatile void *ptr, int val) +{ + return _vx_atomic_add_asm(ptr, val); +} + +int _Z14_cl_atomic_addPU7CLlocalVjj(volatile void *ptr, unsigned int val) +{ + return _vx_atomic_add_asm(ptr, (int)val); +} + +int _Z14_cl_atomic_incPU8CLglobalVj(volatile void *ptr) +{ + return _vx_atomic_add_asm(ptr, 1); +} \ No newline at end of file From 506207db15ac7e757ca210129bf732ddc358f7ea Mon Sep 17 00:00:00 2001 From: talubik Date: Sun, 9 Nov 2025 09:49:32 +0300 Subject: [PATCH 2/5] add atomic add and inc --- lib/CL/devices/vortex/pocl-vortex.c | 50 ++++++++++------------------- 1 file changed, 17 insertions(+), 33 deletions(-) diff --git a/lib/CL/devices/vortex/pocl-vortex.c b/lib/CL/devices/vortex/pocl-vortex.c index 08e62f5e3..5e76c7348 100644 --- a/lib/CL/devices/vortex/pocl-vortex.c +++ b/lib/CL/devices/vortex/pocl-vortex.c @@ -161,6 +161,7 @@ pocl_vortex_init (unsigned j, cl_device_id dev, const char* parameters) } dev->vendor = "Vortex Group"; + dev->long_name = "Vortex OpenGPU"; dev->short_name = "Vortex"; dev->vendor_id = 0; dev->type = CL_DEVICE_TYPE_GPU; @@ -235,15 +236,6 @@ pocl_vortex_init (unsigned j, cl_device_id dev, const char* parameters) free(dd); return CL_DEVICE_NOT_FOUND; } - char* long_name = (char *)malloc(64 * sizeof(char)); - if (long_name == NULL) - { - vx_dev_close(vx_device); - free(dd); - return CL_OUT_OF_HOST_MEMORY; - } - snprintf(long_name, 64 * sizeof(char), "Vortex OpenGPU W%luT%lu", num_warps, num_threads); - dev->long_name = long_name; uint64_t max_work_group_size = num_warps * num_threads; @@ -291,10 +283,8 @@ cl_int pocl_vortex_uninit (unsigned j, cl_device_id dev) { int pocl_vortex_init_context (cl_device_id dev, cl_context context) { vortex_device_data_t *dd = (vortex_device_data_t *)dev->data; - if (dd == NULL){ - pocl_vortex_init(0,dev,NULL); - dd = (vortex_device_data_t *)dev->data; - } + if (NULL == dd) + return CL_SUCCESS; dd->ctx_refcount++; @@ -318,6 +308,7 @@ int pocl_vortex_post_build_program (cl_program program, cl_uint device_i) { cl_device_id dev = program->devices[device_i]; vortex_device_data_t *ddata = (vortex_device_data_t *)dev->data; vortex_program_data_t *pdata = NULL; + POCL_LOCK (ddata->compile_lock); do { @@ -584,27 +575,21 @@ void pocl_vortex_run (void *data, _cl_command_node *cmd) { // release argument host buffer free(host_kargs_base_ptr); - // release previous kernel buffer - if (dd->vx_kernel_buffer != NULL) - { - vx_dump_perf(dd->vx_device, stdout); - vx_mem_free(dd->vx_kernel_buffer); - dd->vx_kernel_buffer = NULL; - } - // upload kernel to device - char sz_program_bc[POCL_MAX_PATHNAME_LENGTH]; - char sz_program_vxbin[POCL_MAX_PATHNAME_LENGTH]; + if (NULL == dd->vx_kernel_buffer) { + char sz_program_bc[POCL_MAX_PATHNAME_LENGTH]; + char sz_program_vxbin[POCL_MAX_PATHNAME_LENGTH]; - pocl_cache_program_bc_path(sz_program_bc, program, device_i); - remove_extension(sz_program_bc); + pocl_cache_program_bc_path(sz_program_bc, program, device_i); + remove_extension(sz_program_bc); - strcpy(sz_program_vxbin, sz_program_bc); - strncat(sz_program_vxbin, ".vxbin", POCL_MAX_PATHNAME_LENGTH - 1); - - vx_err = vx_upload_kernel_file(dd->vx_device, sz_program_vxbin, &dd->vx_kernel_buffer); - if (vx_err != 0) { - POCL_ABORT("POCL_VORTEX_RUN\n"); + strcpy(sz_program_vxbin, sz_program_bc); + strncat(sz_program_vxbin, ".vxbin", POCL_MAX_PATHNAME_LENGTH - 1); + + vx_err = vx_upload_kernel_file(dd->vx_device, sz_program_vxbin, &dd->vx_kernel_buffer); + if (vx_err != 0) { + POCL_ABORT("POCL_VORTEX_RUN\n"); + } } // launch kernel execution @@ -765,8 +750,7 @@ void pocl_vortex_submit (_cl_command_node *node, cl_command_queue cq) { void pocl_vortex_flush (cl_device_id dev, cl_command_queue cq) { vortex_device_data_t *dd = (vortex_device_data_t *)dev->data; - if(dd == NULL) - return; + POCL_LOCK (dd->cq_lock); vortex_command_scheduler (dd); POCL_UNLOCK (dd->cq_lock); From 81ed3400c155af775e4c4444a4b0b36916cb6235 Mon Sep 17 00:00:00 2001 From: talubik Date: Tue, 2 Dec 2025 06:43:26 +0300 Subject: [PATCH 3/5] add : implementation of all atomic operations --- lib/kernel/vortex/atomics.c | 329 +++++++++++++++++++++++++++++++++++- 1 file changed, 322 insertions(+), 7 deletions(-) diff --git a/lib/kernel/vortex/atomics.c b/lib/kernel/vortex/atomics.c index 02f846997..af022dc77 100644 --- a/lib/kernel/vortex/atomics.c +++ b/lib/kernel/vortex/atomics.c @@ -1,3 +1,4 @@ +// atomic_add//atomic_inc/atomic_dec/atomic_sub static inline int _vx_atomic_add_asm(volatile void *addr, int value) { int old_value; @@ -9,30 +10,344 @@ static inline int _vx_atomic_add_asm(volatile void *addr, int value) return old_value; } - int _Z14_cl_atomic_addPU8CLglobalVii(volatile void *ptr, int val) { return _vx_atomic_add_asm(ptr, val); } +int _Z14_cl_atomic_addPU7CLlocalVii(volatile void *ptr, int val) +{ + return _vx_atomic_add_asm(ptr, val); +} -int _Z14_cl_atomic_addPU8CLglobalVjj(volatile void *ptr, unsigned int val) +unsigned int _Z14_cl_atomic_addPU8CLglobalVjj(volatile void *ptr, unsigned int val) { return _vx_atomic_add_asm(ptr, (int)val); } +unsigned int _Z14_cl_atomic_addPU7CLlocalVjj(volatile void *ptr, unsigned int val) +{ + return _vx_atomic_add_asm(ptr, (int)val); +} -int _Z14_cl_atomic_addPU7CLlocalVii(volatile void *ptr, int val) +// atomic_inc + +int _Z14_cl_atomic_incPU8CLglobalVi(volatile void *ptr) { - return _vx_atomic_add_asm(ptr, val); + return _vx_atomic_add_asm(ptr, 1); } -int _Z14_cl_atomic_addPU7CLlocalVjj(volatile void *ptr, unsigned int val) +int _Z14_cl_atomic_incPU7CLlocalVi(volatile void *ptr) { - return _vx_atomic_add_asm(ptr, (int)val); + return _vx_atomic_add_asm(ptr, 1); } -int _Z14_cl_atomic_incPU8CLglobalVj(volatile void *ptr) +unsigned int _Z14_cl_atomic_incPU8CLglobalVj(volatile void *ptr) { return _vx_atomic_add_asm(ptr, 1); +} + +unsigned int _Z14_cl_atomic_incPU7CLlocalVj(volatile void *ptr) +{ + return _vx_atomic_add_asm(ptr, 1); +} + +// atomic_dec + +int _Z14_cl_atomic_decPU8CLglobalVi(volatile void *ptr) +{ + return _vx_atomic_add_asm(ptr, -1); +} + +int _Z14_cl_atomic_decPU7CLlocalVi(volatile void *ptr) +{ + return _vx_atomic_add_asm(ptr, -1); +} + +unsigned int _Z14_cl_atomic_decPU7CLlocalVj(volatile void *ptr) +{ + return _vx_atomic_add_asm(ptr, -1); +} + +unsigned int _Z14_cl_atomic_decPU8CLglobalVj(volatile void *ptr) +{ + return _vx_atomic_add_asm(ptr, -1); +} + +// atomic_sub + +int _Z14_cl_atomic_subPU7CLlocalVii(volatile void *ptr, int val) +{ + return _vx_atomic_add_asm(ptr, -val); +} + +int _Z14_cl_atomic_subPU8CLglobalVii(volatile void *ptr, int val) +{ + return _vx_atomic_add_asm(ptr, -val); +} + +unsigned int _Z14_cl_atomic_subPU7CLlocalVjj(volatile void *ptr, unsigned int val) +{ + return _vx_atomic_add_asm(ptr, -(int)val); +} + +unsigned int _Z14_cl_atomic_subPU8CLglobalVjj(volatile void *ptr, unsigned int val) +{ + return _vx_atomic_add_asm(ptr, -(int)val); +} + +// atomic_max + +static inline int _vx_atomic_max_asm(volatile void *addr, int value) +{ + int old_value; + __asm__ volatile( + "amomax.w %0, %2, (%1)" + : "=r"(old_value) + : "r"(addr), "r"(value) + : "memory"); + return old_value; +} + +int _Z14_cl_atomic_maxPU7CLlocalVii(volatile void *ptr, int val) +{ + return _vx_atomic_max_asm(ptr, val); +} + +int _Z14_cl_atomic_maxPU8CLglobalVii(volatile void *ptr, int val) +{ + return _vx_atomic_max_asm(ptr, val); +} + +// atomic_min + +static inline int _vx_atomic_min_asm(volatile void *addr, int value) +{ + int old_value; + __asm__ volatile( + "amomin.w %0, %2, (%1)" + : "=r"(old_value) + : "r"(addr), "r"(value) + : "memory"); + return old_value; +} + +int _Z14_cl_atomic_minPU7CLlocalVii(volatile void *ptr, int val) +{ + return _vx_atomic_min_asm(ptr, val); +} + +int _Z14_cl_atomic_minPU8CLglobalVii(volatile void *ptr, int val) +{ + return _vx_atomic_min_asm(ptr, val); +} + +// atomic_xor + +static inline int _vx_atomic_xor_asm(volatile void *addr, int value) +{ + int old_value; + __asm__ volatile( + "amoxor.w %0, %2, (%1)" + : "=r"(old_value) + : "r"(addr), "r"(value) + : "memory"); + return old_value; +} + +int _Z14_cl_atomic_xorPU7CLlocalVii(volatile void *ptr, int val) +{ + return _vx_atomic_xor_asm(ptr, val); +} + +int _Z14_cl_atomic_xorPU8CLglobalVii(volatile void *ptr, int val) +{ + return _vx_atomic_xor_asm(ptr, val); +} + +unsigned int _Z14_cl_atomic_xorPU7CLlocalVjj(volatile void *ptr, unsigned int val) +{ + return _vx_atomic_xor_asm(ptr, (int)val); +} + +unsigned int _Z14_cl_atomic_xorPU8CLglobalVjj(volatile void *ptr, unsigned int val) +{ + return _vx_atomic_xor_asm(ptr, (int)val); +} +// atomic_or + +static inline int _vx_atomic_or_asm(volatile void *addr, int value) +{ + int old_value; + __asm__ volatile( + "amoor.w %0, %2, (%1)" + : "=r"(old_value) + : "r"(addr), "r"(value) + : "memory"); + return old_value; +} + +int _Z13_cl_atomic_orPU7CLlocalVii(volatile void *ptr, int val) +{ + return _vx_atomic_or_asm(ptr, val); +} + +int _Z13_cl_atomic_orPU8CLglobalVii(volatile void *ptr, int val) +{ + return _vx_atomic_or_asm(ptr, val); +} + +unsigned int _Z13_cl_atomic_orPU7CLlocalVjj(volatile void *ptr, unsigned int val) +{ + return _vx_atomic_or_asm(ptr, (int)val); +} + +unsigned int _Z13_cl_atomic_orPU8CLglobalVjj(volatile void *ptr, unsigned int val) +{ + return _vx_atomic_or_asm(ptr, (int)val); +} + +// atomic_and + +static inline int _vx_atomic_and_asm(volatile void *addr, int value) +{ + int old_value; + __asm__ volatile( + "amoand.w %0, %2, (%1)" + : "=r"(old_value) + : "r"(addr), "r"(value) + : "memory"); + return old_value; +} + +int _Z14_cl_atomic_andPU7CLlocalVii(volatile void *ptr, int val) +{ + return _vx_atomic_and_asm(ptr, val); +} + +int _Z14_cl_atomic_andPU8CLglobalVii(volatile void *ptr, int val) +{ + return _vx_atomic_and_asm(ptr, val); +} + +unsigned int _Z14_cl_atomic_andPU7CLlocalVjj(volatile void *ptr, unsigned int val) +{ + return _vx_atomic_and_asm(ptr, (int)val); +} + +unsigned int _Z14_cl_atomic_andPU8CLglobalVjj(volatile void *ptr, unsigned int val) +{ + return _vx_atomic_and_asm(ptr, (int)val); +} + +// atomic_xchg + +static inline int _vx_atomic_xchg_asm(volatile void *addr, int value) +{ + int old_value; + __asm__ volatile( + "amoswap.w %0, %2, (%1)" + : "=r"(old_value) + : "r"(addr), "r"(value) + : "memory"); + return old_value; +} +int _Z15_cl_atomic_xchgPU8CLglobalVii(volatile void *ptr, int val) +{ + return _vx_atomic_xchg_asm(ptr, val); +} + +int _Z15_cl_atomic_xchgPU7CLlocalVii(volatile void *ptr, int val) +{ + return _vx_atomic_xchg_asm(ptr, val); +} + +unsigned int _Z15_cl_atomic_xchgPU8CLglobalVjj(volatile void *ptr, unsigned int val) +{ + return _vx_atomic_xchg_asm(ptr, (int)val); +} + +unsigned int _Z15_cl_atomic_xchgPU7CLlocalVjj(volatile void *ptr, unsigned int val) +{ + return _vx_atomic_xchg_asm(ptr, (int)val); +} + +// atomic_cmpxchg + +static inline int _vx_atomic_cmpxchg_asm(volatile void *addr, int cmp_val, int new_val) +{ + int old_val; + int success; + __asm__ volatile( + "1: lr.w %0, (%2)\n" + " bne %0, %3, 2f\n" + " sc.w %1, %4, (%2)\n" + " bnez %1, 1b\n" + "2:" + : "=&r"(old_val), "=&r"(success) + : "r"(addr), "r"(cmp_val), "r"(new_val) + : "memory"); + return old_val; +} + +int _Z18_cl_atomic_cmpxchgPU7CLlocalViii(volatile void *ptr, int cmp_val, int new_val) +{ + return _vx_atomic_cmpxchg_asm(ptr, cmp_val, new_val); +} + +int _Z18_cl_atomic_cmpxchgPU8CLglobalViii(volatile void *ptr, int cmp_val, int new_val) +{ + return _vx_atomic_cmpxchg_asm(ptr, cmp_val, new_val); +} + +unsigned int _Z18_cl_atomic_cmpxchgPU7CLlocalVjjj(volatile void *ptr, unsigned int cmp_val, unsigned int new_val) +{ + return _vx_atomic_cmpxchg_asm(ptr, (int)cmp_val, (int)new_val); +} + +unsigned int _Z18_cl_atomic_cmpxchgPU8CLglobalVjjj(volatile void *ptr, unsigned int cmp_val, unsigned int new_val) +{ + return _vx_atomic_cmpxchg_asm(ptr, (int)cmp_val, (int)new_val); +} + +static inline unsigned int _vx_atomic_minu_asm(volatile void *addr, unsigned int value) +{ + unsigned int old_value; + __asm__ volatile( + "amominu.w %0, %2, (%1)" + : "=r"(old_value) + : "r"(addr), "r"(value) + : "memory"); + return old_value; +} + +unsigned int _Z14_cl_atomic_minPU7CLlocalVjj(volatile void *ptr, unsigned int val) +{ + return _vx_atomic_minu_asm(ptr, val); +} + +unsigned int _Z14_cl_atomic_minPU8CLglobalVjj(volatile void *ptr, unsigned int val) +{ + return _vx_atomic_minu_asm(ptr, val); +} + +static inline unsigned int _vx_atomic_maxu_asm(volatile void *addr, unsigned int value) +{ + unsigned int old_value; + __asm__ volatile( + "amomaxu.w %0, %2, (%1)" + : "=r"(old_value) + : "r"(addr), "r"(value) + : "memory"); + return old_value; +} + +unsigned int _Z14_cl_atomic_maxPU7CLlocalVjj(volatile void *ptr, unsigned int val) +{ + return _vx_atomic_maxu_asm(ptr, val); +} + +unsigned int _Z14_cl_atomic_maxPU8CLglobalVjj(volatile void *ptr, unsigned int val) +{ + return _vx_atomic_maxu_asm(ptr, val); } \ No newline at end of file From 21c389500f9c7eac9cfa1bb31b459882c203b2c7 Mon Sep 17 00:00:00 2001 From: talubik Date: Tue, 2 Dec 2025 07:44:28 +0300 Subject: [PATCH 4/5] fix : merge of vortex_2.x --- lib/CL/devices/vortex/pocl-vortex.c | 30 +++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/lib/CL/devices/vortex/pocl-vortex.c b/lib/CL/devices/vortex/pocl-vortex.c index 5e76c7348..6d5d0937c 100644 --- a/lib/CL/devices/vortex/pocl-vortex.c +++ b/lib/CL/devices/vortex/pocl-vortex.c @@ -575,21 +575,27 @@ void pocl_vortex_run (void *data, _cl_command_node *cmd) { // release argument host buffer free(host_kargs_base_ptr); - // upload kernel to device - if (NULL == dd->vx_kernel_buffer) { - char sz_program_bc[POCL_MAX_PATHNAME_LENGTH]; - char sz_program_vxbin[POCL_MAX_PATHNAME_LENGTH]; + // release previous kernel buffer + if (dd->vx_kernel_buffer != NULL) + { + vx_dump_perf(dd->vx_device, stdout); + vx_mem_free(dd->vx_kernel_buffer); + dd->vx_kernel_buffer = NULL; + } - pocl_cache_program_bc_path(sz_program_bc, program, device_i); - remove_extension(sz_program_bc); + // upload kernel to device + char sz_program_bc[POCL_MAX_PATHNAME_LENGTH]; + char sz_program_vxbin[POCL_MAX_PATHNAME_LENGTH]; - strcpy(sz_program_vxbin, sz_program_bc); - strncat(sz_program_vxbin, ".vxbin", POCL_MAX_PATHNAME_LENGTH - 1); + pocl_cache_program_bc_path(sz_program_bc, program, device_i); + remove_extension(sz_program_bc); - vx_err = vx_upload_kernel_file(dd->vx_device, sz_program_vxbin, &dd->vx_kernel_buffer); - if (vx_err != 0) { - POCL_ABORT("POCL_VORTEX_RUN\n"); - } + strcpy(sz_program_vxbin, sz_program_bc); + strncat(sz_program_vxbin, ".vxbin", POCL_MAX_PATHNAME_LENGTH - 1); + + vx_err = vx_upload_kernel_file(dd->vx_device, sz_program_vxbin, &dd->vx_kernel_buffer); + if (vx_err != 0) { + POCL_ABORT("POCL_VORTEX_RUN\n"); } // launch kernel execution From 787452042299f02f8830d6348e3d88af7be3cdd2 Mon Sep 17 00:00:00 2001 From: talubik Date: Thu, 4 Dec 2025 09:04:18 +0300 Subject: [PATCH 5/5] refactor: atomics --- lib/kernel/vortex/atomics.c | 84 ++++++++++++++++++------------------- 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/lib/kernel/vortex/atomics.c b/lib/kernel/vortex/atomics.c index af022dc77..d2677b64f 100644 --- a/lib/kernel/vortex/atomics.c +++ b/lib/kernel/vortex/atomics.c @@ -119,6 +119,27 @@ int _Z14_cl_atomic_maxPU8CLglobalVii(volatile void *ptr, int val) return _vx_atomic_max_asm(ptr, val); } +static inline unsigned int _vx_atomic_maxu_asm(volatile void *addr, unsigned int value) +{ + unsigned int old_value; + __asm__ volatile( + "amomaxu.w %0, %2, (%1)" + : "=r"(old_value) + : "r"(addr), "r"(value) + : "memory"); + return old_value; +} + +unsigned int _Z14_cl_atomic_maxPU7CLlocalVjj(volatile void *ptr, unsigned int val) +{ + return _vx_atomic_maxu_asm(ptr, val); +} + +unsigned int _Z14_cl_atomic_maxPU8CLglobalVjj(volatile void *ptr, unsigned int val) +{ + return _vx_atomic_maxu_asm(ptr, val); +} + // atomic_min static inline int _vx_atomic_min_asm(volatile void *addr, int value) @@ -142,6 +163,27 @@ int _Z14_cl_atomic_minPU8CLglobalVii(volatile void *ptr, int val) return _vx_atomic_min_asm(ptr, val); } +static inline unsigned int _vx_atomic_minu_asm(volatile void *addr, unsigned int value) +{ + unsigned int old_value; + __asm__ volatile( + "amominu.w %0, %2, (%1)" + : "=r"(old_value) + : "r"(addr), "r"(value) + : "memory"); + return old_value; +} + +unsigned int _Z14_cl_atomic_minPU7CLlocalVjj(volatile void *ptr, unsigned int val) +{ + return _vx_atomic_minu_asm(ptr, val); +} + +unsigned int _Z14_cl_atomic_minPU8CLglobalVjj(volatile void *ptr, unsigned int val) +{ + return _vx_atomic_minu_asm(ptr, val); +} + // atomic_xor static inline int _vx_atomic_xor_asm(volatile void *addr, int value) @@ -309,45 +351,3 @@ unsigned int _Z18_cl_atomic_cmpxchgPU8CLglobalVjjj(volatile void *ptr, unsigned { return _vx_atomic_cmpxchg_asm(ptr, (int)cmp_val, (int)new_val); } - -static inline unsigned int _vx_atomic_minu_asm(volatile void *addr, unsigned int value) -{ - unsigned int old_value; - __asm__ volatile( - "amominu.w %0, %2, (%1)" - : "=r"(old_value) - : "r"(addr), "r"(value) - : "memory"); - return old_value; -} - -unsigned int _Z14_cl_atomic_minPU7CLlocalVjj(volatile void *ptr, unsigned int val) -{ - return _vx_atomic_minu_asm(ptr, val); -} - -unsigned int _Z14_cl_atomic_minPU8CLglobalVjj(volatile void *ptr, unsigned int val) -{ - return _vx_atomic_minu_asm(ptr, val); -} - -static inline unsigned int _vx_atomic_maxu_asm(volatile void *addr, unsigned int value) -{ - unsigned int old_value; - __asm__ volatile( - "amomaxu.w %0, %2, (%1)" - : "=r"(old_value) - : "r"(addr), "r"(value) - : "memory"); - return old_value; -} - -unsigned int _Z14_cl_atomic_maxPU7CLlocalVjj(volatile void *ptr, unsigned int val) -{ - return _vx_atomic_maxu_asm(ptr, val); -} - -unsigned int _Z14_cl_atomic_maxPU8CLglobalVjj(volatile void *ptr, unsigned int val) -{ - return _vx_atomic_maxu_asm(ptr, val); -} \ No newline at end of file