diff --git a/.gitignore b/.gitignore index 43388e9cb5..f4f5b9210f 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ /.vscode *.cache *.code-workspace +/tests/opencl/atomics/ \ No newline at end of file diff --git a/tests/opencl/Makefile b/tests/opencl/Makefile index e60cd6ec74..53b02e4fda 100644 --- a/tests/opencl/Makefile +++ b/tests/opencl/Makefile @@ -22,6 +22,9 @@ all: $(MAKE) -C kmeans $(MAKE) -C blackscholes $(MAKE) -C bfs + $(MAKE) -C twokernels + $(MAKE) -C copybuf + run-simx: $(MAKE) -C vecadd run-simx @@ -43,6 +46,10 @@ run-simx: $(MAKE) -C kmeans run-simx $(MAKE) -C blackscholes run-simx $(MAKE) -C bfs run-simx + $(MAKE) -C twokernels run-simx + $(MAKE) -C copybuf run-simx + + run-rtlsim: $(MAKE) -C vecadd run-rtlsim @@ -64,6 +71,9 @@ run-rtlsim: $(MAKE) -C kmeans run-rtlsim $(MAKE) -C blackscholes run-rtlsim $(MAKE) -C bfs run-rtlsim + $(MAKE) -C twokernels run-rtlsim + $(MAKE) -C copybuf run-rtlsim + clean: $(MAKE) -C vecadd clean @@ -85,4 +95,6 @@ clean: $(MAKE) -C guassian clean $(MAKE) -C kmeans clean $(MAKE) -C blackscholes clean - $(MAKE) -C bfs clean \ No newline at end of file + $(MAKE) -C bfs clean + $(MAKE) -C copybuf clean + $(MAKE) -C twokernels clean \ No newline at end of file diff --git a/tests/opencl/common.mk b/tests/opencl/common.mk index f9fe92d89e..757ab7c19d 100644 --- a/tests/opencl/common.mk +++ b/tests/opencl/common.mk @@ -43,7 +43,7 @@ VX_CFLAGS += -Xclang -target-feature -Xclang +zicond VX_CFLAGS += -mllvm -disable-loop-idiom-all # disable memset/memcpy loop replacement #VX_CFLAGS += -mllvm -vortex-branch-divergence=0 #VX_CFLAGS += -mllvm -debug -mllvm -print-after-all - +VX_CFLAGS += -save-temps VX_LDFLAGS += -Wl,-Bstatic,--gc-sections,-T$(VORTEX_HOME)/kernel/scripts/link$(XLEN).ld,--defsym=STARTUP_ADDR=$(STARTUP_ADDR) $(VORTEX_KN_PATH)/libvortex.a $(VX_LIBS) VX_BINTOOL += OBJCOPY=$(LLVM_VORTEX)/bin/llvm-objcopy $(VORTEX_HOME)/kernel/scripts/vxbin.py @@ -52,6 +52,7 @@ CXXFLAGS += -std=c++17 -Wall -Wextra -Wfatal-errors CXXFLAGS += -Wno-deprecated-declarations -Wno-unused-parameter -Wno-narrowing CXXFLAGS += -pthread CXXFLAGS += -I$(POCL_PATH)/include +CXXFLAGS += -I$(POCL_PATH)/include/hpp CXXFLAGS += $(CONFIGS) POCL_CC_FLAGS += LLVM_PREFIX=$(LLVM_VORTEX) POCL_VORTEX_BINTOOL="$(VX_BINTOOL)" POCL_VORTEX_CFLAGS="$(VX_CFLAGS)" POCL_VORTEX_LDFLAGS="$(VX_LDFLAGS)" diff --git a/tests/opencl/copybuf/Makefile b/tests/opencl/copybuf/Makefile new file mode 100644 index 0000000000..f10a02695a --- /dev/null +++ b/tests/opencl/copybuf/Makefile @@ -0,0 +1,11 @@ +ROOT_DIR := $(realpath ../../..) +include $(ROOT_DIR)/config.mk + +PROJECT := copybuf + +SRC_DIR := $(VORTEX_HOME)/tests/opencl/$(PROJECT) + +SRCS := $(SRC_DIR)/main.cc + + +include ../common.mk diff --git a/tests/opencl/copybuf/main.cc b/tests/opencl/copybuf/main.cc new file mode 100644 index 0000000000..76c243a96b --- /dev/null +++ b/tests/opencl/copybuf/main.cc @@ -0,0 +1,77 @@ +#define CL_TARGET_OPENCL_VERSION 120 +#include +#include +#include + +int main() { + cl_int err; + cl_uint num_platforms = 0; + err = clGetPlatformIDs(0, NULL, &num_platforms); + if (err != CL_SUCCESS || num_platforms == 0) { + fprintf(stderr, "No OpenCL platform\n"); + return -1; + } + cl_platform_id *platforms = (cl_platform_id *)malloc(sizeof(cl_platform_id) * num_platforms); + clGetPlatformIDs(num_platforms, platforms, NULL); + cl_uint num_devices = 0; + err = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices); + if (err != CL_SUCCESS || num_devices == 0) { + fprintf(stderr, "No OpenCL device\n"); + free(platforms); + return -1; + } + cl_device_id *devices = (cl_device_id *)malloc(sizeof(cl_device_id) * num_devices); + clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_ALL, num_devices, devices, NULL); + cl_context context = clCreateContext(NULL, 1, &devices[0], NULL, NULL, &err); + if (err != CL_SUCCESS) { + fprintf(stderr, "Failed to create context\n"); + return -1; + } + cl_command_queue queue = clCreateCommandQueue(context, devices[0], 0, &err); + if (err != CL_SUCCESS) { + fprintf(stderr, "Failed to create queue\n"); + return -1; + } + int src[] = {10, 20, 30, 40, 50}; + int dst[5] = {0}; + cl_mem bufSrc = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR , + sizeof(src), &src, &err); + if(err!= CL_SUCCESS){ + fprintf(stderr, "Failed to create buffer\n"); + return -1; + } + cl_mem bufDst = clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(dst), NULL, &err); + if (err != CL_SUCCESS) { + fprintf(stderr, "Failed to create buffer\n"); + return -1; + } + err = clEnqueueCopyBuffer(queue, bufSrc, bufDst, 0, 0, sizeof(src), 0, NULL, NULL); + if (err != CL_SUCCESS) { + fprintf(stderr, "Failed to copy buffer\n"); + return -1; + } + err = clEnqueueReadBuffer(queue, bufDst, CL_TRUE, 0, sizeof(dst), dst, 0, NULL, NULL); + if (err != CL_SUCCESS) { + fprintf(stderr, "Failed to read buffer\n"); + return -1; + } + + printf("Source Buffer: "); + for (int i = 0; i < 5; i++) + printf("%d ", src[i]); + printf("\nDestination buffer: "); + for (int i = 0; i < 5; i++) + printf("%d ", dst[i]); + printf("\n"); + + clReleaseMemObject(bufSrc); + clReleaseMemObject(bufDst); + clReleaseCommandQueue(queue); + clReleaseContext(context); + + free(devices); + free(platforms); + + return 0; +} diff --git a/tests/opencl/twokernels/Makefile b/tests/opencl/twokernels/Makefile new file mode 100644 index 0000000000..254a9d94ef --- /dev/null +++ b/tests/opencl/twokernels/Makefile @@ -0,0 +1,11 @@ +ROOT_DIR := $(realpath ../../..) +include $(ROOT_DIR)/config.mk + +PROJECT := twokernels + +SRC_DIR := $(VORTEX_HOME)/tests/opencl/$(PROJECT) + +SRCS := $(SRC_DIR)/main.cpp + + +include ../common.mk diff --git a/tests/opencl/twokernels/main.cpp b/tests/opencl/twokernels/main.cpp new file mode 100644 index 0000000000..23c8340ef6 --- /dev/null +++ b/tests/opencl/twokernels/main.cpp @@ -0,0 +1,126 @@ +#define CL_TARGET_OPENCL_VERSION 120 +#define CL_HPP_TARGET_OPENCL_VERSION 120 +#define CL_HPP_MINIMUM_OPENCL_VERSION 120 + +#include +#include +#include + + + +const char *kernelSrcAdd = R"CLC( +__kernel void pair_add(__global const int* in, __global int* out) { + int gid = get_global_id(0); + int i = 2 * gid; + out[gid] = in[i]+ in[i + 1]; +} +)CLC"; + + +const char *kernelSrcMul = R"CLC( +__kernel void pair_mul(__global const int* in, __global int* out) { + int gid = get_global_id(0); + int i = 2 * gid; + out[gid] = in[i]* in[i + 1]; +} +)CLC"; + + +std::vector runPairAdd(cl::Context &context, cl::CommandQueue &queue, + const std::vector &input) { + int N = input.size(); + int outN = N / 2; + std::vector output(outN, 0); + + cl::Buffer bufIn(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + sizeof(int) * N, const_cast(input.data())); + cl::Buffer bufOut(context, CL_MEM_WRITE_ONLY, sizeof(int) * outN); + + cl::Program program(context, kernelSrcAdd); + if (program.build({queue.getInfo()}) != CL_SUCCESS) { + std::cerr << "Error of bulding pair_add:\n" + << program.getBuildInfo(queue.getInfo()) + << std::endl; + return {}; + } + + cl::Kernel kernel(program, "pair_add"); + kernel.setArg(0, bufIn); + kernel.setArg(1, bufOut); + + queue.enqueueNDRangeKernel(kernel, cl::NullRange, + cl::NDRange(outN), cl::NullRange); + queue.enqueueReadBuffer(bufOut, CL_TRUE, 0, sizeof(int) * outN, output.data()); + queue.finish(); + return output; +} + +std::vector runPairMul(cl::Context &context, cl::CommandQueue &queue, + const std::vector &input) { + int N = input.size(); + int outN = N / 2; + std::vector output(outN, 0); + + cl::Buffer bufIn(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + sizeof(int) * N, const_cast(input.data())); + cl::Buffer bufOut(context, CL_MEM_WRITE_ONLY, sizeof(int) * outN); + + cl::Program program(context, kernelSrcMul); + if (program.build({queue.getInfo()}) != CL_SUCCESS) { + std::cerr << "Error of building pair_mul:\n" + << program.getBuildInfo(queue.getInfo()) + << std::endl; + return {}; + } + + cl::Kernel kernel(program, "pair_mul"); + kernel.setArg(0, bufIn); + kernel.setArg(1, bufOut); + queue.enqueueNDRangeKernel(kernel, cl::NullRange, + cl::NDRange(outN), cl::NullRange); + queue.enqueueReadBuffer(bufOut, CL_TRUE, 0, sizeof(int) * outN, output.data()); + queue.finish(); + return output; +} + +int main() { + std::vector platforms; + cl::Platform::get(&platforms); + if (platforms.empty()) { + std::cerr << "No OpenCL platforms\n"; + return 1; + } + + std::vector devices; + + platforms[0].getDevices(CL_DEVICE_TYPE_ALL, &devices); + if (devices.empty()) { + std::cerr << "No OpenCL devices\n"; + return 2; + } + + cl::Context context(devices[0]); + cl::CommandQueue queue(context, devices[0]); + + std::vector input = {1, 2, 3, 4, 5, 6, 7, 8}; + auto resultAdd = runPairAdd(context, queue, input); + auto resultMul = runPairMul(context, queue, input); + + + + + + std::cout << "Input: "; + for (auto v : input) + std::cout << v << " "; + std::cout << "\nResult of pair_add: "; + for (auto v : resultAdd) + std::cout << v << " "; + std::cout << "\nResult of pair_mul: "; + for (auto v : resultMul) + std::cout << v << " "; + std::cout << std::endl; + + + return 0; +}