From ab8799695e08a991ab81388c229b091337a54a66 Mon Sep 17 00:00:00 2001 From: Kali Uday Balleda Date: Wed, 31 May 2023 22:55:39 +0530 Subject: [PATCH 1/2] Rotate bug reproducer implementation --- rotate_bug/JULIA_ONEMKL_RUN.txt | 55 +++++++++++++++ rotate_bug/SYCL_ONEMKL_RUN.txt | 121 ++++++++++++++++++++++++++++++++ rotate_bug/rotate_gpu_usm.cpp | 96 +++++++++++++++++++++++++ rotate_bug/rotbug.jl | 40 +++++++++++ 4 files changed, 312 insertions(+) create mode 100755 rotate_bug/JULIA_ONEMKL_RUN.txt create mode 100755 rotate_bug/SYCL_ONEMKL_RUN.txt create mode 100755 rotate_bug/rotate_gpu_usm.cpp create mode 100755 rotate_bug/rotbug.jl diff --git a/rotate_bug/JULIA_ONEMKL_RUN.txt b/rotate_bug/JULIA_ONEMKL_RUN.txt new file mode 100755 index 00000000..1b7ed4fb --- /dev/null +++ b/rotate_bug/JULIA_ONEMKL_RUN.txt @@ -0,0 +1,55 @@ +kali@sdp:~/Kali/2023/2705_rotbug/oneAPI.jl$ $JULIA --project -L test/setup.jl test/rotbug.jl +x = ComplexF64[1.0 + 0.0im] +y = ComplexF64[1.0 + 0.0im] +x = ComplexF64[1.3928964429210167 + 0.0im] +Array(d_x) = ComplexF64[0.9592049927584356 + 0.0im] +c = 0.8187587885612081 +s = 0.5741376543598085 + 0.0im +ERROR: LoadError: AssertionError: x == Array(d_x) +Stacktrace: + [1] main(; n::Int64, T::Type{ComplexF64}) + @ Main ~/Kali/2023/2705_rotbug/oneAPI.jl/test/rotbug.jl:36 + [2] main() + @ Main ~/Kali/2023/2705_rotbug/oneAPI.jl/test/rotbug.jl:4 + [3] top-level scope + @ ~/Kali/2023/2705_rotbug/oneAPI.jl/test/rotbug.jl:39 +in expression starting at /home/kali/Kali/2023/2705_rotbug/oneAPI.jl/test/rotbug.jl:39 +kali@sdp:~/Kali/2023/2705_rotbug/oneAPI.jl$ vim test/rotbug.jl +kali@sdp:~/Kali/2023/2705_rotbug/oneAPI.jl$ $JULIA --project -L test/setup.jl test/rotbug.jl +x = ComplexF64[1.0 + 0.0im] +y = ComplexF64[1.0 + 0.0im] +x = ComplexF64[1.3928964429210167 + 0.0im] +Array(d_x) = ComplexF64[1.3928964429210167 + 0.0im] +c = 0.8187587885612081 +s = 0.5741376543598085 + 0.0im +kali@sdp:~/Kali/2023/2705_rotbug/oneAPI.jl$ $JULIA --project -L test/setup.jl test/rotbug.jl +x = ComplexF64[1.0 + 0.0im] +y = ComplexF64[1.0 + 0.0im] +x = ComplexF64[1.3928964429210167 + 0.0im] +Array(d_x) = ComplexF64[0.9592049927584356 + 0.0im] +c = 0.8187587885612081 +s = 0.5741376543598085 + 0.0im +ERROR: LoadError: AssertionError: x == Array(d_x) +Stacktrace: + [1] main(; n::Int64, T::Type{ComplexF64}) + @ Main ~/Kali/2023/2705_rotbug/oneAPI.jl/test/rotbug.jl:37 + [2] main() + @ Main ~/Kali/2023/2705_rotbug/oneAPI.jl/test/rotbug.jl:4 + [3] top-level scope + @ ~/Kali/2023/2705_rotbug/oneAPI.jl/test/rotbug.jl:40 +in expression starting at /home/kali/Kali/2023/2705_rotbug/oneAPI.jl/test/rotbug.jl:40 +kali@sdp:~/Kali/2023/2705_rotbug/oneAPI.jl$ $JULIA --project -L test/setup.jl test/rotbug.jl +x = ComplexF64[1.0 + 0.0im] +y = ComplexF64[1.0 + 0.0im] +x = ComplexF64[1.3928964429210167 + 0.0im] +Array(d_x) = ComplexF64[1.3928964429210167 + 0.0im] +c = 0.8187587885612081 +s = 0.5741376543598085 + 0.0im +kali@sdp:~/Kali/2023/2705_rotbug/oneAPI.jl$ $JULIA --project -L test/setup.jl test/rotbug.jl +x = ComplexF64[1.0 + 0.0im] +y = ComplexF64[1.0 + 0.0im] +x = ComplexF64[1.3928964429210167 + 0.0im] +Array(d_x) = ComplexF64[1.3928964429210167 + 0.0im] +c = 0.8187587885612081 +s = 0.5741376543598085 + 0.0im +kali@sdp:~/Kali/2023/2705_rotbug/oneAPI.jl$ \ No newline at end of file diff --git a/rotate_bug/SYCL_ONEMKL_RUN.txt b/rotate_bug/SYCL_ONEMKL_RUN.txt new file mode 100755 index 00000000..46e4461a --- /dev/null +++ b/rotate_bug/SYCL_ONEMKL_RUN.txt @@ -0,0 +1,121 @@ +kali@sdp:~/Kali/2023/2705_rotbug/onemkl_reproducer/2023.1.0/examples/dpcpp/blas/source$ ./rotate.exe +c: 0.818759s: (0.574138,0) +(1,0) +GPU Results: +1 +X - array +(1.3929,0) +Y - array +(0.244621,0) +Done with GPU Rotate !!, Starting CPU rotate +CPU Results: +X - array +(1.3929,0) +Y - array +(0.244621,0) +kali@sdp:~/Kali/2023/2705_rotbug/onemkl_reproducer/2023.1.0/examples/dpcpp/blas/source$ ./rotate.exe +c: 0.818759s: (0.574138,0) +(1,0) +GPU Results: +1 +X - array +(1.3929,0) +Y - array +(0.244621,0) +Done with GPU Rotate !!, Starting CPU rotate +CPU Results: +X - array +(1.3929,0) +Y - array +(0.244621,0) +kali@sdp:~/Kali/2023/2705_rotbug/onemkl_reproducer/2023.1.0/examples/dpcpp/blas/source$ ./rotate.exe +c: 0.818759s: (0.574138,0) +(1,0) +GPU Results: +1 +X - array +(1.3929,0) +Y - array +(0.244621,0) +Done with GPU Rotate !!, Starting CPU rotate +CPU Results: +X - array +(1.3929,0) +Y - array +(0.244621,0) +kali@sdp:~/Kali/2023/2705_rotbug/onemkl_reproducer/2023.1.0/examples/dpcpp/blas/source$ ./rotate.exe +c: 0.818759s: (0.574138,0) +(1,0) +GPU Results: +1 +X - array +(1.3929,0) +Y - array +(0.244621,0) +Done with GPU Rotate !!, Starting CPU rotate +CPU Results: +X - array +(1.3929,0) +Y - array +(0.244621,0) +kali@sdp:~/Kali/2023/2705_rotbug/onemkl_reproducer/2023.1.0/examples/dpcpp/blas/source$ ./rotate.exe +c: 0.818759s: (0.574138,0) +(1,0) +GPU Results: +1 +X - array +(1.3929,0) +Y - array +(0.244621,0) +Done with GPU Rotate !!, Starting CPU rotate +CPU Results: +X - array +(1.3929,0) +Y - array +(0.244621,0) +kali@sdp:~/Kali/2023/2705_rotbug/onemkl_reproducer/2023.1.0/examples/dpcpp/blas/source$ ./rotate.exe +c: 0.818759s: (0.574138,0) +(1,0) +GPU Results: +1 +X - array +(1.3929,0) +Y - array +(0.244621,0) +Done with GPU Rotate !!, Starting CPU rotate +CPU Results: +X - array +(1.3929,0) +Y - array +(0.244621,0) +kali@sdp:~/Kali/2023/2705_rotbug/onemkl_reproducer/2023.1.0/examples/dpcpp/blas/source$ ./rotate.exe +c: 0.818759s: (0.574138,0) +(1,0) +GPU Results: +1 +X - array +(1.3929,0) +Y - array +(0.244621,0) +Done with GPU Rotate !!, Starting CPU rotate +CPU Results: +X - array +(1.3929,0) +Y - array +(0.244621,0) +kali@sdp:~/Kali/2023/2705_rotbug/onemkl_reproducer/2023.1.0/examples/dpcpp/blas/source$ ./rotate.exe +c: 0.818759s: (0.574138,0) +(1,0) +GPU Results: +1 +X - array +(1.3929,0) +Y - array +(0.244621,0) +Done with GPU Rotate !!, Starting CPU rotate +CPU Results: +X - array +(1.3929,0) +Y - array +(0.244621,0) +kali@sdp:~/Kali/2023/2705_rotbug/onemkl_reproducer/2023.1.0/examples/dpcpp/blas/source$ \ No newline at end of file diff --git a/rotate_bug/rotate_gpu_usm.cpp b/rotate_bug/rotate_gpu_usm.cpp new file mode 100755 index 00000000..849bde56 --- /dev/null +++ b/rotate_bug/rotate_gpu_usm.cpp @@ -0,0 +1,96 @@ +#include +#include +#include +#include +using namespace sycl; + +int main() { +// double c = 0.5550150321048569; +// std::complex s(0.83184031769183,0.0); +// double c = 0.40516847542537016; +// std::complex s(0.9142420393536284,0.0); + + // Occasional failure + double c = 0.8187587885612081; + std::complex s(0.5741376543598085,0.0); + std::cout << "c: " << c << "s: " << s << std::endl; + try { + // Create a SYCL queue + cl::sycl::queue main_queue(cl::sycl::gpu_selector{}); + auto cxt = main_queue.get_context(); + auto dev = main_queue.get_device(); +#if 0 + auto ua = usm_allocator, usm::alloc::shared, 64>(cxt, dev); + std::vector, decltype(ua)> x(ua), y(ua); + for (int i = 0; i < 1; i++) { + x.push_back({1.0, 0.0}); + y.push_back({1.0, 0.0}); + } +#endif + std::complex *x = (std::complex *) malloc_shared(10 * sizeof(std::complex), dev, cxt); + std::complex *y = (std::complex *) malloc_shared(10 * sizeof(std::complex), dev, cxt); + + for (int i = 0; i < 1; i++) { + x[i] = {1,0}; + y[i] = {1,0}; + } + + for (int i = 0; i < 1; i++) { + std::cout << x[i] << std::endl; + } + + // Perform the Givens rotation on the vectors + auto status = oneapi::mkl::blas::column_major::rot(main_queue, 1, x, 1, + y, 1, c, s); + sycl::get_native(status); + main_queue.wait_and_throw(); + std::cout << "GPU Results: "<< std::endl; + int n = 1; + std::cout << n << std::endl; + std::cout << "X - array\n"; + for(int i = 0; i < n; i++) { + std::cout << x[i] << " "; + } + std::cout << std::endl; + + std::cout << "Y - array\n"; + for(int i = 0; i < n; i++) { + std::cout << y[i] << " "; + } + std::cout << std::endl; + + + } catch (cl::sycl::exception& e) { + std::cout << "SYCL exception encountered: " << e.what() << std::endl; + return 1; + } + + std::cout << "Done with GPU Rotate !!, Starting CPU rotate" << std::endl; + + std::vector > x; + std::vector >y; + for (int i = 0; i < 1; i++) { + x.push_back({1.0,0.0}); + y.push_back({1.0,0.0}); + } + int n = sizeof(x) / sizeof(x[0]); + + cblas_zrot(n, x.data(), 1, y.data(), 1, c, &s); + std::cout << "CPU Results: "<< std::endl; + + std::cout << "X - array\n"; + for(int i = 0; i < n; i++) { + std::cout << x[i] << " "; + } + std::cout << std::endl; + + std::cout << "Y - array\n"; + for(int i = 0; i < n; i++) { + std::cout << y[i] << " "; + } + std::cout << std::endl; + + + return 0; +} + diff --git a/rotate_bug/rotbug.jl b/rotate_bug/rotbug.jl new file mode 100755 index 00000000..8d15d3fd --- /dev/null +++ b/rotate_bug/rotbug.jl @@ -0,0 +1,40 @@ +using oneAPI +using MKL +using LinearAlgebra +function main(; n = 1, T = ComplexF64) + x = ones(T, n) + y = ones(T, n) + #c = rand(real(T)) + #s = T(sqrt(1 - c^2)) + c = 0.8187587885612081 + s = T(sqrt(1-c^2)) + @show x + @show y + + @test c*c + s*conj(s) ≈ 1 + d_x = oneArray(x) + d_y = oneArray(y) + + incx = stride(x, 1) + incy = stride(y, 1) + BLAS.rot!(n, x, incx, y, incy, c, s) + + queue = global_queue(context(d_x), device(d_x)) + incx = stride(d_x, 1) + incy = stride(d_y, 1) + @assert length(d_x) >= 1 + (n - 1)*abs(incx) + @assert length(d_y) >= 1 + (n - 1)*abs(incy) + + (T == Float32) && oneMKL.onemklSrot(sycl_queue(queue), n, d_x, incx, d_y, incy, c, s) + (T == Float64) && oneMKL.onemklDrot(sycl_queue(queue), n, d_x, incx, d_y, incy, c, s) + (T == ComplexF32) && oneMKL.onemklCrot(sycl_queue(queue), n, d_x, incx, d_y, incy, c, s) + (T == ComplexF64) && oneMKL.onemklZrot(sycl_queue(queue), n, d_x, incx, d_y, incy, c, s) + + @show x + @show Array(d_x) + @show c + @show s + @assert x == Array(d_x) +end + +main() From 4ac91f3f6c6fb4d5b75bf6bee9938293f5ee877e Mon Sep 17 00:00:00 2001 From: Kali Uday Balleda Date: Wed, 31 May 2023 22:57:25 +0530 Subject: [PATCH 2/2] added readme --- rotate_bug/README.log | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 rotate_bug/README.log diff --git a/rotate_bug/README.log b/rotate_bug/README.log new file mode 100644 index 00000000..d5687a78 --- /dev/null +++ b/rotate_bug/README.log @@ -0,0 +1,3 @@ +Compile Command: dpcpp -g -fsycl -I/opt/intel/oneapi/mkl/2023.1.0/include/ -L/opt/intel/oneapi/mkl/2023.1.0/lib/ -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread -lm -ldl -lmkl_sycl rotate_gpu_usm.cpp -o rotgpu_usm.exe + +Run Julia: $JULIA --project -L test/setup.jl test/rotbug.jl