From 2ed98a161547183fee35319d22c5cc41d79cd6c5 Mon Sep 17 00:00:00 2001 From: Saransh Chopra Date: Mon, 1 Dec 2025 14:24:22 +0100 Subject: [PATCH 1/9] feat: transpose kernel for Alpaka --- .../inc/SOFIE/ROperator_Transpose.hxx | 40 ++++++++++++++++--- 1 file changed, 35 insertions(+), 5 deletions(-) diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx index 090d2d8..709bb3a 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx @@ -165,18 +165,48 @@ public: return out.str(); } - std::string Generate_GPU_Kernel_ALPAKA() { + std::string Generate_GPU_Kernel_ALPAKA(std::string /*opName*/) { std::string op; op = "\n//------ TRANSPOSE_KERNEL_ALPAKA\n"; - op += SP + "struct TransposeKernel{\n"; + op += SP + "struct TransposeKernel {\n"; op += SP + SP + "template\n"; - op += SP + SP + "ALPAKA_FN_ACC void operator()(TAcc const & acc, T const * input, T const * output, std::size_t * shape, std::size_t * strides) const {\n"; - op += SP + SP + SP + "for (auto i : alpaka::uniformElementsND(acc, shape)) {\n"; - op += SP + SP + SP + SP + "size_t input_idx = 0;\n"; + op += SP + SP + "ALPAKA_FN_ACC void operator()(TAcc const & acc, T const * input, T * output,"; + op += "std::size_t const * input_strides, std::size_t const * output_strides, std::size_t const * perm, "; + op += "std::size_t const ndim) const {\n"; + op += SP + SP + SP + SP + "auto elements = alpaka::uniformElementsND(acc, alpaka::Vec(output_shape));\n"; + op += SP + SP + SP + SP + "for (auto const& elem : elements) {\n"; + op += SP + SP + SP + SP + SP + "size_t input_idx = 0;\n"; + op += SP + SP + SP + SP + SP + "size_t output_idx = 0;\n"; + op += SP + SP + SP + SP + SP + "for (int i = 0; i < ndim; ++i) {\n"; + op += SP + SP + SP + SP + SP + SP + "size_t out_coord = elem[i];\n"; + op += SP + SP + SP + SP + SP + SP + "size_t in_axis = perm[i];\n"; + op += SP + SP + SP + SP + SP + SP + "input_idx += out_coord * input_strides[in_axis];\n"; + op += SP + SP + SP + SP + SP + SP + "output_idx += out_coord * output_strides[i];\n"; + op += SP + SP + SP + SP + SP + "}\n"; + op += SP + SP + SP + SP + SP + "output[output_idx] = input[input_idx];\n"; + op += SP + SP + SP + SP + "}\n"; + op += SP + SP + "}\n"; + op += SP + "};\n"; return op; } + std::string Generate_GPU_Kernel_Definitions_ALPAKA(std::string /*opName*/) override { + return SP + "TransposeKernel transposeKernel;\n"; + } + + std::string Generate_GPU_ALPAKA(std::string OpName) override { + OpName = "op_" + OpName; + if (fShape.empty()) { + throw std::runtime_error("TMVA SOFIE Operator Transpose called to Generate without being initialized first"); + } + std::stringstream out; + auto length = ConvertDynamicShapeToLength(fShape); + out << "\n//------ TRANSPOSE_GPU_ALPAKA\n"; + out << SP << "alpaka::WorkDivMembers workDiv_"<::all("<<(stoi(length)+256-1)/256<<"), alpaka::Vec::all(256), alpaka::Vec::all(1));\n"; + out << SP << "alpaka::exec(queue, workDiv_" << fNX << ", transposeKernel, alpaka::getPtrNative(deviceBuf_" << fNX << "), static_cast(" << length << ")); \n"; + return out.str(); + } }; From ea1c751783022777fd8fd548b20888f3b25f6af4 Mon Sep 17 00:00:00 2001 From: Saransh Chopra Date: Tue, 9 Dec 2025 10:56:32 +0100 Subject: [PATCH 2/9] update kernel call with internal functions --- src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx index 709bb3a..ae4ca10 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx @@ -203,8 +203,9 @@ public: std::stringstream out; auto length = ConvertDynamicShapeToLength(fShape); out << "\n//------ TRANSPOSE_GPU_ALPAKA\n"; - out << SP << "alpaka::WorkDivMembers workDiv_"<::all("<<(stoi(length)+256-1)/256<<"), alpaka::Vec::all(256), alpaka::Vec::all(1));\n"; - out << SP << "alpaka::exec(queue, workDiv_" << fNX << ", transposeKernel, alpaka::getPtrNative(deviceBuf_" << fNX << "), static_cast(" << length << ")); \n"; + out << SP << "alpaka::WorkDivMembers workDiv_"<::all("<<(stoi(length)+256-1)/256<<"), alpaka::Vec::all(256), alpaka::Vec::all(1));\n"; + out << SP << "alpaka::exec(queue, workDiv_" << fNData << ", transposeKernel, alpaka::getPtrNative(deviceBuf_" << fNOutput << "), " << ConvertShapeToString(UTILITY::ComputeStrideFromShape(fShapeData))<<", " + << ConvertShapeToString(UTILITY::ComputeStrideFromShape(fShapeOutput)) << ", " << ConvertShapeToString(fAttrPerm) << ", " << fShapeOutput.size()<<");\n"; return out.str(); } From e639084cbe7e51a595a462bdef141f6f9c4a2167 Mon Sep 17 00:00:00 2001 From: Saransh Chopra Date: Tue, 9 Dec 2025 14:07:51 +0100 Subject: [PATCH 3/9] use shapes as kernel arguments --- .../inc/SOFIE/ROperator_Transpose.hxx | 36 +++++++++++-------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx index ae4ca10..d52cd61 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx @@ -170,23 +170,29 @@ public: op = "\n//------ TRANSPOSE_KERNEL_ALPAKA\n"; op += SP + "struct TransposeKernel {\n"; op += SP + SP + "template\n"; - op += SP + SP + "ALPAKA_FN_ACC void operator()(TAcc const & acc, T const * input, T * output,"; - op += "std::size_t const * input_strides, std::size_t const * output_strides, std::size_t const * perm, "; - op += "std::size_t const ndim) const {\n"; - op += SP + SP + SP + SP + "auto elements = alpaka::uniformElementsND(acc, alpaka::Vec(output_shape));\n"; - op += SP + SP + SP + SP + "for (auto const& elem : elements) {\n"; - op += SP + SP + SP + SP + SP + "size_t input_idx = 0;\n"; - op += SP + SP + SP + SP + SP + "size_t output_idx = 0;\n"; - op += SP + SP + SP + SP + SP + "for (int i = 0; i < ndim; ++i) {\n"; - op += SP + SP + SP + SP + SP + SP + "size_t out_coord = elem[i];\n"; - op += SP + SP + SP + SP + SP + SP + "size_t in_axis = perm[i];\n"; - op += SP + SP + SP + SP + SP + SP + "input_idx += out_coord * input_strides[in_axis];\n"; - op += SP + SP + SP + SP + SP + SP + "output_idx += out_coord * output_strides[i];\n"; + op += SP + SP + "ALPAKA_FN_ACC void operator()(TAcc const& acc, T const* input, T* output, const std::size_t* input_strides,,"; + op += "const std::size_t* output_strides, const std::size_t* input_shape,"; + op += "const std::size_t* output_shape, const std::size_t* perm,"; + op += "const std::size_t ndim) const {\n"; + op += SP + SP + SP + SP + "using DimAcc = alpaka::Dim;\n"; + op += SP + SP + SP + SP + "using IdxAcc = alpaka::Idx;\n"; + op += SP + SP + SP + SP + "constexpr std::size_t D = static_cast(DimAcc::value);\n"; + op += SP + SP + SP + SP + "alpaka::Vec shapeVec{};\n"; + op += SP + SP + SP + SP + "for (std::size_t d = 0; d < D; ++d) shapeVec[d] = output_shape[d];\n"; + op += SP + SP + SP + SP + "auto elements = alpaka::uniformElementsND(acc, shapeVec);\n"; + op += SP + SP + SP + SP + "for (auto const& idx : elements) {\n"; + op += SP + SP + SP + SP + SP + "std::size_t input_idx = 0;\n"; + op += SP + SP + SP + SP + SP + "std::size_t output_idx = 0;\n"; + op += SP + SP + SP + SP + SP + "for (std::size_t d = 0; d < D; ++d) {\n"; + op += SP + SP + SP + SP + SP + SP + "std::size_t out_coord = idx[d];\n"; + op += SP + SP + SP + SP + SP + SP + "std::size_t in_axis = perm[d];\n"; + op += SP + SP + SP + SP + SP + SP + "input_idx += out_coord * input_strides[in_axis];\n"; + op += SP + SP + SP + SP + SP + SP + "output_idx += out_coord * output_strides[d];\n"; op += SP + SP + SP + SP + SP + "}\n"; op += SP + SP + SP + SP + SP + "output[output_idx] = input[input_idx];\n"; op += SP + SP + SP + SP + "}\n"; - op += SP + SP + "}\n"; - op += SP + "};\n"; + op += SP + SP + SP" + }\n"; + op += SP + SP + "};\n"; return op; } @@ -205,7 +211,7 @@ public: out << "\n//------ TRANSPOSE_GPU_ALPAKA\n"; out << SP << "alpaka::WorkDivMembers workDiv_"<::all("<<(stoi(length)+256-1)/256<<"), alpaka::Vec::all(256), alpaka::Vec::all(1));\n"; out << SP << "alpaka::exec(queue, workDiv_" << fNData << ", transposeKernel, alpaka::getPtrNative(deviceBuf_" << fNOutput << "), " << ConvertShapeToString(UTILITY::ComputeStrideFromShape(fShapeData))<<", " - << ConvertShapeToString(UTILITY::ComputeStrideFromShape(fShapeOutput)) << ", " << ConvertShapeToString(fAttrPerm) << ", " << fShapeOutput.size()<<");\n"; + << ConvertShapeToString(UTILITY::ComputeStrideFromShape(fShapeOutput)) << ", " << ConvertShapeToString(fShapeData) << ", " << ConvertShapeToString(fShapeOutput) << ", " << ConvertShapeToString(fAttrPerm) << ", " << fShapeOutput.size()<<");\n"; return out.str(); } From ad3328a172302a9cfe1ca4138c517f2d1fa2809d Mon Sep 17 00:00:00 2001 From: Saransh Chopra Date: Tue, 9 Dec 2025 17:15:05 +0100 Subject: [PATCH 4/9] woops, pass input to exec too --- src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx index d52cd61..5418c28 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx @@ -210,7 +210,7 @@ public: auto length = ConvertDynamicShapeToLength(fShape); out << "\n//------ TRANSPOSE_GPU_ALPAKA\n"; out << SP << "alpaka::WorkDivMembers workDiv_"<::all("<<(stoi(length)+256-1)/256<<"), alpaka::Vec::all(256), alpaka::Vec::all(1));\n"; - out << SP << "alpaka::exec(queue, workDiv_" << fNData << ", transposeKernel, alpaka::getPtrNative(deviceBuf_" << fNOutput << "), " << ConvertShapeToString(UTILITY::ComputeStrideFromShape(fShapeData))<<", " + out << SP << "alpaka::exec(queue, workDiv_" << fNData << ", transposeKernel, alpaka::getPtrNative(deviceBuf_" << fNData << "), alpaka::getPtrNative(deviceBuf_" << fNOutput << "), " << ConvertShapeToString(UTILITY::ComputeStrideFromShape(fShapeData))<<", " << ConvertShapeToString(UTILITY::ComputeStrideFromShape(fShapeOutput)) << ", " << ConvertShapeToString(fShapeData) << ", " << ConvertShapeToString(fShapeOutput) << ", " << ConvertShapeToString(fAttrPerm) << ", " << fShapeOutput.size()<<");\n"; return out.str(); } From 215e0a2f502d1e899ab6a57051522082e5be164c Mon Sep 17 00:00:00 2001 From: Saransh Chopra Date: Sun, 14 Dec 2025 14:34:44 +0100 Subject: [PATCH 5/9] fix build --- src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx | 9 +++------ src/SOFIE_core/inc/SOFIE/SOFIE_common.hxx | 2 ++ src/SOFIE_core/src/SOFIE_common.cxx | 14 ++++++++++++++ 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx index 5418c28..945942c 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx @@ -191,7 +191,7 @@ public: op += SP + SP + SP + SP + SP + "}\n"; op += SP + SP + SP + SP + SP + "output[output_idx] = input[input_idx];\n"; op += SP + SP + SP + SP + "}\n"; - op += SP + SP + SP" + }\n"; + op += SP + SP + SP + "}\n"; op += SP + SP + "};\n"; return op; @@ -203,13 +203,10 @@ public: std::string Generate_GPU_ALPAKA(std::string OpName) override { OpName = "op_" + OpName; - if (fShape.empty()) { - throw std::runtime_error("TMVA SOFIE Operator Transpose called to Generate without being initialized first"); - } std::stringstream out; - auto length = ConvertDynamicShapeToLength(fShape); + auto length = ConvertShapeToLength(fShapeOutput); out << "\n//------ TRANSPOSE_GPU_ALPAKA\n"; - out << SP << "alpaka::WorkDivMembers workDiv_"<::all("<<(stoi(length)+256-1)/256<<"), alpaka::Vec::all(256), alpaka::Vec::all(1));\n"; + out << SP << "alpaka::WorkDivMembers workDiv_"<::all("<< length << " + 256 - 1) / 256), alpaka::Vec::all(256), alpaka::Vec::all(1));\n"; out << SP << "alpaka::exec(queue, workDiv_" << fNData << ", transposeKernel, alpaka::getPtrNative(deviceBuf_" << fNData << "), alpaka::getPtrNative(deviceBuf_" << fNOutput << "), " << ConvertShapeToString(UTILITY::ComputeStrideFromShape(fShapeData))<<", " << ConvertShapeToString(UTILITY::ComputeStrideFromShape(fShapeOutput)) << ", " << ConvertShapeToString(fShapeData) << ", " << ConvertShapeToString(fShapeOutput) << ", " << ConvertShapeToString(fAttrPerm) << ", " << fShapeOutput.size()<<");\n"; return out.str(); diff --git a/src/SOFIE_core/inc/SOFIE/SOFIE_common.hxx b/src/SOFIE_core/inc/SOFIE/SOFIE_common.hxx index 8b9727b..4237d93 100644 --- a/src/SOFIE_core/inc/SOFIE/SOFIE_common.hxx +++ b/src/SOFIE_core/inc/SOFIE/SOFIE_common.hxx @@ -202,6 +202,8 @@ inline std::size_t ConvertShapeToLength(const std::vector & shape){ } std::string ConvertShapeToString(const std::vector & shape); +std::string ConvertIntShapeToString(const std::vector & shape); +std::string ConvertShapeToString(const std::vector & shape); std::string ConvertDimShapeToString(const std::vector & shape); std::string ConvertShapeToString(const std::vector & shape); diff --git a/src/SOFIE_core/src/SOFIE_common.cxx b/src/SOFIE_core/src/SOFIE_common.cxx index 05f873b..78c0703 100644 --- a/src/SOFIE_core/src/SOFIE_common.cxx +++ b/src/SOFIE_core/src/SOFIE_common.cxx @@ -116,6 +116,20 @@ std::string ConvertShapeToString(const std::vector & shape) { return out.str(); } +std::string ConvertIntShapeToString(const std::vector & shape) { + std::stringstream out; + out << "{ "; + for (size_t i = 0; i < shape.size(); i++) { + out << shape[i]; + if (i < shape.size()-1) out << " , "; + } + out << " }"; + return out.str(); +} +std::string ConvertShapeToString(const std::vector & shape) { + return ConvertIntShapeToString(shape); +} + std::string ConvertDimShapeToString(const std::vector & shape) { std::stringstream out; out << "{ "; From 67b46c79b33589ca45cc997e7a8eb8f0da665e8b Mon Sep 17 00:00:00 2001 From: Saransh Chopra Date: Sun, 14 Dec 2025 14:43:18 +0100 Subject: [PATCH 6/9] add runtime error --- src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx index 945942c..c006c09 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx @@ -203,6 +203,9 @@ public: std::string Generate_GPU_ALPAKA(std::string OpName) override { OpName = "op_" + OpName; + if (fShapeOutput.empty()) { + throw std::runtime_error("TMVA SOFIE Operator Concat called to Generate without being initialized first"); + } std::stringstream out; auto length = ConvertShapeToLength(fShapeOutput); out << "\n//------ TRANSPOSE_GPU_ALPAKA\n"; From a4677ecbb0ae25f22e6baf90d00d57dd8d2db352 Mon Sep 17 00:00:00 2001 From: Saransh Chopra Date: Tue, 16 Dec 2025 03:46:32 +0530 Subject: [PATCH 7/9] add transpose onnx model --- src/SOFIE_core/test/input_models/Transpose.onnx | Bin 0 -> 164 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 src/SOFIE_core/test/input_models/Transpose.onnx diff --git a/src/SOFIE_core/test/input_models/Transpose.onnx b/src/SOFIE_core/test/input_models/Transpose.onnx new file mode 100644 index 0000000000000000000000000000000000000000..5955409c5f7ca5ea4fe9766169a90ca595b27561 GIT binary patch literal 164 zcmdYbPbBmf-pB!&P0 literal 0 HcmV?d00001 From 72d8f9c8ac2dfbff06c2993bcd07ab8408f4413f Mon Sep 17 00:00:00 2001 From: Saransh Chopra Date: Tue, 16 Dec 2025 14:48:07 +0530 Subject: [PATCH 8/9] rm old file --- src/SOFIE_core/test/input_models/Transpose.onnx | Bin 164 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 src/SOFIE_core/test/input_models/Transpose.onnx diff --git a/src/SOFIE_core/test/input_models/Transpose.onnx b/src/SOFIE_core/test/input_models/Transpose.onnx deleted file mode 100644 index 5955409c5f7ca5ea4fe9766169a90ca595b27561..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 164 zcmdYbPbBmf-pB!&P0 From 06e392b3f490d988be30fa854f6c53475d321160 Mon Sep 17 00:00:00 2001 From: Saransh Chopra Date: Tue, 16 Dec 2025 15:00:07 +0530 Subject: [PATCH 9/9] update transpose model --- src/SOFIE_core/test/input_models/Transpose.onnx | Bin 0 -> 156 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 src/SOFIE_core/test/input_models/Transpose.onnx diff --git a/src/SOFIE_core/test/input_models/Transpose.onnx b/src/SOFIE_core/test/input_models/Transpose.onnx new file mode 100644 index 0000000000000000000000000000000000000000..0e08157fb44f2c39f08643eb57f36b8e68631a64 GIT binary patch literal 156 zcmd