Patch: alpaka GPU code generation for the SOFIE Transpose operator.

What the hunks below do:
 - ROperator_Transpose.hxx: emit the body of the generated TransposeKernel
   (for every output coordinate, accumulate an input offset through perm[] and
   an output offset, then copy one element), add the kernel-object definition,
   and add Generate_GPU_ALPAKA, which emits the work division and the
   alpaka::exec launch.
 - SOFIE_common.hxx/.cxx: add ConvertIntShapeToString, which prints a vector as
   "{ a , b , c }", and a ConvertShapeToString overload that forwards to it.
 - test/input_models/Transpose.onnx: new binary test model.

Review notes:
 - NOTE(review): this text was recovered from a copy whose line breaks had been
   collapsed. Blank context lines could not be recovered, so the line counts in
   the @@ headers (kept as found) are larger than the number of lines shown.
   Regenerate the patch from the repository before applying it.
 - NOTE(review): template argument lists appear to have been stripped from the
   text (e.g. "template\n", "alpaka::Dim;", "static_cast(DimAcc::value)",
   "alpaka::Vec shapeVec{}", "std::vector & shape", and the workDiv_ line in
   Generate_GPU_ALPAKA). They are left exactly as found -- TODO restore them
   from the real source.
 - NOTE(review): the kernel takes const std::size_t* for strides/shapes/perm,
   while Generate_GPU_ALPAKA emits those arguments through ConvertShapeToString,
   presumably as "{ a , b }" text -- confirm the generated launch compiles.
 - NOTE(review): the kernel's ndim parameter is not read; the loops run over D,
   taken from the accelerator dimension -- confirm both always agree.

diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx
index 090d2d8..c006c09 100644
--- a/src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx
+++ b/src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx
@@ -165,18 +165,55 @@ public:
       return out.str();
    }
-   std::string Generate_GPU_Kernel_ALPAKA() {
+   std::string Generate_GPU_Kernel_ALPAKA(std::string /*opName*/) {
       std::string op;
       op = "\n//------ TRANSPOSE_KERNEL_ALPAKA\n";
-      op += SP + "struct TransposeKernel{\n";
+      op += SP + "struct TransposeKernel {\n";
       op += SP + SP + "template\n";
-      op += SP + SP + "ALPAKA_FN_ACC void operator()(TAcc const & acc, T const * input, T const * output, std::size_t * shape, std::size_t * strides) const {\n";
-      op += SP + SP + SP + "for (auto i : alpaka::uniformElementsND(acc, shape)) {\n";
-      op += SP + SP + SP + SP + "size_t input_idx = 0;\n";
+      op += SP + SP + "ALPAKA_FN_ACC void operator()(TAcc const& acc, T const* input, T* output, const std::size_t* input_strides,";
+      op += "const std::size_t* output_strides, const std::size_t* input_shape,";
+      op += "const std::size_t* output_shape, const std::size_t* perm,";
+      op += "const std::size_t ndim) const {\n";
+      op += SP + SP + SP + SP + "using DimAcc = alpaka::Dim;\n";
+      op += SP + SP + SP + SP + "using IdxAcc = alpaka::Idx;\n";
+      op += SP + SP + SP + SP + "constexpr std::size_t D = static_cast(DimAcc::value);\n";
+      op += SP + SP + SP + SP + "alpaka::Vec shapeVec{};\n";
+      op += SP + SP + SP + SP + "for (std::size_t d = 0; d < D; ++d) shapeVec[d] = output_shape[d];\n";
+      op += SP + SP + SP + SP + "auto elements = alpaka::uniformElementsND(acc, shapeVec);\n";
+      op += SP + SP + SP + SP + "for (auto const& idx : elements) {\n";
+      op += SP + SP + SP + SP + SP + "std::size_t input_idx = 0;\n";
+      op += SP + SP + SP + SP + SP + "std::size_t output_idx = 0;\n";
+      op += SP + SP + SP + SP + SP + "for (std::size_t d = 0; d < D; ++d) {\n";
+      op += SP + SP + SP + SP + SP + SP + "std::size_t out_coord = idx[d];\n";
+      op += SP + SP + SP + SP + SP + SP + "std::size_t in_axis = perm[d];\n";
+      op += SP + SP + SP + SP + SP + SP + "input_idx += out_coord * input_strides[in_axis];\n";
+      op += SP + SP + SP + SP + SP + SP + "output_idx += out_coord * output_strides[d];\n";
+      op += SP + SP + SP + SP + SP + "}\n";
+      op += SP + SP + SP + SP + SP + "output[output_idx] = input[input_idx];\n";
+      op += SP + SP + SP + SP + "}\n";
+      op += SP + SP + SP + "}\n";
+      op += SP + SP + "};\n";
       return op;
    }
+   std::string Generate_GPU_Kernel_Definitions_ALPAKA(std::string /*opName*/) override {
+      return SP + "TransposeKernel transposeKernel;\n";
+   }
+
+   std::string Generate_GPU_ALPAKA(std::string OpName) override {
+      OpName = "op_" + OpName;
+      if (fShapeOutput.empty()) {
+         throw std::runtime_error("TMVA SOFIE Operator Transpose called to Generate without being initialized first");
+      }
+      std::stringstream out;
+      auto length = ConvertShapeToLength(fShapeOutput);
+      out << "\n//------ TRANSPOSE_GPU_ALPAKA\n";
+      out << SP << "alpaka::WorkDivMembers workDiv_"<::all("<< length << " + 256 - 1) / 256), alpaka::Vec::all(256), alpaka::Vec::all(1));\n";
+      out << SP << "alpaka::exec(queue, workDiv_" << fNData << ", transposeKernel, alpaka::getPtrNative(deviceBuf_" << fNData << "), alpaka::getPtrNative(deviceBuf_" << fNOutput << "), " << ConvertShapeToString(UTILITY::ComputeStrideFromShape(fShapeData))<<", "
+          << ConvertShapeToString(UTILITY::ComputeStrideFromShape(fShapeOutput)) << ", " << ConvertShapeToString(fShapeData) << ", " << ConvertShapeToString(fShapeOutput) << ", " << ConvertShapeToString(fAttrPerm) << ", " << fShapeOutput.size()<<");\n";
+      return out.str();
+   }
 };
diff --git a/src/SOFIE_core/inc/SOFIE/SOFIE_common.hxx b/src/SOFIE_core/inc/SOFIE/SOFIE_common.hxx
index 8b9727b..4237d93 100644
--- a/src/SOFIE_core/inc/SOFIE/SOFIE_common.hxx
+++ b/src/SOFIE_core/inc/SOFIE/SOFIE_common.hxx
@@ -202,6 +202,8 @@ inline std::size_t ConvertShapeToLength(const std::vector & shape){
 }
 std::string ConvertShapeToString(const std::vector & shape);
+std::string ConvertIntShapeToString(const std::vector & shape);
+std::string ConvertShapeToString(const std::vector & shape);
 std::string ConvertDimShapeToString(const std::vector & shape);
 std::string ConvertShapeToString(const std::vector & shape);
diff --git a/src/SOFIE_core/src/SOFIE_common.cxx b/src/SOFIE_core/src/SOFIE_common.cxx
index 05f873b..78c0703 100644
--- a/src/SOFIE_core/src/SOFIE_common.cxx
+++ b/src/SOFIE_core/src/SOFIE_common.cxx
@@ -116,6 +116,20 @@ std::string ConvertShapeToString(const std::vector & shape) {
    return out.str();
 }
+std::string ConvertIntShapeToString(const std::vector & shape) {
+   std::stringstream out;
+   out << "{ ";
+   for (size_t i = 0; i < shape.size(); i++) {
+      out << shape[i];
+      if (i < shape.size()-1) out << " , ";
+   }
+   out << " }";
+   return out.str();
+}
+std::string ConvertShapeToString(const std::vector & shape) {
+   return ConvertIntShapeToString(shape);
+}
+
 std::string ConvertDimShapeToString(const std::vector & shape) {
    std::stringstream out;
    out << "{ ";
diff --git a/src/SOFIE_core/test/input_models/Transpose.onnx b/src/SOFIE_core/test/input_models/Transpose.onnx
new file mode 100644
index 0000000..0e08157
Binary files /dev/null and b/src/SOFIE_core/test/input_models/Transpose.onnx differ