diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Concat.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Concat.hxx
index c828668..892e4a5 100644
--- a/src/SOFIE_core/inc/SOFIE/ROperator_Concat.hxx
+++ b/src/SOFIE_core/inc/SOFIE/ROperator_Concat.hxx
@@ -316,7 +316,94 @@
       return out.str();
    }
 
-};
+
+   // Build the source text of the alpaka `ConcatKernel` functor. For each output
+   // index the generated kernel walks `axis_sizes` to find the input that covers
+   // the coordinate along `axis`, subtracts the extents of the inputs before it
+   // and copies the element found at that position in the chosen input.
+   // NOTE(review): unlike the two generators below, this one is not marked
+   // `override` - confirm against the base class declaration.
+   std::string Generate_GPU_Kernel_ALPAKA(std::string /*opName*/) {
+      std::string op;
+      op = "\n//------ CONCAT_KERNEL_ALPAKA\n";
+      op += SP + "struct ConcatKernel {\n";
+      op += SP + SP + "template <typename TAcc, typename T>\n";
+      op += SP + SP + "ALPAKA_FN_ACC void operator()(TAcc const& acc, T const* const* input_ptrs, T* output,";
+      op += " std::size_t const* const* input_strides_ptrs, std::size_t const* axis_sizes,";
+      op += " std::size_t num_inputs, std::size_t axis, std::size_t const* output_strides,";
+      op += " std::size_t const* output_shape) const {\n";
+      op += SP + SP + SP + SP + "using DimAcc = alpaka::Dim<TAcc>;\n";
+      op += SP + SP + SP + SP + "using IdxAcc = alpaka::Idx<TAcc>;\n";
+      op += SP + SP + SP + SP + "constexpr std::size_t D = static_cast<std::size_t>(DimAcc::value);\n";
+      op += SP + SP + SP + SP + "alpaka::Vec<DimAcc, IdxAcc> shapeVec{};\n";
+      op += SP + SP + SP + SP + "for (std::size_t d = 0; d < D; ++d) shapeVec[d] = output_shape[d];\n";
+      op += SP + SP + SP + SP + "auto elements = alpaka::uniformElementsND(acc, shapeVec);\n";
+      // the range variable must be named idx: all statements below read idx[...]
+      op += SP + SP + SP + SP + "for (auto const& idx : elements) {\n";
+      op += SP + SP + SP + SP + SP + "std::size_t out_idx = 0;\n";
+      op += SP + SP + SP + SP + SP + "for (std::size_t d = 0; d < D; ++d) out_idx += idx[d] * output_strides[d];\n";
+      op += SP + SP + SP + SP + SP + "std::size_t axis_coord = idx[axis];\n";
+      op += SP + SP + SP + SP + SP + "std::size_t chosen = 0;\n";
+      op += SP + SP + SP + SP + SP + "std::size_t offset = 0;\n";
+      op += SP + SP + SP + SP + SP + "for (std::size_t k = 0; k < num_inputs; ++k) {\n";
+      op += SP + SP + SP + SP + SP + SP + "std::size_t sz = axis_sizes[k];\n";
+      op += SP + SP + SP + SP + SP + SP + "if (axis_coord < offset + sz) { chosen = k; break; }\n";
+      op += SP + SP + SP + SP + SP + SP + "offset += sz;\n";
+      op += SP + SP + SP + SP + SP + "}\n";
+      op += SP + SP + SP + SP + SP + "std::size_t in_idx = 0;\n";
+      op += SP + SP + SP + SP + SP + "for (std::size_t d = 0; d < D; ++d) {\n";
+      op += SP + SP + SP + SP + SP + SP + "std::size_t coord_out = idx[d];\n";
+      op += SP + SP + SP + SP + SP + SP + "std::size_t coord_in = (d == axis) ? (coord_out - offset) : coord_out;\n";
+      op += SP + SP + SP + SP + SP + SP + "in_idx += coord_in * input_strides_ptrs[chosen][d];\n";
+      op += SP + SP + SP + SP + SP + "}\n";
+      op += SP + SP + SP + SP + SP + "T const* src = input_ptrs[chosen];\n";
+      op += SP + SP + SP + SP + SP + "output[out_idx] = src[in_idx];\n";
+      op += SP + SP + SP + SP + "}\n";
+      op += SP + SP + SP + "}\n";
+      op += SP + SP + "};\n";
+
+      return op;
+   }
+
+   // Emit the declaration of the kernel functor instance used by the launch code.
+   std::string Generate_GPU_Kernel_Definitions_ALPAKA(std::string /*opName*/) override {
+      return SP + "ConcatKernel concatKernel;\n";
+   }
+
+   // Emit the host-side code that launches `concatKernel`.
+   // Throws std::runtime_error when fOutputShape is still empty.
+   // NOTE(review): the emitted launch code is not compilable as it stands:
+   //  - the work-division statement reached review with the text between
+   //    `workDiv_"` and `::all(` missing; it is kept exactly as received and
+   //    has to be restored from the author's tree;
+   //  - the template arguments of WorkDivMembers, Vec and exec are absent;
+   //  - ConvertShapeToString(fInputs) prints "{ a , b }", so `workDiv_` and the
+   //    first `deviceBuf_` are followed by a brace list instead of a name;
+   //  - strides, shapes and axis sizes are printed as brace lists although the
+   //    kernel declares pointer parameters for them;
+   //  - OpName is prefixed with "op_" but not used afterwards.
+   std::string Generate_GPU_ALPAKA(std::string OpName) override {
+      OpName = "op_" + OpName;
+      if (fOutputShape.empty()) {
+         throw std::runtime_error("TMVA SOFIE Operator Concat called to Generate without being initialized first");
+      }
+      // Extent of every input along the concatenation axis, in input order; the
+      // kernel reads this list as axis_sizes[k] for input k.
+      std::vector<Dim> axisSizes;
+      axisSizes.reserve(fInputShapes.size());
+      for (const auto &inputShape : fInputShapes) {
+         axisSizes.push_back(inputShape[fAxis]);
+      }
+      std::stringstream out;
+      auto length = ConvertDynamicShapeToLength(fOutputShape);
+      out << "\n//------ CONCAT_GPU_ALPAKA\n";
+      out << SP << "alpaka::WorkDivMembers workDiv_"<::all("<< length << " + 256 - 1) / 256), alpaka::Vec::all(256), alpaka::Vec::all(1));\n";
+      out << SP << "alpaka::exec(queue, workDiv_" << ConvertShapeToString(fInputs) << ", concatKernel, alpaka::getPtrNative(deviceBuf_" << ConvertShapeToString(fInputs) << "), alpaka::getPtrNative(deviceBuf_" << fOutput << "), "
+          << ConvertShapeToString(UTILITY::ComputeStrideFromShape(fInputShapes)) << ", " << ConvertShapeToString(axisSizes) << ", " << fInputs.size() << ", " << fAxis << ", "
+          << ConvertShapeToString(UTILITY::ComputeStrideFromShape(fOutputShape)) << ", " << ConvertShapeToString(fOutputShape) << ");\n";
+      return out.str();
+   }
+};
 
 }//SOFIE
 
diff --git a/src/SOFIE_core/inc/SOFIE/SOFIE_common.hxx b/src/SOFIE_core/inc/SOFIE/SOFIE_common.hxx
index 8b9727b..b3a56e9 100644
--- a/src/SOFIE_core/inc/SOFIE/SOFIE_common.hxx
+++ b/src/SOFIE_core/inc/SOFIE/SOFIE_common.hxx
@@ -202,8 +202,14 @@ inline std::size_t ConvertShapeToLength(const std::vector<size_t> & shape){
 }
 
 std::string ConvertShapeToString(const std::vector<size_t> & shape);
+/// render a list of names as "{ a , b }"; the ConvertShapeToString overload forwards to it
+std::string ConvertStringShapeToString(const std::vector<std::string> & shape);
+std::string ConvertShapeToString(const std::vector<std::string> & shape);
 std::string ConvertDimShapeToString(const std::vector<Dim> & shape);
 std::string ConvertShapeToString(const std::vector<Dim> & shape);
+/// render a list of shapes as a brace list of per-shape brace lists; the overload forwards to it
+std::string ConvertVectorDimShapeToString(const std::vector<std::vector<Dim>> & shapes);
+std::string ConvertShapeToString(const std::vector<std::vector<Dim>> & shapes);
 
 
 
@@ -513,6 +519,8 @@ void UnidirectionalBroadcast(const T* data, const std::vector<size_t>& shape, co
 /// compute stride of a tensor given its shape (assume layout is row-major)
 std::vector<size_t> ComputeStrideFromShape(const std::vector<size_t> & shape);
 std::vector<Dim> ComputeStrideFromShape(const std::vector<Dim> & shape);
+/// compute the strides of every shape in a list; element i belongs to shapes[i]
+std::vector<std::vector<Dim>> ComputeStrideFromShape(const std::vector<std::vector<Dim>> & shapes);
 
 /// function to check if a >> 0 and a < MAX using a single comparison
 //// use trick casting to unsigned values so it becomes a single comparison
diff --git a/src/SOFIE_core/src/SOFIE_common.cxx b/src/SOFIE_core/src/SOFIE_common.cxx
index 05f873b..2ccf49f 100644
--- a/src/SOFIE_core/src/SOFIE_common.cxx
+++ b/src/SOFIE_core/src/SOFIE_common.cxx
@@ -116,6 +116,22 @@ std::string ConvertShapeToString(const std::vector<size_t> & shape) {
    return out.str();
 }
 
+/// Format a list of strings as "{ a , b , c }"; an empty list gives "{  }".
+std::string ConvertStringShapeToString(const std::vector<std::string> & shape) {
+   std::stringstream out;
+   out << "{ ";
+   for (size_t i = 0; i < shape.size(); i++) {
+      out << shape[i];
+      if (i < shape.size()-1) out << " , ";
+   }
+   out << " }";
+   return out.str();
+}
+/// Overload so that name lists can go through the generic ConvertShapeToString.
+std::string ConvertShapeToString(const std::vector<std::string> & shape) {
+   return ConvertStringShapeToString(shape);
+}
+
 std::string ConvertDimShapeToString(const std::vector<Dim> & shape) {
    std::stringstream out;
    out << "{ ";
@@ -164,6 +180,23 @@ std::string ConvertDynamicShapeToLength(const std::vector<Dim> & shape) {
    return ConvertDimShapeToLength(shape);
 }
 
+/// Format a list of shapes as "{ <shape0> , <shape1> }", each shape being
+/// rendered by ConvertShapeToString.
+std::string ConvertVectorDimShapeToString(const std::vector<std::vector<Dim>> & shapes) {
+   std::stringstream out;
+   out << "{ ";
+   for (size_t i = 0; i < shapes.size(); i++) {
+      out << ConvertShapeToString(shapes[i]);
+      if (i < shapes.size() - 1) out << " , ";
+   }
+   out << " }";
+   return out.str();
+}
+/// Overload so that shape lists can go through the generic ConvertShapeToString.
+std::string ConvertShapeToString(const std::vector<std::vector<Dim>> & shapes) {
+   return ConvertVectorDimShapeToString(shapes);
+}
+
 
 namespace{
 template<typename T>
@@ -537,4 +570,18 @@ std::vector<Dim> UTILITY::ComputeStrideFromShape(const std::vector<Dim> & shape)
    return strides;
 }
 
+/// Compute the strides of each shape in `shapes` with the single-shape overload;
+/// the result holds one stride vector per input shape, in the same order.
+std::vector<std::vector<Dim>> UTILITY::ComputeStrideFromShape(const std::vector<std::vector<Dim>> & shapes) {
+   std::vector<std::vector<Dim>> all_strides;
+   all_strides.reserve(shapes.size());
+
+   // Process each shape individually using the existing single-vector implementation
+   for (const auto& shape : shapes) {
+      all_strides.push_back(ComputeStrideFromShape(shape));
+   }
+
+   return all_strides;
+}
+
 } // namespace SOFIE