Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 62 additions & 1 deletion src/SOFIE_core/inc/SOFIE/ROperator_Concat.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,68 @@

return out.str();
}
};

/// Emit the source text of the alpaka functor `ConcatKernel`.
///
/// The generated `operator()` receives, in order: the accelerator, an array of
/// input pointers, the output pointer, an array of per-input stride arrays, the
/// extent of every input along the concatenation axis, the number of inputs,
/// the concatenation axis, the output strides and the output shape.
/// For every multi-dimensional output index it finds the input whose range
/// along `axis` contains the index, rebases the axis coordinate by the
/// accumulated offset of the preceding inputs and copies that one element.
///
/// \return the kernel definition as a string (the operator name is not used)
std::string Generate_GPU_Kernel_ALPAKA(std::string /*opName*/) {
   std::string op;
   op = "\n//------ CONCAT_KERNEL_ALPAKA\n";
   op += SP + "struct ConcatKernel {\n";
   op += SP + SP + "template<typename TAcc, typename T>\n";
   op += SP + SP + "ALPAKA_FN_ACC void operator()(TAcc const& acc, T const* const* input_ptrs, T* output,";
   op += " std::size_t const* const* input_strides_ptrs, std::size_t const* axis_sizes,";
   op += " std::size_t num_inputs, std::size_t axis, std::size_t const* output_strides,";
   op += " std::size_t const* output_shape) const {\n";
   op += SP + SP + SP + SP + "using DimAcc = alpaka::Dim<TAcc>;\n";
   op += SP + SP + SP + SP + "using IdxAcc = alpaka::Idx<TAcc>;\n";
   op += SP + SP + SP + SP + "constexpr std::size_t D = static_cast<std::size_t>(DimAcc::value);\n";
   op += SP + SP + SP + SP + "alpaka::Vec<DimAcc, IdxAcc> shapeVec{};\n";
   op += SP + SP + SP + SP + "for (std::size_t d = 0; d < D; ++d) shapeVec[d] = output_shape[d];\n";
   op += SP + SP + SP + SP + "auto elements = alpaka::uniformElementsND(acc, shapeVec);\n";
   // The loop variable has to be called `idx`: every statement below reads idx[d].
   op += SP + SP + SP + SP + "for (auto const& idx : elements) {\n";
   op += SP + SP + SP + SP + SP + "std::size_t out_idx = 0;\n";
   op += SP + SP + SP + SP + SP + "for (std::size_t d = 0; d < D; ++d) out_idx += idx[d] * output_strides[d];\n";
   op += SP + SP + SP + SP + SP + "std::size_t axis_coord = idx[axis];\n";
   op += SP + SP + SP + SP + SP + "std::size_t chosen = 0;\n";
   op += SP + SP + SP + SP + SP + "std::size_t offset = 0;\n";
   // Walk the inputs until the one whose [offset, offset + sz) range holds axis_coord.
   op += SP + SP + SP + SP + SP + "for (std::size_t k = 0; k < num_inputs; ++k) {\n";
   op += SP + SP + SP + SP + SP + SP + "std::size_t sz = axis_sizes[k];\n";
   op += SP + SP + SP + SP + SP + SP + "if (axis_coord < offset + sz) { chosen = k; break; }\n";
   op += SP + SP + SP + SP + SP + SP + "offset += sz;\n";
   op += SP + SP + SP + SP + SP + "}\n";
   op += SP + SP + SP + SP + SP + "std::size_t in_idx = 0;\n";
   op += SP + SP + SP + SP + SP + "for (std::size_t d = 0; d < D; ++d) {\n";
   op += SP + SP + SP + SP + SP + SP + "std::size_t coord_out = idx[d];\n";
   op += SP + SP + SP + SP + SP + SP + "std::size_t coord_in = (d == axis) ? (coord_out - offset) : coord_out;\n";
   op += SP + SP + SP + SP + SP + SP + "in_idx += coord_in * input_strides_ptrs[chosen][d];\n";
   op += SP + SP + SP + SP + SP + "}\n";
   op += SP + SP + SP + SP + SP + "T const* src = input_ptrs[chosen];\n";
   op += SP + SP + SP + SP + SP + "output[out_idx] = src[in_idx];\n";
   op += SP + SP + SP + SP + "}\n";
   op += SP + SP + "}\n";
   op += SP + "};\n";

   return op;
}

/// Emit the declaration of the `ConcatKernel` instance (`concatKernel`) that the
/// generated launch code passes to alpaka::exec. The operator name is not used.
std::string Generate_GPU_Kernel_Definitions_ALPAKA(std::string /*opName*/) override {
   std::string definition = SP;
   definition += "ConcatKernel concatKernel;\n";
   return definition;
}

std::string Generate_GPU_ALPAKA(std::string OpName) override {
OpName = "op_" + OpName;
if (fOutputShape.empty()) {
throw std::runtime_error("TMVA SOFIE Operator Concat called to Generate without being initialized first");
}
std::stringstream out;
auto length = ConvertDynamicShapeToLength(fOutputShape);
out << "\n//------ CONCAT_GPU_ALPAKA\n";
out << SP << "alpaka::WorkDivMembers<Dim, Idx> workDiv_"<<ConvertShapeToString(fInputs)<<"(alpaka::Vec<Dim, Idx>::all("<< length << " + 256 - 1) / 256), alpaka::Vec<Dim, Idx>::all(256), alpaka::Vec<Dim, Idx>::all(1));\n";
out << SP << "alpaka::exec<Acc>(queue, workDiv_" << ConvertShapeToString(fInputs) << ", concatKernel, alpaka::getPtrNative(deviceBuf_" << ConvertShapeToString(fInputs) << "), alpaka::getPtrNative(deviceBuf_" << fOutput << "), "
<< ConvertShapeToString(UTILITY::ComputeStrideFromShape(fInputShapes)) << ", " << ConvertShapeToString(fInputShapes[fAxis]) << ", " << fInputs.size() << ", " << fAxis << ", "
<< ConvertShapeToString(UTILITY::ComputeStrideFromShape(fOutputShape)) << ", " << ConvertShapeToString(fOutputShape) << ");\n";
return out.str();
}
};
}//SOFIE


Expand Down
5 changes: 5 additions & 0 deletions src/SOFIE_core/inc/SOFIE/SOFIE_common.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -202,8 +202,12 @@ inline std::size_t ConvertShapeToLength(const std::vector<size_t> & shape){
}

/// String form of a shape given as integer dimensions.
std::string ConvertShapeToString(const std::vector<size_t> & shape);
/// Format a list of strings as "{ a , b , c }" (an empty list gives "{  }").
std::string ConvertStringShapeToString(const std::vector<std::string> & shape);
/// Overload for string entries; forwards to ConvertStringShapeToString.
std::string ConvertShapeToString(const std::vector<std::string> & shape);
/// String form of a shape whose entries are Dim objects.
std::string ConvertDimShapeToString(const std::vector<Dim> & shape);
/// Overload for Dim entries (presumably forwards to ConvertDimShapeToString - confirm in the .cxx).
std::string ConvertShapeToString(const std::vector<Dim> & shape);
/// Format a list of Dim shapes as "{ <shape0> , <shape1> }", each shape formatted by ConvertShapeToString.
std::string ConvertVectorDimShapeToString(const std::vector<std::vector<Dim>> & shapes);
/// Overload for a list of Dim shapes; forwards to ConvertVectorDimShapeToString.
std::string ConvertShapeToString(const std::vector<std::vector<Dim>> & shapes);



Expand Down Expand Up @@ -513,6 +517,7 @@ void UnidirectionalBroadcast(const T* data, const std::vector<size_t>& shape, co
/// compute stride of a tensor given its shape (assume layout is row-major)
std::vector<size_t> ComputeStrideFromShape(const std::vector<size_t> & shape);
std::vector<Dim> ComputeStrideFromShape(const std::vector<Dim> & shape);
/// compute the strides of several tensors at once: element i of the result is
/// the stride vector computed for shapes[i]
std::vector<std::vector<Dim>> ComputeStrideFromShape(const std::vector<std::vector<Dim>> & shapes);

/// function to check if a >= 0 and a < MAX using a single comparison
/// use trick casting to unsigned values so it becomes a single comparison
Expand Down
40 changes: 40 additions & 0 deletions src/SOFIE_core/src/SOFIE_common.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,20 @@ std::string ConvertShapeToString(const std::vector<size_t> & shape) {
return out.str();
}

/// Format a list of strings as a braced, " , "-separated list,
/// e.g. {"a", "b"} becomes "{ a , b }" and an empty list becomes "{  }".
std::string ConvertStringShapeToString(const std::vector<std::string> & shape) {
   std::string text = "{ ";
   const char *separator = "";
   for (const auto &entry : shape) {
      text += separator;
      text += entry;
      separator = " , "; // only placed in front of the second and later entries
   }
   text += " }";
   return text;
}
/// Overload of ConvertShapeToString for string entries; delegates to
/// ConvertStringShapeToString so both names produce the same text.
std::string ConvertShapeToString(const std::vector<std::string> & shape) {
   std::string formatted = ConvertStringShapeToString(shape);
   return formatted;
}

std::string ConvertDimShapeToString(const std::vector<Dim> & shape) {
std::stringstream out;
out << "{ ";
Expand Down Expand Up @@ -164,6 +178,20 @@ std::string ConvertDynamicShapeToLength(const std::vector<Dim> & shape) {
return ConvertDimShapeToLength(shape);
}

std::string ConvertVectorDimShapeToString(const std::vector<std::vector<Dim>> & shapes) {
std::stringstream out;
out << "{ ";
for (size_t i = 0; i < shapes.size(); i++) {
out << ConvertShapeToString(shapes[i]);
if (i < shapes.size() - 1) out << " , ";
}
out << " }";
return out.str();
}
/// Overload of ConvertShapeToString for a list of Dim shapes; delegates to
/// ConvertVectorDimShapeToString so both names produce the same text.
std::string ConvertShapeToString(const std::vector<std::vector<Dim>> & shapes) {
   std::string formatted = ConvertVectorDimShapeToString(shapes);
   return formatted;
}


namespace{
template<typename T>
Expand Down Expand Up @@ -537,4 +565,16 @@ std::vector<Dim> UTILITY::ComputeStrideFromShape(const std::vector<Dim> & shape)
return strides;
}

std::vector<std::vector<Dim>> UTILITY::ComputeStrideFromShape(const std::vector<std::vector<Dim>> & shapes) {
std::vector<std::vector<Dim>> all_strides;
all_strides.reserve(shapes.size());

// Process each shape individually using the existing single-vector implementation
for (const auto& shape : shapes) {
all_strides.push_back(ComputeStrideFromShape(shape));
}

return all_strides;
}

} // namespace SOFIE