Skip to content
44 changes: 34 additions & 10 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10452,7 +10452,7 @@ void OffloadPackager::ConstructJob(Compilation &C, const JobAction &JA,
static_cast<const toolchains::SYCLToolChain &>(*TC);
SYCLTC.AddImpliedTargetArgs(TC->getTriple(), Args, BuildArgs, JA, *HostTC,
Arch);
SYCLTC.TranslateBackendTargetArgs(TC->getTriple(), Args, BuildArgs);
SYCLTC.TranslateBackendTargetArgs(TC->getTriple(), Args, BuildArgs, Arch);
createArgString("compile-opts=");
BuildArgs.clear();
SYCLTC.TranslateLinkerTargetArgs(TC->getTriple(), Args, BuildArgs);
Expand Down Expand Up @@ -11614,27 +11614,51 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA,
if (!TC->getTriple().isSPIROrSPIRV())
continue;
ArgStringList BuildArgs;
SmallString<128> BackendOptString;
std::vector<SmallString<128>> BackendOptVec;
SmallString<128> LinkOptString;
SYCLTC.TranslateBackendTargetArgs(TC->getTriple(), Args, BuildArgs);
for (const auto &A : BuildArgs)
appendOption(BackendOptString, A);

BuildArgs.clear();
// Construct backend options for each target passed via -Xsycl-target-backend
// in the form: "-device <arch> <backend_opt>"
StringRef Device = "";
for (const Arg *A : Args.filtered(options::OPT_Xsycl_backend_EQ, options::OPT_Xsycl_backend)) {
SmallString<128> BackendArgs;
if(A->getNumValues() > 1) {
Device = SYCL::gen::resolveGenDevice(A->getValue());
if(Device.empty() && (A->getNumValues() > 1))
// If target is spir64_gen, the device name needs to be extracted
// from the arguments.
Device = SYCL::gen::extractDeviceFromArg(A->getValue(1));
else
// If target is intel_gpu_*, "-device <arch>"
// is appended to BackendArgs.
appendOption(BackendArgs, "-device " + Device.str());
}
SYCLTC.TranslateBackendTargetArgs(TC->getTriple(), Args, BuildArgs, Device);
for (const auto &BA : BuildArgs) {
appendOption(BackendArgs, BA);
}
if(!BackendArgs.empty()) {
BackendOptVec.push_back(std::move(BackendArgs));
}
BuildArgs.clear();
}

SYCLTC.TranslateLinkerTargetArgs(TC->getTriple(), Args, BuildArgs);
for (const auto &A : BuildArgs) {
if (TC->getTriple().getSubArch() == llvm::Triple::NoSubArch)
appendOption(LinkOptString, A);
else
else {
// For AOT, combine the Backend and Linker strings into one.
appendOption(BackendOptString, A);
for (SmallString<128> &BackendArgs : BackendOptVec)
appendOption(BackendArgs, A);
}
}

if (!BackendOptString.empty()) {
for (SmallString<128> &BackendArgs : BackendOptVec) {
CmdArgs.push_back(Args.MakeArgString(
"--device-compiler=" +
Action::GetOffloadKindName(Action::OFK_SYCL) + ":" +
TC->getTripleString() + "=" + BackendOptString));
TC->getTripleString() + "=" + BackendArgs));
}
if (!LinkOptString.empty()) {
CmdArgs.push_back(Args.MakeArgString(
Expand Down
18 changes: 15 additions & 3 deletions clang/lib/Driver/ToolChains/SYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1083,6 +1083,18 @@ void SYCL::gen::BackendCompiler::ConstructJob(Compilation &C,
C.addCommand(std::move(Cmd));
}

// Extracts the device specified after "-device" from the backend
// argument string provided via -Xsycl-target-backend.
StringRef SYCL::gen::extractDeviceFromArg(llvm::StringRef Arg) {
llvm::SmallVector<StringRef, 8> Arglist;
Arg.split(Arglist, ' ');
for (size_t i = 0; i + 1 < Arglist.size(); ++i) {
if (Arglist[i] == "-device")
return Arglist[i + 1];
}
return "";
}

StringRef SYCL::gen::resolveGenDevice(StringRef DeviceName) {
StringRef Device;
Device =
Expand Down Expand Up @@ -1556,6 +1568,8 @@ void SYCLToolChain::TranslateTargetOpt(const llvm::Triple &Triple,
getDriver().getSYCLDeviceTriple(A->getValue(), A);
// Passing device args: -X<Opt>=<triple> -opt=val.
StringRef GenDevice = SYCL::gen::resolveGenDevice(A->getValue());
if(GenDevice.empty())
GenDevice = SYCL::gen::extractDeviceFromArg(A->getValue(1));
bool IsGenTriple = Triple.isSPIR() &&
Triple.getSubArch() == llvm::Triple::SPIRSubArch_gen;
if (IsGenTriple) {
Expand All @@ -1565,9 +1579,6 @@ void SYCLToolChain::TranslateTargetOpt(const llvm::Triple &Triple,
// Triples do not match, but only skip when we know we are not
// comparing against intel_gpu_*
continue;
if (OptTargetTriple == Triple && !Device.empty())
// Triples match, but we are expecting a specific device to be set.
continue;
} else if (OptTargetTriple != Triple)
continue;
} else if (!OptNoTriple)
Expand Down Expand Up @@ -1678,6 +1689,7 @@ void SYCLToolChain::AddImpliedTargetArgs(const llvm::Triple &Triple,
}
// Check for any -device settings.
std::string DevArg;
llvm::errs() << "[DEBUG] device is " << Device << "\n";
if (IsJIT || Device == "pvc" || hasPVCDevice(TargArgs, DevArg)) {
// The -device option passed in by the user may not be 'pvc'. Use the
// value provided by the user if it was specified.
Expand Down
1 change: 1 addition & 0 deletions clang/lib/Driver/ToolChains/SYCL.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ class LLVM_LIBRARY_VISIBILITY BackendCompiler : public Tool {
const char *LinkingOutput) const override;
};

StringRef extractDeviceFromArg(StringRef Arg);
StringRef resolveGenDevice(StringRef DeviceName);
SmallString<64> getGenDeviceMacro(StringRef DeviceName);
StringRef getGenGRFFlag(StringRef GRFMode);
Expand Down
4 changes: 2 additions & 2 deletions clang/test/Driver/clang-linker-wrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,9 @@

// Check that when "--device-compiler=triple=-device pvc" is specified in clang-linker-wrapper
// (happen when AOT device is specified via -Xsycl-target-backend '-device pvc' in clang),
// the target is not passed to sycl-post-link for filtering.
// the target is passed to sycl-post-link for filtering.
// RUN: clang-linker-wrapper -sycl-embed-ir -sycl-device-libraries=%t1.devicelib.o -sycl-post-link-options="SYCL_POST_LINK_OPTIONS" -llvm-spirv-options="LLVM_SPIRV_OPTIONS" "--host-triple=x86_64-unknown-linux-gnu" "--device-compiler=spir64_gen-unknown-unknown=-device pvc" "--linker-path=/usr/bin/ld" "--" HOST_LINKER_FLAGS "-dynamic-linker" HOST_DYN_LIB "-o" "a.out" HOST_LIB_PATH HOST_STAT_LIB %t1.o --dry-run 2>&1 | FileCheck -check-prefix=CHK-NO-CMDS-AOT-GEN %s
// CHK-NO-CMDS-AOT-GEN: sycl-post-link{{.*}} SYCL_POST_LINK_OPTIONS -o {{[^,]*}}.table {{.*}}.bc
// CHK-NO-CMDS-AOT-GEN: sycl-post-link{{.*}} SYCL_POST_LINK_OPTIONS -o intel_gpu_pvc,{{.*}}.table {{.*}}.bc

/// Check for list of commands for standalone clang-linker-wrapper run for sycl (AOT for Intel CPU)
// -------
Expand Down
9 changes: 9 additions & 0 deletions clang/test/Driver/sycl-offload-new-driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,15 @@
// WRAPPER_OPTIONS_BACKEND_AOT-SAME: "--device-compiler=sycl:spir64_gen-unknown-unknown=-backend-gen-opt"
// WRAPPER_OPTIONS_BACKEND_AOT-SAME: "--device-compiler=sycl:spir64_x86_64-unknown-unknown=-backend-cpu-opt"

// Check that AOT backend compiler options passed via
// -Xsycl-target-backend=<target>,<option> are only applied to their specified target
// RUN: %clangxx --target=x86_64-unknown-linux-gnu -fsycl --offload-new-driver \
// RUN: -fsycl-targets=intel_gpu_dg1,spir64_gen \
// RUN: -Xsycl-target-backend=intel_gpu_dg1 "-options -extraopt_dg1" \
// RUN: -Xsycl-target-backend=spir64_gen "-device pvc -options -extraopt_pvc" %s -### 2>&1 \
// RUN: | FileCheck -check-prefixes=MULTI_TARGETS_OPTIONS_BACKEND_AOT %s
// MULTI_TARGETS_OPTIONS_BACKEND_AOT: clang-linker-wrapper{{.*}} "--device-compiler=sycl:spir64_gen-unknown-unknown=-device dg1 -options -extraopt_dg1" "--device-compiler=sycl:spir64_gen-unknown-unknown=-device pvc -options -extraopt_pvc"

/// Verify arch settings for nvptx and amdgcn targets
// RUN: %clangxx -fsycl -### -fsycl-targets=amdgcn-amd-amdhsa -fno-sycl-libspirv \
// RUN: -nocudalib --offload-new-driver \
Expand Down
27 changes: 19 additions & 8 deletions clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -753,11 +753,17 @@ runSYCLPostLinkTool(ArrayRef<StringRef> InputFiles, const ArgList &Args,
// when Intel GPU targets are passed in -fsycl-targets.
const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ));
StringRef Arch = Args.getLastArgValue(OPT_arch_EQ);

if (Triple.getSubArch() == llvm::Triple::SPIRSubArch_gen && !Arch.empty() &&
!IsDevicePassedWithSyclTargetBackend && Arch != "*")
OutputPathWithArch = "intel_gpu_" + Arch.str() + "," + OutputPathWithArch;
else if (Triple.getSubArch() == llvm::Triple::SPIRSubArch_x86_64)

// Prefix the output path with the target architecture(s),
// e.g. intel_gpu_dg2,intel_gpu_pvc for arch = "dg2,pvc".
if (Triple.getSubArch() == llvm::Triple::SPIRSubArch_gen && !Arch.empty() && Arch != "*") {
SmallVector<StringRef, 8> ArchList;
Arch.split(ArchList, ',');
std::string ArchString;
for (StringRef SingleArch : ArchList)
ArchString += "intel_gpu_" + SingleArch.str() + ",";
OutputPathWithArch = ArchString + OutputPathWithArch;
} else if (Triple.getSubArch() == llvm::Triple::SPIRSubArch_x86_64)
OutputPathWithArch = "spir64_x86_64," + OutputPathWithArch;

SmallVector<StringRef, 8> CmdArgs;
Expand Down Expand Up @@ -2180,16 +2186,21 @@ extractSYCLCompileLinkOptions(ArrayRef<OffloadFile> OffloadFiles) {
}

// Append SYCL device compiler and linker options specified at link time,
// filtering by target triple and offload kind.
// filtering by target triple, offload kind, and device architecture.
// TODO: Consider how to refactor this function to merge it with getLinkerArgs()
// and determine if it's possible to use OPT_compiler_arg_EQ and
// OPT_linker_arg_EQ to handle device compiler/linker options
static void appendSYCLDeviceOptionsAtLinkTime(const DerivedArgList &LinkerArgs,
std::string &CompileOptions,
std::string &LinkOptions) {
const StringRef TripleStr = LinkerArgs.getLastArgValue(OPT_triple_EQ);
auto processDeviceArgs = [&](unsigned OptID, std::string &Options) {
auto processDeviceArgs = [&](unsigned OptID, std::string &Options, StringRef TargetArch = StringRef()) {
for (StringRef Arg : LinkerArgs.getAllArgValues(OptID)) {
if (!TargetArch.empty()) {
std::string DeviceArchPattern = "-device " + TargetArch.str();
if (Arg.find(DeviceArchPattern) == StringRef::npos)
continue;
}
size_t ColonPos = Arg.find(':');
if (ColonPos != StringRef::npos) {
StringRef Kind = Arg.substr(0, ColonPos);
Expand All @@ -2211,7 +2222,7 @@ static void appendSYCLDeviceOptionsAtLinkTime(const DerivedArgList &LinkerArgs,
}
};

processDeviceArgs(OPT_device_compiler_args_EQ, CompileOptions);
processDeviceArgs(OPT_device_compiler_args_EQ, CompileOptions, LinkerArgs.getLastArgValue(OPT_arch_EQ));
processDeviceArgs(OPT_device_linker_args_EQ, LinkOptions);
}

Expand Down
91 changes: 50 additions & 41 deletions llvm/tools/sycl-post-link/sycl-post-link.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,18 +97,19 @@ cl::opt<std::string> DeviceLibDir{
cl::value_desc("dirname"), cl::cat(PostLinkCat)};

struct TargetFilenamePair {
std::string Target;
std::vector<std::string> Targets;
std::string Filename;
};

struct TargetFilenamePairParser : public cl::basic_parser<TargetFilenamePair> {
using cl::basic_parser<TargetFilenamePair>::basic_parser;
bool parse(cl::Option &O, StringRef ArgName, StringRef &ArgValue,
TargetFilenamePair &Val) const {
auto [Target, Filename] = ArgValue.split(",");
if (Filename == "")
std::swap(Target, Filename);
Val = {Target.str(), Filename.str()};
SmallVector<StringRef, 8> ArgList;
ArgValue.split(ArgList, ",");
Val.Filename = ArgList.back().str();
for (size_t i = 0; i + 1 < ArgList.size(); ++i)
Val.Targets.push_back(ArgList[i].str());
return false;
}
};
Expand Down Expand Up @@ -300,7 +301,7 @@ void saveModuleIR(Module &M, const StringRef Filename) {

void saveModuleProperties(const module_split::ModuleDesc &MD,
const GlobalBinImageProps &GlobProps,
const StringRef Filename, StringRef Target = "") {
const StringRef Filename, const std::vector<std::string> &Targets) {

PropSetRegTy PropSet;

Expand All @@ -323,9 +324,9 @@ void saveModuleProperties(const module_split::ModuleDesc &MD,
PropSet.remove(PropSetRegTy::SYCL_DEVICE_REQUIREMENTS,
PropSetRegTy::PROPERTY_REQD_WORK_GROUP_SIZE);

if (!Target.empty())
PropSet.add(PropSetRegTy::SYCL_DEVICE_REQUIREMENTS, "compile_target",
Target);
for (const auto &T : Targets)
PropSet.add(PropSetRegTy::SYCL_DEVICE_REQUIREMENTS,
"compile_target", T);

std::error_code EC;
raw_fd_ostream SCOut(Filename, EC);
Expand All @@ -340,7 +341,7 @@ void saveModuleSymbolTable(const module_split::ModuleDesc &MD,
writeToFile(Filename, SymT);
}

bool isTargetCompatibleWithModule(const std::string &Target,
bool isTargetCompatibleWithModule(const std::vector<std::string> &Targets,
module_split::ModuleDesc &IrMD);

void addTableRow(util::SimpleTable &Table,
Expand Down Expand Up @@ -384,21 +385,25 @@ void saveModule(
}

for (const auto &[Table, OutputFile] : zip_equal(OutTables, OutputFiles)) {
if (!isTargetCompatibleWithModule(OutputFile.Target, MD))
if (!isTargetCompatibleWithModule(OutputFile.Targets, MD))
continue;
auto CopyTriple = BaseTriple;
if (DoPropGen) {
GlobalBinImageProps Props = {EmitKernelParamInfo, EmitProgramMetadata,
EmitKernelNames, EmitExportedSymbols,
EmitImportedSymbols, DeviceGlobals};
StringRef Target = OutputFile.Target;
std::vector<std::string> Targets = OutputFile.Targets;
std::string NewSuff = Suffix.str();
if (!Target.empty())
NewSuff = (Twine("_") + Target).str();
if (!Targets.empty()) {
std::string Joined;
for (auto &T : Targets)
Joined += "_" + T;
NewSuff = Joined;
}

CopyTriple.Prop =
(OutputPrefix + NewSuff + "_" + Twine(I) + ".prop").str();
saveModuleProperties(MD, Props, CopyTriple.Prop, Target);
saveModuleProperties(MD, Props, CopyTriple.Prop, Targets);
}
addTableRow(*Table, CopyTriple);
}
Expand Down Expand Up @@ -451,44 +456,48 @@ void addTableRow(util::SimpleTable &Table,
// information comes from the device config file (DeviceConfigFile.td).
// For example, the intel_gpu_tgllp target does not support fp64 - therefore,
// a module using fp64 would *not* be compatible with intel_gpu_tgllp.
bool isTargetCompatibleWithModule(const std::string &Target,
bool isTargetCompatibleWithModule(const std::vector<std::string> &Targets,
module_split::ModuleDesc &IrMD) {
// When the user does not specify a target,
// (e.g. -o out.table compared to -o intel_gpu_pvc,out-pvc.table)
// Target will be empty and we will not want to perform any filtering, so
// we return true here.
if (Target.empty())
if (Targets.empty())
return true;

for (const std::string Target : Targets) {
llvm::errs() << "[DEBUG] Checking compatibility of target '" << Target
<< "' with module '" << IrMD.Name << "'\n";
// TODO: If a target not found in the device config file is passed,
// to sycl-post-link, then we should probably throw an error. However,
// since not all the information for all the targets is filled out
// right now, we return true, having the affect that unrecognized
// targets have no filtering applied to them.
if (!is_contained(DeviceConfigFile::TargetTable, Target))
continue;

// TODO: If a target not found in the device config file is passed,
// to sycl-post-link, then we should probably throw an error. However,
// since not all the information for all the targets is filled out
// right now, we return true, having the affect that unrecognized
// targets have no filtering applied to them.
if (!is_contained(DeviceConfigFile::TargetTable, Target))
return true;
const DeviceConfigFile::TargetInfo &TargetInfo =
DeviceConfigFile::TargetTable[Target];
const SYCLDeviceRequirements &ModuleReqs =
IrMD.getOrComputeDeviceRequirements();

const DeviceConfigFile::TargetInfo &TargetInfo =
DeviceConfigFile::TargetTable[Target];
const SYCLDeviceRequirements &ModuleReqs =
IrMD.getOrComputeDeviceRequirements();
// Check to see if all the requirements of the input module
// are compatbile with the target.
for (const auto &Aspect : ModuleReqs.Aspects) {
if (!is_contained(TargetInfo.aspects, Aspect.Name))
return false;
}

// Check to see if all the requirements of the input module
// are compatbile with the target.
for (const auto &Aspect : ModuleReqs.Aspects) {
if (!is_contained(TargetInfo.aspects, Aspect.Name))
// Check if module sub group size is compatible with the target.
// For ESIMD, the reqd_sub_group_size will be 1; this is not
// a supported by any backend (e.g. no backend can support a kernel
// with sycl::reqd_sub_group_size(1)), but for ESIMD, this is
// a special case.
if (!IrMD.isESIMD() && ModuleReqs.SubGroupSize.has_value() &&
!is_contained(TargetInfo.subGroupSizes, *ModuleReqs.SubGroupSize))
return false;
}

// Check if module sub group size is compatible with the target.
// For ESIMD, the reqd_sub_group_size will be 1; this is not
// a supported by any backend (e.g. no backend can support a kernel
// with sycl::reqd_sub_group_size(1)), but for ESIMD, this is
// a special case.
if (!IrMD.isESIMD() && ModuleReqs.SubGroupSize.has_value() &&
!is_contained(TargetInfo.subGroupSizes, *ModuleReqs.SubGroupSize))
return false;

return true;
}

Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/NewOffloadDriver/aot-gpu.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// REQUIRES: ocloc, gpu, target-spir, !gpu-intel-gen12
// REQUIRES: ocloc, gpu, target-spir, gpu-intel-dg2
// Test with `--offload-new-driver`
//
// RUN: %clangxx -fsycl -fsycl-device-code-split=per_source \
Expand Down
Loading