Fixed bug in symbol linking for CUDA kernel registration with LLVM 13.

Since cling was ported to LLVM 13, it is required that the
`__cuda_register_globals` function and the `__cuda_fatbin_wrapper` and
`__cuda_gpubin_handle` variables are unique when defining a CUDA kernel.
Otherwise, the JIT is lazy and reuses the compiled version of
`__cuda_register_globals`, `__cuda_fatbin_wrapper` and `__cuda_gpubin_handle`
from the first CUDA kernel definition for all subsequent CUDA kernel
definitions, which in practice means that the PTX code from the first kernel is
re-registered each time.

Increase the default CUDA SM level to 35 because SM 20 is deprecated or
removed in the current CUDA SDK versions.
This commit is contained in:
Simeon Ehrig 2022-07-27 11:23:42 +00:00 committed by jenkins
parent 927f761b8b
commit 0ee3ebcb5b
3 changed files with 142 additions and 58 deletions

View File

@ -127,17 +127,19 @@ char WeakTypeinfoVTablePass::ID = 0;
namespace { namespace {
// Add a suffix to the CUDA module ctor/dtor to generate a unique name. // Add a suffix to the CUDA module ctor/dtor, CUDA specific functions and
// This is necessary for lazy compilation. Without suffix, cling cannot // variables to generate a unique name. This is necessary for lazy
// distinguish ctor/dtor of subsequent modules. // compilation. Without suffix, cling cannot distinguish ctor/dtor, register
// function and and ptx code string of subsequent modules.
class UniqueCUDAStructorName : public ModulePass { class UniqueCUDAStructorName : public ModulePass {
static char ID; static char ID;
bool runOnFunction(Function& F, const StringRef ModuleName){ // append a suffix to a symbol to make it unique
if(F.hasName() && (F.getName() == "__cuda_module_ctor" // the suffix is "_cling_module_<module number>"
|| F.getName() == "__cuda_module_dtor") ){ llvm::SmallString<128> add_module_suffix(const StringRef SymbolName,
const StringRef ModuleName) {
llvm::SmallString<128> NewFunctionName; llvm::SmallString<128> NewFunctionName;
NewFunctionName.append(F.getName()); NewFunctionName.append(SymbolName);
NewFunctionName.append("_"); NewFunctionName.append("_");
NewFunctionName.append(ModuleName); NewFunctionName.append(ModuleName);
@ -148,8 +150,32 @@ namespace {
NewFunctionName[i] = '_'; NewFunctionName[i] = '_';
} }
F.setName(NewFunctionName); return NewFunctionName;
}
// make CUDA specific variables unique
bool runOnGlobal(GlobalValue& GV, const StringRef ModuleName) {
if (GV.isDeclaration())
return false; // no change.
if (!GV.hasName())
return false;
if (GV.getName().equals("__cuda_fatbin_wrapper") ||
GV.getName().equals("__cuda_gpubin_handle")) {
GV.setName(add_module_suffix(GV.getName(), ModuleName));
return true;
}
return false;
}
// make CUDA specific functions unique
bool runOnFunction(Function& F, const StringRef ModuleName) {
if (F.hasName() && (F.getName().equals("__cuda_module_ctor") ||
F.getName().equals("__cuda_module_dtor") ||
F.getName().equals("__cuda_register_globals"))) {
F.setName(add_module_suffix(F.getName(), ModuleName));
return true; return true;
} }
@ -164,10 +190,12 @@ namespace {
const StringRef ModuleName = M.getName(); const StringRef ModuleName = M.getName();
for (auto&& F : M) for (auto&& F : M)
ret |= runOnFunction(F, ModuleName); ret |= runOnFunction(F, ModuleName);
for (auto&& G : M.globals())
ret |= runOnGlobal(G, ModuleName);
return ret; return ret;
} }
}; };
} } // namespace
char UniqueCUDAStructorName::ID = 0; char UniqueCUDAStructorName::ID = 0;

View File

@ -121,7 +121,7 @@ namespace cling {
llvm::errs() llvm::errs()
<< "IncrementalCUDADeviceCompiler: No valid c++ standard is set.\n"; << "IncrementalCUDADeviceCompiler: No valid c++ standard is set.\n";
uint32_t smVersion = 20; uint32_t smVersion = 35;
if (!invocationOptions.CompilerOpts.CUDAGpuArch.empty()) { if (!invocationOptions.CompilerOpts.CUDAGpuArch.empty()) {
llvm::StringRef(invocationOptions.CompilerOpts.CUDAGpuArch) llvm::StringRef(invocationOptions.CompilerOpts.CUDAGpuArch)
.drop_front(3 /* sm_ */) .drop_front(3 /* sm_ */)

View File

@ -6,9 +6,10 @@
// LICENSE.TXT for details. // LICENSE.TXT for details.
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// The Test checks, if the symbols __cuda_module_ctor and __cuda_module_dtor are // The Test checks, if the symbols __cuda_module_ctor, __cuda_module_dtor,
// unique for every module. Attention, for a working test case, a cuda // __cuda_register_globals, __cuda_fatbin_wrapper and __cuda_gpubin_handle are
// fatbinary is necessary. // unique for every module. Attention, for a working test case, a cuda fatbinary
// is necessary.
// RUN: cat %s | %cling -x cuda --cuda-path=%cudapath %cudasmlevel -Xclang -verify 2>&1 | FileCheck %s // RUN: cat %s | %cling -x cuda --cuda-path=%cudapath %cudasmlevel -Xclang -verify 2>&1 | FileCheck %s
// REQUIRES: cuda-runtime // REQUIRES: cuda-runtime
@ -17,58 +18,113 @@
#include "llvm/IR/Function.h" #include "llvm/IR/Function.h"
#include "llvm/IR/Module.h" #include "llvm/IR/Module.h"
#include <iostream> #include <iostream>
#include <map>
#include <vector>
// Compare the cuda module ctor and dtor of two random modules. // for each key in the map, the test searches for a symbol in a module and
std::string ctor1, ctor2, dtor1, dtor2; // stores the full name in the vector in the value
std::map<std::string, std::vector<std::string>> module_compare;
auto T1 = gCling->getLatestTransaction(); for (std::string const& s :
// only __global__ CUDA kernel definitions has the cuda module ctor and dtor {"__cuda_module_ctor", "__cuda_module_dtor", "__cuda_register_globals",
__global__ void g1() { int i = 1; } "__cuda_fatbin_wrapper", "__cuda_gpubin_handle"}) {
auto M1 = T1->getNext()->getModule(); module_compare.emplace(s, std::vector<std::string>{});
for(auto &I : *M1){
// The trailing '_' identify the function name as modified name.
if(I.getName().startswith_lower("__cuda_module_ctor_")){
ctor1 = I.getName().str();
} }
if(I.getName().startswith_lower("__cuda_module_dtor_")){ auto T1 = gCling->getLatestTransaction();
dtor1 = I.getName().str(); // only __global__ CUDA kernel definitions has the cuda specific functions and
// variables to register a kernel
__global__ void g1() { int i = 1; }
auto M1 = T1->getNext()->getCompiledModule();
// search for the symbols in the llvm::Module M1
for (auto const& key_value : module_compare) {
for (auto& I : *M1) {
if (I.getName().startswith(key_value.first)) {
module_compare[key_value.first].push_back(I.getName().str());
}
}
for (auto& I : M1->globals()) {
if (I.getName().startswith(key_value.first)) {
module_compare[key_value.first].push_back(I.getName().str());
}
}
}
// verify, that each symbol was found in the module
// if a symbol was not found, the vector should be empty
for (auto const& key_value : module_compare) {
if (key_value.second.size() < 1) {
std::cout << "could not find symbol" << std::endl;
// CHECK-NOT: could not find symbol
std::cout << "\"" << key_value.first << "\" is not in transaction T1"
<< std::endl;
} }
} }
auto T2 = gCling->getLatestTransaction(); auto T2 = gCling->getLatestTransaction();
__global__ void g2() { int i = 2; } __global__ void g2() { int i = 2; }
auto M2 = T2->getNext()->getModule(); auto M2 = T2->getNext()->getCompiledModule();
// The two modules should have different names, because of the for loop.
M1->getName().str() != M2->getName().str()
// CHECK: (bool) true
// search for the symbols in the llvm::Module M2
for (auto const& key_value : module_compare) {
for (auto& I : *M2) { for (auto& I : *M2) {
if(I.getName().startswith_lower("__cuda_module_ctor_")){ if (I.getName().startswith(key_value.first)) {
ctor2 = I.getName().str(); module_compare[key_value.first].push_back(I.getName().str());
}
}
for (auto& I : M2->globals()) {
if (I.getName().startswith(key_value.first)) {
module_compare[key_value.first].push_back(I.getName().str());
} }
if(I.getName().startswith_lower("__cuda_module_dtor_")){
dtor2 = I.getName().str();
} }
} }
// Check if the ctor and dtor of the two modules are different. // verify, that each symbol was found in the second module
ctor1 != ctor2 // expected-note {{use '|=' to turn this inequality comparison into an or-assignment}} for (auto const& key_value : module_compare) {
// CHECK: (bool) true if (key_value.second.size() < 2) {
dtor1 != dtor2 // expected-note {{use '|=' to turn this inequality comparison into an or-assignment}} std::cout << "could not find symbol" << std::endl;
// CHECK: (bool) true // CHECK-NOT: could not find symbol
std::cout << "\"" << key_value.first << "\" is not in transaction T2"
<< std::endl;
}
}
// Check if the ctor symbol starts with the correct prefix. for (auto const& key_value : module_compare) {
std::string expectedCtorPrefix = "__cuda_module_ctor_cling_module_"; std::string const generic_symbol_name = key_value.first;
ctor1.compare(0, expectedCtorPrefix.length(), expectedCtorPrefix) std::string const symbol_name_suffix = generic_symbol_name + "_cling_module_";
// CHECK: (int) 0 std::string const T1_symbol_name = key_value.second[0];
std::string const T2_symbol_name = key_value.second[1];
// Check if the dtor symbol starts with the correct prefix. // check if each symbols are different for different modules
std::string expectedDtorPrefix = "__cuda_module_dtor_cling_module_"; if (T1_symbol_name != T2_symbol_name) {
dtor1.compare(0, expectedDtorPrefix.length(), expectedDtorPrefix) std::cout << "T1_symbol_name and T2_symbol_name are unique" << std::endl;
// CHECK: (int) 0 // CHECK: T1_symbol_name and T2_symbol_name are unique
} else {
std::cerr << "T1_symbol_name and T2_symbol_name are equals" << std::endl;
// CHECK-NOT: T1_symbol_name and T2_symbol_name are equals
std::cerr << T1_symbol_name << " == " << T2_symbol_name << std::endl;
}
// only the module number is difference for each symbol
// therefor the begin of the symbol name can be checked
if (0 != T1_symbol_name.compare(0, symbol_name_suffix.length(),
symbol_name_suffix)) {
std::cerr << "Wrong suffix" << std::endl;
// CHECK-NOT: Wrong suffix
std::cerr << "T1_symbol_name: " << T1_symbol_name << std::endl;
std::cerr << "expected symbol + suffix: " << symbol_name_suffix
<< std::endl;
}
if (0 != T2_symbol_name.compare(0, symbol_name_suffix.length(),
symbol_name_suffix)) {
std::cerr << "Wrong suffix" << std::endl;
// CHECK-NOT: Wrong suffix
std::cerr << "T2_symbol_name: " << T2_symbol_name << std::endl;
std::cerr << "expected symbol + suffix: " << symbol_name_suffix
<< std::endl;
}
}
// expected-no-diagnostics
.q .q