Fixed bug in symbol linking for CUDA kernel registration with LLVM 13.

Since cling was ported to LLVM 13, it is required that the
`__cuda_register_globals` function and the `__cuda_fatbin_wrapper` and
`__cuda_gpubin_handle` variables are unique when defining a CUDA kernel.
Otherwise, the JIT is lazy and reuses the compiled version of
`__cuda_register_globals`, `__cuda_fatbin_wrapper` and `__cuda_gpubin_handle`
from the first CUDA kernel definition for all subsequent CUDA kernel
definitions, which in practice means that the PTX code from the first kernel is
re-registered each time.

Increase the default CUDA SM level to 35 because SM 20 is deprecated or
removed in the current CUDA SDK versions.
This commit is contained in:
Simeon Ehrig 2022-07-27 11:23:42 +00:00 committed by jenkins
parent 927f761b8b
commit 0ee3ebcb5b
3 changed files with 142 additions and 58 deletions

View File

@ -127,29 +127,55 @@ char WeakTypeinfoVTablePass::ID = 0;
namespace {
// Add a suffix to the CUDA module ctor/dtor to generate a unique name.
// This is necessary for lazy compilation. Without suffix, cling cannot
// distinguish ctor/dtor of subsequent modules.
// Add a suffix to the CUDA module ctor/dtor, CUDA specific functions and
// variables to generate a unique name. This is necessary for lazy
// compilation. Without suffix, cling cannot distinguish ctor/dtor, register
// function and and ptx code string of subsequent modules.
class UniqueCUDAStructorName : public ModulePass {
static char ID;
bool runOnFunction(Function& F, const StringRef ModuleName){
if(F.hasName() && (F.getName() == "__cuda_module_ctor"
|| F.getName() == "__cuda_module_dtor") ){
llvm::SmallString<128> NewFunctionName;
NewFunctionName.append(F.getName());
NewFunctionName.append("_");
NewFunctionName.append(ModuleName);
// append a suffix to a symbol to make it unique
// the suffix is "_cling_module_<module number>"
llvm::SmallString<128> add_module_suffix(const StringRef SymbolName,
const StringRef ModuleName) {
llvm::SmallString<128> NewFunctionName;
NewFunctionName.append(SymbolName);
NewFunctionName.append("_");
NewFunctionName.append(ModuleName);
for (size_t i = 0; i < NewFunctionName.size(); ++i) {
// Replace everything that is not [a-zA-Z0-9._] with a _. This set
// happens to be the set of C preprocessing numbers.
if (!isPreprocessingNumberBody(NewFunctionName[i]))
NewFunctionName[i] = '_';
}
for (size_t i = 0; i < NewFunctionName.size(); ++i) {
// Replace everything that is not [a-zA-Z0-9._] with a _. This set
// happens to be the set of C preprocessing numbers.
if (!isPreprocessingNumberBody(NewFunctionName[i]))
NewFunctionName[i] = '_';
}
F.setName(NewFunctionName);
return NewFunctionName;
}
// make CUDA specific variables unique
bool runOnGlobal(GlobalValue& GV, const StringRef ModuleName) {
if (GV.isDeclaration())
return false; // no change.
if (!GV.hasName())
return false;
if (GV.getName().equals("__cuda_fatbin_wrapper") ||
GV.getName().equals("__cuda_gpubin_handle")) {
GV.setName(add_module_suffix(GV.getName(), ModuleName));
return true;
}
return false;
}
// make CUDA specific functions unique
bool runOnFunction(Function& F, const StringRef ModuleName) {
if (F.hasName() && (F.getName().equals("__cuda_module_ctor") ||
F.getName().equals("__cuda_module_dtor") ||
F.getName().equals("__cuda_register_globals"))) {
F.setName(add_module_suffix(F.getName(), ModuleName));
return true;
}
@ -159,15 +185,17 @@ namespace {
public:
UniqueCUDAStructorName() : ModulePass(ID) {}
bool runOnModule(Module &M) override {
bool runOnModule(Module& M) override {
bool ret = false;
const StringRef ModuleName = M.getName();
for (auto &&F: M)
for (auto&& F : M)
ret |= runOnFunction(F, ModuleName);
for (auto&& G : M.globals())
ret |= runOnGlobal(G, ModuleName);
return ret;
}
};
}
} // namespace
char UniqueCUDAStructorName::ID = 0;

View File

@ -121,7 +121,7 @@ namespace cling {
llvm::errs()
<< "IncrementalCUDADeviceCompiler: No valid c++ standard is set.\n";
uint32_t smVersion = 20;
uint32_t smVersion = 35;
if (!invocationOptions.CompilerOpts.CUDAGpuArch.empty()) {
llvm::StringRef(invocationOptions.CompilerOpts.CUDAGpuArch)
.drop_front(3 /* sm_ */)

View File

@ -6,9 +6,10 @@
// LICENSE.TXT for details.
//------------------------------------------------------------------------------
// The Test checks, if the symbols __cuda_module_ctor and __cuda_module_dtor are
// unique for every module. Attention, for a working test case, a cuda
// fatbinary is necessary.
// The Test checks, if the symbols __cuda_module_ctor, __cuda_module_dtor,
// __cuda_register_globals, __cuda_fatbin_wrapper and __cuda_gpubin_handle are
// unique for every module. Attention, for a working test case, a cuda fatbinary
// is necessary.
// RUN: cat %s | %cling -x cuda --cuda-path=%cudapath %cudasmlevel -Xclang -verify 2>&1 | FileCheck %s
// REQUIRES: cuda-runtime
@ -17,58 +18,113 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include <iostream>
#include <map>
#include <vector>
// Compare the cuda module ctor and dtor of two random modules.
std::string ctor1, ctor2, dtor1, dtor2;
// for each key in the map, the test searches for a symbol in a module and
// stores the full name in the vector in the value
std::map<std::string, std::vector<std::string>> module_compare;
for (std::string const& s :
{"__cuda_module_ctor", "__cuda_module_dtor", "__cuda_register_globals",
"__cuda_fatbin_wrapper", "__cuda_gpubin_handle"}) {
module_compare.emplace(s, std::vector<std::string>{});
}
auto T1 = gCling->getLatestTransaction();
// only __global__ CUDA kernel definitions has the cuda module ctor and dtor
// only __global__ CUDA kernel definitions has the cuda specific functions and
// variables to register a kernel
__global__ void g1() { int i = 1; }
auto M1 = T1->getNext()->getModule();
auto M1 = T1->getNext()->getCompiledModule();
for(auto &I : *M1){
// The trailing '_' identify the function name as modified name.
if(I.getName().startswith_lower("__cuda_module_ctor_")){
ctor1 = I.getName().str();
// search for the symbols in the llvm::Module M1
for (auto const& key_value : module_compare) {
for (auto& I : *M1) {
if (I.getName().startswith(key_value.first)) {
module_compare[key_value.first].push_back(I.getName().str());
}
}
for (auto& I : M1->globals()) {
if (I.getName().startswith(key_value.first)) {
module_compare[key_value.first].push_back(I.getName().str());
}
}
}
if(I.getName().startswith_lower("__cuda_module_dtor_")){
dtor1 = I.getName().str();
// verify, that each symbol was found in the module
// if a symbol was not found, the vector should be empty
for (auto const& key_value : module_compare) {
if (key_value.second.size() < 1) {
std::cout << "could not find symbol" << std::endl;
// CHECK-NOT: could not find symbol
std::cout << "\"" << key_value.first << "\" is not in transaction T1"
<< std::endl;
}
}
auto T2 = gCling->getLatestTransaction();
__global__ void g2() { int i = 2; }
auto M2 = T2->getNext()->getModule();
auto M2 = T2->getNext()->getCompiledModule();
// The two modules should have different names, because of the for loop.
M1->getName().str() != M2->getName().str()
// CHECK: (bool) true
for(auto &I : *M2){
if(I.getName().startswith_lower("__cuda_module_ctor_")){
ctor2 = I.getName().str();
// search for the symbols in the llvm::Module M2
for (auto const& key_value : module_compare) {
for (auto& I : *M2) {
if (I.getName().startswith(key_value.first)) {
module_compare[key_value.first].push_back(I.getName().str());
}
}
if(I.getName().startswith_lower("__cuda_module_dtor_")){
dtor2 = I.getName().str();
for (auto& I : M2->globals()) {
if (I.getName().startswith(key_value.first)) {
module_compare[key_value.first].push_back(I.getName().str());
}
}
}
// Check if the ctor and dtor of the two modules are different.
ctor1 != ctor2 // expected-note {{use '|=' to turn this inequality comparison into an or-assignment}}
// CHECK: (bool) true
dtor1 != dtor2 // expected-note {{use '|=' to turn this inequality comparison into an or-assignment}}
// CHECK: (bool) true
// verify, that each symbol was found in the second module
for (auto const& key_value : module_compare) {
if (key_value.second.size() < 2) {
std::cout << "could not find symbol" << std::endl;
// CHECK-NOT: could not find symbol
std::cout << "\"" << key_value.first << "\" is not in transaction T2"
<< std::endl;
}
}
// Check if the ctor symbol starts with the correct prefix.
std::string expectedCtorPrefix = "__cuda_module_ctor_cling_module_";
ctor1.compare(0, expectedCtorPrefix.length(), expectedCtorPrefix)
// CHECK: (int) 0
for (auto const& key_value : module_compare) {
std::string const generic_symbol_name = key_value.first;
std::string const symbol_name_suffix = generic_symbol_name + "_cling_module_";
std::string const T1_symbol_name = key_value.second[0];
std::string const T2_symbol_name = key_value.second[1];
// Check if the dtor symbol starts with the correct prefix.
std::string expectedDtorPrefix = "__cuda_module_dtor_cling_module_";
dtor1.compare(0, expectedDtorPrefix.length(), expectedDtorPrefix)
// CHECK: (int) 0
// check if each symbols are different for different modules
if (T1_symbol_name != T2_symbol_name) {
std::cout << "T1_symbol_name and T2_symbol_name are unique" << std::endl;
// CHECK: T1_symbol_name and T2_symbol_name are unique
} else {
std::cerr << "T1_symbol_name and T2_symbol_name are equals" << std::endl;
// CHECK-NOT: T1_symbol_name and T2_symbol_name are equals
std::cerr << T1_symbol_name << " == " << T2_symbol_name << std::endl;
}
// only the module number is difference for each symbol
// therefor the begin of the symbol name can be checked
if (0 != T1_symbol_name.compare(0, symbol_name_suffix.length(),
symbol_name_suffix)) {
std::cerr << "Wrong suffix" << std::endl;
// CHECK-NOT: Wrong suffix
std::cerr << "T1_symbol_name: " << T1_symbol_name << std::endl;
std::cerr << "expected symbol + suffix: " << symbol_name_suffix
<< std::endl;
}
if (0 != T2_symbol_name.compare(0, symbol_name_suffix.length(),
symbol_name_suffix)) {
std::cerr << "Wrong suffix" << std::endl;
// CHECK-NOT: Wrong suffix
std::cerr << "T2_symbol_name: " << T2_symbol_name << std::endl;
std::cerr << "expected symbol + suffix: " << symbol_name_suffix
<< std::endl;
}
}
// expected-no-diagnostics
.q