Fixed bug in symbol linking for CUDA kernel registration with LLVM 13.
Since cling was ported to LLVM 13, it is required that the `__cuda_register_globals` function and the `__cuda_fatbin_wrapper` and `__cuda_gpubin_handle` variables are unique when defining a CUDA kernel. Otherwise, the JIT is lazy and reuses the compiled version of `__cuda_register_globals`, `__cuda_fatbin_wrapper` and `__cuda_gpubin_handle` from the first CUDA kernel definition for all subsequent CUDA kernel definitions, which in practice means that the PTX code from the first kernel is re-registered each time. Increase the default CUDA SM level to 35 because SM 20 is deprecated or removed in the current CUDA SDK versions.
This commit is contained in:
parent
927f761b8b
commit
0ee3ebcb5b
@ -127,29 +127,55 @@ char WeakTypeinfoVTablePass::ID = 0;
|
|||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
// Add a suffix to the CUDA module ctor/dtor to generate a unique name.
|
// Add a suffix to the CUDA module ctor/dtor, CUDA specific functions and
|
||||||
// This is necessary for lazy compilation. Without suffix, cling cannot
|
// variables to generate a unique name. This is necessary for lazy
|
||||||
// distinguish ctor/dtor of subsequent modules.
|
// compilation. Without suffix, cling cannot distinguish ctor/dtor, register
|
||||||
|
// function and and ptx code string of subsequent modules.
|
||||||
class UniqueCUDAStructorName : public ModulePass {
|
class UniqueCUDAStructorName : public ModulePass {
|
||||||
static char ID;
|
static char ID;
|
||||||
|
|
||||||
bool runOnFunction(Function& F, const StringRef ModuleName){
|
// append a suffix to a symbol to make it unique
|
||||||
if(F.hasName() && (F.getName() == "__cuda_module_ctor"
|
// the suffix is "_cling_module_<module number>"
|
||||||
|| F.getName() == "__cuda_module_dtor") ){
|
llvm::SmallString<128> add_module_suffix(const StringRef SymbolName,
|
||||||
llvm::SmallString<128> NewFunctionName;
|
const StringRef ModuleName) {
|
||||||
NewFunctionName.append(F.getName());
|
llvm::SmallString<128> NewFunctionName;
|
||||||
NewFunctionName.append("_");
|
NewFunctionName.append(SymbolName);
|
||||||
NewFunctionName.append(ModuleName);
|
NewFunctionName.append("_");
|
||||||
|
NewFunctionName.append(ModuleName);
|
||||||
|
|
||||||
for (size_t i = 0; i < NewFunctionName.size(); ++i) {
|
for (size_t i = 0; i < NewFunctionName.size(); ++i) {
|
||||||
// Replace everything that is not [a-zA-Z0-9._] with a _. This set
|
// Replace everything that is not [a-zA-Z0-9._] with a _. This set
|
||||||
// happens to be the set of C preprocessing numbers.
|
// happens to be the set of C preprocessing numbers.
|
||||||
if (!isPreprocessingNumberBody(NewFunctionName[i]))
|
if (!isPreprocessingNumberBody(NewFunctionName[i]))
|
||||||
NewFunctionName[i] = '_';
|
NewFunctionName[i] = '_';
|
||||||
}
|
}
|
||||||
|
|
||||||
F.setName(NewFunctionName);
|
return NewFunctionName;
|
||||||
|
}
|
||||||
|
|
||||||
|
// make CUDA specific variables unique
|
||||||
|
bool runOnGlobal(GlobalValue& GV, const StringRef ModuleName) {
|
||||||
|
if (GV.isDeclaration())
|
||||||
|
return false; // no change.
|
||||||
|
|
||||||
|
if (!GV.hasName())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (GV.getName().equals("__cuda_fatbin_wrapper") ||
|
||||||
|
GV.getName().equals("__cuda_gpubin_handle")) {
|
||||||
|
GV.setName(add_module_suffix(GV.getName(), ModuleName));
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// make CUDA specific functions unique
|
||||||
|
bool runOnFunction(Function& F, const StringRef ModuleName) {
|
||||||
|
if (F.hasName() && (F.getName().equals("__cuda_module_ctor") ||
|
||||||
|
F.getName().equals("__cuda_module_dtor") ||
|
||||||
|
F.getName().equals("__cuda_register_globals"))) {
|
||||||
|
F.setName(add_module_suffix(F.getName(), ModuleName));
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -159,15 +185,17 @@ namespace {
|
|||||||
public:
|
public:
|
||||||
UniqueCUDAStructorName() : ModulePass(ID) {}
|
UniqueCUDAStructorName() : ModulePass(ID) {}
|
||||||
|
|
||||||
bool runOnModule(Module &M) override {
|
bool runOnModule(Module& M) override {
|
||||||
bool ret = false;
|
bool ret = false;
|
||||||
const StringRef ModuleName = M.getName();
|
const StringRef ModuleName = M.getName();
|
||||||
for (auto &&F: M)
|
for (auto&& F : M)
|
||||||
ret |= runOnFunction(F, ModuleName);
|
ret |= runOnFunction(F, ModuleName);
|
||||||
|
for (auto&& G : M.globals())
|
||||||
|
ret |= runOnGlobal(G, ModuleName);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
} // namespace
|
||||||
|
|
||||||
char UniqueCUDAStructorName::ID = 0;
|
char UniqueCUDAStructorName::ID = 0;
|
||||||
|
|
||||||
|
@ -121,7 +121,7 @@ namespace cling {
|
|||||||
llvm::errs()
|
llvm::errs()
|
||||||
<< "IncrementalCUDADeviceCompiler: No valid c++ standard is set.\n";
|
<< "IncrementalCUDADeviceCompiler: No valid c++ standard is set.\n";
|
||||||
|
|
||||||
uint32_t smVersion = 20;
|
uint32_t smVersion = 35;
|
||||||
if (!invocationOptions.CompilerOpts.CUDAGpuArch.empty()) {
|
if (!invocationOptions.CompilerOpts.CUDAGpuArch.empty()) {
|
||||||
llvm::StringRef(invocationOptions.CompilerOpts.CUDAGpuArch)
|
llvm::StringRef(invocationOptions.CompilerOpts.CUDAGpuArch)
|
||||||
.drop_front(3 /* sm_ */)
|
.drop_front(3 /* sm_ */)
|
||||||
|
@ -6,9 +6,10 @@
|
|||||||
// LICENSE.TXT for details.
|
// LICENSE.TXT for details.
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
|
|
||||||
// The Test checks, if the symbols __cuda_module_ctor and __cuda_module_dtor are
|
// The Test checks, if the symbols __cuda_module_ctor, __cuda_module_dtor,
|
||||||
// unique for every module. Attention, for a working test case, a cuda
|
// __cuda_register_globals, __cuda_fatbin_wrapper and __cuda_gpubin_handle are
|
||||||
// fatbinary is necessary.
|
// unique for every module. Attention, for a working test case, a cuda fatbinary
|
||||||
|
// is necessary.
|
||||||
// RUN: cat %s | %cling -x cuda --cuda-path=%cudapath %cudasmlevel -Xclang -verify 2>&1 | FileCheck %s
|
// RUN: cat %s | %cling -x cuda --cuda-path=%cudapath %cudasmlevel -Xclang -verify 2>&1 | FileCheck %s
|
||||||
// REQUIRES: cuda-runtime
|
// REQUIRES: cuda-runtime
|
||||||
|
|
||||||
@ -17,58 +18,113 @@
|
|||||||
#include "llvm/IR/Function.h"
|
#include "llvm/IR/Function.h"
|
||||||
#include "llvm/IR/Module.h"
|
#include "llvm/IR/Module.h"
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
#include <map>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
// Compare the cuda module ctor and dtor of two random modules.
|
// for each key in the map, the test searches for a symbol in a module and
|
||||||
std::string ctor1, ctor2, dtor1, dtor2;
|
// stores the full name in the vector in the value
|
||||||
|
std::map<std::string, std::vector<std::string>> module_compare;
|
||||||
|
for (std::string const& s :
|
||||||
|
{"__cuda_module_ctor", "__cuda_module_dtor", "__cuda_register_globals",
|
||||||
|
"__cuda_fatbin_wrapper", "__cuda_gpubin_handle"}) {
|
||||||
|
module_compare.emplace(s, std::vector<std::string>{});
|
||||||
|
}
|
||||||
|
|
||||||
auto T1 = gCling->getLatestTransaction();
|
auto T1 = gCling->getLatestTransaction();
|
||||||
// only __global__ CUDA kernel definitions has the cuda module ctor and dtor
|
// only __global__ CUDA kernel definitions has the cuda specific functions and
|
||||||
|
// variables to register a kernel
|
||||||
__global__ void g1() { int i = 1; }
|
__global__ void g1() { int i = 1; }
|
||||||
auto M1 = T1->getNext()->getModule();
|
auto M1 = T1->getNext()->getCompiledModule();
|
||||||
|
|
||||||
for(auto &I : *M1){
|
// search for the symbols in the llvm::Module M1
|
||||||
// The trailing '_' identify the function name as modified name.
|
for (auto const& key_value : module_compare) {
|
||||||
if(I.getName().startswith_lower("__cuda_module_ctor_")){
|
for (auto& I : *M1) {
|
||||||
ctor1 = I.getName().str();
|
if (I.getName().startswith(key_value.first)) {
|
||||||
|
module_compare[key_value.first].push_back(I.getName().str());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
for (auto& I : M1->globals()) {
|
||||||
|
if (I.getName().startswith(key_value.first)) {
|
||||||
|
module_compare[key_value.first].push_back(I.getName().str());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if(I.getName().startswith_lower("__cuda_module_dtor_")){
|
// verify, that each symbol was found in the module
|
||||||
dtor1 = I.getName().str();
|
// if a symbol was not found, the vector should be empty
|
||||||
|
for (auto const& key_value : module_compare) {
|
||||||
|
if (key_value.second.size() < 1) {
|
||||||
|
std::cout << "could not find symbol" << std::endl;
|
||||||
|
// CHECK-NOT: could not find symbol
|
||||||
|
std::cout << "\"" << key_value.first << "\" is not in transaction T1"
|
||||||
|
<< std::endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
auto T2 = gCling->getLatestTransaction();
|
auto T2 = gCling->getLatestTransaction();
|
||||||
__global__ void g2() { int i = 2; }
|
__global__ void g2() { int i = 2; }
|
||||||
auto M2 = T2->getNext()->getModule();
|
auto M2 = T2->getNext()->getCompiledModule();
|
||||||
|
|
||||||
// The two modules should have different names, because of the for loop.
|
// search for the symbols in the llvm::Module M2
|
||||||
M1->getName().str() != M2->getName().str()
|
for (auto const& key_value : module_compare) {
|
||||||
// CHECK: (bool) true
|
for (auto& I : *M2) {
|
||||||
|
if (I.getName().startswith(key_value.first)) {
|
||||||
for(auto &I : *M2){
|
module_compare[key_value.first].push_back(I.getName().str());
|
||||||
if(I.getName().startswith_lower("__cuda_module_ctor_")){
|
}
|
||||||
ctor2 = I.getName().str();
|
|
||||||
}
|
}
|
||||||
|
for (auto& I : M2->globals()) {
|
||||||
if(I.getName().startswith_lower("__cuda_module_dtor_")){
|
if (I.getName().startswith(key_value.first)) {
|
||||||
dtor2 = I.getName().str();
|
module_compare[key_value.first].push_back(I.getName().str());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if the ctor and dtor of the two modules are different.
|
// verify, that each symbol was found in the second module
|
||||||
ctor1 != ctor2 // expected-note {{use '|=' to turn this inequality comparison into an or-assignment}}
|
for (auto const& key_value : module_compare) {
|
||||||
// CHECK: (bool) true
|
if (key_value.second.size() < 2) {
|
||||||
dtor1 != dtor2 // expected-note {{use '|=' to turn this inequality comparison into an or-assignment}}
|
std::cout << "could not find symbol" << std::endl;
|
||||||
// CHECK: (bool) true
|
// CHECK-NOT: could not find symbol
|
||||||
|
std::cout << "\"" << key_value.first << "\" is not in transaction T2"
|
||||||
|
<< std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Check if the ctor symbol starts with the correct prefix.
|
for (auto const& key_value : module_compare) {
|
||||||
std::string expectedCtorPrefix = "__cuda_module_ctor_cling_module_";
|
std::string const generic_symbol_name = key_value.first;
|
||||||
ctor1.compare(0, expectedCtorPrefix.length(), expectedCtorPrefix)
|
std::string const symbol_name_suffix = generic_symbol_name + "_cling_module_";
|
||||||
// CHECK: (int) 0
|
std::string const T1_symbol_name = key_value.second[0];
|
||||||
|
std::string const T2_symbol_name = key_value.second[1];
|
||||||
|
|
||||||
// Check if the dtor symbol starts with the correct prefix.
|
// check if each symbols are different for different modules
|
||||||
std::string expectedDtorPrefix = "__cuda_module_dtor_cling_module_";
|
if (T1_symbol_name != T2_symbol_name) {
|
||||||
dtor1.compare(0, expectedDtorPrefix.length(), expectedDtorPrefix)
|
std::cout << "T1_symbol_name and T2_symbol_name are unique" << std::endl;
|
||||||
// CHECK: (int) 0
|
// CHECK: T1_symbol_name and T2_symbol_name are unique
|
||||||
|
} else {
|
||||||
|
std::cerr << "T1_symbol_name and T2_symbol_name are equals" << std::endl;
|
||||||
|
// CHECK-NOT: T1_symbol_name and T2_symbol_name are equals
|
||||||
|
std::cerr << T1_symbol_name << " == " << T2_symbol_name << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
// only the module number is difference for each symbol
|
||||||
|
// therefor the begin of the symbol name can be checked
|
||||||
|
if (0 != T1_symbol_name.compare(0, symbol_name_suffix.length(),
|
||||||
|
symbol_name_suffix)) {
|
||||||
|
std::cerr << "Wrong suffix" << std::endl;
|
||||||
|
// CHECK-NOT: Wrong suffix
|
||||||
|
std::cerr << "T1_symbol_name: " << T1_symbol_name << std::endl;
|
||||||
|
std::cerr << "expected symbol + suffix: " << symbol_name_suffix
|
||||||
|
<< std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (0 != T2_symbol_name.compare(0, symbol_name_suffix.length(),
|
||||||
|
symbol_name_suffix)) {
|
||||||
|
std::cerr << "Wrong suffix" << std::endl;
|
||||||
|
// CHECK-NOT: Wrong suffix
|
||||||
|
std::cerr << "T2_symbol_name: " << T2_symbol_name << std::endl;
|
||||||
|
std::cerr << "expected symbol + suffix: " << symbol_name_suffix
|
||||||
|
<< std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// expected-no-diagnostics
|
||||||
.q
|
.q
|
||||||
|
Loading…
x
Reference in New Issue
Block a user