Fixed bug in symbol linking for CUDA kernel registration with LLVM 13.

Since cling was ported to LLVM 13, it is required that the `__cuda_register_globals` function and the `__cuda_fatbin_wrapper` and `__cuda_gpubin_handle` variables are unique when defining a CUDA kernel. Otherwise, the JIT is lazy and reuses the compiled version of `__cuda_register_globals`, `__cuda_fatbin_wrapper` and `__cuda_gpubin_handle` from the first CUDA kernel definition for all subsequent CUDA kernel definitions, which in practice means that the PTX code from the first kernel is re-registered each time. Increase the default CUDA SM level to 35 because SM 20 is deprecated or removed in the current CUDA SDK versions.
2022-07-27 11:23:42 +00:00 · 2022-07-27 11:23:42 +00:00 · 0ee3ebcb5b
commit 0ee3ebcb5b
parent 927f761b8b
3 changed files with 142 additions and 58 deletions
--- a/lib/Interpreter/BackendPasses.cpp
+++ b/lib/Interpreter/BackendPasses.cpp
@ -127,17 +127,19 @@ char WeakTypeinfoVTablePass::ID = 0;
 namespace {
-  // Add a suffix to the CUDA module ctor/dtor to generate a unique name.
+  // Add a suffix to the CUDA module ctor/dtor, CUDA specific functions and
-  // This is necessary for lazy compilation. Without suffix, cling cannot
+  // variables to generate a unique name. This is necessary for lazy
-  // distinguish ctor/dtor of subsequent modules.
+  // compilation. Without suffix, cling cannot distinguish ctor/dtor, register
  // function and and ptx code string of subsequent modules.
  class UniqueCUDAStructorName : public ModulePass {
    static char ID;
-    bool runOnFunction(Function& F, const StringRef ModuleName){
+    // append a suffix to a symbol to make it unique
-      if(F.hasName() && (F.getName() == "__cuda_module_ctor"
+    // the suffix is "_cling_module_<module number>"
-          || F.getName() == "__cuda_module_dtor") ){
+    llvm::SmallString<128> add_module_suffix(const StringRef SymbolName,
                                             const StringRef ModuleName) {
      llvm::SmallString<128> NewFunctionName;
-        NewFunctionName.append(F.getName());
+      NewFunctionName.append(SymbolName);
      NewFunctionName.append("_");
      NewFunctionName.append(ModuleName);
@ -148,8 +150,32 @@ namespace {
          NewFunctionName[i] = '_';
      }
-        F.setName(NewFunctionName);
+      return NewFunctionName;
    }
    // make CUDA specific variables unique
    bool runOnGlobal(GlobalValue& GV, const StringRef ModuleName) {
      if (GV.isDeclaration())
        return false; // no change.
      if (!GV.hasName())
        return false;
      if (GV.getName().equals("__cuda_fatbin_wrapper") ||
          GV.getName().equals("__cuda_gpubin_handle")) {
        GV.setName(add_module_suffix(GV.getName(), ModuleName));
        return true;
      }
      return false;
    }
    // make CUDA specific functions unique
    bool runOnFunction(Function& F, const StringRef ModuleName) {
      if (F.hasName() && (F.getName().equals("__cuda_module_ctor") ||
                          F.getName().equals("__cuda_module_dtor") ||
                          F.getName().equals("__cuda_register_globals"))) {
        F.setName(add_module_suffix(F.getName(), ModuleName));
        return true;
      }
@ -164,10 +190,12 @@ namespace {
      const StringRef ModuleName = M.getName();
      for (auto&& F : M)
        ret |= runOnFunction(F, ModuleName);
      for (auto&& G : M.globals())
        ret |= runOnGlobal(G, ModuleName);
      return ret;
    }
  };
-}
+} // namespace
 char UniqueCUDAStructorName::ID = 0;
--- a/lib/Interpreter/IncrementalCUDADeviceCompiler.cpp
+++ b/lib/Interpreter/IncrementalCUDADeviceCompiler.cpp
@ -121,7 +121,7 @@ namespace cling {
      llvm::errs()
          << "IncrementalCUDADeviceCompiler: No valid c++ standard is set.\n";
-    uint32_t smVersion = 20;
+    uint32_t smVersion = 35;
    if (!invocationOptions.CompilerOpts.CUDAGpuArch.empty()) {
      llvm::StringRef(invocationOptions.CompilerOpts.CUDAGpuArch)
          .drop_front(3 /* sm_ */)
--- a/test/CodeGeneration/CUDACtorDtor.C
+++ b/test/CodeGeneration/CUDACtorDtor.C
@ -6,9 +6,10 @@
 // LICENSE.TXT for details.
 //------------------------------------------------------------------------------
-// The Test checks, if the symbols __cuda_module_ctor and __cuda_module_dtor are
+// The Test checks, if the symbols __cuda_module_ctor, __cuda_module_dtor,
-// unique for every module. Attention, for a working test case, a cuda
+// __cuda_register_globals, __cuda_fatbin_wrapper and __cuda_gpubin_handle are
-// fatbinary is necessary.
+// unique for every module. Attention, for a working test case, a cuda fatbinary
 // is necessary.
 // RUN: cat %s | %cling -x cuda --cuda-path=%cudapath %cudasmlevel -Xclang -verify 2>&1 | FileCheck %s
 // REQUIRES: cuda-runtime
@ -17,58 +18,113 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Module.h"
 #include <iostream>
 #include <map>
 #include <vector>
-// Compare the cuda module ctor and dtor of two random modules.
+// for each key in the map, the test searches for a symbol in a module and
-std::string ctor1, ctor2, dtor1, dtor2;
+// stores the full name in the vector in the value
-
+std::map<std::string, std::vector<std::string>> module_compare;
-auto T1 = gCling->getLatestTransaction();
+for (std::string const& s :
-// only __global__ CUDA kernel definitions has the cuda module ctor and dtor
+     {"__cuda_module_ctor", "__cuda_module_dtor", "__cuda_register_globals",
-__global__ void g1() { int i = 1; }
+      "__cuda_fatbin_wrapper", "__cuda_gpubin_handle"}) {
-auto M1 = T1->getNext()->getModule();
+  module_compare.emplace(s, std::vector<std::string>{});
 for(auto &I : *M1){
  // The trailing '_' identify the function name as modified name.
  if(I.getName().startswith_lower("__cuda_module_ctor_")){
    ctor1 = I.getName().str();
 }
-  if(I.getName().startswith_lower("__cuda_module_dtor_")){
+auto T1 = gCling->getLatestTransaction();
-    dtor1 = I.getName().str();
+// only __global__ CUDA kernel definitions has the cuda specific functions and
 // variables to register a kernel
 __global__ void g1() { int i = 1; }
 auto M1 = T1->getNext()->getCompiledModule();
 // search for the symbols in the llvm::Module M1
 for (auto const& key_value : module_compare) {
  for (auto& I : *M1) {
    if (I.getName().startswith(key_value.first)) {
      module_compare[key_value.first].push_back(I.getName().str());
    }
  }
  for (auto& I : M1->globals()) {
    if (I.getName().startswith(key_value.first)) {
      module_compare[key_value.first].push_back(I.getName().str());
    }
  }
 }
 // verify, that each symbol was found in the module
 // if a symbol was not found, the vector should be empty
 for (auto const& key_value : module_compare) {
  if (key_value.second.size() < 1) {
    std::cout << "could not find symbol" << std::endl;
    // CHECK-NOT: could not find symbol
    std::cout << "\"" << key_value.first << "\" is not in transaction T1"
              << std::endl;
  }
 }
 auto T2 = gCling->getLatestTransaction();
 __global__ void g2() { int i = 2; }
-auto M2 = T2->getNext()->getModule();
+auto M2 = T2->getNext()->getCompiledModule();
 // The two modules should have different names, because of the for loop.
 M1->getName().str() != M2->getName().str()
 // CHECK: (bool) true
 // search for the symbols in the llvm::Module M2
 for (auto const& key_value : module_compare) {
  for (auto& I : *M2) {
-  if(I.getName().startswith_lower("__cuda_module_ctor_")){
+    if (I.getName().startswith(key_value.first)) {
-    ctor2 = I.getName().str();
+      module_compare[key_value.first].push_back(I.getName().str());
    }
  }
  for (auto& I : M2->globals()) {
    if (I.getName().startswith(key_value.first)) {
      module_compare[key_value.first].push_back(I.getName().str());
    }
  if(I.getName().startswith_lower("__cuda_module_dtor_")){
    dtor2 = I.getName().str();
  }
 }
-// Check if the ctor and dtor of the two modules are different.
+// verify, that each symbol was found in the second module
-ctor1 != ctor2 // expected-note {{use '|=' to turn this inequality comparison into an or-assignment}}
+for (auto const& key_value : module_compare) {
-// CHECK: (bool) true
+  if (key_value.second.size() < 2) {
-dtor1 != dtor2 // expected-note {{use '|=' to turn this inequality comparison into an or-assignment}}
+    std::cout << "could not find symbol" << std::endl;
-// CHECK: (bool) true
+    // CHECK-NOT: could not find symbol
    std::cout << "\"" << key_value.first << "\" is not in transaction T2"
              << std::endl;
  }
 }
-// Check if the ctor symbol starts with the correct prefix.
+for (auto const& key_value : module_compare) {
-std::string expectedCtorPrefix = "__cuda_module_ctor_cling_module_";
+  std::string const generic_symbol_name = key_value.first;
-ctor1.compare(0, expectedCtorPrefix.length(), expectedCtorPrefix)
+  std::string const symbol_name_suffix = generic_symbol_name + "_cling_module_";
-// CHECK: (int) 0
+  std::string const T1_symbol_name = key_value.second[0];
  std::string const T2_symbol_name = key_value.second[1];
-// Check if the dtor symbol starts with the correct prefix.
+  // check if each symbols are different for different modules
-std::string expectedDtorPrefix = "__cuda_module_dtor_cling_module_";
+  if (T1_symbol_name != T2_symbol_name) {
-dtor1.compare(0, expectedDtorPrefix.length(), expectedDtorPrefix)
+    std::cout << "T1_symbol_name and T2_symbol_name are unique" << std::endl;
-// CHECK: (int) 0
+    // CHECK: T1_symbol_name and T2_symbol_name are unique
  } else {
    std::cerr << "T1_symbol_name and T2_symbol_name are equals" << std::endl;
    // CHECK-NOT: T1_symbol_name and T2_symbol_name are equals
    std::cerr << T1_symbol_name << " == " << T2_symbol_name << std::endl;
  }
  // only the module number is difference for each symbol
  // therefor the begin of the symbol name can be checked
  if (0 != T1_symbol_name.compare(0, symbol_name_suffix.length(),
                                  symbol_name_suffix)) {
    std::cerr << "Wrong suffix" << std::endl;
    // CHECK-NOT: Wrong suffix
    std::cerr << "T1_symbol_name: " << T1_symbol_name << std::endl;
    std::cerr << "expected symbol + suffix: " << symbol_name_suffix
              << std::endl;
  }
  if (0 != T2_symbol_name.compare(0, symbol_name_suffix.length(),
                                  symbol_name_suffix)) {
    std::cerr << "Wrong suffix" << std::endl;
    // CHECK-NOT: Wrong suffix
    std::cerr << "T2_symbol_name: " << T2_symbol_name << std::endl;
    std::cerr << "expected symbol + suffix: " << symbol_name_suffix
              << std::endl;
  }
 }
 // expected-no-diagnostics
 .q