Extend cling::Interpreter to incrementally compile PTX code for CUDA devices

- change CUDA to CUDAHost and add CUDADevice to the InvocationOptions - in PTX mode, some ASTTransformers that are useful for the x86 mode will not be used
2019-01-15 12:21:51 +01:00 · 2019-01-15 12:21:51 +01:00 · 6c46b65754
commit 6c46b65754
parent b1c18ae3cf
6 changed files with 52 additions and 20 deletions
--- a/include/cling/Interpreter/InvocationOptions.h
+++ b/include/cling/Interpreter/InvocationOptions.h
@ -58,7 +58,8 @@ namespace cling {
    unsigned HasOutput : 1;
    unsigned Verbose : 1;
    unsigned CxxModules : 1;
-    unsigned CUDA : 1;
+    unsigned CUDAHost : 1;
+    unsigned CUDADevice : 1;
    /// \brief The output path of any C++ PCMs we're building on demand.
    /// Equal to ModuleCachePath in the HeaderSearchOptions.
    std::string CachePath;
--- a/lib/Interpreter/CIFactory.cpp
+++ b/lib/Interpreter/CIFactory.cpp
@ -1025,7 +1025,7 @@ static void stringifyPreprocSetting(PreprocessorOptions& PPOpts,
    // This argument starts the cling instance with the x86 target. Otherwise,
    // the first job in the joblist starts the cling instance with the nvptx
    // target.
-    if(COpts.CUDA)
+    if(COpts.CUDAHost)
      argvCompile.push_back("--cuda-host-only");

    // argv[0] already inserted, get the rest
--- a/lib/Interpreter/IncrementalParser.cpp
+++ b/lib/Interpreter/IncrementalParser.cpp
@ -289,7 +289,7 @@ namespace cling {
    initializeVirtualFile();

    if(m_CI->getFrontendOpts().ProgramAction != frontend::ParseSyntaxOnly &&
-      m_Interpreter->getOptions().CompilerOpts.CUDA){
+      m_Interpreter->getOptions().CompilerOpts.CUDAHost){
        // Create temporary folder for all files, which the CUDA device compiler
        // will generate.
        llvm::SmallString<256> TmpPath;
@ -920,7 +920,8 @@ namespace cling {
    else if (Diags.getNumWarnings())
      return kSuccessWithWarnings;

-    if(!m_Interpreter->isInSyntaxOnlyMode() && m_CI->getLangOpts().CUDA )
+    if (!m_Interpreter->isInSyntaxOnlyMode() &&
+        m_Interpreter->getOptions().CompilerOpts.CUDAHost)
      m_CUDACompiler->compileDeviceCode(input, m_Consumer->getTransaction());

    return kSuccess;
@ -935,25 +936,29 @@ namespace cling {
  void IncrementalParser::SetTransformers(bool isChildInterpreter) {
    // Add transformers to the IncrementalParser, which owns them
    Sema* TheSema = &m_CI->getSema();
+    // When compiling PTX code, some transformers should not be used.
+    bool isCUDADevice = m_Interpreter->getOptions().CompilerOpts.CUDADevice;
    // Register the AST Transformers
    typedef std::unique_ptr<ASTTransformer> ASTTPtr_t;
    std::vector<ASTTPtr_t> ASTTransformers;
    ASTTransformers.emplace_back(new AutoSynthesizer(TheSema));
    ASTTransformers.emplace_back(new EvaluateTSynthesizer(TheSema));
    if (hasCodeGenerator() && !m_Interpreter->getOptions().NoRuntime) {
-       // Don't protect against crashes if we cannot run anything.
-       // cling might also be in a PCH-generation mode; don't inject our Sema pointer
-       // into the PCH.
-       ASTTransformers.emplace_back(new NullDerefProtectionTransformer(m_Interpreter));
+      // Don't protect against crashes if we cannot run anything.
+      // cling might also be in a PCH-generation mode; don't inject our Sema
+      // pointer into the PCH.
+      if (!isCUDADevice)
+        ASTTransformers.emplace_back(
+            new NullDerefProtectionTransformer(m_Interpreter));
    }
    ASTTransformers.emplace_back(new DefinitionShadower(*TheSema, *m_Interpreter));

    typedef std::unique_ptr<WrapperTransformer> WTPtr_t;
    std::vector<WTPtr_t> WrapperTransformers;
-    if (!m_Interpreter->getOptions().NoRuntime)
+    if (!m_Interpreter->getOptions().NoRuntime && !isCUDADevice)
      WrapperTransformers.emplace_back(new ValuePrinterSynthesizer(TheSema));
    WrapperTransformers.emplace_back(new DeclExtractor(TheSema));
-    if (!m_Interpreter->getOptions().NoRuntime)
+    if (!m_Interpreter->getOptions().NoRuntime && !isCUDADevice)
      WrapperTransformers.emplace_back(new ValueExtractionSynthesizer(TheSema,
                                                           isChildInterpreter));
    WrapperTransformers.emplace_back(new CheckEmptyTransactionTransformer(TheSema));
--- a/lib/Interpreter/Interpreter.cpp
+++ b/lib/Interpreter/Interpreter.cpp
@ -258,7 +258,7 @@ namespace cling {
      setupCallbacks(*this, parentInterp);
    }

-    if(m_Opts.CompilerOpts.CUDA){
+    if(m_Opts.CompilerOpts.CUDAHost){
       if(m_DyLibManager->loadLibrary("libcudart.so", true) ==
         cling::DynamicLibraryManager::LoadLibResult::kLoadLibNotFound){
           llvm::errs() << "Error: libcudart.so not found!\n" <<
--- a/lib/Interpreter/InvocationOptions.cpp
+++ b/lib/Interpreter/InvocationOptions.cpp
@ -100,7 +100,7 @@ static const char kNoStdInc[] = "-nostdinc";
 CompilerOptions::CompilerOptions(int argc, const char* const* argv)
    : Language(false), ResourceDir(false), SysRoot(false), NoBuiltinInc(false),
      NoCXXInc(false), StdVersion(false), StdLib(false), HasOutput(false),
-      Verbose(false), CxxModules(false), CUDA(false) {
+      Verbose(false), CxxModules(false), CUDAHost(false), CUDADevice(false) {
  if (argc && argv) {
    // Preserve what's already in Remaining, the user might want to push args
    // to clang while still using main's argc, argv
@ -126,9 +126,11 @@ void CompilerOptions::Parse(int argc, const char* const argv[],
      // case options::OPT_d_Flag:
      case options::OPT_E:
      case options::OPT_o: HasOutput = true; break;
-      case options::OPT_x: Language = true;
-                           CUDA = llvm::StringRef(arg->getValue()) == "cuda";
-                           break;
+      case options::OPT_x:
+        Language = true;
+        CUDAHost =
+            (CUDADevice) ? 0 : llvm::StringRef(arg->getValue()) == "cuda";
+        break;
      case options::OPT_resource_dir: ResourceDir = true; break;
      case options::OPT_isysroot: SysRoot = true; break;
      case options::OPT_std_EQ: StdVersion = true; break;
@ -144,8 +146,14 @@ void CompilerOptions::Parse(int argc, const char* const argv[],
      case options::OPT_fmodules_cache_path: CachePath = arg->getValue(); break;
      case options::OPT_cuda_path_EQ: CUDAPath = arg->getValue(); break;
      case options::OPT_cuda_gpu_arch_EQ: CUDAGpuArch = arg->getValue(); break;
-      case options::OPT_Xcuda_fatbinary: CUDAFatbinaryArgs.push_back(arg->getValue());
-                                         break;
+      case options::OPT_Xcuda_fatbinary:
+        CUDAFatbinaryArgs.push_back(arg->getValue());
+        break;
+      case options::OPT_cuda_device_only:
+        Language = true;
+        CUDADevice = true;
+        CUDAHost = false;
+        break;

      default:
        if (Inputs && arg->getOption().getKind() == Option::InputClass)
@ -164,7 +172,8 @@ bool CompilerOptions::DefaultLanguage(const LangOptions* LangOpts) const {
  // Also don't set up the defaults when language is explicitly set; unless
  // the language was set to generate a PCH, in which case definitely do.
  if (Language)
-    return HasOutput || (LangOpts && LangOpts->CompilingPCH) || CUDA;
+    return HasOutput || (LangOpts && LangOpts->CompilingPCH) || CUDAHost ||
+           CUDADevice;

  return true;
 }
--- a/test/Interfaces/invocationFlags.C
+++ b/test/Interfaces/invocationFlags.C
@ -36,7 +36,9 @@ COpts.NoBuiltinInc
 // CHECK-NEXT: (unsigned int) 1
 COpts.NoCXXInc
 // CHECK-NEXT: (unsigned int) 0
-COpts.CUDA
+COpts.CUDAHost
+// CHECK-NEXT: (unsigned int) 0
+COpts.CUDADevice
 // CHECK-NEXT: (unsigned int) 0

 COpts.DefaultLanguage()
@ -67,8 +69,10 @@ IOpts.CompilerOpts.NoBuiltinInc
 // CHECK-NEXT: (unsigned int) 0
 IOpts.CompilerOpts.NoCXXInc
 // CHECK-NEXT: (unsigned int) 1
-IOpts.CompilerOpts.CUDA
+IOpts.CompilerOpts.CUDAHost
 // CHECK-NEXT: (unsigned int) 1
+IOpts.CompilerOpts.CUDADevice
+// CHECK-NEXT: (unsigned int) 0

 // if the language is cuda, it should set automatically the c++ standard
 IOpts.CompilerOpts.DefaultLanguage()
@ -80,5 +84,18 @@ IOpts.CompilerOpts.Remaining
 // Windows translates -nostdinc++ to -nostdinc++. Ignore that fact for the test.
 // CHECK-NEXT: {{.*}} { "progname", "-xcuda", "FileToExecuteA", "-isysroot", "APAth", {{.*}}, "-v", "FileToExecuteB" }

+// This flag allows the interpreter instance to compile PTX code.
+// CUDAHost and CUDADevice must not be true at the same time.
+// If --cuda-device-only is set, it does not matter whether -xcuda is set.
+argv[10] = "--cuda-device-only";
+
+cling::InvocationOptions IOpts2(argc, argv);
+
+IOpts2.CompilerOpts.CUDAHost
+// CHECK-NEXT: (unsigned int) 0
+
+IOpts2.CompilerOpts.CUDADevice
+// CHECK-NEXT: (unsigned int) 1
+
 // expected-no-diagnostics
 .q