Improvements for Pull Request #240

- minor changes to comments and code style
- use const in IncrementalCUDADeviceCompiler where possible
- move CUDA device code compiler instance to IncrementalParser
- change the members of CuArgs to const and adjust the setCuArgs method
- use std::vector<std::string> instead of llvm::SmallVector<const char *> to build argv for ExecuteAndWait
- improve the error messages of generatePCH(), generatePTX() and generateFatbinary()
- replace m_Counter with a copy in IncrementalCUDADeviceCompiler to avoid involuntary changes
This commit is contained in:
Simeon Ehrig 2018-05-29 16:53:24 +02:00 committed by sftnight
parent 309cebff41
commit 9a4418b3c0
9 changed files with 277 additions and 232 deletions

View File

@ -60,4 +60,4 @@ D: Main developer of cling.
N: Simeon Ehrig
E: s.ehrig@hzdr.de
D: CUDA runtime API support
D: cling CUDA runtime support

View File

@ -70,7 +70,6 @@ namespace cling {
class LookupHelper;
class Value;
class Transaction;
class IncrementalCUDADeviceCompiler;
///\brief Class that implements the interpreter-like behavior. It manages the
/// incremental compilation.
@ -159,10 +158,6 @@ namespace cling {
///
std::unique_ptr<LookupHelper> m_LookupHelper;
///\brief Cling's worker class implementing the compilation of CUDA device code
///
std::unique_ptr<IncrementalCUDADeviceCompiler> m_CUDACompiler;
///\brief Cache of compiled destructors wrappers.
std::unordered_map<const clang::RecordDecl*, void*> m_DtorWrappers;
@ -348,8 +343,6 @@ namespace cling {
LookupHelper& getLookupHelper() const { return *m_LookupHelper; }
IncrementalCUDADeviceCompiler& getCUDADeviceCompiler() { return *m_CUDACompiler; }
const clang::Parser& getParser() const;
clang::Parser& getParser();

View File

@ -1,6 +1,6 @@
//--------------------------------------------------------------------*- C++ -*-
// CLING - the C++ LLVM-based InterpreterG :)
// author: Simeon Ehrig <simeonehrig@web.de>
// author: Simeon Ehrig <s.ehrig@hzdr.de>
//
// This file is dual-licensed: you can choose to license it under the University
// of Illinois Open Source License or the GNU Lesser General Public License. See
@ -27,70 +27,87 @@
#include <string>
// The clang nvptx JIT has a growing AST tree. At runtime, new statements are
// continuously appended to the AST. To improve the compile time, the existing
// AST is saved as a PCH file and the new statements are appended via source
// code files. A bug in clang prevents more than 4 statements from being
// appended to the PCH. If the flag is true, it improves the compile time but
// crashes after the fifth iteration. https://bugs.llvm.org/show_bug.cgi?id=37167
#define PCHMODE 0
namespace cling {
IncrementalCUDADeviceCompiler::IncrementalCUDADeviceCompiler(
std::string filePath,
int optLevel,
cling::InvocationOptions & invocationOptions,
clang::CompilerInstance * CI)
: m_Counter(0),
m_FilePath(filePath){
if(CI->getCodeGenOpts().CudaGpuBinaryFileNames.empty()){
const std::string & filePath,
const int optLevel,
const cling::InvocationOptions & invocationOptions,
const clang::CompilerInstance & CI)
: m_FilePath(filePath),
m_FatbinFilePath(CI.getCodeGenOpts().CudaGpuBinaryFileNames.empty()
? "" : CI.getCodeGenOpts().CudaGpuBinaryFileNames[0]),
m_DummyCUPath(m_FilePath + "dummy.cu"),
m_PTXFilePath(m_FilePath + "cling.ptx"),
m_GenericFileName(m_FilePath + "cling") {
if(m_FatbinFilePath.empty()){
llvm::errs() << "Error: CudaGpuBinaryFileNames can't be empty\n";
m_Init = false;
} else {
m_FatbinFilePath = CI->getCodeGenOpts().CudaGpuBinaryFileNames[0];
m_Init = true;
return;
}
m_Init = m_Init && generateHelperFiles();
m_Init = m_Init && searchCompilingTools(invocationOptions);
setCuArgs(CI->getLangOpts(), invocationOptions, optLevel,
CI->getCodeGenOpts().getDebugInfo());
if (!generateHelperFiles())
return;
if (!findToolchain(invocationOptions))
return;
setCuArgs(CI.getLangOpts(), invocationOptions, optLevel,
CI.getCodeGenOpts().getDebugInfo());
m_HeaderSearchOptions = CI->getHeaderSearchOptsPtr();
m_HeaderSearchOptions = CI.getHeaderSearchOptsPtr();
m_Init = true;
}
void IncrementalCUDADeviceCompiler::setCuArgs(
clang::LangOptions & langOpts,
cling::InvocationOptions & invocationOptions,
int & optLevel, clang::codegenoptions::DebugInfoKind debugInfo){
const clang::LangOptions & langOpts,
const cling::InvocationOptions & invocationOptions,
const int intprOptLevel,
const clang::codegenoptions::DebugInfoKind debugInfo){
std::string cppStdVersion;
// Set the c++ standard. Just one condition is possible.
if(langOpts.CPlusPlus11)
m_CuArgs.cppStdVersion = "-std=c++11";
cppStdVersion = "-std=c++11";
if(langOpts.CPlusPlus14)
m_CuArgs.cppStdVersion = "-std=c++14";
cppStdVersion = "-std=c++14";
if(langOpts.CPlusPlus1z)
m_CuArgs.cppStdVersion = "-std=c++1z";
cppStdVersion = "-std=c++1z";
if(langOpts.CPlusPlus2a)
m_CuArgs.cppStdVersion = "-std=c++2a";
cppStdVersion = "-std=c++2a";
m_CuArgs.optLevel = "-O" + std::to_string(optLevel);
const std::string optLevel = "-O" + std::to_string(intprOptLevel);
std::string ptxSmVersion = "--cuda-gpu-arch=sm_20";
std::string fatbinSmVersion = "--image=profile=compute_20";
if(!invocationOptions.CompilerOpts.CUDAGpuArch.empty()){
m_CuArgs.ptxSmVersion = "--cuda-gpu-arch="
ptxSmVersion = "--cuda-gpu-arch="
+ invocationOptions.CompilerOpts.CUDAGpuArch;
m_CuArgs.fatbinSmVersion = "--image=profile=compute_"
fatbinSmVersion = "--image=profile=compute_"
+ invocationOptions.CompilerOpts.CUDAGpuArch.substr(3);
}
//The generating of the fatbin file is depend of the architecture of the host.
llvm::Triple hostTarget(llvm::sys::getDefaultTargetTriple());
m_CuArgs.fatbinArch = hostTarget.isArch64Bit() ? "-64" : "-32";
const std::string fatbinArch = hostTarget.isArch64Bit() ? "-64" : "-32";
m_CuArgs.verbose = invocationOptions.Verbose();
// FIXME : Should not reduce the fine granulated debug options to a simple.
// -g
bool debug = false;
if(debugInfo == clang::codegenoptions::DebugLineTablesOnly ||
debugInfo == clang::codegenoptions::LimitedDebugInfo ||
debugInfo == clang::codegenoptions::FullDebugInfo)
m_CuArgs.debug = true;
debug = true;
// FIXME : Cling has problems to detect this arguments.
// FIXME : Cling has problems to detect these arguments.
/*
if(langOpts.CUDADeviceFlushDenormalsToZero)
m_CuArgs.additionalPtxOpt.push_back("-fcuda-flush-denormals-to-zero");
@ -99,14 +116,17 @@ namespace cling {
if(langOpts.CUDAAllowVariadicFunctions)
m_CuArgs.additionalPtxOpt.push_back("-fcuda-allow-variadic-functions");
*/
std::vector<std::string> additionalPtxOpt;
m_CuArgs.fatbinaryOpt = invocationOptions.CompilerOpts.CUDAFatbinaryArgs;
m_CuArgs.reset(new IncrementalCUDADeviceCompiler::CUDACompilerArgs(
cppStdVersion, optLevel, ptxSmVersion, fatbinSmVersion, fatbinArch,
invocationOptions.Verbose(), debug, additionalPtxOpt,
invocationOptions.CompilerOpts.CUDAFatbinaryArgs));
}
bool IncrementalCUDADeviceCompiler::generateHelperFiles(){
// Generate an empty dummy.cu file.
m_DummyCUPath = m_FilePath + "dummy.cu";
std::error_code EC;
llvm::raw_fd_ostream dummyCU(m_DummyCUPath, EC, llvm::sys::fs::F_Text);
if(EC){
@ -116,13 +136,11 @@ namespace cling {
}
dummyCU.close();
m_PTXFilePath = m_FilePath + "cling.ptx";
m_GenericFileName = m_FilePath + "cling";
return true;
}
bool IncrementalCUDADeviceCompiler::searchCompilingTools(
cling::InvocationOptions & invocationOptions){
bool IncrementalCUDADeviceCompiler::findToolchain(
const cling::InvocationOptions & invocationOptions){
// Search after clang in the folder of cling.
llvm::SmallString<128> cwd;
// get folder of the cling executable to find the clang which is contained
@ -164,28 +182,35 @@ namespace cling {
return true;
}
void IncrementalCUDADeviceCompiler::addHeaders(
void IncrementalCUDADeviceCompiler::addHeaderSearchPathFlags(
llvm::SmallVectorImpl<std::string> & argv){
for(clang::HeaderSearchOptions::Entry e : m_HeaderSearchOptions->UserEntries){
if(e.Group == clang::frontend::IncludeDirGroup::Quoted){
argv.push_back("-iquote");
argv.push_back(e.Path);
}
if(e.Group == clang::frontend::IncludeDirGroup::Angled)
argv.push_back("-I" + e.Path);
}
}
bool IncrementalCUDADeviceCompiler::generateFatbinary(const llvm::StringRef input,
cling::Transaction * T){
bool IncrementalCUDADeviceCompiler::compileDeviceCode(const llvm::StringRef input,
const cling::Transaction * const T){
if(!m_Init){
llvm::errs() << "Error: Initializiation of CUDA Device Code Compiler failed\n";
return false;
}
const unsigned int counter = getCounterCopy();
// Write the (CUDA) C++ source code to a file.
std::error_code EC;
llvm::raw_fd_ostream cuFile(m_GenericFileName + std::to_string(m_Counter)
llvm::raw_fd_ostream cuFile(m_GenericFileName + std::to_string(counter)
+ ".cu", EC, llvm::sys::fs::F_Text);
if (EC) {
llvm::errs() << "Could not open " << m_GenericFileName
+ std::to_string(m_Counter) << ".cu: " << EC.message() << "\n";
+ std::to_string(counter) << ".cu: " << EC.message() << "\n";
return false;
}
@ -215,23 +240,13 @@ namespace cling {
cuFile.close();
if(!generatePCH()){
saveFaultyCUfile();
return false;
}
if(!generatePTX()){
saveFaultyCUfile();
return false;
}
if(!generateFatbinaryInternal()){
if(!generatePCH() || !generatePTX() || !generateFatbinary()){
saveFaultyCUfile();
return false;
}
#if PCHMODE == 0
llvm::sys::fs::remove(m_GenericFileName + std::to_string(m_Counter)
llvm::sys::fs::remove(m_GenericFileName + std::to_string(counter)
+".cu.pch");
#endif
@ -240,72 +255,76 @@ namespace cling {
}
bool IncrementalCUDADeviceCompiler::generatePCH() {
const unsigned int counter = getCounterCopy();
// clang++ -std=c++xx -Ox -S -Xclang -emit-pch ${clingHeaders} cling[0-9].cu
// -D__CLING__ -o cling[0-9].cu.pch [-include-pch cling[0-9].cu.pch]
// --cuda-gpu-arch=sm_[1-7][0-9] -pthread --cuda-device-only [-v] [-g]
// ${m_CuArgs.additionalPtxOpt}
llvm::SmallVector<const char*, 256> argv;
// ${m_CuArgs->additionalPtxOpt}
llvm::SmallVector<std::string, 256> argv;
// First argument have to be the program name.
argv.push_back(m_ClangPath.c_str());
argv.push_back(m_ClangPath);
argv.push_back(m_CuArgs.cppStdVersion.c_str());
argv.push_back(m_CuArgs.optLevel.c_str());
argv.push_back(m_CuArgs->cppStdVersion);
argv.push_back(m_CuArgs->optLevel);
argv.push_back("-S");
argv.push_back("-Xclang");
argv.push_back("-emit-pch");
llvm::SmallVector<std::string, 256> headers;
addHeaders(headers);
for(std::string & s : headers)
argv.push_back(s.c_str());
addHeaderSearchPathFlags(argv);
// Is necessary for the cling runtime header.
argv.push_back("-D__CLING__");
std::string cuFilePath = m_GenericFileName + std::to_string(m_Counter)
+ ".cu";
argv.push_back(cuFilePath.c_str());
argv.push_back(m_GenericFileName + std::to_string(counter) + ".cu");
argv.push_back("-o");
std::string outputname = m_GenericFileName + std::to_string(m_Counter)
+".cu.pch";
argv.push_back(outputname.c_str());
argv.push_back(m_GenericFileName + std::to_string(counter) +".cu.pch");
// If a previous file exists, include it.
#if PCHMODE == 1
std::string previousFile;
if(m_Counter){
previousFile = m_GenericFileName + std::to_string(m_Counter-1) +".cu.pch";
if(counter){
argv.push_back("-include-pch");
argv.push_back(previousFile.c_str());
argv.push_back(m_GenericFileName + std::to_string(counter-1) +".cu.pch");
}
#else
std::vector<std::string> previousFiles;
if(m_Counter){
for(unsigned int i = 0; i <= m_Counter-1; ++i){
previousFiles.push_back(m_GenericFileName + std::to_string(i) +".cu");
if(counter){
for(unsigned int i = 0; i <= counter-1; ++i){
argv.push_back("-include");
argv.push_back(previousFiles[i].c_str());
argv.push_back(m_GenericFileName + std::to_string(i) +".cu");
}
}
#endif
argv.push_back(m_CuArgs.ptxSmVersion.c_str());
argv.push_back(m_CuArgs->ptxSmVersion);
argv.push_back("-pthread");
argv.push_back("--cuda-device-only");
if(m_CuArgs.verbose)
if(m_CuArgs->verbose)
argv.push_back("-v");
if(m_CuArgs.debug)
if(m_CuArgs->debug)
argv.push_back("-g");
for(std::string & s : m_CuArgs.additionalPtxOpt){
for(const std::string & s : m_CuArgs->additionalPtxOpt){
argv.push_back(s.c_str());
}
std::vector<const char *> argvChar;
argvChar.resize(argv.size()+1);
std::transform(argv.begin(), argv.end(), argvChar.begin(),
[&](const std::string & s)
{
return s.c_str();
}
);
// Argv list have to finish with a nullptr.
argv.push_back(nullptr);
argvChar.push_back(nullptr);
std::string executionError;
int res = llvm::sys::ExecuteAndWait(m_ClangPath.c_str(), argv.data(),
int res = llvm::sys::ExecuteAndWait(m_ClangPath.c_str(), argvChar.data(),
nullptr, {}, 0, 0, &executionError);
if(res){
llvm::errs() << "error at launching clang instance to generate PCH file\n"
<< executionError << "\n";
llvm::errs() << "cling::IncrementalCUDADeviceCompiler::generatePCH(): error compiling PCH file:\n"
<< m_ClangPath;
for(const char * c : argvChar)
llvm::errs() << " " << c;
llvm::errs() << '\n' << executionError << "\n";
return false;
}
@ -313,80 +332,106 @@ namespace cling {
}
bool cling::IncrementalCUDADeviceCompiler::generatePTX() {
const unsigned int counter = getCounterCopy();
// clang++ -std=c++xx -Ox -S dummy.cu -o cling.ptx -include-pch
// cling[0-9].cu.pch --cuda-gpu-arch=sm_xx -pthread --cuda-device-only [-v]
// [-g] ${m_CuArgs.additionalPtxOpt}
llvm::SmallVector<const char*, 128> argv;
// [-g] ${m_CuArgs->additionalPtxOpt}
llvm::SmallVector<std::string, 128> argv;
// First argument have to be the program name.
argv.push_back(m_ClangPath.c_str());
argv.push_back(m_ClangPath);
argv.push_back(m_CuArgs.cppStdVersion.c_str());
argv.push_back(m_CuArgs.optLevel.c_str());
argv.push_back(m_CuArgs->cppStdVersion);
argv.push_back(m_CuArgs->optLevel);
argv.push_back("-S");
argv.push_back(m_DummyCUPath.c_str());
argv.push_back(m_DummyCUPath);
argv.push_back("-o");
argv.push_back(m_PTXFilePath.c_str());
argv.push_back(m_PTXFilePath);
argv.push_back("-include-pch");
std::string pchFile = m_GenericFileName + std::to_string(m_Counter) +".cu.pch";
argv.push_back(pchFile.c_str());
argv.push_back(m_CuArgs.ptxSmVersion.c_str());
argv.push_back(m_GenericFileName + std::to_string(counter) +".cu.pch");
argv.push_back(m_CuArgs->ptxSmVersion);
argv.push_back("-pthread");
argv.push_back("--cuda-device-only");
if(m_CuArgs.verbose)
if(m_CuArgs->verbose)
argv.push_back("-v");
if(m_CuArgs.debug)
if(m_CuArgs->debug)
argv.push_back("-g");
for(std::string & s : m_CuArgs.additionalPtxOpt){
for(const std::string & s : m_CuArgs->additionalPtxOpt){
argv.push_back(s.c_str());
}
std::vector<const char *> argvChar;
argvChar.resize(argv.size()+1);
std::transform(argv.begin(), argv.end(), argvChar.begin(),
[&](const std::string & s)
{
return s.c_str();
}
);
// Argv list have to finish with a nullptr.
argv.push_back(nullptr);
argvChar.push_back(nullptr);
std::string executionError;
int res = llvm::sys::ExecuteAndWait(m_ClangPath.c_str(), argv.data(),
int res = llvm::sys::ExecuteAndWait(m_ClangPath.c_str(), argvChar.data(),
nullptr, {}, 0, 0, &executionError);
if(res){
llvm::errs() << "error at launching clang instance to generate ptx code"
<< "\n" << executionError << "\n";
llvm::errs() << "cling::IncrementalCUDADeviceCompiler::generatePTX(): error compiling PCH file:\n"
<< m_ClangPath;
for(const char * c : argvChar)
llvm::errs() << " " << c;
llvm::errs() << '\n' << executionError << "\n";
return false;
}
return true;
}
bool IncrementalCUDADeviceCompiler::generateFatbinaryInternal() {
bool IncrementalCUDADeviceCompiler::generateFatbinary() {
// fatbinary --cuda [-32 | -64] --create cling.fatbin
// --image=profile=compute_xx,file=cling.ptx [-g] ${m_CuArgs.fatbinaryOpt}
llvm::SmallVector<const char*, 128> argv;
// --image=profile=compute_xx,file=cling.ptx [-g] ${m_CuArgs->fatbinaryOpt}
llvm::SmallVector<std::string, 128> argv;
// First argument have to be the program name.
argv.push_back(m_FatbinaryPath.c_str());
argv.push_back(m_FatbinaryPath);
argv.push_back("--cuda");
argv.push_back(m_CuArgs.fatbinArch.c_str());
argv.push_back(m_CuArgs->fatbinArch);
argv.push_back("--create");
argv.push_back(m_FatbinFilePath.c_str());
std::string ptxCode = m_CuArgs.fatbinSmVersion
+ ",file=" + m_PTXFilePath;
argv.push_back(ptxCode.c_str());
if(m_CuArgs.debug)
argv.push_back(m_FatbinFilePath);
argv.push_back(m_CuArgs->fatbinSmVersion + ",file=" + m_PTXFilePath);
if(m_CuArgs->debug)
argv.push_back("-g");
for(std::string & s : m_CuArgs.fatbinaryOpt){
for(const std::string & s : m_CuArgs->fatbinaryOpt){
argv.push_back(s.c_str());
}
std::vector<const char *> argvChar;
argvChar.resize(argv.size()+1);
std::transform(argv.begin(), argv.end(), argvChar.begin(),
[&](const std::string & s)
{
return s.c_str();
}
);
// Argv list have to finish with a nullptr.
argv.push_back(nullptr);
argvChar.push_back(nullptr);
std::string executionError;
int res = llvm::sys::ExecuteAndWait(m_FatbinaryPath.c_str(), argv.data(),
int res = llvm::sys::ExecuteAndWait(m_FatbinaryPath.c_str(), argvChar.data(),
nullptr, {}, 0, 0, &executionError);
if(res){
llvm::errs() << "error at launching fatbin" << "\n" << executionError << "\n";
llvm::errs() << "cling::IncrementalCUDADeviceCompiler::generateFatbinary(): error compiling PCH file:\n"
<< m_ClangPath;
for(const char * c : argvChar)
llvm::errs() << " " << c;
llvm::errs() << '\n' << executionError << "\n";
return false;
}
@ -394,7 +439,7 @@ namespace cling {
}
void IncrementalCUDADeviceCompiler::dump(){
llvm::outs() << "current counter: " << m_Counter << "\n" <<
llvm::outs() << "current counter: " << getCounterCopy() << "\n" <<
"CUDA device compiler is valid: " << m_Init << "\n" <<
"file path: " << m_FilePath << "\n" <<
"fatbin file path: " << m_FatbinFilePath << "\n" <<
@ -404,35 +449,36 @@ namespace cling {
<< "[0-9]*.cu{.pch}\n" <<
"clang++ path: " << m_ClangPath << "\n" <<
"nvidia fatbinary path: " << m_FatbinaryPath << "\n" <<
"m_CuArgs c++ standard: " << m_CuArgs.cppStdVersion << "\n" <<
"m_CuArgs opt level: " << m_CuArgs.optLevel << "\n" <<
"m_CuArgs c++ standard: " << m_CuArgs->cppStdVersion << "\n" <<
"m_CuArgs opt level: " << m_CuArgs->optLevel << "\n" <<
"m_CuArgs SM level for clang nvptx: "
<< m_CuArgs.ptxSmVersion << "\n" <<
<< m_CuArgs->ptxSmVersion << "\n" <<
"m_CuArgs SM level for fatbinary: "
<< m_CuArgs.fatbinSmVersion << "\n" <<
<< m_CuArgs->fatbinSmVersion << "\n" <<
"m_CuArgs fatbinary architectur: "
<< m_CuArgs.fatbinArch << "\n" <<
"m_CuArgs verbose: " << m_CuArgs.verbose << "\n" <<
"m_CuArgs debug: " << m_CuArgs.debug << "\n";
<< m_CuArgs->fatbinArch << "\n" <<
"m_CuArgs verbose: " << m_CuArgs->verbose << "\n" <<
"m_CuArgs debug: " << m_CuArgs->debug << "\n";
llvm::outs() << "m_CuArgs additional clang nvptx options: ";
for(std::string & s : m_CuArgs.additionalPtxOpt){
for(const std::string & s : m_CuArgs->additionalPtxOpt){
llvm::outs() << s << " ";
}
llvm::outs() << "\n";
llvm::outs() << "m_CuArgs additional fatbinary options: ";
for(std::string & s : m_CuArgs.fatbinaryOpt){
for(const std::string & s : m_CuArgs->fatbinaryOpt){
llvm::outs() << s << " ";
}
llvm::outs() << "\n";
}
std::error_code IncrementalCUDADeviceCompiler::saveFaultyCUfile(){
const unsigned int counter = getCounterCopy();
unsigned int faultFileCounter = 0;
// Construct the file path of the current .cu file without extension.
std::string originalCU = m_GenericFileName + std::to_string(m_Counter);
std::string originalCU = m_GenericFileName + std::to_string(counter);
// m_Counter will just increased, if the compiling get right. So we need a
// counter (= m_Counter) is only incremented if compilation succeeds. So we
// need a second counter in case two or more consecutive files fail.
std::string faultyCU;
do{
@ -440,8 +486,8 @@ namespace cling {
faultyCU = originalCU + "_fault" + std::to_string(faultFileCounter) + ".cu";
} while(llvm::sys::fs::exists(faultyCU));
// orginial: cling[m_Counter].cu
// faulty file: cling[m_Counter]_fault[faultFileCounter].cu
// original: cling[counter].cu
// faulty file: cling[counter]_fault[faultFileCounter].cu
return llvm::sys::fs::rename(originalCU + ".cu", faultyCU);
}

View File

@ -1,6 +1,6 @@
//--------------------------------------------------------------------*- C++ -*-
// CLING - the C++ LLVM-based InterpreterG :)
// author: Simeon Ehrig <simeonehrig@web.de>
// author: Simeon Ehrig <s.ehrig@hzdr.de>
//
// This file is dual-licensed: you can choose to license it under the University
// of Illinois Open Source License or the GNU Lesser General Public License. See
@ -17,18 +17,18 @@
#include <vector>
namespace cling{
class InvocationOptions;
class Transaction;
class InvocationOptions;
class Transaction;
}
namespace clang {
class CompilerInstance;
class HeaderSearchOptions;
class LangOptions;
class CompilerInstance;
class HeaderSearchOptions;
class LangOptions;
}
namespace llvm {
class StringRef;
class StringRef;
}
namespace cling {
@ -39,65 +39,59 @@ namespace cling {
///
class IncrementalCUDADeviceCompiler {
static constexpr unsigned CxxStdCompiledWith() {
// The value of __cplusplus in GCC < 5.0 (e.g. 4.9.3) when
// either -std=c++1y or -std=c++14 is specified is 201300L, which fails
// the test for C++14 or more (201402L) as previously specified.
// I would claim that the check should be relaxed to:
#if __cplusplus > 201402L
return 17;
#elif __cplusplus > 201103L || (defined(LLVM_ON_WIN32) && _MSC_VER >= 1900)
return 14;
#elif __cplusplus >= 201103L
return 11;
#else
#error "Unknown __cplusplus version"
#endif
}
///\brief Contains the arguments for the cling nvptx and the nvidia
/// fatbinary tool. The arguments are static and will set at the constructor
/// of IncrementalCUDADeviceCompiler.
/// fatbinary tool.
struct CUDACompilerArgs {
std::string cppStdVersion = "-std=c++" + std::to_string(CxxStdCompiledWith());
std::string optLevel = "-O0";
std::string ptxSmVersion = "--cuda-gpu-arch=sm_20";
std::string fatbinSmVersion = "--image=profile=compute_20";
const std::string cppStdVersion;
const std::string optLevel;
const std::string ptxSmVersion;
const std::string fatbinSmVersion;
///\brief Argument for the fatbinary tool, which is depend, if the OS is
/// 32 bit or 64 bit.
std::string fatbinArch = "-32";
const std::string fatbinArch;
///\brief True, if the flag -v is set.
bool verbose = false;
const bool verbose;
///\brief True, if the flag -g is set.
bool debug = false;
const bool debug;
///\brief A list Arguments, which will passed to the clang nvptx.
std::vector<std::string> additionalPtxOpt;
const std::vector<std::string> additionalPtxOpt;
///\brief A list Arguments, which will passed to the fatbinary tool.
std::vector<std::string> fatbinaryOpt;
const std::vector<std::string> fatbinaryOpt;
CUDACompilerArgs(std::string cppStdVersion, std::string optLevel,
std::string ptxSmVersion, std::string fatbinSmVersion,
std::string fatbinArch, bool verbose, bool debug,
std::vector<std::string> additionalPtxOpt,
std::vector<std::string> fatbinaryOpt)
: cppStdVersion(cppStdVersion), optLevel(optLevel),
ptxSmVersion(ptxSmVersion), fatbinSmVersion(fatbinSmVersion),
fatbinArch(fatbinArch), verbose(verbose), debug(debug),
additionalPtxOpt(additionalPtxOpt), fatbinaryOpt(fatbinaryOpt) {}
};
CUDACompilerArgs m_CuArgs;
std::unique_ptr<CUDACompilerArgs> m_CuArgs;
///\brief The counter responsible to generate a chain of .cu source files
/// and .cu.pch files.
unsigned int m_Counter;
unsigned int m_Counter = 0;
///\brief Is true if all necessary files have been generated and clang and
/// cuda NVIDIA fatbinary are found.
bool m_Init;
bool m_Init = false;
///\brief Path to the folder, where all files will put in. Ordinary the tmp
/// folder. Have to end with a separator. Can be empty.
std::string m_FilePath;
const std::string m_FilePath;
///\brief Path to the fatbin file, which will used by the CUDACodeGen.
std::string m_FatbinFilePath;
const std::string m_FatbinFilePath;
///\brief Path to a empty dummy.cu file. The file is necessary to generate
/// PTX code from the pch files.
std::string m_DummyCUPath;
const std::string m_DummyCUPath;
///\brief Path to the PTX file. Will be reused for every PTX generation.
std::string m_PTXFilePath;
const std::string m_PTXFilePath;
///\brief Will be used to generate .cu and .cu.pch files.
std::string m_GenericFileName;
const std::string m_GenericFileName;
///\brief Path to the clang++ compiler, which will used to compile the pch
/// files and the PTX code. Should be in same folder, as the cling.
@ -109,6 +103,11 @@ namespace cling {
///
std::shared_ptr<clang::HeaderSearchOptions> m_HeaderSearchOptions;
///\brief get copy of m_Counter
///
///\returns copy of m_Counter
unsigned int getCounterCopy(){ return m_Counter;}
///\brief Generate the dummy.cu file and set the paths of m_PTXFilePath and
/// m_GenericFileName.
///
@ -122,12 +121,12 @@ namespace cling {
/// toolkit
///
///\returns True, whether clang and fatbinary was found.
bool searchCompilingTools(cling::InvocationOptions & invocationOptions);
bool findToolchain(const cling::InvocationOptions & invocationOptions);
///\brief Add the include paths from the interpreter runtime to a argument list.
///
///\param [in,out] argv - The include commands will append to the argv vector.
void addHeaders(llvm::SmallVectorImpl<std::string> & argv);
void addHeaderSearchPathFlags(llvm::SmallVectorImpl<std::string> & argv);
///\brief Start an clang compiler with nvptx backend. Read the content of
/// cling.cu and compile it to a new PCH file. If predecessor PCH file is
@ -147,17 +146,18 @@ namespace cling {
/// m_FatbinFilePath.
///
///\returns True, if the fatbinary tool returns 0.
bool generateFatbinaryInternal();
bool generateFatbinary();
///\brief The function set the values of m_CuArgs.
///
///\param [in] langOpts - The LangOptions of the CompilerInstance.
///\param [in] invocationOptions - The invocationOptions of the interpreter.
///\param [in] optLevel - The optimization level of the interpreter.
///\param [in] intprOptLevel - The optimization level of the interpreter.
///\param [in] debugInfo - The debugInfo of the CompilerInstance.
void setCuArgs(clang::LangOptions & langOpts,
cling::InvocationOptions & invocationOptions, int & optLevel,
clang::codegenoptions::DebugInfoKind debugInfo);
void setCuArgs(const clang::LangOptions & langOpts,
const cling::InvocationOptions & invocationOptions,
const int intprOptLevel,
const clang::codegenoptions::DebugInfoKind debugInfo);
///\brief Save .cu file, if cuda device code compiler failed at translation.
///
@ -176,14 +176,14 @@ namespace cling {
/// clang and the NVIDIA tool fatbinary.
///\param [in] CompilerInstance - Will be used for m_CuArgs and the include
/// path handling.
IncrementalCUDADeviceCompiler(std::string filePath,
int optLevel,
cling::InvocationOptions & invocationOptions,
clang::CompilerInstance * CI);
IncrementalCUDADeviceCompiler(const std::string & filePath,
const int optLevel,
const cling::InvocationOptions & invocationOptions,
const clang::CompilerInstance & CI);
///\brief Generate an new fatbin file with the path in CudaGpuBinaryFileNames.
/// It will add the content of input, to the existing source code, which was
/// passed to generateFatbinary, before.
/// passed to compileDeviceCode, before.
///
///\param [in] input - New source code. The function can select, if code
/// is relevant for the device side. Have to be valid CUDA C++ code.
@ -191,7 +191,8 @@ namespace cling {
///
///\returns True, if all stages of generating fatbin runs right and a new
/// fatbin file is written.
bool generateFatbinary(const llvm::StringRef input, cling::Transaction * T);
bool compileDeviceCode(const llvm::StringRef input,
const cling::Transaction * const T);
///\brief Print some information of the IncrementalCUDADeviceCompiler to
/// llvm::outs(). For Example the paths of the files and tools.

View File

@ -45,6 +45,8 @@
#include "clang/Sema/Sema.h"
#include "clang/Sema/SemaDiagnostic.h"
#include "clang/Serialization/ASTWriter.h"
#include "clang/Serialization/ASTReader.h"
#include "llvm/Support/Path.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
@ -118,7 +120,7 @@ namespace {
m_PrevClient.EndSourceFile();
SyncDiagCountWithTarget();
}
void finish() override {
m_PrevClient.finish();
SyncDiagCountWithTarget();
@ -213,6 +215,34 @@ namespace cling {
m_DiagConsumer.reset(new FilteringDiagConsumer(Diag, false));
initializeVirtualFile();
if(m_CI->getFrontendOpts().ProgramAction != frontend::ParseSyntaxOnly &&
m_Interpreter->getOptions().CompilerOpts.CUDA){
// Create temporary folder for all files, which the CUDA device compiler
// will generate.
llvm::SmallString<256> TmpPath;
llvm::StringRef sep = llvm::sys::path::get_separator().data();
llvm::sys::path::system_temp_directory(false, TmpPath);
TmpPath.append(sep.data());
TmpPath.append("cling-%%%%");
TmpPath.append(sep.data());
llvm::SmallString<256> TmpFolder;
llvm::sys::fs::createUniqueFile(TmpPath.c_str(), TmpFolder);
llvm::sys::fs::create_directory(TmpFolder);
// The CUDA fatbin file is the connection between the CUDA device
// compiler and the CodeGen of cling. The file is reused every time.
if(getCI()->getCodeGenOpts().CudaGpuBinaryFileNames.empty())
getCI()->getCodeGenOpts().CudaGpuBinaryFileNames.push_back(
std::string(TmpFolder.c_str()) + "cling.fatbin");
m_CUDACompiler.reset(
new IncrementalCUDADeviceCompiler(TmpFolder.c_str(),
m_CI->getCodeGenOpts().OptimizationLevel,
m_Interpreter->getOptions(),
*m_CI));
}
}
bool
@ -794,8 +824,7 @@ namespace cling {
return kSuccessWithWarnings;
if(!m_Interpreter->isInSyntaxOnlyMode() && m_CI->getLangOpts().CUDA )
m_Interpreter->getCUDADeviceCompiler()
.generateFatbinary(input, m_Consumer->getTransaction());
m_CUDACompiler->compileDeviceCode(input, m_Consumer->getTransaction());
return kSuccess;
}

View File

@ -43,6 +43,7 @@ namespace cling {
class Transaction;
class TransactionPool;
class ASTTransformer;
class IncrementalCUDADeviceCompiler;
///\brief Responsible for the incremental parsing and compilation of input.
///
@ -94,6 +95,10 @@ namespace cling {
///
std::unique_ptr<clang::DiagnosticConsumer> m_DiagConsumer;
///\brief Cling's worker class implementing the compilation of CUDA device code
///
std::unique_ptr<IncrementalCUDADeviceCompiler> m_CUDACompiler;
public:
enum EParseResult {
kSuccess,

View File

@ -20,7 +20,6 @@
#include "ForwardDeclPrinter.h"
#include "IncrementalExecutor.h"
#include "IncrementalParser.h"
#include "IncrementalCUDADeviceCompiler.h"
#include "MultiplexInterpreterCallbacks.h"
#include "TransactionUnloader.h"
@ -57,7 +56,6 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Path.h"
#include <string>
@ -167,7 +165,7 @@ namespace cling {
m_DyLibManager && m_LookupHelper &&
(isInSyntaxOnlyMode() || m_Executor);
}
namespace internal { void symbol_requester(); }
const char* Interpreter::getVersion() {
@ -238,33 +236,6 @@ namespace cling {
return;
}
if(!isInSyntaxOnlyMode() && m_Opts.CompilerOpts.CUDA){
// Create temporary folder for all files, which the CUDA device compiler
// will generate.
llvm::SmallString<256> TmpPath;
llvm::StringRef sep = llvm::sys::path::get_separator().data();
llvm::sys::path::system_temp_directory(false, TmpPath);
TmpPath.append(sep.data());
TmpPath.append("cling-%%%%");
TmpPath.append(sep.data());
llvm::SmallString<256> TmpFolder;
llvm::sys::fs::createUniqueFile(TmpPath.c_str(), TmpFolder);
llvm::sys::fs::create_directory(TmpFolder);
// The CUDA fatbin file is the connection beetween the CUDA device
// compiler and the CodeGen of cling. The file will every time reused.
if(getCI()->getCodeGenOpts().CudaGpuBinaryFileNames.empty())
getCI()->getCodeGenOpts().CudaGpuBinaryFileNames.push_back(
std::string(TmpFolder.c_str()) + "cling.fatbin");
m_CUDACompiler.reset(
new IncrementalCUDADeviceCompiler(TmpFolder.c_str(),
m_OptLevel,
m_Opts,
getCI()));
}
// Tell the diagnostic client that we are entering file parsing mode.
DiagnosticConsumer& DClient = getCI()->getDiagnosticClient();
DClient.BeginSourceFile(getCI()->getLangOpts(), &PP);
@ -702,7 +673,7 @@ namespace cling {
}
return Value;
}
///\brief Maybe transform the input line to implement cint command line
/// semantics (declarations are global) and compile to produce a module.
///
@ -898,11 +869,11 @@ namespace cling {
// Ignore diagnostics when we tab complete.
// This is because we get redefinition errors due to the import of the decls.
clang::IgnoringDiagConsumer* ignoringDiagConsumer =
new clang::IgnoringDiagConsumer();
new clang::IgnoringDiagConsumer();
childSemaRef.getDiagnostics().setClient(ignoringDiagConsumer, true);
DiagnosticsEngine& parentDiagnostics = this->getCI()->getSema().getDiagnostics();
std::unique_ptr<DiagnosticConsumer> ownerDiagConsumer =
std::unique_ptr<DiagnosticConsumer> ownerDiagConsumer =
parentDiagnostics.takeClient();
auto clientDiagConsumer = parentDiagnostics.getClient();
parentDiagnostics.setClient(ignoringDiagConsumer, /*owns*/ false);

View File

@ -60,4 +60,4 @@ hostOutput[numberOfThreads-1] == numberOfThreads-1 // expected-note {{use '=' to
expectedSum == cudaSum // expected-note {{use '=' to turn this equality comparison into an assignment}}
// CHECK: (bool) true
.q
.q

View File

@ -78,4 +78,4 @@ expectedSum1 == cudaSum1 // expected-note {{use '=' to turn this equality compar
expectedSum2 == cudaSum2 // expected-note {{use '=' to turn this equality comparison into an assignment}}
// CHECK: (bool) true
.q
.q