Improvements for Pull Request #240

- minor changes to comments and code style - use const in IncrementalCUDADeviceCompiler wherever possible - move the CUDA device code compiler instance to IncrementalParser - change the members of CuArgs to const and adjust the setCuArgs method - use std::vector<string> instead of llvm::SmallVector<const char *> to build argv for ExecuteAndWait - improve the error messages of generatePCH(), generatePTX() and generateFatbinary() - replace m_Counter with a copy in IncrementalCUDADeviceCompiler to avoid unintended changes
2018-05-29 16:53:24 +02:00 · 2018-05-29 16:53:24 +02:00 · 9a4418b3c0
commit 9a4418b3c0
parent 309cebff41
9 changed files with 277 additions and 232 deletions
--- a/CREDITS.txt
+++ b/CREDITS.txt
@ -60,4 +60,4 @@ D: Main developer of cling.
 N: Simeon Ehrig
 E: s.ehrig@hzdr.de
-D: CUDA runtime API support
+D: cling CUDA runtime support
--- a/include/cling/Interpreter/Interpreter.h
+++ b/include/cling/Interpreter/Interpreter.h
@ -70,7 +70,6 @@ namespace cling {
  class LookupHelper;
  class Value;
  class Transaction;
  class IncrementalCUDADeviceCompiler;
  ///\brief Class that implements the interpreter-like behavior. It manages the
  /// incremental compilation.
@ -159,10 +158,6 @@ namespace cling {
    ///
    std::unique_ptr<LookupHelper> m_LookupHelper;
    ///\brief Cling's worker class implementing the compilation of CUDA device code
    ///
    std::unique_ptr<IncrementalCUDADeviceCompiler> m_CUDACompiler;
    ///\brief Cache of compiled destructors wrappers.
    std::unordered_map<const clang::RecordDecl*, void*> m_DtorWrappers;
@ -348,8 +343,6 @@ namespace cling {
    LookupHelper& getLookupHelper() const { return *m_LookupHelper; }
    IncrementalCUDADeviceCompiler& getCUDADeviceCompiler() { return *m_CUDACompiler; }
    const clang::Parser& getParser() const;
    clang::Parser& getParser();
--- a/lib/Interpreter/IncrementalCUDADeviceCompiler.cpp
+++ b/lib/Interpreter/IncrementalCUDADeviceCompiler.cpp
@ -1,6 +1,6 @@
 //--------------------------------------------------------------------*- C++ -*-
 // CLING - the C++ LLVM-based InterpreterG :)
-// author:  Simeon Ehrig <simeonehrig@web.de>
+// author:  Simeon Ehrig <s.ehrig@hzdr.de>
 //
 // This file is dual-licensed: you can choose to license it under the University
 // of Illinois Open Source License or the GNU Lesser General Public License. See
@ -27,70 +27,87 @@
 #include <string>
 // The clang nvptx jit has an growing AST-Tree. At runtime, continuously new
 // statements will append to the AST. To improve the compiletime, the existing
 // AST will save as PCH-file. The new statements will append via source code
 // files. A bug in clang avoids, that more than 4 statements can append to the
 // PCH. If the flag is true, it improves the compiletime but it crash after the
 // fifth iteration. https://bugs.llvm.org/show_bug.cgi?id=37167
 #define PCHMODE 0
 namespace cling {
  IncrementalCUDADeviceCompiler::IncrementalCUDADeviceCompiler(
-      std::string filePath,
+      const std::string & filePath,
-      int optLevel,
+      const int optLevel,
-      cling::InvocationOptions & invocationOptions,
+      const cling::InvocationOptions & invocationOptions,
-      clang::CompilerInstance * CI)
+      const clang::CompilerInstance & CI)
-     : m_Counter(0),
+     : m_FilePath(filePath),
-       m_FilePath(filePath){
+       m_FatbinFilePath(CI.getCodeGenOpts().CudaGpuBinaryFileNames.empty()
-    if(CI->getCodeGenOpts().CudaGpuBinaryFileNames.empty()){
+         ? "" : CI.getCodeGenOpts().CudaGpuBinaryFileNames[0]),
       m_DummyCUPath(m_FilePath + "dummy.cu"),
       m_PTXFilePath(m_FilePath + "cling.ptx"),
       m_GenericFileName(m_FilePath + "cling") {
    if(m_FatbinFilePath.empty()){
      llvm::errs() << "Error: CudaGpuBinaryFileNames can't be empty\n";
-      m_Init = false;
+      return;
    } else {
      m_FatbinFilePath = CI->getCodeGenOpts().CudaGpuBinaryFileNames[0];
      m_Init = true;
    }
-    m_Init = m_Init && generateHelperFiles();
+    if (!generateHelperFiles())
-    m_Init = m_Init && searchCompilingTools(invocationOptions);
+      return;
-    setCuArgs(CI->getLangOpts(), invocationOptions, optLevel,
+    if (!findToolchain(invocationOptions))
-              CI->getCodeGenOpts().getDebugInfo());
+      return;
    setCuArgs(CI.getLangOpts(), invocationOptions, optLevel,
              CI.getCodeGenOpts().getDebugInfo());
-    m_HeaderSearchOptions = CI->getHeaderSearchOptsPtr();
+    m_HeaderSearchOptions = CI.getHeaderSearchOptsPtr();
    m_Init = true;
  }
  void IncrementalCUDADeviceCompiler::setCuArgs(
-      clang::LangOptions & langOpts,
+    const clang::LangOptions & langOpts,
-      cling::InvocationOptions & invocationOptions,
+    const cling::InvocationOptions & invocationOptions,
-      int & optLevel, clang::codegenoptions::DebugInfoKind debugInfo){
+    const int intprOptLevel,
    const clang::codegenoptions::DebugInfoKind debugInfo){
    std::string cppStdVersion;
    // Set the c++ standard. Just one condition is possible.
    if(langOpts.CPlusPlus11)
-      m_CuArgs.cppStdVersion = "-std=c++11";
+      cppStdVersion = "-std=c++11";
    if(langOpts.CPlusPlus14)
-      m_CuArgs.cppStdVersion = "-std=c++14";
+      cppStdVersion = "-std=c++14";
    if(langOpts.CPlusPlus1z)
-      m_CuArgs.cppStdVersion = "-std=c++1z";
+      cppStdVersion = "-std=c++1z";
    if(langOpts.CPlusPlus2a)
-      m_CuArgs.cppStdVersion = "-std=c++2a";
+      cppStdVersion = "-std=c++2a";
    m_CuArgs.optLevel = "-O" + std::to_string(optLevel);
    const std::string optLevel = "-O" + std::to_string(intprOptLevel);
    std::string ptxSmVersion = "--cuda-gpu-arch=sm_20";
    std::string fatbinSmVersion = "--image=profile=compute_20";
    if(!invocationOptions.CompilerOpts.CUDAGpuArch.empty()){
-      m_CuArgs.ptxSmVersion = "--cuda-gpu-arch="
+      ptxSmVersion = "--cuda-gpu-arch="
                              + invocationOptions.CompilerOpts.CUDAGpuArch;
-      m_CuArgs.fatbinSmVersion = "--image=profile=compute_"
+      fatbinSmVersion = "--image=profile=compute_"
                              + invocationOptions.CompilerOpts.CUDAGpuArch.substr(3);
    }
    //The generating of the fatbin file is depend of the architecture of the host.
    llvm::Triple hostTarget(llvm::sys::getDefaultTargetTriple());
-    m_CuArgs.fatbinArch = hostTarget.isArch64Bit() ? "-64" : "-32";
+    const std::string fatbinArch = hostTarget.isArch64Bit() ? "-64" : "-32";
    m_CuArgs.verbose = invocationOptions.Verbose();
    // FIXME : Should not reduce the fine granulated debug options to a simple.
    // -g
    bool debug = false;
    if(debugInfo == clang::codegenoptions::DebugLineTablesOnly ||
       debugInfo == clang::codegenoptions::LimitedDebugInfo ||
       debugInfo == clang::codegenoptions::FullDebugInfo)
-      m_CuArgs.debug = true;
+      debug = true;
-    // FIXME : Cling has problems to detect this arguments.
+    // FIXME : Cling has problems to detect these arguments.
    /*
    if(langOpts.CUDADeviceFlushDenormalsToZero)
      m_CuArgs.additionalPtxOpt.push_back("-fcuda-flush-denormals-to-zero");
@ -99,14 +116,17 @@ namespace cling {
    if(langOpts.CUDAAllowVariadicFunctions)
      m_CuArgs.additionalPtxOpt.push_back("-fcuda-allow-variadic-functions");
    */
    std::vector<std::string> additionalPtxOpt;
-    m_CuArgs.fatbinaryOpt = invocationOptions.CompilerOpts.CUDAFatbinaryArgs;
+    m_CuArgs.reset(new IncrementalCUDADeviceCompiler::CUDACompilerArgs(
      cppStdVersion, optLevel, ptxSmVersion, fatbinSmVersion, fatbinArch,
      invocationOptions.Verbose(), debug, additionalPtxOpt,
      invocationOptions.CompilerOpts.CUDAFatbinaryArgs));
  }
  bool IncrementalCUDADeviceCompiler::generateHelperFiles(){
    // Generate an empty dummy.cu file.
    m_DummyCUPath = m_FilePath + "dummy.cu";
    std::error_code EC;
    llvm::raw_fd_ostream dummyCU(m_DummyCUPath, EC, llvm::sys::fs::F_Text);
    if(EC){
@ -116,13 +136,11 @@ namespace cling {
    }
    dummyCU.close();
    m_PTXFilePath = m_FilePath + "cling.ptx";
    m_GenericFileName = m_FilePath + "cling";
    return true;
  }
-  bool IncrementalCUDADeviceCompiler::searchCompilingTools(
+  bool IncrementalCUDADeviceCompiler::findToolchain(
-      cling::InvocationOptions & invocationOptions){
+      const cling::InvocationOptions & invocationOptions){
    // Search after clang in the folder of cling.
    llvm::SmallString<128> cwd;
    // get folder of the cling executable to find the clang which is contained
@ -164,28 +182,35 @@ namespace cling {
    return true;
  }
-  void IncrementalCUDADeviceCompiler::addHeaders(
+  void IncrementalCUDADeviceCompiler::addHeaderSearchPathFlags(
      llvm::SmallVectorImpl<std::string> & argv){
    for(clang::HeaderSearchOptions::Entry e : m_HeaderSearchOptions->UserEntries){
      if(e.Group == clang::frontend::IncludeDirGroup::Quoted){
        argv.push_back("-iquote");
        argv.push_back(e.Path);
      }
      if(e.Group == clang::frontend::IncludeDirGroup::Angled)
        argv.push_back("-I" + e.Path);
    }
  }
-  bool IncrementalCUDADeviceCompiler::generateFatbinary(const llvm::StringRef input,
+  bool IncrementalCUDADeviceCompiler::compileDeviceCode(const llvm::StringRef input,
-                                                        cling::Transaction * T){
+                                                        const cling::Transaction * const T){
    if(!m_Init){
      llvm::errs() << "Error: Initializiation of CUDA Device Code Compiler failed\n";
      return false;
    }
    const unsigned int counter = getCounterCopy();
    // Write the (CUDA) C++ source code to a file.
    std::error_code EC;
-    llvm::raw_fd_ostream cuFile(m_GenericFileName + std::to_string(m_Counter)
+    llvm::raw_fd_ostream cuFile(m_GenericFileName + std::to_string(counter)
                                + ".cu", EC, llvm::sys::fs::F_Text);
    if (EC) {
      llvm::errs() << "Could not open " << m_GenericFileName
-        + std::to_string(m_Counter) << ".cu: " << EC.message() << "\n";
+        + std::to_string(counter) << ".cu: " << EC.message() << "\n";
      return false;
    }
@ -215,23 +240,13 @@ namespace cling {
    cuFile.close();
-    if(!generatePCH()){
+    if(!generatePCH() || !generatePTX() || !generateFatbinary()){
      saveFaultyCUfile();
      return false;
    }
    if(!generatePTX()){
      saveFaultyCUfile();
      return false;
    }
    if(!generateFatbinaryInternal()){
      saveFaultyCUfile();
      return false;
    }
 #if PCHMODE == 0
-    llvm::sys::fs::remove(m_GenericFileName + std::to_string(m_Counter)
+    llvm::sys::fs::remove(m_GenericFileName + std::to_string(counter)
                             +".cu.pch");
 #endif
@ -240,72 +255,76 @@ namespace cling {
  }
  bool IncrementalCUDADeviceCompiler::generatePCH() {
    const unsigned int counter = getCounterCopy();
    // clang++ -std=c++xx -Ox -S -Xclang -emit-pch ${clingHeaders} cling[0-9].cu
    // -D__CLING__ -o cling[0-9].cu.pch [-include-pch cling[0-9].cu.pch]
    // --cuda-gpu-arch=sm_[1-7][0-9] -pthread --cuda-device-only [-v] [-g]
-    // ${m_CuArgs.additionalPtxOpt}
+    // ${m_CuArgs->additionalPtxOpt}
-    llvm::SmallVector<const char*, 256> argv;
+    llvm::SmallVector<std::string, 256> argv;
    // First argument have to be the program name.
-    argv.push_back(m_ClangPath.c_str());
+    argv.push_back(m_ClangPath);
-    argv.push_back(m_CuArgs.cppStdVersion.c_str());
+    argv.push_back(m_CuArgs->cppStdVersion);
-    argv.push_back(m_CuArgs.optLevel.c_str());
+    argv.push_back(m_CuArgs->optLevel);
    argv.push_back("-S");
    argv.push_back("-Xclang");
    argv.push_back("-emit-pch");
-    llvm::SmallVector<std::string, 256> headers;
+    addHeaderSearchPathFlags(argv);
    addHeaders(headers);
    for(std::string & s : headers)
      argv.push_back(s.c_str());
    // Is necessary for the cling runtime header.
    argv.push_back("-D__CLING__");
-    std::string cuFilePath = m_GenericFileName + std::to_string(m_Counter)
+    argv.push_back(m_GenericFileName + std::to_string(counter) + ".cu");
                             + ".cu";
    argv.push_back(cuFilePath.c_str());
    argv.push_back("-o");
-    std::string outputname = m_GenericFileName + std::to_string(m_Counter)
+    argv.push_back(m_GenericFileName + std::to_string(counter) +".cu.pch");
                             +".cu.pch";
    argv.push_back(outputname.c_str());
    // If a previos file exist, include it.
 #if PCHMODE == 1
-    std::string previousFile;
+    if(counter){
    if(m_Counter){
      previousFile = m_GenericFileName + std::to_string(m_Counter-1) +".cu.pch";
      argv.push_back("-include-pch");
-      argv.push_back(previousFile.c_str());
+      argv.push_back(m_GenericFileName + std::to_string(counter-1) +".cu.pch");
    }
 #else
-    std::vector<std::string> previousFiles;
+    if(counter){
-    if(m_Counter){
+      for(unsigned int i = 0; i <= counter-1; ++i){
      for(unsigned int i = 0; i <= m_Counter-1; ++i){
        previousFiles.push_back(m_GenericFileName + std::to_string(i) +".cu");
        argv.push_back("-include");
-        argv.push_back(previousFiles[i].c_str());
+        argv.push_back(m_GenericFileName + std::to_string(i) +".cu");
      }
    }
 #endif
-    argv.push_back(m_CuArgs.ptxSmVersion.c_str());
+    argv.push_back(m_CuArgs->ptxSmVersion);
    argv.push_back("-pthread");
    argv.push_back("--cuda-device-only");
-    if(m_CuArgs.verbose)
+    if(m_CuArgs->verbose)
      argv.push_back("-v");
-    if(m_CuArgs.debug)
+    if(m_CuArgs->debug)
      argv.push_back("-g");
-    for(std::string & s : m_CuArgs.additionalPtxOpt){
+    for(const std::string & s : m_CuArgs->additionalPtxOpt){
      argv.push_back(s.c_str());
    }
    std::vector<const char *> argvChar;
    argvChar.resize(argv.size()+1);
    std::transform(argv.begin(), argv.end(), argvChar.begin(),
      [&](const std::string & s)
      {
        return s.c_str();
      }
    );
    // Argv list have to finish with a nullptr.
-    argv.push_back(nullptr);
+    argvChar.push_back(nullptr);
    std::string executionError;
-    int res = llvm::sys::ExecuteAndWait(m_ClangPath.c_str(), argv.data(),
+    int res = llvm::sys::ExecuteAndWait(m_ClangPath.c_str(), argvChar.data(),
                                        nullptr, {}, 0, 0, &executionError);
    if(res){
-      llvm::errs() << "error at launching clang instance to generate PCH file\n"
+      llvm::errs() << "cling::IncrementalCUDADeviceCompiler::generatePCH(): error compiling PCH file:\n"
-                   << executionError << "\n";
+                   << m_ClangPath;
      for(const char * c : argvChar)
        llvm::errs() << " " << c;
      llvm::errs() << '\n' << executionError << "\n";
      return false;
    }
@ -313,80 +332,106 @@ namespace cling {
  }
  bool cling::IncrementalCUDADeviceCompiler::generatePTX() {
    const unsigned int counter = getCounterCopy();
    // clang++ -std=c++xx -Ox -S dummy.cu -o cling.ptx -include-pch
    // cling[0-9].cu.pch --cuda-gpu-arch=sm_xx -pthread --cuda-device-only [-v]
-    // [-g] ${m_CuArgs.additionalPtxOpt}
+    // [-g] ${m_CuArgs->additionalPtxOpt}
-    llvm::SmallVector<const char*, 128> argv;
+    llvm::SmallVector<std::string, 128> argv;
    // First argument have to be the program name.
-    argv.push_back(m_ClangPath.c_str());
+    argv.push_back(m_ClangPath);
-    argv.push_back(m_CuArgs.cppStdVersion.c_str());
+    argv.push_back(m_CuArgs->cppStdVersion);
-    argv.push_back(m_CuArgs.optLevel.c_str());
+    argv.push_back(m_CuArgs->optLevel);
    argv.push_back("-S");
-    argv.push_back(m_DummyCUPath.c_str());
+    argv.push_back(m_DummyCUPath);
    argv.push_back("-o");
-    argv.push_back(m_PTXFilePath.c_str());
+    argv.push_back(m_PTXFilePath);
    argv.push_back("-include-pch");
-    std::string pchFile = m_GenericFileName + std::to_string(m_Counter) +".cu.pch";
+    argv.push_back(m_GenericFileName + std::to_string(counter) +".cu.pch");
-    argv.push_back(pchFile.c_str());
+    argv.push_back(m_CuArgs->ptxSmVersion);
    argv.push_back(m_CuArgs.ptxSmVersion.c_str());
    argv.push_back("-pthread");
    argv.push_back("--cuda-device-only");
-    if(m_CuArgs.verbose)
+    if(m_CuArgs->verbose)
      argv.push_back("-v");
-    if(m_CuArgs.debug)
+    if(m_CuArgs->debug)
      argv.push_back("-g");
-    for(std::string & s : m_CuArgs.additionalPtxOpt){
+    for(const std::string & s : m_CuArgs->additionalPtxOpt){
      argv.push_back(s.c_str());
    }
    std::vector<const char *> argvChar;
    argvChar.resize(argv.size()+1);
    std::transform(argv.begin(), argv.end(), argvChar.begin(),
      [&](const std::string & s)
      {
        return s.c_str();
      }
    );
    // Argv list have to finish with a nullptr.
-    argv.push_back(nullptr);
+    argvChar.push_back(nullptr);
    std::string executionError;
-    int res = llvm::sys::ExecuteAndWait(m_ClangPath.c_str(), argv.data(),
+    int res = llvm::sys::ExecuteAndWait(m_ClangPath.c_str(), argvChar.data(),
                                        nullptr, {}, 0, 0, &executionError);
    if(res){
-      llvm::errs() << "error at launching clang instance to generate ptx code"
+      llvm::errs() << "cling::IncrementalCUDADeviceCompiler::generatePTX(): error compiling PCH file:\n"
-                   << "\n" << executionError << "\n";
+                   << m_ClangPath;
      for(const char * c : argvChar)
        llvm::errs() << " " << c;
      llvm::errs() << '\n' << executionError << "\n";
      return false;
    }
    return true;
  }
-  bool IncrementalCUDADeviceCompiler::generateFatbinaryInternal() {
+  bool IncrementalCUDADeviceCompiler::generateFatbinary() {
    // fatbinary --cuda [-32 | -64] --create cling.fatbin
-    // --image=profile=compute_xx,file=cling.ptx [-g] ${m_CuArgs.fatbinaryOpt}
+    // --image=profile=compute_xx,file=cling.ptx [-g] ${m_CuArgs->fatbinaryOpt}
-    llvm::SmallVector<const char*, 128> argv;
+    llvm::SmallVector<std::string, 128> argv;
    // First argument have to be the program name.
-    argv.push_back(m_FatbinaryPath.c_str());
+    argv.push_back(m_FatbinaryPath);
    argv.push_back("--cuda");
-    argv.push_back(m_CuArgs.fatbinArch.c_str());
+    argv.push_back(m_CuArgs->fatbinArch);
    argv.push_back("--create");
-    argv.push_back(m_FatbinFilePath.c_str());
+    argv.push_back(m_FatbinFilePath);
-    std::string ptxCode = m_CuArgs.fatbinSmVersion
+    argv.push_back(m_CuArgs->fatbinSmVersion + ",file=" + m_PTXFilePath);
-                          + ",file=" + m_PTXFilePath;
+    if(m_CuArgs->debug)
    argv.push_back(ptxCode.c_str());
    if(m_CuArgs.debug)
      argv.push_back("-g");
-    for(std::string & s : m_CuArgs.fatbinaryOpt){
+    for(const std::string & s : m_CuArgs->fatbinaryOpt){
      argv.push_back(s.c_str());
    }
    std::vector<const char *> argvChar;
    argvChar.resize(argv.size()+1);
    std::transform(argv.begin(), argv.end(), argvChar.begin(),
      [&](const std::string & s)
      {
        return s.c_str();
      }
    );
    // Argv list have to finish with a nullptr.
-    argv.push_back(nullptr);
+    argvChar.push_back(nullptr);
    std::string executionError;
-    int res = llvm::sys::ExecuteAndWait(m_FatbinaryPath.c_str(), argv.data(),
+    int res = llvm::sys::ExecuteAndWait(m_FatbinaryPath.c_str(), argvChar.data(),
                                        nullptr, {}, 0, 0, &executionError);
    if(res){
-      llvm::errs() << "error at launching fatbin" << "\n" << executionError << "\n";
+      llvm::errs() << "cling::IncrementalCUDADeviceCompiler::generateFatbinary(): error compiling PCH file:\n"
                   << m_ClangPath;
      for(const char * c : argvChar)
        llvm::errs() << " " << c;
      llvm::errs() << '\n' << executionError << "\n";
      return false;
    }
@ -394,7 +439,7 @@ namespace cling {
  }
  void IncrementalCUDADeviceCompiler::dump(){
-    llvm::outs() << "current counter: " << m_Counter << "\n" <<
+    llvm::outs() << "current counter: " << getCounterCopy() << "\n" <<
                    "CUDA device compiler is valid: " << m_Init << "\n" <<
                    "file path: " << m_FilePath << "\n" <<
                    "fatbin file path: " << m_FatbinFilePath << "\n" <<
@ -404,35 +449,36 @@ namespace cling {
                    << "[0-9]*.cu{.pch}\n" <<
                    "clang++ path: " << m_ClangPath << "\n" <<
                    "nvidia fatbinary path: " << m_FatbinaryPath << "\n" <<
-                    "m_CuArgs c++ standard: " << m_CuArgs.cppStdVersion << "\n" <<
+                    "m_CuArgs c++ standard: " << m_CuArgs->cppStdVersion << "\n" <<
-                    "m_CuArgs opt level: " << m_CuArgs.optLevel << "\n" <<
+                    "m_CuArgs opt level: " << m_CuArgs->optLevel << "\n" <<
                    "m_CuArgs SM level for clang nvptx: "
-                    << m_CuArgs.ptxSmVersion << "\n" <<
+                    << m_CuArgs->ptxSmVersion << "\n" <<
                    "m_CuArgs SM level for fatbinary: "
-                    << m_CuArgs.fatbinSmVersion << "\n" <<
+                    << m_CuArgs->fatbinSmVersion << "\n" <<
                    "m_CuArgs fatbinary architectur: "
-                    << m_CuArgs.fatbinArch << "\n" <<
+                    << m_CuArgs->fatbinArch << "\n" <<
-                    "m_CuArgs verbose: " << m_CuArgs.verbose << "\n" <<
+                    "m_CuArgs verbose: " << m_CuArgs->verbose << "\n" <<
-                    "m_CuArgs debug: " << m_CuArgs.debug << "\n";
+                    "m_CuArgs debug: " << m_CuArgs->debug << "\n";
     llvm::outs() << "m_CuArgs additional clang nvptx options: ";
-     for(std::string & s : m_CuArgs.additionalPtxOpt){
+     for(const std::string & s : m_CuArgs->additionalPtxOpt){
       llvm::outs() << s << " ";
     }
     llvm::outs() << "\n";
     llvm::outs() << "m_CuArgs additional fatbinary options: ";
-     for(std::string & s : m_CuArgs.fatbinaryOpt){
+     for(const std::string & s : m_CuArgs->fatbinaryOpt){
       llvm::outs() << s << " ";
     }
     llvm::outs() << "\n";
  }
  std::error_code IncrementalCUDADeviceCompiler::saveFaultyCUfile(){
    const unsigned int counter = getCounterCopy();
    unsigned int faultFileCounter = 0;
    // Construct the file path of the current .cu file without extension.
-    std::string originalCU = m_GenericFileName + std::to_string(m_Counter);
+    std::string originalCU = m_GenericFileName + std::to_string(counter);
-    // m_Counter will just increased, if the compiling get right. So we need a
+    // counter (= m_Counter) will just increased, if the compiling get right. So we need a
    // second counter, if two or more following files fails.
    std::string faultyCU;
    do{
@ -440,8 +486,8 @@ namespace cling {
      faultyCU = originalCU + "_fault" + std::to_string(faultFileCounter) + ".cu";
    } while(llvm::sys::fs::exists(faultyCU));
-    // orginial: cling[m_Counter].cu
+    // orginial: cling[counter].cu
-    // faulty file: cling[m_Counter]_fault[faultFileCounter].cu
+    // faulty file: cling[counter]_fault[faultFileCounter].cu
    return llvm::sys::fs::rename(originalCU + ".cu", faultyCU);
  }
--- a/lib/Interpreter/IncrementalCUDADeviceCompiler.h
+++ b/lib/Interpreter/IncrementalCUDADeviceCompiler.h
@ -1,6 +1,6 @@
 //--------------------------------------------------------------------*- C++ -*-
 // CLING - the C++ LLVM-based InterpreterG :)
-// author:  Simeon Ehrig <simeonehrig@web.de>
+// author:  Simeon Ehrig <s.ehrig@hzdr.de>
 //
 // This file is dual-licensed: you can choose to license it under the University
 // of Illinois Open Source License or the GNU Lesser General Public License. See
@ -17,18 +17,18 @@
 #include <vector>
 namespace cling{
-    class InvocationOptions;
+  class InvocationOptions;
-    class Transaction;
+  class Transaction;
 }
 namespace clang {
-    class CompilerInstance;
+  class CompilerInstance;
-    class HeaderSearchOptions;
+  class HeaderSearchOptions;
-    class LangOptions;
+  class LangOptions;
 }
 namespace llvm {
-    class StringRef;
+  class StringRef;
 }
 namespace cling {
@ -39,65 +39,59 @@ namespace cling {
  ///
  class IncrementalCUDADeviceCompiler {
    static constexpr unsigned CxxStdCompiledWith() {
      // The value of __cplusplus in GCC < 5.0 (e.g. 4.9.3) when
      // either -std=c++1y or -std=c++14 is specified is 201300L, which fails
      // the test for C++14 or more (201402L) as previously specified.
      // I would claim that the check should be relaxed to:
 #if __cplusplus > 201402L
      return 17;
 #elif __cplusplus > 201103L || (defined(LLVM_ON_WIN32) && _MSC_VER >= 1900)
      return 14;
 #elif __cplusplus >= 201103L
      return 11;
 #else
 #error "Unknown __cplusplus version"
 #endif
    }
    ///\brief Contains the arguments for the cling nvptx and the nvidia
-    /// fatbinary tool. The arguments are static and will set at the constructor
+    /// fatbinary tool.
    /// of IncrementalCUDADeviceCompiler.
    struct CUDACompilerArgs {
-      std::string cppStdVersion = "-std=c++" + std::to_string(CxxStdCompiledWith());
+      const std::string cppStdVersion;
-      std::string optLevel = "-O0";
+      const std::string optLevel;
-      std::string ptxSmVersion = "--cuda-gpu-arch=sm_20";
+      const std::string ptxSmVersion;
-      std::string fatbinSmVersion = "--image=profile=compute_20";
+      const std::string fatbinSmVersion;
      ///\brief Argument for the fatbinary tool, which is depend, if the OS is
      /// 32 bit or 64 bit.
-      std::string fatbinArch = "-32";
+      const std::string fatbinArch;
      ///\brief True, if the flag -v is set.
-      bool verbose = false;
+      const bool verbose;
      ///\brief True, if the flag -g is set.
-      bool debug = false;
+      const bool debug;
      ///\brief A list Arguments, which will passed to the clang nvptx.
-      std::vector<std::string> additionalPtxOpt;
+      const std::vector<std::string> additionalPtxOpt;
      ///\brief A list Arguments, which will passed to the fatbinary tool.
-      std::vector<std::string> fatbinaryOpt;
+      const std::vector<std::string> fatbinaryOpt;
      CUDACompilerArgs(std::string cppStdVersion, std::string optLevel,
                       std::string ptxSmVersion, std::string fatbinSmVersion,
                       std::string fatbinArch, bool verbose, bool debug,
                       std::vector<std::string> additionalPtxOpt,
                       std::vector<std::string> fatbinaryOpt)
      : cppStdVersion(cppStdVersion), optLevel(optLevel),
        ptxSmVersion(ptxSmVersion), fatbinSmVersion(fatbinSmVersion),
        fatbinArch(fatbinArch), verbose(verbose), debug(debug),
        additionalPtxOpt(additionalPtxOpt), fatbinaryOpt(fatbinaryOpt) {}
    };
-    CUDACompilerArgs m_CuArgs;
+    std::unique_ptr<CUDACompilerArgs> m_CuArgs;
    ///\brief The counter responsible to generate a chain of .cu source files
    /// and .cu.pch files.
-    unsigned int m_Counter;
+    unsigned int m_Counter = 0;
    ///\brief Is true if all necessary files have been generated and clang and
    /// cuda NVIDIA fatbinary are found.
-    bool m_Init;
+    bool m_Init = false;
    ///\brief Path to the folder, where all files will put in. Ordinary the tmp
    /// folder. Have to end with a separator. Can be empty.
-    std::string m_FilePath;
+    const std::string m_FilePath;
    ///\brief Path to the fatbin file, which will used by the CUDACodeGen.
-    std::string m_FatbinFilePath;
+    const std::string m_FatbinFilePath;
    ///\brief Path to a empty dummy.cu file. The file is necessary to generate
    /// PTX code from the pch files.
-    std::string m_DummyCUPath;
+    const std::string m_DummyCUPath;
    ///\brief Path to the PTX file. Will be reused for every PTX generation.
-    std::string m_PTXFilePath;
+    const std::string m_PTXFilePath;
    ///\brief Will be used to generate .cu and .cu.pch files.
-    std::string m_GenericFileName;
+    const std::string m_GenericFileName;
    ///\brief Path to the clang++ compiler, which will used to compile the pch
    /// files and the PTX code. Should be in same folder, as the cling.
@ -109,6 +103,11 @@ namespace cling {
    ///
    std::shared_ptr<clang::HeaderSearchOptions> m_HeaderSearchOptions;
    ///\brief get copy of m_Counter
    ///
    ///\returns copy of m_Counter
    unsigned int getCounterCopy(){ return m_Counter;}
    ///\brief Generate the dummy.cu file and set the paths of m_PTXFilePath and
    /// m_GenericFileName.
    ///
@ -122,12 +121,12 @@ namespace cling {
    ///       toolkit
    ///
    ///\returns True, whether clang and fatbinary was found.
-    bool searchCompilingTools(cling::InvocationOptions & invocationOptions);
+    bool findToolchain(const cling::InvocationOptions & invocationOptions);
    ///\brief Add the include paths from the interpreter runtime to a argument list.
    ///
    ///\param [in,out] argv - The include commands will append to the argv vector.
-    void addHeaders(llvm::SmallVectorImpl<std::string> & argv);
+    void addHeaderSearchPathFlags(llvm::SmallVectorImpl<std::string> & argv);
    ///\brief Start an clang compiler with nvptx backend. Read the content of
    /// cling.cu and compile it to a new PCH file. If predecessor PCH file is
@ -147,17 +146,18 @@ namespace cling {
    /// m_FatbinFilePath.
    ///
    ///\returns True, if the fatbinary tool returns 0.
-    bool generateFatbinaryInternal();
+    bool generateFatbinary();
    ///\brief The function set the values of m_CuArgs.
    ///
    ///\param [in] langOpts - The LangOptions of the CompilerInstance.
    ///\param [in] invocationOptions - The invocationOptions of the interpreter.
-    ///\param [in] optLevel - The optimization level of the interpreter.
+    ///\param [in] intprOptLevel - The optimization level of the interpreter.
    ///\param [in] debugInfo - The debugInfo of the CompilerInstance.
-    void setCuArgs(clang::LangOptions & langOpts,
+    void setCuArgs(const clang::LangOptions & langOpts,
-                   cling::InvocationOptions & invocationOptions, int & optLevel,
+                   const cling::InvocationOptions & invocationOptions,
-                   clang::codegenoptions::DebugInfoKind debugInfo);
+                   const int intprOptLevel,
                   const clang::codegenoptions::DebugInfoKind debugInfo);
    ///\brief Save .cu file, if cuda device code compiler failed at translation.
    ///
@ -176,14 +176,14 @@ namespace cling {
    ///       clang and the NVIDIA tool fatbinary.
    ///\param [in] CompilerInstance - Will be used for m_CuArgs and the include
    ///       path handling.
-    IncrementalCUDADeviceCompiler(std::string filePath,
+    IncrementalCUDADeviceCompiler(const std::string & filePath,
-                                  int optLevel,
+                                  const int optLevel,
-                                  cling::InvocationOptions & invocationOptions,
+                                  const cling::InvocationOptions & invocationOptions,
-                                  clang::CompilerInstance * CI);
+                                  const clang::CompilerInstance & CI);
    ///\brief Generate an new fatbin file with the path in CudaGpuBinaryFileNames.
    /// It will add the content of input, to the existing source code, which was
-    /// passed to generateFatbinary, before.
+    /// passed to compileDeviceCode, before.
    ///
    ///\param [in] input - New source code. The function can select, if code
    ///       is relevant for the device side. Have to be valid CUDA C++ code.
@ -191,7 +191,8 @@ namespace cling {
    ///
    ///\returns True, if all stages of generating fatbin runs right and a new
    /// fatbin file is written.
-    bool generateFatbinary(const llvm::StringRef input, cling::Transaction * T);
+    bool compileDeviceCode(const llvm::StringRef input,
                           const cling::Transaction * const T);
    ///\brief Print some information of the IncrementalCUDADeviceCompiler to
    /// llvm::outs(). For Example the paths of the files and tools.
--- a/lib/Interpreter/IncrementalParser.cpp
+++ b/lib/Interpreter/IncrementalParser.cpp
@ -45,6 +45,8 @@
 #include "clang/Sema/Sema.h"
 #include "clang/Sema/SemaDiagnostic.h"
 #include "clang/Serialization/ASTWriter.h"
 #include "clang/Serialization/ASTReader.h"
 #include "llvm/Support/Path.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
@ -118,7 +120,7 @@ namespace {
      m_PrevClient.EndSourceFile();
      SyncDiagCountWithTarget();
    }
-  
+
    void finish() override {
      m_PrevClient.finish();
      SyncDiagCountWithTarget();
@ -213,6 +215,34 @@ namespace cling {
    m_DiagConsumer.reset(new FilteringDiagConsumer(Diag, false));
    initializeVirtualFile();
    if(m_CI->getFrontendOpts().ProgramAction != frontend::ParseSyntaxOnly &&
      m_Interpreter->getOptions().CompilerOpts.CUDA){
        // Create temporary folder for all files, which the CUDA device compiler
        // will generate.
        llvm::SmallString<256> TmpPath;
        llvm::StringRef sep = llvm::sys::path::get_separator().data();
        llvm::sys::path::system_temp_directory(false, TmpPath);
        TmpPath.append(sep.data());
        TmpPath.append("cling-%%%%");
        TmpPath.append(sep.data());
        llvm::SmallString<256> TmpFolder;
        llvm::sys::fs::createUniqueFile(TmpPath.c_str(), TmpFolder);
        llvm::sys::fs::create_directory(TmpFolder);
        // The CUDA fatbin file is the connection between the CUDA device
        // compiler and the CodeGen of cling. The file is reused every time.
        if(getCI()->getCodeGenOpts().CudaGpuBinaryFileNames.empty())
          getCI()->getCodeGenOpts().CudaGpuBinaryFileNames.push_back(
            std::string(TmpFolder.c_str()) + "cling.fatbin");
        m_CUDACompiler.reset(
          new IncrementalCUDADeviceCompiler(TmpFolder.c_str(),
                                            m_CI->getCodeGenOpts().OptimizationLevel,
                                            m_Interpreter->getOptions(),
                                            *m_CI));
    }
  }
  bool
@ -794,8 +824,7 @@ namespace cling {
      return kSuccessWithWarnings;
    if(!m_Interpreter->isInSyntaxOnlyMode() && m_CI->getLangOpts().CUDA )
-      m_Interpreter->getCUDADeviceCompiler()
+      m_CUDACompiler->compileDeviceCode(input, m_Consumer->getTransaction());
        .generateFatbinary(input, m_Consumer->getTransaction());
    return kSuccess;
  }
--- a/lib/Interpreter/IncrementalParser.h
+++ b/lib/Interpreter/IncrementalParser.h
@ -43,6 +43,7 @@ namespace cling {
  class Transaction;
  class TransactionPool;
  class ASTTransformer;
  class IncrementalCUDADeviceCompiler;
  ///\brief Responsible for the incremental parsing and compilation of input.
  ///
@ -94,6 +95,10 @@ namespace cling {
    ///
    std::unique_ptr<clang::DiagnosticConsumer> m_DiagConsumer;
    ///\brief Cling's worker class implementing the compilation of CUDA device code
    ///
    std::unique_ptr<IncrementalCUDADeviceCompiler> m_CUDACompiler;
  public:
    enum EParseResult {
      kSuccess,
--- a/lib/Interpreter/Interpreter.cpp
+++ b/lib/Interpreter/Interpreter.cpp
@ -20,7 +20,6 @@
 #include "ForwardDeclPrinter.h"
 #include "IncrementalExecutor.h"
 #include "IncrementalParser.h"
 #include "IncrementalCUDADeviceCompiler.h"
 #include "MultiplexInterpreterCallbacks.h"
 #include "TransactionUnloader.h"
@ -57,7 +56,6 @@
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/Path.h"
 #include <string>
@ -167,7 +165,7 @@ namespace cling {
           m_DyLibManager && m_LookupHelper &&
           (isInSyntaxOnlyMode() || m_Executor);
  }
-  
+
  namespace internal { void symbol_requester(); }
  const char* Interpreter::getVersion() {
@ -238,33 +236,6 @@ namespace cling {
        return;
    }
    if(!isInSyntaxOnlyMode() && m_Opts.CompilerOpts.CUDA){
        // Create temporary folder for all files, which the CUDA device compiler
        // will generate.
        llvm::SmallString<256> TmpPath;
        llvm::StringRef sep = llvm::sys::path::get_separator().data();
        llvm::sys::path::system_temp_directory(false, TmpPath);
        TmpPath.append(sep.data());
        TmpPath.append("cling-%%%%");
        TmpPath.append(sep.data());
        llvm::SmallString<256> TmpFolder;
        llvm::sys::fs::createUniqueFile(TmpPath.c_str(), TmpFolder);
        llvm::sys::fs::create_directory(TmpFolder);
        // The CUDA fatbin file is the connection between the CUDA device
        // compiler and the CodeGen of cling. The file is reused every time.
        if(getCI()->getCodeGenOpts().CudaGpuBinaryFileNames.empty())
          getCI()->getCodeGenOpts().CudaGpuBinaryFileNames.push_back(
            std::string(TmpFolder.c_str()) + "cling.fatbin");
        m_CUDACompiler.reset(
          new IncrementalCUDADeviceCompiler(TmpFolder.c_str(),
                                            m_OptLevel,
                                            m_Opts,
                                            getCI()));
    }
    // Tell the diagnostic client that we are entering file parsing mode.
    DiagnosticConsumer& DClient = getCI()->getDiagnosticClient();
    DClient.BeginSourceFile(getCI()->getLangOpts(), &PP);
@ -702,7 +673,7 @@ namespace cling {
    }
    return Value;
  }
-  
+
  ///\brief Maybe transform the input line to implement cint command line
  /// semantics (declarations are global) and compile to produce a module.
  ///
@ -898,11 +869,11 @@ namespace cling {
    // Ignore diagnostics when we tab complete.
    // This is because we get redefinition errors due to the import of the decls.
    clang::IgnoringDiagConsumer* ignoringDiagConsumer =
-                                            new clang::IgnoringDiagConsumer();                      
+                                            new clang::IgnoringDiagConsumer();
    childSemaRef.getDiagnostics().setClient(ignoringDiagConsumer, true);
    DiagnosticsEngine& parentDiagnostics = this->getCI()->getSema().getDiagnostics();
-    std::unique_ptr<DiagnosticConsumer> ownerDiagConsumer = 
+    std::unique_ptr<DiagnosticConsumer> ownerDiagConsumer =
                                                parentDiagnostics.takeClient();
    auto clientDiagConsumer = parentDiagnostics.getClient();
    parentDiagnostics.setClient(ignoringDiagConsumer, /*owns*/ false);
--- a/test/CUDADeviceCode/CUDASharedMemory.C
+++ b/test/CUDADeviceCode/CUDASharedMemory.C
@ -60,4 +60,4 @@ hostOutput[numberOfThreads-1] == numberOfThreads-1 // expected-note {{use '=' to
 expectedSum == cudaSum // expected-note {{use '=' to turn this equality comparison into an assignment}}
 // CHECK: (bool) true
-.q
+.q
--- a/test/CUDADeviceCode/CUDAStreams.C
+++ b/test/CUDADeviceCode/CUDAStreams.C
@ -78,4 +78,4 @@ expectedSum1 == cudaSum1 // expected-note {{use '=' to turn this equality compar
 expectedSum2 == cudaSum2 // expected-note {{use '=' to turn this equality comparison into an assignment}}
 // CHECK: (bool) true
-.q
+.q