Add CUDA device compiler, which allows generating CUDA PTX code at runtime.

The class IncrementalCUDADeviceCompiler uses external tools to generate PTX and CUDA fatbin files. It runs the tools clang and fatbinary via llvm::sys::ExecuteAndWait. The class also handles merging new code into the existing code. The steps of the compiler pipeline are:
- clang: CUDA C++ + previous PCH -> PCH
- clang: PCH -> PTX
- fatbinary: PTX -> fatbin

There is no selection of code: every input given to cling is passed on to the IncrementalCUDADeviceCompiler.
This commit is contained in:
Simeon Ehrig 2018-03-20 16:50:02 +01:00 committed by sftnight
parent 90454f964a
commit 6652d1a7c9
8 changed files with 474 additions and 0 deletions

View File

@ -70,6 +70,7 @@ namespace cling {
class LookupHelper;
class Value;
class Transaction;
class IncrementalCUDADeviceCompiler;
///\brief Class that implements the interpreter-like behavior. It manages the
/// incremental compilation.
@ -158,6 +159,10 @@ namespace cling {
///
std::unique_ptr<LookupHelper> m_LookupHelper;
///\brief Cling's worker class implementing the compilation of CUDA device code
///
std::unique_ptr<IncrementalCUDADeviceCompiler> m_CUDACompiler;
///\brief Cache of compiled destructors wrappers.
std::unordered_map<const clang::RecordDecl*, void*> m_DtorWrappers;
@ -343,6 +348,8 @@ namespace cling {
LookupHelper& getLookupHelper() const { return *m_LookupHelper; }
IncrementalCUDADeviceCompiler& getCUDADeviceCompiler() { return *m_CUDACompiler; }
const clang::Parser& getParser() const;
clang::Parser& getParser();

View File

@ -64,6 +64,11 @@ namespace cling {
std::string CachePath;
// If not empty, the name of the module we're currently compiling.
std::string ModuleName;
/// \brief Custom path of the CUDA toolkit
std::string CUDAPath;
/// \brief Architecture level of the CUDA gpu. Necessary for the
/// NVIDIA fatbinary tool.
std::string CUDAGpuArch;
///\brief The remaining arguments to pass to clang.
///

View File

@ -75,6 +75,7 @@ add_cling_library(clingInterpreter OBJECT
Exception.cpp
ExternalInterpreterSource.cpp
ForwardDeclPrinter.cpp
IncrementalCUDADeviceCompiler.cpp
IncrementalExecutor.cpp
IncrementalJIT.cpp
IncrementalParser.cpp

View File

@ -0,0 +1,273 @@
//--------------------------------------------------------------------*- C++ -*-
// CLING - the C++ LLVM-based InterpreterG :)
// author: Simeon Ehrig <simeonehrig@web.de>
//
// This file is dual-licensed: you can choose to license it under the University
// of Illinois Open Source License or the GNU Lesser General Public License. See
// LICENSE.TXT for details.
//------------------------------------------------------------------------------
#include "IncrementalCUDADeviceCompiler.h"
#include "cling/Interpreter/InvocationOptions.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/Path.h"
#include "llvm/ADT/Triple.h"
#include <string>
namespace cling {
IncrementalCUDADeviceCompiler::IncrementalCUDADeviceCompiler(std::string filePath,
    std::string & CudaGpuBinaryFileNames,
    cling::InvocationOptions & invocationOptions,
    const llvm::SmallVectorImpl<std::string> & clingHeaders)
    : m_Counter(0),
      m_FilePath(filePath),
      m_FatbinFilePath(CudaGpuBinaryFileNames),
      // The cling argument has the form "sm_20"; strip the "sm_" prefix to
      // get the bare level. Guard the substr: a malformed value with fewer
      // than 3 characters would otherwise throw std::out_of_range. Fall
      // back to the default level 20 in that case (as for an empty value).
      m_SMLevel(invocationOptions.CompilerOpts.CUDAGpuArch.size() > 3
                    ? invocationOptions.CompilerOpts.CUDAGpuArch.substr(3)
                    : "20"),
      m_ClingHeaders(clingHeaders.begin(), clingHeaders.end()) {
  assert(!CudaGpuBinaryFileNames.empty() && "CudaGpuBinaryFileNames can't be empty");
  // m_Init records whether the helper files could be created and both
  // external tools (clang++, fatbinary) were found; every later compile
  // call checks it first.
  m_Init = generateHelperFiles();
  m_Init = m_Init && searchCompilingTools(invocationOptions);
  // fatbinary needs to know whether the host is a 32 bit or 64 bit system.
  llvm::Triple hostTarget(llvm::sys::getDefaultTargetTriple());
  m_FatbinArch = hostTarget.isArch64Bit() ? "-64" : "-32";
}
bool IncrementalCUDADeviceCompiler::generateHelperFiles(){
// Generate an empty dummy.cu file.
m_DummyCUPath = m_FilePath + "dummy.cu";
std::error_code EC;
llvm::raw_fd_ostream dummyCU(m_DummyCUPath, EC, llvm::sys::fs::F_Text);
if(EC){
llvm::errs() << "Could not open file: " << EC.message();
return false;
}
dummyCU.close();
m_PTXFilePath = m_FilePath + "cling.ptx";
m_GenericFileName = m_FilePath + "cling";
return true;
}
bool IncrementalCUDADeviceCompiler::searchCompilingTools(cling::InvocationOptions & invocationOptions){
  // Search for clang in the folder of cling.
  // NOTE(review): this uses the current working directory, which only
  // matches the cling binary folder when cling was launched from there;
  // llvm::sys::fs::getMainExecutable would be more robust — TODO confirm.
  llvm::SmallString<128> cwd;
  // Fix: the error code of current_path was previously ignored; on failure
  // cwd stayed empty and the clang path silently became "/clang++".
  if (std::error_code EC = llvm::sys::fs::current_path(cwd)) {
    llvm::errs() << "Error: could not determine the current path: "
                 << EC.message() << "\n";
    return false;
  }
  cwd.append(llvm::sys::path::get_separator());
  cwd.append("clang++");
  m_ClangPath = cwd.c_str();
  // Check that clang exists and is executable.
  if(!llvm::sys::fs::can_execute(m_ClangPath)){
    llvm::errs() << "Error: " << m_ClangPath << " not existing or executable!\n";
    return false;
  }
  // Use the custom CUDA toolkit path, if it was set via cling argument.
  if(!invocationOptions.CompilerOpts.CUDAPath.empty()){
    m_FatbinaryPath = invocationOptions.CompilerOpts.CUDAPath + "/bin/fatbinary";
    if(!llvm::sys::fs::can_execute(m_FatbinaryPath)){
      llvm::errs() << "Error: " << m_FatbinaryPath << " not existing or executable!\n";
      return false;
    }
  } else {
    // Otherwise search for fatbinary on the system PATH.
    if (llvm::ErrorOr<std::string> fatbinary =
            llvm::sys::findProgramByName("fatbinary")) {
      llvm::SmallString<256> fatbinaryAbsolutePath;
      llvm::sys::fs::real_path(*fatbinary, fatbinaryAbsolutePath);
      m_FatbinaryPath = fatbinaryAbsolutePath.c_str();
    } else {
      llvm::errs() << "Error: nvidia tool fatbinary not found!\n" <<
        "Please add the cuda /bin path to PATH or set the toolkit path via --cuda-path argument.\n";
      return false;
    }
  }
  return true;
}
void IncrementalCUDADeviceCompiler::addClingHeaders(llvm::SmallVectorImpl<const char*> & argv){
  // Forward every stored include command ("-I<path>") to the argument
  // list. Only pointers into m_ClingHeaders are stored; the member vector
  // outlives any argv built from it.
  const size_t headerCount = m_ClingHeaders.size();
  for (size_t i = 0; i < headerCount; ++i)
    argv.push_back(m_ClingHeaders[i].c_str());
}
bool IncrementalCUDADeviceCompiler::generateFatbinary(llvm::StringRef input){
  // Refuse to compile if the constructor could not set everything up.
  if (!m_Init) {
    llvm::errs() << "Error: Initializiation of CUDA Device Code Compiler failed\n";
    return false;
  }
  // Persist the new (CUDA) C++ source as cling<counter>.cu.
  const std::string cuPath =
      m_GenericFileName + std::to_string(m_Counter) + ".cu";
  std::error_code err;
  llvm::raw_fd_ostream cuFile(cuPath, err, llvm::sys::fs::F_Text);
  if (err) {
    llvm::errs() << "Could not open file: " << err.message();
    return false;
  }
  cuFile << input;
  cuFile.close();
  // Run the three-stage pipeline: source -> PCH -> PTX -> fatbin.
  // Abort on the first failing stage.
  if (!generatePCH() || !generatePTX() || !generateFatbinaryInternal())
    return false;
  // Only count inputs whose whole pipeline succeeded, so the PCH chain
  // stays consistent.
  ++m_Counter;
  return true;
}
bool IncrementalCUDADeviceCompiler::generatePCH() {
  // Builds and runs:
  //   clang++ -std=c++14 -S -Xclang -emit-pch ${clingHeaders} cling[0-9].cu
  //     -D__CLING__ -o cling[0-9].cu.pch ${ | -include-pch cling[0-9].cu.pch }
  //     --cuda-gpu-arch=sm_${m_SMLevel} -pthread --cuda-device-only
  // Owning strings are declared first; argv below only stores their
  // c_str() pointers, so they must outlive the ExecuteAndWait call.
  const std::string cuFilePath =
      m_GenericFileName + std::to_string(m_Counter) + ".cu";
  const std::string outputname =
      m_GenericFileName + std::to_string(m_Counter) + ".cu.pch";
  const std::string smString = "--cuda-gpu-arch=sm_" + m_SMLevel;
  std::string previousFile;

  llvm::SmallVector<const char*, 256> argv;
  argv.push_back(m_ClangPath.c_str()); // argv[0] must be the program name.
  // FIXME: Should replaced by the arguments of the cling instance.
  argv.push_back("-std=c++14");
  argv.push_back("-S");
  argv.push_back("-Xclang");
  argv.push_back("-emit-pch");
  addClingHeaders(argv);
  argv.push_back("-D__CLING__"); // Needed by the cling runtime header.
  argv.push_back(cuFilePath.c_str());
  argv.push_back("-o");
  argv.push_back(outputname.c_str());
  // Chain in the PCH of the previous input, if there is one.
  if (m_Counter) {
    previousFile = m_GenericFileName + std::to_string(m_Counter-1) + ".cu.pch";
    argv.push_back("-include-pch");
    argv.push_back(previousFile.c_str());
  }
  // FIXME: Should replaced by the arguments of the cling instance.
  argv.push_back(smString.c_str());
  argv.push_back("-pthread");
  argv.push_back("--cuda-device-only");
  argv.push_back(nullptr); // ExecuteAndWait expects a null-terminated list.

  std::string executionError;
  const int rc = llvm::sys::ExecuteAndWait(m_ClangPath.c_str(), argv.data(),
                                           nullptr, {}, 0, 0, &executionError);
  if (rc) {
    llvm::errs() << "error at launching clang instance to generate PCH file\n"
                 << executionError << "\n";
    return false;
  }
  return true;
}
bool cling::IncrementalCUDADeviceCompiler::generatePTX() {
// clang++ -std=c++14 -S dummy.cu -o cling.ptx -include-pch cling[0-9].cu.pch
// --cuda-gpu-arch=sm_${m_smLevel} -pthread --cuda-device-only
llvm::SmallVector<const char*, 128> argv;
// First argument have to be the program name.
argv.push_back(m_ClangPath.c_str());
// FIXME: Should replaced by the arguments of the cling instance.
argv.push_back("-std=c++14");
argv.push_back("-S");
argv.push_back(m_DummyCUPath.c_str());
argv.push_back("-o");
argv.push_back(m_PTXFilePath.c_str());
argv.push_back("-include-pch");
std::string pchFile = m_GenericFileName + std::to_string(m_Counter) +".cu.pch";
argv.push_back(pchFile.c_str());
// FIXME: Should replaced by the arguments of the cling instance.
std::string smString = "--cuda-gpu-arch=sm_" + m_SMLevel;
argv.push_back(smString.c_str());
argv.push_back("-pthread");
argv.push_back("--cuda-device-only");
// Argv list have to finish with a nullptr.
argv.push_back(nullptr);
std::string executionError;
int res = llvm::sys::ExecuteAndWait(m_ClangPath.c_str(), argv.data(),
nullptr, {}, 0, 0, &executionError);
if(res){
llvm::errs() << "error at launching clang instance to generate ptx code"
<< "\n" << executionError << "\n";
return false;
}
return true;
}
bool IncrementalCUDADeviceCompiler::generateFatbinaryInternal() {
// fatbinary --cuda [-32 | -64] --create cling.fatbin
// --image=profile=compute_${m_smLevel},file=cling.ptx
llvm::SmallVector<const char*, 128> argv;
// First argument have to be the program name.
argv.push_back(m_FatbinaryPath.c_str());
argv.push_back("--cuda");
argv.push_back(m_FatbinArch.c_str());
argv.push_back("--create");
argv.push_back(m_FatbinFilePath.c_str());
std::string ptxCode = "--image=profile=compute_"+ m_SMLevel
+ ",file=" + m_PTXFilePath;
argv.push_back(ptxCode.c_str());
// Argv list have to finish with a nullptr.
argv.push_back(nullptr);
std::string executionError;
int res = llvm::sys::ExecuteAndWait(m_FatbinaryPath.c_str(), argv.data(),
nullptr, {}, 0, 0, &executionError);
if(res){
llvm::errs() << "error at launching fatbin" << "\n" << executionError << "\n";
return false;
}
return true;
}
void IncrementalCUDADeviceCompiler::dump(){
  // Debug helper: print the counter, the validity flag and every file and
  // tool path this compiler instance works with.
  llvm::outs() << "current counter: " << m_Counter << "\n";
  llvm::outs() << "CUDA device compiler is valid: " << m_Init << "\n";
  llvm::outs() << "file path: " << m_FilePath << "\n";
  llvm::outs() << "fatbin file path: " << m_FatbinFilePath << "\n";
  llvm::outs() << "dummy.cu file path: " << m_DummyCUPath << "\n";
  llvm::outs() << "cling.ptx file path: " << m_PTXFilePath << "\n";
  llvm::outs() << "generic file path: " << m_GenericFileName << "[0-9]*.cu{.pch}\n";
  llvm::outs() << "clang++ path: " << m_ClangPath << "\n";
  llvm::outs() << "nvidia fatbinary path: " << m_FatbinaryPath << "\n";
}
} // end namespace cling

View File

@ -0,0 +1,153 @@
//--------------------------------------------------------------------*- C++ -*-
// CLING - the C++ LLVM-based InterpreterG :)
// author: Simeon Ehrig <simeonehrig@web.de>
//
// This file is dual-licensed: you can choose to license it under the University
// of Illinois Open Source License or the GNU Lesser General Public License. See
// LICENSE.TXT for details.
//------------------------------------------------------------------------------
#ifndef CLING_INCREMENTAL_CUDA_DEVICE_JIT_H
#define CLING_INCREMENTAL_CUDA_DEVICE_JIT_H
#include "llvm/ADT/SmallVector.h"
#include <string>
#include <vector>
namespace cling{
class InvocationOptions;
}
namespace clang {
class CodeGenOptions;
}
namespace llvm {
class StringRef;
}
namespace cling {
///\brief The class is responsible for generating CUDA device code in
/// cuda fatbinary form during the runtime. It works with
/// llvm::sys::ExecuteAndWait.
///
class IncrementalCUDADeviceCompiler {
/// FIXME: Add handling of newly included headers. The include commands can
/// be added at the prompt or via .L .
///\brief Counter used to generate the chain of .cu source files
/// and .cu.pch files (cling0.cu, cling0.cu.pch, cling1.cu, ...).
unsigned int m_Counter;
///\brief True if all necessary helper files have been generated and both
/// clang and the CUDA NVIDIA fatbinary tool were found.
bool m_Init;
///\brief Path to the folder where all files will be put. Ordinarily the tmp
/// folder. Has to end with a separator. Can be empty.
std::string m_FilePath;
///\brief Path to the fatbin file, which will be used by the CUDACodeGen.
std::string m_FatbinFilePath;
///\brief Path to an empty dummy.cu file. The file is necessary to generate
/// PTX code from the pch files.
std::string m_DummyCUPath;
///\brief Path to the PTX file. Will be reused for every PTX generation.
std::string m_PTXFilePath;
///\brief Path prefix used to generate the .cu and .cu.pch file names.
std::string m_GenericFileName;
///\brief The SM level describes which features are available in the code
/// and on the GPU. Just a number [1-7][0-9], e.g. "20" for sm_20.
std::string m_SMLevel;
///\brief Path to the clang++ compiler, which will be used to compile the pch
/// files and the PTX code. Should be in the same folder as cling.
std::string m_ClangPath;
///\brief Path to the NVIDIA tool fatbinary.
std::string m_FatbinaryPath;
///\brief Contains the include commands (-I<path>) for the cling runtime
/// headers.
llvm::SmallVector<std::string, 256> m_ClingHeaders;
///\brief Argument for the fatbinary tool; depends on whether the OS is
/// 32 bit ("-32") or 64 bit ("-64").
std::string m_FatbinArch;
///\brief Generate the dummy.cu file and set the paths of m_PTXFilePath and
/// m_GenericFileName.
///
///\returns True, if it created a dummy.cu file.
bool generateHelperFiles();
///\brief Find the paths of clang and the NVIDIA tool fatbinary. Clang has
/// to be in the same folder as cling.
///
///\param [in] invocationOptions - Can contain a custom path to the cuda
/// toolkit.
///
///\returns True, if both clang and fatbinary were found.
bool searchCompilingTools(cling::InvocationOptions & invocationOptions);
///\brief Add the include path commands (-I...) to an argument list. The
/// paths point to the cling runtime headers.
///
///\param [in,out] argv - The include commands will be appended to the argv
/// vector.
void addClingHeaders(llvm::SmallVectorImpl<const char*> & argv);
///\brief Start a clang compiler with nvptx backend. Read the content of
/// cling[0-9].cu and compile it to a new PCH file. If a predecessor PCH
/// file exists, it will be included.
///
///\returns True, if clang returned 0.
bool generatePCH();
///\brief Start a clang compiler with nvptx backend. Generate a PTX file
/// from the latest PCH file. The PTX code will be written to cling.ptx.
///
///\returns True, if clang returned 0.
bool generatePTX();
///\brief Start the NVIDIA tool fatbinary. Generate a fatbin file
/// from cling.ptx. The fatbin code will be written to the path in
/// m_FatbinFilePath.
///
///\returns True, if the fatbinary tool returned 0.
bool generateFatbinaryInternal();
public:
///\brief Constructor for IncrementalCUDADeviceCompiler
///
///\param [in] filePath - All files will be generated in the folder of the
/// filePath, except the fatbin file, if it has another path. Has
/// to end with a separator. Can be empty.
///\param [in] CudaGpuBinaryFileNames - Path to the fatbin file. Must not
/// be empty.
///\param [in] invocationOptions - Contains values for the arguments of
/// clang and the NVIDIA tool fatbinary.
///\param [in] clingHeaders - Contains the paths to the cling runtime
/// headers with include command (-I).
IncrementalCUDADeviceCompiler(std::string filePath,
std::string & CudaGpuBinaryFileNames,
cling::InvocationOptions & invocationOptions,
const llvm::SmallVectorImpl<std::string> & clingHeaders);
///\brief Generate a new fatbin file at the path in CudaGpuBinaryFileNames.
/// The content of input is added to the source code that was passed to
/// earlier generateFatbinary calls (chained via PCH files).
///
///\param [in] input - New source code. Has to be valid CUDA C++ code.
/// NOTE(review): the commit message states there is no selection of
/// device-relevant code — every input is compiled; confirm intent.
///
///\returns True, if all stages of generating fatbin ran right and a new
/// fatbin file was written.
bool generateFatbinary(llvm::StringRef input);
///\brief Print some information of the IncrementalCUDADeviceCompiler to
/// llvm::outs(). For example the paths of the files and tools.
void dump();
};
} // end cling
#endif // CLING_INCREMENTAL_CUDA_DEVICE_JIT_H

View File

@ -17,6 +17,7 @@
#include "DeclCollector.h"
#include "DeclExtractor.h"
#include "DynamicLookup.h"
#include "IncrementalCUDADeviceCompiler.h"
#include "IncrementalExecutor.h"
#include "NullDerefProtectionTransformer.h"
#include "TransactionPool.h"
@ -736,6 +737,9 @@ namespace cling {
DiagnosticErrorTrap Trap(Diags);
Sema::SavePendingInstantiationsRAII SavedPendingInstantiations(S);
if(m_CI->getLangOpts().CUDA )
m_Interpreter->getCUDADeviceCompiler().generateFatbinary(input);
Parser::DeclGroupPtrTy ADecl;
while (!m_Parser->ParseTopLevelDecl(ADecl)) {
// If we got a null return and something *was* parsed, ignore it. This

View File

@ -20,6 +20,7 @@
#include "ForwardDeclPrinter.h"
#include "IncrementalExecutor.h"
#include "IncrementalParser.h"
#include "IncrementalCUDADeviceCompiler.h"
#include "MultiplexInterpreterCallbacks.h"
#include "TransactionUnloader.h"
@ -56,6 +57,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Path.h"
#include <string>
@ -236,6 +238,33 @@ namespace cling {
return;
}
if(!isInSyntaxOnlyMode() && m_Opts.CompilerOpts.CUDA){
// Create temporary folder for all files, which the CUDA device compiler
// will generate.
llvm::SmallVector<char, 256> TmpFolder;
llvm::StringRef sep = llvm::sys::path::get_separator().data();
llvm::sys::path::system_temp_directory(false, TmpFolder);
llvm::sys::fs::createUniqueFile(std::string(TmpFolder.data())
+ sep + "cling-%%%%" + sep , TmpFolder);
llvm::sys::fs::create_directory(TmpFolder);
// The CUDA fatbin file is the connection beetween the CUDA device
// compiler and the CodeGen of cling. The file will every time reused.
if(getCI()->getCodeGenOpts().CudaGpuBinaryFileNames.empty())
getCI()->getCodeGenOpts().CudaGpuBinaryFileNames.push_back(
std::string(TmpFolder.data()) + "cling.fatbin");
// Add the cling runtime headers to the CUDA device compiler, that
// it can handle the special functions of cling.
llvm::SmallVector<std::string, 256> clingHeaders;
GetIncludePaths(clingHeaders, false, true);
m_CUDACompiler.reset(
new IncrementalCUDADeviceCompiler(TmpFolder.data(),
getCI()->getCodeGenOpts().CudaGpuBinaryFileNames[0],
m_Opts, clingHeaders));
}
// Tell the diagnostic client that we are entering file parsing mode.
DiagnosticConsumer& DClient = getCI()->getDiagnosticClient();
DClient.BeginSourceFile(getCI()->getLangOpts(), &PP);

View File

@ -142,6 +142,8 @@ void CompilerOptions::Parse(int argc, const char* const argv[],
case options::OPT_fmodule_name_EQ: LLVM_FALLTHROUGH;
case options::OPT_fmodule_name: ModuleName = arg->getValue(); break;
case options::OPT_fmodules_cache_path: CachePath = arg->getValue(); break;
case options::OPT_cuda_path_EQ: CUDAPath = arg->getValue(); break;
case options::OPT_cuda_gpu_arch_EQ: CUDAGpuArch = arg->getValue(); break;
default:
if (Inputs && arg->getOption().getKind() == Option::InputClass)