Allow setting arguments for clang nvptx and fatbinary when starting cling.

It is now possible to set some arguments of clang nvptx and fatbinary via arguments passed when cling starts. The arguments are filtered, so not every argument is supported yet. They cannot be changed at runtime, because the PCH files forbid it. For example, clang nvptx uses the optimization level that was set when cling started.

At the moment, the handling of debug options for clang nvptx is simple: if any debug option is detected, just a -g is added to the clang nvptx invocation.

Additional PTX options for clang nvptx do not work at the moment; there is a parsing problem at cling startup.
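For illustration only (a hypothetical invocation, not taken from this commit), a start such as

  cling -x cuda --cuda-gpu-arch=sm_35 -O2 -g

would forward the GPU architecture, the optimization level and the -g flag to clang nvptx when the device compiler builds its PCH and PTX files.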
Simeon Ehrig 2018-03-22 11:55:03 +01:00 committed by sftnight
parent 4882fbe886
commit 454c359c51
5 changed files with 193 additions and 75 deletions

View File

@ -69,6 +69,9 @@ namespace cling {
/// \brief Architecture level of the CUDA gpu. Necessary for the
/// NVIDIA fatbinary tool.
std::string CUDAGpuArch;
/// \brief Contains arguments which will be passed to the NVIDIA tool
/// fatbinary.
std::vector<std::string> CUDAFatbinaryArgs;
///\brief The remaining arguments to pass to clang.
///

View File

@ -11,7 +11,10 @@
#include "cling/Interpreter/InvocationOptions.h"
#include "cling/Utils/Paths.h"
#include "clang/Lex/HeaderSearchOptions.h"
#include "clang/Frontend/CompilerInstance.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Program.h"
@ -23,29 +26,79 @@
namespace cling {
IncrementalCUDADeviceCompiler::IncrementalCUDADeviceCompiler(
std::string filePath,
std::string & CudaGpuBinaryFileNames,
int optLevel,
cling::InvocationOptions & invocationOptions,
std::shared_ptr<clang::HeaderSearchOptions> headerSearchOptions)
clang::CompilerInstance * CI)
: m_Counter(0),
m_FilePath(filePath),
m_FatbinFilePath(CudaGpuBinaryFileNames),
// We get for example sm_20 from the cling arguments and have to shrink to
// 20.
m_SMLevel(invocationOptions.CompilerOpts.CUDAGpuArch.empty() ? "20" :
invocationOptions.CompilerOpts.CUDAGpuArch.substr(3) ),
m_HeaderSearchOptions(headerSearchOptions) {
assert(!CudaGpuBinaryFileNames.empty() && "CudaGpuBinaryFileNames can't be empty");
m_FilePath(filePath){
if(CI->getCodeGenOpts().CudaGpuBinaryFileNames.empty()){
llvm::errs() << "Error: CudaGpuBinaryFileNames can't be empty\n";
m_Init = false;
} else {
m_FatbinFilePath = CI->getCodeGenOpts().CudaGpuBinaryFileNames[0];
m_Init = true;
}
m_Init = generateHelperFiles();
m_Init = m_Init && generateHelperFiles();
m_Init = m_Init && searchCompilingTools(invocationOptions);
setCuArgs(CI->getLangOpts(), invocationOptions, optLevel,
CI->getCodeGenOpts().getDebugInfo());
llvm::Triple hostTarget(llvm::sys::getDefaultTargetTriple());
m_FatbinArch = hostTarget.isArch64Bit() ? "-64" : "-32";
m_HeaderSearchOptions = CI->getHeaderSearchOptsPtr();
}
void IncrementalCUDADeviceCompiler::setCuArgs(
clang::LangOptions & langOpts,
cling::InvocationOptions & invocationOptions,
int & optLevel, clang::codegenoptions::DebugInfoKind debugInfo){
// Set the C++ standard. Only one of these conditions can apply.
if(langOpts.CPlusPlus11)
m_CuArgs.cppStdVersion = "-std=c++11";
if(langOpts.CPlusPlus14)
m_CuArgs.cppStdVersion = "-std=c++14";
if(langOpts.CPlusPlus1z)
m_CuArgs.cppStdVersion = "-std=c++1z";
if(langOpts.CPlusPlus2a)
m_CuArgs.cppStdVersion = "-std=c++2a";
m_CuArgs.optLevel = "-O" + std::to_string(optLevel);
if(!invocationOptions.CompilerOpts.CUDAGpuArch.empty()){
m_CuArgs.ptxSmVersion = "--cuda-gpu-arch="
+ invocationOptions.CompilerOpts.CUDAGpuArch;
m_CuArgs.fatbinSmVersion = "--image=profile=compute_"
+ invocationOptions.CompilerOpts.CUDAGpuArch.substr(3);
}
// Generating the fatbin file depends on the architecture of the host.
llvm::Triple hostTarget(llvm::sys::getDefaultTargetTriple());
m_CuArgs.fatbinArch = hostTarget.isArch64Bit() ? "-64" : "-32";
m_CuArgs.verbose = invocationOptions.Verbose();
// FIXME : Should not reduce the fine-grained debug options to a simple -g.
if(debugInfo == clang::codegenoptions::DebugLineTablesOnly ||
debugInfo == clang::codegenoptions::LimitedDebugInfo ||
debugInfo == clang::codegenoptions::FullDebugInfo)
m_CuArgs.debug = true;
// FIXME : Cling has problems detecting these arguments.
/*
if(langOpts.CUDADeviceFlushDenormalsToZero)
m_CuArgs.additionalPtxOpt.push_back("-fcuda-flush-denormals-to-zero");
if(langOpts.CUDADeviceApproxTranscendentals)
m_CuArgs.additionalPtxOpt.push_back("-fcuda-approx-transcendentals");
if(langOpts.CUDAAllowVariadicFunctions)
m_CuArgs.additionalPtxOpt.push_back("-fcuda-allow-variadic-functions");
*/
m_CuArgs.fatbinaryOpt = invocationOptions.CompilerOpts.CUDAFatbinaryArgs;
}
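// Illustrative sketch, not part of the commit: for a cling session started
// with -std=c++14 -O2 --cuda-gpu-arch=sm_35 and debug info enabled,
// setCuArgs ends up with roughly
//   m_CuArgs.cppStdVersion   == "-std=c++14"
//   m_CuArgs.optLevel        == "-O2"
//   m_CuArgs.ptxSmVersion    == "--cuda-gpu-arch=sm_35"
//   m_CuArgs.fatbinSmVersion == "--image=profile=compute_35"
//   m_CuArgs.debug           == true
// while m_CuArgs.fatbinaryOpt copies any -Xcuda-fatbinary values.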
bool IncrementalCUDADeviceCompiler::generateHelperFiles(){
// Generate an empty dummy.cu file.
m_DummyCUPath = m_FilePath + "dummy.cu";
@ -62,7 +115,8 @@ namespace cling {
return true;
}
bool IncrementalCUDADeviceCompiler::searchCompilingTools(cling::InvocationOptions & invocationOptions){
bool IncrementalCUDADeviceCompiler::searchCompilingTools(
cling::InvocationOptions & invocationOptions){
// Search for clang in the folder of cling.
llvm::SmallString<128> cwd;
llvm::sys::fs::current_path(cwd);
@ -141,16 +195,17 @@ namespace cling {
}
bool IncrementalCUDADeviceCompiler::generatePCH() {
// clang++ -std=c++14 -S -Xclang -emit-pch ${clingHeaders} cling[0-9].cu
// -D__CLING__ -o cling[0-9].cu.pch ${ | -include-pch cling[0-9].cu.pch }
// --cuda-gpu-arch=sm_${m_smLevel} -pthread --cuda-device-only
// clang++ -std=c++xx -Ox -S -Xclang -emit-pch ${clingHeaders} cling[0-9].cu
// -D__CLING__ -o cling[0-9].cu.pch [-include-pch cling[0-9].cu.pch]
// --cuda-gpu-arch=sm_[1-7][0-9] -pthread --cuda-device-only [-v] [-g]
// ${m_CuArgs.additionalPtxOpt}
llvm::SmallVector<const char*, 256> argv;
// The first argument has to be the program name.
argv.push_back(m_ClangPath.c_str());
// FIXME: Should replaced by the arguments of the cling instance.
argv.push_back("-std=c++14");
argv.push_back(m_CuArgs.cppStdVersion.c_str());
argv.push_back(m_CuArgs.optLevel.c_str());
argv.push_back("-S");
argv.push_back("-Xclang");
argv.push_back("-emit-pch");
@ -174,11 +229,16 @@ namespace cling {
argv.push_back("-include-pch");
argv.push_back(previousFile.c_str());
}
// FIXME: Should replaced by the arguments of the cling instance.
std::string smString = "--cuda-gpu-arch=sm_" + m_SMLevel;
argv.push_back(smString.c_str());
argv.push_back(m_CuArgs.ptxSmVersion.c_str());
argv.push_back("-pthread");
argv.push_back("--cuda-device-only");
if(m_CuArgs.verbose)
argv.push_back("-v");
if(m_CuArgs.debug)
argv.push_back("-g");
for(std::string & s : m_CuArgs.additionalPtxOpt){
argv.push_back(s.c_str());
}
// The argv list has to end with a nullptr.
argv.push_back(nullptr);
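// Sketch (an assumption; the execution code is not part of this hunk): the
// assembled argv is then run via llvm::sys::ExecuteAndWait, roughly
//   std::string error;
//   int res = llvm::sys::ExecuteAndWait(m_ClangPath, argv.data(), nullptr,
//                                       {}, 0, 0, &error);
//   return res == 0;
// The exact ExecuteAndWait signature differs between LLVM versions.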
@ -197,15 +257,16 @@ namespace cling {
}
bool cling::IncrementalCUDADeviceCompiler::generatePTX() {
// clang++ -std=c++14 -S dummy.cu -o cling.ptx -include-pch cling[0-9].cu.pch
// --cuda-gpu-arch=sm_${m_smLevel} -pthread --cuda-device-only
// clang++ -std=c++xx -Ox -S dummy.cu -o cling.ptx -include-pch
// cling[0-9].cu.pch --cuda-gpu-arch=sm_xx -pthread --cuda-device-only [-v]
// [-g] ${m_CuArgs.additionalPtxOpt}
llvm::SmallVector<const char*, 128> argv;
// The first argument has to be the program name.
argv.push_back(m_ClangPath.c_str());
// FIXME: Should replaced by the arguments of the cling instance.
argv.push_back("-std=c++14");
argv.push_back(m_CuArgs.cppStdVersion.c_str());
argv.push_back(m_CuArgs.optLevel.c_str());
argv.push_back("-S");
argv.push_back(m_DummyCUPath.c_str());
argv.push_back("-o");
@ -213,11 +274,16 @@ namespace cling {
argv.push_back("-include-pch");
std::string pchFile = m_GenericFileName + std::to_string(m_Counter) +".cu.pch";
argv.push_back(pchFile.c_str());
// FIXME: Should replaced by the arguments of the cling instance.
std::string smString = "--cuda-gpu-arch=sm_" + m_SMLevel;
argv.push_back(smString.c_str());
argv.push_back(m_CuArgs.ptxSmVersion.c_str());
argv.push_back("-pthread");
argv.push_back("--cuda-device-only");
if(m_CuArgs.verbose)
argv.push_back("-v");
if(m_CuArgs.debug)
argv.push_back("-g");
for(std::string & s : m_CuArgs.additionalPtxOpt){
argv.push_back(s.c_str());
}
// The argv list has to end with a nullptr.
argv.push_back(nullptr);
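// Sketch of the resulting chain (illustration only, counter values are
// approximate): successive inputs produce cling0.cu.pch, cling1.cu.pch
// (built with -include-pch cling0.cu.pch), cling2.cu.pch, and so on;
// generatePTX then compiles dummy.cu against the newest .cu.pch file to
// produce cling.ptx.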
@ -237,19 +303,24 @@ namespace cling {
bool IncrementalCUDADeviceCompiler::generateFatbinaryInternal() {
// fatbinary --cuda [-32 | -64] --create cling.fatbin
// --image=profile=compute_${m_smLevel},file=cling.ptx
// --image=profile=compute_xx,file=cling.ptx [-g] ${m_CuArgs.fatbinaryOpt}
llvm::SmallVector<const char*, 128> argv;
// The first argument has to be the program name.
argv.push_back(m_FatbinaryPath.c_str());
argv.push_back("--cuda");
argv.push_back(m_FatbinArch.c_str());
argv.push_back(m_CuArgs.fatbinArch.c_str());
argv.push_back("--create");
argv.push_back(m_FatbinFilePath.c_str());
std::string ptxCode = "--image=profile=compute_"+ m_SMLevel
std::string ptxCode = m_CuArgs.fatbinSmVersion
+ ",file=" + m_PTXFilePath;
argv.push_back(ptxCode.c_str());
if(m_CuArgs.debug)
argv.push_back("-g");
for(std::string & s : m_CuArgs.fatbinaryOpt){
argv.push_back(s.c_str());
}
// The argv list has to end with a nullptr.
argv.push_back(nullptr);
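// Worked example (hypothetical values): on a 64-bit host with sm_35 and -g,
// the assembled command is roughly
//   fatbinary --cuda -64 --create <m_FatbinFilePath>
//             --image=profile=compute_35,file=<m_PTXFilePath> -g
// followed by every entry of m_CuArgs.fatbinaryOpt.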
@ -266,27 +337,6 @@ namespace cling {
return true;
}
void IncrementalCUDADeviceCompiler::addIncludePath(llvm::StringRef pathStr,
bool leadingIncludeCommand){
if(leadingIncludeCommand) {
m_Headers.push_back(pathStr);
} else {
m_Headers.push_back("-I" + std::string(pathStr.data()));
}
}
void IncrementalCUDADeviceCompiler::addIncludePaths(
const llvm::SmallVectorImpl<std::string> & headers,
bool leadingIncludeCommand){
if(leadingIncludeCommand){
m_Headers.append(headers.begin(), headers.end());
} else {
for(std::string header : headers){
m_Headers.push_back("-I" + header);
}
}
}
void IncrementalCUDADeviceCompiler::dump(){
llvm::outs() << "current counter: " << m_Counter << "\n" <<
"CUDA device compiler is valid: " << m_Init << "\n" <<
@ -294,9 +344,30 @@ namespace cling {
"fatbin file path: " << m_FatbinFilePath << "\n" <<
"dummy.cu file path: " << m_DummyCUPath << "\n" <<
"cling.ptx file path: " << m_PTXFilePath << "\n" <<
"generic file path: " << m_GenericFileName << "[0-9]*.cu{.pch}\n" <<
"generic file path: " << m_GenericFileName
<< "[0-9]*.cu{.pch}\n" <<
"clang++ path: " << m_ClangPath << "\n" <<
"nvidia fatbinary path: " << m_FatbinaryPath << "\n";
"nvidia fatbinary path: " << m_FatbinaryPath << "\n" <<
"m_CuArgs c++ standard: " << m_CuArgs.cppStdVersion << "\n" <<
"m_CuArgs opt level: " << m_CuArgs.optLevel << "\n" <<
"m_CuArgs SM level for clang nvptx: "
<< m_CuArgs.ptxSmVersion << "\n" <<
"m_CuArgs SM level for fatbinary: "
<< m_CuArgs.fatbinSmVersion << "\n" <<
"m_CuArgs fatbinary architectur: "
<< m_CuArgs.fatbinArch << "\n" <<
"m_CuArgs verbose: " << m_CuArgs.verbose << "\n" <<
"m_CuArgs debug: " << m_CuArgs.debug << "\n";
llvm::outs() << "m_CuArgs additional clang nvptx options: ";
for(std::string & s : m_CuArgs.additionalPtxOpt){
llvm::outs() << s << " ";
}
llvm::outs() << "\n";
llvm::outs() << "m_CuArgs additional fatbinary options: ";
for(std::string & s : m_CuArgs.fatbinaryOpt){
llvm::outs() << s << " ";
}
llvm::outs() << "\n";
}
} // end namespace cling

View File

@ -10,6 +10,7 @@
#ifndef CLING_INCREMENTAL_CUDA_DEVICE_JIT_H
#define CLING_INCREMENTAL_CUDA_DEVICE_JIT_H
#include "clang/Frontend/CodeGenOptions.h"
#include "llvm/ADT/SmallVector.h"
#include <string>
@ -20,8 +21,9 @@ namespace cling{
}
namespace clang {
class CodeGenOptions;
class CompilerInstance;
class HeaderSearchOptions;
class LangOptions;
}
namespace llvm {
@ -35,8 +37,45 @@ namespace cling {
/// llvm::sys::ExecuteAndWait.
///
class IncrementalCUDADeviceCompiler {
/// FIXME : Add handling of newly included headers. The include commands can
/// be added at the prompt or via .L .
static constexpr unsigned CxxStdCompiledWith() {
// The value of __cplusplus in GCC < 5.0 (e.g. 4.9.3) when
// either -std=c++1y or -std=c++14 is specified is 201300L, which fails
// the test for C++14 or more (201402L) as previously specified.
// I would claim that the check should be relaxed to:
#if __cplusplus > 201402L
return 17;
#elif __cplusplus > 201103L || (defined(LLVM_ON_WIN32) && _MSC_VER >= 1900)
return 14;
#elif __cplusplus >= 201103L
return 11;
#else
#error "Unknown __cplusplus version"
#endif
}
///\brief Contains the arguments for clang nvptx and the NVIDIA fatbinary
/// tool. The arguments are static and will be set in the constructor of
/// IncrementalCUDADeviceCompiler.
struct CUDACompilerArgs {
std::string cppStdVersion = "-std=c++" + std::to_string(CxxStdCompiledWith());
std::string optLevel = "-O0";
std::string ptxSmVersion = "--cuda-gpu-arch=sm_20";
std::string fatbinSmVersion = "--image=profile=compute_20";
///\brief Argument for the fatbinary tool, which depends on whether the OS
/// is 32 bit or 64 bit.
std::string fatbinArch = "-32";
///\brief True, if the flag -v is set.
bool verbose = false;
///\brief True, if the flag -g is set.
bool debug = false;
///\brief A list of arguments which will be passed to clang nvptx.
std::vector<std::string> additionalPtxOpt;
///\brief A list of arguments which will be passed to the fatbinary tool.
std::vector<std::string> fatbinaryOpt;
};
CUDACompilerArgs m_CuArgs;
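// Illustrative note, not part of the commit: without CUDA-related startup
// arguments m_CuArgs keeps the defaults above, so clang nvptx runs with
// roughly -std=c++<CxxStdCompiledWith()> -O0 --cuda-gpu-arch=sm_20, and
// fatbinary gets --image=profile=compute_20 plus -32 or -64 depending on
// the host architecture.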
///\brief The counter responsible for generating a chain of .cu source files
/// and .cu.pch files.
@ -58,9 +97,6 @@ namespace cling {
std::string m_PTXFilePath;
///\brief Will be used to generate .cu and .cu.pch files.
std::string m_GenericFileName;
///\brief The SM-Level describes, which functions are possible in the code
/// and on the gpu. Just a number [1-7][0-9].
std::string m_SMLevel;
///\brief Path to the clang++ compiler, which will be used to compile the
/// PCH files and the PTX code. Should be in the same folder as cling.
@ -68,10 +104,6 @@ namespace cling {
///\brief Path to the NVIDIA tool fatbinary.
std::string m_FatbinaryPath;
///\brief Argument for the fatbinary tool, which is depend, if the OS is
/// 32 bit or 64 bit.
std::string m_FatbinArch;
///\brief Contains information about all include paths.
///
std::shared_ptr<clang::HeaderSearchOptions> m_HeaderSearchOptions;
@ -116,22 +148,32 @@ namespace cling {
///\returns True, if the fatbinary tool returns 0.
bool generateFatbinaryInternal();
///\brief This function sets the values of m_CuArgs.
///
///\param [in] langOpts - The LangOptions of the CompilerInstance.
///\param [in] invocationOptions - The invocationOptions of the interpreter.
///\param [in] optLevel - The optimization level of the interpreter.
///\param [in] debugInfo - The debugInfo of the CompilerInstance.
void setCuArgs(clang::LangOptions & langOpts,
cling::InvocationOptions & invocationOptions, int & optLevel,
clang::codegenoptions::DebugInfoKind debugInfo);
public:
///\brief Constructor for IncrementalCUDADeviceCompiler
///
///\param [in] filePath - All files will be generated in the folder of the
/// filePath, except the fatbin file, if it has another path. Has to end
/// with a separator. Can be empty.
///\param [in] CudaGpuBinaryFileNames - Path to the fatbin file. Must not
/// be empty.
///\param [in] optLevel - The optimization level of the interpreter instance.
/// The value will be copied, because a change of it is not allowed.
///\param [in] invocationOptions - Contains values for the arguments of
/// clang and the NVIDIA tool fatbinary.
///\param [in] headerSearchOptions - Contains information about all include
/// paths.
IncrementalCUDADeviceCompiler(std::string filePath,
std::string & CudaGpuBinaryFileNames,
///\param [in] CompilerInstance - Will be used for m_CuArgs and the include
/// path handling.
IncrementalCUDADeviceCompiler(std::string filePath,
int optLevel,
cling::InvocationOptions & invocationOptions,
std::shared_ptr<clang::HeaderSearchOptions> headerSearchOptions);
clang::CompilerInstance * CI);
///\brief Generate a new fatbin file with the path in CudaGpuBinaryFileNames.
/// It will add the content of input to the existing source code, which was

View File

@ -256,9 +256,9 @@ namespace cling {
m_CUDACompiler.reset(
new IncrementalCUDADeviceCompiler(TmpFolder.data(),
getCI()->getCodeGenOpts().CudaGpuBinaryFileNames[0],
m_OptLevel,
m_Opts,
getCI()->getHeaderSearchOptsPtr()));
getCI()));
}
// Tell the diagnostic client that we are entering file parsing mode.

View File

@ -144,6 +144,8 @@ void CompilerOptions::Parse(int argc, const char* const argv[],
case options::OPT_fmodules_cache_path: CachePath = arg->getValue(); break;
case options::OPT_cuda_path_EQ: CUDAPath = arg->getValue(); break;
case options::OPT_cuda_gpu_arch_EQ: CUDAGpuArch = arg->getValue(); break;
case options::OPT_Xcuda_fatbinary: CUDAFatbinaryArgs.push_back(arg->getValue());
break;
default:
if (Inputs && arg->getOption().getKind() == Option::InputClass)