Re-enable CodeGen-OptLevel after llvm13 upgrade:
With the upgrade, BackendPasses was modifying a TargetMachine that was not used by SimpleCompiler. Change that by: (1) using a SimpleCompiler that uses IncrementalJIT::TM; (2) moving the TM creation to IncrementalJIT, and giving access to it. This reduces the runtime of https://github.com/root-project/root/issues/11927 to: before llvm upgrade: 2.69s; llvm13, without this commit: ???; llvm13, with this commit: 2.89s; i.e., a slow-down of 7% (that is likely caused by the different emission mechanism of Orc-v2; to be confirmed...)
This commit is contained in:
parent
cefa80ef54
commit
da247bd77a
@ -20,19 +20,12 @@
|
||||
#include "cling/Utils/Platform.h"
|
||||
|
||||
#include "clang/Basic/Diagnostic.h"
|
||||
#include "clang/Basic/TargetOptions.h"
|
||||
#include "clang/Frontend/CompilerInstance.h"
|
||||
|
||||
#include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h"
|
||||
#include "llvm/IR/Constants.h"
|
||||
#include "llvm/IR/Instructions.h"
|
||||
#include "llvm/IR/LLVMContext.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/ADT/SmallPtrSet.h"
|
||||
#include "llvm/ADT/Triple.h"
|
||||
#include "llvm/Support/Host.h"
|
||||
#include "llvm/Support/TargetRegistry.h"
|
||||
#include "llvm/Target/TargetMachine.h"
|
||||
|
||||
#include <iostream>
|
||||
|
||||
@ -40,73 +33,6 @@ using namespace llvm;
|
||||
|
||||
namespace cling {
|
||||
|
||||
namespace {
|
||||
|
||||
static std::unique_ptr<TargetMachine>
|
||||
CreateHostTargetMachine(const clang::CompilerInstance& CI) {
|
||||
const clang::TargetOptions& TargetOpts = CI.getTargetOpts();
|
||||
const clang::CodeGenOptions& CGOpt = CI.getCodeGenOpts();
|
||||
const std::string& Triple = TargetOpts.Triple;
|
||||
|
||||
std::string Error;
|
||||
const Target *TheTarget = TargetRegistry::lookupTarget(Triple, Error);
|
||||
if (!TheTarget) {
|
||||
cling::errs() << "cling::IncrementalExecutor: unable to find target:\n"
|
||||
<< Error;
|
||||
return std::unique_ptr<TargetMachine>();
|
||||
}
|
||||
|
||||
CodeGenOpt::Level OptLevel = CodeGenOpt::Default;
|
||||
switch (CGOpt.OptimizationLevel) {
|
||||
case 0: OptLevel = CodeGenOpt::None; break;
|
||||
case 1: OptLevel = CodeGenOpt::Less; break;
|
||||
case 2: OptLevel = CodeGenOpt::Default; break;
|
||||
case 3: OptLevel = CodeGenOpt::Aggressive; break;
|
||||
default: OptLevel = CodeGenOpt::Default;
|
||||
}
|
||||
using namespace llvm::orc;
|
||||
auto JTMB = JITTargetMachineBuilder::detectHost();
|
||||
if (!JTMB)
|
||||
logAllUnhandledErrors(JTMB.takeError(), llvm::errs(),
|
||||
"Error detecting host");
|
||||
|
||||
JTMB->setCodeGenOptLevel(OptLevel);
|
||||
#ifdef _WIN32
|
||||
JTMB->getOptions().EmulatedTLS = false;
|
||||
#endif // _WIN32
|
||||
|
||||
#if defined(__powerpc64__) || defined(__PPC64__)
|
||||
// We have to use large code model for PowerPC64 because TOC and text sections
|
||||
// can be more than 2GB apart.
|
||||
JTMB->setCodeModel(CodeModel::Large);
|
||||
#endif
|
||||
|
||||
std::unique_ptr<TargetMachine> TM = cantFail(JTMB->createTargetMachine());
|
||||
|
||||
// Forcefully disable GlobalISel, it might be enabled on AArch64 without
|
||||
// optimizations. In tests on an Apple M1 after the upgrade to LLVM 9, this
|
||||
// new instruction selection framework emits branches / calls that expect all
|
||||
// code to be reachable in +/- 128 MB. This cannot be guaranteed during JIT,
|
||||
// which generates code into allocated pages on the heap and could span the
|
||||
// entire address space of the process.
|
||||
//
|
||||
// TODO:
|
||||
// 1. Try to reproduce the problem with vanilla lli of LLVM 9 to check that
|
||||
// this is not related to the way Cling incrementally JITs and executes.
|
||||
// 2. Figure out exactly why GlobalISel emits different branch instructions,
|
||||
// and whether this is a problem in the framework or of the generated IR.
|
||||
// 3. Verify if the same happens with LLVM 11/12 (whatever Cling will move to
|
||||
// next), and possibly fix the underlying issue in LLVM upstream's `main`.
|
||||
//
|
||||
// FIXME: Lift this restriction and allow the target to enable GlobalISel,
|
||||
// if deemed ready by upstream developers.
|
||||
TM->setGlobalISel(false);
|
||||
|
||||
return TM;
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
IncrementalExecutor::IncrementalExecutor(clang::DiagnosticsEngine& /*diags*/,
|
||||
const clang::CompilerInstance& CI,
|
||||
void *ExtraLibHandle, bool Verbose):
|
||||
@ -120,18 +46,16 @@ IncrementalExecutor::IncrementalExecutor(clang::DiagnosticsEngine& /*diags*/,
|
||||
// MSVC doesn't support m_AtExitFuncsSpinLock=ATOMIC_FLAG_INIT; in the class definition
|
||||
std::atomic_flag_clear( &m_AtExitFuncsSpinLock );
|
||||
|
||||
std::unique_ptr<TargetMachine> TM(CreateHostTargetMachine(CI));
|
||||
auto &TMRef = *TM;
|
||||
llvm::Error Err = llvm::Error::success();
|
||||
auto EPC = llvm::cantFail(llvm::orc::SelfExecutorProcessControl::Create());
|
||||
m_JIT.reset(new IncrementalJIT(*this, std::move(TM), std::move(EPC), Err,
|
||||
m_JIT.reset(new IncrementalJIT(*this, CI, std::move(EPC), Err,
|
||||
ExtraLibHandle, Verbose));
|
||||
if (Err) {
|
||||
llvm::logAllUnhandledErrors(std::move(Err), llvm::errs(), "Fatal: ");
|
||||
llvm_unreachable("Propagate this error and exit gracefully");
|
||||
}
|
||||
|
||||
m_BackendPasses.reset(new BackendPasses(CI.getCodeGenOpts(), TMRef));
|
||||
m_BackendPasses.reset(new BackendPasses(CI.getCodeGenOpts(), m_JIT->getTargetMachine()));
|
||||
}
|
||||
|
||||
IncrementalExecutor::~IncrementalExecutor() {}
|
||||
|
@ -15,11 +15,18 @@
|
||||
#include "cling/Utils/Output.h"
|
||||
#include "cling/Utils/Utils.h"
|
||||
|
||||
#include <clang/Basic/TargetOptions.h>
|
||||
#include <clang/Frontend/CompilerInstance.h>
|
||||
|
||||
#include <llvm/ADT/Triple.h>
|
||||
#include <llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h>
|
||||
#include <llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h>
|
||||
#include <llvm/ExecutionEngine/SectionMemoryManager.h>
|
||||
#include <llvm/IR/LLVMContext.h>
|
||||
#include <llvm/Support/raw_ostream.h>
|
||||
#include <llvm/Support/Host.h>
|
||||
#include <llvm/Support/TargetRegistry.h>
|
||||
#include <llvm/Target/TargetMachine.h>
|
||||
|
||||
using namespace llvm;
|
||||
using namespace llvm::orc;
|
||||
@ -283,6 +290,69 @@ Error RTDynamicLibrarySearchGenerator::tryToGenerate(
|
||||
|
||||
return JD.define(absoluteSymbols(std::move(NewSymbols)), CurrentRT());
|
||||
}
|
||||
|
||||
static std::unique_ptr<TargetMachine>
|
||||
CreateHostTargetMachine(const clang::CompilerInstance& CI) {
|
||||
const clang::TargetOptions& TargetOpts = CI.getTargetOpts();
|
||||
const clang::CodeGenOptions& CGOpt = CI.getCodeGenOpts();
|
||||
const std::string& Triple = TargetOpts.Triple;
|
||||
|
||||
std::string Error;
|
||||
const Target *TheTarget = TargetRegistry::lookupTarget(Triple, Error);
|
||||
if (!TheTarget) {
|
||||
cling::errs() << "cling::IncrementalExecutor: unable to find target:\n"
|
||||
<< Error;
|
||||
return std::unique_ptr<TargetMachine>();
|
||||
}
|
||||
|
||||
CodeGenOpt::Level OptLevel = CodeGenOpt::Default;
|
||||
switch (CGOpt.OptimizationLevel) {
|
||||
case 0: OptLevel = CodeGenOpt::None; break;
|
||||
case 1: OptLevel = CodeGenOpt::Less; break;
|
||||
case 2: OptLevel = CodeGenOpt::Default; break;
|
||||
case 3: OptLevel = CodeGenOpt::Aggressive; break;
|
||||
default: OptLevel = CodeGenOpt::Default;
|
||||
}
|
||||
using namespace llvm::orc;
|
||||
auto JTMB = JITTargetMachineBuilder::detectHost();
|
||||
if (!JTMB)
|
||||
logAllUnhandledErrors(JTMB.takeError(), llvm::errs(),
|
||||
"Error detecting host");
|
||||
|
||||
JTMB->setCodeGenOptLevel(OptLevel);
|
||||
#ifdef _WIN32
|
||||
JTMB->getOptions().EmulatedTLS = false;
|
||||
#endif // _WIN32
|
||||
|
||||
#if defined(__powerpc64__) || defined(__PPC64__)
|
||||
// We have to use large code model for PowerPC64 because TOC and text sections
|
||||
// can be more than 2GB apart.
|
||||
JTMB->setCodeModel(CodeModel::Large);
|
||||
#endif
|
||||
|
||||
std::unique_ptr<TargetMachine> TM = cantFail(JTMB->createTargetMachine());
|
||||
|
||||
// Forcefully disable GlobalISel, it might be enabled on AArch64 without
|
||||
// optimizations. In tests on an Apple M1 after the upgrade to LLVM 9, this
|
||||
// new instruction selection framework emits branches / calls that expect all
|
||||
// code to be reachable in +/- 128 MB. This cannot be guaranteed during JIT,
|
||||
// which generates code into allocated pages on the heap and could span the
|
||||
// entire address space of the process.
|
||||
//
|
||||
// TODO:
|
||||
// 1. Try to reproduce the problem with vanilla lli of LLVM 9 to check that
|
||||
// this is not related to the way Cling incrementally JITs and executes.
|
||||
// 2. Figure out exactly why GlobalISel emits different branch instructions,
|
||||
// and whether this is a problem in the framework or of the generated IR.
|
||||
// 3. Verify if the same happens with LLVM 11/12 (whatever Cling will move to
|
||||
// next), and possibly fix the underlying issue in LLVM upstream's `main`.
|
||||
//
|
||||
// FIXME: Lift this restriction and allow the target to enable GlobalISel,
|
||||
// if deemed ready by upstream developers.
|
||||
TM->setGlobalISel(false);
|
||||
|
||||
return TM;
|
||||
}
|
||||
} // unnamed namespace
|
||||
|
||||
namespace cling {
|
||||
@ -291,24 +361,15 @@ namespace cling {
|
||||
llvm::JITEventListener* createPerfJITEventListener();
|
||||
|
||||
IncrementalJIT::IncrementalJIT(
|
||||
IncrementalExecutor& Executor, std::unique_ptr<TargetMachine> TM,
|
||||
IncrementalExecutor& Executor, const clang::CompilerInstance &CI,
|
||||
std::unique_ptr<llvm::orc::ExecutorProcessControl> EPC, Error& Err,
|
||||
void *ExtraLibHandle, bool Verbose)
|
||||
: SkipHostProcessLookup(false),
|
||||
TM(std::move(TM)),
|
||||
TM(CreateHostTargetMachine(CI)),
|
||||
SingleThreadedContext(std::make_unique<LLVMContext>()) {
|
||||
ErrorAsOutParameter _(&Err);
|
||||
|
||||
// FIXME: We should probably take codegen settings from the CompilerInvocation
|
||||
// and not from the target machine
|
||||
JITTargetMachineBuilder JTMB(this->TM->getTargetTriple());
|
||||
JTMB.setCodeModel(this->TM->getCodeModel());
|
||||
JTMB.setCodeGenOptLevel(this->TM->getOptLevel());
|
||||
JTMB.setFeatures(this->TM->getTargetFeatureString());
|
||||
JTMB.setRelocationModel(this->TM->getRelocationModel());
|
||||
|
||||
LLJITBuilder Builder;
|
||||
Builder.setJITTargetMachineBuilder(std::move(JTMB));
|
||||
Builder.setExecutorProcessControl(std::move(EPC));
|
||||
|
||||
// Create ObjectLinkingLayer with our own MemoryManager.
|
||||
@ -341,6 +402,11 @@ IncrementalJIT::IncrementalJIT(
|
||||
return Layer;
|
||||
});
|
||||
|
||||
Builder.setCompileFunctionCreator([&](llvm::orc::JITTargetMachineBuilder)
|
||||
-> llvm::Expected<std::unique_ptr<llvm::orc::IRCompileLayer::IRCompiler>> {
|
||||
return std::make_unique<SimpleCompiler>(*TM);
|
||||
});
|
||||
|
||||
if (Expected<std::unique_ptr<LLJIT>> JitInstance = Builder.create()) {
|
||||
Jit = std::move(*JitInstance);
|
||||
} else {
|
||||
|
@ -27,6 +27,10 @@
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
namespace clang {
|
||||
class CompilerInstance;
|
||||
}
|
||||
|
||||
namespace cling {
|
||||
|
||||
class IncrementalExecutor;
|
||||
@ -52,7 +56,7 @@ private:
|
||||
class IncrementalJIT {
|
||||
public:
|
||||
IncrementalJIT(IncrementalExecutor& Executor,
|
||||
std::unique_ptr<llvm::TargetMachine> TM,
|
||||
const clang::CompilerInstance &CI,
|
||||
std::unique_ptr<llvm::orc::ExecutorProcessControl> EPC,
|
||||
llvm::Error &Err, void *ExtraLibHandle, bool Verbose);
|
||||
|
||||
@ -82,6 +86,11 @@ public:
|
||||
/// @brief Invoke LLJIT::initialize on the JIT's main JITDylib.
/// @return The llvm::Error reported by the initialize call.
llvm::Error runCtors() const {
  auto &MainDylib = Jit->getMainJITDylib();
  return Jit->initialize(MainDylib);
}
|
||||
|
||||
/// @brief Get the TargetMachine used by the JIT.
|
||||
/// Non-const because BackendPasses need to update OptLevel.
|
||||
llvm::TargetMachine &getTargetMachine() { return *TM; }
|
||||
|
||||
private:
|
||||
std::unique_ptr<llvm::orc::LLJIT> Jit;
|
||||
llvm::orc::SymbolMap m_InjectedSymbols;
|
||||
|
Loading…
x
Reference in New Issue
Block a user