Re-enable CodeGen-OptLevel after llvm13 upgrade:

With the upgrade, BackendPasses was modifying a TargetMachine that was
not used by SimpleCompiler.

Change that by
- using a SimpleCompiler that uses IncrementalJIT::TM (see the sketch below);
- moving the TargetMachine creation into IncrementalJIT and exposing it
  through a getTargetMachine() accessor.

This reduces the runtime of https://github.com/root-project/root/issues/11927.
Measured timings:
- before the llvm upgrade: 2.69s
- llvm13, without this commit: ???
- llvm13, with this commit: 2.89s
i.e., a slow-down of 7% relative to the pre-upgrade baseline (likely caused by
the different emission mechanism of Orc v2; to be confirmed...)
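
For context, here is a minimal sketch of the wiring described above, assuming
LLVM 13's Orc v2 API; the helper name createJITSharingTM and its exact shape
are illustrative, not part of this commit. It builds an LLJIT whose IR
compiler is a SimpleCompiler wrapping the same TargetMachine that is handed
back to the caller, so that a consumer such as BackendPasses and the JIT agree
on the CodeGen OptLevel:

// Sketch only (hypothetical helper): create a host TargetMachine, hand it to
// the caller, and make the LLJIT compile through that very TargetMachine via
// a custom CompileFunctionCreator.
#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
#include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h"
#include "llvm/ExecutionEngine/Orc/LLJIT.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Target/TargetMachine.h"
#include <memory>

static llvm::Expected<std::unique_ptr<llvm::orc::LLJIT>>
createJITSharingTM(std::unique_ptr<llvm::TargetMachine> &TM) {
  using namespace llvm;
  using namespace llvm::orc;
  InitializeNativeTarget();
  InitializeNativeTargetAsmPrinter();

  auto JTMB = JITTargetMachineBuilder::detectHost();
  if (!JTMB)
    return JTMB.takeError();
  JTMB->setCodeGenOptLevel(CodeGenOpt::Default);

  auto HostTM = JTMB->createTargetMachine();
  if (!HostTM)
    return HostTM.takeError();
  // The caller owns TM and must keep it alive as long as the JIT lives;
  // backend passes can later read or update its OptLevel.
  TM = std::move(*HostTM);

  LLJITBuilder Builder;
  Builder.setJITTargetMachineBuilder(std::move(*JTMB));
  // Key point of the commit: compile through *this* TargetMachine instead of
  // a second one that LLJIT would otherwise construct internally, so OptLevel
  // changes made on TM are actually seen by the compiler.
  Builder.setCompileFunctionCreator(
      [&TM](JITTargetMachineBuilder)
          -> Expected<std::unique_ptr<IRCompileLayer::IRCompiler>> {
        return std::make_unique<SimpleCompiler>(*TM);
      });
  return Builder.create();
}
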
Axel Naumann 2022-12-21 08:45:57 +01:00 committed by jenkins
parent cefa80ef54
commit da247bd77a
3 changed files with 89 additions and 90 deletions

IncrementalExecutor.cpp

@@ -20,19 +20,12 @@
#include "cling/Utils/Platform.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/TargetOptions.h"
#include "clang/Frontend/CompilerInstance.h"
#include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetMachine.h"
#include <iostream>
@@ -40,73 +33,6 @@ using namespace llvm;
namespace cling {
namespace {
static std::unique_ptr<TargetMachine>
CreateHostTargetMachine(const clang::CompilerInstance& CI) {
const clang::TargetOptions& TargetOpts = CI.getTargetOpts();
const clang::CodeGenOptions& CGOpt = CI.getCodeGenOpts();
const std::string& Triple = TargetOpts.Triple;
std::string Error;
const Target *TheTarget = TargetRegistry::lookupTarget(Triple, Error);
if (!TheTarget) {
cling::errs() << "cling::IncrementalExecutor: unable to find target:\n"
<< Error;
return std::unique_ptr<TargetMachine>();
}
CodeGenOpt::Level OptLevel = CodeGenOpt::Default;
switch (CGOpt.OptimizationLevel) {
case 0: OptLevel = CodeGenOpt::None; break;
case 1: OptLevel = CodeGenOpt::Less; break;
case 2: OptLevel = CodeGenOpt::Default; break;
case 3: OptLevel = CodeGenOpt::Aggressive; break;
default: OptLevel = CodeGenOpt::Default;
}
using namespace llvm::orc;
auto JTMB = JITTargetMachineBuilder::detectHost();
if (!JTMB)
logAllUnhandledErrors(JTMB.takeError(), llvm::errs(),
"Error detecting host");
JTMB->setCodeGenOptLevel(OptLevel);
#ifdef _WIN32
JTMB->getOptions().EmulatedTLS = false;
#endif // _WIN32
#if defined(__powerpc64__) || defined(__PPC64__)
// We have to use large code model for PowerPC64 because TOC and text sections
// can be more than 2GB apart.
JTMB->setCodeModel(CodeModel::Large);
#endif
std::unique_ptr<TargetMachine> TM = cantFail(JTMB->createTargetMachine());
// Forcefully disable GlobalISel, it might be enabled on AArch64 without
// optimizations. In tests on an Apple M1 after the upgrade to LLVM 9, this
// new instruction selection framework emits branches / calls that expect all
// code to be reachable in +/- 128 MB. This cannot be guaranteed during JIT,
// which generates code into allocated pages on the heap and could span the
// entire address space of the process.
//
// TODO:
// 1. Try to reproduce the problem with vanilla lli of LLVM 9 to check that
// this is not related to the way Cling incrementally JITs and executes.
// 2. Figure out exactly why GlobalISel emits different branch instructions,
// and whether this is a problem in the framework or of the generated IR.
// 3. Verify if the same happens with LLVM 11/12 (whatever Cling will move to
// next), and possibly fix the underlying issue in LLVM upstream's `main`.
//
// FIXME: Lift this restriction and allow the target to enable GlobalISel,
// if deemed ready by upstream developers.
TM->setGlobalISel(false);
return TM;
}
} // anonymous namespace
IncrementalExecutor::IncrementalExecutor(clang::DiagnosticsEngine& /*diags*/,
const clang::CompilerInstance& CI,
void *ExtraLibHandle, bool Verbose):
@@ -120,18 +46,16 @@ IncrementalExecutor::IncrementalExecutor(clang::DiagnosticsEngine& /*diags*/,
// MSVC doesn't support m_AtExitFuncsSpinLock=ATOMIC_FLAG_INIT; in the class definition
std::atomic_flag_clear( &m_AtExitFuncsSpinLock );
std::unique_ptr<TargetMachine> TM(CreateHostTargetMachine(CI));
auto &TMRef = *TM;
llvm::Error Err = llvm::Error::success();
auto EPC = llvm::cantFail(llvm::orc::SelfExecutorProcessControl::Create());
m_JIT.reset(new IncrementalJIT(*this, std::move(TM), std::move(EPC), Err,
m_JIT.reset(new IncrementalJIT(*this, CI, std::move(EPC), Err,
ExtraLibHandle, Verbose));
if (Err) {
llvm::logAllUnhandledErrors(std::move(Err), llvm::errs(), "Fatal: ");
llvm_unreachable("Propagate this error and exit gracefully");
}
m_BackendPasses.reset(new BackendPasses(CI.getCodeGenOpts(), TMRef));
m_BackendPasses.reset(new BackendPasses(CI.getCodeGenOpts(), m_JIT->getTargetMachine()));
}
IncrementalExecutor::~IncrementalExecutor() {}

IncrementalJIT.cpp

@@ -15,11 +15,18 @@
#include "cling/Utils/Output.h"
#include "cling/Utils/Utils.h"
#include <clang/Basic/TargetOptions.h>
#include <clang/Frontend/CompilerInstance.h>
#include <llvm/ADT/Triple.h>
#include <llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h>
#include <llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h>
#include <llvm/ExecutionEngine/SectionMemoryManager.h>
#include <llvm/IR/LLVMContext.h>
#include <llvm/Support/raw_ostream.h>
#include <llvm/Support/Host.h>
#include <llvm/Support/TargetRegistry.h>
#include <llvm/Target/TargetMachine.h>
using namespace llvm;
using namespace llvm::orc;
@@ -283,6 +290,69 @@ Error RTDynamicLibrarySearchGenerator::tryToGenerate(
return JD.define(absoluteSymbols(std::move(NewSymbols)), CurrentRT());
}
static std::unique_ptr<TargetMachine>
CreateHostTargetMachine(const clang::CompilerInstance& CI) {
const clang::TargetOptions& TargetOpts = CI.getTargetOpts();
const clang::CodeGenOptions& CGOpt = CI.getCodeGenOpts();
const std::string& Triple = TargetOpts.Triple;
std::string Error;
const Target *TheTarget = TargetRegistry::lookupTarget(Triple, Error);
if (!TheTarget) {
cling::errs() << "cling::IncrementalExecutor: unable to find target:\n"
<< Error;
return std::unique_ptr<TargetMachine>();
}
CodeGenOpt::Level OptLevel = CodeGenOpt::Default;
switch (CGOpt.OptimizationLevel) {
case 0: OptLevel = CodeGenOpt::None; break;
case 1: OptLevel = CodeGenOpt::Less; break;
case 2: OptLevel = CodeGenOpt::Default; break;
case 3: OptLevel = CodeGenOpt::Aggressive; break;
default: OptLevel = CodeGenOpt::Default;
}
using namespace llvm::orc;
auto JTMB = JITTargetMachineBuilder::detectHost();
if (!JTMB)
logAllUnhandledErrors(JTMB.takeError(), llvm::errs(),
"Error detecting host");
JTMB->setCodeGenOptLevel(OptLevel);
#ifdef _WIN32
JTMB->getOptions().EmulatedTLS = false;
#endif // _WIN32
#if defined(__powerpc64__) || defined(__PPC64__)
// We have to use large code model for PowerPC64 because TOC and text sections
// can be more than 2GB apart.
JTMB->setCodeModel(CodeModel::Large);
#endif
std::unique_ptr<TargetMachine> TM = cantFail(JTMB->createTargetMachine());
// Forcefully disable GlobalISel, it might be enabled on AArch64 without
// optimizations. In tests on an Apple M1 after the upgrade to LLVM 9, this
// new instruction selection framework emits branches / calls that expect all
// code to be reachable in +/- 128 MB. This cannot be guaranteed during JIT,
// which generates code into allocated pages on the heap and could span the
// entire address space of the process.
//
// TODO:
// 1. Try to reproduce the problem with vanilla lli of LLVM 9 to check that
// this is not related to the way Cling incrementally JITs and executes.
// 2. Figure out exactly why GlobalISel emits different branch instructions,
// and whether this is a problem in the framework or of the generated IR.
// 3. Verify if the same happens with LLVM 11/12 (whatever Cling will move to
// next), and possibly fix the underlying issue in LLVM upstream's `main`.
//
// FIXME: Lift this restriction and allow the target to enable GlobalISel,
// if deemed ready by upstream developers.
TM->setGlobalISel(false);
return TM;
}
} // unnamed namespace
namespace cling {
@@ -291,24 +361,15 @@ namespace cling {
llvm::JITEventListener* createPerfJITEventListener();
IncrementalJIT::IncrementalJIT(
IncrementalExecutor& Executor, std::unique_ptr<TargetMachine> TM,
IncrementalExecutor& Executor, const clang::CompilerInstance &CI,
std::unique_ptr<llvm::orc::ExecutorProcessControl> EPC, Error& Err,
void *ExtraLibHandle, bool Verbose)
: SkipHostProcessLookup(false),
TM(std::move(TM)),
TM(CreateHostTargetMachine(CI)),
SingleThreadedContext(std::make_unique<LLVMContext>()) {
ErrorAsOutParameter _(&Err);
// FIXME: We should probably take codegen settings from the CompilerInvocation
// and not from the target machine
JITTargetMachineBuilder JTMB(this->TM->getTargetTriple());
JTMB.setCodeModel(this->TM->getCodeModel());
JTMB.setCodeGenOptLevel(this->TM->getOptLevel());
JTMB.setFeatures(this->TM->getTargetFeatureString());
JTMB.setRelocationModel(this->TM->getRelocationModel());
LLJITBuilder Builder;
Builder.setJITTargetMachineBuilder(std::move(JTMB));
Builder.setExecutorProcessControl(std::move(EPC));
// Create ObjectLinkingLayer with our own MemoryManager.
@@ -341,6 +402,11 @@ IncrementalJIT::IncrementalJIT(
return Layer;
});
Builder.setCompileFunctionCreator([&](llvm::orc::JITTargetMachineBuilder)
-> llvm::Expected<std::unique_ptr<llvm::orc::IRCompileLayer::IRCompiler>> {
return std::make_unique<SimpleCompiler>(*TM);
});
if (Expected<std::unique_ptr<LLJIT>> JitInstance = Builder.create()) {
Jit = std::move(*JitInstance);
} else {

IncrementalJIT.h

@@ -27,6 +27,10 @@
#include <string>
#include <utility>
namespace clang {
class CompilerInstance;
}
namespace cling {
class IncrementalExecutor;
@@ -52,7 +56,7 @@ private:
class IncrementalJIT {
public:
IncrementalJIT(IncrementalExecutor& Executor,
std::unique_ptr<llvm::TargetMachine> TM,
const clang::CompilerInstance &CI,
std::unique_ptr<llvm::orc::ExecutorProcessControl> EPC,
llvm::Error &Err, void *ExtraLibHandle, bool Verbose);
@@ -82,6 +86,11 @@ public:
llvm::Error runCtors() const {
return Jit->initialize(Jit->getMainJITDylib());
}
/// @brief Get the TargetMachine used by the JIT.
/// Non-const because BackendPasses need to update OptLevel.
llvm::TargetMachine &getTargetMachine() { return *TM; }
private:
std::unique_ptr<llvm::orc::LLJIT> Jit;
llvm::orc::SymbolMap m_InjectedSymbols;