Re-enable CodeGen-OptLevel after llvm13 upgrade:

With the upgrade, BackendPasses was modifying a TargetMachine that was
not used by SimpleCompiler.

Change that by
- using a SimpleCompiler that uses IncrementalJIT::TM (see the sketch below);
- moving the TargetMachine creation into IncrementalJIT and exposing it
  through a getTargetMachine() accessor.

This reduces the runtime of https://github.com/root-project/root/issues/11927.
Measured timings:
- before the llvm upgrade: 2.69s
- llvm13, without this commit: ???
- llvm13, with this commit: 2.89s
i.e., a slow-down of 7% relative to the pre-upgrade baseline (likely caused by
the different emission mechanism of Orc v2; to be confirmed...)
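
For context, here is a minimal sketch of the wiring described above, assuming
LLVM 13's Orc v2 API; the helper name createJITSharingTM and its exact shape
are illustrative, not part of this commit. It builds an LLJIT whose IR
compiler is a SimpleCompiler wrapping the same TargetMachine that is handed
back to the caller, so that a consumer such as BackendPasses and the JIT agree
on the CodeGen OptLevel:

// Sketch only (hypothetical helper): create a host TargetMachine, hand it to
// the caller, and make the LLJIT compile through that very TargetMachine via
// a custom CompileFunctionCreator.
#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
#include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h"
#include "llvm/ExecutionEngine/Orc/LLJIT.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Target/TargetMachine.h"
#include <memory>

static llvm::Expected<std::unique_ptr<llvm::orc::LLJIT>>
createJITSharingTM(std::unique_ptr<llvm::TargetMachine> &TM) {
  using namespace llvm;
  using namespace llvm::orc;
  InitializeNativeTarget();
  InitializeNativeTargetAsmPrinter();

  auto JTMB = JITTargetMachineBuilder::detectHost();
  if (!JTMB)
    return JTMB.takeError();
  JTMB->setCodeGenOptLevel(CodeGenOpt::Default);

  auto HostTM = JTMB->createTargetMachine();
  if (!HostTM)
    return HostTM.takeError();
  // The caller owns TM and must keep it alive as long as the JIT lives;
  // backend passes can later read or update its OptLevel.
  TM = std::move(*HostTM);

  LLJITBuilder Builder;
  Builder.setJITTargetMachineBuilder(std::move(*JTMB));
  // Key point of the commit: compile through *this* TargetMachine instead of
  // a second one that LLJIT would otherwise construct internally, so OptLevel
  // changes made on TM are actually seen by the compiler.
  Builder.setCompileFunctionCreator(
      [&TM](JITTargetMachineBuilder)
          -> Expected<std::unique_ptr<IRCompileLayer::IRCompiler>> {
        return std::make_unique<SimpleCompiler>(*TM);
      });
  return Builder.create();
}
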
Axel Naumann 2022-12-21 08:45:57 +01:00 committed by jenkins
parent cefa80ef54
commit da247bd77a
3 changed files with 89 additions and 90 deletions

IncrementalExecutor.cpp

@@ -20,19 +20,12 @@
#include "cling/Utils/Platform.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/TargetOptions.h"
#include "clang/Frontend/CompilerInstance.h"
#include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetMachine.h"
#include <iostream>
@@ -40,73 +33,6 @@ using namespace llvm;
namespace cling {
namespace {
static std::unique_ptr<TargetMachine>
CreateHostTargetMachine(const clang::CompilerInstance& CI) {
const clang::TargetOptions& TargetOpts = CI.getTargetOpts();
const clang::CodeGenOptions& CGOpt = CI.getCodeGenOpts();
const std::string& Triple = TargetOpts.Triple;
std::string Error;
const Target *TheTarget = TargetRegistry::lookupTarget(Triple, Error);
if (!TheTarget) {
cling::errs() << "cling::IncrementalExecutor: unable to find target:\n"
<< Error;
return std::unique_ptr<TargetMachine>();
}
CodeGenOpt::Level OptLevel = CodeGenOpt::Default;
switch (CGOpt.OptimizationLevel) {
case 0: OptLevel = CodeGenOpt::None; break;
case 1: OptLevel = CodeGenOpt::Less; break;
case 2: OptLevel = CodeGenOpt::Default; break;
case 3: OptLevel = CodeGenOpt::Aggressive; break;
default: OptLevel = CodeGenOpt::Default;
}
using namespace llvm::orc;
auto JTMB = JITTargetMachineBuilder::detectHost();
if (!JTMB)
logAllUnhandledErrors(JTMB.takeError(), llvm::errs(),
"Error detecting host");
JTMB->setCodeGenOptLevel(OptLevel);
#ifdef _WIN32
JTMB->getOptions().EmulatedTLS = false;
#endif // _WIN32
#if defined(__powerpc64__) || defined(__PPC64__)
// We have to use large code model for PowerPC64 because TOC and text sections
// can be more than 2GB apart.
JTMB->setCodeModel(CodeModel::Large);
#endif
std::unique_ptr<TargetMachine> TM = cantFail(JTMB->createTargetMachine());
// Forcefully disable GlobalISel, it might be enabled on AArch64 without
// optimizations. In tests on an Apple M1 after the upgrade to LLVM 9, this
// new instruction selection framework emits branches / calls that expect all
// code to be reachable in +/- 128 MB. This cannot be guaranteed during JIT,
// which generates code into allocated pages on the heap and could span the
// entire address space of the process.
//
// TODO:
// 1. Try to reproduce the problem with vanilla lli of LLVM 9 to check that
// this is not related to the way Cling incrementally JITs and executes.
// 2. Figure out exactly why GlobalISel emits different branch instructions,
// and whether this is a problem in the framework or of the generated IR.
// 3. Verify if the same happens with LLVM 11/12 (whatever Cling will move to
// next), and possibly fix the underlying issue in LLVM upstream's `main`.
//
// FIXME: Lift this restriction and allow the target to enable GlobalISel,
// if deemed ready by upstream developers.
TM->setGlobalISel(false);
return TM;
}
} // anonymous namespace
IncrementalExecutor::IncrementalExecutor(clang::DiagnosticsEngine& /*diags*/,
const clang::CompilerInstance& CI,
void *ExtraLibHandle, bool Verbose):
@@ -120,18 +46,16 @@ IncrementalExecutor::IncrementalExecutor(clang::DiagnosticsEngine& /*diags*/,
// MSVC doesn't support m_AtExitFuncsSpinLock=ATOMIC_FLAG_INIT; in the class definition
std::atomic_flag_clear( &m_AtExitFuncsSpinLock );
std::unique_ptr<TargetMachine> TM(CreateHostTargetMachine(CI));
auto &TMRef = *TM;
llvm::Error Err = llvm::Error::success();
auto EPC = llvm::cantFail(llvm::orc::SelfExecutorProcessControl::Create());
m_JIT.reset(new IncrementalJIT(*this, std::move(TM), std::move(EPC), Err,
m_JIT.reset(new IncrementalJIT(*this, CI, std::move(EPC), Err,
ExtraLibHandle, Verbose));
if (Err) {
llvm::logAllUnhandledErrors(std::move(Err), llvm::errs(), "Fatal: ");
llvm_unreachable("Propagate this error and exit gracefully");
}
m_BackendPasses.reset(new BackendPasses(CI.getCodeGenOpts(), TMRef));
m_BackendPasses.reset(new BackendPasses(CI.getCodeGenOpts(), m_JIT->getTargetMachine()));
}
IncrementalExecutor::~IncrementalExecutor() {}

IncrementalJIT.cpp

@@ -15,11 +15,18 @@
#include "cling/Utils/Output.h"
#include "cling/Utils/Utils.h"
#include <clang/Basic/TargetOptions.h>
#include <clang/Frontend/CompilerInstance.h>
#include <llvm/ADT/Triple.h>
#include <llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h>
#include <llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h>
#include <llvm/ExecutionEngine/SectionMemoryManager.h>
#include <llvm/IR/LLVMContext.h>
#include <llvm/Support/raw_ostream.h>
#include <llvm/Support/Host.h>
#include <llvm/Support/TargetRegistry.h>
#include <llvm/Target/TargetMachine.h>
using namespace llvm;
using namespace llvm::orc;
@@ -283,6 +290,69 @@ Error RTDynamicLibrarySearchGenerator::tryToGenerate(
return JD.define(absoluteSymbols(std::move(NewSymbols)), CurrentRT());
}
static std::unique_ptr<TargetMachine>
CreateHostTargetMachine(const clang::CompilerInstance& CI) {
const clang::TargetOptions& TargetOpts = CI.getTargetOpts();
const clang::CodeGenOptions& CGOpt = CI.getCodeGenOpts();
const std::string& Triple = TargetOpts.Triple;
std::string Error;
const Target *TheTarget = TargetRegistry::lookupTarget(Triple, Error);
if (!TheTarget) {
cling::errs() << "cling::IncrementalExecutor: unable to find target:\n"
<< Error;
return std::unique_ptr<TargetMachine>();
}
CodeGenOpt::Level OptLevel = CodeGenOpt::Default;
switch (CGOpt.OptimizationLevel) {
case 0: OptLevel = CodeGenOpt::None; break;
case 1: OptLevel = CodeGenOpt::Less; break;
case 2: OptLevel = CodeGenOpt::Default; break;
case 3: OptLevel = CodeGenOpt::Aggressive; break;
default: OptLevel = CodeGenOpt::Default;
}
using namespace llvm::orc;
auto JTMB = JITTargetMachineBuilder::detectHost();
if (!JTMB)
logAllUnhandledErrors(JTMB.takeError(), llvm::errs(),
"Error detecting host");
JTMB->setCodeGenOptLevel(OptLevel);
#ifdef _WIN32
JTMB->getOptions().EmulatedTLS = false;
#endif // _WIN32
#if defined(__powerpc64__) || defined(__PPC64__)
// We have to use large code model for PowerPC64 because TOC and text sections
// can be more than 2GB apart.
JTMB->setCodeModel(CodeModel::Large);
#endif
std::unique_ptr<TargetMachine> TM = cantFail(JTMB->createTargetMachine());
// Forcefully disable GlobalISel, it might be enabled on AArch64 without
// optimizations. In tests on an Apple M1 after the upgrade to LLVM 9, this
// new instruction selection framework emits branches / calls that expect all
// code to be reachable in +/- 128 MB. This cannot be guaranteed during JIT,
// which generates code into allocated pages on the heap and could span the
// entire address space of the process.
//
// TODO:
// 1. Try to reproduce the problem with vanilla lli of LLVM 9 to check that
// this is not related to the way Cling incrementally JITs and executes.
// 2. Figure out exactly why GlobalISel emits different branch instructions,
// and whether this is a problem in the framework or of the generated IR.
// 3. Verify if the same happens with LLVM 11/12 (whatever Cling will move to
// next), and possibly fix the underlying issue in LLVM upstream's `main`.
//
// FIXME: Lift this restriction and allow the target to enable GlobalISel,
// if deemed ready by upstream developers.
TM->setGlobalISel(false);
return TM;
}
} // unnamed namespace
namespace cling {
@@ -291,24 +361,15 @@ namespace cling {
llvm::JITEventListener* createPerfJITEventListener();
IncrementalJIT::IncrementalJIT(
IncrementalExecutor& Executor, std::unique_ptr<TargetMachine> TM,
IncrementalExecutor& Executor, const clang::CompilerInstance &CI,
std::unique_ptr<llvm::orc::ExecutorProcessControl> EPC, Error& Err,
void *ExtraLibHandle, bool Verbose)
: SkipHostProcessLookup(false),
TM(std::move(TM)),
TM(CreateHostTargetMachine(CI)),
SingleThreadedContext(std::make_unique<LLVMContext>()) {
ErrorAsOutParameter _(&Err);
// FIXME: We should probably take codegen settings from the CompilerInvocation
// and not from the target machine
JITTargetMachineBuilder JTMB(this->TM->getTargetTriple());
JTMB.setCodeModel(this->TM->getCodeModel());
JTMB.setCodeGenOptLevel(this->TM->getOptLevel());
JTMB.setFeatures(this->TM->getTargetFeatureString());
JTMB.setRelocationModel(this->TM->getRelocationModel());
LLJITBuilder Builder;
Builder.setJITTargetMachineBuilder(std::move(JTMB));
Builder.setExecutorProcessControl(std::move(EPC));
// Create ObjectLinkingLayer with our own MemoryManager.
@@ -341,6 +402,11 @@ IncrementalJIT::IncrementalJIT(
return Layer;
});
Builder.setCompileFunctionCreator([&](llvm::orc::JITTargetMachineBuilder)
-> llvm::Expected<std::unique_ptr<llvm::orc::IRCompileLayer::IRCompiler>> {
return std::make_unique<SimpleCompiler>(*TM);
});
if (Expected<std::unique_ptr<LLJIT>> JitInstance = Builder.create()) {
Jit = std::move(*JitInstance);
} else {

IncrementalJIT.h

@@ -27,6 +27,10 @@
#include <string>
#include <utility>
namespace clang {
class CompilerInstance;
}
namespace cling {
class IncrementalExecutor;
@@ -52,7 +56,7 @@ private:
class IncrementalJIT {
public:
IncrementalJIT(IncrementalExecutor& Executor,
std::unique_ptr<llvm::TargetMachine> TM,
const clang::CompilerInstance &CI,
std::unique_ptr<llvm::orc::ExecutorProcessControl> EPC,
llvm::Error &Err, void *ExtraLibHandle, bool Verbose);
@@ -82,6 +86,11 @@ public:
llvm::Error runCtors() const {
return Jit->initialize(Jit->getMainJITDylib());
}
/// @brief Get the TargetMachine used by the JIT.
/// Non-const because BackendPasses need to update OptLevel.
llvm::TargetMachine &getTargetMachine() { return *TM; }
private:
std::unique_ptr<llvm::orc::LLJIT> Jit;
llvm::orc::SymbolMap m_InjectedSymbols;