Allow a Dyld per DynamicLibraryManager.

This addresses the rest of the comments of root-project/root#4717

Patch by Alexander Penev and me.
This commit is contained in:
Vassil Vassilev 2020-05-17 18:09:39 +00:00 committed by jenkins
parent 807dafd1cb
commit dea63b95b3
3 changed files with 584 additions and 616 deletions

View File

@ -18,6 +18,7 @@
#include "llvm/Support/Path.h"
namespace cling {
class Dyld;
class InterpreterCallbacks;
class InvocationOptions;
@ -66,7 +67,9 @@ namespace cling {
///
SearchPathInfos m_SearchPaths;
InterpreterCallbacks* m_Callbacks;
InterpreterCallbacks* m_Callbacks = nullptr;
Dyld* m_Dyld = nullptr;
///\brief Concatenates current include paths and the system include paths
/// and performs a lookup for the filename.
@ -146,8 +149,7 @@ namespace cling {
/// dangerous libraries such as the ones overriding malloc.
///
void
initializeDyld(std::function<bool(llvm::StringRef)> shouldPermanentlyIgnore)
const;
initializeDyld(std::function<bool(llvm::StringRef)> shouldPermanentlyIgnore);
/// Find the first not-yet-loaded shared object that contains the symbol
///
@ -159,6 +161,16 @@ namespace cling {
std::string searchLibrariesForSymbol(const std::string& mangledName,
bool searchSystem = true) const;
/// On success, returns the full path to the shared object that holds the
/// symbol pointed to by func.
///
/// The pointer is converted to void* and forwarded to the getSymbolLocation
/// overload that takes a void*.
///
template <class T>
static std::string getSymbolLocation(T func) {
  static_assert(std::is_pointer<T>::value, "Must be a function pointer!");
  void* const symbolAddress = reinterpret_cast<void*>(func);
  return getSymbolLocation(symbolAddress);
}
///\brief Explicitly tell the execution engine to use symbols from
/// a shared library that would otherwise not be used for symbol
/// resolution, e.g. because it was dlopened with RTLD_LOCAL.
@ -176,15 +188,6 @@ namespace cling {
/// is a library but of incompatible file format.
///
static bool isSharedLibrary(llvm::StringRef libFullPath, bool* exists = 0);
/// On success, returns the full path to the shared object that holds the
/// symbol pointed to by func.
///
/// The pointer is converted to void* and forwarded to the getSymbolLocation
/// overload that takes a void*.
///
template <class T>
static std::string getSymbolLocation(T func) {
  static_assert(std::is_pointer<T>::value, "Must be a function pointer!");
  void* const symbolAddress = reinterpret_cast<void*>(func);
  return getSymbolLocation(symbolAddress);
}
};
} // end namespace cling
#endif // CLING_DYNAMIC_LIBRARY_MANAGER_H

View File

@ -23,7 +23,7 @@
namespace cling {
DynamicLibraryManager::DynamicLibraryManager(const InvocationOptions& Opts)
: m_Opts(Opts), m_Callbacks(0) {
: m_Opts(Opts) {
const llvm::SmallVector<const char*, 10> kSysLibraryEnv = {
"LD_LIBRARY_PATH",
#if __APPLE__
@ -65,8 +65,6 @@ namespace cling {
m_SearchPaths.push_back({".", /*IsUser*/true});
}
// Out-of-line destructor with an empty body; nothing is released here.
DynamicLibraryManager::~DynamicLibraryManager() {}
std::string
DynamicLibraryManager::lookupLibInPaths(llvm::StringRef libStem) const {
llvm::SmallVector<SearchPathInfo, 128> Paths;

View File

@ -128,21 +128,24 @@ struct BloomFilter {
struct LibraryPath {
const BasePath& m_Path;
std::string m_LibName;
std::string m_FullName;
BloomFilter m_Filter;
llvm::StringSet<> m_Symbols;
LibraryPath(const BasePath& Path, const std::string& LibName)
: m_Path(Path), m_LibName(LibName) { }
: m_Path(Path), m_LibName(LibName) {
llvm::SmallString<512> Vec(m_Path);
llvm::sys::path::append(Vec, llvm::StringRef(m_LibName));
m_FullName = Vec.str().str();
}
bool operator==(const LibraryPath &other) const {
return (&m_Path == &other.m_Path || m_Path == other.m_Path) &&
m_LibName == other.m_LibName;
}
std::string GetFullName() const {
llvm::SmallString<512> Vec(m_Path);
llvm::sys::path::append(Vec, llvm::StringRef(m_LibName));
return Vec.str().str();
const std::string& GetFullName() const {
return m_FullName;
}
void AddBloom(llvm::StringRef symbol) {
@ -229,8 +232,80 @@ public:
}
};
class Dyld {
/// Asks llvm::sys::fs::real_path to resolve \p path (with tilde expansion
/// enabled) and returns the outcome as a std::string.
///
/// The status returned by real_path is not inspected; whatever it left in the
/// buffer is what the caller gets back.
static std::string getRealPath(llvm::StringRef path) {
  llvm::SmallString<512> resolved;
  (void)llvm::sys::fs::real_path(path, resolved, /*expandTilde*/true);
  return std::string(resolved.data(), resolved.size());
}
/// Looks through the sections of \p file for the one named ".gnu.hash".
///
///\returns the contents of the first such section, or an empty string if the
/// object file has no section with that name.
static llvm::StringRef GetGnuHashSection(llvm::object::ObjectFile *file) {
  for (auto Section : file->sections()) {
    llvm::StringRef SectionName;
    Section.getName(SectionName);
    if (SectionName != ".gnu.hash")
      continue;

    llvm::StringRef Payload;
    Section.getContents(Payload);
    return Payload;
  }
  return "";
}
/// A Bloom filter is a probabilistic data structure which can tell us with
/// 100% certainty that a symbol name does not exist in a library. If it tells
/// us the symbol exists, this may not be true:
/// https://blogs.oracle.com/solaris/gnu-hash-elf-sections-v2
///
/// Newer linkers emit this optimization for ELF by default. It is stored in
/// the .gnu.hash section of the object file.
///
/// Whenever the section is missing, too short, or carries header values that
/// cannot be used safely, the function answers true: the symbol cannot be
/// ruled out.
///
///\param[in] soFile - the ELF object file to inspect.
///\param[in] hash - the GNU hash of the symbol name.
///
///\returns true if the symbol may be in the library.
static bool MayExistInElfObjectFile(llvm::object::ObjectFile *soFile,
                                    uint32_t hash) {
  assert(soFile->isELF() && "Not ELF");

  // LLVM9: soFile->makeTriple().is64Bit()
  const unsigned wordBytes = soFile->getBytesInAddress();
  if (wordBytes != 4 && wordBytes != 8)
    return true; // Unknown Bloom word width; we cannot rule the symbol out.
  const unsigned bits = 8 * wordBytes;

  llvm::StringRef contents = GetGnuHashSection(soFile);
  if (contents.size() < 16)
    // We need to search if the library doesn't have .gnu.hash section!
    return true;

  const char* hashContent = contents.data();

  // See https://flapenguin.me/2017/05/10/elf-lookup-dt-gnu-hash/ for .gnu.hash
  // table layout.
  uint32_t maskWords = *reinterpret_cast<const uint32_t *>(hashContent + 8);
  uint32_t shift2 = *reinterpret_cast<const uint32_t *>(hashContent + 12);

  // maskWords is used as a divisor and shift2 as a shift count on a 32-bit
  // value below; reject values for which those operations are undefined.
  if (maskWords == 0 || shift2 >= 32)
    return true;

  // The whole Bloom filter has to lie inside the section, otherwise the word
  // lookup below could read past the end of the section contents.
  const uint64_t bloomBytes = static_cast<uint64_t>(maskWords) * wordBytes;
  if (contents.size() - 16 < bloomBytes)
    return true;

  uint32_t hash2 = hash >> shift2;
  uint32_t n = (hash / bits) % maskWords;

  const char *bloomfilter = hashContent + 16;
  const char *hash_pos = bloomfilter + static_cast<size_t>(n) * wordBytes;

  // Read exactly one Bloom word: 64 bits for 64-bit ELF, 32 bits otherwise.
  uint64_t word = wordBytes == 8
                      ? *reinterpret_cast<const uint64_t *>(hash_pos)
                      : *reinterpret_cast<const uint32_t *>(hash_pos);

  uint64_t bitmask = ( (1ULL << (hash % bits)) | (1ULL << (hash2 % bits)));
  return (bitmask & word) == bitmask;
}
} // anon namespace
// Returns the location that DynamicLibraryManager::getSymbolLocation reports
// for this very function.
//
// The function is not referenced outside its translation unit, yet it must
// not be declared "static": its address is used for GetMainExecutable (some
// platforms don't support taking the address of main, and some platforms
// can't implement GetMainExecutable without being given the address of a
// function in the main executable).
std::string GetExecutablePath() {
  // Any symbol that lives in the binary would do; C++ does not allow taking
  // the address of ::main, so the function hands out its own address instead.
  auto* const ownAddress = &GetExecutablePath;
  return cling::DynamicLibraryManager::getSymbolLocation(ownAddress);
}
namespace cling {
// Releases the Dyld instance held in m_Dyld. Deleting a null pointer is a
// no-op, so this is also fine when no instance was ever created.
// NOTE(review): in the visible diff `class Dyld` is only defined further down
// in this file, after this destructor. If Dyld is still an incomplete type at
// this point, `delete` is undefined behaviour for a class with a non-trivial
// destructor — confirm, and consider defining this destructor after the class
// definition.
DynamicLibraryManager::~DynamicLibraryManager() {
delete m_Dyld;
}
class Dyld {
struct BasePathHashFunction {
size_t operator()(const BasePath& item) const {
return std::hash<std::string>()(item);
@ -282,6 +357,10 @@ class Dyld {
/// useless iterations.
std::vector<LibraryPath> m_QueriedLibraries;
using PermanentlyIgnoreCallbackProto = std::function<bool(llvm::StringRef)>;
const PermanentlyIgnoreCallbackProto m_ShouldPermanentlyIgnoreCallback;
const llvm::StringRef m_ExecutableFormat;
/// Scan for shared objects which are not yet loaded. They are our candidate
/// sources for symbol resolution.
/// NOTE: We only scan not loaded shared objects.
@ -302,112 +381,22 @@ class Dyld {
bool ContainsSymbol(const LibraryPath* Lib, const std::string &mangledName,
unsigned IgnoreSymbolFlags = 0) const;
protected:
Dyld(const cling::DynamicLibraryManager &DLM)
: m_DynamicLibraryManager(DLM) { }
~Dyld() = default;
bool ShouldPermanentlyIgnore(const std::string& FileName) const;
public:
static Dyld& getInstance(const cling::DynamicLibraryManager &DLM) {
static Dyld instance(DLM);
Dyld(const cling::DynamicLibraryManager &DLM,
PermanentlyIgnoreCallbackProto shouldIgnore,
llvm::StringRef execFormat)
: m_DynamicLibraryManager(DLM),
m_ShouldPermanentlyIgnoreCallback(shouldIgnore),
m_ExecutableFormat(execFormat) { }
#ifndef NDEBUG
auto &NewSearchPaths = DLM.getSearchPaths();
auto &OldSearchPaths = instance.m_DynamicLibraryManager.getSearchPaths();
// FIXME: Move the Dyld logic to the cling::DynamicLibraryManager itself!
assert(std::equal(OldSearchPaths.begin(), OldSearchPaths.end(),
NewSearchPaths.begin()) && "Path was added/removed!");
#endif
return instance;
}
// delete copy and move constructors and assign operators
Dyld(Dyld const&) = delete;
Dyld(Dyld&&) = delete;
Dyld& operator=(Dyld const&) = delete;
Dyld& operator=(Dyld &&) = delete;
~Dyld(){};
std::string searchLibrariesForSymbol(const std::string& mangledName,
bool searchSystem);
};
static bool s_IsDyldInitialized = false;
static std::function<bool(llvm::StringRef)> s_ShouldPermanentlyIgnoreCallback;
/// Asks llvm::sys::fs::real_path to resolve \p path (with tilde expansion
/// enabled) and returns the outcome as a std::string.
///
/// The status returned by real_path is not inspected; whatever it left in the
/// buffer is what the caller gets back.
static std::string getRealPath(llvm::StringRef path) {
  llvm::SmallString<512> resolved;
  (void)llvm::sys::fs::real_path(path, resolved, /*expandTilde*/true);
  return std::string(resolved.data(), resolved.size());
}
static llvm::StringRef s_ExecutableFormat;
/// Decides whether the file \p FileName is to be left out of the symbol
/// search for good.
///
/// A file is ignored when it is a directory, is not a shared library, is
/// already loaded according to \p dyLibManager, cannot be opened as an object
/// file, or has a file format different from s_ExecutableFormat. For ELF
/// files the ".text" section decides: the file is ignored if that section is
/// missing, is of type SHT_NOBITS, or lacks the SHF_ALLOC flag. For every
/// other format the decision is delegated to
/// s_ShouldPermanentlyIgnoreCallback.
///
///\param[in] FileName - the path to examine; it must already be a real path.
///\param[in] dyLibManager - queried for whether the library is loaded.
///
///\returns true if the file must be ignored.
static bool shouldPermanentlyIgnore(const std::string& FileName,
                            const cling::DynamicLibraryManager& dyLibManager) {
  assert(FileName == getRealPath(FileName));
  assert(!s_ExecutableFormat.empty() && "Failed to find the object format!");

  if (llvm::sys::fs::is_directory(FileName))
    return true;

  if (!cling::DynamicLibraryManager::isSharedLibrary(FileName))
    return true;

  // No need to check linked libraries, as this function is only invoked
  // for symbols that cannot be found (neither by dlsym nor in the JIT).
  if (dyLibManager.isLibraryLoaded(FileName.c_str()))
    return true;

  auto ObjF = llvm::object::ObjectFile::createObjectFile(FileName);
  if (!ObjF) {
    // A failed llvm::Expected has to have its error handled before it is
    // destroyed; otherwise LLVM builds with ABI-breaking checks abort here.
    llvm::consumeError(ObjF.takeError());
    if (DEBUG > 1)
      cling::errs() << "[DyLD] Failed to read object file "
                    << FileName << "\n";
    return true;
  }

  llvm::object::ObjectFile *file = ObjF.get().getBinary();

  if (DEBUG > 1)
    cling::errs() << "Current executable format: " << s_ExecutableFormat
                  << ". Executable format of " << FileName << " : "
                  << file->getFileFormatName() << "\n";

  // Ignore libraries with different format than the executing one.
  if (s_ExecutableFormat != file->getFileFormatName())
    return true;

  if (llvm::isa<llvm::object::ELFObjectFileBase>(*file)) {
    for (auto S : file->sections()) {
      llvm::StringRef name;
      S.getName(name);
      if (name == ".text") {
        // Check if the library has only debug symbols, usually when
        // stripped with objcopy --only-keep-debug. This check is done by
        // reading the manual of objcopy and inspection of stripped with
        // objcopy libraries.
        auto SecRef = static_cast<llvm::object::ELFSectionRef&>(S);
        if (SecRef.getType() == llvm::ELF::SHT_NOBITS)
          return true;

        return (SecRef.getFlags() & llvm::ELF::SHF_ALLOC) == 0;
      }
    }
    // An ELF file without a .text section is ignored.
    return true;
  }

  //FIXME: Handle osx using isStripped after upgrading to llvm9.

  return s_ShouldPermanentlyIgnoreCallback(FileName);
}
void Dyld::ScanForLibraries(bool searchSystemLibraries/* = false*/) {
// #ifndef NDEBUG
// if (!m_FirstRun && !m_FirstRunSysLib)
// assert(0 && "Already initialized");
@ -419,7 +408,7 @@ void Dyld::ScanForLibraries(bool searchSystemLibraries/* = false*/) {
// #endif
const auto &searchPaths = m_DynamicLibraryManager.getSearchPaths();
for (const cling::DynamicLibraryManager::SearchPathInfo &Info : searchPaths) {
for (const DynamicLibraryManager::SearchPathInfo &Info : searchPaths) {
if (Info.IsUser || searchSystemLibraries) {
// Examples which we should handle.
// File Real
@ -461,7 +450,7 @@ void Dyld::ScanForLibraries(bool searchSystemLibraries/* = false*/) {
std::string FileName = getRealPath(DirIt->path());
assert(!llvm::sys::fs::is_symlink_file(FileName));
if (shouldPermanentlyIgnore(FileName, m_DynamicLibraryManager))
if (ShouldPermanentlyIgnore(FileName))
continue;
std::string FileRealPath = llvm::sys::path::parent_path(FileName);
@ -493,7 +482,7 @@ void Dyld::BuildBloomFilter(LibraryPath* Lib,
// If BloomFilter is empty then build it.
// Count Symbols and generate BloomFilter
uint32_t SymbolsCount = 0;
std::list<std::string> symbols;
std::list<llvm::StringRef> symbols;
for (const llvm::object::SymbolRef &S : BinObjFile->symbols()) {
uint32_t Flags = S.getFlags();
// Do not insert in the table symbols flagged to ignore.
@ -580,56 +569,6 @@ void Dyld::BuildBloomFilter(LibraryPath* Lib,
}
}
/// Looks through the sections of \p file for the one named ".gnu.hash".
///
///\returns the contents of the first such section, or an empty string if the
/// object file has no section with that name.
static llvm::StringRef GetGnuHashSection(llvm::object::ObjectFile *file) {
  for (auto Section : file->sections()) {
    llvm::StringRef SectionName;
    Section.getName(SectionName);
    if (SectionName != ".gnu.hash")
      continue;

    llvm::StringRef Payload;
    Section.getContents(Payload);
    return Payload;
  }
  return "";
}
/// A Bloom filter is a probabilistic data structure which can tell us with
/// 100% certainty that a symbol name does not exist in a library. If it tells
/// us the symbol exists, this may not be true:
/// https://blogs.oracle.com/solaris/gnu-hash-elf-sections-v2
///
/// Newer linkers emit this optimization for ELF by default. It is stored in
/// the .gnu.hash section of the object file.
///
/// Whenever the section is missing, too short, or carries header values that
/// cannot be used safely, the function answers true: the symbol cannot be
/// ruled out.
///
///\param[in] soFile - the ELF object file to inspect.
///\param[in] hash - the GNU hash of the symbol name.
///
///\returns true if the symbol may be in the library.
static bool MayExistInElfObjectFile(llvm::object::ObjectFile *soFile,
                                    uint32_t hash) {
  assert(soFile->isELF() && "Not ELF");

  // LLVM9: soFile->makeTriple().is64Bit()
  const unsigned wordBytes = soFile->getBytesInAddress();
  if (wordBytes != 4 && wordBytes != 8)
    return true; // Unknown Bloom word width; we cannot rule the symbol out.
  const unsigned bits = 8 * wordBytes;

  llvm::StringRef contents = GetGnuHashSection(soFile);
  if (contents.size() < 16)
    // We need to search if the library doesn't have .gnu.hash section!
    return true;

  const char* hashContent = contents.data();

  // See https://flapenguin.me/2017/05/10/elf-lookup-dt-gnu-hash/ for .gnu.hash
  // table layout.
  uint32_t maskWords = *reinterpret_cast<const uint32_t *>(hashContent + 8);
  uint32_t shift2 = *reinterpret_cast<const uint32_t *>(hashContent + 12);

  // maskWords is used as a divisor and shift2 as a shift count on a 32-bit
  // value below; reject values for which those operations are undefined.
  if (maskWords == 0 || shift2 >= 32)
    return true;

  // The whole Bloom filter has to lie inside the section, otherwise the word
  // lookup below could read past the end of the section contents.
  const uint64_t bloomBytes = static_cast<uint64_t>(maskWords) * wordBytes;
  if (contents.size() - 16 < bloomBytes)
    return true;

  uint32_t hash2 = hash >> shift2;
  uint32_t n = (hash / bits) % maskWords;

  const char *bloomfilter = hashContent + 16;
  const char *hash_pos = bloomfilter + static_cast<size_t>(n) * wordBytes;

  // Read exactly one Bloom word: 64 bits for 64-bit ELF, 32 bits otherwise.
  uint64_t word = wordBytes == 8
                      ? *reinterpret_cast<const uint64_t *>(hash_pos)
                      : *reinterpret_cast<const uint32_t *>(hash_pos);

  uint64_t bitmask = ( (1ULL << (hash % bits)) | (1ULL << (hash2 % bits)));
  return (bitmask & word) == bitmask;
}
bool Dyld::ContainsSymbol(const LibraryPath* Lib,
const std::string &mangledName,
unsigned IgnoreSymbolFlags /*= 0*/) const {
@ -659,7 +598,8 @@ bool Dyld::ContainsSymbol(const LibraryPath* Lib,
uint32_t hashedMangle = GNUHash(mangledName);
// Check for the gnu.hash section if ELF.
// If the symbol doesn't exist, exit early.
if (BinObjFile->isELF() && !MayExistInElfObjectFile(BinObjFile, hashedMangle))
if (BinObjFile->isELF() &&
!MayExistInElfObjectFile(BinObjFile, hashedMangle))
return false;
if (m_UseBloomFilter) {
@ -687,11 +627,10 @@ bool Dyld::ContainsSymbol(const LibraryPath* Lib,
return result;
}
// Symbol may exist. Iterate.
// If no hash symbol then iterate to detect symbol
// We Iterate only if BloomFilter and/or SymbolHashTable are not supported.
for (const llvm::object::SymbolRef &S : BinObjFile->symbols()) {
auto ForeachSymbol =
[&library_filename](llvm::iterator_range<llvm::object::symbol_iterator> range,
unsigned IgnoreSymbolFlags, llvm::StringRef mangledName) -> bool {
for (const llvm::object::SymbolRef &S : range) {
uint32_t Flags = S.getFlags();
// Do not insert in the table symbols flagged to ignore.
if (Flags & IgnoreSymbolFlags)
@ -725,42 +664,85 @@ bool Dyld::ContainsSymbol(const LibraryPath* Lib,
}
}
}
return false;
};
// If no hash symbol then iterate to detect symbol
// We Iterate only if BloomFilter and/or SymbolHashTable are not supported.
// Symbol may exist. Iterate.
if (ForeachSymbol(BinObjFile->symbols(), IgnoreSymbolFlags, mangledName))
return true;
if (!BinObjFile->isELF())
return false;
// ELF file format has .dynstr section for the dynamic symbol table.
const auto *ElfObj = llvm::cast<llvm::object::ELFObjectFileBase>(BinObjFile);
const auto *ElfObj =
llvm::cast<llvm::object::ELFObjectFileBase>(BinObjFile);
for (const llvm::object::SymbolRef &S : ElfObj->getDynamicSymbolIterators()) {
uint32_t Flags = S.getFlags();
// DO NOT insert to table if symbol was undefined
if (Flags & llvm::object::SymbolRef::SF_Undefined)
continue;
// Note, we are at last resort and loading library based on a weak
// symbol is allowed. Otherwise, the JIT will issue an unresolved
// symbol error.
//
// There are other weak symbol kinds (marked as 'V') to denote
// typeinfo and vtables. It is unclear whether we should load such
// libraries or from which library we should resolve the symbol.
// We seem to not have a way to differentiate it from the symbol API.
llvm::Expected<llvm::StringRef> SymNameErr = S.getName();
if (!SymNameErr) {
cling::errs() << "Dyld::ContainsSymbol: Failed to read symbol "
<< mangledName << "\n";
continue;
return ForeachSymbol(ElfObj->getDynamicSymbolIterators(),
IgnoreSymbolFlags, mangledName);
}
if (SymNameErr.get().empty())
continue;
bool Dyld::ShouldPermanentlyIgnore(const std::string& FileName) const {
assert(FileName == getRealPath(FileName));
assert(!m_ExecutableFormat.empty() && "Failed to find the object format!");
if (SymNameErr.get() == mangledName)
if (llvm::sys::fs::is_directory(FileName))
return true;
if (!cling::DynamicLibraryManager::isSharedLibrary(FileName))
return true;
// No need to check linked libraries, as this function is only invoked
// for symbols that cannot be found (neither by dlsym nor in the JIT).
if (m_DynamicLibraryManager.isLibraryLoaded(FileName.c_str()))
return true;
auto ObjF = llvm::object::ObjectFile::createObjectFile(FileName);
if (!ObjF) {
if (DEBUG > 1)
cling::errs() << "[DyLD] Failed to read object file "
<< FileName << "\n";
return true;
}
return false;
llvm::object::ObjectFile *file = ObjF.get().getBinary();
if (DEBUG > 1)
cling::errs() << "Current executable format: " << m_ExecutableFormat
<< ". Executable format of " << FileName << " : "
<< file->getFileFormatName() << "\n";
// Ignore libraries with different format than the executing one.
if (m_ExecutableFormat != file->getFileFormatName())
return true;
if (llvm::isa<llvm::object::ELFObjectFileBase>(*file)) {
for (auto S : file->sections()) {
llvm::StringRef name;
S.getName(name);
if (name == ".text") {
// Check if the library has only debug symbols, usually when
// stripped with objcopy --only-keep-debug. This check is done by
// reading the manual of objcopy and inspection of stripped with
// objcopy libraries.
auto SecRef = static_cast<llvm::object::ELFSectionRef&>(S);
if (SecRef.getType() == llvm::ELF::SHT_NOBITS)
return true;
return (SecRef.getFlags() & llvm::ELF::SHF_ALLOC) == 0;
}
}
return true;
}
//FIXME: Handle osx using isStripped after upgrading to llvm9.
return m_ShouldPermanentlyIgnoreCallback(FileName);
}
std::string Dyld::searchLibrariesForSymbol(const std::string& mangledName,
@ -871,39 +853,24 @@ std::string Dyld::searchLibrariesForSymbol(const std::string& mangledName,
return ""; // Search found no match.
}
} // anon namespace
// Returns the location that DynamicLibraryManager::getSymbolLocation reports
// for this very function.
//
// The function is not referenced outside its translation unit, yet it must
// not be declared "static": its address is used for GetMainExecutable (some
// platforms don't support taking the address of main, and some platforms
// can't implement GetMainExecutable without being given the address of a
// function in the main executable).
std::string GetExecutablePath() {
  // Any symbol that lives in the binary would do; C++ does not allow taking
  // the address of ::main, so the function hands out its own address instead.
  auto* const ownAddress = &GetExecutablePath;
  return cling::DynamicLibraryManager::getSymbolLocation(ownAddress);
}
namespace cling {
void DynamicLibraryManager::initializeDyld(
std::function<bool(llvm::StringRef)> shouldPermanentlyIgnore) const {
assert(!s_IsDyldInitialized);
s_ShouldPermanentlyIgnoreCallback = shouldPermanentlyIgnore;
std::function<bool(llvm::StringRef)> shouldPermanentlyIgnore) {
assert(!m_Dyld && "Already initialized!");
std::string exeP = GetExecutablePath();
auto ObjF =
cantFail(llvm::object::ObjectFile::createObjectFile(exeP));
s_ExecutableFormat = ObjF.getBinary()->getFileFormatName();
s_IsDyldInitialized = true;
m_Dyld = new Dyld(*this, shouldPermanentlyIgnore,
ObjF.getBinary()->getFileFormatName());
}
std::string
DynamicLibraryManager::searchLibrariesForSymbol(const std::string& mangledName,
bool searchSystem/* = true*/) const {
assert(s_IsDyldInitialized && "Must call initialize dyld before!");
static Dyld& dyld = Dyld::getInstance(*this);
return dyld.searchLibrariesForSymbol(mangledName, searchSystem);
assert(m_Dyld && "Must call initialize dyld before!");
return m_Dyld->searchLibrariesForSymbol(mangledName, searchSystem);
}
std::string DynamicLibraryManager::getSymbolLocation(void *func) {