1
0
mirror of https://github.com/samba-team/samba.git synced 2025-01-12 09:18:10 +03:00
samba-mirror/buildtools/wafsamba/symbols.py
Joe Guo 9fa698b023 wafsamba/symbols: change regex to match both rpath and runpath for different readelf output
In the `wafsamba.duplicate_symbols` test, Popen is used to call:

    readelf --dynamic bin/default/source3/lib/netapi/examples/netlogon/netlogon_control2

then try to find rpath lib lines from output with regex:

    re_rpath     = re.compile(b'Library rpath: \[(.*)\]')

In ubuntu 14.04 docker image, which current CI is using, the actual output
from `readelf` is `runpath` instead of 'rpath':

    ...
    Library runpath: [/home/gitlab-runner/samba/bin/shared:/home/gitlab-runner/samba/bin/shared/private]\n'
    ...

So the regex never matched, which hid a bug.

In the Ubuntu 16.04 docker image, the output changes to `rpath` and matches the
regex, which exposes the error in the previous commit.

Improve the regex to match both `rpath` and `runpath`.

Signed-off-by: Joe Guo <joeg@catalyst.net.nz>
Reviewed-by: Alexander Bokovoy <ab@samba.org>
Reviewed-by: Andrew Bartlett <abartlet@samba.org>
2019-02-18 09:25:21 +01:00

660 lines
22 KiB
Python

# a waf tool to extract symbols from object files or libraries
# using nm, producing a set of exposed defined/undefined symbols
import os, re, subprocess
from waflib import Utils, Build, Options, Logs, Errors
from waflib.Logs import debug
from samba_utils import TO_LIST, LOCAL_CACHE, get_tgt_list, os_path_relpath
# these are the data structures used in symbols.py:
#
# bld.env.symbol_map : dictionary mapping public symbol names to list of
# subsystem names where that symbol exists
#
# t.in_library : list of libraries that t is in
#
# bld.env.public_symbols: set of public symbols for each subsystem
# bld.env.used_symbols : set of used symbols for each subsystem
#
# bld.env.syslib_symbols: dictionary mapping system library name to set of symbols
# for that library
# bld.env.library_dict : dictionary mapping built library paths to subsystem names
#
# LOCAL_CACHE(bld, 'TARGET_TYPE') : dictionary mapping subsystem name to target type
def _nm_objfile_key(name):
    '''return name as a byte string, the form in which file names appear
       in the (bytes) output read from the nm pipe'''
    if isinstance(name, bytes):
        return name
    # os.fsencode does not exist on Python 2
    fsencode = getattr(os, 'fsencode', None)
    if fsencode is not None:
        return fsencode(name)
    return name.encode('utf-8')


def symbols_extract(bld, objfiles, dynamic=False):
    '''extract symbols from objfile, returning a dictionary containing
       the set of undefined and public symbols for each file

       bld      : build context; bld.env.nm_cache is used as a result cache
       objfiles : iterable of object file / library paths
       dynamic  : if True, run "nm -D" (needed for some .so files)

       returns a dictionary mapping each file to
       { "PUBLIC": set(), "UNDEFINED": set() }. Symbol names are the raw
       byte strings read from the nm pipe.
    '''

    ret = {}

    # see if we can get some results from the nm cache
    if not bld.env.nm_cache:
        bld.env.nm_cache = {}

    remaining = set()
    for obj in set(objfiles):
        if obj in bld.env.nm_cache:
            ret[obj] = bld.env.nm_cache[obj].copy()
        else:
            remaining.add(obj)
    objfiles = remaining

    if not objfiles:
        return ret

    cmd = ["nm"]
    if dynamic:
        # needed for some .so files
        cmd.append("-D")
    cmd.extend(objfiles)

    # the "file:" headers are read as bytes; map them back to the keys the
    # caller passed in so that lookups by the original name keep working
    # when those names are text strings
    known = dict((_nm_objfile_key(obj), obj) for obj in objfiles)

    filename = None
    if len(objfiles) == 1:
        # presumably nm prints no "file:" header for a single plain file,
        # so pre-seed the entry
        filename = list(objfiles)[0]
        ret[filename] = { "PUBLIC": set(), "UNDEFINED" : set() }

    nm = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    try:
        for line in nm.stdout:
            line = line.strip()
            if line.endswith(b':'):
                raw = line[:-1]
                filename = known.get(raw, raw)
                ret[filename] = { "PUBLIC": set(), "UNDEFINED" : set() }
                continue
            cols = line.split(b" ")
            if len(cols) < 2 or filename is None:
                # blank or malformed line, or a symbol with no owning file
                continue
            # see if the line starts with an address
            if len(cols) == 3:
                symbol_type = cols[1]
                symbol = cols[2]
            else:
                symbol_type = cols[0]
                symbol = cols[1]
            if symbol_type in b"BDGTRVWSi":
                # its a public symbol
                ret[filename]["PUBLIC"].add(symbol)
            elif symbol_type in b"U":
                ret[filename]["UNDEFINED"].add(symbol)
    finally:
        # always release the pipe and reap the child process
        nm.stdout.close()
        nm.wait()

    # add to the cache
    for obj in objfiles:
        if obj in ret:
            bld.env.nm_cache[obj] = ret[obj].copy()
        else:
            bld.env.nm_cache[obj] = { "PUBLIC": set(), "UNDEFINED" : set() }

    return ret
def real_name(name):
    '''return name with a trailing ".objlist" removed

       Names that do not end in ".objlist" are returned unchanged.
    '''
    suffix = ".objlist"
    # only strip a real suffix; a plain substring match would chop the
    # last characters off names that merely contain ".objlist"
    if name.endswith(suffix):
        return name[:-len(suffix)]
    return name
def find_ldd_path(bld, libname, binary):
    '''find the path to the syslib we will link against

       bld     : build context; bld.env.syslib_paths caches results and
                 bld.env.libc_path is updated when a libc line is seen
       libname : prefix of the library file name (text or bytes)
       binary  : path of the binary to run ldd on

       returns the path printed by ldd (a byte string) or None
    '''
    if not bld.env.syslib_paths:
        bld.env.syslib_paths = {}
    if libname in bld.env.syslib_paths:
        return bld.env.syslib_paths[libname]

    # the ldd output is read as bytes, so compare against a bytes prefix
    # even when the caller passed a text string
    if isinstance(libname, bytes):
        prefix = libname
    else:
        prefix = libname.encode('utf-8')

    ret = None
    ldd = subprocess.Popen(['ldd', binary], stdout=subprocess.PIPE)
    try:
        for line in ldd.stdout:
            cols = line.strip().split(b" ")
            if len(cols) < 3 or cols[1] != b"=>":
                continue
            if cols[0].startswith(b"libc."):
                # save this one too
                bld.env.libc_path = cols[2]
            if cols[0].startswith(prefix):
                ret = cols[2]
    finally:
        # always release the pipe and reap the child process
        ldd.stdout.close()
        ldd.wait()

    bld.env.syslib_paths[libname] = ret
    return ret
# some regular expressions for parsing readelf output
# (raw byte literals, so that `\[` reaches the regex engine as written
# instead of being an invalid string escape)
re_sharedlib = re.compile(br'Shared library: \[(.*)\]')
# output from readelf could be `Library rpath` or `Library runpath`
re_rpath = re.compile(br'Library (rpath|runpath): \[(.*)\]')
def get_libs(bld, binname):
    '''find the list of linked libraries for any binary or library

       binname is the path to the binary/library on disk

       We do this using readelf instead of ldd as we need to avoid recursing
       into system libraries

       returns a set of resolved library paths; the result is also stored
       in bld.env.lib_cache
    '''

    # see if we can get the result from the ldd cache
    if not bld.env.lib_cache:
        bld.env.lib_cache = {}
    if binname in bld.env.lib_cache:
        return bld.env.lib_cache[binname].copy()

    rpath = []
    libs = set()

    readelf = subprocess.Popen(['readelf', '--dynamic', binname], stdout=subprocess.PIPE)
    try:
        for line in readelf.stdout:
            m = re_sharedlib.search(line)
            if m:
                libs.add(m.group(1))
            m = re_rpath.search(line)
            if m:
                # output from Popen is always bytestr even in py3
                rpath.extend(m.group(2).split(b":"))
    finally:
        # always release the pipe and reap the child process
        readelf.stdout.close()
        readelf.wait()

    ret = set()
    for lib in libs:
        for r in rpath:
            path = os.path.join(r, lib)
            if os.path.exists(path):
                ret.add(os.path.realpath(path))
                break
        else:
            # we didn't find this lib using rpath. It is probably a system
            # library, so to find the path to it we either need to use ldd
            # or we need to start parsing /etc/ld.so.conf* ourselves. We'll
            # use ldd for now, even though it is slow
            path = find_ldd_path(bld, lib, binname)
            if path:
                ret.add(os.path.realpath(path))

    bld.env.lib_cache[binname] = ret.copy()

    return ret
def get_libs_recursive(bld, binname, seen):
    '''return the set of libraries linked by binname, followed recursively

       binname is the path to the binary/library on disk. seen is a set of
       paths already visited; it is updated in place and stops loops.
    '''
    if binname in seen:
        return set()

    direct = get_libs(bld, binname)
    seen.add(binname)

    collected = direct
    for lib in direct:
        # only libraries listed in bld.env.library_dict are followed. If a
        # system library that we use (eg. libcups) happens to use another
        # library (such as libkrb5) which contains common symbols with our
        # own libraries, then that is not an error
        if lib not in bld.env.library_dict:
            continue
        collected = collected.union(get_libs_recursive(bld, lib, seen))
    return collected
def find_syslib_path(bld, libname, deps):
    '''locate the system library libname as it is resolved for a target

       deps is the list of targets using the library; ldd is run (through
       find_ldd_path) on the link output of the first of them.
    '''
    first_user = deps[0]
    linkpath = first_user.link_task.outputs[0].abspath(bld.env)

    if libname == "python":
        # the python library file name carries the version
        libname = libname + bld.env.PYTHON_VERSION

    soname_prefix = "lib%s" % libname.lower()
    return find_ldd_path(bld, soname_prefix, linkpath)
def _merge_symbols_by_subsystem(bld, tgt_list, attr, merged):
    '''fill merged with, per real subsystem name, the union of the `attr`
       symbol sets of the targets; a LIBRARY also absorbs the sets of the
       targets named in its add_objects'''
    for t in tgt_list:
        name = real_name(t.sname)
        if name in merged:
            merged[name] = merged[name].union(getattr(t, attr))
        else:
            merged[name] = getattr(t, attr)
        if t.samba_type == 'LIBRARY':
            for dep in t.add_objects:
                t2 = bld.get_tgen_by_name(dep)
                bld.ASSERT(t2 is not None, "Library '%s' has unknown dependency '%s'" % (name, dep))
                merged[name] = merged[name].union(getattr(t2, attr))


def build_symbol_sets(bld, tgt_list):
    '''build the public_symbols and undefined_symbols attributes for each target

       Sets t.public_symbols, t.undefined_symbols and t.used_symbols on
       every target, and fills bld.env.symbol_map, bld.env.public_symbols
       and bld.env.used_symbols. Does nothing if bld.env.public_symbols is
       already populated.
    '''

    if bld.env.public_symbols:
        return

    objlist = []  # list of object file
    objmap = {}   # map from object filename to target (subsystem) name

    for t in tgt_list:
        t.public_symbols = set()
        t.undefined_symbols = set()
        t.used_symbols = set()
        for tsk in getattr(t, 'compiled_tasks', []):
            for output in tsk.outputs:
                objpath = output.abspath(bld.env)
                objlist.append(objpath)
                objmap[objpath] = t

    symbols = symbols_extract(bld, objlist)
    for obj in objlist:
        t = objmap[obj]
        t.public_symbols = t.public_symbols.union(symbols[obj]["PUBLIC"])
        t.undefined_symbols = t.undefined_symbols.union(symbols[obj]["UNDEFINED"])
        t.used_symbols = t.used_symbols.union(symbols[obj]["UNDEFINED"])

    # a symbol that a target defines itself is not undefined for that
    # target. Do this for every target, once all of its objects have been
    # merged, so the result does not depend on object order
    for t in tgt_list:
        t.undefined_symbols = t.undefined_symbols.difference(t.public_symbols)

    # and the reverse map of public symbols to subsystem name
    bld.env.symbol_map = {}

    for t in tgt_list:
        for s in t.public_symbols:
            if s not in bld.env.symbol_map:
                bld.env.symbol_map[s] = []
            bld.env.symbol_map[s].append(real_name(t.sname))

    # the value is unused here; the call is kept in case LOCAL_CACHE
    # initialises the cache as a side effect - TODO confirm and drop
    targets = LOCAL_CACHE(bld, 'TARGET_TYPE')

    bld.env.public_symbols = {}
    _merge_symbols_by_subsystem(bld, tgt_list, 'public_symbols', bld.env.public_symbols)

    bld.env.used_symbols = {}
    _merge_symbols_by_subsystem(bld, tgt_list, 'used_symbols', bld.env.used_symbols)
def build_library_dict(bld, tgt_list):
    '''populate bld.env.library_dict, mapping the real path of each
       LIBRARY/PYTHON target's link output to the target's sname

       Does nothing if the dictionary is already populated.
    '''
    if bld.env.library_dict:
        return

    bld.env.library_dict = {}
    library_types = ('LIBRARY', 'PYTHON')
    for t in tgt_list:
        if t.samba_type not in library_types:
            continue
        output = t.link_task.outputs[0].abspath(bld.env)
        bld.env.library_dict[os.path.realpath(output)] = t.sname
def build_syslib_sets(bld, tgt_list):
    '''collect the public symbols of every system library in use

       Fills bld.env.syslib_symbols and bld.env.libc_symbols, and adds the
       system libraries to bld.env.symbol_map and bld.env.public_symbols.
       Does nothing if bld.env.syslib_symbols is already populated.
    '''

    if bld.env.syslib_symbols:
        return

    # which syslibs are used, and by which targets
    users = {}
    name_of = {}  # syslib path -> dependency name
    linked_types = ('LIBRARY', 'BINARY', 'PYTHON')
    for t in tgt_list:
        if not (getattr(t, 'uselib', []) and t.samba_type in linked_types):
            continue
        for lib in t.uselib:
            if lib in ('PYEMBED', 'PYEXT'):
                lib = "python"
            if lib not in users:
                users[lib] = []
            users[lib].append(t)

    # resolve each syslib to a path on disk
    paths = []
    for lib in users:
        path = find_syslib_path(bld, lib, users[lib])
        if path is None:
            Logs.warn("Unable to find syslib path for %s" % lib)
            continue
        paths.append(path)
        name_of[path] = lib.lower()

    # libc is always included
    libc = bld.env.libc_path
    paths.append(libc)
    name_of[libc] = 'c'

    symbols = symbols_extract(bld, paths, dynamic=True)

    # syslib path -> public symbols
    bld.env.syslib_symbols = {}
    for lib in symbols:
        bld.env.syslib_symbols[lib] = symbols[lib]["PUBLIC"]

    # symbol -> names of the dependencies providing it
    for lib in symbols:
        for sym in symbols[lib]["PUBLIC"]:
            if sym not in bld.env.symbol_map:
                bld.env.symbol_map[sym] = []
            bld.env.symbol_map[sym].append(name_of[lib])

    # the libc symbols are kept separately as well, as these are useful
    # for some of the sanity checks
    bld.env.libc_symbols = symbols[libc]["PUBLIC"]

    # dependency name -> public symbols, in the combined map
    for lib in bld.env.syslib_symbols:
        bld.env.public_symbols[name_of[lib]] = bld.env.syslib_symbols[lib]
def build_autodeps(bld, t):
    '''work out the dependencies of target t from its undefined symbols

       The result is stored as the set t.autodeps.
    '''
    deps = set()
    name = real_name(t.sname)

    targets = LOCAL_CACHE(bld, 'TARGET_TYPE')

    for sym in t.undefined_symbols:
        if sym in t.public_symbols or sym not in bld.env.symbol_map:
            continue

        providers = bld.env.symbol_map[sym]
        # self dependencies aren't interesting, and there is no need to
        # depend on the library we are part of
        if providers == [ name ] or t.in_library == providers:
            continue

        first = providers[0]
        if first in ('c', 'python'):
            # these don't go into autodeps
            continue

        if targets[first] == 'SYSLIB':
            deps.add(first)
            continue

        provider = bld.get_tgen_by_name(first)
        if len(provider.in_library) != 1:
            deps.add(first)
            continue

        # if we're part of the same library, we don't need to autodep
        if provider.in_library != t.in_library:
            deps.add(provider.in_library[0])

    t.autodeps = deps
def build_library_names(bld, tgt_list):
    '''give every target an in_library list naming the libraries it is part of

       For each LIBRARY target, the SUBSYSTEM/ASN1 targets found in its
       samba_deps_extended get the library's sname appended. Runs once;
       bld.env.done_build_library_names records completion.
    '''

    if bld.env.done_build_library_names:
        return

    for t in tgt_list:
        t.in_library = []

    member_types = ('SUBSYSTEM', 'ASN1')
    for library in tgt_list:
        if library.samba_type != 'LIBRARY':
            continue
        for depname in library.samba_deps_extended:
            member = bld.get_tgen_by_name(depname)
            if not member or member.samba_type not in member_types:
                continue
            if library.sname not in member.in_library:
                member.in_library.append(library.sname)

    bld.env.done_build_library_names = True
def check_library_deps(bld, t):
    '''warn when target t and one of its autodeps depend on each other
       but are not in the same library; also warn when t is in more than
       one library'''

    name = real_name(t.sname)

    if len(t.in_library) > 1:
        Logs.warn("WARNING: Target '%s' in multiple libraries: %s" % (t.sname, t.in_library))

    for dep in t.autodeps:
        other = bld.get_tgen_by_name(dep)
        if other is None:
            continue
        for back_dep in other.autodeps:
            if back_dep != name or t.in_library == other.in_library:
                continue
            Logs.warn("WARNING: mutual dependency %s <=> %s" % (name, real_name(other.sname)))
            Logs.warn("Libraries should match. %s != %s" % (t.in_library, other.in_library))
            # raise Errors.WafError("illegal mutual dependency")
def check_syslib_collisions(bld, tgt_list):
    '''fail if any target exports a symbol that a syslib also exports

       We do not want any code in Samba to use a symbol name from a
       system library. The chance of that causing problems is just too
       high. Note that libreplace uses a rep_XX approach of renaming
       symbols via macros

       Every collision is logged; Errors.WafError is raised afterwards if
       there was at least one.
    '''

    collisions = 0
    for t in tgt_list:
        for lib in bld.env.syslib_symbols:
            common = t.public_symbols.intersection(bld.env.syslib_symbols[lib])
            if not common:
                continue
            Logs.error("ERROR: Target '%s' has symbols '%s' which is also in syslib '%s'" % (t.sname, common, lib))
            collisions += 1

    if collisions:
        raise Errors.WafError("symbols in common with system libraries")
def check_dependencies(bld, t):
    '''check for dependencies that should be changed

       Logs each declared dependency that satisfies none of the undefined
       symbols of t, and each dependency t should add. Symbols provided by
       nothing in bld.env.symbol_map are stored in t.unsatisfied_symbols.
       Targets that have a matching ".objlist" target are skipped.
    '''

    if bld.get_tgen_by_name(t.sname + ".objlist"):
        return

    def as_text(sym):
        # symbol names read from nm are bytes; str.join needs text
        if isinstance(sym, bytes) and not isinstance(sym, str):
            return sym.decode('utf-8', 'replace')
        return sym

    targets = LOCAL_CACHE(bld, 'TARGET_TYPE')

    remaining = t.undefined_symbols.difference(t.public_symbols)

    sname = real_name(t.sname)

    for d in t.samba_deps:
        if targets[d] in [ 'EMPTY', 'DISABLED', 'SYSLIB', 'GENERATOR' ]:
            continue
        bld.ASSERT(d in bld.env.public_symbols, "Failed to find symbol list for dependency '%s'" % d)
        diff = remaining.intersection(bld.env.public_symbols[d])
        if not diff and targets[sname] != 'LIBRARY':
            Logs.info("Target '%s' has no dependency on %s" % (sname, d))
        else:
            remaining = remaining.difference(diff)

    t.unsatisfied_symbols = set()
    needed = {}
    for sym in remaining:
        if sym in bld.env.symbol_map:
            # attribute the symbol to the first provider listed for it
            dep = bld.env.symbol_map[sym]
            if dep[0] not in needed:
                needed[dep[0]] = set()
            needed[dep[0]].add(sym)
        else:
            t.unsatisfied_symbols.add(sym)

    for dep in needed:
        Logs.info("Target '%s' should add dep '%s' for symbols %s" % (sname, dep, " ".join(as_text(s) for s in needed[dep])))
def check_syslib_dependencies(bld, t):
    '''check for syslib dependencies

       Logs each syslib that t should add as a dependency, based on
       t.unsatisfied_symbols, and emits a debug message listing the symbols
       that nothing in bld.env.symbol_map provides. Targets that have a
       matching ".objlist" target are skipped.
    '''

    if bld.get_tgen_by_name(t.sname + ".objlist"):
        return

    def as_text(sym):
        # symbol names read from nm are bytes; str.join needs text
        if isinstance(sym, bytes) and not isinstance(sym, str):
            return sym.decode('utf-8', 'replace')
        return sym

    sname = real_name(t.sname)

    remaining = set()

    features = TO_LIST(t.features)
    if 'pyembed' in features or 'pyext' in features:
        if 'python' in bld.env.public_symbols:
            t.unsatisfied_symbols = t.unsatisfied_symbols.difference(bld.env.public_symbols['python'])

    needed = {}
    for sym in t.unsatisfied_symbols:
        if sym in bld.env.symbol_map:
            dep = bld.env.symbol_map[sym][0]
            if dep == 'c':
                # libc symbols are not reported
                continue
            if dep not in needed:
                needed[dep] = set()
            needed[dep].add(sym)
        else:
            remaining.add(sym)

    for dep in needed:
        Logs.info("Target '%s' should add syslib dep '%s' for symbols %s" % (sname, dep, " ".join(as_text(s) for s in needed[dep])))

    if remaining:
        debug("deps: Target '%s' has unsatisfied symbols: %s" % (sname, " ".join(as_text(s) for s in remaining)))
def symbols_symbolcheck(task):
    '''task rule: check the internal dependency lists of all targets'''
    bld = task.env.bld
    all_targets = get_tgt_list(bld)

    build_symbol_sets(bld, all_targets)
    build_library_names(bld, all_targets)

    # every target gets an autodeps set; it is only computed for targets
    # that have a non-empty 'source' attribute
    for tgt in all_targets:
        tgt.autodeps = set()
        if getattr(tgt, 'source', ''):
            build_autodeps(bld, tgt)

    for tgt in all_targets:
        check_dependencies(bld, tgt)

    for tgt in all_targets:
        check_library_deps(bld, tgt)
def symbols_syslibcheck(task):
    '''task rule: check the system library dependencies of all targets'''
    bld = task.env.bld
    all_targets = get_tgt_list(bld)

    build_syslib_sets(bld, all_targets)
    check_syslib_collisions(bld, all_targets)

    for tgt in all_targets:
        check_syslib_dependencies(bld, tgt)
def symbols_whyneeded(task):
    """task rule: report why 'target' needs to link to 'subsystem'

    The pair is read from the WHYNEEDED option as TARGET:DEPENDENCY;
    Errors.WafError is raised if it does not have exactly two parts.
    """
    bld = task.env.bld
    tgt_list = get_tgt_list(bld)

    parts = Options.options.WHYNEEDED.split(":")
    if len(parts) != 2:
        raise Errors.WafError("usage: WHYNEEDED=TARGET:DEPENDENCY")
    target, subsystem = parts

    build_symbol_sets(bld, tgt_list)
    build_library_names(bld, tgt_list)
    build_syslib_sets(bld, tgt_list)

    Logs.info("Checking why %s needs to link to %s" % (target, subsystem))

    if target not in bld.env.used_symbols:
        Logs.warn("unable to find target '%s' in used_symbols dict" % target)
        return
    if subsystem not in bld.env.public_symbols:
        Logs.warn("unable to find subsystem '%s' in public_symbols dict" % subsystem)
        return

    overlap = bld.env.used_symbols[target].intersection(bld.env.public_symbols[subsystem])
    if overlap:
        Logs.info("target '%s' uses symbols %s from '%s'" % (target, overlap, subsystem))
    else:
        Logs.info("target '%s' doesn't use any public symbols from '%s'" % (target, subsystem))
def report_duplicate(bld, binname, sym, libs, fail_on_error):
    '''report duplicated symbols

       binname       : the binary being checked
       sym           : the duplicated symbol (text or bytes)
       libs          : the libraries that all define sym
       fail_on_error : raise Errors.WafError instead of printing

       A fixed list of symbols (_init, _fini, ...) is never reported.
    '''
    # symbols read from nm are bytes; compare in text form so that the
    # ignore list also matches them
    symname = sym
    if isinstance(sym, bytes) and not isinstance(sym, str):
        symname = sym.decode('utf-8', 'replace')
    if symname in ['_init', '_fini', '_edata', '_end', '__bss_start']:
        return

    # show subsystem names where known, raw paths otherwise
    libnames = []
    for lib in libs:
        if lib in bld.env.library_dict:
            libnames.append(bld.env.library_dict[lib])
        else:
            libnames.append(lib)

    if fail_on_error:
        raise Errors.WafError("%s: Symbol %s linked in multiple libraries %s" % (binname, sym, libnames))
    else:
        print("%s: Symbol %s linked in multiple libraries %s" % (binname, sym, libnames))
def symbols_dupcheck_binary(bld, binname, fail_on_error):
    '''check for duplicated symbols in one binary

       Collects the public symbols of every library binname links to
       (recursively) and reports each symbol defined by more than one of
       them, provided at least one definer is in bld.env.library_dict.
    '''

    libs = get_libs_recursive(bld, binname, set())
    symlist = symbols_extract(bld, libs, dynamic=True)

    # symbols read from nm are bytes; accept both forms of the name
    skipped = (b'_GLOBAL_OFFSET_TABLE_', u'_GLOBAL_OFFSET_TABLE_')

    symmap = {}
    for libpath in symlist:
        for sym in symlist[libpath]['PUBLIC']:
            if isinstance(sym, bytes):
                if sym == skipped[0]:
                    continue
            elif sym == skipped[1]:
                continue
            if sym not in symmap:
                symmap[sym] = set()
            symmap[sym].add(libpath)

    for sym in symmap:
        if len(symmap[sym]) > 1:
            for libpath in symmap[sym]:
                if libpath in bld.env.library_dict:
                    # one report per symbol is enough
                    report_duplicate(bld, binname, sym, symmap[sym], fail_on_error)
                    break
def symbols_dupcheck(task, fail_on_error=False):
    '''task rule: run the duplicate symbol check on every BINARY target

       fail_on_error is passed through to symbols_dupcheck_binary.
    '''
    bld = task.env.bld
    all_targets = get_tgt_list(bld)

    targets = LOCAL_CACHE(bld, 'TARGET_TYPE')
    build_library_dict(bld, all_targets)

    for tgt in all_targets:
        if tgt.samba_type != 'BINARY':
            continue
        linked = tgt.link_task.outputs[0].abspath(bld.env)
        binname = os_path_relpath(linked, os.getcwd())
        symbols_dupcheck_binary(bld, binname, fail_on_error)
def symbols_dupcheck_fatal(task):
    '''task rule: same check as symbols_dupcheck, with fail_on_error enabled'''
    return symbols_dupcheck(task, fail_on_error=True)
def SYMBOL_CHECK(bld):
    '''schedule the dependency checking tasks selected by the
       SYMBOLCHECK and WHYNEEDED options'''

    def schedule(group, rule, name):
        # each check is an always-run task that finds bld via its env
        bld.SET_BUILD_GROUP(group)
        task = bld(rule=rule, always=True, name=name)
        task.env.bld = bld

    if Options.options.SYMBOLCHECK:
        schedule('symbolcheck', symbols_symbolcheck, 'symbol checking')
        schedule('syslibcheck', symbols_syslibcheck, 'syslib checking')
        schedule('syslibcheck', symbols_dupcheck, 'symbol duplicate checking')

    if Options.options.WHYNEEDED:
        schedule('syslibcheck', symbols_whyneeded, 'check why a dependency is needed')


Build.BuildContext.SYMBOL_CHECK = SYMBOL_CHECK
def DUP_SYMBOL_CHECK(bld):
    '''schedule the fatal duplicate symbol check

       Only done when the DUP_SYMBOLCHECK option is set and
       bld.env.DEVELOPER is true.
    '''
    if not (Options.options.DUP_SYMBOLCHECK and bld.env.DEVELOPER):
        return

    bld.SET_BUILD_GROUP('syslibcheck')
    task = bld(rule=symbols_dupcheck_fatal, always=True, name='symbol duplicate checking')
    task.env.bld = bld


Build.BuildContext.DUP_SYMBOL_CHECK = DUP_SYMBOL_CHECK