mirror of
https://github.com/samba-team/samba.git
synced 2024-12-23 17:34:34 +03:00
90ec37cf90
Signed-Off-By: Jelmer Vernooij <jelmer@samba.org> Reviewed-by: Jeremy Allison <jra@samba.org>
1200 lines
34 KiB
Python
1200 lines
34 KiB
Python
#!/usr/bin/env python
|
|
# encoding: utf-8
|
|
# Thomas Nagy, 2005-2008 (ita)
|
|
|
|
"""
|
|
Running tasks in parallel is a simple problem, but in practice it is more complicated:
|
|
* dependencies discovered during the build (dynamic task creation)
|
|
* dependencies discovered after files are compiled
|
|
* the amount of tasks and dependencies (graph size) can be huge
|
|
|
|
This is why the dependency management is split on three different levels:
|
|
1. groups of tasks that run all after another group of tasks
|
|
2. groups of tasks that can be run in parallel
|
|
3. tasks that can run in parallel, but with possible unknown ad-hoc dependencies
|
|
|
|
The point #1 represents a strict sequential order between groups of tasks, for example a compiler is produced
|
|
and used to compile the rest, whereas #2 and #3 represent partial order constraints where #2 applies to the kind of task
|
|
and #3 applies to the task instances.
|
|
|
|
#1 is held by the task manager: ordered list of TaskGroups (see bld.add_group)
|
|
#2 is held by the task groups and the task types: precedence after/before (topological sort),
|
|
and the constraints extracted from file extensions
|
|
#3 is held by the tasks individually (attribute run_after),
|
|
and the scheduler (Runner.py) uses Task::runnable_status to reorder the tasks
|
|
|
|
--
|
|
|
|
To try, use something like this in your code:
|
|
import Constants, Task
|
|
Task.algotype = Constants.MAXPARALLEL
|
|
|
|
--
|
|
|
|
There are two concepts with the tasks (individual units of change):
|
|
* dependency (if 1 is recompiled, recompile 2)
|
|
* order (run 2 after 1)
|
|
|
|
example 1: if t1 depends on t2 and t2 depends on t3 it is not necessary to make t1 depend on t3 (dependency is transitive)
|
|
example 2: if t1 depends on a node produced by t2, it is not immediately obvious that t1 must run after t2 (order is not obvious)
|
|
|
|
The role of the Task Manager is to give the tasks in order (groups of task that may be run in parallel one after the other)
|
|
|
|
"""
|
|
|
|
import os, shutil, sys, re, random, datetime, tempfile, shlex
|
|
from Utils import md5
|
|
import Build, Runner, Utils, Node, Logs, Options
|
|
from Logs import debug, warn, error
|
|
from Constants import *
|
|
|
|
# Scheduling algorithm read by TaskGroup.get_next_set(): one of NORMAL,
# JOBCONTROL or MAXPARALLEL (any other value makes get_next_set raise WafError).
algotype = NORMAL
|
|
#algotype = JOBCONTROL
|
|
#algotype = MAXPARALLEL
|
|
|
|
# Source templates for the functions generated by compile_fun_shell() and
# compile_fun_noshell(); the filled-in text is passed to funex(), which exec()s
# it and returns the resulting function 'f'.
#
# The body lines of the generated function must be indented, otherwise exec()
# rejects the source. The indentation is spelled with explicit "\t" escapes so
# that it survives any whitespace normalization of this file; tabs are used
# because the generated statements are joined with "\n\t" / ",\n\t\t".

# Filled with (command line, parameter expression): the first '%s' receives the
# command line, the '% s' conversion receives the "% (...)" parameter tuple.
COMPILE_TEMPLATE_SHELL = '''
def f(task):
\tenv = task.env
\twd = getattr(task, 'cwd', None)
\tp = env.get_flat
\tcmd = \'\'\' %s \'\'\' % s
\treturn task.exec_command(cmd, cwd=wd)
'''

# Filled with the statements that build the argument list 'lst'; empty
# arguments are discarded before the command is executed.
COMPILE_TEMPLATE_NOSHELL = '''
def f(task):
\tenv = task.env
\twd = getattr(task, 'cwd', None)
\tdef to_list(xx):
\t\tif isinstance(xx, str): return [xx]
\t\treturn xx
\tlst = []
\t%s
\tlst = [x for x in lst if x]
\treturn task.exec_command(lst, cwd=wd)
'''
|
|
|
|
|
|
"""
|
|
Enable different kind of dependency algorithms:
|
|
1 make groups: first compile all cpps and then compile all links (NORMAL)
|
|
2 parallelize all (each link task run after its dependencies) (MAXPARALLEL)
|
|
3 like 1 but provide additional constraints for the parallelization (JOBCONTROL)
|
|
|
|
In theory 1. will be faster than 2 for waf, but might be slower for builds
|
|
The scheme 2 will not allow for running tasks one by one so it can cause disk thrashing on huge builds
|
|
"""
|
|
|
|
# Hook called by TaskGroup.prepare() with the list of tasks of the group,
# before the constraint groups are computed. Replace it to add a dependency
# pre-check (Utils.nada is presumably a no-op placeholder -- confirm in Utils).
file_deps = Utils.nada
|
|
"""
|
|
Additional dependency pre-check may be added by replacing the function file_deps.
|
|
e.g. extract_outputs, extract_deps below.
|
|
"""
|
|
|
|
class TaskManager(object):
    """The manager is attached to the build object, it holds a list of TaskGroup.

    Groups are consumed in list order by get_next_set(); new tasks and task
    generators are always added to the group at index 'current_group'.
    """

    def __init__(self):
        self.groups_names = {}   # group name -> TaskGroup
        self.groups = []         # TaskGroup objects, in execution order
        self.current_group = 0   # index of the group being filled / consumed
        self.tasks_done = []     # tasks reported through add_finished()

    def group_name(self, g):
        """Return the name registered for the group g, or '' if there is none.

        g is either a TaskGroup or an index into self.groups.
        """
        group = g if isinstance(g, TaskGroup) else self.groups[g]
        for name, candidate in self.groups_names.items():
            if candidate is group:
                return name
        return ''

    def group_idx(self, tg):
        """Return the index of the group holding the task generator tg, or None."""
        for idx, group in enumerate(self.groups):
            if any(gen is tg for gen in group.tasks_gen):
                return idx
        return None

    def get_next_set(self):
        """Return the next set of tasks to execute.

        The value comes from the current group's get_next_set(); when a group
        has nothing left, its post functions are run (process_install) and the
        next group is tried. Returns (None, None) once all groups are consumed.
        """
        while self.current_group < len(self.groups):
            group = self.groups[self.current_group]
            batch = group.get_next_set()
            if batch:
                return batch
            # the group is exhausted: run its post functions, then move on
            group.process_install()
            self.current_group += 1
        return (None, None)

    def add_group(self, name=None, set=True):
        """Append a new TaskGroup, optionally named, and make it current if 'set' is true.

        A duplicate name is only reported through error(); the new group then
        replaces the previous one in groups_names. Unnamed groups are stored
        under the key None.
        """
        group = TaskGroup()
        if name and name in self.groups_names:
            error('add_group: name %s already present' % name)
        self.groups_names[name] = group
        self.groups.append(group)
        if set:
            self.current_group = len(self.groups) - 1

    def set_group(self, idx):
        """Select the current group, either by name (str) or by index."""
        if not isinstance(idx, str):
            self.current_group = idx
            return
        wanted = self.groups_names[idx]
        for pos, group in enumerate(self.groups):
            if group is wanted:
                self.current_group = pos

    def add_task_gen(self, tgen):
        """Add a task generator to the current group (creating a first group if needed)."""
        if not self.groups:
            self.add_group()
        current = self.groups[self.current_group]
        current.tasks_gen.append(tgen)

    def add_task(self, task):
        """Add a task to the current group (creating a first group if needed)."""
        if not self.groups:
            self.add_group()
        current = self.groups[self.current_group]
        current.tasks.append(task)

    def total(self):
        """Return the number of tasks currently held in the 'tasks' lists of all groups."""
        return sum(len(group.tasks) for group in self.groups)

    def add_finished(self, tsk):
        """Record a finished task and, when the build is installing, install its results.

        An 'install' entry set on the task instance overrides Task.install():
        it is called with the task if it is true, and a false value
        (e.g. install=0) prevents any installation.
        """
        self.tasks_done.append(tsk)
        bld = tsk.generator.bld
        if not bld.is_install:
            return
        try:
            hook = tsk.__dict__['install']
        except KeyError:
            tsk.install()
        else:
            if hook:
                hook(tsk)
|
|
|
|
class TaskGroup(object):
    """The compilation of one group does not begin until the previous group has finished (in the manager).

    Inside a group, tasks are bucketed by Task.hash_constraints() and the
    buckets are partially ordered using the ext_in/ext_out and before/after
    attributes of one representative task per bucket.
    """

    def __init__(self):
        self.tasks = []      # this list will be consumed
        self.tasks_gen = []

        # hash_constraints() value -> tasks having equivalent constraints
        self.cstr_groups = Utils.DefaultDict(list)
        # key -> set of keys whose tasks must run after the tasks of 'key'
        self.cstr_order = Utils.DefaultDict(set)
        self.temp_tasks = []  # tasks put on hold
        self.ready = 0        # set to 1 by prepare()
        self.post_funs = []   # (function, args, kwargs) run by process_install()

    def reset(self):
        "clears the state of the object (put back the tasks into self.tasks)"
        for held in self.cstr_groups.values():
            self.tasks.extend(held)
        self.tasks = self.temp_tasks + self.tasks
        self.temp_tasks = []
        self.cstr_groups = Utils.DefaultDict(list)
        self.cstr_order = Utils.DefaultDict(set)
        self.ready = 0

    def process_install(self):
        "call every registered post function with its stored arguments"
        for (fun, args, kwargs) in self.post_funs:
            fun(*args, **kwargs)

    def prepare(self):
        "prepare the scheduling"
        self.ready = 1
        file_deps(self.tasks)
        self.make_cstr_groups()
        self.extract_constraints()

    def get_next_set(self):
        "next list of tasks to execute using max job settings, returns (maxjobs, task_list), or () when nothing is left"
        if algotype == NORMAL:
            tasks = self.tasks_in_parallel()
            maxj = MAXJOBS
        elif algotype == JOBCONTROL:
            (maxj, tasks) = self.tasks_by_max_jobs()
        elif algotype == MAXPARALLEL:
            tasks = self.tasks_with_inner_constraints()
            maxj = MAXJOBS
        else:
            raise Utils.WafError("unknown algorithm type %s" % (algotype))

        if tasks:
            return (maxj, tasks)
        return ()

    def make_cstr_groups(self):
        "unite the tasks that have similar constraints"
        buckets = self.cstr_groups = Utils.DefaultDict(list)
        for tsk in self.tasks:
            buckets[tsk.hash_constraints()].append(tsk)

    def set_order(self, a, b):
        "record that the tasks of group b must run after the tasks of group a"
        self.cstr_order[a].add(b)

    def compare_exts(self, t1, t2):
        """extension production

        returns -1 if t1 consumes an extension produced by t2,
        1 if t2 consumes an extension produced by t1, else 0
        """
        consumed = t1.attr("ext_in", ())
        produced = t2.attr("ext_out", ())
        if any(ext in produced for ext in consumed):
            return -1

        consumed = t2.attr("ext_in", ())
        produced = t1.attr("ext_out", ())
        if any(ext in produced for ext in consumed):
            return 1
        return 0

    def compare_partial(self, t1, t2):
        """partial relations after/before, matched on class names

        returns -1 if t1 must run after t2, 1 if t1 must run before t2, else 0
        """
        # 'sign' is the value returned when 'tsk' declares it runs after 'other'
        for tsk, other, sign in ((t1, t2, -1), (t2, t1, 1)):
            other_name = other.__class__.__name__
            if other_name in Utils.to_list(tsk.attr("after", ())):
                return sign
            if other_name in Utils.to_list(tsk.attr("before", ())):
                return -sign
        return 0

    def extract_constraints(self):
        "extract the parallelization constraints from the tasks with different constraints"
        keys = list(self.cstr_groups.keys())
        # hopefully the length of this list is short
        for pos, key_a in enumerate(keys):
            # the first task of each bucket represents the whole bucket
            t1 = self.cstr_groups[key_a][0]
            for key_b in keys[pos + 1:]:
                t2 = self.cstr_groups[key_b][0]

                # add the constraints based on the comparisons
                val = self.compare_exts(t1, t2) or self.compare_partial(t1, t2)
                if val > 0:
                    self.set_order(key_a, key_b)
                elif val < 0:
                    self.set_order(key_b, key_a)

    def tasks_in_parallel(self):
        "(NORMAL) next list of tasks that may be executed in parallel"
        if not self.ready:
            self.prepare()

        free = []     # buckets that no remaining bucket has to precede
        blocked = []  # buckets still waiting for another bucket
        for key in self.cstr_groups.keys():
            waits = any(key in successors for successors in self.cstr_order.values())
            if waits:
                blocked.append(key)
            else:
                free.append(key)

        runnable = []
        for key in free:
            runnable.extend(self.cstr_groups[key])

        # remove stuff only after
        for key in free:
            try:
                del self.cstr_order[key]
            except KeyError:
                pass
            del self.cstr_groups[key]

        if blocked and not runnable:
            raise Utils.WafError("circular order constraint detected %r" % blocked)

        return runnable

    def tasks_by_max_jobs(self):
        "(JOBCONTROL) returns the tasks that can run in parallel with the max amount of jobs"
        if not self.ready:
            self.prepare()
        if not self.temp_tasks:
            self.temp_tasks = self.tasks_in_parallel()
        if not self.temp_tasks:
            return (None, None)

        # value used for tasks without a 'maxjobs' attribute; the lookup is made
        # on the class of this group object
        fallback = getattr(self.__class__, "maxjobs", MAXJOBS)

        lowest = MAXJOBS
        selected = []   # tasks sharing the lowest maxjobs value seen so far
        postponed = []  # tasks kept for a later call
        for tsk in self.temp_tasks:
            limit = getattr(tsk, "maxjobs", fallback)
            if limit > lowest:
                postponed.append(tsk)
            elif limit < lowest:
                # a stricter limit was found: postpone what was selected so far
                postponed += selected
                selected = [tsk]
                lowest = limit
            else:
                selected.append(tsk)
        self.temp_tasks = postponed
        return (lowest, selected)

    def tasks_with_inner_constraints(self):
        """(MAXPARALLEL) returns all tasks in this group, but add the constraints on each task instance
        as an optimization, it might be desirable to discard the tasks which do not have to run"""
        if not self.ready:
            self.prepare()

        # the tasks are handed out only once
        if getattr(self, "done", None):
            return None

        for first_key, later_keys in self.cstr_order.items():
            for later_key in later_keys:
                for earlier in self.cstr_groups[first_key]:
                    for later in self.cstr_groups[later_key]:
                        later.set_run_after(earlier)
        self.cstr_order = Utils.DefaultDict(set)
        self.cstr_groups = Utils.DefaultDict(list)
        self.done = 1
        return list(self.tasks)  # make a copy
|
|
|
|
class store_task_type(type):
    """Metaclass registering the task classes into TaskBase.classes (remember the existing task types).

    A class is stored under its name with '_task' removed when the name ends
    with '_task', or under its plain name otherwise; only the class named
    'TaskBase' is not registered.
    """
    def __init__(cls, name, bases, dict):
        super(store_task_type, cls).__init__(name, bases, dict)

        key = cls.__name__
        if key.endswith('_task'):
            # note: replace() removes every occurrence of '_task', not only the suffix
            key = key.replace('_task', '')
        if key != 'TaskBase':
            TaskBase.classes[key] = cls
|
|
|
|
class TaskBase(object):
    """Base class for all Waf tasks

    The most important methods are (by usual order of call):
    1 runnable_status: ask the task if it should be run, skipped, or if we have to ask later
    2 __str__: string to display to the user
    3 run: execute the task
    4 post_run: after the task is run, update the cache about the task

    This class should be seen as an interface, it provides the very minimum necessary for the scheduler
    so it does not do much.

    For illustration purposes, TaskBase instances try to execute self.fun (if provided)
    """

    __metaclass__ = store_task_type

    color = "GREEN"    # color name passed to Logs.colors() by display()
    maxjobs = MAXJOBS  # part of hash_constraints(); read by TaskGroup.tasks_by_max_jobs()
    classes = {}       # task type name -> class, filled by store_task_type
    stat = None

    def __init__(self, *k, **kw):
        """Create the task and, unless normal=0 is given, add it to the task manager.

        Keyword arguments:
        generator -- object owning the task (must provide 'bld'); when absent the
                     task is its own generator and uses Build.bld
        normal    -- if false, the task is not added to the task manager
        """
        self.hasrun = NOT_RUN

        try:
            self.generator = kw['generator']
        except KeyError:
            self.generator = self
            self.bld = Build.bld

        if kw.get('normal', 1):
            self.generator.bld.task_manager.add_task(self)

    def __repr__(self):
        "used for debugging"
        return '\n\t{task: %s %s}' % (self.__class__.__name__, str(getattr(self, "fun", "")))

    def __str__(self):
        "string to display to the user"
        if hasattr(self, 'fun'):
            return 'executing: %s\n' % self.fun.__name__
        return self.__class__.__name__ + '\n'

    def exec_command(self, *k, **kw):
        """use this for executing commands from tasks

        The arguments are forwarded to bld.exec_command(); if the task has an
        environment with a true 'env' entry, that entry is passed as the 'env'
        keyword argument.
        """
        # TODO in waf 1.6, eliminate bld.exec_command, and move the cwd processing to here
        # TaskBase.__init__ does not set 'env' (only Task does), so do not
        # assume the attribute exists
        env = getattr(self, 'env', None)
        if env is not None and env['env']:
            kw['env'] = env['env']
        return self.generator.bld.exec_command(*k, **kw)

    def runnable_status(self):
        "RUN_ME SKIP_ME or ASK_LATER"
        return RUN_ME

    def can_retrieve_cache(self):
        "tell if the results could be restored from a cache (never, for the base class)"
        return False

    def call_run(self):
        "return 0 if the results were restored from the cache, else the value returned by run()"
        if self.can_retrieve_cache():
            return 0
        return self.run()

    def run(self):
        "called if the task must run; returns the result of self.fun(self), or 0 if there is no self.fun"
        if hasattr(self, 'fun'):
            return self.fun(self)
        return 0

    def post_run(self):
        "update the dependency tree (node stats)"
        pass

    def display(self):
        """return either the description (using __str__) or the progress bar or the ide output

        The format depends on Options.options.progress_bar (1: progress line,
        2: machine-readable line, anything else: '[current/total] description').
        self.position is expected to hold (current, total); it is not set in
        this class.
        """
        col1 = Logs.colors(self.color)
        col2 = Logs.colors.NORMAL

        if Options.options.progress_bar == 1:
            return self.generator.bld.progress_line(self.position[0], self.position[1], col1, col2)

        if Options.options.progress_bar == 2:
            ela = Utils.get_elapsed_time(self.generator.bld.ini)
            # plain TaskBase instances have no inputs/outputs
            try:
                ins = ','.join([n.name for n in self.inputs])
            except AttributeError:
                ins = ''
            try:
                outs = ','.join([n.name for n in self.outputs])
            except AttributeError:
                outs = ''
            return '|Total %s|Current %s|Inputs %s|Outputs %s|Time %s|\n' % (self.position[1], self.position[0], ins, outs, ela)

        total = self.position[1]
        n = len(str(total))
        # pad both counters to the width of the total
        fs = '[%%%dd/%%%dd] %%s%%s%%s' % (n, n)
        return fs % (self.position[0], self.position[1], col1, str(self), col2)

    def attr(self, att, default=None):
        "retrieve an attribute from the instance or from the class (microoptimization here)"
        # 'self' is used as the "missing" marker so that None remains a valid value
        ret = getattr(self, att, self)
        if ret is self:
            return getattr(self.__class__, att, default)
        return ret

    def hash_constraints(self):
        "identify a task type for all the constraints relevant for the scheduler: precedence, file production"
        a = self.attr
        return hash((self.__class__.__name__,
            str(a('before', '')),
            str(a('after', '')),
            str(a('ext_in', '')),
            str(a('ext_out', '')),
            self.__class__.maxjobs))

    def format_error(self):
        "error message to display to the user (when a build fails)"
        if getattr(self, "err_msg", None):
            return self.err_msg
        elif self.hasrun == CRASHED:
            # err_code may not have been set
            try:
                return " -> task failed (err #%d): %r" % (self.err_code, self)
            except AttributeError:
                return " -> task failed: %r" % self
        elif self.hasrun == MISSING:
            return " -> missing files: %r" % self
        else:
            return ''

    def install(self):
        """
        installation is performed by looking at the task attributes:
        * install_path: installation path like "${PREFIX}/bin"
        * filename: install the first node in the outputs as a file with a particular name, be certain to give os.sep
        * chmod: permissions
        * src: if set, the input nodes are installed too

        Nothing is done when install_path is not set.
        """
        bld = self.generator.bld

        if not self.attr('install_path'):
            return

        lst = [a.relpath_gen(bld.srcnode) for a in self.outputs]
        perm = self.attr('chmod', O644)
        if self.attr('src'):
            # if src is given, install the sources too
            lst += [a.relpath_gen(bld.srcnode) for a in self.inputs]
        if self.attr('filename'):
            dest = self.install_path.rstrip(os.sep) + os.sep + self.attr('filename')
            bld.install_as(dest, lst[0], self.env, perm)
        else:
            bld.install_files(self.install_path, lst, self.env, perm)
|
|
|
|
class Task(TaskBase):
    """The parent class is quite limited, in this version:
    * file system interaction: input and output nodes
    * persistence: do not re-execute tasks that have already run
    * caching: same files can be saved and retrieved from a cache directory
    * dependencies:
      implicit, like .c files depending on .h files
      explicit, like the input nodes or the dep_nodes
      environment variables, like the CXXFLAGS in self.env
    """
    # names of the environment variables hashed by sig_vars()
    vars = []

    def __init__(self, env, **kw):
        "create the task for the environment 'env'; the keyword arguments are passed to TaskBase.__init__"
        TaskBase.__init__(self, **kw)
        self.env = env

        # inputs and outputs are nodes
        # use setters when possible
        self.inputs = []
        self.outputs = []

        self.dep_nodes = []  # additional nodes hashed by sig_explicit_deps()
        self.run_after = []  # tasks that must have run before this one

        # Additionally, you may define the following
        #self.dep_vars = 'PREFIX DATADIR'

    def __str__(self):
        "string to display to the user"
        env = self.env
        src_str = ' '.join([a.nice_path(env) for a in self.inputs])
        tgt_str = ' '.join([a.nice_path(env) for a in self.outputs])
        if self.outputs:
            sep = ' -> '
        else:
            sep = ''
        return '%s: %s%s%s\n' % (self.__class__.__name__.replace('_task', ''), src_str, sep, tgt_str)

    def __repr__(self):
        "used for debugging"
        return "".join(['\n\t{task: ', self.__class__.__name__, " ", ",".join([x.name for x in self.inputs]), " -> ", ",".join([x.name for x in self.outputs]), '}'])

    def unique_id(self):
        "get a unique id: hash the class name, the variant and the node paths (cached in self.uid)"
        try:
            return self.uid
        except AttributeError:
            # this is not a real hot zone, but we want to avoid surprises here
            m = md5()
            up = m.update
            up(self.__class__.__name__)
            up(self.env.variant())
            p = None
            for x in self.inputs + self.outputs:
                # hash the folder path only when it differs from the previous node's
                if p != x.parent.id:
                    p = x.parent.id
                    up(x.parent.abspath())
                up(x.name)
            self.uid = m.digest()
            return self.uid

    def set_inputs(self, inp):
        "add one node or a list of nodes to the inputs"
        if isinstance(inp, list):
            self.inputs += inp
        else:
            self.inputs.append(inp)

    def set_outputs(self, out):
        "add one node or a list of nodes to the outputs"
        if isinstance(out, list):
            self.outputs += out
        else:
            self.outputs.append(out)

    def set_run_after(self, task):
        "set (scheduler) order on another task"
        # TODO: handle list or object
        assert isinstance(task, TaskBase)
        self.run_after.append(task)

    def add_file_dependency(self, filename):
        "TODO user-provided file dependencies"
        node = self.generator.bld.path.find_resource(filename)
        self.dep_nodes.append(node)

    def signature(self):
        """Return the task signature (md5 digest), computing it on the first call.

        The result and its components are cached in self.cache_sig as
        (signature, explicit deps, implicit deps, env vars).
        """
        # compute the result one time, and suppose the scan_signature will give the good result
        try:
            return self.cache_sig[0]
        except AttributeError:
            pass

        self.m = md5()

        # explicit deps
        exp_sig = self.sig_explicit_deps()

        # env vars
        var_sig = self.sig_vars()

        # implicit deps
        imp_sig = SIG_NIL
        if self.scan:
            try:
                imp_sig = self.sig_implicit_deps()
            except ValueError:
                # sig_implicit_deps dropped the stale entry and asked for a rescan: start over
                return self.signature()

        # we now have the signature (first element) and the details (for debugging)
        ret = self.m.digest()
        self.cache_sig = (ret, exp_sig, imp_sig, var_sig)
        return ret

    def runnable_status(self):
        "SKIP_ME RUN_ME or ASK_LATER"
        #return 0 # benchmarking

        if self.inputs and (not self.outputs):
            if not getattr(self.__class__, 'quiet', None):
                warn("invalid task (no inputs OR outputs): override in a Task subclass or set the attribute 'quiet' %r" % self)

        # wait for the tasks this one was ordered after
        for t in self.run_after:
            if not t.hasrun:
                return ASK_LATER

        env = self.env
        bld = self.generator.bld

        # first compute the signature
        new_sig = self.signature()

        # compare the signature to a signature computed previously
        key = self.unique_id()
        try:
            prev_sig = bld.task_sigs[key][0]
        except KeyError:
            debug("task: task %r must run as it was never run before or the task code changed", self)
            return RUN_ME

        # compare the signatures of the outputs
        for node in self.outputs:
            variant = node.variant(env)
            try:
                if bld.node_sigs[variant][node.id] != new_sig:
                    return RUN_ME
            except KeyError:
                debug("task: task %r must run as the output nodes do not exist", self)
                return RUN_ME

        # debug if asked to
        if Logs.verbose:
            self.debug_why(bld.task_sigs[key])

        if new_sig != prev_sig:
            return RUN_ME
        return SKIP_ME

    def post_run(self):
        """called after a successful task run

        Checks that every output file exists (else the task is marked MISSING
        and Utils.WafError is raised), stores the signatures for the next run
        and, when the global cache is enabled, copies the outputs into it.
        """
        bld = self.generator.bld
        env = self.env
        sig = self.signature()
        ssig = sig.encode('hex')

        variant = env.variant()
        for node in self.outputs:
            # check if the node exists ..
            try:
                os.stat(node.abspath(env))
            except OSError:
                self.hasrun = MISSING
                self.err_msg = '-> missing file: %r' % node.abspath(env)
                raise Utils.WafError

            # important, store the signature for the next run
            bld.node_sigs[variant][node.id] = sig
        bld.task_sigs[self.unique_id()] = self.cache_sig

        # file caching, if possible
        # try to avoid data corruption as much as possible
        if not Options.cache_global or Options.options.nocache or not self.outputs:
            return None

        # the outputs were just restored from the cache, do not store them again
        if getattr(self, 'cached', None):
            return None

        dname = os.path.join(Options.cache_global, ssig)
        tmpdir = tempfile.mkdtemp(prefix=Options.cache_global + os.sep + 'waf')

        # best effort: remove a previous entry for the same signature
        try:
            shutil.rmtree(dname)
        except Exception:
            pass

        # copy into a temporary folder first, then rename it to its final name
        try:
            for i, node in enumerate(self.outputs):
                dest = os.path.join(tmpdir, str(i) + node.name)
                shutil.copy2(node.abspath(env), dest)
        except (OSError, IOError):
            try:
                shutil.rmtree(tmpdir)
            except Exception:
                pass
        else:
            try:
                os.rename(tmpdir, dname)
            except OSError:
                try:
                    shutil.rmtree(tmpdir)
                except Exception:
                    pass
            else:
                try:
                    os.chmod(dname, O755)
                except Exception:
                    pass

    def can_retrieve_cache(self):
        """
        Retrieve build nodes from the cache
        update the file timestamps to help cleaning the least used entries from the cache
        additionally, set an attribute 'cached' to avoid re-creating the same cache files

        suppose there are files in cache/dir1/file1 and cache/dir2/file2
        first, read the timestamp of dir1
        then try to copy the files
        then look at the timestamp again, if it has changed, the data may have been corrupt (cache update by another process)
        should an exception occur, ignore the data

        Returns 1 when the outputs were restored, None otherwise.
        """
        if not Options.cache_global or Options.options.nocache or not self.outputs:
            return None

        env = self.env
        sig = self.signature()
        ssig = sig.encode('hex')

        # first try to access the cache folder for the task
        dname = os.path.join(Options.cache_global, ssig)
        try:
            t1 = os.stat(dname).st_mtime
        except OSError:
            return None

        for i, node in enumerate(self.outputs):
            orig = os.path.join(dname, str(i) + node.name)
            try:
                shutil.copy2(orig, node.abspath(env))
                # mark the cache file as used recently (modified)
                os.utime(orig, None)
            except (OSError, IOError):
                debug('task: failed retrieving file')
                return None

        # is it the same folder?
        try:
            t2 = os.stat(dname).st_mtime
        except OSError:
            return None

        if t1 != t2:
            return None

        for node in self.outputs:
            # store the signature under the variant of this very node (the
            # variant of the last copied node used to be reused for all of them)
            self.generator.bld.node_sigs[node.variant(env)][node.id] = sig
            if Options.options.progress_bar < 1:
                self.generator.bld.printout('restoring from cache %r\n' % node.bldpath(env))

        self.cached = True
        return 1

    def debug_why(self, old_sigs):
        "explains why a task is run: log each component of the signature that differs from old_sigs"
        new_sigs = self.cache_sig

        def v(x):
            return x.encode('hex')

        debug("Task %r", self)
        # same order as the elements of cache_sig
        msgs = ['Task must run', '* Source file or manual dependency', '* Implicit dependency', '* Environment variable']
        tmp = 'task: -> %s: %s %s'
        for x, msg in enumerate(msgs):
            if new_sigs[x] != old_sigs[x]:
                debug(tmp, msg, v(old_sigs[x]), v(new_sigs[x]))

    def sig_explicit_deps(self):
        """Add the signatures of the explicit dependencies to self.m and return the current digest.

        Covers the inputs, the dep_nodes and the manual dependencies (bld.deps_man).
        Raises Utils.WafError when a node has no signature.
        """
        bld = self.generator.bld
        up = self.m.update

        # the inputs
        for x in self.inputs + getattr(self, 'dep_nodes', []):
            if not x.parent.id in bld.cache_scanned_folders:
                bld.rescan(x.parent)

            variant = x.variant(self.env)
            try:
                up(bld.node_sigs[variant][x.id])
            except KeyError:
                raise Utils.WafError('Missing node signature for %r (required by %r)' % (x, self))

        # manual dependencies, they can slow down the builds
        if bld.deps_man:
            additional_deps = bld.deps_man
            for x in self.inputs + self.outputs:
                try:
                    d = additional_deps[x.id]
                except KeyError:
                    continue

                for v in d:
                    if isinstance(v, Node.Node):
                        bld.rescan(v.parent)
                        variant = v.variant(self.env)
                        try:
                            v = bld.node_sigs[variant][v.id]
                        except KeyError:
                            raise Utils.WafError('Missing node signature for %r (required by %r)' % (v, self))
                    elif hasattr(v, '__call__'):
                        v = v() # dependency is a function, call it
                    up(v)

        # the dep_nodes were already hashed by the first loop; this second pass
        # is kept so that the signatures stay comparable with the stored ones
        for x in self.dep_nodes:
            v = bld.node_sigs[x.variant(self.env)][x.id]
            up(v)

        return self.m.digest()

    def sig_vars(self):
        "add the hash of the environment variables (class 'vars' and optional 'dep_vars') to self.m, return the current digest"
        bld = self.generator.bld
        env = self.env

        # dependencies on the environment vars
        act_sig = bld.hash_env_vars(env, self.__class__.vars)
        self.m.update(act_sig)

        # additional variable dependencies, if provided
        dep_vars = getattr(self, 'dep_vars', None)
        if dep_vars:
            self.m.update(bld.hash_env_vars(env, dep_vars))

        return self.m.digest()

    #def scan(self, node):
    #	"""this method returns a tuple containing:
    #	* a list of nodes corresponding to real files
    #	* a list of names for files not found in path_lst
    #	the input parameters may have more parameters that the ones used below
    #	"""
    #	return ((), ())
    scan = None

    # compute the signature, recompute it if there is no match in the cache
    def sig_implicit_deps(self):
        """the signature obtained may not be the one if the files have changed, we do it in two steps

        With a previous run on record, the stored implicit signature is returned
        if it still matches; otherwise the record is deleted and
        ValueError('rescan') is raised (signature() then starts over).
        Without a record, self.scan() is run and its results are stored.
        """
        bld = self.generator.bld

        # get the task signatures from previous runs
        key = self.unique_id()
        prev_sigs = bld.task_sigs.get(key, ())
        if prev_sigs:
            try:
                # for issue #379
                if prev_sigs[2] == self.compute_sig_implicit_deps():
                    return prev_sigs[2]
            except (KeyError, OSError):
                pass
            del bld.task_sigs[key]
            raise ValueError('rescan')

        # no previous run or the signature of the dependencies has changed, rescan the dependencies
        (nodes, names) = self.scan()
        if Logs.verbose:
            debug('deps: scanner for %s returned %s %s', str(self), str(nodes), str(names))

        # store the dependencies in the cache
        bld.node_deps[key] = nodes
        bld.raw_deps[key] = names

        # recompute the signature and return it
        try:
            sig = self.compute_sig_implicit_deps()
        except KeyError:
            # list the nodes without signature for the error message (best effort)
            try:
                nodes = []
                for k in bld.node_deps.get(self.unique_id(), []):
                    if k.id & 3 == 2: # Node.FILE:
                        if not k.id in bld.node_sigs[0]:
                            nodes.append(k)
                    else:
                        if not k.id in bld.node_sigs[self.env.variant()]:
                            nodes.append(k)
            except Exception:
                nodes = '?'
            raise Utils.WafError('Missing node signature for %r (for implicit dependencies %r)' % (nodes, self))

        return sig

    def compute_sig_implicit_deps(self):
        """it is intended for .cpp and inferred .h files
        there is a single list (no tree traversal)
        this is the hot spot so ... do not touch"""
        upd = self.m.update

        bld = self.generator.bld
        tstamp = bld.node_sigs
        env = self.env

        for k in bld.node_deps.get(self.unique_id(), []):
            # unlikely but necessary if it happens
            if not k.parent.id in bld.cache_scanned_folders:
                # if the parent folder is removed, an OSError may be thrown
                bld.rescan(k.parent)

            # if the parent folder is removed, a KeyError will be thrown
            if k.id & 3 == 2: # Node.FILE:
                upd(tstamp[0][k.id])
            else:
                upd(tstamp[env.variant()][k.id])

        return self.m.digest()
|
|
|
|
def funex(c):
    "execute the source code c in a fresh namespace and return the object it binds to the name 'f'"
    namespace = {}
    exec(c, namespace)
    return namespace['f']
|
|
|
|
# Tokenizer for rule strings, used by compile_fun_shell and compile_fun_noshell.
# It matches either a single backslash (group 'backslash'), an escaped dollar
# '$$' (group 'dollar'), or a '${VAR...}' substitution (group 'subst', with the
# identifier in 'var' and the remaining text before '}' in 'code').
reg_act = re.compile(r"(?P<backslash>\\)|(?P<dollar>\$\$)|(?P<subst>\$\{(?P<var>\w+)(?P<code>.*?)\})", re.M)
|
|
def compile_fun_shell(name, line):
    """Compiles a string (once) into a function executing it as a shell command, eg:
    simple_task_type('c++', '${CXX} -o ${TGT[0]} ${SRC} -I ${SRC[0].parent.bldpath()}')

    The env variables (CXX, ..) on the task must not hold dicts (order)
    The reserved keywords TGT and SRC represent the task input and output nodes

    Returns a tuple (function, list of the env variable names used in the line).
    The parameter 'name' is not used by this function.

    quick test:
    bld(source='wscript', rule='echo "foo\\${SRC[0].name}\\bar"')
    """

    captured = []  # (variable name, trailing code) for each ${...} found, in order

    def repl(match):
        if match.group('dollar'):
            return "$"
        if match.group('backslash'):
            return '\\\\'
        if match.group('subst'):
            captured.append((match.group('var'), match.group('code')))
            return "%s"
        return None

    line = reg_act.sub(repl, line) or line

    # expressions used for a bare ${SRC} / ${TGT}
    whole_list = {
        'SRC': '" ".join([a.srcpath(env) for a in task.inputs])',
        'TGT': '" ".join([a.bldpath(env) for a in task.outputs])',
    }
    # expression prefixes used when code follows the keyword, e.g. ${SRC[0].name}
    with_code = {
        'SRC': 'task.inputs%s',
        'TGT': 'task.outputs%s',
    }

    exprs = []
    dvars = []
    for (var, code) in captured:
        if var in whole_list:
            if code:
                exprs.append(with_code[var] % code)
            else:
                exprs.append(whole_list[var])
        else:
            # plain env variable: flattened through p (env.get_flat in the template)
            if var not in dvars:
                dvars.append(var)
            exprs.append("p('%s')" % var)

    if exprs:
        parm = "%% (%s) " % (',\n\t\t'.join(exprs))
    else:
        parm = ''

    source = COMPILE_TEMPLATE_SHELL % (line, parm)

    debug('action: %s', source)
    return (funex(source), dvars)
|
|
|
|
def compile_fun_noshell(name, line):
    """Compiles a string (once) into a function executing it without a shell.

    The generated function builds the argument list 'lst' from the literal
    parts of the line and from the ${...} substitutions (SRC, TGT or env
    variables), then calls task.exec_command(lst, ...).

    Returns a tuple (function, list of the env variable names used in the line).
    The parameter 'name' is not used by this function.
    """

    marker = '<<|@|>>'
    captured = []  # (variable name, trailing code) for each ${...} found, in order

    def repl(match):
        # NOTE(review): there is no branch for the 'backslash' group of reg_act,
        # so None is returned for a backslash (compile_fun_shell re-emits it) --
        # confirm that dropping backslashes is intended here
        if match.group('dollar'):
            return "$"
        if match.group('subst'):
            captured.append((match.group('var'), match.group('code')))
            return marker
        return None

    # literal text surrounding the substitutions: one more piece than substitutions
    literals = reg_act.sub(repl, line).split(marker)

    statements = []
    dvars = []
    emit = statements.append
    for idx, (var, code) in enumerate(captured):
        # literal text located before the substitution
        text = literals[idx].strip()
        if text:
            emit("lst.extend(%r)" % text.split())

        if var == 'SRC':
            if code:
                emit('lst.append(task.inputs%s)' % code)
            else:
                emit("lst.extend([a.srcpath(env) for a in task.inputs])")
        elif var == 'TGT':
            if code:
                emit('lst.append(task.outputs%s)' % code)
            else:
                emit("lst.extend([a.bldpath(env) for a in task.outputs])")
        else:
            emit('lst.extend(to_list(env[%r]))' % var)
            if var not in dvars:
                dvars.append(var)

    # literal text after the last substitution (or the whole line if there is none)
    tail = literals[-1]
    if tail:
        emit("lst.extend(%r)" % shlex.split(tail))

    source = COMPILE_TEMPLATE_NOSHELL % "\n\t".join(statements)
    debug('action: %s', source)
    return (funex(source), dvars)
|
|
|
|
def compile_fun(name, line, shell=None):
    """Compile the rule string 'line' into a function; commands can be launched by the shell or not.

    name  -- passed through to compile_fun_shell / compile_fun_noshell
    line  -- rule string, e.g. '${CXX} -o ${TGT[0]} ${SRC}'
    shell -- True/False to force the execution mode, None to pick a default
             (no shell on win32, shell elsewhere)

    A line containing '<', '>' or '&&' is always executed through the shell,
    whatever the value of 'shell'.

    Returns the tuple (function, list of env variable names) produced by
    compile_fun_shell or compile_fun_noshell.
    """
    # membership tests instead of "find(...) > 0": find() returns 0 for a match
    # at the very beginning of the line, which used to go undetected
    if '<' in line or '>' in line or '&&' in line:
        shell = True
    #else:
    #	shell = False

    if shell is None:
        shell = sys.platform != 'win32'

    if shell:
        return compile_fun_shell(name, line)
    return compile_fun_noshell(name, line)
|
|
|
|
def simple_task_type(name, line, color='GREEN', vars=[], ext_in=[], ext_out=[], before=[], after=[], shell=None):
    """Create a Task subclass whose run function is compiled from a command template.

    *line* is compiled with compile_fun and the class is then created by
    task_type_from_func. When *vars* is empty, the variable names found in
    the template by the compiler are used instead.

    :param name: name of the new task class
    :param line: command template; also stored on the run function as ``code``
    :param shell: passed to compile_fun to select shell or direct execution
    :return: the class returned by task_type_from_func
    """
    run_fun, found_vars = compile_fun(name, line, shell)
    run_fun.code = line

    if vars:
        tracked_vars = vars
    else:
        tracked_vars = found_vars

    return task_type_from_func(name, run_fun, tracked_vars, color, ext_in, ext_out, before, after)
|
|
|
|
def task_type_from_func(name, func, vars=[], color='GREEN', ext_in=[], ext_out=[], before=[], after=[]):
    """Create a Task subclass that uses *func* as its ``run`` method.

    The class is created with the metaclass of Task, gets the given values
    as class attributes and is stored in ``TaskBase.classes`` under *name*.

    :param name: name of the new class, also its key in ``TaskBase.classes``
    :param func: function installed as the ``run`` attribute
    :param vars: stored as the ``vars`` class attribute
    :param color: stored as the ``color`` class attribute
    :param ext_in: converted with ``Utils.to_list``, stored as ``ext_in``
    :param ext_out: converted with ``Utils.to_list``, stored as ``ext_out``
    :param before: converted with ``Utils.to_list``, stored as ``before``
    :param after: converted with ``Utils.to_list``, stored as ``after``
    :return: the new class
    """
    params = {
        'run': func,
        # default arguments are created only once: never store the shared
        # default list on the class, or a class modifying its ``vars``
        # would change it for every class created with the default
        'vars': vars or [],
        'color': color,
        'name': name,
        'ext_in': Utils.to_list(ext_in),
        'ext_out': Utils.to_list(ext_out),
        'before': Utils.to_list(before),
        'after': Utils.to_list(after),
    }

    cls = type(Task)(name, (Task,), params)
    TaskBase.classes[name] = cls
    return cls
|
|
|
|
def always_run(cls):
    """Make the task instances of *cls* execute whenever a build is started.

    The original ``runnable_status`` is still evaluated; only a SKIP_ME
    result is turned into RUN_ME, any other result is returned unchanged.
    In other words the result of the comparison between task signatures
    is bypassed.

    :param cls: task class to modify in place
    :return: *cls*, so that the function may also be used as a class decorator
    """
    old = cls.runnable_status

    def always(self):
        ret = old(self)
        if ret == SKIP_ME:
            return RUN_ME
        return ret

    cls.runnable_status = always
    return cls
|
|
|
|
def update_outputs(cls):
    """Make the tasks of *cls* track the content of their output files.

    When a command is always run, its outputs may change only sometimes.
    With this function the output files are hashed after execution and the
    hashes are stored for the build nodes. This may avoid unnecessary
    recompilations, but hashing the output files uses more resources, so
    it is not the default.

    Two methods of *cls* are wrapped:

    * ``post_run``: after the original method, the hash of each output file
      (``Utils.h_file``) is stored in ``bld.node_sigs`` for the variant of
      the task, and the task id is stored in ``bld.task_sigs`` under the id
      of the output node.
    * ``runnable_status``: when the original method returns RUN_ME, the task
      is skipped anyway if its signature equals the stored one and every
      output node has a stored hash and is recorded as produced by this task.

    :param cls: task class to modify in place
    :return: *cls*, so that the function may also be used as a class decorator
    """
    old_post_run = cls.post_run

    def post_run(self):
        old_post_run(self)
        bld = self.generator.bld
        for output in self.outputs:
            bld.node_sigs[self.env.variant()][output.id] = Utils.h_file(output.abspath(self.env))
            bld.task_sigs[output.id] = self.unique_id()

    cls.post_run = post_run

    old_runnable_status = cls.runnable_status

    def runnable_status(self):
        status = old_runnable_status(self)
        if status != RUN_ME:
            return status

        uid = self.unique_id()
        try:
            # IndexError here means that the task has no outputs
            bld = self.outputs[0].__class__.bld
            new_sig = self.signature()
            prev_sig = bld.task_sigs[uid][0]
            if prev_sig == new_sig:
                for x in self.outputs:
                    if not x.id in bld.node_sigs[self.env.variant()]:
                        return RUN_ME
                    if bld.task_sigs[x.id] != uid:  # ensure the outputs are associated with *this* task
                        return RUN_ME
                return SKIP_ME
        except (KeyError, IndexError):
            # missing data: fall through and run the task
            pass
        return RUN_ME

    cls.runnable_status = runnable_status
    return cls
|
|
|
|
def extract_outputs(tasks):
    """file_deps: infer additional order constraints from task input and output nodes.

    The tasks are grouped by ``env.variant()``. Within a variant, every task
    having a node among its inputs is set to run after (``set_run_after``)
    every task having a node with the same ``id`` among its outputs.
    Tasks without ``inputs`` or ``outputs`` attributes are accepted.

    :param tasks: iterable of task objects
    """
    by_variant = {}
    for tsk in tasks:
        (ins, outs) = by_variant.setdefault(tsk.env.variant(), ({}, {}))

        for node in getattr(tsk, 'inputs', []):
            ins.setdefault(node.id, []).append(tsk)
        for node in getattr(tsk, 'outputs', []):
            outs.setdefault(node.id, []).append(tsk)

    for (ins, outs) in by_variant.values():
        # iterating a dict yields its keys on all Python versions,
        # whereas dict.iterkeys() only exists on Python 2
        for node_id in set(ins).intersection(outs):
            for consumer in ins[node_id]:
                for producer in outs[node_id]:
                    consumer.set_run_after(producer)
|
|
|
|
def extract_deps(tasks):
    """file_deps: infer additional order constraints from the task input and
    output nodes and from the implicit dependencies returned by the scanners.

    That will only work if all tasks are created. This is aimed at people
    who have pathological builds and who do not care enough to implement
    the build dependencies properly.

    With two loops over the list of tasks, do not expect this to be really fast.

    :param tasks: list of task objects (iterated several times)
    """
    # first add the constraints given by the explicit inputs and outputs
    extract_outputs(tasks)

    # map the output nodes to the tasks producing them
    out_to_task = {}
    for x in tasks:
        v = x.env.variant()
        for node in getattr(x, 'outputs', []):
            out_to_task[(v, node.id)] = x

    # map the dependencies found to the tasks compiled
    dep_to_task = {}
    for x in tasks:
        try:
            # presumably this is what fills bld.node_deps for the task
            x.signature()
        except Exception:
            # ignoring failures is on purpose (best effort); only
            # KeyboardInterrupt and SystemExit are no longer swallowed
            pass

        v = x.env.variant()
        key = x.unique_id()
        for k in x.generator.bld.node_deps.get(key, []):
            dep_to_task.setdefault((v, k.id), []).append(x)

    # now get the intersection
    deps = set(dep_to_task).intersection(out_to_task)

    # and add the dependencies from task to task
    for idx in deps:
        for k in dep_to_task[idx]:
            k.set_run_after(out_to_task[idx])

    # cleanup, remove the signatures
    for x in tasks:
        try:
            delattr(x, 'cache_sig')
        except AttributeError:
            pass
|