MEDIUM: tasks: apply a fair CPU distribution between tasklet classes

Till now in process_runnable_tasks() we used to reserve a fixed portion
of max_processed for urgent tasklets, then a portion of what remained
for normal tasks, then whatever was left for bulk tasklets. This caused
two issues:

  - the overall budget for processed tasks could be drained all at once
    by the higher-level classes, leaving the other classes without
    enough for the next run. For example, if bulk tasklets cause task
    wakeups, the share required to run these tasks could already have
    been eaten by other bulk tasklets.

  - it forces the urgent tasklets to be run before scanning the tree so
    that we know how many tasks to pick from the tree, which isn't very
    efficient cache-wise.

This patch changes this so that we compute upfront how max_processed
will be shared between the classes that need it. We can then decide in
advance how many tasks to pick from the tree, then execute all tasklets
in turn. When reaching the end, if there's still some budget left, we
can go back and do the same thing again, improving the chances of
picking up new work before the global budget is depleted.
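
To make the intended flow more concrete, here is a small, self-contained
simulation of the idea (illustrative only: the class names mirror the
TL_* queues and the 64/48/16 weights match the defaults discussed below,
but the queue lengths and the 200-entry budget are invented, and the real
code below also has to pick tasks from the run queue tree and deal with
locking):

#include <stdio.h>

enum { Q_URGENT, Q_NORMAL, Q_BULK, Q_CLASSES };

int main(void)
{
	const unsigned int weights[Q_CLASSES] = { 64, 48, 16 };
	unsigned int pending[Q_CLASSES] = { 10, 300, 200 }; /* queued entries per class */
	int budget = 200;                                   /* plays the role of max_processed */

	while (budget > 0 && (pending[Q_URGENT] || pending[Q_NORMAL] || pending[Q_BULK])) {
		unsigned int max[Q_CLASSES], total = 0, done, q;

		/* only classes with pending work take part in the split */
		for (q = 0; q < Q_CLASSES; q++) {
			max[q] = pending[q] ? weights[q] : 0;
			total += max[q];
		}

		/* turn the active weights into per-class budgets, rounded up */
		for (q = 0; q < Q_CLASSES; q++)
			max[q] = ((unsigned)budget * max[q] + total - 1) / total;

		/* "run" each class in turn, never exceeding its own share */
		for (q = 0; q < Q_CLASSES; q++) {
			done = pending[q] < max[q] ? pending[q] : max[q];
			pending[q] -= done;
			budget -= done;
			printf("class %u: ran %3u, %3u pending, budget %d\n",
			       q, done, pending[q], budget);
		}
	}
	return 0;
}

With these numbers the first pass splits the budget 100/75/25; once the
urgent queue is empty, the next pass redistributes what is left between
the normal and bulk classes only.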

The default weights have been set to 50% for urgent tasklets, 37% for
normal ones and 13% for bulk ones. In practice there are not that many
urgent tasklets, but when they show up they are cheap and must be
processed in batches as large as possible. Whenever there is nothing to
pick there, the unused budget is redistributed between the normal and
bulk classes, which still leaves bulk tasklets quite some CPU to run on.
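
As a worked example of that arithmetic (the 200 below is an arbitrary
budget; the formula is the rounded-up share used by the patch):

#include <stdio.h>

int main(void)
{
	const unsigned int w[3] = { 64, 48, 16 };    /* urgent, normal, bulk */
	unsigned int budget = 200, total, q;

	total = w[0] + w[1] + w[2];                  /* = 128: all classes active */
	for (q = 0; q < 3; q++)                      /* prints 100 75 25 */
		printf("%u ", (budget * w[q] + total - 1) / total);

	total = w[1] + w[2];                         /* = 64: urgent queue empty */
	for (q = 1; q < 3; q++)                      /* prints 150 50 */
		printf("%u ", (budget * w[q] + total - 1) / total);
	printf("\n");
	return 0;
}

So when the urgent class has nothing queued, its 50% share is not
wasted: normal and bulk simply split the whole budget in proportion to
their own weights.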
commit 3ef7a190b0 (parent b24086923c)
Willy Tarreau, 2020-06-24 07:21:08 +02:00


@@ -422,14 +422,27 @@ int run_tasks_from_list(struct list *list, int max)
 void process_runnable_tasks()
 {
 	struct task_per_thread * const tt = sched;
-	struct eb32sc_node *lrq = NULL; // next local run queue entry
-	struct eb32sc_node *grq = NULL; // next global run queue entry
+	struct eb32sc_node *lrq; // next local run queue entry
+	struct eb32sc_node *grq; // next global run queue entry
 	struct task *t;
-	int max_processed, done;
+	const unsigned int default_weights[TL_CLASSES] = {
+		[TL_URGENT] = 64, // ~50% of CPU bandwidth for I/O
+		[TL_NORMAL] = 48, // ~37% of CPU bandwidth for tasks
+		[TL_BULK]   = 16, // ~13% of CPU bandwidth for self-wakers
+	};
+	unsigned int max[TL_CLASSES]; // max to be run per class
+	unsigned int max_total;       // sum of max above
+	struct mt_list *tmp_list;
+	unsigned int queue;
+	int max_processed;
 
 	ti->flags &= ~TI_FL_STUCK; // this thread is still running
 
+	if (!thread_has_tasks()) {
+		activity[tid].empty_rq++;
+		return;
+	}
+
 	tasks_run_queue_cur = tasks_run_queue; /* keep a copy for reporting */
 	nb_tasks_cur = nb_tasks;
 	max_processed = global.tune.runqueue_depth;
@@ -437,30 +450,37 @@ void process_runnable_tasks()
 	if (likely(niced_tasks))
 		max_processed = (max_processed + 3) / 4;
 
-	if (!thread_has_tasks()) {
-		activity[tid].empty_rq++;
-		return;
-	}
-
-	/* Merge the list of tasklets waken up by other threads to the
-	 * main list.
+ not_done_yet:
+	max[TL_URGENT] = max[TL_NORMAL] = max[TL_BULK] = 0;
+
+	/* urgent tasklets list gets a default weight of ~50% */
+	if (!LIST_ISEMPTY(&tt->tasklets[TL_URGENT]) ||
+	    !MT_LIST_ISEMPTY(&tt->shared_tasklet_list))
+		max[TL_URGENT] = default_weights[TL_URGENT];
+
+	/* normal tasklets list gets a default weight of ~37% */
+	if (!LIST_ISEMPTY(&tt->tasklets[TL_NORMAL]) ||
+	    (sched->rqueue_size > 0) || (global_tasks_mask & tid_bit))
+		max[TL_NORMAL] = default_weights[TL_NORMAL];
+
+	/* bulk tasklets list gets a default weight of ~13% */
+	if (!LIST_ISEMPTY(&tt->tasklets[TL_BULK]))
+		max[TL_BULK] = default_weights[TL_BULK];
+
+	/* Now compute a fair share of the weights. Total may slightly exceed
+	 * 100% due to rounding, this is not a problem. Note that by design
+	 * the sum cannot be NULL as we cannot get there without tasklets to
+	 * process.
 	 */
-	tmp_list = MT_LIST_BEHEAD(&sched->shared_tasklet_list);
-	if (tmp_list)
-		LIST_SPLICE_END_DETACHED(&sched->tasklets[TL_URGENT], (struct list *)tmp_list);
+	max_total = max[TL_URGENT] + max[TL_NORMAL] + max[TL_BULK];
+	for (queue = 0; queue < TL_CLASSES; queue++)
+		max[queue] = ((unsigned)max_processed * max[queue] + max_total - 1) / max_total;
 
-	/* run up to max_processed/3 urgent tasklets */
-	if (max_processed > 0) {
-		done = run_tasks_from_list(&tt->tasklets[TL_URGENT], (max_processed + 2) / 3);
-		max_processed -= done;
-	}
-
-	/* pick up to max_processed/2 (~=3/4*(max_processed-done)) regular tasks from prio-ordered run queues */
+	lrq = grq = NULL;
+
+	/* pick up to max[TL_NORMAL] regular tasks from prio-ordered run queues */
 	/* Note: the grq lock is always held when grq is not null */
-	while (tt->task_list_size < (3 * max_processed + 3) / 4) {
+	while (tt->task_list_size < max[TL_NORMAL]) {
 		if ((global_tasks_mask & tid_bit) && !grq) {
 #ifdef USE_THREAD
 			HA_SPIN_LOCK(TASK_RQ_LOCK, &rq_lock);
@@ -522,16 +542,17 @@ void process_runnable_tasks()
 		grq = NULL;
 	}
 
-	/* run between 0.4*max_processed and max_processed/2 regular tasks */
-	if (max_processed > 0) {
-		done = run_tasks_from_list(&tt->tasklets[TL_NORMAL], (3 * max_processed + 3) / 4);
-		max_processed -= done;
-	}
+	/* Merge the list of tasklets waken up by other threads to the
+	 * main list.
+	 */
+	tmp_list = MT_LIST_BEHEAD(&tt->shared_tasklet_list);
+	if (tmp_list)
+		LIST_SPLICE_END_DETACHED(&tt->tasklets[TL_URGENT], (struct list *)tmp_list);
 
-	/* run between max_processed/4 and max_processed bulk tasklets */
-	if (max_processed > 0) {
-		done = run_tasks_from_list(&tt->tasklets[TL_BULK], max_processed);
-		max_processed -= done;
+	/* execute tasklets in each queue */
+	for (queue = 0; queue < TL_CLASSES; queue++) {
+		if (max[queue] > 0)
+			max_processed -= run_tasks_from_list(&tt->tasklets[queue], max[queue]);
 	}
 
 	/* some tasks may have woken other ones up */