32927393dc
Instead of having all the sysctl handlers deal with user pointers, which is rather hairy in terms of the BPF interaction, copy the input to and from userspace in common code. This also means that the strings are always NUL-terminated by the common code, making the API a little bit safer. As most handlers just pass through the data to one of the common handlers, a lot of the changes are mechanical. Signed-off-by: Christoph Hellwig <hch@lst.de> Acked-by: Andrey Ignatov <rdna@fb.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
206 lines
5.1 KiB
C
206 lines
5.1 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/*
|
|
* itmt.c: Support Intel Turbo Boost Max Technology 3.0
|
|
*
|
|
* (C) Copyright 2016 Intel Corporation
|
|
* Author: Tim Chen <tim.c.chen@linux.intel.com>
|
|
*
|
|
* On platforms supporting Intel Turbo Boost Max Technology 3.0, (ITMT),
|
|
* the maximum turbo frequencies of some cores in a CPU package may be
|
|
* higher than for the other cores in the same package. In that case,
|
|
* better performance can be achieved by making the scheduler prefer
|
|
* to run tasks on the CPUs with higher max turbo frequencies.
|
|
*
|
|
* This file provides functions and data structures for enabling the
|
|
* scheduler to favor scheduling on cores that can be boosted to a higher
|
|
* frequency under ITMT.
|
|
*/
|
|
|
|
#include <linux/sched.h>
|
|
#include <linux/cpumask.h>
|
|
#include <linux/cpuset.h>
|
|
#include <linux/mutex.h>
|
|
#include <linux/sysctl.h>
|
|
#include <linux/nodemask.h>
|
|
|
|
static DEFINE_MUTEX(itmt_update_mutex);
|
|
DEFINE_PER_CPU_READ_MOSTLY(int, sched_core_priority);
|
|
|
|
/* Boolean to track if system has ITMT capabilities */
|
|
static bool __read_mostly sched_itmt_capable;
|
|
|
|
/*
|
|
* Boolean to control whether we want to move processes to cpu capable
|
|
* of higher turbo frequency for cpus supporting Intel Turbo Boost Max
|
|
* Technology 3.0.
|
|
*
|
|
* It can be set via /proc/sys/kernel/sched_itmt_enabled
|
|
*/
|
|
unsigned int __read_mostly sysctl_sched_itmt_enabled;
|
|
|
|
/*
 * proc handler behind the "sched_itmt_enabled" sysctl entry.
 *
 * Fails with -EINVAL while the platform has not been flagged as ITMT
 * capable. Otherwise the access is delegated to proc_dointvec_minmax();
 * when a successful write changes sysctl_sched_itmt_enabled, a topology
 * update is flagged and the sched domains are rebuilt.
 *
 * Returns -EINVAL as described above, else whatever
 * proc_dointvec_minmax() returned.
 */
static int sched_itmt_update_handler(struct ctl_table *table, int write,
				     void *buffer, size_t *lenp, loff_t *ppos)
{
	unsigned int prev_enabled;
	int err = -EINVAL;

	mutex_lock(&itmt_update_mutex);

	if (!sched_itmt_capable)
		goto unlock;

	/* Snapshot the current value so that a real change can be detected. */
	prev_enabled = sysctl_sched_itmt_enabled;

	err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	if (err || !write)
		goto unlock;

	if (sysctl_sched_itmt_enabled != prev_enabled) {
		x86_topology_update = true;
		rebuild_sched_domains();
	}

unlock:
	mutex_unlock(&itmt_update_mutex);
	return err;
}
|
|
|
|
static struct ctl_table itmt_kern_table[] = {
|
|
{
|
|
.procname = "sched_itmt_enabled",
|
|
.data = &sysctl_sched_itmt_enabled,
|
|
.maxlen = sizeof(unsigned int),
|
|
.mode = 0644,
|
|
.proc_handler = sched_itmt_update_handler,
|
|
.extra1 = SYSCTL_ZERO,
|
|
.extra2 = SYSCTL_ONE,
|
|
},
|
|
{}
|
|
};
|
|
|
|
static struct ctl_table itmt_root_table[] = {
|
|
{
|
|
.procname = "kernel",
|
|
.mode = 0555,
|
|
.child = itmt_kern_table,
|
|
},
|
|
{}
|
|
};
|
|
|
|
/*
 * Handle returned by register_sysctl_table() in sched_set_itmt_support();
 * reset to NULL once sched_clear_itmt_support() has unregistered the table.
 */
static struct ctl_table_header *itmt_sysctl_header;
|
|
|
|
/**
|
|
* sched_set_itmt_support() - Indicate platform supports ITMT
|
|
*
|
|
* This function is used by the OS to indicate to scheduler that the platform
|
|
* is capable of supporting the ITMT feature.
|
|
*
|
|
* The current scheme has the pstate driver detects if the system
|
|
* is ITMT capable and call sched_set_itmt_support.
|
|
*
|
|
* This must be done only after sched_set_itmt_core_prio
|
|
* has been called to set the cpus' priorities.
|
|
* It must not be called with cpu hot plug lock
|
|
* held as we need to acquire the lock to rebuild sched domains
|
|
* later.
|
|
*
|
|
* Return: 0 on success
|
|
*/
|
|
int sched_set_itmt_support(void)
|
|
{
|
|
mutex_lock(&itmt_update_mutex);
|
|
|
|
if (sched_itmt_capable) {
|
|
mutex_unlock(&itmt_update_mutex);
|
|
return 0;
|
|
}
|
|
|
|
itmt_sysctl_header = register_sysctl_table(itmt_root_table);
|
|
if (!itmt_sysctl_header) {
|
|
mutex_unlock(&itmt_update_mutex);
|
|
return -ENOMEM;
|
|
}
|
|
|
|
sched_itmt_capable = true;
|
|
|
|
sysctl_sched_itmt_enabled = 1;
|
|
|
|
x86_topology_update = true;
|
|
rebuild_sched_domains();
|
|
|
|
mutex_unlock(&itmt_update_mutex);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* sched_clear_itmt_support() - Revoke platform's support of ITMT
|
|
*
|
|
* This function is used by the OS to indicate that it has
|
|
* revoked the platform's support of ITMT feature.
|
|
*
|
|
* It must not be called with cpu hot plug lock
|
|
* held as we need to acquire the lock to rebuild sched domains
|
|
* later.
|
|
*/
|
|
void sched_clear_itmt_support(void)
|
|
{
|
|
mutex_lock(&itmt_update_mutex);
|
|
|
|
if (!sched_itmt_capable) {
|
|
mutex_unlock(&itmt_update_mutex);
|
|
return;
|
|
}
|
|
sched_itmt_capable = false;
|
|
|
|
if (itmt_sysctl_header) {
|
|
unregister_sysctl_table(itmt_sysctl_header);
|
|
itmt_sysctl_header = NULL;
|
|
}
|
|
|
|
if (sysctl_sched_itmt_enabled) {
|
|
/* disable sched_itmt if we are no longer ITMT capable */
|
|
sysctl_sched_itmt_enabled = 0;
|
|
x86_topology_update = true;
|
|
rebuild_sched_domains();
|
|
}
|
|
|
|
mutex_unlock(&itmt_update_mutex);
|
|
}
|
|
|
|
int arch_asym_cpu_priority(int cpu)
|
|
{
|
|
return per_cpu(sched_core_priority, cpu);
|
|
}
|
|
|
|
/**
|
|
* sched_set_itmt_core_prio() - Set CPU priority based on ITMT
|
|
* @prio: Priority of cpu core
|
|
* @core_cpu: The cpu number associated with the core
|
|
*
|
|
* The pstate driver will find out the max boost frequency
|
|
* and call this function to set a priority proportional
|
|
* to the max boost frequency. CPU with higher boost
|
|
* frequency will receive higher priority.
|
|
*
|
|
* No need to rebuild sched domain after updating
|
|
* the CPU priorities. The sched domains have no
|
|
* dependency on CPU priorities.
|
|
*/
|
|
void sched_set_itmt_core_prio(int prio, int core_cpu)
|
|
{
|
|
int cpu, i = 1;
|
|
|
|
for_each_cpu(cpu, topology_sibling_cpumask(core_cpu)) {
|
|
int smt_prio;
|
|
|
|
/*
|
|
* Ensure that the siblings are moved to the end
|
|
* of the priority chain and only used when
|
|
* all other high priority cpus are out of capacity.
|
|
*/
|
|
smt_prio = prio * smp_num_siblings / i;
|
|
per_cpu(sched_core_priority, cpu) = smt_prio;
|
|
i++;
|
|
}
|
|
}
|