powerpc/pseries/mobility: use stop_machine for join/suspend
The partition suspend sequence as specified in the platform
architecture requires that all active processor threads call H_JOIN,
which:

- suspends the calling thread until it is the target of an H_PROD; or
- immediately returns H_CONTINUE, if the calling thread is the last to
  call H_JOIN. This thread is expected to call ibm,suspend-me to
  completely suspend the partition.

Upon returning from ibm,suspend-me the calling thread must wake all
others using H_PROD.

rtas_ibm_suspend_me_unsafe() uses on_each_cpu() to implement this
protocol, but because of its synchronizing nature this is susceptible
to deadlock versus users of stop_machine() or other callers of
on_each_cpu().

Not only is stop_machine() intended for use cases like this, it
handles error propagation and allows us to keep the data shared
between CPUs minimal: a single atomic counter which ensures exactly
one CPU will wake the others from their joined states.

Switch the migration code to use stop_machine() and a less complex
local implementation of the H_JOIN/ibm,suspend-me logic, which
carries additional benefits:

- more informative error reporting, appropriately ratelimited
- resets the lockup detector / watchdog on resume to prevent lockup
  warnings when the OS has been suspended for a time exceeding the
  threshold.

Fixes: 91dc182ca6e2 ("[PATCH] powerpc: special-case ibm,suspend-me RTAS call")
Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20201207215200.1785968-13-nathanl@linux.ibm.com
This commit is contained in:
parent
d9213319b8
commit
9327dc0aee
@ -12,9 +12,11 @@
|
|||||||
#include <linux/cpu.h>
|
#include <linux/cpu.h>
|
||||||
#include <linux/kernel.h>
|
#include <linux/kernel.h>
|
||||||
#include <linux/kobject.h>
|
#include <linux/kobject.h>
|
||||||
|
#include <linux/nmi.h>
|
||||||
#include <linux/sched.h>
|
#include <linux/sched.h>
|
||||||
#include <linux/smp.h>
|
#include <linux/smp.h>
|
||||||
#include <linux/stat.h>
|
#include <linux/stat.h>
|
||||||
|
#include <linux/stop_machine.h>
|
||||||
#include <linux/completion.h>
|
#include <linux/completion.h>
|
||||||
#include <linux/device.h>
|
#include <linux/device.h>
|
||||||
#include <linux/delay.h>
|
#include <linux/delay.h>
|
||||||
@ -405,6 +407,128 @@ static int wait_for_vasi_session_suspending(u64 handle)
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void prod_single(unsigned int target_cpu)
|
||||||
|
{
|
||||||
|
long hvrc;
|
||||||
|
int hwid;
|
||||||
|
|
||||||
|
hwid = get_hard_smp_processor_id(target_cpu);
|
||||||
|
hvrc = plpar_hcall_norets(H_PROD, hwid);
|
||||||
|
if (hvrc == H_SUCCESS)
|
||||||
|
return;
|
||||||
|
pr_err_ratelimited("H_PROD of CPU %u (hwid %d) error: %ld\n",
|
||||||
|
target_cpu, hwid, hvrc);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void prod_others(void)
|
||||||
|
{
|
||||||
|
unsigned int cpu;
|
||||||
|
|
||||||
|
for_each_online_cpu(cpu) {
|
||||||
|
if (cpu != smp_processor_id())
|
||||||
|
prod_single(cpu);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static u16 clamp_slb_size(void)
|
||||||
|
{
|
||||||
|
u16 prev = mmu_slb_size;
|
||||||
|
|
||||||
|
slb_set_size(SLB_MIN_SIZE);
|
||||||
|
|
||||||
|
return prev;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int do_suspend(void)
|
||||||
|
{
|
||||||
|
u16 saved_slb_size;
|
||||||
|
int status;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
pr_info("calling ibm,suspend-me on CPU %i\n", smp_processor_id());
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The destination processor model may have fewer SLB entries
|
||||||
|
* than the source. We reduce mmu_slb_size to a safe minimum
|
||||||
|
* before suspending in order to minimize the possibility of
|
||||||
|
* programming non-existent entries on the destination. If
|
||||||
|
* suspend fails, we restore it before returning. On success
|
||||||
|
* the OF reconfig path will update it from the new device
|
||||||
|
* tree after resuming on the destination.
|
||||||
|
*/
|
||||||
|
saved_slb_size = clamp_slb_size();
|
||||||
|
|
||||||
|
ret = rtas_ibm_suspend_me(&status);
|
||||||
|
if (ret != 0) {
|
||||||
|
pr_err("ibm,suspend-me error: %d\n", status);
|
||||||
|
slb_set_size(saved_slb_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int do_join(void *arg)
|
||||||
|
{
|
||||||
|
atomic_t *counter = arg;
|
||||||
|
long hvrc;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
/* Must ensure MSR.EE off for H_JOIN. */
|
||||||
|
hard_irq_disable();
|
||||||
|
hvrc = plpar_hcall_norets(H_JOIN);
|
||||||
|
|
||||||
|
switch (hvrc) {
|
||||||
|
case H_CONTINUE:
|
||||||
|
/*
|
||||||
|
* All other CPUs are offline or in H_JOIN. This CPU
|
||||||
|
* attempts the suspend.
|
||||||
|
*/
|
||||||
|
ret = do_suspend();
|
||||||
|
break;
|
||||||
|
case H_SUCCESS:
|
||||||
|
/*
|
||||||
|
* The suspend is complete and this cpu has received a
|
||||||
|
* prod.
|
||||||
|
*/
|
||||||
|
ret = 0;
|
||||||
|
break;
|
||||||
|
case H_BAD_MODE:
|
||||||
|
case H_HARDWARE:
|
||||||
|
default:
|
||||||
|
ret = -EIO;
|
||||||
|
pr_err_ratelimited("H_JOIN error %ld on CPU %i\n",
|
||||||
|
hvrc, smp_processor_id());
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (atomic_inc_return(counter) == 1) {
|
||||||
|
pr_info("CPU %u waking all threads\n", smp_processor_id());
|
||||||
|
prod_others();
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* Execution may have been suspended for several seconds, so
|
||||||
|
* reset the watchdog.
|
||||||
|
*/
|
||||||
|
touch_nmi_watchdog();
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int pseries_migrate_partition(u64 handle)
|
||||||
|
{
|
||||||
|
atomic_t counter = ATOMIC_INIT(0);
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
ret = wait_for_vasi_session_suspending(handle);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
ret = stop_machine(do_join, &counter, cpu_online_mask);
|
||||||
|
if (ret == 0)
|
||||||
|
post_mobility_fixup();
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
static ssize_t migration_store(struct class *class,
|
static ssize_t migration_store(struct class *class,
|
||||||
struct class_attribute *attr, const char *buf,
|
struct class_attribute *attr, const char *buf,
|
||||||
size_t count)
|
size_t count)
|
||||||
@ -416,16 +540,10 @@ static ssize_t migration_store(struct class *class,
|
|||||||
if (rc)
|
if (rc)
|
||||||
return rc;
|
return rc;
|
||||||
|
|
||||||
rc = wait_for_vasi_session_suspending(streamid);
|
rc = pseries_migrate_partition(streamid);
|
||||||
if (rc)
|
if (rc)
|
||||||
return rc;
|
return rc;
|
||||||
|
|
||||||
rc = rtas_ibm_suspend_me_unsafe(streamid);
|
|
||||||
if (rc)
|
|
||||||
return rc;
|
|
||||||
|
|
||||||
post_mobility_fixup();
|
|
||||||
|
|
||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user