2019-05-19 16:51:42 +03:00
// SPDX-License-Identifier: GPL-2.0-or-later
2005-04-17 02:20:36 +04:00
/*
 * Copyright (C) 2004, 2013 Intel Corporation
 * Author: Naveen B S <naveen.b.s@intel.com>
 * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
 *
 * All rights reserved.
 *
 * ACPI based HotPlug driver that supports Memory Hotplug.
 * This driver fields notifications from firmware for memory add
 * and remove operations and alerts the VM of the affected memory
 * ranges.
 */
2012-11-21 03:42:28 +04:00
# include <linux/acpi.h>
2013-05-08 02:29:49 +04:00
# include <linux/memory.h>
2013-03-04 02:18:03 +04:00
# include <linux/memory_hotplug.h>
# include "internal.h"
2005-04-17 02:20:36 +04:00
# define ACPI_MEMORY_DEVICE_CLASS "memory"
# define ACPI_MEMORY_DEVICE_HID "PNP0C80"
# define ACPI_MEMORY_DEVICE_NAME "Hotplug Mem Device"
# define _COMPONENT ACPI_MEMORY_DEVICE_COMPONENT
2009-07-03 06:49:03 +04:00
# undef PREFIX
# define PREFIX "ACPI:memory_hp:"
2007-02-13 06:42:12 +03:00
ACPI_MODULE_NAME ( " acpi_memhotplug " ) ;
2005-04-17 02:20:36 +04:00
2014-05-30 06:29:14 +04:00
/*
 * ACPI hardware IDs handled by the memory hotplug scan handler.
 *
 * The table is consumed through the handler's .ids pointer without a
 * length, so it must be closed by a sentinel entry whose ID is the empty
 * string.  A string holding a single blank is a non-empty ID, not a
 * sentinel: it would leave the table unterminated and let a table walk
 * run past the end of the array.
 */
static const struct acpi_device_id memory_device_ids[] = {
	{ ACPI_MEMORY_DEVICE_HID, 0 },
	{ "", 0 },	/* sentinel: empty ID terminates the table */
};
# ifdef CONFIG_ACPI_HOTPLUG_MEMORY
2005-04-17 02:20:36 +04:00
/*
 * Values stored in acpi_memory_device.state.
 */
#define MEMORY_INVALID_STATE	0	/* set when no range could be enabled */
#define MEMORY_POWER_ON_STATE	1	/* set while the device is being added */
#define MEMORY_POWER_OFF_STATE	2
2013-03-04 02:18:03 +04:00
static int acpi_memory_device_add ( struct acpi_device * device ,
const struct acpi_device_id * not_used ) ;
static void acpi_memory_device_remove ( struct acpi_device * device ) ;
2005-04-17 02:20:36 +04:00
2013-03-04 02:18:03 +04:00
static struct acpi_scan_handler memory_device_handler = {
2007-07-23 16:44:41 +04:00
. ids = memory_device_ids ,
2013-03-04 02:18:03 +04:00
. attach = acpi_memory_device_add ,
. detach = acpi_memory_device_remove ,
. hotplug = {
. enabled = true ,
} ,
2005-04-17 02:20:36 +04:00
} ;
2006-06-27 13:53:27 +04:00
/*
 * One physical memory range belonging to a memory device.  Instances are
 * linked into acpi_memory_device.res_list.
 */
struct acpi_memory_info {
	struct list_head list;		/* link in the owning device's res_list */
	u64 start_addr;			/* physical start address of the range */
	u64 length;			/* length of the range */
	unsigned short caching;		/* memory cache attribute */
	unsigned short write_protect;	/* memory read/write attribute */
	unsigned int enabled:1;		/* set once the range was added and bound */
};
2005-04-17 02:20:36 +04:00
struct acpi_memory_device {
2006-05-20 00:54:38 +04:00
struct acpi_device * device ;
2005-08-05 08:44:28 +04:00
unsigned int state ; /* State of the memory device */
2006-06-27 13:53:27 +04:00
struct list_head res_list ;
2005-04-17 02:20:36 +04:00
} ;
2006-06-27 13:53:27 +04:00
static acpi_status
acpi_memory_get_resource ( struct acpi_resource * resource , void * context )
{
struct acpi_memory_device * mem_device = context ;
struct acpi_resource_address64 address64 ;
struct acpi_memory_info * info , * new ;
acpi_status status ;
status = acpi_resource_to_address64 ( resource , & address64 ) ;
if ( ACPI_FAILURE ( status ) | |
( address64 . resource_type ! = ACPI_MEMORY_RANGE ) )
return AE_OK ;
list_for_each_entry ( info , & mem_device - > res_list , list ) {
/* Can we combine the resource range information? */
if ( ( info - > caching = = address64 . info . mem . caching ) & &
( info - > write_protect = = address64 . info . mem . write_protect ) & &
2015-01-26 11:58:56 +03:00
( info - > start_addr + info - > length = = address64 . address . minimum ) ) {
info - > length + = address64 . address . address_length ;
2006-06-27 13:53:27 +04:00
return AE_OK ;
}
}
new = kzalloc ( sizeof ( struct acpi_memory_info ) , GFP_KERNEL ) ;
if ( ! new )
return AE_ERROR ;
INIT_LIST_HEAD ( & new - > list ) ;
new - > caching = address64 . info . mem . caching ;
new - > write_protect = address64 . info . mem . write_protect ;
2015-01-26 11:58:56 +03:00
new - > start_addr = address64 . address . minimum ;
new - > length = address64 . address . address_length ;
2006-06-27 13:53:27 +04:00
list_add_tail ( & new - > list , & mem_device - > res_list ) ;
return AE_OK ;
}
2012-11-16 05:06:06 +04:00
static void
acpi_memory_free_device_resources ( struct acpi_memory_device * mem_device )
{
struct acpi_memory_info * info , * n ;
list_for_each_entry_safe ( info , n , & mem_device - > res_list , list )
kfree ( info ) ;
INIT_LIST_HEAD ( & mem_device - > res_list ) ;
}
2005-04-17 02:20:36 +04:00
static int
acpi_memory_get_device_resources ( struct acpi_memory_device * mem_device )
{
acpi_status status ;
2006-08-05 23:15:04 +04:00
if ( ! list_empty ( & mem_device - > res_list ) )
return 0 ;
2006-05-20 00:54:41 +04:00
status = acpi_walk_resources ( mem_device - > device - > handle , METHOD_NAME__CRS ,
2006-06-27 13:53:27 +04:00
acpi_memory_get_resource , mem_device ) ;
if ( ACPI_FAILURE ( status ) ) {
2012-11-16 05:06:06 +04:00
acpi_memory_free_device_resources ( mem_device ) ;
2006-06-27 08:41:40 +04:00
return - EINVAL ;
2005-04-17 02:20:36 +04:00
}
2006-06-27 08:41:40 +04:00
return 0 ;
2005-04-17 02:20:36 +04:00
}
2005-08-05 08:44:28 +04:00
static int acpi_memory_check_device ( struct acpi_memory_device * mem_device )
2005-04-17 02:20:36 +04:00
{
2008-10-10 10:22:59 +04:00
unsigned long long current_status ;
2005-04-17 02:20:36 +04:00
/* Get device present/absent information from the _STA */
2013-10-02 12:27:37 +04:00
if ( ACPI_FAILURE ( acpi_evaluate_integer ( mem_device - > device - > handle ,
METHOD_NAME__STA , NULL ,
& current_status ) ) )
2006-06-27 08:41:40 +04:00
return - ENODEV ;
2005-04-17 02:20:36 +04:00
/*
* Check for device status . Device should be
* present / enabled / functioning .
*/
2007-04-25 22:17:39 +04:00
if ( ! ( ( current_status & ACPI_STA_DEVICE_PRESENT )
& & ( current_status & ACPI_STA_DEVICE_ENABLED )
& & ( current_status & ACPI_STA_DEVICE_FUNCTIONING ) ) )
2006-06-27 08:41:40 +04:00
return - ENODEV ;
2005-04-17 02:20:36 +04:00
2006-06-27 08:41:40 +04:00
return 0 ;
2005-04-17 02:20:36 +04:00
}
2013-05-08 02:29:49 +04:00
static unsigned long acpi_meminfo_start_pfn ( struct acpi_memory_info * info )
{
return PFN_DOWN ( info - > start_addr ) ;
}
static unsigned long acpi_meminfo_end_pfn ( struct acpi_memory_info * info )
{
return PFN_UP ( info - > start_addr + info - > length - 1 ) ;
}
static int acpi_bind_memblk ( struct memory_block * mem , void * arg )
{
2013-11-29 19:27:43 +04:00
return acpi_bind_one ( & mem - > dev , arg ) ;
2013-05-08 02:29:49 +04:00
}
static int acpi_bind_memory_blocks ( struct acpi_memory_info * info ,
2013-11-29 19:27:43 +04:00
struct acpi_device * adev )
2013-05-08 02:29:49 +04:00
{
return walk_memory_range ( acpi_meminfo_start_pfn ( info ) ,
2013-11-29 19:27:43 +04:00
acpi_meminfo_end_pfn ( info ) , adev ,
2013-05-08 02:29:49 +04:00
acpi_bind_memblk ) ;
}
static int acpi_unbind_memblk ( struct memory_block * mem , void * arg )
{
acpi_unbind_one ( & mem - > dev ) ;
return 0 ;
}
2013-11-29 19:27:43 +04:00
static void acpi_unbind_memory_blocks ( struct acpi_memory_info * info )
2013-05-08 02:29:49 +04:00
{
walk_memory_range ( acpi_meminfo_start_pfn ( info ) ,
acpi_meminfo_end_pfn ( info ) , NULL , acpi_unbind_memblk ) ;
}
2005-08-05 08:44:28 +04:00
static int acpi_memory_enable_device ( struct acpi_memory_device * mem_device )
2005-04-17 02:20:36 +04:00
{
2013-05-08 02:29:49 +04:00
acpi_handle handle = mem_device - > device - > handle ;
2006-06-27 13:53:27 +04:00
int result , num_enabled = 0 ;
struct acpi_memory_info * info ;
2006-06-27 13:53:31 +04:00
int node ;
2005-04-17 02:20:36 +04:00
2013-05-08 02:29:49 +04:00
node = acpi_get_node ( handle ) ;
2005-04-17 02:20:36 +04:00
/*
* Tell the VM there is more memory here . . .
* Note : Assume that this function returns zero on success
2006-06-27 13:53:27 +04:00
* We don ' t have memory - hot - add rollback function , now .
* ( i . e . memory - hot - remove function )
2005-04-17 02:20:36 +04:00
*/
2006-06-27 13:53:27 +04:00
list_for_each_entry ( info , & mem_device - > res_list , list ) {
2006-08-05 23:15:02 +04:00
if ( info - > enabled ) { /* just sanity check...*/
2006-06-27 13:53:29 +04:00
num_enabled + + ;
continue ;
}
2009-07-07 06:56:11 +04:00
/*
* If the memory block size is zero , please ignore it .
* Don ' t try to do the following memory hotplug flowchart .
*/
if ( ! info - > length )
continue ;
2006-10-01 10:27:07 +04:00
if ( node < 0 )
node = memory_add_physaddr_to_nid ( info - > start_addr ) ;
2018-10-31 01:10:24 +03:00
result = __add_memory ( node , info - > start_addr , info - > length ) ;
2012-11-16 05:10:37 +04:00
/*
* If the memory block has been used by the kernel , add_memory ( )
* returns - EEXIST . If add_memory ( ) returns the other error , it
* means that this memory block is not used by the kernel .
*/
2013-03-22 05:53:49 +04:00
if ( result & & result ! = - EEXIST )
2006-06-27 13:53:27 +04:00
continue ;
2012-11-16 05:10:37 +04:00
2013-11-29 19:27:43 +04:00
result = acpi_bind_memory_blocks ( info , mem_device - > device ) ;
2013-05-08 02:29:49 +04:00
if ( result ) {
2013-11-29 19:27:43 +04:00
acpi_unbind_memory_blocks ( info ) ;
2013-05-08 02:29:49 +04:00
return - ENODEV ;
}
2013-03-21 08:36:12 +04:00
info - > enabled = 1 ;
2012-11-16 05:10:37 +04:00
/*
* Add num_enable even if add_memory ( ) returns - EEXIST , so the
* device is bound to this driver .
*/
2006-06-27 13:53:27 +04:00
num_enabled + + ;
}
if ( ! num_enabled ) {
2012-11-21 03:42:28 +04:00
dev_err ( & mem_device - > device - > dev , " add_memory failed \n " ) ;
2005-04-17 02:20:36 +04:00
mem_device - > state = MEMORY_INVALID_STATE ;
2006-06-27 13:53:27 +04:00
return - EINVAL ;
2005-04-17 02:20:36 +04:00
}
2009-07-07 06:56:11 +04:00
/*
* Sometimes the memory device will contain several memory blocks .
* When one memory block is hot - added to the system memory , it will
* be regarded as a success .
* Otherwise if the last memory block can ' t be hot - added to the system
* memory , it will be failure and the memory device can ' t be bound with
* driver .
*/
return 0 ;
2005-04-17 02:20:36 +04:00
}
2013-05-27 14:58:46 +04:00
static void acpi_memory_remove_memory ( struct acpi_memory_device * mem_device )
2005-04-17 02:20:36 +04:00
{
2013-05-08 02:29:49 +04:00
acpi_handle handle = mem_device - > device - > handle ;
2006-06-27 13:53:27 +04:00
struct acpi_memory_info * info , * n ;
2013-05-27 14:58:46 +04:00
int nid = acpi_get_node ( handle ) ;
2013-02-23 04:33:14 +04:00
2006-06-27 13:53:27 +04:00
list_for_each_entry_safe ( info , n , & mem_device - > res_list , list ) {
2012-11-16 05:10:37 +04:00
if ( ! info - > enabled )
2013-03-22 05:53:49 +04:00
continue ;
2012-11-16 05:10:37 +04:00
2013-08-30 05:25:40 +04:00
if ( nid = = NUMA_NO_NODE )
2013-02-23 04:33:14 +04:00
nid = memory_add_physaddr_to_nid ( info - > start_addr ) ;
2013-05-08 02:29:49 +04:00
2013-11-29 19:27:43 +04:00
acpi_unbind_memory_blocks ( info ) ;
mm/memory_hotplug: make remove_memory() take the device_hotplug_lock
Patch series "mm: online/offline_pages called w.o. mem_hotplug_lock", v3.
Reading through the code and studying how mem_hotplug_lock is to be used,
I noticed that there are two places where we can end up calling
device_online()/device_offline() - online_pages()/offline_pages() without
the mem_hotplug_lock. And there are other places where we call
device_online()/device_offline() without the device_hotplug_lock.
While e.g.
echo "online" > /sys/devices/system/memory/memory9/state
is fine, e.g.
echo 1 > /sys/devices/system/memory/memory9/online
Will not take the mem_hotplug_lock. However the device_lock() and
device_hotplug_lock.
E.g. via memory_probe_store(), we can end up calling
add_memory()->online_pages() without the device_hotplug_lock. So we can
have concurrent callers in online_pages(). We e.g. touch in
online_pages() basically unprotected zone->present_pages then.
Looks like there is a longer history to that (see Patch #2 for details),
and fixing it to work the way it was intended is not really possible. We
would e.g. have to take the mem_hotplug_lock in device/base/core.c, which
sounds wrong.
Summary: We had a lock inversion on mem_hotplug_lock and device_lock().
More details can be found in patch 3 and patch 6.
I propose the general rules (documentation added in patch 6):
1. add_memory/add_memory_resource() must only be called with
device_hotplug_lock.
2. remove_memory() must only be called with device_hotplug_lock. This is
already documented and holds for all callers.
3. device_online()/device_offline() must only be called with
device_hotplug_lock. This is already documented and true for now in core
code. Other callers (related to memory hotplug) have to be fixed up.
4. mem_hotplug_lock is taken inside of add_memory/remove_memory/
online_pages/offline_pages.
To me, this looks way cleaner than what we have right now (and easier to
verify). And looking at the documentation of remove_memory, using
lock_device_hotplug also for add_memory() feels natural.
This patch (of 6):
remove_memory() is exported right now but requires the
device_hotplug_lock, which is not exported. So let's provide a variant
that takes the lock and only export that one.
The lock is already held in
arch/powerpc/platforms/pseries/hotplug-memory.c
drivers/acpi/acpi_memhotplug.c
arch/powerpc/platforms/powernv/memtrace.c
Apart from that, there are not other users in the tree.
Link: http://lkml.kernel.org/r/20180925091457.28651-2-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Pavel Tatashin <pavel.tatashin@microsoft.com>
Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Rashmica Gupta <rashmica.g@gmail.com>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Cc: Len Brown <lenb@kernel.org>
Cc: Rashmica Gupta <rashmica.g@gmail.com>
Cc: Michael Neuling <mikey@neuling.org>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Cc: John Allen <jallen@linux.vnet.ibm.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: YASUAKI ISHIMATSU <yasu.isimatu@gmail.com>
Cc: Mathieu Malaterre <malat@debian.org>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Juergen Gross <jgross@suse.com>
Cc: Kate Stewart <kstewart@linuxfoundation.org>
Cc: "K. Y. Srinivasan" <kys@microsoft.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Philippe Ombredanne <pombredanne@nexb.com>
Cc: Stephen Hemminger <sthemmin@microsoft.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2018-10-31 01:10:18 +03:00
__remove_memory ( nid , info - > start_addr , info - > length ) ;
2012-11-15 10:59:31 +04:00
list_del ( & info - > list ) ;
2006-06-27 13:53:27 +04:00
kfree ( info ) ;
2005-04-17 02:20:36 +04:00
}
2012-11-15 10:59:31 +04:00
}
2012-11-16 05:06:06 +04:00
static void acpi_memory_device_free ( struct acpi_memory_device * mem_device )
{
if ( ! mem_device )
return ;
acpi_memory_free_device_resources ( mem_device ) ;
2013-03-04 02:18:03 +04:00
mem_device - > device - > driver_data = NULL ;
2012-11-16 05:06:06 +04:00
kfree ( mem_device ) ;
}
2013-03-04 02:18:03 +04:00
static int acpi_memory_device_add ( struct acpi_device * device ,
const struct acpi_device_id * not_used )
2005-04-17 02:20:36 +04:00
{
2013-03-04 02:18:03 +04:00
struct acpi_memory_device * mem_device ;
2005-04-17 02:20:36 +04:00
int result ;
if ( ! device )
2006-06-27 08:41:40 +04:00
return - EINVAL ;
2005-04-17 02:20:36 +04:00
2006-12-19 23:56:11 +03:00
mem_device = kzalloc ( sizeof ( struct acpi_memory_device ) , GFP_KERNEL ) ;
2005-04-17 02:20:36 +04:00
if ( ! mem_device )
2006-06-27 08:41:40 +04:00
return - ENOMEM ;
2005-04-17 02:20:36 +04:00
2006-06-27 13:53:27 +04:00
INIT_LIST_HEAD ( & mem_device - > res_list ) ;
2006-05-20 00:54:38 +04:00
mem_device - > device = device ;
2005-04-17 02:20:36 +04:00
sprintf ( acpi_device_name ( device ) , " %s " , ACPI_MEMORY_DEVICE_NAME ) ;
sprintf ( acpi_device_class ( device ) , " %s " , ACPI_MEMORY_DEVICE_CLASS ) ;
2008-09-23 01:37:34 +04:00
device - > driver_data = mem_device ;
2005-04-17 02:20:36 +04:00
/* Get the range from the _CRS */
result = acpi_memory_get_device_resources ( mem_device ) ;
if ( result ) {
2013-07-10 20:47:13 +04:00
device - > driver_data = NULL ;
2005-04-17 02:20:36 +04:00
kfree ( mem_device ) ;
2006-06-27 08:41:40 +04:00
return result ;
2005-04-17 02:20:36 +04:00
}
/* Set the device state */
mem_device - > state = MEMORY_POWER_ON_STATE ;
2013-03-04 02:18:03 +04:00
result = acpi_memory_check_device ( mem_device ) ;
if ( result ) {
acpi_memory_device_free ( mem_device ) ;
return 0 ;
}
2005-04-17 02:20:36 +04:00
2013-03-04 02:18:03 +04:00
result = acpi_memory_enable_device ( mem_device ) ;
if ( result ) {
dev_err ( & device - > dev , " acpi_memory_enable_device() error \n " ) ;
acpi_memory_device_free ( mem_device ) ;
2013-05-08 02:29:49 +04:00
return result ;
2006-06-27 13:53:28 +04:00
}
2013-03-04 02:18:03 +04:00
dev_dbg ( & device - > dev , " Memory device configured by ACPI \n " ) ;
return 1 ;
2006-06-27 13:53:28 +04:00
}
2013-03-04 02:18:03 +04:00
/*
 * Scan-handler detach callback: remove the memory that was added for
 * @device and free its per-device state.  Does nothing when @device is
 * NULL or carries no driver data.
 */
static void acpi_memory_device_remove(struct acpi_device *device)
{
	struct acpi_memory_device *mem_device;

	if (!device)
		return;

	mem_device = acpi_driver_data(device);
	if (!mem_device)
		return;

	acpi_memory_remove_memory(mem_device);
	acpi_memory_device_free(mem_device);
}
2014-01-14 23:21:13 +04:00
static bool __initdata acpi_no_memhotplug ;
2013-03-04 02:18:03 +04:00
void __init acpi_memory_hotplug_init ( void )
2005-04-17 02:20:36 +04:00
{
2014-05-30 06:29:14 +04:00
if ( acpi_no_memhotplug ) {
memory_device_handler . attach = NULL ;
acpi_scan_add_handler ( & memory_device_handler ) ;
2014-01-14 23:21:13 +04:00
return ;
2014-05-30 06:29:14 +04:00
}
2013-03-04 02:18:03 +04:00
acpi_scan_add_handler_with_hotplug ( & memory_device_handler , " memory " ) ;
2005-04-17 02:20:36 +04:00
}
2014-01-14 23:21:13 +04:00
/*
 * Handler for the "acpi_no_memhotplug" kernel command-line parameter:
 * records that ACPI memory hotplug must stay disabled, which
 * acpi_memory_hotplug_init() checks when registering the scan handler.
 *
 * @str: remainder of the parameter (unused).
 *
 * Returns 1 to mark the parameter as handled.
 */
static int __init disable_acpi_memory_hotplug(char *str)
{
	acpi_no_memhotplug = true;
	return 1;
}
/*
 * The parameter name must not be padded with blanks: command-line tokens
 * are separated by whitespace, so a padded name could never be matched.
 */
__setup("acpi_no_memhotplug", disable_acpi_memory_hotplug);
2014-05-30 06:29:14 +04:00
# else
/*
 * Variant used when CONFIG_ACPI_HOTPLUG_MEMORY is not set: the handler
 * only carries the ID table and has no attach or detach callbacks.
 */
static struct acpi_scan_handler memory_device_handler = {
	.ids	= memory_device_ids,
};
/*
 * Variant used when CONFIG_ACPI_HOTPLUG_MEMORY is not set: register the
 * callback-less scan handler, without a hotplug profile.
 */
void __init acpi_memory_hotplug_init(void)
{
	acpi_scan_add_handler(&memory_device_handler);
}
# endif /* CONFIG_ACPI_HOTPLUG_MEMORY */