linux/drivers/pci/hotplug/acpiphp_glue.c

1178 lines
29 KiB
C
Raw Normal View History

/*
* ACPI PCI HotPlug glue functions to ACPI CA subsystem
*
* Copyright (C) 2002,2003 Takayoshi Kochi (t-kochi@bq.jp.nec.com)
* Copyright (C) 2002 Hiroshi Aono (h-aono@ap.jp.nec.com)
* Copyright (C) 2002,2003 NEC Corporation
* Copyright (C) 2003-2005 Matthew Wilcox (matthew.wilcox@hp.com)
* Copyright (C) 2003-2005 Hewlett Packard
* Copyright (C) 2005 Rajesh Shah (rajesh.shah@intel.com)
* Copyright (C) 2005 Intel Corporation
*
* All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or (at
* your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* Send feedback to <kristen.c.accardi@intel.com>
*
*/
/*
* Lifetime rules for pci_dev:
* - The one in acpiphp_bridge has its refcount elevated by pci_get_slot()
* when the bridge is scanned and it loses a refcount when the bridge
* is removed.
* - When a P2P bridge is present, we elevate the refcount on the subordinate
* bus. It loses the refcount when the the driver unloads.
*/
#define pr_fmt(fmt) "acpiphp_glue: " fmt
#include <linux/init.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/pci_hotplug.h>
#include <linux/pci-acpi.h>
ACPI / hotplug / PCI: Check for new devices on enabled slots The current implementation of acpiphp_check_bridge() is pretty dumb: - It enables a slot if it's not enabled and the slot status is ACPI_STA_ALL. - It disables a slot if it's enabled and the slot status is not ACPI_STA_ALL. This behavior is not sufficient to handle the Thunderbolt daisy chaining case properly, however, because in that case the bus behind the already enabled slot needs to be rescanned for new devices. For this reason, modify acpiphp_check_bridge() so that slots are disabled and stopped if they are not in the ACPI_STA_ALL state. For slots in the ACPI_STA_ALL state, devices behind them that don't respond are trimmed using a new function, trim_stale_devices(), introduced specifically for this purpose. That function walks the given bus and checks each device on it. If the device doesn't respond, it is assumed to be gone and is removed. Once all of the stale devices directy behind the slot have been removed, acpiphp_check_bridge() will start looking for new devices that might have appeared on the given bus. It will do that even if the slot is already enabled (SLOT_ENABLED is set for it). In addition to that, make the bus check notification ignore SLOT_ENABLED and go for enable_device() directly if bridge is NULL, so that devices behind the slot are re-enumerated in that case too. This change is based on earlier patches from Kirill A Shutemov and Mika Westerberg. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
2013-07-16 22:10:35 +02:00
#include <linux/pm_runtime.h>
#include <linux/mutex.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 17:04:11 +09:00
#include <linux/slab.h>
PCI hotplug: acpiphp: Prevent deadlock on PCI-to-PCI bridge remove I originally submitted a patch to workaround this by pushing all Ejection Requests and Device Checks onto the kacpi_hotplug queue. http://marc.info/?l=linux-acpi&m=131678270930105&w=2 The patch is still insufficient in that Bus Checks also need to be added. Rather than add all events, including non-PCI-hotplug events, to the hotplug queue, mjg suggested that a better approach would be to modify the acpiphp driver so only acpiphp events would be added to the kacpi_hotplug queue. It's a longer patch, but at least we maintain the benefit of having separate queues in ACPI. This, of course, is still only a workaround the problem. As Bjorn and mjg pointed out, we have to refactor a lot of this code to do the right thing but at this point it is a better to have this code working. The acpi core places all events on the kacpi_notify queue. When the acpiphp driver is loaded and a PCI card with a PCI-to-PCI bridge is removed the following call sequence occurs: cleanup_p2p_bridge() -> cleanup_bridge() -> acpi_remove_notify_handler() -> acpi_os_wait_events_complete() -> flush_workqueue(kacpi_notify_wq) which is the queue we are currently executing on and the process will hang. Move all hotplug acpiphp events onto the kacpi_hotplug workqueue. In handle_hotplug_event_bridge() and handle_hotplug_event_func() we can simply push the rest of the work onto the kacpi_hotplug queue and then avoid the deadlock. Signed-off-by: Prarit Bhargava <prarit@redhat.com> Cc: mjg@redhat.com Cc: bhelgaas@google.com Cc: linux-acpi@vger.kernel.org Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2011-09-28 19:40:53 -04:00
#include <linux/acpi.h>
#include "../pci.h"
#include "acpiphp.h"
static LIST_HEAD(bridge_list);
static DEFINE_MUTEX(bridge_mutex);
static DEFINE_MUTEX(acpiphp_context_lock);
static void handle_hotplug_event(acpi_handle handle, u32 type, void *data);
static void acpiphp_sanitize_bus(struct pci_bus *bus);
static void acpiphp_set_hpp_values(struct pci_bus *bus);
static void hotplug_event(acpi_handle handle, u32 type, void *data);
static void free_bridge(struct kref *kref);
static void acpiphp_context_handler(acpi_handle handle, void *context)
{
/* Intentionally empty. */
}
/**
* acpiphp_init_context - Create hotplug context and grab a reference to it.
* @adev: ACPI device object to create the context for.
*
* Call under acpiphp_context_lock.
*/
static struct acpiphp_context *acpiphp_init_context(struct acpi_device *adev)
{
struct acpiphp_context *context;
acpi_status status;
context = kzalloc(sizeof(*context), GFP_KERNEL);
if (!context)
return NULL;
context->adev = adev;
context->refcount = 1;
status = acpi_attach_data(adev->handle, acpiphp_context_handler, context);
if (ACPI_FAILURE(status)) {
kfree(context);
return NULL;
}
return context;
}
/**
* acpiphp_get_context - Get hotplug context and grab a reference to it.
* @handle: ACPI object handle to get the context for.
*
* Call under acpiphp_context_lock.
*/
static struct acpiphp_context *acpiphp_get_context(acpi_handle handle)
{
struct acpiphp_context *context = NULL;
acpi_status status;
void *data;
status = acpi_get_data(handle, acpiphp_context_handler, &data);
if (ACPI_SUCCESS(status)) {
context = data;
context->refcount++;
}
return context;
}
/**
* acpiphp_put_context - Drop a reference to ACPI hotplug context.
* @context: ACPI hotplug context to drop a reference to.
*
* The context object is removed if there are no more references to it.
*
* Call under acpiphp_context_lock.
*/
static void acpiphp_put_context(struct acpiphp_context *context)
{
if (--context->refcount)
return;
WARN_ON(context->bridge);
acpi_detach_data(context->adev->handle, acpiphp_context_handler);
kfree(context);
}
static inline void get_bridge(struct acpiphp_bridge *bridge)
{
kref_get(&bridge->ref);
}
static inline void put_bridge(struct acpiphp_bridge *bridge)
{
kref_put(&bridge->ref, free_bridge);
}
static void free_bridge(struct kref *kref)
{
struct acpiphp_context *context;
struct acpiphp_bridge *bridge;
struct acpiphp_slot *slot, *next;
struct acpiphp_func *func, *tmp;
mutex_lock(&acpiphp_context_lock);
bridge = container_of(kref, struct acpiphp_bridge, ref);
list_for_each_entry_safe(slot, next, &bridge->slots, node) {
list_for_each_entry_safe(func, tmp, &slot->funcs, sibling)
acpiphp_put_context(func_to_context(func));
kfree(slot);
}
context = bridge->context;
/* Root bridges will not have hotplug context. */
if (context) {
/* Release the reference taken by acpiphp_enumerate_slots(). */
put_bridge(context->func.parent);
context->bridge = NULL;
acpiphp_put_context(context);
}
put_device(&bridge->pci_bus->dev);
pci_dev_put(bridge->pci_dev);
kfree(bridge);
mutex_unlock(&acpiphp_context_lock);
}
/*
* the _DCK method can do funny things... and sometimes not
* hah-hah funny.
*
* TBD - figure out a way to only call fixups for
* systems that require them.
*/
static void post_dock_fixups(acpi_handle not_used, u32 event, void *data)
{
struct acpiphp_context *context = data;
struct pci_bus *bus = context->func.slot->bus;
u32 buses;
if (!bus->self)
return;
/* fixup bad _DCK function that rewrites
* secondary bridge on slot
*/
pci_read_config_dword(bus->self,
PCI_PRIMARY_BUS,
&buses);
if (((buses >> 8) & 0xff) != bus->busn_res.start) {
buses = (buses & 0xff000000)
| ((unsigned int)(bus->primary) << 0)
| ((unsigned int)(bus->busn_res.start) << 8)
| ((unsigned int)(bus->busn_res.end) << 16);
pci_write_config_dword(bus->self, PCI_PRIMARY_BUS, buses);
}
}
ACPI / hotplug / PCI: Fix bridge removal race vs dock events If a PCI bridge with an ACPIPHP context attached is removed via sysfs, the code path executed as a result is the following: pci_stop_and_remove_bus_device_locked pci_remove_bus pcibios_remove_bus acpi_pci_remove_bus acpiphp_remove_slots cleanup_bridge unregister_hotplug_dock_device (drops dock references to the bridge) put_bridge free_bridge acpiphp_put_context (for each child, under context lock) kfree (context) Now, if a dock event affecting one of the bridge's child devices occurs (roughly at the same time), it will lead to the following code path: acpi_dock_deferred_cb dock_notify handle_eject_request hot_remove_dock_devices dock_hotplug_event hotplug_event (dereferences context) That may lead to a kernel crash in hotplug_event() if it is executed after the last kfree() in the bridge removal code path. To prevent that from happening, add a wrapper around hotplug_event() called dock_event() and point the .handler pointer in acpiphp_dock_ops to it. Make that wrapper retrieve the device's ACPIPHP context using acpiphp_get_context() (instead of taking it from the data argument) under acpiphp_context_lock and check if the parent bridge's is_going_away flag is set. If that flag is set, it will return immediately and if it is not set it will grab a reference to the device's parent bridge before executing hotplug_event(). Then, in the above scenario, the reference to the parent bridge held by dock_event() will prevent free_bridge() from being executed for it until hotplug_event() returns. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2014-02-03 22:30:15 +01:00
static void dock_event(acpi_handle handle, u32 type, void *data)
{
struct acpiphp_context *context;
mutex_lock(&acpiphp_context_lock);
context = acpiphp_get_context(handle);
if (!context || WARN_ON(context->adev->handle != handle)
ACPI / hotplug / PCI: Fix bridge removal race vs dock events If a PCI bridge with an ACPIPHP context attached is removed via sysfs, the code path executed as a result is the following: pci_stop_and_remove_bus_device_locked pci_remove_bus pcibios_remove_bus acpi_pci_remove_bus acpiphp_remove_slots cleanup_bridge unregister_hotplug_dock_device (drops dock references to the bridge) put_bridge free_bridge acpiphp_put_context (for each child, under context lock) kfree (context) Now, if a dock event affecting one of the bridge's child devices occurs (roughly at the same time), it will lead to the following code path: acpi_dock_deferred_cb dock_notify handle_eject_request hot_remove_dock_devices dock_hotplug_event hotplug_event (dereferences context) That may lead to a kernel crash in hotplug_event() if it is executed after the last kfree() in the bridge removal code path. To prevent that from happening, add a wrapper around hotplug_event() called dock_event() and point the .handler pointer in acpiphp_dock_ops to it. Make that wrapper retrieve the device's ACPIPHP context using acpiphp_get_context() (instead of taking it from the data argument) under acpiphp_context_lock and check if the parent bridge's is_going_away flag is set. If that flag is set, it will return immediately and if it is not set it will grab a reference to the device's parent bridge before executing hotplug_event(). Then, in the above scenario, the reference to the parent bridge held by dock_event() will prevent free_bridge() from being executed for it until hotplug_event() returns. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2014-02-03 22:30:15 +01:00
|| context->func.parent->is_going_away) {
mutex_unlock(&acpiphp_context_lock);
return;
}
get_bridge(context->func.parent);
acpiphp_put_context(context);
mutex_unlock(&acpiphp_context_lock);
hotplug_event(handle, type, data);
put_bridge(context->func.parent);
}
static const struct acpi_dock_ops acpiphp_dock_ops = {
.fixup = post_dock_fixups,
ACPI / hotplug / PCI: Fix bridge removal race vs dock events If a PCI bridge with an ACPIPHP context attached is removed via sysfs, the code path executed as a result is the following: pci_stop_and_remove_bus_device_locked pci_remove_bus pcibios_remove_bus acpi_pci_remove_bus acpiphp_remove_slots cleanup_bridge unregister_hotplug_dock_device (drops dock references to the bridge) put_bridge free_bridge acpiphp_put_context (for each child, under context lock) kfree (context) Now, if a dock event affecting one of the bridge's child devices occurs (roughly at the same time), it will lead to the following code path: acpi_dock_deferred_cb dock_notify handle_eject_request hot_remove_dock_devices dock_hotplug_event hotplug_event (dereferences context) That may lead to a kernel crash in hotplug_event() if it is executed after the last kfree() in the bridge removal code path. To prevent that from happening, add a wrapper around hotplug_event() called dock_event() and point the .handler pointer in acpiphp_dock_ops to it. Make that wrapper retrieve the device's ACPIPHP context using acpiphp_get_context() (instead of taking it from the data argument) under acpiphp_context_lock and check if the parent bridge's is_going_away flag is set. If that flag is set, it will return immediately and if it is not set it will grab a reference to the device's parent bridge before executing hotplug_event(). Then, in the above scenario, the reference to the parent bridge held by dock_event() will prevent free_bridge() from being executed for it until hotplug_event() returns. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2014-02-03 22:30:15 +01:00
.handler = dock_event,
};
PCI: acpiphp: Handle PCIe ports without native hotplug capability Commit 0d52f54e2ef64c189dedc332e680b2eb4a34590a (PCI / ACPI: Make acpiphp ignore root bridges using PCIe native hotplug) added code that made the acpiphp driver completely ignore PCIe root complexes for which the kernel had been granted control of the native PCIe hotplug feature by the BIOS through _OSC. Later commit 619a5182d1f38a3d629ee48e04fa182ef9170052 "PCI hotplug: Always allow acpiphp to handle non-PCIe bridges" relaxed the constraints to allow acpiphp driver handle non-PCIe bridges under such a complex. The constraint needs to be relaxed further to allow acpiphp driver to handle PCIe ports without native PCIe hotplug capability. Some MR-IOV switch chipsets, such PLX8696, support multiple virtual PCIe switches and may migrate downstream ports among virtual switches. To migrate a downstream port from the source virtual switch to the target, the port needs to be hot-removed from the source and hot-added into the target. The pciehp driver can't be used here because there are no slots within the virtual PCIe switch. So acpiphp driver is used to support downstream port migration. A typical configuration is as below: [Root without native PCIe HP] [Upstream port of vswitch without native PCIe HP] [Downstream port of vswitch with native PCIe HP] [PCIe endpoint] Here acpiphp driver will be used to handle root ports and upstream port in the virtual switch, and pciehp driver will be used to handle downstream ports in the virtual switch. Signed-off-by: Jiang Liu <liuj97@gmail.com> Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> Acked-by: Rafael J. Wysocki <rjw@sisk.pl>
2012-08-22 23:16:45 +08:00
/* Check whether the PCI device is managed by native PCIe hotplug driver */
static bool device_is_managed_by_native_pciehp(struct pci_dev *pdev)
{
u32 reg32;
acpi_handle tmp;
struct acpi_pci_root *root;
/* Check whether the PCIe port supports native PCIe hotplug */
if (pcie_capability_read_dword(pdev, PCI_EXP_SLTCAP, &reg32))
return false;
if (!(reg32 & PCI_EXP_SLTCAP_HPC))
return false;
/*
* Check whether native PCIe hotplug has been enabled for
* this PCIe hierarchy.
*/
tmp = acpi_find_root_bridge_handle(pdev);
if (!tmp)
return false;
root = acpi_pci_find_root(tmp);
if (!root)
return false;
if (!(root->osc_control_set & OSC_PCI_EXPRESS_NATIVE_HP_CONTROL))
return false;
return true;
}
ACPI / dock / PCI: Synchronous handling of dock events for PCI devices The interactions between the ACPI dock driver and the ACPI-based PCI hotplug (acpiphp) are currently problematic because of ordering issues during hot-remove operations. First of all, the current ACPI glue code expects that physical devices will always be deleted before deleting the companion ACPI device objects. Otherwise, acpi_unbind_one() will fail with a warning message printed to the kernel log, for example: [ 185.026073] usb usb5: Oops, 'acpi_handle' corrupt [ 185.035150] pci 0000:1b:00.0: Oops, 'acpi_handle' corrupt [ 185.035515] pci 0000:18:02.0: Oops, 'acpi_handle' corrupt [ 180.013656] port1: Oops, 'acpi_handle' corrupt This means, in particular, that struct pci_dev objects have to be deleted before the struct acpi_device objects they are "glued" with. Now, the following happens the during the undocking of an ACPI-based dock station: 1) hotplug_dock_devices() invokes registered hotplug callbacks to destroy physical devices associated with the ACPI device objects depending on the dock station. It calls dd->ops->handler() for each of those device objects. 2) For PCI devices dd->ops->handler() points to handle_hotplug_event_func() that queues up a separate work item to execute _handle_hotplug_event_func() for the given device and returns immediately. That work item will be executed later. 3) hotplug_dock_devices() calls dock_remove_acpi_device() for each device depending on the dock station. This runs acpi_bus_trim() for each of them, which causes the underlying ACPI device object to be destroyed, but the work items queued up by handle_hotplug_event_func() haven't been started yet. 4) _handle_hotplug_event_func() queued up in step 2) are executed and cause the above failure to happen, because the PCI devices they handle do not have the companion ACPI device objects any more (those objects have been deleted in step 3). The possible breakage doesn't end here, though, because hotplug_dock_devices() may return before at least some of the _handle_hotplug_event_func() work items spawned by it have a chance to complete and then undock() will cause _DCK to be evaluated and that will cause the devices handled by the _handle_hotplug_event_func() to go away possibly while they are being accessed. This means that dd->ops->handler() for PCI devices should not point to handle_hotplug_event_func(). Instead, it should point to a function that will do the work of _handle_hotplug_event_func() synchronously. For this reason, introduce such a function, hotplug_event_func(), and modity acpiphp_dock_ops to point to it as the handler. Unfortunately, however, this is not sufficient, because if the dock code were not changed further, hotplug_event_func() would now deadlock with hotplug_dock_devices() that called it, since it would run unregister_hotplug_dock_device() which in turn would attempt to acquire the dock station's hp_lock mutex already acquired by hotplug_dock_devices(). To resolve that deadlock use the observation that unregister_hotplug_dock_device() won't need to acquire hp_lock if PCI bridges the devices on the dock station depend on are prevented from being removed prematurely while the first loop in hotplug_dock_devices() is in progress. To make that possible, introduce a mechanism by which the callers of register_hotplug_dock_device() can provide "init" and "release" routines that will be executed, respectively, during the addition and removal of the physical device object associated with the given ACPI device handle. Make acpiphp use two new functions, acpiphp_dock_init() and acpiphp_dock_release(), that call get_bridge() and put_bridge(), respectively, on the acpiphp bridge holding the given device, for this purpose. In addition to that, remove the dock station's list of "hotplug devices" and make the dock code always walk the whole list of "dependent devices" instead in such a way that the loops in hotplug_dock_devices() and dock_event() (replacing the loops over "hotplug devices") will take references to the list entries that register_hotplug_dock_device() has been called for. That prevents the "release" routines associated with those entries from being called while the given entry is being processed and for PCI devices this means that their bridges won't be removed (by a concurrent thread) while hotplug_event_func() handling them is being executed. This change is based on two earlier patches from Jiang Liu. References: https://bugzilla.kernel.org/show_bug.cgi?id=59501 Reported-and-tested-by: Alexander E. Patrakov <patrakov@gmail.com> Tracked-down-by: Jiang Liu <jiang.liu@huawei.com> Tested-by: Illya Klymov <xanf@xanf.me> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Acked-by: Yinghai Lu <yinghai@kernel.org> Cc: 3.9+ <stable@vger.kernel.org>
2013-06-24 11:22:53 +02:00
static void acpiphp_dock_init(void *data)
{
struct acpiphp_context *context = data;
ACPI / dock / PCI: Synchronous handling of dock events for PCI devices The interactions between the ACPI dock driver and the ACPI-based PCI hotplug (acpiphp) are currently problematic because of ordering issues during hot-remove operations. First of all, the current ACPI glue code expects that physical devices will always be deleted before deleting the companion ACPI device objects. Otherwise, acpi_unbind_one() will fail with a warning message printed to the kernel log, for example: [ 185.026073] usb usb5: Oops, 'acpi_handle' corrupt [ 185.035150] pci 0000:1b:00.0: Oops, 'acpi_handle' corrupt [ 185.035515] pci 0000:18:02.0: Oops, 'acpi_handle' corrupt [ 180.013656] port1: Oops, 'acpi_handle' corrupt This means, in particular, that struct pci_dev objects have to be deleted before the struct acpi_device objects they are "glued" with. Now, the following happens the during the undocking of an ACPI-based dock station: 1) hotplug_dock_devices() invokes registered hotplug callbacks to destroy physical devices associated with the ACPI device objects depending on the dock station. It calls dd->ops->handler() for each of those device objects. 2) For PCI devices dd->ops->handler() points to handle_hotplug_event_func() that queues up a separate work item to execute _handle_hotplug_event_func() for the given device and returns immediately. That work item will be executed later. 3) hotplug_dock_devices() calls dock_remove_acpi_device() for each device depending on the dock station. This runs acpi_bus_trim() for each of them, which causes the underlying ACPI device object to be destroyed, but the work items queued up by handle_hotplug_event_func() haven't been started yet. 4) _handle_hotplug_event_func() queued up in step 2) are executed and cause the above failure to happen, because the PCI devices they handle do not have the companion ACPI device objects any more (those objects have been deleted in step 3). The possible breakage doesn't end here, though, because hotplug_dock_devices() may return before at least some of the _handle_hotplug_event_func() work items spawned by it have a chance to complete and then undock() will cause _DCK to be evaluated and that will cause the devices handled by the _handle_hotplug_event_func() to go away possibly while they are being accessed. This means that dd->ops->handler() for PCI devices should not point to handle_hotplug_event_func(). Instead, it should point to a function that will do the work of _handle_hotplug_event_func() synchronously. For this reason, introduce such a function, hotplug_event_func(), and modity acpiphp_dock_ops to point to it as the handler. Unfortunately, however, this is not sufficient, because if the dock code were not changed further, hotplug_event_func() would now deadlock with hotplug_dock_devices() that called it, since it would run unregister_hotplug_dock_device() which in turn would attempt to acquire the dock station's hp_lock mutex already acquired by hotplug_dock_devices(). To resolve that deadlock use the observation that unregister_hotplug_dock_device() won't need to acquire hp_lock if PCI bridges the devices on the dock station depend on are prevented from being removed prematurely while the first loop in hotplug_dock_devices() is in progress. To make that possible, introduce a mechanism by which the callers of register_hotplug_dock_device() can provide "init" and "release" routines that will be executed, respectively, during the addition and removal of the physical device object associated with the given ACPI device handle. Make acpiphp use two new functions, acpiphp_dock_init() and acpiphp_dock_release(), that call get_bridge() and put_bridge(), respectively, on the acpiphp bridge holding the given device, for this purpose. In addition to that, remove the dock station's list of "hotplug devices" and make the dock code always walk the whole list of "dependent devices" instead in such a way that the loops in hotplug_dock_devices() and dock_event() (replacing the loops over "hotplug devices") will take references to the list entries that register_hotplug_dock_device() has been called for. That prevents the "release" routines associated with those entries from being called while the given entry is being processed and for PCI devices this means that their bridges won't be removed (by a concurrent thread) while hotplug_event_func() handling them is being executed. This change is based on two earlier patches from Jiang Liu. References: https://bugzilla.kernel.org/show_bug.cgi?id=59501 Reported-and-tested-by: Alexander E. Patrakov <patrakov@gmail.com> Tracked-down-by: Jiang Liu <jiang.liu@huawei.com> Tested-by: Illya Klymov <xanf@xanf.me> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Acked-by: Yinghai Lu <yinghai@kernel.org> Cc: 3.9+ <stable@vger.kernel.org>
2013-06-24 11:22:53 +02:00
get_bridge(context->func.parent);
ACPI / dock / PCI: Synchronous handling of dock events for PCI devices The interactions between the ACPI dock driver and the ACPI-based PCI hotplug (acpiphp) are currently problematic because of ordering issues during hot-remove operations. First of all, the current ACPI glue code expects that physical devices will always be deleted before deleting the companion ACPI device objects. Otherwise, acpi_unbind_one() will fail with a warning message printed to the kernel log, for example: [ 185.026073] usb usb5: Oops, 'acpi_handle' corrupt [ 185.035150] pci 0000:1b:00.0: Oops, 'acpi_handle' corrupt [ 185.035515] pci 0000:18:02.0: Oops, 'acpi_handle' corrupt [ 180.013656] port1: Oops, 'acpi_handle' corrupt This means, in particular, that struct pci_dev objects have to be deleted before the struct acpi_device objects they are "glued" with. Now, the following happens the during the undocking of an ACPI-based dock station: 1) hotplug_dock_devices() invokes registered hotplug callbacks to destroy physical devices associated with the ACPI device objects depending on the dock station. It calls dd->ops->handler() for each of those device objects. 2) For PCI devices dd->ops->handler() points to handle_hotplug_event_func() that queues up a separate work item to execute _handle_hotplug_event_func() for the given device and returns immediately. That work item will be executed later. 3) hotplug_dock_devices() calls dock_remove_acpi_device() for each device depending on the dock station. This runs acpi_bus_trim() for each of them, which causes the underlying ACPI device object to be destroyed, but the work items queued up by handle_hotplug_event_func() haven't been started yet. 4) _handle_hotplug_event_func() queued up in step 2) are executed and cause the above failure to happen, because the PCI devices they handle do not have the companion ACPI device objects any more (those objects have been deleted in step 3). The possible breakage doesn't end here, though, because hotplug_dock_devices() may return before at least some of the _handle_hotplug_event_func() work items spawned by it have a chance to complete and then undock() will cause _DCK to be evaluated and that will cause the devices handled by the _handle_hotplug_event_func() to go away possibly while they are being accessed. This means that dd->ops->handler() for PCI devices should not point to handle_hotplug_event_func(). Instead, it should point to a function that will do the work of _handle_hotplug_event_func() synchronously. For this reason, introduce such a function, hotplug_event_func(), and modity acpiphp_dock_ops to point to it as the handler. Unfortunately, however, this is not sufficient, because if the dock code were not changed further, hotplug_event_func() would now deadlock with hotplug_dock_devices() that called it, since it would run unregister_hotplug_dock_device() which in turn would attempt to acquire the dock station's hp_lock mutex already acquired by hotplug_dock_devices(). To resolve that deadlock use the observation that unregister_hotplug_dock_device() won't need to acquire hp_lock if PCI bridges the devices on the dock station depend on are prevented from being removed prematurely while the first loop in hotplug_dock_devices() is in progress. To make that possible, introduce a mechanism by which the callers of register_hotplug_dock_device() can provide "init" and "release" routines that will be executed, respectively, during the addition and removal of the physical device object associated with the given ACPI device handle. Make acpiphp use two new functions, acpiphp_dock_init() and acpiphp_dock_release(), that call get_bridge() and put_bridge(), respectively, on the acpiphp bridge holding the given device, for this purpose. In addition to that, remove the dock station's list of "hotplug devices" and make the dock code always walk the whole list of "dependent devices" instead in such a way that the loops in hotplug_dock_devices() and dock_event() (replacing the loops over "hotplug devices") will take references to the list entries that register_hotplug_dock_device() has been called for. That prevents the "release" routines associated with those entries from being called while the given entry is being processed and for PCI devices this means that their bridges won't be removed (by a concurrent thread) while hotplug_event_func() handling them is being executed. This change is based on two earlier patches from Jiang Liu. References: https://bugzilla.kernel.org/show_bug.cgi?id=59501 Reported-and-tested-by: Alexander E. Patrakov <patrakov@gmail.com> Tracked-down-by: Jiang Liu <jiang.liu@huawei.com> Tested-by: Illya Klymov <xanf@xanf.me> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Acked-by: Yinghai Lu <yinghai@kernel.org> Cc: 3.9+ <stable@vger.kernel.org>
2013-06-24 11:22:53 +02:00
}
static void acpiphp_dock_release(void *data)
{
struct acpiphp_context *context = data;
ACPI / dock / PCI: Synchronous handling of dock events for PCI devices The interactions between the ACPI dock driver and the ACPI-based PCI hotplug (acpiphp) are currently problematic because of ordering issues during hot-remove operations. First of all, the current ACPI glue code expects that physical devices will always be deleted before deleting the companion ACPI device objects. Otherwise, acpi_unbind_one() will fail with a warning message printed to the kernel log, for example: [ 185.026073] usb usb5: Oops, 'acpi_handle' corrupt [ 185.035150] pci 0000:1b:00.0: Oops, 'acpi_handle' corrupt [ 185.035515] pci 0000:18:02.0: Oops, 'acpi_handle' corrupt [ 180.013656] port1: Oops, 'acpi_handle' corrupt This means, in particular, that struct pci_dev objects have to be deleted before the struct acpi_device objects they are "glued" with. Now, the following happens the during the undocking of an ACPI-based dock station: 1) hotplug_dock_devices() invokes registered hotplug callbacks to destroy physical devices associated with the ACPI device objects depending on the dock station. It calls dd->ops->handler() for each of those device objects. 2) For PCI devices dd->ops->handler() points to handle_hotplug_event_func() that queues up a separate work item to execute _handle_hotplug_event_func() for the given device and returns immediately. That work item will be executed later. 3) hotplug_dock_devices() calls dock_remove_acpi_device() for each device depending on the dock station. This runs acpi_bus_trim() for each of them, which causes the underlying ACPI device object to be destroyed, but the work items queued up by handle_hotplug_event_func() haven't been started yet. 4) _handle_hotplug_event_func() queued up in step 2) are executed and cause the above failure to happen, because the PCI devices they handle do not have the companion ACPI device objects any more (those objects have been deleted in step 3). The possible breakage doesn't end here, though, because hotplug_dock_devices() may return before at least some of the _handle_hotplug_event_func() work items spawned by it have a chance to complete and then undock() will cause _DCK to be evaluated and that will cause the devices handled by the _handle_hotplug_event_func() to go away possibly while they are being accessed. This means that dd->ops->handler() for PCI devices should not point to handle_hotplug_event_func(). Instead, it should point to a function that will do the work of _handle_hotplug_event_func() synchronously. For this reason, introduce such a function, hotplug_event_func(), and modity acpiphp_dock_ops to point to it as the handler. Unfortunately, however, this is not sufficient, because if the dock code were not changed further, hotplug_event_func() would now deadlock with hotplug_dock_devices() that called it, since it would run unregister_hotplug_dock_device() which in turn would attempt to acquire the dock station's hp_lock mutex already acquired by hotplug_dock_devices(). To resolve that deadlock use the observation that unregister_hotplug_dock_device() won't need to acquire hp_lock if PCI bridges the devices on the dock station depend on are prevented from being removed prematurely while the first loop in hotplug_dock_devices() is in progress. To make that possible, introduce a mechanism by which the callers of register_hotplug_dock_device() can provide "init" and "release" routines that will be executed, respectively, during the addition and removal of the physical device object associated with the given ACPI device handle. Make acpiphp use two new functions, acpiphp_dock_init() and acpiphp_dock_release(), that call get_bridge() and put_bridge(), respectively, on the acpiphp bridge holding the given device, for this purpose. In addition to that, remove the dock station's list of "hotplug devices" and make the dock code always walk the whole list of "dependent devices" instead in such a way that the loops in hotplug_dock_devices() and dock_event() (replacing the loops over "hotplug devices") will take references to the list entries that register_hotplug_dock_device() has been called for. That prevents the "release" routines associated with those entries from being called while the given entry is being processed and for PCI devices this means that their bridges won't be removed (by a concurrent thread) while hotplug_event_func() handling them is being executed. This change is based on two earlier patches from Jiang Liu. References: https://bugzilla.kernel.org/show_bug.cgi?id=59501 Reported-and-tested-by: Alexander E. Patrakov <patrakov@gmail.com> Tracked-down-by: Jiang Liu <jiang.liu@huawei.com> Tested-by: Illya Klymov <xanf@xanf.me> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Acked-by: Yinghai Lu <yinghai@kernel.org> Cc: 3.9+ <stable@vger.kernel.org>
2013-06-24 11:22:53 +02:00
put_bridge(context->func.parent);
ACPI / dock / PCI: Synchronous handling of dock events for PCI devices The interactions between the ACPI dock driver and the ACPI-based PCI hotplug (acpiphp) are currently problematic because of ordering issues during hot-remove operations. First of all, the current ACPI glue code expects that physical devices will always be deleted before deleting the companion ACPI device objects. Otherwise, acpi_unbind_one() will fail with a warning message printed to the kernel log, for example: [ 185.026073] usb usb5: Oops, 'acpi_handle' corrupt [ 185.035150] pci 0000:1b:00.0: Oops, 'acpi_handle' corrupt [ 185.035515] pci 0000:18:02.0: Oops, 'acpi_handle' corrupt [ 180.013656] port1: Oops, 'acpi_handle' corrupt This means, in particular, that struct pci_dev objects have to be deleted before the struct acpi_device objects they are "glued" with. Now, the following happens the during the undocking of an ACPI-based dock station: 1) hotplug_dock_devices() invokes registered hotplug callbacks to destroy physical devices associated with the ACPI device objects depending on the dock station. It calls dd->ops->handler() for each of those device objects. 2) For PCI devices dd->ops->handler() points to handle_hotplug_event_func() that queues up a separate work item to execute _handle_hotplug_event_func() for the given device and returns immediately. That work item will be executed later. 3) hotplug_dock_devices() calls dock_remove_acpi_device() for each device depending on the dock station. This runs acpi_bus_trim() for each of them, which causes the underlying ACPI device object to be destroyed, but the work items queued up by handle_hotplug_event_func() haven't been started yet. 4) _handle_hotplug_event_func() queued up in step 2) are executed and cause the above failure to happen, because the PCI devices they handle do not have the companion ACPI device objects any more (those objects have been deleted in step 3). The possible breakage doesn't end here, though, because hotplug_dock_devices() may return before at least some of the _handle_hotplug_event_func() work items spawned by it have a chance to complete and then undock() will cause _DCK to be evaluated and that will cause the devices handled by the _handle_hotplug_event_func() to go away possibly while they are being accessed. This means that dd->ops->handler() for PCI devices should not point to handle_hotplug_event_func(). Instead, it should point to a function that will do the work of _handle_hotplug_event_func() synchronously. For this reason, introduce such a function, hotplug_event_func(), and modity acpiphp_dock_ops to point to it as the handler. Unfortunately, however, this is not sufficient, because if the dock code were not changed further, hotplug_event_func() would now deadlock with hotplug_dock_devices() that called it, since it would run unregister_hotplug_dock_device() which in turn would attempt to acquire the dock station's hp_lock mutex already acquired by hotplug_dock_devices(). To resolve that deadlock use the observation that unregister_hotplug_dock_device() won't need to acquire hp_lock if PCI bridges the devices on the dock station depend on are prevented from being removed prematurely while the first loop in hotplug_dock_devices() is in progress. To make that possible, introduce a mechanism by which the callers of register_hotplug_dock_device() can provide "init" and "release" routines that will be executed, respectively, during the addition and removal of the physical device object associated with the given ACPI device handle. Make acpiphp use two new functions, acpiphp_dock_init() and acpiphp_dock_release(), that call get_bridge() and put_bridge(), respectively, on the acpiphp bridge holding the given device, for this purpose. In addition to that, remove the dock station's list of "hotplug devices" and make the dock code always walk the whole list of "dependent devices" instead in such a way that the loops in hotplug_dock_devices() and dock_event() (replacing the loops over "hotplug devices") will take references to the list entries that register_hotplug_dock_device() has been called for. That prevents the "release" routines associated with those entries from being called while the given entry is being processed and for PCI devices this means that their bridges won't be removed (by a concurrent thread) while hotplug_event_func() handling them is being executed. This change is based on two earlier patches from Jiang Liu. References: https://bugzilla.kernel.org/show_bug.cgi?id=59501 Reported-and-tested-by: Alexander E. Patrakov <patrakov@gmail.com> Tracked-down-by: Jiang Liu <jiang.liu@huawei.com> Tested-by: Illya Klymov <xanf@xanf.me> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Acked-by: Yinghai Lu <yinghai@kernel.org> Cc: 3.9+ <stable@vger.kernel.org>
2013-06-24 11:22:53 +02:00
}
/* callback routine to register each ACPI PCI slot object */
static acpi_status register_slot(acpi_handle handle, u32 lvl, void *data,
void **rv)
{
struct acpiphp_bridge *bridge = data;
struct acpiphp_context *context;
struct acpi_device *adev;
struct acpiphp_slot *slot;
struct acpiphp_func *newfunc;
acpi_status status = AE_OK;
unsigned long long adr;
int device, function;
struct pci_bus *pbus = bridge->pci_bus;
struct pci_dev *pdev = bridge->pci_dev;
u32 val;
if (pdev && device_is_managed_by_native_pciehp(pdev))
return AE_OK;
status = acpi_evaluate_integer(handle, "_ADR", NULL, &adr);
if (ACPI_FAILURE(status)) {
if (status != AE_NOT_FOUND)
acpi_handle_warn(handle,
"can't evaluate _ADR (%#x)\n", status);
return AE_OK;
}
if (acpi_bus_get_device(handle, &adev))
return AE_OK;
device = (adr >> 16) & 0xffff;
function = adr & 0xffff;
mutex_lock(&acpiphp_context_lock);
context = acpiphp_init_context(adev);
if (!context) {
mutex_unlock(&acpiphp_context_lock);
acpi_handle_err(handle, "No hotplug context\n");
return AE_NOT_EXIST;
}
newfunc = &context->func;
newfunc->function = function;
newfunc->parent = bridge;
mutex_unlock(&acpiphp_context_lock);
if (acpi_has_method(handle, "_EJ0"))
newfunc->flags = FUNC_HAS_EJ0;
if (acpi_has_method(handle, "_STA"))
newfunc->flags |= FUNC_HAS_STA;
if (acpi_has_method(handle, "_DCK"))
newfunc->flags |= FUNC_HAS_DCK;
/* search for objects that share the same slot */
list_for_each_entry(slot, &bridge->slots, node)
if (slot->device == device)
goto slot_found;
slot = kzalloc(sizeof(struct acpiphp_slot), GFP_KERNEL);
if (!slot) {
mutex_lock(&acpiphp_context_lock);
acpiphp_put_context(context);
mutex_unlock(&acpiphp_context_lock);
return AE_NO_MEMORY;
}
slot->bus = bridge->pci_bus;
slot->device = device;
INIT_LIST_HEAD(&slot->funcs);
mutex_init(&slot->crit_sect);
list_add_tail(&slot->node, &bridge->slots);
/* Register slots for ejectable functions only. */
if (acpi_pci_check_ejectable(pbus, handle) || is_dock_device(handle)) {
unsigned long long sun;
int retval;
bridge->nr_slots++;
status = acpi_evaluate_integer(handle, "_SUN", NULL, &sun);
if (ACPI_FAILURE(status))
sun = bridge->nr_slots;
pr_debug("found ACPI PCI Hotplug slot %llu at PCI %04x:%02x:%02x\n",
sun, pci_domain_nr(pbus), pbus->number, device);
retval = acpiphp_register_hotplug_slot(slot, sun);
if (retval) {
slot->slot = NULL;
bridge->nr_slots--;
if (retval == -EBUSY)
pr_warn("Slot %llu already registered by another "
"hotplug driver\n", sun);
else
pr_warn("acpiphp_register_hotplug_slot failed "
"(err code = 0x%x)\n", retval);
}
/* Even if the slot registration fails, we can still use it. */
}
slot_found:
newfunc->slot = slot;
list_add_tail(&newfunc->sibling, &slot->funcs);
if (pci_bus_read_dev_vendor_id(pbus, PCI_DEVFN(device, function),
&val, 60*1000))
slot->flags |= SLOT_ENABLED;
if (is_dock_device(handle)) {
/* we don't want to call this device's _EJ0
* because we want the dock notify handler
* to call it after it calls _DCK
*/
newfunc->flags &= ~FUNC_HAS_EJ0;
if (register_hotplug_dock_device(handle,
&acpiphp_dock_ops, context,
ACPI / dock / PCI: Synchronous handling of dock events for PCI devices The interactions between the ACPI dock driver and the ACPI-based PCI hotplug (acpiphp) are currently problematic because of ordering issues during hot-remove operations. First of all, the current ACPI glue code expects that physical devices will always be deleted before deleting the companion ACPI device objects. Otherwise, acpi_unbind_one() will fail with a warning message printed to the kernel log, for example: [ 185.026073] usb usb5: Oops, 'acpi_handle' corrupt [ 185.035150] pci 0000:1b:00.0: Oops, 'acpi_handle' corrupt [ 185.035515] pci 0000:18:02.0: Oops, 'acpi_handle' corrupt [ 180.013656] port1: Oops, 'acpi_handle' corrupt This means, in particular, that struct pci_dev objects have to be deleted before the struct acpi_device objects they are "glued" with. Now, the following happens the during the undocking of an ACPI-based dock station: 1) hotplug_dock_devices() invokes registered hotplug callbacks to destroy physical devices associated with the ACPI device objects depending on the dock station. It calls dd->ops->handler() for each of those device objects. 2) For PCI devices dd->ops->handler() points to handle_hotplug_event_func() that queues up a separate work item to execute _handle_hotplug_event_func() for the given device and returns immediately. That work item will be executed later. 3) hotplug_dock_devices() calls dock_remove_acpi_device() for each device depending on the dock station. This runs acpi_bus_trim() for each of them, which causes the underlying ACPI device object to be destroyed, but the work items queued up by handle_hotplug_event_func() haven't been started yet. 4) _handle_hotplug_event_func() queued up in step 2) are executed and cause the above failure to happen, because the PCI devices they handle do not have the companion ACPI device objects any more (those objects have been deleted in step 3). The possible breakage doesn't end here, though, because hotplug_dock_devices() may return before at least some of the _handle_hotplug_event_func() work items spawned by it have a chance to complete and then undock() will cause _DCK to be evaluated and that will cause the devices handled by the _handle_hotplug_event_func() to go away possibly while they are being accessed. This means that dd->ops->handler() for PCI devices should not point to handle_hotplug_event_func(). Instead, it should point to a function that will do the work of _handle_hotplug_event_func() synchronously. For this reason, introduce such a function, hotplug_event_func(), and modity acpiphp_dock_ops to point to it as the handler. Unfortunately, however, this is not sufficient, because if the dock code were not changed further, hotplug_event_func() would now deadlock with hotplug_dock_devices() that called it, since it would run unregister_hotplug_dock_device() which in turn would attempt to acquire the dock station's hp_lock mutex already acquired by hotplug_dock_devices(). To resolve that deadlock use the observation that unregister_hotplug_dock_device() won't need to acquire hp_lock if PCI bridges the devices on the dock station depend on are prevented from being removed prematurely while the first loop in hotplug_dock_devices() is in progress. To make that possible, introduce a mechanism by which the callers of register_hotplug_dock_device() can provide "init" and "release" routines that will be executed, respectively, during the addition and removal of the physical device object associated with the given ACPI device handle. Make acpiphp use two new functions, acpiphp_dock_init() and acpiphp_dock_release(), that call get_bridge() and put_bridge(), respectively, on the acpiphp bridge holding the given device, for this purpose. In addition to that, remove the dock station's list of "hotplug devices" and make the dock code always walk the whole list of "dependent devices" instead in such a way that the loops in hotplug_dock_devices() and dock_event() (replacing the loops over "hotplug devices") will take references to the list entries that register_hotplug_dock_device() has been called for. That prevents the "release" routines associated with those entries from being called while the given entry is being processed and for PCI devices this means that their bridges won't be removed (by a concurrent thread) while hotplug_event_func() handling them is being executed. This change is based on two earlier patches from Jiang Liu. References: https://bugzilla.kernel.org/show_bug.cgi?id=59501 Reported-and-tested-by: Alexander E. Patrakov <patrakov@gmail.com> Tracked-down-by: Jiang Liu <jiang.liu@huawei.com> Tested-by: Illya Klymov <xanf@xanf.me> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Acked-by: Yinghai Lu <yinghai@kernel.org> Cc: 3.9+ <stable@vger.kernel.org>
2013-06-24 11:22:53 +02:00
acpiphp_dock_init, acpiphp_dock_release))
pr_debug("failed to register dock device\n");
}
/* install notify handler */
if (!(newfunc->flags & FUNC_HAS_DCK)) {
status = acpi_install_notify_handler(handle, ACPI_SYSTEM_NOTIFY,
handle_hotplug_event,
context);
if (ACPI_FAILURE(status))
acpi_handle_err(handle,
"failed to install notify handler\n");
}
return AE_OK;
}
static struct acpiphp_bridge *acpiphp_handle_to_bridge(acpi_handle handle)
{
struct acpiphp_context *context;
struct acpiphp_bridge *bridge = NULL;
mutex_lock(&acpiphp_context_lock);
context = acpiphp_get_context(handle);
if (context) {
bridge = context->bridge;
if (bridge)
get_bridge(bridge);
acpiphp_put_context(context);
}
mutex_unlock(&acpiphp_context_lock);
return bridge;
}
static void cleanup_bridge(struct acpiphp_bridge *bridge)
{
struct acpiphp_slot *slot;
struct acpiphp_func *func;
acpi_status status;
list_for_each_entry(slot, &bridge->slots, node) {
list_for_each_entry(func, &slot->funcs, sibling) {
acpi_handle handle = func_to_handle(func);
if (is_dock_device(handle))
unregister_hotplug_dock_device(handle);
if (!(func->flags & FUNC_HAS_DCK)) {
status = acpi_remove_notify_handler(handle,
ACPI_SYSTEM_NOTIFY,
handle_hotplug_event);
if (ACPI_FAILURE(status))
pr_err("failed to remove notify handler\n");
}
}
slot->flags |= SLOT_IS_GOING_AWAY;
if (slot->slot)
acpiphp_unregister_hotplug_slot(slot);
}
mutex_lock(&bridge_mutex);
list_del(&bridge->list);
mutex_unlock(&bridge_mutex);
ACPI / hotplug / PCI: Fix bridge removal race in handle_hotplug_event() If a PCI bridge with an ACPIPHP context attached is removed via sysfs, the code path executed as a result is the following: pci_stop_and_remove_bus_device_locked pci_remove_bus pcibios_remove_bus acpi_pci_remove_bus acpiphp_remove_slots cleanup_bridge put_bridge free_bridge acpiphp_put_context (for each child, under context lock) kfree (child context) Now, if a hotplug notify is dispatched for one of the bridge's children and the timing is such that handle_hotplug_event() for that notify is executed while free_bridge() above is running, the get_bridge(context->func.parent) in handle_hotplug_event() will not really help, because it is too late to prevent the bridge from going away and the child's context may be freed before hotplug_event_work() scheduled from handle_hotplug_event() dereferences the pointer to it passed via the data argument. That will cause a kernel crash to happpen in hotplug_event_work(). To prevent that from happening, make handle_hotplug_event() check the is_going_away flag of the function's parent bridge (under acpiphp_context_lock) and bail out if it's set. Also, make cleanup_bridge() set the bridge's is_going_away flag under acpiphp_context_lock so that it cannot be changed between the check and the subsequent get_bridge(context->func.parent) in handle_hotplug_event(). Then, in the above scenario, handle_hotplug_event() will notice that context->func.parent->is_going_away is already set and it will exit immediately preventing the crash from happening. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2014-02-03 22:30:06 +01:00
mutex_lock(&acpiphp_context_lock);
bridge->is_going_away = true;
ACPI / hotplug / PCI: Fix bridge removal race in handle_hotplug_event() If a PCI bridge with an ACPIPHP context attached is removed via sysfs, the code path executed as a result is the following: pci_stop_and_remove_bus_device_locked pci_remove_bus pcibios_remove_bus acpi_pci_remove_bus acpiphp_remove_slots cleanup_bridge put_bridge free_bridge acpiphp_put_context (for each child, under context lock) kfree (child context) Now, if a hotplug notify is dispatched for one of the bridge's children and the timing is such that handle_hotplug_event() for that notify is executed while free_bridge() above is running, the get_bridge(context->func.parent) in handle_hotplug_event() will not really help, because it is too late to prevent the bridge from going away and the child's context may be freed before hotplug_event_work() scheduled from handle_hotplug_event() dereferences the pointer to it passed via the data argument. That will cause a kernel crash to happpen in hotplug_event_work(). To prevent that from happening, make handle_hotplug_event() check the is_going_away flag of the function's parent bridge (under acpiphp_context_lock) and bail out if it's set. Also, make cleanup_bridge() set the bridge's is_going_away flag under acpiphp_context_lock so that it cannot be changed between the check and the subsequent get_bridge(context->func.parent) in handle_hotplug_event(). Then, in the above scenario, handle_hotplug_event() will notice that context->func.parent->is_going_away is already set and it will exit immediately preventing the crash from happening. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2014-02-03 22:30:06 +01:00
mutex_unlock(&acpiphp_context_lock);
}
/**
* acpiphp_max_busnr - return the highest reserved bus number under the given bus.
* @bus: bus to start search with
*/
static unsigned char acpiphp_max_busnr(struct pci_bus *bus)
{
struct list_head *tmp;
unsigned char max, n;
/*
* pci_bus_max_busnr will return the highest
* reserved busnr for all these children.
* that is equivalent to the bus->subordinate
* value. We don't want to use the parent's
* bus->subordinate value because it could have
* padding in it.
*/
max = bus->busn_res.start;
list_for_each(tmp, &bus->children) {
n = pci_bus_max_busnr(pci_bus_b(tmp));
if (n > max)
max = n;
}
return max;
}
/**
* acpiphp_bus_add - Scan ACPI namespace subtree.
* @handle: ACPI object handle to start the scan from.
*/
static void acpiphp_bus_add(acpi_handle handle)
{
struct acpi_device *adev = NULL;
acpi_bus_scan(handle);
acpi_bus_get_device(handle, &adev);
ACPI / scan: Add acpi_device objects for all device nodes in the namespace Modify the ACPI namespace scanning code to register a struct acpi_device object for every namespace node representing a device, processor and so on, even if the device represented by that namespace node is reported to be not present and not functional by _STA. There are multiple reasons to do that. First of all, it avoids quite a lot of overhead when struct acpi_device objects are deleted every time acpi_bus_trim() is run and then added again by a subsequent acpi_bus_scan() for the same scope, although the namespace objects they correspond to stay in memory all the time (which always is the case on a vast majority of systems). Second, it will allow user space to see that there are namespace nodes representing devices that are not present at the moment and may be added to the system. It will also allow user space to evaluate _SUN for those nodes to check what physical slots the "missing" devices may be put into and it will make sense to add a sysfs attribute for _STA evaluation after this change (that will be useful for thermal management on some systems). Next, it will help to consolidate the ACPI hotplug handling among subsystems by making it possible to store hotplug-related information in struct acpi_device objects in a standard common way. Finally, it will help to avoid a race condition related to the deletion of ACPI namespace nodes. Namely, namespace nodes may be deleted as a result of a table unload triggered by _EJ0 or _DCK. If a hotplug notification for one of those nodes is triggered right before the deletion and it executes a hotplug callback via acpi_hotplug_execute(), the ACPI handle passed to that callback may be stale when the callback actually runs. One way to work around that is to always pass struct acpi_device pointers to hotplug callbacks after doing a get_device() on the objects in question which eliminates the use-after-free possibility (the ACPI handles in those objects are invalidated by acpi_scan_drop_device(), so they will trigger ACPICA errors on attempts to use them). Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
2013-11-22 21:54:37 +01:00
if (acpi_device_enumerated(adev))
acpi_device_set_power(adev, ACPI_STATE_D0);
}
static void acpiphp_set_acpi_region(struct acpiphp_slot *slot)
{
struct acpiphp_func *func;
union acpi_object params[2];
struct acpi_object_list arg_list;
list_for_each_entry(func, &slot->funcs, sibling) {
arg_list.count = 2;
arg_list.pointer = params;
params[0].type = ACPI_TYPE_INTEGER;
params[0].integer.value = ACPI_ADR_SPACE_PCI_CONFIG;
params[1].type = ACPI_TYPE_INTEGER;
params[1].integer.value = 1;
/* _REG is optional, we don't care about if there is failure */
acpi_evaluate_object(func_to_handle(func), "_REG", &arg_list,
NULL);
}
}
static void check_hotplug_bridge(struct acpiphp_slot *slot, struct pci_dev *dev)
{
struct acpiphp_func *func;
/* quirk, or pcie could set it already */
if (dev->is_hotplug_bridge)
return;
list_for_each_entry(func, &slot->funcs, sibling) {
if (PCI_FUNC(dev->devfn) == func->function) {
dev->is_hotplug_bridge = 1;
break;
}
}
}
ACPI / hotplug / PCI: Avoid parent bus rescans on spurious device checks In the current ACPIPHP notify handler we always go directly for a rescan of the parent bus if we get a device check notification for a device that is not a bridge. However, this obviously is overzealous if nothing really changes, because this way we may rescan the whole PCI hierarchy pretty much in vain. That happens on Alex Williamson's machine whose ACPI tables contain device objects that are supposed to coresspond to PCIe root ports, but those ports aren't physically present (or at least they aren't visible in the PCI config space to us). The BIOS generates multiple device check notifies for those objects during boot and for each of them we go straight for the parent bus rescan, but the parent bus is the root bus in this particular case. In consequence, we rescan the whole PCI bus from the top several times in a row, which is completely unnecessary, increases boot time by 50% (after previous fixes) and generates excess dmesg output from the PCI subsystem. Fix the problem by checking if we can find anything new in the slot corresponding to the device we've got a device check notify for and doing nothig if that's not the case. The spec (ACPI 5.0, Section 5.6.6) appears to mandate this behavior, as it says: Device Check. Used to notify OSPM that the device either appeared or disappeared. If the device has appeared, OSPM will re-enumerate from the parent. If the device has disappeared, OSPM will invalidate the state of the device. OSPM may optimize out re-enumeration. Therefore, according to the spec, we are free to do nothing if nothing changes. References: https://bugzilla.kernel.org/show_bug.cgi?id=60865 Reported-and-tested-by: Alex Williamson <alex.williamson@redhat.com> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2013-09-08 00:07:28 +02:00
static int acpiphp_rescan_slot(struct acpiphp_slot *slot)
{
struct acpiphp_func *func;
list_for_each_entry(func, &slot->funcs, sibling)
acpiphp_bus_add(func_to_handle(func));
return pci_scan_slot(slot->bus, PCI_DEVFN(slot->device, 0));
}
/**
* enable_slot - enable, configure a slot
* @slot: slot to be enabled
*
* This function should be called per *physical slot*,
* not per each slot object in ACPI namespace.
*/
static void __ref enable_slot(struct acpiphp_slot *slot)
{
struct pci_dev *dev;
struct pci_bus *bus = slot->bus;
struct acpiphp_func *func;
int max, pass;
PCI / ACPI: Use boot-time resource allocation rules during hotplug On x86 platforms, the kernel respects PCI resource assignments from the BIOS and only reassigns resources for unassigned BARs at boot time. However, with the ACPI-based hotplug (acpiphp), it ignores the BIOS' PCI resource assignments completely and reassigns all resources by itself. This causes differences in PCI resource allocation between boot time and runtime hotplug to occur, which is generally undesirable and sometimes actively breaks things. Namely, if there are enough resources, reassigning all PCI resources during runtime hotplug should work, but it may fail if the resources are constrained. This may happen, for instance, when some PCI devices with huge MMIO BARs are involved in the runtime hotplug operations, because the current PCI MMIO alignment algorithm may waste huge chunks of MMIO address space in those cases. On the Alexander's Sony VAIO VPCZ23A4R the BIOS allocates limited MMIO resources for the dock station which contains a device (graphics adapter) with a 256MB MMIO BAR. An attempt to reassign that during runtime hotplug causes the dock station MMIO window to be exhausted and acpiphp fails to allocate resources for the majority of devices on the dock station as a result. To prevent that from happening, modify acpiphp to follow the boot time resources allocation behavior so that the BIOS' resource assignments are respected during runtime hotplug too. [rjw: Changelog] References: https://bugzilla.kernel.org/show_bug.cgi?id=56531 Reported-and-tested-by: Alexander E. Patrakov <patrakov@gmail.com> Tested-by: Illya Klymov <xanf@xanf.me> Signed-off-by: Jiang Liu <jiang.liu@huawei.com> Acked-by: Yinghai Lu <yinghai@kernel.org> Cc: 3.9+ <stable@vger.kernel.org> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2013-06-23 01:01:35 +02:00
LIST_HEAD(add_list);
acpiphp_rescan_slot(slot);
max = acpiphp_max_busnr(bus);
for (pass = 0; pass < 2; pass++) {
list_for_each_entry(dev, &bus->devices, bus_list) {
if (PCI_SLOT(dev->devfn) != slot->device)
continue;
if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE ||
dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) {
max = pci_scan_bridge(bus, dev, max, pass);
if (pass && dev->subordinate) {
check_hotplug_bridge(slot, dev);
PCI / ACPI: Use boot-time resource allocation rules during hotplug On x86 platforms, the kernel respects PCI resource assignments from the BIOS and only reassigns resources for unassigned BARs at boot time. However, with the ACPI-based hotplug (acpiphp), it ignores the BIOS' PCI resource assignments completely and reassigns all resources by itself. This causes differences in PCI resource allocation between boot time and runtime hotplug to occur, which is generally undesirable and sometimes actively breaks things. Namely, if there are enough resources, reassigning all PCI resources during runtime hotplug should work, but it may fail if the resources are constrained. This may happen, for instance, when some PCI devices with huge MMIO BARs are involved in the runtime hotplug operations, because the current PCI MMIO alignment algorithm may waste huge chunks of MMIO address space in those cases. On the Alexander's Sony VAIO VPCZ23A4R the BIOS allocates limited MMIO resources for the dock station which contains a device (graphics adapter) with a 256MB MMIO BAR. An attempt to reassign that during runtime hotplug causes the dock station MMIO window to be exhausted and acpiphp fails to allocate resources for the majority of devices on the dock station as a result. To prevent that from happening, modify acpiphp to follow the boot time resources allocation behavior so that the BIOS' resource assignments are respected during runtime hotplug too. [rjw: Changelog] References: https://bugzilla.kernel.org/show_bug.cgi?id=56531 Reported-and-tested-by: Alexander E. Patrakov <patrakov@gmail.com> Tested-by: Illya Klymov <xanf@xanf.me> Signed-off-by: Jiang Liu <jiang.liu@huawei.com> Acked-by: Yinghai Lu <yinghai@kernel.org> Cc: 3.9+ <stable@vger.kernel.org> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2013-06-23 01:01:35 +02:00
pcibios_resource_survey_bus(dev->subordinate);
__pci_bus_size_bridges(dev->subordinate,
&add_list);
}
}
}
}
PCI / ACPI: Use boot-time resource allocation rules during hotplug On x86 platforms, the kernel respects PCI resource assignments from the BIOS and only reassigns resources for unassigned BARs at boot time. However, with the ACPI-based hotplug (acpiphp), it ignores the BIOS' PCI resource assignments completely and reassigns all resources by itself. This causes differences in PCI resource allocation between boot time and runtime hotplug to occur, which is generally undesirable and sometimes actively breaks things. Namely, if there are enough resources, reassigning all PCI resources during runtime hotplug should work, but it may fail if the resources are constrained. This may happen, for instance, when some PCI devices with huge MMIO BARs are involved in the runtime hotplug operations, because the current PCI MMIO alignment algorithm may waste huge chunks of MMIO address space in those cases. On the Alexander's Sony VAIO VPCZ23A4R the BIOS allocates limited MMIO resources for the dock station which contains a device (graphics adapter) with a 256MB MMIO BAR. An attempt to reassign that during runtime hotplug causes the dock station MMIO window to be exhausted and acpiphp fails to allocate resources for the majority of devices on the dock station as a result. To prevent that from happening, modify acpiphp to follow the boot time resources allocation behavior so that the BIOS' resource assignments are respected during runtime hotplug too. [rjw: Changelog] References: https://bugzilla.kernel.org/show_bug.cgi?id=56531 Reported-and-tested-by: Alexander E. Patrakov <patrakov@gmail.com> Tested-by: Illya Klymov <xanf@xanf.me> Signed-off-by: Jiang Liu <jiang.liu@huawei.com> Acked-by: Yinghai Lu <yinghai@kernel.org> Cc: 3.9+ <stable@vger.kernel.org> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2013-06-23 01:01:35 +02:00
__pci_bus_assign_resources(bus, &add_list, NULL);
acpiphp_sanitize_bus(bus);
acpiphp_set_hpp_values(bus);
acpiphp_set_acpi_region(slot);
list_for_each_entry(dev, &bus->devices, bus_list) {
/* Assume that newly added devices are powered on already. */
if (!dev->is_added)
dev->current_state = PCI_D0;
}
pci_bus_add_devices(bus);
slot->flags |= SLOT_ENABLED;
list_for_each_entry(func, &slot->funcs, sibling) {
PCI Hotplug: acpiphp: don't store a pci_dev in acpiphp_func An oops can occur if a user attempts to use both PCI logical hotplug and the ACPI physical hotplug driver (acpiphp) in this sequence, where $slot/address == $device. In other words, if acpiphp has claimed a PCI device, and that device is logically removed, then acpiphp may oops when it attempts to access it again. # echo 1 > /sys/bus/pci/devices/$device/remove # echo 0 > /sys/bus/pci/slots/$slot/power Unable to handle kernel NULL pointer dereference (address 0000000000000000) Call Trace: [<a000000100016390>] show_stack+0x50/0xa0 [<a000000100016c60>] show_regs+0x820/0x860 [<a00000010003b390>] die+0x190/0x2a0 [<a000000100066a40>] ia64_do_page_fault+0x8e0/0xa40 [<a00000010000c7a0>] ia64_native_leave_kernel+0x0/0x270 [<a0000001003b2660>] pci_remove_bus_device+0x120/0x260 [<a0000002060549f0>] acpiphp_disable_slot+0x410/0x540 [acpiphp] [<a0000002060505c0>] disable_slot+0xc0/0x120 [acpiphp] [<a0000002040d21c0>] power_write_file+0x1e0/0x2a0 [pci_hotplug] [<a0000001003bb820>] pci_slot_attr_store+0x60/0xa0 [<a000000100240f70>] sysfs_write_file+0x230/0x2c0 [<a000000100195750>] vfs_write+0x190/0x2e0 [<a0000001001961a0>] sys_write+0x80/0x100 [<a00000010000c600>] ia64_ret_from_syscall+0x0/0x20 [<a000000000010720>] __kernel_syscall_via_break+0x0/0x20 The root cause of this oops is that the logical remove ("echo 1 > /sys/bus/pci/devices/$device/remove") destroyed the pci_dev. The pci_dev struct itself wasn't deallocated because acpiphp kept a reference, but some of its fields became invalid. acpiphp doesn't have any real reason to keep a pointer to a pci_dev around. It can always derive it using pci_get_slot(). If a logical remove destroys the pci_dev, acpiphp won't find it and is thus prevented from causing mischief. Reviewed-by: Matthew Wilcox <willy@linux.intel.com> Reviewed-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com> Tested-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com> Reported-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com> Acked-by: Bjorn Helgaas <bjorn.helgaas@hp.com> Signed-off-by: Alex Chiang <achiang@hp.com> Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2009-05-21 16:21:15 -06:00
dev = pci_get_slot(bus, PCI_DEVFN(slot->device,
func->function));
if (!dev) {
/* Do not set SLOT_ENABLED flag if some funcs
are not added. */
slot->flags &= (~SLOT_ENABLED);
continue;
}
}
}
/**
* disable_slot - disable a slot
* @slot: ACPI PHP slot
*/
static void disable_slot(struct acpiphp_slot *slot)
{
struct pci_bus *bus = slot->bus;
struct pci_dev *dev, *prev;
struct acpiphp_func *func;
/*
* enable_slot() enumerates all functions in this device via
* pci_scan_slot(), whether they have associated ACPI hotplug
* methods (_EJ0, etc.) or not. Therefore, we remove all functions
* here.
*/
list_for_each_entry_safe_reverse(dev, prev, &bus->devices, bus_list)
if (PCI_SLOT(dev->devfn) == slot->device)
pci_stop_and_remove_bus_device(dev);
list_for_each_entry(func, &slot->funcs, sibling)
acpi_bus_trim(func_to_acpi_device(func));
slot->flags &= (~SLOT_ENABLED);
}
static bool acpiphp_no_hotplug(struct acpi_device *adev)
ACPIPHP / radeon / nouveau: Fix VGA switcheroo problem related to hotplug The changes in the ACPI-based PCI hotplug (ACPIPHP) subsystem made during the 3.12 development cycle uncovered a problem with VGA switcheroo that on some systems, when the device-specific method (ATPX in the radeon case, _DSM in the nouveau case) is used to turn off the discrete graphics, the BIOS generates ACPI hotplug events for that device and those events cause ACPIPHP to attempt to remove the device from the system (they are events for a device that was present previously and is not present any more, so that's what should be done according to the spec). Then, the system stops functioning correctly. Since the hotplug events in question were simply silently ignored previously, the least intrusive way to address that problem is to make ACPIPHP ignore them again. For this purpose, introduce a new ACPI device flag, no_hotplug, and modify ACPIPHP to ignore hotplug events for PCI devices whose ACPI companions have that flag set. Next, make the radeon and nouveau switcheroo detection code set the no_hotplug flag for the discrete graphics' ACPI companion. Fixes: bbd34fcdd1b2 (ACPI / hotplug / PCI: Register all devices under the given bridge) References: https://bugzilla.kernel.org/show_bug.cgi?id=61891 References: https://bugzilla.kernel.org/show_bug.cgi?id=64891 Reported-and-tested-by: Mike Lothian <mike@fireburn.co.uk> Reported-and-tested-by: <madcatx@atlas.cz> Reported-and-tested-by: Joaquín Aramendía <samsagax@gmail.com> Cc: Alex Deucher <alexdeucher@gmail.com> Cc: Dave Airlie <airlied@linux.ie> Cc: Takashi Iwai <tiwai@suse.de> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Cc: 3.12+ <stable@vger.kernel.org> # 3.12+
2013-12-31 13:39:42 +01:00
{
return adev && adev->flags.no_hotplug;
}
static bool slot_no_hotplug(struct acpiphp_slot *slot)
{
struct acpiphp_func *func;
list_for_each_entry(func, &slot->funcs, sibling)
if (acpiphp_no_hotplug(func_to_acpi_device(func)))
return true;
ACPIPHP / radeon / nouveau: Fix VGA switcheroo problem related to hotplug The changes in the ACPI-based PCI hotplug (ACPIPHP) subsystem made during the 3.12 development cycle uncovered a problem with VGA switcheroo that on some systems, when the device-specific method (ATPX in the radeon case, _DSM in the nouveau case) is used to turn off the discrete graphics, the BIOS generates ACPI hotplug events for that device and those events cause ACPIPHP to attempt to remove the device from the system (they are events for a device that was present previously and is not present any more, so that's what should be done according to the spec). Then, the system stops functioning correctly. Since the hotplug events in question were simply silently ignored previously, the least intrusive way to address that problem is to make ACPIPHP ignore them again. For this purpose, introduce a new ACPI device flag, no_hotplug, and modify ACPIPHP to ignore hotplug events for PCI devices whose ACPI companions have that flag set. Next, make the radeon and nouveau switcheroo detection code set the no_hotplug flag for the discrete graphics' ACPI companion. Fixes: bbd34fcdd1b2 (ACPI / hotplug / PCI: Register all devices under the given bridge) References: https://bugzilla.kernel.org/show_bug.cgi?id=61891 References: https://bugzilla.kernel.org/show_bug.cgi?id=64891 Reported-and-tested-by: Mike Lothian <mike@fireburn.co.uk> Reported-and-tested-by: <madcatx@atlas.cz> Reported-and-tested-by: Joaquín Aramendía <samsagax@gmail.com> Cc: Alex Deucher <alexdeucher@gmail.com> Cc: Dave Airlie <airlied@linux.ie> Cc: Takashi Iwai <tiwai@suse.de> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Cc: 3.12+ <stable@vger.kernel.org> # 3.12+
2013-12-31 13:39:42 +01:00
return false;
}
/**
* get_slot_status - get ACPI slot status
* @slot: ACPI PHP slot
*
* If a slot has _STA for each function and if any one of them
* returned non-zero status, return it.
*
* If a slot doesn't have _STA and if any one of its functions'
* configuration space is configured, return 0x0f as a _STA.
*
* Otherwise return 0.
*/
static unsigned int get_slot_status(struct acpiphp_slot *slot)
{
unsigned long long sta = 0;
struct acpiphp_func *func;
list_for_each_entry(func, &slot->funcs, sibling) {
if (func->flags & FUNC_HAS_STA) {
acpi_status status;
status = acpi_evaluate_integer(func_to_handle(func),
"_STA", NULL, &sta);
if (ACPI_SUCCESS(status) && sta)
break;
} else {
u32 dvid;
pci_bus_read_config_dword(slot->bus,
PCI_DEVFN(slot->device,
func->function),
PCI_VENDOR_ID, &dvid);
if (dvid != 0xffffffff) {
sta = ACPI_STA_ALL;
break;
}
}
}
return (unsigned int)sta;
}
ACPI / hotplug / PCI: Check for new devices on enabled slots The current implementation of acpiphp_check_bridge() is pretty dumb: - It enables a slot if it's not enabled and the slot status is ACPI_STA_ALL. - It disables a slot if it's enabled and the slot status is not ACPI_STA_ALL. This behavior is not sufficient to handle the Thunderbolt daisy chaining case properly, however, because in that case the bus behind the already enabled slot needs to be rescanned for new devices. For this reason, modify acpiphp_check_bridge() so that slots are disabled and stopped if they are not in the ACPI_STA_ALL state. For slots in the ACPI_STA_ALL state, devices behind them that don't respond are trimmed using a new function, trim_stale_devices(), introduced specifically for this purpose. That function walks the given bus and checks each device on it. If the device doesn't respond, it is assumed to be gone and is removed. Once all of the stale devices directy behind the slot have been removed, acpiphp_check_bridge() will start looking for new devices that might have appeared on the given bus. It will do that even if the slot is already enabled (SLOT_ENABLED is set for it). In addition to that, make the bus check notification ignore SLOT_ENABLED and go for enable_device() directly if bridge is NULL, so that devices behind the slot are re-enumerated in that case too. This change is based on earlier patches from Kirill A Shutemov and Mika Westerberg. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
2013-07-16 22:10:35 +02:00
/**
* trim_stale_devices - remove PCI devices that are not responding.
* @dev: PCI device to start walking the hierarchy from.
*/
static void trim_stale_devices(struct pci_dev *dev)
{
struct acpi_device *adev = ACPI_COMPANION(&dev->dev);
ACPI / hotplug / PCI: Check for new devices on enabled slots The current implementation of acpiphp_check_bridge() is pretty dumb: - It enables a slot if it's not enabled and the slot status is ACPI_STA_ALL. - It disables a slot if it's enabled and the slot status is not ACPI_STA_ALL. This behavior is not sufficient to handle the Thunderbolt daisy chaining case properly, however, because in that case the bus behind the already enabled slot needs to be rescanned for new devices. For this reason, modify acpiphp_check_bridge() so that slots are disabled and stopped if they are not in the ACPI_STA_ALL state. For slots in the ACPI_STA_ALL state, devices behind them that don't respond are trimmed using a new function, trim_stale_devices(), introduced specifically for this purpose. That function walks the given bus and checks each device on it. If the device doesn't respond, it is assumed to be gone and is removed. Once all of the stale devices directy behind the slot have been removed, acpiphp_check_bridge() will start looking for new devices that might have appeared on the given bus. It will do that even if the slot is already enabled (SLOT_ENABLED is set for it). In addition to that, make the bus check notification ignore SLOT_ENABLED and go for enable_device() directly if bridge is NULL, so that devices behind the slot are re-enumerated in that case too. This change is based on earlier patches from Kirill A Shutemov and Mika Westerberg. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
2013-07-16 22:10:35 +02:00
struct pci_bus *bus = dev->subordinate;
bool alive = false;
if (adev) {
ACPI / hotplug / PCI: Check for new devices on enabled slots The current implementation of acpiphp_check_bridge() is pretty dumb: - It enables a slot if it's not enabled and the slot status is ACPI_STA_ALL. - It disables a slot if it's enabled and the slot status is not ACPI_STA_ALL. This behavior is not sufficient to handle the Thunderbolt daisy chaining case properly, however, because in that case the bus behind the already enabled slot needs to be rescanned for new devices. For this reason, modify acpiphp_check_bridge() so that slots are disabled and stopped if they are not in the ACPI_STA_ALL state. For slots in the ACPI_STA_ALL state, devices behind them that don't respond are trimmed using a new function, trim_stale_devices(), introduced specifically for this purpose. That function walks the given bus and checks each device on it. If the device doesn't respond, it is assumed to be gone and is removed. Once all of the stale devices directy behind the slot have been removed, acpiphp_check_bridge() will start looking for new devices that might have appeared on the given bus. It will do that even if the slot is already enabled (SLOT_ENABLED is set for it). In addition to that, make the bus check notification ignore SLOT_ENABLED and go for enable_device() directly if bridge is NULL, so that devices behind the slot are re-enumerated in that case too. This change is based on earlier patches from Kirill A Shutemov and Mika Westerberg. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
2013-07-16 22:10:35 +02:00
acpi_status status;
unsigned long long sta;
status = acpi_evaluate_integer(adev->handle, "_STA", NULL, &sta);
ACPIPHP / radeon / nouveau: Fix VGA switcheroo problem related to hotplug The changes in the ACPI-based PCI hotplug (ACPIPHP) subsystem made during the 3.12 development cycle uncovered a problem with VGA switcheroo that on some systems, when the device-specific method (ATPX in the radeon case, _DSM in the nouveau case) is used to turn off the discrete graphics, the BIOS generates ACPI hotplug events for that device and those events cause ACPIPHP to attempt to remove the device from the system (they are events for a device that was present previously and is not present any more, so that's what should be done according to the spec). Then, the system stops functioning correctly. Since the hotplug events in question were simply silently ignored previously, the least intrusive way to address that problem is to make ACPIPHP ignore them again. For this purpose, introduce a new ACPI device flag, no_hotplug, and modify ACPIPHP to ignore hotplug events for PCI devices whose ACPI companions have that flag set. Next, make the radeon and nouveau switcheroo detection code set the no_hotplug flag for the discrete graphics' ACPI companion. Fixes: bbd34fcdd1b2 (ACPI / hotplug / PCI: Register all devices under the given bridge) References: https://bugzilla.kernel.org/show_bug.cgi?id=61891 References: https://bugzilla.kernel.org/show_bug.cgi?id=64891 Reported-and-tested-by: Mike Lothian <mike@fireburn.co.uk> Reported-and-tested-by: <madcatx@atlas.cz> Reported-and-tested-by: Joaquín Aramendía <samsagax@gmail.com> Cc: Alex Deucher <alexdeucher@gmail.com> Cc: Dave Airlie <airlied@linux.ie> Cc: Takashi Iwai <tiwai@suse.de> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Cc: 3.12+ <stable@vger.kernel.org> # 3.12+
2013-12-31 13:39:42 +01:00
alive = (ACPI_SUCCESS(status) && sta == ACPI_STA_ALL)
|| acpiphp_no_hotplug(adev);
ACPI / hotplug / PCI: Check for new devices on enabled slots The current implementation of acpiphp_check_bridge() is pretty dumb: - It enables a slot if it's not enabled and the slot status is ACPI_STA_ALL. - It disables a slot if it's enabled and the slot status is not ACPI_STA_ALL. This behavior is not sufficient to handle the Thunderbolt daisy chaining case properly, however, because in that case the bus behind the already enabled slot needs to be rescanned for new devices. For this reason, modify acpiphp_check_bridge() so that slots are disabled and stopped if they are not in the ACPI_STA_ALL state. For slots in the ACPI_STA_ALL state, devices behind them that don't respond are trimmed using a new function, trim_stale_devices(), introduced specifically for this purpose. That function walks the given bus and checks each device on it. If the device doesn't respond, it is assumed to be gone and is removed. Once all of the stale devices directy behind the slot have been removed, acpiphp_check_bridge() will start looking for new devices that might have appeared on the given bus. It will do that even if the slot is already enabled (SLOT_ENABLED is set for it). In addition to that, make the bus check notification ignore SLOT_ENABLED and go for enable_device() directly if bridge is NULL, so that devices behind the slot are re-enumerated in that case too. This change is based on earlier patches from Kirill A Shutemov and Mika Westerberg. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
2013-07-16 22:10:35 +02:00
}
if (!alive) {
u32 v;
/* Check if the device responds. */
alive = pci_bus_read_dev_vendor_id(dev->bus, dev->devfn, &v, 0);
}
if (!alive) {
pci_stop_and_remove_bus_device(dev);
if (adev)
acpi_bus_trim(adev);
ACPI / hotplug / PCI: Check for new devices on enabled slots The current implementation of acpiphp_check_bridge() is pretty dumb: - It enables a slot if it's not enabled and the slot status is ACPI_STA_ALL. - It disables a slot if it's enabled and the slot status is not ACPI_STA_ALL. This behavior is not sufficient to handle the Thunderbolt daisy chaining case properly, however, because in that case the bus behind the already enabled slot needs to be rescanned for new devices. For this reason, modify acpiphp_check_bridge() so that slots are disabled and stopped if they are not in the ACPI_STA_ALL state. For slots in the ACPI_STA_ALL state, devices behind them that don't respond are trimmed using a new function, trim_stale_devices(), introduced specifically for this purpose. That function walks the given bus and checks each device on it. If the device doesn't respond, it is assumed to be gone and is removed. Once all of the stale devices directy behind the slot have been removed, acpiphp_check_bridge() will start looking for new devices that might have appeared on the given bus. It will do that even if the slot is already enabled (SLOT_ENABLED is set for it). In addition to that, make the bus check notification ignore SLOT_ENABLED and go for enable_device() directly if bridge is NULL, so that devices behind the slot are re-enumerated in that case too. This change is based on earlier patches from Kirill A Shutemov and Mika Westerberg. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
2013-07-16 22:10:35 +02:00
} else if (bus) {
struct pci_dev *child, *tmp;
/* The device is a bridge. so check the bus below it. */
pm_runtime_get_sync(&dev->dev);
list_for_each_entry_safe_reverse(child, tmp, &bus->devices, bus_list)
ACPI / hotplug / PCI: Check for new devices on enabled slots The current implementation of acpiphp_check_bridge() is pretty dumb: - It enables a slot if it's not enabled and the slot status is ACPI_STA_ALL. - It disables a slot if it's enabled and the slot status is not ACPI_STA_ALL. This behavior is not sufficient to handle the Thunderbolt daisy chaining case properly, however, because in that case the bus behind the already enabled slot needs to be rescanned for new devices. For this reason, modify acpiphp_check_bridge() so that slots are disabled and stopped if they are not in the ACPI_STA_ALL state. For slots in the ACPI_STA_ALL state, devices behind them that don't respond are trimmed using a new function, trim_stale_devices(), introduced specifically for this purpose. That function walks the given bus and checks each device on it. If the device doesn't respond, it is assumed to be gone and is removed. Once all of the stale devices directy behind the slot have been removed, acpiphp_check_bridge() will start looking for new devices that might have appeared on the given bus. It will do that even if the slot is already enabled (SLOT_ENABLED is set for it). In addition to that, make the bus check notification ignore SLOT_ENABLED and go for enable_device() directly if bridge is NULL, so that devices behind the slot are re-enumerated in that case too. This change is based on earlier patches from Kirill A Shutemov and Mika Westerberg. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
2013-07-16 22:10:35 +02:00
trim_stale_devices(child);
pm_runtime_put(&dev->dev);
}
}
/**
* acpiphp_check_bridge - re-enumerate devices
* @bridge: where to begin re-enumeration
*
* Iterate over all slots under this bridge and make sure that if a
* card is present they are enabled, and if not they are disabled.
*/
ACPI / hotplug / PCI: Check for new devices on enabled slots The current implementation of acpiphp_check_bridge() is pretty dumb: - It enables a slot if it's not enabled and the slot status is ACPI_STA_ALL. - It disables a slot if it's enabled and the slot status is not ACPI_STA_ALL. This behavior is not sufficient to handle the Thunderbolt daisy chaining case properly, however, because in that case the bus behind the already enabled slot needs to be rescanned for new devices. For this reason, modify acpiphp_check_bridge() so that slots are disabled and stopped if they are not in the ACPI_STA_ALL state. For slots in the ACPI_STA_ALL state, devices behind them that don't respond are trimmed using a new function, trim_stale_devices(), introduced specifically for this purpose. That function walks the given bus and checks each device on it. If the device doesn't respond, it is assumed to be gone and is removed. Once all of the stale devices directy behind the slot have been removed, acpiphp_check_bridge() will start looking for new devices that might have appeared on the given bus. It will do that even if the slot is already enabled (SLOT_ENABLED is set for it). In addition to that, make the bus check notification ignore SLOT_ENABLED and go for enable_device() directly if bridge is NULL, so that devices behind the slot are re-enumerated in that case too. This change is based on earlier patches from Kirill A Shutemov and Mika Westerberg. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
2013-07-16 22:10:35 +02:00
static void acpiphp_check_bridge(struct acpiphp_bridge *bridge)
{
struct acpiphp_slot *slot;
/* Bail out if the bridge is going away. */
if (bridge->is_going_away)
return;
list_for_each_entry(slot, &bridge->slots, node) {
ACPI / hotplug / PCI: Check for new devices on enabled slots The current implementation of acpiphp_check_bridge() is pretty dumb: - It enables a slot if it's not enabled and the slot status is ACPI_STA_ALL. - It disables a slot if it's enabled and the slot status is not ACPI_STA_ALL. This behavior is not sufficient to handle the Thunderbolt daisy chaining case properly, however, because in that case the bus behind the already enabled slot needs to be rescanned for new devices. For this reason, modify acpiphp_check_bridge() so that slots are disabled and stopped if they are not in the ACPI_STA_ALL state. For slots in the ACPI_STA_ALL state, devices behind them that don't respond are trimmed using a new function, trim_stale_devices(), introduced specifically for this purpose. That function walks the given bus and checks each device on it. If the device doesn't respond, it is assumed to be gone and is removed. Once all of the stale devices directy behind the slot have been removed, acpiphp_check_bridge() will start looking for new devices that might have appeared on the given bus. It will do that even if the slot is already enabled (SLOT_ENABLED is set for it). In addition to that, make the bus check notification ignore SLOT_ENABLED and go for enable_device() directly if bridge is NULL, so that devices behind the slot are re-enumerated in that case too. This change is based on earlier patches from Kirill A Shutemov and Mika Westerberg. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
2013-07-16 22:10:35 +02:00
struct pci_bus *bus = slot->bus;
struct pci_dev *dev, *tmp;
mutex_lock(&slot->crit_sect);
ACPIPHP / radeon / nouveau: Fix VGA switcheroo problem related to hotplug The changes in the ACPI-based PCI hotplug (ACPIPHP) subsystem made during the 3.12 development cycle uncovered a problem with VGA switcheroo that on some systems, when the device-specific method (ATPX in the radeon case, _DSM in the nouveau case) is used to turn off the discrete graphics, the BIOS generates ACPI hotplug events for that device and those events cause ACPIPHP to attempt to remove the device from the system (they are events for a device that was present previously and is not present any more, so that's what should be done according to the spec). Then, the system stops functioning correctly. Since the hotplug events in question were simply silently ignored previously, the least intrusive way to address that problem is to make ACPIPHP ignore them again. For this purpose, introduce a new ACPI device flag, no_hotplug, and modify ACPIPHP to ignore hotplug events for PCI devices whose ACPI companions have that flag set. Next, make the radeon and nouveau switcheroo detection code set the no_hotplug flag for the discrete graphics' ACPI companion. Fixes: bbd34fcdd1b2 (ACPI / hotplug / PCI: Register all devices under the given bridge) References: https://bugzilla.kernel.org/show_bug.cgi?id=61891 References: https://bugzilla.kernel.org/show_bug.cgi?id=64891 Reported-and-tested-by: Mike Lothian <mike@fireburn.co.uk> Reported-and-tested-by: <madcatx@atlas.cz> Reported-and-tested-by: Joaquín Aramendía <samsagax@gmail.com> Cc: Alex Deucher <alexdeucher@gmail.com> Cc: Dave Airlie <airlied@linux.ie> Cc: Takashi Iwai <tiwai@suse.de> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Cc: 3.12+ <stable@vger.kernel.org> # 3.12+
2013-12-31 13:39:42 +01:00
if (slot_no_hotplug(slot)) {
; /* do nothing */
} else if (get_slot_status(slot) == ACPI_STA_ALL) {
ACPI / hotplug / PCI: Check for new devices on enabled slots The current implementation of acpiphp_check_bridge() is pretty dumb: - It enables a slot if it's not enabled and the slot status is ACPI_STA_ALL. - It disables a slot if it's enabled and the slot status is not ACPI_STA_ALL. This behavior is not sufficient to handle the Thunderbolt daisy chaining case properly, however, because in that case the bus behind the already enabled slot needs to be rescanned for new devices. For this reason, modify acpiphp_check_bridge() so that slots are disabled and stopped if they are not in the ACPI_STA_ALL state. For slots in the ACPI_STA_ALL state, devices behind them that don't respond are trimmed using a new function, trim_stale_devices(), introduced specifically for this purpose. That function walks the given bus and checks each device on it. If the device doesn't respond, it is assumed to be gone and is removed. Once all of the stale devices directy behind the slot have been removed, acpiphp_check_bridge() will start looking for new devices that might have appeared on the given bus. It will do that even if the slot is already enabled (SLOT_ENABLED is set for it). In addition to that, make the bus check notification ignore SLOT_ENABLED and go for enable_device() directly if bridge is NULL, so that devices behind the slot are re-enumerated in that case too. This change is based on earlier patches from Kirill A Shutemov and Mika Westerberg. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
2013-07-16 22:10:35 +02:00
/* remove stale devices if any */
list_for_each_entry_safe_reverse(dev, tmp,
&bus->devices, bus_list)
ACPI / hotplug / PCI: Check for new devices on enabled slots The current implementation of acpiphp_check_bridge() is pretty dumb: - It enables a slot if it's not enabled and the slot status is ACPI_STA_ALL. - It disables a slot if it's enabled and the slot status is not ACPI_STA_ALL. This behavior is not sufficient to handle the Thunderbolt daisy chaining case properly, however, because in that case the bus behind the already enabled slot needs to be rescanned for new devices. For this reason, modify acpiphp_check_bridge() so that slots are disabled and stopped if they are not in the ACPI_STA_ALL state. For slots in the ACPI_STA_ALL state, devices behind them that don't respond are trimmed using a new function, trim_stale_devices(), introduced specifically for this purpose. That function walks the given bus and checks each device on it. If the device doesn't respond, it is assumed to be gone and is removed. Once all of the stale devices directy behind the slot have been removed, acpiphp_check_bridge() will start looking for new devices that might have appeared on the given bus. It will do that even if the slot is already enabled (SLOT_ENABLED is set for it). In addition to that, make the bus check notification ignore SLOT_ENABLED and go for enable_device() directly if bridge is NULL, so that devices behind the slot are re-enumerated in that case too. This change is based on earlier patches from Kirill A Shutemov and Mika Westerberg. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
2013-07-16 22:10:35 +02:00
if (PCI_SLOT(dev->devfn) == slot->device)
trim_stale_devices(dev);
/* configure all functions */
enable_slot(slot);
} else {
disable_slot(slot);
}
ACPI / hotplug / PCI: Check for new devices on enabled slots The current implementation of acpiphp_check_bridge() is pretty dumb: - It enables a slot if it's not enabled and the slot status is ACPI_STA_ALL. - It disables a slot if it's enabled and the slot status is not ACPI_STA_ALL. This behavior is not sufficient to handle the Thunderbolt daisy chaining case properly, however, because in that case the bus behind the already enabled slot needs to be rescanned for new devices. For this reason, modify acpiphp_check_bridge() so that slots are disabled and stopped if they are not in the ACPI_STA_ALL state. For slots in the ACPI_STA_ALL state, devices behind them that don't respond are trimmed using a new function, trim_stale_devices(), introduced specifically for this purpose. That function walks the given bus and checks each device on it. If the device doesn't respond, it is assumed to be gone and is removed. Once all of the stale devices directy behind the slot have been removed, acpiphp_check_bridge() will start looking for new devices that might have appeared on the given bus. It will do that even if the slot is already enabled (SLOT_ENABLED is set for it). In addition to that, make the bus check notification ignore SLOT_ENABLED and go for enable_device() directly if bridge is NULL, so that devices behind the slot are re-enumerated in that case too. This change is based on earlier patches from Kirill A Shutemov and Mika Westerberg. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
2013-07-16 22:10:35 +02:00
mutex_unlock(&slot->crit_sect);
}
}
static void acpiphp_set_hpp_values(struct pci_bus *bus)
{
struct pci_dev *dev;
list_for_each_entry(dev, &bus->devices, bus_list)
pci_configure_slot(dev);
}
/*
* Remove devices for which we could not assign resources, call
* arch specific code to fix-up the bus
*/
static void acpiphp_sanitize_bus(struct pci_bus *bus)
{
struct pci_dev *dev, *tmp;
int i;
unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM;
list_for_each_entry_safe_reverse(dev, tmp, &bus->devices, bus_list) {
for (i=0; i<PCI_BRIDGE_RESOURCES; i++) {
struct resource *res = &dev->resource[i];
if ((res->flags & type_mask) && !res->start &&
res->end) {
/* Could not assign a required resources
* for this device, remove it */
pci_stop_and_remove_bus_device(dev);
break;
}
}
}
}
/*
* ACPI event handlers
*/
void acpiphp_check_host_bridge(acpi_handle handle)
{
struct acpiphp_bridge *bridge;
bridge = acpiphp_handle_to_bridge(handle);
if (bridge) {
pci_lock_rescan_remove();
acpiphp_check_bridge(bridge);
pci_unlock_rescan_remove();
put_bridge(bridge);
}
}
static int acpiphp_disable_and_eject_slot(struct acpiphp_slot *slot);
static void hotplug_event(acpi_handle handle, u32 type, void *data)
{
struct acpiphp_context *context = data;
struct acpiphp_func *func = &context->func;
struct acpiphp_bridge *bridge;
char objname[64];
struct acpi_buffer buffer = { .length = sizeof(objname),
.pointer = objname };
PCI hotplug: acpiphp: Prevent deadlock on PCI-to-PCI bridge remove I originally submitted a patch to workaround this by pushing all Ejection Requests and Device Checks onto the kacpi_hotplug queue. http://marc.info/?l=linux-acpi&m=131678270930105&w=2 The patch is still insufficient in that Bus Checks also need to be added. Rather than add all events, including non-PCI-hotplug events, to the hotplug queue, mjg suggested that a better approach would be to modify the acpiphp driver so only acpiphp events would be added to the kacpi_hotplug queue. It's a longer patch, but at least we maintain the benefit of having separate queues in ACPI. This, of course, is still only a workaround the problem. As Bjorn and mjg pointed out, we have to refactor a lot of this code to do the right thing but at this point it is a better to have this code working. The acpi core places all events on the kacpi_notify queue. When the acpiphp driver is loaded and a PCI card with a PCI-to-PCI bridge is removed the following call sequence occurs: cleanup_p2p_bridge() -> cleanup_bridge() -> acpi_remove_notify_handler() -> acpi_os_wait_events_complete() -> flush_workqueue(kacpi_notify_wq) which is the queue we are currently executing on and the process will hang. Move all hotplug acpiphp events onto the kacpi_hotplug workqueue. In handle_hotplug_event_bridge() and handle_hotplug_event_func() we can simply push the rest of the work onto the kacpi_hotplug queue and then avoid the deadlock. Signed-off-by: Prarit Bhargava <prarit@redhat.com> Cc: mjg@redhat.com Cc: bhelgaas@google.com Cc: linux-acpi@vger.kernel.org Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2011-09-28 19:40:53 -04:00
mutex_lock(&acpiphp_context_lock);
bridge = context->bridge;
if (bridge)
get_bridge(bridge);
mutex_unlock(&acpiphp_context_lock);
ACPI / hotplug: Fix concurrency issues and memory leaks This changeset is aimed at fixing a few different but related problems in the ACPI hotplug infrastructure. First of all, since notify handlers may be run in parallel with acpi_bus_scan(), acpi_bus_trim() and acpi_bus_hot_remove_device() and some of them are installed for ACPI handles that have no struct acpi_device objects attached (i.e. before those objects are created), those notify handlers have to take acpi_scan_lock to prevent races from taking place (e.g. a struct acpi_device is found to be present for the given ACPI handle, but right after that it is removed by acpi_bus_trim() running in parallel to the given notify handler). Moreover, since some of them call acpi_bus_scan() and acpi_bus_trim(), this leads to the conclusion that acpi_scan_lock should be acquired by the callers of these two funtions rather by these functions themselves. For these reasons, make all notify handlers that can handle device addition and eject events take acpi_scan_lock and remove the acpi_scan_lock locking from acpi_bus_scan() and acpi_bus_trim(). Accordingly, update all of their users to make sure that they are always called under acpi_scan_lock. Furthermore, since eject operations are carried out asynchronously with respect to the notify events that trigger them, with the help of acpi_bus_hot_remove_device(), even if notify handlers take the ACPI scan lock, it still is possible that, for example, acpi_bus_trim() will run between acpi_bus_hot_remove_device() and the notify handler that scheduled its execution and that acpi_bus_trim() will remove the device node passed to acpi_bus_hot_remove_device() for ejection. In that case, the struct acpi_device object obtained by acpi_bus_hot_remove_device() will be invalid and not-so-funny things will ensue. To protect agaist that, make the users of acpi_bus_hot_remove_device() run get_device() on ACPI device node objects that are about to be passed to it and make acpi_bus_hot_remove_device() run put_device() on them and check if their ACPI handles are not NULL (make acpi_device_unregister() clear the device nodes' ACPI handles for that check to work). Finally, observe that acpi_os_hotplug_execute() actually can fail, in which case its caller ought to free memory allocated for the context object to prevent leaks from happening. It also needs to run put_device() on the device node that it ran get_device() on previously in that case. Modify the code accordingly. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Acked-by: Yinghai Lu <yinghai@kernel.org>
2013-02-13 14:36:47 +01:00
pci_lock_rescan_remove();
acpi_get_name(handle, ACPI_FULL_PATHNAME, &buffer);
switch (type) {
case ACPI_NOTIFY_BUS_CHECK:
/* bus re-enumerate */
pr_debug("%s: Bus check notify on %s\n", __func__, objname);
pr_debug("%s: re-enumerating slots under %s\n",
__func__, objname);
if (bridge) {
acpiphp_check_bridge(bridge);
} else {
ACPI / hotplug / PCI: Check for new devices on enabled slots The current implementation of acpiphp_check_bridge() is pretty dumb: - It enables a slot if it's not enabled and the slot status is ACPI_STA_ALL. - It disables a slot if it's enabled and the slot status is not ACPI_STA_ALL. This behavior is not sufficient to handle the Thunderbolt daisy chaining case properly, however, because in that case the bus behind the already enabled slot needs to be rescanned for new devices. For this reason, modify acpiphp_check_bridge() so that slots are disabled and stopped if they are not in the ACPI_STA_ALL state. For slots in the ACPI_STA_ALL state, devices behind them that don't respond are trimmed using a new function, trim_stale_devices(), introduced specifically for this purpose. That function walks the given bus and checks each device on it. If the device doesn't respond, it is assumed to be gone and is removed. Once all of the stale devices directy behind the slot have been removed, acpiphp_check_bridge() will start looking for new devices that might have appeared on the given bus. It will do that even if the slot is already enabled (SLOT_ENABLED is set for it). In addition to that, make the bus check notification ignore SLOT_ENABLED and go for enable_device() directly if bridge is NULL, so that devices behind the slot are re-enumerated in that case too. This change is based on earlier patches from Kirill A Shutemov and Mika Westerberg. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
2013-07-16 22:10:35 +02:00
struct acpiphp_slot *slot = func->slot;
if (slot->flags & SLOT_IS_GOING_AWAY)
break;
ACPI / hotplug / PCI: Check for new devices on enabled slots The current implementation of acpiphp_check_bridge() is pretty dumb: - It enables a slot if it's not enabled and the slot status is ACPI_STA_ALL. - It disables a slot if it's enabled and the slot status is not ACPI_STA_ALL. This behavior is not sufficient to handle the Thunderbolt daisy chaining case properly, however, because in that case the bus behind the already enabled slot needs to be rescanned for new devices. For this reason, modify acpiphp_check_bridge() so that slots are disabled and stopped if they are not in the ACPI_STA_ALL state. For slots in the ACPI_STA_ALL state, devices behind them that don't respond are trimmed using a new function, trim_stale_devices(), introduced specifically for this purpose. That function walks the given bus and checks each device on it. If the device doesn't respond, it is assumed to be gone and is removed. Once all of the stale devices directy behind the slot have been removed, acpiphp_check_bridge() will start looking for new devices that might have appeared on the given bus. It will do that even if the slot is already enabled (SLOT_ENABLED is set for it). In addition to that, make the bus check notification ignore SLOT_ENABLED and go for enable_device() directly if bridge is NULL, so that devices behind the slot are re-enumerated in that case too. This change is based on earlier patches from Kirill A Shutemov and Mika Westerberg. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
2013-07-16 22:10:35 +02:00
mutex_lock(&slot->crit_sect);
enable_slot(slot);
ACPI / hotplug / PCI: Check for new devices on enabled slots The current implementation of acpiphp_check_bridge() is pretty dumb: - It enables a slot if it's not enabled and the slot status is ACPI_STA_ALL. - It disables a slot if it's enabled and the slot status is not ACPI_STA_ALL. This behavior is not sufficient to handle the Thunderbolt daisy chaining case properly, however, because in that case the bus behind the already enabled slot needs to be rescanned for new devices. For this reason, modify acpiphp_check_bridge() so that slots are disabled and stopped if they are not in the ACPI_STA_ALL state. For slots in the ACPI_STA_ALL state, devices behind them that don't respond are trimmed using a new function, trim_stale_devices(), introduced specifically for this purpose. That function walks the given bus and checks each device on it. If the device doesn't respond, it is assumed to be gone and is removed. Once all of the stale devices directy behind the slot have been removed, acpiphp_check_bridge() will start looking for new devices that might have appeared on the given bus. It will do that even if the slot is already enabled (SLOT_ENABLED is set for it). In addition to that, make the bus check notification ignore SLOT_ENABLED and go for enable_device() directly if bridge is NULL, so that devices behind the slot are re-enumerated in that case too. This change is based on earlier patches from Kirill A Shutemov and Mika Westerberg. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
2013-07-16 22:10:35 +02:00
mutex_unlock(&slot->crit_sect);
}
break;
case ACPI_NOTIFY_DEVICE_CHECK:
/* device check */
pr_debug("%s: Device check notify on %s\n", __func__, objname);
ACPI / hotplug / PCI: Avoid parent bus rescans on spurious device checks In the current ACPIPHP notify handler we always go directly for a rescan of the parent bus if we get a device check notification for a device that is not a bridge. However, this obviously is overzealous if nothing really changes, because this way we may rescan the whole PCI hierarchy pretty much in vain. That happens on Alex Williamson's machine whose ACPI tables contain device objects that are supposed to coresspond to PCIe root ports, but those ports aren't physically present (or at least they aren't visible in the PCI config space to us). The BIOS generates multiple device check notifies for those objects during boot and for each of them we go straight for the parent bus rescan, but the parent bus is the root bus in this particular case. In consequence, we rescan the whole PCI bus from the top several times in a row, which is completely unnecessary, increases boot time by 50% (after previous fixes) and generates excess dmesg output from the PCI subsystem. Fix the problem by checking if we can find anything new in the slot corresponding to the device we've got a device check notify for and doing nothig if that's not the case. The spec (ACPI 5.0, Section 5.6.6) appears to mandate this behavior, as it says: Device Check. Used to notify OSPM that the device either appeared or disappeared. If the device has appeared, OSPM will re-enumerate from the parent. If the device has disappeared, OSPM will invalidate the state of the device. OSPM may optimize out re-enumeration. Therefore, according to the spec, we are free to do nothing if nothing changes. References: https://bugzilla.kernel.org/show_bug.cgi?id=60865 Reported-and-tested-by: Alex Williamson <alex.williamson@redhat.com> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2013-09-08 00:07:28 +02:00
if (bridge) {
acpiphp_check_bridge(bridge);
ACPI / hotplug / PCI: Avoid parent bus rescans on spurious device checks In the current ACPIPHP notify handler we always go directly for a rescan of the parent bus if we get a device check notification for a device that is not a bridge. However, this obviously is overzealous if nothing really changes, because this way we may rescan the whole PCI hierarchy pretty much in vain. That happens on Alex Williamson's machine whose ACPI tables contain device objects that are supposed to coresspond to PCIe root ports, but those ports aren't physically present (or at least they aren't visible in the PCI config space to us). The BIOS generates multiple device check notifies for those objects during boot and for each of them we go straight for the parent bus rescan, but the parent bus is the root bus in this particular case. In consequence, we rescan the whole PCI bus from the top several times in a row, which is completely unnecessary, increases boot time by 50% (after previous fixes) and generates excess dmesg output from the PCI subsystem. Fix the problem by checking if we can find anything new in the slot corresponding to the device we've got a device check notify for and doing nothig if that's not the case. The spec (ACPI 5.0, Section 5.6.6) appears to mandate this behavior, as it says: Device Check. Used to notify OSPM that the device either appeared or disappeared. If the device has appeared, OSPM will re-enumerate from the parent. If the device has disappeared, OSPM will invalidate the state of the device. OSPM may optimize out re-enumeration. Therefore, according to the spec, we are free to do nothing if nothing changes. References: https://bugzilla.kernel.org/show_bug.cgi?id=60865 Reported-and-tested-by: Alex Williamson <alex.williamson@redhat.com> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2013-09-08 00:07:28 +02:00
} else {
struct acpiphp_slot *slot = func->slot;
int ret;
if (slot->flags & SLOT_IS_GOING_AWAY)
break;
ACPI / hotplug / PCI: Avoid parent bus rescans on spurious device checks In the current ACPIPHP notify handler we always go directly for a rescan of the parent bus if we get a device check notification for a device that is not a bridge. However, this obviously is overzealous if nothing really changes, because this way we may rescan the whole PCI hierarchy pretty much in vain. That happens on Alex Williamson's machine whose ACPI tables contain device objects that are supposed to coresspond to PCIe root ports, but those ports aren't physically present (or at least they aren't visible in the PCI config space to us). The BIOS generates multiple device check notifies for those objects during boot and for each of them we go straight for the parent bus rescan, but the parent bus is the root bus in this particular case. In consequence, we rescan the whole PCI bus from the top several times in a row, which is completely unnecessary, increases boot time by 50% (after previous fixes) and generates excess dmesg output from the PCI subsystem. Fix the problem by checking if we can find anything new in the slot corresponding to the device we've got a device check notify for and doing nothig if that's not the case. The spec (ACPI 5.0, Section 5.6.6) appears to mandate this behavior, as it says: Device Check. Used to notify OSPM that the device either appeared or disappeared. If the device has appeared, OSPM will re-enumerate from the parent. If the device has disappeared, OSPM will invalidate the state of the device. OSPM may optimize out re-enumeration. Therefore, according to the spec, we are free to do nothing if nothing changes. References: https://bugzilla.kernel.org/show_bug.cgi?id=60865 Reported-and-tested-by: Alex Williamson <alex.williamson@redhat.com> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2013-09-08 00:07:28 +02:00
/*
* Check if anything has changed in the slot and rescan
* from the parent if that's the case.
*/
mutex_lock(&slot->crit_sect);
ret = acpiphp_rescan_slot(slot);
mutex_unlock(&slot->crit_sect);
if (ret)
acpiphp_check_bridge(func->parent);
}
break;
case ACPI_NOTIFY_EJECT_REQUEST:
/* request device eject */
pr_debug("%s: Device eject notify on %s\n", __func__, objname);
acpiphp_disable_and_eject_slot(func->slot);
break;
}
PCI hotplug: acpiphp: Prevent deadlock on PCI-to-PCI bridge remove I originally submitted a patch to workaround this by pushing all Ejection Requests and Device Checks onto the kacpi_hotplug queue. http://marc.info/?l=linux-acpi&m=131678270930105&w=2 The patch is still insufficient in that Bus Checks also need to be added. Rather than add all events, including non-PCI-hotplug events, to the hotplug queue, mjg suggested that a better approach would be to modify the acpiphp driver so only acpiphp events would be added to the kacpi_hotplug queue. It's a longer patch, but at least we maintain the benefit of having separate queues in ACPI. This, of course, is still only a workaround the problem. As Bjorn and mjg pointed out, we have to refactor a lot of this code to do the right thing but at this point it is a better to have this code working. The acpi core places all events on the kacpi_notify queue. When the acpiphp driver is loaded and a PCI card with a PCI-to-PCI bridge is removed the following call sequence occurs: cleanup_p2p_bridge() -> cleanup_bridge() -> acpi_remove_notify_handler() -> acpi_os_wait_events_complete() -> flush_workqueue(kacpi_notify_wq) which is the queue we are currently executing on and the process will hang. Move all hotplug acpiphp events onto the kacpi_hotplug workqueue. In handle_hotplug_event_bridge() and handle_hotplug_event_func() we can simply push the rest of the work onto the kacpi_hotplug queue and then avoid the deadlock. Signed-off-by: Prarit Bhargava <prarit@redhat.com> Cc: mjg@redhat.com Cc: bhelgaas@google.com Cc: linux-acpi@vger.kernel.org Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2011-09-28 19:40:53 -04:00
pci_unlock_rescan_remove();
if (bridge)
put_bridge(bridge);
ACPI / dock / PCI: Synchronous handling of dock events for PCI devices The interactions between the ACPI dock driver and the ACPI-based PCI hotplug (acpiphp) are currently problematic because of ordering issues during hot-remove operations. First of all, the current ACPI glue code expects that physical devices will always be deleted before deleting the companion ACPI device objects. Otherwise, acpi_unbind_one() will fail with a warning message printed to the kernel log, for example: [ 185.026073] usb usb5: Oops, 'acpi_handle' corrupt [ 185.035150] pci 0000:1b:00.0: Oops, 'acpi_handle' corrupt [ 185.035515] pci 0000:18:02.0: Oops, 'acpi_handle' corrupt [ 180.013656] port1: Oops, 'acpi_handle' corrupt This means, in particular, that struct pci_dev objects have to be deleted before the struct acpi_device objects they are "glued" with. Now, the following happens the during the undocking of an ACPI-based dock station: 1) hotplug_dock_devices() invokes registered hotplug callbacks to destroy physical devices associated with the ACPI device objects depending on the dock station. It calls dd->ops->handler() for each of those device objects. 2) For PCI devices dd->ops->handler() points to handle_hotplug_event_func() that queues up a separate work item to execute _handle_hotplug_event_func() for the given device and returns immediately. That work item will be executed later. 3) hotplug_dock_devices() calls dock_remove_acpi_device() for each device depending on the dock station. This runs acpi_bus_trim() for each of them, which causes the underlying ACPI device object to be destroyed, but the work items queued up by handle_hotplug_event_func() haven't been started yet. 4) _handle_hotplug_event_func() queued up in step 2) are executed and cause the above failure to happen, because the PCI devices they handle do not have the companion ACPI device objects any more (those objects have been deleted in step 3). The possible breakage doesn't end here, though, because hotplug_dock_devices() may return before at least some of the _handle_hotplug_event_func() work items spawned by it have a chance to complete and then undock() will cause _DCK to be evaluated and that will cause the devices handled by the _handle_hotplug_event_func() to go away possibly while they are being accessed. This means that dd->ops->handler() for PCI devices should not point to handle_hotplug_event_func(). Instead, it should point to a function that will do the work of _handle_hotplug_event_func() synchronously. For this reason, introduce such a function, hotplug_event_func(), and modity acpiphp_dock_ops to point to it as the handler. Unfortunately, however, this is not sufficient, because if the dock code were not changed further, hotplug_event_func() would now deadlock with hotplug_dock_devices() that called it, since it would run unregister_hotplug_dock_device() which in turn would attempt to acquire the dock station's hp_lock mutex already acquired by hotplug_dock_devices(). To resolve that deadlock use the observation that unregister_hotplug_dock_device() won't need to acquire hp_lock if PCI bridges the devices on the dock station depend on are prevented from being removed prematurely while the first loop in hotplug_dock_devices() is in progress. To make that possible, introduce a mechanism by which the callers of register_hotplug_dock_device() can provide "init" and "release" routines that will be executed, respectively, during the addition and removal of the physical device object associated with the given ACPI device handle. Make acpiphp use two new functions, acpiphp_dock_init() and acpiphp_dock_release(), that call get_bridge() and put_bridge(), respectively, on the acpiphp bridge holding the given device, for this purpose. In addition to that, remove the dock station's list of "hotplug devices" and make the dock code always walk the whole list of "dependent devices" instead in such a way that the loops in hotplug_dock_devices() and dock_event() (replacing the loops over "hotplug devices") will take references to the list entries that register_hotplug_dock_device() has been called for. That prevents the "release" routines associated with those entries from being called while the given entry is being processed and for PCI devices this means that their bridges won't be removed (by a concurrent thread) while hotplug_event_func() handling them is being executed. This change is based on two earlier patches from Jiang Liu. References: https://bugzilla.kernel.org/show_bug.cgi?id=59501 Reported-and-tested-by: Alexander E. Patrakov <patrakov@gmail.com> Tracked-down-by: Jiang Liu <jiang.liu@huawei.com> Tested-by: Illya Klymov <xanf@xanf.me> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Acked-by: Yinghai Lu <yinghai@kernel.org> Cc: 3.9+ <stable@vger.kernel.org>
2013-06-24 11:22:53 +02:00
}
ACPI / hotplug: Consolidate deferred execution of ACPI hotplug routines There are two different interfaces for queuing up work items on the ACPI hotplug workqueue, alloc_acpi_hp_work() used by PCI and PCI host bridge hotplug code and acpi_os_hotplug_execute() used by the common ACPI hotplug code and docking stations. They both are somewhat cumbersome to use and work slightly differently. The users of alloc_acpi_hp_work() have to submit a work function that will extract the necessary data items from a struct acpi_hp_work object allocated by alloc_acpi_hp_work() and then will free that object, while it would be more straightforward to simply use a work function with one more argument and let the interface take care of the execution details. The users of acpi_os_hotplug_execute() also have to deal with the fact that it takes only one argument in addition to the work function pointer, although acpi_os_execute_deferred() actually takes care of the allocation and freeing of memory, so it would have been able to pass more arguments to the work function if it hadn't been constrained by the connection with acpi_os_execute(). Moreover, while alloc_acpi_hp_work() makes GFP_KERNEL memory allocations, which is correct, because hotplug work items are always queued up from process context, acpi_os_hotplug_execute() uses GFP_ATOMIC, as that is needed by acpi_os_execute(). Also, acpi_os_execute_deferred() queued up by it waits for the ACPI event workqueues to flush before executing the work function, whereas alloc_acpi_hp_work() can't do anything similar. That leads to somewhat arbitrary differences in behavior between various ACPI hotplug code paths and has to be straightened up. For this reason, replace both alloc_acpi_hp_work() and acpi_os_hotplug_execute() with a single interface, acpi_hotplug_execute(), combining their behavior and being more friendly to its users than any of the two. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
2013-11-07 01:45:40 +01:00
static void hotplug_event_work(void *data, u32 type)
ACPI / dock / PCI: Synchronous handling of dock events for PCI devices The interactions between the ACPI dock driver and the ACPI-based PCI hotplug (acpiphp) are currently problematic because of ordering issues during hot-remove operations. First of all, the current ACPI glue code expects that physical devices will always be deleted before deleting the companion ACPI device objects. Otherwise, acpi_unbind_one() will fail with a warning message printed to the kernel log, for example: [ 185.026073] usb usb5: Oops, 'acpi_handle' corrupt [ 185.035150] pci 0000:1b:00.0: Oops, 'acpi_handle' corrupt [ 185.035515] pci 0000:18:02.0: Oops, 'acpi_handle' corrupt [ 180.013656] port1: Oops, 'acpi_handle' corrupt This means, in particular, that struct pci_dev objects have to be deleted before the struct acpi_device objects they are "glued" with. Now, the following happens the during the undocking of an ACPI-based dock station: 1) hotplug_dock_devices() invokes registered hotplug callbacks to destroy physical devices associated with the ACPI device objects depending on the dock station. It calls dd->ops->handler() for each of those device objects. 2) For PCI devices dd->ops->handler() points to handle_hotplug_event_func() that queues up a separate work item to execute _handle_hotplug_event_func() for the given device and returns immediately. That work item will be executed later. 3) hotplug_dock_devices() calls dock_remove_acpi_device() for each device depending on the dock station. This runs acpi_bus_trim() for each of them, which causes the underlying ACPI device object to be destroyed, but the work items queued up by handle_hotplug_event_func() haven't been started yet. 4) _handle_hotplug_event_func() queued up in step 2) are executed and cause the above failure to happen, because the PCI devices they handle do not have the companion ACPI device objects any more (those objects have been deleted in step 3). The possible breakage doesn't end here, though, because hotplug_dock_devices() may return before at least some of the _handle_hotplug_event_func() work items spawned by it have a chance to complete and then undock() will cause _DCK to be evaluated and that will cause the devices handled by the _handle_hotplug_event_func() to go away possibly while they are being accessed. This means that dd->ops->handler() for PCI devices should not point to handle_hotplug_event_func(). Instead, it should point to a function that will do the work of _handle_hotplug_event_func() synchronously. For this reason, introduce such a function, hotplug_event_func(), and modity acpiphp_dock_ops to point to it as the handler. Unfortunately, however, this is not sufficient, because if the dock code were not changed further, hotplug_event_func() would now deadlock with hotplug_dock_devices() that called it, since it would run unregister_hotplug_dock_device() which in turn would attempt to acquire the dock station's hp_lock mutex already acquired by hotplug_dock_devices(). To resolve that deadlock use the observation that unregister_hotplug_dock_device() won't need to acquire hp_lock if PCI bridges the devices on the dock station depend on are prevented from being removed prematurely while the first loop in hotplug_dock_devices() is in progress. To make that possible, introduce a mechanism by which the callers of register_hotplug_dock_device() can provide "init" and "release" routines that will be executed, respectively, during the addition and removal of the physical device object associated with the given ACPI device handle. Make acpiphp use two new functions, acpiphp_dock_init() and acpiphp_dock_release(), that call get_bridge() and put_bridge(), respectively, on the acpiphp bridge holding the given device, for this purpose. In addition to that, remove the dock station's list of "hotplug devices" and make the dock code always walk the whole list of "dependent devices" instead in such a way that the loops in hotplug_dock_devices() and dock_event() (replacing the loops over "hotplug devices") will take references to the list entries that register_hotplug_dock_device() has been called for. That prevents the "release" routines associated with those entries from being called while the given entry is being processed and for PCI devices this means that their bridges won't be removed (by a concurrent thread) while hotplug_event_func() handling them is being executed. This change is based on two earlier patches from Jiang Liu. References: https://bugzilla.kernel.org/show_bug.cgi?id=59501 Reported-and-tested-by: Alexander E. Patrakov <patrakov@gmail.com> Tracked-down-by: Jiang Liu <jiang.liu@huawei.com> Tested-by: Illya Klymov <xanf@xanf.me> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Acked-by: Yinghai Lu <yinghai@kernel.org> Cc: 3.9+ <stable@vger.kernel.org>
2013-06-24 11:22:53 +02:00
{
ACPI / hotplug: Consolidate deferred execution of ACPI hotplug routines There are two different interfaces for queuing up work items on the ACPI hotplug workqueue, alloc_acpi_hp_work() used by PCI and PCI host bridge hotplug code and acpi_os_hotplug_execute() used by the common ACPI hotplug code and docking stations. They both are somewhat cumbersome to use and work slightly differently. The users of alloc_acpi_hp_work() have to submit a work function that will extract the necessary data items from a struct acpi_hp_work object allocated by alloc_acpi_hp_work() and then will free that object, while it would be more straightforward to simply use a work function with one more argument and let the interface take care of the execution details. The users of acpi_os_hotplug_execute() also have to deal with the fact that it takes only one argument in addition to the work function pointer, although acpi_os_execute_deferred() actually takes care of the allocation and freeing of memory, so it would have been able to pass more arguments to the work function if it hadn't been constrained by the connection with acpi_os_execute(). Moreover, while alloc_acpi_hp_work() makes GFP_KERNEL memory allocations, which is correct, because hotplug work items are always queued up from process context, acpi_os_hotplug_execute() uses GFP_ATOMIC, as that is needed by acpi_os_execute(). Also, acpi_os_execute_deferred() queued up by it waits for the ACPI event workqueues to flush before executing the work function, whereas alloc_acpi_hp_work() can't do anything similar. That leads to somewhat arbitrary differences in behavior between various ACPI hotplug code paths and has to be straightened up. For this reason, replace both alloc_acpi_hp_work() and acpi_os_hotplug_execute() with a single interface, acpi_hotplug_execute(), combining their behavior and being more friendly to its users than any of the two. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
2013-11-07 01:45:40 +01:00
struct acpiphp_context *context = data;
acpi_handle handle = context->adev->handle;
ACPI / dock / PCI: Synchronous handling of dock events for PCI devices The interactions between the ACPI dock driver and the ACPI-based PCI hotplug (acpiphp) are currently problematic because of ordering issues during hot-remove operations. First of all, the current ACPI glue code expects that physical devices will always be deleted before deleting the companion ACPI device objects. Otherwise, acpi_unbind_one() will fail with a warning message printed to the kernel log, for example: [ 185.026073] usb usb5: Oops, 'acpi_handle' corrupt [ 185.035150] pci 0000:1b:00.0: Oops, 'acpi_handle' corrupt [ 185.035515] pci 0000:18:02.0: Oops, 'acpi_handle' corrupt [ 180.013656] port1: Oops, 'acpi_handle' corrupt This means, in particular, that struct pci_dev objects have to be deleted before the struct acpi_device objects they are "glued" with. Now, the following happens the during the undocking of an ACPI-based dock station: 1) hotplug_dock_devices() invokes registered hotplug callbacks to destroy physical devices associated with the ACPI device objects depending on the dock station. It calls dd->ops->handler() for each of those device objects. 2) For PCI devices dd->ops->handler() points to handle_hotplug_event_func() that queues up a separate work item to execute _handle_hotplug_event_func() for the given device and returns immediately. That work item will be executed later. 3) hotplug_dock_devices() calls dock_remove_acpi_device() for each device depending on the dock station. This runs acpi_bus_trim() for each of them, which causes the underlying ACPI device object to be destroyed, but the work items queued up by handle_hotplug_event_func() haven't been started yet. 4) _handle_hotplug_event_func() queued up in step 2) are executed and cause the above failure to happen, because the PCI devices they handle do not have the companion ACPI device objects any more (those objects have been deleted in step 3). The possible breakage doesn't end here, though, because hotplug_dock_devices() may return before at least some of the _handle_hotplug_event_func() work items spawned by it have a chance to complete and then undock() will cause _DCK to be evaluated and that will cause the devices handled by the _handle_hotplug_event_func() to go away possibly while they are being accessed. This means that dd->ops->handler() for PCI devices should not point to handle_hotplug_event_func(). Instead, it should point to a function that will do the work of _handle_hotplug_event_func() synchronously. For this reason, introduce such a function, hotplug_event_func(), and modity acpiphp_dock_ops to point to it as the handler. Unfortunately, however, this is not sufficient, because if the dock code were not changed further, hotplug_event_func() would now deadlock with hotplug_dock_devices() that called it, since it would run unregister_hotplug_dock_device() which in turn would attempt to acquire the dock station's hp_lock mutex already acquired by hotplug_dock_devices(). To resolve that deadlock use the observation that unregister_hotplug_dock_device() won't need to acquire hp_lock if PCI bridges the devices on the dock station depend on are prevented from being removed prematurely while the first loop in hotplug_dock_devices() is in progress. To make that possible, introduce a mechanism by which the callers of register_hotplug_dock_device() can provide "init" and "release" routines that will be executed, respectively, during the addition and removal of the physical device object associated with the given ACPI device handle. Make acpiphp use two new functions, acpiphp_dock_init() and acpiphp_dock_release(), that call get_bridge() and put_bridge(), respectively, on the acpiphp bridge holding the given device, for this purpose. In addition to that, remove the dock station's list of "hotplug devices" and make the dock code always walk the whole list of "dependent devices" instead in such a way that the loops in hotplug_dock_devices() and dock_event() (replacing the loops over "hotplug devices") will take references to the list entries that register_hotplug_dock_device() has been called for. That prevents the "release" routines associated with those entries from being called while the given entry is being processed and for PCI devices this means that their bridges won't be removed (by a concurrent thread) while hotplug_event_func() handling them is being executed. This change is based on two earlier patches from Jiang Liu. References: https://bugzilla.kernel.org/show_bug.cgi?id=59501 Reported-and-tested-by: Alexander E. Patrakov <patrakov@gmail.com> Tracked-down-by: Jiang Liu <jiang.liu@huawei.com> Tested-by: Illya Klymov <xanf@xanf.me> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Acked-by: Yinghai Lu <yinghai@kernel.org> Cc: 3.9+ <stable@vger.kernel.org>
2013-06-24 11:22:53 +02:00
acpi_scan_lock_acquire();
ACPI / hotplug: Consolidate deferred execution of ACPI hotplug routines There are two different interfaces for queuing up work items on the ACPI hotplug workqueue, alloc_acpi_hp_work() used by PCI and PCI host bridge hotplug code and acpi_os_hotplug_execute() used by the common ACPI hotplug code and docking stations. They both are somewhat cumbersome to use and work slightly differently. The users of alloc_acpi_hp_work() have to submit a work function that will extract the necessary data items from a struct acpi_hp_work object allocated by alloc_acpi_hp_work() and then will free that object, while it would be more straightforward to simply use a work function with one more argument and let the interface take care of the execution details. The users of acpi_os_hotplug_execute() also have to deal with the fact that it takes only one argument in addition to the work function pointer, although acpi_os_execute_deferred() actually takes care of the allocation and freeing of memory, so it would have been able to pass more arguments to the work function if it hadn't been constrained by the connection with acpi_os_execute(). Moreover, while alloc_acpi_hp_work() makes GFP_KERNEL memory allocations, which is correct, because hotplug work items are always queued up from process context, acpi_os_hotplug_execute() uses GFP_ATOMIC, as that is needed by acpi_os_execute(). Also, acpi_os_execute_deferred() queued up by it waits for the ACPI event workqueues to flush before executing the work function, whereas alloc_acpi_hp_work() can't do anything similar. That leads to somewhat arbitrary differences in behavior between various ACPI hotplug code paths and has to be straightened up. For this reason, replace both alloc_acpi_hp_work() and acpi_os_hotplug_execute() with a single interface, acpi_hotplug_execute(), combining their behavior and being more friendly to its users than any of the two. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
2013-11-07 01:45:40 +01:00
hotplug_event(handle, type, context);
PCI hotplug: acpiphp: Prevent deadlock on PCI-to-PCI bridge remove I originally submitted a patch to workaround this by pushing all Ejection Requests and Device Checks onto the kacpi_hotplug queue. http://marc.info/?l=linux-acpi&m=131678270930105&w=2 The patch is still insufficient in that Bus Checks also need to be added. Rather than add all events, including non-PCI-hotplug events, to the hotplug queue, mjg suggested that a better approach would be to modify the acpiphp driver so only acpiphp events would be added to the kacpi_hotplug queue. It's a longer patch, but at least we maintain the benefit of having separate queues in ACPI. This, of course, is still only a workaround the problem. As Bjorn and mjg pointed out, we have to refactor a lot of this code to do the right thing but at this point it is a better to have this code working. The acpi core places all events on the kacpi_notify queue. When the acpiphp driver is loaded and a PCI card with a PCI-to-PCI bridge is removed the following call sequence occurs: cleanup_p2p_bridge() -> cleanup_bridge() -> acpi_remove_notify_handler() -> acpi_os_wait_events_complete() -> flush_workqueue(kacpi_notify_wq) which is the queue we are currently executing on and the process will hang. Move all hotplug acpiphp events onto the kacpi_hotplug workqueue. In handle_hotplug_event_bridge() and handle_hotplug_event_func() we can simply push the rest of the work onto the kacpi_hotplug queue and then avoid the deadlock. Signed-off-by: Prarit Bhargava <prarit@redhat.com> Cc: mjg@redhat.com Cc: bhelgaas@google.com Cc: linux-acpi@vger.kernel.org Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2011-09-28 19:40:53 -04:00
ACPI / hotplug: Fix concurrency issues and memory leaks This changeset is aimed at fixing a few different but related problems in the ACPI hotplug infrastructure. First of all, since notify handlers may be run in parallel with acpi_bus_scan(), acpi_bus_trim() and acpi_bus_hot_remove_device() and some of them are installed for ACPI handles that have no struct acpi_device objects attached (i.e. before those objects are created), those notify handlers have to take acpi_scan_lock to prevent races from taking place (e.g. a struct acpi_device is found to be present for the given ACPI handle, but right after that it is removed by acpi_bus_trim() running in parallel to the given notify handler). Moreover, since some of them call acpi_bus_scan() and acpi_bus_trim(), this leads to the conclusion that acpi_scan_lock should be acquired by the callers of these two funtions rather by these functions themselves. For these reasons, make all notify handlers that can handle device addition and eject events take acpi_scan_lock and remove the acpi_scan_lock locking from acpi_bus_scan() and acpi_bus_trim(). Accordingly, update all of their users to make sure that they are always called under acpi_scan_lock. Furthermore, since eject operations are carried out asynchronously with respect to the notify events that trigger them, with the help of acpi_bus_hot_remove_device(), even if notify handlers take the ACPI scan lock, it still is possible that, for example, acpi_bus_trim() will run between acpi_bus_hot_remove_device() and the notify handler that scheduled its execution and that acpi_bus_trim() will remove the device node passed to acpi_bus_hot_remove_device() for ejection. In that case, the struct acpi_device object obtained by acpi_bus_hot_remove_device() will be invalid and not-so-funny things will ensue. To protect agaist that, make the users of acpi_bus_hot_remove_device() run get_device() on ACPI device node objects that are about to be passed to it and make acpi_bus_hot_remove_device() run put_device() on them and check if their ACPI handles are not NULL (make acpi_device_unregister() clear the device nodes' ACPI handles for that check to work). Finally, observe that acpi_os_hotplug_execute() actually can fail, in which case its caller ought to free memory allocated for the context object to prevent leaks from happening. It also needs to run put_device() on the device node that it ran get_device() on previously in that case. Modify the code accordingly. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Acked-by: Yinghai Lu <yinghai@kernel.org>
2013-02-13 14:36:47 +01:00
acpi_scan_lock_release();
ACPI / hotplug: Consolidate deferred execution of ACPI hotplug routines There are two different interfaces for queuing up work items on the ACPI hotplug workqueue, alloc_acpi_hp_work() used by PCI and PCI host bridge hotplug code and acpi_os_hotplug_execute() used by the common ACPI hotplug code and docking stations. They both are somewhat cumbersome to use and work slightly differently. The users of alloc_acpi_hp_work() have to submit a work function that will extract the necessary data items from a struct acpi_hp_work object allocated by alloc_acpi_hp_work() and then will free that object, while it would be more straightforward to simply use a work function with one more argument and let the interface take care of the execution details. The users of acpi_os_hotplug_execute() also have to deal with the fact that it takes only one argument in addition to the work function pointer, although acpi_os_execute_deferred() actually takes care of the allocation and freeing of memory, so it would have been able to pass more arguments to the work function if it hadn't been constrained by the connection with acpi_os_execute(). Moreover, while alloc_acpi_hp_work() makes GFP_KERNEL memory allocations, which is correct, because hotplug work items are always queued up from process context, acpi_os_hotplug_execute() uses GFP_ATOMIC, as that is needed by acpi_os_execute(). Also, acpi_os_execute_deferred() queued up by it waits for the ACPI event workqueues to flush before executing the work function, whereas alloc_acpi_hp_work() can't do anything similar. That leads to somewhat arbitrary differences in behavior between various ACPI hotplug code paths and has to be straightened up. For this reason, replace both alloc_acpi_hp_work() and acpi_os_hotplug_execute() with a single interface, acpi_hotplug_execute(), combining their behavior and being more friendly to its users than any of the two. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
2013-11-07 01:45:40 +01:00
acpi_evaluate_hotplug_ost(handle, type, ACPI_OST_SC_SUCCESS, NULL);
put_bridge(context->func.parent);
}
PCI hotplug: acpiphp: Prevent deadlock on PCI-to-PCI bridge remove I originally submitted a patch to workaround this by pushing all Ejection Requests and Device Checks onto the kacpi_hotplug queue. http://marc.info/?l=linux-acpi&m=131678270930105&w=2 The patch is still insufficient in that Bus Checks also need to be added. Rather than add all events, including non-PCI-hotplug events, to the hotplug queue, mjg suggested that a better approach would be to modify the acpiphp driver so only acpiphp events would be added to the kacpi_hotplug queue. It's a longer patch, but at least we maintain the benefit of having separate queues in ACPI. This, of course, is still only a workaround the problem. As Bjorn and mjg pointed out, we have to refactor a lot of this code to do the right thing but at this point it is a better to have this code working. The acpi core places all events on the kacpi_notify queue. When the acpiphp driver is loaded and a PCI card with a PCI-to-PCI bridge is removed the following call sequence occurs: cleanup_p2p_bridge() -> cleanup_bridge() -> acpi_remove_notify_handler() -> acpi_os_wait_events_complete() -> flush_workqueue(kacpi_notify_wq) which is the queue we are currently executing on and the process will hang. Move all hotplug acpiphp events onto the kacpi_hotplug workqueue. In handle_hotplug_event_bridge() and handle_hotplug_event_func() we can simply push the rest of the work onto the kacpi_hotplug queue and then avoid the deadlock. Signed-off-by: Prarit Bhargava <prarit@redhat.com> Cc: mjg@redhat.com Cc: bhelgaas@google.com Cc: linux-acpi@vger.kernel.org Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2011-09-28 19:40:53 -04:00
/**
* handle_hotplug_event - handle ACPI hotplug event
PCI hotplug: acpiphp: Prevent deadlock on PCI-to-PCI bridge remove I originally submitted a patch to workaround this by pushing all Ejection Requests and Device Checks onto the kacpi_hotplug queue. http://marc.info/?l=linux-acpi&m=131678270930105&w=2 The patch is still insufficient in that Bus Checks also need to be added. Rather than add all events, including non-PCI-hotplug events, to the hotplug queue, mjg suggested that a better approach would be to modify the acpiphp driver so only acpiphp events would be added to the kacpi_hotplug queue. It's a longer patch, but at least we maintain the benefit of having separate queues in ACPI. This, of course, is still only a workaround the problem. As Bjorn and mjg pointed out, we have to refactor a lot of this code to do the right thing but at this point it is a better to have this code working. The acpi core places all events on the kacpi_notify queue. When the acpiphp driver is loaded and a PCI card with a PCI-to-PCI bridge is removed the following call sequence occurs: cleanup_p2p_bridge() -> cleanup_bridge() -> acpi_remove_notify_handler() -> acpi_os_wait_events_complete() -> flush_workqueue(kacpi_notify_wq) which is the queue we are currently executing on and the process will hang. Move all hotplug acpiphp events onto the kacpi_hotplug workqueue. In handle_hotplug_event_bridge() and handle_hotplug_event_func() we can simply push the rest of the work onto the kacpi_hotplug queue and then avoid the deadlock. Signed-off-by: Prarit Bhargava <prarit@redhat.com> Cc: mjg@redhat.com Cc: bhelgaas@google.com Cc: linux-acpi@vger.kernel.org Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2011-09-28 19:40:53 -04:00
* @handle: Notify()'ed acpi_handle
* @type: Notify code
* @data: pointer to acpiphp_context structure
PCI hotplug: acpiphp: Prevent deadlock on PCI-to-PCI bridge remove I originally submitted a patch to workaround this by pushing all Ejection Requests and Device Checks onto the kacpi_hotplug queue. http://marc.info/?l=linux-acpi&m=131678270930105&w=2 The patch is still insufficient in that Bus Checks also need to be added. Rather than add all events, including non-PCI-hotplug events, to the hotplug queue, mjg suggested that a better approach would be to modify the acpiphp driver so only acpiphp events would be added to the kacpi_hotplug queue. It's a longer patch, but at least we maintain the benefit of having separate queues in ACPI. This, of course, is still only a workaround the problem. As Bjorn and mjg pointed out, we have to refactor a lot of this code to do the right thing but at this point it is a better to have this code working. The acpi core places all events on the kacpi_notify queue. When the acpiphp driver is loaded and a PCI card with a PCI-to-PCI bridge is removed the following call sequence occurs: cleanup_p2p_bridge() -> cleanup_bridge() -> acpi_remove_notify_handler() -> acpi_os_wait_events_complete() -> flush_workqueue(kacpi_notify_wq) which is the queue we are currently executing on and the process will hang. Move all hotplug acpiphp events onto the kacpi_hotplug workqueue. In handle_hotplug_event_bridge() and handle_hotplug_event_func() we can simply push the rest of the work onto the kacpi_hotplug queue and then avoid the deadlock. Signed-off-by: Prarit Bhargava <prarit@redhat.com> Cc: mjg@redhat.com Cc: bhelgaas@google.com Cc: linux-acpi@vger.kernel.org Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2011-09-28 19:40:53 -04:00
*
* Handles ACPI event notification on slots.
*/
static void handle_hotplug_event(acpi_handle handle, u32 type, void *data)
PCI hotplug: acpiphp: Prevent deadlock on PCI-to-PCI bridge remove I originally submitted a patch to workaround this by pushing all Ejection Requests and Device Checks onto the kacpi_hotplug queue. http://marc.info/?l=linux-acpi&m=131678270930105&w=2 The patch is still insufficient in that Bus Checks also need to be added. Rather than add all events, including non-PCI-hotplug events, to the hotplug queue, mjg suggested that a better approach would be to modify the acpiphp driver so only acpiphp events would be added to the kacpi_hotplug queue. It's a longer patch, but at least we maintain the benefit of having separate queues in ACPI. This, of course, is still only a workaround the problem. As Bjorn and mjg pointed out, we have to refactor a lot of this code to do the right thing but at this point it is a better to have this code working. The acpi core places all events on the kacpi_notify queue. When the acpiphp driver is loaded and a PCI card with a PCI-to-PCI bridge is removed the following call sequence occurs: cleanup_p2p_bridge() -> cleanup_bridge() -> acpi_remove_notify_handler() -> acpi_os_wait_events_complete() -> flush_workqueue(kacpi_notify_wq) which is the queue we are currently executing on and the process will hang. Move all hotplug acpiphp events onto the kacpi_hotplug workqueue. In handle_hotplug_event_bridge() and handle_hotplug_event_func() we can simply push the rest of the work onto the kacpi_hotplug queue and then avoid the deadlock. Signed-off-by: Prarit Bhargava <prarit@redhat.com> Cc: mjg@redhat.com Cc: bhelgaas@google.com Cc: linux-acpi@vger.kernel.org Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2011-09-28 19:40:53 -04:00
{
struct acpiphp_context *context;
u32 ost_code = ACPI_OST_SC_SUCCESS;
ACPI / hotplug / PCI: Fix bridge removal race in handle_hotplug_event() If a PCI bridge with an ACPIPHP context attached is removed via sysfs, the code path executed as a result is the following: pci_stop_and_remove_bus_device_locked pci_remove_bus pcibios_remove_bus acpi_pci_remove_bus acpiphp_remove_slots cleanup_bridge put_bridge free_bridge acpiphp_put_context (for each child, under context lock) kfree (child context) Now, if a hotplug notify is dispatched for one of the bridge's children and the timing is such that handle_hotplug_event() for that notify is executed while free_bridge() above is running, the get_bridge(context->func.parent) in handle_hotplug_event() will not really help, because it is too late to prevent the bridge from going away and the child's context may be freed before hotplug_event_work() scheduled from handle_hotplug_event() dereferences the pointer to it passed via the data argument. That will cause a kernel crash to happpen in hotplug_event_work(). To prevent that from happening, make handle_hotplug_event() check the is_going_away flag of the function's parent bridge (under acpiphp_context_lock) and bail out if it's set. Also, make cleanup_bridge() set the bridge's is_going_away flag under acpiphp_context_lock so that it cannot be changed between the check and the subsequent get_bridge(context->func.parent) in handle_hotplug_event(). Then, in the above scenario, handle_hotplug_event() will notice that context->func.parent->is_going_away is already set and it will exit immediately preventing the crash from happening. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2014-02-03 22:30:06 +01:00
acpi_status status;
switch (type) {
case ACPI_NOTIFY_BUS_CHECK:
case ACPI_NOTIFY_DEVICE_CHECK:
break;
case ACPI_NOTIFY_EJECT_REQUEST:
ost_code = ACPI_OST_SC_EJECT_IN_PROGRESS;
acpi_evaluate_hotplug_ost(handle, type, ost_code, NULL);
break;
case ACPI_NOTIFY_DEVICE_WAKE:
return;
case ACPI_NOTIFY_FREQUENCY_MISMATCH:
acpi_handle_err(handle, "Device cannot be configured due "
"to a frequency mismatch\n");
goto out;
case ACPI_NOTIFY_BUS_MODE_MISMATCH:
acpi_handle_err(handle, "Device cannot be configured due "
"to a bus mode mismatch\n");
goto out;
case ACPI_NOTIFY_POWER_FAULT:
acpi_handle_err(handle, "Device has suffered a power fault\n");
goto out;
default:
acpi_handle_warn(handle, "Unsupported event type 0x%x\n", type);
ost_code = ACPI_OST_SC_UNRECOGNIZED_NOTIFY;
goto out;
}
mutex_lock(&acpiphp_context_lock);
context = acpiphp_get_context(handle);
if (!context || WARN_ON(context->adev->handle != handle)
ACPI / hotplug / PCI: Fix bridge removal race in handle_hotplug_event() If a PCI bridge with an ACPIPHP context attached is removed via sysfs, the code path executed as a result is the following: pci_stop_and_remove_bus_device_locked pci_remove_bus pcibios_remove_bus acpi_pci_remove_bus acpiphp_remove_slots cleanup_bridge put_bridge free_bridge acpiphp_put_context (for each child, under context lock) kfree (child context) Now, if a hotplug notify is dispatched for one of the bridge's children and the timing is such that handle_hotplug_event() for that notify is executed while free_bridge() above is running, the get_bridge(context->func.parent) in handle_hotplug_event() will not really help, because it is too late to prevent the bridge from going away and the child's context may be freed before hotplug_event_work() scheduled from handle_hotplug_event() dereferences the pointer to it passed via the data argument. That will cause a kernel crash to happpen in hotplug_event_work(). To prevent that from happening, make handle_hotplug_event() check the is_going_away flag of the function's parent bridge (under acpiphp_context_lock) and bail out if it's set. Also, make cleanup_bridge() set the bridge's is_going_away flag under acpiphp_context_lock so that it cannot be changed between the check and the subsequent get_bridge(context->func.parent) in handle_hotplug_event(). Then, in the above scenario, handle_hotplug_event() will notice that context->func.parent->is_going_away is already set and it will exit immediately preventing the crash from happening. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2014-02-03 22:30:06 +01:00
|| context->func.parent->is_going_away)
goto err_out;
get_bridge(context->func.parent);
acpiphp_put_context(context);
status = acpi_hotplug_execute(hotplug_event_work, context, type);
if (ACPI_SUCCESS(status)) {
mutex_unlock(&acpiphp_context_lock);
return;
}
ACPI / hotplug / PCI: Fix bridge removal race in handle_hotplug_event() If a PCI bridge with an ACPIPHP context attached is removed via sysfs, the code path executed as a result is the following: pci_stop_and_remove_bus_device_locked pci_remove_bus pcibios_remove_bus acpi_pci_remove_bus acpiphp_remove_slots cleanup_bridge put_bridge free_bridge acpiphp_put_context (for each child, under context lock) kfree (child context) Now, if a hotplug notify is dispatched for one of the bridge's children and the timing is such that handle_hotplug_event() for that notify is executed while free_bridge() above is running, the get_bridge(context->func.parent) in handle_hotplug_event() will not really help, because it is too late to prevent the bridge from going away and the child's context may be freed before hotplug_event_work() scheduled from handle_hotplug_event() dereferences the pointer to it passed via the data argument. That will cause a kernel crash to happpen in hotplug_event_work(). To prevent that from happening, make handle_hotplug_event() check the is_going_away flag of the function's parent bridge (under acpiphp_context_lock) and bail out if it's set. Also, make cleanup_bridge() set the bridge's is_going_away flag under acpiphp_context_lock so that it cannot be changed between the check and the subsequent get_bridge(context->func.parent) in handle_hotplug_event(). Then, in the above scenario, handle_hotplug_event() will notice that context->func.parent->is_going_away is already set and it will exit immediately preventing the crash from happening. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2014-02-03 22:30:06 +01:00
put_bridge(context->func.parent);
err_out:
mutex_unlock(&acpiphp_context_lock);
ost_code = ACPI_OST_SC_NON_SPECIFIC_FAILURE;
out:
acpi_evaluate_hotplug_ost(handle, type, ost_code, NULL);
}
/**
* acpiphp_enumerate_slots - Enumerate PCI slots for a given bus.
* @bus: PCI bus to enumerate the slots for.
*
* A "slot" is an object associated with a PCI device number. All functions
* (PCI devices) with the same bus and device number belong to the same slot.
*/
void acpiphp_enumerate_slots(struct pci_bus *bus)
{
struct acpiphp_bridge *bridge;
struct acpi_device *adev;
acpi_handle handle;
acpi_status status;
if (acpiphp_disabled)
return;
adev = ACPI_COMPANION(bus->bridge);
if (!adev)
return;
handle = adev->handle;
bridge = kzalloc(sizeof(struct acpiphp_bridge), GFP_KERNEL);
if (!bridge) {
acpi_handle_err(handle, "No memory for bridge object\n");
return;
}
INIT_LIST_HEAD(&bridge->slots);
kref_init(&bridge->ref);
bridge->pci_dev = pci_dev_get(bus->self);
bridge->pci_bus = bus;
/*
* Grab a ref to the subordinate PCI bus in case the bus is
* removed via PCI core logical hotplug. The ref pins the bus
* (which we access during module unload).
*/
get_device(&bus->dev);
if (!pci_is_root_bus(bridge->pci_bus)) {
struct acpiphp_context *context;
/*
* This bridge should have been registered as a hotplug function
* under its parent, so the context should be there, unless the
* parent is going to be handled by pciehp, in which case this
* bridge is not interesting to us either.
*/
mutex_lock(&acpiphp_context_lock);
context = acpiphp_get_context(handle);
if (!context) {
mutex_unlock(&acpiphp_context_lock);
put_device(&bus->dev);
pci_dev_put(bridge->pci_dev);
kfree(bridge);
return;
}
bridge->context = context;
context->bridge = bridge;
/* Get a reference to the parent bridge. */
get_bridge(context->func.parent);
mutex_unlock(&acpiphp_context_lock);
}
/* must be added to the list prior to calling register_slot */
mutex_lock(&bridge_mutex);
list_add(&bridge->list, &bridge_list);
mutex_unlock(&bridge_mutex);
/* register all slot objects under this bridge */
status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, 1,
register_slot, NULL, bridge, NULL);
if (ACPI_FAILURE(status)) {
acpi_handle_err(handle, "failed to register slots\n");
cleanup_bridge(bridge);
put_bridge(bridge);
}
}
/**
* acpiphp_remove_slots - Remove slot objects associated with a given bus.
* @bus: PCI bus to remove the slot objects for.
*/
void acpiphp_remove_slots(struct pci_bus *bus)
{
struct acpiphp_bridge *bridge;
if (acpiphp_disabled)
return;
mutex_lock(&bridge_mutex);
list_for_each_entry(bridge, &bridge_list, list)
if (bridge->pci_bus == bus) {
mutex_unlock(&bridge_mutex);
cleanup_bridge(bridge);
put_bridge(bridge);
return;
}
mutex_unlock(&bridge_mutex);
}
/**
* acpiphp_enable_slot - power on slot
* @slot: ACPI PHP slot
*/
int acpiphp_enable_slot(struct acpiphp_slot *slot)
{
pci_lock_rescan_remove();
if (slot->flags & SLOT_IS_GOING_AWAY)
return -ENODEV;
mutex_lock(&slot->crit_sect);
/* configure all functions */
if (!(slot->flags & SLOT_ENABLED))
enable_slot(slot);
mutex_unlock(&slot->crit_sect);
pci_unlock_rescan_remove();
return 0;
}
/**
* acpiphp_disable_and_eject_slot - power off and eject slot
* @slot: ACPI PHP slot
*/
static int acpiphp_disable_and_eject_slot(struct acpiphp_slot *slot)
{
struct acpiphp_func *func;
if (slot->flags & SLOT_IS_GOING_AWAY)
return -ENODEV;
mutex_lock(&slot->crit_sect);
/* unconfigure all functions */
disable_slot(slot);
list_for_each_entry(func, &slot->funcs, sibling)
if (func->flags & FUNC_HAS_EJ0) {
acpi_handle handle = func_to_handle(func);
if (ACPI_FAILURE(acpi_evaluate_ej0(handle)))
acpi_handle_err(handle, "_EJ0 failed\n");
break;
}
mutex_unlock(&slot->crit_sect);
return 0;
}
int acpiphp_disable_slot(struct acpiphp_slot *slot)
{
int ret;
pci_lock_rescan_remove();
ret = acpiphp_disable_and_eject_slot(slot);
pci_unlock_rescan_remove();
return ret;
}
/*
* slot enabled: 1
* slot disabled: 0
*/
u8 acpiphp_get_power_status(struct acpiphp_slot *slot)
{
return (slot->flags & SLOT_ENABLED);
}
/*
* latch open: 1
* latch closed: 0
*/
u8 acpiphp_get_latch_status(struct acpiphp_slot *slot)
{
return !(get_slot_status(slot) & ACPI_STA_DEVICE_UI);
}
/*
* adapter presence : 1
* absence : 0
*/
u8 acpiphp_get_adapter_status(struct acpiphp_slot *slot)
{
return !!get_slot_status(slot);
}