267 lines
6.2 KiB
C
Raw Normal View History

// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
// Copyright(c) 2015-17 Intel Corporation.
#include <linux/acpi.h>
#include <linux/of.h>
#include <linux/soundwire/sdw.h>
#include <linux/soundwire/sdw_type.h>
#include "bus.h"
#include "sysfs_local.h"
static void sdw_slave_release(struct device *dev)
{
struct sdw_slave *slave = dev_to_sdw_dev(dev);
soundwire: revisit driver bind/unbind and callbacks In the SoundWire probe, we store a pointer from the driver ops into the 'slave' structure. This can lead to kernel oopses when unbinding codec drivers, e.g. with the following sequence to remove machine driver and codec driver. /sbin/modprobe -r snd_soc_sof_sdw /sbin/modprobe -r snd_soc_rt711 The full details can be found in the BugLink below, for reference the two following examples show different cases of driver ops/callbacks being invoked after the driver .remove(). kernel: BUG: kernel NULL pointer dereference, address: 0000000000000150 kernel: Workqueue: events cdns_update_slave_status_work [soundwire_cadence] kernel: RIP: 0010:mutex_lock+0x19/0x30 kernel: Call Trace: kernel: ? sdw_handle_slave_status+0x426/0xe00 [soundwire_bus 94ff184bf398570c3f8ff7efe9e32529f532e4ae] kernel: ? newidle_balance+0x26a/0x400 kernel: ? cdns_update_slave_status_work+0x1e9/0x200 [soundwire_cadence 1bcf98eebe5ba9833cd433323769ac923c9c6f82] kernel: BUG: unable to handle page fault for address: ffffffffc07654c8 kernel: Workqueue: pm pm_runtime_work kernel: RIP: 0010:sdw_bus_prep_clk_stop+0x6f/0x160 [soundwire_bus] kernel: Call Trace: kernel: <TASK> kernel: sdw_cdns_clock_stop+0xb5/0x1b0 [soundwire_cadence 1bcf98eebe5ba9833cd433323769ac923c9c6f82] kernel: intel_suspend_runtime+0x5f/0x120 [soundwire_intel aca858f7c87048d3152a4a41bb68abb9b663a1dd] kernel: ? dpm_sysfs_remove+0x60/0x60 This was not detected earlier in Intel tests since the tests first remove the parent PCI device and shut down the bus. The sequence above is a corner case which keeps the bus operational but without a driver bound. While trying to solve this kernel oopses, it became clear that the existing SoundWire bus does not deal well with the unbind case. Commit 528be501b7d4a ("soundwire: sdw_slave: add probe_complete structure and new fields") added a 'probed' status variable and a 'probe_complete' struct completion. This status is however not reset on remove and likewise the 'probe complete' is not re-initialized, so the bind/unbind/bind test cases would fail. The timeout used before the 'update_status' callback was also a bad idea in hindsight, there should really be no timing assumption as to if and when a driver is bound to a device. An initial draft was based on device_lock() and device_unlock() was tested. This proved too complicated, with deadlocks created during the suspend-resume sequences, which also use the same device_lock/unlock() as the bind/unbind sequences. On a CometLake device, a bad DSDT/BIOS caused spurious resumes and the use of device_lock() caused hangs during suspend. After multiple weeks or testing and painful reverse-engineering of deadlocks on different devices, we looked for alternatives that did not interfere with the device core. A bus notifier was used successfully to keep track of DRIVER_BOUND and DRIVER_UNBIND events. This solved the bind-unbind-bind case in tests, but it can still be defeated with a theoretical corner case where the memory is freed by a .remove while the callback is in use. The notifier only helps make sure the driver callbacks are valid, but not that the memory allocated in probe remains valid while the callbacks are invoked. This patch suggests the introduction of a new 'sdw_dev_lock' mutex protecting probe/remove and all driver callbacks. Since this mutex is 'local' to SoundWire only, it does not interfere with existing locks and does not create deadlocks. In addition, this patch removes the 'probe_complete' completion, instead we directly invoke the 'update_status' from the probe routine. That removes any sort of timing dependency and a much better support for the device/driver model, the driver could be bound before the bus started, or eons after the bus started and the hardware would be properly initialized in all cases. BugLink: https://github.com/thesofproject/linux/issues/3531 Fixes: 56d4fe31af77 ("soundwire: Add MIPI DisCo property helpers") Fixes: 528be501b7d4a ("soundwire: sdw_slave: add probe_complete structure and new fields") Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com> Reviewed-by: Rander Wang <rander.wang@intel.com> Reviewed-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com> Reviewed-by: Bard Liao <yung-chuan.liao@linux.intel.com> Reviewed-by: Péter Ujfalusi <peter.ujfalusi@linux.intel.com> Link: https://lore.kernel.org/r/20220621225641.221170-2-pierre-louis.bossart@linux.intel.com Signed-off-by: Vinod Koul <vkoul@kernel.org>
2022-06-21 17:56:38 -05:00
mutex_destroy(&slave->sdw_dev_lock);
kfree(slave);
}
struct device_type sdw_slave_type = {
.name = "sdw_slave",
.release = sdw_slave_release,
.uevent = sdw_slave_uevent,
};
int sdw_slave_add(struct sdw_bus *bus,
struct sdw_slave_id *id, struct fwnode_handle *fwnode)
{
struct sdw_slave *slave;
int ret;
int i;
slave = kzalloc(sizeof(*slave), GFP_KERNEL);
if (!slave)
return -ENOMEM;
/* Initialize data structure */
memcpy(&slave->id, id, sizeof(*id));
slave->dev.parent = bus->dev;
slave->dev.fwnode = fwnode;
if (id->unique_id == SDW_IGNORED_UNIQUE_ID) {
/* name shall be sdw:link:mfg:part:class */
dev_set_name(&slave->dev, "sdw:%01x:%04x:%04x:%02x",
bus->link_id, id->mfg_id, id->part_id,
id->class_id);
} else {
/* name shall be sdw:link:mfg:part:class:unique */
dev_set_name(&slave->dev, "sdw:%01x:%04x:%04x:%02x:%01x",
bus->link_id, id->mfg_id, id->part_id,
id->class_id, id->unique_id);
}
slave->dev.bus = &sdw_bus_type;
slave->dev.of_node = of_node_get(to_of_node(fwnode));
slave->dev.type = &sdw_slave_type;
slave->dev.groups = sdw_slave_status_attr_groups;
slave->bus = bus;
slave->status = SDW_SLAVE_UNATTACHED;
init_completion(&slave->enumeration_complete);
init_completion(&slave->initialization_complete);
slave->dev_num = 0;
slave->probed = false;
slave->first_interrupt_done = false;
soundwire: revisit driver bind/unbind and callbacks In the SoundWire probe, we store a pointer from the driver ops into the 'slave' structure. This can lead to kernel oopses when unbinding codec drivers, e.g. with the following sequence to remove machine driver and codec driver. /sbin/modprobe -r snd_soc_sof_sdw /sbin/modprobe -r snd_soc_rt711 The full details can be found in the BugLink below, for reference the two following examples show different cases of driver ops/callbacks being invoked after the driver .remove(). kernel: BUG: kernel NULL pointer dereference, address: 0000000000000150 kernel: Workqueue: events cdns_update_slave_status_work [soundwire_cadence] kernel: RIP: 0010:mutex_lock+0x19/0x30 kernel: Call Trace: kernel: ? sdw_handle_slave_status+0x426/0xe00 [soundwire_bus 94ff184bf398570c3f8ff7efe9e32529f532e4ae] kernel: ? newidle_balance+0x26a/0x400 kernel: ? cdns_update_slave_status_work+0x1e9/0x200 [soundwire_cadence 1bcf98eebe5ba9833cd433323769ac923c9c6f82] kernel: BUG: unable to handle page fault for address: ffffffffc07654c8 kernel: Workqueue: pm pm_runtime_work kernel: RIP: 0010:sdw_bus_prep_clk_stop+0x6f/0x160 [soundwire_bus] kernel: Call Trace: kernel: <TASK> kernel: sdw_cdns_clock_stop+0xb5/0x1b0 [soundwire_cadence 1bcf98eebe5ba9833cd433323769ac923c9c6f82] kernel: intel_suspend_runtime+0x5f/0x120 [soundwire_intel aca858f7c87048d3152a4a41bb68abb9b663a1dd] kernel: ? dpm_sysfs_remove+0x60/0x60 This was not detected earlier in Intel tests since the tests first remove the parent PCI device and shut down the bus. The sequence above is a corner case which keeps the bus operational but without a driver bound. While trying to solve this kernel oopses, it became clear that the existing SoundWire bus does not deal well with the unbind case. Commit 528be501b7d4a ("soundwire: sdw_slave: add probe_complete structure and new fields") added a 'probed' status variable and a 'probe_complete' struct completion. This status is however not reset on remove and likewise the 'probe complete' is not re-initialized, so the bind/unbind/bind test cases would fail. The timeout used before the 'update_status' callback was also a bad idea in hindsight, there should really be no timing assumption as to if and when a driver is bound to a device. An initial draft was based on device_lock() and device_unlock() was tested. This proved too complicated, with deadlocks created during the suspend-resume sequences, which also use the same device_lock/unlock() as the bind/unbind sequences. On a CometLake device, a bad DSDT/BIOS caused spurious resumes and the use of device_lock() caused hangs during suspend. After multiple weeks or testing and painful reverse-engineering of deadlocks on different devices, we looked for alternatives that did not interfere with the device core. A bus notifier was used successfully to keep track of DRIVER_BOUND and DRIVER_UNBIND events. This solved the bind-unbind-bind case in tests, but it can still be defeated with a theoretical corner case where the memory is freed by a .remove while the callback is in use. The notifier only helps make sure the driver callbacks are valid, but not that the memory allocated in probe remains valid while the callbacks are invoked. This patch suggests the introduction of a new 'sdw_dev_lock' mutex protecting probe/remove and all driver callbacks. Since this mutex is 'local' to SoundWire only, it does not interfere with existing locks and does not create deadlocks. In addition, this patch removes the 'probe_complete' completion, instead we directly invoke the 'update_status' from the probe routine. That removes any sort of timing dependency and a much better support for the device/driver model, the driver could be bound before the bus started, or eons after the bus started and the hardware would be properly initialized in all cases. BugLink: https://github.com/thesofproject/linux/issues/3531 Fixes: 56d4fe31af77 ("soundwire: Add MIPI DisCo property helpers") Fixes: 528be501b7d4a ("soundwire: sdw_slave: add probe_complete structure and new fields") Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com> Reviewed-by: Rander Wang <rander.wang@intel.com> Reviewed-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com> Reviewed-by: Bard Liao <yung-chuan.liao@linux.intel.com> Reviewed-by: Péter Ujfalusi <peter.ujfalusi@linux.intel.com> Link: https://lore.kernel.org/r/20220621225641.221170-2-pierre-louis.bossart@linux.intel.com Signed-off-by: Vinod Koul <vkoul@kernel.org>
2022-06-21 17:56:38 -05:00
mutex_init(&slave->sdw_dev_lock);
for (i = 0; i < SDW_MAX_PORTS; i++)
init_completion(&slave->port_ready[i]);
mutex_lock(&bus->bus_lock);
list_add_tail(&slave->node, &bus->slaves);
mutex_unlock(&bus->bus_lock);
ret = device_register(&slave->dev);
if (ret) {
dev_err(bus->dev, "Failed to add slave: ret %d\n", ret);
/*
* On err, don't free but drop ref as this will be freed
* when release method is invoked.
*/
mutex_lock(&bus->bus_lock);
list_del(&slave->node);
mutex_unlock(&bus->bus_lock);
put_device(&slave->dev);
return ret;
}
sdw_slave_debugfs_init(slave);
return ret;
}
EXPORT_SYMBOL(sdw_slave_add);
#if IS_ENABLED(CONFIG_ACPI)
static bool find_slave(struct sdw_bus *bus,
struct acpi_device *adev,
struct sdw_slave_id *id)
{
u64 addr;
unsigned int link_id;
acpi_status status;
status = acpi_evaluate_integer(adev->handle,
METHOD_NAME__ADR, NULL, &addr);
if (ACPI_FAILURE(status)) {
dev_err(bus->dev, "_ADR resolution failed: %x\n",
status);
return false;
}
if (bus->ops->override_adr)
addr = bus->ops->override_adr(bus, addr);
if (!addr)
return false;
/* Extract link id from ADR, Bit 51 to 48 (included) */
link_id = SDW_DISCO_LINK_ID(addr);
/* Check for link_id match */
if (link_id != bus->link_id)
return false;
sdw_extract_slave_id(bus, addr, id);
return true;
}
struct sdw_acpi_child_walk_data {
struct sdw_bus *bus;
struct acpi_device *adev;
struct sdw_slave_id id;
bool ignore_unique_id;
};
static int sdw_acpi_check_duplicate(struct acpi_device *adev, void *data)
{
struct sdw_acpi_child_walk_data *cwd = data;
struct sdw_bus *bus = cwd->bus;
struct sdw_slave_id id;
if (adev == cwd->adev)
return 0;
if (!find_slave(bus, adev, &id))
return 0;
if (cwd->id.sdw_version != id.sdw_version || cwd->id.mfg_id != id.mfg_id ||
cwd->id.part_id != id.part_id || cwd->id.class_id != id.class_id)
return 0;
if (cwd->id.unique_id != id.unique_id) {
dev_dbg(bus->dev,
"Valid unique IDs 0x%x 0x%x for Slave mfg_id 0x%04x, part_id 0x%04x\n",
cwd->id.unique_id, id.unique_id, cwd->id.mfg_id,
cwd->id.part_id);
cwd->ignore_unique_id = false;
return 0;
}
dev_err(bus->dev,
"Invalid unique IDs 0x%x 0x%x for Slave mfg_id 0x%04x, part_id 0x%04x\n",
cwd->id.unique_id, id.unique_id, cwd->id.mfg_id, cwd->id.part_id);
return -ENODEV;
}
static int sdw_acpi_find_one(struct acpi_device *adev, void *data)
{
struct sdw_bus *bus = data;
struct sdw_acpi_child_walk_data cwd = {
.bus = bus,
.adev = adev,
.ignore_unique_id = true,
};
int ret;
if (!find_slave(bus, adev, &cwd.id))
return 0;
/* Brute-force O(N^2) search for duplicates. */
ret = acpi_dev_for_each_child(ACPI_COMPANION(bus->dev),
sdw_acpi_check_duplicate, &cwd);
if (ret)
return ret;
if (cwd.ignore_unique_id)
cwd.id.unique_id = SDW_IGNORED_UNIQUE_ID;
/* Ignore errors and continue. */
sdw_slave_add(bus, &cwd.id, acpi_fwnode_handle(adev));
return 0;
}
/*
* sdw_acpi_find_slaves() - Find Slave devices in Master ACPI node
* @bus: SDW bus instance
*
* Scans Master ACPI node for SDW child Slave devices and registers it.
*/
int sdw_acpi_find_slaves(struct sdw_bus *bus)
{
struct acpi_device *parent;
parent = ACPI_COMPANION(bus->dev);
if (!parent) {
dev_err(bus->dev, "Can't find parent for acpi bind\n");
return -ENODEV;
}
return acpi_dev_for_each_child(parent, sdw_acpi_find_one, bus);
}
#endif
/*
* sdw_of_find_slaves() - Find Slave devices in master device tree node
* @bus: SDW bus instance
*
* Scans Master DT node for SDW child Slave devices and registers it.
*/
int sdw_of_find_slaves(struct sdw_bus *bus)
{
struct device *dev = bus->dev;
struct device_node *node;
for_each_child_of_node(bus->dev->of_node, node) {
int link_id, ret, len;
unsigned int sdw_version;
const char *compat = NULL;
struct sdw_slave_id id;
const __be32 *addr;
compat = of_get_property(node, "compatible", NULL);
if (!compat)
continue;
ret = sscanf(compat, "sdw%01x%04hx%04hx%02hhx", &sdw_version,
&id.mfg_id, &id.part_id, &id.class_id);
if (ret != 4) {
dev_err(dev, "Invalid compatible string found %s\n",
compat);
continue;
}
addr = of_get_property(node, "reg", &len);
if (!addr || (len < 2 * sizeof(u32))) {
dev_err(dev, "Invalid Link and Instance ID\n");
continue;
}
link_id = be32_to_cpup(addr++);
id.unique_id = be32_to_cpup(addr);
id.sdw_version = sdw_version;
/* Check for link_id match */
if (link_id != bus->link_id)
continue;
sdw_slave_add(bus, &id, of_fwnode_handle(node));
}
return 0;
}