habanalabs: wait for preboot ready after hard reset
Currently we do not wait for preboot to become ready after a hard reset. This leads to a race in which the COMMs protocol begins but gets no response from the f/w. Signed-off-by: Ohad Sharabi <osharabi@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
This commit is contained in:
parent
a85e389a84
commit
2b9e583d0a
@ -1245,15 +1245,10 @@ static void detect_cpu_boot_status(struct hl_device *hdev, u32 status)
|
||||
}
|
||||
}
|
||||
|
||||
static int hl_fw_read_preboot_caps(struct hl_device *hdev,
|
||||
u32 cpu_boot_status_reg,
|
||||
u32 sts_boot_dev_sts0_reg,
|
||||
u32 sts_boot_dev_sts1_reg,
|
||||
u32 boot_err0_reg, u32 boot_err1_reg,
|
||||
u32 timeout)
|
||||
static int hl_fw_wait_preboot_ready(struct hl_device *hdev)
|
||||
{
|
||||
struct asic_fixed_properties *prop = &hdev->asic_prop;
|
||||
u32 status, reg_val;
|
||||
struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
|
||||
u32 status;
|
||||
int rc;
|
||||
|
||||
/* Need to check two possible scenarios:
|
||||
@ -1266,13 +1261,13 @@ static int hl_fw_read_preboot_caps(struct hl_device *hdev,
|
||||
*/
|
||||
rc = hl_poll_timeout(
|
||||
hdev,
|
||||
cpu_boot_status_reg,
|
||||
pre_fw_load->cpu_boot_status_reg,
|
||||
status,
|
||||
(status == CPU_BOOT_STATUS_NIC_FW_RDY) ||
|
||||
(status == CPU_BOOT_STATUS_READY_TO_BOOT) ||
|
||||
(status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT),
|
||||
hdev->fw_poll_interval_usec,
|
||||
timeout);
|
||||
pre_fw_load->wait_for_preboot_timeout);
|
||||
|
||||
if (rc) {
|
||||
dev_err(hdev->dev, "CPU boot ready status timeout\n");
|
||||
@ -1282,12 +1277,32 @@ static int hl_fw_read_preboot_caps(struct hl_device *hdev,
|
||||
* of reading specific errors
|
||||
*/
|
||||
if (status != -1)
|
||||
fw_read_errors(hdev, boot_err0_reg, boot_err1_reg,
|
||||
sts_boot_dev_sts0_reg,
|
||||
sts_boot_dev_sts1_reg);
|
||||
fw_read_errors(hdev, pre_fw_load->boot_err0_reg,
|
||||
pre_fw_load->boot_err1_reg,
|
||||
pre_fw_load->sts_boot_dev_sts0_reg,
|
||||
pre_fw_load->sts_boot_dev_sts1_reg);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
hdev->fw_loader.fw_comp_loaded |= FW_TYPE_PREBOOT_CPU;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int hl_fw_read_preboot_caps(struct hl_device *hdev)
|
||||
{
|
||||
struct pre_fw_load_props *pre_fw_load;
|
||||
struct asic_fixed_properties *prop;
|
||||
u32 reg_val;
|
||||
int rc;
|
||||
|
||||
prop = &hdev->asic_prop;
|
||||
pre_fw_load = &hdev->fw_loader.pre_fw_load;
|
||||
|
||||
rc = hl_fw_wait_preboot_ready(hdev);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
/*
|
||||
* the registers DEV_STS* contain FW capabilities/features.
|
||||
* We can rely on these registers only if bit CPU_BOOT_DEV_STS*_ENABLED
|
||||
@ -1298,13 +1313,13 @@ static int hl_fw_read_preboot_caps(struct hl_device *hdev,
|
||||
* In case it is not enabled the stored value will be left 0- all
|
||||
* caps/features are off
|
||||
*/
|
||||
reg_val = RREG32(sts_boot_dev_sts0_reg);
|
||||
reg_val = RREG32(pre_fw_load->sts_boot_dev_sts0_reg);
|
||||
if (reg_val & CPU_BOOT_DEV_STS0_ENABLED) {
|
||||
prop->fw_cpu_boot_dev_sts0_valid = true;
|
||||
prop->fw_preboot_cpu_boot_dev_sts0 = reg_val;
|
||||
}
|
||||
|
||||
reg_val = RREG32(sts_boot_dev_sts1_reg);
|
||||
reg_val = RREG32(pre_fw_load->sts_boot_dev_sts1_reg);
|
||||
if (reg_val & CPU_BOOT_DEV_STS1_ENABLED) {
|
||||
prop->fw_cpu_boot_dev_sts1_valid = true;
|
||||
prop->fw_preboot_cpu_boot_dev_sts1 = reg_val;
|
||||
@ -1447,24 +1462,21 @@ static int hl_fw_static_read_preboot_status(struct hl_device *hdev)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg,
|
||||
u32 sts_boot_dev_sts0_reg,
|
||||
u32 sts_boot_dev_sts1_reg, u32 boot_err0_reg,
|
||||
u32 boot_err1_reg, u32 timeout)
|
||||
int hl_fw_read_preboot_status(struct hl_device *hdev)
|
||||
{
|
||||
int rc;
|
||||
|
||||
if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
|
||||
return 0;
|
||||
|
||||
/* get FW pre-load parameters */
|
||||
hdev->asic_funcs->init_firmware_preload_params(hdev);
|
||||
|
||||
/*
|
||||
* In order to determine boot method (static VS dynamic) we need to
|
||||
* read the boot caps register
|
||||
*/
|
||||
rc = hl_fw_read_preboot_caps(hdev, cpu_boot_status_reg,
|
||||
sts_boot_dev_sts0_reg,
|
||||
sts_boot_dev_sts1_reg, boot_err0_reg,
|
||||
boot_err1_reg, timeout);
|
||||
rc = hl_fw_read_preboot_caps(hdev);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
@ -2454,6 +2466,13 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
|
||||
*/
|
||||
dyn_regs = &fw_loader->dynamic_loader.comm_desc.cpu_dyn_regs;
|
||||
|
||||
/* if no preboot loaded indication- wait for preboot */
|
||||
if (!(hdev->fw_loader.fw_comp_loaded & FW_TYPE_PREBOOT_CPU)) {
|
||||
rc = hl_fw_wait_preboot_ready(hdev);
|
||||
if (rc)
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
rc = hl_fw_dynamic_send_protocol_cmd(hdev, fw_loader, COMMS_RST_STATE,
|
||||
0, true,
|
||||
fw_loader->cpu_timeout);
|
||||
|
@ -1307,6 +1307,24 @@ struct dynamic_fw_load_mgr {
|
||||
bool fw_desc_valid;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct pre_fw_load_props - needed properties for pre-FW load
|
||||
* @cpu_boot_status_reg: cpu_boot_status register address
|
||||
* @sts_boot_dev_sts0_reg: sts_boot_dev_sts0 register address
|
||||
* @sts_boot_dev_sts1_reg: sts_boot_dev_sts1 register address
|
||||
* @boot_err0_reg: boot_err0 register address
|
||||
* @boot_err1_reg: boot_err1 register address
|
||||
* @wait_for_preboot_timeout: timeout to poll for preboot ready
|
||||
*/
|
||||
struct pre_fw_load_props {
|
||||
u32 cpu_boot_status_reg;
|
||||
u32 sts_boot_dev_sts0_reg;
|
||||
u32 sts_boot_dev_sts1_reg;
|
||||
u32 boot_err0_reg;
|
||||
u32 boot_err1_reg;
|
||||
u32 wait_for_preboot_timeout;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct fw_image_props - properties of FW image
|
||||
* @image_name: name of the image
|
||||
@ -1323,6 +1341,7 @@ struct fw_image_props {
|
||||
* struct fw_load_mgr - manager FW loading process
|
||||
* @dynamic_loader: specific structure for dynamic load
|
||||
* @static_loader: specific structure for static load
|
||||
* @pre_fw_load: parameters for pre FW load
|
||||
* @boot_fit_img: boot fit image properties
|
||||
* @linux_img: linux image properties
|
||||
* @cpu_timeout: CPU response timeout in usec
|
||||
@ -1338,6 +1357,7 @@ struct fw_load_mgr {
|
||||
struct dynamic_fw_load_mgr dynamic_loader;
|
||||
struct static_fw_load_mgr static_loader;
|
||||
};
|
||||
struct pre_fw_load_props pre_fw_load;
|
||||
struct fw_image_props boot_fit_img;
|
||||
struct fw_image_props linux_img;
|
||||
u32 cpu_timeout;
|
||||
@ -1467,6 +1487,7 @@ struct hl_cs;
|
||||
* @get_msi_info: Retrieve asic-specific MSI ID of the f/w async event
|
||||
* @map_pll_idx_to_fw_idx: convert driver specific per asic PLL index to
|
||||
* generic f/w compatible PLL Indexes
|
||||
* @init_firmware_preload_params: initialize pre FW-load parameters.
|
||||
* @init_firmware_loader: initialize data for FW loader.
|
||||
* @init_cpu_scrambler_dram: Enable CPU specific DRAM scrambling
|
||||
* @state_dump_init: initialize constants required for state dump
|
||||
@ -1599,6 +1620,7 @@ struct hl_asic_funcs {
|
||||
int (*ack_mmu_errors)(struct hl_device *hdev, u64 mmu_cap_mask);
|
||||
void (*get_msi_info)(__le32 *table);
|
||||
int (*map_pll_idx_to_fw_idx)(u32 pll_idx);
|
||||
void (*init_firmware_preload_params)(struct hl_device *hdev);
|
||||
void (*init_firmware_loader)(struct hl_device *hdev);
|
||||
void (*init_cpu_scrambler_dram)(struct hl_device *hdev);
|
||||
void (*state_dump_init)(struct hl_device *hdev);
|
||||
@ -3577,10 +3599,7 @@ int hl_fw_cpucp_power_get(struct hl_device *hdev, u64 *power);
|
||||
void hl_fw_ask_hard_reset_without_linux(struct hl_device *hdev);
|
||||
void hl_fw_ask_halt_machine_without_linux(struct hl_device *hdev);
|
||||
int hl_fw_init_cpu(struct hl_device *hdev);
|
||||
int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg,
|
||||
u32 sts_boot_dev_sts0_reg,
|
||||
u32 sts_boot_dev_sts1_reg, u32 boot_err0_reg,
|
||||
u32 boot_err1_reg, u32 timeout);
|
||||
int hl_fw_read_preboot_status(struct hl_device *hdev);
|
||||
int hl_fw_dynamic_send_protocol_cmd(struct hl_device *hdev,
|
||||
struct fw_load_mgr *fw_loader,
|
||||
enum comms_cmd cmd, unsigned int size,
|
||||
|
@ -869,11 +869,7 @@ pci_init:
|
||||
/* Before continuing in the initialization, we need to read the preboot
|
||||
* version to determine whether we run with a security-enabled firmware
|
||||
*/
|
||||
rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
|
||||
mmCPU_BOOT_DEV_STS0,
|
||||
mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
|
||||
mmCPU_BOOT_ERR1,
|
||||
GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
|
||||
rc = hl_fw_read_preboot_status(hdev);
|
||||
if (rc) {
|
||||
if (hdev->reset_on_preboot_fail)
|
||||
hdev->asic_funcs->hw_fini(hdev, true, false);
|
||||
@ -3840,6 +3836,18 @@ static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
|
||||
GAUDI_CPU_RESET_WAIT_MSEC;
|
||||
}
|
||||
|
||||
static void gaudi_init_firmware_preload_params(struct hl_device *hdev)
|
||||
{
|
||||
struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
|
||||
|
||||
pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
|
||||
pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
|
||||
pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
|
||||
pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
|
||||
pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
|
||||
pre_fw_load->wait_for_preboot_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
|
||||
}
|
||||
|
||||
static void gaudi_init_firmware_loader(struct hl_device *hdev)
|
||||
{
|
||||
struct asic_fixed_properties *prop = &hdev->asic_prop;
|
||||
@ -9231,6 +9239,7 @@ static const struct hl_asic_funcs gaudi_funcs = {
|
||||
.enable_events_from_fw = gaudi_enable_events_from_fw,
|
||||
.ack_mmu_errors = gaudi_ack_mmu_page_fault_or_access_error,
|
||||
.map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
|
||||
.init_firmware_preload_params = gaudi_init_firmware_preload_params,
|
||||
.init_firmware_loader = gaudi_init_firmware_loader,
|
||||
.init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
|
||||
.state_dump_init = gaudi_state_dump_init,
|
||||
|
@ -2531,11 +2531,7 @@ pci_init:
|
||||
/* Before continuing in the initialization, we need to read the preboot
|
||||
* version to determine whether we run with a security-enabled firmware
|
||||
*/
|
||||
rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
|
||||
mmCPU_BOOT_DEV_STS0,
|
||||
mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
|
||||
mmCPU_BOOT_ERR1,
|
||||
GAUDI2_PREBOOT_REQ_TIMEOUT_USEC);
|
||||
rc = hl_fw_read_preboot_status(hdev);
|
||||
if (rc) {
|
||||
if (hdev->reset_on_preboot_fail)
|
||||
hdev->asic_funcs->hw_fini(hdev, true, false);
|
||||
@ -3832,6 +3828,18 @@ skip_engines:
|
||||
gaudi2_sync_irqs(hdev);
|
||||
}
|
||||
|
||||
static void gaudi2_init_firmware_preload_params(struct hl_device *hdev)
|
||||
{
|
||||
struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
|
||||
|
||||
pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
|
||||
pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
|
||||
pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
|
||||
pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
|
||||
pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
|
||||
pre_fw_load->wait_for_preboot_timeout = GAUDI2_PREBOOT_REQ_TIMEOUT_USEC;
|
||||
}
|
||||
|
||||
static void gaudi2_init_firmware_loader(struct hl_device *hdev)
|
||||
{
|
||||
struct fw_load_mgr *fw_loader = &hdev->fw_loader;
|
||||
@ -9762,6 +9770,7 @@ static const struct hl_asic_funcs gaudi2_funcs = {
|
||||
.ack_mmu_errors = gaudi2_ack_mmu_page_fault_or_access_error,
|
||||
.get_msi_info = gaudi2_get_msi_info,
|
||||
.map_pll_idx_to_fw_idx = gaudi2_map_pll_idx_to_fw_idx,
|
||||
.init_firmware_preload_params = gaudi2_init_firmware_preload_params,
|
||||
.init_firmware_loader = gaudi2_init_firmware_loader,
|
||||
.init_cpu_scrambler_dram = gaudi2_init_scrambler_hbm,
|
||||
.state_dump_init = gaudi2_state_dump_init,
|
||||
|
@ -665,11 +665,7 @@ pci_init:
|
||||
/* Before continuing in the initialization, we need to read the preboot
|
||||
* version to determine whether we run with a security-enabled firmware
|
||||
*/
|
||||
rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
|
||||
mmCPU_BOOT_DEV_STS0,
|
||||
mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
|
||||
mmCPU_BOOT_ERR1,
|
||||
GOYA_BOOT_FIT_REQ_TIMEOUT_USEC);
|
||||
rc = hl_fw_read_preboot_status(hdev);
|
||||
if (rc) {
|
||||
if (hdev->reset_on_preboot_fail)
|
||||
hdev->asic_funcs->hw_fini(hdev, true, false);
|
||||
@ -2580,6 +2576,18 @@ static void goya_init_static_firmware_loader(struct hl_device *hdev)
|
||||
static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
|
||||
}
|
||||
|
||||
static void goya_init_firmware_preload_params(struct hl_device *hdev)
|
||||
{
|
||||
struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
|
||||
|
||||
pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
|
||||
pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
|
||||
pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
|
||||
pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
|
||||
pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
|
||||
pre_fw_load->wait_for_preboot_timeout = GOYA_BOOT_FIT_REQ_TIMEOUT_USEC;
|
||||
}
|
||||
|
||||
static void goya_init_firmware_loader(struct hl_device *hdev)
|
||||
{
|
||||
struct asic_fixed_properties *prop = &hdev->asic_prop;
|
||||
@ -5510,6 +5518,7 @@ static const struct hl_asic_funcs goya_funcs = {
|
||||
.enable_events_from_fw = goya_enable_events_from_fw,
|
||||
.ack_mmu_errors = goya_ack_mmu_page_fault_or_access_error,
|
||||
.map_pll_idx_to_fw_idx = goya_map_pll_idx_to_fw_idx,
|
||||
.init_firmware_preload_params = goya_init_firmware_preload_params,
|
||||
.init_firmware_loader = goya_init_firmware_loader,
|
||||
.init_cpu_scrambler_dram = goya_cpu_init_scrambler_dram,
|
||||
.state_dump_init = goya_state_dump_init,
|
||||
|
Loading…
x
Reference in New Issue
Block a user