mlxsw: core: Extend devlink health reporter with new events and parameters
Extend the devlink health reporter registered by mlxsw to report new health events and their related parameters. These are meant to aid in debugging of hardware / firmware issues. Besides the test event ('MLXSW_REG_MFDE_EVENT_ID_TEST') that is triggered following the devlink health 'test' sub-command, the new events are used to report the triggering of asserts in firmware code ('MLXSW_REG_MFDE_EVENT_ID_FW_ASSERT') and hardware issues ('MLXSW_REG_MFDE_EVENT_ID_FATAL_CAUSE'). Each event is accompanied by a severity parameter and per-event parameters that are meant to help root-cause the detected issue. Signed-off-by: Danielle Ratson <danieller@nvidia.com> Signed-off-by: Ido Schimmel <idosch@nvidia.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
e25c060c5f
commit
239cdd3f4c
@ -1708,12 +1708,93 @@ static void mlxsw_core_health_listener_func(const struct mlxsw_reg_info *reg,
|
||||
/* Listener object for the health reporter, built with MLXSW_EVENTL from
 * mlxsw_core_health_listener_func and the MFDE identifiers.
 * NOTE(review): presumably this routes MFDE events to that callback;
 * confirm against the MLXSW_EVENTL definition.
 */
static const struct mlxsw_listener mlxsw_core_health_listener =
|
||||
MLXSW_EVENTL(mlxsw_core_health_listener_func, MFDE, MFDE);
|
||||
|
||||
static int
|
||||
mlxsw_core_health_fw_fatal_dump_fatal_cause(const char *mfde_pl,
|
||||
struct devlink_fmsg *fmsg)
|
||||
{
|
||||
u32 val, tile_v;
|
||||
int err;
|
||||
|
||||
val = mlxsw_reg_mfde_fatal_cause_id_get(mfde_pl);
|
||||
err = devlink_fmsg_u32_pair_put(fmsg, "cause_id", val);
|
||||
if (err)
|
||||
return err;
|
||||
tile_v = mlxsw_reg_mfde_fatal_cause_tile_v_get(mfde_pl);
|
||||
if (tile_v) {
|
||||
val = mlxsw_reg_mfde_fatal_cause_tile_index_get(mfde_pl);
|
||||
err = devlink_fmsg_u8_pair_put(fmsg, "tile_index", val);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
mlxsw_core_health_fw_fatal_dump_fw_assert(const char *mfde_pl,
|
||||
struct devlink_fmsg *fmsg)
|
||||
{
|
||||
u32 val, tile_v;
|
||||
int err;
|
||||
|
||||
val = mlxsw_reg_mfde_fw_assert_var0_get(mfde_pl);
|
||||
err = devlink_fmsg_u32_pair_put(fmsg, "var0", val);
|
||||
if (err)
|
||||
return err;
|
||||
val = mlxsw_reg_mfde_fw_assert_var1_get(mfde_pl);
|
||||
err = devlink_fmsg_u32_pair_put(fmsg, "var1", val);
|
||||
if (err)
|
||||
return err;
|
||||
val = mlxsw_reg_mfde_fw_assert_var2_get(mfde_pl);
|
||||
err = devlink_fmsg_u32_pair_put(fmsg, "var2", val);
|
||||
if (err)
|
||||
return err;
|
||||
val = mlxsw_reg_mfde_fw_assert_var3_get(mfde_pl);
|
||||
err = devlink_fmsg_u32_pair_put(fmsg, "var3", val);
|
||||
if (err)
|
||||
return err;
|
||||
val = mlxsw_reg_mfde_fw_assert_var4_get(mfde_pl);
|
||||
err = devlink_fmsg_u32_pair_put(fmsg, "var4", val);
|
||||
if (err)
|
||||
return err;
|
||||
val = mlxsw_reg_mfde_fw_assert_existptr_get(mfde_pl);
|
||||
err = devlink_fmsg_u32_pair_put(fmsg, "existptr", val);
|
||||
if (err)
|
||||
return err;
|
||||
val = mlxsw_reg_mfde_fw_assert_callra_get(mfde_pl);
|
||||
err = devlink_fmsg_u32_pair_put(fmsg, "callra", val);
|
||||
if (err)
|
||||
return err;
|
||||
val = mlxsw_reg_mfde_fw_assert_oe_get(mfde_pl);
|
||||
err = devlink_fmsg_bool_pair_put(fmsg, "old_event", val);
|
||||
if (err)
|
||||
return err;
|
||||
tile_v = mlxsw_reg_mfde_fw_assert_tile_v_get(mfde_pl);
|
||||
if (tile_v) {
|
||||
val = mlxsw_reg_mfde_fw_assert_tile_index_get(mfde_pl);
|
||||
err = devlink_fmsg_u8_pair_put(fmsg, "tile_index", val);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
val = mlxsw_reg_mfde_fw_assert_ext_synd_get(mfde_pl);
|
||||
err = devlink_fmsg_u32_pair_put(fmsg, "ext_synd", val);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
mlxsw_core_health_fw_fatal_dump_kvd_im_stop(const char *mfde_pl,
|
||||
struct devlink_fmsg *fmsg)
|
||||
{
|
||||
u32 val;
|
||||
int err;
|
||||
|
||||
val = mlxsw_reg_mfde_kvd_im_stop_oe_get(mfde_pl);
|
||||
err = devlink_fmsg_bool_pair_put(fmsg, "old_event", val);
|
||||
if (err)
|
||||
return err;
|
||||
val = mlxsw_reg_mfde_kvd_im_stop_pipes_mask_get(mfde_pl);
|
||||
return devlink_fmsg_u32_pair_put(fmsg, "pipes_mask", val);
|
||||
}
|
||||
@ -1727,6 +1808,10 @@ mlxsw_core_health_fw_fatal_dump_crspace_to(const char *mfde_pl,
|
||||
|
||||
val = mlxsw_reg_mfde_crspace_to_log_address_get(mfde_pl);
|
||||
err = devlink_fmsg_u32_pair_put(fmsg, "log_address", val);
|
||||
if (err)
|
||||
return err;
|
||||
val = mlxsw_reg_mfde_crspace_to_oe_get(mfde_pl);
|
||||
err = devlink_fmsg_bool_pair_put(fmsg, "old_event", val);
|
||||
if (err)
|
||||
return err;
|
||||
val = mlxsw_reg_mfde_crspace_to_log_id_get(mfde_pl);
|
||||
@ -1774,6 +1859,15 @@ static int mlxsw_core_health_fw_fatal_dump(struct devlink_health_reporter *repor
|
||||
case MLXSW_REG_MFDE_EVENT_ID_KVD_IM_STOP:
|
||||
val_str = "KVD insertion machine stopped";
|
||||
break;
|
||||
case MLXSW_REG_MFDE_EVENT_ID_TEST:
|
||||
val_str = "Test";
|
||||
break;
|
||||
case MLXSW_REG_MFDE_EVENT_ID_FW_ASSERT:
|
||||
val_str = "FW assert";
|
||||
break;
|
||||
case MLXSW_REG_MFDE_EVENT_ID_FATAL_CAUSE:
|
||||
val_str = "Fatal cause";
|
||||
break;
|
||||
default:
|
||||
val_str = NULL;
|
||||
}
|
||||
@ -1782,6 +1876,38 @@ static int mlxsw_core_health_fw_fatal_dump(struct devlink_health_reporter *repor
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
||||
err = devlink_fmsg_arr_pair_nest_end(fmsg);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = devlink_fmsg_arr_pair_nest_start(fmsg, "severity");
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
val = mlxsw_reg_mfde_severity_get(mfde_pl);
|
||||
err = devlink_fmsg_u8_pair_put(fmsg, "id", val);
|
||||
if (err)
|
||||
return err;
|
||||
switch (val) {
|
||||
case MLXSW_REG_MFDE_SEVERITY_FATL:
|
||||
val_str = "Fatal";
|
||||
break;
|
||||
case MLXSW_REG_MFDE_SEVERITY_NRML:
|
||||
val_str = "Normal";
|
||||
break;
|
||||
case MLXSW_REG_MFDE_SEVERITY_INTR:
|
||||
val_str = "Debug";
|
||||
break;
|
||||
default:
|
||||
val_str = NULL;
|
||||
}
|
||||
if (val_str) {
|
||||
err = devlink_fmsg_string_pair_put(fmsg, "desc", val_str);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
||||
err = devlink_fmsg_arr_pair_nest_end(fmsg);
|
||||
if (err)
|
||||
return err;
|
||||
@ -1840,6 +1966,11 @@ static int mlxsw_core_health_fw_fatal_dump(struct devlink_health_reporter *repor
|
||||
case MLXSW_REG_MFDE_EVENT_ID_KVD_IM_STOP:
|
||||
return mlxsw_core_health_fw_fatal_dump_kvd_im_stop(mfde_pl,
|
||||
fmsg);
|
||||
case MLXSW_REG_MFDE_EVENT_ID_FW_ASSERT:
|
||||
return mlxsw_core_health_fw_fatal_dump_fw_assert(mfde_pl, fmsg);
|
||||
case MLXSW_REG_MFDE_EVENT_ID_FATAL_CAUSE:
|
||||
return mlxsw_core_health_fw_fatal_dump_fatal_cause(mfde_pl,
|
||||
fmsg);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
Loading…
Reference in New Issue
Block a user