drm/amd/pm: add new data in metrics table

Export new data in the metrics table: the accumulated gfx and memory
utilization counters, as well as the temperature of each HBM instance.

v2:
change the metrics table version to v1.1

v3:
fix the coding style

v4:
rebase against latest kernel

Signed-off-by: Kenneth Feng <kenneth.feng@amd.com>
Reviewed-by: Kevin Wang <kevin1.wang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Kenneth Feng 2021-03-05 16:41:45 -05:00 committed by Alex Deucher
parent d86fd724e5
commit bea9cd3f8d
4 changed files with 26 additions and 18 deletions

View File

@ -231,6 +231,8 @@ enum pp_df_cstate {
#define XGMI_MODE_PSTATE_D3 0 #define XGMI_MODE_PSTATE_D3 0
#define XGMI_MODE_PSTATE_D0 1 #define XGMI_MODE_PSTATE_D0 1
#define NUM_HBM_INSTANCES 4
struct seq_file; struct seq_file;
enum amd_pp_clock_type; enum amd_pp_clock_type;
struct amd_pp_simple_clock_info; struct amd_pp_simple_clock_info;
@ -449,6 +451,11 @@ struct gpu_metrics_v1_1 {
uint16_t pcie_link_speed; // in 0.1 GT/s uint16_t pcie_link_speed; // in 0.1 GT/s
uint16_t padding; uint16_t padding;
uint32_t gfx_activity_acc;
uint32_t mem_activity_acc;
uint16_t temperature_hbm[NUM_HBM_INSTANCES];
}; };
/* /*

View File

@ -265,8 +265,6 @@ int smu_v13_0_get_current_pcie_link_speed_level(struct smu_context *smu);
int smu_v13_0_get_current_pcie_link_speed(struct smu_context *smu); int smu_v13_0_get_current_pcie_link_speed(struct smu_context *smu);
void smu_v13_0_init_gpu_metrics_v1_0(struct gpu_metrics_v1_0 *gpu_metrics);
int smu_v13_0_gfx_ulv_control(struct smu_context *smu, int smu_v13_0_gfx_ulv_control(struct smu_context *smu,
bool enablement); bool enablement);

View File

@ -1296,10 +1296,10 @@ static ssize_t aldebaran_get_gpu_metrics(struct smu_context *smu,
void **table) void **table)
{ {
struct smu_table_context *smu_table = &smu->smu_table; struct smu_table_context *smu_table = &smu->smu_table;
struct gpu_metrics_v1_0 *gpu_metrics = struct gpu_metrics_v1_1 *gpu_metrics =
(struct gpu_metrics_v1_0 *)smu_table->gpu_metrics_table; (struct gpu_metrics_v1_1 *)smu_table->gpu_metrics_table;
SmuMetrics_t metrics; SmuMetrics_t metrics;
int ret = 0; int i, ret = 0;
ret = smu_cmn_get_metrics_table(smu, ret = smu_cmn_get_metrics_table(smu,
&metrics, &metrics,
@ -1307,7 +1307,7 @@ static ssize_t aldebaran_get_gpu_metrics(struct smu_context *smu,
if (ret) if (ret)
return ret; return ret;
smu_v13_0_init_gpu_metrics_v1_0(gpu_metrics); smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 1);
gpu_metrics->temperature_edge = metrics.TemperatureEdge; gpu_metrics->temperature_edge = metrics.TemperatureEdge;
gpu_metrics->temperature_hotspot = metrics.TemperatureHotspot; gpu_metrics->temperature_hotspot = metrics.TemperatureHotspot;
@ -1318,12 +1318,16 @@ static ssize_t aldebaran_get_gpu_metrics(struct smu_context *smu,
gpu_metrics->average_gfx_activity = metrics.AverageGfxActivity; gpu_metrics->average_gfx_activity = metrics.AverageGfxActivity;
gpu_metrics->average_umc_activity = metrics.AverageUclkActivity; gpu_metrics->average_umc_activity = metrics.AverageUclkActivity;
gpu_metrics->average_mm_activity = 0;
gpu_metrics->average_socket_power = metrics.AverageSocketPower; gpu_metrics->average_socket_power = metrics.AverageSocketPower;
gpu_metrics->energy_accumulator = 0;
gpu_metrics->average_gfxclk_frequency = metrics.AverageGfxclkFrequency; gpu_metrics->average_gfxclk_frequency = metrics.AverageGfxclkFrequency;
gpu_metrics->average_socclk_frequency = metrics.AverageSocclkFrequency; gpu_metrics->average_socclk_frequency = metrics.AverageSocclkFrequency;
gpu_metrics->average_uclk_frequency = metrics.AverageUclkFrequency; gpu_metrics->average_uclk_frequency = metrics.AverageUclkFrequency;
gpu_metrics->average_vclk0_frequency = 0;
gpu_metrics->average_dclk0_frequency = 0;
gpu_metrics->current_gfxclk = metrics.CurrClock[PPCLK_GFXCLK]; gpu_metrics->current_gfxclk = metrics.CurrClock[PPCLK_GFXCLK];
gpu_metrics->current_socclk = metrics.CurrClock[PPCLK_SOCCLK]; gpu_metrics->current_socclk = metrics.CurrClock[PPCLK_SOCCLK];
@ -1333,14 +1337,24 @@ static ssize_t aldebaran_get_gpu_metrics(struct smu_context *smu,
gpu_metrics->throttle_status = metrics.ThrottlerStatus; gpu_metrics->throttle_status = metrics.ThrottlerStatus;
gpu_metrics->current_fan_speed = 0;
gpu_metrics->pcie_link_width = gpu_metrics->pcie_link_width =
smu_v13_0_get_current_pcie_link_width(smu); smu_v13_0_get_current_pcie_link_width(smu);
gpu_metrics->pcie_link_speed = gpu_metrics->pcie_link_speed =
aldebaran_get_current_pcie_link_speed(smu); aldebaran_get_current_pcie_link_speed(smu);
gpu_metrics->system_clock_counter = ktime_get_boottime_ns();
gpu_metrics->gfx_activity_acc = metrics.GfxBusyAcc;
gpu_metrics->mem_activity_acc = metrics.DramBusyAcc;
for (i = 0; i < NUM_HBM_INSTANCES; i++)
gpu_metrics->temperature_hbm[i] = metrics.TemperatureAllHBM[i];
*table = (void *)gpu_metrics; *table = (void *)gpu_metrics;
return sizeof(struct gpu_metrics_v1_0); return sizeof(struct gpu_metrics_v1_1);
} }
static bool aldebaran_is_mode1_reset_supported(struct smu_context *smu) static bool aldebaran_is_mode1_reset_supported(struct smu_context *smu)

View File

@ -1809,14 +1809,3 @@ int smu_v13_0_get_current_pcie_link_speed(struct smu_context *smu)
return link_speed[speed_level]; return link_speed[speed_level];
} }
/*
 * smu_v13_0_init_gpu_metrics_v1_0 - reset a v1.0 gpu metrics table
 * @gpu_metrics: table to initialize; every byte of it is overwritten
 *
 * Fills the whole structure with 0xFF bytes, then writes the common
 * header (structure size, format revision 1, content revision 0) and
 * stamps system_clock_counter with ktime_get_boottime_ns().
 */
void smu_v13_0_init_gpu_metrics_v1_0(struct gpu_metrics_v1_0 *gpu_metrics)
{
/* NOTE(review): 0xFF presumably marks fields the caller never fills in - confirm */
memset(gpu_metrics, 0xFF, sizeof(struct gpu_metrics_v1_0));
/* Header fields are set after the memset so they are not left as 0xFF. */
gpu_metrics->common_header.structure_size =
sizeof(struct gpu_metrics_v1_0);
gpu_metrics->common_header.format_revision = 1;
gpu_metrics->common_header.content_revision = 0;
/* Boot-time based timestamp taken at initialization. */
gpu_metrics->system_clock_counter = ktime_get_boottime_ns();
}