edac: reduce stack pressure by using a pre-allocated buffer
The number of variables on the stack is too big. Reduce the stack usage by using a pre-allocated error buffer. Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
This commit is contained in:
parent
80cc7d87d5
commit
c7ef764554
@ -1065,7 +1065,6 @@ static void edac_ue_error(struct mem_ctl_info *mci,
|
|||||||
edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count);
|
edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define OTHER_LABEL " or "
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* edac_mc_handle_error - reports a memory event to userspace
|
* edac_mc_handle_error - reports a memory event to userspace
|
||||||
@ -1097,19 +1096,28 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
|
|||||||
const char *msg,
|
const char *msg,
|
||||||
const char *other_detail)
|
const char *other_detail)
|
||||||
{
|
{
|
||||||
/* FIXME: too much for stack: move it to some pre-alocated area */
|
char detail[80];
|
||||||
char detail[80], location[80];
|
|
||||||
char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms];
|
|
||||||
char *p;
|
char *p;
|
||||||
int row = -1, chan = -1;
|
int row = -1, chan = -1;
|
||||||
int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer };
|
int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer };
|
||||||
int i;
|
int i, n_labels = 0;
|
||||||
long grain;
|
|
||||||
bool enable_per_layer_report = false;
|
|
||||||
u8 grain_bits;
|
u8 grain_bits;
|
||||||
|
struct edac_raw_error_desc *e = &mci->error_desc;
|
||||||
|
|
||||||
edac_dbg(3, "MC%d\n", mci->mc_idx);
|
edac_dbg(3, "MC%d\n", mci->mc_idx);
|
||||||
|
|
||||||
|
/* Fills the error report buffer */
|
||||||
|
memset(e, 0, sizeof (*e));
|
||||||
|
e->error_count = error_count;
|
||||||
|
e->top_layer = top_layer;
|
||||||
|
e->mid_layer = mid_layer;
|
||||||
|
e->low_layer = low_layer;
|
||||||
|
e->page_frame_number = page_frame_number;
|
||||||
|
e->offset_in_page = offset_in_page;
|
||||||
|
e->syndrome = syndrome;
|
||||||
|
e->msg = msg;
|
||||||
|
e->other_detail = other_detail;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check if the event report is consistent and if the memory
|
* Check if the event report is consistent and if the memory
|
||||||
* location is known. If it is known, enable_per_layer_report will be
|
* location is known. If it is known, enable_per_layer_report will be
|
||||||
@ -1132,7 +1140,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
|
|||||||
pos[i] = -1;
|
pos[i] = -1;
|
||||||
}
|
}
|
||||||
if (pos[i] >= 0)
|
if (pos[i] >= 0)
|
||||||
enable_per_layer_report = true;
|
e->enable_per_layer_report = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1146,8 +1154,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
|
|||||||
* where each memory belongs to a separate channel within the same
|
* where each memory belongs to a separate channel within the same
|
||||||
* branch.
|
* branch.
|
||||||
*/
|
*/
|
||||||
grain = 0;
|
p = e->label;
|
||||||
p = label;
|
|
||||||
*p = '\0';
|
*p = '\0';
|
||||||
|
|
||||||
for (i = 0; i < mci->tot_dimms; i++) {
|
for (i = 0; i < mci->tot_dimms; i++) {
|
||||||
@ -1161,8 +1168,8 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
|
|||||||
continue;
|
continue;
|
||||||
|
|
||||||
/* get the max grain, over the error match range */
|
/* get the max grain, over the error match range */
|
||||||
if (dimm->grain > grain)
|
if (dimm->grain > e->grain)
|
||||||
grain = dimm->grain;
|
e->grain = dimm->grain;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If the error is memory-controller wide, there's no need to
|
* If the error is memory-controller wide, there's no need to
|
||||||
@ -1170,8 +1177,13 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
|
|||||||
* channel/memory controller/... may be affected.
|
* channel/memory controller/... may be affected.
|
||||||
* Also, don't show errors for empty DIMM slots.
|
* Also, don't show errors for empty DIMM slots.
|
||||||
*/
|
*/
|
||||||
if (enable_per_layer_report && dimm->nr_pages) {
|
if (e->enable_per_layer_report && dimm->nr_pages) {
|
||||||
if (p != label) {
|
if (n_labels >= EDAC_MAX_LABELS) {
|
||||||
|
e->enable_per_layer_report = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
n_labels++;
|
||||||
|
if (p != e->label) {
|
||||||
strcpy(p, OTHER_LABEL);
|
strcpy(p, OTHER_LABEL);
|
||||||
p += strlen(OTHER_LABEL);
|
p += strlen(OTHER_LABEL);
|
||||||
}
|
}
|
||||||
@ -1198,12 +1210,12 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!enable_per_layer_report) {
|
if (!e->enable_per_layer_report) {
|
||||||
strcpy(label, "any memory");
|
strcpy(e->label, "any memory");
|
||||||
} else {
|
} else {
|
||||||
edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan);
|
edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan);
|
||||||
if (p == label)
|
if (p == e->label)
|
||||||
strcpy(label, "unknown memory");
|
strcpy(e->label, "unknown memory");
|
||||||
if (type == HW_EVENT_ERR_CORRECTED) {
|
if (type == HW_EVENT_ERR_CORRECTED) {
|
||||||
if (row >= 0) {
|
if (row >= 0) {
|
||||||
mci->csrows[row]->ce_count += error_count;
|
mci->csrows[row]->ce_count += error_count;
|
||||||
@ -1216,7 +1228,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Fill the RAM location data */
|
/* Fill the RAM location data */
|
||||||
p = location;
|
p = e->location;
|
||||||
|
|
||||||
for (i = 0; i < mci->n_layers; i++) {
|
for (i = 0; i < mci->n_layers; i++) {
|
||||||
if (pos[i] < 0)
|
if (pos[i] < 0)
|
||||||
@ -1226,32 +1238,35 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
|
|||||||
edac_layer_name[mci->layers[i].type],
|
edac_layer_name[mci->layers[i].type],
|
||||||
pos[i]);
|
pos[i]);
|
||||||
}
|
}
|
||||||
if (p > location)
|
if (p > e->location)
|
||||||
*(p - 1) = '\0';
|
*(p - 1) = '\0';
|
||||||
|
|
||||||
/* Report the error via the trace interface */
|
/* Report the error via the trace interface */
|
||||||
grain_bits = fls_long(grain) + 1;
|
grain_bits = fls_long(e->grain) + 1;
|
||||||
trace_mc_event(type, msg, label, error_count,
|
trace_mc_event(type, e->msg, e->label, e->error_count,
|
||||||
mci->mc_idx, top_layer, mid_layer, low_layer,
|
mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer,
|
||||||
PAGES_TO_MiB(page_frame_number) | offset_in_page,
|
PAGES_TO_MiB(e->page_frame_number) | e->offset_in_page,
|
||||||
grain_bits, syndrome, other_detail);
|
grain_bits, e->syndrome, other_detail);
|
||||||
|
|
||||||
/* Memory type dependent details about the error */
|
/* Memory type dependent details about the error */
|
||||||
if (type == HW_EVENT_ERR_CORRECTED) {
|
if (type == HW_EVENT_ERR_CORRECTED) {
|
||||||
snprintf(detail, sizeof(detail),
|
snprintf(detail, sizeof(detail),
|
||||||
"page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx",
|
"page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx",
|
||||||
page_frame_number, offset_in_page,
|
e->page_frame_number, e->offset_in_page,
|
||||||
grain, syndrome);
|
e->grain, e->syndrome);
|
||||||
edac_ce_error(mci, error_count, pos, msg, location, label,
|
edac_ce_error(mci, e->error_count, pos, e->msg, e->location,
|
||||||
detail, other_detail, enable_per_layer_report,
|
e->label, detail, other_detail,
|
||||||
page_frame_number, offset_in_page, grain);
|
e->enable_per_layer_report,
|
||||||
|
e->page_frame_number, e->offset_in_page,
|
||||||
|
e->grain);
|
||||||
} else {
|
} else {
|
||||||
snprintf(detail, sizeof(detail),
|
snprintf(detail, sizeof(detail),
|
||||||
"page:0x%lx offset:0x%lx grain:%ld",
|
"page:0x%lx offset:0x%lx grain:%ld",
|
||||||
page_frame_number, offset_in_page, grain);
|
page_frame_number, offset_in_page, e->grain);
|
||||||
|
|
||||||
edac_ue_error(mci, error_count, pos, msg, location, label,
|
edac_ue_error(mci, e->error_count, pos, e->msg, e->location,
|
||||||
detail, other_detail, enable_per_layer_report);
|
e->label, detail, other_detail,
|
||||||
|
e->enable_per_layer_report);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(edac_mc_handle_error);
|
EXPORT_SYMBOL_GPL(edac_mc_handle_error);
|
||||||
|
@ -47,8 +47,18 @@ static inline void opstate_init(void)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Max length of a DIMM label*/
|
||||||
#define EDAC_MC_LABEL_LEN 31
|
#define EDAC_MC_LABEL_LEN 31
|
||||||
|
|
||||||
|
/* Maximum size of the location string */
|
||||||
|
#define LOCATION_SIZE 80
|
||||||
|
|
||||||
|
/* Defines the maximum number of labels that can be reported */
|
||||||
|
#define EDAC_MAX_LABELS 8
|
||||||
|
|
||||||
|
/* String used to join two or more labels */
|
||||||
|
#define OTHER_LABEL " or "
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* enum dev_type - describe the type of memory DRAM chips used at the stick
|
* enum dev_type - describe the type of memory DRAM chips used at the stick
|
||||||
* @DEV_UNKNOWN: Can't be determined, or MC doesn't support detect it
|
* @DEV_UNKNOWN: Can't be determined, or MC doesn't support detect it
|
||||||
@ -553,6 +563,46 @@ struct errcount_attribute_data {
|
|||||||
int layer0, layer1, layer2;
|
int layer0, layer1, layer2;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* edac_raw_error_desc - Raw error report structure
|
||||||
|
* @grain: minimum granularity for an error report, in bytes
|
||||||
|
* @error_count: number of errors of the same type
|
||||||
|
* @top_layer: top layer of the error (layer[0])
|
||||||
|
* @mid_layer: middle layer of the error (layer[1])
|
||||||
|
* @low_layer: low layer of the error (layer[2])
|
||||||
|
* @page_frame_number: page where the error happened
|
||||||
|
* @offset_in_page: page offset
|
||||||
|
* @syndrome: syndrome of the error (or 0 if unknown or if
|
||||||
|
* the syndrome is not applicable)
|
||||||
|
* @msg: error message
|
||||||
|
* @location: location of the error
|
||||||
|
* @label: label of the affected DIMM(s)
|
||||||
|
* @other_detail: other driver-specific detail about the error
|
||||||
|
* @enable_per_layer_report: if false, the error affects all layers
|
||||||
|
* (typically, a memory controller error)
|
||||||
|
*/
|
||||||
|
struct edac_raw_error_desc {
|
||||||
|
/*
|
||||||
|
* NOTE: everything before grain won't be cleaned by
|
||||||
|
* edac_raw_error_desc_clean()
|
||||||
|
*/
|
||||||
|
char location[LOCATION_SIZE];
|
||||||
|
char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * EDAC_MAX_LABELS];
|
||||||
|
long grain;
|
||||||
|
|
||||||
|
/* the vars below and grain will be cleaned on every new error report */
|
||||||
|
u16 error_count;
|
||||||
|
int top_layer;
|
||||||
|
int mid_layer;
|
||||||
|
int low_layer;
|
||||||
|
unsigned long page_frame_number;
|
||||||
|
unsigned long offset_in_page;
|
||||||
|
unsigned long syndrome;
|
||||||
|
const char *msg;
|
||||||
|
const char *other_detail;
|
||||||
|
bool enable_per_layer_report;
|
||||||
|
};
|
||||||
|
|
||||||
/* MEMORY controller information structure
|
/* MEMORY controller information structure
|
||||||
*/
|
*/
|
||||||
struct mem_ctl_info {
|
struct mem_ctl_info {
|
||||||
@ -660,6 +710,12 @@ struct mem_ctl_info {
|
|||||||
/* work struct for this MC */
|
/* work struct for this MC */
|
||||||
struct delayed_work work;
|
struct delayed_work work;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Used to report an error - keeping it inside this struct
|
||||||
|
* means its memory is allocated by the EDAC core
|
||||||
|
*/
|
||||||
|
struct edac_raw_error_desc error_desc;
|
||||||
|
|
||||||
/* the internal state of this controller instance */
|
/* the internal state of this controller instance */
|
||||||
int op_state;
|
int op_state;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user