[PATCH] x86-64: Make lockless machine check record passing a bit more robust.
One machine is constantly throwing NMI watchdog timeouts in mce_log. This was one attempt to fix it. (AK: this doesn't actually fix the bug I'm seeing unfortunately, probably drop. I don't like it that the reader can spin forever now waiting for a writer) Signed-off-by: Andi Kleen <ak@suse.de> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
This commit is contained in:
parent
a54e678b8f
commit
673242c10d
@ -56,15 +56,19 @@ void mce_log(struct mce *mce)
|
||||
smp_wmb();
|
||||
for (;;) {
|
||||
entry = rcu_dereference(mcelog.next);
|
||||
/* When the buffer fills up discard new entries. Assume
|
||||
that the earlier errors are the more interesting. */
|
||||
if (entry >= MCE_LOG_LEN) {
|
||||
set_bit(MCE_OVERFLOW, &mcelog.flags);
|
||||
return;
|
||||
for (;;) {
|
||||
/* When the buffer fills up discard new entries. Assume
|
||||
that the earlier errors are the more interesting. */
|
||||
if (entry >= MCE_LOG_LEN) {
|
||||
set_bit(MCE_OVERFLOW, &mcelog.flags);
|
||||
return;
|
||||
}
|
||||
/* Old left over entry. Skip. */
|
||||
if (mcelog.entry[entry].finished) {
|
||||
entry++;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
/* Old left over entry. Skip. */
|
||||
if (mcelog.entry[entry].finished)
|
||||
continue;
|
||||
smp_rmb();
|
||||
next = entry + 1;
|
||||
if (cmpxchg(&mcelog.next, entry, next) == entry)
|
||||
@ -404,9 +408,15 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, loff
|
||||
}
|
||||
|
||||
err = 0;
|
||||
for (i = 0; i < next; i++) {
|
||||
if (!mcelog.entry[i].finished)
|
||||
continue;
|
||||
for (i = 0; i < next; i++) {
|
||||
unsigned long start = jiffies;
|
||||
while (!mcelog.entry[i].finished) {
|
||||
if (!time_before(jiffies, start + 2)) {
|
||||
memset(mcelog.entry + i,0, sizeof(struct mce));
|
||||
continue;
|
||||
}
|
||||
cpu_relax();
|
||||
}
|
||||
smp_rmb();
|
||||
err |= copy_to_user(buf, mcelog.entry + i, sizeof(struct mce));
|
||||
buf += sizeof(struct mce);
|
||||
|
Loading…
Reference in New Issue
Block a user