[PATCH] x86-64: Make lockless machine check record passing a bit more robust.

One machine is constantly throwing NMI watchdog timeouts in mce_log.

This was one attempt to fix it.

(AK: Unfortunately, this doesn't actually fix the bug I'm seeing, so it
should probably be dropped.  I also don't like that the reader can now
spin forever waiting for a writer.)

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
This commit is contained in:
Andi Kleen 2005-09-12 18:49:24 +02:00 committed by Linus Torvalds
parent a54e678b8f
commit 673242c10d

View File

@@ -56,15 +56,19 @@ void mce_log(struct mce *mce)
smp_wmb(); smp_wmb();
for (;;) { for (;;) {
entry = rcu_dereference(mcelog.next); entry = rcu_dereference(mcelog.next);
/* When the buffer fills up discard new entries. Assume for (;;) {
that the earlier errors are the more interesting. */ /* When the buffer fills up discard new entries. Assume
if (entry >= MCE_LOG_LEN) { that the earlier errors are the more interesting. */
set_bit(MCE_OVERFLOW, &mcelog.flags); if (entry >= MCE_LOG_LEN) {
return; set_bit(MCE_OVERFLOW, &mcelog.flags);
return;
}
/* Old left over entry. Skip. */
if (mcelog.entry[entry].finished) {
entry++;
continue;
}
} }
/* Old left over entry. Skip. */
if (mcelog.entry[entry].finished)
continue;
smp_rmb(); smp_rmb();
next = entry + 1; next = entry + 1;
if (cmpxchg(&mcelog.next, entry, next) == entry) if (cmpxchg(&mcelog.next, entry, next) == entry)
@@ -404,9 +408,15 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, loff
} }
err = 0; err = 0;
for (i = 0; i < next; i++) { for (i = 0; i < next; i++) {
if (!mcelog.entry[i].finished) unsigned long start = jiffies;
continue; while (!mcelog.entry[i].finished) {
if (!time_before(jiffies, start + 2)) {
memset(mcelog.entry + i,0, sizeof(struct mce));
continue;
}
cpu_relax();
}
smp_rmb(); smp_rmb();
err |= copy_to_user(buf, mcelog.entry + i, sizeof(struct mce)); err |= copy_to_user(buf, mcelog.entry + i, sizeof(struct mce));
buf += sizeof(struct mce); buf += sizeof(struct mce);