aoe: improve handling of misbehaving network paths
An AoE target can have multiple network ports used for AoE, and in the aoe driver, those are tracked by the aoetgt struct. These changes allow the aoe driver to handle network paths, or aoetgts, that are working poorly compared to the others.

Paths that do not get responses despite the retransmission of AoE commands are marked as "tainted", and non-tainted paths are preferred.

Meanwhile, the aoe driver attempts to "probe" the tainted path in the background by issuing reads of LBA 0 that are padded out to full (possibly jumbo-frame) size. If the probes get responses, then the path is "redeemed", and its taint is removed.

This mechanism has been shown to be helpful in transparently handling and recovering from real-world network "brown outs" in ways that the earlier "shoot the help-needing target in the head" mechanism could not.

Signed-off-by: Ed Cashin <ecashin@coraid.com>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
b91316f2b7
commit
bbb44e30d0
@ -91,6 +91,9 @@ enum {
|
|||||||
RTTDSCALE = 3,
|
RTTDSCALE = 3,
|
||||||
RTTAVG_INIT = USEC_PER_SEC / 4 << RTTSCALE,
|
RTTAVG_INIT = USEC_PER_SEC / 4 << RTTSCALE,
|
||||||
RTTDEV_INIT = RTTAVG_INIT / 4,
|
RTTDEV_INIT = RTTAVG_INIT / 4,
|
||||||
|
|
||||||
|
HARD_SCORN_SECS = 10, /* try another remote port after this */
|
||||||
|
MAX_TAINT = 1000, /* cap on aoetgt taint */
|
||||||
};
|
};
|
||||||
|
|
||||||
struct buf {
|
struct buf {
|
||||||
@ -103,6 +106,10 @@ struct buf {
|
|||||||
struct request *rq;
|
struct request *rq;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* Bit flags stored in the "flags" member of struct frame. */
enum frame_flags {
	FFL_PROBE = 1 << 0,	/* set by probe() on the frames it sends */
};
|
||||||
|
|
||||||
struct frame {
|
struct frame {
|
||||||
struct list_head head;
|
struct list_head head;
|
||||||
u32 tag;
|
u32 tag;
|
||||||
@ -118,6 +125,7 @@ struct frame {
|
|||||||
struct bio_vec *bv;
|
struct bio_vec *bv;
|
||||||
ulong bcnt;
|
ulong bcnt;
|
||||||
ulong bv_off;
|
ulong bv_off;
|
||||||
|
char flags;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct aoeif {
|
struct aoeif {
|
||||||
@ -138,8 +146,10 @@ struct aoetgt {
|
|||||||
ushort next_cwnd; /* incr maxout after decrementing to zero */
|
ushort next_cwnd; /* incr maxout after decrementing to zero */
|
||||||
ushort ssthresh; /* slow start threshold */
|
ushort ssthresh; /* slow start threshold */
|
||||||
ulong falloc; /* number of allocated frames */
|
ulong falloc; /* number of allocated frames */
|
||||||
|
int taint; /* how much we want to avoid this aoetgt */
|
||||||
int minbcnt;
|
int minbcnt;
|
||||||
int wpkts, rpkts;
|
int wpkts, rpkts;
|
||||||
|
char nout_probes;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct aoedev {
|
struct aoedev {
|
||||||
@ -174,7 +184,6 @@ struct aoedev {
|
|||||||
struct list_head rexmitq; /* deferred retransmissions */
|
struct list_head rexmitq; /* deferred retransmissions */
|
||||||
struct aoetgt *targets[NTARGETS];
|
struct aoetgt *targets[NTARGETS];
|
||||||
struct aoetgt **tgt; /* target in use when working */
|
struct aoetgt **tgt; /* target in use when working */
|
||||||
struct aoetgt *htgt; /* target needing rexmit assistance */
|
|
||||||
ulong ntargets;
|
ulong ntargets;
|
||||||
ulong kicked;
|
ulong kicked;
|
||||||
char ident[512];
|
char ident[512];
|
||||||
|
@ -22,6 +22,7 @@
|
|||||||
#define MAXIOC (8192) /* default meant to avoid most soft lockups */
|
#define MAXIOC (8192) /* default meant to avoid most soft lockups */
|
||||||
|
|
||||||
static void ktcomplete(struct frame *, struct sk_buff *);
|
static void ktcomplete(struct frame *, struct sk_buff *);
|
||||||
|
static int count_targets(struct aoedev *d, int *untainted);
|
||||||
|
|
||||||
static struct buf *nextbuf(struct aoedev *);
|
static struct buf *nextbuf(struct aoedev *);
|
||||||
|
|
||||||
@ -43,6 +44,8 @@ static struct {
|
|||||||
spinlock_t lock;
|
spinlock_t lock;
|
||||||
} iocq;
|
} iocq;
|
||||||
|
|
||||||
|
static struct page *empty_page;
|
||||||
|
|
||||||
static struct sk_buff *
|
static struct sk_buff *
|
||||||
new_skb(ulong len)
|
new_skb(ulong len)
|
||||||
{
|
{
|
||||||
@ -179,8 +182,10 @@ aoe_freetframe(struct frame *f)
|
|||||||
|
|
||||||
t = f->t;
|
t = f->t;
|
||||||
f->buf = NULL;
|
f->buf = NULL;
|
||||||
|
f->lba = 0;
|
||||||
f->bv = NULL;
|
f->bv = NULL;
|
||||||
f->r_skb = NULL;
|
f->r_skb = NULL;
|
||||||
|
f->flags = 0;
|
||||||
list_add(&f->head, &t->ffree);
|
list_add(&f->head, &t->ffree);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -234,20 +239,25 @@ newframe(struct aoedev *d)
|
|||||||
struct frame *f;
|
struct frame *f;
|
||||||
struct aoetgt *t, **tt;
|
struct aoetgt *t, **tt;
|
||||||
int totout = 0;
|
int totout = 0;
|
||||||
|
int use_tainted;
|
||||||
|
int has_untainted;
|
||||||
|
|
||||||
if (d->targets[0] == NULL) { /* shouldn't happen, but I'm paranoid */
|
if (d->targets[0] == NULL) { /* shouldn't happen, but I'm paranoid */
|
||||||
printk(KERN_ERR "aoe: NULL TARGETS!\n");
|
printk(KERN_ERR "aoe: NULL TARGETS!\n");
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
tt = d->tgt; /* last used target */
|
tt = d->tgt; /* last used target */
|
||||||
for (;;) {
|
for (use_tainted = 0, has_untainted = 0;;) {
|
||||||
tt++;
|
tt++;
|
||||||
if (tt >= &d->targets[NTARGETS] || !*tt)
|
if (tt >= &d->targets[NTARGETS] || !*tt)
|
||||||
tt = d->targets;
|
tt = d->targets;
|
||||||
t = *tt;
|
t = *tt;
|
||||||
totout += t->nout;
|
if (!t->taint) {
|
||||||
|
has_untainted = 1;
|
||||||
|
totout += t->nout;
|
||||||
|
}
|
||||||
if (t->nout < t->maxout
|
if (t->nout < t->maxout
|
||||||
&& t != d->htgt
|
&& (use_tainted || !t->taint)
|
||||||
&& t->ifp->nd) {
|
&& t->ifp->nd) {
|
||||||
f = newtframe(d, t);
|
f = newtframe(d, t);
|
||||||
if (f) {
|
if (f) {
|
||||||
@ -256,8 +266,12 @@ newframe(struct aoedev *d)
|
|||||||
return f;
|
return f;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (tt == d->tgt) /* we've looped and found nada */
|
if (tt == d->tgt) { /* we've looped and found nada */
|
||||||
break;
|
if (!use_tainted && !has_untainted)
|
||||||
|
use_tainted = 1;
|
||||||
|
else
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (totout == 0) {
|
if (totout == 0) {
|
||||||
d->kicked++;
|
d->kicked++;
|
||||||
@ -294,21 +308,68 @@ fhash(struct frame *f)
|
|||||||
list_add_tail(&f->head, &d->factive[n]);
|
list_add_tail(&f->head, &d->factive[n]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
ata_rw_frameinit(struct frame *f)
|
||||||
|
{
|
||||||
|
struct aoetgt *t;
|
||||||
|
struct aoe_hdr *h;
|
||||||
|
struct aoe_atahdr *ah;
|
||||||
|
struct sk_buff *skb;
|
||||||
|
char writebit, extbit;
|
||||||
|
|
||||||
|
skb = f->skb;
|
||||||
|
h = (struct aoe_hdr *) skb_mac_header(skb);
|
||||||
|
ah = (struct aoe_atahdr *) (h + 1);
|
||||||
|
skb_put(skb, sizeof(*h) + sizeof(*ah));
|
||||||
|
memset(h, 0, skb->len);
|
||||||
|
|
||||||
|
writebit = 0x10;
|
||||||
|
extbit = 0x4;
|
||||||
|
|
||||||
|
t = f->t;
|
||||||
|
f->tag = aoehdr_atainit(t->d, t, h);
|
||||||
|
fhash(f);
|
||||||
|
t->nout++;
|
||||||
|
f->waited = 0;
|
||||||
|
f->waited_total = 0;
|
||||||
|
if (f->buf)
|
||||||
|
f->lba = f->buf->sector;
|
||||||
|
|
||||||
|
/* set up ata header */
|
||||||
|
ah->scnt = f->bcnt >> 9;
|
||||||
|
put_lba(ah, f->lba);
|
||||||
|
if (t->d->flags & DEVFL_EXT) {
|
||||||
|
ah->aflags |= AOEAFL_EXT;
|
||||||
|
} else {
|
||||||
|
extbit = 0;
|
||||||
|
ah->lba3 &= 0x0f;
|
||||||
|
ah->lba3 |= 0xe0; /* LBA bit + obsolete 0xa0 */
|
||||||
|
}
|
||||||
|
if (f->buf && bio_data_dir(f->buf->bio) == WRITE) {
|
||||||
|
skb_fillup(skb, f->bv, f->bv_off, f->bcnt);
|
||||||
|
ah->aflags |= AOEAFL_WRITE;
|
||||||
|
skb->len += f->bcnt;
|
||||||
|
skb->data_len = f->bcnt;
|
||||||
|
skb->truesize += f->bcnt;
|
||||||
|
t->wpkts++;
|
||||||
|
} else {
|
||||||
|
t->rpkts++;
|
||||||
|
writebit = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
ah->cmdstat = ATA_CMD_PIO_READ | writebit | extbit;
|
||||||
|
skb->dev = t->ifp->nd;
|
||||||
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
aoecmd_ata_rw(struct aoedev *d)
|
aoecmd_ata_rw(struct aoedev *d)
|
||||||
{
|
{
|
||||||
struct frame *f;
|
struct frame *f;
|
||||||
struct aoe_hdr *h;
|
|
||||||
struct aoe_atahdr *ah;
|
|
||||||
struct buf *buf;
|
struct buf *buf;
|
||||||
struct aoetgt *t;
|
struct aoetgt *t;
|
||||||
struct sk_buff *skb;
|
struct sk_buff *skb;
|
||||||
struct sk_buff_head queue;
|
struct sk_buff_head queue;
|
||||||
ulong bcnt, fbcnt;
|
ulong bcnt, fbcnt;
|
||||||
char writebit, extbit;
|
|
||||||
|
|
||||||
writebit = 0x10;
|
|
||||||
extbit = 0x4;
|
|
||||||
|
|
||||||
buf = nextbuf(d);
|
buf = nextbuf(d);
|
||||||
if (buf == NULL)
|
if (buf == NULL)
|
||||||
@ -343,50 +404,15 @@ aoecmd_ata_rw(struct aoedev *d)
|
|||||||
} while (fbcnt);
|
} while (fbcnt);
|
||||||
|
|
||||||
/* initialize the headers & frame */
|
/* initialize the headers & frame */
|
||||||
skb = f->skb;
|
|
||||||
h = (struct aoe_hdr *) skb_mac_header(skb);
|
|
||||||
ah = (struct aoe_atahdr *) (h+1);
|
|
||||||
skb_put(skb, sizeof *h + sizeof *ah);
|
|
||||||
memset(h, 0, skb->len);
|
|
||||||
f->tag = aoehdr_atainit(d, t, h);
|
|
||||||
fhash(f);
|
|
||||||
t->nout++;
|
|
||||||
f->waited = 0;
|
|
||||||
f->waited_total = 0;
|
|
||||||
f->buf = buf;
|
f->buf = buf;
|
||||||
f->bcnt = bcnt;
|
f->bcnt = bcnt;
|
||||||
f->lba = buf->sector;
|
ata_rw_frameinit(f);
|
||||||
|
|
||||||
/* set up ata header */
|
|
||||||
ah->scnt = bcnt >> 9;
|
|
||||||
put_lba(ah, buf->sector);
|
|
||||||
if (d->flags & DEVFL_EXT) {
|
|
||||||
ah->aflags |= AOEAFL_EXT;
|
|
||||||
} else {
|
|
||||||
extbit = 0;
|
|
||||||
ah->lba3 &= 0x0f;
|
|
||||||
ah->lba3 |= 0xe0; /* LBA bit + obsolete 0xa0 */
|
|
||||||
}
|
|
||||||
if (bio_data_dir(buf->bio) == WRITE) {
|
|
||||||
skb_fillup(skb, f->bv, f->bv_off, bcnt);
|
|
||||||
ah->aflags |= AOEAFL_WRITE;
|
|
||||||
skb->len += bcnt;
|
|
||||||
skb->data_len = bcnt;
|
|
||||||
skb->truesize += bcnt;
|
|
||||||
t->wpkts++;
|
|
||||||
} else {
|
|
||||||
t->rpkts++;
|
|
||||||
writebit = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
ah->cmdstat = ATA_CMD_PIO_READ | writebit | extbit;
|
|
||||||
|
|
||||||
/* mark all tracking fields and load out */
|
/* mark all tracking fields and load out */
|
||||||
buf->nframesout += 1;
|
buf->nframesout += 1;
|
||||||
buf->sector += bcnt >> 9;
|
buf->sector += bcnt >> 9;
|
||||||
|
|
||||||
skb->dev = t->ifp->nd;
|
skb = skb_clone(f->skb, GFP_ATOMIC);
|
||||||
skb = skb_clone(skb, GFP_ATOMIC);
|
|
||||||
if (skb) {
|
if (skb) {
|
||||||
do_gettimeofday(&f->sent);
|
do_gettimeofday(&f->sent);
|
||||||
f->sent_jiffs = (u32) jiffies;
|
f->sent_jiffs = (u32) jiffies;
|
||||||
@ -462,11 +488,14 @@ resend(struct aoedev *d, struct frame *f)
|
|||||||
h = (struct aoe_hdr *) skb_mac_header(skb);
|
h = (struct aoe_hdr *) skb_mac_header(skb);
|
||||||
ah = (struct aoe_atahdr *) (h+1);
|
ah = (struct aoe_atahdr *) (h+1);
|
||||||
|
|
||||||
snprintf(buf, sizeof buf,
|
if (!(f->flags & FFL_PROBE)) {
|
||||||
"%15s e%ld.%d oldtag=%08x@%08lx newtag=%08x s=%pm d=%pm nout=%d\n",
|
snprintf(buf, sizeof(buf),
|
||||||
"retransmit", d->aoemajor, d->aoeminor, f->tag, jiffies, n,
|
"%15s e%ld.%d oldtag=%08x@%08lx newtag=%08x s=%pm d=%pm nout=%d\n",
|
||||||
h->src, h->dst, t->nout);
|
"retransmit", d->aoemajor, d->aoeminor,
|
||||||
aoechr_error(buf);
|
f->tag, jiffies, n,
|
||||||
|
h->src, h->dst, t->nout);
|
||||||
|
aoechr_error(buf);
|
||||||
|
}
|
||||||
|
|
||||||
f->tag = n;
|
f->tag = n;
|
||||||
fhash(f);
|
fhash(f);
|
||||||
@ -558,18 +587,18 @@ ejectif(struct aoetgt *t, struct aoeif *ifp)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static struct frame *
|
static struct frame *
|
||||||
reassign_frame(struct list_head *pos)
|
reassign_frame(struct frame *f)
|
||||||
{
|
{
|
||||||
struct frame *f;
|
|
||||||
struct frame *nf;
|
struct frame *nf;
|
||||||
struct sk_buff *skb;
|
struct sk_buff *skb;
|
||||||
|
|
||||||
f = list_entry(pos, struct frame, head);
|
|
||||||
nf = newframe(f->t->d);
|
nf = newframe(f->t->d);
|
||||||
if (!nf)
|
if (!nf)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
if (nf->t == f->t) {
|
||||||
list_del(pos);
|
aoe_freetframe(nf);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
skb = nf->skb;
|
skb = nf->skb;
|
||||||
nf->skb = f->skb;
|
nf->skb = f->skb;
|
||||||
@ -583,52 +612,67 @@ reassign_frame(struct list_head *pos)
|
|||||||
nf->sent = f->sent;
|
nf->sent = f->sent;
|
||||||
nf->sent_jiffs = f->sent_jiffs;
|
nf->sent_jiffs = f->sent_jiffs;
|
||||||
f->skb = skb;
|
f->skb = skb;
|
||||||
aoe_freetframe(f);
|
|
||||||
f->t->nout--;
|
|
||||||
nf->t->nout++;
|
|
||||||
|
|
||||||
return nf;
|
return nf;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static void
|
||||||
sthtith(struct aoedev *d)
|
probe(struct aoetgt *t)
|
||||||
{
|
{
|
||||||
struct frame *f, *nf;
|
struct aoedev *d;
|
||||||
struct list_head *nx, *pos, *head;
|
struct frame *f;
|
||||||
struct aoetgt *ht = d->htgt;
|
struct sk_buff *skb;
|
||||||
int i;
|
struct sk_buff_head queue;
|
||||||
|
size_t n, m;
|
||||||
|
int frag;
|
||||||
|
|
||||||
/* look through the active and pending retransmit frames */
|
d = t->d;
|
||||||
for (i = 0; i < NFACTIVE; i++) {
|
f = newtframe(d, t);
|
||||||
head = &d->factive[i];
|
if (!f) {
|
||||||
list_for_each_safe(pos, nx, head) {
|
pr_err("%s %pm for e%ld.%d: %s\n",
|
||||||
f = list_entry(pos, struct frame, head);
|
"aoe: cannot probe remote address",
|
||||||
if (f->t != ht)
|
t->addr,
|
||||||
continue;
|
(long) d->aoemajor, d->aoeminor,
|
||||||
nf = reassign_frame(pos);
|
"no frame available");
|
||||||
if (!nf)
|
return;
|
||||||
return 0;
|
|
||||||
resend(d, nf);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
head = &d->rexmitq;
|
f->flags |= FFL_PROBE;
|
||||||
list_for_each_safe(pos, nx, head) {
|
ifrotate(t);
|
||||||
f = list_entry(pos, struct frame, head);
|
f->bcnt = t->d->maxbcnt ? t->d->maxbcnt : DEFAULTBCNT;
|
||||||
if (f->t != ht)
|
ata_rw_frameinit(f);
|
||||||
continue;
|
skb = f->skb;
|
||||||
nf = reassign_frame(pos);
|
for (frag = 0, n = f->bcnt; n > 0; ++frag, n -= m) {
|
||||||
if (!nf)
|
if (n < PAGE_SIZE)
|
||||||
return 0;
|
m = n;
|
||||||
resend(d, nf);
|
else
|
||||||
|
m = PAGE_SIZE;
|
||||||
|
skb_fill_page_desc(skb, frag, empty_page, 0, m);
|
||||||
}
|
}
|
||||||
/* We've cleaned up the outstanding so take away his
|
skb->len += f->bcnt;
|
||||||
* interfaces so he won't be used. We should remove him from
|
skb->data_len = f->bcnt;
|
||||||
* the target array here, but cleaning up a target is
|
skb->truesize += f->bcnt;
|
||||||
* involved. PUNT!
|
|
||||||
*/
|
skb = skb_clone(f->skb, GFP_ATOMIC);
|
||||||
memset(ht->ifs, 0, sizeof ht->ifs);
|
if (skb) {
|
||||||
d->htgt = NULL;
|
do_gettimeofday(&f->sent);
|
||||||
return 1;
|
f->sent_jiffs = (u32) jiffies;
|
||||||
|
__skb_queue_head_init(&queue);
|
||||||
|
__skb_queue_tail(&queue, skb);
|
||||||
|
aoenet_xmit(&queue);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static long
|
||||||
|
rto(struct aoedev *d)
|
||||||
|
{
|
||||||
|
long t;
|
||||||
|
|
||||||
|
t = 2 * d->rttavg >> RTTSCALE;
|
||||||
|
t += 8 * d->rttdev >> RTTDSCALE;
|
||||||
|
if (t == 0)
|
||||||
|
t = 1;
|
||||||
|
|
||||||
|
return t;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
@ -636,17 +680,53 @@ rexmit_deferred(struct aoedev *d)
|
|||||||
{
|
{
|
||||||
struct aoetgt *t;
|
struct aoetgt *t;
|
||||||
struct frame *f;
|
struct frame *f;
|
||||||
|
struct frame *nf;
|
||||||
struct list_head *pos, *nx, *head;
|
struct list_head *pos, *nx, *head;
|
||||||
int since;
|
int since;
|
||||||
|
int untainted;
|
||||||
|
|
||||||
|
count_targets(d, &untainted);
|
||||||
|
|
||||||
head = &d->rexmitq;
|
head = &d->rexmitq;
|
||||||
list_for_each_safe(pos, nx, head) {
|
list_for_each_safe(pos, nx, head) {
|
||||||
f = list_entry(pos, struct frame, head);
|
f = list_entry(pos, struct frame, head);
|
||||||
t = f->t;
|
t = f->t;
|
||||||
|
if (t->taint) {
|
||||||
|
if (!(f->flags & FFL_PROBE)) {
|
||||||
|
nf = reassign_frame(f);
|
||||||
|
if (nf) {
|
||||||
|
if (t->nout_probes == 0
|
||||||
|
&& untainted > 0) {
|
||||||
|
probe(t);
|
||||||
|
t->nout_probes++;
|
||||||
|
}
|
||||||
|
list_replace(&f->head, &nf->head);
|
||||||
|
pos = &nf->head;
|
||||||
|
aoe_freetframe(f);
|
||||||
|
f = nf;
|
||||||
|
t = f->t;
|
||||||
|
}
|
||||||
|
} else if (untainted < 1) {
|
||||||
|
/* don't probe w/o other untainted aoetgts */
|
||||||
|
goto stop_probe;
|
||||||
|
} else if (tsince_hr(f) < t->taint * rto(d)) {
|
||||||
|
/* reprobe slowly when taint is high */
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
} else if (f->flags & FFL_PROBE) {
|
||||||
|
stop_probe: /* don't probe untainted aoetgts */
|
||||||
|
list_del(pos);
|
||||||
|
aoe_freetframe(f);
|
||||||
|
/* leaving d->kicked, because this is routine */
|
||||||
|
f->t->d->flags |= DEVFL_KICKME;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if (t->nout >= t->maxout)
|
if (t->nout >= t->maxout)
|
||||||
continue;
|
continue;
|
||||||
list_del(pos);
|
list_del(pos);
|
||||||
t->nout++;
|
t->nout++;
|
||||||
|
if (f->flags & FFL_PROBE)
|
||||||
|
t->nout_probes++;
|
||||||
since = tsince_hr(f);
|
since = tsince_hr(f);
|
||||||
f->waited += since;
|
f->waited += since;
|
||||||
f->waited_total += since;
|
f->waited_total += since;
|
||||||
@ -654,6 +734,36 @@ rexmit_deferred(struct aoedev *d)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* An aoetgt accumulates demerits quickly, and successful
|
||||||
|
* probing redeems the aoetgt slowly.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
scorn(struct aoetgt *t)
|
||||||
|
{
|
||||||
|
int n;
|
||||||
|
|
||||||
|
n = t->taint++;
|
||||||
|
t->taint += t->taint * 2;
|
||||||
|
if (n > t->taint)
|
||||||
|
t->taint = n;
|
||||||
|
if (t->taint > MAX_TAINT)
|
||||||
|
t->taint = MAX_TAINT;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
count_targets(struct aoedev *d, int *untainted)
|
||||||
|
{
|
||||||
|
int i, good;
|
||||||
|
|
||||||
|
for (i = good = 0; i < d->ntargets && d->targets[i]; ++i)
|
||||||
|
if (d->targets[i]->taint == 0)
|
||||||
|
good++;
|
||||||
|
|
||||||
|
if (untainted)
|
||||||
|
*untainted = good;
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
rexmit_timer(ulong vp)
|
rexmit_timer(ulong vp)
|
||||||
{
|
{
|
||||||
@ -666,6 +776,7 @@ rexmit_timer(ulong vp)
|
|||||||
register long timeout;
|
register long timeout;
|
||||||
ulong flags, n;
|
ulong flags, n;
|
||||||
int i;
|
int i;
|
||||||
|
int utgts; /* number of aoetgt descriptors (not slots) */
|
||||||
int since;
|
int since;
|
||||||
|
|
||||||
d = (struct aoedev *) vp;
|
d = (struct aoedev *) vp;
|
||||||
@ -673,10 +784,9 @@ rexmit_timer(ulong vp)
|
|||||||
spin_lock_irqsave(&d->lock, flags);
|
spin_lock_irqsave(&d->lock, flags);
|
||||||
|
|
||||||
/* timeout based on observed timings and variations */
|
/* timeout based on observed timings and variations */
|
||||||
timeout = 2 * d->rttavg >> RTTSCALE;
|
timeout = rto(d);
|
||||||
timeout += 8 * d->rttdev >> RTTDSCALE;
|
|
||||||
if (timeout == 0)
|
utgts = count_targets(d, NULL);
|
||||||
timeout = 1;
|
|
||||||
|
|
||||||
if (d->flags & DEVFL_TKILL) {
|
if (d->flags & DEVFL_TKILL) {
|
||||||
spin_unlock_irqrestore(&d->lock, flags);
|
spin_unlock_irqrestore(&d->lock, flags);
|
||||||
@ -702,7 +812,7 @@ rexmit_timer(ulong vp)
|
|||||||
since = tsince_hr(f);
|
since = tsince_hr(f);
|
||||||
n = f->waited_total + since;
|
n = f->waited_total + since;
|
||||||
n /= USEC_PER_SEC;
|
n /= USEC_PER_SEC;
|
||||||
if (n > aoe_deadsecs) {
|
if (n > aoe_deadsecs && !(f->flags & FFL_PROBE)) {
|
||||||
/* Waited too long. Device failure.
|
/* Waited too long. Device failure.
|
||||||
* Hang all frames on first hash bucket for downdev
|
* Hang all frames on first hash bucket for downdev
|
||||||
* to clean up.
|
* to clean up.
|
||||||
@ -713,19 +823,26 @@ rexmit_timer(ulong vp)
|
|||||||
}
|
}
|
||||||
|
|
||||||
t = f->t;
|
t = f->t;
|
||||||
if (n > aoe_deadsecs/2)
|
n = f->waited + since;
|
||||||
d->htgt = t; /* see if another target can help */
|
n /= USEC_PER_SEC;
|
||||||
|
if (aoe_deadsecs && utgts > 0
|
||||||
|
&& (n > aoe_deadsecs / utgts || n > HARD_SCORN_SECS))
|
||||||
|
scorn(t); /* avoid this target */
|
||||||
|
|
||||||
if (t->maxout != 1) {
|
if (t->maxout != 1) {
|
||||||
t->ssthresh = t->maxout / 2;
|
t->ssthresh = t->maxout / 2;
|
||||||
t->maxout = 1;
|
t->maxout = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
ifp = getif(t, f->skb->dev);
|
if (f->flags & FFL_PROBE) {
|
||||||
if (ifp && ++ifp->lost > (t->nframes << 1)
|
t->nout_probes--;
|
||||||
&& (ifp != t->ifs || t->ifs[1].nd)) {
|
} else {
|
||||||
ejectif(t, ifp);
|
ifp = getif(t, f->skb->dev);
|
||||||
ifp = NULL;
|
if (ifp && ++ifp->lost > (t->nframes << 1)
|
||||||
|
&& (ifp != t->ifs || t->ifs[1].nd)) {
|
||||||
|
ejectif(t, ifp);
|
||||||
|
ifp = NULL;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
list_move_tail(pos, &d->rexmitq);
|
list_move_tail(pos, &d->rexmitq);
|
||||||
t->nout--;
|
t->nout--;
|
||||||
@ -733,7 +850,7 @@ rexmit_timer(ulong vp)
|
|||||||
rexmit_deferred(d);
|
rexmit_deferred(d);
|
||||||
|
|
||||||
out:
|
out:
|
||||||
if ((d->flags & DEVFL_KICKME || d->htgt) && d->blkq) {
|
if ((d->flags & DEVFL_KICKME) && d->blkq) {
|
||||||
d->flags &= ~DEVFL_KICKME;
|
d->flags &= ~DEVFL_KICKME;
|
||||||
d->blkq->request_fn(d->blkq);
|
d->blkq->request_fn(d->blkq);
|
||||||
}
|
}
|
||||||
@ -854,8 +971,6 @@ nextbuf(struct aoedev *d)
|
|||||||
void
|
void
|
||||||
aoecmd_work(struct aoedev *d)
|
aoecmd_work(struct aoedev *d)
|
||||||
{
|
{
|
||||||
if (d->htgt && !sthtith(d))
|
|
||||||
return;
|
|
||||||
rexmit_deferred(d);
|
rexmit_deferred(d);
|
||||||
while (aoecmd_ata_rw(d))
|
while (aoecmd_ata_rw(d))
|
||||||
;
|
;
|
||||||
@ -1065,19 +1180,22 @@ ktiocomplete(struct frame *f)
|
|||||||
struct aoeif *ifp;
|
struct aoeif *ifp;
|
||||||
struct aoedev *d;
|
struct aoedev *d;
|
||||||
long n;
|
long n;
|
||||||
|
int untainted;
|
||||||
|
|
||||||
if (f == NULL)
|
if (f == NULL)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
t = f->t;
|
t = f->t;
|
||||||
d = t->d;
|
d = t->d;
|
||||||
|
skb = f->r_skb;
|
||||||
|
buf = f->buf;
|
||||||
|
if (f->flags & FFL_PROBE)
|
||||||
|
goto out;
|
||||||
|
if (!skb) /* just fail the buf. */
|
||||||
|
goto noskb;
|
||||||
|
|
||||||
hout = (struct aoe_hdr *) skb_mac_header(f->skb);
|
hout = (struct aoe_hdr *) skb_mac_header(f->skb);
|
||||||
ahout = (struct aoe_atahdr *) (hout+1);
|
ahout = (struct aoe_atahdr *) (hout+1);
|
||||||
buf = f->buf;
|
|
||||||
skb = f->r_skb;
|
|
||||||
if (skb == NULL)
|
|
||||||
goto noskb; /* just fail the buf. */
|
|
||||||
|
|
||||||
hin = (struct aoe_hdr *) skb->data;
|
hin = (struct aoe_hdr *) skb->data;
|
||||||
skb_pull(skb, sizeof(*hin));
|
skb_pull(skb, sizeof(*hin));
|
||||||
@ -1089,7 +1207,7 @@ ktiocomplete(struct frame *f)
|
|||||||
d->aoemajor, d->aoeminor);
|
d->aoemajor, d->aoeminor);
|
||||||
noskb: if (buf)
|
noskb: if (buf)
|
||||||
clear_bit(BIO_UPTODATE, &buf->bio->bi_flags);
|
clear_bit(BIO_UPTODATE, &buf->bio->bi_flags);
|
||||||
goto badrsp;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
n = ahout->scnt << 9;
|
n = ahout->scnt << 9;
|
||||||
@ -1109,8 +1227,6 @@ noskb: if (buf)
|
|||||||
ifp = getif(t, skb->dev);
|
ifp = getif(t, skb->dev);
|
||||||
if (ifp)
|
if (ifp)
|
||||||
ifp->lost = 0;
|
ifp->lost = 0;
|
||||||
if (d->htgt == t) /* I'll help myself, thank you. */
|
|
||||||
d->htgt = NULL;
|
|
||||||
spin_unlock_irq(&d->lock);
|
spin_unlock_irq(&d->lock);
|
||||||
break;
|
break;
|
||||||
case ATA_CMD_ID_ATA:
|
case ATA_CMD_ID_ATA:
|
||||||
@ -1131,8 +1247,17 @@ noskb: if (buf)
|
|||||||
be16_to_cpu(get_unaligned(&hin->major)),
|
be16_to_cpu(get_unaligned(&hin->major)),
|
||||||
hin->minor);
|
hin->minor);
|
||||||
}
|
}
|
||||||
badrsp:
|
out:
|
||||||
spin_lock_irq(&d->lock);
|
spin_lock_irq(&d->lock);
|
||||||
|
if (t->taint > 0
|
||||||
|
&& --t->taint > 0
|
||||||
|
&& t->nout_probes == 0) {
|
||||||
|
count_targets(d, &untainted);
|
||||||
|
if (untainted > 0) {
|
||||||
|
probe(t);
|
||||||
|
t->nout_probes++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
aoe_freetframe(f);
|
aoe_freetframe(f);
|
||||||
|
|
||||||
@ -1261,6 +1386,8 @@ aoecmd_ata_rsp(struct sk_buff *skb)
|
|||||||
if (f) {
|
if (f) {
|
||||||
calc_rttavg(d, f->t, tsince_hr(f));
|
calc_rttavg(d, f->t, tsince_hr(f));
|
||||||
f->t->nout--;
|
f->t->nout--;
|
||||||
|
if (f->flags & FFL_PROBE)
|
||||||
|
f->t->nout_probes--;
|
||||||
} else {
|
} else {
|
||||||
f = getframe_deferred(d, n);
|
f = getframe_deferred(d, n);
|
||||||
if (f) {
|
if (f) {
|
||||||
@ -1379,6 +1506,7 @@ addtgt(struct aoedev *d, char *addr, ulong nframes)
|
|||||||
memcpy(t->addr, addr, sizeof t->addr);
|
memcpy(t->addr, addr, sizeof t->addr);
|
||||||
t->ifp = t->ifs;
|
t->ifp = t->ifs;
|
||||||
aoecmd_wreset(t);
|
aoecmd_wreset(t);
|
||||||
|
t->maxout = t->nframes / 2;
|
||||||
INIT_LIST_HEAD(&t->ffree);
|
INIT_LIST_HEAD(&t->ffree);
|
||||||
return *tt = t;
|
return *tt = t;
|
||||||
}
|
}
|
||||||
@ -1584,6 +1712,14 @@ aoe_flush_iocq(void)
|
|||||||
int __init
|
int __init
|
||||||
aoecmd_init(void)
|
aoecmd_init(void)
|
||||||
{
|
{
|
||||||
|
void *p;
|
||||||
|
|
||||||
|
/* get_zeroed_page returns page with ref count 1 */
|
||||||
|
p = (void *) get_zeroed_page(GFP_KERNEL | __GFP_REPEAT);
|
||||||
|
if (!p)
|
||||||
|
return -ENOMEM;
|
||||||
|
empty_page = virt_to_page(p);
|
||||||
|
|
||||||
INIT_LIST_HEAD(&iocq.head);
|
INIT_LIST_HEAD(&iocq.head);
|
||||||
spin_lock_init(&iocq.lock);
|
spin_lock_init(&iocq.lock);
|
||||||
init_waitqueue_head(&ktiowq);
|
init_waitqueue_head(&ktiowq);
|
||||||
@ -1599,4 +1735,7 @@ aoecmd_exit(void)
|
|||||||
{
|
{
|
||||||
aoe_ktstop(&kts);
|
aoe_ktstop(&kts);
|
||||||
aoe_flush_iocq();
|
aoe_flush_iocq();
|
||||||
|
|
||||||
|
free_page((unsigned long) page_address(empty_page));
|
||||||
|
empty_page = NULL;
|
||||||
}
|
}
|
||||||
|
@ -223,7 +223,6 @@ aoedev_downdev(struct aoedev *d)
|
|||||||
|
|
||||||
/* clean out the in-process request (if any) */
|
/* clean out the in-process request (if any) */
|
||||||
aoe_failip(d);
|
aoe_failip(d);
|
||||||
d->htgt = NULL;
|
|
||||||
|
|
||||||
/* fast fail all pending I/O */
|
/* fast fail all pending I/O */
|
||||||
if (d->blkq) {
|
if (d->blkq) {
|
||||||
|
Loading…
Reference in New Issue
Block a user