libceph: lower exponential backoff delay
The current setting allows the backoff to climb up to 5 minutes. This is too high -- it becomes hard to tell whether the client is stuck on something or just in backoff. In userspace, ms_max_backoff defaults to 15 seconds. Let's do the same.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
This commit is contained in:
@@ -241,8 +241,8 @@ struct ceph_msg {
|
|||||||
};
|
};
|
||||||
|
|
||||||
/* ceph connection fault delay defaults, for exponential backoff */
|
/* ceph connection fault delay defaults, for exponential backoff */
|
||||||
#define BASE_DELAY_INTERVAL (HZ/2)
|
#define BASE_DELAY_INTERVAL (HZ / 4)
|
||||||
#define MAX_DELAY_INTERVAL (5 * 60 * HZ)
|
#define MAX_DELAY_INTERVAL (15 * HZ)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* A single connection with another host.
|
* A single connection with another host.
|
||||||
|
@@ -2812,6 +2812,9 @@ static int queue_con_delay(struct ceph_connection *con, unsigned long delay)
|
|||||||
return -ENOENT;
|
return -ENOENT;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (delay >= HZ)
|
||||||
|
delay = round_jiffies_relative(delay);
|
||||||
|
|
||||||
dout("%s %p %lu\n", __func__, con, delay);
|
dout("%s %p %lu\n", __func__, con, delay);
|
||||||
if (!queue_delayed_work(ceph_msgr_wq, &con->work, delay)) {
|
if (!queue_delayed_work(ceph_msgr_wq, &con->work, delay)) {
|
||||||
dout("%s %p - already queued\n", __func__, con);
|
dout("%s %p - already queued\n", __func__, con);
|
||||||
@@ -2871,7 +2874,7 @@ static bool con_backoff(struct ceph_connection *con)
|
|||||||
if (!con_flag_test_and_clear(con, CON_FLAG_BACKOFF))
|
if (!con_flag_test_and_clear(con, CON_FLAG_BACKOFF))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
ret = queue_con_delay(con, round_jiffies_relative(con->delay));
|
ret = queue_con_delay(con, con->delay);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
dout("%s: con %p FAILED to back off %lu\n", __func__,
|
dout("%s: con %p FAILED to back off %lu\n", __func__,
|
||||||
con, con->delay);
|
con, con->delay);
|
||||||
@@ -3018,10 +3021,13 @@ static void con_fault(struct ceph_connection *con)
|
|||||||
} else {
|
} else {
|
||||||
/* retry after a delay. */
|
/* retry after a delay. */
|
||||||
con->state = CON_STATE_PREOPEN;
|
con->state = CON_STATE_PREOPEN;
|
||||||
if (con->delay == 0)
|
if (!con->delay) {
|
||||||
con->delay = BASE_DELAY_INTERVAL;
|
con->delay = BASE_DELAY_INTERVAL;
|
||||||
else if (con->delay < MAX_DELAY_INTERVAL)
|
} else if (con->delay < MAX_DELAY_INTERVAL) {
|
||||||
con->delay *= 2;
|
con->delay *= 2;
|
||||||
|
if (con->delay > MAX_DELAY_INTERVAL)
|
||||||
|
con->delay = MAX_DELAY_INTERVAL;
|
||||||
|
}
|
||||||
con_flag_set(con, CON_FLAG_BACKOFF);
|
con_flag_set(con, CON_FLAG_BACKOFF);
|
||||||
queue_con(con);
|
queue_con(con);
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user