libceph: store timeouts in jiffies, verify user input
There are currently three libceph-level timeouts that the user can specify on mount: mount_timeout, osd_idle_ttl and osdkeepalive. All of these are in seconds and no checking is done on user input: negative values are accepted, we multiply them all by HZ which may or may not overflow, arbitrarily large jiffies then get added together, etc. There is also a bug in the way mount_timeout=0 is handled. It's supposed to mean "infinite timeout", but that's not how wait.h APIs treat it and so __ceph_open_session() for example will busy loop without much chance of being interrupted if none of ceph-mons are there. Fix all this by verifying user input, storing timeouts capped by msecs_to_jiffies() in jiffies and using the new ceph_timeout_jiffies() helper for all user-specified waits to handle infinite timeouts correctly. Signed-off-by: Ilya Dryomov <idryomov@gmail.com> Reviewed-by: Alex Elder <elder@linaro.org>
This commit is contained in:
parent
d50c97b566
commit
a319bf56a6
@ -4963,8 +4963,8 @@ out_err:
|
||||
*/
|
||||
static int rbd_add_get_pool_id(struct rbd_client *rbdc, const char *pool_name)
|
||||
{
|
||||
struct ceph_options *opts = rbdc->client->options;
|
||||
u64 newest_epoch;
|
||||
unsigned long timeout = rbdc->client->options->mount_timeout * HZ;
|
||||
int tries = 0;
|
||||
int ret;
|
||||
|
||||
@ -4979,7 +4979,8 @@ again:
|
||||
if (rbdc->client->osdc.osdmap->epoch < newest_epoch) {
|
||||
ceph_monc_request_next_osdmap(&rbdc->client->monc);
|
||||
(void) ceph_monc_wait_osdmap(&rbdc->client->monc,
|
||||
newest_epoch, timeout);
|
||||
newest_epoch,
|
||||
opts->mount_timeout);
|
||||
goto again;
|
||||
} else {
|
||||
/* the osdmap we have is new enough */
|
||||
|
@ -1259,8 +1259,8 @@ static int ceph_dir_fsync(struct file *file, loff_t start, loff_t end,
|
||||
inode, req->r_tid, last_tid);
|
||||
if (req->r_timeout) {
|
||||
unsigned long time_left = wait_for_completion_timeout(
|
||||
&req->r_safe_completion,
|
||||
req->r_timeout);
|
||||
&req->r_safe_completion,
|
||||
ceph_timeout_jiffies(req->r_timeout));
|
||||
if (time_left > 0)
|
||||
ret = 0;
|
||||
else
|
||||
|
@ -2268,7 +2268,8 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
|
||||
dout("do_request waiting\n");
|
||||
if (req->r_timeout) {
|
||||
err = (long)wait_for_completion_killable_timeout(
|
||||
&req->r_completion, req->r_timeout);
|
||||
&req->r_completion,
|
||||
ceph_timeout_jiffies(req->r_timeout));
|
||||
if (err == 0)
|
||||
err = -EIO;
|
||||
} else if (req->r_wait_for_completion) {
|
||||
@ -3424,8 +3425,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
|
||||
*/
|
||||
static void wait_requests(struct ceph_mds_client *mdsc)
|
||||
{
|
||||
struct ceph_options *opts = mdsc->fsc->client->options;
|
||||
struct ceph_mds_request *req;
|
||||
struct ceph_fs_client *fsc = mdsc->fsc;
|
||||
|
||||
mutex_lock(&mdsc->mutex);
|
||||
if (__get_oldest_req(mdsc)) {
|
||||
@ -3433,7 +3434,7 @@ static void wait_requests(struct ceph_mds_client *mdsc)
|
||||
|
||||
dout("wait_requests waiting for requests\n");
|
||||
wait_for_completion_timeout(&mdsc->safe_umount_waiters,
|
||||
fsc->client->options->mount_timeout * HZ);
|
||||
ceph_timeout_jiffies(opts->mount_timeout));
|
||||
|
||||
/* tear down remaining requests */
|
||||
mutex_lock(&mdsc->mutex);
|
||||
@ -3556,10 +3557,9 @@ static bool done_closing_sessions(struct ceph_mds_client *mdsc)
|
||||
*/
|
||||
void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc)
|
||||
{
|
||||
struct ceph_options *opts = mdsc->fsc->client->options;
|
||||
struct ceph_mds_session *session;
|
||||
int i;
|
||||
struct ceph_fs_client *fsc = mdsc->fsc;
|
||||
unsigned long timeout = fsc->client->options->mount_timeout * HZ;
|
||||
|
||||
dout("close_sessions\n");
|
||||
|
||||
@ -3580,7 +3580,7 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc)
|
||||
|
||||
dout("waiting for sessions to close\n");
|
||||
wait_event_timeout(mdsc->session_close_wq, done_closing_sessions(mdsc),
|
||||
timeout);
|
||||
ceph_timeout_jiffies(opts->mount_timeout));
|
||||
|
||||
/* tear down remaining sessions */
|
||||
mutex_lock(&mdsc->mutex);
|
||||
|
@ -227,7 +227,7 @@ struct ceph_mds_request {
|
||||
int r_err;
|
||||
bool r_aborted;
|
||||
|
||||
unsigned long r_timeout; /* optional. jiffies */
|
||||
unsigned long r_timeout; /* optional. jiffies, 0 is "wait forever" */
|
||||
unsigned long r_started; /* start time to measure timeout against */
|
||||
unsigned long r_request_started; /* start time for mds request only,
|
||||
used to measure lease durations */
|
||||
|
@ -742,7 +742,7 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
|
||||
req->r_ino1.ino = CEPH_INO_ROOT;
|
||||
req->r_ino1.snap = CEPH_NOSNAP;
|
||||
req->r_started = started;
|
||||
req->r_timeout = fsc->client->options->mount_timeout * HZ;
|
||||
req->r_timeout = fsc->client->options->mount_timeout;
|
||||
req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE);
|
||||
req->r_num_caps = 2;
|
||||
err = ceph_mdsc_do_request(mdsc, NULL, req);
|
||||
|
@ -43,9 +43,9 @@ struct ceph_options {
|
||||
int flags;
|
||||
struct ceph_fsid fsid;
|
||||
struct ceph_entity_addr my_addr;
|
||||
int mount_timeout;
|
||||
int osd_idle_ttl;
|
||||
int osd_keepalive_timeout;
|
||||
unsigned long mount_timeout; /* jiffies */
|
||||
unsigned long osd_idle_ttl; /* jiffies */
|
||||
unsigned long osd_keepalive_timeout; /* jiffies */
|
||||
|
||||
/*
|
||||
* any type that can't be simply compared or doesn't need need
|
||||
@ -63,9 +63,9 @@ struct ceph_options {
|
||||
/*
|
||||
* defaults
|
||||
*/
|
||||
#define CEPH_MOUNT_TIMEOUT_DEFAULT 60
|
||||
#define CEPH_OSD_KEEPALIVE_DEFAULT 5
|
||||
#define CEPH_OSD_IDLE_TTL_DEFAULT 60
|
||||
#define CEPH_MOUNT_TIMEOUT_DEFAULT msecs_to_jiffies(60 * 1000)
|
||||
#define CEPH_OSD_KEEPALIVE_DEFAULT msecs_to_jiffies(5 * 1000)
|
||||
#define CEPH_OSD_IDLE_TTL_DEFAULT msecs_to_jiffies(60 * 1000)
|
||||
|
||||
#define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024)
|
||||
#define CEPH_MSG_MAX_MIDDLE_LEN (16*1024*1024)
|
||||
@ -93,6 +93,11 @@ enum {
|
||||
CEPH_MOUNT_SHUTDOWN,
|
||||
};
|
||||
|
||||
static inline unsigned long ceph_timeout_jiffies(unsigned long timeout)
|
||||
{
|
||||
return timeout ?: MAX_SCHEDULE_TIMEOUT;
|
||||
}
|
||||
|
||||
struct ceph_mds_client;
|
||||
|
||||
/*
|
||||
|
@ -352,8 +352,8 @@ ceph_parse_options(char *options, const char *dev_name,
|
||||
/* start with defaults */
|
||||
opt->flags = CEPH_OPT_DEFAULT;
|
||||
opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT;
|
||||
opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */
|
||||
opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; /* seconds */
|
||||
opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT;
|
||||
opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT;
|
||||
|
||||
/* get mon ip(s) */
|
||||
/* ip1[:port1][,ip2[:port2]...] */
|
||||
@ -439,13 +439,32 @@ ceph_parse_options(char *options, const char *dev_name,
|
||||
pr_warn("ignoring deprecated osdtimeout option\n");
|
||||
break;
|
||||
case Opt_osdkeepalivetimeout:
|
||||
opt->osd_keepalive_timeout = intval;
|
||||
/* 0 isn't well defined right now, reject it */
|
||||
if (intval < 1 || intval > INT_MAX / 1000) {
|
||||
pr_err("osdkeepalive out of range\n");
|
||||
err = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
opt->osd_keepalive_timeout =
|
||||
msecs_to_jiffies(intval * 1000);
|
||||
break;
|
||||
case Opt_osd_idle_ttl:
|
||||
opt->osd_idle_ttl = intval;
|
||||
/* 0 isn't well defined right now, reject it */
|
||||
if (intval < 1 || intval > INT_MAX / 1000) {
|
||||
pr_err("osd_idle_ttl out of range\n");
|
||||
err = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
opt->osd_idle_ttl = msecs_to_jiffies(intval * 1000);
|
||||
break;
|
||||
case Opt_mount_timeout:
|
||||
opt->mount_timeout = intval;
|
||||
/* 0 is "wait forever" (i.e. infinite timeout) */
|
||||
if (intval < 0 || intval > INT_MAX / 1000) {
|
||||
pr_err("mount_timeout out of range\n");
|
||||
err = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
opt->mount_timeout = msecs_to_jiffies(intval * 1000);
|
||||
break;
|
||||
|
||||
case Opt_share:
|
||||
@ -512,12 +531,14 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client)
|
||||
seq_puts(m, "notcp_nodelay,");
|
||||
|
||||
if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
|
||||
seq_printf(m, "mount_timeout=%d,", opt->mount_timeout);
|
||||
seq_printf(m, "mount_timeout=%d,",
|
||||
jiffies_to_msecs(opt->mount_timeout) / 1000);
|
||||
if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT)
|
||||
seq_printf(m, "osd_idle_ttl=%d,", opt->osd_idle_ttl);
|
||||
seq_printf(m, "osd_idle_ttl=%d,",
|
||||
jiffies_to_msecs(opt->osd_idle_ttl) / 1000);
|
||||
if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
|
||||
seq_printf(m, "osdkeepalivetimeout=%d,",
|
||||
opt->osd_keepalive_timeout);
|
||||
jiffies_to_msecs(opt->osd_keepalive_timeout) / 1000);
|
||||
|
||||
/* drop redundant comma */
|
||||
if (m->count != pos)
|
||||
@ -627,7 +648,7 @@ static int have_mon_and_osd_map(struct ceph_client *client)
|
||||
int __ceph_open_session(struct ceph_client *client, unsigned long started)
|
||||
{
|
||||
int err;
|
||||
unsigned long timeout = client->options->mount_timeout * HZ;
|
||||
unsigned long timeout = client->options->mount_timeout;
|
||||
|
||||
/* open session, and wait for mon and osd maps */
|
||||
err = ceph_monc_open_session(&client->monc);
|
||||
@ -643,7 +664,7 @@ int __ceph_open_session(struct ceph_client *client, unsigned long started)
|
||||
dout("mount waiting for mon_map\n");
|
||||
err = wait_event_interruptible_timeout(client->auth_wq,
|
||||
have_mon_and_osd_map(client) || (client->auth_err < 0),
|
||||
timeout);
|
||||
ceph_timeout_jiffies(timeout));
|
||||
if (err == -EINTR || err == -ERESTARTSYS)
|
||||
return err;
|
||||
if (client->auth_err < 0)
|
||||
|
@ -298,6 +298,12 @@ void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc)
|
||||
}
|
||||
EXPORT_SYMBOL(ceph_monc_request_next_osdmap);
|
||||
|
||||
/*
|
||||
* Wait for an osdmap with a given epoch.
|
||||
*
|
||||
* @epoch: epoch to wait for
|
||||
* @timeout: in jiffies, 0 means "wait forever"
|
||||
*/
|
||||
int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch,
|
||||
unsigned long timeout)
|
||||
{
|
||||
@ -308,11 +314,12 @@ int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch,
|
||||
while (monc->have_osdmap < epoch) {
|
||||
mutex_unlock(&monc->mutex);
|
||||
|
||||
if (timeout != 0 && time_after_eq(jiffies, started + timeout))
|
||||
if (timeout && time_after_eq(jiffies, started + timeout))
|
||||
return -ETIMEDOUT;
|
||||
|
||||
ret = wait_event_interruptible_timeout(monc->client->auth_wq,
|
||||
monc->have_osdmap >= epoch, timeout);
|
||||
monc->have_osdmap >= epoch,
|
||||
ceph_timeout_jiffies(timeout));
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
|
@ -1097,7 +1097,7 @@ static void __move_osd_to_lru(struct ceph_osd_client *osdc,
|
||||
BUG_ON(!list_empty(&osd->o_osd_lru));
|
||||
|
||||
list_add_tail(&osd->o_osd_lru, &osdc->osd_lru);
|
||||
osd->lru_ttl = jiffies + osdc->client->options->osd_idle_ttl * HZ;
|
||||
osd->lru_ttl = jiffies + osdc->client->options->osd_idle_ttl;
|
||||
}
|
||||
|
||||
static void maybe_move_osd_to_lru(struct ceph_osd_client *osdc,
|
||||
@ -1208,7 +1208,7 @@ static struct ceph_osd *__lookup_osd(struct ceph_osd_client *osdc, int o)
|
||||
static void __schedule_osd_timeout(struct ceph_osd_client *osdc)
|
||||
{
|
||||
schedule_delayed_work(&osdc->timeout_work,
|
||||
osdc->client->options->osd_keepalive_timeout * HZ);
|
||||
osdc->client->options->osd_keepalive_timeout);
|
||||
}
|
||||
|
||||
static void __cancel_osd_timeout(struct ceph_osd_client *osdc)
|
||||
@ -1576,10 +1576,9 @@ static void handle_timeout(struct work_struct *work)
|
||||
{
|
||||
struct ceph_osd_client *osdc =
|
||||
container_of(work, struct ceph_osd_client, timeout_work.work);
|
||||
struct ceph_options *opts = osdc->client->options;
|
||||
struct ceph_osd_request *req;
|
||||
struct ceph_osd *osd;
|
||||
unsigned long keepalive =
|
||||
osdc->client->options->osd_keepalive_timeout * HZ;
|
||||
struct list_head slow_osds;
|
||||
dout("timeout\n");
|
||||
down_read(&osdc->map_sem);
|
||||
@ -1595,7 +1594,8 @@ static void handle_timeout(struct work_struct *work)
|
||||
*/
|
||||
INIT_LIST_HEAD(&slow_osds);
|
||||
list_for_each_entry(req, &osdc->req_lru, r_req_lru_item) {
|
||||
if (time_before(jiffies, req->r_stamp + keepalive))
|
||||
if (time_before(jiffies,
|
||||
req->r_stamp + opts->osd_keepalive_timeout))
|
||||
break;
|
||||
|
||||
osd = req->r_osd;
|
||||
@ -1622,8 +1622,7 @@ static void handle_osds_timeout(struct work_struct *work)
|
||||
struct ceph_osd_client *osdc =
|
||||
container_of(work, struct ceph_osd_client,
|
||||
osds_timeout_work.work);
|
||||
unsigned long delay =
|
||||
osdc->client->options->osd_idle_ttl * HZ >> 2;
|
||||
unsigned long delay = osdc->client->options->osd_idle_ttl / 4;
|
||||
|
||||
dout("osds timeout\n");
|
||||
down_read(&osdc->map_sem);
|
||||
@ -2628,7 +2627,7 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
|
||||
osdc->event_count = 0;
|
||||
|
||||
schedule_delayed_work(&osdc->osds_timeout_work,
|
||||
round_jiffies_relative(osdc->client->options->osd_idle_ttl * HZ));
|
||||
round_jiffies_relative(osdc->client->options->osd_idle_ttl));
|
||||
|
||||
err = -ENOMEM;
|
||||
osdc->req_mempool = mempool_create_kmalloc_pool(10,
|
||||
|
Loading…
x
Reference in New Issue
Block a user