A bunch of fixes and follow-ups for -rc1 Luminous patches: issues with
->reencode_message() and last minute RADOS semantic changes in v12.1.2. -----BEGIN PGP SIGNATURE----- Version: GnuPG v2 iQEcBAABCAAGBQJZhI6QAAoJEEp/3jgCEfOLeQYH/0b92RFzmsqwPI+U7iXdD06O r0EXbT5dydMngJkWz/i3jBX8cMBvZyNhBh77VPDYXoFUp8//8uv5w73BkXe8JE08 +gLZU4oP/k7kl/YBYXgCcJYj7eIBFzqNvsWurKKHY/X3xrvEZ0HT+oub92xOUgRM IBnZb1gZ4TJQT1MxqKOwb5aqcxaXlrOGfX7Di0aU3PFQXj5VnBI25NUQF1bgd9+A MbhHpob6cbWZWzVdf0fTl28q9pStq4qggevRSM/5ZH/bETO8C80XYTuaPoLcQ0pY VfpwgWIAPwotw9KU7W+ane13BURw76+pWMHaUZgiJKRyuRMBOT/gaER+AUeuR1o= =XH7K -----END PGP SIGNATURE----- Merge tag 'ceph-for-4.13-rc4' of git://github.com/ceph/ceph-client Pull ceph fixes from Ilya Dryomov: "A bunch of fixes and follow-ups for -rc1 Luminous patches: issues with ->reencode_message() and last minute RADOS semantic changes in v12.1.2" * tag 'ceph-for-4.13-rc4' of git://github.com/ceph/ceph-client: libceph: make RECOVERY_DELETES feature create a new interval libceph: upmap semantic changes crush: assume weight_set != null implies weight_set_size > 0 libceph: fallback for when there isn't a pool-specific choose_arg libceph: don't call ->reencode_message() more than once per message libceph: make encode_request_*() work with r_mempool requests
This commit is contained in:
commit
c63716ab4d
@ -148,6 +148,7 @@ struct ceph_osd_request_target {
|
||||
int size;
|
||||
int min_size;
|
||||
bool sort_bitwise;
|
||||
bool recovery_deletes;
|
||||
|
||||
unsigned int flags; /* CEPH_OSD_FLAG_* */
|
||||
bool paused;
|
||||
|
@ -272,6 +272,8 @@ bool ceph_is_new_interval(const struct ceph_osds *old_acting,
|
||||
u32 new_pg_num,
|
||||
bool old_sort_bitwise,
|
||||
bool new_sort_bitwise,
|
||||
bool old_recovery_deletes,
|
||||
bool new_recovery_deletes,
|
||||
const struct ceph_pg *pgid);
|
||||
bool ceph_osds_changed(const struct ceph_osds *old_acting,
|
||||
const struct ceph_osds *new_acting,
|
||||
|
@ -158,6 +158,10 @@ extern const char *ceph_osd_state_name(int s);
|
||||
#define CEPH_OSDMAP_NOTIERAGENT (1<<13) /* disable tiering agent */
|
||||
#define CEPH_OSDMAP_NOREBALANCE (1<<14) /* block osd backfill unless pg is degraded */
|
||||
#define CEPH_OSDMAP_SORTBITWISE (1<<15) /* use bitwise hobject_t sort */
|
||||
#define CEPH_OSDMAP_REQUIRE_JEWEL (1<<16) /* require jewel for booting osds */
|
||||
#define CEPH_OSDMAP_REQUIRE_KRAKEN (1<<17) /* require kraken for booting osds */
|
||||
#define CEPH_OSDMAP_REQUIRE_LUMINOUS (1<<18) /* require l for booting osds */
|
||||
#define CEPH_OSDMAP_RECOVERY_DELETES (1<<19) /* deletes performed during recovery instead of peering */
|
||||
|
||||
/*
|
||||
* The error code to return when an OSD can't handle a write
|
||||
|
@ -193,7 +193,7 @@ struct crush_choose_arg {
|
||||
struct crush_choose_arg_map {
|
||||
#ifdef __KERNEL__
|
||||
struct rb_node node;
|
||||
u64 choose_args_index;
|
||||
s64 choose_args_index;
|
||||
#endif
|
||||
struct crush_choose_arg *args; /*!< replacement for each bucket
|
||||
in the crushmap */
|
||||
|
@ -306,7 +306,7 @@ static __u32 *get_choose_arg_weights(const struct crush_bucket_straw2 *bucket,
|
||||
const struct crush_choose_arg *arg,
|
||||
int position)
|
||||
{
|
||||
if (!arg || !arg->weight_set || arg->weight_set_size == 0)
|
||||
if (!arg || !arg->weight_set)
|
||||
return bucket->item_weights;
|
||||
|
||||
if (position >= arg->weight_set_size)
|
||||
|
@ -1287,10 +1287,10 @@ static void prepare_write_message(struct ceph_connection *con)
|
||||
if (m->needs_out_seq) {
|
||||
m->hdr.seq = cpu_to_le64(++con->out_seq);
|
||||
m->needs_out_seq = false;
|
||||
}
|
||||
|
||||
if (con->ops->reencode_message)
|
||||
con->ops->reencode_message(m);
|
||||
if (con->ops->reencode_message)
|
||||
con->ops->reencode_message(m);
|
||||
}
|
||||
|
||||
dout("prepare_write_message %p seq %lld type %d len %d+%d+%zd\n",
|
||||
m, con->out_seq, le16_to_cpu(m->hdr.type),
|
||||
|
@ -1337,6 +1337,8 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
|
||||
bool legacy_change;
|
||||
bool split = false;
|
||||
bool sort_bitwise = ceph_osdmap_flag(osdc, CEPH_OSDMAP_SORTBITWISE);
|
||||
bool recovery_deletes = ceph_osdmap_flag(osdc,
|
||||
CEPH_OSDMAP_RECOVERY_DELETES);
|
||||
enum calc_target_result ct_res;
|
||||
int ret;
|
||||
|
||||
@ -1399,6 +1401,8 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
|
||||
pi->pg_num,
|
||||
t->sort_bitwise,
|
||||
sort_bitwise,
|
||||
t->recovery_deletes,
|
||||
recovery_deletes,
|
||||
&last_pgid))
|
||||
force_resend = true;
|
||||
|
||||
@ -1421,6 +1425,7 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
|
||||
t->pg_num = pi->pg_num;
|
||||
t->pg_num_mask = pi->pg_num_mask;
|
||||
t->sort_bitwise = sort_bitwise;
|
||||
t->recovery_deletes = recovery_deletes;
|
||||
|
||||
t->osd = acting.primary;
|
||||
}
|
||||
@ -1918,10 +1923,12 @@ static void encode_request_partial(struct ceph_osd_request *req,
|
||||
}
|
||||
|
||||
ceph_encode_32(&p, req->r_attempts); /* retry_attempt */
|
||||
BUG_ON(p != end - 8); /* space for features */
|
||||
BUG_ON(p > end - 8); /* space for features */
|
||||
|
||||
msg->hdr.version = cpu_to_le16(8); /* MOSDOp v8 */
|
||||
/* front_len is finalized in encode_request_finish() */
|
||||
msg->front.iov_len = p - msg->front.iov_base;
|
||||
msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
|
||||
msg->hdr.data_len = cpu_to_le32(data_len);
|
||||
/*
|
||||
* The header "data_off" is a hint to the receiver allowing it
|
||||
@ -1937,11 +1944,12 @@ static void encode_request_partial(struct ceph_osd_request *req,
|
||||
static void encode_request_finish(struct ceph_msg *msg)
|
||||
{
|
||||
void *p = msg->front.iov_base;
|
||||
void *const partial_end = p + msg->front.iov_len;
|
||||
void *const end = p + msg->front_alloc_len;
|
||||
|
||||
if (CEPH_HAVE_FEATURE(msg->con->peer_features, RESEND_ON_SPLIT)) {
|
||||
/* luminous OSD -- encode features and be done */
|
||||
p = end - 8;
|
||||
p = partial_end;
|
||||
ceph_encode_64(&p, msg->con->peer_features);
|
||||
} else {
|
||||
struct {
|
||||
@ -1984,7 +1992,7 @@ static void encode_request_finish(struct ceph_msg *msg)
|
||||
oid_len = p - oid;
|
||||
|
||||
tail = p;
|
||||
tail_len = (end - p) - 8;
|
||||
tail_len = partial_end - p;
|
||||
|
||||
p = msg->front.iov_base;
|
||||
ceph_encode_copy(&p, &head.client_inc, sizeof(head.client_inc));
|
||||
|
@ -295,6 +295,10 @@ static int decode_choose_args(void **p, void *end, struct crush_map *c)
|
||||
ret = decode_choose_arg(p, end, arg);
|
||||
if (ret)
|
||||
goto fail;
|
||||
|
||||
if (arg->ids_size &&
|
||||
arg->ids_size != c->buckets[bucket_index]->size)
|
||||
goto e_inval;
|
||||
}
|
||||
|
||||
insert_choose_arg_map(&c->choose_args, arg_map);
|
||||
@ -2078,6 +2082,8 @@ bool ceph_is_new_interval(const struct ceph_osds *old_acting,
|
||||
u32 new_pg_num,
|
||||
bool old_sort_bitwise,
|
||||
bool new_sort_bitwise,
|
||||
bool old_recovery_deletes,
|
||||
bool new_recovery_deletes,
|
||||
const struct ceph_pg *pgid)
|
||||
{
|
||||
return !osds_equal(old_acting, new_acting) ||
|
||||
@ -2085,7 +2091,8 @@ bool ceph_is_new_interval(const struct ceph_osds *old_acting,
|
||||
old_size != new_size ||
|
||||
old_min_size != new_min_size ||
|
||||
ceph_pg_is_split(pgid, old_pg_num, new_pg_num) ||
|
||||
old_sort_bitwise != new_sort_bitwise;
|
||||
old_sort_bitwise != new_sort_bitwise ||
|
||||
old_recovery_deletes != new_recovery_deletes;
|
||||
}
|
||||
|
||||
static int calc_pg_rank(int osd, const struct ceph_osds *acting)
|
||||
@ -2301,10 +2308,17 @@ static u32 raw_pg_to_pps(struct ceph_pg_pool_info *pi,
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Magic value used for a "default" fallback choose_args, used if the
|
||||
* crush_choose_arg_map passed to do_crush() does not exist. If this
|
||||
* also doesn't exist, fall back to canonical weights.
|
||||
*/
|
||||
#define CEPH_DEFAULT_CHOOSE_ARGS (-1)	/* parenthesized so the minus sign cannot bind to adjacent tokens at the use site */
|
||||
|
||||
static int do_crush(struct ceph_osdmap *map, int ruleno, int x,
|
||||
int *result, int result_max,
|
||||
const __u32 *weight, int weight_max,
|
||||
u64 choose_args_index)
|
||||
s64 choose_args_index)
|
||||
{
|
||||
struct crush_choose_arg_map *arg_map;
|
||||
int r;
|
||||
@ -2313,6 +2327,9 @@ static int do_crush(struct ceph_osdmap *map, int ruleno, int x,
|
||||
|
||||
arg_map = lookup_choose_arg_map(&map->crush->choose_args,
|
||||
choose_args_index);
|
||||
if (!arg_map)
|
||||
arg_map = lookup_choose_arg_map(&map->crush->choose_args,
|
||||
CEPH_DEFAULT_CHOOSE_ARGS);
|
||||
|
||||
mutex_lock(&map->crush_workspace_mutex);
|
||||
r = crush_do_rule(map->crush, ruleno, x, result, result_max,
|
||||
@ -2423,40 +2440,23 @@ static void apply_upmap(struct ceph_osdmap *osdmap,
|
||||
for (i = 0; i < pg->pg_upmap.len; i++)
|
||||
raw->osds[i] = pg->pg_upmap.osds[i];
|
||||
raw->size = pg->pg_upmap.len;
|
||||
return;
|
||||
/* check and apply pg_upmap_items, if any */
|
||||
}
|
||||
|
||||
pg = lookup_pg_mapping(&osdmap->pg_upmap_items, pgid);
|
||||
if (pg) {
|
||||
/*
|
||||
* Note: this approach does not allow a bidirectional swap,
|
||||
* e.g., [[1,2],[2,1]] applied to [0,1,2] -> [0,2,1].
|
||||
*/
|
||||
for (i = 0; i < pg->pg_upmap_items.len; i++) {
|
||||
int from = pg->pg_upmap_items.from_to[i][0];
|
||||
int to = pg->pg_upmap_items.from_to[i][1];
|
||||
int pos = -1;
|
||||
bool exists = false;
|
||||
for (i = 0; i < raw->size; i++) {
|
||||
for (j = 0; j < pg->pg_upmap_items.len; j++) {
|
||||
int from = pg->pg_upmap_items.from_to[j][0];
|
||||
int to = pg->pg_upmap_items.from_to[j][1];
|
||||
|
||||
/* make sure replacement doesn't already appear */
|
||||
for (j = 0; j < raw->size; j++) {
|
||||
int osd = raw->osds[j];
|
||||
|
||||
if (osd == to) {
|
||||
exists = true;
|
||||
if (from == raw->osds[i]) {
|
||||
if (!(to != CRUSH_ITEM_NONE &&
|
||||
to < osdmap->max_osd &&
|
||||
osdmap->osd_weight[to] == 0))
|
||||
raw->osds[i] = to;
|
||||
break;
|
||||
}
|
||||
/* ignore mapping if target is marked out */
|
||||
if (osd == from && pos < 0 &&
|
||||
!(to != CRUSH_ITEM_NONE &&
|
||||
to < osdmap->max_osd &&
|
||||
osdmap->osd_weight[to] == 0)) {
|
||||
pos = j;
|
||||
}
|
||||
}
|
||||
if (!exists && pos >= 0) {
|
||||
raw->osds[pos] = to;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user