A fairly quiet round: a couple of messenger performance improvements
from myself and a few cap handling fixes from Zheng. -----BEGIN PGP SIGNATURE----- iQFHBAABCAAxFiEEydHwtzie9C7TfviiSn/eOAIR84sFAlwuI7ATHGlkcnlvbW92 QGdtYWlsLmNvbQAKCRBKf944AhHzizcvB/9GqpAzR+Yy1iIQGNeijPSeuXsrlcQF WErfaG8tUwZY3vqv3+OSZBwuMgq6wAyCo3wJmh0GCZoy02WLJbPB/G8AiHtoZUAh wAWfL8feZkzx3L7JV0OrPG0GGYkhKu5PebM4rq3cXvlL0OiTKPs8bmbTvh0mSv3z gH1odW0j2mAb1/3tqm9M5+7XhrGSnmSfA028NeKx6I4nE0ONd9BEcHZDoRBBQeNf tgyxH4IJuuQ+x4/FKIn6+hBbMYiVrTBlz4wQHrJvvzDUeCkWu+E8JZ4utxxNdfmS uGsPDRqi4LSMwt1q0HLHhkCP0lg5yf9NByGoy+VH5/gS8ma6be9+IbfX =puaN -----END PGP SIGNATURE----- Merge tag 'ceph-for-4.21-rc1' of git://github.com/ceph/ceph-client Pull ceph updates from Ilya Dryomov: "A fairly quiet round: a couple of messenger performance improvements from myself and a few cap handling fixes from Zheng" * tag 'ceph-for-4.21-rc1' of git://github.com/ceph/ceph-client: ceph: don't encode inode pathes into reconnect message ceph: update wanted caps after resuming stale session ceph: skip updating 'wanted' caps if caps are already issued ceph: don't request excl caps when mount is readonly ceph: don't update importing cap's mseq when handing cap export libceph: switch more to bool in ceph_tcp_sendmsg() libceph: use MSG_SENDPAGE_NOTLAST with ceph_tcp_sendpage() libceph: use sock_no_sendpage() as a fallback in ceph_tcp_sendpage() libceph: drop last_piece logic from write_partial_message_data() ceph: remove redundant assignment ceph: cleanup splice_dentry()
This commit is contained in:
commit
c7eaf342ec
@ -657,6 +657,9 @@ void ceph_add_cap(struct inode *inode,
|
||||
session->s_nr_caps++;
|
||||
spin_unlock(&session->s_cap_lock);
|
||||
} else {
|
||||
if (cap->cap_gen < session->s_cap_gen)
|
||||
cap->issued = cap->implemented = CEPH_CAP_PIN;
|
||||
|
||||
/*
|
||||
* auth mds of the inode changed. we received the cap export
|
||||
* message, but still haven't received the cap import message.
|
||||
@ -1855,14 +1858,17 @@ retry_locked:
|
||||
retain |= CEPH_CAP_ANY; /* be greedy */
|
||||
} else if (S_ISDIR(inode->i_mode) &&
|
||||
(issued & CEPH_CAP_FILE_SHARED) &&
|
||||
__ceph_dir_is_complete(ci)) {
|
||||
__ceph_dir_is_complete(ci)) {
|
||||
/*
|
||||
* If a directory is complete, we want to keep
|
||||
* the exclusive cap, so that MDS does not end up
|
||||
* revoking the shared cap on every create/unlink
|
||||
* operation.
|
||||
*/
|
||||
want = CEPH_CAP_ANY_SHARED | CEPH_CAP_FILE_EXCL;
|
||||
if (IS_RDONLY(inode))
|
||||
want = CEPH_CAP_ANY_SHARED;
|
||||
else
|
||||
want = CEPH_CAP_ANY_SHARED | CEPH_CAP_FILE_EXCL;
|
||||
retain |= want;
|
||||
} else {
|
||||
|
||||
@ -1970,8 +1976,7 @@ retry_locked:
|
||||
goto ack;
|
||||
|
||||
/* things we might delay */
|
||||
if ((cap->issued & ~retain) == 0 &&
|
||||
cap->mds_wanted == want)
|
||||
if ((cap->issued & ~retain) == 0)
|
||||
continue; /* nope, all good */
|
||||
|
||||
if (no_delay)
|
||||
@ -3048,7 +3053,8 @@ static void handle_cap_grant(struct inode *inode,
|
||||
int used, wanted, dirty;
|
||||
u64 size = le64_to_cpu(grant->size);
|
||||
u64 max_size = le64_to_cpu(grant->max_size);
|
||||
int check_caps = 0;
|
||||
unsigned char check_caps = 0;
|
||||
bool was_stale = cap->cap_gen < session->s_cap_gen;
|
||||
bool wake = false;
|
||||
bool writeback = false;
|
||||
bool queue_trunc = false;
|
||||
@ -3062,21 +3068,6 @@ static void handle_cap_grant(struct inode *inode,
|
||||
inode->i_size);
|
||||
|
||||
|
||||
/*
|
||||
* auth mds of the inode changed. we received the cap export message,
|
||||
* but still haven't received the cap import message. handle_cap_export
|
||||
* updated the new auth MDS' cap.
|
||||
*
|
||||
* "ceph_seq_cmp(seq, cap->seq) <= 0" means we are processing a message
|
||||
* that was sent before the cap import message. So don't remove caps.
|
||||
*/
|
||||
if (ceph_seq_cmp(seq, cap->seq) <= 0) {
|
||||
WARN_ON(cap != ci->i_auth_cap);
|
||||
WARN_ON(cap->cap_id != le64_to_cpu(grant->cap_id));
|
||||
seq = cap->seq;
|
||||
newcaps |= cap->issued;
|
||||
}
|
||||
|
||||
/*
|
||||
* If CACHE is being revoked, and we have no dirty buffers,
|
||||
* try to invalidate (once). (If there are dirty buffers, we
|
||||
@ -3096,6 +3087,24 @@ static void handle_cap_grant(struct inode *inode,
|
||||
}
|
||||
}
|
||||
|
||||
if (was_stale)
|
||||
cap->issued = cap->implemented = CEPH_CAP_PIN;
|
||||
|
||||
/*
|
||||
* auth mds of the inode changed. we received the cap export message,
|
||||
* but still haven't received the cap import message. handle_cap_export
|
||||
* updated the new auth MDS' cap.
|
||||
*
|
||||
* "ceph_seq_cmp(seq, cap->seq) <= 0" means we are processing a message
|
||||
* that was sent before the cap import message. So don't remove caps.
|
||||
*/
|
||||
if (ceph_seq_cmp(seq, cap->seq) <= 0) {
|
||||
WARN_ON(cap != ci->i_auth_cap);
|
||||
WARN_ON(cap->cap_id != le64_to_cpu(grant->cap_id));
|
||||
seq = cap->seq;
|
||||
newcaps |= cap->issued;
|
||||
}
|
||||
|
||||
/* side effects now are allowed */
|
||||
cap->cap_gen = session->s_cap_gen;
|
||||
cap->seq = seq;
|
||||
@ -3200,13 +3209,20 @@ static void handle_cap_grant(struct inode *inode,
|
||||
ceph_cap_string(wanted),
|
||||
ceph_cap_string(used),
|
||||
ceph_cap_string(dirty));
|
||||
if (wanted != le32_to_cpu(grant->wanted)) {
|
||||
dout("mds wanted %s -> %s\n",
|
||||
ceph_cap_string(le32_to_cpu(grant->wanted)),
|
||||
ceph_cap_string(wanted));
|
||||
/* imported cap may not have correct mds_wanted */
|
||||
if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT)
|
||||
check_caps = 1;
|
||||
|
||||
if ((was_stale || le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) &&
|
||||
(wanted & ~(cap->mds_wanted | newcaps))) {
|
||||
/*
|
||||
* If mds is importing cap, prior cap messages that update
|
||||
* 'wanted' may get dropped by mds (migrate seq mismatch).
|
||||
*
|
||||
* We don't send cap message to update 'wanted' if what we
|
||||
* want are already issued. If mds revokes caps, cap message
|
||||
* that releases caps also tells mds what we want. But if
|
||||
* caps got revoked by mds forcibly (session stale), we may
|
||||
* not have told mds what we want.
|
||||
*/
|
||||
check_caps = 1;
|
||||
}
|
||||
|
||||
/* revocation, grant, or no-op? */
|
||||
@ -3539,9 +3555,9 @@ retry:
|
||||
goto out_unlock;
|
||||
|
||||
if (target < 0) {
|
||||
__ceph_remove_cap(cap, false);
|
||||
if (!ci->i_auth_cap)
|
||||
if (cap->mds_wanted | cap->issued)
|
||||
ci->i_ceph_flags |= CEPH_I_CAP_DROPPED;
|
||||
__ceph_remove_cap(cap, false);
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
@ -3569,7 +3585,6 @@ retry:
|
||||
tcap->cap_id = t_cap_id;
|
||||
tcap->seq = t_seq - 1;
|
||||
tcap->issue_seq = t_seq - 1;
|
||||
tcap->mseq = t_mseq;
|
||||
tcap->issued |= issued;
|
||||
tcap->implemented |= issued;
|
||||
if (cap == ci->i_auth_cap)
|
||||
|
@ -1098,8 +1098,9 @@ out_unlock:
|
||||
* splice a dentry to an inode.
|
||||
* caller must hold directory i_mutex for this to be safe.
|
||||
*/
|
||||
static struct dentry *splice_dentry(struct dentry *dn, struct inode *in)
|
||||
static int splice_dentry(struct dentry **pdn, struct inode *in)
|
||||
{
|
||||
struct dentry *dn = *pdn;
|
||||
struct dentry *realdn;
|
||||
|
||||
BUG_ON(d_inode(dn));
|
||||
@ -1132,28 +1133,23 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in)
|
||||
if (IS_ERR(realdn)) {
|
||||
pr_err("splice_dentry error %ld %p inode %p ino %llx.%llx\n",
|
||||
PTR_ERR(realdn), dn, in, ceph_vinop(in));
|
||||
dn = realdn;
|
||||
/*
|
||||
* Caller should release 'dn' in the case of error.
|
||||
* If 'req->r_dentry' is passed to this function,
|
||||
* caller should leave 'req->r_dentry' untouched.
|
||||
*/
|
||||
goto out;
|
||||
} else if (realdn) {
|
||||
return PTR_ERR(realdn);
|
||||
}
|
||||
|
||||
if (realdn) {
|
||||
dout("dn %p (%d) spliced with %p (%d) "
|
||||
"inode %p ino %llx.%llx\n",
|
||||
dn, d_count(dn),
|
||||
realdn, d_count(realdn),
|
||||
d_inode(realdn), ceph_vinop(d_inode(realdn)));
|
||||
dput(dn);
|
||||
dn = realdn;
|
||||
*pdn = realdn;
|
||||
} else {
|
||||
BUG_ON(!ceph_dentry(dn));
|
||||
dout("dn %p attached to %p ino %llx.%llx\n",
|
||||
dn, d_inode(dn), ceph_vinop(d_inode(dn)));
|
||||
}
|
||||
out:
|
||||
return dn;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1340,7 +1336,12 @@ retry_lookup:
|
||||
dout("dn %p gets new offset %lld\n", req->r_old_dentry,
|
||||
ceph_dentry(req->r_old_dentry)->offset);
|
||||
|
||||
dn = req->r_old_dentry; /* use old_dentry */
|
||||
/* swap r_dentry and r_old_dentry in case that
|
||||
* splice_dentry() gets called later. This is safe
|
||||
* because no other place will use them */
|
||||
req->r_dentry = req->r_old_dentry;
|
||||
req->r_old_dentry = dn;
|
||||
dn = req->r_dentry;
|
||||
}
|
||||
|
||||
/* null dentry? */
|
||||
@ -1365,12 +1366,10 @@ retry_lookup:
|
||||
if (d_really_is_negative(dn)) {
|
||||
ceph_dir_clear_ordered(dir);
|
||||
ihold(in);
|
||||
dn = splice_dentry(dn, in);
|
||||
if (IS_ERR(dn)) {
|
||||
err = PTR_ERR(dn);
|
||||
err = splice_dentry(&req->r_dentry, in);
|
||||
if (err < 0)
|
||||
goto done;
|
||||
}
|
||||
req->r_dentry = dn; /* may have spliced */
|
||||
dn = req->r_dentry; /* may have spliced */
|
||||
} else if (d_really_is_positive(dn) && d_inode(dn) != in) {
|
||||
dout(" %p links to %p %llx.%llx, not %llx.%llx\n",
|
||||
dn, d_inode(dn), ceph_vinop(d_inode(dn)),
|
||||
@ -1390,22 +1389,18 @@ retry_lookup:
|
||||
} else if ((req->r_op == CEPH_MDS_OP_LOOKUPSNAP ||
|
||||
req->r_op == CEPH_MDS_OP_MKSNAP) &&
|
||||
!test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) {
|
||||
struct dentry *dn = req->r_dentry;
|
||||
struct inode *dir = req->r_parent;
|
||||
|
||||
/* fill out a snapdir LOOKUPSNAP dentry */
|
||||
BUG_ON(!dn);
|
||||
BUG_ON(!dir);
|
||||
BUG_ON(ceph_snap(dir) != CEPH_SNAPDIR);
|
||||
dout(" linking snapped dir %p to dn %p\n", in, dn);
|
||||
BUG_ON(!req->r_dentry);
|
||||
dout(" linking snapped dir %p to dn %p\n", in, req->r_dentry);
|
||||
ceph_dir_clear_ordered(dir);
|
||||
ihold(in);
|
||||
dn = splice_dentry(dn, in);
|
||||
if (IS_ERR(dn)) {
|
||||
err = PTR_ERR(dn);
|
||||
err = splice_dentry(&req->r_dentry, in);
|
||||
if (err < 0)
|
||||
goto done;
|
||||
}
|
||||
req->r_dentry = dn; /* may have spliced */
|
||||
} else if (rinfo->head->is_dentry) {
|
||||
struct ceph_vino *ptvino = NULL;
|
||||
|
||||
@ -1669,8 +1664,6 @@ retry_lookup:
|
||||
}
|
||||
|
||||
if (d_really_is_negative(dn)) {
|
||||
struct dentry *realdn;
|
||||
|
||||
if (ceph_security_xattr_deadlock(in)) {
|
||||
dout(" skip splicing dn %p to inode %p"
|
||||
" (security xattr deadlock)\n", dn, in);
|
||||
@ -1679,13 +1672,9 @@ retry_lookup:
|
||||
goto next_item;
|
||||
}
|
||||
|
||||
realdn = splice_dentry(dn, in);
|
||||
if (IS_ERR(realdn)) {
|
||||
err = PTR_ERR(realdn);
|
||||
d_drop(dn);
|
||||
err = splice_dentry(&dn, in);
|
||||
if (err < 0)
|
||||
goto next_item;
|
||||
}
|
||||
dn = realdn;
|
||||
}
|
||||
|
||||
ceph_dentry(dn)->offset = rde->offset;
|
||||
@ -1701,8 +1690,7 @@ retry_lookup:
|
||||
err = ret;
|
||||
}
|
||||
next_item:
|
||||
if (dn)
|
||||
dput(dn);
|
||||
dput(dn);
|
||||
}
|
||||
out:
|
||||
if (err == 0 && skipped == 0) {
|
||||
|
@ -1232,13 +1232,13 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
|
||||
dout("removing cap %p, ci is %p, inode is %p\n",
|
||||
cap, ci, &ci->vfs_inode);
|
||||
spin_lock(&ci->i_ceph_lock);
|
||||
if (cap->mds_wanted | cap->issued)
|
||||
ci->i_ceph_flags |= CEPH_I_CAP_DROPPED;
|
||||
__ceph_remove_cap(cap, false);
|
||||
if (!ci->i_auth_cap) {
|
||||
struct ceph_cap_flush *cf;
|
||||
struct ceph_mds_client *mdsc = fsc->mdsc;
|
||||
|
||||
ci->i_ceph_flags |= CEPH_I_CAP_DROPPED;
|
||||
|
||||
if (ci->i_wrbuffer_ref > 0 &&
|
||||
READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
|
||||
invalidate = true;
|
||||
@ -1355,6 +1355,12 @@ static void remove_session_caps(struct ceph_mds_session *session)
|
||||
dispose_cap_releases(session->s_mdsc, &dispose);
|
||||
}
|
||||
|
||||
enum {
|
||||
RECONNECT,
|
||||
RENEWCAPS,
|
||||
FORCE_RO,
|
||||
};
|
||||
|
||||
/*
|
||||
* wake up any threads waiting on this session's caps. if the cap is
|
||||
* old (didn't get renewed on the client reconnect), remove it now.
|
||||
@ -1365,23 +1371,34 @@ static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap,
|
||||
void *arg)
|
||||
{
|
||||
struct ceph_inode_info *ci = ceph_inode(inode);
|
||||
unsigned long ev = (unsigned long)arg;
|
||||
|
||||
if (arg) {
|
||||
if (ev == RECONNECT) {
|
||||
spin_lock(&ci->i_ceph_lock);
|
||||
ci->i_wanted_max_size = 0;
|
||||
ci->i_requested_max_size = 0;
|
||||
spin_unlock(&ci->i_ceph_lock);
|
||||
} else if (ev == RENEWCAPS) {
|
||||
if (cap->cap_gen < cap->session->s_cap_gen) {
|
||||
/* mds did not re-issue stale cap */
|
||||
spin_lock(&ci->i_ceph_lock);
|
||||
cap->issued = cap->implemented = CEPH_CAP_PIN;
|
||||
/* make sure mds knows what we want */
|
||||
if (__ceph_caps_file_wanted(ci) & ~cap->mds_wanted)
|
||||
ci->i_ceph_flags |= CEPH_I_CAP_DROPPED;
|
||||
spin_unlock(&ci->i_ceph_lock);
|
||||
}
|
||||
} else if (ev == FORCE_RO) {
|
||||
}
|
||||
wake_up_all(&ci->i_cap_wq);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void wake_up_session_caps(struct ceph_mds_session *session,
|
||||
int reconnect)
|
||||
static void wake_up_session_caps(struct ceph_mds_session *session, int ev)
|
||||
{
|
||||
dout("wake_up_session_caps %p mds%d\n", session, session->s_mds);
|
||||
iterate_session_caps(session, wake_up_session_cb,
|
||||
(void *)(unsigned long)reconnect);
|
||||
(void *)(unsigned long)ev);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1466,7 +1483,7 @@ static void renewed_caps(struct ceph_mds_client *mdsc,
|
||||
spin_unlock(&session->s_cap_lock);
|
||||
|
||||
if (wake)
|
||||
wake_up_session_caps(session, 0);
|
||||
wake_up_session_caps(session, RENEWCAPS);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2847,7 +2864,7 @@ static void handle_session(struct ceph_mds_session *session,
|
||||
spin_lock(&session->s_cap_lock);
|
||||
session->s_readonly = true;
|
||||
spin_unlock(&session->s_cap_lock);
|
||||
wake_up_session_caps(session, 0);
|
||||
wake_up_session_caps(session, FORCE_RO);
|
||||
break;
|
||||
|
||||
case CEPH_SESSION_REJECT:
|
||||
@ -2943,11 +2960,8 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
|
||||
struct ceph_inode_info *ci = cap->ci;
|
||||
struct ceph_reconnect_state *recon_state = arg;
|
||||
struct ceph_pagelist *pagelist = recon_state->pagelist;
|
||||
char *path;
|
||||
int pathlen, err;
|
||||
u64 pathbase;
|
||||
int err;
|
||||
u64 snap_follows;
|
||||
struct dentry *dentry;
|
||||
|
||||
dout(" adding %p ino %llx.%llx cap %p %lld %s\n",
|
||||
inode, ceph_vinop(inode), cap, cap->cap_id,
|
||||
@ -2956,19 +2970,6 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
dentry = d_find_alias(inode);
|
||||
if (dentry) {
|
||||
path = ceph_mdsc_build_path(dentry, &pathlen, &pathbase, 0);
|
||||
if (IS_ERR(path)) {
|
||||
err = PTR_ERR(path);
|
||||
goto out_dput;
|
||||
}
|
||||
} else {
|
||||
path = NULL;
|
||||
pathlen = 0;
|
||||
pathbase = 0;
|
||||
}
|
||||
|
||||
spin_lock(&ci->i_ceph_lock);
|
||||
cap->seq = 0; /* reset cap seq */
|
||||
cap->issue_seq = 0; /* and issue_seq */
|
||||
@ -2980,7 +2981,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
|
||||
rec.v2.wanted = cpu_to_le32(__ceph_caps_wanted(ci));
|
||||
rec.v2.issued = cpu_to_le32(cap->issued);
|
||||
rec.v2.snaprealm = cpu_to_le64(ci->i_snap_realm->ino);
|
||||
rec.v2.pathbase = cpu_to_le64(pathbase);
|
||||
rec.v2.pathbase = 0;
|
||||
rec.v2.flock_len = (__force __le32)
|
||||
((ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) ? 0 : 1);
|
||||
} else {
|
||||
@ -2991,7 +2992,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
|
||||
ceph_encode_timespec64(&rec.v1.mtime, &inode->i_mtime);
|
||||
ceph_encode_timespec64(&rec.v1.atime, &inode->i_atime);
|
||||
rec.v1.snaprealm = cpu_to_le64(ci->i_snap_realm->ino);
|
||||
rec.v1.pathbase = cpu_to_le64(pathbase);
|
||||
rec.v1.pathbase = 0;
|
||||
}
|
||||
|
||||
if (list_empty(&ci->i_cap_snaps)) {
|
||||
@ -3023,7 +3024,7 @@ encode_again:
|
||||
GFP_NOFS);
|
||||
if (!flocks) {
|
||||
err = -ENOMEM;
|
||||
goto out_free;
|
||||
goto out_err;
|
||||
}
|
||||
err = ceph_encode_locks_to_buffer(inode, flocks,
|
||||
num_fcntl_locks,
|
||||
@ -3033,7 +3034,7 @@ encode_again:
|
||||
flocks = NULL;
|
||||
if (err == -ENOSPC)
|
||||
goto encode_again;
|
||||
goto out_free;
|
||||
goto out_err;
|
||||
}
|
||||
} else {
|
||||
kfree(flocks);
|
||||
@ -3053,44 +3054,64 @@ encode_again:
|
||||
sizeof(struct ceph_filelock);
|
||||
rec.v2.flock_len = cpu_to_le32(struct_len);
|
||||
|
||||
struct_len += sizeof(rec.v2);
|
||||
struct_len += sizeof(u32) + pathlen;
|
||||
struct_len += sizeof(u32) + sizeof(rec.v2);
|
||||
|
||||
if (struct_v >= 2)
|
||||
struct_len += sizeof(u64); /* snap_follows */
|
||||
|
||||
total_len += struct_len;
|
||||
err = ceph_pagelist_reserve(pagelist, total_len);
|
||||
|
||||
if (!err) {
|
||||
if (recon_state->msg_version >= 3) {
|
||||
ceph_pagelist_encode_8(pagelist, struct_v);
|
||||
ceph_pagelist_encode_8(pagelist, 1);
|
||||
ceph_pagelist_encode_32(pagelist, struct_len);
|
||||
}
|
||||
ceph_pagelist_encode_string(pagelist, path, pathlen);
|
||||
ceph_pagelist_append(pagelist, &rec, sizeof(rec.v2));
|
||||
ceph_locks_to_pagelist(flocks, pagelist,
|
||||
num_fcntl_locks,
|
||||
num_flock_locks);
|
||||
if (struct_v >= 2)
|
||||
ceph_pagelist_encode_64(pagelist, snap_follows);
|
||||
if (err) {
|
||||
kfree(flocks);
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
if (recon_state->msg_version >= 3) {
|
||||
ceph_pagelist_encode_8(pagelist, struct_v);
|
||||
ceph_pagelist_encode_8(pagelist, 1);
|
||||
ceph_pagelist_encode_32(pagelist, struct_len);
|
||||
}
|
||||
ceph_pagelist_encode_string(pagelist, NULL, 0);
|
||||
ceph_pagelist_append(pagelist, &rec, sizeof(rec.v2));
|
||||
ceph_locks_to_pagelist(flocks, pagelist,
|
||||
num_fcntl_locks, num_flock_locks);
|
||||
if (struct_v >= 2)
|
||||
ceph_pagelist_encode_64(pagelist, snap_follows);
|
||||
|
||||
kfree(flocks);
|
||||
} else {
|
||||
size_t size = sizeof(u32) + pathlen + sizeof(rec.v1);
|
||||
err = ceph_pagelist_reserve(pagelist, size);
|
||||
if (!err) {
|
||||
ceph_pagelist_encode_string(pagelist, path, pathlen);
|
||||
ceph_pagelist_append(pagelist, &rec, sizeof(rec.v1));
|
||||
u64 pathbase = 0;
|
||||
int pathlen = 0;
|
||||
char *path = NULL;
|
||||
struct dentry *dentry;
|
||||
|
||||
dentry = d_find_alias(inode);
|
||||
if (dentry) {
|
||||
path = ceph_mdsc_build_path(dentry,
|
||||
&pathlen, &pathbase, 0);
|
||||
dput(dentry);
|
||||
if (IS_ERR(path)) {
|
||||
err = PTR_ERR(path);
|
||||
goto out_err;
|
||||
}
|
||||
rec.v1.pathbase = cpu_to_le64(pathbase);
|
||||
}
|
||||
|
||||
err = ceph_pagelist_reserve(pagelist,
|
||||
pathlen + sizeof(u32) + sizeof(rec.v1));
|
||||
if (err) {
|
||||
kfree(path);
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
ceph_pagelist_encode_string(pagelist, path, pathlen);
|
||||
ceph_pagelist_append(pagelist, &rec, sizeof(rec.v1));
|
||||
|
||||
kfree(path);
|
||||
}
|
||||
|
||||
recon_state->nr_caps++;
|
||||
out_free:
|
||||
kfree(path);
|
||||
out_dput:
|
||||
dput(dentry);
|
||||
out_err:
|
||||
return err;
|
||||
}
|
||||
|
||||
@ -3339,7 +3360,7 @@ static void check_new_map(struct ceph_mds_client *mdsc,
|
||||
pr_info("mds%d recovery completed\n", s->s_mds);
|
||||
kick_requests(mdsc, i);
|
||||
ceph_kick_flushing_caps(mdsc, s);
|
||||
wake_up_session_caps(s, 1);
|
||||
wake_up_session_caps(s, RECONNECT);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -17,14 +17,16 @@
|
||||
#include <linux/ceph/auth.h>
|
||||
|
||||
/* The first 8 bits are reserved for old ceph releases */
|
||||
#define CEPHFS_FEATURE_MIMIC 8
|
||||
#define CEPHFS_FEATURE_MIMIC 8
|
||||
#define CEPHFS_FEATURE_REPLY_ENCODING 9
|
||||
#define CEPHFS_FEATURE_RECLAIM_CLIENT 10
|
||||
#define CEPHFS_FEATURE_LAZY_CAP_WANTED 11
|
||||
|
||||
#define CEPHFS_FEATURES_ALL { \
|
||||
0, 1, 2, 3, 4, 5, 6, 7, \
|
||||
CEPHFS_FEATURE_MIMIC, \
|
||||
#define CEPHFS_FEATURES_CLIENT_SUPPORTED { \
|
||||
0, 1, 2, 3, 4, 5, 6, 7, \
|
||||
CEPHFS_FEATURE_MIMIC, \
|
||||
CEPHFS_FEATURE_LAZY_CAP_WANTED, \
|
||||
}
|
||||
|
||||
#define CEPHFS_FEATURES_CLIENT_SUPPORTED CEPHFS_FEATURES_ALL
|
||||
#define CEPHFS_FEATURES_CLIENT_REQUIRED {}
|
||||
|
||||
|
||||
|
@ -35,7 +35,6 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m)
|
||||
|
||||
/* pick */
|
||||
n = prandom_u32() % n;
|
||||
i = 0;
|
||||
for (i = 0; n > 0; i++, n--)
|
||||
while (m->m_info[i].state <= 0)
|
||||
i++;
|
||||
|
@ -544,7 +544,7 @@ static int ceph_tcp_recvpage(struct socket *sock, struct page *page,
|
||||
* shortly.
|
||||
*/
|
||||
static int ceph_tcp_sendmsg(struct socket *sock, struct kvec *iov,
|
||||
size_t kvlen, size_t len, int more)
|
||||
size_t kvlen, size_t len, bool more)
|
||||
{
|
||||
struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL };
|
||||
int r;
|
||||
@ -560,24 +560,15 @@ static int ceph_tcp_sendmsg(struct socket *sock, struct kvec *iov,
|
||||
return r;
|
||||
}
|
||||
|
||||
static int __ceph_tcp_sendpage(struct socket *sock, struct page *page,
|
||||
int offset, size_t size, bool more)
|
||||
{
|
||||
int flags = MSG_DONTWAIT | MSG_NOSIGNAL | (more ? MSG_MORE : MSG_EOR);
|
||||
int ret;
|
||||
|
||||
ret = kernel_sendpage(sock, page, offset, size, flags);
|
||||
if (ret == -EAGAIN)
|
||||
ret = 0;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* @more: either or both of MSG_MORE and MSG_SENDPAGE_NOTLAST
|
||||
*/
|
||||
static int ceph_tcp_sendpage(struct socket *sock, struct page *page,
|
||||
int offset, size_t size, bool more)
|
||||
int offset, size_t size, int more)
|
||||
{
|
||||
struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL };
|
||||
struct bio_vec bvec;
|
||||
ssize_t (*sendpage)(struct socket *sock, struct page *page,
|
||||
int offset, size_t size, int flags);
|
||||
int flags = MSG_DONTWAIT | MSG_NOSIGNAL | more;
|
||||
int ret;
|
||||
|
||||
/*
|
||||
@ -589,19 +580,11 @@ static int ceph_tcp_sendpage(struct socket *sock, struct page *page,
|
||||
* triggers one of hardened usercopy checks.
|
||||
*/
|
||||
if (page_count(page) >= 1 && !PageSlab(page))
|
||||
return __ceph_tcp_sendpage(sock, page, offset, size, more);
|
||||
|
||||
bvec.bv_page = page;
|
||||
bvec.bv_offset = offset;
|
||||
bvec.bv_len = size;
|
||||
|
||||
if (more)
|
||||
msg.msg_flags |= MSG_MORE;
|
||||
sendpage = sock->ops->sendpage;
|
||||
else
|
||||
msg.msg_flags |= MSG_EOR; /* superfluous, but what the hell */
|
||||
sendpage = sock_no_sendpage;
|
||||
|
||||
iov_iter_bvec(&msg.msg_iter, WRITE, &bvec, 1, size);
|
||||
ret = sock_sendmsg(sock, &msg);
|
||||
ret = sendpage(sock, page, offset, size, flags);
|
||||
if (ret == -EAGAIN)
|
||||
ret = 0;
|
||||
|
||||
@ -1572,6 +1555,7 @@ static int write_partial_message_data(struct ceph_connection *con)
|
||||
struct ceph_msg *msg = con->out_msg;
|
||||
struct ceph_msg_data_cursor *cursor = &msg->cursor;
|
||||
bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
|
||||
int more = MSG_MORE | MSG_SENDPAGE_NOTLAST;
|
||||
u32 crc;
|
||||
|
||||
dout("%s %p msg %p\n", __func__, con, msg);
|
||||
@ -1592,7 +1576,6 @@ static int write_partial_message_data(struct ceph_connection *con)
|
||||
struct page *page;
|
||||
size_t page_offset;
|
||||
size_t length;
|
||||
bool last_piece;
|
||||
int ret;
|
||||
|
||||
if (!cursor->resid) {
|
||||
@ -1600,10 +1583,11 @@ static int write_partial_message_data(struct ceph_connection *con)
|
||||
continue;
|
||||
}
|
||||
|
||||
page = ceph_msg_data_next(cursor, &page_offset, &length,
|
||||
&last_piece);
|
||||
ret = ceph_tcp_sendpage(con->sock, page, page_offset,
|
||||
length, !last_piece);
|
||||
page = ceph_msg_data_next(cursor, &page_offset, &length, NULL);
|
||||
if (length == cursor->total_resid)
|
||||
more = MSG_MORE;
|
||||
ret = ceph_tcp_sendpage(con->sock, page, page_offset, length,
|
||||
more);
|
||||
if (ret <= 0) {
|
||||
if (do_datacrc)
|
||||
msg->footer.data_crc = cpu_to_le32(crc);
|
||||
@ -1633,13 +1617,16 @@ static int write_partial_message_data(struct ceph_connection *con)
|
||||
*/
|
||||
static int write_partial_skip(struct ceph_connection *con)
|
||||
{
|
||||
int more = MSG_MORE | MSG_SENDPAGE_NOTLAST;
|
||||
int ret;
|
||||
|
||||
dout("%s %p %d left\n", __func__, con, con->out_skip);
|
||||
while (con->out_skip > 0) {
|
||||
size_t size = min(con->out_skip, (int) PAGE_SIZE);
|
||||
|
||||
ret = ceph_tcp_sendpage(con->sock, zero_page, 0, size, true);
|
||||
if (size == con->out_skip)
|
||||
more = MSG_MORE;
|
||||
ret = ceph_tcp_sendpage(con->sock, zero_page, 0, size, more);
|
||||
if (ret <= 0)
|
||||
goto out;
|
||||
con->out_skip -= ret;
|
||||
|
Loading…
Reference in New Issue
Block a user