From 1d17de9534cb5a4c4c380d174cc2f9281b34f89e Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Mon, 25 Sep 2023 16:25:20 +0800 Subject: [PATCH 1/7] ceph: save cap_auths in MDS client when session is opened Save the cap_auths, which have been parsed by the MDS, in the opened session. [ idryomov: use s64 and u32 instead of int64_t and uint32_t, switch to bool for root_squash, readable and writeable ] Link: https://tracker.ceph.com/issues/61333 Signed-off-by: Xiubo Li Reviewed-by: Milind Changire Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 106 ++++++++++++++++++++++++++++++++++++++++++- fs/ceph/mds_client.h | 21 +++++++++ 2 files changed, 126 insertions(+), 1 deletion(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 360b686c3c67..6a1e68549475 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -4112,10 +4112,13 @@ static void handle_session(struct ceph_mds_session *session, void *p = msg->front.iov_base; void *end = p + msg->front.iov_len; struct ceph_mds_session_head *h; - u32 op; + struct ceph_mds_cap_auth *cap_auths = NULL; + u32 op, cap_auths_num = 0; u64 seq, features = 0; int wake = 0; bool blocklisted = false; + u32 i; + /* decode */ ceph_decode_need(&p, end, sizeof(*h), bad); @@ -4160,7 +4163,101 @@ static void handle_session(struct ceph_mds_session *session, } } + if (msg_version >= 6) { + ceph_decode_32_safe(&p, end, cap_auths_num, bad); + doutc(cl, "cap_auths_num %d\n", cap_auths_num); + + if (cap_auths_num && op != CEPH_SESSION_OPEN) { + WARN_ON_ONCE(op != CEPH_SESSION_OPEN); + goto skip_cap_auths; + } + + cap_auths = kcalloc(cap_auths_num, + sizeof(struct ceph_mds_cap_auth), + GFP_KERNEL); + if (!cap_auths) { + pr_err_client(cl, "No memory for cap_auths\n"); + return; + } + + for (i = 0; i < cap_auths_num; i++) { + u32 _len, j; + + /* struct_v, struct_compat, and struct_len in MDSCapAuth */ + ceph_decode_skip_n(&p, end, 2 + sizeof(u32), bad); + + /* struct_v, struct_compat, and struct_len in MDSCapMatch */ + ceph_decode_skip_n(&p, end, 2 + sizeof(u32), bad); + ceph_decode_64_safe(&p, end, cap_auths[i].match.uid, bad); + ceph_decode_32_safe(&p, end, _len, bad); + if (_len) { + cap_auths[i].match.gids = kcalloc(_len, sizeof(u32), + GFP_KERNEL); + if (!cap_auths[i].match.gids) { + pr_err_client(cl, "No memory for gids\n"); + goto fail; + } + + cap_auths[i].match.num_gids = _len; + for (j = 0; j < _len; j++) + ceph_decode_32_safe(&p, end, + cap_auths[i].match.gids[j], + bad); + } + + ceph_decode_32_safe(&p, end, _len, bad); + if (_len) { + cap_auths[i].match.path = kcalloc(_len + 1, sizeof(char), + GFP_KERNEL); + if (!cap_auths[i].match.path) { + pr_err_client(cl, "No memory for path\n"); + goto fail; + } + ceph_decode_copy(&p, cap_auths[i].match.path, _len); + + /* Remove the tailing '/' */ + while (_len && cap_auths[i].match.path[_len - 1] == '/') { + cap_auths[i].match.path[_len - 1] = '\0'; + _len -= 1; + } + } + + ceph_decode_32_safe(&p, end, _len, bad); + if (_len) { + cap_auths[i].match.fs_name = kcalloc(_len + 1, sizeof(char), + GFP_KERNEL); + if (!cap_auths[i].match.fs_name) { + pr_err_client(cl, "No memory for fs_name\n"); + goto fail; + } + ceph_decode_copy(&p, cap_auths[i].match.fs_name, _len); + } + + ceph_decode_8_safe(&p, end, cap_auths[i].match.root_squash, bad); + ceph_decode_8_safe(&p, end, cap_auths[i].readable, bad); + ceph_decode_8_safe(&p, end, cap_auths[i].writeable, bad); + doutc(cl, "uid %lld, num_gids %u, path %s, fs_name %s, root_squash %d, readable %d, writeable %d\n", + cap_auths[i].match.uid, cap_auths[i].match.num_gids, + cap_auths[i].match.path, cap_auths[i].match.fs_name, + cap_auths[i].match.root_squash, + cap_auths[i].readable, cap_auths[i].writeable); + } + } + +skip_cap_auths: mutex_lock(&mdsc->mutex); + if (op == CEPH_SESSION_OPEN) { + if (mdsc->s_cap_auths) { + for (i = 0; i < mdsc->s_cap_auths_num; i++) { + kfree(mdsc->s_cap_auths[i].match.gids); + kfree(mdsc->s_cap_auths[i].match.path); + kfree(mdsc->s_cap_auths[i].match.fs_name); + } + kfree(mdsc->s_cap_auths); + } + mdsc->s_cap_auths_num = cap_auths_num; + mdsc->s_cap_auths = cap_auths; + } if (op == CEPH_SESSION_CLOSE) { ceph_get_mds_session(session); __unregister_session(mdsc, session); @@ -4290,6 +4387,13 @@ bad: pr_err_client(cl, "corrupt message mds%d len %d\n", mds, (int)msg->front.iov_len); ceph_msg_dump(msg); +fail: + for (i = 0; i < cap_auths_num; i++) { + kfree(cap_auths[i].match.gids); + kfree(cap_auths[i].match.path); + kfree(cap_auths[i].match.fs_name); + } + kfree(cap_auths); return; } diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index b88e80415224..cc106f1aa7d2 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -71,6 +71,24 @@ enum ceph_feature_type { struct ceph_fs_client; struct ceph_cap; +#define MDS_AUTH_UID_ANY -1 + +struct ceph_mds_cap_match { + s64 uid; /* default to MDS_AUTH_UID_ANY */ + u32 num_gids; + u32 *gids; /* use these GIDs */ + char *path; /* require path to be child of this + (may be "" or "/" for any) */ + char *fs_name; + bool root_squash; /* default to false */ +}; + +struct ceph_mds_cap_auth { + struct ceph_mds_cap_match match; + bool readable; + bool writeable; +}; + /* * parsed info about a single inode. pointers are into the encoded * on-wire structures within the mds reply message payload. @@ -513,6 +531,9 @@ struct ceph_mds_client { struct rw_semaphore pool_perm_rwsem; struct rb_root pool_perm_tree; + u32 s_cap_auths_num; + struct ceph_mds_cap_auth *s_cap_auths; + char nodename[__NEW_UTS_LEN + 1]; }; From 596afb0b8933ba6ed7227adcc538db26feb25c74 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Tue, 7 Nov 2023 14:55:46 +0800 Subject: [PATCH 2/7] ceph: add ceph_mds_check_access() helper This will help check the mds auth access in client side. Always insert the server path in front of the target path when matching the paths. [ idryomov: use u32 instead of uint32_t ] Link: https://tracker.ceph.com/issues/61333 Signed-off-by: Xiubo Li Reviewed-by: Milind Changire Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 164 +++++++++++++++++++++++++++++++++++++++++++ fs/ceph/mds_client.h | 3 + 2 files changed, 167 insertions(+) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 6a1e68549475..c2157f6e0c69 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -5603,6 +5603,170 @@ void send_flush_mdlog(struct ceph_mds_session *s) mutex_unlock(&s->s_mutex); } +static int ceph_mds_auth_match(struct ceph_mds_client *mdsc, + struct ceph_mds_cap_auth *auth, + char *tpath) +{ + const struct cred *cred = get_current_cred(); + u32 caller_uid = from_kuid(&init_user_ns, cred->fsuid); + u32 caller_gid = from_kgid(&init_user_ns, cred->fsgid); + struct ceph_client *cl = mdsc->fsc->client; + const char *spath = mdsc->fsc->mount_options->server_path; + bool gid_matched = false; + u32 gid, tlen, len; + int i, j; + + doutc(cl, "match.uid %lld\n", auth->match.uid); + if (auth->match.uid != MDS_AUTH_UID_ANY) { + if (auth->match.uid != caller_uid) + return 0; + if (auth->match.num_gids) { + for (i = 0; i < auth->match.num_gids; i++) { + if (caller_gid == auth->match.gids[i]) + gid_matched = true; + } + if (!gid_matched && cred->group_info->ngroups) { + for (i = 0; i < cred->group_info->ngroups; i++) { + gid = from_kgid(&init_user_ns, + cred->group_info->gid[i]); + for (j = 0; j < auth->match.num_gids; j++) { + if (gid == auth->match.gids[j]) { + gid_matched = true; + break; + } + } + if (gid_matched) + break; + } + } + if (!gid_matched) + return 0; + } + } + + /* path match */ + if (auth->match.path) { + if (!tpath) + return 0; + + tlen = strlen(tpath); + len = strlen(auth->match.path); + if (len) { + char *_tpath = tpath; + bool free_tpath = false; + int m, n; + + doutc(cl, "server path %s, tpath %s, match.path %s\n", + spath, tpath, auth->match.path); + if (spath && (m = strlen(spath)) != 1) { + /* mount path + '/' + tpath + an extra space */ + n = m + 1 + tlen + 1; + _tpath = kmalloc(n, GFP_NOFS); + if (!_tpath) + return -ENOMEM; + /* remove the leading '/' */ + snprintf(_tpath, n, "%s/%s", spath + 1, tpath); + free_tpath = true; + tlen = strlen(_tpath); + } + + /* + * Please note the tailing '/' for match.path has already + * been removed when parsing. + * + * Remove the tailing '/' for the target path. + */ + while (tlen && _tpath[tlen - 1] == '/') { + _tpath[tlen - 1] = '\0'; + tlen -= 1; + } + doutc(cl, "_tpath %s\n", _tpath); + + /* + * In case first == _tpath && tlen == len: + * match.path=/foo --> /foo _path=/foo --> match + * match.path=/foo/ --> /foo _path=/foo --> match + * + * In case first == _tmatch.path && tlen > len: + * match.path=/foo/ --> /foo _path=/foo/ --> match + * match.path=/foo --> /foo _path=/foo/ --> match + * match.path=/foo/ --> /foo _path=/foo/d --> match + * match.path=/foo --> /foo _path=/food --> mismatch + * + * All the other cases --> mismatch + */ + char *first = strstr(_tpath, auth->match.path); + if (first != _tpath) { + if (free_tpath) + kfree(_tpath); + return 0; + } + + if (tlen > len && _tpath[len] != '/') { + if (free_tpath) + kfree(_tpath); + return 0; + } + } + } + + doutc(cl, "matched\n"); + return 1; +} + +int ceph_mds_check_access(struct ceph_mds_client *mdsc, char *tpath, int mask) +{ + const struct cred *cred = get_current_cred(); + u32 caller_uid = from_kuid(&init_user_ns, cred->fsuid); + u32 caller_gid = from_kgid(&init_user_ns, cred->fsgid); + struct ceph_mds_cap_auth *rw_perms_s = NULL; + struct ceph_client *cl = mdsc->fsc->client; + bool root_squash_perms = true; + int i, err; + + doutc(cl, "tpath '%s', mask %d, caller_uid %d, caller_gid %d\n", + tpath, mask, caller_uid, caller_gid); + + for (i = 0; i < mdsc->s_cap_auths_num; i++) { + struct ceph_mds_cap_auth *s = &mdsc->s_cap_auths[i]; + + err = ceph_mds_auth_match(mdsc, s, tpath); + if (err < 0) { + return err; + } else if (err > 0) { + /* always follow the last auth caps' permision */ + root_squash_perms = true; + rw_perms_s = NULL; + if ((mask & MAY_WRITE) && s->writeable && + s->match.root_squash && (!caller_uid || !caller_gid)) + root_squash_perms = false; + + if (((mask & MAY_WRITE) && !s->writeable) || + ((mask & MAY_READ) && !s->readable)) + rw_perms_s = s; + } + } + + doutc(cl, "root_squash_perms %d, rw_perms_s %p\n", root_squash_perms, + rw_perms_s); + if (root_squash_perms && rw_perms_s == NULL) { + doutc(cl, "access allowed\n"); + return 0; + } + + if (!root_squash_perms) { + doutc(cl, "root_squash is enabled and user(%d %d) isn't allowed to write", + caller_uid, caller_gid); + } + if (rw_perms_s) { + doutc(cl, "mds auth caps readable/writeable %d/%d while request r/w %d/%d", + rw_perms_s->readable, rw_perms_s->writeable, + !!(mask & MAY_READ), !!(mask & MAY_WRITE)); + } + doutc(cl, "access denied\n"); + return -EACCES; +} + /* * called before mount is ro, and before dentries are torn down. * (hmm, does this still race with new lookups?) diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index cc106f1aa7d2..dba514fb7fab 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -602,6 +602,9 @@ extern void ceph_queue_cap_unlink_work(struct ceph_mds_client *mdsc); extern int ceph_iterate_session_caps(struct ceph_mds_session *session, int (*cb)(struct inode *, int mds, void *), void *arg); +extern int ceph_mds_check_access(struct ceph_mds_client *mdsc, char *tpath, + int mask); + extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc); static inline void ceph_mdsc_free_path(char *path, int len) From ded67830403710a60a4bb00136c0d21ca0cd7dc5 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Wed, 8 Nov 2023 10:54:17 +0800 Subject: [PATCH 3/7] ceph: check the cephx mds auth access for setattr If we hit any failre just try to force it to do the sync setattr. Link: https://tracker.ceph.com/issues/61333 Signed-off-by: Xiubo Li Reviewed-by: Milind Changire Signed-off-by: Ilya Dryomov --- fs/ceph/inode.c | 46 +++++++++++++++++++++++++++++++++++++--------- 1 file changed, 37 insertions(+), 9 deletions(-) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 7b2e77517f23..3f1f048d9206 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -2480,6 +2480,34 @@ int __ceph_setattr(struct mnt_idmap *idmap, struct inode *inode, bool lock_snap_rwsem = false; bool fill_fscrypt; int truncate_retry = 20; /* The RMW will take around 50ms */ + struct dentry *dentry; + char *path; + int pathlen; + u64 pathbase; + bool do_sync = false; + + dentry = d_find_alias(inode); + if (!dentry) { + do_sync = true; + } else { + path = ceph_mdsc_build_path(mdsc, dentry, &pathlen, &pathbase, 0); + if (IS_ERR(path)) { + do_sync = true; + err = 0; + } else { + err = ceph_mds_check_access(mdsc, path, MAY_WRITE); + } + ceph_mdsc_free_path(path, pathlen); + dput(dentry); + + /* For none EACCES cases will let the MDS do the mds auth check */ + if (err == -EACCES) { + return err; + } else if (err < 0) { + do_sync = true; + err = 0; + } + } retry: prealloc_cf = ceph_alloc_cap_flush(); @@ -2526,7 +2554,7 @@ retry: /* It should never be re-set once set */ WARN_ON_ONCE(ci->fscrypt_auth); - if (issued & CEPH_CAP_AUTH_EXCL) { + if (!do_sync && (issued & CEPH_CAP_AUTH_EXCL)) { dirtied |= CEPH_CAP_AUTH_EXCL; kfree(ci->fscrypt_auth); ci->fscrypt_auth = (u8 *)cia->fscrypt_auth; @@ -2555,7 +2583,7 @@ retry: ceph_vinop(inode), from_kuid(&init_user_ns, inode->i_uid), from_kuid(&init_user_ns, attr->ia_uid)); - if (issued & CEPH_CAP_AUTH_EXCL) { + if (!do_sync && (issued & CEPH_CAP_AUTH_EXCL)) { inode->i_uid = fsuid; dirtied |= CEPH_CAP_AUTH_EXCL; } else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 || @@ -2573,7 +2601,7 @@ retry: ceph_vinop(inode), from_kgid(&init_user_ns, inode->i_gid), from_kgid(&init_user_ns, attr->ia_gid)); - if (issued & CEPH_CAP_AUTH_EXCL) { + if (!do_sync && (issued & CEPH_CAP_AUTH_EXCL)) { inode->i_gid = fsgid; dirtied |= CEPH_CAP_AUTH_EXCL; } else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 || @@ -2587,7 +2615,7 @@ retry: if (ia_valid & ATTR_MODE) { doutc(cl, "%p %llx.%llx mode 0%o -> 0%o\n", inode, ceph_vinop(inode), inode->i_mode, attr->ia_mode); - if (issued & CEPH_CAP_AUTH_EXCL) { + if (!do_sync && (issued & CEPH_CAP_AUTH_EXCL)) { inode->i_mode = attr->ia_mode; dirtied |= CEPH_CAP_AUTH_EXCL; } else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 || @@ -2606,11 +2634,11 @@ retry: inode, ceph_vinop(inode), atime.tv_sec, atime.tv_nsec, attr->ia_atime.tv_sec, attr->ia_atime.tv_nsec); - if (issued & CEPH_CAP_FILE_EXCL) { + if (!do_sync && (issued & CEPH_CAP_FILE_EXCL)) { ci->i_time_warp_seq++; inode_set_atime_to_ts(inode, attr->ia_atime); dirtied |= CEPH_CAP_FILE_EXCL; - } else if ((issued & CEPH_CAP_FILE_WR) && + } else if (!do_sync && (issued & CEPH_CAP_FILE_WR) && timespec64_compare(&atime, &attr->ia_atime) < 0) { inode_set_atime_to_ts(inode, attr->ia_atime); @@ -2646,7 +2674,7 @@ retry: CEPH_FSCRYPT_BLOCK_SIZE)); req->r_fscrypt_file = attr->ia_size; fill_fscrypt = true; - } else if ((issued & CEPH_CAP_FILE_EXCL) && attr->ia_size >= isize) { + } else if (!do_sync && (issued & CEPH_CAP_FILE_EXCL) && attr->ia_size >= isize) { if (attr->ia_size > isize) { i_size_write(inode, attr->ia_size); inode->i_blocks = calc_inode_blocks(attr->ia_size); @@ -2683,11 +2711,11 @@ retry: inode, ceph_vinop(inode), mtime.tv_sec, mtime.tv_nsec, attr->ia_mtime.tv_sec, attr->ia_mtime.tv_nsec); - if (issued & CEPH_CAP_FILE_EXCL) { + if (!do_sync && (issued & CEPH_CAP_FILE_EXCL)) { ci->i_time_warp_seq++; inode_set_mtime_to_ts(inode, attr->ia_mtime); dirtied |= CEPH_CAP_FILE_EXCL; - } else if ((issued & CEPH_CAP_FILE_WR) && + } else if (!do_sync && (issued & CEPH_CAP_FILE_WR) && timespec64_compare(&mtime, &attr->ia_mtime) < 0) { inode_set_mtime_to_ts(inode, attr->ia_mtime); dirtied |= CEPH_CAP_FILE_WR; From 845ae9d4926fa69d27e0912e4404d848d19c79a0 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Wed, 8 Nov 2023 10:54:35 +0800 Subject: [PATCH 4/7] ceph: check the cephx mds auth access for open Before opening the file locally we need to check the cephx access. Link: https://tracker.ceph.com/issues/61333 Signed-off-by: Xiubo Li Reviewed-by: Milind Changire Signed-off-by: Ilya Dryomov --- fs/ceph/file.c | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 16873d07692f..4de4bdd7949e 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -366,6 +366,12 @@ int ceph_open(struct inode *inode, struct file *file) struct ceph_file_info *fi = file->private_data; int err; int flags, fmode, wanted; + struct dentry *dentry; + char *path; + int pathlen; + u64 pathbase; + bool do_sync = false; + int mask = MAY_READ; if (fi) { doutc(cl, "file %p is already opened\n", file); @@ -387,6 +393,31 @@ int ceph_open(struct inode *inode, struct file *file) fmode = ceph_flags_to_mode(flags); wanted = ceph_caps_for_mode(fmode); + if (fmode & CEPH_FILE_MODE_WR) + mask |= MAY_WRITE; + dentry = d_find_alias(inode); + if (!dentry) { + do_sync = true; + } else { + path = ceph_mdsc_build_path(mdsc, dentry, &pathlen, &pathbase, 0); + if (IS_ERR(path)) { + do_sync = true; + err = 0; + } else { + err = ceph_mds_check_access(mdsc, path, mask); + } + ceph_mdsc_free_path(path, pathlen); + dput(dentry); + + /* For none EACCES cases will let the MDS do the mds auth check */ + if (err == -EACCES) { + return err; + } else if (err < 0) { + do_sync = true; + err = 0; + } + } + /* snapped files are read-only */ if (ceph_snap(inode) != CEPH_NOSNAP && (file->f_mode & FMODE_WRITE)) return -EROFS; @@ -402,7 +433,7 @@ int ceph_open(struct inode *inode, struct file *file) * asynchronously. */ spin_lock(&ci->i_ceph_lock); - if (__ceph_is_any_real_caps(ci) && + if (!do_sync && __ceph_is_any_real_caps(ci) && (((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) { int mds_wanted = __ceph_caps_mds_wanted(ci, true); int issued = __ceph_caps_issued(ci, NULL); @@ -420,7 +451,7 @@ int ceph_open(struct inode *inode, struct file *file) ceph_check_caps(ci, 0); return ceph_init_file(inode, file, fmode); - } else if (ceph_snap(inode) != CEPH_NOSNAP && + } else if (!do_sync && ceph_snap(inode) != CEPH_NOSNAP && (ci->i_snap_caps & wanted) == wanted) { __ceph_touch_fmode(ci, mdsc, fmode); spin_unlock(&ci->i_ceph_lock); From 2827badaf8162157271027ea6cc13056890f3e93 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Wed, 8 Nov 2023 11:06:05 +0800 Subject: [PATCH 5/7] ceph: check the cephx mds auth access for async dirop Before doing the op locally we need to check the cephx access. Link: https://tracker.ceph.com/issues/61333 Signed-off-by: Xiubo Li Reviewed-by: Milind Changire Signed-off-by: Ilya Dryomov --- fs/ceph/dir.c | 28 ++++++++++++++++++++++++++++ fs/ceph/file.c | 31 +++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 0e9f56eaba1e..82a2e2a06a65 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -1336,8 +1336,12 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry) struct inode *inode = d_inode(dentry); struct ceph_mds_request *req; bool try_async = ceph_test_mount_opt(fsc, ASYNC_DIROPS); + struct dentry *dn; int err = -EROFS; int op; + char *path; + int pathlen; + u64 pathbase; if (ceph_snap(dir) == CEPH_SNAPDIR) { /* rmdir .snap/foo is RMSNAP */ @@ -1351,6 +1355,30 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry) CEPH_MDS_OP_RMDIR : CEPH_MDS_OP_UNLINK; } else goto out; + + dn = d_find_alias(dir); + if (!dn) { + try_async = false; + } else { + path = ceph_mdsc_build_path(mdsc, dn, &pathlen, &pathbase, 0); + if (IS_ERR(path)) { + try_async = false; + err = 0; + } else { + err = ceph_mds_check_access(mdsc, path, MAY_WRITE); + } + ceph_mdsc_free_path(path, pathlen); + dput(dn); + + /* For none EACCES cases will let the MDS do the mds auth check */ + if (err == -EACCES) { + return err; + } else if (err < 0) { + try_async = false; + err = 0; + } + } + retry: req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); if (IS_ERR(req)) { diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 4de4bdd7949e..4b8d59ebda00 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -790,6 +790,9 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, bool try_async = ceph_test_mount_opt(fsc, ASYNC_DIROPS); int mask; int err; + char *path; + int pathlen; + u64 pathbase; doutc(cl, "%p %llx.%llx dentry %p '%pd' %s flags %d mode 0%o\n", dir, ceph_vinop(dir), dentry, dentry, @@ -807,6 +810,34 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, */ flags &= ~O_TRUNC; + dn = d_find_alias(dir); + if (!dn) { + try_async = false; + } else { + path = ceph_mdsc_build_path(mdsc, dn, &pathlen, &pathbase, 0); + if (IS_ERR(path)) { + try_async = false; + err = 0; + } else { + int fmode = ceph_flags_to_mode(flags); + + mask = MAY_READ; + if (fmode & CEPH_FILE_MODE_WR) + mask |= MAY_WRITE; + err = ceph_mds_check_access(mdsc, path, mask); + } + ceph_mdsc_free_path(path, pathlen); + dput(dn); + + /* For none EACCES cases will let the MDS do the mds auth check */ + if (err == -EACCES) { + return err; + } else if (err < 0) { + try_async = false; + err = 0; + } + } + retry: if (flags & O_CREAT) { if (ceph_quota_is_max_files_exceeded(dir)) From d8fc89815f67db960d1684e1c89c36292a981250 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Mon, 4 Dec 2023 13:01:13 +0800 Subject: [PATCH 6/7] ceph: add CEPHFS_FEATURE_MDS_AUTH_CAPS_CHECK feature bit Since we have support checking the mds auth cap in kclient, just set the feature bit. Link: https://tracker.ceph.com/issues/61333 Signed-off-by: Xiubo Li Reviewed-by: Milind Changire Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index dba514fb7fab..cfa18cf915a0 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -35,8 +35,9 @@ enum ceph_feature_type { CEPHFS_FEATURE_32BITS_RETRY_FWD, CEPHFS_FEATURE_NEW_SNAPREALM_INFO, CEPHFS_FEATURE_HAS_OWNER_UIDGID, + CEPHFS_FEATURE_MDS_AUTH_CAPS_CHECK, - CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_HAS_OWNER_UIDGID, + CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_MDS_AUTH_CAPS_CHECK, }; #define CEPHFS_FEATURES_CLIENT_SUPPORTED { \ @@ -52,6 +53,7 @@ enum ceph_feature_type { CEPHFS_FEATURE_OP_GETVXATTR, \ CEPHFS_FEATURE_32BITS_RETRY_FWD, \ CEPHFS_FEATURE_HAS_OWNER_UIDGID, \ + CEPHFS_FEATURE_MDS_AUTH_CAPS_CHECK, \ } /* From 93a2221c9c1ae32643df67c482dc4c4c591b7514 Mon Sep 17 00:00:00 2001 From: Artem Ikonnikov Date: Sun, 19 May 2024 00:40:54 +0300 Subject: [PATCH 7/7] doc: ceph: update userspace command to get CephFS metadata According to ceph documentation [1], "getfattr -d /some/dir" no longer displays the list of all extended attributes. Both CephFS kernel and FUSE clients hide this information. To retrieve the information you have to specify the particular attribute name e.g. "getfattr -n ceph.dir.rbytes /some/dir". [1] https://docs.ceph.com/en/latest/cephfs/quota/ Signed-off-by: Artem Ikonnikov Reviewed-by: Xiubo Li Signed-off-by: Ilya Dryomov --- Documentation/filesystems/ceph.rst | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/Documentation/filesystems/ceph.rst b/Documentation/filesystems/ceph.rst index 085f309ece60..6d2276a87a5a 100644 --- a/Documentation/filesystems/ceph.rst +++ b/Documentation/filesystems/ceph.rst @@ -67,12 +67,15 @@ Snapshot names have two limitations: more than 255 characters, and `` takes 13 characters, the long snapshot names can take as much as 255 - 1 - 1 - 13 = 240. -Ceph also provides some recursive accounting on directories for nested -files and bytes. That is, a 'getfattr -d foo' on any directory in the -system will reveal the total number of nested regular files and -subdirectories, and a summation of all nested file sizes. This makes -the identification of large disk space consumers relatively quick, as -no 'du' or similar recursive scan of the file system is required. +Ceph also provides some recursive accounting on directories for nested files +and bytes. You can run the commands:: + + getfattr -n ceph.dir.rfiles /some/dir + getfattr -n ceph.dir.rbytes /some/dir + +to get the total number of nested files and their combined size, respectively. +This makes the identification of large disk space consumers relatively quick, +as no 'du' or similar recursive scan of the file system is required. Finally, Ceph also allows quotas to be set on any directory in the system. The quota can restrict the number of bytes or the number of files stored