cluster/dht: refactor dht lookup functions

Part 1:  refactor the dht_lookup_dir_cbk
and dht_selfheal_directory functions.
Added a simple dht selfheal directory test

Change-Id: I1410c26359e3c14b396adbe751937a52bd2fcff9
updates: bz#1707393
Signed-off-by: N Balachandran <nbalacha@redhat.com>
This commit is contained in:
N Balachandran 2019-03-25 15:56:56 +05:30 committed by Shyamsundar Ranganathan
parent 3fc7d08c1d
commit 7f780f30e5
3 changed files with 267 additions and 127 deletions

View File

@ -0,0 +1,145 @@
#!/bin/bash
. $(dirname $0)/../../include.rc
. $(dirname $0)/../../volume.rc
. $(dirname $0)/../../nfs.rc
. $(dirname $0)/../../common-utils.rc
# Test 1 overview:
# ----------------
#
# 1. Kill one brick of the volume.
# 2. Create directories and change directory properties.
# 3. Bring up the brick and access the directory
# 4. Check the permissions and xattrs on the backend
cleanup
TEST glusterd
TEST pidof glusterd
TEST $CLI volume create $V0 $H0:$B0/$V0-{1..3}
TEST $CLI volume start $V0
# We want the lookup to reach DHT
TEST $CLI volume set $V0 performance.stat-prefetch off
# Mount using FUSE , kill a brick and create directories
TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
ls $M0/
cd $M0
TEST kill_brick $V0 $H0 $B0/$V0-1
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" brick_up_status $V0 $H0 $B0/$V0-1
TEST mkdir dir{1..4}
# No change for dir1
# Change permissions for dir2
# Set xattr on dir3
# Change permissions and set xattr on dir4
TEST chmod 777 $M0/dir2
TEST setfattr -n "user.test" -v "test" $M0/dir3
TEST chmod 777 $M0/dir4
TEST setfattr -n "user.test" -v "test" $M0/dir4
# Start all bricks
TEST $CLI volume start $V0 force
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/$V0-1
#$CLI volume status
# It takes a while for the client to reconnect to the brick
sleep 5
stat $M0/dir* > /dev/null
# Check that directories have been created on the brick that was killed
TEST ls $B0/$V0-1/dir1
TEST ls $B0/$V0-1/dir2
EXPECT "777" stat -c "%a" $B0/$V0-1/dir2
TEST ls $B0/$V0-1/dir3
EXPECT "test" getfattr -n "user.test" --absolute-names --only-values $B0/$V0-1/dir3
TEST ls $B0/$V0-1/dir4
EXPECT "777" stat -c "%a" $B0/$V0-1/dir4
EXPECT "test" getfattr -n "user.test" --absolute-names --only-values $B0/$V0-1/dir4
TEST rm -rf $M0/*
cd
EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
# Test 2 overview:
# ----------------
# 1. Create directories with all bricks up.
# 2. Kill a brick and change directory properties and set user xattr.
# 2. Bring up the brick and access the directory
# 3. Check the permissions and xattrs on the backend
TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
ls $M0/
cd $M0
TEST mkdir dir{1..4}
TEST kill_brick $V0 $H0 $B0/$V0-1
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" brick_up_status $V0 $H0 $B0/$V0-1
# No change for dir1
# Change permissions for dir2
# Set xattr on dir3
# Change permissions and set xattr on dir4
TEST chmod 777 $M0/dir2
TEST setfattr -n "user.test" -v "test" $M0/dir3
TEST chmod 777 $M0/dir4
TEST setfattr -n "user.test" -v "test" $M0/dir4
# Start all bricks
TEST $CLI volume start $V0 force
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/$V0-1
#$CLI volume status
# It takes a while for the client to reconnect to the brick
sleep 5
stat $M0/dir* > /dev/null
# Check directories on the brick that was killed
TEST ls $B0/$V0-1/dir2
EXPECT "777" stat -c "%a" $B0/$V0-1/dir2
TEST ls $B0/$V0-1/dir3
EXPECT "test" getfattr -n "user.test" --absolute-names --only-values $B0/$V0-1/dir3
TEST ls $B0/$V0-1/dir4
EXPECT "777" stat -c "%a" $B0/$V0-1/dir4
EXPECT "test" getfattr -n "user.test" --absolute-names --only-values $B0/$V0-1/dir4
cd
# Cleanup
cleanup

View File

@ -801,9 +801,8 @@ dht_common_mark_mdsxattr(call_frame_t *frame, int *errst,
call_frame_t *xattr_frame = NULL;
gf_boolean_t vol_down = _gf_false;
this = frame->this;
GF_VALIDATE_OR_GOTO("dht", frame, out);
this = frame->this;
GF_VALIDATE_OR_GOTO("dht", this, out);
GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
@ -812,6 +811,7 @@ dht_common_mark_mdsxattr(call_frame_t *frame, int *errst,
conf = this->private;
layout = local->selfheal.layout;
local->mds_heal_fresh_lookup = mark_during_fresh_lookup;
gf_uuid_unparse(local->gfid, gfid_local);
/* Code to update hashed subvol consider as a mds subvol
@ -1238,6 +1238,31 @@ out:
return ret;
}
int
dht_needs_selfheal(call_frame_t *frame, xlator_t *this)
{
dht_local_t *local = NULL;
dht_layout_t *layout = NULL;
int needs_selfheal = 0;
int ret = 0;
local = frame->local;
layout = local->layout;
if (local->need_attrheal || local->need_xattr_heal ||
local->need_selfheal) {
needs_selfheal = 1;
}
ret = dht_layout_normalize(this, &local->loc, layout);
if (ret != 0) {
gf_msg_debug(this->name, 0, "fixing assignment on %s", local->loc.path);
needs_selfheal = 1;
}
return needs_selfheal;
}
int
dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf,
@ -1255,8 +1280,6 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
char gfid_local[GF_UUID_BUF_SIZE] = {0};
char gfid_node[GF_UUID_BUF_SIZE] = {0};
int32_t mds_xattr_val[1] = {0};
call_frame_t *copy = NULL;
dht_local_t *copy_local = NULL;
GF_VALIDATE_OR_GOTO("dht", frame, out);
GF_VALIDATE_OR_GOTO("dht", this, out);
@ -1269,7 +1292,11 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
conf = this->private;
layout = local->layout;
gf_msg_debug(this->name, op_errno,
"%s: lookup on %s returned with op_ret = %d, op_errno = %d",
local->loc.path, prev->name, op_ret, op_errno);
/* The first successful lookup*/
if (!op_ret && gf_uuid_is_null(local->gfid)) {
memcpy(local->gfid, stbuf->ia_gfid, 16);
}
@ -1297,13 +1324,10 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret == -1) {
local->op_errno = op_errno;
gf_msg_debug(this->name, op_errno,
"%s: lookup on %s returned error", local->loc.path,
prev->name);
/* The GFID is missing on this subvol. Force a heal. */
if (op_errno == ENODATA) {
local->need_selfheal = 1;
local->need_lookup_everywhere = 1;
}
goto unlock;
}
@ -1311,12 +1335,11 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
is_dir = check_is_dir(inode, stbuf, xattr);
if (!is_dir) {
gf_msg_debug(this->name, 0,
"lookup of %s on %s returned non"
"dir 0%o"
"%s: lookup on %s returned non dir 0%o"
"calling lookup_everywhere",
local->loc.path, prev->name, stbuf->ia_type);
local->need_selfheal = 1;
local->need_lookup_everywhere = 1;
goto unlock;
}
@ -1327,14 +1350,8 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
dht_aggregate_xattr(local->xattr, xattr);
}
if (dict_get(xattr, conf->mds_xattr_key)) {
local->mds_subvol = prev;
local->mds_stbuf.ia_gid = stbuf->ia_gid;
local->mds_stbuf.ia_uid = stbuf->ia_uid;
local->mds_stbuf.ia_prot = stbuf->ia_prot;
}
if (local->stbuf.ia_type != IA_INVAL) {
/* This is not the first subvol to respond */
if (!__is_root_gfid(stbuf->ia_gfid) &&
((local->stbuf.ia_gid != stbuf->ia_gid) ||
(local->stbuf.ia_uid != stbuf->ia_uid) ||
@ -1347,65 +1364,64 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (local->inode == NULL)
local->inode = inode_ref(inode);
/* This could be a problem */
dht_iatt_merge(this, &local->stbuf, stbuf);
dht_iatt_merge(this, &local->postparent, postparent);
if (!dict_get(xattr, conf->mds_xattr_key)) {
gf_msg_debug(this->name, 0,
"Internal xattr %s is not present "
" on path %s gfid is %s ",
conf->mds_xattr_key, local->loc.path, gfid_local);
"%s: mds xattr %s is not present "
"on %s(gfid = %s)",
local->loc.path, conf->mds_xattr_key, prev->name,
gfid_local);
goto unlock;
} else {
/* Save mds subvol on inode ctx */
ret = dht_inode_ctx_mdsvol_set(local->inode, this, prev);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0,
DHT_MSG_SET_INODE_CTX_FAILED,
"Failed to set hashed subvol for %s vol is %s",
local->loc.path, prev->name);
}
}
local->mds_subvol = prev;
local->mds_stbuf = *stbuf;
/* Save mds subvol on inode ctx */
ret = dht_inode_ctx_mdsvol_set(local->inode, this, prev);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SET_INODE_CTX_FAILED,
"%s: Failed to set mds (%s)", local->loc.path, prev->name);
}
check_mds = dht_dict_get_array(xattr, conf->mds_xattr_key,
mds_xattr_val, 1, &errst);
if ((check_mds < 0) && !errst) {
local->mds_xattr = dict_ref(xattr);
gf_msg_debug(this->name, 0,
"Value of %s is not zero on hashed subvol "
"so xattr needs to be heal on non hashed"
" path is %s and vol name is %s "
" gfid is %s",
conf->mds_xattr_key, local->loc.path, prev->name,
"%s: %s is not zero on %s. Xattrs need to be healed."
"(gfid = %s)",
local->loc.path, conf->mds_xattr_key, prev->name,
gfid_local);
local->need_xattr_heal = 1;
local->mds_subvol = prev;
}
}
unlock:
UNLOCK(&frame->lock);
this_call_cnt = dht_frame_return(frame);
if (is_last_call(this_call_cnt)) {
/* No need to call xattr heal code if volume count is 1
*/
if (conf->subvolume_cnt == 1)
local->need_xattr_heal = 0;
/* Code to update all extended attributed from hashed subvol
to local->xattr
*/
if (local->need_xattr_heal && (local->mds_xattr)) {
dht_dir_set_heal_xattr(this, local, local->xattr, local->mds_xattr,
NULL, NULL);
dict_unref(local->mds_xattr);
local->mds_xattr = NULL;
/* If the mds subvol is not set correctly*/
if (!__is_root_gfid(local->gfid) &&
(!dict_get(local->xattr, conf->mds_xattr_key))) {
local->need_selfheal = 1;
}
if (local->need_selfheal) {
local->need_selfheal = 0;
/* No need to call xattr heal code if volume count is 1
*/
if (conf->subvolume_cnt == 1) {
local->need_xattr_heal = 0;
}
if (local->need_selfheal || local->need_lookup_everywhere) {
/* Set the gfid-req so posix will set the GFID*/
if (!gf_uuid_is_null(local->gfid)) {
/* Ok, this should _never_ happen */
ret = dict_set_static_bin(local->xattr_req, "gfid-req",
local->gfid, 16);
} else {
@ -1413,73 +1429,36 @@ unlock:
ret = dict_set_static_bin(local->xattr_req, "gfid-req",
local->gfid_req, 16);
}
}
if (local->need_lookup_everywhere) {
local->need_lookup_everywhere = 0;
dht_lookup_everywhere(frame, this, &local->loc);
return 0;
}
if (local->op_ret == 0) {
ret = dht_layout_normalize(this, &local->loc, layout);
if (ret != 0) {
gf_msg_debug(this->name, 0, "fixing assignment on %s",
local->loc.path);
if (dht_needs_selfheal(frame, this)) {
goto selfheal;
}
dht_layout_set(this, local->inode, layout);
if (!dict_get(local->xattr, conf->mds_xattr_key) ||
local->need_xattr_heal)
goto selfheal;
}
if (local->inode) {
dht_inode_ctx_time_update(local->inode, this, &local->stbuf, 1);
}
if (local->loc.parent) {
dht_inode_ctx_time_update(local->loc.parent, this,
&local->postparent, 1);
}
if (local->need_attrheal) {
local->need_attrheal = 0;
if (!__is_root_gfid(inode->gfid)) {
local->stbuf.ia_gid = local->mds_stbuf.ia_gid;
local->stbuf.ia_uid = local->mds_stbuf.ia_uid;
local->stbuf.ia_prot = local->mds_stbuf.ia_prot;
if (local->inode) {
dht_inode_ctx_time_update(local->inode, this, &local->stbuf, 1);
}
copy = create_frame(this, this->ctx->pool);
if (copy) {
copy_local = dht_local_init(copy, &local->loc, NULL, 0);
if (!copy_local) {
DHT_STACK_DESTROY(copy);
goto skip_attr_heal;
}
copy_local->stbuf = local->stbuf;
gf_uuid_copy(copy_local->loc.gfid, local->stbuf.ia_gfid);
copy_local->mds_stbuf = local->mds_stbuf;
copy_local->mds_subvol = local->mds_subvol;
copy->local = copy_local;
FRAME_SU_DO(copy, dht_local_t);
ret = synctask_new(this->ctx->env, dht_dir_attr_heal,
dht_dir_attr_heal_done, copy, copy);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, ENOMEM,
DHT_MSG_DIR_ATTR_HEAL_FAILED,
"Synctask creation failed to heal attr "
"for path %s gfid %s ",
local->loc.path, local->gfid);
DHT_STACK_DESTROY(copy);
}
if (local->loc.parent) {
dht_inode_ctx_time_update(local->loc.parent, this,
&local->postparent, 1);
}
}
skip_attr_heal:
DHT_STRIP_PHASE1_FLAGS(&local->stbuf);
dht_set_fixed_dir_stat(&local->postparent);
/* Delete mds xattr at the time of STACK UNWIND */
if (local->xattr)
GF_REMOVE_INTERNAL_XATTR(conf->mds_xattr_key, local->xattr);
DHT_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno,
local->inode, &local->stbuf, local->xattr,
&local->postparent);
@ -5443,9 +5422,8 @@ dht_dir_common_set_remove_xattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
} else {
gf_msg(this->name, GF_LOG_ERROR, 0,
DHT_MSG_HASHED_SUBVOL_GET_FAILED,
"Failed to get mds subvol for path %s"
"gfid is %s ",
loc->path, gfid_local);
"%s: Failed to get mds subvol. (gfid is %s)", loc->path,
gfid_local);
}
(*op_errno) = ENOENT;
goto err;

View File

@ -1033,18 +1033,27 @@ dht_selfheal_dir_setattr(call_frame_t *frame, loc_t *loc, struct iatt *stbuf,
int missing_attr = 0;
int i = 0, ret = -1;
dht_local_t *local = NULL;
dht_conf_t *conf = NULL;
xlator_t *this = NULL;
int cnt = 0;
local = frame->local;
this = frame->this;
conf = this->private;
/* We need to heal the attrs if:
* 1. Any directories were missing - the newly created dirs will need
* to have the correct attrs set
* 2. An existing dir does not have the correct permissions -they may
* have been changed when a brick was down.
*/
for (i = 0; i < layout->cnt; i++) {
if (layout->list[i].err == -1)
missing_attr++;
}
if (missing_attr == 0) {
if ((missing_attr == 0) && (local->need_attrheal == 0)) {
if (!local->heal_layout) {
gf_msg_trace(this->name, 0, "Skip heal layout for %s gfid = %s ",
loc->path, uuid_utoa(loc->gfid));
@ -1062,19 +1071,12 @@ dht_selfheal_dir_setattr(call_frame_t *frame, loc_t *loc, struct iatt *stbuf,
return 0;
}
local->call_cnt = missing_attr;
cnt = layout->cnt;
cnt = local->call_cnt = conf->subvolume_cnt;
for (i = 0; i < cnt; i++) {
if (layout->list[i].err == -1) {
gf_msg_trace(this->name, 0, "%s: setattr on subvol %s, gfid = %s",
loc->path, layout->list[i].xlator->name,
uuid_utoa(loc->gfid));
STACK_WIND(
frame, dht_selfheal_dir_setattr_cbk, layout->list[i].xlator,
layout->list[i].xlator->fops->setattr, loc, stbuf, valid, NULL);
}
STACK_WIND(frame, dht_selfheal_dir_setattr_cbk, layout->list[i].xlator,
layout->list[i].xlator->fops->setattr, loc, stbuf, valid,
NULL);
}
return 0;
@ -1492,6 +1494,9 @@ dht_selfheal_dir_mkdir(call_frame_t *frame, loc_t *loc, dht_layout_t *layout,
}
if (missing_dirs == 0) {
/* We don't need to create any directories. Proceed to heal the
* attrs and xattrs
*/
if (!__is_root_gfid(local->stbuf.ia_gfid)) {
if (local->need_xattr_heal) {
local->need_xattr_heal = 0;
@ -1499,8 +1504,8 @@ dht_selfheal_dir_mkdir(call_frame_t *frame, loc_t *loc, dht_layout_t *layout,
if (ret)
gf_msg(this->name, GF_LOG_ERROR, ret,
DHT_MSG_DIR_XATTR_HEAL_FAILED,
"xattr heal failed for "
"directory %s gfid %s ",
"%s:xattr heal failed for "
"directory (gfid = %s)",
local->loc.path, local->gfid);
} else {
if (!gf_uuid_is_null(local->gfid))
@ -1512,8 +1517,8 @@ dht_selfheal_dir_mkdir(call_frame_t *frame, loc_t *loc, dht_layout_t *layout,
gf_msg(this->name, GF_LOG_INFO, 0,
DHT_MSG_DIR_XATTR_HEAL_FAILED,
"Failed to set mds xattr "
"for directory %s gfid %s ",
"%s: Failed to set mds xattr "
"for directory (gfid = %s)",
local->loc.path, local->gfid);
}
}
@ -2085,10 +2090,10 @@ dht_selfheal_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
loc_t *loc, dht_layout_t *layout)
{
dht_local_t *local = NULL;
xlator_t *this = NULL;
uint32_t down = 0;
uint32_t misc = 0;
int ret = 0;
xlator_t *this = NULL;
char pgfid[GF_UUID_BUF_SIZE] = {0};
char gfid[GF_UUID_BUF_SIZE] = {0};
inode_t *linked_inode = NULL, *inode = NULL;
@ -2099,6 +2104,11 @@ dht_selfheal_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
local->selfheal.dir_cbk = dir_cbk;
local->selfheal.layout = dht_layout_ref(this, layout);
if (local->need_attrheal && !IA_ISINVAL(local->mds_stbuf.ia_type)) {
/*Use the one in the mds_stbuf*/
local->stbuf = local->mds_stbuf;
}
if (!__is_root_gfid(local->stbuf.ia_gfid)) {
gf_uuid_unparse(local->stbuf.ia_gfid, gfid);
gf_uuid_unparse(loc->parent->gfid, pgfid);
@ -2118,6 +2128,13 @@ dht_selfheal_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
inode_unref(inode);
}
if (local->need_xattr_heal && (local->mds_xattr)) {
dht_dir_set_heal_xattr(this, local, local->xattr, local->mds_xattr,
NULL, NULL);
dict_unref(local->mds_xattr);
local->mds_xattr = NULL;
}
dht_layout_anomalies(this, loc, layout, &local->selfheal.hole_cnt,
&local->selfheal.overlaps_cnt,
&local->selfheal.missing_cnt, &local->selfheal.down,
@ -2128,18 +2145,18 @@ dht_selfheal_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
if (down) {
gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_SELFHEAL_FAILED,
"Directory selfheal failed: %d subvolumes down."
"Not fixing. path = %s, gfid = %s",
down, loc->path, gfid);
"%s: Directory selfheal failed: %d subvolumes down."
"Not fixing. gfid = %s",
loc->path, down, gfid);
ret = 0;
goto sorry_no_fix;
}
if (misc) {
gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_SELFHEAL_FAILED,
"Directory selfheal failed : %d subvolumes "
"have unrecoverable errors. path = %s, gfid = %s",
misc, loc->path, gfid);
"%s: Directory selfheal failed : %d subvolumes "
"have unrecoverable errors. gfid = %s",
loc->path, misc, gfid);
ret = 0;
goto sorry_no_fix;
@ -2369,13 +2386,13 @@ dht_dir_attr_heal(void *data)
frame = data;
local = frame->local;
mds_subvol = local->mds_subvol;
this = frame->this;
GF_VALIDATE_OR_GOTO("dht", this, out);
GF_VALIDATE_OR_GOTO("dht", local, out);
conf = this->private;
GF_VALIDATE_OR_GOTO("dht", conf, out);
mds_subvol = local->mds_subvol;
call_cnt = conf->subvolume_cnt;
if (!__is_root_gfid(local->stbuf.ia_gfid) && (!mds_subvol)) {