glusterfs/tests/volume.rc

477 lines
12 KiB
Plaintext
Raw Normal View History

function volinfo_field()
{
local vol=$1;
local field=$2;
$CLI volume info $vol | grep "^$field: " | sed 's/.*: //';
}
function brick_count()
{
local vol=$1;
$CLI volume info $vol | egrep "^Brick[0-9]+: " | wc -l;
}
function online_brick_count ()
{
pgrep glusterfsd | wc -l
}
function volume_option()
{
local vol=$1
local key=$2
$CLI volume info $vol | egrep "^$key: " | cut -f2 -d' ';
}
dht: better layout-optimization algorithm This method deals with the case where swapping might gain a bigger overlap for the xlator currently under consideration, but sacrifices even more from the xlator we're swapping with. For example: A = 0x00000000 - 0x44444443 (new 0x00000000 - 0x55555554) B = 0x44444444 - 0x77777776 (new 0x55555555 - 0xaaaaaaa9) C = 0x77777777 - 0xffffffff (new 0xaaaaaaaa - 0xffffffff) Here, the new range for B has a bigger overlap with the old C than with the old B (0x33333333 vs. 0x22222222 to be precise) so looking only at that might lead us to swap. However, such a swap turns the new C's overlap from 0x55555556 (vs. old C) to *zero* (vs. old B). In other words, we've gained 0x11111111 for B but lost 0x55555556 for C, so it's a bad idea. The new algorithm accounts for all effects of the swap, so it not only avoids bad swaps but can make some good ones that would have been missed previously. For example, if swapping a range X with a later range Y would not increase the overlap for X we would previously have skipped it even if the swap would increase Y's overlap without affecting X's. This is the normal case when we're adding a new brick (which initially has zero overlap with any old range) so finding more good swaps is probably even more important than avoiding bad ones. Also, the logic in dht_overlap_calc was completely broken before, causing integer overflows instead of providing correct values, so no matter what higher-level algorithm was in place the GIGO effect would have resulted in bad decisions. Change-Id: If61ed513cfcb931916c6b51da293e3efbaaf385f BUG: 853258 Signed-off-by: Jeff Darcy <jdarcy@redhat.com> Reviewed-on: http://review.gluster.org/3908 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Anand Avati <avati@redhat.com>
2013-02-05 19:19:06 -05:00
function rebalance_status_field {
#The rebalance status can be upto 3 words, (ex:'fix-layout in progress'), hence the awk-print $7 thru $9.
#But if the status is less than 3 words, it also prints the next field i.e the run_time_in_secs.(ex:'completed 3.00').
#So we trim the numbers out with `tr`. Finally remove the trailing white spaces with sed. What we get is one of the
#strings in the 'cli_vol_task_status_str' char array of cli-rpc-ops.c
$CLI volume rebalance $1 status | awk '{print $7,$8,$9}' |sed -n 3p |tr -d '[^0-9+\.]'|sed 's/ *$//g'
}
function remove_brick_status_completed_field {
local vol=$1
local brick_list=$2
$CLI volume remove-brick $vol $brick_list status | awk '{print $7}' | sed -n 3p
}
function get_mount_process_pid {
local vol=$1
ps auxww | grep glusterfs | grep -E "volfile-id[ =]/?$vol " | awk '{print $2}' | head -1
}
function get_nfs_pid ()
{
ps auxww | grep "volfile-id\ gluster\/nfs" | awk '{print $2}' | head -1
}
function read_nfs_pidfile ()
{
echo `cat $GLUSTERD_WORKDIR/nfs/run/nfs.pid`
}
function cleanup_statedump {
pid=$1
rm -f $statedumpdir/*$pid.dump.*
#.vimrc friendly comment */
}
function generate_statedump {
local fpath=""
pid=$1
#remove old stale statedumps
cleanup_statedump $pid
kill -USR1 $pid
#Wait till the statedump is generated
sleep 1
fname=$(ls $statedumpdir | grep -E "\.$pid\.dump\.")
echo $statedumpdir/$fname
}
function generate_mount_statedump {
local vol=$1
generate_statedump $(get_mount_process_pid $vol)
}
function cleanup_mount_statedump {
local vol=$1
cleanup_statedump $(get_mount_process_pid $vol)
}
function snap_client_connected_status {
local vol=$1
local fpath=$(generate_mount_statedump $vol)
up=$(grep -A2 xlator.protocol.client.$vol-snapd-client.priv $fpath | tail -1 | cut -f 2 -d'=')
rm -f $fpath
echo "$up"
}
function _afr_child_up_status {
local vol=$1
#brick_id is (brick-num in volume info - 1)
local brick_id=$2
local gen_state_dump=$3
local fpath=$($gen_state_dump $vol)
up=$(grep -B1 trusted.afr.$vol-client-$brick_id $fpath | head -1 | cut -f2 -d'=')
rm -f $fpath
echo "$up"
}
function afr_child_up_status_meta {
local mnt=$1
local repl=$2
local child=$3
grep "child_up\[$child\]" $mnt/.meta/graphs/active/$repl/private | awk '{print $3}'
}
function afr_child_up_status {
local vol=$1
#brick_id is (brick-num in volume info - 1)
local brick_id=$2
_afr_child_up_status $vol $brick_id generate_mount_statedump
}
function ec_get_info {
local vol=$1
local dist_id=$2
local key=$3
local fpath=$(generate_mount_statedump $vol)
local value=$(sed -n "/^\[cluster\/disperse\.$vol-disperse-$dist_id\]/,/^\[/{s/^$key=\(.*\)/\1/p;}" $fpath | head -1)
rm -f $fpath
echo "$value"
}
function ec_child_up_status {
local vol=$1
local dist_id=$2
local brick_id=$(($3 + 1))
local mask=$(ec_get_info $vol $dist_id "childs_up_mask")
echo "${mask: -$brick_id:1}"
}
function ec_child_up_count {
local vol=$1
local dist_id=$2
ec_get_info $vol $dist_id "childs_up"
}
function get_shd_process_pid {
ps auxww | grep glusterfs | grep -E "glustershd/run/glustershd.pid" | awk '{print $2}' | head -1
}
function generate_shd_statedump {
generate_statedump $(get_shd_process_pid)
}
function generate_nfs_statedump {
generate_statedump $(get_nfs_pid)
}
function generate_brick_statedump {
local vol=$1
local host=$2
local brick=$3
generate_statedump $(get_brick_pid $vol $host $brick)
}
function afr_child_up_status_in_shd {
local vol=$1
#brick_id is (brick-num in volume info - 1)
local brick_id=$2
_afr_child_up_status $vol $brick_id generate_shd_statedump
}
function afr_child_up_status_in_nfs {
local vol=$1
#brick_id is (brick-num in volume info - 1)
local brick_id=$2
_afr_child_up_status $vol $brick_id generate_nfs_statedump
}
function nfs_up_status {
cli: volume status for tcp,rdma type volume display only tcp port For tcp,rdma type voumes, there will be two ports, one for tcp and one for rdma. But volume status command only display tcp port. By this change, adding an extra column for rdma port and changing the port to tcp port. Eg: >gluster volume status pathy >For tcp,rdma type volume Status of volume: patchy Gluster process TCP Port RDMA Port Online Pid ------------------------------------------------------------------------------ Brick brickname 49152 49153 Y 14158 >For rdma type volume Status of volume: patchy Gluster process TCP Port RDMA Port Online Pid ------------------------------------------------------------------------------ Brick brickname 0 49153 Y 14158 For tcp type volume Status of volume: patchy Gluster process TCP Port RDMA Port Online Pid ------------------------------------------------------------------------------ Brick brickname 49152 0 Y 14158 >gluster volume status patchy detail Status of volume: xcube2 ------------------------------------------------------------------------------ Brick : Brick brickname TCP Port : 49152 RDMA Port : 49153 Online : Y Pid : 14158 File System : ext4 Device : /dev/mapper/luks-2099dd4a-0050-4cae-ad7b-c6a0498c4e88 Mount Options : rw,seclabel,relatime,data=ordered Inode Size : 256 Disk Space Free : 31.1GB Total Disk Space : 47.9GB Inode Count : 3203072 Free Inodes : 2926789 >gluster volume status xcube --xml <?xml version="1.0" encoding="UTF-8" standalone="yes"?> <cliOutput> <opRet>0</opRet> <opErrno>0</opErrno> <opErrstr>(null)</opErrstr> <volStatus> <volumes> <volume> <volName>xcube</volName> <nodeCount>2</nodeCount> <node> <hostname>hostname</hostname> <path>/home/brick1</path> <peerid>2d7bcb95-3d26-4d4f-b3c6-e2ee01b71662</peerid> <status>1</status> <port>49152</port> <ports> <tcp>49152</tcp> <rdma>N/A</rdma> </ports> <pid>5657</pid> </node> <node> <hostname>NFS Server</hostname> <path>localhost</path> <peerid>2d7bcb95-3d26-4d4f-b3c6-e2ee01b71662</peerid> <status>1</status> <port>2049</port> <ports> <tcp>2049</tcp> <rdma>N/A</rdma> </ports> <pid>5665</pid> </node> <tasks/> </volume> </volumes> </volStatus> </cliOutput> Change-Id: I81aab226edbd400d29cd3f510af4f344dd99ba51 BUG: 1164079 Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> Reviewed-on: http://review.gluster.org/9191 Reviewed-by: Atin Mukherjee <amukherj@redhat.com> Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Kaushal M <kaushal@redhat.com>
2014-11-24 17:07:02 +05:30
gluster volume status | grep "NFS Server" | awk '{print $7}'
}
function glustershd_up_status {
cli: volume status for tcp,rdma type volume display only tcp port For tcp,rdma type voumes, there will be two ports, one for tcp and one for rdma. But volume status command only display tcp port. By this change, adding an extra column for rdma port and changing the port to tcp port. Eg: >gluster volume status pathy >For tcp,rdma type volume Status of volume: patchy Gluster process TCP Port RDMA Port Online Pid ------------------------------------------------------------------------------ Brick brickname 49152 49153 Y 14158 >For rdma type volume Status of volume: patchy Gluster process TCP Port RDMA Port Online Pid ------------------------------------------------------------------------------ Brick brickname 0 49153 Y 14158 For tcp type volume Status of volume: patchy Gluster process TCP Port RDMA Port Online Pid ------------------------------------------------------------------------------ Brick brickname 49152 0 Y 14158 >gluster volume status patchy detail Status of volume: xcube2 ------------------------------------------------------------------------------ Brick : Brick brickname TCP Port : 49152 RDMA Port : 49153 Online : Y Pid : 14158 File System : ext4 Device : /dev/mapper/luks-2099dd4a-0050-4cae-ad7b-c6a0498c4e88 Mount Options : rw,seclabel,relatime,data=ordered Inode Size : 256 Disk Space Free : 31.1GB Total Disk Space : 47.9GB Inode Count : 3203072 Free Inodes : 2926789 >gluster volume status xcube --xml <?xml version="1.0" encoding="UTF-8" standalone="yes"?> <cliOutput> <opRet>0</opRet> <opErrno>0</opErrno> <opErrstr>(null)</opErrstr> <volStatus> <volumes> <volume> <volName>xcube</volName> <nodeCount>2</nodeCount> <node> <hostname>hostname</hostname> <path>/home/brick1</path> <peerid>2d7bcb95-3d26-4d4f-b3c6-e2ee01b71662</peerid> <status>1</status> <port>49152</port> <ports> <tcp>49152</tcp> <rdma>N/A</rdma> </ports> <pid>5657</pid> </node> <node> <hostname>NFS Server</hostname> <path>localhost</path> <peerid>2d7bcb95-3d26-4d4f-b3c6-e2ee01b71662</peerid> <status>1</status> <port>2049</port> <ports> <tcp>2049</tcp> <rdma>N/A</rdma> </ports> <pid>5665</pid> </node> <tasks/> </volume> </volumes> </volStatus> </cliOutput> Change-Id: I81aab226edbd400d29cd3f510af4f344dd99ba51 BUG: 1164079 Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> Reviewed-on: http://review.gluster.org/9191 Reviewed-by: Atin Mukherjee <amukherj@redhat.com> Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Kaushal M <kaushal@redhat.com>
2014-11-24 17:07:02 +05:30
gluster volume status | grep "Self-heal Daemon" | awk '{print $7}'
}
function quotad_up_status {
cli: volume status for tcp,rdma type volume display only tcp port For tcp,rdma type voumes, there will be two ports, one for tcp and one for rdma. But volume status command only display tcp port. By this change, adding an extra column for rdma port and changing the port to tcp port. Eg: >gluster volume status pathy >For tcp,rdma type volume Status of volume: patchy Gluster process TCP Port RDMA Port Online Pid ------------------------------------------------------------------------------ Brick brickname 49152 49153 Y 14158 >For rdma type volume Status of volume: patchy Gluster process TCP Port RDMA Port Online Pid ------------------------------------------------------------------------------ Brick brickname 0 49153 Y 14158 For tcp type volume Status of volume: patchy Gluster process TCP Port RDMA Port Online Pid ------------------------------------------------------------------------------ Brick brickname 49152 0 Y 14158 >gluster volume status patchy detail Status of volume: xcube2 ------------------------------------------------------------------------------ Brick : Brick brickname TCP Port : 49152 RDMA Port : 49153 Online : Y Pid : 14158 File System : ext4 Device : /dev/mapper/luks-2099dd4a-0050-4cae-ad7b-c6a0498c4e88 Mount Options : rw,seclabel,relatime,data=ordered Inode Size : 256 Disk Space Free : 31.1GB Total Disk Space : 47.9GB Inode Count : 3203072 Free Inodes : 2926789 >gluster volume status xcube --xml <?xml version="1.0" encoding="UTF-8" standalone="yes"?> <cliOutput> <opRet>0</opRet> <opErrno>0</opErrno> <opErrstr>(null)</opErrstr> <volStatus> <volumes> <volume> <volName>xcube</volName> <nodeCount>2</nodeCount> <node> <hostname>hostname</hostname> <path>/home/brick1</path> <peerid>2d7bcb95-3d26-4d4f-b3c6-e2ee01b71662</peerid> <status>1</status> <port>49152</port> <ports> <tcp>49152</tcp> <rdma>N/A</rdma> </ports> <pid>5657</pid> </node> <node> <hostname>NFS Server</hostname> <path>localhost</path> <peerid>2d7bcb95-3d26-4d4f-b3c6-e2ee01b71662</peerid> <status>1</status> <port>2049</port> <ports> <tcp>2049</tcp> <rdma>N/A</rdma> </ports> <pid>5665</pid> </node> <tasks/> </volume> </volumes> </volStatus> </cliOutput> Change-Id: I81aab226edbd400d29cd3f510af4f344dd99ba51 BUG: 1164079 Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> Reviewed-on: http://review.gluster.org/9191 Reviewed-by: Atin Mukherjee <amukherj@redhat.com> Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Kaushal M <kaushal@redhat.com>
2014-11-24 17:07:02 +05:30
gluster volume status | grep "Quota Daemon" | awk '{print $7}'
}
function get_brick_pid {
local vol=$1
local host=$2
local brick=$3
local brick_hiphenated=$(echo $brick | tr '/' '-')
echo `cat $GLUSTERD_WORKDIR/vols/$vol/run/${host}${brick_hiphenated}.pid`
}
function kill_brick {
local vol=$1
local host=$2
local brick=$3
kill -9 $(get_brick_pid $vol $host $brick)
}
function check_option_help_presence {
local option=$1
$CLI volume set help | grep "^Option:" | grep -w $option
}
function afr_get_changelog_xattr {
local file=$1
local xkey=$2
getfattr -n $xkey -e hex $file 2>/dev/null | grep "$xkey" | cut -f2 -d'='
}
function afr_get_pending_heal_count {
local vol=$1
gluster volume heal $vol info | grep "Number of entries" | awk '{ sum+=$4} END {print sum}'
}
function afr_get_split_brain_count {
local vol=$1
gluster volume heal $vol info split-brain | grep "Number of entries in split-brain" | awk '{ sum+=$6} END {print sum}'
}
function afr_get_index_path {
local brick_path=$1
echo "$brick_path/.glusterfs/indices/xattrop"
}
function afr_get_num_indices_in_brick {
local brick_path=$1
echo $(ls $(afr_get_index_path $brick_path) | grep -v xattrop | wc -l)
}
function gf_get_gfid_xattr {
file=$1
getfattr -n trusted.gfid -e hex $file 2>/dev/null | grep "trusted.gfid" | cut -f2 -d'='
}
function gf_gfid_xattr_to_str {
xval=$1
echo "${xval:2:8}-${xval:10:4}-${xval:14:4}-${xval:18:4}-${xval:22:12}"
}
function get_text_xattr {
local key=$1
local path=$2
getfattr -d -m. -e text $path 2>/dev/null | grep $key | cut -f2 -d'='
}
function gf_check_file_opened_in_brick {
vol=$1
host=$2
brick=$3
realpath=$4
ls -l /proc/$(get_brick_pid $vol $host $brick)/fd | grep "${realpath}$" 2>&1 > /dev/null
if [ $? -eq 0 ]; then
echo "Y"
else
echo "N"
fi
}
function gf_get_gfid_backend_file_path {
brickpath=$1
filepath_in_brick=$2
gfid=$(gf_get_gfid_xattr "$brickpath/$filepath_in_brick")
gfidstr=$(gf_gfid_xattr_to_str $gfid)
echo "$brickpath/.glusterfs/${gfidstr:0:2}/${gfidstr:2:2}/$gfidstr"
}
function gf_rm_file_and_gfid_link {
brickpath=$1
filepath_in_brick=$2
rm -f $(gf_get_gfid_backend_file_path $brickpath $filepath_in_brick)
rm -f "$brickpath/$filepath_in_brick"
}
function gd_is_replace_brick_completed {
local host=$1
local vol=$2
local src_brick=$3
local dst_brick=$4
$CLI volume replace-brick $vol $src_brick $dst_brick status | grep -i "Migration complete"
if [ $? -eq 0 ]; then
echo "Y"
else
echo "N"
fi
}
dht: better layout-optimization algorithm This method deals with the case where swapping might gain a bigger overlap for the xlator currently under consideration, but sacrifices even more from the xlator we're swapping with. For example: A = 0x00000000 - 0x44444443 (new 0x00000000 - 0x55555554) B = 0x44444444 - 0x77777776 (new 0x55555555 - 0xaaaaaaa9) C = 0x77777777 - 0xffffffff (new 0xaaaaaaaa - 0xffffffff) Here, the new range for B has a bigger overlap with the old C than with the old B (0x33333333 vs. 0x22222222 to be precise) so looking only at that might lead us to swap. However, such a swap turns the new C's overlap from 0x55555556 (vs. old C) to *zero* (vs. old B). In other words, we've gained 0x11111111 for B but lost 0x55555556 for C, so it's a bad idea. The new algorithm accounts for all effects of the swap, so it not only avoids bad swaps but can make some good ones that would have been missed previously. For example, if swapping a range X with a later range Y would not increase the overlap for X we would previously have skipped it even if the swap would increase Y's overlap without affecting X's. This is the normal case when we're adding a new brick (which initially has zero overlap with any old range) so finding more good swaps is probably even more important than avoiding bad ones. Also, the logic in dht_overlap_calc was completely broken before, causing integer overflows instead of providing correct values, so no matter what higher-level algorithm was in place the GIGO effect would have resulted in bad decisions. Change-Id: If61ed513cfcb931916c6b51da293e3efbaaf385f BUG: 853258 Signed-off-by: Jeff Darcy <jdarcy@redhat.com> Reviewed-on: http://review.gluster.org/3908 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Anand Avati <avati@redhat.com>
2013-02-05 19:19:06 -05:00
function dht_get_layout {
local my_xa=trusted.glusterfs.dht
getfattr -d -e hex -n $my_xa $1 2> /dev/null | grep "$my_xa=" | cut -d= -f2
dht: better layout-optimization algorithm This method deals with the case where swapping might gain a bigger overlap for the xlator currently under consideration, but sacrifices even more from the xlator we're swapping with. For example: A = 0x00000000 - 0x44444443 (new 0x00000000 - 0x55555554) B = 0x44444444 - 0x77777776 (new 0x55555555 - 0xaaaaaaa9) C = 0x77777777 - 0xffffffff (new 0xaaaaaaaa - 0xffffffff) Here, the new range for B has a bigger overlap with the old C than with the old B (0x33333333 vs. 0x22222222 to be precise) so looking only at that might lead us to swap. However, such a swap turns the new C's overlap from 0x55555556 (vs. old C) to *zero* (vs. old B). In other words, we've gained 0x11111111 for B but lost 0x55555556 for C, so it's a bad idea. The new algorithm accounts for all effects of the swap, so it not only avoids bad swaps but can make some good ones that would have been missed previously. For example, if swapping a range X with a later range Y would not increase the overlap for X we would previously have skipped it even if the swap would increase Y's overlap without affecting X's. This is the normal case when we're adding a new brick (which initially has zero overlap with any old range) so finding more good swaps is probably even more important than avoiding bad ones. Also, the logic in dht_overlap_calc was completely broken before, causing integer overflows instead of providing correct values, so no matter what higher-level algorithm was in place the GIGO effect would have resulted in bad decisions. Change-Id: If61ed513cfcb931916c6b51da293e3efbaaf385f BUG: 853258 Signed-off-by: Jeff Darcy <jdarcy@redhat.com> Reviewed-on: http://review.gluster.org/3908 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Anand Avati <avati@redhat.com>
2013-02-05 19:19:06 -05:00
}
2013-06-28 19:11:47 +05:30
function afr_get_specific_changelog_xattr ()
{
local path=$1
local key=$2
local type=$3
local specific_changelog=""
changelog_xattr=$(afr_get_changelog_xattr "$path" "$key")
if [ "$type" == "data" ]; then
specific_changelog=${changelog_xattr:2:8}
elif [ "$type" == "metadata" ]; then
specific_changelog=${changelog_xattr:10:8}
elif [ "$type" == "entry" ]; then
specific_changelog=${changelog_xattr:18:8}
else
specific_changlog="error"
fi
echo $specific_changelog
}
##
# query pathinfo xattr and extract POSIX pathname(s)
##
function get_backend_paths {
local path=$1
getfattr -m . -n trusted.glusterfs.pathinfo $path | tr ' ' '\n' | sed -n 's/<POSIX.*:.*:\(.*\)>.*/\1/p'
}
#Gets the xattr value in hex, also removed 0x in front of the value
function get_hex_xattr {
local key=$1
local path=$2
getfattr -d -m. -e hex $2 2>/dev/null | grep $1 | cut -f2 -d'=' | cut -f2 -d'x'
}
function cumulative_stat_count {
echo "$1" | grep "Cumulative Stats:" | wc -l
}
function incremental_stat_count {
echo "$1" | grep "Interval$2Stats:" | wc -l
}
function cleared_stat_count {
echo "$1" | grep "Cleared stats." | wc -l
}
function data_read_count {
echo "$1" | grep "Data Read:$2bytes" | wc -l
}
function data_written_count {
echo "$1" | grep "Data Written:$2bytes" | wc -l
}
function has_holes {
if [ $((`stat -c '%b*%B-%s' $1`)) -lt 0 ];
then
echo "1"
else
echo "0"
fi
}
function do_volume_operations() {
local operation=$1
local count=$2
local force=$3
local pids=()
local cli
local v
for i in `seq 1 $count`; do
cli="CLI_$i"
v="V`expr $i - 1`"
${!cli} volume $operation ${!v} $force &
pids[$i]=$!
done
for i in `seq 1 $count`; do
wait ${pids[$i]}
done
}
function start_volumes() {
do_volume_operations start $1
}
function stop_volumes() {
do_volume_operations stop $1
}
function start_force_volumes() {
do_volume_operations start $1 force
}
function stop_force_volumes() {
do_volume_operations stop $1 force
}
function delete_volumes() {
do_volume_operations delete $1
}
function volume_exists() {
$CLI volume info $1 > /dev/null 2>&1
if [ $? -eq 0 ]; then
echo "Y"
else
echo "N"
fi
}
function afr_get_index_count {
local brick=$1
ls $1/.glusterfs/indices/xattrop | grep -v xattrop | wc -l
}
storage/posix: Janitor should guard against dir renames. Problem: Directory rename while a brick is down can cause gfid handle of that directory to be deleted until next lookup happens on that directory. *) Self-heal does not have intelligence to detect renames at the moment. So it has to delete the directory 'd' using special flags, because it has to perform 'rm -rf' of that directory as it is not empty. Posix xlator implements this by renaming the directory deleted to 'landfill' directory in '.glusterfs' where janitor thread will perform actual rm -rf by traversing the directory. Janitor thread wakes up every 10 minutes to check if there are any directories to be deleted and deletes them. As part of deleting it also deletes the gfid-handles. Steps to hit the problem: 1) On a replicate volume create a directory 'd', file in 'd' called 'f' so the directory 'd' is not empty. 2) bring one of the bricks down (lets call it brick-a, the other one is brick-b 3) Rename d to d1 4) When brick-a comes online again, self-heal deletes directory 'd' and creates directory 'd1' on brick-a for performing self-heal. So on brick-a, gfid-handle of 'd' pointing to 'da is deleted and recreated to point to 'd1'. 5) This directory 'b' with all its directory hierarchy (for now just the file 'f') will be under 'landfill' directory. 6) When janitor thread wakes up and deletes directory 'd' and gfid-handle of 'd' without realizing that it is now pointing to 'd1'. Thus 'd1' loses its gfid-handle Fix: Delete gfid-handle for a directory only when the gfid-handle is stale. Change-Id: I21265b3bd3852f0967d916aaa21108ae5c9e7373 BUG: 1101143 Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> Reviewed-on: http://review.gluster.org/7879 Reviewed-by: Niels de Vos <ndevos@redhat.com> Reviewed-by: Xavier Hernandez <xhernandez@datalab.es> Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Anand Avati <avati@redhat.com>
2014-05-23 12:51:28 +05:30
function landfill_entry_count {
local brick=$1
ls $brick/.glusterfs/landfill | wc -l
}
function path_exists {
stat $1
if [ $? -eq 0 ]; then echo "Y"; else echo "N"; fi
}
function force_umount {
umount -f $*
if [ $? -eq 0 ]; then echo "Y"; else echo "N"; fi
}
function assign_gfid {
local gfid=$1
local file=$2
setfattr -n trusted.gfid -v $1 $2
}
function get_random_gfid {
echo "0x"$(uuidgen | awk -F '-' 'BEGIN {OFS=""} {print $1,$2,$3,$4,$5}')
}
function volgen_volume_exists {
local volfile="$1"
local xl_vol="$2"
local xl_type="$3"
local xl_feature="$4"
xl=$(sed -e "/./{H;\$!d;}" -e "x;/volume $xl_vol/!d;/type $xl_type\/$xl_feature/!d" $volfile)
if [ -z "$xl" ];
then
echo "N"
else
echo "Y"
fi
}
function volgen_volume_option {
local volfile="$1"
local xl_vol="$2"
local xl_type="$3"
local xl_feature="$4"
local xl_option="$5"
sed -e "/./{H;\$!d;}" -e "x;/volume $xl_vol/!d;/type $xl_type\/$xl_feature/!d;/option $xl_option/!d" $volfile | grep " $xl_option " | awk '{print $3}'
}
function mount_get_option_value {
local m=$1
local subvol=$2
local key=$3
grep -w "$3" $m/.meta/graphs/active/$subvol/private | awk '{print $3}'
}
function get_volume_mark {
getfattr -n trusted.glusterfs.volume-mark -ehex $1 | sed -n 's/^trusted.glusterfs.volume-mark=0x//p' | cut -b5-36 | sed 's/\([a-f0-9]\{8\}\)\([a-f0-9]\{4\}\)\([a-f0-9]\{4\}\)\([a-f0-9]\{4\}\)/\1-\2-\3-\4-/'
}