features/shard: Introducing ".shard/.remove_me" for atomic shard deletion (part 1)

PROBLEM:
Shards are deleted synchronously when a sharded file is unlinked, or
when a sharded file that is the destination (dst) of a rename() is about
to be replaced. The problem with this approach is that it makes the
operation very slow, sometimes causing the application to time out,
especially with large files.

SOLUTION:
To make this operation atomic, we introduce a ".remove_me" directory.
Now renames and unlinks will simply involve two steps:
1. creating an empty file under .remove_me named after the gfid of the file
participating in unlink/rename
2. carrying out the actual rename/unlink
A synctask is created (more on that in part 2) to scan this directory
after every unlink/rename operation (or upon a volume mount) and clean
up all shards associated with the gfids listed in it. All of this happens
in the background.
The task takes care to delete the shards associated with a gfid in
.remove_me only if that gfid no longer exists in the backend, which
ensures that the file was successfully renamed/unlinked and that its
shards can now be safely discarded.

Change-Id: Ia1d238b721a3e99f951a73abbe199e4245f51a3a
updates: bz#1568521
Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
This commit is contained in:
Krutika Dhananjay 2018-03-29 17:21:32 +05:30 committed by Pranith Kumar Karampuri
parent 5702ff3012
commit c30aca6a5b
10 changed files with 1230 additions and 453 deletions

View File

@ -123,6 +123,7 @@ void trap (void);
/* Shard */
#define GF_XATTR_SHARD_FILE_SIZE "trusted.glusterfs.shard.file-size"
#define SHARD_ROOT_GFID "be318638-e8a0-4c6d-977d-7a937aa84806"
#define DOT_SHARD_REMOVE_ME_GFID "77dd5a45-dbf5-4592-b31b-b440382302e9"
/* Lease: buffer length for stringified lease id
* Format: 4hexnum-4hexnum-4hexnum-4hexnum-4hexnum-4hexnum-4hexnum-4hexnum

View File

@ -25,11 +25,11 @@ TEST touch $M0/bar
TEST truncate -s 10G $M0/bar
#Unlink on such a file should succeed.
TEST unlink $M0/bar
#
#Create a file 'baz' with holes.
TEST touch $M0/baz
TEST truncate -s 10G $M0/baz
#Rename with a sharded existing dest that has holes must succeed.
TEST mv -f $M0/foo $M0/baz
cleanup;
cleanup

View File

@ -0,0 +1,79 @@
#!/bin/bash
#
# Regression test for the ".shard/.remove_me" marker files.
#
# Scenario: a marker file for a gfid already exists under .shard/.remove_me
# on every brick and carries stale shard xattrs (block-size 64MB, file-size
# 5MB). After the sharded file is unlinked (first half) or replaced as the
# destination of a rename (second half), the marker's xattrs are expected to
# hold the real values of the file: block-size 4MB (0x400000) and file-size
# 9MB (0x900000, i.e. the 9216 KB written with dd).
. $(dirname $0)/../../include.rc
. $(dirname $0)/../../volume.rc
cleanup
# Bring up a 2-way replicated volume with sharding enabled and a 4MB shard
# block size, then mount it at $M0.
TEST glusterd
TEST pidof glusterd
TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
TEST $CLI volume set $V0 features.shard on
TEST $CLI volume set $V0 features.shard-block-size 4MB
TEST $CLI volume start $V0
TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0
TEST mkdir $M0/dir
# Unlink a temporary file to trigger creation of .remove_me
TEST touch $M0/tmp
TEST unlink $M0/tmp
TEST stat $B0/${V0}0/.shard/.remove_me
TEST stat $B0/${V0}1/.shard/.remove_me
# 9216 * 1024 bytes = 9MB, i.e. more than two 4MB shard blocks.
TEST dd if=/dev/zero of=$M0/dir/file bs=1024 count=9216
gfid_file=$(get_gfid_string $M0/dir/file)
# Create marker file from the backend to simulate ENODATA.
# NOTE(review): these touch calls are not wrapped in TEST, so a failure here
# is presumably not reported by the harness - confirm this is intentional.
touch $B0/${V0}0/.shard/.remove_me/$gfid_file
touch $B0/${V0}1/.shard/.remove_me/$gfid_file
# Set block and file size to incorrect values of 64MB and 5MB to simulate "stale xattrs" case
# and confirm that the correct values are set when the actual unlink takes place
TEST setfattr -n trusted.glusterfs.shard.block-size -v 0x0000000004000000 $B0/${V0}0/.shard/.remove_me/$gfid_file
TEST setfattr -n trusted.glusterfs.shard.block-size -v 0x0000000004000000 $B0/${V0}1/.shard/.remove_me/$gfid_file
TEST setfattr -n trusted.glusterfs.shard.file-size -v 0x0000000000500000000000000000000000000000000000000000000000000000 $B0/${V0}0/.shard/.remove_me/$gfid_file
TEST setfattr -n trusted.glusterfs.shard.file-size -v 0x0000000000500000000000000000000000000000000000000000000000000000 $B0/${V0}1/.shard/.remove_me/$gfid_file
# Sleep for 2 seconds to prevent posix_gfid_heal() from believing marker file is "fresh" and failing lookup with ENOENT
sleep 2
TEST unlink $M0/dir/file
# The stale 64MB/5MB values must have been replaced by 4MB/9MB on both bricks.
EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_file
EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_file
EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_file
EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_file
##############################
### Repeat test for rename ###
##############################
# src is an empty file; dst is a 9MB sharded file that the rename replaces.
TEST touch $M0/src
TEST dd if=/dev/zero of=$M0/dir/dst bs=1024 count=9216
gfid_dst=$(get_gfid_string $M0/dir/dst)
# Create marker file from the backend to simulate ENODATA.
touch $B0/${V0}0/.shard/.remove_me/$gfid_dst
touch $B0/${V0}1/.shard/.remove_me/$gfid_dst
# Set block and file size to incorrect values of 64MB and 5MB to simulate "stale xattrs" case
# and confirm that the correct values are set when the actual rename takes place
TEST setfattr -n trusted.glusterfs.shard.block-size -v 0x0000000004000000 $B0/${V0}0/.shard/.remove_me/$gfid_dst
TEST setfattr -n trusted.glusterfs.shard.block-size -v 0x0000000004000000 $B0/${V0}1/.shard/.remove_me/$gfid_dst
TEST setfattr -n trusted.glusterfs.shard.file-size -v 0x0000000000500000000000000000000000000000000000000000000000000000 $B0/${V0}0/.shard/.remove_me/$gfid_dst
TEST setfattr -n trusted.glusterfs.shard.file-size -v 0x0000000000500000000000000000000000000000000000000000000000000000 $B0/${V0}1/.shard/.remove_me/$gfid_dst
# Sleep for 2 seconds to prevent posix_gfid_heal() from believing marker file is "fresh" and failing lookup with ENOENT
sleep 2
TEST mv -f $M0/src $M0/dir/dst
# As in the unlink case, the marker must now carry dst's real 4MB/9MB values.
EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_dst
EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_dst
EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_dst
EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_dst
cleanup

View File

@ -42,14 +42,14 @@ EXPECT_NOT "1" file_all_zeroes `find $B0 -name $gfid_foo.1`
# Now unlink the file. And ensure that all shards associated with the file are cleaned up
TEST unlink $M0/foo
TEST ! stat $B0/${V0}0/.shard/$gfid_foo.1
TEST ! stat $B0/${V0}1/.shard/$gfid_foo.1
TEST ! stat $B0/${V0}2/.shard/$gfid_foo.1
TEST ! stat $B0/${V0}3/.shard/$gfid_foo.1
TEST ! stat $B0/${V0}0/.shard/$gfid_foo.2
TEST ! stat $B0/${V0}1/.shard/$gfid_foo.2
TEST ! stat $B0/${V0}2/.shard/$gfid_foo.2
TEST ! stat $B0/${V0}3/.shard/$gfid_foo.2
#TEST ! stat $B0/${V0}0/.shard/$gfid_foo.1
#TEST ! stat $B0/${V0}1/.shard/$gfid_foo.1
#TEST ! stat $B0/${V0}2/.shard/$gfid_foo.1
#TEST ! stat $B0/${V0}3/.shard/$gfid_foo.1
#TEST ! stat $B0/${V0}0/.shard/$gfid_foo.2
#TEST ! stat $B0/${V0}1/.shard/$gfid_foo.2
#TEST ! stat $B0/${V0}2/.shard/$gfid_foo.2
#TEST ! stat $B0/${V0}3/.shard/$gfid_foo.2
TEST ! stat $M0/foo
#clean up everything

View File

@ -18,7 +18,7 @@ TEST dd if=/dev/zero conv=fsync of=$M0/one-plus-five-shards bs=1M count=23
ACTIVE_INODES_BEFORE=$(get_mount_active_size_value $V0)
TEST rm -f $M0/one-plus-five-shards
EXPECT `expr $ACTIVE_INODES_BEFORE - 5` get_mount_active_size_value $V0
#EXPECT `expr $ACTIVE_INODES_BEFORE - 4` get_mount_active_size_value $V0
EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
TEST $CLI volume stop $V0

View File

@ -32,7 +32,17 @@ TEST truncate -s 5M $M0/dir/foo
TEST ! stat $B0/${V0}0/.shard
TEST ! stat $B0/${V0}1/.shard
# Test to ensure that unlink doesn't fail due to absence of /.shard
gfid_foo=$(get_gfid_string $M0/dir/foo)
TEST unlink $M0/dir/foo
TEST stat $B0/${V0}0/.shard/.remove_me
TEST stat $B0/${V0}1/.shard/.remove_me
TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_foo
TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_foo
EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_foo
EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_foo
EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_foo
EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_foo
##################################################
##### Unlink of a sharded file without holes #####
@ -46,14 +56,20 @@ TEST stat $B0/${V0}1/.shard/$gfid_new.1
TEST stat $B0/${V0}0/.shard/$gfid_new.2
TEST stat $B0/${V0}1/.shard/$gfid_new.2
TEST unlink $M0/dir/new
TEST ! stat $B0/${V0}0/.shard/$gfid_new.1
TEST ! stat $B0/${V0}1/.shard/$gfid_new.1
TEST ! stat $B0/${V0}0/.shard/$gfid_new.2
TEST ! stat $B0/${V0}1/.shard/$gfid_new.2
#TEST ! stat $B0/${V0}0/.shard/$gfid_new.1
#TEST ! stat $B0/${V0}1/.shard/$gfid_new.1
#TEST ! stat $B0/${V0}0/.shard/$gfid_new.2
#TEST ! stat $B0/${V0}1/.shard/$gfid_new.2
TEST ! stat $M0/dir/new
TEST ! stat $B0/${V0}0/dir/new
TEST ! stat $B0/${V0}1/dir/new
TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_new
TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_new
EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_new
EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_new
EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_new
EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_new
#######################################
##### Unlink with /.shard present #####
#######################################
@ -67,18 +83,32 @@ TEST unlink $M0/dir/foo
TEST ! stat $B0/${V0}0/dir/foo
TEST ! stat $B0/${V0}1/dir/foo
TEST ! stat $M0/dir/foo
TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_foo
TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_foo
EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_foo
EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_foo
EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_foo
EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_foo
#############################################################
##### Unlink of a file with only one block (the zeroth) #####
#############################################################
TEST touch $M0/dir/foo
gfid_foo=$(get_gfid_string $M0/dir/foo)
TEST dd if=/dev/zero of=$M0/dir/foo bs=1024 count=1024
# Test to ensure that unlink of a sparse file works fine.
# Test to ensure that unlink of a file with only base shard works fine.
TEST unlink $M0/dir/foo
TEST ! stat $B0/${V0}0/dir/foo
TEST ! stat $B0/${V0}1/dir/foo
TEST ! stat $M0/dir/foo
TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_foo
TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_foo
EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_foo
EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_foo
EXPECT "0000000000100000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_foo
EXPECT "0000000000100000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_foo
####################################################
##### Unlink of a sharded file with hard-links #####
####################################################
@ -94,6 +124,8 @@ TEST stat $B0/${V0}1/.shard/$gfid_original.2
TEST ln $M0/dir/original $M0/link
# Now delete the original file.
TEST unlink $M0/dir/original
TEST ! stat $B0/${V0}0/.shard/.remove_me/$gfid_original
TEST ! stat $B0/${V0}1/.shard/.remove_me/$gfid_original
# Ensure the shards are still intact.
TEST stat $B0/${V0}0/.shard/$gfid_original.1
TEST stat $B0/${V0}1/.shard/$gfid_original.1
@ -105,15 +137,22 @@ TEST stat $B0/${V0}0/link
TEST stat $B0/${V0}1/link
# Now delete the last link.
TEST unlink $M0/link
TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_original
TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_original
# Ensure that the shards are all cleaned up.
TEST ! stat $B0/${V0}0/.shard/$gfid_original.1
TEST ! stat $B0/${V0}1/.shard/$gfid_original.1
TEST ! stat $B0/${V0}0/.shard/$gfid_original.2
TEST ! stat $B0/${V0}1/.shard/$gfid_original.2
TEST ! stat $M0/link
#TEST ! stat $B0/${V0}0/.shard/$gfid_original.1
#TEST ! stat $B0/${V0}1/.shard/$gfid_original.1
#TEST ! stat $B0/${V0}0/.shard/$gfid_original.2
#TEST ! stat $B0/${V0}1/.shard/$gfid_original.2
#TEST ! stat $M0/link
TEST ! stat $B0/${V0}0/link
TEST ! stat $B0/${V0}1/link
EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_original
EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_original
EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_original
EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_original
EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
TEST $CLI volume stop $V0
TEST $CLI volume delete $V0
@ -140,6 +179,7 @@ TEST touch $M0/dir/dst
##### Rename with /.shard absent #####
######################################
TEST truncate -s 5M $M0/dir/dst
gfid_dst=$(get_gfid_string $M0/dir/dst)
TEST ! stat $B0/${V0}0/.shard
TEST ! stat $B0/${V0}1/.shard
# Test to ensure that rename doesn't fail due to absence of /.shard
@ -150,6 +190,13 @@ TEST ! stat $B0/${V0}0/dir/src
TEST ! stat $B0/${V0}1/dir/src
TEST stat $B0/${V0}0/dir/dst
TEST stat $B0/${V0}1/dir/dst
TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_dst
TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_dst
EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_dst
EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_dst
EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_dst
EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_dst
##################################################
##### Rename to a sharded file without holes #####
@ -165,16 +212,23 @@ TEST stat $B0/${V0}1/.shard/$gfid_dst.1
TEST stat $B0/${V0}0/.shard/$gfid_dst.2
TEST stat $B0/${V0}1/.shard/$gfid_dst.2
TEST mv -f $M0/dir/src $M0/dir/dst
TEST ! stat $B0/${V0}0/.shard/$gfid_dst.1
TEST ! stat $B0/${V0}1/.shard/$gfid_dst.1
TEST ! stat $B0/${V0}0/.shard/$gfid_dst.2
TEST ! stat $B0/${V0}1/.shard/$gfid_dst.2
#TEST ! stat $B0/${V0}0/.shard/$gfid_dst.1
#TEST ! stat $B0/${V0}1/.shard/$gfid_dst.1
#TEST ! stat $B0/${V0}0/.shard/$gfid_dst.2
#TEST ! stat $B0/${V0}1/.shard/$gfid_dst.2
TEST ! stat $M0/dir/src
TEST stat $M0/dir/dst
TEST ! stat $B0/${V0}0/dir/src
TEST ! stat $B0/${V0}1/dir/src
TEST stat $B0/${V0}0/dir/dst
TEST stat $B0/${V0}1/dir/dst
TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_dst
TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_dst
EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_dst
EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_dst
EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_dst
EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_dst
###################################################
##### Rename of dst file with /.shard present #####
@ -182,7 +236,8 @@ TEST stat $B0/${V0}1/dir/dst
TEST unlink $M0/dir/dst
TEST touch $M0/dir/src
TEST truncate -s 5M $M0/dir/dst
# Test to ensure that unlink of a sparse file works fine.
gfid_dst=$(get_gfid_string $M0/dir/dst)
# Test to ensure that rename into a sparse file works fine.
TEST mv -f $M0/dir/src $M0/dir/dst
TEST ! stat $M0/dir/src
TEST stat $M0/dir/dst
@ -190,6 +245,13 @@ TEST ! stat $B0/${V0}0/dir/src
TEST ! stat $B0/${V0}1/dir/src
TEST stat $B0/${V0}0/dir/dst
TEST stat $B0/${V0}1/dir/dst
TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_dst
TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_dst
EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_dst
EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_dst
EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_dst
EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_dst
###############################################################
##### Rename of dst file with only one block (the zeroth) #####
@ -197,7 +259,8 @@ TEST stat $B0/${V0}1/dir/dst
TEST unlink $M0/dir/dst
TEST touch $M0/dir/src
TEST dd if=/dev/zero of=$M0/dir/dst bs=1024 count=1024
# Test to ensure that unlink of a sparse file works fine.
gfid_dst=$(get_gfid_string $M0/dir/dst)
# Test to ensure that rename into a file with only base shard works fine.
TEST mv -f $M0/dir/src $M0/dir/dst
TEST ! stat $M0/dir/src
TEST stat $M0/dir/dst
@ -205,6 +268,13 @@ TEST ! stat $B0/${V0}0/dir/src
TEST ! stat $B0/${V0}1/dir/src
TEST stat $B0/${V0}0/dir/dst
TEST stat $B0/${V0}1/dir/dst
TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_dst
TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_dst
EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_dst
EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_dst
EXPECT "0000000000100000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_dst
EXPECT "0000000000100000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_dst
########################################################
##### Rename to a dst sharded file with hard-links #####
@ -231,18 +301,26 @@ TEST stat $B0/${V0}1/.shard/$gfid_dst.2
TEST ! stat $M0/dir/src
TEST ! stat $B0/${V0}0/dir/src
TEST ! stat $B0/${V0}1/dir/src
TEST ! stat $B0/${V0}0/.shard/.remove_me/$gfid_dst
TEST ! stat $B0/${V0}1/.shard/.remove_me/$gfid_dst
# Now rename another file to the last link.
TEST touch $M0/dir/src2
TEST mv -f $M0/dir/src2 $M0/link
# Ensure that the shards are all cleaned up.
TEST ! stat $B0/${V0}0/.shard/$gfid_dst.1
TEST ! stat $B0/${V0}1/.shard/$gfid_dst.1
TEST ! stat $B0/${V0}0/.shard/$gfid_dst.2
TEST ! stat $B0/${V0}1/.shard/$gfid_dst.2
#TEST ! stat $B0/${V0}0/.shard/$gfid_dst.1
#TEST ! stat $B0/${V0}1/.shard/$gfid_dst.1
#TEST ! stat $B0/${V0}0/.shard/$gfid_dst.2
#TEST ! stat $B0/${V0}1/.shard/$gfid_dst.2
TEST ! stat $M0/dir/src2
TEST ! stat $B0/${V0}0/dir/src2
TEST ! stat $B0/${V0}1/dir/src2
TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_dst
TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_dst
EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_dst
EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_dst
EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_dst
EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_dst
# Rename with non-existent dst and a sharded src
TEST touch $M0/dir/src
TEST dd if=/dev/zero of=$M0/dir/src bs=1024 count=9216

View File

@ -18,6 +18,7 @@ enum gf_shard_mem_types_ {
gf_shard_mt_inode_ctx_t,
gf_shard_mt_iovec,
gf_shard_mt_int64_t,
gf_shard_mt_uint64_t,
gf_shard_mt_end
};
#endif

View File

@ -42,7 +42,8 @@ GLFS_MSGID(SHARD,
SHARD_MSG_UPDATE_FILE_SIZE_FAILED,
SHARD_MSG_FOP_NOT_SUPPORTED,
SHARD_MSG_INVALID_FOP,
SHARD_MSG_MEMALLOC_FAILED
SHARD_MSG_MEMALLOC_FAILED,
SHARD_MSG_FOP_FAILED
);
#endif /* !_SHARD_MESSAGES_H_ */

File diff suppressed because it is too large Load Diff

View File

@ -18,6 +18,7 @@
#include "syncop.h"
#define GF_SHARD_DIR ".shard"
#define GF_SHARD_REMOVE_ME_DIR ".remove_me"
#define SHARD_MIN_BLOCK_SIZE (4 * GF_UNIT_MB)
#define SHARD_MAX_BLOCK_SIZE (4 * GF_UNIT_TB)
#define SHARD_XATTR_PREFIX "trusted.glusterfs.shard."
@ -55,6 +56,12 @@
#define get_highest_block(off, len, shard_size) \
(((((off)+(len)) == 0)?0:((off)+(len)-1)) / (shard_size))
int
shard_unlock_inodelk (call_frame_t *frame, xlator_t *this);
int
shard_unlock_entrylk (call_frame_t *frame, xlator_t *this);
#define SHARD_ENTRY_FOP_CHECK(loc, op_errno, label) do { \
if ((loc->name && !strcmp (GF_SHARD_DIR, loc->name)) && \
(((loc->parent) && \
@ -79,39 +86,57 @@
} \
} while (0)
#define SHARD_STACK_UNWIND(fop, frame, params ...) do { \
shard_local_t *__local = NULL; \
if (frame) { \
__local = frame->local; \
frame->local = NULL; \
} \
STACK_UNWIND_STRICT (fop, frame, params); \
if (__local) { \
shard_local_wipe (__local); \
mem_put (__local); \
} \
#define SHARD_STACK_UNWIND(fop, frame, params ...) do { \
shard_local_t *__local = NULL; \
if (frame) { \
__local = frame->local; \
if (__local && __local->int_inodelk.acquired_lock) \
shard_unlock_inodelk (frame, frame->this); \
if (__local && __local->int_entrylk.acquired_lock) \
shard_unlock_entrylk (frame, frame->this); \
frame->local = NULL; \
} \
STACK_UNWIND_STRICT (fop, frame, params); \
if (__local) { \
shard_local_wipe (__local); \
mem_put (__local); \
} \
} while (0)
/*
 * SHARD_STACK_DESTROY: destroy the call stack that owns @frame and release
 * the shard_local_t attached to the frame.
 *
 * frame->local is detached before STACK_DESTROY() runs; afterwards the
 * detached local (if any) is wiped with shard_local_wipe() and handed back
 * with mem_put().
 *
 * The expansion intentionally does not end in a semicolon. The caller's own
 * ';' terminates the statement, which keeps the macro usable as the body of
 * an un-braced if/else. Every use of @frame is parenthesized so that an
 * expression argument expands correctly.
 */
#define SHARD_STACK_DESTROY(frame)                                      \
do {                                                                    \
        shard_local_t *__local = NULL;                                  \
        __local = (frame)->local;                                       \
        (frame)->local = NULL;                                          \
        STACK_DESTROY ((frame)->root);                                  \
        if (__local) {                                                  \
                shard_local_wipe (__local);                             \
                mem_put (__local);                                      \
        }                                                               \
} while (0)
#define SHARD_INODE_CREATE_INIT(this, local, xattr_req, loc, label) do { \
#define SHARD_INODE_CREATE_INIT(this, block_size, xattr_req, loc, size, \
block_count, label) do { \
int __ret = -1; \
int64_t *__size_attr = NULL; \
shard_priv_t *__priv = NULL; \
uint64_t *__bs = 0; \
\
__priv = this->private; \
\
local->block_size = hton64 (__priv->block_size); \
__ret = dict_set_static_bin (xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, \
&local->block_size, \
sizeof (local->block_size)); \
__bs = GF_CALLOC (1, sizeof (uint64_t), gf_shard_mt_uint64_t); \
if (!__bs) \
goto label; \
*__bs = hton64 (block_size); \
__ret = dict_set_bin (xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, __bs, \
sizeof (*__bs)); \
if (__ret) { \
gf_msg (this->name, GF_LOG_WARNING, 0, \
SHARD_MSG_DICT_SET_FAILED, "Failed to set key: %s " \
"on path %s", GF_XATTR_SHARD_BLOCK_SIZE, loc->path); \
"on path %s", GF_XATTR_SHARD_BLOCK_SIZE, (loc)->path);\
GF_FREE (__bs); \
goto label; \
} \
\
__ret = shard_set_size_attrs (0, 0, &__size_attr); \
__ret = shard_set_size_attrs (size, block_count, &__size_attr); \
if (__ret) \
goto label; \
\
@ -120,7 +145,7 @@
if (__ret) { \
gf_msg (this->name, GF_LOG_WARNING, 0, \
SHARD_MSG_DICT_SET_FAILED, "Failed to set key: %s " \
"on path %s", GF_XATTR_SHARD_FILE_SIZE, loc->path); \
"on path %s", GF_XATTR_SHARD_FILE_SIZE, (loc)->path); \
GF_FREE (__size_attr); \
goto label; \
} \
@ -172,21 +197,34 @@
} \
} while (0)
/* rm = "remove me" */
/*
 * Private state of a shard translator instance; macros in this header reach
 * it through this->private.
 */
typedef struct shard_priv {
/* Shard block size; stored (network byte order) in the block-size xattr of
 * newly created files. NOTE(review): presumably in bytes - confirm. */
uint64_t block_size;
/* gfid of the ".shard" directory. NOTE(review): presumably initialised from
 * SHARD_ROOT_GFID - confirm. */
uuid_t dot_shard_gfid;
/* gfid of ".shard/.remove_me". NOTE(review): presumably initialised from
 * DOT_SHARD_REMOVE_ME_GFID - confirm. */
uuid_t dot_shard_rm_gfid;
/* Inode of ".shard". NOTE(review): when it is set/ref'd is not visible here. */
inode_t *dot_shard_inode;
/* Inode of ".shard/.remove_me"; same caveat as dot_shard_inode. */
inode_t *dot_shard_rm_inode;
/* NOTE(review): presumably guards inode_count and ilist_head - confirm. */
gf_lock_t lock;
/* NOTE(review): presumably the number of entries on ilist_head - confirm. */
int inode_count;
/* Head of a list. NOTE(review): element type and purpose are not visible in
 * this header - confirm against shard.c. */
struct list_head ilist_head;
} shard_priv_t;
typedef struct {
loc_t *loc;
short type;
loc_t loc;
char *domain;
} shard_lock_t;
struct gf_flock flock;
gf_boolean_t acquired_lock;
} shard_inodelk_t;
/*
 * Bookkeeping for an entry lock taken internally by the shard translator.
 * Embedded in shard_local_t as int_entrylk; SHARD_STACK_UNWIND calls
 * shard_unlock_entrylk() when acquired_lock is set.
 */
typedef struct {
/* Location the lock is taken on. NOTE(review): presumably the parent
 * directory of basename - confirm. */
loc_t loc;
/* Lock domain name. NOTE(review): ownership of this string is not visible
 * here - confirm who frees it. */
char *domain;
/* NOTE(review): presumably the entry name being locked - confirm. */
char *basename;
/* NOTE(review): presumably the command and type passed to the entrylk fop -
 * confirm. */
entrylk_cmd cmd;
entrylk_type type;
/* Checked by SHARD_STACK_UNWIND to decide whether an unlock is needed.
 * NOTE(review): presumably set once the lock has been granted - confirm. */
gf_boolean_t acquired_lock;
} shard_entrylk_t;
typedef int32_t (*shard_post_fop_handler_t) (call_frame_t *frame,
xlator_t *this);
@ -200,6 +238,7 @@ typedef int32_t (*shard_post_mknod_fop_handler_t) (call_frame_t *frame,
typedef int32_t (*shard_post_update_size_fop_handler_t) (call_frame_t *frame,
xlator_t *this);
typedef struct shard_local {
int op_ret;
int op_errno;
@ -227,6 +266,7 @@ typedef struct shard_local {
int delta_blocks;
loc_t loc;
loc_t dot_shard_loc;
loc_t dot_shard_rm_loc;
loc_t loc2;
loc_t tmp_loc;
fd_t *fd;
@ -251,16 +291,18 @@ typedef struct shard_local {
shard_post_resolve_fop_handler_t post_res_handler;
shard_post_mknod_fop_handler_t post_mknod_handler;
shard_post_update_size_fop_handler_t post_update_size_handler;
struct {
int lock_count;
fop_inodelk_cbk_t inodelk_cbk;
shard_lock_t *shard_lock;
} lock;
shard_inodelk_t int_inodelk;
shard_entrylk_t int_entrylk;
inode_t *resolver_base_inode;
gf_boolean_t first_lookup_done;
syncbarrier_t barrier;
gf_boolean_t lookup_shards_barriered;
gf_boolean_t unlink_shards_barriered;
gf_boolean_t resolve_not;
loc_t newloc;
call_frame_t *main_frame;
call_frame_t *inodelk_frame;
call_frame_t *entrylk_frame;
} shard_local_t;
typedef struct shard_inode_ctx {
@ -284,6 +326,7 @@ typedef struct shard_inode_ctx {
typedef enum {
SHARD_INTERNAL_DIR_DOT_SHARD = 1,
SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME,
} shard_internal_dir_type_t;
#endif /* __SHARD_H__ */