cluster/ec: Change [f]getxattr to parallel-dispatch-one
At the moment in EC, [f]getxattr operations wait to acquire a lock while other operations are in progress, even when the same mount already holds a lock on the file/directory. This happens because [f]getxattr operations follow the model where the operation is wound to 'k' of the bricks and the answers are matched to make sure the data returned is the same on all of them. This consistency check requires that no other operations are on-going while [f]getxattr operations are wound to the bricks. We can perform [f]getxattr in another way as well: find the good_mask from the lock that is already granted, wind the operation to any one of the good bricks, and unwind the answer to the parent xlator after adjusting size/blocks. Since we take good_mask into account, the reply we get will reflect the state either before or after a possible on-going operation. Using this method, the operation doesn't need to depend on the completion of on-going operations, which could take a long time (for example when some disks are slow and writes are in progress). Thus we reduce the time to serve [f]getxattr requests. I changed [f]getxattr to dispatch-one and added extra logic in ec_link_has_lock_conflict() so that fops with EC_MINIMUM_ONE as fop->minimum never conflict, to achieve the effect described above. Modified scripts to make sure a READ fop is received in EC to trigger heals. Updates gluster/glusterfs#368 Change-Id: I3b4ebf89181c336b7b8d5471b0454f016cdaf296 Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
This commit is contained in:
parent
85d321b21c
commit
c96a1338fe
@ -72,6 +72,9 @@
|
||||
#define FNM_EXTMATCH 0
|
||||
#endif
|
||||
|
||||
/*gets max-offset on all architectures correctly*/
|
||||
#define GF_OFF_MAX ((1ULL << (sizeof(off_t) * 8 - 1)) - 1ULL)
|
||||
|
||||
#define GLUSTERD_MAX_SNAP_NAME 255
|
||||
#define GLUSTERFS_SOCKET_LISTEN_BACKLOG 10
|
||||
#define ZR_MOUNTPOINT_OPT "mountpoint"
|
||||
|
133
tests/basic/ec/ec-fast-fgetxattr.c
Normal file
133
tests/basic/ec/ec-fast-fgetxattr.c
Normal file
@ -0,0 +1,133 @@
|
||||
#include <stdio.h>
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
#include <time.h>
|
||||
#include <limits.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <errno.h>
|
||||
#include <glusterfs/api/glfs.h>
|
||||
#include <glusterfs/api/glfs-handles.h>
|
||||
|
||||
/* Set to 1 by write_async_cbk() when the async write finishes; polled by main() */
int cbk_complete = 0;
|
||||
/* Result value handed to write_async_cbk(); main() treats a negative value as failure */
ssize_t cbk_ret_val = 0;
|
||||
/*
 * Allocate a NUL-terminated buffer holding 'count' copies of 'fillchar'
 * and describe it through 'iov'.
 *
 * On success iov->iov_base points to a heap buffer of count + 1 bytes
 * (to be released with free()) and iov->iov_len is set to 'count'; the
 * terminating NUL is not included in iov_len.
 *
 * Returns 0 on success, -1 if 'count' is negative or the allocation fails.
 * On allocation failure iov->iov_base is set to NULL; on a negative count
 * 'iov' is left untouched.
 */
int
fill_iov (struct iovec *iov, char fillchar, int count)
{
        char *buf = NULL;

        /* A negative count would turn into a huge size_t inside memset() */
        if (count < 0) {
                return -1;
        }

        /* Widen before adding 1 so that count == INT_MAX cannot overflow.
         * calloc() zero-fills, so the extra byte already is the terminator. */
        buf = calloc ((size_t)count + 1, sizeof (fillchar));
        iov->iov_base = buf;
        if (buf == NULL) {
                return -1;
        }

        memset (buf, fillchar, (size_t)count);
        iov->iov_len = (size_t)count;

        return 0;
}
|
||||
|
||||
void
|
||||
write_async_cbk (glfs_fd_t *fd, ssize_t ret, void *cookie)
|
||||
{
|
||||
|
||||
if (ret < 0) {
|
||||
fprintf (stderr, "glfs_write failed");
|
||||
}
|
||||
cbk_ret_val = ret;
|
||||
cbk_complete = 1;
|
||||
}
|
||||
|
||||
int
|
||||
write_async (glfs_t *fs, glfs_fd_t *glfd, int char_count)
|
||||
{
|
||||
ssize_t ret = -1;
|
||||
int flags = O_RDWR;
|
||||
struct iovec iov = {0};
|
||||
|
||||
|
||||
|
||||
ret = fill_iov (&iov, 'a', char_count);
|
||||
if (ret) {
|
||||
fprintf (stderr, "failed to create iov");
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = glfs_pwritev_async (glfd, &iov, 1, 0, flags, write_async_cbk,
|
||||
NULL);
|
||||
out:
|
||||
if (ret < 0) {
|
||||
fprintf (stderr, "glfs_pwritev async failed");
|
||||
}
|
||||
return ret;
|
||||
|
||||
}
|
||||
|
||||
int
|
||||
main (int argc, char *argv[])
|
||||
{
|
||||
glfs_t *fs = NULL;
|
||||
glfs_fd_t *fd = NULL;
|
||||
int ret = 1;
|
||||
char buf[1024] = {0};
|
||||
|
||||
if (argc != 4) {
|
||||
fprintf (stderr, "Syntax: %s <host> <volname> <file>\n", argv[0]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
fs = glfs_new (argv[2]);
|
||||
if (!fs) {
|
||||
fprintf (stderr, "glfs_new: returned NULL\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
ret = glfs_set_volfile_server (fs, "tcp", argv[1], 24007);
|
||||
if (ret != 0) {
|
||||
fprintf (stderr, "glfs_set_volfile_server: retuned %d\n", ret);
|
||||
goto out;
|
||||
}
|
||||
ret = glfs_set_logging (fs, "/tmp/ec-fgetxattr.log", 7);
|
||||
if (ret != 0) {
|
||||
fprintf (stderr, "glfs_set_logging: returned %d\n", ret);
|
||||
goto out;
|
||||
}
|
||||
ret = glfs_init (fs);
|
||||
if (ret != 0) {
|
||||
fprintf (stderr, "glfs_init: returned %d\n", ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
fd = glfs_open (fs, argv[3], O_RDWR | O_TRUNC);
|
||||
if (fd == NULL) {
|
||||
fprintf (stderr, "glfs_open: returned NULL\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = write_async (fs, fd, 16);
|
||||
if (ret) {
|
||||
fprintf (stderr, "write_async failed\n");
|
||||
}
|
||||
|
||||
sleep (1);
|
||||
ret = glfs_fgetxattr (fd, "trusted.glusterfs.abc", buf, sizeof buf);
|
||||
while (cbk_complete != 1) {
|
||||
/* ret will be -ve as xattr doesn't exist, and fgetxattr should
|
||||
* return waaaayyy before writev */
|
||||
ret = 0;
|
||||
sleep (1);
|
||||
}
|
||||
if (cbk_ret_val < 0) {
|
||||
fprintf (stderr, "cbk_ret_val is -ve\n");
|
||||
ret = -1;
|
||||
}
|
||||
glfs_close(fd);
|
||||
|
||||
out:
|
||||
unlink ("/tmp/ec-fgetxattr.log");
|
||||
glfs_fini (fs);
|
||||
|
||||
return ret;
|
||||
}
|
40
tests/basic/ec/ec-fast-fgetxattr.t
Executable file
40
tests/basic/ec/ec-fast-fgetxattr.t
Executable file
@ -0,0 +1,40 @@
|
||||
#!/bin/bash
# Builds and runs ec-fast-fgetxattr.c against a 6-brick disperse volume on
# which read and write fops are delayed through the delay-gen options.
|
||||
|
||||
# Pull in the shared test helpers (TEST, EXPECT, cleanup, ...).
. $(dirname $0)/../../include.rc
|
||||
. $(dirname $0)/../../volume.rc
|
||||
|
||||
cleanup;
|
||||
|
||||
TEST glusterd
|
||||
TEST pidof glusterd
|
||||
|
||||
# Disperse volume: 6 bricks, redundancy 2.
TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{1..6}
|
||||
# Switch off the performance.* options listed below for this volume.
TEST $CLI volume set $V0 performance.quick-read off
|
||||
TEST $CLI volume set $V0 performance.write-behind off
|
||||
TEST $CLI volume set $V0 performance.io-cache off
|
||||
TEST $CLI volume set $V0 performance.stat-prefetch off
|
||||
TEST $CLI volume set $V0 performance.client-io-threads off
|
||||
TEST $CLI volume set $V0 brick-log-level DEBUG
|
||||
# delay-gen: delay 100% of read and write fops by delay-duration
# (presumably microseconds, i.e. 10 seconds - confirm against delay-gen docs).
TEST $CLI volume set $V0 delay-gen posix
|
||||
TEST $CLI volume set $V0 delay-gen.delay-duration 10000000
|
||||
TEST $CLI volume set $V0 delay-gen.delay-percentage 100
|
||||
TEST $CLI volume set $V0 delay-gen.enable read,write
|
||||
|
||||
TEST $CLI volume start $V0
|
||||
EXPECT 'Started' volinfo_field $V0 'Status'
|
||||
|
||||
# Mount the volume and wait until all 6 EC children are reported up.
TEST $GFS -s $H0 --volfile-id $V0 $M0
|
||||
EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
|
||||
# Create the file that the C test program opens as /file.
TEST touch $M0/file
|
||||
|
||||
# Perform two writes to make sure io-threads have enough threads to perform
|
||||
# things in parallel when the test execution happens.
|
||||
echo abc > $M0/file1 &
|
||||
echo abc > $M0/file2 &
|
||||
wait
|
||||
|
||||
# Compile the gfapi test program, run it against /file, then clean up the
# built tester (cleanup_tester presumably removes the binary - confirm).
TEST build_tester $(dirname $0)/ec-fast-fgetxattr.c -lgfapi -Wall -O2
|
||||
TEST $(dirname $0)/ec-fast-fgetxattr $H0 $V0 /file
|
||||
cleanup_tester $(dirname ${0})/ec-fast-fgetxattr
|
||||
|
||||
cleanup;
|
@ -53,6 +53,13 @@ ec_is_range_conflict (ec_lock_link_t *l1, ec_lock_link_t *l2)
|
||||
static gf_boolean_t
|
||||
ec_lock_conflict (ec_lock_link_t *l1, ec_lock_link_t *l2)
|
||||
{
|
||||
/* Fops like access/stat won't have to worry what the other fops are
|
||||
* modifying as the fop is wound only to one brick. So it can be
|
||||
* executed in parallel*/
|
||||
if (l1->fop->minimum == EC_MINIMUM_ONE ||
|
||||
l2->fop->minimum == EC_MINIMUM_ONE)
|
||||
return _gf_false;
|
||||
|
||||
if ((l1->fop->flags & EC_FLAG_LOCK_SHARED) &&
|
||||
(l2->fop->flags & EC_FLAG_LOCK_SHARED))
|
||||
return _gf_false;
|
||||
|
@ -152,7 +152,7 @@ ec_adjust_offset_up(ec_t *ec, off_t *value, gf_boolean_t scale)
|
||||
} else {
|
||||
/* Check if there has been an overflow. */
|
||||
if ((off_t)tmp < 0) {
|
||||
tmp = (1ULL << (sizeof(off_t) * 8 - 1)) - 1ULL;
|
||||
tmp = GF_OFF_MAX;
|
||||
tail = -tail;
|
||||
}
|
||||
}
|
||||
|
@ -324,13 +324,23 @@ int32_t ec_manager_getxattr(ec_fop_data_t * fop, int32_t state)
|
||||
return EC_STATE_DISPATCH;
|
||||
|
||||
case EC_STATE_DISPATCH:
|
||||
ec_dispatch_all(fop);
|
||||
if (fop->minimum == EC_MINIMUM_ALL) {
|
||||
ec_dispatch_all(fop);
|
||||
} else {
|
||||
ec_dispatch_one(fop);
|
||||
}
|
||||
|
||||
return EC_STATE_PREPARE_ANSWER;
|
||||
|
||||
case EC_STATE_PREPARE_ANSWER:
|
||||
ec_handle_special_xattrs (fop);
|
||||
cbk = ec_fop_prepare_answer(fop, _gf_true);
|
||||
if (fop->minimum == EC_MINIMUM_ALL) {
|
||||
cbk = ec_fop_prepare_answer(fop, _gf_true);
|
||||
} else {
|
||||
if (ec_dispatch_one_retry (fop, &cbk)) {
|
||||
return EC_STATE_DISPATCH;
|
||||
}
|
||||
}
|
||||
if (cbk != NULL) {
|
||||
int32_t err;
|
||||
|
||||
@ -1809,6 +1819,7 @@ int32_t ec_manager_stat(ec_fop_data_t * fop, int32_t state)
|
||||
|
||||
case EC_STATE_PREPARE_ANSWER:
|
||||
cbk = ec_fop_prepare_answer(fop, _gf_true);
|
||||
|
||||
if (cbk != NULL) {
|
||||
if (cbk->iatt[0].ia_type == IA_IFREG) {
|
||||
ec_iatt_rebuild(fop->xl->private, cbk->iatt, 1,
|
||||
|
@ -864,7 +864,7 @@ ec_gf_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
|
||||
{
|
||||
int error = 0;
|
||||
ec_t *ec = this->private;
|
||||
int32_t minimum = EC_MINIMUM_MIN;
|
||||
int32_t minimum = EC_MINIMUM_ONE;
|
||||
|
||||
if (name && strcmp (name, EC_XATTR_HEAL) != 0) {
|
||||
EC_INTERNAL_XATTR_OR_GOTO(name, NULL, error, out);
|
||||
@ -901,7 +901,7 @@ ec_gf_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
|
||||
|
||||
EC_INTERNAL_XATTR_OR_GOTO(name, NULL, error, out);
|
||||
|
||||
ec_fgetxattr (frame, this, -1, EC_MINIMUM_MIN, default_fgetxattr_cbk,
|
||||
ec_fgetxattr (frame, this, -1, EC_MINIMUM_ONE, default_fgetxattr_cbk,
|
||||
NULL, fd, name, xdata);
|
||||
return 0;
|
||||
out:
|
||||
|
Loading…
x
Reference in New Issue
Block a user