5784a00f99
With this change, when SHD starts the index crawl it requests all the clients to release the AFR_TA_DOM_NOTIFY lock, so that the clients know their in-memory state is no longer valid and any new operation must query the thin-arbiter if required. When SHD completes healing all the files without any failure, it takes the AFR_TA_DOM_NOTIFY lock again and gets the xattrs on the TA to see whether any new failures have happened in the meantime. If new failures are marked on the TA, SHD starts the crawl immediately to heal those failures as well. If there are no new failures, SHD takes the AFR_TA_DOM_MODIFY lock and unsets the xattrs on the TA, so that both data bricks are considered good thereafter. Change-Id: I037b89a0823648f314580ba0716d877bd5ddb1f1 fixes: bz#1579788 Signed-off-by: karthik-us <ksubrahm@redhat.com>
615 lines
17 KiB
Plaintext
615 lines
17 KiB
Plaintext
# Associative array: process name (brick / thin-arbiter) -> assigned TCP port.
# Filled in lazily by ta_set_port_by_name.
declare -A PORTMAP
# Next port to hand out; bumped by one every time a new name gets a port.
PORTCURR=49152
# Create the thin-arbiter backend directory and its server volfile.
#
# Globals:   B0 (read) - base directory for bricks and volfiles
# Arguments: $1 - thin-arbiter name; its backend directory is $B0/$1
# Outputs:   writes $B0/ta.vol
# Returns:   1 if the backend directory cannot be created, otherwise the
#            status of writing the volfile
#
# The generated graph is, bottom to top:
#   posix -> thin-arbiter -> locks -> upcall -> io-threads -> index
#   -> io-stats -> protocol/server
#
# NOTE: the volfile name is fixed to "ta.vol" regardless of $1, while
# ta_start_ta_process loads $B0/<name>.vol, so the two only line up when
# the name passed to both is "ta".
function ta_create_ta_and_volfile()
{
    local b="$B0/$1"

    mkdir -p "$b/.glusterfs/indices" || return 1

    # Unquoted delimiter on purpose: $b must be expanded inside the volfile.
    cat > "$B0/ta.vol" <<EOF
volume ta-posix
type storage/posix
option directory $b
end-volume

volume ta-thin-arbiter
type features/thin-arbiter
subvolumes ta-posix
end-volume

volume ta-locks
type features/locks
option notify-contention yes
subvolumes ta-thin-arbiter
end-volume

volume ta-upcall
type features/upcall
option cache-invalidation off
subvolumes ta-locks
end-volume

volume ta-io-threads
type performance/io-threads
subvolumes ta-upcall
end-volume

volume ta-index
type features/index
option xattrop-pending-watchlist trusted.afr.ta-
option xattrop-dirty-watchlist trusted.afr.dirty
option index-base $b/.glusterfs/indices
subvolumes ta-io-threads
end-volume

volume ta-io-stats
type debug/io-stats
option count-fop-hits off
option latency-measurement off
option log-level WARNING
option unique-id $b
subvolumes ta-index
end-volume

volume ta-server
type protocol/server
option transport.listen-backlog 10
option transport.socket.keepalive-count 9
option transport.socket.keepalive-interval 2
option transport.socket.keepalive-time 20
option transport.tcp-user-timeout 0
option transport.socket.keepalive 1
option auth.addr.$b.allow *
option auth-path $b
option transport.address-family inet
option transport-type tcp
subvolumes ta-io-stats
end-volume
EOF
}
# Create a data-brick backend directory and its server volfile.
#
# Globals:   B0 (read) - base directory for bricks and volfiles
#            V0 (read) - volume name, used as prefix for xlator names
# Arguments: $1 - brick name; its backend directory is $B0/$1
# Outputs:   writes $B0/<name>.vol
# Returns:   1 if the backend directory cannot be created, otherwise the
#            status of writing the volfile
#
# The generated graph is, bottom to top:
#   posix -> locks -> leases -> upcall -> io-threads -> index -> io-stats
#   -> decompounder (named after the brick path) -> protocol/server
#
# The login username/password are fixed fixture values; the client and
# self-heal-daemon volfiles generated in this file carry the same pair.
function ta_create_brick_and_volfile()
{
    local b="$B0/$1"

    mkdir -p "$b/.glusterfs/indices" || return 1

    # Unquoted delimiter on purpose: $b and ${V0} must be expanded.
    cat > "$B0/${1}.vol" <<EOF
volume ${V0}-posix
type storage/posix
option directory $b
end-volume

volume ${V0}-locks
type features/locks
subvolumes ${V0}-posix
end-volume

volume ${V0}-leases
type features/leases
option leases off
subvolumes ${V0}-locks
end-volume

volume ${V0}-upcall
type features/upcall
option cache-invalidation off
subvolumes ${V0}-leases
end-volume

volume ${V0}-io-threads
type performance/io-threads
subvolumes ${V0}-upcall
end-volume

volume ${V0}-index
type features/index
option xattrop-pending-watchlist trusted.afr.${V0}-
option xattrop-dirty-watchlist trusted.afr.dirty
option index-base $b/.glusterfs/indices
subvolumes ${V0}-io-threads
end-volume

volume ${V0}-io-stats
type debug/io-stats
option count-fop-hits off
option latency-measurement off
option log-level INFO
option unique-id $b
subvolumes ${V0}-index
end-volume

volume $b
type performance/decompounder
subvolumes ${V0}-io-stats
end-volume

volume ${V0}-server
type protocol/server
option transport.listen-backlog 1024
option transport.socket.keepalive-count 9
option transport.socket.keepalive-interval 2
option transport.socket.keepalive-time 20
option transport.tcp-user-timeout 0
option transport.socket.keepalive 1
option auth.addr.$b.allow *
option auth-path $b
option auth.login.459d48e8-2a92-4f11-89f2-077b29f6f86d.password a0ad63dd-8314-4f97-9160-1b93e3cb1f0b
option auth.login.$b.allow 459d48e8-2a92-4f11-89f2-077b29f6f86d
option transport.address-family inet
option transport-type tcp
subvolumes $b
end-volume
EOF
}
# Assign a listen port to a process name, once.
#
# Globals:   PORTMAP (read/written) - name -> port table
#            PORTCURR (read/written) - next free port
# Arguments: $1 - process name (brick or thin-arbiter)
# Returns:   0 when the name has a port after the call (new or existing),
#            1 when no name was given
#
# A name that already has a port keeps it, so restarting a process reuses
# the port recorded for it the first time.
function ta_set_port_by_name()
{
    # An empty key is not a valid associative-array subscript in bash.
    if [ -z "$1" ]
    then
        echo "ta_set_port_by_name: missing process name" >&2
        return 1
    fi

    if [ -z "${PORTMAP[$1]}" ]
    then
        PORTMAP[$1]=$PORTCURR
        PORTCURR=$((PORTCURR+1))
    fi
}
# Start a data-brick glusterfs process from its generated volfile.
#
# Globals:   B0 (read), V0 (read), PORTMAP (read; written through
#            ta_set_port_by_name)
# Arguments: $1 - brick name; volfile is $B0/<name>.vol, pid file is
#                 $B0/<name>.pid, log file is <logdir>/<name>.log
# Outputs:   on success the contents of the pid file; on failure an empty
#            line
# Returns:   1 when glusterfs exits non-zero, otherwise the status of
#            reading the pid file
function ta_start_brick_process()
{
    ta_set_port_by_name "$1"
    local port=${PORTMAP[$1]}
    local logdir
    logdir=$(gluster --print-logdir)

    # The listen port is injected on the command line because the volfile
    # written by ta_create_brick_and_volfile carries no listen-port option.
    if glusterfs -p "$B0/${1}.pid" --volfile="$B0/${1}.vol" \
            -l "$logdir/${1}.log" \
            --xlator-option "${V0}-server.listen-port=$port"
    then
        cat "$B0/${1}.pid"
    else
        echo ""
        return 1
    fi
}
# Start the thin-arbiter glusterfs process from its generated volfile.
#
# Globals:   B0 (read), PORTMAP (read; written through ta_set_port_by_name)
# Arguments: $1 - thin-arbiter name; volfile is $B0/<name>.vol, pid file is
#                 $B0/<name>.pid, log file is <logdir>/<name>.log
# Outputs:   on success the contents of the pid file; on failure an empty
#            line
# Returns:   1 when glusterfs exits non-zero, otherwise the status of
#            reading the pid file
#
# Differs from ta_start_brick_process only in the server xlator whose
# listen port is overridden ("ta-server" instead of "${V0}-server").
function ta_start_ta_process()
{
    ta_set_port_by_name "$1"
    local port=${PORTMAP[$1]}
    local logdir
    logdir=$(gluster --print-logdir)

    if glusterfs -p "$B0/${1}.pid" --volfile="$B0/${1}.vol" \
            -l "$logdir/${1}.log" \
            --xlator-option "ta-server.listen-port=$port"
    then
        cat "$B0/${1}.pid"
    else
        echo ""
        return 1
    fi
}
# Mount the volume described by $B0/mount.vol on the given directory.
#
# Globals:   B0 (read)
# Arguments: $1 - mount point; created if it does not exist
# Outputs:   on success the contents of the pid file; on failure an empty
#            line
# Returns:   1 when glusterfs exits non-zero, otherwise the status of
#            reading the pid file
#
# The pid file lives in $B0 and is named after the mount point with every
# "/" replaced by ".", e.g. /mnt/glusterfs/0 -> $B0/.mnt.glusterfs.0.pid.
function ta_start_mount_process()
{
    # Kept local so the helper does not leave a stray global behind.
    local identifier

    mkdir -p "$1"
    identifier=${1//\//.}

    if glusterfs -p "$B0/${identifier}.pid" --volfile="$B0/mount.vol" "$1"
    then
        cat "$B0/${identifier}.pid"
    else
        echo ""
        return 1
    fi
}
# Write the client (mount) volfile for a two-brick + thin-arbiter volume.
#
# Globals:   B0 (read), V0 (read), H0 (read), PORTMAP (read)
# Arguments: $1 - name of the first data brick
#            $2 - name of the second data brick
#            $3 - name of the thin-arbiter
# Outputs:   writes $B0/mount.vol
# Returns:   status of writing the volfile
#
# Ports are taken from PORTMAP as it is at call time; a name that has not
# been given a port yet yields an empty "remote-port" value, so the server
# processes should have been started (or at least registered through
# ta_set_port_by_name) first.
#
# The generated graph is, bottom to top: three protocol/client xlators ->
# cluster/replicate (thin-arbiter enabled) -> distribute -> write-behind ->
# read-ahead -> readdir-ahead -> io-cache -> quick-read -> open-behind ->
# md-cache -> io-threads -> io-stats (named ${V0}).
function ta_create_mount_volfile()
{
    local b0="$B0/$1"
    local b1="$B0/$2"
    local ta="$B0/$3"
    local b0_port=${PORTMAP[$1]}
    local b1_port=${PORTMAP[$2]}
    local ta_port=${PORTMAP[$3]}

    # Unquoted delimiter on purpose: variables must be expanded.
    cat > "$B0/mount.vol" <<EOF
volume ${V0}-client-0
type protocol/client
option remote-host $H0
option client-bind-insecure off
option transport.socket.keepalive-interval 2
option transport.socket.keepalive-time 20
option transport.socket.ssl-enabled off
option remote-subvolume $b0
option transport.tcp-user-timeout 0
option transport.socket.keepalive-count 9
option transport-type tcp
option ping-timeout 42
option send-gids on
option remote-port $b0_port
option password a0ad63dd-8314-4f97-9160-1b93e3cb1f0b
option username 459d48e8-2a92-4f11-89f2-077b29f6f86d
end-volume

volume ${V0}-client-1
type protocol/client
option remote-host $H0
option client-bind-insecure off
option transport.socket.keepalive-interval 2
option transport.socket.keepalive-time 20
option transport.socket.ssl-enabled off
option remote-subvolume $b1
option transport.tcp-user-timeout 0
option transport.socket.keepalive-count 9
option transport-type tcp
option ping-timeout 42
option send-gids on
option remote-port $b1_port
option password a0ad63dd-8314-4f97-9160-1b93e3cb1f0b
option username 459d48e8-2a92-4f11-89f2-077b29f6f86d
end-volume

volume ${V0}-thin-arbiter-client
type protocol/client
option client-bind-insecure off
option transport.socket.ssl-enabled off
option remote-subvolume $ta
option ping-timeout 42
option remote-host $H0
option send-gids on
option transport.socket.keepalive-interval 2
option remote-port $ta_port
option transport-type tcp
option transport.tcp-user-timeout 0
option transport.socket.keepalive-time 20
option transport.socket.keepalive-count 9
end-volume

volume ${V0}-replicate-0
type cluster/replicate
option afr-dirty-xattr trusted.afr.dirty
option iam-self-heal-daemon off
option afr-pending-xattr ${V0}-client-0,${V0}-client-1,${V0}-ta-2
option thin-arbiter $H0:$ta
subvolumes ${V0}-client-0 ${V0}-client-1 ${V0}-thin-arbiter-client
end-volume

volume ${V0}-distribute
type cluster/distribute
option tier-hot-compact-frequency 604800
option rebal-throttle normal
option force-migration off
option lookup-optimize on
option weighted-rebalance on
option write-freq-threshold 0
option assert-no-child-down off
option tier-pause off
option watermark-low 75
option tier-compact off
option lock-migration off
option lookup-unhashed on
option tier-demote-frequency 3600
option watermark-hi 90
option tier-cold-compact-frequency 604800
option randomize-hash-range-by-gfid off
option unhashed-sticky-bit off
option use-readdirp on
option readdir-optimize off
option xattr-name trusted.glusterfs.dht
option tier-max-mb 4000
option tier-max-files 10000
option tier-query-limit 100
option read-freq-threshold 0
option tier-mode test
option tier-max-promote-file-size 0
option tier-promote-frequency 120
option min-free-disk 10%
option min-free-inodes 5%
option rebalance-stats off
subvolumes ${V0}-replicate-0
end-volume

volume ${V0}-write-behind
type performance/write-behind
option strict-O_DIRECT off
option strict-write-ordering off
option resync-failed-syncs-after-fsync off
option aggregate-size 128KB
option flush-behind on
option cache-size 1MB
option trickling-writes on
subvolumes ${V0}-distribute
end-volume

volume ${V0}-read-ahead
type performance/read-ahead
option force-atime-update false
option page-count 4
option page-size 131072
option pass-through false
subvolumes ${V0}-write-behind
end-volume

volume ${V0}-readdir-ahead
type performance/readdir-ahead
option rda-low-wmark 4096
option rda-high-wmark 128KB
option rda-cache-limit 10MB
option parallel-readdir off
option pass-through false
option rda-request-size 131072
subvolumes ${V0}-read-ahead
end-volume

volume ${V0}-io-cache
type performance/io-cache
option cache-timeout 1
option cache-size 32MB
option min-file-size 0
option max-file-size 0
option pass-through false
subvolumes ${V0}-readdir-ahead
end-volume

volume ${V0}-quick-read
type performance/quick-read
option cache-invalidation false
option ctime-invalidation false
option cache-size 128MB
option cache-timeout 1
option max-file-size 64KB
subvolumes ${V0}-io-cache
end-volume

volume ${V0}-open-behind
type performance/open-behind
option use-anonymous-fd yes
option lazy-open yes
option read-after-open no
option pass-through false
subvolumes ${V0}-quick-read
end-volume

volume ${V0}-md-cache
type performance/md-cache
option pass-through false
option cache-capability-xattrs true
option cache-posix-acl false
option cache-swift-metadata true
option cache-samba-metadata false
option md-cache-timeout 1
option force-readdirp true
option cache-invalidation false
option md-cache-statfs off
option cache-selinux false
option cache-ima-xattrs true
subvolumes ${V0}-open-behind
end-volume

volume ${V0}-io-threads
type performance/io-threads
option normal-prio-threads 16
option enable-least-priority on
option idle-time 120
option cleanup-disconnected-reqs off
option pass-through false
option thread-count 16
option high-prio-threads 16
option low-prio-threads 16
option least-prio-threads 1
subvolumes ${V0}-md-cache
end-volume

volume ${V0}
type debug/io-stats
option client-logger gluster-log
option client-log-buf-size 5
option latency-measurement off
option client-log-level INFO
option brick-log-level INFO
option count-fop-hits off
option sys-log-level CRITICAL
option brick-log-format with-msg-id
option brick-log-buf-size 5
option dump-fd-stats off
option ios-dump-interval 0
option ios-dump-format json
option client-log-format with-msg-id
option log-buf-size 5
option log-flush-timeout 120
option client-log-flush-timeout 120
option ios-sample-interval 0
option ios-sample-buf-size 65535
option brick-logger gluster-log
option ios-dnscache-ttl-sec 86400
option brick-log-flush-timeout 120
option unique-id /no/such/path
subvolumes ${V0}-io-threads
end-volume
EOF
}
# Forcefully kill the process recorded in a pid file and blank the file.
#
# Globals:   B0 (read)
# Arguments: $1 - process name; pid file is $B0/<name>.pid
# Returns:   1 when the pid file yields no pid, otherwise the status of
#            kill
#
# SIGKILL is used as the first resort, so the process gets no chance to
# shut down cleanly.
function ta_kill_brick()
{
    local pidfile="$B0/${1}.pid"
    local pid

    pid=$(cat "$pidfile")
    # Blank the pid file before killing, so readers of the file never see
    # the pid of a process that is already gone.
    echo > "$pidfile"

    if [ -z "$pid" ]
    then
        echo "ta_kill_brick: no pid recorded for '$1'" >&2
        return 1
    fi

    kill -9 "$pid"
}
# Print the pid recorded for a process.
#
# Globals:   B0 (read)
# Arguments: $1 - process name; pid file is $B0/<name>.pid
# Outputs:   the contents of the pid file
# Returns:   status of reading the pid file
function ta_get_pid_by_brick_name()
{
    cat "$B0/${1}.pid"
}
# Print the "up" value of a replicate xlator as exposed under a mount's
# .meta directory.
#
# Arguments: $1 - volume name
#            $2 - mount point
#            $3 - replica index (suffix of "<volume>-replicate-<index>")
# Outputs:   everything between the first and second "=" of each line
#            starting with "up = " in
#            <mount>/.meta/graphs/active/<volume>-replicate-<index>/private.
#            The space that follows "=" in the file is part of the output.
# Returns:   status of the final pipeline stage (cut)
function ta_up_status()
{
    local v=$1
    local m=$2
    local replica_id=$3
    local private_file="$m/.meta/graphs/active/${v}-replicate-${replica_id}/private"

    grep -E "^up = " "$private_file" | cut -f2 -d'='
}
# Write the self-heal-daemon volfile for a two-brick + thin-arbiter volume.
#
# Globals:   B0 (read), V0 (read), H0 (read), PORTMAP (read)
# Arguments: $1 - name of the first data brick
#            $2 - name of the second data brick
#            $3 - name of the thin-arbiter
# Outputs:   writes $B0/glustershd.vol
# Returns:   status of writing the volfile
#
# Ports are taken from PORTMAP as it is at call time; a name that has not
# been given a port yet yields an empty "remote-port" value.
#
# The generated graph is three protocol/client xlators under one
# cluster/replicate xlator (with "iam-self-heal-daemon yes") topped by an
# io-stats xlator named "glustershd". The afr-pending-xattr names are the
# same ones written by ta_create_mount_volfile.
function ta_create_shd_volfile()
{
    local b0="$B0/$1"
    local b1="$B0/$2"
    local ta="$B0/$3"
    local b0_port=${PORTMAP[$1]}
    local b1_port=${PORTMAP[$2]}
    local ta_port=${PORTMAP[$3]}

    # Unquoted delimiter on purpose: variables must be expanded.
    cat > "$B0/glustershd.vol" <<EOF
volume ${V0}-replicate-0-client-0
type protocol/client
option send-gids on
option transport.socket.lowlat off
option transport.socket.keepalive-interval 2
option remote-host $H0
option remote-subvolume $b0
option ping-timeout 42
option client-bind-insecure off
option transport.socket.own-thread off
option frame-timeout 1800
option non-blocking-io off
option transport.socket.keepalive 1
option transport.socket.keepalive-count 9
option transport.tcp-user-timeout 0
option transport.socket.nodelay 1
option transport.socket.keepalive-time 20
option transport.socket.read-fail-log off
option transport-type tcp
option filter-O_DIRECT disable
option event-threads 2
option transport.listen-backlog 1024
option transport.socket.ssl-enabled off
option password a0ad63dd-8314-4f97-9160-1b93e3cb1f0b
option username 459d48e8-2a92-4f11-89f2-077b29f6f86d
option remote-port $b0_port
end-volume

volume ${V0}-replicate-0-client-1
type protocol/client
option remote-host $H0
option transport.socket.keepalive-time 20
option transport.socket.keepalive-count 9
option transport.socket.own-thread off
option transport.socket.ssl-enabled off
option transport-type tcp
option remote-subvolume $b1
option event-threads 2
option transport.tcp-user-timeout 0
option transport.socket.keepalive 1
option transport.socket.nodelay 1
option transport.socket.read-fail-log off
option frame-timeout 1800
option ping-timeout 42
option client-bind-insecure off
option filter-O_DIRECT disable
option send-gids on
option non-blocking-io off
option transport.listen-backlog 1024
option transport.socket.lowlat off
option transport.socket.keepalive-interval 2
option password a0ad63dd-8314-4f97-9160-1b93e3cb1f0b
option username 459d48e8-2a92-4f11-89f2-077b29f6f86d
option remote-port $b1_port
end-volume

volume ${V0}-replicate-0-thin-arbiter-client
type protocol/client
option frame-timeout 1800
option event-threads 2
option transport.listen-backlog 1024
option transport.socket.nodelay 1
option transport.socket.keepalive-count 9
option transport.socket.ssl-enabled off
option transport-type tcp
option remote-subvolume $ta
option filter-O_DIRECT disable
option non-blocking-io off
option transport.socket.lowlat off
option transport.socket.keepalive-interval 2
option transport.socket.read-fail-log off
option remote-host $H0
option send-gids on
option transport.tcp-user-timeout 0
option transport.socket.keepalive-time 20
option ping-timeout 42
option client-bind-insecure off
option transport.socket.keepalive 1
option transport.socket.own-thread off
option remote-port $ta_port
end-volume

volume ${V0}-replicate-0
type cluster/replicate
option background-self-heal-count 8
option metadata-self-heal on
option data-change-log on
option entrylk-trace off
option iam-self-heal-daemon yes
option afr-dirty-xattr trusted.afr.dirty
option heal-timeout 10
option read-hash-mode 1
option metadata-splitbrain-forced-heal off
option thin-arbiter $H0:$ta
option shd-max-threads 1
option afr-pending-xattr ${V0}-client-0,${V0}-client-1,${V0}-ta-2
option halo-max-latency 5
option halo-max-replicas 99999
option entry-change-log on
option halo-nfsd-max-latency 5
option inodelk-trace off
option pre-op-compat on
option eager-lock on
option self-heal-readdir-size 1KB
option ensure-durability on
option locking-scheme full
option halo-enabled False
option heal-wait-queue-length 128
option entry-self-heal on
option self-heal-daemon on
option quorum-reads no
option shd-wait-qlength 1024
option choose-local true
option halo-min-replicas 2
option data-self-heal on
option metadata-change-log on
option consistent-metadata no
option full-lock yes
option use-compound-fops no
option halo-shd-max-latency 99999
option quorum-type none
option favorite-child-policy none
option read-subvolume-index -1
option optimistic-change-log on
option iam-nfs-daemon off
option post-op-delay-secs 1
option granular-entry-heal no
option consistent-io no
option data-self-heal-window-size 1
subvolumes ${V0}-replicate-0-client-0 ${V0}-replicate-0-client-1 ${V0}-replicate-0-thin-arbiter-client
end-volume

volume glustershd
type debug/io-stats
option log-buf-size 5
option ios-dump-format json
option latency-measurement off
option sys-log-level CRITICAL
option brick-log-level INFO
option client-logger gluster-log
option client-log-format with-msg-id
option brick-log-format with-msg-id
option client-log-buf-size 5
option log-flush-timeout 120
option ios-dump-interval 0
option ios-sample-interval 0
option ios-dnscache-ttl-sec 86400
option count-fop-hits off
option client-log-level INFO
option brick-logger gluster-log
option brick-log-buf-size 5
option ios-sample-buf-size 65535
option client-log-flush-timeout 120
option brick-log-flush-timeout 120
option unique-id /no/such/path
option dump-fd-stats off
subvolumes ${V0}-replicate-0
end-volume
EOF
}
# Start a self-heal-daemon glusterfs process from a generated volfile.
#
# Globals:   B0 (read)
# Arguments: $1 - base name; volfile is $B0/<name>.vol, pid file is
#                 $B0/<name>.pid, log file is <logdir>/<name>.log.
#                 ta_create_shd_volfile writes "glustershd.vol", so the
#                 matching name is "glustershd".
# Outputs:   on success the contents of the pid file; on failure an empty
#            line
# Returns:   1 when glusterfs exits non-zero, otherwise the status of
#            reading the pid file
function ta_start_shd_process()
{
    local logdir
    logdir=$(gluster --print-logdir)

    if glusterfs -p "$B0/${1}.pid" --volfile="$B0/${1}.vol" \
            -l "$logdir/${1}.log" \
            --process-name=glustershd
    then
        cat "$B0/${1}.pid"
    else
        echo ""
        return 1
    fi
}