GFS2: dlm based recovery coordination
This new method of managing recovery is an alternative to the previous approach of using the userland gfs_controld.

- use dlm slot numbers to assign journal IDs
- use dlm recovery callbacks to initiate journal recovery
- use a dlm lock to determine the first node to mount fs
- use a dlm lock to track journals that need recovery

Signed-off-by: David Teigland <teigland@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
This commit is contained in:
parent
e343a895a9
commit
e0c2a9aa1e
@ -1353,7 +1353,7 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
|
|||||||
spin_lock(&gl->gl_spin);
|
spin_lock(&gl->gl_spin);
|
||||||
gl->gl_reply = ret;
|
gl->gl_reply = ret;
|
||||||
|
|
||||||
if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_flags))) {
|
if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags))) {
|
||||||
if (gfs2_should_freeze(gl)) {
|
if (gfs2_should_freeze(gl)) {
|
||||||
set_bit(GLF_FROZEN, &gl->gl_flags);
|
set_bit(GLF_FROZEN, &gl->gl_flags);
|
||||||
spin_unlock(&gl->gl_spin);
|
spin_unlock(&gl->gl_spin);
|
||||||
|
@ -121,7 +121,10 @@ enum {
|
|||||||
|
|
||||||
struct lm_lockops {
|
struct lm_lockops {
|
||||||
const char *lm_proto_name;
|
const char *lm_proto_name;
|
||||||
int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname);
|
int (*lm_mount) (struct gfs2_sbd *sdp, const char *table);
|
||||||
|
void (*lm_first_done) (struct gfs2_sbd *sdp);
|
||||||
|
void (*lm_recovery_result) (struct gfs2_sbd *sdp, unsigned int jid,
|
||||||
|
unsigned int result);
|
||||||
void (*lm_unmount) (struct gfs2_sbd *sdp);
|
void (*lm_unmount) (struct gfs2_sbd *sdp);
|
||||||
void (*lm_withdraw) (struct gfs2_sbd *sdp);
|
void (*lm_withdraw) (struct gfs2_sbd *sdp);
|
||||||
void (*lm_put_lock) (struct gfs2_glock *gl);
|
void (*lm_put_lock) (struct gfs2_glock *gl);
|
||||||
|
@ -139,8 +139,45 @@ struct gfs2_bufdata {
|
|||||||
#define GDLM_STRNAME_BYTES 25
|
#define GDLM_STRNAME_BYTES 25
|
||||||
#define GDLM_LVB_SIZE 32
|
#define GDLM_LVB_SIZE 32
|
||||||
|
|
||||||
|
/*
|
||||||
|
* ls_recover_flags:
|
||||||
|
*
|
||||||
|
* DFL_BLOCK_LOCKS: dlm is in recovery and will grant locks that had been
|
||||||
|
* held by failed nodes whose journals need recovery. Those locks should
|
||||||
|
* only be used for journal recovery until the journal recovery is done.
|
||||||
|
* This is set by the dlm recover_prep callback and cleared by the
|
||||||
|
* gfs2_control thread when journal recovery is complete. To avoid
|
||||||
|
* races between recover_prep setting and gfs2_control clearing, recover_spin
|
||||||
|
* is held while changing this bit and reading/writing recover_block
|
||||||
|
* and recover_start.
|
||||||
|
*
|
||||||
|
* DFL_NO_DLM_OPS: dlm lockspace ops/callbacks are not being used.
|
||||||
|
*
|
||||||
|
* DFL_FIRST_MOUNT: this node is the first to mount this fs and is doing
|
||||||
|
* recovery of all journals before allowing other nodes to mount the fs.
|
||||||
|
* This is cleared when FIRST_MOUNT_DONE is set.
|
||||||
|
*
|
||||||
|
* DFL_FIRST_MOUNT_DONE: this node was the first mounter, and has finished
|
||||||
|
* recovery of all journals, and now allows other nodes to mount the fs.
|
||||||
|
*
|
||||||
|
* DFL_MOUNT_DONE: gdlm_mount has completed successfully and cleared
|
||||||
|
* BLOCK_LOCKS for the first time. The gfs2_control thread should now
|
||||||
|
* control clearing BLOCK_LOCKS for further recoveries.
|
||||||
|
*
|
||||||
|
* DFL_UNMOUNT: set by gdlm_unmount to keep sdp off gfs2_control_wq.
|
||||||
|
*
|
||||||
|
* DFL_DLM_RECOVERY: set while dlm is in recovery, between recover_prep()
|
||||||
|
* and recover_done(), i.e. set while recover_block == recover_start.
|
||||||
|
*/
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
DFL_BLOCK_LOCKS = 0,
|
DFL_BLOCK_LOCKS = 0,
|
||||||
|
DFL_NO_DLM_OPS = 1,
|
||||||
|
DFL_FIRST_MOUNT = 2,
|
||||||
|
DFL_FIRST_MOUNT_DONE = 3,
|
||||||
|
DFL_MOUNT_DONE = 4,
|
||||||
|
DFL_UNMOUNT = 5,
|
||||||
|
DFL_DLM_RECOVERY = 6,
|
||||||
};
|
};
|
||||||
|
|
||||||
struct lm_lockname {
|
struct lm_lockname {
|
||||||
@ -499,14 +536,26 @@ struct gfs2_sb_host {
|
|||||||
struct lm_lockstruct {
|
struct lm_lockstruct {
|
||||||
int ls_jid;
|
int ls_jid;
|
||||||
unsigned int ls_first;
|
unsigned int ls_first;
|
||||||
unsigned int ls_first_done;
|
|
||||||
unsigned int ls_nodir;
|
unsigned int ls_nodir;
|
||||||
const struct lm_lockops *ls_ops;
|
const struct lm_lockops *ls_ops;
|
||||||
unsigned long ls_flags;
|
|
||||||
dlm_lockspace_t *ls_dlm;
|
dlm_lockspace_t *ls_dlm;
|
||||||
|
|
||||||
int ls_recover_jid_done;
|
int ls_recover_jid_done; /* These two are deprecated, */
|
||||||
int ls_recover_jid_status;
|
int ls_recover_jid_status; /* used previously by gfs_controld */
|
||||||
|
|
||||||
|
struct dlm_lksb ls_mounted_lksb; /* mounted_lock */
|
||||||
|
struct dlm_lksb ls_control_lksb; /* control_lock */
|
||||||
|
char ls_control_lvb[GDLM_LVB_SIZE]; /* control_lock lvb */
|
||||||
|
struct completion ls_sync_wait; /* {control,mounted}_{lock,unlock} */
|
||||||
|
|
||||||
|
spinlock_t ls_recover_spin; /* protects following fields */
|
||||||
|
unsigned long ls_recover_flags; /* DFL_ */
|
||||||
|
uint32_t ls_recover_mount; /* gen in first recover_done cb */
|
||||||
|
uint32_t ls_recover_start; /* gen in last recover_done cb */
|
||||||
|
uint32_t ls_recover_block; /* copy recover_start in last recover_prep */
|
||||||
|
uint32_t ls_recover_size; /* size of recover_submit, recover_result */
|
||||||
|
uint32_t *ls_recover_submit; /* gen in last recover_slot cb per jid */
|
||||||
|
uint32_t *ls_recover_result; /* result of last jid recovery */
|
||||||
};
|
};
|
||||||
|
|
||||||
struct gfs2_sbd {
|
struct gfs2_sbd {
|
||||||
@ -544,6 +593,7 @@ struct gfs2_sbd {
|
|||||||
wait_queue_head_t sd_glock_wait;
|
wait_queue_head_t sd_glock_wait;
|
||||||
atomic_t sd_glock_disposal;
|
atomic_t sd_glock_disposal;
|
||||||
struct completion sd_locking_init;
|
struct completion sd_locking_init;
|
||||||
|
struct delayed_work sd_control_work;
|
||||||
|
|
||||||
/* Inode Stuff */
|
/* Inode Stuff */
|
||||||
|
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -28,6 +28,8 @@
|
|||||||
#include "recovery.h"
|
#include "recovery.h"
|
||||||
#include "dir.h"
|
#include "dir.h"
|
||||||
|
|
||||||
|
struct workqueue_struct *gfs2_control_wq;
|
||||||
|
|
||||||
static struct shrinker qd_shrinker = {
|
static struct shrinker qd_shrinker = {
|
||||||
.shrink = gfs2_shrink_qd_memory,
|
.shrink = gfs2_shrink_qd_memory,
|
||||||
.seeks = DEFAULT_SEEKS,
|
.seeks = DEFAULT_SEEKS,
|
||||||
@ -146,12 +148,19 @@ static int __init init_gfs2_fs(void)
|
|||||||
if (!gfs_recovery_wq)
|
if (!gfs_recovery_wq)
|
||||||
goto fail_wq;
|
goto fail_wq;
|
||||||
|
|
||||||
|
gfs2_control_wq = alloc_workqueue("gfs2_control",
|
||||||
|
WQ_NON_REENTRANT | WQ_UNBOUND | WQ_FREEZABLE, 0);
|
||||||
|
if (!gfs2_control_wq)
|
||||||
|
goto fail_control;
|
||||||
|
|
||||||
gfs2_register_debugfs();
|
gfs2_register_debugfs();
|
||||||
|
|
||||||
printk("GFS2 installed\n");
|
printk("GFS2 installed\n");
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
fail_control:
|
||||||
|
destroy_workqueue(gfs_recovery_wq);
|
||||||
fail_wq:
|
fail_wq:
|
||||||
unregister_filesystem(&gfs2meta_fs_type);
|
unregister_filesystem(&gfs2meta_fs_type);
|
||||||
fail_unregister:
|
fail_unregister:
|
||||||
@ -195,6 +204,7 @@ static void __exit exit_gfs2_fs(void)
|
|||||||
unregister_filesystem(&gfs2_fs_type);
|
unregister_filesystem(&gfs2_fs_type);
|
||||||
unregister_filesystem(&gfs2meta_fs_type);
|
unregister_filesystem(&gfs2meta_fs_type);
|
||||||
destroy_workqueue(gfs_recovery_wq);
|
destroy_workqueue(gfs_recovery_wq);
|
||||||
|
destroy_workqueue(gfs2_control_wq);
|
||||||
|
|
||||||
rcu_barrier();
|
rcu_barrier();
|
||||||
|
|
||||||
|
@ -562,8 +562,12 @@ static void gfs2_others_may_mount(struct gfs2_sbd *sdp)
|
|||||||
{
|
{
|
||||||
char *message = "FIRSTMOUNT=Done";
|
char *message = "FIRSTMOUNT=Done";
|
||||||
char *envp[] = { message, NULL };
|
char *envp[] = { message, NULL };
|
||||||
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
|
|
||||||
ls->ls_first_done = 1;
|
fs_info(sdp, "first mount done, others may mount\n");
|
||||||
|
|
||||||
|
if (sdp->sd_lockstruct.ls_ops->lm_first_done)
|
||||||
|
sdp->sd_lockstruct.ls_ops->lm_first_done(sdp);
|
||||||
|
|
||||||
kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp);
|
kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -944,7 +948,6 @@ static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent)
|
|||||||
struct gfs2_args *args = &sdp->sd_args;
|
struct gfs2_args *args = &sdp->sd_args;
|
||||||
const char *proto = sdp->sd_proto_name;
|
const char *proto = sdp->sd_proto_name;
|
||||||
const char *table = sdp->sd_table_name;
|
const char *table = sdp->sd_table_name;
|
||||||
const char *fsname;
|
|
||||||
char *o, *options;
|
char *o, *options;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
@ -1004,21 +1007,12 @@ hostdata_error:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (sdp->sd_args.ar_spectator)
|
|
||||||
snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.s", table);
|
|
||||||
else
|
|
||||||
snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.%u", table,
|
|
||||||
sdp->sd_lockstruct.ls_jid);
|
|
||||||
|
|
||||||
fsname = strchr(table, ':');
|
|
||||||
if (fsname)
|
|
||||||
fsname++;
|
|
||||||
if (lm->lm_mount == NULL) {
|
if (lm->lm_mount == NULL) {
|
||||||
fs_info(sdp, "Now mounting FS...\n");
|
fs_info(sdp, "Now mounting FS...\n");
|
||||||
complete_all(&sdp->sd_locking_init);
|
complete_all(&sdp->sd_locking_init);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
ret = lm->lm_mount(sdp, fsname);
|
ret = lm->lm_mount(sdp, table);
|
||||||
if (ret == 0)
|
if (ret == 0)
|
||||||
fs_info(sdp, "Joined cluster. Now mounting FS...\n");
|
fs_info(sdp, "Joined cluster. Now mounting FS...\n");
|
||||||
complete_all(&sdp->sd_locking_init);
|
complete_all(&sdp->sd_locking_init);
|
||||||
@ -1124,6 +1118,8 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent
|
|||||||
if (error)
|
if (error)
|
||||||
goto fail;
|
goto fail;
|
||||||
|
|
||||||
|
snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s", sdp->sd_table_name);
|
||||||
|
|
||||||
gfs2_create_debugfs_file(sdp);
|
gfs2_create_debugfs_file(sdp);
|
||||||
|
|
||||||
error = gfs2_sys_fs_add(sdp);
|
error = gfs2_sys_fs_add(sdp);
|
||||||
@ -1160,6 +1156,13 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent
|
|||||||
goto fail_sb;
|
goto fail_sb;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (sdp->sd_args.ar_spectator)
|
||||||
|
snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.s",
|
||||||
|
sdp->sd_table_name);
|
||||||
|
else
|
||||||
|
snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.%u",
|
||||||
|
sdp->sd_table_name, sdp->sd_lockstruct.ls_jid);
|
||||||
|
|
||||||
error = init_inodes(sdp, DO);
|
error = init_inodes(sdp, DO);
|
||||||
if (error)
|
if (error)
|
||||||
goto fail_sb;
|
goto fail_sb;
|
||||||
|
@ -436,12 +436,16 @@ static void gfs2_recovery_done(struct gfs2_sbd *sdp, unsigned int jid,
|
|||||||
char env_status[20];
|
char env_status[20];
|
||||||
char *envp[] = { env_jid, env_status, NULL };
|
char *envp[] = { env_jid, env_status, NULL };
|
||||||
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
|
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
|
||||||
|
|
||||||
ls->ls_recover_jid_done = jid;
|
ls->ls_recover_jid_done = jid;
|
||||||
ls->ls_recover_jid_status = message;
|
ls->ls_recover_jid_status = message;
|
||||||
sprintf(env_jid, "JID=%d", jid);
|
sprintf(env_jid, "JID=%d", jid);
|
||||||
sprintf(env_status, "RECOVERY=%s",
|
sprintf(env_status, "RECOVERY=%s",
|
||||||
message == LM_RD_SUCCESS ? "Done" : "Failed");
|
message == LM_RD_SUCCESS ? "Done" : "Failed");
|
||||||
kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp);
|
kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp);
|
||||||
|
|
||||||
|
if (sdp->sd_lockstruct.ls_ops->lm_recovery_result)
|
||||||
|
sdp->sd_lockstruct.ls_ops->lm_recovery_result(sdp, jid, message);
|
||||||
}
|
}
|
||||||
|
|
||||||
void gfs2_recover_func(struct work_struct *work)
|
void gfs2_recover_func(struct work_struct *work)
|
||||||
|
@ -298,7 +298,7 @@ static ssize_t block_show(struct gfs2_sbd *sdp, char *buf)
|
|||||||
ssize_t ret;
|
ssize_t ret;
|
||||||
int val = 0;
|
int val = 0;
|
||||||
|
|
||||||
if (test_bit(DFL_BLOCK_LOCKS, &ls->ls_flags))
|
if (test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags))
|
||||||
val = 1;
|
val = 1;
|
||||||
ret = sprintf(buf, "%d\n", val);
|
ret = sprintf(buf, "%d\n", val);
|
||||||
return ret;
|
return ret;
|
||||||
@ -313,9 +313,9 @@ static ssize_t block_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
|
|||||||
val = simple_strtol(buf, NULL, 0);
|
val = simple_strtol(buf, NULL, 0);
|
||||||
|
|
||||||
if (val == 1)
|
if (val == 1)
|
||||||
set_bit(DFL_BLOCK_LOCKS, &ls->ls_flags);
|
set_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
|
||||||
else if (val == 0) {
|
else if (val == 0) {
|
||||||
clear_bit(DFL_BLOCK_LOCKS, &ls->ls_flags);
|
clear_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
|
||||||
smp_mb__after_clear_bit();
|
smp_mb__after_clear_bit();
|
||||||
gfs2_glock_thaw(sdp);
|
gfs2_glock_thaw(sdp);
|
||||||
} else {
|
} else {
|
||||||
@ -360,19 +360,14 @@ out:
|
|||||||
static ssize_t first_done_show(struct gfs2_sbd *sdp, char *buf)
|
static ssize_t first_done_show(struct gfs2_sbd *sdp, char *buf)
|
||||||
{
|
{
|
||||||
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
|
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
|
||||||
return sprintf(buf, "%d\n", ls->ls_first_done);
|
return sprintf(buf, "%d\n", !!test_bit(DFL_FIRST_MOUNT_DONE, &ls->ls_recover_flags));
|
||||||
}
|
}
|
||||||
|
|
||||||
static ssize_t recover_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
|
int gfs2_recover_set(struct gfs2_sbd *sdp, unsigned jid)
|
||||||
{
|
{
|
||||||
unsigned jid;
|
|
||||||
struct gfs2_jdesc *jd;
|
struct gfs2_jdesc *jd;
|
||||||
int rv;
|
int rv;
|
||||||
|
|
||||||
rv = sscanf(buf, "%u", &jid);
|
|
||||||
if (rv != 1)
|
|
||||||
return -EINVAL;
|
|
||||||
|
|
||||||
rv = -ESHUTDOWN;
|
rv = -ESHUTDOWN;
|
||||||
spin_lock(&sdp->sd_jindex_spin);
|
spin_lock(&sdp->sd_jindex_spin);
|
||||||
if (test_bit(SDF_NORECOVERY, &sdp->sd_flags))
|
if (test_bit(SDF_NORECOVERY, &sdp->sd_flags))
|
||||||
@ -389,6 +384,20 @@ static ssize_t recover_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
|
|||||||
}
|
}
|
||||||
out:
|
out:
|
||||||
spin_unlock(&sdp->sd_jindex_spin);
|
spin_unlock(&sdp->sd_jindex_spin);
|
||||||
|
return rv;
|
||||||
|
}
|
||||||
|
|
||||||
|
static ssize_t recover_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
|
||||||
|
{
|
||||||
|
unsigned jid;
|
||||||
|
int rv;
|
||||||
|
|
||||||
|
rv = sscanf(buf, "%u", &jid);
|
||||||
|
if (rv != 1)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
rv = gfs2_recover_set(sdp, jid);
|
||||||
|
|
||||||
return rv ? rv : len;
|
return rv ? rv : len;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -19,5 +19,7 @@ void gfs2_sys_fs_del(struct gfs2_sbd *sdp);
|
|||||||
int gfs2_sys_init(void);
|
int gfs2_sys_init(void);
|
||||||
void gfs2_sys_uninit(void);
|
void gfs2_sys_uninit(void);
|
||||||
|
|
||||||
|
int gfs2_recover_set(struct gfs2_sbd *sdp, unsigned jid);
|
||||||
|
|
||||||
#endif /* __SYS_DOT_H__ */
|
#endif /* __SYS_DOT_H__ */
|
||||||
|
|
||||||
|
@ -22,6 +22,8 @@
|
|||||||
#define GFS2_LIVE_LOCK 1
|
#define GFS2_LIVE_LOCK 1
|
||||||
#define GFS2_TRANS_LOCK 2
|
#define GFS2_TRANS_LOCK 2
|
||||||
#define GFS2_RENAME_LOCK 3
|
#define GFS2_RENAME_LOCK 3
|
||||||
|
#define GFS2_CONTROL_LOCK 4
|
||||||
|
#define GFS2_MOUNTED_LOCK 5
|
||||||
|
|
||||||
/* Format numbers for various metadata types */
|
/* Format numbers for various metadata types */
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user