Merge git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-3.0-nmw

* git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-3.0-nmw:
  GFS2: Fix nlink setting on inode creation
  GFS2: fail mount if journal recovery fails
  GFS2: let spectator mount do read only recovery
  GFS2: Fix a use-after-free that coverity spotted
  GFS2: dlm based recovery coordination
This commit is contained in:
Linus Torvalds 2012-01-13 10:33:39 -08:00
commit 57e6a7dde8
12 changed files with 1108 additions and 49 deletions

View File

@ -1353,7 +1353,7 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
spin_lock(&gl->gl_spin);
gl->gl_reply = ret;
if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_flags))) {
if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags))) {
if (gfs2_should_freeze(gl)) {
set_bit(GLF_FROZEN, &gl->gl_flags);
spin_unlock(&gl->gl_spin);

View File

@ -121,8 +121,11 @@ enum {
struct lm_lockops {
const char *lm_proto_name;
int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname);
void (*lm_unmount) (struct gfs2_sbd *sdp);
int (*lm_mount) (struct gfs2_sbd *sdp, const char *table);
void (*lm_first_done) (struct gfs2_sbd *sdp);
void (*lm_recovery_result) (struct gfs2_sbd *sdp, unsigned int jid,
unsigned int result);
void (*lm_unmount) (struct gfs2_sbd *sdp);
void (*lm_withdraw) (struct gfs2_sbd *sdp);
void (*lm_put_lock) (struct gfs2_glock *gl);
int (*lm_lock) (struct gfs2_glock *gl, unsigned int req_state,

View File

@ -139,8 +139,45 @@ struct gfs2_bufdata {
#define GDLM_STRNAME_BYTES 25
#define GDLM_LVB_SIZE 32
/*
* ls_recover_flags:
*
* DFL_BLOCK_LOCKS: dlm is in recovery and will grant locks that had been
* held by failed nodes whose journals need recovery. Those locks should
* only be used for journal recovery until the journal recovery is done.
* This is set by the dlm recover_prep callback and cleared by the
* gfs2_control thread when journal recovery is complete. To avoid
* races between recover_prep setting and gfs2_control clearing, recover_spin
* is held while changing this bit and reading/writing recover_block
* and recover_start.
*
* DFL_NO_DLM_OPS: dlm lockspace ops/callbacks are not being used.
*
* DFL_FIRST_MOUNT: this node is the first to mount this fs and is doing
* recovery of all journals before allowing other nodes to mount the fs.
* This is cleared when FIRST_MOUNT_DONE is set.
*
* DFL_FIRST_MOUNT_DONE: this node was the first mounter, and has finished
* recovery of all journals, and now allows other nodes to mount the fs.
*
* DFL_MOUNT_DONE: gdlm_mount has completed successfully and cleared
* BLOCK_LOCKS for the first time. The gfs2_control thread should now
* control clearing BLOCK_LOCKS for further recoveries.
*
* DFL_UNMOUNT: set by gdlm_unmount to keep sdp off gfs2_control_wq.
*
* DFL_DLM_RECOVERY: set while dlm is in recovery, between recover_prep()
* and recover_done(), i.e. set while recover_block == recover_start.
*/
/*
 * Bit numbers within lm_lockstruct.ls_recover_flags, for use with
 * test_bit()/set_bit()/clear_bit().  The numeric values are fixed;
 * do not renumber.
 */
enum {
	DFL_BLOCK_LOCKS		= 0,	/* dlm in recovery; locks only for journal recovery */
	DFL_NO_DLM_OPS		= 1,	/* dlm lockspace ops/callbacks are not in use */
	DFL_FIRST_MOUNT		= 2,	/* this node mounted first and recovers all journals */
	DFL_FIRST_MOUNT_DONE	= 3,	/* first mounter finished; others may mount */
	DFL_MOUNT_DONE		= 4,	/* gdlm_mount completed successfully */
	DFL_UNMOUNT		= 5,	/* unmounting; keep sdp off gfs2_control_wq */
	DFL_DLM_RECOVERY	= 6,	/* between recover_prep() and recover_done() */
};
struct lm_lockname {
@ -392,6 +429,7 @@ struct gfs2_jdesc {
#define JDF_RECOVERY 1
unsigned int jd_jid;
unsigned int jd_blocks;
int jd_recover_error;
};
struct gfs2_statfs_change_host {
@ -461,6 +499,7 @@ enum {
SDF_NORECOVERY = 4,
SDF_DEMOTE = 5,
SDF_NOJOURNALID = 6,
SDF_RORECOVERY = 7, /* read only recovery */
};
#define GFS2_FSNAME_LEN 256
@ -499,14 +538,26 @@ struct gfs2_sb_host {
struct lm_lockstruct {
int ls_jid;
unsigned int ls_first;
unsigned int ls_first_done;
unsigned int ls_nodir;
const struct lm_lockops *ls_ops;
unsigned long ls_flags;
dlm_lockspace_t *ls_dlm;
int ls_recover_jid_done;
int ls_recover_jid_status;
int ls_recover_jid_done; /* These two are deprecated, */
int ls_recover_jid_status; /* used previously by gfs_controld */
struct dlm_lksb ls_mounted_lksb; /* mounted_lock */
struct dlm_lksb ls_control_lksb; /* control_lock */
char ls_control_lvb[GDLM_LVB_SIZE]; /* control_lock lvb */
struct completion ls_sync_wait; /* {control,mounted}_{lock,unlock} */
spinlock_t ls_recover_spin; /* protects following fields */
unsigned long ls_recover_flags; /* DFL_ */
uint32_t ls_recover_mount; /* gen in first recover_done cb */
uint32_t ls_recover_start; /* gen in last recover_done cb */
uint32_t ls_recover_block; /* copy recover_start in last recover_prep */
uint32_t ls_recover_size; /* size of recover_submit, recover_result */
uint32_t *ls_recover_submit; /* gen in last recover_slot cb per jid */
uint32_t *ls_recover_result; /* result of last jid recovery */
};
struct gfs2_sbd {
@ -544,6 +595,7 @@ struct gfs2_sbd {
wait_queue_head_t sd_glock_wait;
atomic_t sd_glock_disposal;
struct completion sd_locking_init;
struct delayed_work sd_control_work;
/* Inode Stuff */

View File

@ -599,9 +599,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
error = gfs2_meta_inode_buffer(ip, &dibh);
if (error)
goto fail_end_trans;
inc_nlink(&ip->i_inode);
if (S_ISDIR(ip->i_inode.i_mode))
inc_nlink(&ip->i_inode);
set_nlink(&ip->i_inode, S_ISDIR(ip->i_inode.i_mode) ? 2 : 1);
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);

File diff suppressed because it is too large Load Diff

View File

@ -28,6 +28,8 @@
#include "recovery.h"
#include "dir.h"
struct workqueue_struct *gfs2_control_wq;
static struct shrinker qd_shrinker = {
.shrink = gfs2_shrink_qd_memory,
.seeks = DEFAULT_SEEKS,
@ -146,12 +148,19 @@ static int __init init_gfs2_fs(void)
if (!gfs_recovery_wq)
goto fail_wq;
gfs2_control_wq = alloc_workqueue("gfs2_control",
WQ_NON_REENTRANT | WQ_UNBOUND | WQ_FREEZABLE, 0);
if (!gfs2_control_wq)
goto fail_control;
gfs2_register_debugfs();
printk("GFS2 installed\n");
return 0;
fail_control:
destroy_workqueue(gfs_recovery_wq);
fail_wq:
unregister_filesystem(&gfs2meta_fs_type);
fail_unregister:
@ -195,6 +204,7 @@ static void __exit exit_gfs2_fs(void)
unregister_filesystem(&gfs2_fs_type);
unregister_filesystem(&gfs2meta_fs_type);
destroy_workqueue(gfs_recovery_wq);
destroy_workqueue(gfs2_control_wq);
rcu_barrier();

View File

@ -562,8 +562,12 @@ static void gfs2_others_may_mount(struct gfs2_sbd *sdp)
{
char *message = "FIRSTMOUNT=Done";
char *envp[] = { message, NULL };
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
ls->ls_first_done = 1;
fs_info(sdp, "first mount done, others may mount\n");
if (sdp->sd_lockstruct.ls_ops->lm_first_done)
sdp->sd_lockstruct.ls_ops->lm_first_done(sdp);
kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp);
}
@ -944,7 +948,6 @@ static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent)
struct gfs2_args *args = &sdp->sd_args;
const char *proto = sdp->sd_proto_name;
const char *table = sdp->sd_table_name;
const char *fsname;
char *o, *options;
int ret;
@ -1004,21 +1007,12 @@ hostdata_error:
}
}
if (sdp->sd_args.ar_spectator)
snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.s", table);
else
snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.%u", table,
sdp->sd_lockstruct.ls_jid);
fsname = strchr(table, ':');
if (fsname)
fsname++;
if (lm->lm_mount == NULL) {
fs_info(sdp, "Now mounting FS...\n");
complete_all(&sdp->sd_locking_init);
return 0;
}
ret = lm->lm_mount(sdp, fsname);
ret = lm->lm_mount(sdp, table);
if (ret == 0)
fs_info(sdp, "Joined cluster. Now mounting FS...\n");
complete_all(&sdp->sd_locking_init);
@ -1084,7 +1078,7 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent
if (sdp->sd_args.ar_spectator) {
sb->s_flags |= MS_RDONLY;
set_bit(SDF_NORECOVERY, &sdp->sd_flags);
set_bit(SDF_RORECOVERY, &sdp->sd_flags);
}
if (sdp->sd_args.ar_posix_acl)
sb->s_flags |= MS_POSIXACL;
@ -1124,6 +1118,8 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent
if (error)
goto fail;
snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s", sdp->sd_table_name);
gfs2_create_debugfs_file(sdp);
error = gfs2_sys_fs_add(sdp);
@ -1160,6 +1156,13 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent
goto fail_sb;
}
if (sdp->sd_args.ar_spectator)
snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.s",
sdp->sd_table_name);
else
snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.%u",
sdp->sd_table_name, sdp->sd_lockstruct.ls_jid);
error = init_inodes(sdp, DO);
if (error)
goto fail_sb;

View File

@ -436,12 +436,16 @@ static void gfs2_recovery_done(struct gfs2_sbd *sdp, unsigned int jid,
char env_status[20];
char *envp[] = { env_jid, env_status, NULL };
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
ls->ls_recover_jid_done = jid;
ls->ls_recover_jid_status = message;
sprintf(env_jid, "JID=%d", jid);
sprintf(env_status, "RECOVERY=%s",
message == LM_RD_SUCCESS ? "Done" : "Failed");
kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp);
if (sdp->sd_lockstruct.ls_ops->lm_recovery_result)
sdp->sd_lockstruct.ls_ops->lm_recovery_result(sdp, jid, message);
}
void gfs2_recover_func(struct work_struct *work)
@ -512,7 +516,9 @@ void gfs2_recover_func(struct work_struct *work)
if (error)
goto fail_gunlock_ji;
if (test_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags)) {
if (test_bit(SDF_RORECOVERY, &sdp->sd_flags)) {
ro = 1;
} else if (test_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags)) {
if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
ro = 1;
} else {
@ -577,6 +583,7 @@ fail_gunlock_j:
fs_info(sdp, "jid=%u: %s\n", jd->jd_jid, (error) ? "Failed" : "Done");
fail:
jd->jd_recover_error = error;
gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP);
done:
clear_bit(JDF_RECOVERY, &jd->jd_flags);
@ -605,6 +612,6 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd, bool wait)
wait_on_bit(&jd->jd_flags, JDF_RECOVERY, gfs2_recovery_wait,
TASK_UNINTERRUPTIBLE);
return 0;
return wait ? jd->jd_recover_error : 0;
}

View File

@ -1108,9 +1108,9 @@ void gfs2_inplace_release(struct gfs2_inode *ip)
{
struct gfs2_blkreserv *rs = ip->i_res;
gfs2_blkrsv_put(ip);
if (rs->rs_rgd_gh.gh_gl)
gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
gfs2_blkrsv_put(ip);
}
/**

View File

@ -298,7 +298,7 @@ static ssize_t block_show(struct gfs2_sbd *sdp, char *buf)
ssize_t ret;
int val = 0;
if (test_bit(DFL_BLOCK_LOCKS, &ls->ls_flags))
if (test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags))
val = 1;
ret = sprintf(buf, "%d\n", val);
return ret;
@ -313,9 +313,9 @@ static ssize_t block_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
val = simple_strtol(buf, NULL, 0);
if (val == 1)
set_bit(DFL_BLOCK_LOCKS, &ls->ls_flags);
set_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
else if (val == 0) {
clear_bit(DFL_BLOCK_LOCKS, &ls->ls_flags);
clear_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
smp_mb__after_clear_bit();
gfs2_glock_thaw(sdp);
} else {
@ -350,8 +350,8 @@ static ssize_t lkfirst_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
goto out;
if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL)
goto out;
sdp->sd_lockstruct.ls_first = first;
rv = 0;
sdp->sd_lockstruct.ls_first = first;
rv = 0;
out:
spin_unlock(&sdp->sd_jindex_spin);
return rv ? rv : len;
@ -360,19 +360,14 @@ out:
static ssize_t first_done_show(struct gfs2_sbd *sdp, char *buf)
{
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
return sprintf(buf, "%d\n", ls->ls_first_done);
return sprintf(buf, "%d\n", !!test_bit(DFL_FIRST_MOUNT_DONE, &ls->ls_recover_flags));
}
static ssize_t recover_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
int gfs2_recover_set(struct gfs2_sbd *sdp, unsigned jid)
{
unsigned jid;
struct gfs2_jdesc *jd;
int rv;
rv = sscanf(buf, "%u", &jid);
if (rv != 1)
return -EINVAL;
rv = -ESHUTDOWN;
spin_lock(&sdp->sd_jindex_spin);
if (test_bit(SDF_NORECOVERY, &sdp->sd_flags))
@ -389,6 +384,20 @@ static ssize_t recover_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
}
out:
spin_unlock(&sdp->sd_jindex_spin);
return rv;
}
/*
 * recover_store - parse a journal id from @buf and pass it to
 * gfs2_recover_set().
 * @sdp: filesystem instance, forwarded unchanged to gfs2_recover_set()
 * @buf: text expected to start with an unsigned integer ("%u")
 * @len: number of bytes supplied in @buf
 *
 * Returns -EINVAL if no unsigned integer could be parsed from @buf,
 * the non-zero result of gfs2_recover_set() if it reports one,
 * and @len otherwise.
 */
static ssize_t recover_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
{
	unsigned int journal_id;
	int error;

	/* Exactly one converted field means a jid was read. */
	if (sscanf(buf, "%u", &journal_id) != 1)
		return -EINVAL;

	error = gfs2_recover_set(sdp, journal_id);
	if (error)
		return error;

	return len;
}

View File

@ -19,5 +19,7 @@ void gfs2_sys_fs_del(struct gfs2_sbd *sdp);
int gfs2_sys_init(void);
void gfs2_sys_uninit(void);
int gfs2_recover_set(struct gfs2_sbd *sdp, unsigned jid);
#endif /* __SYS_DOT_H__ */

View File

@ -22,6 +22,8 @@
#define GFS2_LIVE_LOCK 1
#define GFS2_TRANS_LOCK 2
#define GFS2_RENAME_LOCK 3
#define GFS2_CONTROL_LOCK 4
#define GFS2_MOUNTED_LOCK 5
/* Format numbers for various metadata types */