Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2
* 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2: (64 commits) ocfs2/net: Add debug interface to o2net ocfs2: Only build ocfs2/dlm with the o2cb stack module ocfs2/cluster: Get rid of arguments to the timeout routines ocfs2: Put tree in MAINTAINERS ocfs2: Use BUG_ON ocfs2: Convert ocfs2 over to unlocked_ioctl ocfs2: Improve rename locking fs/ocfs2/aops.c: test for IS_ERR rather than 0 ocfs2: Add inode stealing for ocfs2_reserve_new_inode ocfs2: Add ac_alloc_slot in ocfs2_alloc_context ocfs2: Add a new parameter for ocfs2_reserve_suballoc_bits ocfs2: Enable cross extent block merge. ocfs2: Add support for cross extent block ocfs2: Move /sys/o2cb to /sys/fs/o2cb sysfs: Allow removal of symlinks in the sysfs root ocfs2: Reconnect after idle time out. ocfs2/dlm: Cleanup lockres print ocfs2/dlm: Fix lockname in lockres print function ocfs2/dlm: Move dlm_print_one_mle() from dlmmaster.c to dlmdebug.c ocfs2/dlm: Dumps the purgelist into a debugfs file ...
This commit is contained in:
commit
e675349e2b
11
Documentation/ABI/obsolete/o2cb
Normal file
11
Documentation/ABI/obsolete/o2cb
Normal file
@ -0,0 +1,11 @@
|
||||
What: /sys/o2cb symlink
|
||||
Date: Dec 2005
|
||||
KernelVersion: 2.6.16
|
||||
Contact: ocfs2-devel@oss.oracle.com
|
||||
Description: This is a symlink: /sys/o2cb to /sys/fs/o2cb. The symlink will
|
||||
be removed when new versions of ocfs2-tools which know to look
|
||||
in /sys/fs/o2cb are sufficiently prevalent. Don't code new
|
||||
software to look here, it should try /sys/fs/o2cb instead.
|
||||
See Documentation/ABI/stable/o2cb for more information on usage.
|
||||
Users: ocfs2-tools. It's sufficient to mail proposed changes to
|
||||
ocfs2-devel@oss.oracle.com.
|
10
Documentation/ABI/stable/o2cb
Normal file
10
Documentation/ABI/stable/o2cb
Normal file
@ -0,0 +1,10 @@
|
||||
What: /sys/fs/o2cb/ (was /sys/o2cb)
|
||||
Date: Dec 2005
|
||||
KernelVersion: 2.6.16
|
||||
Contact: ocfs2-devel@oss.oracle.com
|
||||
Description: Ocfs2-tools looks at 'interface-revision' for versioning
|
||||
information. Each logmask/ file controls a set of debug prints
|
||||
and can be written into with the strings "allow", "deny", or
|
||||
"off". Reading the file returns the current state.
|
||||
Users: ocfs2-tools. It's sufficient to mail proposed changes to
|
||||
ocfs2-devel@oss.oracle.com.
|
89
Documentation/ABI/testing/sysfs-ocfs2
Normal file
89
Documentation/ABI/testing/sysfs-ocfs2
Normal file
@ -0,0 +1,89 @@
|
||||
What: /sys/fs/ocfs2/
|
||||
Date: April 2008
|
||||
Contact: ocfs2-devel@oss.oracle.com
|
||||
Description:
|
||||
The /sys/fs/ocfs2 directory contains knobs used by the
|
||||
ocfs2-tools to interact with the filesystem.
|
||||
|
||||
What: /sys/fs/ocfs2/max_locking_protocol
|
||||
Date: April 2008
|
||||
Contact: ocfs2-devel@oss.oracle.com
|
||||
Description:
|
||||
The /sys/fs/ocfs2/max_locking_protocol file displays the version
|
||||
of ocfs2 locking supported by the filesystem. This version
|
||||
covers how ocfs2 uses distributed locking between cluster
|
||||
nodes.
|
||||
|
||||
The protocol version has a major and minor number. Two
|
||||
cluster nodes can interoperate if they have an identical
|
||||
major number and an overlapping minor number - thus,
|
||||
a node with version 1.10 can interoperate with a node
|
||||
sporting version 1.8, as long as both use the 1.8 protocol.
|
||||
|
||||
Reading from this file returns a single line, the major
|
||||
number and minor number joined by a period, e.g. "1.10".
|
||||
|
||||
This file is read-only. The value is compiled into the
|
||||
driver.
|
||||
|
||||
What: /sys/fs/ocfs2/loaded_cluster_plugins
|
||||
Date: April 2008
|
||||
Contact: ocfs2-devel@oss.oracle.com
|
||||
Description:
|
||||
The /sys/fs/ocfs2/loaded_cluster_plugins file describes
|
||||
the available plugins to support ocfs2 cluster operation.
|
||||
A cluster plugin is required to use ocfs2 in a cluster.
|
||||
There are currently two available plugins:
|
||||
|
||||
* 'o2cb' - The classic o2cb cluster stack that ocfs2 has
|
||||
used since its inception.
|
||||
* 'user' - A plugin supporting userspace cluster software
|
||||
in conjunction with fs/dlm.
|
||||
|
||||
Reading from this file returns the names of all loaded
|
||||
plugins, one per line.
|
||||
|
||||
This file is read-only. Its contents may change as
|
||||
plugins are loaded or removed.
|
||||
|
||||
What: /sys/fs/ocfs2/active_cluster_plugin
|
||||
Date: April 2008
|
||||
Contact: ocfs2-devel@oss.oracle.com
|
||||
Description:
|
||||
The /sys/fs/ocfs2/active_cluster_plugin file displays which
|
||||
cluster plugin is currently in use by the filesystem.
|
||||
The active plugin will appear in the loaded_cluster_plugins
|
||||
file as well. Only one plugin can be used at a time.
|
||||
|
||||
Reading from this file returns the name of the active plugin
|
||||
on a single line.
|
||||
|
||||
This file is read-only. Which plugin is active depends on
|
||||
the cluster stack in use. The contents may change
|
||||
when all filesystems are unmounted and the cluster stack
|
||||
is changed.
|
||||
|
||||
What: /sys/fs/ocfs2/cluster_stack
|
||||
Date: April 2008
|
||||
Contact: ocfs2-devel@oss.oracle.com
|
||||
Description:
|
||||
The /sys/fs/ocfs2/cluster_stack file contains the name
|
||||
of the current ocfs2 cluster stack. This value is set by
|
||||
userspace tools when bringing the cluster stack online.
|
||||
|
||||
Cluster stack names are 4 characters in length.
|
||||
|
||||
When the 'o2cb' cluster stack is used, the 'o2cb' cluster
|
||||
plugin is active. All other cluster stacks use the 'user'
|
||||
cluster plugin.
|
||||
|
||||
Reading from this file returns the name of the current
|
||||
cluster stack on a single line.
|
||||
|
||||
Writing a new stack name to this file changes the current
|
||||
cluster stack unless there are mounted ocfs2 filesystems.
|
||||
If there are mounted filesystems, attempts to change the
|
||||
stack return an error.
|
||||
|
||||
Users:
|
||||
ocfs2-tools <ocfs2-tools-devel@oss.oracle.com>
|
@ -318,3 +318,13 @@ Why: Not used in-tree. The current out-of-tree users used it to
|
||||
code / infrastructure should be in the kernel and not in some
|
||||
out-of-tree driver.
|
||||
Who: Thomas Gleixner <tglx@linutronix.de>
|
||||
|
||||
---------------------------
|
||||
|
||||
What: /sys/o2cb symlink
|
||||
When: January 2010
|
||||
Why: /sys/fs/o2cb is the proper location for this information - /sys/o2cb
|
||||
exists as a symlink for backwards compatibility for old versions of
|
||||
ocfs2-tools. 2 years should be sufficient time to phase in new versions
|
||||
which know to look in /sys/fs/o2cb.
|
||||
Who: ocfs2-devel@oss.oracle.com
|
||||
|
@ -2952,6 +2952,7 @@ P: Joel Becker
|
||||
M: joel.becker@oracle.com
|
||||
L: ocfs2-devel@oss.oracle.com
|
||||
W: http://oss.oracle.com/projects/ocfs2/
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2.git
|
||||
S: Supported
|
||||
|
||||
OMNIKEY CARDMAN 4000 DRIVER
|
||||
|
26
fs/Kconfig
26
fs/Kconfig
@ -444,6 +444,32 @@ config OCFS2_FS
|
||||
For more information on OCFS2, see the file
|
||||
<file:Documentation/filesystems/ocfs2.txt>.
|
||||
|
||||
config OCFS2_FS_O2CB
|
||||
tristate "O2CB Kernelspace Clustering"
|
||||
depends on OCFS2_FS
|
||||
default y
|
||||
help
|
||||
OCFS2 includes a simple kernelspace clustering package, the OCFS2
|
||||
Cluster Base. It only requires a very small userspace component
|
||||
to configure it. This comes with the standard ocfs2-tools package.
|
||||
O2CB is limited to maintaining a cluster for OCFS2 file systems.
|
||||
It cannot manage any other cluster applications.
|
||||
|
||||
It is always safe to say Y here, as the clustering method is
|
||||
run-time selectable.
|
||||
|
||||
config OCFS2_FS_USERSPACE_CLUSTER
|
||||
tristate "OCFS2 Userspace Clustering"
|
||||
depends on OCFS2_FS && DLM
|
||||
default y
|
||||
help
|
||||
This option will allow OCFS2 to use userspace clustering services
|
||||
in conjunction with the DLM in fs/dlm. If you are using a
|
||||
userspace cluster manager, say Y here.
|
||||
|
||||
It is safe to say Y, as the clustering method is run-time
|
||||
selectable.
|
||||
|
||||
config OCFS2_DEBUG_MASKLOG
|
||||
bool "OCFS2 logging support"
|
||||
depends on OCFS2_FS
|
||||
|
@ -2,7 +2,12 @@ EXTRA_CFLAGS += -Ifs/ocfs2
|
||||
|
||||
EXTRA_CFLAGS += -DCATCH_BH_JBD_RACES
|
||||
|
||||
obj-$(CONFIG_OCFS2_FS) += ocfs2.o
|
||||
obj-$(CONFIG_OCFS2_FS) += \
|
||||
ocfs2.o \
|
||||
ocfs2_stackglue.o
|
||||
|
||||
obj-$(CONFIG_OCFS2_FS_O2CB) += ocfs2_stack_o2cb.o
|
||||
obj-$(CONFIG_OCFS2_FS_USERSPACE_CLUSTER) += ocfs2_stack_user.o
|
||||
|
||||
ocfs2-objs := \
|
||||
alloc.o \
|
||||
@ -31,5 +36,10 @@ ocfs2-objs := \
|
||||
uptodate.o \
|
||||
ver.o
|
||||
|
||||
ocfs2_stackglue-objs := stackglue.o
|
||||
ocfs2_stack_o2cb-objs := stack_o2cb.o
|
||||
ocfs2_stack_user-objs := stack_user.o
|
||||
|
||||
# cluster/ is always needed when OCFS2_FS for masklog support
|
||||
obj-$(CONFIG_OCFS2_FS) += cluster/
|
||||
obj-$(CONFIG_OCFS2_FS) += dlm/
|
||||
obj-$(CONFIG_OCFS2_FS_O2CB) += dlm/
|
||||
|
469
fs/ocfs2/alloc.c
469
fs/ocfs2/alloc.c
@ -1029,8 +1029,7 @@ static void ocfs2_rotate_leaf(struct ocfs2_extent_list *el,
|
||||
BUG_ON(!next_free);
|
||||
|
||||
/* The tree code before us didn't allow enough room in the leaf. */
|
||||
if (el->l_next_free_rec == el->l_count && !has_empty)
|
||||
BUG();
|
||||
BUG_ON(el->l_next_free_rec == el->l_count && !has_empty);
|
||||
|
||||
/*
|
||||
* The easiest way to approach this is to just remove the
|
||||
@ -1450,6 +1449,8 @@ static void ocfs2_adjust_root_records(struct ocfs2_extent_list *root_el,
|
||||
* - When our insert into the right path leaf is at the leftmost edge
|
||||
* and requires an update of the path immediately to its left. This
|
||||
* can occur at the end of some types of rotation and appending inserts.
|
||||
* - When we've adjusted the last extent record in the left path leaf and the
|
||||
* 1st extent record in the right path leaf during cross extent block merge.
|
||||
*/
|
||||
static void ocfs2_complete_edge_insert(struct inode *inode, handle_t *handle,
|
||||
struct ocfs2_path *left_path,
|
||||
@ -2712,24 +2713,147 @@ static void ocfs2_cleanup_merge(struct ocfs2_extent_list *el,
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Remove split_rec clusters from the record at index and merge them
|
||||
* onto the beginning of the record at index + 1.
|
||||
*/
|
||||
static int ocfs2_merge_rec_right(struct inode *inode, struct buffer_head *bh,
|
||||
handle_t *handle,
|
||||
struct ocfs2_extent_rec *split_rec,
|
||||
struct ocfs2_extent_list *el, int index)
|
||||
static int ocfs2_get_right_path(struct inode *inode,
|
||||
struct ocfs2_path *left_path,
|
||||
struct ocfs2_path **ret_right_path)
|
||||
{
|
||||
int ret;
|
||||
u32 right_cpos;
|
||||
struct ocfs2_path *right_path = NULL;
|
||||
struct ocfs2_extent_list *left_el;
|
||||
|
||||
*ret_right_path = NULL;
|
||||
|
||||
/* This function shouldn't be called for non-trees. */
|
||||
BUG_ON(left_path->p_tree_depth == 0);
|
||||
|
||||
left_el = path_leaf_el(left_path);
|
||||
BUG_ON(left_el->l_next_free_rec != left_el->l_count);
|
||||
|
||||
ret = ocfs2_find_cpos_for_right_leaf(inode->i_sb, left_path,
|
||||
&right_cpos);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* This function shouldn't be called for the rightmost leaf. */
|
||||
BUG_ON(right_cpos == 0);
|
||||
|
||||
right_path = ocfs2_new_path(path_root_bh(left_path),
|
||||
path_root_el(left_path));
|
||||
if (!right_path) {
|
||||
ret = -ENOMEM;
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = ocfs2_find_path(inode, right_path, right_cpos);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
*ret_right_path = right_path;
|
||||
out:
|
||||
if (ret)
|
||||
ocfs2_free_path(right_path);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Remove split_rec clusters from the record at index and merge them
|
||||
* onto the beginning of the record "next" to it.
|
||||
* For index < l_count - 1, the next means the extent rec at index + 1.
|
||||
* For index == l_count - 1, the "next" means the 1st extent rec of the
|
||||
* next extent block.
|
||||
*/
|
||||
static int ocfs2_merge_rec_right(struct inode *inode,
|
||||
struct ocfs2_path *left_path,
|
||||
handle_t *handle,
|
||||
struct ocfs2_extent_rec *split_rec,
|
||||
int index)
|
||||
{
|
||||
int ret, next_free, i;
|
||||
unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters);
|
||||
struct ocfs2_extent_rec *left_rec;
|
||||
struct ocfs2_extent_rec *right_rec;
|
||||
struct ocfs2_extent_list *right_el;
|
||||
struct ocfs2_path *right_path = NULL;
|
||||
int subtree_index = 0;
|
||||
struct ocfs2_extent_list *el = path_leaf_el(left_path);
|
||||
struct buffer_head *bh = path_leaf_bh(left_path);
|
||||
struct buffer_head *root_bh = NULL;
|
||||
|
||||
BUG_ON(index >= le16_to_cpu(el->l_next_free_rec));
|
||||
|
||||
left_rec = &el->l_recs[index];
|
||||
right_rec = &el->l_recs[index + 1];
|
||||
|
||||
if (index == le16_to_cpu(el->l_next_free_rec - 1) &&
|
||||
le16_to_cpu(el->l_next_free_rec) == le16_to_cpu(el->l_count)) {
|
||||
/* This is a cross extent block merge. */
|
||||
ret = ocfs2_get_right_path(inode, left_path, &right_path);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
right_el = path_leaf_el(right_path);
|
||||
next_free = le16_to_cpu(right_el->l_next_free_rec);
|
||||
BUG_ON(next_free <= 0);
|
||||
right_rec = &right_el->l_recs[0];
|
||||
if (ocfs2_is_empty_extent(right_rec)) {
|
||||
BUG_ON(le16_to_cpu(next_free) <= 1);
|
||||
right_rec = &right_el->l_recs[1];
|
||||
}
|
||||
|
||||
BUG_ON(le32_to_cpu(left_rec->e_cpos) +
|
||||
le16_to_cpu(left_rec->e_leaf_clusters) !=
|
||||
le32_to_cpu(right_rec->e_cpos));
|
||||
|
||||
subtree_index = ocfs2_find_subtree_root(inode,
|
||||
left_path, right_path);
|
||||
|
||||
ret = ocfs2_extend_rotate_transaction(handle, subtree_index,
|
||||
handle->h_buffer_credits,
|
||||
right_path);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
root_bh = left_path->p_node[subtree_index].bh;
|
||||
BUG_ON(root_bh != right_path->p_node[subtree_index].bh);
|
||||
|
||||
ret = ocfs2_journal_access(handle, inode, root_bh,
|
||||
OCFS2_JOURNAL_ACCESS_WRITE);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
for (i = subtree_index + 1;
|
||||
i < path_num_items(right_path); i++) {
|
||||
ret = ocfs2_journal_access(handle, inode,
|
||||
right_path->p_node[i].bh,
|
||||
OCFS2_JOURNAL_ACCESS_WRITE);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = ocfs2_journal_access(handle, inode,
|
||||
left_path->p_node[i].bh,
|
||||
OCFS2_JOURNAL_ACCESS_WRITE);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
BUG_ON(index == le16_to_cpu(el->l_next_free_rec) - 1);
|
||||
right_rec = &el->l_recs[index + 1];
|
||||
}
|
||||
|
||||
ret = ocfs2_journal_access(handle, inode, bh,
|
||||
OCFS2_JOURNAL_ACCESS_WRITE);
|
||||
@ -2751,30 +2875,156 @@ static int ocfs2_merge_rec_right(struct inode *inode, struct buffer_head *bh,
|
||||
if (ret)
|
||||
mlog_errno(ret);
|
||||
|
||||
if (right_path) {
|
||||
ret = ocfs2_journal_dirty(handle, path_leaf_bh(right_path));
|
||||
if (ret)
|
||||
mlog_errno(ret);
|
||||
|
||||
ocfs2_complete_edge_insert(inode, handle, left_path,
|
||||
right_path, subtree_index);
|
||||
}
|
||||
out:
|
||||
if (right_path)
|
||||
ocfs2_free_path(right_path);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int ocfs2_get_left_path(struct inode *inode,
|
||||
struct ocfs2_path *right_path,
|
||||
struct ocfs2_path **ret_left_path)
|
||||
{
|
||||
int ret;
|
||||
u32 left_cpos;
|
||||
struct ocfs2_path *left_path = NULL;
|
||||
|
||||
*ret_left_path = NULL;
|
||||
|
||||
/* This function shouldn't be called for non-trees. */
|
||||
BUG_ON(right_path->p_tree_depth == 0);
|
||||
|
||||
ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb,
|
||||
right_path, &left_cpos);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* This function shouldn't be called for the leftmost leaf. */
|
||||
BUG_ON(left_cpos == 0);
|
||||
|
||||
left_path = ocfs2_new_path(path_root_bh(right_path),
|
||||
path_root_el(right_path));
|
||||
if (!left_path) {
|
||||
ret = -ENOMEM;
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = ocfs2_find_path(inode, left_path, left_cpos);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
*ret_left_path = left_path;
|
||||
out:
|
||||
if (ret)
|
||||
ocfs2_free_path(left_path);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Remove split_rec clusters from the record at index and merge them
|
||||
* onto the tail of the record at index - 1.
|
||||
* onto the tail of the record "before" it.
|
||||
* For index > 0, the "before" means the extent rec at index - 1.
|
||||
*
|
||||
* For index == 0, the "before" means the last record of the previous
|
||||
* extent block. And there is also a situation that we may need to
|
||||
* remove the rightmost leaf extent block in the right_path and change
|
||||
* the right path to indicate the new rightmost path.
|
||||
*/
|
||||
static int ocfs2_merge_rec_left(struct inode *inode, struct buffer_head *bh,
|
||||
static int ocfs2_merge_rec_left(struct inode *inode,
|
||||
struct ocfs2_path *right_path,
|
||||
handle_t *handle,
|
||||
struct ocfs2_extent_rec *split_rec,
|
||||
struct ocfs2_extent_list *el, int index)
|
||||
struct ocfs2_cached_dealloc_ctxt *dealloc,
|
||||
int index)
|
||||
{
|
||||
int ret, has_empty_extent = 0;
|
||||
int ret, i, subtree_index = 0, has_empty_extent = 0;
|
||||
unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters);
|
||||
struct ocfs2_extent_rec *left_rec;
|
||||
struct ocfs2_extent_rec *right_rec;
|
||||
struct ocfs2_extent_list *el = path_leaf_el(right_path);
|
||||
struct buffer_head *bh = path_leaf_bh(right_path);
|
||||
struct buffer_head *root_bh = NULL;
|
||||
struct ocfs2_path *left_path = NULL;
|
||||
struct ocfs2_extent_list *left_el;
|
||||
|
||||
BUG_ON(index <= 0);
|
||||
BUG_ON(index < 0);
|
||||
|
||||
left_rec = &el->l_recs[index - 1];
|
||||
right_rec = &el->l_recs[index];
|
||||
if (ocfs2_is_empty_extent(&el->l_recs[0]))
|
||||
has_empty_extent = 1;
|
||||
if (index == 0) {
|
||||
/* This is a cross extent block merge. */
|
||||
ret = ocfs2_get_left_path(inode, right_path, &left_path);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
left_el = path_leaf_el(left_path);
|
||||
BUG_ON(le16_to_cpu(left_el->l_next_free_rec) !=
|
||||
le16_to_cpu(left_el->l_count));
|
||||
|
||||
left_rec = &left_el->l_recs[
|
||||
le16_to_cpu(left_el->l_next_free_rec) - 1];
|
||||
BUG_ON(le32_to_cpu(left_rec->e_cpos) +
|
||||
le16_to_cpu(left_rec->e_leaf_clusters) !=
|
||||
le32_to_cpu(split_rec->e_cpos));
|
||||
|
||||
subtree_index = ocfs2_find_subtree_root(inode,
|
||||
left_path, right_path);
|
||||
|
||||
ret = ocfs2_extend_rotate_transaction(handle, subtree_index,
|
||||
handle->h_buffer_credits,
|
||||
left_path);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
root_bh = left_path->p_node[subtree_index].bh;
|
||||
BUG_ON(root_bh != right_path->p_node[subtree_index].bh);
|
||||
|
||||
ret = ocfs2_journal_access(handle, inode, root_bh,
|
||||
OCFS2_JOURNAL_ACCESS_WRITE);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
for (i = subtree_index + 1;
|
||||
i < path_num_items(right_path); i++) {
|
||||
ret = ocfs2_journal_access(handle, inode,
|
||||
right_path->p_node[i].bh,
|
||||
OCFS2_JOURNAL_ACCESS_WRITE);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = ocfs2_journal_access(handle, inode,
|
||||
left_path->p_node[i].bh,
|
||||
OCFS2_JOURNAL_ACCESS_WRITE);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
left_rec = &el->l_recs[index - 1];
|
||||
if (ocfs2_is_empty_extent(&el->l_recs[0]))
|
||||
has_empty_extent = 1;
|
||||
}
|
||||
|
||||
ret = ocfs2_journal_access(handle, inode, bh,
|
||||
OCFS2_JOURNAL_ACCESS_WRITE);
|
||||
@ -2790,9 +3040,8 @@ static int ocfs2_merge_rec_left(struct inode *inode, struct buffer_head *bh,
|
||||
*left_rec = *split_rec;
|
||||
|
||||
has_empty_extent = 0;
|
||||
} else {
|
||||
} else
|
||||
le16_add_cpu(&left_rec->e_leaf_clusters, split_clusters);
|
||||
}
|
||||
|
||||
le32_add_cpu(&right_rec->e_cpos, split_clusters);
|
||||
le64_add_cpu(&right_rec->e_blkno,
|
||||
@ -2805,13 +3054,44 @@ static int ocfs2_merge_rec_left(struct inode *inode, struct buffer_head *bh,
|
||||
if (ret)
|
||||
mlog_errno(ret);
|
||||
|
||||
if (left_path) {
|
||||
ret = ocfs2_journal_dirty(handle, path_leaf_bh(left_path));
|
||||
if (ret)
|
||||
mlog_errno(ret);
|
||||
|
||||
/*
|
||||
* In the situation that the right_rec is empty and the extent
|
||||
* block is empty also, ocfs2_complete_edge_insert can't handle
|
||||
* it and we need to delete the right extent block.
|
||||
*/
|
||||
if (le16_to_cpu(right_rec->e_leaf_clusters) == 0 &&
|
||||
le16_to_cpu(el->l_next_free_rec) == 1) {
|
||||
|
||||
ret = ocfs2_remove_rightmost_path(inode, handle,
|
||||
right_path, dealloc);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Now the rightmost extent block has been deleted.
|
||||
* So we use the new rightmost path.
|
||||
*/
|
||||
ocfs2_mv_path(right_path, left_path);
|
||||
left_path = NULL;
|
||||
} else
|
||||
ocfs2_complete_edge_insert(inode, handle, left_path,
|
||||
right_path, subtree_index);
|
||||
}
|
||||
out:
|
||||
if (left_path)
|
||||
ocfs2_free_path(left_path);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int ocfs2_try_to_merge_extent(struct inode *inode,
|
||||
handle_t *handle,
|
||||
struct ocfs2_path *left_path,
|
||||
struct ocfs2_path *path,
|
||||
int split_index,
|
||||
struct ocfs2_extent_rec *split_rec,
|
||||
struct ocfs2_cached_dealloc_ctxt *dealloc,
|
||||
@ -2819,7 +3099,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
|
||||
|
||||
{
|
||||
int ret = 0;
|
||||
struct ocfs2_extent_list *el = path_leaf_el(left_path);
|
||||
struct ocfs2_extent_list *el = path_leaf_el(path);
|
||||
struct ocfs2_extent_rec *rec = &el->l_recs[split_index];
|
||||
|
||||
BUG_ON(ctxt->c_contig_type == CONTIG_NONE);
|
||||
@ -2832,7 +3112,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
|
||||
* extents - having more than one in a leaf is
|
||||
* illegal.
|
||||
*/
|
||||
ret = ocfs2_rotate_tree_left(inode, handle, left_path,
|
||||
ret = ocfs2_rotate_tree_left(inode, handle, path,
|
||||
dealloc);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
@ -2847,7 +3127,6 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
|
||||
* Left-right contig implies this.
|
||||
*/
|
||||
BUG_ON(!ctxt->c_split_covers_rec);
|
||||
BUG_ON(split_index == 0);
|
||||
|
||||
/*
|
||||
* Since the leftright insert always covers the entire
|
||||
@ -2858,9 +3137,14 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
|
||||
* Since the adding of an empty extent shifts
|
||||
* everything back to the right, there's no need to
|
||||
* update split_index here.
|
||||
*
|
||||
* When the split_index is zero, we need to merge it to the
|
||||
* previous extent block. It is more efficient and easier
|
||||
* if we do merge_right first and merge_left later.
|
||||
*/
|
||||
ret = ocfs2_merge_rec_left(inode, path_leaf_bh(left_path),
|
||||
handle, split_rec, el, split_index);
|
||||
ret = ocfs2_merge_rec_right(inode, path,
|
||||
handle, split_rec,
|
||||
split_index);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
@ -2871,32 +3155,30 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
|
||||
*/
|
||||
BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0]));
|
||||
|
||||
/*
|
||||
* The left merge left us with an empty extent, remove
|
||||
* it.
|
||||
*/
|
||||
ret = ocfs2_rotate_tree_left(inode, handle, left_path, dealloc);
|
||||
/* The merge left us with an empty extent, remove it. */
|
||||
ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
}
|
||||
split_index--;
|
||||
|
||||
rec = &el->l_recs[split_index];
|
||||
|
||||
/*
|
||||
* Note that we don't pass split_rec here on purpose -
|
||||
* we've merged it into the left side.
|
||||
* we've merged it into the rec already.
|
||||
*/
|
||||
ret = ocfs2_merge_rec_right(inode, path_leaf_bh(left_path),
|
||||
handle, rec, el, split_index);
|
||||
ret = ocfs2_merge_rec_left(inode, path,
|
||||
handle, rec,
|
||||
dealloc,
|
||||
split_index);
|
||||
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0]));
|
||||
|
||||
ret = ocfs2_rotate_tree_left(inode, handle, left_path,
|
||||
ret = ocfs2_rotate_tree_left(inode, handle, path,
|
||||
dealloc);
|
||||
/*
|
||||
* Error from this last rotate is not critical, so
|
||||
@ -2915,8 +3197,9 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
|
||||
*/
|
||||
if (ctxt->c_contig_type == CONTIG_RIGHT) {
|
||||
ret = ocfs2_merge_rec_left(inode,
|
||||
path_leaf_bh(left_path),
|
||||
handle, split_rec, el,
|
||||
path,
|
||||
handle, split_rec,
|
||||
dealloc,
|
||||
split_index);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
@ -2924,8 +3207,8 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
|
||||
}
|
||||
} else {
|
||||
ret = ocfs2_merge_rec_right(inode,
|
||||
path_leaf_bh(left_path),
|
||||
handle, split_rec, el,
|
||||
path,
|
||||
handle, split_rec,
|
||||
split_index);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
@ -2938,7 +3221,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
|
||||
* The merge may have left an empty extent in
|
||||
* our leaf. Try to rotate it away.
|
||||
*/
|
||||
ret = ocfs2_rotate_tree_left(inode, handle, left_path,
|
||||
ret = ocfs2_rotate_tree_left(inode, handle, path,
|
||||
dealloc);
|
||||
if (ret)
|
||||
mlog_errno(ret);
|
||||
@ -3498,20 +3781,57 @@ out:
|
||||
}
|
||||
|
||||
static enum ocfs2_contig_type
|
||||
ocfs2_figure_merge_contig_type(struct inode *inode,
|
||||
ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
|
||||
struct ocfs2_extent_list *el, int index,
|
||||
struct ocfs2_extent_rec *split_rec)
|
||||
{
|
||||
struct ocfs2_extent_rec *rec;
|
||||
int status;
|
||||
enum ocfs2_contig_type ret = CONTIG_NONE;
|
||||
u32 left_cpos, right_cpos;
|
||||
struct ocfs2_extent_rec *rec = NULL;
|
||||
struct ocfs2_extent_list *new_el;
|
||||
struct ocfs2_path *left_path = NULL, *right_path = NULL;
|
||||
struct buffer_head *bh;
|
||||
struct ocfs2_extent_block *eb;
|
||||
|
||||
if (index > 0) {
|
||||
rec = &el->l_recs[index - 1];
|
||||
} else if (path->p_tree_depth > 0) {
|
||||
status = ocfs2_find_cpos_for_left_leaf(inode->i_sb,
|
||||
path, &left_cpos);
|
||||
if (status)
|
||||
goto out;
|
||||
|
||||
if (left_cpos != 0) {
|
||||
left_path = ocfs2_new_path(path_root_bh(path),
|
||||
path_root_el(path));
|
||||
if (!left_path)
|
||||
goto out;
|
||||
|
||||
status = ocfs2_find_path(inode, left_path, left_cpos);
|
||||
if (status)
|
||||
goto out;
|
||||
|
||||
new_el = path_leaf_el(left_path);
|
||||
|
||||
if (le16_to_cpu(new_el->l_next_free_rec) !=
|
||||
le16_to_cpu(new_el->l_count)) {
|
||||
bh = path_leaf_bh(left_path);
|
||||
eb = (struct ocfs2_extent_block *)bh->b_data;
|
||||
OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb,
|
||||
eb);
|
||||
goto out;
|
||||
}
|
||||
rec = &new_el->l_recs[
|
||||
le16_to_cpu(new_el->l_next_free_rec) - 1];
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* We're careful to check for an empty extent record here -
|
||||
* the merge code will know what to do if it sees one.
|
||||
*/
|
||||
|
||||
if (index > 0) {
|
||||
rec = &el->l_recs[index - 1];
|
||||
if (rec) {
|
||||
if (index == 1 && ocfs2_is_empty_extent(rec)) {
|
||||
if (split_rec->e_cpos == el->l_recs[index].e_cpos)
|
||||
ret = CONTIG_RIGHT;
|
||||
@ -3520,10 +3840,45 @@ ocfs2_figure_merge_contig_type(struct inode *inode,
|
||||
}
|
||||
}
|
||||
|
||||
if (index < (le16_to_cpu(el->l_next_free_rec) - 1)) {
|
||||
rec = NULL;
|
||||
if (index < (le16_to_cpu(el->l_next_free_rec) - 1))
|
||||
rec = &el->l_recs[index + 1];
|
||||
else if (le16_to_cpu(el->l_next_free_rec) == le16_to_cpu(el->l_count) &&
|
||||
path->p_tree_depth > 0) {
|
||||
status = ocfs2_find_cpos_for_right_leaf(inode->i_sb,
|
||||
path, &right_cpos);
|
||||
if (status)
|
||||
goto out;
|
||||
|
||||
if (right_cpos == 0)
|
||||
goto out;
|
||||
|
||||
right_path = ocfs2_new_path(path_root_bh(path),
|
||||
path_root_el(path));
|
||||
if (!right_path)
|
||||
goto out;
|
||||
|
||||
status = ocfs2_find_path(inode, right_path, right_cpos);
|
||||
if (status)
|
||||
goto out;
|
||||
|
||||
new_el = path_leaf_el(right_path);
|
||||
rec = &new_el->l_recs[0];
|
||||
if (ocfs2_is_empty_extent(rec)) {
|
||||
if (le16_to_cpu(new_el->l_next_free_rec) <= 1) {
|
||||
bh = path_leaf_bh(right_path);
|
||||
eb = (struct ocfs2_extent_block *)bh->b_data;
|
||||
OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb,
|
||||
eb);
|
||||
goto out;
|
||||
}
|
||||
rec = &new_el->l_recs[1];
|
||||
}
|
||||
}
|
||||
|
||||
if (rec) {
|
||||
enum ocfs2_contig_type contig_type;
|
||||
|
||||
rec = &el->l_recs[index + 1];
|
||||
contig_type = ocfs2_extent_contig(inode, rec, split_rec);
|
||||
|
||||
if (contig_type == CONTIG_LEFT && ret == CONTIG_RIGHT)
|
||||
@ -3532,6 +3887,12 @@ ocfs2_figure_merge_contig_type(struct inode *inode,
|
||||
ret = contig_type;
|
||||
}
|
||||
|
||||
out:
|
||||
if (left_path)
|
||||
ocfs2_free_path(left_path);
|
||||
if (right_path)
|
||||
ocfs2_free_path(right_path);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -3994,7 +4355,7 @@ static int __ocfs2_mark_extent_written(struct inode *inode,
|
||||
goto out;
|
||||
}
|
||||
|
||||
ctxt.c_contig_type = ocfs2_figure_merge_contig_type(inode, el,
|
||||
ctxt.c_contig_type = ocfs2_figure_merge_contig_type(inode, path, el,
|
||||
split_index,
|
||||
split_rec);
|
||||
|
||||
@ -4788,6 +5149,8 @@ static void ocfs2_truncate_log_worker(struct work_struct *work)
|
||||
status = ocfs2_flush_truncate_log(osb);
|
||||
if (status < 0)
|
||||
mlog_errno(status);
|
||||
else
|
||||
ocfs2_init_inode_steal_slot(osb);
|
||||
|
||||
mlog_exit(status);
|
||||
}
|
||||
|
@ -467,11 +467,11 @@ handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
|
||||
unsigned to)
|
||||
{
|
||||
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
|
||||
handle_t *handle = NULL;
|
||||
handle_t *handle;
|
||||
int ret = 0;
|
||||
|
||||
handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
|
||||
if (!handle) {
|
||||
if (IS_ERR(handle)) {
|
||||
ret = -ENOMEM;
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
@ -487,7 +487,7 @@ handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
|
||||
}
|
||||
out:
|
||||
if (ret) {
|
||||
if (handle)
|
||||
if (!IS_ERR(handle))
|
||||
ocfs2_commit_trans(osb, handle);
|
||||
handle = ERR_PTR(ret);
|
||||
}
|
||||
|
@ -1,4 +1,4 @@
|
||||
obj-$(CONFIG_OCFS2_FS) += ocfs2_nodemanager.o
|
||||
|
||||
ocfs2_nodemanager-objs := heartbeat.o masklog.o sys.o nodemanager.o \
|
||||
quorum.o tcp.o ver.o
|
||||
quorum.o tcp.o netdebug.o ver.o
|
||||
|
441
fs/ocfs2/cluster/netdebug.c
Normal file
441
fs/ocfs2/cluster/netdebug.c
Normal file
@ -0,0 +1,441 @@
|
||||
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||
*
|
||||
* netdebug.c
|
||||
*
|
||||
* debug functionality for o2net
|
||||
*
|
||||
* Copyright (C) 2005, 2008 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/idr.h>
|
||||
#include <linux/kref.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/debugfs.h>
|
||||
|
||||
#include <linux/uaccess.h>
|
||||
|
||||
#include "tcp.h"
|
||||
#include "nodemanager.h"
|
||||
#define MLOG_MASK_PREFIX ML_TCP
|
||||
#include "masklog.h"
|
||||
|
||||
#include "tcp_internal.h"
|
||||
|
||||
#define O2NET_DEBUG_DIR "o2net"
|
||||
#define SC_DEBUG_NAME "sock_containers"
|
||||
#define NST_DEBUG_NAME "send_tracking"
|
||||
|
||||
static struct dentry *o2net_dentry;
|
||||
static struct dentry *sc_dentry;
|
||||
static struct dentry *nst_dentry;
|
||||
|
||||
static DEFINE_SPINLOCK(o2net_debug_lock);
|
||||
|
||||
static LIST_HEAD(sock_containers);
|
||||
static LIST_HEAD(send_tracking);
|
||||
|
||||
void o2net_debug_add_nst(struct o2net_send_tracking *nst)
|
||||
{
|
||||
spin_lock(&o2net_debug_lock);
|
||||
list_add(&nst->st_net_debug_item, &send_tracking);
|
||||
spin_unlock(&o2net_debug_lock);
|
||||
}
|
||||
|
||||
void o2net_debug_del_nst(struct o2net_send_tracking *nst)
|
||||
{
|
||||
spin_lock(&o2net_debug_lock);
|
||||
if (!list_empty(&nst->st_net_debug_item))
|
||||
list_del_init(&nst->st_net_debug_item);
|
||||
spin_unlock(&o2net_debug_lock);
|
||||
}
|
||||
|
||||
static struct o2net_send_tracking
|
||||
*next_nst(struct o2net_send_tracking *nst_start)
|
||||
{
|
||||
struct o2net_send_tracking *nst, *ret = NULL;
|
||||
|
||||
assert_spin_locked(&o2net_debug_lock);
|
||||
|
||||
list_for_each_entry(nst, &nst_start->st_net_debug_item,
|
||||
st_net_debug_item) {
|
||||
/* discover the head of the list */
|
||||
if (&nst->st_net_debug_item == &send_tracking)
|
||||
break;
|
||||
|
||||
/* use st_task to detect real nsts in the list */
|
||||
if (nst->st_task != NULL) {
|
||||
ret = nst;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * seq_file ->start: return the real tracking record that follows this
 * reader's cursor (seq->private), or NULL when there is none.  @pos is
 * not consulted; the position is carried by the cursor's place in the
 * list.
 */
static void *nst_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct o2net_send_tracking *cursor = seq->private;
	struct o2net_send_tracking *first;

	spin_lock(&o2net_debug_lock);
	first = next_nst(cursor);
	spin_unlock(&o2net_debug_lock);

	return first;
}
|
||||
|
||||
/*
 * seq_file ->next: advance this reader's cursor past the record that
 * follows it.
 *
 * The cursor is unlinked and, if a real record was found, re-linked
 * directly after that record.  When nothing follows, the cursor is left
 * off the list.  The return value is only used as an "are we done"
 * indicator, so it just needs to be NULL at the end.
 */
static void *nst_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct o2net_send_tracking *cursor = seq->private;
	struct o2net_send_tracking *found;

	spin_lock(&o2net_debug_lock);

	found = next_nst(cursor);
	list_del_init(&cursor->st_net_debug_item);
	if (found != NULL)
		list_add(&cursor->st_net_debug_item,
			 &found->st_net_debug_item);

	spin_unlock(&o2net_debug_lock);

	return found;
}
|
||||
|
||||
static int nst_seq_show(struct seq_file *seq, void *v)
|
||||
{
|
||||
struct o2net_send_tracking *nst, *dummy_nst = seq->private;
|
||||
|
||||
spin_lock(&o2net_debug_lock);
|
||||
nst = next_nst(dummy_nst);
|
||||
|
||||
if (nst != NULL) {
|
||||
/* get_task_comm isn't exported. oh well. */
|
||||
seq_printf(seq, "%p:\n"
|
||||
" pid: %lu\n"
|
||||
" tgid: %lu\n"
|
||||
" process name: %s\n"
|
||||
" node: %u\n"
|
||||
" sc: %p\n"
|
||||
" message id: %d\n"
|
||||
" message type: %u\n"
|
||||
" message key: 0x%08x\n"
|
||||
" sock acquiry: %lu.%lu\n"
|
||||
" send start: %lu.%lu\n"
|
||||
" wait start: %lu.%lu\n",
|
||||
nst, (unsigned long)nst->st_task->pid,
|
||||
(unsigned long)nst->st_task->tgid,
|
||||
nst->st_task->comm, nst->st_node,
|
||||
nst->st_sc, nst->st_id, nst->st_msg_type,
|
||||
nst->st_msg_key,
|
||||
nst->st_sock_time.tv_sec, nst->st_sock_time.tv_usec,
|
||||
nst->st_send_time.tv_sec, nst->st_send_time.tv_usec,
|
||||
nst->st_status_time.tv_sec,
|
||||
nst->st_status_time.tv_usec);
|
||||
}
|
||||
|
||||
spin_unlock(&o2net_debug_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * seq_file ->stop: nothing to undo, since ->start and ->next drop
 * o2net_debug_lock before they return.
 */
static void nst_seq_stop(struct seq_file *seq, void *v)
{
}
|
||||
|
||||
static struct seq_operations nst_seq_ops = {
|
||||
.start = nst_seq_start,
|
||||
.next = nst_seq_next,
|
||||
.stop = nst_seq_stop,
|
||||
.show = nst_seq_show,
|
||||
};
|
||||
|
||||
static int nst_fop_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct o2net_send_tracking *dummy_nst;
|
||||
struct seq_file *seq;
|
||||
int ret;
|
||||
|
||||
dummy_nst = kmalloc(sizeof(struct o2net_send_tracking), GFP_KERNEL);
|
||||
if (dummy_nst == NULL) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
dummy_nst->st_task = NULL;
|
||||
|
||||
ret = seq_open(file, &nst_seq_ops);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
seq = file->private_data;
|
||||
seq->private = dummy_nst;
|
||||
o2net_debug_add_nst(dummy_nst);
|
||||
|
||||
dummy_nst = NULL;
|
||||
|
||||
out:
|
||||
kfree(dummy_nst);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int nst_fop_release(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct seq_file *seq = file->private_data;
|
||||
struct o2net_send_tracking *dummy_nst = seq->private;
|
||||
|
||||
o2net_debug_del_nst(dummy_nst);
|
||||
return seq_release_private(inode, file);
|
||||
}
|
||||
|
||||
static struct file_operations nst_seq_fops = {
|
||||
.open = nst_fop_open,
|
||||
.read = seq_read,
|
||||
.llseek = seq_lseek,
|
||||
.release = nst_fop_release,
|
||||
};
|
||||
|
||||
void o2net_debug_add_sc(struct o2net_sock_container *sc)
|
||||
{
|
||||
spin_lock(&o2net_debug_lock);
|
||||
list_add(&sc->sc_net_debug_item, &sock_containers);
|
||||
spin_unlock(&o2net_debug_lock);
|
||||
}
|
||||
|
||||
void o2net_debug_del_sc(struct o2net_sock_container *sc)
|
||||
{
|
||||
spin_lock(&o2net_debug_lock);
|
||||
list_del_init(&sc->sc_net_debug_item);
|
||||
spin_unlock(&o2net_debug_lock);
|
||||
}
|
||||
|
||||
static struct o2net_sock_container
|
||||
*next_sc(struct o2net_sock_container *sc_start)
|
||||
{
|
||||
struct o2net_sock_container *sc, *ret = NULL;
|
||||
|
||||
assert_spin_locked(&o2net_debug_lock);
|
||||
|
||||
list_for_each_entry(sc, &sc_start->sc_net_debug_item,
|
||||
sc_net_debug_item) {
|
||||
/* discover the head of the list miscast as a sc */
|
||||
if (&sc->sc_net_debug_item == &sock_containers)
|
||||
break;
|
||||
|
||||
/* use sc_page to detect real scs in the list */
|
||||
if (sc->sc_page != NULL) {
|
||||
ret = sc;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * seq_file ->start: return the real sock container that follows this
 * reader's cursor (seq->private), or NULL when there is none.  @pos is
 * not consulted; the position is carried by the cursor's place in the
 * list.
 */
static void *sc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct o2net_sock_container *cursor = seq->private;
	struct o2net_sock_container *first;

	spin_lock(&o2net_debug_lock);
	first = next_sc(cursor);
	spin_unlock(&o2net_debug_lock);

	return first;
}
|
||||
|
||||
/*
 * seq_file ->next: advance this reader's cursor past the container that
 * follows it.
 *
 * The cursor is unlinked and, if a real container was found, re-linked
 * directly after it.  When nothing follows, the cursor is left off the
 * list.  The return value is only used as an "are we done" indicator, so
 * it just needs to be NULL at the end.
 */
static void *sc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct o2net_sock_container *cursor = seq->private;
	struct o2net_sock_container *found;

	spin_lock(&o2net_debug_lock);

	found = next_sc(cursor);
	list_del_init(&cursor->sc_net_debug_item);
	if (found != NULL)
		list_add(&cursor->sc_net_debug_item,
			 &found->sc_net_debug_item);

	spin_unlock(&o2net_debug_lock);

	return found;
}
|
||||
|
||||
/*
 * Expand a struct timeval lvalue into the two comma-separated arguments
 * "tv_sec, tv_usec" for a printf-style "%lu.%lu" pair.  The argument is
 * parenthesised so that expressions such as *ptr expand correctly.
 */
#define TV_SEC_USEC(TV) (TV).tv_sec, (TV).tv_usec
|
||||
|
||||
static int sc_seq_show(struct seq_file *seq, void *v)
|
||||
{
|
||||
struct o2net_sock_container *sc, *dummy_sc = seq->private;
|
||||
|
||||
spin_lock(&o2net_debug_lock);
|
||||
sc = next_sc(dummy_sc);
|
||||
|
||||
if (sc != NULL) {
|
||||
struct inet_sock *inet = NULL;
|
||||
|
||||
__be32 saddr = 0, daddr = 0;
|
||||
__be16 sport = 0, dport = 0;
|
||||
|
||||
if (sc->sc_sock) {
|
||||
inet = inet_sk(sc->sc_sock->sk);
|
||||
/* the stack's structs aren't sparse endian clean */
|
||||
saddr = (__force __be32)inet->saddr;
|
||||
daddr = (__force __be32)inet->daddr;
|
||||
sport = (__force __be16)inet->sport;
|
||||
dport = (__force __be16)inet->dport;
|
||||
}
|
||||
|
||||
/* XXX sigh, inet-> doesn't have sparse annotation so any
|
||||
* use of it here generates a warning with -Wbitwise */
|
||||
seq_printf(seq, "%p:\n"
|
||||
" krefs: %d\n"
|
||||
" sock: %u.%u.%u.%u:%u -> "
|
||||
"%u.%u.%u.%u:%u\n"
|
||||
" remote node: %s\n"
|
||||
" page off: %zu\n"
|
||||
" handshake ok: %u\n"
|
||||
" timer: %lu.%lu\n"
|
||||
" data ready: %lu.%lu\n"
|
||||
" advance start: %lu.%lu\n"
|
||||
" advance stop: %lu.%lu\n"
|
||||
" func start: %lu.%lu\n"
|
||||
" func stop: %lu.%lu\n"
|
||||
" func key: %u\n"
|
||||
" func type: %u\n",
|
||||
sc,
|
||||
atomic_read(&sc->sc_kref.refcount),
|
||||
NIPQUAD(saddr), inet ? ntohs(sport) : 0,
|
||||
NIPQUAD(daddr), inet ? ntohs(dport) : 0,
|
||||
sc->sc_node->nd_name,
|
||||
sc->sc_page_off,
|
||||
sc->sc_handshake_ok,
|
||||
TV_SEC_USEC(sc->sc_tv_timer),
|
||||
TV_SEC_USEC(sc->sc_tv_data_ready),
|
||||
TV_SEC_USEC(sc->sc_tv_advance_start),
|
||||
TV_SEC_USEC(sc->sc_tv_advance_stop),
|
||||
TV_SEC_USEC(sc->sc_tv_func_start),
|
||||
TV_SEC_USEC(sc->sc_tv_func_stop),
|
||||
sc->sc_msg_key,
|
||||
sc->sc_msg_type);
|
||||
}
|
||||
|
||||
|
||||
spin_unlock(&o2net_debug_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * seq_file ->stop: nothing to undo, since ->start and ->next drop
 * o2net_debug_lock before they return.
 */
static void sc_seq_stop(struct seq_file *seq, void *v)
{
}
|
||||
|
||||
static struct seq_operations sc_seq_ops = {
|
||||
.start = sc_seq_start,
|
||||
.next = sc_seq_next,
|
||||
.stop = sc_seq_stop,
|
||||
.show = sc_seq_show,
|
||||
};
|
||||
|
||||
static int sc_fop_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct o2net_sock_container *dummy_sc;
|
||||
struct seq_file *seq;
|
||||
int ret;
|
||||
|
||||
dummy_sc = kmalloc(sizeof(struct o2net_sock_container), GFP_KERNEL);
|
||||
if (dummy_sc == NULL) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
dummy_sc->sc_page = NULL;
|
||||
|
||||
ret = seq_open(file, &sc_seq_ops);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
seq = file->private_data;
|
||||
seq->private = dummy_sc;
|
||||
o2net_debug_add_sc(dummy_sc);
|
||||
|
||||
dummy_sc = NULL;
|
||||
|
||||
out:
|
||||
kfree(dummy_sc);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int sc_fop_release(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct seq_file *seq = file->private_data;
|
||||
struct o2net_sock_container *dummy_sc = seq->private;
|
||||
|
||||
o2net_debug_del_sc(dummy_sc);
|
||||
return seq_release_private(inode, file);
|
||||
}
|
||||
|
||||
static struct file_operations sc_seq_fops = {
|
||||
.open = sc_fop_open,
|
||||
.read = seq_read,
|
||||
.llseek = seq_lseek,
|
||||
.release = sc_fop_release,
|
||||
};
|
||||
|
||||
int o2net_debugfs_init(void)
|
||||
{
|
||||
o2net_dentry = debugfs_create_dir(O2NET_DEBUG_DIR, NULL);
|
||||
if (!o2net_dentry) {
|
||||
mlog_errno(-ENOMEM);
|
||||
goto bail;
|
||||
}
|
||||
|
||||
nst_dentry = debugfs_create_file(NST_DEBUG_NAME, S_IFREG|S_IRUSR,
|
||||
o2net_dentry, NULL,
|
||||
&nst_seq_fops);
|
||||
if (!nst_dentry) {
|
||||
mlog_errno(-ENOMEM);
|
||||
goto bail;
|
||||
}
|
||||
|
||||
sc_dentry = debugfs_create_file(SC_DEBUG_NAME, S_IFREG|S_IRUSR,
|
||||
o2net_dentry, NULL,
|
||||
&sc_seq_fops);
|
||||
if (!sc_dentry) {
|
||||
mlog_errno(-ENOMEM);
|
||||
goto bail;
|
||||
}
|
||||
|
||||
return 0;
|
||||
bail:
|
||||
if (sc_dentry)
|
||||
debugfs_remove(sc_dentry);
|
||||
if (nst_dentry)
|
||||
debugfs_remove(nst_dentry);
|
||||
if (o2net_dentry)
|
||||
debugfs_remove(o2net_dentry);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
void o2net_debugfs_exit(void)
|
||||
{
|
||||
if (sc_dentry)
|
||||
debugfs_remove(sc_dentry);
|
||||
if (nst_dentry)
|
||||
debugfs_remove(nst_dentry);
|
||||
if (o2net_dentry)
|
||||
debugfs_remove(o2net_dentry);
|
||||
}
|
||||
|
||||
#endif /* CONFIG_DEBUG_FS */
|
@ -959,7 +959,10 @@ static int __init init_o2nm(void)
|
||||
cluster_print_version();
|
||||
|
||||
o2hb_init();
|
||||
o2net_init();
|
||||
|
||||
ret = o2net_init();
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ocfs2_table_header = register_sysctl_table(ocfs2_root_table);
|
||||
if (!ocfs2_table_header) {
|
||||
|
@ -57,6 +57,7 @@ static struct kset *o2cb_kset;
|
||||
void o2cb_sys_shutdown(void)
|
||||
{
|
||||
mlog_sys_shutdown();
|
||||
sysfs_remove_link(NULL, "o2cb");
|
||||
kset_unregister(o2cb_kset);
|
||||
}
|
||||
|
||||
@ -68,6 +69,14 @@ int o2cb_sys_init(void)
|
||||
if (!o2cb_kset)
|
||||
return -ENOMEM;
|
||||
|
||||
/*
|
||||
* Create this symlink for backwards compatibility with old
|
||||
* versions of ocfs2-tools which look for things in /sys/o2cb.
|
||||
*/
|
||||
ret = sysfs_create_link(NULL, &o2cb_kset->kobj, "o2cb");
|
||||
if (ret)
|
||||
goto error;
|
||||
|
||||
ret = sysfs_create_group(&o2cb_kset->kobj, &o2cb_attr_group);
|
||||
if (ret)
|
||||
goto error;
|
||||
|
@ -142,23 +142,65 @@ static void o2net_idle_timer(unsigned long data);
|
||||
static void o2net_sc_postpone_idle(struct o2net_sock_container *sc);
|
||||
static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc);
|
||||
|
||||
/*
|
||||
* FIXME: These should use to_o2nm_cluster_from_node(), but we end up
|
||||
* losing our parent link to the cluster during shutdown. This can be
|
||||
* solved by adding a pre-removal callback to configfs, or passing
|
||||
* around the cluster with the node. -jeffm
|
||||
*/
|
||||
static inline int o2net_reconnect_delay(struct o2nm_node *node)
|
||||
static void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype,
|
||||
u32 msgkey, struct task_struct *task, u8 node)
|
||||
{
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
INIT_LIST_HEAD(&nst->st_net_debug_item);
|
||||
nst->st_task = task;
|
||||
nst->st_msg_type = msgtype;
|
||||
nst->st_msg_key = msgkey;
|
||||
nst->st_node = node;
|
||||
#endif
|
||||
}
|
||||
|
||||
static void o2net_set_nst_sock_time(struct o2net_send_tracking *nst)
|
||||
{
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
do_gettimeofday(&nst->st_sock_time);
|
||||
#endif
|
||||
}
|
||||
|
||||
static void o2net_set_nst_send_time(struct o2net_send_tracking *nst)
|
||||
{
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
do_gettimeofday(&nst->st_send_time);
|
||||
#endif
|
||||
}
|
||||
|
||||
static void o2net_set_nst_status_time(struct o2net_send_tracking *nst)
|
||||
{
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
do_gettimeofday(&nst->st_status_time);
|
||||
#endif
|
||||
}
|
||||
|
||||
static void o2net_set_nst_sock_container(struct o2net_send_tracking *nst,
|
||||
struct o2net_sock_container *sc)
|
||||
{
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
nst->st_sc = sc;
|
||||
#endif
|
||||
}
|
||||
|
||||
static void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id)
|
||||
{
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
nst->st_id = msg_id;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline int o2net_reconnect_delay(void)
|
||||
{
|
||||
return o2nm_single_cluster->cl_reconnect_delay_ms;
|
||||
}
|
||||
|
||||
static inline int o2net_keepalive_delay(struct o2nm_node *node)
|
||||
static inline int o2net_keepalive_delay(void)
|
||||
{
|
||||
return o2nm_single_cluster->cl_keepalive_delay_ms;
|
||||
}
|
||||
|
||||
static inline int o2net_idle_timeout(struct o2nm_node *node)
|
||||
static inline int o2net_idle_timeout(void)
|
||||
{
|
||||
return o2nm_single_cluster->cl_idle_timeout_ms;
|
||||
}
|
||||
@ -296,6 +338,7 @@ static void sc_kref_release(struct kref *kref)
|
||||
o2nm_node_put(sc->sc_node);
|
||||
sc->sc_node = NULL;
|
||||
|
||||
o2net_debug_del_sc(sc);
|
||||
kfree(sc);
|
||||
}
|
||||
|
||||
@ -336,6 +379,7 @@ static struct o2net_sock_container *sc_alloc(struct o2nm_node *node)
|
||||
|
||||
ret = sc;
|
||||
sc->sc_page = page;
|
||||
o2net_debug_add_sc(sc);
|
||||
sc = NULL;
|
||||
page = NULL;
|
||||
|
||||
@ -399,8 +443,6 @@ static void o2net_set_nn_state(struct o2net_node *nn,
|
||||
mlog_bug_on_msg(err && valid, "err %d valid %u\n", err, valid);
|
||||
mlog_bug_on_msg(valid && !sc, "valid %u sc %p\n", valid, sc);
|
||||
|
||||
/* we won't reconnect after our valid conn goes away for
|
||||
* this hb iteration.. here so it shows up in the logs */
|
||||
if (was_valid && !valid && err == 0)
|
||||
err = -ENOTCONN;
|
||||
|
||||
@ -430,11 +472,6 @@ static void o2net_set_nn_state(struct o2net_node *nn,
|
||||
|
||||
if (!was_valid && valid) {
|
||||
o2quo_conn_up(o2net_num_from_nn(nn));
|
||||
/* this is a bit of a hack. we only try reconnecting
|
||||
* when heartbeating starts until we get a connection.
|
||||
* if that connection then dies we don't try reconnecting.
|
||||
* the only way to start connecting again is to down
|
||||
* heartbeat and bring it back up. */
|
||||
cancel_delayed_work(&nn->nn_connect_expired);
|
||||
printk(KERN_INFO "o2net: %s " SC_NODEF_FMT "\n",
|
||||
o2nm_this_node() > sc->sc_node->nd_num ?
|
||||
@ -451,12 +488,24 @@ static void o2net_set_nn_state(struct o2net_node *nn,
|
||||
/* delay if we're within a RECONNECT_DELAY of the
|
||||
* last attempt */
|
||||
delay = (nn->nn_last_connect_attempt +
|
||||
msecs_to_jiffies(o2net_reconnect_delay(NULL)))
|
||||
msecs_to_jiffies(o2net_reconnect_delay()))
|
||||
- jiffies;
|
||||
if (delay > msecs_to_jiffies(o2net_reconnect_delay(NULL)))
|
||||
if (delay > msecs_to_jiffies(o2net_reconnect_delay()))
|
||||
delay = 0;
|
||||
mlog(ML_CONN, "queueing conn attempt in %lu jiffies\n", delay);
|
||||
queue_delayed_work(o2net_wq, &nn->nn_connect_work, delay);
|
||||
|
||||
/*
|
||||
* Delay the expired work after idle timeout.
|
||||
*
|
||||
* We might have lots of failed connection attempts that run
|
||||
* through here but we only cancel the connect_expired work when
|
||||
* a connection attempt succeeds. So only the first enqueue of
|
||||
* the connect_expired work will do anything. The rest will see
|
||||
* that it's already queued and do nothing.
|
||||
*/
|
||||
delay += msecs_to_jiffies(o2net_idle_timeout());
|
||||
queue_delayed_work(o2net_wq, &nn->nn_connect_expired, delay);
|
||||
}
|
||||
|
||||
/* keep track of the nn's sc ref for the caller */
|
||||
@ -914,6 +963,9 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec,
|
||||
struct o2net_status_wait nsw = {
|
||||
.ns_node_item = LIST_HEAD_INIT(nsw.ns_node_item),
|
||||
};
|
||||
struct o2net_send_tracking nst;
|
||||
|
||||
o2net_init_nst(&nst, msg_type, key, current, target_node);
|
||||
|
||||
if (o2net_wq == NULL) {
|
||||
mlog(0, "attempt to tx without o2netd running\n");
|
||||
@ -939,6 +991,10 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec,
|
||||
goto out;
|
||||
}
|
||||
|
||||
o2net_debug_add_nst(&nst);
|
||||
|
||||
o2net_set_nst_sock_time(&nst);
|
||||
|
||||
ret = wait_event_interruptible(nn->nn_sc_wq,
|
||||
o2net_tx_can_proceed(nn, &sc, &error));
|
||||
if (!ret && error)
|
||||
@ -946,6 +1002,8 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec,
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
o2net_set_nst_sock_container(&nst, sc);
|
||||
|
||||
veclen = caller_veclen + 1;
|
||||
vec = kmalloc(sizeof(struct kvec) * veclen, GFP_ATOMIC);
|
||||
if (vec == NULL) {
|
||||
@ -972,6 +1030,9 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec,
|
||||
goto out;
|
||||
|
||||
msg->msg_num = cpu_to_be32(nsw.ns_id);
|
||||
o2net_set_nst_msg_id(&nst, nsw.ns_id);
|
||||
|
||||
o2net_set_nst_send_time(&nst);
|
||||
|
||||
/* finally, convert the message header to network byte-order
|
||||
* and send */
|
||||
@ -986,6 +1047,7 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec,
|
||||
}
|
||||
|
||||
/* wait on other node's handler */
|
||||
o2net_set_nst_status_time(&nst);
|
||||
wait_event(nsw.ns_wq, o2net_nsw_completed(nn, &nsw));
|
||||
|
||||
/* Note that we avoid overwriting the callers status return
|
||||
@ -998,6 +1060,7 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec,
|
||||
mlog(0, "woken, returning system status %d, user status %d\n",
|
||||
ret, nsw.ns_status);
|
||||
out:
|
||||
o2net_debug_del_nst(&nst); /* must be before dropping sc and node */
|
||||
if (sc)
|
||||
sc_put(sc);
|
||||
if (vec)
|
||||
@ -1154,23 +1217,23 @@ static int o2net_check_handshake(struct o2net_sock_container *sc)
|
||||
* but isn't. This can ultimately cause corruption.
|
||||
*/
|
||||
if (be32_to_cpu(hand->o2net_idle_timeout_ms) !=
|
||||
o2net_idle_timeout(sc->sc_node)) {
|
||||
o2net_idle_timeout()) {
|
||||
mlog(ML_NOTICE, SC_NODEF_FMT " uses a network idle timeout of "
|
||||
"%u ms, but we use %u ms locally. disconnecting\n",
|
||||
SC_NODEF_ARGS(sc),
|
||||
be32_to_cpu(hand->o2net_idle_timeout_ms),
|
||||
o2net_idle_timeout(sc->sc_node));
|
||||
o2net_idle_timeout());
|
||||
o2net_ensure_shutdown(nn, sc, -ENOTCONN);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (be32_to_cpu(hand->o2net_keepalive_delay_ms) !=
|
||||
o2net_keepalive_delay(sc->sc_node)) {
|
||||
o2net_keepalive_delay()) {
|
||||
mlog(ML_NOTICE, SC_NODEF_FMT " uses a keepalive delay of "
|
||||
"%u ms, but we use %u ms locally. disconnecting\n",
|
||||
SC_NODEF_ARGS(sc),
|
||||
be32_to_cpu(hand->o2net_keepalive_delay_ms),
|
||||
o2net_keepalive_delay(sc->sc_node));
|
||||
o2net_keepalive_delay());
|
||||
o2net_ensure_shutdown(nn, sc, -ENOTCONN);
|
||||
return -1;
|
||||
}
|
||||
@ -1193,6 +1256,7 @@ static int o2net_check_handshake(struct o2net_sock_container *sc)
|
||||
* shut down already */
|
||||
if (nn->nn_sc == sc) {
|
||||
o2net_sc_reset_idle_timer(sc);
|
||||
atomic_set(&nn->nn_timeout, 0);
|
||||
o2net_set_nn_state(nn, sc, 1, 0);
|
||||
}
|
||||
spin_unlock(&nn->nn_lock);
|
||||
@ -1347,12 +1411,11 @@ static void o2net_initialize_handshake(void)
|
||||
{
|
||||
o2net_hand->o2hb_heartbeat_timeout_ms = cpu_to_be32(
|
||||
O2HB_MAX_WRITE_TIMEOUT_MS);
|
||||
o2net_hand->o2net_idle_timeout_ms = cpu_to_be32(
|
||||
o2net_idle_timeout(NULL));
|
||||
o2net_hand->o2net_idle_timeout_ms = cpu_to_be32(o2net_idle_timeout());
|
||||
o2net_hand->o2net_keepalive_delay_ms = cpu_to_be32(
|
||||
o2net_keepalive_delay(NULL));
|
||||
o2net_keepalive_delay());
|
||||
o2net_hand->o2net_reconnect_delay_ms = cpu_to_be32(
|
||||
o2net_reconnect_delay(NULL));
|
||||
o2net_reconnect_delay());
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------ */
|
||||
@ -1391,14 +1454,15 @@ static void o2net_sc_send_keep_req(struct work_struct *work)
|
||||
static void o2net_idle_timer(unsigned long data)
|
||||
{
|
||||
struct o2net_sock_container *sc = (struct o2net_sock_container *)data;
|
||||
struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num);
|
||||
struct timeval now;
|
||||
|
||||
do_gettimeofday(&now);
|
||||
|
||||
printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT " has been idle for %u.%u "
|
||||
"seconds, shutting it down.\n", SC_NODEF_ARGS(sc),
|
||||
o2net_idle_timeout(sc->sc_node) / 1000,
|
||||
o2net_idle_timeout(sc->sc_node) % 1000);
|
||||
o2net_idle_timeout() / 1000,
|
||||
o2net_idle_timeout() % 1000);
|
||||
mlog(ML_NOTICE, "here are some times that might help debug the "
|
||||
"situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv "
|
||||
"%ld.%ld:%ld.%ld func (%08x:%u) %ld.%ld:%ld.%ld)\n",
|
||||
@ -1413,6 +1477,12 @@ static void o2net_idle_timer(unsigned long data)
|
||||
sc->sc_tv_func_start.tv_sec, (long) sc->sc_tv_func_start.tv_usec,
|
||||
sc->sc_tv_func_stop.tv_sec, (long) sc->sc_tv_func_stop.tv_usec);
|
||||
|
||||
/*
|
||||
* Initialize the nn_timeout so that the next connection attempt
|
||||
* will continue in o2net_start_connect.
|
||||
*/
|
||||
atomic_set(&nn->nn_timeout, 1);
|
||||
|
||||
o2net_sc_queue_work(sc, &sc->sc_shutdown_work);
|
||||
}
|
||||
|
||||
@ -1420,10 +1490,10 @@ static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc)
|
||||
{
|
||||
o2net_sc_cancel_delayed_work(sc, &sc->sc_keepalive_work);
|
||||
o2net_sc_queue_delayed_work(sc, &sc->sc_keepalive_work,
|
||||
msecs_to_jiffies(o2net_keepalive_delay(sc->sc_node)));
|
||||
msecs_to_jiffies(o2net_keepalive_delay()));
|
||||
do_gettimeofday(&sc->sc_tv_timer);
|
||||
mod_timer(&sc->sc_idle_timeout,
|
||||
jiffies + msecs_to_jiffies(o2net_idle_timeout(sc->sc_node)));
|
||||
jiffies + msecs_to_jiffies(o2net_idle_timeout()));
|
||||
}
|
||||
|
||||
static void o2net_sc_postpone_idle(struct o2net_sock_container *sc)
|
||||
@ -1447,6 +1517,7 @@ static void o2net_start_connect(struct work_struct *work)
|
||||
struct socket *sock = NULL;
|
||||
struct sockaddr_in myaddr = {0, }, remoteaddr = {0, };
|
||||
int ret = 0, stop;
|
||||
unsigned int timeout;
|
||||
|
||||
/* if we're greater we initiate tx, otherwise we accept */
|
||||
if (o2nm_this_node() <= o2net_num_from_nn(nn))
|
||||
@ -1466,8 +1537,17 @@ static void o2net_start_connect(struct work_struct *work)
|
||||
}
|
||||
|
||||
spin_lock(&nn->nn_lock);
|
||||
/* see if we already have one pending or have given up */
|
||||
stop = (nn->nn_sc || nn->nn_persistent_error);
|
||||
/*
|
||||
* see if we already have one pending or have given up.
|
||||
* For nn_timeout, it is set when we close the connection
|
||||
* because of the idle time out. So it means that we have
|
||||
* at least connected to that node successfully once,
|
||||
* now try to connect to it again.
|
||||
*/
|
||||
timeout = atomic_read(&nn->nn_timeout);
|
||||
stop = (nn->nn_sc ||
|
||||
(nn->nn_persistent_error &&
|
||||
(nn->nn_persistent_error != -ENOTCONN || timeout == 0)));
|
||||
spin_unlock(&nn->nn_lock);
|
||||
if (stop)
|
||||
goto out;
|
||||
@ -1555,8 +1635,8 @@ static void o2net_connect_expired(struct work_struct *work)
|
||||
mlog(ML_ERROR, "no connection established with node %u after "
|
||||
"%u.%u seconds, giving up and returning errors.\n",
|
||||
o2net_num_from_nn(nn),
|
||||
o2net_idle_timeout(NULL) / 1000,
|
||||
o2net_idle_timeout(NULL) % 1000);
|
||||
o2net_idle_timeout() / 1000,
|
||||
o2net_idle_timeout() % 1000);
|
||||
|
||||
o2net_set_nn_state(nn, NULL, 0, -ENOTCONN);
|
||||
}
|
||||
@ -1579,6 +1659,7 @@ void o2net_disconnect_node(struct o2nm_node *node)
|
||||
|
||||
/* don't reconnect until it's heartbeating again */
|
||||
spin_lock(&nn->nn_lock);
|
||||
atomic_set(&nn->nn_timeout, 0);
|
||||
o2net_set_nn_state(nn, NULL, 0, -ENOTCONN);
|
||||
spin_unlock(&nn->nn_lock);
|
||||
|
||||
@ -1610,20 +1691,15 @@ static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num,
|
||||
|
||||
/* ensure an immediate connect attempt */
|
||||
nn->nn_last_connect_attempt = jiffies -
|
||||
(msecs_to_jiffies(o2net_reconnect_delay(node)) + 1);
|
||||
(msecs_to_jiffies(o2net_reconnect_delay()) + 1);
|
||||
|
||||
if (node_num != o2nm_this_node()) {
|
||||
/* heartbeat doesn't work unless a local node number is
|
||||
* configured and doing so brings up the o2net_wq, so we can
|
||||
* use it.. */
|
||||
queue_delayed_work(o2net_wq, &nn->nn_connect_expired,
|
||||
msecs_to_jiffies(o2net_idle_timeout(node)));
|
||||
|
||||
/* believe it or not, accept and node heartbeating testing
|
||||
* can succeed for this node before we got here.. so
|
||||
* only use set_nn_state to clear the persistent error
|
||||
* if that hasn't already happened */
|
||||
spin_lock(&nn->nn_lock);
|
||||
atomic_set(&nn->nn_timeout, 0);
|
||||
if (nn->nn_persistent_error)
|
||||
o2net_set_nn_state(nn, NULL, 0, 0);
|
||||
spin_unlock(&nn->nn_lock);
|
||||
@ -1747,6 +1823,7 @@ static int o2net_accept_one(struct socket *sock)
|
||||
new_sock = NULL;
|
||||
|
||||
spin_lock(&nn->nn_lock);
|
||||
atomic_set(&nn->nn_timeout, 0);
|
||||
o2net_set_nn_state(nn, sc, 0, 0);
|
||||
spin_unlock(&nn->nn_lock);
|
||||
|
||||
@ -1922,6 +1999,9 @@ int o2net_init(void)
|
||||
|
||||
o2quo_init();
|
||||
|
||||
if (o2net_debugfs_init())
|
||||
return -ENOMEM;
|
||||
|
||||
o2net_hand = kzalloc(sizeof(struct o2net_handshake), GFP_KERNEL);
|
||||
o2net_keep_req = kzalloc(sizeof(struct o2net_msg), GFP_KERNEL);
|
||||
o2net_keep_resp = kzalloc(sizeof(struct o2net_msg), GFP_KERNEL);
|
||||
@ -1941,6 +2021,7 @@ int o2net_init(void)
|
||||
for (i = 0; i < ARRAY_SIZE(o2net_nodes); i++) {
|
||||
struct o2net_node *nn = o2net_nn_from_num(i);
|
||||
|
||||
atomic_set(&nn->nn_timeout, 0);
|
||||
spin_lock_init(&nn->nn_lock);
|
||||
INIT_DELAYED_WORK(&nn->nn_connect_work, o2net_start_connect);
|
||||
INIT_DELAYED_WORK(&nn->nn_connect_expired,
|
||||
@ -1962,4 +2043,5 @@ void o2net_exit(void)
|
||||
kfree(o2net_hand);
|
||||
kfree(o2net_keep_req);
|
||||
kfree(o2net_keep_resp);
|
||||
o2net_debugfs_exit();
|
||||
}
|
||||
|
@ -117,4 +117,36 @@ int o2net_num_connected_peers(void);
|
||||
int o2net_init(void);
|
||||
void o2net_exit(void);
|
||||
|
||||
struct o2net_send_tracking;
struct o2net_sock_container;

/*
 * o2net debug hooks.
 *
 * With CONFIG_DEBUG_FS these are declared here and implemented outside
 * this header.  Without it they are empty stubs so callers need no
 * #ifdefs.  The stubs are "static inline" rather than plain "static":
 * this header is included by several files, and a plain static function
 * defined in a header produces an unused-function warning in every file
 * that does not call it.
 */
#ifdef CONFIG_DEBUG_FS
int o2net_debugfs_init(void);
void o2net_debugfs_exit(void);
void o2net_debug_add_nst(struct o2net_send_tracking *nst);
void o2net_debug_del_nst(struct o2net_send_tracking *nst);
void o2net_debug_add_sc(struct o2net_sock_container *sc);
void o2net_debug_del_sc(struct o2net_sock_container *sc);
#else
/* Nothing to set up; always reports success. */
static inline int o2net_debugfs_init(void)
{
	return 0;
}
static inline void o2net_debugfs_exit(void)
{
}
static inline void o2net_debug_add_nst(struct o2net_send_tracking *nst)
{
}
static inline void o2net_debug_del_nst(struct o2net_send_tracking *nst)
{
}
static inline void o2net_debug_add_sc(struct o2net_sock_container *sc)
{
}
static inline void o2net_debug_del_sc(struct o2net_sock_container *sc)
{
}
#endif	/* CONFIG_DEBUG_FS */
|
||||
|
||||
#endif /* O2CLUSTER_TCP_H */
|
||||
|
@ -95,6 +95,8 @@ struct o2net_node {
|
||||
unsigned nn_sc_valid:1;
|
||||
/* if this is set tx just returns it */
|
||||
int nn_persistent_error;
|
||||
/* It is only set to 1 after the idle time out. */
|
||||
atomic_t nn_timeout;
|
||||
|
||||
/* threads waiting for an sc to arrive wait on the wq for generation
|
||||
* to increase. it is increased when a connecting socket succeeds
|
||||
@ -164,7 +166,9 @@ struct o2net_sock_container {
|
||||
/* original handlers for the sockets */
|
||||
void (*sc_state_change)(struct sock *sk);
|
||||
void (*sc_data_ready)(struct sock *sk, int bytes);
|
||||
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
struct list_head sc_net_debug_item;
|
||||
#endif
|
||||
struct timeval sc_tv_timer;
|
||||
struct timeval sc_tv_data_ready;
|
||||
struct timeval sc_tv_advance_start;
|
||||
@ -206,4 +210,24 @@ struct o2net_status_wait {
|
||||
struct list_head ns_node_item;
|
||||
};
|
||||
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
/* just for state dumps */
|
||||
struct o2net_send_tracking {
|
||||
struct list_head st_net_debug_item;
|
||||
struct task_struct *st_task;
|
||||
struct o2net_sock_container *st_sc;
|
||||
u32 st_id;
|
||||
u32 st_msg_type;
|
||||
u32 st_msg_key;
|
||||
u8 st_node;
|
||||
struct timeval st_sock_time;
|
||||
struct timeval st_send_time;
|
||||
struct timeval st_status_time;
|
||||
};
|
||||
#else
|
||||
struct o2net_send_tracking {
|
||||
u32 dummy;
|
||||
};
|
||||
#endif /* CONFIG_DEBUG_FS */
|
||||
|
||||
#endif /* O2CLUSTER_TCP_INTERNAL_H */
|
||||
|
@ -1,6 +1,6 @@
|
||||
EXTRA_CFLAGS += -Ifs/ocfs2
|
||||
|
||||
obj-$(CONFIG_OCFS2_FS) += ocfs2_dlm.o ocfs2_dlmfs.o
|
||||
obj-$(CONFIG_OCFS2_FS_O2CB) += ocfs2_dlm.o ocfs2_dlmfs.o
|
||||
|
||||
ocfs2_dlm-objs := dlmdomain.o dlmdebug.o dlmthread.o dlmrecovery.o \
|
||||
dlmmaster.o dlmast.o dlmconvert.o dlmlock.o dlmunlock.o dlmver.o
|
||||
|
@ -49,6 +49,41 @@
|
||||
/* Intended to make it easier for us to switch out hash functions */
|
||||
#define dlm_lockid_hash(_n, _l) full_name_hash(_n, _l)
|
||||
|
||||
/*
 * Kind of a master list entry.  The dlm debug dump labels these
 * "BLK", "MAS" and "MIG" respectively.
 */
enum dlm_mle_type {
|
||||
DLM_MLE_BLOCK,
|
||||
DLM_MLE_MASTER,
|
||||
DLM_MLE_MIGRATION
|
||||
};
|
||||
|
||||
/*
 * A lock name stored inline: a length byte plus a fixed array of
 * DLM_LOCKID_NAME_MAX bytes.
 * NOTE(review): presumably only the first 'len' bytes of 'name' are
 * meaningful - the debug dump prints it with a "%.*s" bounded by 'len'.
 */
struct dlm_lock_name {
|
||||
u8 len;
|
||||
u8 name[DLM_LOCKID_NAME_MAX];
|
||||
};
|
||||
|
||||
/*
 * Master list entry (mle).
 *
 * Entries are linked through 'list'; the debug code walks
 * dlm->master_list using that member.  maybe_map, vote_map, response_map
 * and node_map are node bitmaps sized for O2NM_MAX_NODES bits.
 *
 * 'type' selects the active member of the union 'u': the debug dumper
 * reads u.res for DLM_MLE_MASTER entries and u.name for every other type.
 */
struct dlm_master_list_entry {
|
||||
struct list_head list;
|
||||
struct list_head hb_events;
|
||||
struct dlm_ctxt *dlm;
|
||||
spinlock_t spinlock;
|
||||
wait_queue_head_t wq;
|
||||
atomic_t woken;
|
||||
struct kref mle_refs;
|
||||
int inuse;
|
||||
unsigned long maybe_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
|
||||
unsigned long vote_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
|
||||
unsigned long response_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
|
||||
unsigned long node_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
|
||||
u8 master;
|
||||
u8 new_master;
|
||||
enum dlm_mle_type type;
|
||||
struct o2hb_callback_func mle_hb_up;
|
||||
struct o2hb_callback_func mle_hb_down;
|
||||
union {
|
||||
struct dlm_lock_resource *res;
|
||||
struct dlm_lock_name name;
|
||||
} u;
|
||||
};
|
||||
|
||||
enum dlm_ast_type {
|
||||
DLM_AST = 0,
|
||||
DLM_BAST,
|
||||
@ -101,6 +136,7 @@ struct dlm_ctxt
|
||||
struct list_head purge_list;
|
||||
struct list_head pending_asts;
|
||||
struct list_head pending_basts;
|
||||
struct list_head tracking_list;
|
||||
unsigned int purge_count;
|
||||
spinlock_t spinlock;
|
||||
spinlock_t ast_lock;
|
||||
@ -122,6 +158,9 @@ struct dlm_ctxt
|
||||
atomic_t remote_resources;
|
||||
atomic_t unknown_resources;
|
||||
|
||||
struct dlm_debug_ctxt *dlm_debug_ctxt;
|
||||
struct dentry *dlm_debugfs_subroot;
|
||||
|
||||
/* NOTE: Next three are protected by dlm_domain_lock */
|
||||
struct kref dlm_refs;
|
||||
enum dlm_ctxt_state dlm_state;
|
||||
@ -270,6 +309,9 @@ struct dlm_lock_resource
|
||||
struct list_head dirty;
|
||||
struct list_head recovering; // dlm_recovery_ctxt.resources list
|
||||
|
||||
/* Added during init and removed during release */
|
||||
struct list_head tracking; /* dlm->tracking_list */
|
||||
|
||||
/* unused lock resources have their last_used stamped and are
|
||||
* put on a list for the dlm thread to run. */
|
||||
unsigned long last_used;
|
||||
@ -963,9 +1005,16 @@ static inline void __dlm_wait_on_lockres(struct dlm_lock_resource *res)
|
||||
DLM_LOCK_RES_MIGRATING));
|
||||
}
|
||||
|
||||
/* create/destroy slab caches */
|
||||
int dlm_init_master_caches(void);
|
||||
void dlm_destroy_master_caches(void);
|
||||
|
||||
int dlm_init_lock_cache(void);
|
||||
void dlm_destroy_lock_cache(void);
|
||||
|
||||
int dlm_init_mle_cache(void);
|
||||
void dlm_destroy_mle_cache(void);
|
||||
|
||||
void dlm_hb_event_notify_attached(struct dlm_ctxt *dlm, int idx, int node_up);
|
||||
int dlm_drop_lockres_ref(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *res);
|
||||
|
@ -5,7 +5,7 @@
|
||||
*
|
||||
* debug functionality for the dlm
|
||||
*
|
||||
* Copyright (C) 2004 Oracle. All rights reserved.
|
||||
* Copyright (C) 2004, 2008 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
@ -30,6 +30,7 @@
|
||||
#include <linux/utsname.h>
|
||||
#include <linux/sysctl.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/debugfs.h>
|
||||
|
||||
#include "cluster/heartbeat.h"
|
||||
#include "cluster/nodemanager.h"
|
||||
@ -37,17 +38,16 @@
|
||||
|
||||
#include "dlmapi.h"
|
||||
#include "dlmcommon.h"
|
||||
|
||||
#include "dlmdomain.h"
|
||||
#include "dlmdebug.h"
|
||||
|
||||
#define MLOG_MASK_PREFIX ML_DLM
|
||||
#include "cluster/masklog.h"
|
||||
|
||||
int stringify_lockname(const char *lockname, int locklen, char *buf, int len);
|
||||
|
||||
void dlm_print_one_lock_resource(struct dlm_lock_resource *res)
|
||||
{
|
||||
mlog(ML_NOTICE, "lockres: %.*s, owner=%u, state=%u\n",
|
||||
res->lockname.len, res->lockname.name,
|
||||
res->owner, res->state);
|
||||
spin_lock(&res->spinlock);
|
||||
__dlm_print_one_lock_resource(res);
|
||||
spin_unlock(&res->spinlock);
|
||||
@ -58,7 +58,7 @@ static void dlm_print_lockres_refmap(struct dlm_lock_resource *res)
|
||||
int bit;
|
||||
assert_spin_locked(&res->spinlock);
|
||||
|
||||
mlog(ML_NOTICE, " refmap nodes: [ ");
|
||||
printk(" refmap nodes: [ ");
|
||||
bit = 0;
|
||||
while (1) {
|
||||
bit = find_next_bit(res->refmap, O2NM_MAX_NODES, bit);
|
||||
@ -70,63 +70,66 @@ static void dlm_print_lockres_refmap(struct dlm_lock_resource *res)
|
||||
printk("], inflight=%u\n", res->inflight_locks);
|
||||
}
|
||||
|
||||
static void __dlm_print_lock(struct dlm_lock *lock)
|
||||
{
|
||||
spin_lock(&lock->spinlock);
|
||||
|
||||
printk(" type=%d, conv=%d, node=%u, cookie=%u:%llu, "
|
||||
"ref=%u, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c), "
|
||||
"pending=(conv=%c,lock=%c,cancel=%c,unlock=%c)\n",
|
||||
lock->ml.type, lock->ml.convert_type, lock->ml.node,
|
||||
dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
|
||||
dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
|
||||
atomic_read(&lock->lock_refs.refcount),
|
||||
(list_empty(&lock->ast_list) ? 'y' : 'n'),
|
||||
(lock->ast_pending ? 'y' : 'n'),
|
||||
(list_empty(&lock->bast_list) ? 'y' : 'n'),
|
||||
(lock->bast_pending ? 'y' : 'n'),
|
||||
(lock->convert_pending ? 'y' : 'n'),
|
||||
(lock->lock_pending ? 'y' : 'n'),
|
||||
(lock->cancel_pending ? 'y' : 'n'),
|
||||
(lock->unlock_pending ? 'y' : 'n'));
|
||||
|
||||
spin_unlock(&lock->spinlock);
|
||||
}
|
||||
|
||||
void __dlm_print_one_lock_resource(struct dlm_lock_resource *res)
|
||||
{
|
||||
struct list_head *iter2;
|
||||
struct dlm_lock *lock;
|
||||
char buf[DLM_LOCKID_NAME_MAX];
|
||||
|
||||
assert_spin_locked(&res->spinlock);
|
||||
|
||||
mlog(ML_NOTICE, "lockres: %.*s, owner=%u, state=%u\n",
|
||||
res->lockname.len, res->lockname.name,
|
||||
res->owner, res->state);
|
||||
mlog(ML_NOTICE, " last used: %lu, on purge list: %s\n",
|
||||
res->last_used, list_empty(&res->purge) ? "no" : "yes");
|
||||
stringify_lockname(res->lockname.name, res->lockname.len,
|
||||
buf, sizeof(buf) - 1);
|
||||
printk("lockres: %s, owner=%u, state=%u\n",
|
||||
buf, res->owner, res->state);
|
||||
printk(" last used: %lu, refcnt: %u, on purge list: %s\n",
|
||||
res->last_used, atomic_read(&res->refs.refcount),
|
||||
list_empty(&res->purge) ? "no" : "yes");
|
||||
printk(" on dirty list: %s, on reco list: %s, "
|
||||
"migrating pending: %s\n",
|
||||
list_empty(&res->dirty) ? "no" : "yes",
|
||||
list_empty(&res->recovering) ? "no" : "yes",
|
||||
res->migration_pending ? "yes" : "no");
|
||||
printk(" inflight locks: %d, asts reserved: %d\n",
|
||||
res->inflight_locks, atomic_read(&res->asts_reserved));
|
||||
dlm_print_lockres_refmap(res);
|
||||
mlog(ML_NOTICE, " granted queue: \n");
|
||||
printk(" granted queue:\n");
|
||||
list_for_each(iter2, &res->granted) {
|
||||
lock = list_entry(iter2, struct dlm_lock, list);
|
||||
spin_lock(&lock->spinlock);
|
||||
mlog(ML_NOTICE, " type=%d, conv=%d, node=%u, "
|
||||
"cookie=%u:%llu, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n",
|
||||
lock->ml.type, lock->ml.convert_type, lock->ml.node,
|
||||
dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
|
||||
dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
|
||||
list_empty(&lock->ast_list) ? 'y' : 'n',
|
||||
lock->ast_pending ? 'y' : 'n',
|
||||
list_empty(&lock->bast_list) ? 'y' : 'n',
|
||||
lock->bast_pending ? 'y' : 'n');
|
||||
spin_unlock(&lock->spinlock);
|
||||
__dlm_print_lock(lock);
|
||||
}
|
||||
mlog(ML_NOTICE, " converting queue: \n");
|
||||
printk(" converting queue:\n");
|
||||
list_for_each(iter2, &res->converting) {
|
||||
lock = list_entry(iter2, struct dlm_lock, list);
|
||||
spin_lock(&lock->spinlock);
|
||||
mlog(ML_NOTICE, " type=%d, conv=%d, node=%u, "
|
||||
"cookie=%u:%llu, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n",
|
||||
lock->ml.type, lock->ml.convert_type, lock->ml.node,
|
||||
dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
|
||||
dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
|
||||
list_empty(&lock->ast_list) ? 'y' : 'n',
|
||||
lock->ast_pending ? 'y' : 'n',
|
||||
list_empty(&lock->bast_list) ? 'y' : 'n',
|
||||
lock->bast_pending ? 'y' : 'n');
|
||||
spin_unlock(&lock->spinlock);
|
||||
__dlm_print_lock(lock);
|
||||
}
|
||||
mlog(ML_NOTICE, " blocked queue: \n");
|
||||
printk(" blocked queue:\n");
|
||||
list_for_each(iter2, &res->blocked) {
|
||||
lock = list_entry(iter2, struct dlm_lock, list);
|
||||
spin_lock(&lock->spinlock);
|
||||
mlog(ML_NOTICE, " type=%d, conv=%d, node=%u, "
|
||||
"cookie=%u:%llu, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n",
|
||||
lock->ml.type, lock->ml.convert_type, lock->ml.node,
|
||||
dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
|
||||
dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
|
||||
list_empty(&lock->ast_list) ? 'y' : 'n',
|
||||
lock->ast_pending ? 'y' : 'n',
|
||||
list_empty(&lock->bast_list) ? 'y' : 'n',
|
||||
lock->bast_pending ? 'y' : 'n');
|
||||
spin_unlock(&lock->spinlock);
|
||||
__dlm_print_lock(lock);
|
||||
}
|
||||
}
|
||||
|
||||
@ -136,31 +139,6 @@ void dlm_print_one_lock(struct dlm_lock *lockid)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dlm_print_one_lock);
|
||||
|
||||
#if 0
|
||||
void dlm_dump_lock_resources(struct dlm_ctxt *dlm)
|
||||
{
|
||||
struct dlm_lock_resource *res;
|
||||
struct hlist_node *iter;
|
||||
struct hlist_head *bucket;
|
||||
int i;
|
||||
|
||||
mlog(ML_NOTICE, "struct dlm_ctxt: %s, node=%u, key=%u\n",
|
||||
dlm->name, dlm->node_num, dlm->key);
|
||||
if (!dlm || !dlm->name) {
|
||||
mlog(ML_ERROR, "dlm=%p\n", dlm);
|
||||
return;
|
||||
}
|
||||
|
||||
spin_lock(&dlm->spinlock);
|
||||
for (i=0; i<DLM_HASH_BUCKETS; i++) {
|
||||
bucket = dlm_lockres_hash(dlm, i);
|
||||
hlist_for_each_entry(res, iter, bucket, hash_node)
|
||||
dlm_print_one_lock_resource(res);
|
||||
}
|
||||
spin_unlock(&dlm->spinlock);
|
||||
}
|
||||
#endif /* 0 */
|
||||
|
||||
static const char *dlm_errnames[] = {
|
||||
[DLM_NORMAL] = "DLM_NORMAL",
|
||||
[DLM_GRANTED] = "DLM_GRANTED",
|
||||
@ -266,3 +244,792 @@ const char *dlm_errname(enum dlm_status err)
|
||||
return dlm_errnames[err];
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dlm_errname);
|
||||
|
||||
/* NOTE: This function converts a lockname into a string. It uses knowledge
|
||||
* of the format of the lockname that should be outside the purview of the dlm.
|
||||
* We are adding only to make dlm debugging slightly easier.
|
||||
*
|
||||
* For more on lockname formats, please refer to dlmglue.c and ocfs2_lockid.h.
|
||||
*/
|
||||
int stringify_lockname(const char *lockname, int locklen, char *buf, int len)
|
||||
{
|
||||
int out = 0;
|
||||
__be64 inode_blkno_be;
|
||||
|
||||
#define OCFS2_DENTRY_LOCK_INO_START 18
|
||||
if (*lockname == 'N') {
|
||||
memcpy((__be64 *)&inode_blkno_be,
|
||||
(char *)&lockname[OCFS2_DENTRY_LOCK_INO_START],
|
||||
sizeof(__be64));
|
||||
out += snprintf(buf + out, len - out, "%.*s%08x",
|
||||
OCFS2_DENTRY_LOCK_INO_START - 1, lockname,
|
||||
(unsigned int)be64_to_cpu(inode_blkno_be));
|
||||
} else
|
||||
out += snprintf(buf + out, len - out, "%.*s",
|
||||
locklen, lockname);
|
||||
return out;
|
||||
}
|
||||
|
||||
/*
 * Write the index of every set bit in nodemap (bits 0..maxnodes-1) to buf
 * as "<n> " items, in increasing order.
 *
 * Returns the number of characters actually stored (excluding the NUL).
 * scnprintf() keeps the running offset inside the buffer even when a
 * dense map does not fit; with snprintf() the offset could run past len
 * and be handed back to the caller as if that much had been written.
 */
static int stringify_nodemap(unsigned long *nodemap, int maxnodes,
			     char *buf, int len)
{
	int out = 0;
	int i = -1;

	while ((i = find_next_bit(nodemap, maxnodes, i + 1)) < maxnodes)
		out += scnprintf(buf + out, len - out, "%d ", i);

	return out;
}
|
||||
|
||||
static int dump_mle(struct dlm_master_list_entry *mle, char *buf, int len)
|
||||
{
|
||||
int out = 0;
|
||||
unsigned int namelen;
|
||||
const char *name;
|
||||
char *mle_type;
|
||||
|
||||
if (mle->type != DLM_MLE_MASTER) {
|
||||
namelen = mle->u.name.len;
|
||||
name = mle->u.name.name;
|
||||
} else {
|
||||
namelen = mle->u.res->lockname.len;
|
||||
name = mle->u.res->lockname.name;
|
||||
}
|
||||
|
||||
if (mle->type == DLM_MLE_BLOCK)
|
||||
mle_type = "BLK";
|
||||
else if (mle->type == DLM_MLE_MASTER)
|
||||
mle_type = "MAS";
|
||||
else
|
||||
mle_type = "MIG";
|
||||
|
||||
out += stringify_lockname(name, namelen, buf + out, len - out);
|
||||
out += snprintf(buf + out, len - out,
|
||||
"\t%3s\tmas=%3u\tnew=%3u\tevt=%1d\tuse=%1d\tref=%3d\n",
|
||||
mle_type, mle->master, mle->new_master,
|
||||
!list_empty(&mle->hb_events),
|
||||
!!mle->inuse,
|
||||
atomic_read(&mle->mle_refs.refcount));
|
||||
|
||||
out += snprintf(buf + out, len - out, "Maybe=");
|
||||
out += stringify_nodemap(mle->maybe_map, O2NM_MAX_NODES,
|
||||
buf + out, len - out);
|
||||
out += snprintf(buf + out, len - out, "\n");
|
||||
|
||||
out += snprintf(buf + out, len - out, "Vote=");
|
||||
out += stringify_nodemap(mle->vote_map, O2NM_MAX_NODES,
|
||||
buf + out, len - out);
|
||||
out += snprintf(buf + out, len - out, "\n");
|
||||
|
||||
out += snprintf(buf + out, len - out, "Response=");
|
||||
out += stringify_nodemap(mle->response_map, O2NM_MAX_NODES,
|
||||
buf + out, len - out);
|
||||
out += snprintf(buf + out, len - out, "\n");
|
||||
|
||||
out += snprintf(buf + out, len - out, "Node=");
|
||||
out += stringify_nodemap(mle->node_map, O2NM_MAX_NODES,
|
||||
buf + out, len - out);
|
||||
out += snprintf(buf + out, len - out, "\n");
|
||||
|
||||
out += snprintf(buf + out, len - out, "\n");
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
void dlm_print_one_mle(struct dlm_master_list_entry *mle)
|
||||
{
|
||||
char *buf;
|
||||
|
||||
buf = (char *) get_zeroed_page(GFP_NOFS);
|
||||
if (buf) {
|
||||
dump_mle(mle, buf, PAGE_SIZE - 1);
|
||||
free_page((unsigned long)buf);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
|
||||
static struct dentry *dlm_debugfs_root = NULL;
|
||||
|
||||
#define DLM_DEBUGFS_DIR "o2dlm"
|
||||
#define DLM_DEBUGFS_DLM_STATE "dlm_state"
|
||||
#define DLM_DEBUGFS_LOCKING_STATE "locking_state"
|
||||
#define DLM_DEBUGFS_MLE_STATE "mle_state"
|
||||
#define DLM_DEBUGFS_PURGE_LIST "purge_list"
|
||||
|
||||
/* begin - utils funcs */
|
||||
static void dlm_debug_free(struct kref *kref)
|
||||
{
|
||||
struct dlm_debug_ctxt *dc;
|
||||
|
||||
dc = container_of(kref, struct dlm_debug_ctxt, debug_refcnt);
|
||||
|
||||
kfree(dc);
|
||||
}
|
||||
|
||||
void dlm_debug_put(struct dlm_debug_ctxt *dc)
|
||||
{
|
||||
if (dc)
|
||||
kref_put(&dc->debug_refcnt, dlm_debug_free);
|
||||
}
|
||||
|
||||
static void dlm_debug_get(struct dlm_debug_ctxt *dc)
|
||||
{
|
||||
kref_get(&dc->debug_refcnt);
|
||||
}
|
||||
|
||||
static struct debug_buffer *debug_buffer_allocate(void)
|
||||
{
|
||||
struct debug_buffer *db = NULL;
|
||||
|
||||
db = kzalloc(sizeof(struct debug_buffer), GFP_KERNEL);
|
||||
if (!db)
|
||||
goto bail;
|
||||
|
||||
db->len = PAGE_SIZE;
|
||||
db->buf = kmalloc(db->len, GFP_KERNEL);
|
||||
if (!db->buf)
|
||||
goto bail;
|
||||
|
||||
return db;
|
||||
bail:
|
||||
kfree(db);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
 * read() handler shared by the buffer-backed debug files: copies from the
 * text captured at open time (file->private_data) to user space.
 */
static ssize_t debug_buffer_read(struct file *file, char __user *buf,
				 size_t nbytes, loff_t *ppos)
{
	struct debug_buffer *snapshot = file->private_data;

	return simple_read_from_buffer(buf, nbytes, ppos,
				       snapshot->buf, snapshot->len);
}
|
||||
|
||||
/*
 * llseek() handler for the buffer-backed debug files.
 *
 * whence 0 seeks to an absolute offset, whence 1 seeks relative to the
 * current position; any other whence is rejected.  The resulting position
 * must lie within [0, db->len], otherwise -EINVAL is returned and f_pos is
 * left untouched.
 */
static loff_t debug_buffer_llseek(struct file *file, loff_t off, int whence)
{
	struct debug_buffer *db = file->private_data;
	loff_t target;

	if (whence == 0)
		target = off;
	else if (whence == 1)
		target = file->f_pos + off;
	else
		target = -1;	/* unsupported whence: fails the check below */

	if (target < 0 || target > db->len)
		return -EINVAL;

	file->f_pos = target;
	return file->f_pos;
}
|
||||
|
||||
static int debug_buffer_release(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct debug_buffer *db = (struct debug_buffer *)file->private_data;
|
||||
|
||||
if (db)
|
||||
kfree(db->buf);
|
||||
kfree(db);
|
||||
|
||||
return 0;
|
||||
}
|
||||
/* end - util funcs */
|
||||
|
||||
/* begin - purge list funcs */
|
||||
static int debug_purgelist_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
|
||||
{
|
||||
struct dlm_lock_resource *res;
|
||||
int out = 0;
|
||||
unsigned long total = 0;
|
||||
|
||||
out += snprintf(db->buf + out, db->len - out,
|
||||
"Dumping Purgelist for Domain: %s\n", dlm->name);
|
||||
|
||||
spin_lock(&dlm->spinlock);
|
||||
list_for_each_entry(res, &dlm->purge_list, purge) {
|
||||
++total;
|
||||
if (db->len - out < 100)
|
||||
continue;
|
||||
spin_lock(&res->spinlock);
|
||||
out += stringify_lockname(res->lockname.name,
|
||||
res->lockname.len,
|
||||
db->buf + out, db->len - out);
|
||||
out += snprintf(db->buf + out, db->len - out, "\t%ld\n",
|
||||
(jiffies - res->last_used)/HZ);
|
||||
spin_unlock(&res->spinlock);
|
||||
}
|
||||
spin_unlock(&dlm->spinlock);
|
||||
|
||||
out += snprintf(db->buf + out, db->len - out,
|
||||
"Total on list: %ld\n", total);
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
static int debug_purgelist_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct dlm_ctxt *dlm = inode->i_private;
|
||||
struct debug_buffer *db;
|
||||
|
||||
db = debug_buffer_allocate();
|
||||
if (!db)
|
||||
goto bail;
|
||||
|
||||
db->len = debug_purgelist_print(dlm, db);
|
||||
|
||||
file->private_data = db;
|
||||
|
||||
return 0;
|
||||
bail:
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
static struct file_operations debug_purgelist_fops = {
|
||||
.open = debug_purgelist_open,
|
||||
.release = debug_buffer_release,
|
||||
.read = debug_buffer_read,
|
||||
.llseek = debug_buffer_llseek,
|
||||
};
|
||||
/* end - purge list funcs */
|
||||
|
||||
/* begin - debug mle funcs */
|
||||
static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
|
||||
{
|
||||
struct dlm_master_list_entry *mle;
|
||||
int out = 0;
|
||||
unsigned long total = 0;
|
||||
|
||||
out += snprintf(db->buf + out, db->len - out,
|
||||
"Dumping MLEs for Domain: %s\n", dlm->name);
|
||||
|
||||
spin_lock(&dlm->master_lock);
|
||||
list_for_each_entry(mle, &dlm->master_list, list) {
|
||||
++total;
|
||||
if (db->len - out < 200)
|
||||
continue;
|
||||
out += dump_mle(mle, db->buf + out, db->len - out);
|
||||
}
|
||||
spin_unlock(&dlm->master_lock);
|
||||
|
||||
out += snprintf(db->buf + out, db->len - out,
|
||||
"Total on list: %ld\n", total);
|
||||
return out;
|
||||
}
|
||||
|
||||
static int debug_mle_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct dlm_ctxt *dlm = inode->i_private;
|
||||
struct debug_buffer *db;
|
||||
|
||||
db = debug_buffer_allocate();
|
||||
if (!db)
|
||||
goto bail;
|
||||
|
||||
db->len = debug_mle_print(dlm, db);
|
||||
|
||||
file->private_data = db;
|
||||
|
||||
return 0;
|
||||
bail:
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
static struct file_operations debug_mle_fops = {
|
||||
.open = debug_mle_open,
|
||||
.release = debug_buffer_release,
|
||||
.read = debug_buffer_read,
|
||||
.llseek = debug_buffer_llseek,
|
||||
};
|
||||
|
||||
/* end - debug mle funcs */
|
||||
|
||||
/* begin - debug lockres funcs */
|
||||
static int dump_lock(struct dlm_lock *lock, int list_type, char *buf, int len)
|
||||
{
|
||||
int out;
|
||||
|
||||
#define DEBUG_LOCK_VERSION 1
|
||||
spin_lock(&lock->spinlock);
|
||||
out = snprintf(buf, len, "LOCK:%d,%d,%d,%d,%d,%d:%lld,%d,%d,%d,%d,%d,"
|
||||
"%d,%d,%d,%d\n",
|
||||
DEBUG_LOCK_VERSION,
|
||||
list_type, lock->ml.type, lock->ml.convert_type,
|
||||
lock->ml.node,
|
||||
dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
|
||||
dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
|
||||
!list_empty(&lock->ast_list),
|
||||
!list_empty(&lock->bast_list),
|
||||
lock->ast_pending, lock->bast_pending,
|
||||
lock->convert_pending, lock->lock_pending,
|
||||
lock->cancel_pending, lock->unlock_pending,
|
||||
atomic_read(&lock->lock_refs.refcount));
|
||||
spin_unlock(&lock->spinlock);
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
static int dump_lockres(struct dlm_lock_resource *res, char *buf, int len)
|
||||
{
|
||||
struct dlm_lock *lock;
|
||||
int i;
|
||||
int out = 0;
|
||||
|
||||
out += snprintf(buf + out, len - out, "NAME:");
|
||||
out += stringify_lockname(res->lockname.name, res->lockname.len,
|
||||
buf + out, len - out);
|
||||
out += snprintf(buf + out, len - out, "\n");
|
||||
|
||||
#define DEBUG_LRES_VERSION 1
|
||||
out += snprintf(buf + out, len - out,
|
||||
"LRES:%d,%d,%d,%ld,%d,%d,%d,%d,%d,%d,%d\n",
|
||||
DEBUG_LRES_VERSION,
|
||||
res->owner, res->state, res->last_used,
|
||||
!list_empty(&res->purge),
|
||||
!list_empty(&res->dirty),
|
||||
!list_empty(&res->recovering),
|
||||
res->inflight_locks, res->migration_pending,
|
||||
atomic_read(&res->asts_reserved),
|
||||
atomic_read(&res->refs.refcount));
|
||||
|
||||
/* refmap */
|
||||
out += snprintf(buf + out, len - out, "RMAP:");
|
||||
out += stringify_nodemap(res->refmap, O2NM_MAX_NODES,
|
||||
buf + out, len - out);
|
||||
out += snprintf(buf + out, len - out, "\n");
|
||||
|
||||
/* lvb */
|
||||
out += snprintf(buf + out, len - out, "LVBX:");
|
||||
for (i = 0; i < DLM_LVB_LEN; i++)
|
||||
out += snprintf(buf + out, len - out,
|
||||
"%02x", (unsigned char)res->lvb[i]);
|
||||
out += snprintf(buf + out, len - out, "\n");
|
||||
|
||||
/* granted */
|
||||
list_for_each_entry(lock, &res->granted, list)
|
||||
out += dump_lock(lock, 0, buf + out, len - out);
|
||||
|
||||
/* converting */
|
||||
list_for_each_entry(lock, &res->converting, list)
|
||||
out += dump_lock(lock, 1, buf + out, len - out);
|
||||
|
||||
/* blocked */
|
||||
list_for_each_entry(lock, &res->blocked, list)
|
||||
out += dump_lock(lock, 2, buf + out, len - out);
|
||||
|
||||
out += snprintf(buf + out, len - out, "\n");
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
/*
 * seq_file start(): advance the per-open cursor (dl->dl_res) to the next
 * lock resource on the tracking list and format it into dl->dl_buf.
 *
 * The cursor holds a reference on the resource it points at.  On each call
 * that reference is moved to the following list element; when the element
 * after the cursor is the list head itself, the walk is over and NULL is
 * returned.  With no cursor yet, the walk begins at the first element of
 * dlm->tracking_list (or ends at once if that list is empty).
 *
 * Returns dl (passed on to show()) or NULL at the end of the list.
 */
static void *lockres_seq_start(struct seq_file *m, loff_t *pos)
{
	struct debug_lockres *dl = m->private;
	struct dlm_ctxt *dlm = dl->dl_ctxt;
	struct dlm_lock_resource *res = NULL;
	struct dlm_lock_resource *cur;

	spin_lock(&dlm->spinlock);

	cur = dl->dl_res;
	if (cur) {
		/* Element following the cursor; may be the list head. */
		res = list_entry(cur->tracking.next,
				 struct dlm_lock_resource, tracking);
		if (&res->tracking != &cur->tracking) {
			dlm_lockres_put(cur);
			dl->dl_res = NULL;

			if (&res->tracking == &dlm->tracking_list) {
				mlog(0, "End of list found, %p\n", res);
				dl = NULL;
			} else {
				dlm_lockres_get(res);
				dl->dl_res = res;
			}
		}
	} else if (!list_empty(&dlm->tracking_list)) {
		res = list_entry(dlm->tracking_list.next,
				 struct dlm_lock_resource, tracking);
		dlm_lockres_get(res);
		dl->dl_res = res;
	} else {
		dl = NULL;
	}

	if (dl) {
		spin_lock(&dl->dl_res->spinlock);
		dump_lockres(dl->dl_res, dl->dl_buf, dl->dl_len - 1);
		spin_unlock(&dl->dl_res->spinlock);
	}

	spin_unlock(&dlm->spinlock);

	return dl;
}
|
||||
|
||||
/* seq_file stop(): intentionally empty - start() leaves nothing to undo. */
static void lockres_seq_stop(struct seq_file *m, void *v)
{
}
|
||||
|
||||
/*
 * seq_file next(): always returns NULL, so each start() call yields exactly
 * one record; the advance to the following resource happens in start().
 */
static void *lockres_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
	return NULL;
}
|
||||
|
||||
static int lockres_seq_show(struct seq_file *s, void *v)
|
||||
{
|
||||
struct debug_lockres *dl = (struct debug_lockres *)v;
|
||||
|
||||
seq_printf(s, "%s", dl->dl_buf);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct seq_operations debug_lockres_ops = {
|
||||
.start = lockres_seq_start,
|
||||
.stop = lockres_seq_stop,
|
||||
.next = lockres_seq_next,
|
||||
.show = lockres_seq_show,
|
||||
};
|
||||
|
||||
static int debug_lockres_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct dlm_ctxt *dlm = inode->i_private;
|
||||
int ret = -ENOMEM;
|
||||
struct seq_file *seq;
|
||||
struct debug_lockres *dl = NULL;
|
||||
|
||||
dl = kzalloc(sizeof(struct debug_lockres), GFP_KERNEL);
|
||||
if (!dl) {
|
||||
mlog_errno(ret);
|
||||
goto bail;
|
||||
}
|
||||
|
||||
dl->dl_len = PAGE_SIZE;
|
||||
dl->dl_buf = kmalloc(dl->dl_len, GFP_KERNEL);
|
||||
if (!dl->dl_buf) {
|
||||
mlog_errno(ret);
|
||||
goto bail;
|
||||
}
|
||||
|
||||
ret = seq_open(file, &debug_lockres_ops);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
goto bail;
|
||||
}
|
||||
|
||||
seq = (struct seq_file *) file->private_data;
|
||||
seq->private = dl;
|
||||
|
||||
dlm_grab(dlm);
|
||||
dl->dl_ctxt = dlm;
|
||||
|
||||
return 0;
|
||||
bail:
|
||||
if (dl)
|
||||
kfree(dl->dl_buf);
|
||||
kfree(dl);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int debug_lockres_release(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct seq_file *seq = (struct seq_file *)file->private_data;
|
||||
struct debug_lockres *dl = (struct debug_lockres *)seq->private;
|
||||
|
||||
if (dl->dl_res)
|
||||
dlm_lockres_put(dl->dl_res);
|
||||
dlm_put(dl->dl_ctxt);
|
||||
kfree(dl->dl_buf);
|
||||
return seq_release_private(inode, file);
|
||||
}
|
||||
|
||||
static struct file_operations debug_lockres_fops = {
|
||||
.open = debug_lockres_open,
|
||||
.release = debug_lockres_release,
|
||||
.read = seq_read,
|
||||
.llseek = seq_lseek,
|
||||
};
|
||||
/* end - debug lockres funcs */
|
||||
|
||||
/* begin - debug state funcs */
|
||||
static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
|
||||
{
|
||||
int out = 0;
|
||||
struct dlm_reco_node_data *node;
|
||||
char *state;
|
||||
int lres, rres, ures, tres;
|
||||
|
||||
lres = atomic_read(&dlm->local_resources);
|
||||
rres = atomic_read(&dlm->remote_resources);
|
||||
ures = atomic_read(&dlm->unknown_resources);
|
||||
tres = lres + rres + ures;
|
||||
|
||||
spin_lock(&dlm->spinlock);
|
||||
|
||||
switch (dlm->dlm_state) {
|
||||
case DLM_CTXT_NEW:
|
||||
state = "NEW"; break;
|
||||
case DLM_CTXT_JOINED:
|
||||
state = "JOINED"; break;
|
||||
case DLM_CTXT_IN_SHUTDOWN:
|
||||
state = "SHUTDOWN"; break;
|
||||
case DLM_CTXT_LEAVING:
|
||||
state = "LEAVING"; break;
|
||||
default:
|
||||
state = "UNKNOWN"; break;
|
||||
}
|
||||
|
||||
/* Domain: xxxxxxxxxx Key: 0xdfbac769 */
|
||||
out += snprintf(db->buf + out, db->len - out,
|
||||
"Domain: %s Key: 0x%08x\n", dlm->name, dlm->key);
|
||||
|
||||
/* Thread Pid: xxx Node: xxx State: xxxxx */
|
||||
out += snprintf(db->buf + out, db->len - out,
|
||||
"Thread Pid: %d Node: %d State: %s\n",
|
||||
dlm->dlm_thread_task->pid, dlm->node_num, state);
|
||||
|
||||
/* Number of Joins: xxx Joining Node: xxx */
|
||||
out += snprintf(db->buf + out, db->len - out,
|
||||
"Number of Joins: %d Joining Node: %d\n",
|
||||
dlm->num_joins, dlm->joining_node);
|
||||
|
||||
/* Domain Map: xx xx xx */
|
||||
out += snprintf(db->buf + out, db->len - out, "Domain Map: ");
|
||||
out += stringify_nodemap(dlm->domain_map, O2NM_MAX_NODES,
|
||||
db->buf + out, db->len - out);
|
||||
out += snprintf(db->buf + out, db->len - out, "\n");
|
||||
|
||||
/* Live Map: xx xx xx */
|
||||
out += snprintf(db->buf + out, db->len - out, "Live Map: ");
|
||||
out += stringify_nodemap(dlm->live_nodes_map, O2NM_MAX_NODES,
|
||||
db->buf + out, db->len - out);
|
||||
out += snprintf(db->buf + out, db->len - out, "\n");
|
||||
|
||||
/* Mastered Resources Total: xxx Locally: xxx Remotely: ... */
|
||||
out += snprintf(db->buf + out, db->len - out,
|
||||
"Mastered Resources Total: %d Locally: %d "
|
||||
"Remotely: %d Unknown: %d\n",
|
||||
tres, lres, rres, ures);
|
||||
|
||||
/* Lists: Dirty=Empty Purge=InUse PendingASTs=Empty ... */
|
||||
out += snprintf(db->buf + out, db->len - out,
|
||||
"Lists: Dirty=%s Purge=%s PendingASTs=%s "
|
||||
"PendingBASTs=%s Master=%s\n",
|
||||
(list_empty(&dlm->dirty_list) ? "Empty" : "InUse"),
|
||||
(list_empty(&dlm->purge_list) ? "Empty" : "InUse"),
|
||||
(list_empty(&dlm->pending_asts) ? "Empty" : "InUse"),
|
||||
(list_empty(&dlm->pending_basts) ? "Empty" : "InUse"),
|
||||
(list_empty(&dlm->master_list) ? "Empty" : "InUse"));
|
||||
|
||||
/* Purge Count: xxx Refs: xxx */
|
||||
out += snprintf(db->buf + out, db->len - out,
|
||||
"Purge Count: %d Refs: %d\n", dlm->purge_count,
|
||||
atomic_read(&dlm->dlm_refs.refcount));
|
||||
|
||||
/* Dead Node: xxx */
|
||||
out += snprintf(db->buf + out, db->len - out,
|
||||
"Dead Node: %d\n", dlm->reco.dead_node);
|
||||
|
||||
/* What about DLM_RECO_STATE_FINALIZE? */
|
||||
if (dlm->reco.state == DLM_RECO_STATE_ACTIVE)
|
||||
state = "ACTIVE";
|
||||
else
|
||||
state = "INACTIVE";
|
||||
|
||||
/* Recovery Pid: xxxx Master: xxx State: xxxx */
|
||||
out += snprintf(db->buf + out, db->len - out,
|
||||
"Recovery Pid: %d Master: %d State: %s\n",
|
||||
dlm->dlm_reco_thread_task->pid,
|
||||
dlm->reco.new_master, state);
|
||||
|
||||
/* Recovery Map: xx xx */
|
||||
out += snprintf(db->buf + out, db->len - out, "Recovery Map: ");
|
||||
out += stringify_nodemap(dlm->recovery_map, O2NM_MAX_NODES,
|
||||
db->buf + out, db->len - out);
|
||||
out += snprintf(db->buf + out, db->len - out, "\n");
|
||||
|
||||
/* Recovery Node State: */
|
||||
out += snprintf(db->buf + out, db->len - out, "Recovery Node State:\n");
|
||||
list_for_each_entry(node, &dlm->reco.node_data, list) {
|
||||
switch (node->state) {
|
||||
case DLM_RECO_NODE_DATA_INIT:
|
||||
state = "INIT";
|
||||
break;
|
||||
case DLM_RECO_NODE_DATA_REQUESTING:
|
||||
state = "REQUESTING";
|
||||
break;
|
||||
case DLM_RECO_NODE_DATA_DEAD:
|
||||
state = "DEAD";
|
||||
break;
|
||||
case DLM_RECO_NODE_DATA_RECEIVING:
|
||||
state = "RECEIVING";
|
||||
break;
|
||||
case DLM_RECO_NODE_DATA_REQUESTED:
|
||||
state = "REQUESTED";
|
||||
break;
|
||||
case DLM_RECO_NODE_DATA_DONE:
|
||||
state = "DONE";
|
||||
break;
|
||||
case DLM_RECO_NODE_DATA_FINALIZE_SENT:
|
||||
state = "FINALIZE-SENT";
|
||||
break;
|
||||
default:
|
||||
state = "BAD";
|
||||
break;
|
||||
}
|
||||
out += snprintf(db->buf + out, db->len - out, "\t%u - %s\n",
|
||||
node->node_num, state);
|
||||
}
|
||||
|
||||
spin_unlock(&dlm->spinlock);
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
static int debug_state_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct dlm_ctxt *dlm = inode->i_private;
|
||||
struct debug_buffer *db = NULL;
|
||||
|
||||
db = debug_buffer_allocate();
|
||||
if (!db)
|
||||
goto bail;
|
||||
|
||||
db->len = debug_state_print(dlm, db);
|
||||
|
||||
file->private_data = db;
|
||||
|
||||
return 0;
|
||||
bail:
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
static struct file_operations debug_state_fops = {
|
||||
.open = debug_state_open,
|
||||
.release = debug_buffer_release,
|
||||
.read = debug_buffer_read,
|
||||
.llseek = debug_buffer_llseek,
|
||||
};
|
||||
/* end - debug state funcs */
|
||||
|
||||
/* files in subroot */
|
||||
int dlm_debug_init(struct dlm_ctxt *dlm)
|
||||
{
|
||||
struct dlm_debug_ctxt *dc = dlm->dlm_debug_ctxt;
|
||||
|
||||
/* for dumping dlm_ctxt */
|
||||
dc->debug_state_dentry = debugfs_create_file(DLM_DEBUGFS_DLM_STATE,
|
||||
S_IFREG|S_IRUSR,
|
||||
dlm->dlm_debugfs_subroot,
|
||||
dlm, &debug_state_fops);
|
||||
if (!dc->debug_state_dentry) {
|
||||
mlog_errno(-ENOMEM);
|
||||
goto bail;
|
||||
}
|
||||
|
||||
/* for dumping lockres */
|
||||
dc->debug_lockres_dentry =
|
||||
debugfs_create_file(DLM_DEBUGFS_LOCKING_STATE,
|
||||
S_IFREG|S_IRUSR,
|
||||
dlm->dlm_debugfs_subroot,
|
||||
dlm, &debug_lockres_fops);
|
||||
if (!dc->debug_lockres_dentry) {
|
||||
mlog_errno(-ENOMEM);
|
||||
goto bail;
|
||||
}
|
||||
|
||||
/* for dumping mles */
|
||||
dc->debug_mle_dentry = debugfs_create_file(DLM_DEBUGFS_MLE_STATE,
|
||||
S_IFREG|S_IRUSR,
|
||||
dlm->dlm_debugfs_subroot,
|
||||
dlm, &debug_mle_fops);
|
||||
if (!dc->debug_mle_dentry) {
|
||||
mlog_errno(-ENOMEM);
|
||||
goto bail;
|
||||
}
|
||||
|
||||
/* for dumping lockres on the purge list */
|
||||
dc->debug_purgelist_dentry =
|
||||
debugfs_create_file(DLM_DEBUGFS_PURGE_LIST,
|
||||
S_IFREG|S_IRUSR,
|
||||
dlm->dlm_debugfs_subroot,
|
||||
dlm, &debug_purgelist_fops);
|
||||
if (!dc->debug_purgelist_dentry) {
|
||||
mlog_errno(-ENOMEM);
|
||||
goto bail;
|
||||
}
|
||||
|
||||
dlm_debug_get(dc);
|
||||
return 0;
|
||||
|
||||
bail:
|
||||
dlm_debug_shutdown(dlm);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
void dlm_debug_shutdown(struct dlm_ctxt *dlm)
|
||||
{
|
||||
struct dlm_debug_ctxt *dc = dlm->dlm_debug_ctxt;
|
||||
|
||||
if (dc) {
|
||||
if (dc->debug_purgelist_dentry)
|
||||
debugfs_remove(dc->debug_purgelist_dentry);
|
||||
if (dc->debug_mle_dentry)
|
||||
debugfs_remove(dc->debug_mle_dentry);
|
||||
if (dc->debug_lockres_dentry)
|
||||
debugfs_remove(dc->debug_lockres_dentry);
|
||||
if (dc->debug_state_dentry)
|
||||
debugfs_remove(dc->debug_state_dentry);
|
||||
dlm_debug_put(dc);
|
||||
}
|
||||
}
|
||||
|
||||
/* subroot - domain dir */
|
||||
int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm)
|
||||
{
|
||||
dlm->dlm_debugfs_subroot = debugfs_create_dir(dlm->name,
|
||||
dlm_debugfs_root);
|
||||
if (!dlm->dlm_debugfs_subroot) {
|
||||
mlog_errno(-ENOMEM);
|
||||
goto bail;
|
||||
}
|
||||
|
||||
dlm->dlm_debug_ctxt = kzalloc(sizeof(struct dlm_debug_ctxt),
|
||||
GFP_KERNEL);
|
||||
if (!dlm->dlm_debug_ctxt) {
|
||||
mlog_errno(-ENOMEM);
|
||||
goto bail;
|
||||
}
|
||||
kref_init(&dlm->dlm_debug_ctxt->debug_refcnt);
|
||||
|
||||
return 0;
|
||||
bail:
|
||||
dlm_destroy_debugfs_subroot(dlm);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm)
|
||||
{
|
||||
if (dlm->dlm_debugfs_subroot)
|
||||
debugfs_remove(dlm->dlm_debugfs_subroot);
|
||||
}
|
||||
|
||||
/* debugfs root */
|
||||
int dlm_create_debugfs_root(void)
|
||||
{
|
||||
dlm_debugfs_root = debugfs_create_dir(DLM_DEBUGFS_DIR, NULL);
|
||||
if (!dlm_debugfs_root) {
|
||||
mlog_errno(-ENOMEM);
|
||||
return -ENOMEM;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void dlm_destroy_debugfs_root(void)
|
||||
{
|
||||
if (dlm_debugfs_root)
|
||||
debugfs_remove(dlm_debugfs_root);
|
||||
}
|
||||
#endif /* CONFIG_DEBUG_FS */
|
||||
|
86
fs/ocfs2/dlm/dlmdebug.h
Normal file
86
fs/ocfs2/dlm/dlmdebug.h
Normal file
@ -0,0 +1,86 @@
|
||||
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||
*
|
||||
* dlmdebug.h
|
||||
*
|
||||
* Copyright (C) 2008 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef DLMDEBUG_H
|
||||
#define DLMDEBUG_H
|
||||
|
||||
void dlm_print_one_mle(struct dlm_master_list_entry *mle);
|
||||
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
|
||||
/*
 * Per-domain debugfs bookkeeping: a refcount plus the dentry of each
 * debugfs file created for the domain by dlm_debug_init().
 */
struct dlm_debug_ctxt {
|
||||
/* initialised with kref_init() in dlm_create_debugfs_subroot() */
struct kref debug_refcnt;
|
||||
/* DLM_DEBUGFS_DLM_STATE file */
struct dentry *debug_state_dentry;
|
||||
/* DLM_DEBUGFS_LOCKING_STATE file */
struct dentry *debug_lockres_dentry;
|
||||
/* DLM_DEBUGFS_MLE_STATE file */
struct dentry *debug_mle_dentry;
|
||||
/* DLM_DEBUGFS_PURGE_LIST file */
struct dentry *debug_purgelist_dentry;
|
||||
};
|
||||
|
||||
/* Text buffer attached to file->private_data by debug_state_open(). */
struct debug_buffer {
|
||||
/*
 * Used as the snprintf() bound while the buffer is being filled;
 * debug_state_open() then overwrites it with the value returned by
 * debug_state_print().
 */
int len;
|
||||
/* storage the formatted output is written into */
char *buf;
|
||||
};
|
||||
|
||||
/*
 * NOTE(review): presumably the per-open state of the locking-state
 * debugfs file (debug_lockres_fops); the handlers are not visible here,
 * so confirm the field meanings below against them.
 */
struct debug_lockres {
|
||||
/* presumably the size of dl_buf - TODO confirm */
int dl_len;
|
||||
/* presumably the formatted output for one lock resource - TODO confirm */
char *dl_buf;
|
||||
/* domain being dumped */
struct dlm_ctxt *dl_ctxt;
|
||||
/* presumably the lock resource currently being shown - TODO confirm */
struct dlm_lock_resource *dl_res;
|
||||
};
|
||||
|
||||
int dlm_debug_init(struct dlm_ctxt *dlm);
|
||||
void dlm_debug_shutdown(struct dlm_ctxt *dlm);
|
||||
|
||||
int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm);
|
||||
void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm);
|
||||
|
||||
int dlm_create_debugfs_root(void);
|
||||
void dlm_destroy_debugfs_root(void);
|
||||
|
||||
#else
|
||||
|
||||
/*
 * No-op stand-ins used when CONFIG_DEBUG_FS is disabled. They are
 * `static inline` because this header is included by several source
 * files: a plain `static` definition would be emitted in every one of
 * them and trigger unused-function warnings wherever a stub is not
 * called.
 */

/* Nothing to create without debugfs; always succeeds. */
static inline int dlm_debug_init(struct dlm_ctxt *dlm)
{
	return 0;
}

/* Nothing to tear down without debugfs. */
static inline void dlm_debug_shutdown(struct dlm_ctxt *dlm)
{
}

/* No per-domain directory without debugfs; always succeeds. */
static inline int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm)
{
	return 0;
}

/* No per-domain directory to remove without debugfs. */
static inline void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm)
{
}

/* No top-level directory without debugfs; always succeeds. */
static inline int dlm_create_debugfs_root(void)
{
	return 0;
}

/* No top-level directory to remove without debugfs. */
static inline void dlm_destroy_debugfs_root(void)
{
}
|
||||
|
||||
#endif /* CONFIG_DEBUG_FS */
|
||||
#endif /* DLMDEBUG_H */
|
@ -33,6 +33,7 @@
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/debugfs.h>
|
||||
|
||||
#include "cluster/heartbeat.h"
|
||||
#include "cluster/nodemanager.h"
|
||||
@ -40,8 +41,8 @@
|
||||
|
||||
#include "dlmapi.h"
|
||||
#include "dlmcommon.h"
|
||||
|
||||
#include "dlmdomain.h"
|
||||
#include "dlmdebug.h"
|
||||
|
||||
#include "dlmver.h"
|
||||
|
||||
@ -298,6 +299,8 @@ static int dlm_wait_on_domain_helper(const char *domain)
|
||||
|
||||
static void dlm_free_ctxt_mem(struct dlm_ctxt *dlm)
|
||||
{
|
||||
dlm_destroy_debugfs_subroot(dlm);
|
||||
|
||||
if (dlm->lockres_hash)
|
||||
dlm_free_pagevec((void **)dlm->lockres_hash, DLM_HASH_PAGES);
|
||||
|
||||
@ -395,6 +398,7 @@ static void dlm_destroy_dlm_worker(struct dlm_ctxt *dlm)
|
||||
static void dlm_complete_dlm_shutdown(struct dlm_ctxt *dlm)
|
||||
{
|
||||
dlm_unregister_domain_handlers(dlm);
|
||||
dlm_debug_shutdown(dlm);
|
||||
dlm_complete_thread(dlm);
|
||||
dlm_complete_recovery_thread(dlm);
|
||||
dlm_destroy_dlm_worker(dlm);
|
||||
@ -644,6 +648,7 @@ int dlm_shutting_down(struct dlm_ctxt *dlm)
|
||||
void dlm_unregister_domain(struct dlm_ctxt *dlm)
|
||||
{
|
||||
int leave = 0;
|
||||
struct dlm_lock_resource *res;
|
||||
|
||||
spin_lock(&dlm_domain_lock);
|
||||
BUG_ON(dlm->dlm_state != DLM_CTXT_JOINED);
|
||||
@ -673,6 +678,15 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm)
|
||||
msleep(500);
|
||||
mlog(0, "%s: more migration to do\n", dlm->name);
|
||||
}
|
||||
|
||||
/* This list should be empty. If not, print remaining lockres */
|
||||
if (!list_empty(&dlm->tracking_list)) {
|
||||
mlog(ML_ERROR, "Following lockres' are still on the "
|
||||
"tracking list:\n");
|
||||
list_for_each_entry(res, &dlm->tracking_list, tracking)
|
||||
dlm_print_one_lock_resource(res);
|
||||
}
|
||||
|
||||
dlm_mark_domain_leaving(dlm);
|
||||
dlm_leave_domain(dlm);
|
||||
dlm_complete_dlm_shutdown(dlm);
|
||||
@ -1405,6 +1419,12 @@ static int dlm_join_domain(struct dlm_ctxt *dlm)
|
||||
goto bail;
|
||||
}
|
||||
|
||||
status = dlm_debug_init(dlm);
|
||||
if (status < 0) {
|
||||
mlog_errno(status);
|
||||
goto bail;
|
||||
}
|
||||
|
||||
status = dlm_launch_thread(dlm);
|
||||
if (status < 0) {
|
||||
mlog_errno(status);
|
||||
@ -1472,6 +1492,7 @@ bail:
|
||||
|
||||
if (status) {
|
||||
dlm_unregister_domain_handlers(dlm);
|
||||
dlm_debug_shutdown(dlm);
|
||||
dlm_complete_thread(dlm);
|
||||
dlm_complete_recovery_thread(dlm);
|
||||
dlm_destroy_dlm_worker(dlm);
|
||||
@ -1484,6 +1505,7 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
|
||||
u32 key)
|
||||
{
|
||||
int i;
|
||||
int ret;
|
||||
struct dlm_ctxt *dlm = NULL;
|
||||
|
||||
dlm = kzalloc(sizeof(*dlm), GFP_KERNEL);
|
||||
@ -1516,6 +1538,15 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
|
||||
dlm->key = key;
|
||||
dlm->node_num = o2nm_this_node();
|
||||
|
||||
ret = dlm_create_debugfs_subroot(dlm);
|
||||
if (ret < 0) {
|
||||
dlm_free_pagevec((void **)dlm->lockres_hash, DLM_HASH_PAGES);
|
||||
kfree(dlm->name);
|
||||
kfree(dlm);
|
||||
dlm = NULL;
|
||||
goto leave;
|
||||
}
|
||||
|
||||
spin_lock_init(&dlm->spinlock);
|
||||
spin_lock_init(&dlm->master_lock);
|
||||
spin_lock_init(&dlm->ast_lock);
|
||||
@ -1526,6 +1557,7 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
|
||||
INIT_LIST_HEAD(&dlm->reco.node_data);
|
||||
INIT_LIST_HEAD(&dlm->purge_list);
|
||||
INIT_LIST_HEAD(&dlm->dlm_domain_handlers);
|
||||
INIT_LIST_HEAD(&dlm->tracking_list);
|
||||
dlm->reco.state = 0;
|
||||
|
||||
INIT_LIST_HEAD(&dlm->pending_asts);
|
||||
@ -1816,21 +1848,49 @@ static int __init dlm_init(void)
|
||||
dlm_print_version();
|
||||
|
||||
status = dlm_init_mle_cache();
|
||||
if (status)
|
||||
return -1;
|
||||
if (status) {
|
||||
mlog(ML_ERROR, "Could not create o2dlm_mle slabcache\n");
|
||||
goto error;
|
||||
}
|
||||
|
||||
status = dlm_init_master_caches();
|
||||
if (status) {
|
||||
mlog(ML_ERROR, "Could not create o2dlm_lockres and "
|
||||
"o2dlm_lockname slabcaches\n");
|
||||
goto error;
|
||||
}
|
||||
|
||||
status = dlm_init_lock_cache();
|
||||
if (status) {
|
||||
mlog(ML_ERROR, "Count not create o2dlm_lock slabcache\n");
|
||||
goto error;
|
||||
}
|
||||
|
||||
status = dlm_register_net_handlers();
|
||||
if (status) {
|
||||
dlm_destroy_mle_cache();
|
||||
return -1;
|
||||
mlog(ML_ERROR, "Unable to register network handlers\n");
|
||||
goto error;
|
||||
}
|
||||
|
||||
status = dlm_create_debugfs_root();
|
||||
if (status)
|
||||
goto error;
|
||||
|
||||
return 0;
|
||||
error:
|
||||
dlm_unregister_net_handlers();
|
||||
dlm_destroy_lock_cache();
|
||||
dlm_destroy_master_caches();
|
||||
dlm_destroy_mle_cache();
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
 * Module exit: remove the o2dlm debugfs root directory, unregister the
 * network handlers, then destroy the lock, master (lockres/lockname)
 * and mle slab caches.
 */
static void __exit dlm_exit (void)
|
||||
{
|
||||
dlm_destroy_debugfs_root();
|
||||
dlm_unregister_net_handlers();
|
||||
dlm_destroy_lock_cache();
|
||||
dlm_destroy_master_caches();
|
||||
dlm_destroy_mle_cache();
|
||||
}
|
||||
|
||||
|
@ -53,6 +53,8 @@
|
||||
#define MLOG_MASK_PREFIX ML_DLM
|
||||
#include "cluster/masklog.h"
|
||||
|
||||
static struct kmem_cache *dlm_lock_cache = NULL;
|
||||
|
||||
static DEFINE_SPINLOCK(dlm_cookie_lock);
|
||||
static u64 dlm_next_cookie = 1;
|
||||
|
||||
@ -64,6 +66,22 @@ static void dlm_init_lock(struct dlm_lock *newlock, int type,
|
||||
static void dlm_lock_release(struct kref *kref);
|
||||
static void dlm_lock_detach_lockres(struct dlm_lock *lock);
|
||||
|
||||
int dlm_init_lock_cache(void)
|
||||
{
|
||||
dlm_lock_cache = kmem_cache_create("o2dlm_lock",
|
||||
sizeof(struct dlm_lock),
|
||||
0, SLAB_HWCACHE_ALIGN, NULL);
|
||||
if (dlm_lock_cache == NULL)
|
||||
return -ENOMEM;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void dlm_destroy_lock_cache(void)
|
||||
{
|
||||
if (dlm_lock_cache)
|
||||
kmem_cache_destroy(dlm_lock_cache);
|
||||
}
|
||||
|
||||
/* Tell us whether we can grant a new lock request.
|
||||
* locking:
|
||||
* caller needs: res->spinlock
|
||||
@ -353,7 +371,7 @@ static void dlm_lock_release(struct kref *kref)
|
||||
mlog(0, "freeing kernel-allocated lksb\n");
|
||||
kfree(lock->lksb);
|
||||
}
|
||||
kfree(lock);
|
||||
kmem_cache_free(dlm_lock_cache, lock);
|
||||
}
|
||||
|
||||
/* associate a lock with it's lockres, getting a ref on the lockres */
|
||||
@ -412,7 +430,7 @@ struct dlm_lock * dlm_new_lock(int type, u8 node, u64 cookie,
|
||||
struct dlm_lock *lock;
|
||||
int kernel_allocated = 0;
|
||||
|
||||
lock = kzalloc(sizeof(*lock), GFP_NOFS);
|
||||
lock = (struct dlm_lock *) kmem_cache_zalloc(dlm_lock_cache, GFP_NOFS);
|
||||
if (!lock)
|
||||
return NULL;
|
||||
|
||||
|
@ -48,47 +48,11 @@
|
||||
#include "dlmapi.h"
|
||||
#include "dlmcommon.h"
|
||||
#include "dlmdomain.h"
|
||||
#include "dlmdebug.h"
|
||||
|
||||
#define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_MASTER)
|
||||
#include "cluster/masklog.h"
|
||||
|
||||
enum dlm_mle_type {
|
||||
DLM_MLE_BLOCK,
|
||||
DLM_MLE_MASTER,
|
||||
DLM_MLE_MIGRATION
|
||||
};
|
||||
|
||||
struct dlm_lock_name
|
||||
{
|
||||
u8 len;
|
||||
u8 name[DLM_LOCKID_NAME_MAX];
|
||||
};
|
||||
|
||||
struct dlm_master_list_entry
|
||||
{
|
||||
struct list_head list;
|
||||
struct list_head hb_events;
|
||||
struct dlm_ctxt *dlm;
|
||||
spinlock_t spinlock;
|
||||
wait_queue_head_t wq;
|
||||
atomic_t woken;
|
||||
struct kref mle_refs;
|
||||
int inuse;
|
||||
unsigned long maybe_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
|
||||
unsigned long vote_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
|
||||
unsigned long response_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
|
||||
unsigned long node_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
|
||||
u8 master;
|
||||
u8 new_master;
|
||||
enum dlm_mle_type type;
|
||||
struct o2hb_callback_func mle_hb_up;
|
||||
struct o2hb_callback_func mle_hb_down;
|
||||
union {
|
||||
struct dlm_lock_resource *res;
|
||||
struct dlm_lock_name name;
|
||||
} u;
|
||||
};
|
||||
|
||||
static void dlm_mle_node_down(struct dlm_ctxt *dlm,
|
||||
struct dlm_master_list_entry *mle,
|
||||
struct o2nm_node *node,
|
||||
@ -128,98 +92,10 @@ static inline int dlm_mle_equal(struct dlm_ctxt *dlm,
|
||||
return 1;
|
||||
}
|
||||
|
||||
#define dlm_print_nodemap(m) _dlm_print_nodemap(m,#m)
|
||||
static void _dlm_print_nodemap(unsigned long *map, const char *mapname)
|
||||
{
|
||||
int i;
|
||||
printk("%s=[ ", mapname);
|
||||
for (i=0; i<O2NM_MAX_NODES; i++)
|
||||
if (test_bit(i, map))
|
||||
printk("%d ", i);
|
||||
printk("]");
|
||||
}
|
||||
|
||||
static void dlm_print_one_mle(struct dlm_master_list_entry *mle)
|
||||
{
|
||||
int refs;
|
||||
char *type;
|
||||
char attached;
|
||||
u8 master;
|
||||
unsigned int namelen;
|
||||
const char *name;
|
||||
struct kref *k;
|
||||
unsigned long *maybe = mle->maybe_map,
|
||||
*vote = mle->vote_map,
|
||||
*resp = mle->response_map,
|
||||
*node = mle->node_map;
|
||||
|
||||
k = &mle->mle_refs;
|
||||
if (mle->type == DLM_MLE_BLOCK)
|
||||
type = "BLK";
|
||||
else if (mle->type == DLM_MLE_MASTER)
|
||||
type = "MAS";
|
||||
else
|
||||
type = "MIG";
|
||||
refs = atomic_read(&k->refcount);
|
||||
master = mle->master;
|
||||
attached = (list_empty(&mle->hb_events) ? 'N' : 'Y');
|
||||
|
||||
if (mle->type != DLM_MLE_MASTER) {
|
||||
namelen = mle->u.name.len;
|
||||
name = mle->u.name.name;
|
||||
} else {
|
||||
namelen = mle->u.res->lockname.len;
|
||||
name = mle->u.res->lockname.name;
|
||||
}
|
||||
|
||||
mlog(ML_NOTICE, "%.*s: %3s refs=%3d mas=%3u new=%3u evt=%c inuse=%d ",
|
||||
namelen, name, type, refs, master, mle->new_master, attached,
|
||||
mle->inuse);
|
||||
dlm_print_nodemap(maybe);
|
||||
printk(", ");
|
||||
dlm_print_nodemap(vote);
|
||||
printk(", ");
|
||||
dlm_print_nodemap(resp);
|
||||
printk(", ");
|
||||
dlm_print_nodemap(node);
|
||||
printk(", ");
|
||||
printk("\n");
|
||||
}
|
||||
|
||||
#if 0
|
||||
/* Code here is included but defined out as it aids debugging */
|
||||
|
||||
static void dlm_dump_mles(struct dlm_ctxt *dlm)
|
||||
{
|
||||
struct dlm_master_list_entry *mle;
|
||||
|
||||
mlog(ML_NOTICE, "dumping all mles for domain %s:\n", dlm->name);
|
||||
spin_lock(&dlm->master_lock);
|
||||
list_for_each_entry(mle, &dlm->master_list, list)
|
||||
dlm_print_one_mle(mle);
|
||||
spin_unlock(&dlm->master_lock);
|
||||
}
|
||||
|
||||
int dlm_dump_all_mles(const char __user *data, unsigned int len)
|
||||
{
|
||||
struct dlm_ctxt *dlm;
|
||||
|
||||
spin_lock(&dlm_domain_lock);
|
||||
list_for_each_entry(dlm, &dlm_domains, list) {
|
||||
mlog(ML_NOTICE, "found dlm: %p, name=%s\n", dlm, dlm->name);
|
||||
dlm_dump_mles(dlm);
|
||||
}
|
||||
spin_unlock(&dlm_domain_lock);
|
||||
return len;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dlm_dump_all_mles);
|
||||
|
||||
#endif /* 0 */
|
||||
|
||||
|
||||
static struct kmem_cache *dlm_lockres_cache = NULL;
|
||||
static struct kmem_cache *dlm_lockname_cache = NULL;
|
||||
static struct kmem_cache *dlm_mle_cache = NULL;
|
||||
|
||||
|
||||
static void dlm_mle_release(struct kref *kref);
|
||||
static void dlm_init_mle(struct dlm_master_list_entry *mle,
|
||||
enum dlm_mle_type type,
|
||||
@ -507,7 +383,7 @@ static void dlm_mle_node_up(struct dlm_ctxt *dlm,
|
||||
|
||||
int dlm_init_mle_cache(void)
|
||||
{
|
||||
dlm_mle_cache = kmem_cache_create("dlm_mle_cache",
|
||||
dlm_mle_cache = kmem_cache_create("o2dlm_mle",
|
||||
sizeof(struct dlm_master_list_entry),
|
||||
0, SLAB_HWCACHE_ALIGN,
|
||||
NULL);
|
||||
@ -560,6 +436,35 @@ static void dlm_mle_release(struct kref *kref)
|
||||
* LOCK RESOURCE FUNCTIONS
|
||||
*/
|
||||
|
||||
int dlm_init_master_caches(void)
|
||||
{
|
||||
dlm_lockres_cache = kmem_cache_create("o2dlm_lockres",
|
||||
sizeof(struct dlm_lock_resource),
|
||||
0, SLAB_HWCACHE_ALIGN, NULL);
|
||||
if (!dlm_lockres_cache)
|
||||
goto bail;
|
||||
|
||||
dlm_lockname_cache = kmem_cache_create("o2dlm_lockname",
|
||||
DLM_LOCKID_NAME_MAX, 0,
|
||||
SLAB_HWCACHE_ALIGN, NULL);
|
||||
if (!dlm_lockname_cache)
|
||||
goto bail;
|
||||
|
||||
return 0;
|
||||
bail:
|
||||
dlm_destroy_master_caches();
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
void dlm_destroy_master_caches(void)
|
||||
{
|
||||
if (dlm_lockname_cache)
|
||||
kmem_cache_destroy(dlm_lockname_cache);
|
||||
|
||||
if (dlm_lockres_cache)
|
||||
kmem_cache_destroy(dlm_lockres_cache);
|
||||
}
|
||||
|
||||
static void dlm_set_lockres_owner(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *res,
|
||||
u8 owner)
|
||||
@ -610,6 +515,14 @@ static void dlm_lockres_release(struct kref *kref)
|
||||
mlog(0, "destroying lockres %.*s\n", res->lockname.len,
|
||||
res->lockname.name);
|
||||
|
||||
if (!list_empty(&res->tracking))
|
||||
list_del_init(&res->tracking);
|
||||
else {
|
||||
mlog(ML_ERROR, "Resource %.*s not on the Tracking list\n",
|
||||
res->lockname.len, res->lockname.name);
|
||||
dlm_print_one_lock_resource(res);
|
||||
}
|
||||
|
||||
if (!hlist_unhashed(&res->hash_node) ||
|
||||
!list_empty(&res->granted) ||
|
||||
!list_empty(&res->converting) ||
|
||||
@ -642,9 +555,9 @@ static void dlm_lockres_release(struct kref *kref)
|
||||
BUG_ON(!list_empty(&res->recovering));
|
||||
BUG_ON(!list_empty(&res->purge));
|
||||
|
||||
kfree(res->lockname.name);
|
||||
kmem_cache_free(dlm_lockname_cache, (void *)res->lockname.name);
|
||||
|
||||
kfree(res);
|
||||
kmem_cache_free(dlm_lockres_cache, res);
|
||||
}
|
||||
|
||||
void dlm_lockres_put(struct dlm_lock_resource *res)
|
||||
@ -677,6 +590,7 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm,
|
||||
INIT_LIST_HEAD(&res->dirty);
|
||||
INIT_LIST_HEAD(&res->recovering);
|
||||
INIT_LIST_HEAD(&res->purge);
|
||||
INIT_LIST_HEAD(&res->tracking);
|
||||
atomic_set(&res->asts_reserved, 0);
|
||||
res->migration_pending = 0;
|
||||
res->inflight_locks = 0;
|
||||
@ -692,6 +606,8 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm,
|
||||
|
||||
res->last_used = 0;
|
||||
|
||||
list_add_tail(&res->tracking, &dlm->tracking_list);
|
||||
|
||||
memset(res->lvb, 0, DLM_LVB_LEN);
|
||||
memset(res->refmap, 0, sizeof(res->refmap));
|
||||
}
|
||||
@ -700,20 +616,28 @@ struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm,
|
||||
const char *name,
|
||||
unsigned int namelen)
|
||||
{
|
||||
struct dlm_lock_resource *res;
|
||||
struct dlm_lock_resource *res = NULL;
|
||||
|
||||
res = kmalloc(sizeof(struct dlm_lock_resource), GFP_NOFS);
|
||||
res = (struct dlm_lock_resource *)
|
||||
kmem_cache_zalloc(dlm_lockres_cache, GFP_NOFS);
|
||||
if (!res)
|
||||
return NULL;
|
||||
goto error;
|
||||
|
||||
res->lockname.name = kmalloc(namelen, GFP_NOFS);
|
||||
if (!res->lockname.name) {
|
||||
kfree(res);
|
||||
return NULL;
|
||||
}
|
||||
res->lockname.name = (char *)
|
||||
kmem_cache_zalloc(dlm_lockname_cache, GFP_NOFS);
|
||||
if (!res->lockname.name)
|
||||
goto error;
|
||||
|
||||
dlm_init_lockres(dlm, res, name, namelen);
|
||||
return res;
|
||||
|
||||
error:
|
||||
if (res && res->lockname.name)
|
||||
kmem_cache_free(dlm_lockname_cache, (void *)res->lockname.name);
|
||||
|
||||
if (res)
|
||||
kmem_cache_free(dlm_lockres_cache, res);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void __dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm,
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -58,7 +58,7 @@ struct ocfs2_meta_lvb {
|
||||
#define OCFS2_LOCK_NONBLOCK (0x04)
|
||||
|
||||
int ocfs2_dlm_init(struct ocfs2_super *osb);
|
||||
void ocfs2_dlm_shutdown(struct ocfs2_super *osb);
|
||||
void ocfs2_dlm_shutdown(struct ocfs2_super *osb, int hangup_pending);
|
||||
void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res);
|
||||
void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
|
||||
enum ocfs2_lock_type type,
|
||||
@ -114,5 +114,6 @@ void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb);
|
||||
struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void);
|
||||
void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug);
|
||||
|
||||
extern const struct dlm_protocol_version ocfs2_locking_protocol;
|
||||
/* To set the locking protocol on module initialization */
|
||||
void ocfs2_set_locking_protocol(void);
|
||||
#endif /* DLMGLUE_H */
|
||||
|
@ -2242,7 +2242,7 @@ const struct file_operations ocfs2_fops = {
|
||||
.open = ocfs2_file_open,
|
||||
.aio_read = ocfs2_file_aio_read,
|
||||
.aio_write = ocfs2_file_aio_write,
|
||||
.ioctl = ocfs2_ioctl,
|
||||
.unlocked_ioctl = ocfs2_ioctl,
|
||||
#ifdef CONFIG_COMPAT
|
||||
.compat_ioctl = ocfs2_compat_ioctl,
|
||||
#endif
|
||||
@ -2258,7 +2258,7 @@ const struct file_operations ocfs2_dops = {
|
||||
.fsync = ocfs2_sync_file,
|
||||
.release = ocfs2_dir_release,
|
||||
.open = ocfs2_dir_open,
|
||||
.ioctl = ocfs2_ioctl,
|
||||
.unlocked_ioctl = ocfs2_ioctl,
|
||||
#ifdef CONFIG_COMPAT
|
||||
.compat_ioctl = ocfs2_compat_ioctl,
|
||||
#endif
|
||||
|
@ -28,9 +28,6 @@
|
||||
#include <linux/types.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/kmod.h>
|
||||
|
||||
#include <dlm/dlmapi.h>
|
||||
|
||||
#define MLOG_MASK_PREFIX ML_SUPER
|
||||
#include <cluster/masklog.h>
|
||||
@ -48,7 +45,6 @@ static inline void __ocfs2_node_map_set_bit(struct ocfs2_node_map *map,
|
||||
int bit);
|
||||
static inline void __ocfs2_node_map_clear_bit(struct ocfs2_node_map *map,
|
||||
int bit);
|
||||
static inline int __ocfs2_node_map_is_empty(struct ocfs2_node_map *map);
|
||||
|
||||
/* special case -1 for now
|
||||
* TODO: should *really* make sure the calling func never passes -1!! */
|
||||
@ -62,23 +58,23 @@ static void ocfs2_node_map_init(struct ocfs2_node_map *map)
|
||||
void ocfs2_init_node_maps(struct ocfs2_super *osb)
|
||||
{
|
||||
spin_lock_init(&osb->node_map_lock);
|
||||
ocfs2_node_map_init(&osb->recovery_map);
|
||||
ocfs2_node_map_init(&osb->osb_recovering_orphan_dirs);
|
||||
}
|
||||
|
||||
static void ocfs2_do_node_down(int node_num,
|
||||
struct ocfs2_super *osb)
|
||||
void ocfs2_do_node_down(int node_num, void *data)
|
||||
{
|
||||
struct ocfs2_super *osb = data;
|
||||
|
||||
BUG_ON(osb->node_num == node_num);
|
||||
|
||||
mlog(0, "ocfs2: node down event for %d\n", node_num);
|
||||
|
||||
if (!osb->dlm) {
|
||||
if (!osb->cconn) {
|
||||
/*
|
||||
* No DLM means we're not even ready to participate yet.
|
||||
* We check the slots after the DLM comes up, so we will
|
||||
* notice the node death then. We can safely ignore it
|
||||
* here.
|
||||
* No cluster connection means we're not even ready to
|
||||
* participate yet. We check the slots after the cluster
|
||||
* comes up, so we will notice the node death then. We
|
||||
* can safely ignore it here.
|
||||
*/
|
||||
return;
|
||||
}
|
||||
@ -86,61 +82,6 @@ static void ocfs2_do_node_down(int node_num,
|
||||
ocfs2_recovery_thread(osb, node_num);
|
||||
}
|
||||
|
||||
/* Called from the dlm when it's about to evict a node. We may also
|
||||
* get a heartbeat callback later. */
|
||||
static void ocfs2_dlm_eviction_cb(int node_num,
|
||||
void *data)
|
||||
{
|
||||
struct ocfs2_super *osb = (struct ocfs2_super *) data;
|
||||
struct super_block *sb = osb->sb;
|
||||
|
||||
mlog(ML_NOTICE, "device (%u,%u): dlm has evicted node %d\n",
|
||||
MAJOR(sb->s_dev), MINOR(sb->s_dev), node_num);
|
||||
|
||||
ocfs2_do_node_down(node_num, osb);
|
||||
}
|
||||
|
||||
void ocfs2_setup_hb_callbacks(struct ocfs2_super *osb)
|
||||
{
|
||||
/* Not exactly a heartbeat callback, but leads to essentially
|
||||
* the same path so we set it up here. */
|
||||
dlm_setup_eviction_cb(&osb->osb_eviction_cb,
|
||||
ocfs2_dlm_eviction_cb,
|
||||
osb);
|
||||
}
|
||||
|
||||
void ocfs2_stop_heartbeat(struct ocfs2_super *osb)
|
||||
{
|
||||
int ret;
|
||||
char *argv[5], *envp[3];
|
||||
|
||||
if (ocfs2_mount_local(osb))
|
||||
return;
|
||||
|
||||
if (!osb->uuid_str) {
|
||||
/* This can happen if we don't get far enough in mount... */
|
||||
mlog(0, "No UUID with which to stop heartbeat!\n\n");
|
||||
return;
|
||||
}
|
||||
|
||||
argv[0] = (char *)o2nm_get_hb_ctl_path();
|
||||
argv[1] = "-K";
|
||||
argv[2] = "-u";
|
||||
argv[3] = osb->uuid_str;
|
||||
argv[4] = NULL;
|
||||
|
||||
mlog(0, "Run: %s %s %s %s\n", argv[0], argv[1], argv[2], argv[3]);
|
||||
|
||||
/* minimal command environment taken from cpu_run_sbin_hotplug */
|
||||
envp[0] = "HOME=/";
|
||||
envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
|
||||
envp[2] = NULL;
|
||||
|
||||
ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
|
||||
if (ret < 0)
|
||||
mlog_errno(ret);
|
||||
}
|
||||
|
||||
static inline void __ocfs2_node_map_set_bit(struct ocfs2_node_map *map,
|
||||
int bit)
|
||||
{
|
||||
@ -192,112 +133,3 @@ int ocfs2_node_map_test_bit(struct ocfs2_super *osb,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline int __ocfs2_node_map_is_empty(struct ocfs2_node_map *map)
|
||||
{
|
||||
int bit;
|
||||
bit = find_next_bit(map->map, map->num_nodes, 0);
|
||||
if (bit < map->num_nodes)
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
int ocfs2_node_map_is_empty(struct ocfs2_super *osb,
|
||||
struct ocfs2_node_map *map)
|
||||
{
|
||||
int ret;
|
||||
BUG_ON(map->num_nodes == 0);
|
||||
spin_lock(&osb->node_map_lock);
|
||||
ret = __ocfs2_node_map_is_empty(map);
|
||||
spin_unlock(&osb->node_map_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
#if 0
|
||||
|
||||
static void __ocfs2_node_map_dup(struct ocfs2_node_map *target,
|
||||
struct ocfs2_node_map *from)
|
||||
{
|
||||
BUG_ON(from->num_nodes == 0);
|
||||
ocfs2_node_map_init(target);
|
||||
__ocfs2_node_map_set(target, from);
|
||||
}
|
||||
|
||||
/* returns 1 if bit is the only bit set in target, 0 otherwise */
|
||||
int ocfs2_node_map_is_only(struct ocfs2_super *osb,
|
||||
struct ocfs2_node_map *target,
|
||||
int bit)
|
||||
{
|
||||
struct ocfs2_node_map temp;
|
||||
int ret;
|
||||
|
||||
spin_lock(&osb->node_map_lock);
|
||||
__ocfs2_node_map_dup(&temp, target);
|
||||
__ocfs2_node_map_clear_bit(&temp, bit);
|
||||
ret = __ocfs2_node_map_is_empty(&temp);
|
||||
spin_unlock(&osb->node_map_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void __ocfs2_node_map_set(struct ocfs2_node_map *target,
|
||||
struct ocfs2_node_map *from)
|
||||
{
|
||||
int num_longs, i;
|
||||
|
||||
BUG_ON(target->num_nodes != from->num_nodes);
|
||||
BUG_ON(target->num_nodes == 0);
|
||||
|
||||
num_longs = BITS_TO_LONGS(target->num_nodes);
|
||||
for (i = 0; i < num_longs; i++)
|
||||
target->map[i] = from->map[i];
|
||||
}
|
||||
|
||||
#endif /* 0 */
|
||||
|
||||
/* Returns whether the recovery bit was actually set - it may not be
|
||||
* if a node is still marked as needing recovery */
|
||||
int ocfs2_recovery_map_set(struct ocfs2_super *osb,
|
||||
int num)
|
||||
{
|
||||
int set = 0;
|
||||
|
||||
spin_lock(&osb->node_map_lock);
|
||||
|
||||
if (!test_bit(num, osb->recovery_map.map)) {
|
||||
__ocfs2_node_map_set_bit(&osb->recovery_map, num);
|
||||
set = 1;
|
||||
}
|
||||
|
||||
spin_unlock(&osb->node_map_lock);
|
||||
|
||||
return set;
|
||||
}
|
||||
|
||||
void ocfs2_recovery_map_clear(struct ocfs2_super *osb,
|
||||
int num)
|
||||
{
|
||||
ocfs2_node_map_clear_bit(osb, &osb->recovery_map, num);
|
||||
}
|
||||
|
||||
int ocfs2_node_map_iterate(struct ocfs2_super *osb,
|
||||
struct ocfs2_node_map *map,
|
||||
int idx)
|
||||
{
|
||||
int i = idx;
|
||||
|
||||
idx = O2NM_INVALID_NODE_NUM;
|
||||
spin_lock(&osb->node_map_lock);
|
||||
if ((i != O2NM_INVALID_NODE_NUM) &&
|
||||
(i >= 0) &&
|
||||
(i < map->num_nodes)) {
|
||||
while(i < map->num_nodes) {
|
||||
if (test_bit(i, map->map)) {
|
||||
idx = i;
|
||||
break;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
}
|
||||
spin_unlock(&osb->node_map_lock);
|
||||
return idx;
|
||||
}
|
||||
|
@ -28,13 +28,10 @@
|
||||
|
||||
void ocfs2_init_node_maps(struct ocfs2_super *osb);
|
||||
|
||||
void ocfs2_setup_hb_callbacks(struct ocfs2_super *osb);
|
||||
void ocfs2_stop_heartbeat(struct ocfs2_super *osb);
|
||||
void ocfs2_do_node_down(int node_num, void *data);
|
||||
|
||||
/* node map functions - used to keep track of mounted and in-recovery
|
||||
* nodes. */
|
||||
int ocfs2_node_map_is_empty(struct ocfs2_super *osb,
|
||||
struct ocfs2_node_map *map);
|
||||
void ocfs2_node_map_set_bit(struct ocfs2_super *osb,
|
||||
struct ocfs2_node_map *map,
|
||||
int bit);
|
||||
@ -44,17 +41,5 @@ void ocfs2_node_map_clear_bit(struct ocfs2_super *osb,
|
||||
int ocfs2_node_map_test_bit(struct ocfs2_super *osb,
|
||||
struct ocfs2_node_map *map,
|
||||
int bit);
|
||||
int ocfs2_node_map_iterate(struct ocfs2_super *osb,
|
||||
struct ocfs2_node_map *map,
|
||||
int idx);
|
||||
static inline int ocfs2_node_map_first_set_bit(struct ocfs2_super *osb,
|
||||
struct ocfs2_node_map *map)
|
||||
{
|
||||
return ocfs2_node_map_iterate(osb, map, 0);
|
||||
}
|
||||
int ocfs2_recovery_map_set(struct ocfs2_super *osb,
|
||||
int num);
|
||||
void ocfs2_recovery_map_clear(struct ocfs2_super *osb,
|
||||
int num);
|
||||
|
||||
#endif /* OCFS2_HEARTBEAT_H */
|
||||
|
@ -7,6 +7,7 @@
|
||||
|
||||
#include <linux/fs.h>
|
||||
#include <linux/mount.h>
|
||||
#include <linux/smp_lock.h>
|
||||
|
||||
#define MLOG_MASK_PREFIX ML_INODE
|
||||
#include <cluster/masklog.h>
|
||||
@ -112,9 +113,9 @@ bail:
|
||||
return status;
|
||||
}
|
||||
|
||||
int ocfs2_ioctl(struct inode * inode, struct file * filp,
|
||||
unsigned int cmd, unsigned long arg)
|
||||
long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
|
||||
{
|
||||
struct inode *inode = filp->f_path.dentry->d_inode;
|
||||
unsigned int flags;
|
||||
int new_clusters;
|
||||
int status;
|
||||
@ -168,9 +169,6 @@ int ocfs2_ioctl(struct inode * inode, struct file * filp,
|
||||
#ifdef CONFIG_COMPAT
|
||||
long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg)
|
||||
{
|
||||
struct inode *inode = file->f_path.dentry->d_inode;
|
||||
int ret;
|
||||
|
||||
switch (cmd) {
|
||||
case OCFS2_IOC32_GETFLAGS:
|
||||
cmd = OCFS2_IOC_GETFLAGS;
|
||||
@ -190,9 +188,6 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg)
|
||||
return -ENOIOCTLCMD;
|
||||
}
|
||||
|
||||
lock_kernel();
|
||||
ret = ocfs2_ioctl(inode, file, cmd, arg);
|
||||
unlock_kernel();
|
||||
return ret;
|
||||
return ocfs2_ioctl(file, cmd, arg);
|
||||
}
|
||||
#endif
|
||||
|
@ -10,8 +10,7 @@
|
||||
#ifndef OCFS2_IOCTL_H
|
||||
#define OCFS2_IOCTL_H
|
||||
|
||||
int ocfs2_ioctl(struct inode * inode, struct file * filp,
|
||||
unsigned int cmd, unsigned long arg);
|
||||
long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
|
||||
long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg);
|
||||
|
||||
#endif /* OCFS2_IOCTL_H */
|
||||
|
@ -64,6 +64,137 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
|
||||
int slot);
|
||||
static int ocfs2_commit_thread(void *arg);
|
||||
|
||||
|
||||
/*
|
||||
* The recovery map is a simple array of node numbers to recover.
|
||||
* It is protected by the osb_lock.
|
||||
*/
|
||||
|
||||
struct ocfs2_recovery_map {
	/* Number of leading slots of rm_entries currently in use. */
	unsigned int rm_used;
	/* Node numbers queued for recovery; only [0, rm_used) is meaningful. */
	unsigned int *rm_entries;
};
|
||||
|
||||
int ocfs2_recovery_init(struct ocfs2_super *osb)
|
||||
{
|
||||
struct ocfs2_recovery_map *rm;
|
||||
|
||||
mutex_init(&osb->recovery_lock);
|
||||
osb->disable_recovery = 0;
|
||||
osb->recovery_thread_task = NULL;
|
||||
init_waitqueue_head(&osb->recovery_event);
|
||||
|
||||
rm = kzalloc(sizeof(struct ocfs2_recovery_map) +
|
||||
osb->max_slots * sizeof(unsigned int),
|
||||
GFP_KERNEL);
|
||||
if (!rm) {
|
||||
mlog_errno(-ENOMEM);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
rm->rm_entries = (unsigned int *)((char *)rm +
|
||||
sizeof(struct ocfs2_recovery_map));
|
||||
osb->recovery_map = rm;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* we can't grab the goofy sem lock from inside wait_event, so we use
|
||||
* memory barriers to make sure that we'll see the null task before
|
||||
* being woken up */
|
||||
static int ocfs2_recovery_thread_running(struct ocfs2_super *osb)
|
||||
{
|
||||
mb();
|
||||
return osb->recovery_thread_task != NULL;
|
||||
}
|
||||
|
||||
void ocfs2_recovery_exit(struct ocfs2_super *osb)
|
||||
{
|
||||
struct ocfs2_recovery_map *rm;
|
||||
|
||||
/* disable any new recovery threads and wait for any currently
|
||||
* running ones to exit. Do this before setting the vol_state. */
|
||||
mutex_lock(&osb->recovery_lock);
|
||||
osb->disable_recovery = 1;
|
||||
mutex_unlock(&osb->recovery_lock);
|
||||
wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb));
|
||||
|
||||
/* At this point, we know that no more recovery threads can be
|
||||
* launched, so wait for any recovery completion work to
|
||||
* complete. */
|
||||
flush_workqueue(ocfs2_wq);
|
||||
|
||||
/*
|
||||
* Now that recovery is shut down, and the osb is about to be
|
||||
* freed, the osb_lock is not taken here.
|
||||
*/
|
||||
rm = osb->recovery_map;
|
||||
/* XXX: Should we bug if there are dirty entries? */
|
||||
|
||||
kfree(rm);
|
||||
}
|
||||
|
||||
static int __ocfs2_recovery_map_test(struct ocfs2_super *osb,
|
||||
unsigned int node_num)
|
||||
{
|
||||
int i;
|
||||
struct ocfs2_recovery_map *rm = osb->recovery_map;
|
||||
|
||||
assert_spin_locked(&osb->osb_lock);
|
||||
|
||||
for (i = 0; i < rm->rm_used; i++) {
|
||||
if (rm->rm_entries[i] == node_num)
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Behaves like test-and-set. Returns the previous value */
|
||||
static int ocfs2_recovery_map_set(struct ocfs2_super *osb,
|
||||
unsigned int node_num)
|
||||
{
|
||||
struct ocfs2_recovery_map *rm = osb->recovery_map;
|
||||
|
||||
spin_lock(&osb->osb_lock);
|
||||
if (__ocfs2_recovery_map_test(osb, node_num)) {
|
||||
spin_unlock(&osb->osb_lock);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* XXX: Can this be exploited? Not from o2dlm... */
|
||||
BUG_ON(rm->rm_used >= osb->max_slots);
|
||||
|
||||
rm->rm_entries[rm->rm_used] = node_num;
|
||||
rm->rm_used++;
|
||||
spin_unlock(&osb->osb_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void ocfs2_recovery_map_clear(struct ocfs2_super *osb,
|
||||
unsigned int node_num)
|
||||
{
|
||||
int i;
|
||||
struct ocfs2_recovery_map *rm = osb->recovery_map;
|
||||
|
||||
spin_lock(&osb->osb_lock);
|
||||
|
||||
for (i = 0; i < rm->rm_used; i++) {
|
||||
if (rm->rm_entries[i] == node_num)
|
||||
break;
|
||||
}
|
||||
|
||||
if (i < rm->rm_used) {
|
||||
/* XXX: be careful with the pointer math */
|
||||
memmove(&(rm->rm_entries[i]), &(rm->rm_entries[i + 1]),
|
||||
(rm->rm_used - i - 1) * sizeof(unsigned int));
|
||||
rm->rm_used--;
|
||||
}
|
||||
|
||||
spin_unlock(&osb->osb_lock);
|
||||
}
|
||||
|
||||
static int ocfs2_commit_cache(struct ocfs2_super *osb)
|
||||
{
|
||||
int status = 0;
|
||||
@ -586,8 +717,7 @@ int ocfs2_journal_load(struct ocfs2_journal *journal, int local)
|
||||
|
||||
mlog_entry_void();
|
||||
|
||||
if (!journal)
|
||||
BUG();
|
||||
BUG_ON(!journal);
|
||||
|
||||
osb = journal->j_osb;
|
||||
|
||||
@ -650,6 +780,23 @@ bail:
|
||||
return status;
|
||||
}
|
||||
|
||||
static int ocfs2_recovery_completed(struct ocfs2_super *osb)
|
||||
{
|
||||
int empty;
|
||||
struct ocfs2_recovery_map *rm = osb->recovery_map;
|
||||
|
||||
spin_lock(&osb->osb_lock);
|
||||
empty = (rm->rm_used == 0);
|
||||
spin_unlock(&osb->osb_lock);
|
||||
|
||||
return empty;
|
||||
}
|
||||
|
||||
void ocfs2_wait_for_recovery(struct ocfs2_super *osb)
|
||||
{
|
||||
wait_event(osb->recovery_event, ocfs2_recovery_completed(osb));
|
||||
}
|
||||
|
||||
/*
|
||||
* JBD Might read a cached version of another nodes journal file. We
|
||||
* don't want this as this file changes often and we get no
|
||||
@ -848,6 +995,7 @@ static int __ocfs2_recovery_thread(void *arg)
|
||||
{
|
||||
int status, node_num;
|
||||
struct ocfs2_super *osb = arg;
|
||||
struct ocfs2_recovery_map *rm = osb->recovery_map;
|
||||
|
||||
mlog_entry_void();
|
||||
|
||||
@ -863,26 +1011,29 @@ restart:
|
||||
goto bail;
|
||||
}
|
||||
|
||||
while(!ocfs2_node_map_is_empty(osb, &osb->recovery_map)) {
|
||||
node_num = ocfs2_node_map_first_set_bit(osb,
|
||||
&osb->recovery_map);
|
||||
if (node_num == O2NM_INVALID_NODE_NUM) {
|
||||
mlog(0, "Out of nodes to recover.\n");
|
||||
break;
|
||||
}
|
||||
spin_lock(&osb->osb_lock);
|
||||
while (rm->rm_used) {
|
||||
/* It's always safe to remove entry zero, as we won't
|
||||
* clear it until ocfs2_recover_node() has succeeded. */
|
||||
node_num = rm->rm_entries[0];
|
||||
spin_unlock(&osb->osb_lock);
|
||||
|
||||
status = ocfs2_recover_node(osb, node_num);
|
||||
if (status < 0) {
|
||||
if (!status) {
|
||||
ocfs2_recovery_map_clear(osb, node_num);
|
||||
} else {
|
||||
mlog(ML_ERROR,
|
||||
"Error %d recovering node %d on device (%u,%u)!\n",
|
||||
status, node_num,
|
||||
MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));
|
||||
mlog(ML_ERROR, "Volume requires unmount.\n");
|
||||
continue;
|
||||
}
|
||||
|
||||
ocfs2_recovery_map_clear(osb, node_num);
|
||||
spin_lock(&osb->osb_lock);
|
||||
}
|
||||
spin_unlock(&osb->osb_lock);
|
||||
mlog(0, "All nodes recovered\n");
|
||||
|
||||
ocfs2_super_unlock(osb, 1);
|
||||
|
||||
/* We always run recovery on our own orphan dir - the dead
|
||||
@ -893,8 +1044,7 @@ restart:
|
||||
|
||||
bail:
|
||||
mutex_lock(&osb->recovery_lock);
|
||||
if (!status &&
|
||||
!ocfs2_node_map_is_empty(osb, &osb->recovery_map)) {
|
||||
if (!status && !ocfs2_recovery_completed(osb)) {
|
||||
mutex_unlock(&osb->recovery_lock);
|
||||
goto restart;
|
||||
}
|
||||
@ -924,8 +1074,8 @@ void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num)
|
||||
|
||||
/* People waiting on recovery will wait on
|
||||
* the recovery map to empty. */
|
||||
if (!ocfs2_recovery_map_set(osb, node_num))
|
||||
mlog(0, "node %d already be in recovery.\n", node_num);
|
||||
if (ocfs2_recovery_map_set(osb, node_num))
|
||||
mlog(0, "node %d already in recovery map.\n", node_num);
|
||||
|
||||
mlog(0, "starting recovery thread...\n");
|
||||
|
||||
@ -1079,7 +1229,6 @@ static int ocfs2_recover_node(struct ocfs2_super *osb,
|
||||
{
|
||||
int status = 0;
|
||||
int slot_num;
|
||||
struct ocfs2_slot_info *si = osb->slot_info;
|
||||
struct ocfs2_dinode *la_copy = NULL;
|
||||
struct ocfs2_dinode *tl_copy = NULL;
|
||||
|
||||
@ -1092,8 +1241,8 @@ static int ocfs2_recover_node(struct ocfs2_super *osb,
|
||||
* case we should've called ocfs2_journal_load instead. */
|
||||
BUG_ON(osb->node_num == node_num);
|
||||
|
||||
slot_num = ocfs2_node_num_to_slot(si, node_num);
|
||||
if (slot_num == OCFS2_INVALID_SLOT) {
|
||||
slot_num = ocfs2_node_num_to_slot(osb, node_num);
|
||||
if (slot_num == -ENOENT) {
|
||||
status = 0;
|
||||
mlog(0, "no slot for this node, so no recovery required.\n");
|
||||
goto done;
|
||||
@ -1123,8 +1272,7 @@ static int ocfs2_recover_node(struct ocfs2_super *osb,
|
||||
|
||||
/* Likewise, this would be a strange but ultimately not so
|
||||
* harmful place to get an error... */
|
||||
ocfs2_clear_slot(si, slot_num);
|
||||
status = ocfs2_update_disk_slots(osb, si);
|
||||
status = ocfs2_clear_slot(osb, slot_num);
|
||||
if (status < 0)
|
||||
mlog_errno(status);
|
||||
|
||||
@ -1184,23 +1332,24 @@ bail:
|
||||
* slot info struct has been updated from disk. */
|
||||
int ocfs2_mark_dead_nodes(struct ocfs2_super *osb)
|
||||
{
|
||||
int status, i, node_num;
|
||||
struct ocfs2_slot_info *si = osb->slot_info;
|
||||
unsigned int node_num;
|
||||
int status, i;
|
||||
|
||||
/* This is called with the super block cluster lock, so we
|
||||
* know that the slot map can't change underneath us. */
|
||||
|
||||
spin_lock(&si->si_lock);
|
||||
for(i = 0; i < si->si_num_slots; i++) {
|
||||
spin_lock(&osb->osb_lock);
|
||||
for (i = 0; i < osb->max_slots; i++) {
|
||||
if (i == osb->slot_num)
|
||||
continue;
|
||||
if (ocfs2_is_empty_slot(si, i))
|
||||
|
||||
status = ocfs2_slot_to_node_num_locked(osb, i, &node_num);
|
||||
if (status == -ENOENT)
|
||||
continue;
|
||||
|
||||
node_num = si->si_global_node_nums[i];
|
||||
if (ocfs2_node_map_test_bit(osb, &osb->recovery_map, node_num))
|
||||
if (__ocfs2_recovery_map_test(osb, node_num))
|
||||
continue;
|
||||
spin_unlock(&si->si_lock);
|
||||
spin_unlock(&osb->osb_lock);
|
||||
|
||||
/* Ok, we have a slot occupied by another node which
|
||||
* is not in the recovery map. We trylock his journal
|
||||
@ -1216,9 +1365,9 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb)
|
||||
goto bail;
|
||||
}
|
||||
|
||||
spin_lock(&si->si_lock);
|
||||
spin_lock(&osb->osb_lock);
|
||||
}
|
||||
spin_unlock(&si->si_lock);
|
||||
spin_unlock(&osb->osb_lock);
|
||||
|
||||
status = 0;
|
||||
bail:
|
||||
|
@ -134,6 +134,10 @@ static inline void ocfs2_inode_set_new(struct ocfs2_super *osb,
|
||||
|
||||
/* Exported only for the journal struct init code in super.c. Do not call. */
|
||||
void ocfs2_complete_recovery(struct work_struct *work);
|
||||
void ocfs2_wait_for_recovery(struct ocfs2_super *osb);
|
||||
|
||||
int ocfs2_recovery_init(struct ocfs2_super *osb);
|
||||
void ocfs2_recovery_exit(struct ocfs2_super *osb);
|
||||
|
||||
/*
|
||||
* Journal Control:
|
||||
|
@ -447,6 +447,8 @@ out_mutex:
|
||||
iput(main_bm_inode);
|
||||
|
||||
out:
|
||||
if (!status)
|
||||
ocfs2_init_inode_steal_slot(osb);
|
||||
mlog_exit(status);
|
||||
return status;
|
||||
}
|
||||
@ -523,6 +525,8 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
|
||||
}
|
||||
|
||||
ac->ac_inode = local_alloc_inode;
|
||||
/* We should never use localalloc from another slot */
|
||||
ac->ac_alloc_slot = osb->slot_num;
|
||||
ac->ac_which = OCFS2_AC_USE_LOCAL;
|
||||
get_bh(osb->local_alloc_bh);
|
||||
ac->ac_bh = osb->local_alloc_bh;
|
||||
|
@ -424,7 +424,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
|
||||
fe->i_fs_generation = cpu_to_le32(osb->fs_generation);
|
||||
fe->i_blkno = cpu_to_le64(fe_blkno);
|
||||
fe->i_suballoc_bit = cpu_to_le16(suballoc_bit);
|
||||
fe->i_suballoc_slot = cpu_to_le16(osb->slot_num);
|
||||
fe->i_suballoc_slot = cpu_to_le16(inode_ac->ac_alloc_slot);
|
||||
fe->i_uid = cpu_to_le32(current->fsuid);
|
||||
if (dir->i_mode & S_ISGID) {
|
||||
fe->i_gid = cpu_to_le32(dir->i_gid);
|
||||
@ -997,7 +997,7 @@ static int ocfs2_rename(struct inode *old_dir,
|
||||
*
|
||||
* And that's why, just like the VFS, we need a file system
|
||||
* rename lock. */
|
||||
if (old_dentry != new_dentry) {
|
||||
if (old_dir != new_dir && S_ISDIR(old_inode->i_mode)) {
|
||||
status = ocfs2_rename_lock(osb);
|
||||
if (status < 0) {
|
||||
mlog_errno(status);
|
||||
|
@ -36,11 +36,8 @@
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/jbd.h>
|
||||
|
||||
#include "cluster/nodemanager.h"
|
||||
#include "cluster/heartbeat.h"
|
||||
#include "cluster/tcp.h"
|
||||
|
||||
#include "dlm/dlmapi.h"
|
||||
/* For union ocfs2_dlm_lksb */
|
||||
#include "stackglue.h"
|
||||
|
||||
#include "ocfs2_fs.h"
|
||||
#include "ocfs2_lockid.h"
|
||||
@ -101,6 +98,9 @@ enum ocfs2_unlock_action {
|
||||
* dropped. */
|
||||
#define OCFS2_LOCK_QUEUED (0x00000100) /* queued for downconvert */
|
||||
#define OCFS2_LOCK_NOCACHE (0x00000200) /* don't use a holder count */
|
||||
#define OCFS2_LOCK_PENDING (0x00000400) /* This lockres is pending a
|
||||
call to dlm_lock. Only
|
||||
exists with BUSY set. */
|
||||
|
||||
struct ocfs2_lock_res_ops;
|
||||
|
||||
@ -120,13 +120,14 @@ struct ocfs2_lock_res {
|
||||
int l_level;
|
||||
unsigned int l_ro_holders;
|
||||
unsigned int l_ex_holders;
|
||||
struct dlm_lockstatus l_lksb;
|
||||
union ocfs2_dlm_lksb l_lksb;
|
||||
|
||||
/* used from AST/BAST funcs. */
|
||||
enum ocfs2_ast_action l_action;
|
||||
enum ocfs2_unlock_action l_unlock_action;
|
||||
int l_requested;
|
||||
int l_blocking;
|
||||
unsigned int l_pending_gen;
|
||||
|
||||
wait_queue_head_t l_event;
|
||||
|
||||
@ -179,6 +180,8 @@ enum ocfs2_mount_options
|
||||
#define OCFS2_DEFAULT_ATIME_QUANTUM 60
|
||||
|
||||
struct ocfs2_journal;
|
||||
struct ocfs2_slot_info;
|
||||
struct ocfs2_recovery_map;
|
||||
struct ocfs2_super
|
||||
{
|
||||
struct task_struct *commit_task;
|
||||
@ -190,7 +193,6 @@ struct ocfs2_super
|
||||
struct ocfs2_slot_info *slot_info;
|
||||
|
||||
spinlock_t node_map_lock;
|
||||
struct ocfs2_node_map recovery_map;
|
||||
|
||||
u64 root_blkno;
|
||||
u64 system_dir_blkno;
|
||||
@ -206,25 +208,29 @@ struct ocfs2_super
|
||||
u32 s_feature_incompat;
|
||||
u32 s_feature_ro_compat;
|
||||
|
||||
/* Protects s_next_generaion, osb_flags. Could protect more on
|
||||
* osb as it's very short lived. */
|
||||
/* Protects s_next_generation, osb_flags and s_inode_steal_slot.
|
||||
* Could protect more on osb as it's very short lived.
|
||||
*/
|
||||
spinlock_t osb_lock;
|
||||
u32 s_next_generation;
|
||||
unsigned long osb_flags;
|
||||
s16 s_inode_steal_slot;
|
||||
atomic_t s_num_inodes_stolen;
|
||||
|
||||
unsigned long s_mount_opt;
|
||||
unsigned int s_atime_quantum;
|
||||
|
||||
u16 max_slots;
|
||||
s16 node_num;
|
||||
s16 slot_num;
|
||||
s16 preferred_slot;
|
||||
unsigned int max_slots;
|
||||
unsigned int node_num;
|
||||
int slot_num;
|
||||
int preferred_slot;
|
||||
int s_sectsize_bits;
|
||||
int s_clustersize;
|
||||
int s_clustersize_bits;
|
||||
|
||||
atomic_t vol_state;
|
||||
struct mutex recovery_lock;
|
||||
struct ocfs2_recovery_map *recovery_map;
|
||||
struct task_struct *recovery_thread_task;
|
||||
int disable_recovery;
|
||||
wait_queue_head_t checkpoint_event;
|
||||
@ -245,12 +251,11 @@ struct ocfs2_super
|
||||
struct ocfs2_alloc_stats alloc_stats;
|
||||
char dev_str[20]; /* "major,minor" of the device */
|
||||
|
||||
struct dlm_ctxt *dlm;
|
||||
char osb_cluster_stack[OCFS2_STACK_LABEL_LEN + 1];
|
||||
struct ocfs2_cluster_connection *cconn;
|
||||
struct ocfs2_lock_res osb_super_lockres;
|
||||
struct ocfs2_lock_res osb_rename_lockres;
|
||||
struct dlm_eviction_cb osb_eviction_cb;
|
||||
struct ocfs2_dlm_debug *osb_dlm_debug;
|
||||
struct dlm_protocol_version osb_locking_proto;
|
||||
|
||||
struct dentry *osb_debug_root;
|
||||
|
||||
@ -367,11 +372,24 @@ static inline int ocfs2_is_soft_readonly(struct ocfs2_super *osb)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline int ocfs2_userspace_stack(struct ocfs2_super *osb)
|
||||
{
|
||||
return (osb->s_feature_incompat &
|
||||
OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK);
|
||||
}
|
||||
|
||||
static inline int ocfs2_mount_local(struct ocfs2_super *osb)
|
||||
{
|
||||
return (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT);
|
||||
}
|
||||
|
||||
static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb)
|
||||
{
|
||||
return (osb->s_feature_incompat &
|
||||
OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP);
|
||||
}
|
||||
|
||||
|
||||
#define OCFS2_IS_VALID_DINODE(ptr) \
|
||||
(!strcmp((ptr)->i_signature, OCFS2_INODE_SIGNATURE))
|
||||
|
||||
@ -522,6 +540,33 @@ static inline unsigned int ocfs2_pages_per_cluster(struct super_block *sb)
|
||||
return pages_per_cluster;
|
||||
}
|
||||
|
||||
static inline void ocfs2_init_inode_steal_slot(struct ocfs2_super *osb)
|
||||
{
|
||||
spin_lock(&osb->osb_lock);
|
||||
osb->s_inode_steal_slot = OCFS2_INVALID_SLOT;
|
||||
spin_unlock(&osb->osb_lock);
|
||||
atomic_set(&osb->s_num_inodes_stolen, 0);
|
||||
}
|
||||
|
||||
static inline void ocfs2_set_inode_steal_slot(struct ocfs2_super *osb,
|
||||
s16 slot)
|
||||
{
|
||||
spin_lock(&osb->osb_lock);
|
||||
osb->s_inode_steal_slot = slot;
|
||||
spin_unlock(&osb->osb_lock);
|
||||
}
|
||||
|
||||
static inline s16 ocfs2_get_inode_steal_slot(struct ocfs2_super *osb)
|
||||
{
|
||||
s16 slot;
|
||||
|
||||
spin_lock(&osb->osb_lock);
|
||||
slot = osb->s_inode_steal_slot;
|
||||
spin_unlock(&osb->osb_lock);
|
||||
|
||||
return slot;
|
||||
}
|
||||
|
||||
#define ocfs2_set_bit ext2_set_bit
|
||||
#define ocfs2_clear_bit ext2_clear_bit
|
||||
#define ocfs2_test_bit ext2_test_bit
|
||||
|
@ -88,7 +88,9 @@
|
||||
#define OCFS2_FEATURE_COMPAT_SUPP OCFS2_FEATURE_COMPAT_BACKUP_SB
|
||||
#define OCFS2_FEATURE_INCOMPAT_SUPP (OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT \
|
||||
| OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC \
|
||||
| OCFS2_FEATURE_INCOMPAT_INLINE_DATA)
|
||||
| OCFS2_FEATURE_INCOMPAT_INLINE_DATA \
|
||||
| OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP \
|
||||
| OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK)
|
||||
#define OCFS2_FEATURE_RO_COMPAT_SUPP OCFS2_FEATURE_RO_COMPAT_UNWRITTEN
|
||||
|
||||
/*
|
||||
@ -125,6 +127,21 @@
|
||||
/* Support for data packed into inode blocks */
|
||||
#define OCFS2_FEATURE_INCOMPAT_INLINE_DATA 0x0040
|
||||
|
||||
/* Support for the extended slot map */
|
||||
#define OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP 0x100
|
||||
|
||||
|
||||
/*
|
||||
* Support for alternate, userspace cluster stacks. If set, the superblock
|
||||
* field s_cluster_info contains a tag for the alternate stack in use as
|
||||
* well as the name of the cluster being joined.
|
||||
* mount.ocfs2 must pass in a matching stack name.
|
||||
*
|
||||
* If not set, the classic stack will be used. This is compatible with
|
||||
* all older versions.
|
||||
*/
|
||||
#define OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK 0x0080
|
||||
|
||||
/*
|
||||
* backup superblock flag is used to indicate that this volume
|
||||
* has backup superblocks.
|
||||
@ -267,6 +284,10 @@ struct ocfs2_new_group_input {
|
||||
#define OCFS2_VOL_UUID_LEN 16
|
||||
#define OCFS2_MAX_VOL_LABEL_LEN 64
|
||||
|
||||
/* The alternate, userspace stack fields */
|
||||
#define OCFS2_STACK_LABEL_LEN 4
|
||||
#define OCFS2_CLUSTER_NAME_LEN 16
|
||||
|
||||
/* Journal limits (in bytes) */
|
||||
#define OCFS2_MIN_JOURNAL_SIZE (4 * 1024 * 1024)
|
||||
|
||||
@ -474,6 +495,47 @@ struct ocfs2_extent_block
|
||||
/* Actual on-disk size is one block */
|
||||
};
|
||||
|
||||
/*
|
||||
* On disk slot map for OCFS2. This defines the contents of the "slot_map"
|
||||
* system file. A slot is valid if it contains a node number >= 0. The
|
||||
* value -1 (0xFFFF) is OCFS2_INVALID_SLOT. This marks a slot empty.
|
||||
*/
|
||||
struct ocfs2_slot_map {
|
||||
/*00*/ __le16 sm_slots[0];
|
||||
/*
|
||||
* Actual on-disk size is one block. OCFS2_MAX_SLOTS is 255,
|
||||
* 255 * sizeof(__le16) == 510B, within the 512B block minimum blocksize.
|
||||
*/
|
||||
};
|
||||
|
||||
struct ocfs2_extended_slot {
|
||||
/*00*/ __u8 es_valid;
|
||||
__u8 es_reserved1[3];
|
||||
__le32 es_node_num;
|
||||
/*10*/
|
||||
};
|
||||
|
||||
/*
|
||||
* The extended slot map, used when OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP
|
||||
* is set. It separates out the valid marker from the node number, and
|
||||
* has room to grow. Unlike the old slot map, this format is defined by
|
||||
* i_size.
|
||||
*/
|
||||
struct ocfs2_slot_map_extended {
|
||||
/*00*/ struct ocfs2_extended_slot se_slots[0];
|
||||
/*
|
||||
* Actual size is i_size of the slot_map system file. It should
|
||||
* match s_max_slots * sizeof(struct ocfs2_extended_slot)
|
||||
*/
|
||||
};
|
||||
|
||||
struct ocfs2_cluster_info {
|
||||
/*00*/ __u8 ci_stack[OCFS2_STACK_LABEL_LEN];
|
||||
__le32 ci_reserved;
|
||||
/*08*/ __u8 ci_cluster[OCFS2_CLUSTER_NAME_LEN];
|
||||
/*18*/
|
||||
};
|
||||
|
||||
/*
|
||||
* On disk superblock for OCFS2
|
||||
* Note that it is contained inside an ocfs2_dinode, so all offsets
|
||||
@ -506,7 +568,20 @@ struct ocfs2_super_block {
|
||||
* group header */
|
||||
/*50*/ __u8 s_label[OCFS2_MAX_VOL_LABEL_LEN]; /* Label for mounting, etc. */
|
||||
/*90*/ __u8 s_uuid[OCFS2_VOL_UUID_LEN]; /* 128-bit uuid */
|
||||
/*A0*/
|
||||
/*A0*/ struct ocfs2_cluster_info s_cluster_info; /* Selected userspace
|
||||
stack. Only valid
|
||||
with INCOMPAT flag. */
|
||||
/*B8*/ __le64 s_reserved2[17]; /* Fill out superblock */
|
||||
/*140*/
|
||||
|
||||
/*
|
||||
* NOTE: As stated above, all offsets are relative to
|
||||
* ocfs2_dinode.id2, which is at 0xC0 in the inode.
|
||||
* 0xC0 + 0x140 = 0x200 or 512 bytes. A superblock must fit within
|
||||
* our smallest blocksize, which is 512 bytes. To ensure this,
|
||||
* we reserve the space in s_reserved2. Anything past s_reserved2
|
||||
* will not be available on the smallest blocksize.
|
||||
*/
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -100,7 +100,7 @@ static char *ocfs2_lock_type_strings[] = {
|
||||
static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type)
|
||||
{
|
||||
#ifdef __KERNEL__
|
||||
mlog_bug_on_msg(type >= OCFS2_NUM_LOCK_TYPES, "%d\n", type);
|
||||
BUG_ON(type >= OCFS2_NUM_LOCK_TYPES);
|
||||
#endif
|
||||
return ocfs2_lock_type_strings[type];
|
||||
}
|
||||
|
@ -42,81 +42,244 @@
|
||||
|
||||
#include "buffer_head_io.h"
|
||||
|
||||
static s16 __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si,
|
||||
s16 global);
|
||||
static void __ocfs2_fill_slot(struct ocfs2_slot_info *si,
|
||||
s16 slot_num,
|
||||
s16 node_num);
|
||||
|
||||
/* post the slot information on disk into our slot_info struct. */
|
||||
void ocfs2_update_slot_info(struct ocfs2_slot_info *si)
|
||||
/* In-memory state of one slot of the slot map. */
struct ocfs2_slot {
	/* Nonzero when the slot is occupied and sl_node_num is meaningful. */
	int sl_valid;
	/* Node number recorded for this slot. */
	unsigned int sl_node_num;
};
|
||||
|
||||
/* In-memory view of the slot map, plus the buffers backing it. */
struct ocfs2_slot_info {
	/* Nonzero selects the extended slot map handlers over the old ones. */
	int si_extended;
	/* Slots stored in each block of the extended map. */
	int si_slots_per_block;
	/* Inode passed along with the slot map block reads and writes. */
	struct inode *si_inode;
	/* Number of entries in si_bh. */
	unsigned int si_blocks;
	/* Buffer heads holding the slot map blocks. */
	struct buffer_head **si_bh;
	/* Number of entries in si_slots. */
	unsigned int si_num_slots;
	/* Per-slot state, indexed by slot number. */
	struct ocfs2_slot *si_slots;
};
|
||||
|
||||
|
||||
static int __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si,
|
||||
unsigned int node_num);
|
||||
|
||||
static void ocfs2_invalidate_slot(struct ocfs2_slot_info *si,
|
||||
int slot_num)
|
||||
{
|
||||
BUG_ON((slot_num < 0) || (slot_num >= si->si_num_slots));
|
||||
si->si_slots[slot_num].sl_valid = 0;
|
||||
}
|
||||
|
||||
static void ocfs2_set_slot(struct ocfs2_slot_info *si,
|
||||
int slot_num, unsigned int node_num)
|
||||
{
|
||||
BUG_ON((slot_num < 0) || (slot_num >= si->si_num_slots));
|
||||
|
||||
si->si_slots[slot_num].sl_valid = 1;
|
||||
si->si_slots[slot_num].sl_node_num = node_num;
|
||||
}
|
||||
|
||||
/* This version is for the extended slot map */
|
||||
static void ocfs2_update_slot_info_extended(struct ocfs2_slot_info *si)
|
||||
{
|
||||
int b, i, slotno;
|
||||
struct ocfs2_slot_map_extended *se;
|
||||
|
||||
slotno = 0;
|
||||
for (b = 0; b < si->si_blocks; b++) {
|
||||
se = (struct ocfs2_slot_map_extended *)si->si_bh[b]->b_data;
|
||||
for (i = 0;
|
||||
(i < si->si_slots_per_block) &&
|
||||
(slotno < si->si_num_slots);
|
||||
i++, slotno++) {
|
||||
if (se->se_slots[i].es_valid)
|
||||
ocfs2_set_slot(si, slotno,
|
||||
le32_to_cpu(se->se_slots[i].es_node_num));
|
||||
else
|
||||
ocfs2_invalidate_slot(si, slotno);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Post the slot information on disk into our slot_info struct.
|
||||
* Must be protected by osb_lock.
|
||||
*/
|
||||
static void ocfs2_update_slot_info_old(struct ocfs2_slot_info *si)
|
||||
{
|
||||
int i;
|
||||
__le16 *disk_info;
|
||||
struct ocfs2_slot_map *sm;
|
||||
|
||||
/* we don't read the slot block here as ocfs2_super_lock
|
||||
* should've made sure we have the most recent copy. */
|
||||
spin_lock(&si->si_lock);
|
||||
disk_info = (__le16 *) si->si_bh->b_data;
|
||||
sm = (struct ocfs2_slot_map *)si->si_bh[0]->b_data;
|
||||
|
||||
for (i = 0; i < si->si_size; i++)
|
||||
si->si_global_node_nums[i] = le16_to_cpu(disk_info[i]);
|
||||
for (i = 0; i < si->si_num_slots; i++) {
|
||||
if (le16_to_cpu(sm->sm_slots[i]) == (u16)OCFS2_INVALID_SLOT)
|
||||
ocfs2_invalidate_slot(si, i);
|
||||
else
|
||||
ocfs2_set_slot(si, i, le16_to_cpu(sm->sm_slots[i]));
|
||||
}
|
||||
}
|
||||
|
||||
spin_unlock(&si->si_lock);
|
||||
static void ocfs2_update_slot_info(struct ocfs2_slot_info *si)
|
||||
{
|
||||
/*
|
||||
* The slot data will have been refreshed when ocfs2_super_lock
|
||||
* was taken.
|
||||
*/
|
||||
if (si->si_extended)
|
||||
ocfs2_update_slot_info_extended(si);
|
||||
else
|
||||
ocfs2_update_slot_info_old(si);
|
||||
}
|
||||
|
||||
int ocfs2_refresh_slot_info(struct ocfs2_super *osb)
|
||||
{
|
||||
int ret;
|
||||
struct ocfs2_slot_info *si = osb->slot_info;
|
||||
|
||||
if (si == NULL)
|
||||
return 0;
|
||||
|
||||
BUG_ON(si->si_blocks == 0);
|
||||
BUG_ON(si->si_bh == NULL);
|
||||
|
||||
mlog(0, "Refreshing slot map, reading %u block(s)\n",
|
||||
si->si_blocks);
|
||||
|
||||
/*
|
||||
* We pass -1 as blocknr because we expect all of si->si_bh to
|
||||
* be !NULL. Thus, ocfs2_read_blocks() will ignore blocknr. If
|
||||
* this is not true, the read of -1 (UINT64_MAX) will fail.
|
||||
*/
|
||||
ret = ocfs2_read_blocks(osb, -1, si->si_blocks, si->si_bh, 0,
|
||||
si->si_inode);
|
||||
if (ret == 0) {
|
||||
spin_lock(&osb->osb_lock);
|
||||
ocfs2_update_slot_info(si);
|
||||
spin_unlock(&osb->osb_lock);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* post the our slot info stuff into it's destination bh and write it
|
||||
* out. */
|
||||
int ocfs2_update_disk_slots(struct ocfs2_super *osb,
|
||||
struct ocfs2_slot_info *si)
|
||||
static void ocfs2_update_disk_slot_extended(struct ocfs2_slot_info *si,
|
||||
int slot_num,
|
||||
struct buffer_head **bh)
|
||||
{
|
||||
int status, i;
|
||||
__le16 *disk_info = (__le16 *) si->si_bh->b_data;
|
||||
int blkind = slot_num / si->si_slots_per_block;
|
||||
int slotno = slot_num % si->si_slots_per_block;
|
||||
struct ocfs2_slot_map_extended *se;
|
||||
|
||||
spin_lock(&si->si_lock);
|
||||
for (i = 0; i < si->si_size; i++)
|
||||
disk_info[i] = cpu_to_le16(si->si_global_node_nums[i]);
|
||||
spin_unlock(&si->si_lock);
|
||||
BUG_ON(blkind >= si->si_blocks);
|
||||
|
||||
status = ocfs2_write_block(osb, si->si_bh, si->si_inode);
|
||||
se = (struct ocfs2_slot_map_extended *)si->si_bh[blkind]->b_data;
|
||||
se->se_slots[slotno].es_valid = si->si_slots[slot_num].sl_valid;
|
||||
if (si->si_slots[slot_num].sl_valid)
|
||||
se->se_slots[slotno].es_node_num =
|
||||
cpu_to_le32(si->si_slots[slot_num].sl_node_num);
|
||||
*bh = si->si_bh[blkind];
|
||||
}
|
||||
|
||||
static void ocfs2_update_disk_slot_old(struct ocfs2_slot_info *si,
|
||||
int slot_num,
|
||||
struct buffer_head **bh)
|
||||
{
|
||||
int i;
|
||||
struct ocfs2_slot_map *sm;
|
||||
|
||||
sm = (struct ocfs2_slot_map *)si->si_bh[0]->b_data;
|
||||
for (i = 0; i < si->si_num_slots; i++) {
|
||||
if (si->si_slots[i].sl_valid)
|
||||
sm->sm_slots[i] =
|
||||
cpu_to_le16(si->si_slots[i].sl_node_num);
|
||||
else
|
||||
sm->sm_slots[i] = cpu_to_le16(OCFS2_INVALID_SLOT);
|
||||
}
|
||||
*bh = si->si_bh[0];
|
||||
}
|
||||
|
||||
static int ocfs2_update_disk_slot(struct ocfs2_super *osb,
|
||||
struct ocfs2_slot_info *si,
|
||||
int slot_num)
|
||||
{
|
||||
int status;
|
||||
struct buffer_head *bh;
|
||||
|
||||
spin_lock(&osb->osb_lock);
|
||||
if (si->si_extended)
|
||||
ocfs2_update_disk_slot_extended(si, slot_num, &bh);
|
||||
else
|
||||
ocfs2_update_disk_slot_old(si, slot_num, &bh);
|
||||
spin_unlock(&osb->osb_lock);
|
||||
|
||||
status = ocfs2_write_block(osb, bh, si->si_inode);
|
||||
if (status < 0)
|
||||
mlog_errno(status);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
/* try to find global node in the slot info. Returns
|
||||
* OCFS2_INVALID_SLOT if nothing is found. */
|
||||
static s16 __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si,
|
||||
s16 global)
|
||||
/*
|
||||
* Calculate how many bytes are needed by the slot map. Returns
|
||||
* an error if the slot map file is too small.
|
||||
*/
|
||||
static int ocfs2_slot_map_physical_size(struct ocfs2_super *osb,
|
||||
struct inode *inode,
|
||||
unsigned long long *bytes)
|
||||
{
|
||||
int i;
|
||||
s16 ret = OCFS2_INVALID_SLOT;
|
||||
unsigned long long bytes_needed;
|
||||
|
||||
if (ocfs2_uses_extended_slot_map(osb)) {
|
||||
bytes_needed = osb->max_slots *
|
||||
sizeof(struct ocfs2_extended_slot);
|
||||
} else {
|
||||
bytes_needed = osb->max_slots * sizeof(__le16);
|
||||
}
|
||||
if (bytes_needed > i_size_read(inode)) {
|
||||
mlog(ML_ERROR,
|
||||
"Slot map file is too small! (size %llu, needed %llu)\n",
|
||||
i_size_read(inode), bytes_needed);
|
||||
return -ENOSPC;
|
||||
}
|
||||
|
||||
*bytes = bytes_needed;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* try to find global node in the slot info. Returns -ENOENT
|
||||
* if nothing is found. */
|
||||
static int __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si,
|
||||
unsigned int node_num)
|
||||
{
|
||||
int i, ret = -ENOENT;
|
||||
|
||||
for(i = 0; i < si->si_num_slots; i++) {
|
||||
if (global == si->si_global_node_nums[i]) {
|
||||
ret = (s16) i;
|
||||
if (si->si_slots[i].sl_valid &&
|
||||
(node_num == si->si_slots[i].sl_node_num)) {
|
||||
ret = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static s16 __ocfs2_find_empty_slot(struct ocfs2_slot_info *si, s16 preferred)
|
||||
static int __ocfs2_find_empty_slot(struct ocfs2_slot_info *si,
|
||||
int preferred)
|
||||
{
|
||||
int i;
|
||||
s16 ret = OCFS2_INVALID_SLOT;
|
||||
int i, ret = -ENOSPC;
|
||||
|
||||
if (preferred >= 0 && preferred < si->si_num_slots) {
|
||||
if (OCFS2_INVALID_SLOT == si->si_global_node_nums[preferred]) {
|
||||
if ((preferred >= 0) && (preferred < si->si_num_slots)) {
|
||||
if (!si->si_slots[preferred].sl_valid) {
|
||||
ret = preferred;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
for(i = 0; i < si->si_num_slots; i++) {
|
||||
if (OCFS2_INVALID_SLOT == si->si_global_node_nums[i]) {
|
||||
ret = (s16) i;
|
||||
if (!si->si_slots[i].sl_valid) {
|
||||
ret = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -124,58 +287,155 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
s16 ocfs2_node_num_to_slot(struct ocfs2_slot_info *si,
|
||||
s16 global)
|
||||
int ocfs2_node_num_to_slot(struct ocfs2_super *osb, unsigned int node_num)
|
||||
{
|
||||
s16 ret;
|
||||
int slot;
|
||||
struct ocfs2_slot_info *si = osb->slot_info;
|
||||
|
||||
spin_lock(&si->si_lock);
|
||||
ret = __ocfs2_node_num_to_slot(si, global);
|
||||
spin_unlock(&si->si_lock);
|
||||
return ret;
|
||||
spin_lock(&osb->osb_lock);
|
||||
slot = __ocfs2_node_num_to_slot(si, node_num);
|
||||
spin_unlock(&osb->osb_lock);
|
||||
|
||||
return slot;
|
||||
}
|
||||
|
||||
static void __ocfs2_fill_slot(struct ocfs2_slot_info *si,
|
||||
s16 slot_num,
|
||||
s16 node_num)
|
||||
int ocfs2_slot_to_node_num_locked(struct ocfs2_super *osb, int slot_num,
|
||||
unsigned int *node_num)
|
||||
{
|
||||
BUG_ON(slot_num == OCFS2_INVALID_SLOT);
|
||||
BUG_ON(slot_num >= si->si_num_slots);
|
||||
BUG_ON((node_num != O2NM_INVALID_NODE_NUM) &&
|
||||
(node_num >= O2NM_MAX_NODES));
|
||||
struct ocfs2_slot_info *si = osb->slot_info;
|
||||
|
||||
si->si_global_node_nums[slot_num] = node_num;
|
||||
assert_spin_locked(&osb->osb_lock);
|
||||
|
||||
BUG_ON(slot_num < 0);
|
||||
BUG_ON(slot_num > osb->max_slots);
|
||||
|
||||
if (!si->si_slots[slot_num].sl_valid)
|
||||
return -ENOENT;
|
||||
|
||||
*node_num = si->si_slots[slot_num].sl_node_num;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void ocfs2_clear_slot(struct ocfs2_slot_info *si,
|
||||
s16 slot_num)
|
||||
static void __ocfs2_free_slot_info(struct ocfs2_slot_info *si)
|
||||
{
|
||||
spin_lock(&si->si_lock);
|
||||
__ocfs2_fill_slot(si, slot_num, OCFS2_INVALID_SLOT);
|
||||
spin_unlock(&si->si_lock);
|
||||
unsigned int i;
|
||||
|
||||
if (si == NULL)
|
||||
return;
|
||||
|
||||
if (si->si_inode)
|
||||
iput(si->si_inode);
|
||||
if (si->si_bh) {
|
||||
for (i = 0; i < si->si_blocks; i++) {
|
||||
if (si->si_bh[i]) {
|
||||
brelse(si->si_bh[i]);
|
||||
si->si_bh[i] = NULL;
|
||||
}
|
||||
}
|
||||
kfree(si->si_bh);
|
||||
}
|
||||
|
||||
kfree(si);
|
||||
}
|
||||
|
||||
int ocfs2_clear_slot(struct ocfs2_super *osb, int slot_num)
|
||||
{
|
||||
struct ocfs2_slot_info *si = osb->slot_info;
|
||||
|
||||
if (si == NULL)
|
||||
return 0;
|
||||
|
||||
spin_lock(&osb->osb_lock);
|
||||
ocfs2_invalidate_slot(si, slot_num);
|
||||
spin_unlock(&osb->osb_lock);
|
||||
|
||||
return ocfs2_update_disk_slot(osb, osb->slot_info, slot_num);
|
||||
}
|
||||
|
||||
static int ocfs2_map_slot_buffers(struct ocfs2_super *osb,
|
||||
struct ocfs2_slot_info *si)
|
||||
{
|
||||
int status = 0;
|
||||
u64 blkno;
|
||||
unsigned long long blocks, bytes;
|
||||
unsigned int i;
|
||||
struct buffer_head *bh;
|
||||
|
||||
status = ocfs2_slot_map_physical_size(osb, si->si_inode, &bytes);
|
||||
if (status)
|
||||
goto bail;
|
||||
|
||||
blocks = ocfs2_blocks_for_bytes(si->si_inode->i_sb, bytes);
|
||||
BUG_ON(blocks > UINT_MAX);
|
||||
si->si_blocks = blocks;
|
||||
if (!si->si_blocks)
|
||||
goto bail;
|
||||
|
||||
if (si->si_extended)
|
||||
si->si_slots_per_block =
|
||||
(osb->sb->s_blocksize /
|
||||
sizeof(struct ocfs2_extended_slot));
|
||||
else
|
||||
si->si_slots_per_block = osb->sb->s_blocksize / sizeof(__le16);
|
||||
|
||||
/* The size checks above should ensure this */
|
||||
BUG_ON((osb->max_slots / si->si_slots_per_block) > blocks);
|
||||
|
||||
mlog(0, "Slot map needs %u buffers for %llu bytes\n",
|
||||
si->si_blocks, bytes);
|
||||
|
||||
si->si_bh = kzalloc(sizeof(struct buffer_head *) * si->si_blocks,
|
||||
GFP_KERNEL);
|
||||
if (!si->si_bh) {
|
||||
status = -ENOMEM;
|
||||
mlog_errno(status);
|
||||
goto bail;
|
||||
}
|
||||
|
||||
for (i = 0; i < si->si_blocks; i++) {
|
||||
status = ocfs2_extent_map_get_blocks(si->si_inode, i,
|
||||
&blkno, NULL, NULL);
|
||||
if (status < 0) {
|
||||
mlog_errno(status);
|
||||
goto bail;
|
||||
}
|
||||
|
||||
mlog(0, "Reading slot map block %u at %llu\n", i,
|
||||
(unsigned long long)blkno);
|
||||
|
||||
bh = NULL; /* Acquire a fresh bh */
|
||||
status = ocfs2_read_block(osb, blkno, &bh, 0, si->si_inode);
|
||||
if (status < 0) {
|
||||
mlog_errno(status);
|
||||
goto bail;
|
||||
}
|
||||
|
||||
si->si_bh[i] = bh;
|
||||
}
|
||||
|
||||
bail:
|
||||
return status;
|
||||
}
|
||||
|
||||
int ocfs2_init_slot_info(struct ocfs2_super *osb)
|
||||
{
|
||||
int status, i;
|
||||
u64 blkno;
|
||||
int status;
|
||||
struct inode *inode = NULL;
|
||||
struct buffer_head *bh = NULL;
|
||||
struct ocfs2_slot_info *si;
|
||||
|
||||
si = kzalloc(sizeof(struct ocfs2_slot_info), GFP_KERNEL);
|
||||
si = kzalloc(sizeof(struct ocfs2_slot_info) +
|
||||
(sizeof(struct ocfs2_slot) * osb->max_slots),
|
||||
GFP_KERNEL);
|
||||
if (!si) {
|
||||
status = -ENOMEM;
|
||||
mlog_errno(status);
|
||||
goto bail;
|
||||
}
|
||||
|
||||
spin_lock_init(&si->si_lock);
|
||||
si->si_extended = ocfs2_uses_extended_slot_map(osb);
|
||||
si->si_num_slots = osb->max_slots;
|
||||
si->si_size = OCFS2_MAX_SLOTS;
|
||||
|
||||
for(i = 0; i < si->si_num_slots; i++)
|
||||
si->si_global_node_nums[i] = OCFS2_INVALID_SLOT;
|
||||
si->si_slots = (struct ocfs2_slot *)((char *)si +
|
||||
sizeof(struct ocfs2_slot_info));
|
||||
|
||||
inode = ocfs2_get_system_file_inode(osb, SLOT_MAP_SYSTEM_INODE,
|
||||
OCFS2_INVALID_SLOT);
|
||||
@ -185,61 +445,53 @@ int ocfs2_init_slot_info(struct ocfs2_super *osb)
|
||||
goto bail;
|
||||
}
|
||||
|
||||
status = ocfs2_extent_map_get_blocks(inode, 0ULL, &blkno, NULL, NULL);
|
||||
if (status < 0) {
|
||||
mlog_errno(status);
|
||||
goto bail;
|
||||
}
|
||||
|
||||
status = ocfs2_read_block(osb, blkno, &bh, 0, inode);
|
||||
if (status < 0) {
|
||||
mlog_errno(status);
|
||||
goto bail;
|
||||
}
|
||||
|
||||
si->si_inode = inode;
|
||||
si->si_bh = bh;
|
||||
osb->slot_info = si;
|
||||
status = ocfs2_map_slot_buffers(osb, si);
|
||||
if (status < 0) {
|
||||
mlog_errno(status);
|
||||
goto bail;
|
||||
}
|
||||
|
||||
osb->slot_info = (struct ocfs2_slot_info *)si;
|
||||
bail:
|
||||
if (status < 0 && si)
|
||||
ocfs2_free_slot_info(si);
|
||||
__ocfs2_free_slot_info(si);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
void ocfs2_free_slot_info(struct ocfs2_slot_info *si)
|
||||
void ocfs2_free_slot_info(struct ocfs2_super *osb)
|
||||
{
|
||||
if (si->si_inode)
|
||||
iput(si->si_inode);
|
||||
if (si->si_bh)
|
||||
brelse(si->si_bh);
|
||||
kfree(si);
|
||||
struct ocfs2_slot_info *si = osb->slot_info;
|
||||
|
||||
osb->slot_info = NULL;
|
||||
__ocfs2_free_slot_info(si);
|
||||
}
|
||||
|
||||
int ocfs2_find_slot(struct ocfs2_super *osb)
|
||||
{
|
||||
int status;
|
||||
s16 slot;
|
||||
int slot;
|
||||
struct ocfs2_slot_info *si;
|
||||
|
||||
mlog_entry_void();
|
||||
|
||||
si = osb->slot_info;
|
||||
|
||||
spin_lock(&osb->osb_lock);
|
||||
ocfs2_update_slot_info(si);
|
||||
|
||||
spin_lock(&si->si_lock);
|
||||
/* search for ourselves first and take the slot if it already
|
||||
* exists. Perhaps we need to mark this in a variable for our
|
||||
* own journal recovery? Possibly not, though we certainly
|
||||
* need to warn to the user */
|
||||
slot = __ocfs2_node_num_to_slot(si, osb->node_num);
|
||||
if (slot == OCFS2_INVALID_SLOT) {
|
||||
if (slot < 0) {
|
||||
/* if no slot yet, then just take 1st available
|
||||
* one. */
|
||||
slot = __ocfs2_find_empty_slot(si, osb->preferred_slot);
|
||||
if (slot == OCFS2_INVALID_SLOT) {
|
||||
spin_unlock(&si->si_lock);
|
||||
if (slot < 0) {
|
||||
spin_unlock(&osb->osb_lock);
|
||||
mlog(ML_ERROR, "no free slots available!\n");
|
||||
status = -EINVAL;
|
||||
goto bail;
|
||||
@ -248,13 +500,13 @@ int ocfs2_find_slot(struct ocfs2_super *osb)
|
||||
mlog(ML_NOTICE, "slot %d is already allocated to this node!\n",
|
||||
slot);
|
||||
|
||||
__ocfs2_fill_slot(si, slot, osb->node_num);
|
||||
ocfs2_set_slot(si, slot, osb->node_num);
|
||||
osb->slot_num = slot;
|
||||
spin_unlock(&si->si_lock);
|
||||
spin_unlock(&osb->osb_lock);
|
||||
|
||||
mlog(0, "taking node slot %d\n", osb->slot_num);
|
||||
|
||||
status = ocfs2_update_disk_slots(osb, si);
|
||||
status = ocfs2_update_disk_slot(osb, si, osb->slot_num);
|
||||
if (status < 0)
|
||||
mlog_errno(status);
|
||||
|
||||
@ -265,27 +517,27 @@ bail:
|
||||
|
||||
void ocfs2_put_slot(struct ocfs2_super *osb)
|
||||
{
|
||||
int status;
|
||||
int status, slot_num;
|
||||
struct ocfs2_slot_info *si = osb->slot_info;
|
||||
|
||||
if (!si)
|
||||
return;
|
||||
|
||||
spin_lock(&osb->osb_lock);
|
||||
ocfs2_update_slot_info(si);
|
||||
|
||||
spin_lock(&si->si_lock);
|
||||
__ocfs2_fill_slot(si, osb->slot_num, OCFS2_INVALID_SLOT);
|
||||
slot_num = osb->slot_num;
|
||||
ocfs2_invalidate_slot(si, osb->slot_num);
|
||||
osb->slot_num = OCFS2_INVALID_SLOT;
|
||||
spin_unlock(&si->si_lock);
|
||||
spin_unlock(&osb->osb_lock);
|
||||
|
||||
status = ocfs2_update_disk_slots(osb, si);
|
||||
status = ocfs2_update_disk_slot(osb, si, slot_num);
|
||||
if (status < 0) {
|
||||
mlog_errno(status);
|
||||
goto bail;
|
||||
}
|
||||
|
||||
bail:
|
||||
osb->slot_info = NULL;
|
||||
ocfs2_free_slot_info(si);
|
||||
ocfs2_free_slot_info(osb);
|
||||
}
|
||||
|
||||
|
@ -27,38 +27,18 @@
|
||||
#ifndef SLOTMAP_H
|
||||
#define SLOTMAP_H
|
||||
|
||||
struct ocfs2_slot_info {
|
||||
spinlock_t si_lock;
|
||||
|
||||
struct inode *si_inode;
|
||||
struct buffer_head *si_bh;
|
||||
unsigned int si_num_slots;
|
||||
unsigned int si_size;
|
||||
s16 si_global_node_nums[OCFS2_MAX_SLOTS];
|
||||
};
|
||||
|
||||
int ocfs2_init_slot_info(struct ocfs2_super *osb);
|
||||
void ocfs2_free_slot_info(struct ocfs2_slot_info *si);
|
||||
void ocfs2_free_slot_info(struct ocfs2_super *osb);
|
||||
|
||||
int ocfs2_find_slot(struct ocfs2_super *osb);
|
||||
void ocfs2_put_slot(struct ocfs2_super *osb);
|
||||
|
||||
void ocfs2_update_slot_info(struct ocfs2_slot_info *si);
|
||||
int ocfs2_update_disk_slots(struct ocfs2_super *osb,
|
||||
struct ocfs2_slot_info *si);
|
||||
int ocfs2_refresh_slot_info(struct ocfs2_super *osb);
|
||||
|
||||
s16 ocfs2_node_num_to_slot(struct ocfs2_slot_info *si,
|
||||
s16 global);
|
||||
void ocfs2_clear_slot(struct ocfs2_slot_info *si,
|
||||
s16 slot_num);
|
||||
int ocfs2_node_num_to_slot(struct ocfs2_super *osb, unsigned int node_num);
|
||||
int ocfs2_slot_to_node_num_locked(struct ocfs2_super *osb, int slot_num,
|
||||
unsigned int *node_num);
|
||||
|
||||
static inline int ocfs2_is_empty_slot(struct ocfs2_slot_info *si,
|
||||
int slot_num)
|
||||
{
|
||||
BUG_ON(slot_num == OCFS2_INVALID_SLOT);
|
||||
assert_spin_locked(&si->si_lock);
|
||||
|
||||
return si->si_global_node_nums[slot_num] == OCFS2_INVALID_SLOT;
|
||||
}
|
||||
int ocfs2_clear_slot(struct ocfs2_super *osb, int slot_num);
|
||||
|
||||
#endif
|
||||
|
420
fs/ocfs2/stack_o2cb.c
Normal file
420
fs/ocfs2/stack_o2cb.c
Normal file
@ -0,0 +1,420 @@
|
||||
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||
*
|
||||
* stack_o2cb.c
|
||||
*
|
||||
* Code which interfaces ocfs2 with the o2cb stack.
|
||||
*
|
||||
* Copyright (C) 2007 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License as published by the Free Software Foundation, version 2.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*/
|
||||
|
||||
#include <linux/crc32.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
/* Needed for AOP_TRUNCATED_PAGE in mlog_errno() */
|
||||
#include <linux/fs.h>
|
||||
|
||||
#include "cluster/masklog.h"
|
||||
#include "cluster/nodemanager.h"
|
||||
#include "cluster/heartbeat.h"
|
||||
|
||||
#include "stackglue.h"
|
||||
|
||||
struct o2dlm_private {
|
||||
struct dlm_eviction_cb op_eviction_cb;
|
||||
};
|
||||
|
||||
static struct ocfs2_stack_plugin o2cb_stack;
|
||||
|
||||
/* These should be identical */
|
||||
#if (DLM_LOCK_IV != LKM_IVMODE)
|
||||
# error Lock modes do not match
|
||||
#endif
|
||||
#if (DLM_LOCK_NL != LKM_NLMODE)
|
||||
# error Lock modes do not match
|
||||
#endif
|
||||
#if (DLM_LOCK_CR != LKM_CRMODE)
|
||||
# error Lock modes do not match
|
||||
#endif
|
||||
#if (DLM_LOCK_CW != LKM_CWMODE)
|
||||
# error Lock modes do not match
|
||||
#endif
|
||||
#if (DLM_LOCK_PR != LKM_PRMODE)
|
||||
# error Lock modes do not match
|
||||
#endif
|
||||
#if (DLM_LOCK_PW != LKM_PWMODE)
|
||||
# error Lock modes do not match
|
||||
#endif
|
||||
#if (DLM_LOCK_EX != LKM_EXMODE)
|
||||
# error Lock modes do not match
|
||||
#endif
|
||||
static inline int mode_to_o2dlm(int mode)
|
||||
{
|
||||
BUG_ON(mode > LKM_MAXMODE);
|
||||
|
||||
return mode;
|
||||
}
|
||||
|
||||
#define map_flag(_generic, _o2dlm) \
|
||||
if (flags & (_generic)) { \
|
||||
flags &= ~(_generic); \
|
||||
o2dlm_flags |= (_o2dlm); \
|
||||
}
|
||||
static int flags_to_o2dlm(u32 flags)
|
||||
{
|
||||
int o2dlm_flags = 0;
|
||||
|
||||
map_flag(DLM_LKF_NOQUEUE, LKM_NOQUEUE);
|
||||
map_flag(DLM_LKF_CANCEL, LKM_CANCEL);
|
||||
map_flag(DLM_LKF_CONVERT, LKM_CONVERT);
|
||||
map_flag(DLM_LKF_VALBLK, LKM_VALBLK);
|
||||
map_flag(DLM_LKF_IVVALBLK, LKM_INVVALBLK);
|
||||
map_flag(DLM_LKF_ORPHAN, LKM_ORPHAN);
|
||||
map_flag(DLM_LKF_FORCEUNLOCK, LKM_FORCE);
|
||||
map_flag(DLM_LKF_TIMEOUT, LKM_TIMEOUT);
|
||||
map_flag(DLM_LKF_LOCAL, LKM_LOCAL);
|
||||
|
||||
/* map_flag() should have cleared every flag passed in */
|
||||
BUG_ON(flags != 0);
|
||||
|
||||
return o2dlm_flags;
|
||||
}
|
||||
#undef map_flag
|
||||
|
||||
/*
|
||||
* Map an o2dlm status to standard errno values.
|
||||
*
|
||||
* o2dlm only uses a handful of these, and returns even fewer to the
|
||||
* caller. Still, we try to assign sane values to each error.
|
||||
*
|
||||
* The following value pairs have special meanings to dlmglue, thus
|
||||
* the right hand side needs to stay unique - never duplicate the
|
||||
* mapping elsewhere in the table!
|
||||
*
|
||||
* DLM_NORMAL: 0
|
||||
* DLM_NOTQUEUED: -EAGAIN
|
||||
* DLM_CANCELGRANT: -EBUSY
|
||||
* DLM_CANCEL: -DLM_ECANCEL
|
||||
*/
|
||||
/* Keep in sync with dlmapi.h */
|
||||
static int status_map[] = {
|
||||
[DLM_NORMAL] = 0, /* Success */
|
||||
[DLM_GRANTED] = -EINVAL,
|
||||
[DLM_DENIED] = -EACCES,
|
||||
[DLM_DENIED_NOLOCKS] = -EACCES,
|
||||
[DLM_WORKING] = -EACCES,
|
||||
[DLM_BLOCKED] = -EINVAL,
|
||||
[DLM_BLOCKED_ORPHAN] = -EINVAL,
|
||||
[DLM_DENIED_GRACE_PERIOD] = -EACCES,
|
||||
[DLM_SYSERR] = -ENOMEM, /* It is what it is */
|
||||
[DLM_NOSUPPORT] = -EPROTO,
|
||||
[DLM_CANCELGRANT] = -EBUSY, /* Cancel after grant */
|
||||
[DLM_IVLOCKID] = -EINVAL,
|
||||
[DLM_SYNC] = -EINVAL,
|
||||
[DLM_BADTYPE] = -EINVAL,
|
||||
[DLM_BADRESOURCE] = -EINVAL,
|
||||
[DLM_MAXHANDLES] = -ENOMEM,
|
||||
[DLM_NOCLINFO] = -EINVAL,
|
||||
[DLM_NOLOCKMGR] = -EINVAL,
|
||||
[DLM_NOPURGED] = -EINVAL,
|
||||
[DLM_BADARGS] = -EINVAL,
|
||||
[DLM_VOID] = -EINVAL,
|
||||
[DLM_NOTQUEUED] = -EAGAIN, /* Trylock failed */
|
||||
[DLM_IVBUFLEN] = -EINVAL,
|
||||
[DLM_CVTUNGRANT] = -EPERM,
|
||||
[DLM_BADPARAM] = -EINVAL,
|
||||
[DLM_VALNOTVALID] = -EINVAL,
|
||||
[DLM_REJECTED] = -EPERM,
|
||||
[DLM_ABORT] = -EINVAL,
|
||||
[DLM_CANCEL] = -DLM_ECANCEL, /* Successful cancel */
|
||||
[DLM_IVRESHANDLE] = -EINVAL,
|
||||
[DLM_DEADLOCK] = -EDEADLK,
|
||||
[DLM_DENIED_NOASTS] = -EINVAL,
|
||||
[DLM_FORWARD] = -EINVAL,
|
||||
[DLM_TIMEOUT] = -ETIMEDOUT,
|
||||
[DLM_IVGROUPID] = -EINVAL,
|
||||
[DLM_VERS_CONFLICT] = -EOPNOTSUPP,
|
||||
[DLM_BAD_DEVICE_PATH] = -ENOENT,
|
||||
[DLM_NO_DEVICE_PERMISSION] = -EPERM,
|
||||
[DLM_NO_CONTROL_DEVICE] = -ENOENT,
|
||||
[DLM_RECOVERING] = -ENOTCONN,
|
||||
[DLM_MIGRATING] = -ERESTART,
|
||||
[DLM_MAXSTATS] = -EINVAL,
|
||||
};
|
||||
|
||||
static int dlm_status_to_errno(enum dlm_status status)
|
||||
{
|
||||
BUG_ON(status > (sizeof(status_map) / sizeof(status_map[0])));
|
||||
|
||||
return status_map[status];
|
||||
}
|
||||
|
||||
static void o2dlm_lock_ast_wrapper(void *astarg)
|
||||
{
|
||||
BUG_ON(o2cb_stack.sp_proto == NULL);
|
||||
|
||||
o2cb_stack.sp_proto->lp_lock_ast(astarg);
|
||||
}
|
||||
|
||||
static void o2dlm_blocking_ast_wrapper(void *astarg, int level)
|
||||
{
|
||||
BUG_ON(o2cb_stack.sp_proto == NULL);
|
||||
|
||||
o2cb_stack.sp_proto->lp_blocking_ast(astarg, level);
|
||||
}
|
||||
|
||||
static void o2dlm_unlock_ast_wrapper(void *astarg, enum dlm_status status)
|
||||
{
|
||||
int error = dlm_status_to_errno(status);
|
||||
|
||||
BUG_ON(o2cb_stack.sp_proto == NULL);
|
||||
|
||||
/*
|
||||
* In o2dlm, you can get both the lock_ast() for the lock being
|
||||
* granted and the unlock_ast() for the CANCEL failing. A
|
||||
* successful cancel sends DLM_NORMAL here. If the
|
||||
* lock grant happened before the cancel arrived, you get
|
||||
* DLM_CANCELGRANT.
|
||||
*
|
||||
* There's no need for the double-ast. If we see DLM_CANCELGRANT,
|
||||
* we just ignore it. We expect the lock_ast() to handle the
|
||||
* granted lock.
|
||||
*/
|
||||
if (status == DLM_CANCELGRANT)
|
||||
return;
|
||||
|
||||
o2cb_stack.sp_proto->lp_unlock_ast(astarg, error);
|
||||
}
|
||||
|
||||
static int o2cb_dlm_lock(struct ocfs2_cluster_connection *conn,
|
||||
int mode,
|
||||
union ocfs2_dlm_lksb *lksb,
|
||||
u32 flags,
|
||||
void *name,
|
||||
unsigned int namelen,
|
||||
void *astarg)
|
||||
{
|
||||
enum dlm_status status;
|
||||
int o2dlm_mode = mode_to_o2dlm(mode);
|
||||
int o2dlm_flags = flags_to_o2dlm(flags);
|
||||
int ret;
|
||||
|
||||
status = dlmlock(conn->cc_lockspace, o2dlm_mode, &lksb->lksb_o2dlm,
|
||||
o2dlm_flags, name, namelen,
|
||||
o2dlm_lock_ast_wrapper, astarg,
|
||||
o2dlm_blocking_ast_wrapper);
|
||||
ret = dlm_status_to_errno(status);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int o2cb_dlm_unlock(struct ocfs2_cluster_connection *conn,
|
||||
union ocfs2_dlm_lksb *lksb,
|
||||
u32 flags,
|
||||
void *astarg)
|
||||
{
|
||||
enum dlm_status status;
|
||||
int o2dlm_flags = flags_to_o2dlm(flags);
|
||||
int ret;
|
||||
|
||||
status = dlmunlock(conn->cc_lockspace, &lksb->lksb_o2dlm,
|
||||
o2dlm_flags, o2dlm_unlock_ast_wrapper, astarg);
|
||||
ret = dlm_status_to_errno(status);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int o2cb_dlm_lock_status(union ocfs2_dlm_lksb *lksb)
|
||||
{
|
||||
return dlm_status_to_errno(lksb->lksb_o2dlm.status);
|
||||
}
|
||||
|
||||
static void *o2cb_dlm_lvb(union ocfs2_dlm_lksb *lksb)
|
||||
{
|
||||
return (void *)(lksb->lksb_o2dlm.lvb);
|
||||
}
|
||||
|
||||
static void o2cb_dump_lksb(union ocfs2_dlm_lksb *lksb)
|
||||
{
|
||||
dlm_print_one_lock(lksb->lksb_o2dlm.lockid);
|
||||
}
|
||||
|
||||
/*
|
||||
* Called from the dlm when it's about to evict a node. This is how the
|
||||
* classic stack signals node death.
|
||||
*/
|
||||
static void o2dlm_eviction_cb(int node_num, void *data)
|
||||
{
|
||||
struct ocfs2_cluster_connection *conn = data;
|
||||
|
||||
mlog(ML_NOTICE, "o2dlm has evicted node %d from group %.*s\n",
|
||||
node_num, conn->cc_namelen, conn->cc_name);
|
||||
|
||||
conn->cc_recovery_handler(node_num, conn->cc_recovery_data);
|
||||
}
|
||||
|
||||
static int o2cb_cluster_connect(struct ocfs2_cluster_connection *conn)
|
||||
{
|
||||
int rc = 0;
|
||||
u32 dlm_key;
|
||||
struct dlm_ctxt *dlm;
|
||||
struct o2dlm_private *priv;
|
||||
struct dlm_protocol_version dlm_version;
|
||||
|
||||
BUG_ON(conn == NULL);
|
||||
BUG_ON(o2cb_stack.sp_proto == NULL);
|
||||
|
||||
/* for now we only have one cluster/node, make sure we see it
|
||||
* in the heartbeat universe */
|
||||
if (!o2hb_check_local_node_heartbeating()) {
|
||||
rc = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
priv = kzalloc(sizeof(struct o2dlm_private), GFP_KERNEL);
|
||||
if (!priv) {
|
||||
rc = -ENOMEM;
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
/* This just fills the structure in. It is safe to pass conn. */
|
||||
dlm_setup_eviction_cb(&priv->op_eviction_cb, o2dlm_eviction_cb,
|
||||
conn);
|
||||
|
||||
conn->cc_private = priv;
|
||||
|
||||
/* used by the dlm code to make message headers unique, each
|
||||
* node in this domain must agree on this. */
|
||||
dlm_key = crc32_le(0, conn->cc_name, conn->cc_namelen);
|
||||
dlm_version.pv_major = conn->cc_version.pv_major;
|
||||
dlm_version.pv_minor = conn->cc_version.pv_minor;
|
||||
|
||||
dlm = dlm_register_domain(conn->cc_name, dlm_key, &dlm_version);
|
||||
if (IS_ERR(dlm)) {
|
||||
rc = PTR_ERR(dlm);
|
||||
mlog_errno(rc);
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
conn->cc_version.pv_major = dlm_version.pv_major;
|
||||
conn->cc_version.pv_minor = dlm_version.pv_minor;
|
||||
conn->cc_lockspace = dlm;
|
||||
|
||||
dlm_register_eviction_cb(dlm, &priv->op_eviction_cb);
|
||||
|
||||
out_free:
|
||||
if (rc && conn->cc_private)
|
||||
kfree(conn->cc_private);
|
||||
|
||||
out:
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int o2cb_cluster_disconnect(struct ocfs2_cluster_connection *conn,
|
||||
int hangup_pending)
|
||||
{
|
||||
struct dlm_ctxt *dlm = conn->cc_lockspace;
|
||||
struct o2dlm_private *priv = conn->cc_private;
|
||||
|
||||
dlm_unregister_eviction_cb(&priv->op_eviction_cb);
|
||||
conn->cc_private = NULL;
|
||||
kfree(priv);
|
||||
|
||||
dlm_unregister_domain(dlm);
|
||||
conn->cc_lockspace = NULL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void o2hb_stop(const char *group)
|
||||
{
|
||||
int ret;
|
||||
char *argv[5], *envp[3];
|
||||
|
||||
argv[0] = (char *)o2nm_get_hb_ctl_path();
|
||||
argv[1] = "-K";
|
||||
argv[2] = "-u";
|
||||
argv[3] = (char *)group;
|
||||
argv[4] = NULL;
|
||||
|
||||
mlog(0, "Run: %s %s %s %s\n", argv[0], argv[1], argv[2], argv[3]);
|
||||
|
||||
/* minimal command environment taken from cpu_run_sbin_hotplug */
|
||||
envp[0] = "HOME=/";
|
||||
envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
|
||||
envp[2] = NULL;
|
||||
|
||||
ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
|
||||
if (ret < 0)
|
||||
mlog_errno(ret);
|
||||
}
|
||||
|
||||
/*
|
||||
* Hangup is a hack for tools compatibility. Older ocfs2-tools software
|
||||
* expects the filesystem to call "ocfs2_hb_ctl" during unmount. This
|
||||
* happens regardless of whether the DLM got started, so we can't do it
|
||||
* in ocfs2_cluster_disconnect(). We bring the o2hb_stop() function into
|
||||
* the glue and provide a "hangup" API for super.c to call.
|
||||
*
|
||||
* Other stacks will eventually provide a NULL ->hangup() pointer.
|
||||
*/
|
||||
/* ->hangup(): stop heartbeat for 'group' via o2hb_stop(); grouplen is unused. */
static void o2cb_cluster_hangup(const char *group, int grouplen)
{
	o2hb_stop(group);
}
|
||||
|
||||
static int o2cb_cluster_this_node(unsigned int *node)
|
||||
{
|
||||
int node_num;
|
||||
|
||||
node_num = o2nm_this_node();
|
||||
if (node_num == O2NM_INVALID_NODE_NUM)
|
||||
return -ENOENT;
|
||||
|
||||
if (node_num >= O2NM_MAX_NODES)
|
||||
return -EOVERFLOW;
|
||||
|
||||
*node = node_num;
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct ocfs2_stack_operations o2cb_stack_ops = {
|
||||
.connect = o2cb_cluster_connect,
|
||||
.disconnect = o2cb_cluster_disconnect,
|
||||
.hangup = o2cb_cluster_hangup,
|
||||
.this_node = o2cb_cluster_this_node,
|
||||
.dlm_lock = o2cb_dlm_lock,
|
||||
.dlm_unlock = o2cb_dlm_unlock,
|
||||
.lock_status = o2cb_dlm_lock_status,
|
||||
.lock_lvb = o2cb_dlm_lvb,
|
||||
.dump_lksb = o2cb_dump_lksb,
|
||||
};
|
||||
|
||||
static struct ocfs2_stack_plugin o2cb_stack = {
|
||||
.sp_name = "o2cb",
|
||||
.sp_ops = &o2cb_stack_ops,
|
||||
.sp_owner = THIS_MODULE,
|
||||
};
|
||||
|
||||
/* Module init: register the o2cb plugin with the stack glue. */
static int __init o2cb_stack_init(void)
{
	int rc = ocfs2_stack_glue_register(&o2cb_stack);

	return rc;
}
|
||||
|
||||
/* Module exit: remove the o2cb plugin from the stack glue. */
static void __exit o2cb_stack_exit(void)
{
	ocfs2_stack_glue_unregister(&o2cb_stack);
}
|
||||
|
||||
MODULE_AUTHOR("Oracle");
|
||||
MODULE_DESCRIPTION("ocfs2 driver for the classic o2cb stack");
|
||||
MODULE_LICENSE("GPL");
|
||||
module_init(o2cb_stack_init);
|
||||
module_exit(o2cb_stack_exit);
|
883
fs/ocfs2/stack_user.c
Normal file
883
fs/ocfs2/stack_user.c
Normal file
@ -0,0 +1,883 @@
|
||||
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||
*
|
||||
* stack_user.c
|
||||
*
|
||||
* Code which interfaces ocfs2 with fs/dlm and a userspace stack.
|
||||
*
|
||||
* Copyright (C) 2007 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License as published by the Free Software Foundation, version 2.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/miscdevice.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/reboot.h>
|
||||
#include <asm/uaccess.h>
|
||||
|
||||
#include "ocfs2.h" /* For struct ocfs2_lock_res */
|
||||
#include "stackglue.h"
|
||||
|
||||
|
||||
/*
|
||||
* The control protocol starts with a handshake. Until the handshake
|
||||
* is complete, the control device will fail all write(2)s.
|
||||
*
|
||||
* The handshake is simple. First, the client reads until EOF. Each line
|
||||
* of output is a supported protocol tag. All protocol tags are a single
|
||||
* character followed by a two hex digit version number. Currently the
|
||||
* only thing supported is T01, for "Text-based version 0x01". Next, the
|
||||
* client writes the version they would like to use, including the newline.
|
||||
* Thus, the protocol tag is 'T01\n'. If the version tag written is
|
||||
* unknown, -EINVAL is returned. Once the negotiation is complete, the
|
||||
* client can start sending messages.
|
||||
*
|
||||
* The T01 protocol has three messages. First is the "SETN" message.
|
||||
* It has the following syntax:
|
||||
*
|
||||
* SETN<space><8-char-hex-nodenum><newline>
|
||||
*
|
||||
* This is 14 characters.
|
||||
*
|
||||
* The "SETN" message must be the first message following the protocol.
|
||||
* It tells ocfs2_control the local node number.
|
||||
*
|
||||
* Next comes the "SETV" message. It has the following syntax:
|
||||
*
|
||||
* SETV<space><2-char-hex-major><space><2-char-hex-minor><newline>
|
||||
*
|
||||
* This is 11 characters.
|
||||
*
|
||||
* The "SETV" message sets the filesystem locking protocol version as
|
||||
* negotiated by the client. The client negotiates based on the maximum
|
||||
* version advertised in /sys/fs/ocfs2/max_locking_protocol. The major
|
||||
* number from the "SETV" message must match
|
||||
* user_stack.sp_proto->lp_max_version.pv_major, and the minor number
|
||||
* must be less than or equal to ...->lp_max_version.pv_minor.
|
||||
*
|
||||
* Once this information has been set, mounts will be allowed. From this
|
||||
* point on, the "DOWN" message can be sent for node down notification.
|
||||
* It has the following syntax:
|
||||
*
|
||||
* DOWN<space><32-char-cap-hex-uuid><space><8-char-hex-nodenum><newline>
|
||||
*
|
||||
* eg:
|
||||
*
|
||||
* DOWN 632A924FDD844190BDA93C0DF6B94899 00000001\n
|
||||
*
|
||||
* This is 47 characters.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Whether or not the client has done the handshake.
|
||||
* For now, we have just one protocol version.
|
||||
*/
|
||||
#define OCFS2_CONTROL_PROTO "T01\n"
|
||||
#define OCFS2_CONTROL_PROTO_LEN 4
|
||||
|
||||
/* Handshake states */
|
||||
#define OCFS2_CONTROL_HANDSHAKE_INVALID (0)
|
||||
#define OCFS2_CONTROL_HANDSHAKE_READ (1)
|
||||
#define OCFS2_CONTROL_HANDSHAKE_PROTOCOL (2)
|
||||
#define OCFS2_CONTROL_HANDSHAKE_VALID (3)
|
||||
|
||||
/* Messages */
|
||||
#define OCFS2_CONTROL_MESSAGE_OP_LEN 4
|
||||
#define OCFS2_CONTROL_MESSAGE_SETNODE_OP "SETN"
|
||||
#define OCFS2_CONTROL_MESSAGE_SETNODE_TOTAL_LEN 14
|
||||
#define OCFS2_CONTROL_MESSAGE_SETVERSION_OP "SETV"
|
||||
#define OCFS2_CONTROL_MESSAGE_SETVERSION_TOTAL_LEN 11
|
||||
#define OCFS2_CONTROL_MESSAGE_DOWN_OP "DOWN"
|
||||
#define OCFS2_CONTROL_MESSAGE_DOWN_TOTAL_LEN 47
|
||||
#define OCFS2_TEXT_UUID_LEN 32
|
||||
#define OCFS2_CONTROL_MESSAGE_VERNUM_LEN 2
|
||||
#define OCFS2_CONTROL_MESSAGE_NODENUM_LEN 8
|
||||
|
||||
/*
|
||||
* ocfs2_live_connection is refcounted because the filesystem and
|
||||
* miscdevice sides can detach in different order. Let's just be safe.
|
||||
*/
|
||||
struct ocfs2_live_connection {
|
||||
struct list_head oc_list;
|
||||
struct ocfs2_cluster_connection *oc_conn;
|
||||
};
|
||||
|
||||
struct ocfs2_control_private {
|
||||
struct list_head op_list;
|
||||
int op_state;
|
||||
int op_this_node;
|
||||
struct ocfs2_protocol_version op_proto;
|
||||
};
|
||||
|
||||
/* SETN<space><8-char-hex-nodenum><newline> */
|
||||
struct ocfs2_control_message_setn {
|
||||
char tag[OCFS2_CONTROL_MESSAGE_OP_LEN];
|
||||
char space;
|
||||
char nodestr[OCFS2_CONTROL_MESSAGE_NODENUM_LEN];
|
||||
char newline;
|
||||
};
|
||||
|
||||
/* SETV<space><2-char-hex-major><space><2-char-hex-minor><newline> */
|
||||
struct ocfs2_control_message_setv {
|
||||
char tag[OCFS2_CONTROL_MESSAGE_OP_LEN];
|
||||
char space1;
|
||||
char major[OCFS2_CONTROL_MESSAGE_VERNUM_LEN];
|
||||
char space2;
|
||||
char minor[OCFS2_CONTROL_MESSAGE_VERNUM_LEN];
|
||||
char newline;
|
||||
};
|
||||
|
||||
/* DOWN<space><32-char-cap-hex-uuid><space><8-char-hex-nodenum><newline> */
|
||||
struct ocfs2_control_message_down {
|
||||
char tag[OCFS2_CONTROL_MESSAGE_OP_LEN];
|
||||
char space1;
|
||||
char uuid[OCFS2_TEXT_UUID_LEN];
|
||||
char space2;
|
||||
char nodestr[OCFS2_CONTROL_MESSAGE_NODENUM_LEN];
|
||||
char newline;
|
||||
};
|
||||
|
||||
union ocfs2_control_message {
|
||||
char tag[OCFS2_CONTROL_MESSAGE_OP_LEN];
|
||||
struct ocfs2_control_message_setn u_setn;
|
||||
struct ocfs2_control_message_setv u_setv;
|
||||
struct ocfs2_control_message_down u_down;
|
||||
};
|
||||
|
||||
static struct ocfs2_stack_plugin user_stack;
|
||||
|
||||
static atomic_t ocfs2_control_opened;
|
||||
static int ocfs2_control_this_node = -1;
|
||||
static struct ocfs2_protocol_version running_proto;
|
||||
|
||||
static LIST_HEAD(ocfs2_live_connection_list);
|
||||
static LIST_HEAD(ocfs2_control_private_list);
|
||||
static DEFINE_MUTEX(ocfs2_control_lock);
|
||||
|
||||
static inline void ocfs2_control_set_handshake_state(struct file *file,
|
||||
int state)
|
||||
{
|
||||
struct ocfs2_control_private *p = file->private_data;
|
||||
p->op_state = state;
|
||||
}
|
||||
|
||||
static inline int ocfs2_control_get_handshake_state(struct file *file)
|
||||
{
|
||||
struct ocfs2_control_private *p = file->private_data;
|
||||
return p->op_state;
|
||||
}
|
||||
|
||||
static struct ocfs2_live_connection *ocfs2_connection_find(const char *name)
|
||||
{
|
||||
size_t len = strlen(name);
|
||||
struct ocfs2_live_connection *c;
|
||||
|
||||
BUG_ON(!mutex_is_locked(&ocfs2_control_lock));
|
||||
|
||||
list_for_each_entry(c, &ocfs2_live_connection_list, oc_list) {
|
||||
if ((c->oc_conn->cc_namelen == len) &&
|
||||
!strncmp(c->oc_conn->cc_name, name, len))
|
||||
return c;
|
||||
}
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
/*
|
||||
* ocfs2_live_connection structures are created underneath the ocfs2
|
||||
* mount path. Since the VFS prevents multiple calls to
|
||||
* fill_super(), we can't get dupes here.
|
||||
*/
|
||||
static int ocfs2_live_connection_new(struct ocfs2_cluster_connection *conn,
|
||||
struct ocfs2_live_connection **c_ret)
|
||||
{
|
||||
int rc = 0;
|
||||
struct ocfs2_live_connection *c;
|
||||
|
||||
c = kzalloc(sizeof(struct ocfs2_live_connection), GFP_KERNEL);
|
||||
if (!c)
|
||||
return -ENOMEM;
|
||||
|
||||
mutex_lock(&ocfs2_control_lock);
|
||||
c->oc_conn = conn;
|
||||
|
||||
if (atomic_read(&ocfs2_control_opened))
|
||||
list_add(&c->oc_list, &ocfs2_live_connection_list);
|
||||
else {
|
||||
printk(KERN_ERR
|
||||
"ocfs2: Userspace control daemon is not present\n");
|
||||
rc = -ESRCH;
|
||||
}
|
||||
|
||||
mutex_unlock(&ocfs2_control_lock);
|
||||
|
||||
if (!rc)
|
||||
*c_ret = c;
|
||||
else
|
||||
kfree(c);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
* This function disconnects the cluster connection from ocfs2_control.
|
||||
* Afterwards, userspace can't affect the cluster connection.
|
||||
*/
|
||||
static void ocfs2_live_connection_drop(struct ocfs2_live_connection *c)
|
||||
{
|
||||
mutex_lock(&ocfs2_control_lock);
|
||||
list_del_init(&c->oc_list);
|
||||
c->oc_conn = NULL;
|
||||
mutex_unlock(&ocfs2_control_lock);
|
||||
|
||||
kfree(c);
|
||||
}
|
||||
|
||||
static int ocfs2_control_cfu(void *target, size_t target_len,
|
||||
const char __user *buf, size_t count)
|
||||
{
|
||||
/* The T01 expects write(2) calls to have exactly one command */
|
||||
if ((count != target_len) ||
|
||||
(count > sizeof(union ocfs2_control_message)))
|
||||
return -EINVAL;
|
||||
|
||||
if (copy_from_user(target, buf, target_len))
|
||||
return -EFAULT;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static ssize_t ocfs2_control_validate_protocol(struct file *file,
|
||||
const char __user *buf,
|
||||
size_t count)
|
||||
{
|
||||
ssize_t ret;
|
||||
char kbuf[OCFS2_CONTROL_PROTO_LEN];
|
||||
|
||||
ret = ocfs2_control_cfu(kbuf, OCFS2_CONTROL_PROTO_LEN,
|
||||
buf, count);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (strncmp(kbuf, OCFS2_CONTROL_PROTO, OCFS2_CONTROL_PROTO_LEN))
|
||||
return -EINVAL;
|
||||
|
||||
ocfs2_control_set_handshake_state(file,
|
||||
OCFS2_CONTROL_HANDSHAKE_PROTOCOL);
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
static void ocfs2_control_send_down(const char *uuid,
|
||||
int nodenum)
|
||||
{
|
||||
struct ocfs2_live_connection *c;
|
||||
|
||||
mutex_lock(&ocfs2_control_lock);
|
||||
|
||||
c = ocfs2_connection_find(uuid);
|
||||
if (c) {
|
||||
BUG_ON(c->oc_conn == NULL);
|
||||
c->oc_conn->cc_recovery_handler(nodenum,
|
||||
c->oc_conn->cc_recovery_data);
|
||||
}
|
||||
|
||||
mutex_unlock(&ocfs2_control_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Called whenever configuration elements are sent to /dev/ocfs2_control.
|
||||
* If all configuration elements are present, try to set the global
|
||||
* values. If there is a problem, return an error. Skip any missing
|
||||
* elements, and only bump ocfs2_control_opened when we have all elements
|
||||
* and are successful.
|
||||
*/
|
||||
static int ocfs2_control_install_private(struct file *file)
|
||||
{
|
||||
int rc = 0;
|
||||
int set_p = 1;
|
||||
struct ocfs2_control_private *p = file->private_data;
|
||||
|
||||
BUG_ON(p->op_state != OCFS2_CONTROL_HANDSHAKE_PROTOCOL);
|
||||
|
||||
mutex_lock(&ocfs2_control_lock);
|
||||
|
||||
if (p->op_this_node < 0) {
|
||||
set_p = 0;
|
||||
} else if ((ocfs2_control_this_node >= 0) &&
|
||||
(ocfs2_control_this_node != p->op_this_node)) {
|
||||
rc = -EINVAL;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
if (!p->op_proto.pv_major) {
|
||||
set_p = 0;
|
||||
} else if (!list_empty(&ocfs2_live_connection_list) &&
|
||||
((running_proto.pv_major != p->op_proto.pv_major) ||
|
||||
(running_proto.pv_minor != p->op_proto.pv_minor))) {
|
||||
rc = -EINVAL;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
if (set_p) {
|
||||
ocfs2_control_this_node = p->op_this_node;
|
||||
running_proto.pv_major = p->op_proto.pv_major;
|
||||
running_proto.pv_minor = p->op_proto.pv_minor;
|
||||
}
|
||||
|
||||
out_unlock:
|
||||
mutex_unlock(&ocfs2_control_lock);
|
||||
|
||||
if (!rc && set_p) {
|
||||
/* We set the global values successfully */
|
||||
atomic_inc(&ocfs2_control_opened);
|
||||
ocfs2_control_set_handshake_state(file,
|
||||
OCFS2_CONTROL_HANDSHAKE_VALID);
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int ocfs2_control_get_this_node(void)
|
||||
{
|
||||
int rc;
|
||||
|
||||
mutex_lock(&ocfs2_control_lock);
|
||||
if (ocfs2_control_this_node < 0)
|
||||
rc = -EINVAL;
|
||||
else
|
||||
rc = ocfs2_control_this_node;
|
||||
mutex_unlock(&ocfs2_control_lock);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int ocfs2_control_do_setnode_msg(struct file *file,
|
||||
struct ocfs2_control_message_setn *msg)
|
||||
{
|
||||
long nodenum;
|
||||
char *ptr = NULL;
|
||||
struct ocfs2_control_private *p = file->private_data;
|
||||
|
||||
if (ocfs2_control_get_handshake_state(file) !=
|
||||
OCFS2_CONTROL_HANDSHAKE_PROTOCOL)
|
||||
return -EINVAL;
|
||||
|
||||
if (strncmp(msg->tag, OCFS2_CONTROL_MESSAGE_SETNODE_OP,
|
||||
OCFS2_CONTROL_MESSAGE_OP_LEN))
|
||||
return -EINVAL;
|
||||
|
||||
if ((msg->space != ' ') || (msg->newline != '\n'))
|
||||
return -EINVAL;
|
||||
msg->space = msg->newline = '\0';
|
||||
|
||||
nodenum = simple_strtol(msg->nodestr, &ptr, 16);
|
||||
if (!ptr || *ptr)
|
||||
return -EINVAL;
|
||||
|
||||
if ((nodenum == LONG_MIN) || (nodenum == LONG_MAX) ||
|
||||
(nodenum > INT_MAX) || (nodenum < 0))
|
||||
return -ERANGE;
|
||||
p->op_this_node = nodenum;
|
||||
|
||||
return ocfs2_control_install_private(file);
|
||||
}
|
||||
|
||||
static int ocfs2_control_do_setversion_msg(struct file *file,
|
||||
struct ocfs2_control_message_setv *msg)
|
||||
{
|
||||
long major, minor;
|
||||
char *ptr = NULL;
|
||||
struct ocfs2_control_private *p = file->private_data;
|
||||
struct ocfs2_protocol_version *max =
|
||||
&user_stack.sp_proto->lp_max_version;
|
||||
|
||||
if (ocfs2_control_get_handshake_state(file) !=
|
||||
OCFS2_CONTROL_HANDSHAKE_PROTOCOL)
|
||||
return -EINVAL;
|
||||
|
||||
if (strncmp(msg->tag, OCFS2_CONTROL_MESSAGE_SETVERSION_OP,
|
||||
OCFS2_CONTROL_MESSAGE_OP_LEN))
|
||||
return -EINVAL;
|
||||
|
||||
if ((msg->space1 != ' ') || (msg->space2 != ' ') ||
|
||||
(msg->newline != '\n'))
|
||||
return -EINVAL;
|
||||
msg->space1 = msg->space2 = msg->newline = '\0';
|
||||
|
||||
major = simple_strtol(msg->major, &ptr, 16);
|
||||
if (!ptr || *ptr)
|
||||
return -EINVAL;
|
||||
minor = simple_strtol(msg->minor, &ptr, 16);
|
||||
if (!ptr || *ptr)
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* The major must be between 1 and 255, inclusive. The minor
|
||||
* must be between 0 and 255, inclusive. The version passed in
|
||||
* must be within the maximum version supported by the filesystem.
|
||||
*/
|
||||
if ((major == LONG_MIN) || (major == LONG_MAX) ||
|
||||
(major > (u8)-1) || (major < 1))
|
||||
return -ERANGE;
|
||||
if ((minor == LONG_MIN) || (minor == LONG_MAX) ||
|
||||
(minor > (u8)-1) || (minor < 0))
|
||||
return -ERANGE;
|
||||
if ((major != max->pv_major) ||
|
||||
(minor > max->pv_minor))
|
||||
return -EINVAL;
|
||||
|
||||
p->op_proto.pv_major = major;
|
||||
p->op_proto.pv_minor = minor;
|
||||
|
||||
return ocfs2_control_install_private(file);
|
||||
}
|
||||
|
||||
static int ocfs2_control_do_down_msg(struct file *file,
|
||||
struct ocfs2_control_message_down *msg)
|
||||
{
|
||||
long nodenum;
|
||||
char *p = NULL;
|
||||
|
||||
if (ocfs2_control_get_handshake_state(file) !=
|
||||
OCFS2_CONTROL_HANDSHAKE_VALID)
|
||||
return -EINVAL;
|
||||
|
||||
if (strncmp(msg->tag, OCFS2_CONTROL_MESSAGE_DOWN_OP,
|
||||
OCFS2_CONTROL_MESSAGE_OP_LEN))
|
||||
return -EINVAL;
|
||||
|
||||
if ((msg->space1 != ' ') || (msg->space2 != ' ') ||
|
||||
(msg->newline != '\n'))
|
||||
return -EINVAL;
|
||||
msg->space1 = msg->space2 = msg->newline = '\0';
|
||||
|
||||
nodenum = simple_strtol(msg->nodestr, &p, 16);
|
||||
if (!p || *p)
|
||||
return -EINVAL;
|
||||
|
||||
if ((nodenum == LONG_MIN) || (nodenum == LONG_MAX) ||
|
||||
(nodenum > INT_MAX) || (nodenum < 0))
|
||||
return -ERANGE;
|
||||
|
||||
ocfs2_control_send_down(msg->uuid, nodenum);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static ssize_t ocfs2_control_message(struct file *file,
|
||||
const char __user *buf,
|
||||
size_t count)
|
||||
{
|
||||
ssize_t ret;
|
||||
union ocfs2_control_message msg;
|
||||
|
||||
/* Try to catch padding issues */
|
||||
WARN_ON(offsetof(struct ocfs2_control_message_down, uuid) !=
|
||||
(sizeof(msg.u_down.tag) + sizeof(msg.u_down.space1)));
|
||||
|
||||
memset(&msg, 0, sizeof(union ocfs2_control_message));
|
||||
ret = ocfs2_control_cfu(&msg, count, buf, count);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
if ((count == OCFS2_CONTROL_MESSAGE_SETNODE_TOTAL_LEN) &&
|
||||
!strncmp(msg.tag, OCFS2_CONTROL_MESSAGE_SETNODE_OP,
|
||||
OCFS2_CONTROL_MESSAGE_OP_LEN))
|
||||
ret = ocfs2_control_do_setnode_msg(file, &msg.u_setn);
|
||||
else if ((count == OCFS2_CONTROL_MESSAGE_SETVERSION_TOTAL_LEN) &&
|
||||
!strncmp(msg.tag, OCFS2_CONTROL_MESSAGE_SETVERSION_OP,
|
||||
OCFS2_CONTROL_MESSAGE_OP_LEN))
|
||||
ret = ocfs2_control_do_setversion_msg(file, &msg.u_setv);
|
||||
else if ((count == OCFS2_CONTROL_MESSAGE_DOWN_TOTAL_LEN) &&
|
||||
!strncmp(msg.tag, OCFS2_CONTROL_MESSAGE_DOWN_OP,
|
||||
OCFS2_CONTROL_MESSAGE_OP_LEN))
|
||||
ret = ocfs2_control_do_down_msg(file, &msg.u_down);
|
||||
else
|
||||
ret = -EINVAL;
|
||||
|
||||
out:
|
||||
return ret ? ret : count;
|
||||
}
|
||||
|
||||
static ssize_t ocfs2_control_write(struct file *file,
|
||||
const char __user *buf,
|
||||
size_t count,
|
||||
loff_t *ppos)
|
||||
{
|
||||
ssize_t ret;
|
||||
|
||||
switch (ocfs2_control_get_handshake_state(file)) {
|
||||
case OCFS2_CONTROL_HANDSHAKE_INVALID:
|
||||
ret = -EINVAL;
|
||||
break;
|
||||
|
||||
case OCFS2_CONTROL_HANDSHAKE_READ:
|
||||
ret = ocfs2_control_validate_protocol(file, buf,
|
||||
count);
|
||||
break;
|
||||
|
||||
case OCFS2_CONTROL_HANDSHAKE_PROTOCOL:
|
||||
case OCFS2_CONTROL_HANDSHAKE_VALID:
|
||||
ret = ocfs2_control_message(file, buf, count);
|
||||
break;
|
||||
|
||||
default:
|
||||
BUG();
|
||||
ret = -EIO;
|
||||
break;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* This is a naive version. If we ever have a new protocol, we'll expand
|
||||
* it. Probably using seq_file.
|
||||
*/
|
||||
static ssize_t ocfs2_control_read(struct file *file,
|
||||
char __user *buf,
|
||||
size_t count,
|
||||
loff_t *ppos)
|
||||
{
|
||||
char *proto_string = OCFS2_CONTROL_PROTO;
|
||||
size_t to_write = 0;
|
||||
|
||||
if (*ppos >= OCFS2_CONTROL_PROTO_LEN)
|
||||
return 0;
|
||||
|
||||
to_write = OCFS2_CONTROL_PROTO_LEN - *ppos;
|
||||
if (to_write > count)
|
||||
to_write = count;
|
||||
if (copy_to_user(buf, proto_string + *ppos, to_write))
|
||||
return -EFAULT;
|
||||
|
||||
*ppos += to_write;
|
||||
|
||||
/* Have we read the whole protocol list? */
|
||||
if (*ppos >= OCFS2_CONTROL_PROTO_LEN)
|
||||
ocfs2_control_set_handshake_state(file,
|
||||
OCFS2_CONTROL_HANDSHAKE_READ);
|
||||
|
||||
return to_write;
|
||||
}
|
||||
|
||||
static int ocfs2_control_release(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct ocfs2_control_private *p = file->private_data;
|
||||
|
||||
mutex_lock(&ocfs2_control_lock);
|
||||
|
||||
if (ocfs2_control_get_handshake_state(file) !=
|
||||
OCFS2_CONTROL_HANDSHAKE_VALID)
|
||||
goto out;
|
||||
|
||||
if (atomic_dec_and_test(&ocfs2_control_opened)) {
|
||||
if (!list_empty(&ocfs2_live_connection_list)) {
|
||||
/* XXX: Do bad things! */
|
||||
printk(KERN_ERR
|
||||
"ocfs2: Unexpected release of ocfs2_control!\n"
|
||||
" Loss of cluster connection requires "
|
||||
"an emergency restart!\n");
|
||||
emergency_restart();
|
||||
}
|
||||
/*
|
||||
* Last valid close clears the node number and resets
|
||||
* the locking protocol version
|
||||
*/
|
||||
ocfs2_control_this_node = -1;
|
||||
running_proto.pv_major = 0;
|
||||
running_proto.pv_major = 0;
|
||||
}
|
||||
|
||||
out:
|
||||
list_del_init(&p->op_list);
|
||||
file->private_data = NULL;
|
||||
|
||||
mutex_unlock(&ocfs2_control_lock);
|
||||
|
||||
kfree(p);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ocfs2_control_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct ocfs2_control_private *p;
|
||||
|
||||
p = kzalloc(sizeof(struct ocfs2_control_private), GFP_KERNEL);
|
||||
if (!p)
|
||||
return -ENOMEM;
|
||||
p->op_this_node = -1;
|
||||
|
||||
mutex_lock(&ocfs2_control_lock);
|
||||
file->private_data = p;
|
||||
list_add(&p->op_list, &ocfs2_control_private_list);
|
||||
mutex_unlock(&ocfs2_control_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct file_operations ocfs2_control_fops = {
|
||||
.open = ocfs2_control_open,
|
||||
.release = ocfs2_control_release,
|
||||
.read = ocfs2_control_read,
|
||||
.write = ocfs2_control_write,
|
||||
.owner = THIS_MODULE,
|
||||
};
|
||||
|
||||
struct miscdevice ocfs2_control_device = {
|
||||
.minor = MISC_DYNAMIC_MINOR,
|
||||
.name = "ocfs2_control",
|
||||
.fops = &ocfs2_control_fops,
|
||||
};
|
||||
|
||||
static int ocfs2_control_init(void)
|
||||
{
|
||||
int rc;
|
||||
|
||||
atomic_set(&ocfs2_control_opened, 0);
|
||||
|
||||
rc = misc_register(&ocfs2_control_device);
|
||||
if (rc)
|
||||
printk(KERN_ERR
|
||||
"ocfs2: Unable to register ocfs2_control device "
|
||||
"(errno %d)\n",
|
||||
-rc);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static void ocfs2_control_exit(void)
|
||||
{
|
||||
int rc;
|
||||
|
||||
rc = misc_deregister(&ocfs2_control_device);
|
||||
if (rc)
|
||||
printk(KERN_ERR
|
||||
"ocfs2: Unable to deregister ocfs2_control device "
|
||||
"(errno %d)\n",
|
||||
-rc);
|
||||
}
|
||||
|
||||
static struct dlm_lksb *fsdlm_astarg_to_lksb(void *astarg)
|
||||
{
|
||||
struct ocfs2_lock_res *res = astarg;
|
||||
return &res->l_lksb.lksb_fsdlm;
|
||||
}
|
||||
|
||||
static void fsdlm_lock_ast_wrapper(void *astarg)
|
||||
{
|
||||
struct dlm_lksb *lksb = fsdlm_astarg_to_lksb(astarg);
|
||||
int status = lksb->sb_status;
|
||||
|
||||
BUG_ON(user_stack.sp_proto == NULL);
|
||||
|
||||
/*
|
||||
* For now we're punting on the issue of other non-standard errors
|
||||
* where we can't tell if the unlock_ast or lock_ast should be called.
|
||||
* The main "other error" that's possible is EINVAL which means the
|
||||
* function was called with invalid args, which shouldn't be possible
|
||||
* since the caller here is under our control. Other non-standard
|
||||
* errors probably fall into the same category, or otherwise are fatal
|
||||
* which means we can't carry on anyway.
|
||||
*/
|
||||
|
||||
if (status == -DLM_EUNLOCK || status == -DLM_ECANCEL)
|
||||
user_stack.sp_proto->lp_unlock_ast(astarg, 0);
|
||||
else
|
||||
user_stack.sp_proto->lp_lock_ast(astarg);
|
||||
}
|
||||
|
||||
static void fsdlm_blocking_ast_wrapper(void *astarg, int level)
|
||||
{
|
||||
BUG_ON(user_stack.sp_proto == NULL);
|
||||
|
||||
user_stack.sp_proto->lp_blocking_ast(astarg, level);
|
||||
}
|
||||
|
||||
static int user_dlm_lock(struct ocfs2_cluster_connection *conn,
|
||||
int mode,
|
||||
union ocfs2_dlm_lksb *lksb,
|
||||
u32 flags,
|
||||
void *name,
|
||||
unsigned int namelen,
|
||||
void *astarg)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (!lksb->lksb_fsdlm.sb_lvbptr)
|
||||
lksb->lksb_fsdlm.sb_lvbptr = (char *)lksb +
|
||||
sizeof(struct dlm_lksb);
|
||||
|
||||
ret = dlm_lock(conn->cc_lockspace, mode, &lksb->lksb_fsdlm,
|
||||
flags|DLM_LKF_NODLCKWT, name, namelen, 0,
|
||||
fsdlm_lock_ast_wrapper, astarg,
|
||||
fsdlm_blocking_ast_wrapper);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int user_dlm_unlock(struct ocfs2_cluster_connection *conn,
|
||||
union ocfs2_dlm_lksb *lksb,
|
||||
u32 flags,
|
||||
void *astarg)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = dlm_unlock(conn->cc_lockspace, lksb->lksb_fsdlm.sb_lkid,
|
||||
flags, &lksb->lksb_fsdlm, astarg);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int user_dlm_lock_status(union ocfs2_dlm_lksb *lksb)
|
||||
{
|
||||
return lksb->lksb_fsdlm.sb_status;
|
||||
}
|
||||
|
||||
static void *user_dlm_lvb(union ocfs2_dlm_lksb *lksb)
|
||||
{
|
||||
return (void *)(lksb->lksb_fsdlm.sb_lvbptr);
|
||||
}
|
||||
|
||||
/* dump_lksb operation of the user stack; intentionally does nothing. */
static void user_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb)
{
	/* No output is produced for this stack. */
}
|
||||
|
||||
/*
|
||||
* Compare a requested locking protocol version against the current one.
|
||||
*
|
||||
* If the major numbers are different, they are incompatible.
|
||||
* If the current minor is greater than the request, they are incompatible.
|
||||
* If the current minor is less than or equal to the request, they are
|
||||
* compatible, and the requester should run at the current minor version.
|
||||
*/
|
||||
static int fs_protocol_compare(struct ocfs2_protocol_version *existing,
|
||||
struct ocfs2_protocol_version *request)
|
||||
{
|
||||
if (existing->pv_major != request->pv_major)
|
||||
return 1;
|
||||
|
||||
if (existing->pv_minor > request->pv_minor)
|
||||
return 1;
|
||||
|
||||
if (existing->pv_minor < request->pv_minor)
|
||||
request->pv_minor = existing->pv_minor;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int user_cluster_connect(struct ocfs2_cluster_connection *conn)
|
||||
{
|
||||
dlm_lockspace_t *fsdlm;
|
||||
struct ocfs2_live_connection *control;
|
||||
int rc = 0;
|
||||
|
||||
BUG_ON(conn == NULL);
|
||||
|
||||
rc = ocfs2_live_connection_new(conn, &control);
|
||||
if (rc)
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* running_proto must have been set before we allowed any mounts
|
||||
* to proceed.
|
||||
*/
|
||||
if (fs_protocol_compare(&running_proto, &conn->cc_version)) {
|
||||
printk(KERN_ERR
|
||||
"Unable to mount with fs locking protocol version "
|
||||
"%u.%u because the userspace control daemon has "
|
||||
"negotiated %u.%u\n",
|
||||
conn->cc_version.pv_major, conn->cc_version.pv_minor,
|
||||
running_proto.pv_major, running_proto.pv_minor);
|
||||
rc = -EPROTO;
|
||||
ocfs2_live_connection_drop(control);
|
||||
goto out;
|
||||
}
|
||||
|
||||
rc = dlm_new_lockspace(conn->cc_name, strlen(conn->cc_name),
|
||||
&fsdlm, DLM_LSFL_FS, DLM_LVB_LEN);
|
||||
if (rc) {
|
||||
ocfs2_live_connection_drop(control);
|
||||
goto out;
|
||||
}
|
||||
|
||||
conn->cc_private = control;
|
||||
conn->cc_lockspace = fsdlm;
|
||||
out:
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int user_cluster_disconnect(struct ocfs2_cluster_connection *conn,
|
||||
int hangup_pending)
|
||||
{
|
||||
dlm_release_lockspace(conn->cc_lockspace, 2);
|
||||
conn->cc_lockspace = NULL;
|
||||
ocfs2_live_connection_drop(conn->cc_private);
|
||||
conn->cc_private = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * this_node operation of the user stack.  Stores the installed node
 * number in *@this_node and returns 0, or returns the negative error from
 * ocfs2_control_get_this_node() without touching *@this_node.
 */
static int user_cluster_this_node(unsigned int *this_node)
{
	int node = ocfs2_control_get_this_node();

	if (node < 0)
		return node;

	*this_node = node;
	return 0;
}
|
||||
|
||||
static struct ocfs2_stack_operations user_stack_ops = {
|
||||
.connect = user_cluster_connect,
|
||||
.disconnect = user_cluster_disconnect,
|
||||
.this_node = user_cluster_this_node,
|
||||
.dlm_lock = user_dlm_lock,
|
||||
.dlm_unlock = user_dlm_unlock,
|
||||
.lock_status = user_dlm_lock_status,
|
||||
.lock_lvb = user_dlm_lvb,
|
||||
.dump_lksb = user_dlm_dump_lksb,
|
||||
};
|
||||
|
||||
static struct ocfs2_stack_plugin user_stack = {
|
||||
.sp_name = "user",
|
||||
.sp_ops = &user_stack_ops,
|
||||
.sp_owner = THIS_MODULE,
|
||||
};
|
||||
|
||||
|
||||
/*
 * Module init: set up the control device, then register the plugin with
 * the stack glue.  If registration fails the control device is torn down
 * again.  Returns 0 or the first error encountered.
 */
static int __init user_stack_init(void)
{
	int err;

	err = ocfs2_control_init();
	if (err)
		return err;

	err = ocfs2_stack_glue_register(&user_stack);
	if (err)
		ocfs2_control_exit();

	return err;
}
|
||||
|
||||
/* Module exit: unregister the plugin, then remove the control device. */
static void __exit user_stack_exit(void)
{
	/* Reverse order of user_stack_init(). */
	ocfs2_stack_glue_unregister(&user_stack);
	ocfs2_control_exit();
}
|
||||
|
||||
/* Module entry points and metadata. */
module_init(user_stack_init);
module_exit(user_stack_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Oracle");
MODULE_DESCRIPTION("ocfs2 driver for userspace cluster stacks");
|
568
fs/ocfs2/stackglue.c
Normal file
568
fs/ocfs2/stackglue.c
Normal file
@ -0,0 +1,568 @@
|
||||
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||
*
|
||||
* stackglue.c
|
||||
*
|
||||
* Code which implements an OCFS2 specific interface to underlying
|
||||
* cluster stacks.
|
||||
*
|
||||
* Copyright (C) 2007 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License as published by the Free Software Foundation, version 2.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*/
|
||||
|
||||
#include <linux/list.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/kmod.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/kobject.h>
|
||||
#include <linux/sysfs.h>
|
||||
|
||||
#include "ocfs2_fs.h"
|
||||
|
||||
#include "stackglue.h"
|
||||
|
||||
#define OCFS2_STACK_PLUGIN_O2CB "o2cb"
|
||||
#define OCFS2_STACK_PLUGIN_USER "user"
|
||||
|
||||
static struct ocfs2_locking_protocol *lproto;
|
||||
static DEFINE_SPINLOCK(ocfs2_stack_lock);
|
||||
static LIST_HEAD(ocfs2_stack_list);
|
||||
static char cluster_stack_name[OCFS2_STACK_LABEL_LEN + 1];
|
||||
|
||||
/*
|
||||
* The stack currently in use. If not null, active_stack->sp_count > 0,
|
||||
* the module is pinned, and the locking protocol cannot be changed.
|
||||
*/
|
||||
static struct ocfs2_stack_plugin *active_stack;
|
||||
|
||||
static struct ocfs2_stack_plugin *ocfs2_stack_lookup(const char *name)
|
||||
{
|
||||
struct ocfs2_stack_plugin *p;
|
||||
|
||||
assert_spin_locked(&ocfs2_stack_lock);
|
||||
|
||||
list_for_each_entry(p, &ocfs2_stack_list, sp_list) {
|
||||
if (!strcmp(p->sp_name, name))
|
||||
return p;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int ocfs2_stack_driver_request(const char *stack_name,
|
||||
const char *plugin_name)
|
||||
{
|
||||
int rc;
|
||||
struct ocfs2_stack_plugin *p;
|
||||
|
||||
spin_lock(&ocfs2_stack_lock);
|
||||
|
||||
/*
|
||||
* If the stack passed by the filesystem isn't the selected one,
|
||||
* we can't continue.
|
||||
*/
|
||||
if (strcmp(stack_name, cluster_stack_name)) {
|
||||
rc = -EBUSY;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (active_stack) {
|
||||
/*
|
||||
* If the active stack isn't the one we want, it cannot
|
||||
* be selected right now.
|
||||
*/
|
||||
if (!strcmp(active_stack->sp_name, plugin_name))
|
||||
rc = 0;
|
||||
else
|
||||
rc = -EBUSY;
|
||||
goto out;
|
||||
}
|
||||
|
||||
p = ocfs2_stack_lookup(plugin_name);
|
||||
if (!p || !try_module_get(p->sp_owner)) {
|
||||
rc = -ENOENT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Ok, the stack is pinned */
|
||||
p->sp_count++;
|
||||
active_stack = p;
|
||||
|
||||
rc = 0;
|
||||
|
||||
out:
|
||||
spin_unlock(&ocfs2_stack_lock);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
* This function looks up the appropriate stack and makes it active. If
|
||||
* there is no stack, it tries to load it. It will fail if the stack still
|
||||
* cannot be found. It will also fail if a different stack is in use.
|
||||
*/
|
||||
static int ocfs2_stack_driver_get(const char *stack_name)
|
||||
{
|
||||
int rc;
|
||||
char *plugin_name = OCFS2_STACK_PLUGIN_O2CB;
|
||||
|
||||
/*
|
||||
* Classic stack does not pass in a stack name. This is
|
||||
* compatible with older tools as well.
|
||||
*/
|
||||
if (!stack_name || !*stack_name)
|
||||
stack_name = OCFS2_STACK_PLUGIN_O2CB;
|
||||
|
||||
if (strlen(stack_name) != OCFS2_STACK_LABEL_LEN) {
|
||||
printk(KERN_ERR
|
||||
"ocfs2 passed an invalid cluster stack label: \"%s\"\n",
|
||||
stack_name);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Anything that isn't the classic stack is a user stack */
|
||||
if (strcmp(stack_name, OCFS2_STACK_PLUGIN_O2CB))
|
||||
plugin_name = OCFS2_STACK_PLUGIN_USER;
|
||||
|
||||
rc = ocfs2_stack_driver_request(stack_name, plugin_name);
|
||||
if (rc == -ENOENT) {
|
||||
request_module("ocfs2_stack_%s", plugin_name);
|
||||
rc = ocfs2_stack_driver_request(stack_name, plugin_name);
|
||||
}
|
||||
|
||||
if (rc == -ENOENT) {
|
||||
printk(KERN_ERR
|
||||
"ocfs2: Cluster stack driver \"%s\" cannot be found\n",
|
||||
plugin_name);
|
||||
} else if (rc == -EBUSY) {
|
||||
printk(KERN_ERR
|
||||
"ocfs2: A different cluster stack is in use\n");
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static void ocfs2_stack_driver_put(void)
|
||||
{
|
||||
spin_lock(&ocfs2_stack_lock);
|
||||
BUG_ON(active_stack == NULL);
|
||||
BUG_ON(active_stack->sp_count == 0);
|
||||
|
||||
active_stack->sp_count--;
|
||||
if (!active_stack->sp_count) {
|
||||
module_put(active_stack->sp_owner);
|
||||
active_stack = NULL;
|
||||
}
|
||||
spin_unlock(&ocfs2_stack_lock);
|
||||
}
|
||||
|
||||
int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin)
|
||||
{
|
||||
int rc;
|
||||
|
||||
spin_lock(&ocfs2_stack_lock);
|
||||
if (!ocfs2_stack_lookup(plugin->sp_name)) {
|
||||
plugin->sp_count = 0;
|
||||
plugin->sp_proto = lproto;
|
||||
list_add(&plugin->sp_list, &ocfs2_stack_list);
|
||||
printk(KERN_INFO "ocfs2: Registered cluster interface %s\n",
|
||||
plugin->sp_name);
|
||||
rc = 0;
|
||||
} else {
|
||||
printk(KERN_ERR "ocfs2: Stack \"%s\" already registered\n",
|
||||
plugin->sp_name);
|
||||
rc = -EEXIST;
|
||||
}
|
||||
spin_unlock(&ocfs2_stack_lock);
|
||||
|
||||
return rc;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(ocfs2_stack_glue_register);
|
||||
|
||||
void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin)
|
||||
{
|
||||
struct ocfs2_stack_plugin *p;
|
||||
|
||||
spin_lock(&ocfs2_stack_lock);
|
||||
p = ocfs2_stack_lookup(plugin->sp_name);
|
||||
if (p) {
|
||||
BUG_ON(p != plugin);
|
||||
BUG_ON(plugin == active_stack);
|
||||
BUG_ON(plugin->sp_count != 0);
|
||||
list_del_init(&plugin->sp_list);
|
||||
printk(KERN_INFO "ocfs2: Unregistered cluster interface %s\n",
|
||||
plugin->sp_name);
|
||||
} else {
|
||||
printk(KERN_ERR "Stack \"%s\" is not registered\n",
|
||||
plugin->sp_name);
|
||||
}
|
||||
spin_unlock(&ocfs2_stack_lock);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(ocfs2_stack_glue_unregister);
|
||||
|
||||
void ocfs2_stack_glue_set_locking_protocol(struct ocfs2_locking_protocol *proto)
|
||||
{
|
||||
struct ocfs2_stack_plugin *p;
|
||||
|
||||
BUG_ON(proto == NULL);
|
||||
|
||||
spin_lock(&ocfs2_stack_lock);
|
||||
BUG_ON(active_stack != NULL);
|
||||
|
||||
lproto = proto;
|
||||
list_for_each_entry(p, &ocfs2_stack_list, sp_list) {
|
||||
p->sp_proto = lproto;
|
||||
}
|
||||
|
||||
spin_unlock(&ocfs2_stack_lock);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(ocfs2_stack_glue_set_locking_protocol);
|
||||
|
||||
|
||||
/*
|
||||
* The ocfs2_dlm_lock() and ocfs2_dlm_unlock() functions take
|
||||
* "struct ocfs2_lock_res *astarg" instead of "void *astarg" because the
|
||||
* underlying stack plugins need to pilfer the lksb off of the lock_res.
|
||||
* If some other structure needs to be passed as an astarg, the plugins
|
||||
* will need to be given a different avenue to the lksb.
|
||||
*/
|
||||
int ocfs2_dlm_lock(struct ocfs2_cluster_connection *conn,
|
||||
int mode,
|
||||
union ocfs2_dlm_lksb *lksb,
|
||||
u32 flags,
|
||||
void *name,
|
||||
unsigned int namelen,
|
||||
struct ocfs2_lock_res *astarg)
|
||||
{
|
||||
BUG_ON(lproto == NULL);
|
||||
|
||||
return active_stack->sp_ops->dlm_lock(conn, mode, lksb, flags,
|
||||
name, namelen, astarg);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(ocfs2_dlm_lock);
|
||||
|
||||
int ocfs2_dlm_unlock(struct ocfs2_cluster_connection *conn,
|
||||
union ocfs2_dlm_lksb *lksb,
|
||||
u32 flags,
|
||||
struct ocfs2_lock_res *astarg)
|
||||
{
|
||||
BUG_ON(lproto == NULL);
|
||||
|
||||
return active_stack->sp_ops->dlm_unlock(conn, lksb, flags, astarg);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(ocfs2_dlm_unlock);
|
||||
|
||||
int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb)
|
||||
{
|
||||
return active_stack->sp_ops->lock_status(lksb);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(ocfs2_dlm_lock_status);
|
||||
|
||||
/*
|
||||
* Why don't we cast to ocfs2_meta_lvb? The "clean" answer is that we
|
||||
* don't cast at the glue level. The real answer is that the header
|
||||
* ordering is nigh impossible.
|
||||
*/
|
||||
void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb)
|
||||
{
|
||||
return active_stack->sp_ops->lock_lvb(lksb);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(ocfs2_dlm_lvb);
|
||||
|
||||
void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb)
|
||||
{
|
||||
active_stack->sp_ops->dump_lksb(lksb);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(ocfs2_dlm_dump_lksb);
|
||||
|
||||
int ocfs2_cluster_connect(const char *stack_name,
|
||||
const char *group,
|
||||
int grouplen,
|
||||
void (*recovery_handler)(int node_num,
|
||||
void *recovery_data),
|
||||
void *recovery_data,
|
||||
struct ocfs2_cluster_connection **conn)
|
||||
{
|
||||
int rc = 0;
|
||||
struct ocfs2_cluster_connection *new_conn;
|
||||
|
||||
BUG_ON(group == NULL);
|
||||
BUG_ON(conn == NULL);
|
||||
BUG_ON(recovery_handler == NULL);
|
||||
|
||||
if (grouplen > GROUP_NAME_MAX) {
|
||||
rc = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
new_conn = kzalloc(sizeof(struct ocfs2_cluster_connection),
|
||||
GFP_KERNEL);
|
||||
if (!new_conn) {
|
||||
rc = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
memcpy(new_conn->cc_name, group, grouplen);
|
||||
new_conn->cc_namelen = grouplen;
|
||||
new_conn->cc_recovery_handler = recovery_handler;
|
||||
new_conn->cc_recovery_data = recovery_data;
|
||||
|
||||
/* Start the new connection at our maximum compatibility level */
|
||||
new_conn->cc_version = lproto->lp_max_version;
|
||||
|
||||
/* This will pin the stack driver if successful */
|
||||
rc = ocfs2_stack_driver_get(stack_name);
|
||||
if (rc)
|
||||
goto out_free;
|
||||
|
||||
rc = active_stack->sp_ops->connect(new_conn);
|
||||
if (rc) {
|
||||
ocfs2_stack_driver_put();
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
*conn = new_conn;
|
||||
|
||||
out_free:
|
||||
if (rc)
|
||||
kfree(new_conn);
|
||||
|
||||
out:
|
||||
return rc;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(ocfs2_cluster_connect);
|
||||
|
||||
/* If hangup_pending is 0, the stack driver will be dropped */
|
||||
int ocfs2_cluster_disconnect(struct ocfs2_cluster_connection *conn,
|
||||
int hangup_pending)
|
||||
{
|
||||
int ret;
|
||||
|
||||
BUG_ON(conn == NULL);
|
||||
|
||||
ret = active_stack->sp_ops->disconnect(conn, hangup_pending);
|
||||
|
||||
/* XXX Should we free it anyway? */
|
||||
if (!ret) {
|
||||
kfree(conn);
|
||||
if (!hangup_pending)
|
||||
ocfs2_stack_driver_put();
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(ocfs2_cluster_disconnect);
|
||||
|
||||
void ocfs2_cluster_hangup(const char *group, int grouplen)
|
||||
{
|
||||
BUG_ON(group == NULL);
|
||||
BUG_ON(group[grouplen] != '\0');
|
||||
|
||||
if (active_stack->sp_ops->hangup)
|
||||
active_stack->sp_ops->hangup(group, grouplen);
|
||||
|
||||
/* cluster_disconnect() was called with hangup_pending==1 */
|
||||
ocfs2_stack_driver_put();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(ocfs2_cluster_hangup);
|
||||
|
||||
int ocfs2_cluster_this_node(unsigned int *node)
|
||||
{
|
||||
return active_stack->sp_ops->this_node(node);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(ocfs2_cluster_this_node);
|
||||
|
||||
|
||||
/*
|
||||
* Sysfs bits
|
||||
*/
|
||||
|
||||
static ssize_t ocfs2_max_locking_protocol_show(struct kobject *kobj,
|
||||
struct kobj_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
ssize_t ret = 0;
|
||||
|
||||
spin_lock(&ocfs2_stack_lock);
|
||||
if (lproto)
|
||||
ret = snprintf(buf, PAGE_SIZE, "%u.%u\n",
|
||||
lproto->lp_max_version.pv_major,
|
||||
lproto->lp_max_version.pv_minor);
|
||||
spin_unlock(&ocfs2_stack_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * sysfs attribute "max_locking_protocol": readable by everyone (S_IRUGO),
 * served by ocfs2_max_locking_protocol_show(), no store handler.
 */
static struct kobj_attribute ocfs2_attr_max_locking_protocol =
|
||||
__ATTR(max_locking_protocol, S_IFREG | S_IRUGO,
|
||||
ocfs2_max_locking_protocol_show, NULL);
|
||||
|
||||
static ssize_t ocfs2_loaded_cluster_plugins_show(struct kobject *kobj,
|
||||
struct kobj_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
ssize_t ret = 0, total = 0, remain = PAGE_SIZE;
|
||||
struct ocfs2_stack_plugin *p;
|
||||
|
||||
spin_lock(&ocfs2_stack_lock);
|
||||
list_for_each_entry(p, &ocfs2_stack_list, sp_list) {
|
||||
ret = snprintf(buf, remain, "%s\n",
|
||||
p->sp_name);
|
||||
if (ret < 0) {
|
||||
total = ret;
|
||||
break;
|
||||
}
|
||||
if (ret == remain) {
|
||||
/* snprintf() didn't fit */
|
||||
total = -E2BIG;
|
||||
break;
|
||||
}
|
||||
total += ret;
|
||||
remain -= ret;
|
||||
}
|
||||
spin_unlock(&ocfs2_stack_lock);
|
||||
|
||||
return total;
|
||||
}
|
||||
|
||||
/*
 * sysfs attribute "loaded_cluster_plugins": readable by everyone
 * (S_IRUGO), served by ocfs2_loaded_cluster_plugins_show(), no store
 * handler.
 */
static struct kobj_attribute ocfs2_attr_loaded_cluster_plugins =
|
||||
__ATTR(loaded_cluster_plugins, S_IFREG | S_IRUGO,
|
||||
ocfs2_loaded_cluster_plugins_show, NULL);
|
||||
|
||||
static ssize_t ocfs2_active_cluster_plugin_show(struct kobject *kobj,
|
||||
struct kobj_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
ssize_t ret = 0;
|
||||
|
||||
spin_lock(&ocfs2_stack_lock);
|
||||
if (active_stack) {
|
||||
ret = snprintf(buf, PAGE_SIZE, "%s\n",
|
||||
active_stack->sp_name);
|
||||
if (ret == PAGE_SIZE)
|
||||
ret = -E2BIG;
|
||||
}
|
||||
spin_unlock(&ocfs2_stack_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * sysfs attribute "active_cluster_plugin": readable by everyone (S_IRUGO),
 * served by ocfs2_active_cluster_plugin_show(), no store handler.
 */
static struct kobj_attribute ocfs2_attr_active_cluster_plugin =
|
||||
__ATTR(active_cluster_plugin, S_IFREG | S_IRUGO,
|
||||
ocfs2_active_cluster_plugin_show, NULL);
|
||||
|
||||
static ssize_t ocfs2_cluster_stack_show(struct kobject *kobj,
|
||||
struct kobj_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
ssize_t ret;
|
||||
spin_lock(&ocfs2_stack_lock);
|
||||
ret = snprintf(buf, PAGE_SIZE, "%s\n", cluster_stack_name);
|
||||
spin_unlock(&ocfs2_stack_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * sysfs store handler for "cluster_stack".
 *
 * Accepts a stack label of exactly OCFS2_STACK_LABEL_LEN characters,
 * optionally followed by one trailing newline.
 *
 * Returns:
 *   0        for an empty write (nothing changes),
 *   -EINVAL  if the label has the wrong length or contains a NUL byte,
 *   -EBUSY   if a stack is already active and the label differs from
 *            the current cluster_stack_name,
 *   count    otherwise.  The label is copied into cluster_stack_name
 *            only when no stack is active.
 */
static ssize_t ocfs2_cluster_stack_store(struct kobject *kobj,
					 struct kobj_attribute *attr,
					 const char *buf, size_t count)
{
	size_t label_len = count;
	ssize_t status = count;

	if (!count)
		return 0;

	/* Ignore a single trailing newline. */
	if (buf[label_len - 1] == '\n')
		label_len--;

	if (label_len != OCFS2_STACK_LABEL_LEN)
		return -EINVAL;
	if (strnlen(buf, label_len) != label_len)
		return -EINVAL;

	spin_lock(&ocfs2_stack_lock);
	if (!active_stack)
		memcpy(cluster_stack_name, buf, label_len);
	else if (strncmp(buf, cluster_stack_name, label_len))
		status = -EBUSY;
	spin_unlock(&ocfs2_stack_lock);

	return status;
}
|
||||
|
||||
|
||||
/*
 * sysfs attribute "cluster_stack": readable by everyone (S_IRUGO) and
 * writable by the owner (S_IWUSR).  Reads go to
 * ocfs2_cluster_stack_show(), writes to ocfs2_cluster_stack_store().
 */
static struct kobj_attribute ocfs2_attr_cluster_stack =
|
||||
__ATTR(cluster_stack, S_IFREG | S_IRUGO | S_IWUSR,
|
||||
ocfs2_cluster_stack_show,
|
||||
ocfs2_cluster_stack_store);
|
||||
|
||||
/*
 * NULL-terminated list of the attributes defined in this file; it is
 * referenced by ocfs2_attr_group.
 */
static struct attribute *ocfs2_attrs[] = {
|
||||
&ocfs2_attr_max_locking_protocol.attr,
|
||||
&ocfs2_attr_loaded_cluster_plugins.attr,
|
||||
&ocfs2_attr_active_cluster_plugin.attr,
|
||||
&ocfs2_attr_cluster_stack.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
/*
 * Attribute group wrapping ocfs2_attrs; registered on the ocfs2 kset's
 * kobject by ocfs2_sysfs_init().
 */
static struct attribute_group ocfs2_attr_group = {
|
||||
.attrs = ocfs2_attrs,
|
||||
};
|
||||
|
||||
/*
 * kset named "ocfs2", created under fs_kobj by ocfs2_sysfs_init() and
 * unregistered by ocfs2_sysfs_exit().
 */
static struct kset *ocfs2_kset;
|
||||
|
||||
/* Undo ocfs2_sysfs_init(): unregister the "ocfs2" kset. */
static void ocfs2_sysfs_exit(void)
|
||||
{
|
||||
kset_unregister(ocfs2_kset);
|
||||
}
|
||||
|
||||
static int ocfs2_sysfs_init(void)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ocfs2_kset = kset_create_and_add("ocfs2", NULL, fs_kobj);
|
||||
if (!ocfs2_kset)
|
||||
return -ENOMEM;
|
||||
|
||||
ret = sysfs_create_group(&ocfs2_kset->kobj, &ocfs2_attr_group);
|
||||
if (ret)
|
||||
goto error;
|
||||
|
||||
return 0;
|
||||
|
||||
error:
|
||||
kset_unregister(ocfs2_kset);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Module init: set cluster_stack_name to OCFS2_STACK_PLUGIN_O2CB as the
 * initial value, then create the sysfs files.  Returns the result of
 * ocfs2_sysfs_init().
 */
static int __init ocfs2_stack_glue_init(void)
|
||||
{
|
||||
strcpy(cluster_stack_name, OCFS2_STACK_PLUGIN_O2CB);
|
||||
|
||||
return ocfs2_sysfs_init();
|
||||
}
|
||||
|
||||
/*
 * Module exit: clear the registered locking protocol pointer and remove
 * the sysfs files.
 */
static void __exit ocfs2_stack_glue_exit(void)
|
||||
{
|
||||
lproto = NULL;
|
||||
ocfs2_sysfs_exit();
|
||||
}
|
||||
|
||||
/* Module metadata and entry points for the stack glue layer. */
MODULE_AUTHOR("Oracle");
MODULE_DESCRIPTION("ocfs2 cluster stack glue layer");
MODULE_LICENSE("GPL");
module_init(ocfs2_stack_glue_init);
module_exit(ocfs2_stack_glue_exit);
|
261
fs/ocfs2/stackglue.h
Normal file
261
fs/ocfs2/stackglue.h
Normal file
@ -0,0 +1,261 @@
|
||||
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||
*
|
||||
* stackglue.h
|
||||
*
|
||||
* Glue to the underlying cluster stack.
|
||||
*
|
||||
* Copyright (C) 2007 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License as published by the Free Software Foundation, version 2.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef STACKGLUE_H
|
||||
#define STACKGLUE_H
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/dlmconstants.h>
|
||||
|
||||
#include "dlm/dlmapi.h"
|
||||
#include <linux/dlm.h>
|
||||
|
||||
/*
|
||||
* dlmconstants.h does not have a LOCAL flag. We hope to remove it
|
||||
* some day, but right now we need it. Let's fake it. This value is larger
|
||||
* than any flag in dlmconstants.h.
|
||||
*/
|
||||
#define DLM_LKF_LOCAL 0x00100000
|
||||
|
||||
/*
|
||||
* This shadows DLM_LOCKSPACE_LEN in fs/dlm/dlm_internal.h. That probably
|
||||
* wants to be in a public header.
|
||||
*/
|
||||
#define GROUP_NAME_MAX 64
|
||||
|
||||
|
||||
/*
|
||||
* ocfs2_protocol_version changes when ocfs2 does something different in
|
||||
* its inter-node behavior. See dlmglue.c for more information.
|
||||
*/
|
||||
struct ocfs2_protocol_version {
|
||||
u8 pv_major;	/* major protocol number */
|
||||
u8 pv_minor;	/* minor protocol number */
|
||||
};
|
||||
|
||||
/*
|
||||
* The ocfs2_locking_protocol defines the handlers called on ocfs2's behalf.
|
||||
*/
|
||||
struct ocfs2_locking_protocol {
|
||||
/* Highest protocol version; printed by the max_locking_protocol sysfs file. */
struct ocfs2_protocol_version lp_max_version;
|
||||
/* Lock AST handler; presumably astarg is the value given to the lock call (confirm). */
void (*lp_lock_ast)(void *astarg);
|
||||
/* Blocking AST handler; additionally receives a level. */
void (*lp_blocking_ast)(void *astarg, int level);
|
||||
/* Unlock AST handler; additionally receives an error code. */
void (*lp_unlock_ast)(void *astarg, int error);
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
* The dlm_lockstatus struct includes lvb space, but the dlm_lksb struct only
|
||||
* has a pointer to separately allocated lvb space. This struct exists only to
|
||||
* include in the lksb union to make space for a combined dlm_lksb and lvb.
|
||||
*/
|
||||
struct fsdlm_lksb_plus_lvb {
|
||||
struct dlm_lksb lksb;	/* the lock status block itself */
|
||||
char lvb[DLM_LVB_LEN];	/* room for the lvb, DLM_LVB_LEN bytes */
|
||||
};
|
||||
|
||||
/*
|
||||
* A union of all lock status structures. We define it here so that the
|
||||
* size of the union is known. Lock status structures are embedded in
|
||||
* ocfs2 inodes.
|
||||
*/
|
||||
union ocfs2_dlm_lksb {
|
||||
struct dlm_lockstatus lksb_o2dlm;	/* dlm_lockstatus form (includes lvb space) */
|
||||
struct dlm_lksb lksb_fsdlm;	/* dlm_lksb form (lvb is only a pointer) */
|
||||
struct fsdlm_lksb_plus_lvb padding;	/* sizes the union for a dlm_lksb plus its lvb */
|
||||
};
|
||||
|
||||
/*
|
||||
* A cluster connection. Mostly opaque to ocfs2, the connection holds
|
||||
* state for the underlying stack. ocfs2 does use cc_version to determine
|
||||
* locking compatibility.
|
||||
*/
|
||||
struct ocfs2_cluster_connection {
|
||||
char cc_name[GROUP_NAME_MAX];	/* connection name, at most GROUP_NAME_MAX bytes */
|
||||
int cc_namelen;	/* presumably the length of cc_name (confirm) */
|
||||
struct ocfs2_protocol_version cc_version;	/* used by ocfs2 to determine locking compatibility */
|
||||
/* Recovery callback; takes a node number and the recovery data pointer. */
void (*cc_recovery_handler)(int node_num, void *recovery_data);
|
||||
void *cc_recovery_data;	/* presumably passed back as recovery_data (confirm) */
|
||||
void *cc_lockspace;	/* stack-owned: the filesystem lockspace */
|
||||
void *cc_private;	/* stack-owned: any other per-connection state */
|
||||
};
|
||||
|
||||
/*
|
||||
* Each cluster stack implements the stack operations structure. Not used
|
||||
* in the ocfs2 code, the stackglue code translates generic cluster calls
|
||||
* into stack operations.
|
||||
*/
|
||||
struct ocfs2_stack_operations {
|
||||
/*
|
||||
* The fs code calls ocfs2_cluster_connect() to attach a new
|
||||
* filesystem to the cluster stack. The ->connect() op is passed
|
||||
* an ocfs2_cluster_connection with the name and recovery field
|
||||
* filled in.
|
||||
*
|
||||
* The stack must set up any notification mechanisms and create
|
||||
* the filesystem lockspace in the DLM. The lockspace should be
|
||||
* stored on cc_lockspace. Any other information can be stored on
|
||||
* cc_private.
|
||||
*
|
||||
* ->connect() must not return until it is guaranteed that
|
||||
*
|
||||
* - Node down notifications for the filesystem will be received
|
||||
* and passed to conn->cc_recovery_handler().
|
||||
* - Locking requests for the filesystem will be processed.
|
||||
*/
|
||||
int (*connect)(struct ocfs2_cluster_connection *conn);
|
||||
|
||||
/*
|
||||
* The fs code calls ocfs2_cluster_disconnect() when a filesystem
|
||||
* no longer needs cluster services. All DLM locks have been
|
||||
* dropped, and recovery notification is being ignored by the
|
||||
* fs code. The stack must disengage from the DLM and discontinue
|
||||
* recovery notification.
|
||||
*
|
||||
* Once ->disconnect() has returned, the connection structure will
|
||||
* be freed. Thus, a stack must not return from ->disconnect()
|
||||
* until it will no longer reference the conn pointer.
|
||||
*
|
||||
* If hangup_pending is zero, ocfs2_cluster_disconnect() will also
|
||||
* be dropping the reference on the module.
|
||||
*/
|
||||
int (*disconnect)(struct ocfs2_cluster_connection *conn,
|
||||
int hangup_pending);
|
||||
|
||||
/*
|
||||
* ocfs2_cluster_hangup() exists for compatibility with older
|
||||
* ocfs2 tools. Only the classic stack really needs it. As such
|
||||
* ->hangup() is not required of all stacks. See the comment by
|
||||
* ocfs2_cluster_hangup() for more details.
|
||||
*
|
||||
* Note that ocfs2_cluster_hangup() can only be called if
|
||||
* hangup_pending was passed to ocfs2_cluster_disconnect().
|
||||
*/
|
||||
void (*hangup)(const char *group, int grouplen);
|
||||
|
||||
/*
|
||||
* ->this_node() returns the cluster's unique identifier for the
|
||||
* local node.
|
||||
*/
|
||||
int (*this_node)(unsigned int *node);
|
||||
|
||||
/*
|
||||
* Call the underlying dlm lock function. The ->dlm_lock()
|
||||
* callback should convert the flags and mode as appropriate.
|
||||
*
|
||||
* ast and bast functions are not part of the call because the
|
||||
* stack will likely want to wrap ast and bast calls before passing
|
||||
* them to stack->sp_proto.
|
||||
*/
|
||||
int (*dlm_lock)(struct ocfs2_cluster_connection *conn,
|
||||
int mode,
|
||||
union ocfs2_dlm_lksb *lksb,
|
||||
u32 flags,
|
||||
void *name,
|
||||
unsigned int namelen,
|
||||
void *astarg);
|
||||
|
||||
/*
|
||||
* Call the underlying dlm unlock function. The ->dlm_unlock()
|
||||
* function should convert the flags as appropriate.
|
||||
*
|
||||
* The unlock ast is not passed, as the stack will want to wrap
|
||||
* it before calling stack->sp_proto->lp_unlock_ast().
|
||||
*/
|
||||
int (*dlm_unlock)(struct ocfs2_cluster_connection *conn,
|
||||
union ocfs2_dlm_lksb *lksb,
|
||||
u32 flags,
|
||||
void *astarg);
|
||||
|
||||
/*
|
||||
* Return the status of the current lock status block. The fs
|
||||
* code should never dereference the union. The ->lock_status()
|
||||
* callback pulls out the stack-specific lksb, converts the status
|
||||
* to a proper errno, and returns it.
|
||||
*/
|
||||
int (*lock_status)(union ocfs2_dlm_lksb *lksb);
|
||||
|
||||
/*
|
||||
* Pull the lvb pointer off of the stack-specific lksb.
|
||||
*/
|
||||
void *(*lock_lvb)(union ocfs2_dlm_lksb *lksb);
|
||||
|
||||
/*
|
||||
* This is an optional debugging hook. If provided, the
|
||||
* stack can dump debugging information about this lock.
|
||||
*/
|
||||
void (*dump_lksb)(union ocfs2_dlm_lksb *lksb);
|
||||
};
|
||||
|
||||
/*
|
||||
* Each stack plugin must describe itself by registering a
|
||||
* ocfs2_stack_plugin structure. This is only seen by stackglue and the
|
||||
* stack driver.
|
||||
*/
|
||||
struct ocfs2_stack_plugin {
|
||||
char *sp_name;	/* plugin name; printed by the cluster plugin sysfs files */
|
||||
struct ocfs2_stack_operations *sp_ops;	/* operations the glue calls into */
|
||||
struct module *sp_owner;	/* presumably the module providing the plugin (confirm) */
|
||||
|
||||
/* These are managed by the stackglue code. */
|
||||
struct list_head sp_list;	/* linkage on the glue's list of plugins */
|
||||
unsigned int sp_count;	/* NOTE(review): looks like a use count; confirm in stackglue.c */
|
||||
struct ocfs2_locking_protocol *sp_proto;	/* protocol handlers the stack calls back */
|
||||
};
|
||||
|
||||
|
||||
/* Used by the filesystem */
|
||||
int ocfs2_cluster_connect(const char *stack_name,
|
||||
const char *group,
|
||||
int grouplen,
|
||||
void (*recovery_handler)(int node_num,
|
||||
void *recovery_data),
|
||||
void *recovery_data,
|
||||
struct ocfs2_cluster_connection **conn);
|
||||
int ocfs2_cluster_disconnect(struct ocfs2_cluster_connection *conn,
|
||||
int hangup_pending);
|
||||
void ocfs2_cluster_hangup(const char *group, int grouplen);
|
||||
int ocfs2_cluster_this_node(unsigned int *node);
|
||||
|
||||
struct ocfs2_lock_res;
|
||||
int ocfs2_dlm_lock(struct ocfs2_cluster_connection *conn,
|
||||
int mode,
|
||||
union ocfs2_dlm_lksb *lksb,
|
||||
u32 flags,
|
||||
void *name,
|
||||
unsigned int namelen,
|
||||
struct ocfs2_lock_res *astarg);
|
||||
int ocfs2_dlm_unlock(struct ocfs2_cluster_connection *conn,
|
||||
union ocfs2_dlm_lksb *lksb,
|
||||
u32 flags,
|
||||
struct ocfs2_lock_res *astarg);
|
||||
|
||||
int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb);
|
||||
void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb);
|
||||
void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb);
|
||||
|
||||
void ocfs2_stack_glue_set_locking_protocol(struct ocfs2_locking_protocol *proto);
|
||||
|
||||
|
||||
/* Used by stack plugins */
|
||||
int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin);
|
||||
void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin);
|
||||
#endif /* STACKGLUE_H */
|
@ -46,6 +46,11 @@
|
||||
|
||||
#include "buffer_head_io.h"
|
||||
|
||||
#define NOT_ALLOC_NEW_GROUP 0
|
||||
#define ALLOC_NEW_GROUP 1
|
||||
|
||||
#define OCFS2_MAX_INODES_TO_STEAL 1024
|
||||
|
||||
static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg);
|
||||
static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe);
|
||||
static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl);
|
||||
@ -106,7 +111,7 @@ static inline void ocfs2_block_to_cluster_group(struct inode *inode,
|
||||
u64 *bg_blkno,
|
||||
u16 *bg_bit_off);
|
||||
|
||||
void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac)
|
||||
static void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
|
||||
{
|
||||
struct inode *inode = ac->ac_inode;
|
||||
|
||||
@ -117,9 +122,17 @@ void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac)
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
|
||||
iput(inode);
|
||||
ac->ac_inode = NULL;
|
||||
}
|
||||
if (ac->ac_bh)
|
||||
if (ac->ac_bh) {
|
||||
brelse(ac->ac_bh);
|
||||
ac->ac_bh = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Release everything held by an allocation context and free the context
 * itself.  ocfs2_free_ac_resource() drops the resources the context
 * holds; kfree() then releases the structure, so @ac must not be used
 * afterwards.
 */
void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac)
|
||||
{
|
||||
ocfs2_free_ac_resource(ac);
|
||||
kfree(ac);
|
||||
}
|
||||
|
||||
@ -391,7 +404,8 @@ bail:
|
||||
static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
|
||||
struct ocfs2_alloc_context *ac,
|
||||
int type,
|
||||
u32 slot)
|
||||
u32 slot,
|
||||
int alloc_new_group)
|
||||
{
|
||||
int status;
|
||||
u32 bits_wanted = ac->ac_bits_wanted;
|
||||
@ -420,6 +434,7 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
|
||||
}
|
||||
|
||||
ac->ac_inode = alloc_inode;
|
||||
ac->ac_alloc_slot = slot;
|
||||
|
||||
fe = (struct ocfs2_dinode *) bh->b_data;
|
||||
if (!OCFS2_IS_VALID_DINODE(fe)) {
|
||||
@ -446,6 +461,14 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
|
||||
goto bail;
|
||||
}
|
||||
|
||||
if (alloc_new_group != ALLOC_NEW_GROUP) {
|
||||
mlog(0, "Alloc File %u Full: wanted=%u, free_bits=%u, "
|
||||
"and we don't alloc a new group for it.\n",
|
||||
slot, bits_wanted, free_bits);
|
||||
status = -ENOSPC;
|
||||
goto bail;
|
||||
}
|
||||
|
||||
status = ocfs2_block_group_alloc(osb, alloc_inode, bh);
|
||||
if (status < 0) {
|
||||
if (status != -ENOSPC)
|
||||
@ -490,7 +513,8 @@ int ocfs2_reserve_new_metadata(struct ocfs2_super *osb,
|
||||
(*ac)->ac_group_search = ocfs2_block_group_search;
|
||||
|
||||
status = ocfs2_reserve_suballoc_bits(osb, (*ac),
|
||||
EXTENT_ALLOC_SYSTEM_INODE, slot);
|
||||
EXTENT_ALLOC_SYSTEM_INODE,
|
||||
slot, ALLOC_NEW_GROUP);
|
||||
if (status < 0) {
|
||||
if (status != -ENOSPC)
|
||||
mlog_errno(status);
|
||||
@ -508,10 +532,42 @@ bail:
|
||||
return status;
|
||||
}
|
||||
|
||||
static int ocfs2_steal_inode_from_other_nodes(struct ocfs2_super *osb,
|
||||
struct ocfs2_alloc_context *ac)
|
||||
{
|
||||
int i, status = -ENOSPC;
|
||||
s16 slot = ocfs2_get_inode_steal_slot(osb);
|
||||
|
||||
/* Start to steal inodes from the first slot after ours. */
|
||||
if (slot == OCFS2_INVALID_SLOT)
|
||||
slot = osb->slot_num + 1;
|
||||
|
||||
for (i = 0; i < osb->max_slots; i++, slot++) {
|
||||
if (slot == osb->max_slots)
|
||||
slot = 0;
|
||||
|
||||
if (slot == osb->slot_num)
|
||||
continue;
|
||||
|
||||
status = ocfs2_reserve_suballoc_bits(osb, ac,
|
||||
INODE_ALLOC_SYSTEM_INODE,
|
||||
slot, NOT_ALLOC_NEW_GROUP);
|
||||
if (status >= 0) {
|
||||
ocfs2_set_inode_steal_slot(osb, slot);
|
||||
break;
|
||||
}
|
||||
|
||||
ocfs2_free_ac_resource(ac);
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
int ocfs2_reserve_new_inode(struct ocfs2_super *osb,
|
||||
struct ocfs2_alloc_context **ac)
|
||||
{
|
||||
int status;
|
||||
s16 slot = ocfs2_get_inode_steal_slot(osb);
|
||||
|
||||
*ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
|
||||
if (!(*ac)) {
|
||||
@ -525,9 +581,43 @@ int ocfs2_reserve_new_inode(struct ocfs2_super *osb,
|
||||
|
||||
(*ac)->ac_group_search = ocfs2_block_group_search;
|
||||
|
||||
/*
|
||||
* slot is set when we successfully steal inode from other nodes.
|
||||
* It is reset in 3 places:
|
||||
* 1. when we flush the truncate log
|
||||
* 2. when we complete local alloc recovery.
|
||||
* 3. when we successfully allocate from our own slot.
|
||||
* After it is set, we will go on stealing inodes until we find the
|
||||
* need to check our slots to see whether there is some space for us.
|
||||
*/
|
||||
if (slot != OCFS2_INVALID_SLOT &&
|
||||
atomic_read(&osb->s_num_inodes_stolen) < OCFS2_MAX_INODES_TO_STEAL)
|
||||
goto inode_steal;
|
||||
|
||||
atomic_set(&osb->s_num_inodes_stolen, 0);
|
||||
status = ocfs2_reserve_suballoc_bits(osb, *ac,
|
||||
INODE_ALLOC_SYSTEM_INODE,
|
||||
osb->slot_num);
|
||||
osb->slot_num, ALLOC_NEW_GROUP);
|
||||
if (status >= 0) {
|
||||
status = 0;
|
||||
|
||||
/*
|
||||
* Some inodes must be freed by us, so try to allocate
|
||||
* from our own next time.
|
||||
*/
|
||||
if (slot != OCFS2_INVALID_SLOT)
|
||||
ocfs2_init_inode_steal_slot(osb);
|
||||
goto bail;
|
||||
} else if (status < 0 && status != -ENOSPC) {
|
||||
mlog_errno(status);
|
||||
goto bail;
|
||||
}
|
||||
|
||||
ocfs2_free_ac_resource(*ac);
|
||||
|
||||
inode_steal:
|
||||
status = ocfs2_steal_inode_from_other_nodes(osb, *ac);
|
||||
atomic_inc(&osb->s_num_inodes_stolen);
|
||||
if (status < 0) {
|
||||
if (status != -ENOSPC)
|
||||
mlog_errno(status);
|
||||
@ -557,7 +647,8 @@ int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb,
|
||||
|
||||
status = ocfs2_reserve_suballoc_bits(osb, ac,
|
||||
GLOBAL_BITMAP_SYSTEM_INODE,
|
||||
OCFS2_INVALID_SLOT);
|
||||
OCFS2_INVALID_SLOT,
|
||||
ALLOC_NEW_GROUP);
|
||||
if (status < 0 && status != -ENOSPC) {
|
||||
mlog_errno(status);
|
||||
goto bail;
|
||||
|
@ -36,6 +36,7 @@ typedef int (group_search_t)(struct inode *,
|
||||
struct ocfs2_alloc_context {
|
||||
struct inode *ac_inode; /* which bitmap are we allocating from? */
|
||||
struct buffer_head *ac_bh; /* file entry bh */
|
||||
u32 ac_alloc_slot; /* which slot are we allocating from? */
|
||||
u32 ac_bits_wanted;
|
||||
u32 ac_bits_given;
|
||||
#define OCFS2_AC_USE_LOCAL 1
|
||||
|
208
fs/ocfs2/super.c
208
fs/ocfs2/super.c
@ -40,8 +40,7 @@
|
||||
#include <linux/crc32.h>
|
||||
#include <linux/debugfs.h>
|
||||
#include <linux/mount.h>
|
||||
|
||||
#include <cluster/nodemanager.h>
|
||||
#include <linux/seq_file.h>
|
||||
|
||||
#define MLOG_MASK_PREFIX ML_SUPER
|
||||
#include <cluster/masklog.h>
|
||||
@ -88,6 +87,7 @@ struct mount_options
|
||||
unsigned int atime_quantum;
|
||||
signed short slot;
|
||||
unsigned int localalloc_opt;
|
||||
char cluster_stack[OCFS2_STACK_LABEL_LEN + 1];
|
||||
};
|
||||
|
||||
static int ocfs2_parse_options(struct super_block *sb, char *options,
|
||||
@ -109,7 +109,6 @@ static int ocfs2_sync_fs(struct super_block *sb, int wait);
|
||||
static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb);
|
||||
static int ocfs2_init_local_system_inodes(struct ocfs2_super *osb);
|
||||
static void ocfs2_release_system_inodes(struct ocfs2_super *osb);
|
||||
static int ocfs2_fill_local_node_info(struct ocfs2_super *osb);
|
||||
static int ocfs2_check_volume(struct ocfs2_super *osb);
|
||||
static int ocfs2_verify_volume(struct ocfs2_dinode *di,
|
||||
struct buffer_head *bh,
|
||||
@ -154,6 +153,7 @@ enum {
|
||||
Opt_commit,
|
||||
Opt_localalloc,
|
||||
Opt_localflocks,
|
||||
Opt_stack,
|
||||
Opt_err,
|
||||
};
|
||||
|
||||
@ -172,6 +172,7 @@ static match_table_t tokens = {
|
||||
{Opt_commit, "commit=%u"},
|
||||
{Opt_localalloc, "localalloc=%d"},
|
||||
{Opt_localflocks, "localflocks"},
|
||||
{Opt_stack, "cluster_stack=%s"},
|
||||
{Opt_err, NULL}
|
||||
};
|
||||
|
||||
@ -551,8 +552,17 @@ static int ocfs2_verify_heartbeat(struct ocfs2_super *osb)
|
||||
}
|
||||
}
|
||||
|
||||
if (ocfs2_userspace_stack(osb)) {
|
||||
if (osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) {
|
||||
mlog(ML_ERROR, "Userspace stack expected, but "
|
||||
"o2cb heartbeat arguments passed to mount\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
if (!(osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL)) {
|
||||
if (!ocfs2_mount_local(osb) && !ocfs2_is_hard_readonly(osb)) {
|
||||
if (!ocfs2_mount_local(osb) && !ocfs2_is_hard_readonly(osb) &&
|
||||
!ocfs2_userspace_stack(osb)) {
|
||||
mlog(ML_ERROR, "Heartbeat has to be started to mount "
|
||||
"a read-write clustered device.\n");
|
||||
return -EINVAL;
|
||||
@ -562,6 +572,35 @@ static int ocfs2_verify_heartbeat(struct ocfs2_super *osb)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we're using a userspace stack, mount should have passed
|
||||
* a name that matches the disk. If not, mount should not
|
||||
* have passed a stack.
|
||||
*/
|
||||
/*
 * Check the cluster_stack mount option against the filesystem.
 *
 * Returns 0 when the option is consistent with the filesystem, -EINVAL
 * (after logging an error) otherwise.
 */
static int ocfs2_verify_userspace_stack(struct ocfs2_super *osb,
|
||||
struct mount_options *mopt)
|
||||
{
|
||||
/* A stack name was passed, but the filesystem is not using a userspace stack. */
if (!ocfs2_userspace_stack(osb) && mopt->cluster_stack[0]) {
|
||||
mlog(ML_ERROR,
|
||||
"cluster stack passed to mount, but this filesystem "
|
||||
"does not support it\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/*
 * Userspace stack: the first OCFS2_STACK_LABEL_LEN characters of the
 * mount option must match the stack recorded in osb->osb_cluster_stack.
 * An empty option therefore fails here too.
 * NOTE(review): the message prints osb_cluster_stack with "%s"; confirm
 * that the field is always NUL-terminated.
 */
if (ocfs2_userspace_stack(osb) &&
|
||||
strncmp(osb->osb_cluster_stack, mopt->cluster_stack,
|
||||
OCFS2_STACK_LABEL_LEN)) {
|
||||
mlog(ML_ERROR,
|
||||
"cluster stack passed to mount (\"%s\") does not "
|
||||
"match the filesystem (\"%s\")\n",
|
||||
mopt->cluster_stack,
|
||||
osb->osb_cluster_stack);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
|
||||
{
|
||||
struct dentry *root;
|
||||
@ -579,15 +618,6 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
|
||||
goto read_super_error;
|
||||
}
|
||||
|
||||
/* for now we only have one cluster/node, make sure we see it
|
||||
* in the heartbeat universe */
|
||||
if (parsed_options.mount_opt & OCFS2_MOUNT_HB_LOCAL) {
|
||||
if (!o2hb_check_local_node_heartbeating()) {
|
||||
status = -EINVAL;
|
||||
goto read_super_error;
|
||||
}
|
||||
}
|
||||
|
||||
/* probe for superblock */
|
||||
status = ocfs2_sb_probe(sb, &bh, §or_size);
|
||||
if (status < 0) {
|
||||
@ -609,6 +639,10 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
|
||||
osb->osb_commit_interval = parsed_options.commit_interval;
|
||||
osb->local_alloc_size = parsed_options.localalloc_opt;
|
||||
|
||||
status = ocfs2_verify_userspace_stack(osb, &parsed_options);
|
||||
if (status)
|
||||
goto read_super_error;
|
||||
|
||||
sb->s_magic = OCFS2_SUPER_MAGIC;
|
||||
|
||||
/* Hard readonly mode only if: bdev_read_only, MS_RDONLY,
|
||||
@ -694,7 +728,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
|
||||
if (ocfs2_mount_local(osb))
|
||||
snprintf(nodestr, sizeof(nodestr), "local");
|
||||
else
|
||||
snprintf(nodestr, sizeof(nodestr), "%d", osb->node_num);
|
||||
snprintf(nodestr, sizeof(nodestr), "%u", osb->node_num);
|
||||
|
||||
printk(KERN_INFO "ocfs2: Mounting device (%s) on (node %s, slot %d) "
|
||||
"with %s data mode.\n",
|
||||
@ -763,6 +797,7 @@ static int ocfs2_parse_options(struct super_block *sb,
|
||||
mopt->atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM;
|
||||
mopt->slot = OCFS2_INVALID_SLOT;
|
||||
mopt->localalloc_opt = OCFS2_DEFAULT_LOCAL_ALLOC_SIZE;
|
||||
mopt->cluster_stack[0] = '\0';
|
||||
|
||||
if (!options) {
|
||||
status = 1;
|
||||
@ -864,6 +899,25 @@ static int ocfs2_parse_options(struct super_block *sb,
|
||||
if (!is_remount)
|
||||
mopt->mount_opt |= OCFS2_MOUNT_LOCALFLOCKS;
|
||||
break;
|
||||
case Opt_stack:
|
||||
/* Check both that the option we were passed
|
||||
* is of the right length and that it is a proper
|
||||
* string of the right length.
|
||||
*/
|
||||
if (((args[0].to - args[0].from) !=
|
||||
OCFS2_STACK_LABEL_LEN) ||
|
||||
(strnlen(args[0].from,
|
||||
OCFS2_STACK_LABEL_LEN) !=
|
||||
OCFS2_STACK_LABEL_LEN)) {
|
||||
mlog(ML_ERROR,
|
||||
"Invalid cluster_stack option\n");
|
||||
status = 0;
|
||||
goto bail;
|
||||
}
|
||||
memcpy(mopt->cluster_stack, args[0].from,
|
||||
OCFS2_STACK_LABEL_LEN);
|
||||
mopt->cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0';
|
||||
break;
|
||||
default:
|
||||
mlog(ML_ERROR,
|
||||
"Unrecognized mount option \"%s\" "
|
||||
@ -922,6 +976,10 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
|
||||
if (opts & OCFS2_MOUNT_LOCALFLOCKS)
|
||||
seq_printf(s, ",localflocks,");
|
||||
|
||||
if (osb->osb_cluster_stack[0])
|
||||
seq_printf(s, ",cluster_stack=%.*s", OCFS2_STACK_LABEL_LEN,
|
||||
osb->osb_cluster_stack);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -957,6 +1015,8 @@ static int __init ocfs2_init(void)
|
||||
mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n");
|
||||
}
|
||||
|
||||
ocfs2_set_locking_protocol();
|
||||
|
||||
leave:
|
||||
if (status < 0) {
|
||||
ocfs2_free_mem_caches();
|
||||
@ -1132,31 +1192,6 @@ static int ocfs2_get_sector(struct super_block *sb,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* ocfs2 1.0 only allows one cluster and node identity per kernel image. */
|
||||
static int ocfs2_fill_local_node_info(struct ocfs2_super *osb)
|
||||
{
|
||||
int status;
|
||||
|
||||
/* XXX hold a ref on the node while mounted? easy enough, if
|
||||
* desirable. */
|
||||
if (ocfs2_mount_local(osb))
|
||||
osb->node_num = 0;
|
||||
else
|
||||
osb->node_num = o2nm_this_node();
|
||||
|
||||
if (osb->node_num == O2NM_MAX_NODES) {
|
||||
mlog(ML_ERROR, "could not find this host's node number\n");
|
||||
status = -ENOENT;
|
||||
goto bail;
|
||||
}
|
||||
|
||||
mlog(0, "I am node %d\n", osb->node_num);
|
||||
|
||||
status = 0;
|
||||
bail:
|
||||
return status;
|
||||
}
|
||||
|
||||
static int ocfs2_mount_volume(struct super_block *sb)
|
||||
{
|
||||
int status = 0;
|
||||
@ -1168,12 +1203,6 @@ static int ocfs2_mount_volume(struct super_block *sb)
|
||||
if (ocfs2_is_hard_readonly(osb))
|
||||
goto leave;
|
||||
|
||||
status = ocfs2_fill_local_node_info(osb);
|
||||
if (status < 0) {
|
||||
mlog_errno(status);
|
||||
goto leave;
|
||||
}
|
||||
|
||||
status = ocfs2_dlm_init(osb);
|
||||
if (status < 0) {
|
||||
mlog_errno(status);
|
||||
@ -1224,18 +1253,9 @@ leave:
|
||||
return status;
|
||||
}
|
||||
|
||||
/* we can't grab the goofy sem lock from inside wait_event, so we use
|
||||
* memory barriers to make sure that we'll see the null task before
|
||||
* being woken up */
|
||||
static int ocfs2_recovery_thread_running(struct ocfs2_super *osb)
|
||||
{
|
||||
mb();
|
||||
return osb->recovery_thread_task != NULL;
|
||||
}
|
||||
|
||||
static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
|
||||
{
|
||||
int tmp;
|
||||
int tmp, hangup_needed = 0;
|
||||
struct ocfs2_super *osb = NULL;
|
||||
char nodestr[8];
|
||||
|
||||
@ -1249,25 +1269,16 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
|
||||
|
||||
ocfs2_truncate_log_shutdown(osb);
|
||||
|
||||
/* disable any new recovery threads and wait for any currently
|
||||
* running ones to exit. Do this before setting the vol_state. */
|
||||
mutex_lock(&osb->recovery_lock);
|
||||
osb->disable_recovery = 1;
|
||||
mutex_unlock(&osb->recovery_lock);
|
||||
wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb));
|
||||
|
||||
/* At this point, we know that no more recovery threads can be
|
||||
* launched, so wait for any recovery completion work to
|
||||
* complete. */
|
||||
flush_workqueue(ocfs2_wq);
|
||||
/* This will disable recovery and flush any recovery work. */
|
||||
ocfs2_recovery_exit(osb);
|
||||
|
||||
ocfs2_journal_shutdown(osb);
|
||||
|
||||
ocfs2_sync_blockdev(sb);
|
||||
|
||||
/* No dlm means we've failed during mount, so skip all the
|
||||
* steps which depended on that to complete. */
|
||||
if (osb->dlm) {
|
||||
/* No cluster connection means we've failed during mount, so skip
|
||||
* all the steps which depended on that to complete. */
|
||||
if (osb->cconn) {
|
||||
tmp = ocfs2_super_lock(osb, 1);
|
||||
if (tmp < 0) {
|
||||
mlog_errno(tmp);
|
||||
@ -1278,25 +1289,34 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
|
||||
if (osb->slot_num != OCFS2_INVALID_SLOT)
|
||||
ocfs2_put_slot(osb);
|
||||
|
||||
if (osb->dlm)
|
||||
if (osb->cconn)
|
||||
ocfs2_super_unlock(osb, 1);
|
||||
|
||||
ocfs2_release_system_inodes(osb);
|
||||
|
||||
if (osb->dlm)
|
||||
ocfs2_dlm_shutdown(osb);
|
||||
/*
|
||||
* If we're dismounting due to mount error, mount.ocfs2 will clean
|
||||
* up heartbeat. If we're a local mount, there is no heartbeat.
|
||||
* If we failed before we got a uuid_str yet, we can't stop
|
||||
* heartbeat. Otherwise, do it.
|
||||
*/
|
||||
if (!mnt_err && !ocfs2_mount_local(osb) && osb->uuid_str)
|
||||
hangup_needed = 1;
|
||||
|
||||
if (osb->cconn)
|
||||
ocfs2_dlm_shutdown(osb, hangup_needed);
|
||||
|
||||
debugfs_remove(osb->osb_debug_root);
|
||||
|
||||
if (!mnt_err)
|
||||
ocfs2_stop_heartbeat(osb);
|
||||
if (hangup_needed)
|
||||
ocfs2_cluster_hangup(osb->uuid_str, strlen(osb->uuid_str));
|
||||
|
||||
atomic_set(&osb->vol_state, VOLUME_DISMOUNTED);
|
||||
|
||||
if (ocfs2_mount_local(osb))
|
||||
snprintf(nodestr, sizeof(nodestr), "local");
|
||||
else
|
||||
snprintf(nodestr, sizeof(nodestr), "%d", osb->node_num);
|
||||
snprintf(nodestr, sizeof(nodestr), "%u", osb->node_num);
|
||||
|
||||
printk(KERN_INFO "ocfs2: Unmounting device (%s) on (node %s)\n",
|
||||
osb->dev_str, nodestr);
|
||||
@ -1355,7 +1375,6 @@ static int ocfs2_initialize_super(struct super_block *sb,
|
||||
sb->s_fs_info = osb;
|
||||
sb->s_op = &ocfs2_sops;
|
||||
sb->s_export_op = &ocfs2_export_ops;
|
||||
osb->osb_locking_proto = ocfs2_locking_protocol;
|
||||
sb->s_time_gran = 1;
|
||||
sb->s_flags |= MS_NOATIME;
|
||||
/* this is needed to support O_LARGEFILE */
|
||||
@ -1368,7 +1387,6 @@ static int ocfs2_initialize_super(struct super_block *sb,
|
||||
osb->s_sectsize_bits = blksize_bits(sector_size);
|
||||
BUG_ON(!osb->s_sectsize_bits);
|
||||
|
||||
init_waitqueue_head(&osb->recovery_event);
|
||||
spin_lock_init(&osb->dc_task_lock);
|
||||
init_waitqueue_head(&osb->dc_event);
|
||||
osb->dc_work_sequence = 0;
|
||||
@ -1376,6 +1394,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
|
||||
INIT_LIST_HEAD(&osb->blocked_lock_list);
|
||||
osb->blocked_lock_count = 0;
|
||||
spin_lock_init(&osb->osb_lock);
|
||||
ocfs2_init_inode_steal_slot(osb);
|
||||
|
||||
atomic_set(&osb->alloc_stats.moves, 0);
|
||||
atomic_set(&osb->alloc_stats.local_data, 0);
|
||||
@ -1388,24 +1407,23 @@ static int ocfs2_initialize_super(struct super_block *sb,
|
||||
snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u",
|
||||
MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));
|
||||
|
||||
mutex_init(&osb->recovery_lock);
|
||||
|
||||
osb->disable_recovery = 0;
|
||||
osb->recovery_thread_task = NULL;
|
||||
status = ocfs2_recovery_init(osb);
|
||||
if (status) {
|
||||
mlog(ML_ERROR, "Unable to initialize recovery state\n");
|
||||
mlog_errno(status);
|
||||
goto bail;
|
||||
}
|
||||
|
||||
init_waitqueue_head(&osb->checkpoint_event);
|
||||
atomic_set(&osb->needs_checkpoint, 0);
|
||||
|
||||
osb->s_atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM;
|
||||
|
||||
osb->node_num = O2NM_INVALID_NODE_NUM;
|
||||
osb->slot_num = OCFS2_INVALID_SLOT;
|
||||
|
||||
osb->local_alloc_state = OCFS2_LA_UNUSED;
|
||||
osb->local_alloc_bh = NULL;
|
||||
|
||||
ocfs2_setup_hb_callbacks(osb);
|
||||
|
||||
init_waitqueue_head(&osb->osb_mount_event);
|
||||
|
||||
osb->vol_label = kmalloc(OCFS2_MAX_VOL_LABEL_LEN, GFP_KERNEL);
|
||||
@ -1455,6 +1473,25 @@ static int ocfs2_initialize_super(struct super_block *sb,
|
||||
goto bail;
|
||||
}
|
||||
|
||||
if (ocfs2_userspace_stack(osb)) {
|
||||
memcpy(osb->osb_cluster_stack,
|
||||
OCFS2_RAW_SB(di)->s_cluster_info.ci_stack,
|
||||
OCFS2_STACK_LABEL_LEN);
|
||||
osb->osb_cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0';
|
||||
if (strlen(osb->osb_cluster_stack) != OCFS2_STACK_LABEL_LEN) {
|
||||
mlog(ML_ERROR,
|
||||
"couldn't mount because of an invalid "
|
||||
"cluster stack label (%s) \n",
|
||||
osb->osb_cluster_stack);
|
||||
status = -EINVAL;
|
||||
goto bail;
|
||||
}
|
||||
} else {
|
||||
/* The empty string is identical with classic tools that
|
||||
* don't know about s_cluster_info. */
|
||||
osb->osb_cluster_stack[0] = '\0';
|
||||
}
|
||||
|
||||
get_random_bytes(&osb->s_next_generation, sizeof(u32));
|
||||
|
||||
/* FIXME
|
||||
@ -1724,8 +1761,7 @@ static void ocfs2_delete_osb(struct ocfs2_super *osb)
|
||||
|
||||
/* This function assumes that the caller has the main osb resource */
|
||||
|
||||
if (osb->slot_info)
|
||||
ocfs2_free_slot_info(osb->slot_info);
|
||||
ocfs2_free_slot_info(osb);
|
||||
|
||||
kfree(osb->osb_orphan_wipes);
|
||||
/* FIXME
|
||||
|
@ -87,7 +87,14 @@ int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char
|
||||
|
||||
void sysfs_remove_link(struct kobject * kobj, const char * name)
|
||||
{
|
||||
sysfs_hash_and_remove(kobj->sd, name);
|
||||
struct sysfs_dirent *parent_sd = NULL;
|
||||
|
||||
if (!kobj)
|
||||
parent_sd = &sysfs_root;
|
||||
else
|
||||
parent_sd = kobj->sd;
|
||||
|
||||
sysfs_hash_and_remove(parent_sd, name);
|
||||
}
|
||||
|
||||
static int sysfs_get_target_path(struct sysfs_dirent *parent_sd,
|
||||
|
Loading…
Reference in New Issue
Block a user