From 8b19cb17b9ae9e1cd4a96df6e7dedb75aff7ee96 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Tue, 23 Oct 2007 20:43:27 +1000 Subject: [PATCH] added a syncops VFS module for filesystems which do not guarantee meta-data operations are immediately committed to disk in stable form. Essential for clustered Samba setups --- source/Makefile.in | 5 + source/configure.in | 3 +- source/modules/vfs_syncops.c | 210 +++++++++++++++++++++++++++++++++++ 3 files changed, 217 insertions(+), 1 deletion(-) create mode 100644 source/modules/vfs_syncops.c diff --git a/source/Makefile.in b/source/Makefile.in index ea3708ea18e..a559d63f3e9 100644 --- a/source/Makefile.in +++ b/source/Makefile.in @@ -486,6 +486,7 @@ VFS_GPFS_OBJ = modules/vfs_gpfs.o modules/gpfs.o modules/nfs4_acls.o VFS_NOTIFY_FAM_OBJ = modules/vfs_notify_fam.o VFS_READAHEAD_OBJ = modules/vfs_readahead.o VFS_FILEID_OBJ = modules/vfs_fileid.o +VFS_SYNCOPS_OBJ = modules/vfs_syncops.o PLAINTEXT_AUTH_OBJ = auth/pampass.o auth/pass_check.o @@ -1572,6 +1573,10 @@ bin/shadow_copy.@SHLIBEXT@: $(BINARY_PREREQS) $(VFS_SHADOW_COPY_OBJ) @echo "Building plugin $@" @$(SHLD_MODULE) $(VFS_SHADOW_COPY_OBJ) +bin/syncops.@SHLIBEXT@: $(BINARY_PREREQS) $(VFS_SYNCOPS_OBJ) + @echo "Building plugin $@" + @$(SHLD_MODULE) $(VFS_SYNCOPS_OBJ) + bin/cap.@SHLIBEXT@: $(BINARY_PREREQS) $(VFS_CAP_OBJ) @echo "Building plugin $@" @$(SHLD_MODULE) $(VFS_CAP_OBJ) diff --git a/source/configure.in b/source/configure.in index 90c61c7152c..80e57f87c44 100644 --- a/source/configure.in +++ b/source/configure.in @@ -679,7 +679,7 @@ dnl These have to be built static: default_static_modules="pdb_smbpasswd pdb_tdbsam rpc_lsa rpc_samr rpc_winreg rpc_initshutdown rpc_lsa_ds rpc_wkssvc rpc_svcctl2 rpc_ntsvcs rpc_net rpc_netdfs rpc_srvsvc2 rpc_spoolss rpc_eventlog2 auth_sam auth_unix auth_winbind auth_server auth_domain auth_builtin vfs_default nss_info_template" dnl These are preferably build shared, and static if dlopen() is not available -default_shared_modules="vfs_recycle vfs_audit vfs_extd_audit vfs_full_audit vfs_netatalk vfs_fake_perms vfs_default_quota vfs_readonly vfs_cap vfs_expand_msdfs vfs_shadow_copy charset_CP850 charset_CP437 auth_script vfs_readahead" +default_shared_modules="vfs_recycle vfs_audit vfs_extd_audit vfs_full_audit vfs_netatalk vfs_fake_perms vfs_default_quota vfs_readonly vfs_cap vfs_expand_msdfs vfs_shadow_copy charset_CP850 charset_CP437 auth_script vfs_readahead vfs_syncops" if test "x$developer" = xyes; then default_static_modules="$default_static_modules rpc_rpcecho" @@ -6294,6 +6294,7 @@ SMB_MODULE(vfs_commit, \$(VFS_COMMIT_OBJ), "bin/commit.$SHLIBEXT", VFS) SMB_MODULE(vfs_gpfs, \$(VFS_GPFS_OBJ), "bin/gpfs.$SHLIBEXT", VFS) SMB_MODULE(vfs_readahead, \$(VFS_READAHEAD_OBJ), "bin/readahead.$SHLIBEXT", VFS) SMB_MODULE(vfs_fileid, \$(VFS_FILEID_OBJ), "bin/fileid.$SHLIBEXT", VFS) +SMB_MODULE(vfs_syncops, \$(VFS_SYNCOPS_OBJ), "bin/syncops.$SHLIBEXT", VFS) SMB_SUBSYSTEM(VFS,smbd/vfs.o) diff --git a/source/modules/vfs_syncops.c b/source/modules/vfs_syncops.c new file mode 100644 index 00000000000..3aa89b40a68 --- /dev/null +++ b/source/modules/vfs_syncops.c @@ -0,0 +1,210 @@ +/* + * ensure meta data operations are performed synchronously + * + * Copyright (C) Andrew Tridgell 2007 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include "includes.h" + +/* + + Some filesystems (even some journaled filesystems) require that a + fsync() be performed on many meta data operations to ensure that the + operation is guaranteed to remain in the filesystem after a power + failure. This is particularly important for some cluster filesystems + which are participating in a node failover system with clustered + Samba + + On those filesystems this module provides a way to perform those + operations safely. + */ + +/* + most of the performance loss with this module is in fsync on close(). + You can disable that with syncops:onclose = no + */ +static bool sync_onclose; + +/* + given a filename, find the parent directory + */ +static char *parent_dir(TALLOC_CTX *mem_ctx, const char *name) +{ + const char *p = strrchr(name, '/'); + if (p == NULL) { + return talloc_strdup(mem_ctx, "."); + } + return talloc_strndup(mem_ctx, name, (p+1) - name); +} + +/* + fsync a directory by name + */ +static void syncops_sync_directory(const char *dname) +{ +#ifdef O_DIRECTORY + int fd = open(dname, O_DIRECTORY|O_RDONLY); + if (fd != -1) { + fsync(fd); + close(fd); + } +#else + DIR *d = opendir(dname); + if (d != NULL) { + fsync(dirfd(d)); + closedir(d); + } +#endif +} + +/* + sync two meta data changes for 2 names + */ +static void syncops_two_names(const char *name1, const char *name2) +{ + TALLOC_CTX *tmp_ctx = talloc_new(NULL); + char *parent1, *parent2; + parent1 = parent_dir(tmp_ctx, name1); + parent2 = parent_dir(tmp_ctx, name2); + if (!parent1 || !parent2) { + talloc_free(tmp_ctx); + return; + } + syncops_sync_directory(parent1); + if (strcmp(parent1, parent2) != 0) { + syncops_sync_directory(parent2); + } + talloc_free(tmp_ctx); +} + +/* + sync two meta data changes for 1 names + */ +static void syncops_name(const char *name) +{ + char *parent; + parent = parent_dir(NULL, name); + if (parent) { + syncops_sync_directory(parent); + talloc_free(parent); + } +} + + +/* + rename needs special handling, as we may need to fsync two directories + */ +static int syncops_rename(vfs_handle_struct *handle, + const char *oldname, const char *newname) +{ + int ret = SMB_VFS_NEXT_RENAME(handle, oldname, newname); + if (ret == 0) { + syncops_two_names(oldname, newname); + } + return ret; +} + +/* handle the rest with a macro */ +#define SYNCOPS_NEXT(op, fname, args) do { \ + int ret = SMB_VFS_NEXT_ ## op args; \ + if (ret == 0 && fname) syncops_name(fname); \ + return ret; \ +} while (0) + +static int syncops_symlink(vfs_handle_struct *handle, + const char *oldname, const char *newname) +{ + SYNCOPS_NEXT(SYMLINK, newname, (handle, oldname, newname)); +} + +static int syncops_link(vfs_handle_struct *handle, + const char *oldname, const char *newname) +{ + SYNCOPS_NEXT(LINK, newname, (handle, oldname, newname)); +} + +static int syncops_open(vfs_handle_struct *handle, + const char *fname, files_struct *fsp, int flags, mode_t mode) +{ + SYNCOPS_NEXT(OPEN, (flags&O_CREAT?fname:NULL), (handle, fname, fsp, flags, mode)); +} + +static int syncops_unlink(vfs_handle_struct *handle, const char *fname) +{ + SYNCOPS_NEXT(UNLINK, fname, (handle, fname)); +} + +static int syncops_mknod(vfs_handle_struct *handle, + const char *fname, mode_t mode, SMB_DEV_T dev) +{ + SYNCOPS_NEXT(MKNOD, fname, (handle, fname, mode, dev)); +} + +static int syncops_mkdir(vfs_handle_struct *handle, const char *fname, mode_t mode) +{ + SYNCOPS_NEXT(MKDIR, fname, (handle, fname, mode)); +} + +static int syncops_rmdir(vfs_handle_struct *handle, const char *fname) +{ + SYNCOPS_NEXT(RMDIR, fname, (handle, fname)); +} + +/* close needs to be handled specially */ +static int syncops_close(vfs_handle_struct *handle, files_struct *fsp, int fd) +{ + if (fsp->can_write && sync_onclose) { + /* ideally we'd only do this if we have written some + data, but there is no flag for that in fsp yet. */ + fsync(fd); + } + return SMB_VFS_NEXT_CLOSE(handle, fsp, fd); +} + + +/* VFS operations structure */ + +static vfs_op_tuple syncops_ops[] = { + /* directory operations */ + {SMB_VFS_OP(syncops_mkdir), SMB_VFS_OP_MKDIR, SMB_VFS_LAYER_TRANSPARENT}, + {SMB_VFS_OP(syncops_rmdir), SMB_VFS_OP_RMDIR, SMB_VFS_LAYER_TRANSPARENT}, + + /* File operations */ + {SMB_VFS_OP(syncops_open), SMB_VFS_OP_OPEN, SMB_VFS_LAYER_TRANSPARENT}, + {SMB_VFS_OP(syncops_rename), SMB_VFS_OP_RENAME, SMB_VFS_LAYER_TRANSPARENT}, + {SMB_VFS_OP(syncops_unlink), SMB_VFS_OP_UNLINK, SMB_VFS_LAYER_TRANSPARENT}, + {SMB_VFS_OP(syncops_symlink), SMB_VFS_OP_SYMLINK, SMB_VFS_LAYER_TRANSPARENT}, + {SMB_VFS_OP(syncops_link), SMB_VFS_OP_LINK, SMB_VFS_LAYER_TRANSPARENT}, + {SMB_VFS_OP(syncops_mknod), SMB_VFS_OP_MKNOD, SMB_VFS_LAYER_TRANSPARENT}, + {SMB_VFS_OP(syncops_close), SMB_VFS_OP_CLOSE, SMB_VFS_LAYER_TRANSPARENT}, + + {SMB_VFS_OP(NULL), SMB_VFS_OP_NOOP, SMB_VFS_LAYER_NOOP} +}; + +NTSTATUS vfs_syncops_init(void) +{ + NTSTATUS ret; + + ret = smb_register_vfs(SMB_VFS_INTERFACE_VERSION, "syncops", syncops_ops); + + if (!NT_STATUS_IS_OK(ret)) + return ret; + + sync_onclose = lp_parm_bool(-1, "syncops", "onclose", true); + + return ret; +}