1
0
mirror of https://github.com/samba-team/samba.git synced 2025-03-24 10:50:22 +03:00

Add a vfs_preopen module to hide fs latencies

This commit is contained in:
Volker Lendecke 2009-03-10 18:02:21 +01:00
parent 66a26a0ac2
commit 3d280639c4
4 changed files with 578 additions and 1 deletions

View File

@ -0,0 +1,115 @@
<?xml version="1.0" encoding="iso-8859-1"?>
<!DOCTYPE refentry PUBLIC "-//Samba-Team//DTD DocBook V4.2-Based Variant V1.0//EN" "http://www.samba.org/samba/DTD/samba-doc">
<refentry id="vfs_preopen.8">
<refmeta>
<refentrytitle>vfs_preopen</refentrytitle>
<manvolnum>8</manvolnum>
<refmiscinfo class="source">Samba</refmiscinfo>
<refmiscinfo class="manual">System Administration tools</refmiscinfo>
<refmiscinfo class="version">3.3</refmiscinfo>
</refmeta>
<refnamediv>
<refname>vfs_preopen</refname>
<refpurpose>Hide read latencies for applications reading numbered files</refpurpose>
</refnamediv>
<refsynopsisdiv>
<cmdsynopsis>
<command>vfs objects = preopen</command>
</cmdsynopsis>
</refsynopsisdiv>
<refsect1>
<title>DESCRIPTION</title>
<para>This VFS module is part of the
<citerefentry><refentrytitle>samba</refentrytitle>
<manvolnum>7</manvolnum></citerefentry> suite.</para>
<para>This module assists applications that want to read numbered
files in sequence with very strict latency requirements. One area
where this happens is video streaming applications that want to read
one file per frame.</para>
<para>When you use this module, a number of helper processes are
started that speculatively open files and read a number of bytes to
prime the file system cache, so that later on when the real
application's request comes along, no disk access is necessary.</para>
<para>This module is stackable.</para>
</refsect1>
<refsect1>
<title>OPTIONS</title>
<variablelist>
<varlistentry>
<term>preopen:names = /pattern/</term>
<listitem>
<para>
preopen:names specifies the file name pattern which should
trigger the preopen helpers to do their work. We assume that
the files are numbered incrementally. So if your file names
are numbered FRAME00000.frm FRAME00001.frm and so on you would
list them as <command>preopen:names=/FRAME*.frm/</command>
</para>
</listitem>
</varlistentry>
<varlistentry>
<term>preopen:num_bytes = BYTES</term>
<listitem>
<para>
Specifies the number of bytes the helpers should speculatively
read, defaults to 1.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term>preopen:helpers = NUM-PROCS</term>
<listitem>
<para>
Number of forked helper processes, defaults to 1.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term>preopen:queuelen = NUM-FILES</term>
<listitem>
<para>
Number of files that should be speculatively opened. Defaults
to the 10 subsequent files.
</para>
</listitem>
</varlistentry>
</variablelist>
</refsect1>
<refsect1>
<title>VERSION</title>
<para>This man page is correct for version 3.3 of the Samba suite.
</para>
</refsect1>
<refsect1>
<title>AUTHOR</title>
<para>The original Samba software and related utilities
were created by Andrew Tridgell. Samba is now developed
by the Samba Team as an Open Source project similar
to the way the Linux kernel is developed.</para>
<para>The PREOPEN VFS module was created with contributions from
Volker Lendecke and the developers at IBM.
</para>
</refsect1>
</refentry>

View File

@ -667,6 +667,7 @@ VFS_READAHEAD_OBJ = modules/vfs_readahead.o
VFS_TSMSM_OBJ = modules/vfs_tsmsm.o
VFS_FILEID_OBJ = modules/vfs_fileid.o
VFS_AIO_FORK_OBJ = modules/vfs_aio_fork.o
VFS_PREOPEN_OBJ = modules/vfs_preopen.o
VFS_SYNCOPS_OBJ = modules/vfs_syncops.o
VFS_ACL_XATTR_OBJ = modules/vfs_acl_xattr.o
VFS_ACL_TDB_OBJ = modules/vfs_acl_tdb.o
@ -2567,6 +2568,10 @@ bin/aio_fork.@SHLIBEXT@: $(BINARY_PREREQS) $(VFS_AIO_FORK_OBJ)
@echo "Building plugin $@"
@$(SHLD_MODULE) $(VFS_AIO_FORK_OBJ)
bin/preopen.@SHLIBEXT@: $(BINARY_PREREQS) $(VFS_PREOPEN_OBJ)
@echo "Building plugin $@"
@$(SHLD_MODULE) $(VFS_PREOPEN_OBJ)
bin/acl_xattr.@SHLIBEXT@: $(BINARY_PREREQS) $(VFS_ACL_XATTR_OBJ)
@echo "Building plugin $@"
@$(SHLD_MODULE) $(VFS_ACL_XATTR_OBJ)

View File

@ -417,7 +417,7 @@ dnl These have to be built static:
default_static_modules="pdb_smbpasswd pdb_tdbsam pdb_wbc_sam rpc_lsarpc rpc_samr rpc_winreg rpc_initshutdown rpc_dssetup rpc_wkssvc rpc_svcctl rpc_ntsvcs rpc_netlogon rpc_netdfs rpc_srvsvc rpc_spoolss2 rpc_eventlog auth_sam auth_unix auth_winbind auth_wbc auth_server auth_domain auth_builtin auth_netlogond vfs_default nss_info_template"
dnl These are preferably build shared, and static if dlopen() is not available
default_shared_modules="vfs_recycle vfs_audit vfs_extd_audit vfs_full_audit vfs_netatalk vfs_fake_perms vfs_default_quota vfs_readonly vfs_cap vfs_expand_msdfs vfs_shadow_copy vfs_shadow_copy2 charset_CP850 charset_CP437 auth_script vfs_readahead vfs_xattr_tdb vfs_streams_xattr vfs_streams_depot vfs_acl_xattr vfs_acl_tdb vfs_smb_traffic_analyzer"
default_shared_modules="vfs_recycle vfs_audit vfs_extd_audit vfs_full_audit vfs_netatalk vfs_fake_perms vfs_default_quota vfs_readonly vfs_cap vfs_expand_msdfs vfs_shadow_copy vfs_shadow_copy2 charset_CP850 charset_CP437 auth_script vfs_readahead vfs_xattr_tdb vfs_streams_xattr vfs_streams_depot vfs_acl_xattr vfs_acl_tdb vfs_smb_traffic_analyzer vfs_preopen"
if test "x$developer" = xyes; then
default_static_modules="$default_static_modules rpc_rpcecho"
@ -6185,6 +6185,7 @@ SMB_MODULE(vfs_readahead, \$(VFS_READAHEAD_OBJ), "bin/readahead.$SHLIBEXT", VFS)
SMB_MODULE(vfs_tsmsm, \$(VFS_TSMSM_OBJ), "bin/tsmsm.$SHLIBEXT", VFS)
SMB_MODULE(vfs_fileid, \$(VFS_FILEID_OBJ), "bin/fileid.$SHLIBEXT", VFS)
SMB_MODULE(vfs_aio_fork, \$(VFS_AIO_FORK_OBJ), "bin/aio_fork.$SHLIBEXT", VFS)
SMB_MODULE(vfs_preopen, \$(VFS_PREOPEN_OBJ), "bin/preopen.$SHLIBEXT", VFS)
SMB_MODULE(vfs_syncops, \$(VFS_SYNCOPS_OBJ), "bin/syncops.$SHLIBEXT", VFS)
SMB_MODULE(vfs_zfsacl, \$(VFS_ZFSACL_OBJ), "bin/zfsacl.$SHLIBEXT", VFS)
SMB_MODULE(vfs_notify_fam, \$(VFS_NOTIFY_FAM_OBJ), "bin/notify_fam.$SHLIBEXT", VFS)

View File

@ -0,0 +1,456 @@
/*
* Force a readahead of files by opening them and reading the first bytes
*
* Copyright (C) Volker Lendecke 2008
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include "includes.h"
/* Forward declaration: every helper points back at the state owning it. */
struct preopen_state;
/*
 * Parent-side bookkeeping for one forked helper process.
 */
struct preopen_helper {
/* Back pointer to the owning state, set when the helper array is built */
struct preopen_state *state;
/* Read event registered on "fd", dispatching to preopen_helper_readable() */
struct fd_event *fde;
/* Result of sys_fork() for this helper */
pid_t pid;
/* Parent's end of the socketpair to the child, -1 while no child is attached */
int fd;
/*
 * Set when a file name was written to the child, cleared when the
 * child's one-byte reply was read. Also set when the helper is destroyed.
 */
bool busy;
};
/*
 * Module state attached to the VFS handle: the helper processes plus the
 * description of the numbered file sequence currently being pre-opened.
 */
struct preopen_state {
/* Number of entries in "helpers" */
int num_helpers;
/* Array of helper slots, allocated as a talloc child of this state */
struct preopen_helper *helpers;
size_t to_read; /* How many bytes to read in children? */
/* Added to the number of the last opened file to get fnum_queue_end */
int queue_max;
char *template_fname; /* Filename to be sent to children */
size_t number_start; /* start offset into "template_fname" */
int num_digits; /* How many digits is the number long? */
int fnum_sent; /* last fname sent to children */
int fnum_queue_end; /* last fname to be sent, based on
* last open call + preopen:queuelen
*/
/* Name list built from preopen:names; opens are matched against it */
name_compare_entry *preopen_names;
};
/*
 * Tear down one helper: drop its event registration, close the parent's
 * end of the socket, then kill and reap the child process.
 *
 * The helper is left marked busy so that preopen_queue_run() never
 * selects it again. Calling this on a slot that has no child attached
 * (fd == -1) only marks it busy.
 */
static void preopen_helper_destroy(struct preopen_helper *c)
{
	int status;

	if (c->fd == -1) {
		/*
		 * Never started or already destroyed: pid and fde do not
		 * hold valid values here. In particular a pid of -1 would
		 * make kill() signal every process we may signal.
		 */
		c->busy = true;
		return;
	}

	/* Stop the event loop from watching a descriptor we are closing */
	TALLOC_FREE(c->fde);

	close(c->fd);
	c->fd = -1;

	if (c->pid > 0) {
		kill(c->pid, SIGKILL);
		waitpid(c->pid, &status, 0);
	}
	c->pid = -1;
	c->busy = true;
}
static void preopen_queue_run(struct preopen_state *state)
{
char *pdelimiter;
char delimiter;
pdelimiter = state->template_fname + state->number_start
+ state->num_digits;
delimiter = *pdelimiter;
while (state->fnum_sent < state->fnum_queue_end) {
ssize_t written;
size_t to_write;
int helper;
for (helper=0; helper<state->num_helpers; helper++) {
if (state->helpers[helper].busy) {
continue;
}
break;
}
if (helper == state->num_helpers) {
/* everyone is busy */
return;
}
snprintf(state->template_fname + state->number_start,
state->num_digits + 1,
"%.*lu", state->num_digits,
(long unsigned int)(state->fnum_sent + 1));
*pdelimiter = delimiter;
to_write = talloc_get_size(state->template_fname);
written = write_data(state->helpers[helper].fd,
state->template_fname, to_write);
state->helpers[helper].busy = true;
if (written != to_write) {
preopen_helper_destroy(&state->helpers[helper]);
}
state->fnum_sent += 1;
}
}
/*
 * Event handler for a helper's socket becoming readable.
 *
 * A single byte read from the helper means it finished its current file:
 * the helper is marked idle and the queue is run again. End of file or a
 * read error destroys the helper instead.
 */
static void preopen_helper_readable(struct event_context *ev,
				    struct fd_event *fde, uint16_t flags,
				    void *priv)
{
	struct preopen_helper *helper = (struct preopen_helper *)priv;
	struct preopen_state *state = helper->state;
	char reply;

	if (!(flags & EVENT_FD_READ)) {
		return;
	}

	if (read(helper->fd, &reply, 1) <= 0) {
		preopen_helper_destroy(helper);
		return;
	}

	helper->busy = false;
	preopen_queue_run(state);
}
static int preopen_helpers_destructor(struct preopen_state *c)
{
int i;
for (i=0; i<c->num_helpers; i++) {
if (c->helpers[i].fd == -1) {
continue;
}
preopen_helper_destroy(&c->helpers[i]);
}
return 0;
}
/*
 * Child side: serve one request from the parent.
 *
 * Reads a NUL-terminated file name from sock_fd into *pnamebuf, doubling
 * the buffer whenever it is full. The file is then opened read-only, up
 * to to_read bytes are read into filebuf, and one byte is written back to
 * sock_fd as the reply, whether or not the open succeeded.
 *
 * Returns false when the name could not be received (end of file, read
 * error, or out of memory); true otherwise.
 */
static bool preopen_helper_open_one(int sock_fd, char **pnamebuf,
				    size_t to_read, void *filebuf)
{
	char *name = *pnamebuf;
	ssize_t received = 0;
	ssize_t rc;
	char ack = 0;
	int file_fd;

	do {
		size_t bufsize = talloc_get_size(name);
		ssize_t chunk;

		chunk = read(sock_fd, name + received, bufsize - received);
		if (chunk <= 0) {
			return false;
		}
		received += chunk;

		if ((size_t)received == bufsize) {
			/* Buffer is full, make room for more */
			name = TALLOC_REALLOC_ARRAY(NULL, name, char,
						    bufsize * 2);
			if (name == NULL) {
				return false;
			}
			*pnamebuf = name;
		}
	} while (name[received - 1] != '\0');

	file_fd = open(name, O_RDONLY);
	if (file_fd != -1) {
		/* Only the side effect of reading matters, not the data */
		rc = read(file_fd, filebuf, to_read);
		(void)rc;
		close(file_fd);
	}

	/* Tell the parent we are done with this name */
	rc = write(sock_fd, &ack, 1);
	(void)rc;

	return true;
}
/*
 * Main loop of a helper child: allocate the name and read buffers, then
 * serve requests arriving on fd until preopen_helper_open_one() reports
 * failure. Both buffers are released before returning.
 *
 * Always returns false.
 */
static bool preopen_helper(int fd, size_t to_read)
{
	char *namebuf = NULL;
	void *readbuf = NULL;

	namebuf = TALLOC_ARRAY(NULL, char, 1024);
	if (namebuf == NULL) {
		goto done;
	}

	readbuf = talloc_size(NULL, to_read);
	if (readbuf == NULL) {
		goto done;
	}

	for (;;) {
		if (!preopen_helper_open_one(fd, &namebuf, to_read,
					     readbuf)) {
			break;
		}
	}

done:
	TALLOC_FREE(readbuf);
	TALLOC_FREE(namebuf);
	return false;
}
/*
 * Start one helper: create a socketpair, fork the child running
 * preopen_helper() on one end, and register a read event for the other
 * end in the parent.
 *
 * h->state must already be set. On any failure the slot is left with
 * fd == -1 and busy == true, so it is never handed work and never
 * destroyed a second time.
 *
 * Returns NT_STATUS_OK on success, the mapped errno if socketpair() or
 * fork() failed, or NT_STATUS_NO_MEMORY if the event could not be added.
 */
static NTSTATUS preopen_init_helper(struct preopen_helper *h)
{
	int fdpair[2];
	NTSTATUS status;

	/* Not usable until everything below has succeeded */
	h->fde = NULL;
	h->pid = -1;
	h->fd = -1;
	h->busy = true;

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, fdpair) == -1) {
		status = map_nt_error_from_unix(errno);
		DEBUG(10, ("socketpair() failed: %s\n", strerror(errno)));
		return status;
	}

	h->pid = sys_fork();

	if (h->pid == -1) {
		/* Pick up errno before close() gets a chance to change it */
		status = map_nt_error_from_unix(errno);
		close(fdpair[0]);
		close(fdpair[1]);
		return status;
	}

	if (h->pid == 0) {
		/* Child: serve requests until the parent goes away */
		close(fdpair[0]);
		preopen_helper(fdpair[1], h->state->to_read);
		exit(0);
	}

	close(fdpair[1]);
	h->fd = fdpair[0];
	h->fde = event_add_fd(smbd_event_context(), h->state, h->fd,
			      EVENT_FD_READ, preopen_helper_readable, h);
	if (h->fde == NULL) {
		/*
		 * Without the event we would never see replies. Close the
		 * socket and reap the child instead of leaving it behind.
		 */
		preopen_helper_destroy(h);
		return NT_STATUS_NO_MEMORY;
	}

	h->busy = false;
	return NT_STATUS_OK;
}
/*
 * Allocate the module state on mem_ctx and start num_helpers helper
 * processes.
 *
 * Every field is initialised explicitly because talloc() does not zero
 * the memory it returns. Helper slots start out with no child attached
 * and marked busy, so a helper that fails to start is never handed work.
 *
 * Starting helpers is best effort: a failure for one helper does not fail
 * the whole call.
 *
 * Returns NT_STATUS_NO_MEMORY if the state or the helper array cannot be
 * allocated, NT_STATUS_OK otherwise with the new state in *presult.
 */
static NTSTATUS preopen_init_helpers(TALLOC_CTX *mem_ctx, size_t to_read,
				     int num_helpers, int queue_max,
				     struct preopen_state **presult)
{
	struct preopen_state *result;
	int i;

	result = talloc(mem_ctx, struct preopen_state);
	if (result == NULL) {
		return NT_STATUS_NO_MEMORY;
	}

	result->num_helpers = num_helpers;

	result->helpers = TALLOC_ARRAY(result, struct preopen_helper,
				       num_helpers);
	if (result->helpers == NULL) {
		TALLOC_FREE(result);
		return NT_STATUS_NO_MEMORY;
	}

	result->to_read = to_read;
	result->queue_max = queue_max;
	result->template_fname = NULL;
	result->number_start = 0;
	result->num_digits = 0;
	result->fnum_sent = 0;
	result->fnum_queue_end = 0;
	result->preopen_names = NULL;

	for (i=0; i<num_helpers; i++) {
		result->helpers[i].state = result;
		result->helpers[i].fde = NULL;
		result->helpers[i].pid = -1;
		result->helpers[i].fd = -1;
		result->helpers[i].busy = true;
	}

	/* From here on freeing "result" also shuts down the helpers */
	talloc_set_destructor(result, preopen_helpers_destructor);

	for (i=0; i<num_helpers; i++) {
		/* Best effort: a slot that failed to start simply stays unused */
		(void)preopen_init_helper(&result->helpers[i]);
	}

	*presult = result;
	return NT_STATUS_OK;
}
/*
 * Free callback registered with the VFS handle data: releases the state
 * that *ptr refers to.
 */
static void preopen_free_helpers(void **ptr)
{
	TALLOC_FREE(*ptr);
}
/*
 * Return the module state stored on the VFS handle, creating it on first
 * use.
 *
 * The state is only created when "preopen:names" is set for the share.
 * The other parameters default to num_bytes=1, helpers=1 and queuelen=10.
 *
 * Returns NULL if the module is not configured for this share or if
 * setting up the state failed.
 */
static struct preopen_state *preopen_state_get(vfs_handle_struct *handle)
{
	struct preopen_state *state;
	NTSTATUS status;
	const char *namelist;
	int num_bytes;
	int num_helpers;
	int queuelen;

	if (SMB_VFS_HANDLE_TEST_DATA(handle)) {
		/* Already set up by an earlier call */
		SMB_VFS_HANDLE_GET_DATA(handle, state, struct preopen_state,
					return NULL);
		return state;
	}

	namelist = lp_parm_const_string(SNUM(handle->conn), "preopen", "names",
					NULL);
	if (namelist == NULL) {
		return NULL;
	}

	num_bytes = lp_parm_int(SNUM(handle->conn), "preopen", "num_bytes", 1);
	num_helpers = lp_parm_int(SNUM(handle->conn), "preopen", "helpers", 1);
	queuelen = lp_parm_int(SNUM(handle->conn), "preopen", "queuelen", 10);

	status = preopen_init_helpers(NULL, num_bytes, num_helpers, queuelen,
				      &state);
	if (!NT_STATUS_IS_OK(status)) {
		return NULL;
	}

	set_namearray(&state->preopen_names, (char *)namelist);
	if (state->preopen_names == NULL) {
		TALLOC_FREE(state);
		return NULL;
	}

	if (!SMB_VFS_HANDLE_TEST_DATA(handle)) {
		SMB_VFS_HANDLE_SET_DATA(handle, state, preopen_free_helpers,
					struct preopen_state, return NULL);
	}

	return state;
}
/*
 * Locate the sequence number inside the last path component of fname.
 *
 * The number is taken to start at the first position in the last
 * component where at least three consecutive digits are found.
 *
 * fname:       path to examine
 * pnum:        receives the numeric value
 * pstart_idx:  receives the offset of the first digit, relative to fname
 * pnum_digits: receives the number of digits consumed
 *
 * Returns false, leaving the outputs untouched, if no run of three digits
 * exists or the number does not fit into an unsigned long.
 */
static bool preopen_parse_fname(const char *fname, unsigned long *pnum,
				size_t *pstart_idx, int *pnum_digits)
{
	const char *p;
	char *q = NULL;
	unsigned long num;

	p = strrchr_m(fname, '/');
	if (p == NULL) {
		/* No directory part: the name starts at the first byte */
		p = fname;
	} else {
		/* Skip the slash itself */
		p += 1;
	}

	while (p[0] != '\0') {
		/*
		 * isdigit() needs an unsigned char value. The && chain
		 * stops at the terminating NUL, so p[1] and p[2] are
		 * never read past the end of the string.
		 */
		if (isdigit((unsigned char)p[0])
		    && isdigit((unsigned char)p[1])
		    && isdigit((unsigned char)p[2])) {
			break;
		}
		p += 1;
	}
	if (*p == '\0') {
		/* no digits around */
		return false;
	}

	num = strtoul(p, &q, 10);

	if (num+1 < num) {
		/* overflow */
		return false;
	}

	*pnum = num;
	*pstart_idx = (p - fname);
	*pnum_digits = (q - p);
	return true;
}
/*
 * VFS open hook.
 *
 * The open itself is always passed on to the next module and its result
 * is returned unchanged. If it succeeded, was opened for reading only and
 * the name matches preopen:names, the number in the file name is parsed
 * and the helpers are asked to pre-open the following preopen:queuelen
 * files of the sequence.
 *
 * A name that matches but cannot be parsed, or whose number cannot be
 * represented in the int counters of the state, leaves a running queue
 * untouched.
 */
static int preopen_open(vfs_handle_struct *handle, const char *fname,
			files_struct *fsp, int flags, mode_t mode)
{
	struct preopen_state *state;
	char *new_template;
	size_t number_start;
	int num_digits;
	unsigned long num;
	bool run_active;
	int fnum;
	int res;

	DEBUG(10, ("preopen_open called on %s\n", fname));

	state = preopen_state_get(handle);
	if (state == NULL) {
		return SMB_VFS_NEXT_OPEN(handle, fname, fsp, flags, mode);
	}

	res = SMB_VFS_NEXT_OPEN(handle, fname, fsp, flags, mode);
	if (res == -1) {
		return -1;
	}

	/*
	 * Look at the access mode only: O_RDONLY combined with other
	 * flag bits is still a read-only open.
	 */
	if ((flags & O_ACCMODE) != O_RDONLY) {
		return res;
	}

	if (!is_in_path(fname, state->preopen_names, true)) {
		DEBUG(10, ("%s does not match the preopen:names list\n",
			   fname));
		return res;
	}

	/*
	 * Build and validate the new template before touching the state.
	 * The helper reply handler keeps running the queue from the
	 * stored template, so it must never be left half-updated.
	 */
	new_template = talloc_asprintf(
		state, "%s/%s", fsp->conn->connectpath, fname);
	if (new_template == NULL) {
		return res;
	}

	if (!preopen_parse_fname(new_template, &num,
				 &number_start, &num_digits)) {
		TALLOC_FREE(new_template);
		return res;
	}

	if ((state->queue_max < 0)
	    || (num > (unsigned long)(INT_MAX - state->queue_max))) {
		/* num + queue_max would not fit into the int counters */
		TALLOC_FREE(new_template);
		return res;
	}
	fnum = (int)num;

	/* fnum_queue_end is only meaningful once a template was stored */
	run_active = (state->template_fname != NULL);

	TALLOC_FREE(state->template_fname);
	state->template_fname = new_template;
	state->number_start = number_start;
	state->num_digits = num_digits;

	if (fnum > state->fnum_sent) {
		/*
		 * Helpers were too slow, there's no point in reading
		 * files in helpers that we already read in the
		 * parent.
		 */
		state->fnum_sent = fnum;
	}

	if (run_active
	    && (state->fnum_queue_end != 0) /* Something was started earlier */
	    && (fnum < (state->fnum_queue_end - state->queue_max))) {
		/*
		 * "num" is before the queue we announced. This means
		 * a new run is started.
		 */
		state->fnum_sent = fnum;
	}

	state->fnum_queue_end = fnum + state->queue_max;

	preopen_queue_run(state);

	return res;
}
/*
 * VFS operations structure: preopen_open is registered for the open
 * operation as a transparent layer. The trailing NOOP entry is presumably
 * the table terminator expected by smb_register_vfs().
 */
static vfs_op_tuple preopen_ops[] = {
{SMB_VFS_OP(preopen_open), SMB_VFS_OP_OPEN,
SMB_VFS_LAYER_TRANSPARENT},
{SMB_VFS_OP(NULL), SMB_VFS_OP_NOOP,
SMB_VFS_LAYER_NOOP}
};
NTSTATUS vfs_preopen_init(void);

/*
 * Module entry point: registers the operations table under the name
 * "preopen" and returns the result of the registration.
 */
NTSTATUS vfs_preopen_init(void)
{
	NTSTATUS status;

	status = smb_register_vfs(SMB_VFS_INTERFACE_VERSION,
				  "preopen", preopen_ops);
	return status;
}