/*
 * Mirror of https://github.com/samba-team/samba.git
 * (synced 2025-01-25 06:04:04 +03:00)
 * File: samba-mirror/source3/modules/vfs_aio_linux.c
 * (355 lines, 8.7 KiB, C)
 */

/*
* Simulate Posix AIO using Linux kernel AIO.
*
* Copyright (C) Jeremy Allison 2012
* Copyright (C) Volker Lendecke 2012
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include "includes.h"
#include "system/filesys.h"
#include "smbd/smbd.h"
#include "smbd/globals.h"
#include "lib/util/tevent_unix.h"
#include <sys/eventfd.h>
#include <libaio.h>
/*
 * Module-wide AIO state. All of it is set up by init_aio_linux() and torn
 * down again by aio_linux_housekeeping() after a period of inactivity.
 */

/* eventfd attached to every submitted iocb; -1 while nothing is set up. */
static int event_fd = -1;
/* libaio context that all requests are submitted to. */
static io_context_t io_ctx;
/* tevent handler watching event_fd for readability. */
static struct tevent_fd *aio_read_event;
/* Set on every successful submit, cleared by the housekeeping timer. */
static bool used;
/* Requests submitted but not yet reaped by aio_linux_done(). */
static unsigned num_busy;
/* Completion handler; declared here because init_aio_linux() registers it. */
static void aio_linux_done(struct tevent_context *event_ctx,
struct tevent_fd *event,
uint16 flags, void *private_data);
/************************************************************************
 Housekeeping. Cleanup if no activity for 30 seconds.

 Timer callback armed by init_aio_linux(). If a request is still
 outstanding (num_busy != 0) or one was submitted since the previous
 run (used), the 'used' flag is cleared and the timer is re-armed for
 another 30 seconds. Otherwise the kernel AIO context, the fd event
 handler and the eventfd are released, so that the next request goes
 through init_aio_linux() again.

 event_ctx:    event context the replacement timer is added to.
 te:           the timer that fired; always freed here.
 now:          unused.
 private_data: unused.
***********************************************************************/
static void aio_linux_housekeeping(struct tevent_context *event_ctx,
				   struct tevent_timer *te,
				   struct timeval now,
				   void *private_data)
{
	/* Remove this timed event handler. */
	TALLOC_FREE(te);

	if ((num_busy != 0) || used) {
		used = false;

		/*
		 * Still busy. Look again in 30 seconds. A failure to re-arm
		 * is deliberately ignored; this is best-effort cleanup.
		 */
		(void)tevent_add_timer(event_ctx,
				       NULL,
				       timeval_current_ofs(30, 0),
				       aio_linux_housekeeping,
				       NULL);
		return;
	}

	/* No activity for 30 seconds. Close out kernel resources. */
	io_queue_release(io_ctx);
	memset(&io_ctx, '\0', sizeof(io_ctx));

	/*
	 * Drop the fd handler before closing the descriptor it watches,
	 * so the event backend never has to unregister an already closed
	 * fd. This is the same order the failure path of init_aio_linux()
	 * uses.
	 */
	TALLOC_FREE(aio_read_event);

	if (event_fd != -1) {
		close(event_fd);
		event_fd = -1;
	}
}
/************************************************************************
 Lazily create the eventfd, its tevent read handler and the kernel AIO
 context, and arm the 30 second housekeeping timer.

 Returns true when the AIO state is usable (either freshly created or
 left over from an earlier call), false when any step failed. On
 failure everything created so far is released again.
***********************************************************************/
static bool init_aio_linux(struct vfs_handle_struct *handle)
{
	struct tevent_timer *idle_timer = NULL;

	/* A valid eventfd means an earlier call already did the work. */
	if (event_fd != -1) {
		return true;
	}

	/* Arrange for a shutdown check 30 seconds from now. */
	idle_timer = tevent_add_timer(handle->conn->sconn->ev_ctx,
				      NULL,
				      timeval_current_ofs(30, 0),
				      aio_linux_housekeeping,
				      NULL);
	if (idle_timer == NULL) {
		goto cleanup;
	}

	event_fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
	if (event_fd == -1) {
		goto cleanup;
	}

	/* Have aio_linux_done() called whenever the eventfd is readable. */
	aio_read_event = tevent_add_fd(server_event_context(),
				       NULL,
				       event_fd,
				       TEVENT_FD_READ,
				       aio_linux_done,
				       NULL);
	if (aio_read_event == NULL) {
		goto cleanup;
	}

	if (io_queue_init(aio_pending_size, &io_ctx) != 0) {
		goto cleanup;
	}

	DEBUG(10,("init_aio_linux: initialized with up to %d events\n",
		  aio_pending_size));

	return true;

cleanup:

	DEBUG(10,("init_aio_linux: initialization failed\n"));

	/* Undo whatever was set up, leaving the "not initialized" state. */
	TALLOC_FREE(idle_timer);
	TALLOC_FREE(aio_read_event);
	if (event_fd != -1) {
		close(event_fd);
		event_fd = -1;
	}
	memset(&io_ctx, '\0', sizeof(io_ctx));

	return false;
}
/* Per-request state, allocated as the private data of each tevent_req. */
struct aio_linux_state {
/* Control block handed to io_submit(); its data field is set to the owning tevent_req. */
struct iocb event_iocb;
/* Result filled in by aio_linux_done(): the completion value, or -1 on error. */
ssize_t ret;
/* Error number filled in by aio_linux_done() when ret is -1, otherwise 0. */
int err;
};
/*
 * Start an asynchronous read of n bytes at offset from fsp into data.
 *
 * Returns NULL if the request could not be allocated. Otherwise returns a
 * tevent_req: already failed and posted to ev (EIO when the AIO setup
 * failed, the negated io_submit() result when submission failed), or
 * pending until aio_linux_done() completes it.
 */
static struct tevent_req *aio_linux_pread_send(
	struct vfs_handle_struct *handle, TALLOC_CTX *mem_ctx,
	struct tevent_context *ev, struct files_struct *fsp,
	void *data, size_t n, off_t offset)
{
	struct tevent_req *req = NULL;
	struct aio_linux_state *io_state = NULL;
	struct iocb *iocbs[1];
	int rc;

	req = tevent_req_create(mem_ctx, &io_state, struct aio_linux_state);
	if (req == NULL) {
		return NULL;
	}

	if (!init_aio_linux(handle)) {
		tevent_req_error(req, EIO);
		return tevent_req_post(req, ev);
	}

	/* Describe the read and ask for completion notice via the eventfd. */
	io_prep_pread(&io_state->event_iocb, fsp->fh->fd, data, n, offset);
	io_set_eventfd(&io_state->event_iocb, event_fd);
	/* Lets the completion handler find its way back to this request. */
	io_state->event_iocb.data = req;

	iocbs[0] = &io_state->event_iocb;
	rc = io_submit(io_ctx, 1, iocbs);
	if (rc < 0) {
		tevent_req_error(req, -rc);
		return tevent_req_post(req, ev);
	}

	/* Account for the request so housekeeping keeps the context alive. */
	num_busy += 1;
	used = true;

	return req;
}
/*
 * Start an asynchronous write of n bytes from data to fsp at offset.
 *
 * Returns NULL if the request could not be allocated. Otherwise returns a
 * tevent_req: already failed and posted to ev (EIO when the AIO setup
 * failed, the negated io_submit() result when submission failed), or
 * pending until aio_linux_done() completes it.
 */
static struct tevent_req *aio_linux_pwrite_send(
	struct vfs_handle_struct *handle, TALLOC_CTX *mem_ctx,
	struct tevent_context *ev, struct files_struct *fsp,
	const void *data, size_t n, off_t offset)
{
	struct tevent_req *req = NULL;
	struct aio_linux_state *io_state = NULL;
	struct iocb *iocbs[1];
	int rc;

	req = tevent_req_create(mem_ctx, &io_state, struct aio_linux_state);
	if (req == NULL) {
		return NULL;
	}

	if (!init_aio_linux(handle)) {
		tevent_req_error(req, EIO);
		return tevent_req_post(req, ev);
	}

	/* io_prep_pwrite() takes a non-const buffer, hence discard_const. */
	io_prep_pwrite(&io_state->event_iocb,
		       fsp->fh->fd,
		       discard_const(data),
		       n,
		       offset);
	io_set_eventfd(&io_state->event_iocb, event_fd);
	/* Lets the completion handler find its way back to this request. */
	io_state->event_iocb.data = req;

	iocbs[0] = &io_state->event_iocb;
	rc = io_submit(io_ctx, 1, iocbs);
	if (rc < 0) {
		tevent_req_error(req, -rc);
		return tevent_req_post(req, ev);
	}

	/* Account for the request so housekeeping keeps the context alive. */
	num_busy += 1;
	used = true;

	return req;
}
/*
 * Start an asynchronous fsync of fsp.
 *
 * Returns NULL if the request could not be allocated. Otherwise returns a
 * tevent_req: already failed and posted to ev (EIO when the AIO setup
 * failed, the negated io_submit() result when submission failed), or
 * pending until aio_linux_done() completes it.
 */
static struct tevent_req *aio_linux_fsync_send(
	struct vfs_handle_struct *handle, TALLOC_CTX *mem_ctx,
	struct tevent_context *ev, struct files_struct *fsp)
{
	struct tevent_req *req = NULL;
	struct aio_linux_state *io_state = NULL;
	struct iocb *iocbs[1];
	int rc;

	req = tevent_req_create(mem_ctx, &io_state, struct aio_linux_state);
	if (req == NULL) {
		return NULL;
	}

	if (!init_aio_linux(handle)) {
		tevent_req_error(req, EIO);
		return tevent_req_post(req, ev);
	}

	/* Describe the fsync and ask for completion notice via the eventfd. */
	io_prep_fsync(&io_state->event_iocb, fsp->fh->fd);
	io_set_eventfd(&io_state->event_iocb, event_fd);
	/* Lets the completion handler find its way back to this request. */
	io_state->event_iocb.data = req;

	iocbs[0] = &io_state->event_iocb;
	rc = io_submit(io_ctx, 1, iocbs);
	if (rc < 0) {
		tevent_req_error(req, -rc);
		return tevent_req_post(req, ev);
	}

	/* Account for the request so housekeeping keeps the context alive. */
	num_busy += 1;
	used = true;

	return req;
}
/*
 * tevent handler invoked when the eventfd becomes readable.
 *
 * Reads the eventfd counter to learn how many completions are pending,
 * then reaps them one at a time with io_getevents(). For each one, the
 * result is stored in the request's aio_linux_state and the request is
 * marked done.
 *
 * event_ctx, event and private_data are unused; flags is only logged.
 * Panics if the eventfd cannot be read in full. Returns early, leaving
 * the remaining completions unreaped, if io_getevents() reports an error.
 */
static void aio_linux_done(struct tevent_context *event_ctx,
			   struct tevent_fd *event,
			   uint16 flags, void *private_data)
{
	uint64_t num_events = 0;

	DEBUG(10, ("aio_linux_done called with flags=%d\n",
		   (int)flags));

	/* Read the number of events available. */
	if (sys_read(event_fd, &num_events, sizeof(num_events)) !=
			sizeof(num_events)) {
		smb_panic("aio_linux_handle_completion: invalid read");
	}

	while (num_events > 0) {
		/* Zero timeout: poll for a completion, never block. */
		struct timespec ts = { 0, };
		struct io_event finished;
		struct tevent_req *req;
		struct aio_linux_state *state;
		long res;
		int ret;

		ret = io_getevents(io_ctx, 1, 1, &finished, &ts);
		if (ret < 0) {
			DEBUG(1, ("aio_linux_done: io_getevents returned %s\n",
				  strerror(-ret)));
			return;
		}
		if (ret == 0) {
			DEBUG(10, ("aio_linux_done: io_getvents returned "
				   "0\n"));
			continue;
		}

		num_busy -= 1;

		/* The submit path stored the owning request in iocb.data. */
		req = talloc_get_type_abort(finished.data,
					    struct tevent_req);
		state = tevent_req_data(req, struct aio_linux_state);

		/*
		 * libaio declares io_event.res as unsigned long, so testing
		 * it directly against zero can never detect a failure. A
		 * failed I/O would then be handed back as a "successful"
		 * negative byte count with err == 0. Convert to signed
		 * before looking at the sign.
		 */
		res = (long)finished.res;
		if (res < 0) {
			state->ret = -1;
			state->err = (int)(-res);
		} else {
			state->ret = res;
			state->err = 0;
		}

		tevent_req_done(req);
		num_events -= 1;
	}
}
/*
 * Collect the result of a finished pread/pwrite request.
 *
 * Returns -1 if the request itself failed (tevent_req_is_unix_error()
 * is handed err). Otherwise returns the stored result; when that is -1,
 * the stored error number is copied to *err.
 */
static ssize_t aio_linux_recv(struct tevent_req *req, int *err)
{
	struct aio_linux_state *io_state;
	ssize_t result;

	io_state = tevent_req_data(req, struct aio_linux_state);

	if (tevent_req_is_unix_error(req, err)) {
		return -1;
	}

	result = io_state->ret;
	if (result == -1) {
		*err = io_state->err;
	}

	return result;
}
/*
 * Variant of aio_linux_recv() with an int result, used as the fsync
 * receive hook.
 */
static int aio_linux_int_recv(struct tevent_req *req, int *err)
{
	/* The ssize_t result is narrowed to int by implicit conversion. */
	int result = aio_linux_recv(req, err);

	return result;
}
/*
 * Connect hook: read the "aio_linux:aio num events" share parameter into
 * aio_pending_size, then pass the connect on to the next VFS module.
 */
static int aio_linux_connect(vfs_handle_struct *handle, const char *service,
			     const char *user)
{
	/*
	 * How many io_events should be set up? The default of 128 per
	 * process looks excessive until you consider that:
	 *  (a) SMB2 already throttles through its crediting algorithm;
	 *  (b) SMB1 clients are capped at max_mux (50) outstanding
	 *      requests, and Windows clients don't use this anyway.
	 * In effect this should be unlimited unless smb.conf says
	 * otherwise.
	 */
	int num_events = lp_parm_int(SNUM(handle->conn),
				     "aio_linux",
				     "aio num events",
				     128);

	aio_pending_size = num_events;

	return SMB_VFS_NEXT_CONNECT(handle, service, user);
}
/* VFS operations this module overrides; all others are left unset. */
static struct vfs_fn_pointers vfs_aio_linux_fns = {
	/* Per-share setup. */
	.connect_fn = aio_linux_connect,

	/* Submit side of the asynchronous operations. */
	.pread_send_fn = aio_linux_pread_send,
	.pwrite_send_fn = aio_linux_pwrite_send,
	.fsync_send_fn = aio_linux_fsync_send,

	/* Result collection; read and write share one receive function. */
	.pread_recv_fn = aio_linux_recv,
	.pwrite_recv_fn = aio_linux_recv,
	.fsync_recv_fn = aio_linux_int_recv,
};
/*
 * Module entry point: register the operations table under the name
 * "aio_linux" and return the registration status.
 */
NTSTATUS vfs_aio_linux_init(void)
{
	NTSTATUS status;

	status = smb_register_vfs(SMB_VFS_INTERFACE_VERSION,
				  "aio_linux",
				  &vfs_aio_linux_fns);

	return status;
}