1
0
mirror of https://github.com/samba-team/samba.git synced 2024-12-22 13:34:15 +03:00
samba-mirror/ctdb/common/ctdb_io.c
Ronnie Sahlberg a2857b1504 We only queued up to 1000 packets per queue before we started dropping
packets, to prevent the queue from growing excessively if smbd has blocked.

This could cause traverse packets to become discarded in case the main
smbd daemon does a traverse of a database while there is a recovery
(sending a reconfigured message to smbd, causing an avalanche of unlock
messages to be sent across the cluster.)

This avalanche of messages could also cause the traversal message to be
discarded, causing the main smbd process to hang indefinitely waiting
for the traversal message that will never arrive.

Bump the maximum queue length before starting to discard messages from
1000 to 1000000 and at the same time rework the queueing slightly so we
can append messages cheaply to the queue instead of walking the list
from head to tail every time.

(This used to be ctdb commit 59ba5d7f80e0465e5076533374fb9ee862ed7bb6)
2010-02-04 09:54:06 +11:00

405 lines
10 KiB
C

/*
ctdb database library
Utility functions to read/write blobs of data from a file descriptor
and handle the case where we might need multiple read/writes to get all the
data.
Copyright (C) Andrew Tridgell 2006
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "includes.h"
#include "lib/tdb/include/tdb.h"
#include "lib/events/events.h"
#include "lib/util/dlinklist.h"
#include "system/network.h"
#include "system/filesys.h"
#include "../include/ctdb_private.h"
#include "../include/ctdb.h"
/* structures for packet queueing - see common/ctdb_io.c */
/*
  Input bytes that have been read from the descriptor but do not yet form
  a complete packet. queue_io_read() stores the remainder here and
  prepends it to whatever the next read delivers.
*/
struct ctdb_partial {
/* buffered bytes; reset to NULL once queue_io_read() takes them over */
uint8_t *data;
/* number of valid bytes in 'data'; reset to 0 together with 'data' */
uint32_t length;
};
/*
  One outbound packet (or the unsent tail of one) waiting in a queue's
  out_queue list.
*/
struct ctdb_queue_pkt {
/* doubly linked list links, maintained by the DLIST_* macros */
struct ctdb_queue_pkt *next, *prev;
/* next byte still to be written; advanced after each partial write */
uint8_t *data;
/* bytes still to be written starting at 'data' */
uint32_t length;
/* length of the complete packet; when it differs from 'length' part of
   the packet has already gone out on the wire */
uint32_t full_length;
};
/*
  State for one packet queue bound to a file descriptor: buffered partial
  input, the list of packets waiting to be written, and the callback that
  receives complete incoming packets.
*/
struct ctdb_queue {
struct ctdb_context *ctdb;
struct ctdb_partial partial; /* partial input packet */
/* head of the list of packets waiting to be written */
struct ctdb_queue_pkt *out_queue;
/* This field is used to track the last added item so we
can append new items to the end cheaply.
This relies on items always being appended to the tail
and on only the head being removed when removing items.
*/
struct ctdb_queue_pkt *out_queue_last_added;
/* number of packets currently linked into out_queue */
uint32_t out_queue_length;
/* fd event registered for 'fd'; NULL when no event is registered */
struct fd_event *fde;
/* descriptor the queue reads from and writes to; -1 when there is none */
int fd;
/* when non-zero, outgoing packet lengths are rounded up to a multiple
   of this value (the rounding mask assumes a power of two) */
size_t alignment;
/* opaque pointer handed back to 'callback' on every invocation */
void *private_data;
/* invoked with each complete incoming packet, or with (NULL, 0) when
   the connection fails */
ctdb_queue_cb_fn_t callback;
/* if non-NULL, the flag it points to is set to true by the queue's
   destructor so a caller can tell that the queue has been freed */
bool *destroyed;
};
/*
  report how many packets are currently waiting in the outbound queue
*/
int ctdb_queue_length(struct ctdb_queue *queue)
{
	int pending = queue->out_queue_length;

	return pending;
}
/*
  called when an incoming connection is readable

  Reads whatever the descriptor has ready, joins it onto any bytes left
  over from the previous read, and hands every complete packet to
  queue->callback. Each packet starts with a uint32_t, read in host
  byte order, giving the total length of that packet. Trailing bytes
  that do not yet form a complete packet are parked in queue->partial
  for the next call.

  When the descriptor reports no data (treated as closed), on a read or
  allocation failure, or on a packet claiming length 0, the callback is
  invoked with (NULL, 0) instead.
*/
static void queue_io_read(struct ctdb_queue *queue)
{
	int num_ready = 0;
	ssize_t nread;
	uint8_t *data, *data_base = NULL;

	if (ioctl(queue->fd, FIONREAD, &num_ready) != 0) {
		return;
	}
	if (num_ready == 0) {
		/* the descriptor has been closed */
		goto failed;
	}

	/* grow through a temporary so that a failed realloc neither loses
	   the bytes already buffered nor leaves partial.length describing
	   a buffer that is gone */
	data = talloc_realloc_size(queue, queue->partial.data,
				   num_ready + queue->partial.length);
	if (data == NULL) {
		DEBUG(DEBUG_ERR,("read error alloc failed for %u\n",
			 num_ready + queue->partial.length));
		goto failed;
	}
	queue->partial.data = data;

	nread = read(queue->fd, queue->partial.data + queue->partial.length, num_ready);
	if (nread <= 0) {
		DEBUG(DEBUG_ERR,("read error nread=%d\n", (int)nread));
		goto failed;
	}

	/* take the buffer over from the queue: from here on it is ours to
	   hand off, park in queue->partial again, or free */
	data = queue->partial.data;
	nread += queue->partial.length;

	queue->partial.data = NULL;
	queue->partial.length = 0;

	if (nread >= 4 && *(uint32_t *)data == nread) {
		/* it is the responsibility of the incoming packet
		   function to free 'data' */
		queue->callback(data, nread, queue->private_data);
		return;
	}

	data_base = data;

	while (nread >= 4 && *(uint32_t *)data <= nread) {
		/* we have at least one packet */
		uint8_t *d2;
		uint32_t len;
		bool destroyed = false;

		len = *(uint32_t *)data;
		if (len == 0) {
			/* bad packet! treat as EOF */
			DEBUG(DEBUG_CRIT,("Invalid packet of length 0\n"));
			goto failed;
		}
		d2 = talloc_memdup(queue, data, len);
		if (d2 == NULL) {
			DEBUG(DEBUG_ERR,("read error memdup failed for %u\n", len));
			/* sigh */
			goto failed;
		}

		queue->destroyed = &destroyed;
		queue->callback(d2, len, queue->private_data);
		/* If callback freed us, don't do anything else. */
		if (destroyed) {
			return;
		}
		queue->destroyed = NULL;

		data += len;
		nread -= len;
	}

	if (nread > 0) {
		/* we have only part of a packet */
		if (data_base == data) {
			/* nothing was consumed, keep the buffer as it is */
			queue->partial.data = data;
			queue->partial.length = nread;
		} else {
			queue->partial.data = talloc_memdup(queue, data, nread);
			if (queue->partial.data == NULL) {
				DEBUG(DEBUG_ERR,("read error memdup partial failed for %u\n",
					 (unsigned)nread));
				goto failed;
			}
			queue->partial.length = nread;
			talloc_free(data_base);
		}
		return;
	}

	talloc_free(data_base);
	return;

failed:
	/* Release the read buffer if we own it. This has to happen before
	   the callback runs: the callback may free the queue, and the
	   buffer was allocated with the queue as its talloc parent. */
	if (data_base != NULL) {
		talloc_free(data_base);
	}
	queue->callback(NULL, 0, queue->private_data);
}
/*
  timed-event handler for a queue whose connection has died: reports the
  failure to the queue's owner by delivering an empty (NULL, 0) packet
*/
static void queue_dead(struct event_context *ev, struct timed_event *te,
		       struct timeval t, void *private_data)
{
	struct ctdb_queue *dead_queue;

	dead_queue = talloc_get_type(private_data, struct ctdb_queue);
	dead_queue->callback(NULL, 0, dead_queue->private_data);
}
/*
  called when an incoming connection is writeable

  Writes queued packets from the head of out_queue until the queue is
  empty or the descriptor stops accepting data. Once the queue drains,
  write notification on the fd event is switched off. A hard write error
  tears down the fd event and schedules queue_dead().
*/
static void queue_io_write(struct ctdb_queue *queue)
{
	struct ctdb_queue_pkt *head;

	for (head = queue->out_queue; head != NULL; head = queue->out_queue) {
		ssize_t written;
		size_t attempt = head->length;

		if (queue->ctdb->flags & CTDB_FLAG_TORTURE) {
			/* torture mode: push a single byte per write */
			attempt = 1;
		}
		written = write(queue->fd, head->data, attempt);

		if (written == -1) {
			if (errno == EAGAIN || errno == EWOULDBLOCK) {
				/* would block - wait for the next writeable event */
				return;
			}

			if (head->length != head->full_length) {
				/* partial packet sent - we have to drop it */
				DLIST_REMOVE(queue->out_queue, head);
				queue->out_queue_length--;
				talloc_free(head);
			}
			talloc_free(queue->fde);
			queue->fde = NULL;
			queue->fd = -1;
			event_add_timed(queue->ctdb->ev, queue, timeval_zero(),
					queue_dead, queue);
			return;
		}

		if (written <= 0) {
			return;
		}

		if (written != head->length) {
			/* short write - remember how far we got */
			head->data += written;
			head->length -= written;
			return;
		}

		/* the whole packet went out */
		DLIST_REMOVE(queue->out_queue, head);
		queue->out_queue_length--;
		talloc_free(head);
	}

	EVENT_FD_NOT_WRITEABLE(queue->fde);
}
/*
  fd event handler for the queue's descriptor

  Reading takes priority: when EVENT_FD_READ is set only the read path
  runs; any event without EVENT_FD_READ is handled as writeable.
*/
static void queue_io_handler(struct event_context *ev, struct fd_event *fde,
			     uint16_t flags, void *private_data)
{
	struct ctdb_queue *queue = talloc_get_type(private_data, struct ctdb_queue);

	if (!(flags & EVENT_FD_READ)) {
		queue_io_write(queue);
		return;
	}

	queue_io_read(queue);
}
/*
  queue a packet for sending

  queue:  queue to send on
  data:   packet to send. When queue->alignment is set, the first four
          bytes are overwritten with the padded length and the padding
          bytes past 'length' are zeroed in place, so the buffer must
          have room for the padded length.
  length: unpadded length of the packet in bytes

  If nothing is queued yet an immediate write is attempted; whatever
  could not be written is copied into a new packet appended to the
  queue. Returns 0 when the packet was written or queued, and also when
  the connection turned out to be dead (that is reported separately via
  queue_dead()). Allocation failures are handled by CTDB_NO_MEMORY,
  which is defined outside this file.
*/
int ctdb_queue_send(struct ctdb_queue *queue, uint8_t *data, uint32_t length)
{
	struct ctdb_queue_pkt *pkt;
	uint32_t length2, full_length;
	/* start of the complete packet; 'data' itself may be advanced past
	   an already written prefix below */
	uint8_t *pkt_start = data;

	if (queue->alignment) {
		/* enforce the length and alignment rules from the tcp packet allocator */
		length2 = (length+(queue->alignment-1)) & ~(queue->alignment-1);
		*(uint32_t *)data = length2;
	} else {
		length2 = length;
	}

	if (length2 != length) {
		memset(data+length, 0, length2-length);
	}

	full_length = length2;

	/* if the queue is empty then try an immediate write, avoiding
	   queue overhead. This relies on non-blocking sockets */
	if (queue->out_queue == NULL && queue->fd != -1 &&
	    !(queue->ctdb->flags & CTDB_FLAG_TORTURE)) {
		ssize_t n = write(queue->fd, data, length2);
		if (n == -1 && errno != EAGAIN && errno != EWOULDBLOCK) {
			talloc_free(queue->fde);
			queue->fde = NULL;
			queue->fd = -1;
			event_add_timed(queue->ctdb->ev, queue, timeval_zero(),
					queue_dead, queue);
			/* yes, we report success, as the dead node is
			   handled via a separate event */
			return 0;
		}
		if (n > 0) {
			data += n;
			length2 -= n;
		}
		if (length2 == 0) return 0;
	}

	pkt = talloc(queue, struct ctdb_queue_pkt);
	CTDB_NO_MEMORY(queue->ctdb, pkt);

	pkt->data = talloc_memdup(pkt, data, length2);
	if (pkt->data == NULL) {
		/* don't leave a half-built packet hanging off the queue;
		   the NULL pkt then takes the usual out-of-memory exit */
		talloc_free(pkt);
		pkt = NULL;
	}
	CTDB_NO_MEMORY(queue->ctdb, pkt);

	pkt->length = length2;
	pkt->full_length = full_length;

	if (queue->out_queue == NULL && queue->fd != -1) {
		EVENT_FD_WRITEABLE(queue->fde);
	}

	/* This relies on that when adding items to the queue, we always add
	   them to the tail and that when removing items we only remove
	   the head of queue item.
	   The last_added item thus allows non n^2 behaviour when appending to
	   very long queues.
	*/
	if (queue->out_queue == NULL) {
		DLIST_ADD(queue->out_queue, pkt);
	} else {
		DLIST_ADD_END(queue->out_queue_last_added, pkt, struct ctdb_queue_pkt *);
	}
	queue->out_queue_last_added = pkt;
	queue->out_queue_length++;

	if (queue->ctdb->tunable.verbose_memory_names != 0) {
		/* Take the header from the start of the caller's packet:
		   after a partial immediate write pkt->data holds only the
		   unsent tail, which does not begin with a header and may
		   be shorter than one. */
		struct ctdb_req_header *hdr = (struct ctdb_req_header *)pkt_start;
		switch (hdr->operation) {
		case CTDB_REQ_CONTROL: {
			struct ctdb_req_control *c = (struct ctdb_req_control *)hdr;
			talloc_set_name(pkt, "ctdb_queue_pkt: control opcode=%u srvid=%llu datalen=%u",
					(unsigned)c->opcode, (unsigned long long)c->srvid, (unsigned)c->datalen);
			break;
		}
		case CTDB_REQ_MESSAGE: {
			struct ctdb_req_message *m = (struct ctdb_req_message *)hdr;
			talloc_set_name(pkt, "ctdb_queue_pkt: message srvid=%llu datalen=%u",
					(unsigned long long)m->srvid, (unsigned)m->datalen);
			break;
		}
		default:
			talloc_set_name(pkt, "ctdb_queue_pkt: operation=%u length=%u src=%u dest=%u",
					(unsigned)hdr->operation, (unsigned)hdr->length,
					(unsigned)hdr->srcnode, (unsigned)hdr->destnode);
			break;
		}
	}

	return 0;
}
/*
  setup the fd used by the queue

  Any existing fd event is released first. For fd == -1 the queue is
  simply left without an event; otherwise a read event is registered,
  and write notification is enabled too when packets are already
  queued. Returns 0 on success, -1 if the event could not be registered.
*/
int ctdb_queue_set_fd(struct ctdb_queue *queue, int fd)
{
	struct fd_event *new_fde;

	queue->fd = fd;
	talloc_free(queue->fde);
	queue->fde = NULL;

	if (fd == -1) {
		return 0;
	}

	new_fde = event_add_fd(queue->ctdb->ev, queue, fd, EVENT_FD_READ|EVENT_FD_AUTOCLOSE,
			       queue_io_handler, queue);
	queue->fde = new_fde;
	if (new_fde == NULL) {
		return -1;
	}

	if (queue->out_queue != NULL) {
		/* packets are already waiting, so ask for writeable events too */
		EVENT_FD_WRITEABLE(queue->fde);
	}

	return 0;
}
/*
  talloc destructor for a queue: if a watcher flag has been registered in
  queue->destroyed, set it so the watcher learns the queue was freed
*/
static int queue_destructor(struct ctdb_queue *queue)
{
	bool *watcher = queue->destroyed;

	if (watcher != NULL) {
		*watcher = true;
	}

	return 0;
}
/*
  setup a packet queue on a socket

  Allocates a zeroed queue under mem_ctx, records the callback and its
  private data, and attaches fd unless it is -1. Returns the new queue,
  or NULL when attaching the fd fails (the queue is freed again in that
  case). Allocation failure is handled by CTDB_NO_MEMORY_NULL, which is
  defined outside this file.
*/
struct ctdb_queue *ctdb_queue_setup(struct ctdb_context *ctdb,
				    TALLOC_CTX *mem_ctx, int fd, int alignment,
				    ctdb_queue_cb_fn_t callback,
				    void *private_data)
{
	struct ctdb_queue *q = talloc_zero(mem_ctx, struct ctdb_queue);

	CTDB_NO_MEMORY_NULL(ctdb, q);

	q->callback = callback;
	q->private_data = private_data;
	q->alignment = alignment;
	q->fd = fd;
	q->ctdb = ctdb;

	if (fd != -1 && ctdb_queue_set_fd(q, fd) != 0) {
		talloc_free(q);
		return NULL;
	}

	/* installed only after the fd has been attached successfully */
	talloc_set_destructor(q, queue_destructor);

	return q;
}