1
0
mirror of git://sourceware.org/git/lvm2.git synced 2024-10-27 01:55:10 +03:00

Fix dmeventd mirror to cope if monitored device disappears.

This commit is contained in:
Alasdair Kergon 2006-12-20 14:35:02 +00:00
parent 1a42d65aa7
commit 0c8569edea
5 changed files with 135 additions and 131 deletions

View File

@ -1,5 +1,6 @@
Version 2.02.18 -
====================================
Fix dmeventd mirror to cope if monitored device disappears.
Version 2.02.17 - 14th December 2006
====================================

View File

@ -1,5 +1,7 @@
Version 1.02.14 -
=============================
Some dmevent cleanups.
Fix dmeventd to cope if monitored device disappears.
Version 1.02.13 - 28 Nov 2006
=============================

View File

@ -129,8 +129,8 @@ struct thread_status {
struct dso_data *dso_data;/* DSO this thread accesses. */
char *device_path; /* Mapped device path. */
uint32_t event_nr; /* event number */
int processing; /* Set when event is being processed */
uint32_t event_nr; /* event number */
int processing; /* Set when event is being processed */
enum dm_event_type events; /* bitfield for event filter. */
enum dm_event_type current_events;/* bitfield for occurred events. */
enum dm_event_type processed_events;/* bitfield for processed events. */
@ -179,12 +179,13 @@ static struct dso_data *alloc_dso_data(struct message_data *data)
{
struct dso_data *ret = (typeof(ret)) dm_malloc(sizeof(*ret));
if (ret) {
if (!memset(ret, 0, sizeof(*ret)) ||
!(ret->dso_name = dm_strdup(data->dso_name))) {
dm_free(ret);
ret = NULL;
}
if (!ret)
return NULL;
if (!memset(ret, 0, sizeof(*ret)) ||
!(ret->dso_name = dm_strdup(data->dso_name))) {
dm_free(ret);
return NULL;
}
return ret;
@ -342,10 +343,9 @@ static struct thread_status *lookup_thread_status(struct message_data *data)
{
struct thread_status *thread;
list_iterate_items(thread, &thread_registry) {
list_iterate_items(thread, &thread_registry)
if (!strcmp(data->device_path, thread->device_path))
return thread;
}
return NULL;
}
@ -546,6 +546,15 @@ static int event_wait(struct thread_status *thread)
thread->current_events |= DM_EVENT_TIMEOUT;
ret = 1;
thread->processed_events = 0;
} else {
/* FIXME replace with log_* macro */
syslog(LOG_NOTICE, "dm_task_run failed, errno = %d, %s",
errno, strerror(errno));
if (errno == ENXIO) {
/* FIXME replace with log_* macro */
syslog(LOG_ERR, "%s disappeared, detaching", thread->device_path);
ret = 2; /* FIXME What does 2 mean? Use macro. */
}
}
pthread_sigmask(SIG_SETMASK, &set, NULL);
@ -592,6 +601,7 @@ static void *monitor_thread(void *arg)
static void *monitor_thread(void *arg)
{
struct thread_status *thread = arg;
int wait_error = 0;
pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, NULL);
pthread_cleanup_push(monitor_unregister, thread);
@ -605,13 +615,18 @@ static void *monitor_thread(void *arg)
thread->current_events = 0;
/*
* FIXME: if unrecoverable error (ENODEV) happens,
* FIXME If unrecoverable error (ENODEV) happens
* we loop indefinitely. event_wait should return
* more than 0/1.
*/
if (!event_wait(thread))
wait_error = event_wait(thread);
if (!wait_error)
continue;
/* FIXME Give a DSO a chance to clean up. */
if (wait_error == 2)
break;
/*
* Check against filter.
*
@ -680,13 +695,12 @@ static struct dso_data *lookup_dso(struct message_data *data)
lock_mutex();
list_iterate_items(dso_data, &dso_registry) {
list_iterate_items(dso_data, &dso_registry)
if (!strcmp(data->dso_name, dso_data->dso_name)) {
lib_get(dso_data);
ret = dso_data;
break;
}
}
unlock_mutex();
@ -935,31 +949,29 @@ static int _get_registered_device(struct message_data *message_data, int next)
lock_mutex();
/* Iterate list of threads checking if we want a particular one. */
list_iterate_items(thread, &thread_registry) {
list_iterate_items(thread, &thread_registry)
if ((hit = want_registered_device(message_data->dso_name,
message_data->device_path,
thread)))
break;
}
/*
* If we got a registered device and want the next one ->
* fetch next conforming element off the list.
*/
if (hit) {
if (next) {
do {
if (list_end(&thread_registry, &thread->list))
goto out;
thread = list_item(thread->list.n,
struct thread_status);
} while (!want_registered_device(message_data->dso_name,
NULL, thread));
}
if (!hit || !next)
goto out;
return registered_device(message_data, thread);
}
do {
if (list_end(&thread_registry, &thread->list))
goto out;
thread = list_item(thread->list.n,
struct thread_status);
} while (!want_registered_device(message_data->dso_name,
NULL, thread));
return registered_device(message_data, thread);
out:
unlock_mutex();
@ -1118,10 +1130,9 @@ static int handle_request(struct dm_event_daemon_message *msg,
{ DM_EVENT_CMD_ACTIVE, active },
}, *req;
for (req = requests; req < requests + sizeof(requests); req++) {
for (req = requests; req < requests + sizeof(requests); req++)
if (req->cmd == msg->opcode.cmd)
return req->f(message_data);
}
return -EINVAL;
}
@ -1139,9 +1150,8 @@ static int do_process_request(struct dm_event_daemon_message *msg)
!parse_message(&message_data)) {
stack;
ret = -EINVAL;
} else {
} else
ret = handle_request(msg, &message_data);
}
free_message(&message_data);
@ -1242,7 +1252,7 @@ static int daemonize(void)
static int lock_pidfile(void)
{
int lf;
char pidfile[] = "/var/run/dmeventd.pid";
char pidfile[] = "/var/run/dmeventd.pid"; /* FIXME Must be configurable at compile-time! */
if ((lf = open(pidfile, O_CREAT | O_RDWR, 0644)) < 0)
return -EXIT_OPEN_PID_FAILURE;
@ -1326,14 +1336,3 @@ void dmeventd(void)
exit(EXIT_SUCCESS);
}
/*
* Overrides for Emacs so that we follow Linus's tabbing style.
* Emacs will notice this stuff at the end of the file and automatically
* adjust the settings for this buffer only. This must remain at the end
* of the file.
* ---------------------------------------------------------------------------
* Local variables:
* c-file-style: "linux"
* End:
*/

View File

@ -26,6 +26,7 @@
#include <unistd.h>
#include <syslog.h> /* FIXME Replace syslog with multilog */
/* FIXME Missing openlog? */
#define ME_IGNORE 0
#define ME_INSYNC 1
@ -34,23 +35,29 @@
static pthread_mutex_t _lock = PTHREAD_MUTEX_INITIALIZER;
/* FIXME: We may need to lock around operations to these */
static int register_count = 0;
static struct dm_pool *mem_pool = NULL;
static int _register_count = 0;
/* FIXME Unsafe static? */
static struct dm_pool *_mem_pool = NULL;
static int _get_mirror_event(char *params)
{
int i, rtn = ME_INSYNC;
int max_args = 30; /* should support at least 8-way mirrors */
char *args[max_args];
int i, r = ME_INSYNC;
/*
 * Maximum number of words accepted when splitting the mirror params.
 * No trailing ';' here: the macro is expanded inside an array declarator
 * and a function argument list, where a stray ';' is a syntax error.
 */
#define MAX_ARGS 30 /* should support at least 8-way mirrors */
/* FIXME Remove unnecessary limit. It tells you how many devices there are - use it! */
char *args[MAX_ARGS];
char *dev_status_str;
char *log_status_str;
char *sync_str;
char *p;
int log_argc, num_devs, num_failures=0;
if (max_args <= dm_split_words(params, max_args, 0, args)) {
/* FIXME Remove unnecessary limit - get num_devs here */
if (MAX_ARGS <= dm_split_words(params, MAX_ARGS, 0, args)) {
syslog(LOG_ERR, "Unable to split mirror parameters: Arg list too long");
return -E2BIG;
return -E2BIG; /* FIXME Why? Unused */
}
/*
@ -58,18 +65,20 @@ static int _get_mirror_event(char *params)
* Used : 2 253:4 253:5 400/400 1 AA 3 cluster 253:3 A
*/
num_devs = atoi(args[0]);
/* FIXME *Now* split rest of args */
dev_status_str = args[3 + num_devs];
log_argc = atoi(args[4 + num_devs]);
log_status_str = args[4 + num_devs + log_argc];
sync_str = args[1 + num_devs];
/* Check for bad mirror devices */
for (i = 0; i < num_devs; i++) {
for (i = 0; i < num_devs; i++)
if (dev_status_str[i] == 'D') {
syslog(LOG_ERR, "Mirror device, %s, has failed.\n", args[i+1]);
num_failures++;
}
}
/* Check for bad log device */
if (log_status_str[0] == 'D') {
@ -79,7 +88,7 @@ static int _get_mirror_event(char *params)
}
if (num_failures) {
rtn = ME_FAILURE;
r = ME_FAILURE;
goto out;
}
@ -87,7 +96,7 @@ static int _get_mirror_event(char *params)
if (p) {
p[0] = '\0';
if (strcmp(sync_str, p+1))
rtn = ME_IGNORE;
r = ME_IGNORE;
p[0] = '/';
} else {
/*
@ -95,10 +104,10 @@ static int _get_mirror_event(char *params)
* Might mean all our parameters are screwed.
*/
syslog(LOG_ERR, "Unable to parse sync string.");
rtn = ME_IGNORE;
r = ME_IGNORE;
}
out:
return rtn;
return r;
}
static void _temporary_log_fn(int level, const char *file,
@ -114,25 +123,25 @@ static int _remove_failed_devices(const char *device)
{
int r;
void *handle;
int cmd_size = 256; /* FIXME Use system restriction */
char cmd_str[cmd_size];
#define CMD_SIZE 256 /* FIXME Use system restriction */
char cmd_str[CMD_SIZE];
char *vg = NULL, *lv = NULL, *layer = NULL;
if (strlen(device) > 200)
return -ENAMETOOLONG;
if (strlen(device) > 200) /* FIXME Use real restriction */
return -ENAMETOOLONG; /* FIXME These return code distinctions are not used so remove them! */
if (!dm_split_lvm_name(mem_pool, device, &vg, &lv, &layer)) {
if (!dm_split_lvm_name(_mem_pool, device, &vg, &lv, &layer)) {
syslog(LOG_ERR, "Unable to determine VG name from %s",
device);
return -ENOMEM;
return -ENOMEM; /* FIXME Replace with generic error return - reason for failure has already got logged */
}
/* FIXME Is any sanity-checking required on %s? */
if (cmd_size <= snprintf(cmd_str, cmd_size, "vgreduce --removemissing %s", vg)) {
if (CMD_SIZE <= snprintf(cmd_str, CMD_SIZE, "vgreduce --removemissing %s", vg)) {
/* this error should be caught above, but doesn't hurt to check again */
syslog(LOG_ERR, "Unable to form LVM command: Device name too long");
dm_pool_empty(mem_pool); /* FIXME: not safe with multiple threads */
return -ENAMETOOLONG;
dm_pool_empty(_mem_pool); /* FIXME: not safe with multiple threads */
return -ENAMETOOLONG; /* FIXME Replace with generic error return - reason for failure has already got logged */
}
lvm2_log_fn(_temporary_log_fn);
@ -140,8 +149,8 @@ static int _remove_failed_devices(const char *device)
lvm2_log_level(handle, 1);
r = lvm2_run(handle, cmd_str);
dm_pool_empty(mem_pool); /* FIXME: not safe with multiple threads */
return (r == 1)? 0: -1;
dm_pool_empty(_mem_pool); /* FIXME: not safe with multiple threads */
return (r == 1) ? 0 : -1;
}
void process_event(const char *device, enum dm_event_type event)
@ -176,6 +185,10 @@ void process_event(const char *device, enum dm_event_type event)
next = dm_get_next_target(dmt, next, &start, &length,
&target_type, &params);
/*
 * No target type came back from dm_get_next_target(): log it and move on
 * to the next target. The braces are required so that only this case
 * skips the mirror handling below; without them the continue would run
 * unconditionally on every iteration.
 */
if (!target_type) {
	syslog(LOG_INFO, "%s mapping lost.\n", device);
	continue;
}
if (strcmp(target_type, "mirror")) {
syslog(LOG_INFO, "%s has unmirrored portion.\n", device);
continue;
@ -192,6 +205,7 @@ void process_event(const char *device, enum dm_event_type event)
case ME_FAILURE:
syslog(LOG_ERR, "Device failure in %s\n", device);
if (_remove_failed_devices(device))
/* FIXME Why are all the error return codes unused? Get rid of them? */
syslog(LOG_ERR, "Failed to remove faulty devices in %s\n",
device);
/* Should check before warning user that device is now linear
@ -203,6 +217,7 @@ void process_event(const char *device, enum dm_event_type event)
case ME_IGNORE:
break;
default:
/* FIXME Wrong: it can also return -E2BIG but it's never used! */
syslog(LOG_INFO, "Unknown event received.\n");
}
} while (next);
@ -221,34 +236,20 @@ int register_device(const char *device)
* Need some space for allocations. 1024 should be more
* than enough for what we need (device mapper name splitting)
*/
if (!mem_pool)
mem_pool = dm_pool_create("mirror_dso", 1024);
if (!mem_pool)
if (!_mem_pool && !(_mem_pool = dm_pool_create("mirror_dso", 1024)))
return 0;
register_count++;
_register_count++;
return 1;
}
int unregister_device(const char *device)
{
if (!(--register_count)) {
dm_pool_destroy(mem_pool);
mem_pool = NULL;
if (!--_register_count) {
dm_pool_destroy(_mem_pool);
_mem_pool = NULL;
}
return 1;
}
/*
* Overrides for Emacs so that we follow Linus's tabbing style.
* Emacs will notice this stuff at the end of the file and automatically
* adjust the settings for this buffer only. This must remain at the end
* of the file.
* ---------------------------------------------------------------------------
* Local variables:
* c-file-style: "linux"
* End:
*/

View File

@ -26,6 +26,7 @@
#include <unistd.h>
#include <syslog.h> /* FIXME Replace syslog with multilog */
/* FIXME Missing openlog? */
#define ME_IGNORE 0
#define ME_INSYNC 1
@ -34,23 +35,29 @@
static pthread_mutex_t _lock = PTHREAD_MUTEX_INITIALIZER;
/* FIXME: We may need to lock around operations to these */
static int register_count = 0;
static struct dm_pool *mem_pool = NULL;
static int _register_count = 0;
/* FIXME Unsafe static? */
static struct dm_pool *_mem_pool = NULL;
static int _get_mirror_event(char *params)
{
int i, rtn = ME_INSYNC;
int max_args = 30; /* should support at least 8-way mirrors */
char *args[max_args];
int i, r = ME_INSYNC;
/*
 * Maximum number of words accepted when splitting the mirror params.
 * No trailing ';' here: the macro is expanded inside an array declarator
 * and a function argument list, where a stray ';' is a syntax error.
 */
#define MAX_ARGS 30 /* should support at least 8-way mirrors */
/* FIXME Remove unnecessary limit. It tells you how many devices there are - use it! */
char *args[MAX_ARGS];
char *dev_status_str;
char *log_status_str;
char *sync_str;
char *p;
int log_argc, num_devs, num_failures=0;
if (max_args <= dm_split_words(params, max_args, 0, args)) {
/* FIXME Remove unnecessary limit - get num_devs here */
if (MAX_ARGS <= dm_split_words(params, MAX_ARGS, 0, args)) {
syslog(LOG_ERR, "Unable to split mirror parameters: Arg list too long");
return -E2BIG;
return -E2BIG; /* FIXME Why? Unused */
}
/*
@ -58,18 +65,20 @@ static int _get_mirror_event(char *params)
* Used : 2 253:4 253:5 400/400 1 AA 3 cluster 253:3 A
*/
num_devs = atoi(args[0]);
/* FIXME *Now* split rest of args */
dev_status_str = args[3 + num_devs];
log_argc = atoi(args[4 + num_devs]);
log_status_str = args[4 + num_devs + log_argc];
sync_str = args[1 + num_devs];
/* Check for bad mirror devices */
for (i = 0; i < num_devs; i++) {
for (i = 0; i < num_devs; i++)
if (dev_status_str[i] == 'D') {
syslog(LOG_ERR, "Mirror device, %s, has failed.\n", args[i+1]);
num_failures++;
}
}
/* Check for bad log device */
if (log_status_str[0] == 'D') {
@ -79,7 +88,7 @@ static int _get_mirror_event(char *params)
}
if (num_failures) {
rtn = ME_FAILURE;
r = ME_FAILURE;
goto out;
}
@ -87,7 +96,7 @@ static int _get_mirror_event(char *params)
if (p) {
p[0] = '\0';
if (strcmp(sync_str, p+1))
rtn = ME_IGNORE;
r = ME_IGNORE;
p[0] = '/';
} else {
/*
@ -95,10 +104,10 @@ static int _get_mirror_event(char *params)
* Might mean all our parameters are screwed.
*/
syslog(LOG_ERR, "Unable to parse sync string.");
rtn = ME_IGNORE;
r = ME_IGNORE;
}
out:
return rtn;
return r;
}
static void _temporary_log_fn(int level, const char *file,
@ -114,25 +123,25 @@ static int _remove_failed_devices(const char *device)
{
int r;
void *handle;
int cmd_size = 256; /* FIXME Use system restriction */
char cmd_str[cmd_size];
#define CMD_SIZE 256 /* FIXME Use system restriction */
char cmd_str[CMD_SIZE];
char *vg = NULL, *lv = NULL, *layer = NULL;
if (strlen(device) > 200)
return -ENAMETOOLONG;
if (strlen(device) > 200) /* FIXME Use real restriction */
return -ENAMETOOLONG; /* FIXME These return code distinctions are not used so remove them! */
if (!dm_split_lvm_name(mem_pool, device, &vg, &lv, &layer)) {
if (!dm_split_lvm_name(_mem_pool, device, &vg, &lv, &layer)) {
syslog(LOG_ERR, "Unable to determine VG name from %s",
device);
return -ENOMEM;
return -ENOMEM; /* FIXME Replace with generic error return - reason for failure has already got logged */
}
/* FIXME Is any sanity-checking required on %s? */
if (cmd_size <= snprintf(cmd_str, cmd_size, "vgreduce --removemissing %s", vg)) {
if (CMD_SIZE <= snprintf(cmd_str, CMD_SIZE, "vgreduce --removemissing %s", vg)) {
/* this error should be caught above, but doesn't hurt to check again */
syslog(LOG_ERR, "Unable to form LVM command: Device name too long");
dm_pool_empty(mem_pool); /* FIXME: not safe with multiple threads */
return -ENAMETOOLONG;
dm_pool_empty(_mem_pool); /* FIXME: not safe with multiple threads */
return -ENAMETOOLONG; /* FIXME Replace with generic error return - reason for failure has already got logged */
}
lvm2_log_fn(_temporary_log_fn);
@ -140,8 +149,8 @@ static int _remove_failed_devices(const char *device)
lvm2_log_level(handle, 1);
r = lvm2_run(handle, cmd_str);
dm_pool_empty(mem_pool); /* FIXME: not safe with multiple threads */
return (r == 1)? 0: -1;
dm_pool_empty(_mem_pool); /* FIXME: not safe with multiple threads */
return (r == 1) ? 0 : -1;
}
void process_event(const char *device, enum dm_event_type event)
@ -176,6 +185,10 @@ void process_event(const char *device, enum dm_event_type event)
next = dm_get_next_target(dmt, next, &start, &length,
&target_type, &params);
/*
 * No target type came back from dm_get_next_target(): log it and move on
 * to the next target. The braces are required so that only this case
 * skips the mirror handling below; without them the continue would run
 * unconditionally on every iteration.
 */
if (!target_type) {
	syslog(LOG_INFO, "%s mapping lost.\n", device);
	continue;
}
if (strcmp(target_type, "mirror")) {
syslog(LOG_INFO, "%s has unmirrored portion.\n", device);
continue;
@ -192,6 +205,7 @@ void process_event(const char *device, enum dm_event_type event)
case ME_FAILURE:
syslog(LOG_ERR, "Device failure in %s\n", device);
if (_remove_failed_devices(device))
/* FIXME Why are all the error return codes unused? Get rid of them? */
syslog(LOG_ERR, "Failed to remove faulty devices in %s\n",
device);
/* Should check before warning user that device is now linear
@ -203,6 +217,7 @@ void process_event(const char *device, enum dm_event_type event)
case ME_IGNORE:
break;
default:
/* FIXME Wrong: it can also return -E2BIG but it's never used! */
syslog(LOG_INFO, "Unknown event received.\n");
}
} while (next);
@ -221,34 +236,20 @@ int register_device(const char *device)
* Need some space for allocations. 1024 should be more
* than enough for what we need (device mapper name splitting)
*/
if (!mem_pool)
mem_pool = dm_pool_create("mirror_dso", 1024);
if (!mem_pool)
if (!_mem_pool && !(_mem_pool = dm_pool_create("mirror_dso", 1024)))
return 0;
register_count++;
_register_count++;
return 1;
}
int unregister_device(const char *device)
{
if (!(--register_count)) {
dm_pool_destroy(mem_pool);
mem_pool = NULL;
if (!--_register_count) {
dm_pool_destroy(_mem_pool);
_mem_pool = NULL;
}
return 1;
}
/*
* Overrides for Emacs so that we follow Linus's tabbing style.
* Emacs will notice this stuff at the end of the file and automatically
* adjust the settings for this buffer only. This must remain at the end
* of the file.
* ---------------------------------------------------------------------------
* Local variables:
* c-file-style: "linux"
* End:
*/