/* fence-virt/server/checkpoint.c */

/*
Copyright Red Hat, Inc. 2009
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 675 Mass Ave, Cambridge,
MA 02139, USA.
*/
/*
* Author: Lon Hohberger <lhh at redhat.com>
*/
#include <config.h>
#include <stdio.h>
#include <simpleconfig.h>
#include <static_map.h>
#include <sys/types.h>
#include <stdint.h>
#include <time.h>
#include <server_plugin.h>
#include <string.h>
#include <malloc.h>
#include <syslog.h>
#include <errno.h>
#include <unistd.h>
#include <libvirt/libvirt.h>
#include <pthread.h>
#ifdef HAVE_OPENAIS_CPG_H
#include <openais/cpg.h>
#else
#ifdef HAVE_COROSYNC_CPG_H
#include <corosync/cpg.h>
#endif
#endif
#include <libcman.h>
#include <debug.h>
#include "virt.h"
#include "xvm.h"
#include "checkpoint.h"
/* Backend name published through checkpoint_plugin.name. */
#define NAME "checkpoint"
/* Backend version string published through checkpoint_plugin.version. */
#define VERSION "0.8"
/* Sentinel stored in struct check_info.magic and tested by VALIDATE(). */
#define MAGIC 0x1e017afe
/*
 * Private context object for this backend.
 *
 * checkpoint_init() allocates one and hands it to the caller as the opaque
 * backend context; every callback receives it back as `priv` and checks
 * the magic value before doing anything.
 */
struct check_info {
	int magic;	/* MAGIC while the context is valid, 0 after shutdown */
	int pad;	/* not referenced by the code shown in this file */
};
/*
 * VALIDATE(arg) - reject a missing or stale backend context.
 *
 * If arg is NULL, or does not point at a struct check_info whose magic
 * field equals MAGIC, sets errno to EINVAL and makes the *enclosing
 * function* return -1.  Only usable inside functions returning int.
 *
 * The argument is parenthesized so that expressions (not just plain
 * identifiers) can be passed safely; note it is still evaluated twice.
 */
#define VALIDATE(arg) \
do {\
	if (!(arg) || ((struct check_info *)(arg))->magic != MAGIC) { \
		errno = EINVAL;\
		return -1; \
	} \
} while(0)
/* Handle returned by ckpt_init() in checkpoint_init(); used for checkpoint reads/writes. */
static void *checkpoint_handle = NULL;
/* VM list most recently fetched by update_local_vms(). */
static virt_list_t *local_vms = NULL;
/* Hypervisor URI passed to virConnectOpen(); stays NULL unless set from config. */
static char *uri = NULL;
/* Non-zero: VMs are identified by UUID string; zero: by name. */
static int use_uuid = 0;
/*
 * Replace *vl with a freshly fetched VM list.
 *
 * vp     - hypervisor connection handed to vl_get()
 * vl     - in/out: the list to replace; the previous list (if any) is
 *          released with vl_free() only once a new one was obtained
 * my_id  - node ID handed to vl_get()
 *
 * Returns 0 on success, -1 if vl_get() produced no list (in which case
 * *vl is left untouched).
 */
static int
virt_list_update(virConnectPtr vp, virt_list_t **vl, int my_id)
{
	virt_list_t *fresh = vl_get(vp, my_id);

	if (fresh == NULL)
		return -1;

	if (*vl != NULL)
		vl_free(*vl);
	*vl = fresh;

	return 0;
}
/*
 * Query cman for this node's ID and/or the highest member node ID.
 *
 * ch       - an open cman handle, or NULL to have a temporary one opened
 *            (and closed again) by this function
 * my_id    - if non-NULL, receives the local node ID
 * high_id  - if non-NULL, receives the largest node ID among nodes whose
 *            cn_member flag is set
 *
 * Returns 0 on success (including the trivial case where both output
 * pointers are NULL), -1 on any failure.  Note that *my_id may already
 * have been written when a later step fails.
 */
static int
get_cman_ids(cman_handle_t ch, uint32_t *my_id, uint32_t *high_id)
{
	cman_node_t *node_tbl = NULL;
	cman_node_t self;
	uint32_t highest = 0;
	int node_cnt, found, idx;
	int opened_here = 0;
	int rv = -1;

	if (my_id == NULL && high_id == NULL)
		return 0;

	if (!ch) {
		ch = cman_init(NULL);
		if (!ch)
			return -1;
		/* We own this handle and must close it before returning */
		opened_here = 1;
	}

	node_cnt = cman_get_node_count(ch);
	if (node_cnt <= 0)
		goto done;

	if (my_id != NULL) {
		memset(&self, 0, sizeof(self));
		if (cman_get_node(ch, CMAN_NODEID_US, &self) < 0)
			goto done;
		*my_id = self.cn_nodeid;
	}

	if (high_id == NULL) {
		/* Caller only wanted the local ID */
		rv = 0;
		goto done;
	}

	node_tbl = malloc(sizeof(cman_node_t) * node_cnt);
	if (node_tbl == NULL)
		goto done;
	memset(node_tbl, 0, sizeof(cman_node_t) * node_cnt);

	if (cman_get_nodes(ch, node_cnt, &found, node_tbl) < 0)
		goto done;

	for (idx = 0; idx < found; idx++) {
		if (node_tbl[idx].cn_nodeid > highest &&
		    node_tbl[idx].cn_member)
			highest = node_tbl[idx].cn_nodeid;
	}

	*high_id = highest;
	rv = 0;

done:
	free(node_tbl);
	if (opened_here)
		cman_finish(ch);
	return rv;
}
/*
 * Ask cman whether the given node is currently a cluster member.
 *
 * Returns 1 if cman reports the node with cn_member set, 0 if the node
 * is not a member or could not be looked up, and -1 if no cman handle
 * could be opened.  Callers that test the result for truth therefore
 * treat "cannot reach cman" the same as "operational".
 */
static int
node_operational(uint32_t nodeid)
{
	cman_handle_t handle = cman_init(NULL);
	cman_node_t info;
	int alive = 0;

	if (!handle)
		return -1;

	memset(&info, 0, sizeof(info));
	if (cman_get_node(handle, nodeid, &info) == 0)
		alive = (info.cn_member != 0);

	cman_finish(handle);
	return alive;
}
/*
 * Read the recorded state of `domain` from the checkpoint.
 *
 * hp      - checkpoint handle
 * domain  - key to look up; must be non-empty and must not equal
 *           DOMAIN0NAME
 * state   - receives the stored vm_state_t
 *
 * errno is set to EINVAL up front; on an argument rejection the
 * function returns -1 with that value.  Otherwise the result of
 * ckpt_read() is returned unchanged.
 */
static int
get_domain_state_ckpt(void *hp, const char *domain, vm_state_t *state)
{
	errno = EINVAL;

	if (hp == NULL || domain == NULL || state == NULL)
		return -1;
	if (domain[0] == '\0')
		return -1;
	if (strcmp(DOMAIN0NAME, domain) == 0)
		return -1;

	return ckpt_read(hp, domain, state, sizeof(*state));
}
/*
 * Poll once per second until a domain is gone or shut off.
 *
 * vm_name  - domain name, or UUID string when use_uuid is set
 * vp       - open libvirt connection
 * timeout  - maximum number of one-second polls
 *
 * Returns 0 if the domain does not exist (either at entry or during
 * polling) or reaches VIR_DOMAIN_SHUTOFF; returns 1 if it is still
 * present when the polls run out.
 *
 * Every virDomainPtr obtained here is released with virDomainFree();
 * the reference from the initial existence check used to be leaked.
 */
static inline int
wait_domain(const char *vm_name, virConnectPtr vp, int timeout)
{
	int tries;
	int response = 1;
	int ret;
	virDomainPtr vdp;
	virDomainInfo vdi;

	if (use_uuid) {
		vdp = virDomainLookupByUUIDString(vp, vm_name);
	} else {
		vdp = virDomainLookupByName(vp, vm_name);
	}
	if (!vdp)
		return 0;

	/* Only existence mattered; each poll below does its own lookup */
	virDomainFree(vdp);

	/* Check domain liveliness.  If the domain is still here,
	   we return failure, and the client must then retry */
	/* XXX On the xen 3.0.4 API, we will be able to guarantee
	   synchronous virDomainDestroy, so this check will not
	   be necessary */
	for (tries = 1; tries <= timeout; tries++) {
		sleep(1);

		if (use_uuid) {
			vdp = virDomainLookupByUUIDString(vp, vm_name);
		} else {
			vdp = virDomainLookupByName(vp, vm_name);
		}
		if (!vdp) {
			dbg_printf(2, "Domain no longer exists\n");
			response = 0;
			break;
		}

		memset(&vdi, 0, sizeof(vdi));
		ret = virDomainGetInfo(vdp, &vdi);
		virDomainFree(vdp);
		if (ret < 0)
			continue;	/* could not query; try again */

		if (vdi.state == VIR_DOMAIN_SHUTOFF) {
			dbg_printf(2, "Domain has been shut off\n");
			response = 0;
			break;
		}

		dbg_printf(4, "Domain still exists (state %d) "
			   "after %d seconds\n",
			   vdi.state, tries);
	}

	return response;
}
/*
 * Decide what this node knows about a VM and whether it should act on it.
 *
 * vm_name  - domain name, or UUID string when use_uuid is set
 * owner    - written ONLY when the checkpoint names an owner node that is
 *            still operational; left untouched on every other path
 *
 * Returns: 0 - operational (VM is in the local list and not shut off)
 *          1 - dead or presumed so
 *          2 - VM not local and I am not the right node to deal with it
 *              (also returned when the cluster IDs cannot be obtained)
 *          3 - VM status unknown; cannot operate on it
 */
static int
cluster_virt_status(const char *vm_name, uint32_t *owner)
{
	vm_state_t ckpt_state;
	virt_state_t *local;
	uint32_t my_id, highest_id;
	int status = 0;

	dbg_printf(80, "%s %s\n", __FUNCTION__, vm_name);

	/* if we can't find the high ID, we can't do anything useful */
	/* This should be cpg_get_ids() but it's segfaulting for some
	   reason :( */
	if (get_cman_ids(NULL, &my_id, &highest_id) != 0)
		return 2;

	local = use_uuid ? vl_find_uuid(local_vms, vm_name)
			 : vl_find_name(local_vms, vm_name);

	if (local != NULL) {
		/* Known locally: running unless explicitly shut off */
		if (local->v_state.s_state == VIR_DOMAIN_SHUTOFF)
			status = 1;
	} else if (my_id != highest_id) {
		/* Not ours, and only the highest node ID consults the checkpoint */
		status = 2;
	} else if (get_domain_state_ckpt(checkpoint_handle,
					 vm_name, &ckpt_state)) {
		dbg_printf(2, "High ID: Unknown VM\n");
		status = 3;
	} else if (node_operational(ckpt_state.s_owner)) {
		*owner = ckpt_state.s_owner;
		dbg_printf(2, "High ID: Owner is operational\n");
		status = 2;
	} else {
		dbg_printf(2, "High ID: Owner is dead; returning 'off'\n");
		status = 1;
	}

	dbg_printf(80, "%s %s\n", __FUNCTION__, vm_name);
	return status;
}
/*
 * Write the state of every VM in `vl` to the checkpoint, keyed by VM name.
 *
 * The entry whose name equals DOMAIN0NAME is skipped.  A NULL list is a
 * no-op.  The result of ckpt_write() is not examined.
 */
static void
store_domains_by_name(void *hp, virt_list_t *vl)
{
	int i;

	if (vl == NULL)
		return;

	for (i = 0; i < vl->vm_count; i++) {
		char *key = vl->vm_states[i].v_name;

		if (strcmp(DOMAIN0NAME, key) == 0)
			continue;

		dbg_printf(2, "Storing %s\n", key);
		ckpt_write(hp, key, &vl->vm_states[i].v_state,
			   sizeof(vm_state_t));
	}
}
/*
 * Write the state of every VM in `vl` to the checkpoint, keyed by VM UUID.
 *
 * The entry whose UUID equals DOMAIN0UUID is skipped.  A NULL list is a
 * no-op.  The result of ckpt_write() is not examined.
 */
static void
store_domains_by_uuid(void *hp, virt_list_t *vl)
{
	int i;

	if (vl == NULL)
		return;

	for (i = 0; i < vl->vm_count; i++) {
		char *key = vl->vm_states[i].v_uuid;

		if (strcmp(DOMAIN0UUID, key) == 0)
			continue;

		dbg_printf(2, "Storing %s\n", key);
		ckpt_write(hp, key, &vl->vm_states[i].v_state,
			   sizeof(vm_state_t));
	}
}
/*
 * Refresh the cached list of local VMs and publish it to the checkpoint.
 *
 * Opens a short-lived hypervisor connection, replaces local_vms with the
 * current list, prints it, and stores each VM's state under its name or
 * UUID depending on use_uuid.
 *
 * If the hypervisor cannot be reached the failure is logged and the
 * previously cached list (possibly NULL) is printed and stored again;
 * the NULL connection is no longer handed to virt_list_update() or
 * virConnectClose().
 */
static void
update_local_vms(void)
{
	virConnectPtr vp;
	uint32_t my_id = 0;

	cpg_get_ids(&my_id, NULL);

	vp = virConnectOpen(uri);
	if (!vp) {
		syslog(LOG_ERR, "Failed to connect to hypervisor\n");
	} else {
		/* On failure local_vms simply keeps its previous contents */
		virt_list_update(vp, &local_vms, my_id);
	}

	vl_print(local_vms);

	if (use_uuid)
		store_domains_by_uuid(checkpoint_handle, local_vms);
	else
		store_domains_by_name(checkpoint_handle, local_vms);

	if (vp)
		virConnectClose(vp);
}
/*
 * Forcefully stop (destroy) a locally running domain.
 *
 * vm_name - domain name, or UUID string when use_uuid is set
 *
 * Returns 0 if virDomainDestroy() succeeded; 1 if the hypervisor could
 * not be reached, the domain does not exist or is already shut off, or
 * the destroy call failed.
 *
 * All exits after the connection is opened go through the common
 * cleanup so that the domain reference and the connection are always
 * released (the "nothing to do" path used to leak the connection, and
 * the domain reference was never freed after a destroy attempt).
 */
static int
do_off(const char *vm_name)
{
	virConnectPtr vp;
	virDomainPtr vdp;
	virDomainInfo vdi;
	int ret = 1;

	dbg_printf(5, "%s %s\n", __FUNCTION__, vm_name);

	vp = virConnectOpen(uri);
	if (!vp)
		return 1;

	if (use_uuid) {
		vdp = virDomainLookupByUUIDString(vp, vm_name);
	} else {
		vdp = virDomainLookupByName(vp, vm_name);
	}

	if (!vdp ||
	    ((virDomainGetInfo(vdp, &vdi) == 0) &&
	     (vdi.state == VIR_DOMAIN_SHUTOFF))) {
		dbg_printf(2, "Nothing to do - domain does not exist\n");
		goto out;	/* ret is still 1 */
	}

	syslog(LOG_NOTICE, "Destroying domain %s\n", vm_name);
	dbg_printf(2, "[OFF] Calling virDomainDestroy\n");

	ret = virDomainDestroy(vdp);
	if (ret < 0) {
		syslog(LOG_NOTICE, "Failed to destroy domain: %d\n", ret);
		printf("virDomainDestroy() failed: %d\n", ret);
		ret = 1;
		goto out;
	}

	if (ret) {
		syslog(LOG_NOTICE,
		       "Domain %s still exists; fencing failed\n",
		       vm_name);
		printf("Domain %s still exists; fencing failed\n", vm_name);
		ret = 1;
		goto out;
	}

	ret = 0;
out:
	/* virDomainDestroy() does not release the virDomainPtr itself */
	if (vdp)
		virDomainFree(vdp);
	virConnectClose(vp);
	return ret;
}
/*
 * Destroy a locally running domain and try to start it again.
 *
 * vm_name - domain name, or UUID string when use_uuid is set
 *
 * The domain's XML description is captured before the destroy so the
 * guest can be recreated afterwards.  Once the domain is confirmed gone
 * the fence counts as successful: a failure to restart the guest is
 * logged but does not change the return value.
 *
 * Returns 0 when the domain was destroyed (whether or not it could be
 * restarted); 1 if the hypervisor could not be reached, the domain does
 * not exist or is already shut off, the destroy call failed, or the
 * domain was still present after waiting.
 *
 * Every exit after the connection is opened goes through the common
 * cleanup, which releases the XML description, both domain references
 * and the connection.  Previously the early "nothing to do" return
 * leaked the connection, the original domain reference leaked on most
 * paths, and the reference returned by virDomainCreateLinux() was never
 * freed.
 */
static int
do_reboot(const char *vm_name)
{
	virConnectPtr vp;
	virDomainPtr vdp;
	virDomainPtr nvdp = NULL;
	virDomainInfo vdi;
	char *domain_desc = NULL;
	int ret = 1;

	dbg_printf(5, "%s %s\n", __FUNCTION__, vm_name);

	vp = virConnectOpen(uri);
	if (!vp)
		return 1;

	if (use_uuid) {
		vdp = virDomainLookupByUUIDString(vp, vm_name);
	} else {
		vdp = virDomainLookupByName(vp, vm_name);
	}

	if (!vdp || ((virDomainGetInfo(vdp, &vdi) == 0) &&
		     (vdi.state == VIR_DOMAIN_SHUTOFF))) {
		dbg_printf(2, "[libvirt:REBOOT] Nothing to "
			   "do - domain does not exist\n");
		goto out;	/* ret is still 1 */
	}

	syslog(LOG_NOTICE, "Rebooting domain %s\n", vm_name);
	printf("Rebooting domain %s...\n", vm_name);

	/* Must be fetched before the destroy; needed to recreate the guest */
	domain_desc = virDomainGetXMLDesc(vdp, 0);
	if (!domain_desc) {
		printf("Failed getting domain description from "
		       "libvirt\n");
	}

	dbg_printf(2, "[REBOOT] Calling virDomainDestroy(%p)\n", (void *)vdp);
	ret = virDomainDestroy(vdp);
	if (ret < 0) {
		printf("virDomainDestroy() failed: %d/%d\n", ret, errno);
		ret = 1;
		goto out;
	}

	ret = wait_domain(vm_name, vp, 15);
	if (ret) {
		syslog(LOG_NOTICE, "Domain %s still exists; fencing failed\n",
		       vm_name);
		printf("Domain %s still exists; fencing failed\n", vm_name);
		ret = 1;
		goto out;
	}

	/* The domain is gone: fencing succeeded.  'on' is not a failure */
	ret = 0;

	if (!domain_desc)
		goto out;	/* nothing to recreate the guest from */

	dbg_printf(3, "[[ XML Domain Info ]]\n");
	dbg_printf(3, "%s\n[[ XML END ]]\n", domain_desc);
	dbg_printf(2, "Calling virDomainCreateLinux()...\n");

	nvdp = virDomainCreateLinux(vp, domain_desc, 0);
	if (nvdp == NULL) {
		/* More recent versions of libvirt or perhaps the
		 * KVM back-end do not let you create a domain from
		 * XML if there is already a defined domain description
		 * with the same name that it knows about.  You must
		 * then call virDomainCreate() */
		dbg_printf(2, "Failed; Trying virDomainCreate()...\n");
		if (virDomainCreate(vdp) < 0) {
			syslog(LOG_NOTICE,
			       "Could not restart %s\n",
			       vm_name);
			dbg_printf(1, "Failed to recreate guest"
				   " %s!\n", vm_name);
		}
	}

out:
	free(domain_desc);
	if (nvdp)
		virDomainFree(nvdp);
	/* vdp is kept until here because virDomainCreate() above needs it */
	if (vdp)
		virDomainFree(vdp);
	virConnectClose(vp);
	return ret;
}
static void
do_real_work(void *data, size_t len, uint32_t nodeid, uint32_t seqno)
{
struct ckpt_fence_req *req = data;
struct ckpt_fence_req reply;
uint32_t owner;
int ret = 1;
memcpy(&reply, req, sizeof(reply));
update_local_vms();
switch(req->request) {
case FENCE_STATUS:
ret = cluster_virt_status(req->vm_name, &owner);
if (ret == 3) {
ret = RESP_OFF;
break;
}
if (ret == 2) {
return;
}
if (ret == 1) {
ret = RESP_OFF;
}
break;
case FENCE_OFF:
ret = cluster_virt_status(req->vm_name, &owner);
if (ret == 3) {
/* No record of this VM in the checkpoint. */
ret = 0;
break;
}
if (ret != 0) {
return;
}
/* Must be running locally to perform 'off' */
ret = do_off(req->vm_name);
break;
case FENCE_REBOOT:
ret = cluster_virt_status(req->vm_name, &owner);
if (ret != 0) {
return;
}
/* Must be running locally to perform 'reboot' */
ret = do_reboot(req->vm_name);
break;
}
reply.response = ret;
cpg_send_reply(&reply, sizeof(reply), nodeid, seqno);
}
/*
 * Broadcast a fencing request to the cluster and wait for the answer.
 *
 * vm_name  - domain name or UUID string to act on; truncated to fit the
 *            request's vm_name field
 * request  - one of the FENCE_* request codes
 * seqno    - sequence number stored in the request
 *
 * Returns the `response` field of the reply, or 1 if the request could
 * not be sent or no reply was obtained.
 *
 * The name is copied with an explicit "%s" format: it was previously
 * passed as the format string itself, so a name containing '%' would
 * have been interpreted as conversion specifiers.
 */
static int
do_request(const char *vm_name, int request, uint32_t seqno)
{
	struct ckpt_fence_req freq, *frp = NULL;
	size_t retlen = 0;
	uint32_t seq;
	int ret;

	memset(&freq, 0, sizeof(freq));
	snprintf(freq.vm_name, sizeof(freq.vm_name), "%s", vm_name);
	freq.request = request;
	freq.seqno = seqno;

	if (cpg_send_req(&freq, sizeof(freq), &seq) != 0) {
		printf("Failed to send\n");
		return 1;
	}

	/* frp starts out NULL so a "successful" wait without data is caught */
	if (cpg_wait_reply((void *)&frp, &retlen, seq) != 0 || !frp) {
		printf("Failed to receive\n");
		return 1;
	}

	ret = frp->response;
	free(frp);	/* the reply buffer is owned by us once returned */

	return ret;
}
/*
 * 'null' callback: validates the context, logs the call, does nothing.
 *
 * Returns 1, or -1 with errno set to EINVAL if priv is not a valid
 * context.
 */
static int
checkpoint_null(const char *vm_name, void *priv)
{
	const int result = 1;

	VALIDATE(priv);

	printf("[CKPT] Null operation on %s\n", vm_name);

	return result;
}
/*
 * 'off' callback: ask the cluster to power off a VM.
 *
 * vm_name  - domain name or UUID string
 * src      - unused here
 * seqno    - sequence number, forwarded in the request
 * priv     - backend context from checkpoint_init()
 *
 * Returns the result of do_request(), or -1 with errno set to EINVAL if
 * priv is not a valid context.
 */
static int
checkpoint_off(const char *vm_name, const char *src,
	       uint32_t seqno, void *priv)
{
	(void)src;

	VALIDATE(priv);

	/* seqno is unsigned: print it as such rather than with %d */
	printf("[CKPT] OFF operation on %s seq %u\n", vm_name,
	       (unsigned int)seqno);

	return do_request(vm_name, FENCE_OFF, seqno);
}
/*
 * 'on' callback: not implemented by this backend beyond logging.
 *
 * vm_name  - domain name or UUID string
 * src      - unused here
 * seqno    - sequence number, only logged
 * priv     - backend context from checkpoint_init()
 *
 * Returns 1, or -1 with errno set to EINVAL if priv is not a valid
 * context.
 */
static int
checkpoint_on(const char *vm_name, const char *src,
	      uint32_t seqno, void *priv)
{
	(void)src;

	VALIDATE(priv);

	/* seqno is unsigned: print it as such rather than with %d */
	printf("[CKPT] ON operation on %s seq %u\n", vm_name,
	       (unsigned int)seqno);

	return 1;
}
/*
 * 'devstatus' callback: reports the backend itself as healthy.
 *
 * The call is logged before the context is validated.
 *
 * Returns 0, or -1 with errno set to EINVAL if priv is not a valid
 * context.
 */
static int
checkpoint_devstatus(void *priv)
{
	const int result = 0;

	printf("[CKPT] Device status\n");

	VALIDATE(priv);

	return result;
}
/*
 * 'status' callback: ask the cluster for the state of a VM.
 *
 * Sends a FENCE_STATUS request with sequence number 0.
 *
 * Returns the result of do_request(), or -1 with errno set to EINVAL if
 * priv is not a valid context.
 */
static int
checkpoint_status(const char *vm_name, void *priv)
{
	int result;

	VALIDATE(priv);

	printf("[CKPT] STATUS operation on %s\n", vm_name);

	result = do_request(vm_name, FENCE_STATUS, 0);
	return result;
}
/*
 * 'reboot' callback: ask the cluster to reboot a VM.
 *
 * vm_name  - domain name or UUID string
 * src      - unused here
 * seqno    - sequence number, forwarded in the request
 * priv     - backend context from checkpoint_init()
 *
 * Returns the result of do_request(), or -1 with errno set to EINVAL if
 * priv is not a valid context.
 *
 * The sequence number is now passed through to do_request(), matching
 * checkpoint_off(); previously it was logged but a literal 0 was sent.
 */
static int
checkpoint_reboot(const char *vm_name, const char *src,
		  uint32_t seqno, void *priv)
{
	(void)src;

	VALIDATE(priv);

	/* seqno is unsigned: print it as such rather than with %d */
	printf("[CKPT] REBOOT operation on %s seq %u\n", vm_name,
	       (unsigned int)seqno);

	return do_request(vm_name, FENCE_REBOOT, seqno);
}
/*
 * 'hostlist' callback: logs the call; the callback and its argument are
 * never invoked or used.
 *
 * Returns 1, or -1 with errno set to EINVAL if priv is not a valid
 * context.
 */
static int
checkpoint_hostlist(hostlist_callback callback, void *arg, void *priv)
{
	const int result = 1;

	VALIDATE(priv);

	printf("[CKPT] HOSTLIST operation\n");

	return result;
}
static int
checkpoint_init(backend_context_t *c, config_object_t *config)
{
char value[1024];
struct check_info *info = NULL;
int x;
#ifdef _MODULE
if (sc_get(config, "fence_virtd/@debug", value, sizeof(value))==0)
dset(atoi(value));
#endif
if (sc_get(config, "backends/libvirt/@uri",
value, sizeof(value)) == 0) {
uri = strdup(value);
if (!uri) {
free(info);
return -1;
}
dbg_printf(1, "Using %s\n", uri);
}
if (sc_get(config, "backends/checkpoint/@uri",
value, sizeof(value)) == 0) {
if (uri)
free(uri);
uri = strdup(value);
if (!uri) {
free(info);
return -1;
}
dbg_printf(1, "Using %s\n", uri);
}
/* Naming scheme is no longer a top-level config option.
* However, we retain it here for configuration compatibility with
* versions 0.1.3 and previous.
*/
if (sc_get(config, "fence_virtd/@name_mode",
value, sizeof(value)-1) == 0) {
dbg_printf(1, "Got %s for name_mode\n", value);
if (!strcasecmp(value, "uuid")) {
use_uuid = 1;
} else if (!strcasecmp(value, "name")) {
use_uuid = 0;
} else {
dbg_printf(1, "Unsupported name_mode: %s\n", value);
}
}
if (sc_get(config, "backends/checkpoint/@name_mode",
value, sizeof(value)-1) == 0) {
dbg_printf(1, "Got %s for name_mode\n", value);
if (!strcasecmp(value, "uuid")) {
use_uuid = 1;
} else if (!strcasecmp(value, "name")) {
use_uuid = 0;
} else {
dbg_printf(1, "Unsupported name_mode: %s\n", value);
}
}
if (cpg_start(PACKAGE_NAME, do_real_work) < 0) {
return -1;
}
info = malloc(sizeof(*info));
if (!info)
return -1;
memset(info, 0, sizeof(*info));
info->magic = MAGIC;
x = 0;
while ((checkpoint_handle = ckpt_init(
"vm_states", 262144, 4096, 64, 10
)) == NULL) {
if (!x) {
dbg_printf(1, "Could not initialize "
"saCkPt; retrying...\n");
x = 1;
}
sleep(3);
}
if (x)
dbg_printf(1, "Checkpoint initialized\n");
update_local_vms();
*c = (void *)info;
return 0;
}
/*
 * Backend cleanup entry point.
 *
 * Invalidates and frees the context created by checkpoint_init(), then
 * stops cpg.
 *
 * Returns 0, or -1 with errno set to EINVAL if c is not a valid context
 * (in which case nothing is freed or stopped).
 */
static int
checkpoint_shutdown(backend_context_t c)
{
	struct check_info *ctx = (struct check_info *)c;

	VALIDATE(ctx);

	/* Clear the magic first so a stale pointer fails VALIDATE() */
	ctx->magic = 0;
	free(ctx);

	cpg_stop();

	return 0;
}
/* Fencing operation table exported through checkpoint_plugin. */
static fence_callbacks_t checkpoint_callbacks = {
	.null		= checkpoint_null,
	.status		= checkpoint_status,
	.devstatus	= checkpoint_devstatus,
	.hostlist	= checkpoint_hostlist,
	.on		= checkpoint_on,
	.off		= checkpoint_off,
	.reboot		= checkpoint_reboot,
};
/* Plugin descriptor: identity, operation table, and lifecycle hooks. */
static backend_plugin_t checkpoint_plugin = {
	.name		= NAME,
	.version	= VERSION,
	.init		= checkpoint_init,
	.cleanup	= checkpoint_shutdown,
	.callbacks	= &checkpoint_callbacks,
};
/*
 * Plugin exposure.  With _MODULE defined, two accessor functions are
 * emitted whose names come from the BACKEND_VER_SYM / BACKEND_INFO_SYM
 * macros (defined outside this file; presumably resolved by a plugin
 * loader - confirm).  Without _MODULE, the plugin registers itself via a
 * constructor function.
 */
#ifdef _MODULE
/* Returns the backend plugin API version, PLUGIN_VERSION_BACKEND. */
double
BACKEND_VER_SYM(void)
{
return PLUGIN_VERSION_BACKEND;
}
/* Returns the address of this backend's plugin descriptor. */
const backend_plugin_t *
BACKEND_INFO_SYM(void)
{
return &checkpoint_plugin;
}
#else
/* Marked as a constructor: hands the descriptor to plugin_reg_backend(). */
static void __attribute__((constructor))
checkpoint_register_plugin(void)
{
plugin_reg_backend(&checkpoint_plugin);
}
#endif