fence_virtd: Remove checkpoint, replace it with a CPG only plugin
Remove the checkpoint backend plugin, as it used CMAN and openais checkpoints. Given nobody is using CMAN or openais anymore, the plugin was useless for clusters running the current stack. Signed-off-by: Ryan McCabe <rmccabe@redhat.com>
This commit is contained in:
parent
3bfe80d293
commit
7f6798916a
21
README
21
README
@ -42,15 +42,14 @@ has failed.
|
||||
In order to be able to guarantee safe fencing of a VM even if the
|
||||
last-known host is down, we must store the last-known locations of
|
||||
each virtual machine in some sort of cluster-wide way. For this, we
|
||||
use the AIS Checkpointing API, which is provided by OpenAIS. Every
|
||||
few seconds, fence_virtd queries the hypervisor via libvirt and
|
||||
stores any local VM states in a checkpoint. In the event of a
|
||||
physical node failure (which consequently causes the failure of one
|
||||
or more guests), we can then read the checkpoint section corresponding
|
||||
to the guest we need to fence to find out the previous owner. With
|
||||
that information, we can then check with CMAN to see if the last-
|
||||
known host node has been fenced. If so, then the VM is clean as well.
|
||||
The physical cluster must, therefore, have fencing in order for
|
||||
use the corosync CPG API. Every few seconds, fence_virtd queries the
|
||||
hypervisor via libvirt and stores any local VM states and sends those
|
||||
states over CPG to all other members. In the event of a physical node
|
||||
failure (which consequently causes the failure of one or more guests),
|
||||
we can then read the stored VM state corresponding to the guest we need
|
||||
to fence to find out the previous owner. With that information, we can
|
||||
infer if the known host node has been fenced. If so, then the VM is clean
|
||||
as well. The physical cluster must, therefore, have fencing in order for
|
||||
fence_virtd to work.
|
||||
|
||||
Operation of a node hosting a VM which needs to be fenced:
|
||||
@ -68,7 +67,7 @@ Operation of high-node-ID:
|
||||
|
||||
(a) Receive multicast packet
|
||||
(b) Authenticate multicast packet
|
||||
(c) Read VM state from checkpoint
|
||||
(c) Read VM state from stored CPG messages
|
||||
(d) Check liveliness of nodeID hosting VM (if alive, do nothing)
|
||||
(e) Open connection to host contained within multicast
|
||||
packet.
|
||||
@ -77,7 +76,7 @@ Operation of high-node-ID:
|
||||
(h) Authenticate server & send response.
|
||||
|
||||
NOTE: There is always a possibility that a VM is started again
|
||||
before the fencing operation and checkpoint update for that VM
|
||||
before the fencing operation and CPG update for that VM
|
||||
occurs. If the VM has booted and rejoined the cluster, fencing will
|
||||
not be necessary. If it is in the process of booting, but has not
|
||||
yet joined the cluster, fencing will also not be necessary - because
|
||||
|
17
configure.in
17
configure.in
@ -2,7 +2,7 @@
|
||||
# Process this file with autoconf to produce a configure script.
|
||||
|
||||
AC_PREREQ(2.59)
|
||||
AC_INIT(fence-virt, 0.3.3, rmccabe@redhat.com)
|
||||
AC_INIT(fence-virt, 0.4.0, rmccabe@redhat.com)
|
||||
AC_CONFIG_SRCDIR([server/main.c])
|
||||
AC_CONFIG_HEADER([include/config.h])
|
||||
|
||||
@ -18,7 +18,6 @@ AC_PROG_LN_S
|
||||
AC_PROG_MAKE_SET
|
||||
|
||||
# Checks for libraries.
|
||||
AC_CHECK_LIB([SaCkpt], [saCkptCheckpointOpen])
|
||||
AC_CHECK_LIB([cpg], [cpg_finalize])
|
||||
AC_CHECK_LIB([nss3], [NSS_NoDB_Init])
|
||||
AC_CHECK_LIB([pthread], [pthread_create])
|
||||
@ -33,7 +32,7 @@ AC_SUBST(pm_ver)
|
||||
AC_HEADER_DIRENT
|
||||
AC_HEADER_STDC
|
||||
AC_HEADER_SYS_WAIT
|
||||
AC_CHECK_HEADERS([arpa/inet.h fcntl.h malloc.h netdb.h netinet/in.h stdint.h stdlib.h string.h sys/ioctl.h sys/socket.h sys/time.h syslog.h termios.h unistd.h openais/cpg.h corosync/cpg.h])
|
||||
AC_CHECK_HEADERS([arpa/inet.h fcntl.h malloc.h netdb.h netinet/in.h stdint.h stdlib.h string.h sys/ioctl.h sys/socket.h sys/time.h syslog.h termios.h unistd.h corosync/cpg.h])
|
||||
|
||||
# Checks for typedefs, structures, and compiler characteristics.
|
||||
AC_C_CONST
|
||||
@ -80,12 +79,12 @@ AC_ARG_ENABLE(libvirt-plugin,
|
||||
[ mod_libvirt=$enableval ], [ mod_libvirt=yes ])
|
||||
AC_SUBST(mod_libvirt)
|
||||
|
||||
# checkpoint plugin: Disabled by default
|
||||
AC_ARG_ENABLE(checkpoint-plugin,
|
||||
[AS_HELP_STRING([--enable-checkpoint-plugin],
|
||||
[Enable CMAN / AIS checkpoint backend plugin])],
|
||||
[ mod_checkpoint=$enableval ], [ mod_checkpoint=no ])
|
||||
AC_SUBST(mod_checkpoint)
|
||||
# cpg plugin: Disabled by default
|
||||
AC_ARG_ENABLE(cpg-plugin,
|
||||
[AS_HELP_STRING([--enable-cpg-plugin],
|
||||
[Enable CPG/libvirt backend plugin])],
|
||||
[ mod_cpg=$enableval ], [ mod_cpg=no ])
|
||||
AC_SUBST(mod_cpg)
|
||||
|
||||
# libvirt-qmf plugin: Disabled by default
|
||||
AC_ARG_ENABLE(libvirt-qmf-plugin,
|
||||
|
@ -80,13 +80,13 @@ We propose at 5 plugins in this case:
|
||||
* Libvirt (local-only). There is no intracommunication and no
|
||||
migration support is provided
|
||||
|
||||
* Cluster checkpoint (+ libvirt). This the way fence_xvmd
|
||||
* Cluster CPG (+ libvirt). This is the way fence_xvmd
|
||||
operates today. This setup has the most requirements on the
|
||||
infrastructure, as it requires guest to host networking _and_
|
||||
host-to-host clustering in order to keep track of virtual
|
||||
machines. The benefit is that it is self-contained and requires
|
||||
no external management nodes. VM states are stored in checkpoints
|
||||
so that other hosts know the locations of other VMs and can make
|
||||
no external management nodes. VM states are stored so that other
|
||||
CPG group members know the locations of other VMs and can make
|
||||
some decisions about whether a VM is dead based on whether a host
|
||||
is dead (i.e. if fencing is in use or can be performed on the
|
||||
host).
|
||||
@ -113,7 +113,7 @@ We propose at 5 plugins in this case:
|
||||
|
||||
These plugins have no requirements on which guest to host communication
|
||||
plugin is used (you could, if you wanted, use 'direct serial' with
|
||||
'cluster checkpoint', or 'multicast' with 'RHEV-H' for example).
|
||||
'cluster cpg', or 'multicast' with 'RHEV-H' for example).
|
||||
|
||||
These plugins must also be able to discover where appropriate. For
|
||||
example, the Checkpoint plugin can only be used if corosync/openais
|
||||
|
@ -9,7 +9,7 @@ fence_virt and fence_xvm are an I/O Fencing agents which can be used with virtua
|
||||
|
||||
Fence_virt and fence_xvm talk to fence_virtd, which supports multiple backend plugins, including:
|
||||
- libvirt for single-node operation
|
||||
- Cluster Checkpoints when using Linux-cluster release 3.0.0 or later
|
||||
- Corosync CPG when using Linux-cluster release 3.0.0 or later
|
||||
- libvirt-qpid for multi-node, non-cluster operation
|
||||
|
||||
For compatibility, fence_xvm may talk to fence_xvmd from linux-cluster
|
||||
|
@ -236,21 +236,21 @@ Qpid service to connect to.
|
||||
If set to 1, have fence_virtd use GSSAPI for authentication when communicating
|
||||
with the Qpid broker. Default is 0 (off).
|
||||
|
||||
.SS checkpoint
|
||||
.SS cpg
|
||||
|
||||
The checkpoint plugin uses CMAN, CPG, and OpenAIS checkpoints to track virtual
|
||||
The cpg plugin uses corosync CPG and libvirt to track virtual
|
||||
machines and route fencing requests to the appropriate computer.
|
||||
|
||||
.TP
|
||||
.B uri
|
||||
.
|
||||
the URI to use when connecting to libvirt by the checkpoint plugin.
|
||||
the URI to use when connecting to libvirt by the cpg plugin.
|
||||
|
||||
.TP
|
||||
.B name_mode
|
||||
.
|
||||
The checkpoint plugin, in order to retain compatibility with fence_xvm,
|
||||
stores virtual machines in a certain way in the OpenAIS checkpoints. The
|
||||
The cpg plugin, in order to retain compatibility with fence_xvm,
|
||||
stores virtual machines in a certain way. The
|
||||
default was to use 'name' when using fence_xvm and fence_xvmd, and so this
|
||||
is still the default. However, it is strongly recommended to use 'uuid'
|
||||
instead of 'name' in all cluster environments involving more than one
|
||||
@ -286,7 +286,7 @@ a key file.
|
||||
|
||||
fence_virtd {
|
||||
listener = "multicast";
|
||||
backend = "checkpoint";
|
||||
backend = "cpg";
|
||||
}
|
||||
|
||||
# this is the listeners section
|
||||
|
@ -9,7 +9,7 @@ virtual machines.
|
||||
|
||||
Fence_virt and fence_xvm talk to fence_virtd, which supports multiple backend plugins, including:
|
||||
- libvirt for single-node operation
|
||||
- Cluster Checkpoints when using Linux-cluster release 3.0.0 or later
|
||||
- Corosync CPG and libvirt when using Linux-cluster release 3.0.0 or later
|
||||
- libvirt-qpid for multi-node, non-cluster operation
|
||||
|
||||
For compatibility, fence_xvm from linux-cluster release 2 may talk to fence_virtd.
|
||||
|
@ -20,7 +20,6 @@ with_modules=@modules@
|
||||
TARGETS=fence_virtd
|
||||
|
||||
MAIN_LIBS=-L../config -lsimpleconfig -ldl
|
||||
AIS_LIBS=-L/usr/lib64/openais -lSaCkpt
|
||||
COROSYNC_LIBS=-L/usr/lib64/corosync -lcpg
|
||||
PACEMAKER_LIBS=-lcib -lcrmcommon -lpe_status -lplumb \
|
||||
`pkg-config --libs ncurses` `pkg-config --libs glib-2.0`
|
||||
@ -47,9 +46,9 @@ null_so_SOURCES = null.c
|
||||
libvirt_qmf_so_SOURCES = uuid-test.c
|
||||
libvirt_qmf_cxx_so_SOURCES = libvirt-qmf.cpp
|
||||
pm_fence_so_SOURCES = pm-fence.c
|
||||
cpg_so_SOURCES = cpg-virt.c cpg.c virt.c history.c uuid-test.c
|
||||
multicast_so_SOURCES = mcast.c history.c
|
||||
tcp_so_SOURCES = tcp.c history.c
|
||||
checkpoint_so_SOURCES = virt.c vm_states.c history.c checkpoint.c cpg.c
|
||||
serial_so_SOURCES = virt-serial.c virt-sockets.c serial.c history.c
|
||||
|
||||
|
||||
@ -59,9 +58,9 @@ serial_so_SOURCES = virt-serial.c virt-sockets.c serial.c history.c
|
||||
|
||||
mod_null=@mod_null@
|
||||
mod_libvirt=@mod_libvirt@
|
||||
mod_checkpoint=@mod_checkpoint@
|
||||
mod_libvirt_qmf=@mod_libvirt_qmf@
|
||||
mod_pm_fence=@mod_pm_fence@
|
||||
mod_cpg=@mod_cpg@
|
||||
mod_multicast=@mod_multicast@
|
||||
mod_serial=@mod_serial@
|
||||
mod_tcp=@mod_tcp@
|
||||
@ -77,9 +76,6 @@ MODULES=
|
||||
ifneq ($(mod_libvirt),no)
|
||||
MODULES+=libvirt.so
|
||||
endif
|
||||
ifneq ($(mod_checkpoint),no)
|
||||
MODULES+=checkpoint.so
|
||||
endif
|
||||
ifneq ($(mod_libvirt_qmf),no)
|
||||
MODULES+=libvirt-qmf.so
|
||||
endif
|
||||
@ -87,6 +83,9 @@ ifneq ($(mod_pm_fence),no)
|
||||
INCLUDES+=$(PACEMAKER_INCLUDES)
|
||||
MODULES+=pm-fence.so
|
||||
endif
|
||||
ifneq ($(mod_cpg),no)
|
||||
MODULES+=cpg.so
|
||||
endif
|
||||
ifneq ($(mod_multicast),no)
|
||||
MODULES+=multicast.so
|
||||
endif
|
||||
@ -110,10 +109,6 @@ ifneq ($(mod_libvirt),no)
|
||||
fence_virtd_SOURCES+=${libvirt_so_SOURCES}
|
||||
LIBS+=$(VIRT_LIBS) $(UUID_LIBS)
|
||||
endif
|
||||
ifneq ($(mod_checkpoint),no)
|
||||
fence_virtd_SOURCES+=${checkpoint_so_SOURCES}
|
||||
LIBS+=$(AIS_LIBS) $(COROSYNC_LIBS) $(CMAN_LIBS)
|
||||
endif
|
||||
ifneq ($(mod_libvirt_qmf),no)
|
||||
fence_virtd_SOURCES+=${libvirt_qmf_so_SOURCES}
|
||||
fence_virtd_cxx_SOURCES+=${libvirt_qmf_cxx_so_SOURCES}
|
||||
@ -124,6 +119,9 @@ fence_virtd_SOURCES+=${pm_fence_so_SOURCES}
|
||||
INCLUDES+=$(PACEMAKER_INCLUDES)
|
||||
LIBS+=$(PACEMAKER_LIBS)
|
||||
endif
|
||||
ifneq ($(mod_cpg),no)
|
||||
fence_virtd_SOURCES+=${cpg_so_SOURCES}
|
||||
endif
|
||||
ifneq ($(mod_multicast),no)
|
||||
fence_virtd_SOURCES+=${multicast_so_SOURCES}
|
||||
LIBS+=$(NSS_LIBS)
|
||||
@ -166,13 +164,12 @@ libvirt-qmf.so: ${libvirt_qmf_so_SOURCES:.c=.o} ${libvirt_qmf_cxx_so_SOURCES:.cp
|
||||
pm-fence.so: ${pm_fence_so_SOURCES:.c=.o}
|
||||
$(CC) -o $@ $^ $(LIBS) -shared $(PACEMAKER_LIBS)
|
||||
|
||||
cpg.so: ${cpg_so_SOURCES:.c=.o}
|
||||
$(CC) -o $@ $^ $(LIBS) -shared $(VIRT_LIBS) $(UUID_LIBS) $(XML_LIBS) $(COROSYNC_LIBS)
|
||||
|
||||
null.so: ${null_so_SOURCES:.c=.o}
|
||||
$(CC) -o $@ $^ $(LIBS) -shared
|
||||
|
||||
checkpoint.so: ${checkpoint_so_SOURCES:.c=.o}
|
||||
$(CC) -o $@ $^ $(LIBS) -shared $(AIS_LIBS) $(VIRT_LIBS) \
|
||||
$(COROSYNC_LIBS) $(CMAN_LIBS)
|
||||
|
||||
serial.so: ${serial_so_SOURCES:.c=.o}
|
||||
$(CC) -o $@ $^ $(LIBS) -shared $(VIRT_LIBS) $(UUID_LIBS) $(XML_LIBS)
|
||||
|
||||
|
@ -1,824 +0,0 @@
|
||||
/*
|
||||
Copyright Red Hat, Inc. 2009
|
||||
|
||||
This program is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by the
|
||||
Free Software Foundation; either version 2, or (at your option) any
|
||||
later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; see the file COPYING. If not, write to the
|
||||
Free Software Foundation, Inc., 675 Mass Ave, Cambridge,
|
||||
MA 02139, USA.
|
||||
*/
|
||||
/*
|
||||
* Author: Lon Hohberger <lhh at redhat.com>
|
||||
*/
|
||||
#include <config.h>
|
||||
#include <stdio.h>
|
||||
#include <simpleconfig.h>
|
||||
#include <static_map.h>
|
||||
#include <sys/types.h>
|
||||
#include <stdint.h>
|
||||
#include <time.h>
|
||||
#include <server_plugin.h>
|
||||
#include <string.h>
|
||||
#include <malloc.h>
|
||||
#include <syslog.h>
|
||||
#include <errno.h>
|
||||
#include <unistd.h>
|
||||
#include <libvirt/libvirt.h>
|
||||
#include <pthread.h>
|
||||
#ifdef HAVE_OPENAIS_CPG_H
|
||||
#include <openais/cpg.h>
|
||||
#else
|
||||
#ifdef HAVE_COROSYNC_CPG_H
|
||||
#include <corosync/cpg.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include <libcman.h>
|
||||
|
||||
#include <debug.h>
|
||||
#include "virt.h"
|
||||
#include "xvm.h"
|
||||
#include "checkpoint.h"
|
||||
|
||||
|
||||
#define NAME "checkpoint"
|
||||
#define VERSION "0.9"
|
||||
|
||||
#define MAGIC 0x1e017afe

/*
 * Private context handed back to the backend callbacks as `priv`.
 * `magic` is compared against MAGIC by VALIDATE() to detect a NULL or
 * foreign pointer.
 */
struct check_info {
	int magic;
	int pad;
};

/*
 * Guard for backend entry points: if `arg` is NULL or does not carry the
 * expected magic value, set errno to EINVAL and make the *enclosing*
 * function return -1.  Only usable inside functions returning int.
 *
 * `arg` is fully parenthesized so that expression arguments (a ternary,
 * pointer arithmetic, ...) are negated and cast as a whole.
 */
#define VALIDATE(arg) \
do {\
	if (!(arg) || ((struct check_info *)(arg))->magic != MAGIC) { \
		errno = EINVAL;\
		return -1; \
	} \
} while(0)
|
||||
|
||||
|
||||
static void *checkpoint_handle = NULL;
|
||||
static virt_list_t *local_vms = NULL;
|
||||
static char *uri = NULL;
|
||||
static int use_uuid = 0;
|
||||
|
||||
static int
|
||||
virt_list_update(virConnectPtr vp, virt_list_t **vl, int my_id)
|
||||
{
|
||||
virt_list_t *list = NULL;
|
||||
if (*vl)
|
||||
vl_free(*vl);
|
||||
list = vl_get(vp, 1, my_id);
|
||||
*vl = list;
|
||||
|
||||
if (!list)
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
node_operational(uint32_t nodeid)
|
||||
{
|
||||
cman_handle_t ch;
|
||||
cman_node_t node;
|
||||
|
||||
ch = cman_init(NULL);
|
||||
if (!ch)
|
||||
return -1;
|
||||
|
||||
memset(&node, 0, sizeof(node));
|
||||
if (cman_get_node(ch, nodeid, &node) == 0) {
|
||||
cman_finish(ch);
|
||||
return !!node.cn_member;
|
||||
}
|
||||
|
||||
cman_finish(ch);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
get_domain_state_ckpt(void *hp, const char *domain, vm_state_t *state)
|
||||
{
|
||||
errno = EINVAL;
|
||||
|
||||
if (!hp || !domain || !state || !strlen((char *)domain))
|
||||
return -1;
|
||||
if (!strcmp(DOMAIN0NAME, (char *)domain))
|
||||
return -1;
|
||||
|
||||
return ckpt_read(hp, domain, state, sizeof(*state));
|
||||
}
|
||||
|
||||
|
||||
static inline int
|
||||
wait_domain(const char *vm_name, virConnectPtr vp, int timeout)
|
||||
{
|
||||
int tries = 0;
|
||||
int response = 1;
|
||||
int ret;
|
||||
virDomainPtr vdp;
|
||||
virDomainInfo vdi;
|
||||
|
||||
if (use_uuid) {
|
||||
vdp = virDomainLookupByUUIDString(vp, (const char *)vm_name);
|
||||
} else {
|
||||
vdp = virDomainLookupByName(vp, vm_name);
|
||||
}
|
||||
if (!vdp)
|
||||
return 0;
|
||||
|
||||
/* Check domain liveliness. If the domain is still here,
|
||||
we return failure, and the client must then retry */
|
||||
/* XXX On the xen 3.0.4 API, we will be able to guarantee
|
||||
synchronous virDomainDestroy, so this check will not
|
||||
be necessary */
|
||||
do {
|
||||
if (++tries > timeout)
|
||||
break;
|
||||
|
||||
sleep(1);
|
||||
if (use_uuid) {
|
||||
vdp = virDomainLookupByUUIDString(vp,
|
||||
(const char *)vm_name);
|
||||
} else {
|
||||
vdp = virDomainLookupByName(vp, vm_name);
|
||||
}
|
||||
if (!vdp) {
|
||||
dbg_printf(2, "Domain no longer exists\n");
|
||||
response = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
memset(&vdi, 0, sizeof(vdi));
|
||||
ret = virDomainGetInfo(vdp, &vdi);
|
||||
virDomainFree(vdp);
|
||||
if (ret < 0)
|
||||
continue;
|
||||
|
||||
if (vdi.state == VIR_DOMAIN_SHUTOFF) {
|
||||
dbg_printf(2, "Domain has been shut off\n");
|
||||
response = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
dbg_printf(4, "Domain still exists (state %d) "
|
||||
"after %d seconds\n",
|
||||
vdi.state, tries);
|
||||
} while (1);
|
||||
|
||||
return response;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
Returns: 0 - operational
|
||||
1 - dead or presumed so
|
||||
2 - VM not local and I am not the right node to deal with it
|
||||
3 - VM status unknown; cannot operate on it
|
||||
*/
|
||||
static int
|
||||
cluster_virt_status(const char *vm_name, uint32_t *owner)
|
||||
{
|
||||
vm_state_t chk_state, temp_state;
|
||||
virt_state_t *vs;
|
||||
uint32_t me, high_id;
|
||||
int ret = 0;
|
||||
|
||||
dbg_printf(80, "%s %s\n", __FUNCTION__, vm_name);
|
||||
|
||||
/* if we can't find the high ID, we can't do anything useful */
|
||||
if (cpg_get_ids(&me, &high_id) != 0)
|
||||
return 2;
|
||||
|
||||
if (use_uuid) {
|
||||
vs = vl_find_uuid(local_vms, vm_name);
|
||||
} else {
|
||||
vs = vl_find_name(local_vms, vm_name);
|
||||
}
|
||||
|
||||
if (!vs) {
|
||||
ret = 2; /* not found locally */
|
||||
temp_state.s_owner = 0;
|
||||
temp_state.s_state = 0;
|
||||
|
||||
if (get_domain_state_ckpt(checkpoint_handle,
|
||||
vm_name, &chk_state) < 0) {
|
||||
if (me == high_id) {
|
||||
dbg_printf(2, "High ID: Unknown VM\n");
|
||||
ret = 3;
|
||||
goto out;
|
||||
}
|
||||
} else if (me == chk_state.s_owner) {
|
||||
/* <UVT> If domain has disappeared completely from libvirt (i.e., destroyed)
|
||||
we'd end up with the checkpoing section containing its last state and last owner.
|
||||
fence_virtd will freeze at the next status call, as no one will be willing to
|
||||
return anything but 2. So we should delete corresponding section, but only if
|
||||
we are high_id, because otherwise we don't know if the domain hasn't been started
|
||||
on some other node. If checkpoint states us as an owner of the domain, but we
|
||||
don't have it, we set s_state to a special value to let high_id know about
|
||||
this situation. </UVT> */
|
||||
dbg_printf(2, "I am an owner of unexisting domain, mangling field\n");
|
||||
temp_state.s_owner = me;
|
||||
temp_state.s_state = -1;
|
||||
if (ckpt_write(checkpoint_handle, vm_name,
|
||||
&temp_state, sizeof(vm_state_t)) < 0)
|
||||
dbg_printf(2, "error storing in %s\n", __FUNCTION__);
|
||||
}
|
||||
|
||||
if (me != high_id)
|
||||
goto out;
|
||||
|
||||
if ((chk_state.s_state == -1) || (temp_state.s_state == -1)) {
|
||||
dbg_printf(2, "I am high id and state field is mangled, removing section\n");
|
||||
ckpt_erase (checkpoint_handle, vm_name);
|
||||
ret = 1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (node_operational(chk_state.s_owner)) {
|
||||
*owner = chk_state.s_owner;
|
||||
dbg_printf(2, "High ID: Owner is operational\n");
|
||||
ret = 2;
|
||||
} else {
|
||||
dbg_printf(2, "High ID: Owner is dead; returning 'off'\n");
|
||||
ret = 1;
|
||||
}
|
||||
} else if (vs->v_state.s_state == VIR_DOMAIN_SHUTOFF) {
|
||||
ret = 1; /* local and off */
|
||||
}
|
||||
|
||||
out:
|
||||
dbg_printf(80, "%s %s %d\n", __FUNCTION__, vm_name, ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
store_domains_by_name(void *hp, virt_list_t *vl)
|
||||
{
|
||||
int x;
|
||||
|
||||
if (!vl)
|
||||
return;
|
||||
|
||||
for (x = 0; x < vl->vm_count; x++) {
|
||||
if (!strcmp(DOMAIN0NAME, vl->vm_states[x].v_name))
|
||||
continue;
|
||||
dbg_printf(2, "Storing %s\n", vl->vm_states[x].v_name);
|
||||
if (ckpt_write(hp, vl->vm_states[x].v_name,
|
||||
&vl->vm_states[x].v_state,
|
||||
sizeof(vm_state_t)) < 0)
|
||||
dbg_printf(2, "error storing in %s\n", __FUNCTION__);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
store_domains_by_uuid(void *hp, virt_list_t *vl)
|
||||
{
|
||||
int x;
|
||||
|
||||
if (!vl)
|
||||
return;
|
||||
|
||||
for (x = 0; x < vl->vm_count; x++) {
|
||||
if (!strcmp(DOMAIN0UUID, vl->vm_states[x].v_uuid))
|
||||
continue;
|
||||
dbg_printf(2, "Storing %s\n", vl->vm_states[x].v_uuid);
|
||||
if (ckpt_write(hp, vl->vm_states[x].v_uuid,
|
||||
&vl->vm_states[x].v_state,
|
||||
sizeof(vm_state_t)) < 0)
|
||||
dbg_printf(2, "error storing in %s\n", __FUNCTION__);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
update_local_vms(void)
|
||||
{
|
||||
virConnectPtr vp = NULL;
|
||||
uint32_t my_id = 0;
|
||||
|
||||
cpg_get_ids(&my_id, NULL);
|
||||
|
||||
vp = virConnectOpen(uri);
|
||||
if (!vp) {
|
||||
syslog(LOG_ERR, "Failed to connect to hypervisor\n");
|
||||
}
|
||||
virt_list_update(vp, &local_vms, my_id);
|
||||
vl_print(local_vms);
|
||||
if (use_uuid)
|
||||
store_domains_by_uuid(checkpoint_handle, local_vms);
|
||||
else
|
||||
store_domains_by_name(checkpoint_handle, local_vms);
|
||||
if (vp) virConnectClose(vp);
|
||||
}
|
||||
|
||||
|
||||
/* <UVT>
|
||||
Functions do_off and do_reboot should return error only if fencing
|
||||
was actually unsuccessful, i.e., domain was running and is still
|
||||
running after fencing attempt. If domain is not running after fencing
|
||||
(did not exist before or couldn't be started after), 0 should be returned
|
||||
</UVT> */
|
||||
static int
|
||||
do_off(const char *vm_name)
|
||||
{
|
||||
virConnectPtr vp;
|
||||
virDomainPtr vdp;
|
||||
virDomainInfo vdi;
|
||||
int ret = -1;
|
||||
|
||||
dbg_printf(5, "%s %s\n", __FUNCTION__, vm_name);
|
||||
vp = virConnectOpen(uri);
|
||||
if (!vp)
|
||||
return 1;
|
||||
|
||||
if (use_uuid) {
|
||||
vdp = virDomainLookupByUUIDString(vp,
|
||||
(const char *)vm_name);
|
||||
} else {
|
||||
vdp = virDomainLookupByName(vp, vm_name);
|
||||
}
|
||||
|
||||
if (!vdp) {
|
||||
dbg_printf(2, "Nothing to do - domain does not exist\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (((virDomainGetInfo(vdp, &vdi) == 0) &&
|
||||
(vdi.state == VIR_DOMAIN_SHUTOFF))) {
|
||||
dbg_printf(2, "Nothing to do - domain is off\n");
|
||||
virDomainFree(vdp);
|
||||
return 0;
|
||||
}
|
||||
|
||||
syslog(LOG_NOTICE, "Destroying domain %s\n", vm_name);
|
||||
dbg_printf(2, "[OFF] Calling virDomainDestroy\n");
|
||||
ret = virDomainDestroy(vdp);
|
||||
if (ret < 0) {
|
||||
syslog(LOG_NOTICE, "Failed to destroy domain: %d\n", ret);
|
||||
printf("virDomainDestroy() failed: %d\n", ret);
|
||||
|
||||
ret = 1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (ret) {
|
||||
syslog(LOG_NOTICE,
|
||||
"Domain %s still exists; fencing failed\n",
|
||||
vm_name);
|
||||
printf("Domain %s still exists; fencing failed\n", vm_name);
|
||||
|
||||
ret = 1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
virConnectClose(vp);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
do_reboot(const char *vm_name)
|
||||
{
|
||||
virConnectPtr vp;
|
||||
virDomainPtr vdp, nvdp;
|
||||
virDomainInfo vdi;
|
||||
char *domain_desc;
|
||||
int ret;
|
||||
|
||||
//uuid_unparse(vm_uuid, uu_string);
|
||||
dbg_printf(5, "%s %s\n", __FUNCTION__, vm_name);
|
||||
vp = virConnectOpen(uri);
|
||||
if (!vp)
|
||||
return 1;
|
||||
|
||||
if (use_uuid) {
|
||||
vdp = virDomainLookupByUUIDString(vp,
|
||||
(const char *)vm_name);
|
||||
} else {
|
||||
vdp = virDomainLookupByName(vp, vm_name);
|
||||
}
|
||||
|
||||
if (!vdp) {
|
||||
dbg_printf(2, "[libvirt:REBOOT] Nothing to "
|
||||
"do - domain does not exist\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (((virDomainGetInfo(vdp, &vdi) == 0) &&
|
||||
(vdi.state == VIR_DOMAIN_SHUTOFF))) {
|
||||
dbg_printf(2, "[libvirt:REBOOT] Nothing to "
|
||||
"do - domain is off\n");
|
||||
virDomainFree(vdp);
|
||||
return 0;
|
||||
}
|
||||
|
||||
syslog(LOG_NOTICE, "Rebooting domain %s\n", vm_name);
|
||||
printf("Rebooting domain %s...\n", vm_name);
|
||||
domain_desc = virDomainGetXMLDesc(vdp, 0);
|
||||
|
||||
if (!domain_desc) {
|
||||
printf("Failed getting domain description from "
|
||||
"libvirt\n");
|
||||
}
|
||||
|
||||
dbg_printf(2, "[REBOOT] Calling virDomainDestroy(%p)\n", vdp);
|
||||
ret = virDomainDestroy(vdp);
|
||||
if (ret < 0) {
|
||||
printf("virDomainDestroy() failed: %d/%d\n", ret, errno);
|
||||
free(domain_desc);
|
||||
virDomainFree(vdp);
|
||||
ret = 1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = wait_domain(vm_name, vp, 15);
|
||||
|
||||
if (ret) {
|
||||
syslog(LOG_NOTICE, "Domain %s still exists; fencing failed\n",
|
||||
vm_name);
|
||||
printf("Domain %s still exists; fencing failed\n", vm_name);
|
||||
if (domain_desc)
|
||||
free(domain_desc);
|
||||
ret = 1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!domain_desc) {
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* 'on' is not a failure */
|
||||
ret = 0;
|
||||
|
||||
dbg_printf(3, "[[ XML Domain Info ]]\n");
|
||||
dbg_printf(3, "%s\n[[ XML END ]]\n", domain_desc);
|
||||
dbg_printf(2, "Calling virDomainCreateLinux()...\n");
|
||||
|
||||
nvdp = virDomainCreateLinux(vp, domain_desc, 0);
|
||||
if (nvdp == NULL) {
|
||||
/* More recent versions of libvirt or perhaps the
|
||||
* KVM back-end do not let you create a domain from
|
||||
* XML if there is already a defined domain description
|
||||
* with the same name that it knows about. You must
|
||||
* then call virDomainCreate() */
|
||||
dbg_printf(2, "Failed; Trying virDomainCreate()...\n");
|
||||
if (virDomainCreate(vdp) < 0) {
|
||||
syslog(LOG_NOTICE,
|
||||
"Could not restart %s\n",
|
||||
vm_name);
|
||||
dbg_printf(1, "Failed to recreate guest"
|
||||
" %s!\n", vm_name);
|
||||
}
|
||||
}
|
||||
|
||||
free(domain_desc);
|
||||
|
||||
out:
|
||||
virConnectClose(vp);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*<UVT> This function must send reply from at least one node, otherwise
|
||||
requesting fence_virtd would block forever in wait_cpt_reply </UVT> */
|
||||
static void
|
||||
do_real_work(void *data, size_t len, uint32_t nodeid, uint32_t seqno)
|
||||
{
|
||||
struct ckpt_fence_req *req = data;
|
||||
struct ckpt_fence_req reply;
|
||||
uint32_t owner;
|
||||
int ret = 1;
|
||||
|
||||
memcpy(&reply, req, sizeof(reply));
|
||||
|
||||
update_local_vms();
|
||||
|
||||
switch(req->request) {
|
||||
case FENCE_STATUS:
|
||||
ret = cluster_virt_status(req->vm_name, &owner);
|
||||
if (ret == 3) {
|
||||
ret = RESP_OFF;
|
||||
break;
|
||||
}
|
||||
if (ret == 2) {
|
||||
return;
|
||||
}
|
||||
if (ret == 1) {
|
||||
ret = RESP_OFF;
|
||||
}
|
||||
break;
|
||||
case FENCE_OFF:
|
||||
ret = cluster_virt_status(req->vm_name, &owner);
|
||||
if (ret == 3) {
|
||||
/* No record of this VM in the checkpoint. */
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
if (ret == 2) {
|
||||
return;
|
||||
}
|
||||
if (ret == 1) {
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
/* Must be running locally to perform 'off' */
|
||||
ret = do_off(req->vm_name);
|
||||
break;
|
||||
case FENCE_REBOOT:
|
||||
ret = cluster_virt_status(req->vm_name, &owner);
|
||||
if (ret == 3) {
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
if (ret == 2) {
|
||||
return;
|
||||
}
|
||||
if (ret == 1) {
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
/* Must be running locally to perform 'reboot' */
|
||||
ret = do_reboot(req->vm_name);
|
||||
break;
|
||||
}
|
||||
|
||||
reply.response = ret;
|
||||
|
||||
cpg_send_reply(&reply, sizeof(reply), nodeid, seqno);
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
do_request(const char *vm_name, int request, uint32_t seqno)
|
||||
{
|
||||
struct ckpt_fence_req freq, *frp;
|
||||
size_t retlen;
|
||||
uint32_t seq;
|
||||
int ret;
|
||||
|
||||
memset(&freq, 0, sizeof(freq));
|
||||
snprintf(freq.vm_name, sizeof(freq.vm_name), vm_name);
|
||||
freq.request = request;
|
||||
freq.seqno = seqno;
|
||||
|
||||
if (cpg_send_req(&freq, sizeof(freq), &seq) != 0) {
|
||||
printf("Failed to send\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (cpg_wait_reply((void *)&frp, &retlen, seq) != 0) {
|
||||
printf("Failed to receive\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
ret = frp->response;
|
||||
free(frp);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Backend "null" operation: validates the context, logs the request and
 * returns 1.  Returns -1 with errno set to EINVAL if `priv` fails
 * VALIDATE().
 */
static int
checkpoint_null(const char *vm_name, void *priv)
{
	VALIDATE(priv);

	printf("[CKPT] Null operation on %s\n", vm_name);
	return 1;
}
|
||||
|
||||
|
||||
static int
|
||||
checkpoint_off(const char *vm_name, const char *src,
|
||||
uint32_t seqno, void *priv)
|
||||
{
|
||||
VALIDATE(priv);
|
||||
printf("[CKPT] OFF operation on %s seq %d\n", vm_name, seqno);
|
||||
|
||||
return do_request(vm_name, FENCE_OFF, seqno);
|
||||
}
|
||||
|
||||
|
||||
/*
 * Backend "on" operation: only logs the request and returns 1; no
 * request is sent to the cluster.  Returns -1 with errno set to EINVAL
 * if `priv` fails VALIDATE().  `src` is not used.
 */
static int
checkpoint_on(const char *vm_name, const char *src,
	      uint32_t seqno, void *priv)
{
	VALIDATE(priv);
	/* seqno is unsigned: print it with %u so large values do not show
	   up as negative numbers. */
	printf("[CKPT] ON operation on %s seq %u\n", vm_name,
	       (unsigned int)seqno);

	return 1;
}
|
||||
|
||||
|
||||
/*
 * Backend device-status operation: logs the call, then validates the
 * context.  Returns 0 for a valid context, -1 with errno set to EINVAL
 * otherwise.  The log line is emitted before validation.
 */
static int
checkpoint_devstatus(void *priv)
{
	fputs("[CKPT] Device status\n", stdout);

	VALIDATE(priv);
	return 0;
}
|
||||
|
||||
|
||||
static int
|
||||
checkpoint_status(const char *vm_name, void *priv)
|
||||
{
|
||||
VALIDATE(priv);
|
||||
printf("[CKPT] STATUS operation on %s\n", vm_name);
|
||||
|
||||
return do_request(vm_name, FENCE_STATUS, 0);
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
checkpoint_reboot(const char *vm_name, const char *src,
|
||||
uint32_t seqno, void *priv)
|
||||
{
|
||||
VALIDATE(priv);
|
||||
printf("[CKPT] REBOOT operation on %s seq %d\n", vm_name, seqno);
|
||||
|
||||
return do_request(vm_name, FENCE_REBOOT, 0);
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
checkpoint_hostlist(hostlist_callback callback, void *arg, void *priv)
|
||||
{
|
||||
VALIDATE(priv);
|
||||
printf("[CKPT] HOSTLIST operation\n");
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
checkpoint_init(backend_context_t *c, config_object_t *config)
|
||||
{
|
||||
char value[1024];
|
||||
struct check_info *info = NULL;
|
||||
int x;
|
||||
|
||||
#ifdef _MODULE
|
||||
if (sc_get(config, "fence_virtd/@debug", value, sizeof(value))==0)
|
||||
dset(atoi(value));
|
||||
#endif
|
||||
|
||||
if (sc_get(config, "backends/libvirt/@uri",
|
||||
value, sizeof(value)) == 0) {
|
||||
uri = strdup(value);
|
||||
if (!uri) {
|
||||
free(info);
|
||||
return -1;
|
||||
}
|
||||
dbg_printf(1, "Using %s\n", uri);
|
||||
}
|
||||
|
||||
if (sc_get(config, "backends/checkpoint/@uri",
|
||||
value, sizeof(value)) == 0) {
|
||||
if (uri)
|
||||
free(uri);
|
||||
uri = strdup(value);
|
||||
if (!uri) {
|
||||
free(info);
|
||||
return -1;
|
||||
}
|
||||
dbg_printf(1, "Using %s\n", uri);
|
||||
}
|
||||
|
||||
/* Naming scheme is no longer a top-level config option.
|
||||
* However, we retain it here for configuration compatibility with
|
||||
* versions 0.1.3 and previous.
|
||||
*/
|
||||
if (sc_get(config, "fence_virtd/@name_mode",
|
||||
value, sizeof(value)-1) == 0) {
|
||||
|
||||
dbg_printf(1, "Got %s for name_mode\n", value);
|
||||
if (!strcasecmp(value, "uuid")) {
|
||||
use_uuid = 1;
|
||||
} else if (!strcasecmp(value, "name")) {
|
||||
use_uuid = 0;
|
||||
} else {
|
||||
dbg_printf(1, "Unsupported name_mode: %s\n", value);
|
||||
}
|
||||
}
|
||||
|
||||
if (sc_get(config, "backends/checkpoint/@name_mode",
|
||||
value, sizeof(value)-1) == 0) {
|
||||
|
||||
dbg_printf(1, "Got %s for name_mode\n", value);
|
||||
if (!strcasecmp(value, "uuid")) {
|
||||
use_uuid = 1;
|
||||
} else if (!strcasecmp(value, "name")) {
|
||||
use_uuid = 0;
|
||||
} else {
|
||||
dbg_printf(1, "Unsupported name_mode: %s\n", value);
|
||||
}
|
||||
}
|
||||
|
||||
if (cpg_start(PACKAGE_NAME, do_real_work) < 0) {
|
||||
return -1;
|
||||
}
|
||||
info = malloc(sizeof(*info));
|
||||
if (!info)
|
||||
return -1;
|
||||
|
||||
memset(info, 0, sizeof(*info));
|
||||
|
||||
info->magic = MAGIC;
|
||||
|
||||
x = 0;
|
||||
while ((checkpoint_handle = ckpt_init(
|
||||
"vm_states", 262144, 4096, 64, 10
|
||||
)) == NULL) {
|
||||
if (!x) {
|
||||
dbg_printf(1, "Could not initialize "
|
||||
"saCkPt; retrying...\n");
|
||||
x = 1;
|
||||
}
|
||||
sleep(3);
|
||||
}
|
||||
if (x)
|
||||
dbg_printf(1, "Checkpoint initialized\n");
|
||||
|
||||
update_local_vms();
|
||||
|
||||
*c = (void *)info;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
checkpoint_shutdown(backend_context_t c)
|
||||
{
|
||||
struct check_info *info = (struct check_info *)c;
|
||||
|
||||
VALIDATE(info);
|
||||
info->magic = 0;
|
||||
free(info);
|
||||
|
||||
cpg_stop();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static fence_callbacks_t checkpoint_callbacks = {
|
||||
.null = checkpoint_null,
|
||||
.off = checkpoint_off,
|
||||
.on = checkpoint_on,
|
||||
.reboot = checkpoint_reboot,
|
||||
.status = checkpoint_status,
|
||||
.devstatus = checkpoint_devstatus,
|
||||
.hostlist = checkpoint_hostlist
|
||||
};
|
||||
|
||||
static backend_plugin_t checkpoint_plugin = {
|
||||
.name = NAME,
|
||||
.version = VERSION,
|
||||
.callbacks = &checkpoint_callbacks,
|
||||
.init = checkpoint_init,
|
||||
.cleanup = checkpoint_shutdown,
|
||||
};
|
||||
|
||||
|
||||
#ifdef _MODULE
|
||||
double
|
||||
BACKEND_VER_SYM(void)
|
||||
{
|
||||
return PLUGIN_VERSION_BACKEND;
|
||||
}
|
||||
|
||||
const backend_plugin_t *
|
||||
BACKEND_INFO_SYM(void)
|
||||
{
|
||||
return &checkpoint_plugin;
|
||||
}
|
||||
#else
|
||||
static void __attribute__((constructor))
|
||||
checkpoint_register_plugin(void)
|
||||
{
|
||||
plugin_reg_backend(&checkpoint_plugin);
|
||||
}
|
||||
#endif
|
@ -129,36 +129,35 @@ backend_config_libvirt(config_object_t *config)
|
||||
|
||||
|
||||
static int
|
||||
backend_config_checkpoint(config_object_t *config)
|
||||
backend_config_cpg(config_object_t *config)
|
||||
{
|
||||
char val[4096];
|
||||
char inp[4096];
|
||||
int done = 0;
|
||||
|
||||
printf("\n");
|
||||
printf("The checkpoint backend module is designed for use in clusters\n"
|
||||
"running corosync, openais, and CMAN. It utilizes the SAF \n"
|
||||
"checkpoint API to store virtual machine states and CPG to \n"
|
||||
printf("The CPG backend module is designed for use in clusters\n"
|
||||
"running corosync and libvirt. It utilizes the CPG API to \n"
|
||||
"route fencing requests, finally utilizing libvirt to perform\n"
|
||||
"fencing actions.\n\n");
|
||||
|
||||
if (sc_get(config, "backends/checkpoint/@uri", val,
|
||||
if (sc_get(config, "backends/cpg/@uri", val,
|
||||
sizeof(val))) {
|
||||
strncpy(val, DEFAULT_HYPERVISOR_URI, sizeof(val));
|
||||
}
|
||||
|
||||
text_input("Libvirt URI", val, inp, sizeof(inp));
|
||||
|
||||
sc_set(config, "backends/checkpoint/@uri", inp);
|
||||
sc_set(config, "backends/cpg/@uri", inp);
|
||||
|
||||
printf("\n");
|
||||
printf("The name mode is how the checkpoint plugin stores and \n"
|
||||
printf("The name mode is how the cpg plugin stores and \n"
|
||||
"references virtual machines. Since virtual machine names\n"
|
||||
"are not guaranteed to be unique cluster-wide, use of UUIDs\n"
|
||||
"is strongly recommended. However, for compatibility with \n"
|
||||
"fence_xvmd, the use of 'name' mode is also supported.\n\n");
|
||||
|
||||
if (sc_get(config, "backends/checkpoint/@name_mode", val,
|
||||
if (sc_get(config, "backends/cpg/@name_mode", val,
|
||||
sizeof(val))) {
|
||||
strncpy(val, "uuid", sizeof(val));
|
||||
}
|
||||
@ -179,7 +178,7 @@ backend_config_checkpoint(config_object_t *config)
|
||||
}
|
||||
} while (!done);
|
||||
|
||||
sc_set(config, "backends/checkpoint/@name_mode", inp);
|
||||
sc_set(config, "backends/cpg/@name_mode", inp);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -527,8 +526,8 @@ backend_configure(config_object_t *config)
|
||||
#if 0
|
||||
if (!strcmp(inp, "libvirt")) {
|
||||
backend_config_libvirt(config);
|
||||
} else if (!strcmp(inp, "checkpoint")) {
|
||||
backend_config_checkpoint(config);
|
||||
} else if (!strcmp(inp, "cpg")) {
|
||||
backend_config_cpg(config);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
266
server/cpg.c
266
server/cpg.c
@ -12,18 +12,14 @@
|
||||
#include <sys/uio.h>
|
||||
#include <list.h>
|
||||
#include <pthread.h>
|
||||
#ifdef HAVE_OPENAIS_CPG_H
|
||||
#include <openais/cpg.h>
|
||||
#else
|
||||
#ifdef HAVE_COROSYNC_CPG_H
|
||||
|
||||
#include <corosync/cpg.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include "checkpoint.h"
|
||||
|
||||
#define NODE_ID_NONE ((uint32_t)-1)
|
||||
#include "debug.h"
|
||||
#include "virt.h"
|
||||
#include "cpg.h"
|
||||
|
||||
#define NODE_ID_NONE ((uint32_t) -1)
|
||||
|
||||
struct msg_queue_node {
|
||||
list_head();
|
||||
@ -38,6 +34,7 @@ struct msg_queue_node {
|
||||
struct wire_msg {
|
||||
#define TYPE_REQUEST 0
|
||||
#define TYPE_REPLY 1
|
||||
#define TYPE_STORE_VM 2
|
||||
uint32_t type;
|
||||
uint32_t seqno;
|
||||
uint32_t target;
|
||||
@ -45,95 +42,55 @@ struct wire_msg {
|
||||
char data[0];
|
||||
};
|
||||
|
||||
static uint32_t seqnum = 0, my_node_id = NODE_ID_NONE;
|
||||
static uint32_t high_id_from_callback = NODE_ID_NONE;
|
||||
static struct msg_queue_node *pending= NULL;
|
||||
static uint32_t seqnum = 0;
|
||||
static struct msg_queue_node *pending = NULL;
|
||||
static cpg_handle_t cpg_handle;
|
||||
static struct cpg_name gname;
|
||||
|
||||
static pthread_mutex_t cpg_mutex = PTHREAD_MUTEX_INITIALIZER;
|
||||
static pthread_cond_t cpg_cond = PTHREAD_COND_INITIALIZER;
|
||||
static pthread_t cpg_thread = 0;
|
||||
static request_callback_fn req_callback_fn;
|
||||
|
||||
/* <UVT> function cpg_membership_get is (probably) buggy and returns correct
|
||||
count only before cpg_mcast_joined, subsequent calls set count to 0 </UVT> */
|
||||
#if 0
|
||||
static pthread_mutex_t cpg_ids_mutex = PTHREAD_MUTEX_INITIALIZER;
|
||||
static uint32_t my_node_id = NODE_ID_NONE;
|
||||
static uint32_t high_id_from_callback = NODE_ID_NONE;
|
||||
|
||||
static request_callback_fn req_callback_fn;
|
||||
static request_callback_fn store_callback_fn;
|
||||
static confchange_callback_fn conf_leave_fn;
|
||||
static confchange_callback_fn conf_join_fn;
|
||||
|
||||
|
||||
int
|
||||
cpg_get_ids(uint32_t *my_id, uint32_t *high_id)
|
||||
{
|
||||
/* This is segfaulting for some reason */
|
||||
struct cpg_address cpg_nodes[CPG_MEMBERS_MAX];
|
||||
uint32_t high = my_node_id;
|
||||
int count = CPG_MEMBERS_MAX, x;
|
||||
|
||||
if (!my_id && !high_id)
|
||||
return 0;
|
||||
|
||||
if (my_id)
|
||||
*my_id = my_node_id;
|
||||
|
||||
if (!high_id)
|
||||
return 0;
|
||||
|
||||
memset(&cpg_nodes, 0, sizeof(cpg_nodes));
|
||||
|
||||
if (cpg_membership_get(cpg_handle, &gname,
|
||||
cpg_nodes, &count) != CPG_OK)
|
||||
return -1;
|
||||
|
||||
for (x = 0; x < count; x++) {
|
||||
if (cpg_nodes[x].nodeid > high) {
|
||||
high = cpg_nodes[x].nodeid;
|
||||
}
|
||||
}
|
||||
|
||||
*high_id = high;
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
int
|
||||
cpg_get_ids(uint32_t *my_id, uint32_t *high_id)
|
||||
{
|
||||
if (!my_id && !high_id)
|
||||
return 0;
|
||||
|
||||
pthread_mutex_lock(&cpg_ids_mutex);
|
||||
if (my_id)
|
||||
*my_id = my_node_id;
|
||||
|
||||
if (!high_id)
|
||||
return 0;
|
||||
|
||||
*high_id = high_id_from_callback;
|
||||
if (high_id)
|
||||
*high_id = high_id_from_callback;
|
||||
pthread_mutex_unlock(&cpg_ids_mutex);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
#ifdef HAVE_OPENAIS_CPG_H
|
||||
cpg_deliver_func(cpg_handle_t h,
|
||||
struct cpg_name *group_name,
|
||||
uint32_t nodeid,
|
||||
uint32_t pid,
|
||||
void *msg,
|
||||
int msglen)
|
||||
#else
|
||||
cpg_deliver_func(cpg_handle_t h,
|
||||
const struct cpg_name *group_name,
|
||||
uint32_t nodeid,
|
||||
uint32_t pid,
|
||||
void *msg,
|
||||
size_t msglen)
|
||||
#endif
|
||||
{
|
||||
struct msg_queue_node *n;
|
||||
struct wire_msg *m = msg;
|
||||
int x, found;
|
||||
|
||||
pthread_mutex_lock(&cpg_mutex);
|
||||
|
||||
if (m->type == TYPE_REPLY) {
|
||||
/* Reply to a request we sent */
|
||||
found = 0;
|
||||
@ -165,10 +122,7 @@ cpg_deliver_func(cpg_handle_t h,
|
||||
list_remove(&pending, n);
|
||||
list_insert(&pending, n);
|
||||
|
||||
#if 0
|
||||
printf("Seqnum %d replied; removing from list",
|
||||
n->seqno);
|
||||
#endif
|
||||
dbg_printf(2, "Seqnum %d replied; removing from list\n", n->seqno);
|
||||
|
||||
pthread_cond_broadcast(&cpg_cond);
|
||||
goto out_unlock;
|
||||
@ -179,6 +133,10 @@ cpg_deliver_func(cpg_handle_t h,
|
||||
req_callback_fn(&m->data, msglen - sizeof(*m),
|
||||
nodeid, m->seqno);
|
||||
}
|
||||
if (m->type == TYPE_STORE_VM) {
|
||||
store_callback_fn(&m->data, msglen - sizeof(*m),
|
||||
nodeid, m->seqno);
|
||||
}
|
||||
|
||||
return;
|
||||
|
||||
@ -188,32 +146,31 @@ out_unlock:
|
||||
|
||||
|
||||
void
|
||||
#ifdef HAVE_OPENAIS_CPG_H
|
||||
cpg_config_change(cpg_handle_t h,
|
||||
struct cpg_name *group_name,
|
||||
struct cpg_address *members, int memberlen,
|
||||
struct cpg_address *left, int leftlen,
|
||||
struct cpg_address *join, int joinlen)
|
||||
#else
|
||||
cpg_config_change(cpg_handle_t h,
|
||||
const struct cpg_name *group_name,
|
||||
const struct cpg_address *members, size_t memberlen,
|
||||
const struct cpg_address *left, size_t leftlen,
|
||||
const struct cpg_address *join, size_t joinlen)
|
||||
#endif
|
||||
{
|
||||
int x;
|
||||
int high = my_node_id;
|
||||
int high;
|
||||
|
||||
pthread_mutex_lock(&cpg_ids_mutex);
|
||||
high = my_node_id;
|
||||
|
||||
for (x = 0; x < memberlen; x++) {
|
||||
if (members[x].nodeid > high) {
|
||||
if (members[x].nodeid > high)
|
||||
high = members[x].nodeid;
|
||||
}
|
||||
}
|
||||
|
||||
high_id_from_callback = high;
|
||||
pthread_mutex_unlock(&cpg_ids_mutex);
|
||||
|
||||
return;
|
||||
if (joinlen > 0)
|
||||
conf_join_fn(join, joinlen);
|
||||
|
||||
if (leftlen > 0)
|
||||
conf_leave_fn(left, leftlen);
|
||||
}
|
||||
|
||||
|
||||
@ -235,9 +192,12 @@ cpg_send_req(void *data, size_t len, uint32_t *seqno)
|
||||
n = malloc(sizeof(*n));
|
||||
if (!n)
|
||||
return -1;
|
||||
|
||||
m = malloc(msgsz);
|
||||
if (!m)
|
||||
if (!m) {
|
||||
free(n);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* only incremented on send */
|
||||
n->state = STATE_CLEAR;
|
||||
@ -260,15 +220,58 @@ cpg_send_req(void *data, size_t len, uint32_t *seqno)
|
||||
ret = cpg_mcast_joined(cpg_handle, CPG_TYPE_AGREED, &iov, 1);
|
||||
|
||||
free(m);
|
||||
if (ret == CPG_OK)
|
||||
if (ret == CS_OK)
|
||||
return 0;
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
cpg_send_reply(void *data, size_t len, uint32_t nodeid,
|
||||
uint32_t seqno)
|
||||
cpg_send_vm_state(virt_state_t *vs)
|
||||
{
|
||||
struct iovec iov;
|
||||
struct msg_queue_node *n;
|
||||
struct wire_msg *m;
|
||||
size_t msgsz = sizeof(*m) + sizeof(*vs);
|
||||
int ret;
|
||||
|
||||
n = calloc(1, (sizeof(*n)));
|
||||
if (!n)
|
||||
return -1;
|
||||
|
||||
m = calloc(1, msgsz);
|
||||
if (!m) {
|
||||
free(n);
|
||||
return -1;
|
||||
}
|
||||
|
||||
n->state = STATE_MESSAGE;
|
||||
n->msg = NULL;
|
||||
n->msglen = 0;
|
||||
|
||||
pthread_mutex_lock(&cpg_mutex);
|
||||
list_insert(&pending, n);
|
||||
pthread_mutex_unlock(&cpg_mutex);
|
||||
|
||||
m->type = TYPE_STORE_VM;
|
||||
m->target = NODE_ID_NONE;
|
||||
|
||||
memcpy(&m->data, vs, sizeof(*vs));
|
||||
|
||||
iov.iov_base = m;
|
||||
iov.iov_len = msgsz;
|
||||
ret = cpg_mcast_joined(cpg_handle, CPG_TYPE_AGREED, &iov, 1);
|
||||
|
||||
free(m);
|
||||
if (ret == CS_OK)
|
||||
return 0;
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
cpg_send_reply(void *data, size_t len, uint32_t nodeid, uint32_t seqno)
|
||||
{
|
||||
struct iovec iov;
|
||||
struct wire_msg *m;
|
||||
@ -290,8 +293,9 @@ cpg_send_reply(void *data, size_t len, uint32_t nodeid,
|
||||
ret = cpg_mcast_joined(cpg_handle, CPG_TYPE_AGREED, &iov, 1);
|
||||
|
||||
free(m);
|
||||
if (ret == CPG_OK)
|
||||
if (ret == CS_OK)
|
||||
return 0;
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
@ -313,11 +317,12 @@ cpg_wait_reply(void **data, size_t *len, uint32_t seqno)
|
||||
if (n->state != STATE_MESSAGE)
|
||||
continue;
|
||||
found = 1;
|
||||
break;
|
||||
goto out;
|
||||
}
|
||||
pthread_mutex_unlock(&cpg_mutex);
|
||||
}
|
||||
|
||||
out:
|
||||
list_remove(&pending, n);
|
||||
pthread_mutex_unlock(&cpg_mutex);
|
||||
|
||||
@ -332,54 +337,60 @@ cpg_wait_reply(void **data, size_t *len, uint32_t seqno)
|
||||
static void *
|
||||
cpg_dispatch_thread(void *arg)
|
||||
{
|
||||
cpg_dispatch(cpg_handle, CPG_DISPATCH_BLOCKING);
|
||||
cpg_dispatch(cpg_handle, CS_DISPATCH_BLOCKING);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
cpg_start(const char *name, request_callback_fn func)
|
||||
cpg_start( const char *name,
|
||||
request_callback_fn req_cb_fn,
|
||||
request_callback_fn store_cb_fn,
|
||||
confchange_callback_fn join_fn,
|
||||
confchange_callback_fn leave_fn)
|
||||
{
|
||||
cpg_handle_t h;
|
||||
int ret;
|
||||
|
||||
errno = EINVAL;
|
||||
|
||||
if (!name)
|
||||
return -1;
|
||||
|
||||
gname.length = snprintf(gname.value,
|
||||
sizeof(gname.value), name);
|
||||
if (gname.length >= sizeof(gname.value)) {
|
||||
ret = snprintf(gname.value, sizeof(gname.value), name);
|
||||
if (ret <= 0)
|
||||
return -1;
|
||||
|
||||
if (ret >= sizeof(gname.value)) {
|
||||
errno = ENAMETOOLONG;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (gname.length <= 0)
|
||||
return -1;
|
||||
|
||||
gname.length = ret;
|
||||
|
||||
memset(&h, 0, sizeof(h));
|
||||
if (cpg_initialize(&h, &my_callbacks) != CPG_OK) {
|
||||
if (cpg_initialize(&h, &my_callbacks) != CS_OK) {
|
||||
perror("cpg_initialize");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (cpg_join(h, &gname) != CPG_OK) {
|
||||
if (cpg_join(h, &gname) != CS_OK) {
|
||||
perror("cpg_join");
|
||||
return -1;
|
||||
}
|
||||
|
||||
cpg_local_get(h, &my_node_id);
|
||||
dbg_printf(2, "My CPG nodeid is %d\n", my_node_id);
|
||||
|
||||
pthread_mutex_lock(&cpg_mutex);
|
||||
|
||||
cpg_local_get(h, &my_node_id);
|
||||
|
||||
pthread_create(&cpg_thread, NULL, cpg_dispatch_thread, NULL);
|
||||
|
||||
memcpy(&cpg_handle, &h, sizeof(h));
|
||||
|
||||
req_callback_fn = func;
|
||||
req_callback_fn = req_cb_fn;
|
||||
store_callback_fn = store_cb_fn;
|
||||
conf_join_fn = join_fn;
|
||||
conf_leave_fn = leave_fn;
|
||||
|
||||
pthread_mutex_unlock(&cpg_mutex);
|
||||
|
||||
@ -392,59 +403,8 @@ cpg_stop(void)
|
||||
{
|
||||
pthread_cancel(cpg_thread);
|
||||
pthread_join(cpg_thread, NULL);
|
||||
|
||||
cpg_leave(cpg_handle, &gname);
|
||||
cpg_finalize(cpg_handle);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
#ifdef STANDALONE
/* Set by the SIGINT handler; nothing in this test program reads it. */
int please_quit = 0;

/* SIGINT handler: only records that termination was requested. */
void
go_away(int sig)
{
	(void)sig;
	please_quit = 1;
}


/*
 * Request callback of the test program: print the received payload and
 * answer the sender with a fixed "fail." reply.
 */
void
request_callback(void *data, size_t len, uint32_t nodeid, uint32_t seqno)
{
	char reply[] = "fail.";

	/* The payload is not NUL-terminated (main() sends the 2 bytes of
	 * "hi"), so bound the print by the received length. */
	printf("msg = %.*s\n", (int)len, (const char *)data);

	/* sizeof(reply) is 6 and includes the NUL; the previous length of
	 * 7 read one byte past the end of the string literal. */
	cpg_send_reply(reply, sizeof(reply), nodeid, seqno);
}


/*
 * Minimal round trip: join the "lhh1" group, send one request, wait for
 * the reply, print it and leave the group again.
 */
int
main(int argc, char **argv)
{
	uint32_t seqno = 0;
	void *data = NULL;	/* cpg_wait_reply() takes a void ** */
	size_t len = 0;

	(void)argc;
	(void)argv;

	signal(SIGINT, go_away);

	if (cpg_start("lhh1", request_callback) < 0) {
		perror("cpg_start");
		return 1;
	}

	if (cpg_send_req("hi", 2, &seqno) < 0) {
		perror("cpg_send_req");
		cpg_stop();
		return 1;
	}

	cpg_wait_reply(&data, &len, seqno);

	/* Only print what was actually received. */
	if (data)
		printf("%.*s\n", (int)len, (const char *)data);

	printf("going bye\n");

	cpg_stop();

	return 0;
}
#endif
|
||||
|
@ -1,7 +1,7 @@
|
||||
#ifndef _CHECKPOINT_H
|
||||
#define _CHECKPOINT_H
|
||||
#ifndef __FENCE_VIRTD_CPG_H
|
||||
#define __FENCE_VIRTD_CPG_H
|
||||
|
||||
struct ckpt_fence_req {
|
||||
struct cpg_fence_req {
|
||||
char vm_name[128];
|
||||
int request;
|
||||
uint32_t seqno;
|
||||
@ -10,12 +10,20 @@ struct ckpt_fence_req {
|
||||
|
||||
typedef void (*request_callback_fn)(void *data, size_t len, uint32_t nodeid,
|
||||
uint32_t seqno);
|
||||
typedef void (*confchange_callback_fn)(const struct cpg_address *m, size_t len);
|
||||
|
||||
int cpg_start( const char *name,
|
||||
request_callback_fn func,
|
||||
request_callback_fn store_func,
|
||||
confchange_callback_fn join,
|
||||
confchange_callback_fn leave);
|
||||
|
||||
int cpg_get_ids(uint32_t *me, uint32_t *high);
|
||||
int cpg_start(const char *name, request_callback_fn func);
|
||||
int cpg_stop(void);
|
||||
int cpg_send_req(void *data, size_t len, uint32_t *seqno);
|
||||
int cpg_wait_reply(void **data, size_t *len, uint32_t seqno);
|
||||
int cpg_send_reply(void *data, size_t len, uint32_t nodeid, uint32_t seqno);
|
||||
int cpg_send_vm_state(virt_state_t *vs);
|
||||
|
||||
|
||||
#endif
|
101
server/virt.c
101
server/virt.c
@ -68,17 +68,20 @@ virt_list_t *vl_get(virConnectPtr *vp, int vp_count, int my_id)
|
||||
for (i = 0 ; i < vp_count ; i++) {
|
||||
int x;
|
||||
virDomainPtr *dom_list;
|
||||
virt_list_t *new_vl;
|
||||
|
||||
int ret = virConnectListAllDomains(vp[i], &dom_list, 0);
|
||||
if (ret <= 0)
|
||||
continue;
|
||||
|
||||
d_count += ret;
|
||||
vl = realloc(vl, sizeof(uint32_t) + sizeof(virt_state_t) * d_count);
|
||||
if (!vl) {
|
||||
new_vl = realloc(vl, sizeof(uint32_t) + sizeof(virt_state_t) * d_count);
|
||||
if (!new_vl) {
|
||||
_free_dom_list(dom_list, ret);
|
||||
free(vl);
|
||||
return NULL;
|
||||
}
|
||||
vl = new_vl;
|
||||
vl->vm_count = d_count;
|
||||
|
||||
/* Ok, we have the domain IDs - let's get their names and states */
|
||||
@ -123,6 +126,100 @@ virt_list_t *vl_get(virConnectPtr *vp, int vp_count, int my_id)
|
||||
return vl;
|
||||
}
|
||||
|
||||
int
|
||||
vl_add(virt_list_t **vl, virt_state_t *vm) {
|
||||
virt_list_t *new_vl;
|
||||
size_t oldlen;
|
||||
size_t newlen;
|
||||
|
||||
if (!vl)
|
||||
return -1;
|
||||
|
||||
if (!*vl) {
|
||||
*vl = malloc(sizeof(uint32_t) + sizeof(virt_state_t));
|
||||
if (!*vl)
|
||||
return -1;
|
||||
(*vl)->vm_count = 1;
|
||||
memcpy(&(*vl)->vm_states[0], vm, sizeof(virt_state_t));
|
||||
return 0;
|
||||
}
|
||||
|
||||
oldlen = sizeof(uint32_t) + sizeof(virt_state_t) * (*vl)->vm_count;
|
||||
newlen = oldlen + sizeof(virt_state_t);
|
||||
|
||||
new_vl = malloc(newlen);
|
||||
if (!new_vl)
|
||||
return -1;
|
||||
|
||||
memcpy(new_vl, *vl, oldlen);
|
||||
memcpy(&new_vl->vm_states[(*vl)->vm_count], vm, sizeof(virt_state_t));
|
||||
new_vl->vm_count++;
|
||||
|
||||
free(*vl);
|
||||
*vl = new_vl;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Drop every entry of *vl whose state is owned by the given node id.
 *
 * Matching entries get their state zeroed and the first byte of name and
 * UUID set to 0xff; the array is then sorted with _compare_virt, the
 * count is reduced and the allocation shrunk.  The 0xff marker
 * presumably makes the removed entries sort to the end -- confirm
 * against _compare_virt.
 *
 * Returns the number of removed entries (0 when vl or *vl is NULL).
 */
int vl_remove_by_owner(virt_list_t **vl, uint32_t owner) {
	virt_list_t *list;
	virt_list_t *shrunk;
	virt_state_t *entry;
	int removed = 0;
	int idx;

	if (!vl || !*vl)
		return 0;
	list = *vl;

	for (idx = 0 ; idx < list->vm_count ; idx++) {
		entry = &list->vm_states[idx];
		if (entry->v_state.s_owner != owner)
			continue;

		dbg_printf(2, "Removing %s\n", entry->v_name);
		memset(&entry->v_state, 0, sizeof(entry->v_state));
		entry->v_name[0] = 0xff;
		entry->v_uuid[0] = 0xff;
		removed++;
	}

	if (!removed)
		return 0;

	qsort(&list->vm_states[0], list->vm_count,
	      sizeof(list->vm_states[0]), _compare_virt);
	list->vm_count -= removed;

	/* A failed shrink is harmless: the old, larger block stays valid. */
	shrunk = realloc(list, sizeof(uint32_t) +
			 (sizeof(virt_state_t) * (list->vm_count)));
	if (shrunk)
		*vl = shrunk;
	return removed;
}
|
||||
|
||||
|
||||
int
|
||||
vl_update(virt_list_t **vl, virt_state_t *vm) {
|
||||
virt_state_t *v = NULL;
|
||||
|
||||
if (!vl)
|
||||
return -1;
|
||||
|
||||
if (!*vl)
|
||||
return vl_add(vl, vm);
|
||||
|
||||
if (strlen(vm->v_uuid) > 0)
|
||||
v = vl_find_uuid(*vl, vm->v_uuid);
|
||||
|
||||
if (v == NULL && strlen(vm->v_name) > 0)
|
||||
v = vl_find_name(*vl, vm->v_name);
|
||||
|
||||
if (v == NULL) {
|
||||
dbg_printf(2, "Adding new entry for VM %s\n", vm->v_name);
|
||||
vl_add(vl, vm);
|
||||
} else {
|
||||
dbg_printf(2, "Updating entry for VM %s\n", vm->v_name);
|
||||
memcpy(&v->v_state, &vm->v_state, sizeof(v->v_state));
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
vl_print(virt_list_t *vl)
|
||||
|
@ -26,22 +26,6 @@
|
||||
|
||||
#include "xvm.h"
|
||||
|
||||
/*
|
||||
Owner 0 = no owner.
|
||||
|
||||
checkpoint "xen-vm-states" {
|
||||
section "vm-name0" {
|
||||
owner_nodeid;
|
||||
vm_state;
|
||||
}
|
||||
section "vm-name1" {
|
||||
owner_nodeid;
|
||||
vm_state;
|
||||
}
|
||||
...
|
||||
}
|
||||
*/
|
||||
|
||||
typedef struct {
|
||||
uint32_t s_owner;
|
||||
int32_t s_state;
|
||||
@ -66,19 +50,13 @@ void vl_print(virt_list_t *vl);
|
||||
void vl_free(virt_list_t *old);
|
||||
virt_state_t *vl_find_uuid(virt_list_t *vl, const char *name);
|
||||
virt_state_t *vl_find_name(virt_list_t *vl, const char *name);
|
||||
int vl_add(virt_list_t **vl, virt_state_t *vm);
|
||||
int vl_update(virt_list_t **vl, virt_state_t *vm);
|
||||
int vl_remove_by_owner(virt_list_t **vl, uint32_t owner);
|
||||
|
||||
int vm_off(virConnectPtr *vp, int vp_count, const char *vm_name);
|
||||
int vm_on(virConnectPtr *vp, int vp_count, const char *vm_name);
|
||||
int vm_status(virConnectPtr *vp, int vp_count, const char *vm_name);
|
||||
int vm_reboot(virConnectPtr *vp, int vp_count, const char *vm_name);
|
||||
|
||||
typedef void ckpt_handle;
|
||||
int ckpt_read(void *hp, const char *secid, void *buf, size_t maxlen);
|
||||
int ckpt_finish(void *hp);
|
||||
int ckpt_write(void *hp, const char *secid, void *buf, size_t maxlen);
|
||||
int ckpt_erase(void *hp, const char *secid);
|
||||
void *ckpt_init(const char *ckpt_name, int maxlen, int maxsec, int maxseclen,
|
||||
int timeout);
|
||||
|
||||
|
||||
#endif
|
||||
|
@ -1,458 +0,0 @@
|
||||
/*
|
||||
Copyright Red Hat, Inc. 2006
|
||||
|
||||
This program is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by the
|
||||
Free Software Foundation; either version 2, or (at your option) any
|
||||
later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; see the file COPYING. If not, write to the
|
||||
Free Software Foundation, Inc., 675 Mass Ave, Cambridge,
|
||||
MA 02139, USA.
|
||||
*/
|
||||
/** @file
|
||||
* Distributed VM states using saCkpt interface
|
||||
*/
|
||||
#include <string.h>
|
||||
#include <errno.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/resource.h>
|
||||
#include <sys/wait.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/time.h>
|
||||
#include <pthread.h>
|
||||
#include <openais/saAis.h>
|
||||
#include <openais/saCkpt.h>
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
#include "xvm.h"
|
||||
|
||||
typedef struct {
|
||||
uint32_t ck_ready;
|
||||
int ck_timeout;
|
||||
SaCkptCheckpointHandleT ck_checkpoint;
|
||||
SaCkptHandleT ck_handle;
|
||||
char *ck_name;
|
||||
} ckpt_handle;
|
||||
|
||||
|
||||
/* Marker stored in ck_ready while a handle is initialized. */
#define READY_MAGIC 0x13fd237c

/*
 * Leave the calling function with -1 / EINVAL unless h is a non-NULL
 * handle carrying READY_MAGIC.  The argument is parenthesized so that
 * any pointer expression can be passed, not just a plain identifier.
 */
#define VALIDATE(h) \
do { \
	if (!(h) || (h)->ck_ready != READY_MAGIC) { \
		errno = EINVAL; \
		return -1; \
	} \
} while(0)
|
||||
|
||||
int ais_to_posix(SaAisErrorT err);

/*
 * Translate an AIS error code into the closest errno value.
 *
 * SA_AIS_OK maps to 0; codes without a dedicated mapping fall back to
 * EINVAL.  Mappings marked XXX are approximations carried over as-is.
 * Every path of the switch returns, so no trailing return is needed.
 */
int
ais_to_posix(SaAisErrorT err)
{
	switch (err) {
	case SA_AIS_OK:
		return 0;
	case SA_AIS_ERR_LIBRARY:
		return ELIBBAD;
	case SA_AIS_ERR_VERSION:
		return EPROTONOSUPPORT;	/* XXX */
	case SA_AIS_ERR_INIT:
		return EFAULT;		/* XXX */
	case SA_AIS_ERR_TIMEOUT:
		return ETIMEDOUT;
	case SA_AIS_ERR_TRY_AGAIN:
		return EAGAIN;
	case SA_AIS_ERR_NO_MEMORY:
	case SA_AIS_ERR_NO_RESOURCES:	/* XXX */
		return ENOMEM;
	case SA_AIS_ERR_BAD_HANDLE:
		return EBADF;
	case SA_AIS_ERR_BUSY:
		return EBUSY;
	case SA_AIS_ERR_ACCESS:
		return EACCES;
	case SA_AIS_ERR_NOT_EXIST:
	case SA_AIS_ERR_NAME_NOT_FOUND:
	case SA_AIS_ERR_QUEUE_NOT_AVAILABLE:
	case SA_AIS_ERR_NO_SECTIONS:	/* XXX */
		return ENOENT;
	case SA_AIS_ERR_NAME_TOO_LONG:
		return ENAMETOOLONG;
	case SA_AIS_ERR_EXIST:
		return EEXIST;
	case SA_AIS_ERR_NO_SPACE:
		return ENOSPC;
	case SA_AIS_ERR_INTERRUPT:
		return EINTR;
	case SA_AIS_ERR_NOT_SUPPORTED:
		return ENOSYS;
	case SA_AIS_ERR_FAILED_OPERATION:	/* XXX */
	case SA_AIS_ERR_MESSAGE_ERROR:		/* XXX */
		return EIO;
	case SA_AIS_ERR_QUEUE_FULL:
		return ENOBUFS;
	case SA_AIS_ERR_TOO_BIG:
		return E2BIG;
	case SA_AIS_ERR_INVALID_PARAM:
	case SA_AIS_ERR_BAD_OPERATION:	/* XXX */
	case SA_AIS_ERR_BAD_FLAGS:
	/* SA_AIS_ERR_SECURITY (would be EPERM) is deliberately not listed. */
	default:
		return EINVAL;		/* XXX */
	}
}
|
||||
|
||||
|
||||
static int
|
||||
ckpt_open(ckpt_handle *h, const char *ckpt_name, int maxsize,
|
||||
int maxsec, int maxsecsize, int timeout)
|
||||
{
|
||||
SaCkptCheckpointCreationAttributesT attrs;
|
||||
SaCkptCheckpointOpenFlagsT flags;
|
||||
SaNameT cpname;
|
||||
#if 0
|
||||
SaCkptCheckpointDescriptorT status;
|
||||
#endif
|
||||
SaAisErrorT err = SA_AIS_OK;
|
||||
|
||||
VALIDATE(h);
|
||||
|
||||
flags = SA_CKPT_CHECKPOINT_READ |
|
||||
SA_CKPT_CHECKPOINT_WRITE;
|
||||
|
||||
snprintf((char *)cpname.value, SA_MAX_NAME_LENGTH-1,
|
||||
"%s", ckpt_name);
|
||||
cpname.length = strlen(ckpt_name);
|
||||
|
||||
h->ck_timeout = timeout;
|
||||
|
||||
err = saCkptCheckpointOpen(h->ck_handle,
|
||||
&cpname,
|
||||
NULL,
|
||||
flags,
|
||||
timeout,
|
||||
&h->ck_checkpoint);
|
||||
|
||||
if (err == SA_AIS_OK) {
|
||||
#if 0
|
||||
saCkptCheckpointStatusGet(h->ck_handle,
|
||||
&status);
|
||||
|
||||
printf("Checkpoint Size = %d bytes\n", (int)
|
||||
status.checkpointCreationAttributes.checkpointSize);
|
||||
printf("Flags = ");
|
||||
if (status.checkpointCreationAttributes.creationFlags &
|
||||
SA_CKPT_WR_ALL_REPLICAS) {
|
||||
printf("%s ", "SA_CKPT_WR_ALL_REPLICAS");
|
||||
}
|
||||
if (status.checkpointCreationAttributes.creationFlags &
|
||||
SA_CKPT_WR_ACTIVE_REPLICA) {
|
||||
printf("%s ", "SA_CKPT_WR_ACTIVE_REPLICA");
|
||||
}
|
||||
if (status.checkpointCreationAttributes.creationFlags &
|
||||
SA_CKPT_WR_ACTIVE_REPLICA_WEAK) {
|
||||
printf("%s ", "SA_CKPT_WR_ACTIVE_REPLICA_WEAK");
|
||||
}
|
||||
if (status.checkpointCreationAttributes.creationFlags &
|
||||
SA_CKPT_CHECKPOINT_COLLOCATED) {
|
||||
printf("%s ", "SA_CKPT_CHECKPOINT_COLLOCATED");
|
||||
}
|
||||
printf("\nMax sections = %d\n",
|
||||
(int)status.checkpointCreationAttributes.maxSections);
|
||||
printf("Max section size = %d\n",
|
||||
(int)status.checkpointCreationAttributes.maxSectionSize);
|
||||
printf("Max section ID size = %d\n",
|
||||
(int)status.checkpointCreationAttributes.maxSectionIdSize);
|
||||
printf("Section count = %d\n", status.numberOfSections);
|
||||
printf("\n");
|
||||
#endif
|
||||
goto good;
|
||||
}
|
||||
|
||||
attrs.creationFlags = SA_CKPT_WR_ALL_REPLICAS;
|
||||
attrs.checkpointSize = (SaSizeT)maxsize;
|
||||
attrs.retentionDuration = SA_TIME_ONE_MINUTE;
|
||||
attrs.maxSections = maxsec;
|
||||
attrs.maxSectionSize = (SaSizeT)maxsecsize;
|
||||
attrs.maxSectionIdSize = (SaSizeT)MAX_DOMAINNAME_LENGTH;
|
||||
|
||||
flags = SA_CKPT_CHECKPOINT_READ |
|
||||
SA_CKPT_CHECKPOINT_WRITE |
|
||||
SA_CKPT_CHECKPOINT_CREATE;
|
||||
|
||||
err = saCkptCheckpointOpen(h->ck_handle,
|
||||
&cpname,
|
||||
&attrs,
|
||||
flags,
|
||||
timeout,
|
||||
&h->ck_checkpoint);
|
||||
if (err == SA_AIS_OK)
|
||||
goto good;
|
||||
|
||||
/* No checkpoint */
|
||||
errno = ais_to_posix(err);
|
||||
return (errno == 0 ? 0 : -1);
|
||||
good:
|
||||
printf("Opened ckpt %s\n", ckpt_name);
|
||||
h->ck_name = strdup(ckpt_name);
|
||||
|
||||
errno = ais_to_posix(err);
|
||||
return (errno == 0 ? 0 : -1);
|
||||
}
|
||||
|
||||
|
||||
void *
|
||||
ckpt_init(char *ckpt_name, int maxlen, int maxsec,
|
||||
int maxseclen, int timeout)
|
||||
{
|
||||
ckpt_handle *h;
|
||||
SaAisErrorT err;
|
||||
SaVersionT ver;
|
||||
|
||||
if (!ckpt_name || !strlen(ckpt_name)) {
|
||||
errno = EINVAL;
|
||||
return NULL;
|
||||
}
|
||||
h = malloc(sizeof(*h));
|
||||
if (!h)
|
||||
return NULL;
|
||||
memset(h, 0, sizeof(*h));
|
||||
|
||||
ver.releaseCode = 'B';
|
||||
ver.majorVersion = 1;
|
||||
ver.minorVersion = 1;
|
||||
|
||||
err = saCkptInitialize(&h->ck_handle, NULL, &ver);
|
||||
|
||||
if (err != SA_AIS_OK) {
|
||||
free(h);
|
||||
return NULL;
|
||||
} else {
|
||||
h->ck_ready = READY_MAGIC;
|
||||
}
|
||||
|
||||
if (ckpt_open(h, ckpt_name, maxlen, maxsec, maxseclen,
|
||||
timeout) < 0) {
|
||||
saCkptCheckpointClose(h->ck_checkpoint);
|
||||
if (h->ck_name)
|
||||
free(h->ck_name);
|
||||
free(h);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return (void *)h;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
ckpt_write(void *hp, const char *secid, void *buf, size_t maxlen)
|
||||
{
|
||||
ckpt_handle *h = (ckpt_handle *)hp;
|
||||
SaCkptIOVectorElementT iov = {SA_CKPT_DEFAULT_SECTION_ID,
|
||||
NULL, 0, 0, 0};
|
||||
SaAisErrorT err;
|
||||
SaCkptSectionCreationAttributesT attrs;
|
||||
|
||||
VALIDATE(h);
|
||||
|
||||
/* Set section ID here */
|
||||
iov.sectionId.id = (uint8_t *)secid;
|
||||
iov.sectionId.idLen = strlen(secid);
|
||||
iov.dataBuffer = buf;
|
||||
iov.dataSize = (SaSizeT)maxlen;
|
||||
iov.dataOffset = 0;
|
||||
iov.readSize = 0;
|
||||
|
||||
err = saCkptCheckpointWrite(h->ck_checkpoint, &iov, 1, NULL);
|
||||
|
||||
if (err == SA_AIS_ERR_NOT_EXIST) {
|
||||
attrs.sectionId = &iov.sectionId;
|
||||
attrs.expirationTime = SA_TIME_END;
|
||||
|
||||
err = saCkptSectionCreate(h->ck_checkpoint, &attrs,
|
||||
buf, maxlen);
|
||||
}
|
||||
|
||||
if (err == SA_AIS_OK)
|
||||
saCkptCheckpointSynchronize(h->ck_checkpoint,
|
||||
h->ck_timeout);
|
||||
|
||||
errno = ais_to_posix(err);
|
||||
if (errno)
|
||||
return -1;
|
||||
return maxlen; /* XXX */
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
ckpt_read(void *hp, const char *secid, void *buf, size_t maxlen)
|
||||
{
|
||||
ckpt_handle *h = (ckpt_handle *)hp;
|
||||
SaCkptIOVectorElementT iov = {SA_CKPT_DEFAULT_SECTION_ID,
|
||||
NULL, 0, 0, 0};
|
||||
SaAisErrorT err;
|
||||
|
||||
VALIDATE(h);
|
||||
//printf("reading ckpt %s\n", keyid);
|
||||
|
||||
iov.sectionId.id = (uint8_t *)secid;
|
||||
iov.sectionId.idLen = strlen(secid);
|
||||
iov.dataBuffer = buf;
|
||||
iov.dataSize = (SaSizeT)maxlen;
|
||||
iov.dataOffset = 0;
|
||||
iov.readSize = 0;
|
||||
|
||||
err = saCkptCheckpointRead(h->ck_checkpoint, &iov, 1, NULL);
|
||||
|
||||
errno = ais_to_posix(err);
|
||||
if (errno)
|
||||
return -1;
|
||||
return iov.readSize; /* XXX */
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
ckpt_erase(void *hp, const char *secid)
|
||||
{
|
||||
ckpt_handle *h = (ckpt_handle *)hp;
|
||||
SaAisErrorT err;
|
||||
SaCkptSectionIdT sectionId;
|
||||
VALIDATE(h);
|
||||
|
||||
sectionId.id = (uint8_t *)secid;
|
||||
sectionId.idLen = strlen(secid);
|
||||
|
||||
err = saCkptSectionDelete(h->ck_checkpoint, §ionId);
|
||||
|
||||
if (err == SA_AIS_OK)
|
||||
saCkptCheckpointSynchronize(h->ck_checkpoint,
|
||||
h->ck_timeout);
|
||||
|
||||
errno = ais_to_posix(err);
|
||||
if (errno)
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
ckpt_finish(void *hp)
|
||||
{
|
||||
ckpt_handle *h = (ckpt_handle *)hp;
|
||||
int ret = 0;
|
||||
SaAisErrorT err;
|
||||
|
||||
saCkptCheckpointClose(h->ck_checkpoint);
|
||||
err = saCkptFinalize(h->ck_handle);
|
||||
|
||||
if (err != SA_AIS_OK)
|
||||
ret = -1;
|
||||
else
|
||||
h->ck_ready = 0;
|
||||
|
||||
if (h->ck_name)
|
||||
free(h->ck_name);
|
||||
|
||||
if (ret != 0)
|
||||
errno = ais_to_posix(err);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
#ifdef STANDALONE
|
||||
/*
 * Print the command-line synopsis of the standalone test tool to stdout
 * and terminate the process with exit status ret.  Does not return.
 */
void
usage(int ret)
{
	fputs("usage: ckpt [-c ckpt_name] <-r key|-w key -d data>\n", stdout);
	exit(ret);
}
|
||||
|
||||
int
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
char *ckptname = "ckpt_test";
|
||||
char *sec = "default";
|
||||
char *val;
|
||||
void *h;
|
||||
char buf[64];
|
||||
int ret;
|
||||
int op = 0;
|
||||
|
||||
while((ret = getopt(argc, argv, "c:w:r:d:j?")) != EOF) {
|
||||
switch(ret) {
|
||||
case 'c':
|
||||
ckptname = optarg;
|
||||
break;
|
||||
case 'w':
|
||||
op = 'w';
|
||||
sec = optarg;
|
||||
break;
|
||||
case 'r':
|
||||
op = 'r';
|
||||
sec = optarg;
|
||||
break;
|
||||
case 'd':
|
||||
val = optarg;
|
||||
break;
|
||||
case '?':
|
||||
case 'h':
|
||||
usage(0);
|
||||
default:
|
||||
usage(1);
|
||||
}
|
||||
}
|
||||
|
||||
if (!op) {
|
||||
usage(1);
|
||||
}
|
||||
|
||||
if (!sec) {
|
||||
usage(1);
|
||||
}
|
||||
|
||||
h = ckpt_init(ckptname, 262144, 4096, 64, 10);
|
||||
if (!h) {
|
||||
perror("ckpt_init");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (op == 'w') {
|
||||
if (ckpt_write(h, sec, val, strlen(val)+1) < 0) {
|
||||
perror("ckpt_write");
|
||||
return 1;
|
||||
}
|
||||
} else if (op == 'r') {
|
||||
ret = ckpt_read(h, sec, buf, sizeof(buf));
|
||||
if (ret < 0) {
|
||||
perror("ckpt_read");
|
||||
return 1;
|
||||
}
|
||||
|
||||
printf("%d bytes\nDATA for '%s':\n%s\n", ret, sec,
|
||||
buf);
|
||||
}
|
||||
|
||||
ckpt_finish(h);
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif
|
Loading…
Reference in New Issue
Block a user