1
0
mirror of https://github.com/samba-team/samba.git synced 2025-01-11 05:18:09 +03:00
samba-mirror/ctdb/utils/ceph/ctdb_mutex_ceph_rados_helper.c
David Disseldorp d8b61863ec ctdb: cluster mutex helper using Ceph RADOS
ctdb_mutex_ceph_rados_helper implements the cluster mutex helper API
atop Ceph using the librados rados_lock_exclusive()/rados_unlock()
functionality.

Once configured, split brain avoidance during CTDB recovery will be
handled using locks against an object located in a Ceph RADOS pool.

Signed-off-by: David Disseldorp <ddiss@samba.org>
Reviewed-by: Amitay Isaacs <amitay@gmail.com>
2016-12-09 04:10:20 +01:00

329 lines
8.7 KiB
C

/*
CTDB mutex helper using Ceph librados locks
Copyright (C) David Disseldorp 2016
Based on ctdb_mutex_fcntl_helper.c, which is:
Copyright (C) Martin Schwenke 2015
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "replace.h"
#include "tevent.h"
#include "talloc.h"
#include "rados/librados.h"
/* Name of the RADOS lock taken on the target object. */
#define CTDB_MUTEX_CEPH_LOCK_NAME "ctdb_reclock_mutex"
/* The lock cookie simply reuses the lock name. */
#define CTDB_MUTEX_CEPH_LOCK_COOKIE CTDB_MUTEX_CEPH_LOCK_NAME
/* Description string handed to librados together with the lock request. */
#define CTDB_MUTEX_CEPH_LOCK_DESC "CTDB recovery lock"
/*
 * Single-character status codes that main() writes to stdout.
 * CTDB_MUTEX_STATUS_TIMEOUT is defined here but not emitted anywhere in
 * this file.
 */
#define CTDB_MUTEX_STATUS_HOLDING "0"
#define CTDB_MUTEX_STATUS_CONTENDED "1"
#define CTDB_MUTEX_STATUS_TIMEOUT "2"
#define CTDB_MUTEX_STATUS_ERROR "3"
/* Program name (set from argv[0] in main()), used to prefix diagnostics. */
static char *progname = NULL;
/*
 * Connect to a Ceph cluster and open an I/O context on one of its pools.
 *
 * ceph_cluster_name: cluster name passed to rados_create2()
 * ceph_auth_name:    user name passed to rados_create2()
 * pool_name:         pool the I/O context is created for
 * _ceph_cluster:     receives the connected cluster handle on success
 * _ioctx:            receives the pool I/O context on success
 *
 * Returns 0 on success. On failure the negative value reported by the
 * failing librados call is returned, a diagnostic is written to stderr,
 * any cluster handle created so far is shut down and the output
 * parameters are left untouched.
 */
static int ctdb_mutex_rados_ctx_create(const char *ceph_cluster_name,
				       const char *ceph_auth_name,
				       const char *pool_name,
				       rados_t *_ceph_cluster,
				       rados_ioctx_t *_ioctx)
{
	rados_t cluster = NULL;
	rados_ioctx_t pool_ctx = NULL;
	int rc;

	rc = rados_create2(&cluster, ceph_cluster_name, ceph_auth_name, 0);
	if (rc < 0) {
		/* nothing was created, so there is nothing to shut down */
		fprintf(stderr,
			"%s: failed to initialise Ceph cluster %s as %s - (%s)\n",
			progname, ceph_cluster_name, ceph_auth_name,
			strerror(-rc));
		return rc;
	}

	/* path=NULL tells librados to use default locations */
	rc = rados_conf_read_file(cluster, NULL);
	if (rc < 0) {
		fprintf(stderr,
			"%s: failed to parse Ceph cluster config - (%s)\n",
			progname, strerror(-rc));
		goto shutdown;
	}

	rc = rados_connect(cluster);
	if (rc < 0) {
		fprintf(stderr,
			"%s: failed to connect to Ceph cluster %s as %s - (%s)\n",
			progname, ceph_cluster_name, ceph_auth_name,
			strerror(-rc));
		goto shutdown;
	}

	rc = rados_ioctx_create(cluster, pool_name, &pool_ctx);
	if (rc < 0) {
		fprintf(stderr,
			"%s: failed to create Ceph ioctx for pool %s - (%s)\n",
			progname, pool_name, strerror(-rc));
		goto shutdown;
	}

	/* hand both handles over to the caller only once everything worked */
	*_ceph_cluster = cluster;
	*_ioctx = pool_ctx;
	return 0;

shutdown:
	rados_shutdown(cluster);
	return rc;
}
/*
 * Tear down the handles produced by ctdb_mutex_rados_ctx_create().
 *
 * The I/O context is destroyed before the cluster handle is shut down.
 * Either handle may be NULL, in which case it is skipped: the helper's
 * state is zero-initialised and the event callbacks call this function
 * from their defensive "without mutex" paths too, where the handles are
 * not guaranteed to have been set up.
 */
static void ctdb_mutex_rados_ctx_destroy(rados_t ceph_cluster,
					 rados_ioctx_t ioctx)
{
	if (ioctx != NULL) {
		rados_ioctx_destroy(ioctx);
	}
	if (ceph_cluster != NULL) {
		rados_shutdown(ceph_cluster);
	}
}
/*
 * Try to take the exclusive CTDB lock on a RADOS object.
 *
 * ioctx: I/O context for the pool holding the object. This is the handle
 *        itself (as stored in the helper state and passed by all callers),
 *        not a pointer to it.
 * oid:   name of the object to lock
 *
 * Returns 0 once the lock is held. -EEXIST and -EBUSY are returned
 * silently so the caller can report contention; any other negative
 * librados result is logged to stderr and returned unchanged.
 */
static int ctdb_mutex_rados_lock(rados_ioctx_t ioctx,
				 const char *oid)
{
	int ret;

	ret = rados_lock_exclusive(ioctx, oid,
				   CTDB_MUTEX_CEPH_LOCK_NAME,
				   CTDB_MUTEX_CEPH_LOCK_COOKIE,
				   CTDB_MUTEX_CEPH_LOCK_DESC,
				   NULL, /* infinite duration */
				   0);
	if ((ret == -EEXIST) || (ret == -EBUSY)) {
		/* lock contention */
		return ret;
	} else if (ret < 0) {
		/* unexpected failure */
		fprintf(stderr,
			"%s: Failed to get lock on RADOS object '%s' - (%s)\n",
			progname, oid, strerror(-ret));
		return ret;
	}

	/* lock obtained */
	return 0;
}
/*
 * Release the CTDB lock on a RADOS object.
 *
 * ioctx: I/O context for the pool holding the object. This is the handle
 *        itself (as stored in the helper state and passed by all callers),
 *        not a pointer to it.
 * oid:   name of the locked object
 *
 * Returns 0 on success. A negative librados result is logged to stderr
 * and returned unchanged.
 */
static int ctdb_mutex_rados_unlock(rados_ioctx_t ioctx,
				   const char *oid)
{
	int ret;

	ret = rados_unlock(ioctx, oid,
			   CTDB_MUTEX_CEPH_LOCK_NAME,
			   CTDB_MUTEX_CEPH_LOCK_COOKIE);
	if (ret < 0) {
		fprintf(stderr,
			"%s: Failed to drop lock on RADOS object '%s' - (%s)\n",
			progname, oid, strerror(-ret));
		return ret;
	}

	return 0;
}
/*
 * Everything the helper needs while it runs; allocated once in main() and
 * handed to the tevent callbacks as their private data.
 */
struct ctdb_mutex_rados_state {
/* set to true in main() once the RADOS lock has been obtained */
bool holding_mutex;
/* command line arguments 1-4, in order */
const char *ceph_cluster_name;
const char *ceph_auth_name;
const char *pool_name;
const char *object;
/* parent PID as returned by getppid() at startup; polled by the timer */
int ppid;
/* event context plus the SIGTERM and parent-check events registered on it */
struct tevent_context *ev;
struct tevent_signal *sig_ev;
struct tevent_timer *timer_ev;
/* librados handles filled in by ctdb_mutex_rados_ctx_create() */
rados_t ceph_cluster;
rados_ioctx_t ioctx;
};
/*
 * tevent signal handler for SIGTERM: release the lock, tear everything
 * down and terminate the process.
 *
 * private_data is the struct ctdb_mutex_rados_state set up by main(); the
 * remaining arguments are unused. This function does not return. The
 * process exits with status 0 if the lock was dropped successfully, and
 * with status 1 if the unlock failed or the handler ran while the mutex
 * was not held.
 */
static void ctdb_mutex_rados_sigterm_cb(struct tevent_context *ev,
					struct tevent_signal *se,
					int signum,
					int count,
					void *siginfo,
					void *private_data)
{
	struct ctdb_mutex_rados_state *state = private_data;
	int exit_status;

	if (state->holding_mutex) {
		int unlock_rc = ctdb_mutex_rados_unlock(state->ioctx,
							state->object);
		exit_status = (unlock_rc != 0) ? 1 : 0;
	} else {
		/* should not happen: the handler only makes sense with a lock */
		fprintf(stderr, "Sigterm callback invoked without mutex!\n");
		exit_status = 1;
	}

	ctdb_mutex_rados_ctx_destroy(state->ceph_cluster, state->ioctx);
	talloc_free(state);
	exit(exit_status);
}
/*
 * tevent timer handler: check whether the parent process still exists.
 *
 * While the parent is around (or its state cannot be determined), the
 * timer is re-armed for another 5 seconds and the function returns. Once
 * kill(ppid, 0) reports ESRCH the lock is released, the Ceph handles and
 * state are freed and the process exits: status 0 if the unlock worked,
 * 1 otherwise. Being invoked without holding the mutex is treated as an
 * error and also ends the process with status 1.
 *
 * private_data is the struct ctdb_mutex_rados_state set up by main(); the
 * remaining arguments are unused.
 */
static void ctdb_mutex_rados_timer_cb(struct tevent_context *ev,
				      struct tevent_timer *te,
				      struct timeval current_time,
				      void *private_data)
{
	struct ctdb_mutex_rados_state *state = private_data;
	int status;

	if (!state->holding_mutex) {
		fprintf(stderr, "Timer callback invoked without mutex!\n");
		status = -EINVAL;
	} else {
		/* only a failed probe with ESRCH proves the parent is gone */
		const bool parent_gone = (kill(state->ppid, 0) != 0) &&
					 (errno == ESRCH);

		if (!parent_gone) {
			/* parent still around, keep waiting */
			state->timer_ev = tevent_add_timer(
					state->ev, state,
					tevent_timeval_current_ofs(5, 0),
					ctdb_mutex_rados_timer_cb,
					state);
			if (state->timer_ev == NULL) {
				fprintf(stderr,
					"Failed to create timer event\n");
				/* rely on signal cb */
			}
			return;
		}

		/* parent ended, drop lock and exit */
		status = ctdb_mutex_rados_unlock(state->ioctx, state->object);
	}

	ctdb_mutex_rados_ctx_destroy(state->ceph_cluster, state->ioctx);
	talloc_free(state);
	exit(status ? 1 : 0);
}
/*
 * Cluster mutex helper entry point.
 *
 * Usage: <progname> <Ceph Cluster> <Ceph user> <RADOS pool> <RADOS object>
 *
 * Registers a SIGTERM handler and a 5 second parent-liveness timer,
 * connects to Ceph and attempts to take the exclusive lock on the given
 * object. The outcome is reported as a single status character on
 * (unbuffered) stdout: CTDB_MUTEX_STATUS_HOLDING, _CONTENDED or _ERROR.
 * When the lock is obtained the process stays in the event loop until one
 * of the callbacks releases the lock and exits.
 *
 * Returns 0 only if the event loop ends cleanly and the lock could be
 * dropped; every failure path returns 1.
 */
int main(int argc, char *argv[])
{
	int ret;
	int unlock_ret;
	struct ctdb_mutex_rados_state *cmr_state;

	progname = argv[0];

	if (argc != 5) {
		fprintf(stderr, "Usage: %s <Ceph Cluster> <Ceph user> "
				"<RADOS pool> <RADOS object>\n",
			progname);
		ret = -EINVAL;
		goto err_out;
	}

	/* status characters must reach the parent immediately */
	ret = setvbuf(stdout, NULL, _IONBF, 0);
	if (ret != 0) {
		fprintf(stderr, "Failed to configure unbuffered stdout I/O\n");
	}

	cmr_state = talloc_zero(NULL, struct ctdb_mutex_rados_state);
	if (cmr_state == NULL) {
		fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
		ret = -ENOMEM;
		goto err_out;
	}

	cmr_state->ceph_cluster_name = argv[1];
	cmr_state->ceph_auth_name = argv[2];
	cmr_state->pool_name = argv[3];
	cmr_state->object = argv[4];

	cmr_state->ppid = getppid();
	if (cmr_state->ppid == 1) {
		/*
		 * The original parent is gone and the process has
		 * been reparented to init. This can happen if the
		 * helper is started just as the parent is killed
		 * during shutdown. The error message doesn't need to
		 * be stellar, since there won't be anything around to
		 * capture and log it...
		 */
		fprintf(stderr, "%s: PPID == 1\n", progname);
		ret = -EPIPE;
		goto err_state_free;
	}

	cmr_state->ev = tevent_context_init(cmr_state);
	if (cmr_state->ev == NULL) {
		fprintf(stderr, "tevent_context_init failed\n");
		fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
		ret = -ENOMEM;
		goto err_state_free;
	}

	/* wait for sigterm */
	cmr_state->sig_ev = tevent_add_signal(cmr_state->ev, cmr_state,
					      SIGTERM, 0,
					      ctdb_mutex_rados_sigterm_cb,
					      cmr_state);
	if (cmr_state->sig_ev == NULL) {
		fprintf(stderr, "Failed to create signal event\n");
		fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
		ret = -ENOMEM;
		goto err_state_free;
	}

	/* periodically check parent */
	cmr_state->timer_ev = tevent_add_timer(cmr_state->ev, cmr_state,
					       tevent_timeval_current_ofs(5, 0),
					       ctdb_mutex_rados_timer_cb,
					       cmr_state);
	if (cmr_state->timer_ev == NULL) {
		fprintf(stderr, "Failed to create timer event\n");
		fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
		ret = -ENOMEM;
		goto err_state_free;
	}

	ret = ctdb_mutex_rados_ctx_create(cmr_state->ceph_cluster_name,
					  cmr_state->ceph_auth_name,
					  cmr_state->pool_name,
					  &cmr_state->ceph_cluster,
					  &cmr_state->ioctx);
	if (ret < 0) {
		fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
		goto err_state_free;
	}

	ret = ctdb_mutex_rados_lock(cmr_state->ioctx, cmr_state->object);
	if ((ret == -EEXIST) || (ret == -EBUSY)) {
		fprintf(stdout, CTDB_MUTEX_STATUS_CONTENDED);
		goto err_ctx_cleanup;
	} else if (ret < 0) {
		fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
		goto err_ctx_cleanup;
	}

	cmr_state->holding_mutex = true;
	fprintf(stdout, CTDB_MUTEX_STATUS_HOLDING);

	/* wait for the signal / timer events to do their work */
	ret = tevent_loop_wait(cmr_state->ev);

	/*
	 * Both callbacks call exit() themselves, so getting here means the
	 * event loop stopped without either of them releasing the lock. The
	 * lock was requested without an expiry, so drop it explicitly rather
	 * than leaving it behind when this process goes away.
	 */
	unlock_ret = ctdb_mutex_rados_unlock(cmr_state->ioctx,
					     cmr_state->object);
	cmr_state->holding_mutex = false;
	if (ret == 0) {
		/* do not report success if the lock could not be dropped */
		ret = unlock_ret;
	}

err_ctx_cleanup:
	ctdb_mutex_rados_ctx_destroy(cmr_state->ceph_cluster,
				     cmr_state->ioctx);
err_state_free:
	talloc_free(cmr_state);
err_out:
	return ret ? 1 : 0;
}