1
0
mirror of git://sourceware.org/git/lvm2.git synced 2024-12-22 17:35:59 +03:00
lvm2/daemons/lvmetad/lvmetad-core.c
David Teigland a7c45ddc59 lvmetad: two phase vg_update
Previously, a command sent lvmetad new VG metadata in vg_commit().
In vg_commit(), devices are suspended, so any memory allocation
done by the command while sending to lvmetad, or by lvmetad while
updating its cache could deadlock if memory reclaim was triggered.

Now lvmetad is updated in unlock_vg(), after devices are resumed.
The new method for updating VG metadata in lvmetad is in two phases:

1. In vg_write(), before devices are suspended, the command sends
   lvmetad a short message ("set_vg_info") telling it what the new
   VG seqno will be.  lvmetad sees that the seqno is newer than
   the seqno of its cached VG, so it sets the INVALID flag for the
   cached VG.  If sending the message to lvmetad fails, the command
   fails before the metadata is committed and the change is not made.
   If sending the message succeeds, vg_commit() is called.

2. In unlock_vg(), after devices are resumed, the command sends
   lvmetad the standard vg_update message with the new metadata.
   lvmetad sees that the seqno in the new metadata matches the
   seqno it saved from set_vg_info, and knows it has the latest
   copy, so it clears the INVALID flag for the cached VG.

If a command fails between 1 and 2 (after committing the VG on disk,
but before sending lvmetad the new metadata), the cached VG retains
the INVALID flag in lvmetad.  A subsequent command will read the
cached VG from lvmetad, see the INVALID flag, ignore the cached
copy, read the VG from disk instead, update the lvmetad copy
with the latest copy from disk, (this clears the INVALID flag
in lvmetad), and use the correct VG metadata for the command.

(This INVALID mechanism already existed for use by lvmlockd.)
2016-06-28 02:30:31 +01:00

2999 lines
89 KiB
C

/*
* Copyright (C) 2012-2015 Red Hat, Inc.
*
* This file is part of LVM2.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU Lesser General Public License v.2.1.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#define _XOPEN_SOURCE 500 /* pthread */
#define _REENTRANT
#include "tool.h"
#include "daemon-io.h"
#include "daemon-server.h"
#include "daemon-log.h"
#include "lvm-version.h"
#include "lvmetad-client.h"
#include <assert.h>
#include <errno.h>
#include <pthread.h>
#define LVMETAD_SOCKET DEFAULT_RUN_DIR "/lvmetad.socket"
/*
* cache states:
* . Empty: no devices visible to the system have been added to lvmetad
* . Scanning: some devices visible to the system have been added to lvmetad
* . Initialized: all devices visible to the system have been added to lvmetad
* . Outdated: event on system or storage is not yet processed by lvmetad
* Outdated variations:
* - MissingDev: device added to system, not yet added to lvmetad
* - RemovedDev: device removed from system, not yet removed from lvmetad
* - MissingVG: new vg is written on disk, not yet added to lvmetad
* - RemovedVG: vg is removed on disk, not yet removed in lvmetad
* - ChangedVG: vg metadata is changed on disk, not yet updated in lvmetad
* - MissingPV: new pv is written on disk, not yet added to lvmetad
* - RemovedPV: pv is removed on disk, not yet removed in lvmetad
* - ChangedPV: pv metadata is changed on disk, not yet updated in lvmetad
* . Updated: events have been processed by lvmetad
*
* state transitions:
* . Empty -> Scanning
* . Scanning -> Initialized
* . Initialized -> Scanning
* . Initialized -> Outdated
* . Outdated -> Updated
* . Updated -> Outdated
* . Updated -> Scanning
* . Outdated -> Scanning
*
* state transitions caused by:
* . Empty is caused by:
* - starting/restarting lvmetad
* . Scanning is caused by:
* - running pvscan --cache
* - running any command with different global_filter (token mismatch)
* - running any command while lvmetad is Empty
* - running a report/display command with --foreign
* - running a report/display command with --shared
* - running a command using lvmlockd global lock where global state is changed
* . Initialized is caused by:
* - completion of Scanning
* . Outdated is caused by:
* - device being added or removed on the system
* - creating/removing/changing a VG
* - creating/removing/changing a PV
* . Updated is caused by:
* - receiving and processing all events
*
* request handling:
* . Empty: short period during startup, token error returned
* . Scanning: should be very short, lvmetad responds to requests with
* the token error "updating"
* . Initialized: lvmetad responds to requests
* . Updated: lvmetad responds to requests
* . Outdated: should be very short, lvmetad responds to requests
*
* In general, the cache state before and after the transition
* "Updated -> Scanning -> Initialized" should match, unless
* events occur during that transition.
*
* The Scanning state includes:
* . receive a request to set the token to "updating" (Scanning state begins.)
* . receive a pv_clear_all request to clear current cache
* . receive a number of pv_found events to repopulate cache
* . receive a request to set the token to a hash value (Initialized state begins.)
*
* The transition from Outdated to Updated depends on lvm commands
* sending events to lvmetad, i.e. pv_found, pv_gone, vg_update,
* vg_remove. Prior to receiving these events, lvmetad is not aware
* that it is in the Outdated state.
*
* When using a shared VG with lvmlockd, the Outdated state can last a
* longer time, but it won't be used in that state. lvmlockd forces a
* transition "Outdated -> Scanning -> Initialized" before the cache
* is used.
*/
/*
* valid/invalid state of cached metadata
*
* Normally when using lvmetad, the state is kept up-to-date through a
* combination of notifications from clients and updates triggered by uevents.
* When using lvmlockd, the lvmetad state is expected to become out of
* date (invalid/stale) when other hosts make changes to the metadata on disk.
*
* To deal with this, the metadata cached in lvmetad can be flagged as invalid.
* This invalid flag is returned along with the metadata when read by a
* command. The command can check for the invalid flag and decide that it
* should either use the stale metadata (uncommon), or read the latest metadata
* from disk rather than using the invalid metadata that was returned. If the
* command reads the latest metadata from disk, it can choose to send it to
* lvmetad to update the cached copy and clear the invalid flag in lvmetad.
* Otherwise, the next command to read the metadata from lvmetad will also
* receive the invalid metadata with the invalid flag (and like the previous
* command, it too may choose to read the latest metadata from disk and can
* then also choose to update the lvmetad copy.)
*
* For purposes of tracking the invalid state, LVM metadata is considered
* to be either VG-specific or global. VG-specific metadata is metadata
* that is isolated to a VG, such as the LVs it contains. Global
* metadata is metadata that is not isolated to a single VG. Global
* metadata includes:
* . the VG namespace (which VG names are used)
* . the set of orphan PVs (which PVs are in VGs and which are not)
* . properties of orphan PVs (the size of an orphan PV)
*
* If the metadata for a single VG becomes invalid, the VGFL_INVALID
* flag can be set in the vg_info struct for that VG. If the global
* metadata becomes invalid, the GLFL_INVALID flag can be set in the
* lvmetad daemon state.
*
* If a command reads VG metadata and VGFL_INVALID is set, an
* extra config node called "vg_invalid" is added to the config
* data returned to the command.
*
* If a command reads global metadata and GLFL_INVALID is set, an
* extra config node called "global_invalid" is added to the
* config data returned to the command.
*
* If a command sees vg_invalid, and wants the latest VG metadata,
* it only needs to scan disks of the PVs in that VG.
* It can then use vg_update to send the latest metadata to lvmetad
* which clears the VGFL_INVALID flag.
*
* If a command sees global_invalid, and wants the latest metadata,
* it should scan all devices to update lvmetad, and then send
* lvmetad the "set_global_info global_invalid=0" message to clear
* GLFL_INVALID.
*
* (When rescanning devices to update lvmetad, the command must use
* the global filter cmd->lvmetad_filter so that it processes the same
* devices that are seen by lvmetad.)
*
* The lvmetad INVALID flags can be set by sending lvmetad the messages:
*
* . set_vg_info with the latest VG seqno. If the VG seqno is larger
* than the cached VG seqno, VGFL_INVALID is set for the VG.
*
* . set_global_info with global_invalid=1 sets GLFL_INVALID.
*
* Different entities could use these functions to invalidate metadata
* if/when they detected that the cache is stale. How they detect that
* the cache is stale depends on the details of the specific entity.
*
* In the case of lvmlockd, it embeds values into its locks to keep track
* of when other nodes have changed metadata on disk related to those locks.
* When acquiring locks it can look at these values and detect that
* the metadata associated with the lock has been changed.
* When the values change, it uses set_vg_info/set_global_info to
* invalidate the lvmetad cache.
*
* The values that lvmlockd distributes through its locks are the
* latest VG seqno in VG locks and a global counter in the global lock.
* When a host acquires a VG lock and sees that the embedded seqno is
* larger than it was previously, it knows that it should invalidate the
* lvmetad cache for the VG. If the host acquires the global lock
* and sees that the counter is larger than previously, it knows that
* it should invalidate the global info in lvmetad. This invalidation
* is done before the lock is returned to the command. This way the
* invalid flag will be set on the metadata before the command reads
* it from lvmetad.
*/
/*
 * Per-VG bookkeeping kept next to the cached metadata. Instances are the
 * values of the vgid_to_info hash table and carry the VGFL_ flags that
 * vg_lookup reports back to commands.
 */
struct vg_info {
/* NOTE(review): presumably the VG seqno supplied via set_vg_info -- confirm against the handler */
int64_t external_version;
uint32_t flags; /* VGFL_ */
};
/*
 * GLFL_*: daemon-wide flags stored in lvmetad_state.flags.
 * GLFL_INVALID marks the global metadata as stale; while it is set the
 * list/lookup replies carry an extra "global_invalid" node.
 * NOTE(review): the DISABLE flag and its REASON bits are not used in this
 * part of the file; their meaning is presumably "lvmetad disabled, and
 * why" -- confirm against the handlers that set them.
 */
#define GLFL_INVALID 0x00000001
#define GLFL_DISABLE 0x00000002
#define GLFL_DISABLE_REASON_DIRECT 0x00000004
#define GLFL_DISABLE_REASON_LVM1 0x00000008
#define GLFL_DISABLE_REASON_DUPLICATES 0x00000010
#define GLFL_DISABLE_REASON_VGRESTORE 0x00000020
/* Mask covering every GLFL_DISABLE_REASON_* bit. */
#define GLFL_DISABLE_REASON_ALL (GLFL_DISABLE_REASON_DIRECT | GLFL_DISABLE_REASON_LVM1 | GLFL_DISABLE_REASON_DUPLICATES | GLFL_DISABLE_REASON_VGRESTORE)
/*
 * VGFL_*: per-VG flags stored in struct vg_info.flags.
 * VGFL_INVALID makes vg_lookup add a "vg_invalid" node to its reply.
 */
#define VGFL_INVALID 0x00000001
/* Size of the lvmetad_state.update_cmd buffer. */
#define CMD_NAME_SIZE 32
/*
 * Complete state of the lvmetad daemon: the metadata cache (a set of hash
 * tables keyed by PV/VG identifiers), the global flags and the locks.
 */
typedef struct {
daemon_idle *idle;
log_state *log; /* convenience */
const char *log_config;
/* pvid -> dm_config_tree holding the PV's "pvmeta" section */
struct dm_hash_table *pvid_to_pvmeta;
/* device number (binary int64 key) -> pvid string */
struct dm_hash_table *device_to_pvid; /* shares locks with above */
/* vgid -> dm_config_tree holding the cached VG metadata */
struct dm_hash_table *vgid_to_metadata;
/* vgid -> VG name string */
struct dm_hash_table *vgid_to_vgname;
/* vgid -> dm_config_tree with an "outdated_pvs/pv_list" array of pvids */
struct dm_hash_table *vgid_to_outdated_pvs;
/* vgid -> struct vg_info */
struct dm_hash_table *vgid_to_info;
/* VG name -> vgid string; one name may map to several vgids */
struct dm_hash_table *vgname_to_vgid;
/* pvid -> vgid string ("#orphan" for PVs that belong to no VG) */
struct dm_hash_table *pvid_to_vgid;
/* NOTE(review): token and update_* are not touched in this part of the file; see the "cache states" comment for the token protocol */
char token[128];
char update_cmd[CMD_NAME_SIZE];
int update_pid;
int update_timeout;
uint64_t update_begin;
uint32_t flags; /* GLFL_ */
/* NOTE(review): lock coverage is not visible here; presumably token_lock guards token/update_*, info_lock guards flags/vgid_to_info, cache_lock guards the hash tables -- confirm */
pthread_mutex_t token_lock;
pthread_mutex_t info_lock;
pthread_rwlock_t cache_lock;
} lvmetad_state;
/*
 * Read CLOCK_MONOTONIC and return the seconds part of the reading.
 * Returns 0 when the clock cannot be read.
 */
static uint64_t _monotonic_seconds(void)
{
	struct timespec now;
	int rc = clock_gettime(CLOCK_MONOTONIC, &now);

	if (rc < 0)
		return 0;

	return now.tv_sec;
}
/*
 * Tear down the whole cache: destroy the config trees stored as values in
 * the three tree-valued tables, then destroy all eight hash tables.
 */
static void destroy_metadata_hashes(lvmetad_state *s)
{
	struct dm_hash_node *node = NULL;

	/* Values of these tables are dm_config_tree objects. */
	dm_hash_iterate(node, s->vgid_to_metadata) {
		dm_config_destroy(dm_hash_get_data(s->vgid_to_metadata, node));
	}

	dm_hash_iterate(node, s->vgid_to_outdated_pvs) {
		dm_config_destroy(dm_hash_get_data(s->vgid_to_outdated_pvs, node));
	}

	dm_hash_iterate(node, s->pvid_to_pvmeta) {
		dm_config_destroy(dm_hash_get_data(s->pvid_to_pvmeta, node));
	}

	/* Now the tables themselves. */
	dm_hash_destroy(s->pvid_to_pvmeta);
	dm_hash_destroy(s->vgid_to_metadata);
	dm_hash_destroy(s->vgid_to_vgname);
	dm_hash_destroy(s->vgid_to_outdated_pvs);
	dm_hash_destroy(s->vgid_to_info);
	dm_hash_destroy(s->vgname_to_vgid);
	dm_hash_destroy(s->device_to_pvid);
	dm_hash_destroy(s->pvid_to_vgid);
}
/*
 * Allocate the eight (empty) cache hash tables.
 * Allocation results are stored as returned; nothing is checked here.
 */
static void create_metadata_hashes(lvmetad_state *s)
{
	enum { TABLE_SIZE_HINT = 32 };

	s->pvid_to_pvmeta       = dm_hash_create(TABLE_SIZE_HINT);
	s->device_to_pvid       = dm_hash_create(TABLE_SIZE_HINT);
	s->vgid_to_metadata     = dm_hash_create(TABLE_SIZE_HINT);
	s->vgid_to_vgname       = dm_hash_create(TABLE_SIZE_HINT);
	s->vgid_to_outdated_pvs = dm_hash_create(TABLE_SIZE_HINT);
	s->vgid_to_info         = dm_hash_create(TABLE_SIZE_HINT);
	s->pvid_to_vgid         = dm_hash_create(TABLE_SIZE_HINT);
	s->vgname_to_vgid       = dm_hash_create(TABLE_SIZE_HINT);
}
/* Build a "failed" reply whose "reason" field is the given text. */
static response reply_fail(const char *reason)
{
	response res = daemon_reply_simple("failed", "reason = %s", reason, NULL);

	return res;
}
/* Build an "unknown" reply whose "reason" field is the given text. */
static response reply_unknown(const char *reason)
{
	response res = daemon_reply_simple("unknown", "reason = %s", reason, NULL);

	return res;
}
/*
 * Return the first entry of the "metadata/physical_volumes" section found
 * from vg, or NULL when that section does not exist.
 */
static struct dm_config_node *pvs(struct dm_config_node *vg)
{
	struct dm_config_node *section = dm_config_find_node(vg, "metadata/physical_volumes");

	return section ? section->child : NULL;
}
/*
 * Strip advisory "device" entries from every PV in the VG metadata and
 * detach whatever follows the vg node.
 *
 * Only the node after the current one is inspected, so a "device" entry
 * that is the first child of a PV is left in place.
 */
static void filter_metadata(struct dm_config_node *vg)
{
	struct dm_config_node *pv, *item;

	for (pv = pvs(vg); pv; pv = pv->sib) {
		for (item = pv->child; item; item = item->sib) {
			/* Remove the advisory device nodes. */
			if (item->sib && !strcmp(item->sib->key, "device"))
				item->sib = item->sib->sib;
		}
	}

	/* Drop any trailing garbage. */
	vg->sib = NULL;
}
/*
 * Splice the sibling chain that starts at pvmeta in front of pv's existing
 * children and re-parent that chain to pv.
 *
 * While walking the chain, a following sibling whose key is "id" or
 * "dev_size" is unlinked. Only the node after the current one is examined,
 * so the head node (pvmeta itself) is never dropped and at most one node
 * is unlinked per step.
 *
 * Does nothing when pvmeta is NULL.
 */
static void merge_pvmeta(struct dm_config_node *pv, struct dm_config_node *pvmeta)
{
struct dm_config_node *tmp;
if (!pvmeta)
return;
tmp = pvmeta;
while (tmp->sib) {
/* drop the redundant ID and dev_size nodes */
if (!strcmp(tmp->sib->key, "id") || !strcmp(tmp->sib->key, "dev_size"))
tmp->sib = tmp->sib->sib;
/* the node just unlinked may have been the last one */
if (!tmp->sib) break;
tmp = tmp->sib;
tmp->parent = pv;
}
/* tmp is the tail of the pvmeta chain: hang pv's previous children after it */
tmp->sib = pv->child;
pv->child = pvmeta;
pvmeta->parent = pv;
}
/*
 * Annotate each PV of an outgoing VG reply with what lvmetad knows about
 * the device: the "MISSING" status flag is set when the PV has no entry in
 * pvid_to_pvmeta and cleared otherwise, and for known PVs a copy of the
 * cached pvmeta nodes is merged into the PV section.
 *
 * cft and vg are the data being sent to the caller.
 *
 * Either the "big" vgs lock or a per-vg lock must be held on entry.
 *
 * Returns 1 on success, 0 when cloning the pvmeta nodes fails.
 */
static int update_pv_status(lvmetad_state *s,
			    struct dm_config_tree *cft,
			    struct dm_config_node *vg)
{
	struct dm_config_node *pv, *copy;
	struct dm_config_tree *cached;
	const char *pvid;

	for (pv = pvs(vg); pv; pv = pv->sib) {
		pvid = dm_config_find_str(pv->child, "id", NULL);
		if (!pvid) {
			ERROR(s, "update_pv_status found no uuid for PV");
			continue;
		}

		cached = dm_hash_lookup(s->pvid_to_pvmeta, pvid);
		set_flag(cft, pv, "status", "MISSING", !cached);

		if (!cached)
			continue;

		copy = dm_config_clone_node(cft, cached->root->child, 1);
		if (!copy) {
			ERROR(s, "update_pv_status out of memory");
			return 0;
		}

		merge_pvmeta(pv, copy);
	}

	return 1;
}
/*
 * Append a new, valueless node named node_name after the last top-level
 * sibling of cft->root. Returns the new node, or NULL if it cannot be
 * created. cft->root must not be NULL.
 */
static struct dm_config_node *add_last_node(struct dm_config_tree *cft, const char *node_name)
{
	struct dm_config_node *tail, *added;

	/* Locate the current end of the top-level sibling chain. */
	for (tail = cft->root; tail->sib; tail = tail->sib)
		;

	if (!(added = dm_config_create_node(cft, node_name)))
		return NULL;

	added->parent = cft->root;
	added->sib = NULL;
	added->v = NULL;
	tail->sib = added;

	return added;
}
/*
 * Build the reply node for one PV inside cft.
 *
 * The cached pvmeta root node is cloned, linked after pre_sib (if given)
 * and below parent (becoming its first child if it has none), and keyed
 * by pvid. "vgid" (unless the PV maps to "#orphan") and "vgname" text
 * nodes are then added to it.
 *
 * Returns the new node, or NULL when the PV is unknown or cloning fails.
 */
static struct dm_config_node *make_pv_node(lvmetad_state *s, const char *pvid,
					   struct dm_config_tree *cft,
					   struct dm_config_node *parent,
					   struct dm_config_node *pre_sib)
{
	struct dm_config_tree *cached;
	struct dm_config_node *node, *prev = NULL;
	const char *vgid, *vgname = NULL;

	if (!(cached = dm_hash_lookup(s->pvid_to_pvmeta, pvid)))
		return NULL;

	if ((vgid = dm_hash_lookup(s->pvid_to_vgid, pvid)))
		vgname = dm_hash_lookup(s->vgid_to_vgname, vgid);

	/* Nick the pvmeta config tree. */
	node = dm_config_clone_node(cft, cached->root, 0);
	if (!node)
		return NULL;

	if (pre_sib)
		pre_sib->sib = node;
	if (parent && !parent->child)
		parent->child = node;
	node->parent = parent;
	node->key = pvid;

	/* Add the "variable" bits to it. */
	if (vgid && strcmp(vgid, "#orphan"))
		prev = make_text_node(cft, "vgid", vgid, node, prev);
	if (vgname)
		make_text_node(cft, "vgname", vgname, node, prev);

	return node;
}
/*
 * Handle "pv_list": reply with response = "OK" followed by a
 * "physical_volumes" section holding one node per PV in pvid_to_pvmeta.
 * A "global_invalid" node is appended when GLFL_INVALID is set.
 *
 * On allocation failure the partially built response is returned as is.
 */
static response pv_list(lvmetad_state *s, request r)
{
	struct dm_config_node *list, *prev = NULL;
	struct dm_hash_node *it;
	response res = { 0 };

	DEBUGLOG(s, "pv_list");

	buffer_init(&res.buffer);

	res.cft = dm_config_create();
	if (!res.cft)
		return res; /* FIXME error reporting */

	/* The response field */
	res.cft->root = make_text_node(res.cft, "response", "OK", NULL, NULL);
	if (!res.cft->root)
		return res; /* FIXME doomed */

	list = make_config_node(res.cft, "physical_volumes", NULL, res.cft->root);

	dm_hash_iterate(it, s->pvid_to_pvmeta) {
		const char *pvid = dm_hash_get_key(s->pvid_to_pvmeta, it);

		prev = make_pv_node(s, pvid, res.cft, list, prev);
	}

	if (s->flags & GLFL_INVALID)
		add_last_node(res.cft, "global_invalid");

	return res;
}
/*
 * Handle "pv_lookup": find one PV by "uuid" or, when no uuid is given,
 * by "device" number, and reply with response = "OK" followed by a
 * "physical_volume" section. A "global_invalid" node is appended when
 * GLFL_INVALID is set.
 *
 * Replies "failed" when neither key is supplied or memory runs out, and
 * "unknown" when the device or PV is not in the cache. Every error path
 * releases the partially built reply tree.
 */
static response pv_lookup(lvmetad_state *s, request r)
{
	const char *pvid = daemon_request_str(r, "uuid", NULL);
	int64_t devt = daemon_request_int(r, "device", 0);
	response res = { 0 };
	struct dm_config_node *pv;

	/* pvid is NULL for a lookup by device; never hand NULL to %s. */
	DEBUGLOG(s, "pv_lookup pvid %s", pvid ?: "none");

	buffer_init(&res.buffer);

	if (!pvid && !devt)
		return reply_fail("need PVID or device");

	if (!(res.cft = dm_config_create()))
		return reply_fail("out of memory");

	if (!(res.cft->root = make_text_node(res.cft, "response", "OK", NULL, NULL))) {
		/* The tree is not returned on this path, so free it here. */
		dm_config_destroy(res.cft);
		return reply_fail("out of memory");
	}

	if (!pvid && devt)
		pvid = dm_hash_lookup_binary(s->device_to_pvid, &devt, sizeof(devt));

	if (!pvid) {
		/* devt is signed; convert explicitly to match the PRIu64 format. */
		WARN(s, "pv_lookup: could not find device %" PRIu64, (uint64_t) devt);
		dm_config_destroy(res.cft);
		return reply_unknown("device not found");
	}

	pv = make_pv_node(s, pvid, res.cft, NULL, res.cft->root);
	if (!pv) {
		dm_config_destroy(res.cft);
		return reply_unknown("PV not found");
	}

	/* make_pv_node keys the node by pvid; the reply uses a fixed key. */
	pv->key = "physical_volume";

	if (s->flags & GLFL_INVALID)
		add_last_node(res.cft, "global_invalid");

	return res;
}
/*
 * Handle "vg_list": reply with response = "OK" followed by a
 * "volume_groups" section that holds, for every entry of vgid_to_vgname,
 * a node keyed by the vgid with a single "name" child. A "global_invalid"
 * node is appended when GLFL_INVALID is set.
 *
 * On allocation failure the partially built response is returned as is.
 */
static response vg_list(lvmetad_state *s, request r)
{
	struct dm_config_node *root, *vgs, *vg, *prev = NULL;
	struct dm_hash_node *it;
	const char *vgid;
	const char *vgname;
	response res = { 0 };

	DEBUGLOG(s, "vg_list");

	buffer_init(&res.buffer);

	if (!(res.cft = dm_config_create()))
		return res; /* FIXME: better error reporting */

	/* The response field */
	root = dm_config_create_node(res.cft, "response");
	res.cft->root = root;
	if (!root)
		return res; /* FIXME */

	root->parent = res.cft->root;
	root->v = dm_config_create_value(res.cft);
	if (!root->v)
		return res; /* FIXME */

	root->v->type = DM_CFG_STRING;
	root->v->v.str = "OK";

	/* The (initially empty) list section */
	vgs = dm_config_create_node(res.cft, "volume_groups");
	root->sib = vgs;
	if (!vgs)
		return res; /* FIXME */

	vgs->parent = res.cft->root;
	vgs->v = NULL;
	vgs->child = NULL;

	dm_hash_iterate(it, s->vgid_to_vgname) {
		vgid = dm_hash_get_key(s->vgid_to_vgname, it);
		vgname = dm_hash_get_data(s->vgid_to_vgname, it);

		if (!(vg = dm_config_create_node(res.cft, vgid)))
			return res; /* FIXME */

		if (prev)
			prev->sib = vg;

		vg->parent = vgs;
		vg->sib = NULL;
		vg->v = NULL;

		vg->child = dm_config_create_node(res.cft, "name");
		if (!vg->child)
			return res; /* FIXME */

		vg->child->parent = vg;
		vg->child->sib = 0;

		vg->child->v = dm_config_create_value(res.cft);
		if (!vg->child->v)
			return res; /* FIXME */

		vg->child->v->type = DM_CFG_STRING;
		vg->child->v->v.str = vgname;

		if (!vgs->child)
			vgs->child = vg;

		prev = vg;
	}

	if (s->flags & GLFL_INVALID)
		add_last_node(res.cft, "global_invalid");

	return res;
}
/*
 * Record that PV pvid carries outdated metadata for VG vgid.
 *
 * Nothing is recorded when the PV is unknown, or when both of its metadata
 * areas are ignored (a missing "ignore" setting counts as ignored).
 *
 * The per-VG record is a small config tree stored in vgid_to_outdated_pvs
 * and created on first use; pvid is added to its "outdated_pvs/pv_list"
 * array unless it is already listed.
 *
 * Any allocation failure, or a record without a pv_list, aborts the
 * daemon, so no NULL string is ever stored in the list or used as a key.
 */
static void mark_outdated_pv(lvmetad_state *s, const char *vgid, const char *pvid)
{
	struct dm_config_tree *pvmeta, *outdated_pvs;
	struct dm_config_node *list, *cft_vgid;
	struct dm_config_value *v;
	char *vgid_copy;

	pvmeta = dm_hash_lookup(s->pvid_to_pvmeta, pvid);

	/* if the MDA exists and is used, it will have ignore=0 set */
	if (!pvmeta ||
	    (dm_config_find_int64(pvmeta->root, "pvmeta/mda0/ignore", 1) &&
	     dm_config_find_int64(pvmeta->root, "pvmeta/mda1/ignore", 1)))
		return;

	ERROR(s, "PV %s has outdated metadata for VG %s", pvid, vgid);

	outdated_pvs = dm_hash_lookup(s->vgid_to_outdated_pvs, vgid);

	if (!outdated_pvs) {
		/* The vgid copy lives in the new tree's pool and doubles as the hash key. */
		if (!(outdated_pvs = dm_config_from_string("outdated_pvs/pv_list = []")) ||
		    !(vgid_copy = dm_pool_strdup(outdated_pvs->mem, vgid)) ||
		    !(cft_vgid = make_text_node(outdated_pvs, "vgid", vgid_copy,
						outdated_pvs->root, NULL)))
			abort();

		if (!dm_hash_insert(s->vgid_to_outdated_pvs, cft_vgid->v->v.str, outdated_pvs))
			abort();

		DEBUGLOG(s, "created outdated_pvs list for VG %s", vgid);
	}

	if (!(list = dm_config_find_node(outdated_pvs->root, "outdated_pvs/pv_list")))
		abort();

	/* Already listed?  The empty-array marker carries no string. */
	for (v = list->v; v; v = v->next)
		if (v->type != DM_CFG_EMPTY_ARRAY && !strcmp(v->v.str, pvid))
			return;

	if (!(v = dm_config_create_value(outdated_pvs)))
		abort();

	v->type = DM_CFG_STRING;
	if (!(v->v.str = dm_pool_strdup(outdated_pvs->mem, pvid)))
		abort();

	/* Prepend to the list. */
	v->next = list->v;
	list->v = v;
}
/*
 * Add an "outdated_pvs" section to an outgoing VG reply.
 *
 * When the VG has a non-empty outdated PV list, a new "outdated_pvs" node
 * is created in metadata_cft and made the first child of metadata; a clone
 * of the cached pvmeta root of every listed PV is then pushed onto it,
 * keyed by the clone's "pvmeta/id" string. PVs without cached pvmeta are
 * skipped with a warning.
 */
static void chain_outdated_pvs(lvmetad_state *s, const char *vgid, struct dm_config_tree *metadata_cft, struct dm_config_node *metadata)
{
	struct dm_config_tree *outdated, *cached;
	struct dm_config_node *list_cn = NULL, *section, *copy;
	struct dm_config_value *entry;

	outdated = dm_hash_lookup(s->vgid_to_outdated_pvs, vgid);
	if (outdated)
		list_cn = dm_config_find_node(outdated->root, "outdated_pvs/pv_list");

	if (!list_cn || !list_cn->v)
		return;

	section = make_config_node(metadata_cft, "outdated_pvs", metadata_cft->root, 0);
	if (!section)
		return; /* oops */

	/* Put the new section at the head of metadata's children. */
	section->sib = metadata->child;
	metadata->child = section;

	for (entry = list_cn->v; entry && entry->type != DM_CFG_EMPTY_ARRAY; entry = entry->next) {
		cached = dm_hash_lookup(s->pvid_to_pvmeta, entry->v.str);
		if (!cached) {
			WARN(s, "metadata for PV %s not found", entry->v.str);
			continue;
		}

		copy = dm_config_clone_node(metadata_cft, cached->root, 0);
		if (!copy)
			continue;

		copy->key = dm_config_find_str(copy, "pvmeta/id", NULL);
		copy->sib = section->child;
		section->child = copy;
	}
}
/*
 * Handle "vg_lookup": return the cached metadata of one VG.
 *
 * The VG is selected by "uuid" and/or "name". When only one is given the
 * other is looked up; a name that maps to more than one vgid yields a
 * "multiple" reply. When both are given the two mappings are checked
 * against each other.
 *
 * A successful reply holds response = "OK", the VG name and a deep copy of
 * the cached metadata, annotated by update_pv_status() and
 * chain_outdated_pvs(). "global_invalid" and "vg_invalid" nodes are
 * appended when GLFL_INVALID / VGFL_INVALID are set.
 *
 * Every node and value of the reply is allocated from the reply tree
 * (res.cft); the cached tree is only read.
 *
 * Replies "unknown" when the VG cannot be identified or is not cached.
 * Running out of memory while building the reply terminates the daemon.
 */
static response vg_lookup(lvmetad_state *s, request r)
{
	struct dm_config_tree *cft;
	struct dm_config_node *metadata, *n;
	struct vg_info *info;
	response res = { 0 };
	const char *uuid = daemon_request_str(r, "uuid", NULL);
	const char *name = daemon_request_str(r, "name", NULL);
	int count = 0;

	buffer_init(&res.buffer);

	if (!uuid && !name) {
		ERROR(s, "vg_lookup with no uuid or name");
		return reply_unknown("VG not found");

	} else if (!uuid || !name) {
		DEBUGLOG(s, "vg_lookup vgid %s name %s needs lookup",
			 uuid ?: "none", name ?: "none");

		if (name && !uuid)
			uuid = dm_hash_lookup_with_count(s->vgname_to_vgid, name, &count);
		else if (uuid && !name)
			name = dm_hash_lookup(s->vgid_to_vgname, uuid);

		if (name && uuid && (count > 1)) {
			DEBUGLOG(s, "vg_lookup name %s vgid %s found %d vgids",
				 name, uuid, count);
			return daemon_reply_simple("multiple", "reason = %s", "Multiple VGs found with same name", NULL);
		}

		if (!uuid || !name)
			return reply_unknown("VG not found");

	} else {
		char *name_lookup = dm_hash_lookup(s->vgid_to_vgname, uuid);
		char *uuid_lookup = dm_hash_lookup_with_val(s->vgname_to_vgid, name, uuid, strlen(uuid) + 1);

		/* FIXME: comment out these sanity checks when not testing */

		if (!name_lookup || !uuid_lookup) {
			ERROR(s, "vg_lookup vgid %s name %s found incomplete mapping uuid %s name %s",
			      uuid, name, uuid_lookup ?: "none", name_lookup ?: "none");
			return reply_unknown("VG mapping incomplete");

		} else if (strcmp(name_lookup, name) || strcmp(uuid_lookup, uuid)) {
			ERROR(s, "vg_lookup vgid %s name %s found inconsistent mapping uuid %s name %s",
			      uuid, name, uuid_lookup, name_lookup);
			return reply_unknown("VG mapping inconsistent");
		}
	}

	DEBUGLOG(s, "vg_lookup vgid %s name %s", uuid ?: "none", name ?: "none");

	cft = dm_hash_lookup(s->vgid_to_metadata, uuid);
	if (!cft || !cft->root) {
		return reply_unknown("UUID not found");
	}

	metadata = cft->root;

	if (!(res.cft = dm_config_create()))
		goto nomem_un;

	/* The response field */
	if (!(res.cft->root = n = dm_config_create_node(res.cft, "response")))
		goto nomem_un;

	/*
	 * Allocate from the reply tree, like every other part of the reply.
	 * Taking the value from the cached tree would make that long-lived
	 * tree's pool grow with every lookup.
	 */
	if (!(n->v = dm_config_create_value(res.cft)))
		goto nomem_un;

	n->parent = res.cft->root;
	n->v->type = DM_CFG_STRING;
	n->v->v.str = "OK";

	/* The name field */
	if (!(n = n->sib = dm_config_create_node(res.cft, "name")))
		goto nomem_un;

	if (!(n->v = dm_config_create_value(res.cft)))
		goto nomem_un;

	n->parent = res.cft->root;
	n->v->type = DM_CFG_STRING;
	n->v->v.str = name;

	/* The metadata section */
	if (!(n = n->sib = dm_config_clone_node(res.cft, metadata, 1)))
		goto nomem_un;

	n->parent = res.cft->root;

	if (!update_pv_status(s, res.cft, n))
		goto nomem;

	chain_outdated_pvs(s, uuid, res.cft, n);

	if (s->flags & GLFL_INVALID)
		add_last_node(res.cft, "global_invalid");

	info = dm_hash_lookup(s->vgid_to_info, uuid);
	if (info && (info->flags & VGFL_INVALID)) {
		if (!add_last_node(res.cft, "vg_invalid"))
			goto nomem;
	}

	return res;

nomem_un:
nomem:
	/* No reply is built here: the daemon terminates immediately. */
	ERROR(s, "vg_lookup vgid %s name %s out of memory.", uuid ?: "none", name ?: "none");
	ERROR(s, "lvmetad could not be updated and is aborting.");
	exit(EXIT_FAILURE);
}
/* Forward declaration: _update_pvid_to_vgid() calls this ahead of its definition. */
static int vg_remove_if_missing(lvmetad_state *s, const char *vgid, int update_pvids);
/*
 * Modes accepted by _update_pvid_to_vgid():
 * UPDATE_ONLY   - only repoint each PV at the given vgid.
 * REMOVE_EMPTY  - also run vg_remove_if_missing() on every VG the PVs
 *                 were previously mapped to.
 * MARK_OUTDATED - also call mark_outdated_pv() for each PV.
 */
enum update_pvid_mode { UPDATE_ONLY, REMOVE_EMPTY, MARK_OUTDATED };
/*
 * Point every PV listed in the metadata tree vg at vgid in pvid_to_vgid.
 *
 * For each PV with an "id", a fresh copy of vgid is inserted under the
 * pvid and the string previously stored there is freed. PVs without an
 * "id" are logged and skipped.
 *
 * mode is one of enum update_pvid_mode:
 * - REMOVE_EMPTY: the vgids the PVs used to map to are collected and,
 *   once all PVs have been remapped, vg_remove_if_missing() is called on
 *   each of them.
 * - MARK_OUTDATED: mark_outdated_pv(s, vgid, pvid) is called for each PV
 *   before it is remapped.
 *
 * Returns 1 on success and 0 when vgid is NULL. Any allocation failure
 * terminates the daemon.
 *
 * You need to be holding the pvid_to_vgid lock already to call this.
 */
static int _update_pvid_to_vgid(lvmetad_state *s, struct dm_config_tree *vg,
const char *vgid, int mode)
{
struct dm_config_node *pv;
struct dm_hash_table *to_check;
struct dm_hash_node *n;
const char *pvid;
char *vgid_old;
char *vgid_dup;
const char *check_vgid;
int r = 0;
if (!vgid)
return 0;
/* Temporary set of previous vgids; only filled in REMOVE_EMPTY mode. */
if (!(to_check = dm_hash_create(32)))
goto abort_daemon;
for (pv = pvs(vg->root); pv; pv = pv->sib) {
if (!(pvid = dm_config_find_str(pv->child, "id", NULL))) {
ERROR(s, "PV has no id for update_pvid_to_vgid");
continue;
}
vgid_old = dm_hash_lookup(s->pvid_to_vgid, pvid);
if ((mode == REMOVE_EMPTY) && vgid_old) {
/* This copies the vgid_old string, doesn't reference it. */
if (!dm_hash_insert(to_check, vgid_old, (void*) 1)) {
ERROR(s, "update_pvid_to_vgid out of memory for hash insert vgid_old %s", vgid_old);
goto abort_daemon;
}
}
if (mode == MARK_OUTDATED)
mark_outdated_pv(s, vgid, pvid);
/* The table stores its own copy of vgid for each pvid. */
if (!(vgid_dup = dm_strdup(vgid))) {
ERROR(s, "update_pvid_to_vgid out of memory for vgid %s", vgid);
goto abort_daemon;
}
if (!dm_hash_insert(s->pvid_to_vgid, pvid, vgid_dup)) {
ERROR(s, "update_pvid_to_vgid out of memory for hash insert vgid %s", vgid_dup);
goto abort_daemon;
}
/* pvid_to_vgid no longer references vgid_old */
dm_free(vgid_old);
DEBUGLOG(s, "moving PV %s to VG %s", pvid, vgid);
}
/* Now that all PVs are remapped, drop previous VGs that have none left. */
dm_hash_iterate(n, to_check) {
check_vgid = dm_hash_get_key(to_check, n);
vg_remove_if_missing(s, check_vgid, 0);
}
r = 1;
dm_hash_destroy(to_check);
return r;
abort_daemon:
ERROR(s, "lvmetad could not be updated and is aborting.");
exit(EXIT_FAILURE);
}
/*
 * Forget everything cached for VG vgid: its vg_info, metadata tree, name
 * and outdated-PV record, plus the matching name -> vgid entry.
 *
 * When update_pvids is set and metadata was cached, the PVs named in that
 * metadata are remapped to "#orphan" before the metadata is destroyed.
 *
 * A pvid map lock needs to be held if update_pvids = 1.
 *
 * Always returns 1.
 */
static int remove_metadata(lvmetad_state *s, const char *vgid, int update_pvids)
{
	/* Fetch the stored values first so they can still be freed once unmapped. */
	struct vg_info *info = dm_hash_lookup(s->vgid_to_info, vgid);
	struct dm_config_tree *meta = dm_hash_lookup(s->vgid_to_metadata, vgid);
	char *name = dm_hash_lookup(s->vgid_to_vgname, vgid);
	struct dm_config_tree *outdated = dm_hash_lookup(s->vgid_to_outdated_pvs, vgid);
	char *stored_vgid = NULL;

	/* Unmap the vgid-keyed entries. */
	dm_hash_remove(s->vgid_to_info, vgid);
	dm_hash_remove(s->vgid_to_metadata, vgid);
	dm_hash_remove(s->vgid_to_vgname, vgid);
	dm_hash_remove(s->vgid_to_outdated_pvs, vgid);

	/* Unmap the name -> vgid entry that carries this vgid. */
	if (name) {
		stored_vgid = dm_hash_lookup_with_val(s->vgname_to_vgid, name, vgid, strlen(vgid) + 1);
		dm_hash_remove_with_val(s->vgname_to_vgid, name, vgid, strlen(vgid) + 1);
	}

	/* update_pvid_to_vgid will clear/free the pvid_to_vgid hash */
	if (update_pvids && meta)
		_update_pvid_to_vgid(s, meta, "#orphan", 0);

	/* Release the values that are no longer referenced by any table. */
	if (info)
		dm_free(info);
	if (meta)
		dm_config_destroy(meta);
	if (name)
		dm_free(name);
	if (outdated)
		dm_config_destroy(outdated);
	if (stored_vgid)
		dm_free(stored_vgid);

	return 1;
}
/*
 * Drop the cached VG vgid when none of its PVs is present any more.
 *
 * A PV counts as present when it is mapped to this vgid in pvid_to_vgid
 * and has an entry in pvid_to_pvmeta. If no PV of the cached metadata
 * qualifies, remove_metadata() is called with update_pvids passed through.
 *
 * The VG must be locked.
 *
 * Returns 0 when vgid is NULL, otherwise 1 (also when the VG is not cached).
 */
static int vg_remove_if_missing(lvmetad_state *s, const char *vgid, int update_pvids)
{
	struct dm_config_tree *meta;
	struct dm_config_node *pv;
	const char *pvid;
	const char *owner;
	int present = 0;

	if (!vgid)
		return 0;

	meta = dm_hash_lookup(s->vgid_to_metadata, vgid);
	if (!meta)
		return 1;

	for (pv = pvs(meta->root); pv; pv = pv->sib) {
		pvid = dm_config_find_str(pv->child, "id", NULL);
		if (!pvid)
			continue;

		owner = dm_hash_lookup(s->pvid_to_vgid, pvid);
		if (!owner)
			continue;

		if (!dm_hash_lookup(s->pvid_to_pvmeta, pvid))
			continue;

		if (!strcmp(vgid, owner))
			present = 1; /* at least one PV is around */
	}

	if (!present) {
		DEBUGLOG(s, "removing empty VG %s", vgid);
		remove_metadata(s, vgid, update_pvids);
	}

	return 1;
}
/*
 * Remove all hash table references to arg_name and arg_vgid
 * so that new metadata using this name and/or vgid can be added
 * without interference from previous data.
 *
 * This is used if a command updates metadata in the cache,
 * but update_metadata finds that what's in the cache is not
 * consistent with a normal transition between old and new
 * metadata. If this happens, it assumes that the command
 * is providing the correct metadata, so it first calls this
 * function to purge all records of the old metadata so the
 * new metadata can be added.
 */
static void _purge_metadata(lvmetad_state *s, const char *arg_name, const char *arg_vgid)
{
	char *stale_vgid;

	remove_metadata(s, arg_vgid, 1);

	/* Catch a arg_name -> arg_vgid entry that remove_metadata() did not reach. */
	stale_vgid = dm_hash_lookup_with_val(s->vgname_to_vgid, arg_name, arg_vgid, strlen(arg_vgid) + 1);
	if (!stale_vgid)
		return;

	dm_hash_remove_with_val(s->vgname_to_vgid, arg_name, arg_vgid, strlen(arg_vgid) + 1);
	dm_free(stale_vgid);
}
/*
 * Updates for new vgid and new metadata.
 *
 * Replaces the cache entries of VG arg_name, currently known under
 * old_vgid with metadata old_meta, by entries for new_vgid with metadata
 * new_meta.
 *
 * Remove any existing vg_info struct since it will be
 * recreated by lvmlockd if/when needed.
 *
 * Remove any existing outdated pvs since their metadata
 * will no longer be associated with this VG.
 *
 * Ownership: old_meta is destroyed and the old_vgid string is freed here;
 * new_meta is handed over to the vgid_to_metadata table.
 *
 * Returns 1 on success. Every failure visible in this function (string
 * duplication, hash insertion, PV remapping) terminates the daemon.
 */
static int _update_metadata_new_vgid(lvmetad_state *s,
				     const char *arg_name,
				     const char *old_vgid,
				     const char *new_vgid,
				     struct dm_config_tree *old_meta,
				     struct dm_config_tree *new_meta)
{
	struct vg_info *rem_info;
	struct dm_config_tree *rem_outdated;
	char *new_vgid_dup = NULL;
	char *arg_name_dup = NULL;
	int abort_daemon = 0;
	int retval = 0;

	/* The hash tables keep these copies as their values. */
	if (!(new_vgid_dup = dm_strdup(new_vgid)))
		goto ret;

	if (!(arg_name_dup = dm_strdup(arg_name)))
		goto ret;

	/*
	 * Temporarily orphan the PVs in the old metadata.
	 */
	if (!_update_pvid_to_vgid(s, old_meta, "#orphan", 0 /* UPDATE_ONLY */)) {
		ERROR(s, "update_metadata_new_vgid failed to move PVs for %s old_vgid %s", arg_name, old_vgid);
		abort_daemon = 1;
		goto ret;
	}

	/*
	 * Remove things related to the old vgid. (like remove_metadata)
	 */

	if ((rem_info = dm_hash_lookup(s->vgid_to_info, old_vgid))) {
		dm_hash_remove(s->vgid_to_info, old_vgid);
		dm_free(rem_info);
	}

	if ((rem_outdated = dm_hash_lookup(s->vgid_to_outdated_pvs, old_vgid))) {
		dm_hash_remove(s->vgid_to_outdated_pvs, old_vgid);
		dm_config_destroy(rem_outdated);
	}

	dm_hash_remove(s->vgid_to_metadata, old_vgid);
	dm_config_destroy(old_meta);
	old_meta = NULL;

	/* old_vgid is still needed as a key below, so it is freed last. */
	dm_hash_remove_with_val(s->vgname_to_vgid, arg_name, old_vgid, strlen(old_vgid) + 1);
	dm_hash_remove(s->vgid_to_vgname, old_vgid);
	dm_free((char *)old_vgid);
	old_vgid = NULL;

	/*
	 * Insert things with the new vgid.
	 */

	if (!dm_hash_insert(s->vgid_to_metadata, new_vgid, new_meta)) {
		ERROR(s, "update_metadata_new_vgid out of memory for meta hash insert for %s %s", arg_name, new_vgid);
		abort_daemon = 1;
		goto out;
	}

	if (!dm_hash_insert(s->vgid_to_vgname, new_vgid, arg_name_dup)) {
		ERROR(s, "update_metadata_new_vgid out of memory for name hash insert for %s %s", arg_name, new_vgid);
		abort_daemon = 1;
		goto out;
	}

	if (!dm_hash_insert_allow_multiple(s->vgname_to_vgid, arg_name, new_vgid_dup, strlen(new_vgid_dup) + 1)) {
		ERROR(s, "update_metadata_new_vgid out of memory for vgid hash insert for %s %s", arg_name, new_vgid);
		abort_daemon = 1;
		goto out;
	}

	/*
	 * Reassign PVs based on the new metadata.
	 */
	if (!_update_pvid_to_vgid(s, new_meta, new_vgid, 1 /* REMOVE_EMPTY */)) {
		/* Name this function in the log, like the other messages here. */
		ERROR(s, "update_metadata_new_vgid failed to update PVs for %s %s", arg_name, new_vgid);
		abort_daemon = 1;
		goto out;
	}

	DEBUGLOG(s, "update_metadata_new_vgid is done for %s %s", arg_name, new_vgid);
	retval = 1;
out:
ret:
	if (!new_vgid_dup || !arg_name_dup || abort_daemon) {
		ERROR(s, "lvmetad could not be updated and is aborting.");
		exit(EXIT_FAILURE);
	}

	if (!retval && new_meta)
		dm_config_destroy(new_meta);
	return retval;
}
/*
* Updates for new name and new metadata.
*
* Remove any existing vg_info struct since it will be
* recreated by lvmlockd if/when needed.
*
* Remove any existing outdated pvs since their metadata
* will no longer be associated with this VG.
*/
/*
 * Parameters:
 *   s        - daemon state holding the metadata hash tables.
 *   arg_vgid - vgid of the VG; it is unchanged by the rename.
 *   old_name - name string the caller looked up in vgid_to_vgname.
 *              It is unhashed and freed here, so the caller must not
 *              use it after this function returns.
 *   new_name - the new VG name; a private copy is stored in the cache.
 *   old_meta - cached metadata tree for the VG; destroyed here.
 *   new_meta - replacement metadata tree; stored in vgid_to_metadata.
 *
 * Returns 1 on success.  Every failure path (including failure to copy
 * the strings) terminates the daemon with exit(EXIT_FAILURE).
 */
static int _update_metadata_new_name(lvmetad_state *s,
				     const char *arg_vgid,
				     const char *old_name,
				     const char *new_name,
				     struct dm_config_tree *old_meta,
				     struct dm_config_tree *new_meta)
{
	struct vg_info *rem_info;
	struct dm_config_tree *rem_outdated;
	char *rem_vgid;
	char *new_name_dup = NULL;
	char *arg_vgid_dup = NULL;
	int abort_daemon = 0;
	int retval = 0;

	if (!(new_name_dup = dm_strdup(new_name)))
		goto out;

	if (!(arg_vgid_dup = dm_strdup(arg_vgid)))
		goto out;

	/*
	 * Temporarily orphan the PVs in the old metadata.
	 */
	if (!_update_pvid_to_vgid(s, old_meta, "#orphan", 0)) {
		ERROR(s, "update_metadata_new_name failed to move PVs for old_name %s %s", old_name, arg_vgid);
		abort_daemon = 1;
		goto out;
	}

	/*
	 * Remove things related to the old name.
	 */

	if ((rem_info = dm_hash_lookup(s->vgid_to_info, arg_vgid))) {
		dm_hash_remove(s->vgid_to_info, arg_vgid);
		dm_free(rem_info);
	}

	if ((rem_outdated = dm_hash_lookup(s->vgid_to_outdated_pvs, arg_vgid))) {
		dm_hash_remove(s->vgid_to_outdated_pvs, arg_vgid);
		dm_config_destroy(rem_outdated);
	}

	dm_hash_remove(s->vgid_to_metadata, arg_vgid);
	dm_config_destroy(old_meta);
	old_meta = NULL;

	dm_hash_remove(s->vgid_to_vgname, arg_vgid);

	/*
	 * The vgname_to_vgid entry owns a separately allocated copy of the
	 * vgid (see the insert of arg_vgid_dup below).  Fetch that pointer
	 * before unhashing the entry so the copy can be freed rather than
	 * leaked.
	 */
	rem_vgid = dm_hash_lookup_with_val(s->vgname_to_vgid, old_name, arg_vgid, strlen(arg_vgid) + 1);
	dm_hash_remove_with_val(s->vgname_to_vgid, old_name, arg_vgid, strlen(arg_vgid) + 1);

	/* Never free the caller's own string, which is still used below. */
	if (rem_vgid && (rem_vgid != arg_vgid))
		dm_free(rem_vgid);
	rem_vgid = NULL;

	dm_free((char *)old_name);
	old_name = NULL;

	/*
	 * Insert things with the new name.
	 */

	if (!dm_hash_insert(s->vgid_to_metadata, arg_vgid, new_meta)) {
		ERROR(s, "update_metadata_new_name out of memory for meta hash insert for %s %s", new_name, arg_vgid);
		abort_daemon = 1;
		goto out;
	}

	if (!dm_hash_insert(s->vgid_to_vgname, arg_vgid, new_name_dup)) {
		ERROR(s, "update_metadata_new_name out of memory for name hash insert for %s %s", new_name, arg_vgid);
		abort_daemon = 1;
		goto out;
	}

	if (!dm_hash_insert_allow_multiple(s->vgname_to_vgid, new_name, arg_vgid_dup, strlen(arg_vgid_dup) + 1)) {
		ERROR(s, "update_metadata_new_name out of memory for vgid hash insert for %s %s", new_name, arg_vgid);
		abort_daemon = 1;
		goto out;
	}

	/*
	 * Reassign PVs based on the new metadata.
	 */
	if (!_update_pvid_to_vgid(s, new_meta, arg_vgid, 1)) {
		ERROR(s, "update_metadata_new_name failed to update PVs for %s %s", new_name, arg_vgid);
		abort_daemon = 1;
		goto out;
	}

	DEBUGLOG(s, "update_metadata_new_name is done for %s %s", new_name, arg_vgid);
	retval = 1;

out:
	/*
	 * A failed string copy or a failure after the hashes were modified
	 * cannot be recovered from, so the daemon stops.
	 */
	if (!new_name_dup || !arg_vgid_dup || abort_daemon) {
		ERROR(s, "lvmetad could not be updated and is aborting.");
		exit(EXIT_FAILURE);
	}

	if (!retval && new_meta)
		dm_config_destroy(new_meta);
	return retval;
}
/*
* Add new entries to all hash tables.
*/
/*
 * Parameters:
 *   s        - daemon state holding the metadata hash tables.
 *   new_name - name of the VG being added; a private copy is cached.
 *   new_vgid - vgid of the VG being added; a private copy is cached.
 *   new_meta - metadata tree to store in vgid_to_metadata.
 *
 * Returns 1 on success.  Any failure terminates the daemon with
 * exit(EXIT_FAILURE).
 */
static int _update_metadata_add_new(lvmetad_state *s, const char *new_name, const char *new_vgid,
				    struct dm_config_tree *new_meta)
{
	char *name_copy = NULL;
	char *vgid_copy = NULL;
	int fatal = 0;
	int ok = 0;

	DEBUGLOG(s, "update_metadata_add_new for %s %s", new_name, new_vgid);

	/* The hashes store these pointers, so they need their own copies. */
	if (!(name_copy = dm_strdup(new_name)) ||
	    !(vgid_copy = dm_strdup(new_vgid)))
		goto finish;

	/* Each step runs only if every previous step succeeded. */
	if (!dm_hash_insert(s->vgid_to_metadata, new_vgid, new_meta)) {
		ERROR(s, "update_metadata_add_new out of memory for meta hash insert for %s %s", new_name, new_vgid);
		fatal = 1;
	} else if (!dm_hash_insert(s->vgid_to_vgname, new_vgid, name_copy)) {
		ERROR(s, "update_metadata_add_new out of memory for name hash insert for %s %s", new_name, new_vgid);
		fatal = 1;
	} else if (!dm_hash_insert_allow_multiple(s->vgname_to_vgid, new_name, vgid_copy, strlen(vgid_copy) + 1)) {
		ERROR(s, "update_metadata_add_new out of memory for vgid hash insert for %s %s", new_name, new_vgid);
		fatal = 1;
	} else if (!_update_pvid_to_vgid(s, new_meta, new_vgid, 1)) {
		ERROR(s, "update_metadata_add_new failed to update PVs for %s %s", new_name, new_vgid);
		fatal = 1;
	} else {
		DEBUGLOG(s, "update_metadata_add_new is done for %s %s", new_name, new_vgid);
		ok = 1;
	}

finish:
	if (!name_copy || !vgid_copy || fatal) {
		ERROR(s, "lvmetad could not be updated and is aborting.");
		exit(EXIT_FAILURE);
	}

	if (!ok && new_meta)
		dm_config_destroy(new_meta);
	return ok;
}
/*
* No locks need to be held. The pointers are never used outside of the scope of
* this function, so they can be safely destroyed after update_metadata returns
* (anything that might have been retained is copied).
*
* When this is called from pv_found, the metadata was read from a single
* PV specified by the pvid arg and ret_old_seq is not NULL. The metadata
* should match the existing metadata (matching seqno). If the metadata
* from pv_found has a smaller seqno, it means that the PV is outdated
* (was previously used in the VG and now reappeared after changes to the VG).
* The next command to access the VG will erase the outdated PV and then clear
* the outdated pv record here.  If the metadata from pv_found has a larger
* seqno than the existing metadata, the PVs in the existing metadata are
* marked outdated and the new metadata is used.
*
* When this is called from vg_update, the metadata is from a command that
* has new metadata that should replace the existing metadata.
* pvid and ret_old_seq are both NULL.
*/
/*
 * Parameters:
 *   s            - daemon state holding the metadata hash tables.
 *   arg_name     - VG name the metadata is for (required).
 *   arg_vgid     - VG uuid the metadata is for (required); it must equal
 *                  the "metadata/id" value inside new_metadata.
 *   new_metadata - config node holding the new VG metadata.  It is passed
 *                  through filter_metadata() and then cloned; the clone is
 *                  what may be stored in the cache.
 *   ret_old_seq  - if not NULL, receives the seqno of the cached metadata,
 *                  or the new seqno when nothing was cached.  It is only
 *                  written once the new metadata has been cloned.
 *   pvid         - PV the metadata was read from, or NULL when the
 *                  metadata comes from a command.
 *
 * Returns 1 if the cache now holds (or already held) the new metadata,
 * 0 if the new metadata was ignored.  Terminates the daemon with
 * exit(EXIT_FAILURE) when the cache cannot be kept consistent.
 */
static int _update_metadata(lvmetad_state *s, const char *arg_name, const char *arg_vgid,
			    struct dm_config_node *new_metadata, int *ret_old_seq,
			    const char *pvid)
{
	struct dm_config_tree *old_meta = NULL;
	struct dm_config_tree *new_meta = NULL;
	const char *arg_name_lookup; /* name lookup result from arg_vgid */
	const char *arg_vgid_lookup; /* vgid lookup result from arg_name */
	const char *old_name = NULL;
	const char *new_name = NULL;
	const char *old_vgid = NULL;
	const char *new_vgid = NULL;
	const char *new_metadata_vgid;
	int old_seq = -1;
	int new_seq = -1;
	int needs_repair = 0;
	int abort_daemon = 0;
	int retval = 0;
	int count = 0;

	if (!arg_vgid || !arg_name) {
		ERROR(s, "update_metadata missing args arg_vgid %s arg_name %s pvid %s",
		      arg_vgid ?: "none", arg_name ?: "none", pvid ?: "none");
		return 0;
	}

	DEBUGLOG(s, "update_metadata begin arg_vgid %s arg_name %s pvid %s",
		 arg_vgid, arg_name, pvid ?: "none");

	/*
	 * Begin by figuring out what has changed:
	 * . the VG could be new - found no existing record of the vgid or name.
	 * . the VG could have a new vgid - found an existing record of the name.
	 * . the VG could have a new name - found an existing record of the vgid.
	 * . the VG could have unchanged vgid and name - found existing record of both.
	 */

	arg_name_lookup = dm_hash_lookup(s->vgid_to_vgname, arg_vgid);
	arg_vgid_lookup = dm_hash_lookup_with_val(s->vgname_to_vgid, arg_name, arg_vgid, strlen(arg_vgid) + 1);

	/*
	 * A new VG when there is no existing record of the name or vgid args.
	 */
	if (!arg_name_lookup && !arg_vgid_lookup) {
		new_vgid = arg_vgid;
		new_name = arg_name;

		DEBUGLOG(s, "update_metadata new name %s and new vgid %s",
			 new_name, new_vgid);
		goto update;
	}

	/*
	 * An existing name has a new vgid (new_vgid = arg_vgid).
	 * A lookup of the name arg was successful in finding arg_vgid_lookup,
	 * but that resulting vgid doesn't match the arg_vgid.
	 */
	if (arg_vgid_lookup && strcmp(arg_vgid_lookup, arg_vgid)) {
		if (arg_name_lookup) {
			/*
			 * This shouldn't happen.
			 * arg_vgid should be new and should not map to any name.
			 */
			ERROR(s, "update_metadata arg_vgid %s arg_name %s unexpected arg_name_lookup %s",
			      arg_vgid, arg_name, arg_name_lookup);
			needs_repair = 1;
			goto update;
		}

		new_vgid = arg_vgid;
		old_vgid = dm_hash_lookup_with_count(s->vgname_to_vgid, arg_name, &count);

		/*
		 * FIXME: this ensures that arg_name maps to only one existing
		 * VG (old_vgid), because if it maps to multiple vgids, then we
		 * don't know which one should get the new vgid (arg_vgid).  If
		 * this function was given both the existing name and existing
		 * vgid to identify the VG, then this wouldn't be a problem.
		 * But as it is now, the vgid arg to this function is the new
		 * vgid and the existing VG is specified only by name.
		 */
		if (old_vgid && (count > 1)) {
			ERROR(s, "update_metadata arg_vgid %s arg_name %s found %d vgids for name",
			      arg_vgid, arg_name, count);
			old_vgid = NULL;
		}

		if (!old_vgid) {
			/* This shouldn't happen. */
			ERROR(s, "update_metadata arg_vgid %s arg_name %s no old_vgid",
			      arg_vgid, arg_name);
			needs_repair = 1;
			goto update;
		}

		if (!(old_meta = dm_hash_lookup(s->vgid_to_metadata, old_vgid))) {
			/* This shouldn't happen. */
			ERROR(s, "update_metadata arg_vgid %s arg_name %s old_vgid %s no old_meta",
			      arg_vgid, arg_name, old_vgid);
			needs_repair = 1;
			goto update;
		}

		DEBUGLOG(s, "update_metadata existing name %s has new vgid %s old vgid %s",
			 arg_name, new_vgid, old_vgid);
		goto update;
	}

	/*
	 * An existing vgid has a new name (new_name = arg_name).
	 * A lookup of the vgid arg was successful in finding arg_name_lookup,
	 * but that resulting name doesn't match the arg_name.
	 */
	if (arg_name_lookup && strcmp(arg_name_lookup, arg_name)) {
		if (arg_vgid_lookup) {
			/*
			 * This shouldn't happen.
			 * arg_name should be new and should not map to any vgid.
			 */
			ERROR(s, "update_metadata arg_vgid %s arg_name %s unexpected arg_vgid_lookup %s",
			      arg_vgid, arg_name, arg_vgid_lookup);
			needs_repair = 1;
			goto update;
		}

		new_name = arg_name;
		old_name = dm_hash_lookup(s->vgid_to_vgname, arg_vgid);

		if (!old_name) {
			/* This shouldn't happen. */
			ERROR(s, "update_metadata arg_vgid %s arg_name %s no old_name",
			      arg_vgid, arg_name);
			needs_repair = 1;
			goto update;
		}

		if (!(old_meta = dm_hash_lookup(s->vgid_to_metadata, arg_vgid))) {
			/* This shouldn't happen. */
			ERROR(s, "update_metadata arg_vgid %s arg_name %s old_name %s no old_meta",
			      arg_vgid, arg_name, old_name);
			needs_repair = 1;
			goto update;
		}

		DEBUGLOG(s, "update_metadata existing vgid %s has new name %s old name %s",
			 arg_vgid, new_name, old_name);
		goto update;
	}

	/*
	 * An existing VG has unchanged name and vgid.
	 */
	if (!new_vgid && !new_name) {
		if (!arg_vgid_lookup || !arg_name_lookup) {
			/* This shouldn't happen. */
			ERROR(s, "update_metadata arg_vgid %s arg_name %s missing lookups vgid %s name %s",
			      arg_vgid ?: "none", arg_name ?: "none", arg_vgid_lookup ?: "none", arg_name_lookup ?: "none");
			needs_repair = 1;
			goto update;
		}

		if (strcmp(arg_name_lookup, arg_name)) {
			/* This shouldn't happen. */
			ERROR(s, "update_metadata arg_vgid %s arg_name %s mismatch arg_name_lookup %s",
			      arg_vgid, arg_name, arg_name_lookup);
			needs_repair = 1;
			goto update;
		}

		if (strcmp(arg_vgid_lookup, arg_vgid)) {
			/* This shouldn't happen. Two VGs with the same name is handled above. */
			ERROR(s, "update_metadata arg_vgid %s arg_name %s mismatch arg_vgid_lookup %s",
			      arg_vgid, arg_name, arg_vgid_lookup);
			needs_repair = 1;
			goto update;
		}

		/* old_vgid == arg_vgid, and old_name == arg_name */

		if (!(old_meta = dm_hash_lookup(s->vgid_to_metadata, arg_vgid))) {
			/* This shouldn't happen. */
			ERROR(s, "update_metadata arg_vgid %s arg_name %s no old_meta",
			      arg_vgid, arg_name);
			needs_repair = 1;
			goto update;
		}

		DEBUGLOG(s, "update_metadata existing vgid %s and existing name %s",
			 arg_vgid, arg_name);
		goto update;
	}

update:
	filter_metadata(new_metadata); /* sanitize */

	/*
	 * FIXME: verify that there's at least one PV in common between
	 * the old and new metadata?
	 */

	if (!(new_meta = dm_config_create()) ||
	    !(new_meta->root = dm_config_clone_node(new_meta, new_metadata, 0))) {
		ERROR(s, "update_metadata out of memory for new metadata for %s %s",
		      arg_name, arg_vgid);
		/* FIXME: should we purge the old metadata here? */
		retval = 0;
		goto out;
	}

	/*
	 * Get the seqno from existing (old) and new metadata and perform
	 * sanity checks for transitions that generally shouldn't happen.
	 * Sometimes ignore the new metadata and leave the existing metadata
	 * alone, and sometimes purge the existing metadata and add the new.
	 * This often depends on whether the new metadata comes from a single
	 * PV (via pv_found) that's been scanned, or a vg_update sent from a
	 * command.
	 */

	new_seq = dm_config_find_int(new_metadata, "metadata/seqno", -1);

	if (old_meta)
		old_seq = dm_config_find_int(old_meta->root, "metadata/seqno", -1);

	if (ret_old_seq)
		*ret_old_seq = old_meta ? old_seq : new_seq;

	/*
	 * The new metadata has an invalid seqno.
	 * This shouldn't happen, but if it does, ignore the new metadata.
	 *
	 * Past this check new_seq is always > 0, so every later comparison
	 * that finds old_seq equal to or larger than new_seq implies that
	 * old_meta is set (old_seq stays -1 without it).
	 */
	if (new_seq <= 0) {
		ERROR(s, "update_metadata ignore new metadata because of invalid seqno for %s %s",
		      arg_vgid, arg_name);
		DEBUGLOG_cft(s, "NEW: ", new_metadata);
		retval = 0;
		goto out;
	}

	/*
	 * The new metadata is missing an internal vgid.
	 * This shouldn't happen, but if it does, ignore the new metadata.
	 */
	if (!(new_metadata_vgid = dm_config_find_str(new_meta->root, "metadata/id", NULL))) {
		ERROR(s, "update_metadata has no internal vgid for %s %s",
		      arg_name, arg_vgid);
		DEBUGLOG_cft(s, "NEW: ", new_metadata);
		retval = 0;
		goto out;
	}

	/*
	 * The new metadata internal vgid doesn't match the arg vgid.
	 * This shouldn't happen, but if it does, ignore the new metadata.
	 */
	if (strcmp(new_metadata_vgid, arg_vgid)) {
		ERROR(s, "update_metadata has bad internal vgid %s for %s %s",
		      new_metadata_vgid, arg_name, arg_vgid);
		DEBUGLOG_cft(s, "NEW: ", new_metadata);
		retval = 0;
		goto out;
	}

	/*
	 * A single PV appears with metadata that's inconsistent with
	 * existing, ignore the PV.  FIXME: make it outdated?
	 */
	if (pvid && needs_repair) {
		ERROR(s, "update_metadata ignore inconsistent metadata on PV %s seqno %d for %s %s seqno %d",
		      pvid, new_seq, arg_vgid, arg_name, old_seq);
		if (old_meta)
			DEBUGLOG_cft(s, "OLD: ", old_meta->root);
		DEBUGLOG_cft(s, "NEW: ", new_metadata);
		retval = 0;
		goto out;
	}

	/*
	 * A VG update with metadata that's inconsistent with existing.
	 */
	if (!pvid && needs_repair) {
		ERROR(s, "update_metadata inconsistent with cache for vgid %s and name %s",
		      arg_vgid, arg_name);
		if (old_meta)
			DEBUGLOG_cft(s, "OLD: ", old_meta->root);
		DEBUGLOG_cft(s, "NEW: ", new_metadata);
		abort_daemon = 1;
		retval = 0;
		goto out;
	}

	/*
	 * A single PV appears with metadata that's older than the existing,
	 * e.g. a PV that had been in the VG has reappeared after the VG changed.
	 * old PV: the PV that lvmetad was told about first
	 * new PV: the PV that lvmetad is being told about here, second
	 * old_seq: the larger seqno on the old PV, for the newer version of the VG
	 * new_seq: the smaller seqno on the new PV, for the older version of the VG
	 *
	 * So, the new PV (by notification order) is "older" (in terms of
	 * VG seqno) than the old PV.
	 *
	 * Make the new PV outdated so it'll be cleared and keep the existing
	 * metadata from the old PV.
	 */
	if (pvid && (old_seq > 0) && (new_seq < old_seq)) {
		ERROR(s, "update_metadata ignoring outdated metadata on PV %s seqno %d for %s %s seqno %d",
		      pvid, new_seq, arg_vgid, arg_name, old_seq);
		DEBUGLOG_cft(s, "OLD: ", old_meta->root);
		DEBUGLOG_cft(s, "NEW: ", new_metadata);
		mark_outdated_pv(s, arg_vgid, pvid);
		retval = 0;
		goto out;
	}

	/*
	 * A single PV appears with metadata that's newer than the existing,
	 * e.g. a PV has been found with VG metadata that is newer than the
	 * VG metadata we know about.  This can happen when scanning PVs after
	 * an outdated PV (with an older version of the VG metadata) has
	 * reappeared.  The rescanning may initially scan the outdated PV
	 * and notify lvmetad about it, and then scan a current PV from
	 * the VG and notify lvmetad about it.
	 * old PV: the PV that lvmetad was told about first
	 * new PV: the PV that lvmetad is being told about here, second
	 * old_seq: the smaller seqno on the old PV, for the older version of the VG
	 * new_seq: the larger seqno on the new PV, for the newer version of the VG
	 *
	 * Make the existing PVs outdated, and use the new metadata.
	 */
	if (pvid && (old_seq > 0) && (new_seq > old_seq)) {
		ERROR(s, "update_metadata found newer metadata on PV %s seqno %d for %s %s seqno %d",
		      pvid, new_seq, arg_vgid, arg_name, old_seq);
		DEBUGLOG_cft(s, "OLD: ", old_meta->root);
		DEBUGLOG_cft(s, "NEW: ", new_metadata);
		/* NOTE(review): the result of this call is not checked - confirm that is intended. */
		_update_pvid_to_vgid(s, old_meta, arg_vgid, MARK_OUTDATED);
	}

	/*
	 * The existing/old metadata has an invalid seqno.
	 * This shouldn't happen, but if it does, purge old and add the new.
	 */
	if (old_meta && (old_seq <= 0)) {
		ERROR(s, "update_metadata bad old seqno %d for %s %s",
		      old_seq, arg_name, arg_vgid);
		DEBUGLOG_cft(s, "OLD: ", old_meta->root);
		_purge_metadata(s, arg_name, arg_vgid);
		new_name = arg_name;
		new_vgid = arg_vgid;
		old_name = NULL;
		old_vgid = NULL;
		old_meta = NULL;
		old_seq = -1;
	}

	/*
	 * A single PV appears with a seqno matching existing metadata,
	 * but unmatching metadata content.  This shouldn't happen,
	 * but if it does, ignore the PV.  FIXME: make it outdated?
	 */
	if (pvid && (new_seq == old_seq) && compare_config(new_metadata, old_meta->root)) {
		ERROR(s, "update_metadata from pv %s same seqno %d with unmatching data for %s %s",
		      pvid, new_seq, arg_name, arg_vgid);
		DEBUGLOG_cft(s, "OLD: ", old_meta->root);
		DEBUGLOG_cft(s, "NEW: ", new_metadata);
		retval = 0;
		goto out;
	}

	/*
	 * A VG update with metadata matching existing seqno but unmatching content.
	 * This shouldn't happen, but if it does, purge existing and add the new.
	 */
	if (!pvid && (new_seq == old_seq) && compare_config(new_metadata, old_meta->root)) {
		ERROR(s, "update_metadata same seqno %d with unmatching data for %s %s",
		      new_seq, arg_name, arg_vgid);
		DEBUGLOG_cft(s, "OLD: ", old_meta->root);
		DEBUGLOG_cft(s, "NEW: ", new_metadata);
		_purge_metadata(s, arg_name, arg_vgid);
		new_name = arg_name;
		new_vgid = arg_vgid;
		old_name = NULL;
		old_vgid = NULL;
		old_meta = NULL;
		old_seq = -1;
	}

	/*
	 * A VG update with metadata older than existing.  VG updates should
	 * have increasing seqno.  This shouldn't happen, but if it does,
	 * purge existing and add the new.
	 */
	if (!pvid && (new_seq < old_seq)) {
		ERROR(s, "update_metadata new seqno %d less than old seqno %d for %s %s",
		      new_seq, old_seq, arg_name, arg_vgid);
		DEBUGLOG_cft(s, "OLD: ", old_meta->root);
		DEBUGLOG_cft(s, "NEW: ", new_metadata);
		_purge_metadata(s, arg_name, arg_vgid);
		new_name = arg_name;
		new_vgid = arg_vgid;
		old_name = NULL;
		old_vgid = NULL;
		old_meta = NULL;
		old_seq = -1;
	}

	/*
	 * All the checks are done, do one of the four possible updates
	 * outlined above:
	 */

	/*
	 * Add metadata for a new VG to the cache.
	 */
	if (new_name && new_vgid)
		return _update_metadata_add_new(s, new_name, new_vgid, new_meta);

	/*
	 * Update cached metadata for a VG with a new vgid.
	 */
	if (new_vgid)
		return _update_metadata_new_vgid(s, arg_name, old_vgid, new_vgid, old_meta, new_meta);

	/*
	 * Update cached metadata for a renamed VG.
	 */
	if (new_name)
		return _update_metadata_new_name(s, arg_vgid, old_name, new_name, old_meta, new_meta);

	/*
	 * If the old and new seqnos are the same, we've already compared the
	 * old/new metadata and verified it's the same, so there's no reason
	 * to replace old meta with new meta.
	 */
	if (old_seq == new_seq) {
		DEBUGLOG(s, "update_metadata skipped for %s %s seqno %d is unchanged",
			 arg_name, arg_vgid, old_seq);
		dm_config_destroy(new_meta);
		new_meta = NULL;
		retval = 1;
		goto out;
	}

	/*
	 * Update cached metadata for a VG with unchanged name and vgid.
	 * Replace the old metadata with the new metadata.
	 * old_meta is the old copy of the metadata from the cache.
	 * new_meta is the new copy of the metadata from the command.
	 */
	DEBUGLOG(s, "update_metadata for %s %s from %d to %d", arg_name, arg_vgid, old_seq, new_seq);

	/*
	 * The PVs in the VG may have changed in the new metadata, so
	 * temporarily orphan all of the PVs in the existing VG.
	 * The PVs that are still in the VG will be reassigned to this
	 * VG below by the next call to _update_pvid_to_vgid().
	 */
	if (!_update_pvid_to_vgid(s, old_meta, "#orphan", 0)) {
		ERROR(s, "update_metadata failed to move PVs for %s %s", arg_name, arg_vgid);
		abort_daemon = 1;
		retval = 0;
		goto out;
	}

	/*
	 * The only hash table update that is needed is the actual
	 * metadata config tree in vgid_to_metadata.  The VG name
	 * and vgid are unchanged.
	 */

	dm_hash_remove(s->vgid_to_metadata, arg_vgid);
	dm_config_destroy(old_meta);
	old_meta = NULL;

	if (!dm_hash_insert(s->vgid_to_metadata, arg_vgid, new_meta)) {
		ERROR(s, "update_metadata out of memory for hash insert for %s %s", arg_name, arg_vgid);
		abort_daemon = 1;
		retval = 0;
		goto out;
	}

	/*
	 * Map the PVs in the new metadata to the vgid.
	 * All pre-existing PVs were temporarily orphaned above.
	 * Previous PVs that were removed from the VG will not
	 * be remapped.  New PVs that were added to the VG will
	 * be newly mapped to this vgid, and previous PVs that
	 * remain in the VG will be remapped to the VG again.
	 */
	if (!_update_pvid_to_vgid(s, new_meta, arg_vgid, 1)) {
		ERROR(s, "update_metadata failed to update PVs for %s %s", arg_name, arg_vgid);
		abort_daemon = 1;
		retval = 0;
	} else {
		DEBUGLOG(s, "update_metadata is done for %s %s", arg_name, arg_vgid);
		retval = 1;
	}

out:
	if (abort_daemon) {
		/* Same wording as the _update_metadata_* helpers use. */
		ERROR(s, "lvmetad could not be updated and is aborting.");
		exit(EXIT_FAILURE);
	}

	/* The clone was not stored in the cache, so it is released here. */
	if (!retval && new_meta)
		dm_config_destroy(new_meta);
	return retval;
}
/*
 * pv_gone: remove a PV from the cache.
 *
 * The PV is identified by the request's "uuid" field or, when that is
 * absent, by looking up the request's "device" field in device_to_pvid.
 *
 * Replies "unknown" if the PV cannot be identified or has no cached PV
 * metadata, "failed" if memory runs out before anything was changed,
 * and "OK" once the PV has been removed.
 */
static response pv_gone(lvmetad_state *s, request r)
{
	const char *arg_pvid;
	char *dev_pvid = NULL;	/* pvid string owned by the device_to_pvid entry */
	const char *pvid;
	int64_t device;
	struct dm_config_tree *pvmeta;
	char *vgid;
	char *vgid_dup = NULL;

	arg_pvid = daemon_request_str(r, "uuid", NULL);
	device = daemon_request_int(r, "device", 0);

	/*
	 * Always fetch the string stored for this device: the mapping is
	 * removed below whether or not a uuid was supplied, and the stored
	 * string has to be freed once it is unhashed.
	 */
	if (device > 0)
		dev_pvid = dm_hash_lookup_binary(s->device_to_pvid, &device, sizeof(device));

	if (!arg_pvid && !dev_pvid) {
		DEBUGLOG(s, "pv_gone device %" PRIu64 " not found", (uint64_t) device);
		return reply_unknown("device not in cache");
	}

	pvid = arg_pvid ? arg_pvid : dev_pvid;

	DEBUGLOG(s, "pv_gone %s device %" PRIu64, pvid ?: "none", (uint64_t) device);

	if (!(pvmeta = dm_hash_lookup(s->pvid_to_pvmeta, pvid))) {
		DEBUGLOG(s, "pv_gone %s device %" PRIu64 " has no PV metadata",
			 pvid ?: "none", (uint64_t) device);
		return reply_unknown("PVID does not exist");
	}

	/*
	 * vg_remove_if_missing will clear and free the pvid_to_vgid
	 * mappings for this vg, which will free the "vgid" string that
	 * is returned from the pvid_to_vgid lookup, so it needs a copy.
	 * Make the copy before touching the hashes so that running out
	 * of memory leaves the cache unchanged.
	 */
	vgid = dm_hash_lookup(s->pvid_to_vgid, pvid);
	if (vgid && !(vgid_dup = dm_strdup(vgid)))
		return reply_fail("out of memory");

	dm_hash_remove_binary(s->device_to_pvid, &device, sizeof(device));
	dm_hash_remove(s->pvid_to_pvmeta, pvid);

	if (vgid_dup) {
		vg_remove_if_missing(s, vgid_dup, 1);
		dm_free(vgid_dup);
		vgid_dup = NULL;
		vgid = NULL;
	}

	dm_config_destroy(pvmeta);

	/* pvid may point at dev_pvid, so this is freed only after its last use. */
	if (dev_pvid)
		dm_free(dev_pvid);

	return daemon_reply_simple("OK", NULL );
}
/*
 * pv_clear_all: drop the whole cache by destroying the metadata hashes
 * and creating them again.  The request contents are not used.
 */
static response pv_clear_all(lvmetad_state *s, request r)
{
	(void) r;

	DEBUGLOG(s, "pv_clear_all");

	destroy_metadata_hashes(s);
	create_metadata_hashes(s);

	return daemon_reply_simple("OK", NULL);
}
/*
* Returns 1 if PV metadata exists for all PVs in a VG.
*/
/*
 * Walks the PV entries of the VG metadata and returns 0 at the first
 * one whose "id" has no entry in pvid_to_pvmeta; returns 1 otherwise.
 */
static int _vg_is_complete(lvmetad_state *s, struct dm_config_tree *vgmeta)
{
	struct dm_config_node *cur;
	const char *id;

	for (cur = pvs(vgmeta->root); cur != NULL; cur = cur->sib) {
		id = dm_config_find_str(cur->child, "id", NULL);

		/* An entry without an "id" cannot be checked and is skipped. */
		if (id == NULL)
			continue;

		if (dm_hash_lookup(s->pvid_to_pvmeta, id) == NULL)
			return 0;
	}

	return 1;
}
/*
* pv_found: a PV has appeared and been scanned
* It contains PV metadata, and optionally VG metadata.
* Both kinds of metadata should be added to the cache
* and hash table mappings related to the PV and device
* should be updated.
*
* Input values from request:
* . arg_pvmeta: PV metadata from the found pv
* . arg_pvid: pvid from arg_pvmeta (pvmeta/id)
* . arg_device: device from arg_pvmeta (pvmeta/device)
* . arg_vgmeta: VG metadata from the found pv (optional)
* . arg_name: VG name from found pv (optional)
* . arg_vgid: VG vgid from arg_vgmeta (optional)
*
* Search for existing mappings in hash tables:
* . pvid_to_pvmeta (which produces pvid to device)
* . device_to_pvid
* . pvid_to_vgid
*
* Existing data from cache:
* . old_pvmeta: result of pvid_to_pvmeta(arg_pvid)
* . arg_device_lookup: result of old_pvmeta:pvmeta/device using arg_pvid
* . arg_pvid_lookup: result of device_to_pvid(arg_device)
* . arg_vgid_lookup: result of pvid_to_vgid(arg_pvid)
*
* When arg_pvid doesn't match arg_pvid_lookup:
* . a new PV replaces a previous PV on arg_device
* . prev_pvid_on_dev: set to arg_pvid_lookup, pvid of the prev PV
* . prev_pvmeta_on_dev: result pvid_to_pvmeta(prev_pvid_on_dev)
* . prev_vgid_on_dev: result of pvid_to_vgid(prev_pvid_on_dev)
*
* Old PV on old device
* . no PV/device mappings have changed
* . arg_pvid_lookup == arg_pvid && arg_device_lookup == arg_device
* . arg_device was used to look up a PV and found a PV with
* the same pvid as arg_pvid
* . arg_pvid was used to look up a PV and found a PV on the
* same device as arg_device
* . new_pvmeta may be more recent than old_pvmeta
*
* New PV on new device
* . add new mappings in hash tables
* . !arg_pvid_lookup && !arg_device_lookup
* . arg_device was used to look up a PV and found nothing
* . arg_pvid was used to look up a PV and found nothing
*
* New PV on old device
* . a new PV replaces a previous PV on a device
* . arg_pvid_lookup != arg_pvid
* . arg_device was used to look up a PV and found a PV with
* a different pvid than arg_pvid
* . replace existing mappings for arg_device and arg_pvid
* . replace existing old_pvmeta with new_pvmeta
* . remove arg_device association with prev PV (prev_pvid_on_dev)
* . possibly remove prev PV (if arg_device was previously a duplicate)
*
* Old PV on new device
* . a duplicate PV
* . arg_device_lookup != arg_device
* . arg_pvid was used to look up a PV, and found that the PV
* has a different device than arg_device.
*/
/*
 * Replies "failed" when the request is missing required fields, when
 * the PV/device combination is in an unknown state, or when the PV is
 * a duplicate (which also sets the GLFL_DISABLE flags).  Otherwise it
 * replies "OK" with the VG status, a "changed" flag, the vgid and name,
 * and the seqno before and after.  Running out of memory terminates
 * the daemon with exit(EXIT_FAILURE).
 */
static response pv_found(lvmetad_state *s, request r)
{
	struct dm_config_node *arg_vgmeta = NULL;
	struct dm_config_node *arg_pvmeta = NULL;
	struct dm_config_tree *old_pvmeta = NULL;
	struct dm_config_tree *new_pvmeta = NULL;
	struct dm_config_tree *prev_pvmeta_on_dev = NULL;
	struct dm_config_tree *vgmeta = NULL;
	const char *arg_pvid = NULL;
	const char *arg_pvid_lookup = NULL;
	const char *new_pvid = NULL;
	const char *new_pvid_dup = NULL;
	const char *arg_name = NULL;
	const char *arg_vgid = NULL;
	const char *arg_vgid_lookup = NULL;
	const char *prev_pvid_on_dev = NULL;
	const char *prev_vgid_on_dev = NULL;
	const char *vg_status = NULL;
	uint64_t arg_device = 0;
	uint64_t arg_device_lookup = 0;
	uint64_t new_device = 0;
	uint64_t old_device = 0;
	int arg_seqno = -1;
	int old_seqno = -1;
	int vg_status_seqno = -1;
	int changed = 0;

	/*
	 * New input values.
	 */

	if (!(arg_pvmeta = dm_config_find_node(r.cft->root, "pvmeta"))) {
		ERROR(s, "Ignore PV without PV metadata");
		return reply_fail("Ignore PV without PV metadata");
	}

	if (!(arg_pvid = daemon_request_str(r, "pvmeta/id", NULL))) {
		ERROR(s, "Ignore PV without PV UUID");
		return reply_fail("Ignore PV without PV UUID");
	}

	if (!dm_config_get_uint64(arg_pvmeta, "pvmeta/device", &arg_device)) {
		ERROR(s, "Ignore PV without device pvid %s", arg_pvid);
		return reply_fail("Ignore PV without device");
	}

	if ((arg_vgmeta = dm_config_find_node(r.cft->root, "metadata"))) {
		arg_name = daemon_request_str(r, "vgname", NULL);
		arg_vgid = daemon_request_str(r, "metadata/id", NULL);
		arg_seqno = daemon_request_int(r, "metadata/seqno", -1);

		if (!arg_name || !arg_vgid || (arg_seqno < 0))
			ERROR(s, "Ignore VG metadata from PV %s", arg_pvid);
		if (!arg_name)
			return reply_fail("Ignore VG metadata from PV without VG name");
		if (!arg_vgid)
			return reply_fail("Ignore VG metadata from PV without VG vgid");
		if (arg_seqno < 0)
			return reply_fail("Ignore VG metadata from PV without VG seqno");
	}

	/* Make a copy of the new pvmeta that can be inserted into cache. */
	if (!(new_pvmeta = dm_config_create()) ||
	    !(new_pvmeta->root = dm_config_clone_node(new_pvmeta, arg_pvmeta, 0))) {
		ERROR(s, "pv_found out of memory for new pvmeta %s", arg_pvid);
		goto nomem;
	}

	/*
	 * Existing (old) cache values.
	 */

	old_pvmeta = dm_hash_lookup(s->pvid_to_pvmeta, arg_pvid);
	if (old_pvmeta)
		dm_config_get_uint64(old_pvmeta->root, "pvmeta/device", &arg_device_lookup);

	arg_pvid_lookup = dm_hash_lookup_binary(s->device_to_pvid, &arg_device, sizeof(arg_device));

	/* Only reported in the "unknown state" error below. */
	arg_vgid_lookup = dm_hash_lookup(s->pvid_to_vgid, arg_pvid);

	/*
	 * Determine which of the four possible changes is happening
	 * by comparing the existing/old and new values:
	 * old PV, old device
	 * new PV, new device
	 * new PV, old device
	 * old PV, new device
	 */

	if (arg_pvid_lookup && arg_device_lookup &&
	    (arg_device == arg_device_lookup) &&
	    !strcmp(arg_pvid_lookup, arg_pvid)) {
		/*
		 * Old PV on old device (existing values unchanged)
		 */
		new_pvid = NULL;
		new_device = 0;

		DEBUGLOG(s, "pv_found pvid %s on device %" PRIu64 " matches existing",
			 arg_pvid, arg_device);

	} else if (!arg_pvid_lookup && !arg_device_lookup) {
		/*
		 * New PV on new device (no existing values)
		 */
		new_pvid = arg_pvid;
		new_device = arg_device;

		DEBUGLOG(s, "pv_found pvid %s on device %" PRIu64 " is new",
			 arg_pvid, arg_device);

	} else if (arg_pvid_lookup && strcmp(arg_pvid_lookup, arg_pvid)) {
		/*
		 * New PV on old device (existing device reused for new PV)
		 */
		new_pvid = arg_pvid;
		new_device = 0;
		prev_pvid_on_dev = arg_pvid_lookup;
		prev_pvmeta_on_dev = dm_hash_lookup(s->pvid_to_pvmeta, arg_pvid_lookup);
		prev_vgid_on_dev = dm_hash_lookup(s->pvid_to_vgid, arg_pvid_lookup);

		DEBUGLOG(s, "pv_found pvid %s vgid %s on device %" PRIu64 " previous pvid %s vgid %s",
			 arg_pvid, arg_vgid ?: "none", arg_device,
			 prev_pvid_on_dev, prev_vgid_on_dev ?: "none");

	} else if (arg_device_lookup && (arg_device_lookup != arg_device)) {
		/*
		 * Old PV on new device (existing PV on a new device, i.e. duplicate)
		 */
		new_device = arg_device;
		new_pvid = NULL;
		old_device = arg_device_lookup;

		DEBUGLOG(s, "pv_found pvid %s vgid %s on device %" PRIu64 " duplicate %" PRIu64,
			 arg_pvid, arg_vgid ?: "none", arg_device, arg_device_lookup);

	} else {
		ERROR(s, "pv_found pvid %s vgid %s on device %" PRIu64 " unknown lookup %s %s %" PRIu64,
		      arg_pvid,
		      arg_vgid ?: "none",
		      arg_device,
		      arg_pvid_lookup ?: "none",
		      arg_vgid_lookup ?: "none",
		      arg_device_lookup);
		/* The copy was never inserted into the cache, so release it. */
		dm_config_destroy(new_pvmeta);
		return reply_fail("Ignore PV for unknown state");
	}

	/*
	 * Make changes to hashes device_to_pvid and pvid_to_pvmeta for each case.
	 */

	if (!new_pvid && !new_device) {
		/*
		 * Old PV on old device (unchanged)
		 * . add new_pvmeta, replacing old_pvmeta
		 */
		if (compare_config(old_pvmeta->root, new_pvmeta->root))
			changed |= 1;

		if (!dm_hash_insert(s->pvid_to_pvmeta, arg_pvid, new_pvmeta))
			goto nomem;

	} else if (new_pvid && new_device) {
		/*
		 * New PV on new device (new entry)
		 * . add new_device/new_pvid mapping
		 * . add new_pvmeta
		 */
		changed |= 1;

		DEBUGLOG(s, "pv_found new entry device_to_pvid %" PRIu64 " to %s",
			 new_device, new_pvid);

		if (!(new_pvid_dup = dm_strdup(new_pvid)))
			goto nomem;

		if (!dm_hash_insert_binary(s->device_to_pvid, &new_device, sizeof(new_device), (char *)new_pvid_dup))
			goto nomem;

		if (!dm_hash_insert(s->pvid_to_pvmeta, new_pvid, new_pvmeta))
			goto nomem;

	} else if (new_pvid && !new_device) {
		/*
		 * New PV on old device (existing device reused for new PV).
		 * The previous PV on arg_device is replaced by the new one.
		 *
		 * Don't free prev_pvid or prev_vgid strings because they are
		 * used at the end to check the VG metadata.
		 */
		changed |= 1;

		if (prev_pvmeta_on_dev) {
			DEBUGLOG(s, "pv_found new pvid device_to_pvid %" PRIu64 " to %s removes prev pvid %s",
				 arg_device, new_pvid, prev_pvid_on_dev);

			dm_hash_remove(s->pvid_to_pvmeta, prev_pvid_on_dev);
			dm_config_destroy(prev_pvmeta_on_dev);
			prev_pvmeta_on_dev = NULL;

			/* removes arg_device/prev_pvid_on_dev mapping */
			dm_hash_remove_binary(s->device_to_pvid, &arg_device, sizeof(arg_device));

			/*
			 * The new PV replacing the prev PV was copied from
			 * another existing PV, creating a duplicate PV which
			 * we ignore.
			 */
			if (dm_hash_lookup(s->pvid_to_pvmeta, new_pvid)) {
				DEBUGLOG(s, "pv_found ignore duplicate device %" PRIu64 " of existing PV for pvid %s",
					 arg_device, arg_pvid);
				dm_config_destroy(new_pvmeta);
				/*
				 * device_to_pvid no longer references this
				 * string and the end of the function is not
				 * reached, so free it here.
				 */
				dm_free((void *)prev_pvid_on_dev);
				s->flags |= GLFL_DISABLE;
				s->flags |= GLFL_DISABLE_REASON_DUPLICATES;
				return reply_fail("Ignore duplicate PV");
			}
		}

		if (!(new_pvid_dup = dm_strdup(new_pvid)))
			goto nomem;

		if (!dm_hash_insert_binary(s->device_to_pvid, &arg_device, sizeof(arg_device), (char *)new_pvid_dup))
			goto nomem;

		if (!dm_hash_insert(s->pvid_to_pvmeta, new_pvid, new_pvmeta))
			goto nomem;

	} else if (new_device && !new_pvid) {
		/*
		 * Old PV on new device (duplicate)
		 * Ignore it.
		 */
		DEBUGLOG(s, "pv_found ignore duplicate device %" PRIu64 " of existing device %" PRIu64 " for pvid %s",
			 new_device, old_device, arg_pvid);
		dm_config_destroy(new_pvmeta);
		s->flags |= GLFL_DISABLE;
		s->flags |= GLFL_DISABLE_REASON_DUPLICATES;
		return reply_fail("Ignore duplicate PV");
	}

	/* Any previous pvmeta for arg_pvid has been replaced in the hash. */
	if (old_pvmeta)
		dm_config_destroy(old_pvmeta);

	/*
	 * Update VG metadata cache with arg_vgmeta from the PV, or
	 * if the PV holds no VG metadata, then look up the vgid and
	 * name of the VG so we can check if the VG is complete.
	 */
	if (arg_vgmeta) {
		DEBUGLOG(s, "pv_found pvid %s has VG %s %s seqno %d", arg_pvid, arg_name, arg_vgid, arg_seqno);

		if (!_update_metadata(s, arg_name, arg_vgid, arg_vgmeta, &old_seqno, arg_pvid)) {
			ERROR(s, "Cannot use VG metadata for %s %s from PV %s on %" PRIu64,
			      arg_name, arg_vgid, arg_pvid, arg_device);
		}

		changed |= (old_seqno != arg_seqno);
	} else {
		arg_vgid = dm_hash_lookup(s->pvid_to_vgid, arg_pvid);

		if (arg_vgid) {
			arg_name = dm_hash_lookup(s->vgid_to_vgname, arg_vgid);
		}
	}

	/*
	 * Check if the VG is complete (all PVs have been found) because
	 * the reply indicates if the VG is complete or partial.
	 * The "vgmeta" from dm_hash_lookup will be a copy of arg_vgmeta that
	 * was cloned and added to the cache by update_metadata.
	 */
	if (!arg_vgid || !strcmp(arg_vgid, "#orphan")) {
		DEBUGLOG(s, "pv_found pvid %s on %" PRIu64 " not in VG %s",
			 arg_pvid, arg_device, arg_vgid ?: "");
		vg_status = "orphan";
		goto prev_vals;
	}

	if (!(vgmeta = dm_hash_lookup(s->vgid_to_metadata, arg_vgid))) {
		/*
		 * NOTE(review): vg_status stays NULL on this path and is
		 * later passed to a "%s" reply field - confirm what the
		 * reply should contain here.
		 */
		ERROR(s, "pv_found %s on %" PRIu64 " vgid %s no VG metadata found",
		      arg_pvid, arg_device, arg_vgid);
	} else {
		vg_status = _vg_is_complete(s, vgmeta) ? "complete" : "partial";
		vg_status_seqno = dm_config_find_int(vgmeta->root, "metadata/seqno", -1);
	}

prev_vals:
	/*
	 * If the device previously held a different VG (prev_vgid_on_dev),
	 * then that VG should be removed if no devices are left for it.
	 *
	 * The mapping from the device's previous pvid to the previous vgid
	 * is removed.
	 */

	if (prev_pvid_on_dev || prev_vgid_on_dev) {
		DEBUGLOG(s, "pv_found pvid %s on %" PRIu64 " had prev pvid %s prev vgid %s",
			 arg_pvid, arg_device,
			 prev_pvid_on_dev ?: "none",
			 prev_vgid_on_dev ?: "none");
	}

	if (prev_vgid_on_dev) {
		char *tmp_vgid;

		if (!arg_vgid || strcmp(arg_vgid, prev_vgid_on_dev)) {
			/*
			 * vg_remove_if_missing will clear and free the
			 * string pointed to by prev_vgid_on_dev, so it is
			 * given a copy.  A failed copy must not be passed on.
			 */
			if (!(tmp_vgid = dm_strdup(prev_vgid_on_dev)))
				goto nomem;
			vg_remove_if_missing(s, tmp_vgid, 1);
			dm_free(tmp_vgid);
		}

		/* vg_remove_if_missing may have remapped prev_pvid_on_dev to orphan */
		if ((tmp_vgid = dm_hash_lookup(s->pvid_to_vgid, prev_pvid_on_dev))) {
			dm_hash_remove(s->pvid_to_vgid, prev_pvid_on_dev);
			dm_free(tmp_vgid);
		}
	}

	/* This was unhashed from device_to_pvid above. */
	if (prev_pvid_on_dev)
		dm_free((void *)prev_pvid_on_dev);

	return daemon_reply_simple("OK",
				   "status = %s", vg_status,
				   "changed = " FMTd64, (int64_t) changed,
				   "vgid = %s", arg_vgid ? arg_vgid : "#orphan",
				   "vgname = %s", arg_name ? arg_name : "#orphan",
				   "seqno_before = " FMTd64, (int64_t) old_seqno,
				   "seqno_after = " FMTd64, (int64_t) vg_status_seqno,
				   NULL);

nomem:
	/*
	 * The cache may be partly updated at this point, so the daemon
	 * stops.  No reply is built because the process exits before
	 * anything could be returned to the caller.
	 */
	ERROR(s, "pv_found %s is out of memory.", arg_pvid);
	ERROR(s, "lvmetad could not be updated and is aborting.");
	exit(EXIT_FAILURE);
}
/*
 * Drop the cached "outdated PVs" config tree for the VG whose id is given
 * in the request's "vgid" field.
 *
 * Returns a failure reply when no vgid is supplied; otherwise replies "OK"
 * whether or not an entry existed for that VG.
 */
static response vg_clear_outdated_pvs(lvmetad_state *s, request r)
{
	const char *vgid = daemon_request_str(r, "vgid", NULL);
	struct dm_config_tree *stale;

	if (!vgid)
		return reply_fail("need VG UUID");

	DEBUGLOG(s, "vg_clear_outdated_pvs vgid %s", vgid);

	stale = dm_hash_lookup(s->vgid_to_outdated_pvs, vgid);
	if (stale) {
		/* Free the tree first, then unhash the now-dangling entry. */
		dm_config_destroy(stale);
		dm_hash_remove(s->vgid_to_outdated_pvs, vgid);
	}

	return daemon_reply_simple("OK", NULL);
}
/*
 * Clear VGFL_INVALID on the vg_info entry for 'uuid' once the seqno found
 * in 'metadata' has caught up with (is >=) the entry's external_version.
 *
 * Nothing is changed when the metadata carries no "metadata/seqno" value
 * or when no vg_info entry exists for the VG.
 */
static void vg_info_update(lvmetad_state *s, const char *uuid,
			   struct dm_config_node *metadata)
{
	const int64_t seqno = dm_config_find_int64(metadata, "metadata/seqno", -1);
	struct vg_info *info;

	if (seqno == -1)
		return;

	info = (struct vg_info *) dm_hash_lookup(s->vgid_to_info, uuid);

	if (info && seqno >= info->external_version)
		info->flags &= ~VGFL_INVALID;
}
/*
 * Handle a "vg_update" request: replace the cached metadata of a VG with
 * the "metadata" section carried by the request, then let vg_info_update()
 * re-evaluate the VG's INVALID flag.
 *
 * A request without a "metadata" section is acknowledged with "OK" and
 * otherwise ignored.  When metadata is present, the VG id ("metadata/id"),
 * the VG name ("vgname") and a non-negative "metadata/seqno" are required.
 *
 * Any validation or update failure is fatal: the error is logged and the
 * daemon exits.  The reply_fail() results on those paths are built but
 * never returned, because control goes straight to exit().
 */
static response vg_update(lvmetad_state *s, request r)
{
	struct dm_config_node *metadata = dm_config_find_node(r.cft->root, "metadata");
	const char *vgid = daemon_request_str(r, "metadata/id", NULL);
	const char *vgname = daemon_request_str(r, "vgname", NULL);

	DEBUGLOG(s, "vg_update vgid %s name %s", vgid ?: "none", vgname ?: "none");

	/* Nothing to cache. */
	if (!metadata)
		return daemon_reply_simple("OK", NULL);

	if (!vgid) {
		ERROR(s, "vg_update failed: need VG UUID");
		reply_fail("vg_update: need VG UUID");
		goto fail;
	}

	if (!vgname) {
		ERROR(s, "vg_update failed: need VG name");
		reply_fail("vg_update: need VG name");
		goto fail;
	}

	if (daemon_request_int(r, "metadata/seqno", -1) < 0) {
		ERROR(s, "vg_update failed: need VG seqno");
		reply_fail("vg_update: need VG seqno");
		goto fail;
	}

	/* TODO defer metadata update here; add a separate vg_commit
	 * call; if client does not commit, die */
	if (!_update_metadata(s, vgname, vgid, metadata, NULL, NULL)) {
		ERROR(s, "vg_update failed: metadata update failed");
		reply_fail("vg_update: failed metadata update");
		goto fail;
	}

	vg_info_update(s, vgid, metadata);

	return daemon_reply_simple("OK", NULL);

fail:
	ERROR(s, "lvmetad could not be updated is aborting.");
	exit(EXIT_FAILURE);
}
/*
 * Handle a "vg_remove" request: remove the cached metadata of the VG whose
 * id is given in the request's "uuid" field.
 *
 * Returns a failure reply when the uuid is missing, "OK" otherwise.
 */
static response vg_remove(lvmetad_state *s, request r)
{
	const char *vgid;

	if (!(vgid = daemon_request_str(r, "uuid", NULL)))
		return reply_fail("need VG UUID");

	DEBUGLOG(s, "vg_remove: %s", vgid);

	remove_metadata(s, vgid, 1);

	return daemon_reply_simple("OK", NULL);
}
/*
 * Handle a "set_global_info" request, which may set or clear the global
 * INVALID and DISABLE flags ("global_invalid" / "global_disable" fields:
 * 1 sets, 0 clears, absent leaves the flag untouched).
 *
 * Whether lvmetad is disabled is determined only by the single
 * flag GLFL_DISABLE.  The REASON flags are only explanatory
 * additions to GLFL_DISABLE, and do not control the disabled state.
 * The REASON flags can accumulate if multiple reasons exist for
 * the disabled flag.  When clearing GLFL_DISABLE, all REASON flags
 * are cleared.  The caller clearing GLFL_DISABLE should only do so
 * when all the reasons for it have gone.
 *
 * The optional "disable_reason" string is searched for each known reason
 * name; every name found contributes its REASON flag when disabling.
 */
static response set_global_info(lvmetad_state *s, request r)
{
	const struct {
		const char *name;
		uint32_t flag;
	} known_reasons[] = {
		{ LVMETAD_DISABLE_REASON_DIRECT,     GLFL_DISABLE_REASON_DIRECT },
		{ LVMETAD_DISABLE_REASON_LVM1,       GLFL_DISABLE_REASON_LVM1 },
		{ LVMETAD_DISABLE_REASON_DUPLICATES, GLFL_DISABLE_REASON_DUPLICATES },
		{ LVMETAD_DISABLE_REASON_VGRESTORE,  GLFL_DISABLE_REASON_VGRESTORE },
	};
	const int global_invalid = daemon_request_int(r, "global_invalid", -1);
	const int global_disable = daemon_request_int(r, "global_disable", -1);
	const char *reason = daemon_request_str(r, "disable_reason", NULL);
	uint32_t reason_flags = 0;
	unsigned i;

	if (reason) {
		for (i = 0; i < sizeof(known_reasons) / sizeof(known_reasons[0]); i++)
			if (strstr(reason, known_reasons[i].name))
				reason_flags |= known_reasons[i].flag;
	}

	/* Log the requested transitions before applying them. */
	if (global_invalid != -1) {
		DEBUGLOG(s, "set global info invalid from %d to %d",
			 (s->flags & GLFL_INVALID) ? 1 : 0, global_invalid);
	}

	if (global_disable != -1) {
		DEBUGLOG(s, "set global info disable from %d to %d %s",
			 (s->flags & GLFL_DISABLE) ? 1 : 0, global_disable,
			 reason ? reason : "");
	}

	switch (global_invalid) {
	case 1:
		s->flags |= GLFL_INVALID;
		break;
	case 0:
		s->flags &= ~GLFL_INVALID;
		break;
	default:
		break;
	}

	switch (global_disable) {
	case 1:
		s->flags |= GLFL_DISABLE;
		s->flags |= reason_flags;
		break;
	case 0:
		s->flags &= ~GLFL_DISABLE;
		s->flags &= ~GLFL_DISABLE_REASON_ALL;
		break;
	default:
		break;
	}

	return daemon_reply_simple("OK", NULL);
}
#define REASON_BUF_SIZE 64
/*
 * Handle a "get_global_info" request: report the global flags, the disable
 * reason(s), the daemon pid, the current token and the state of any cache
 * update that is in progress.
 *
 * The time when "updating" begins is saved together with the config setting
 * for how long the update is allowed to take.  Before returning "updating"
 * as the token value, check if the update has exceeded the max allowed
 * time.  If so, then return "none" as the current token value (i.e.
 * uninitialized), so that the command will repopulate our cache.
 *
 * This automatically clears a stuck update, where a command started to update
 * the cache and then failed, leaving the token set to "update in progress".
 */
static response get_global_info(lvmetad_state *s, request r)
{
	/* This buffer should be large enough to hold all the possible reasons. */
	char reason[REASON_BUF_SIZE] = { 0 };
	char flag_str[64] = { 0 };
	const int is_invalid = (s->flags & GLFL_INVALID) ? 1 : 0;
	const int is_disabled = (s->flags & GLFL_DISABLE) ? 1 : 0;
	int pid = (int)daemon_request_int(r, "pid", 0);

	/* Comma-terminated list of every reason flag that is currently set. */
	if (is_disabled) {
		snprintf(reason, REASON_BUF_SIZE - 1, "%s%s%s%s",
			 (s->flags & GLFL_DISABLE_REASON_DIRECT)     ? LVMETAD_DISABLE_REASON_DIRECT "," : "",
			 (s->flags & GLFL_DISABLE_REASON_LVM1)       ? LVMETAD_DISABLE_REASON_LVM1 "," : "",
			 (s->flags & GLFL_DISABLE_REASON_DUPLICATES) ? LVMETAD_DISABLE_REASON_DUPLICATES "," : "",
			 (s->flags & GLFL_DISABLE_REASON_VGRESTORE)  ? LVMETAD_DISABLE_REASON_VGRESTORE "," : "");
	}

	if (!reason[0])
		strcpy(reason, "none");

	/*
	 * If the current update has timed out, then return
	 * token of "none" which means "uninitialized" so that
	 * the caller will repopulate lvmetad.
	 */
	if (s->update_begin && s->update_timeout &&
	    (_monotonic_seconds() - s->update_begin >= s->update_timeout)) {
		DEBUGLOG(s, "global info cancel update after timeout %d len %d begin %llu pid %d cmd %s",
			 s->update_timeout,
			 (int)(_monotonic_seconds() - s->update_begin),
			 (unsigned long long)s->update_begin,
			 s->update_pid, s->update_cmd);
		memset(s->token, 0, sizeof(s->token));
		s->update_begin = 0;
		s->update_timeout = 0;
		s->update_pid = 0;
		memset(s->update_cmd, 0, CMD_NAME_SIZE);
	}

	/* Human-readable flag summary, used only for the debug log. */
	snprintf(flag_str, sizeof(flag_str), "%s%s",
		 is_invalid ? "Invalid" : "",
		 is_disabled ? "Disable" : "");
	if (!flag_str[0])
		strcpy(flag_str, "none");

	DEBUGLOG(s, "%d global info flags %s reason %s token %s update_pid %d",
		 pid, flag_str, reason, s->token[0] ? s->token : "none", s->update_pid);

	return daemon_reply_simple("OK", "global_invalid = " FMTd64, (int64_t)is_invalid,
				   "global_disable = " FMTd64, (int64_t)is_disabled,
				   "disable_reason = %s", reason,
				   "daemon_pid = " FMTd64, (int64_t)getpid(),
				   "token = %s", s->token[0] ? s->token : "none",
				   "update_cmd = %s", s->update_cmd,
				   "update_pid = " FMTd64, (int64_t)s->update_pid,
				   "update_begin = " FMTd64, (int64_t)s->update_begin,
				   "update_timeout = " FMTd64, (int64_t)s->update_timeout,
				   NULL);
}
/*
 * Handle a "set_vg_info" request: a command announces the seqno ("version")
 * that a VG is about to have, so that the cached copy can be flagged
 * INVALID until matching metadata arrives.
 *
 * The VG is identified by "uuid" if that is given and known to the cache,
 * otherwise by "name".  Requests that carry no version, or that name a VG
 * not present in the cache, are acknowledged with "OK" and ignored.
 *
 * A version of 0 invalidates unconditionally.  Any other version
 * invalidates only if it is newer than the seqno of the cached metadata.
 * On invalidation the VG's vg_info entry (created on demand) records the
 * new version in external_version and gets VGFL_INVALID set.
 *
 * Returns "OK", or a failure reply if the vg_info entry cannot be
 * allocated or inserted into the hash table.
 */
static response set_vg_info(lvmetad_state *s, request r)
{
	struct dm_config_tree *vg;
	struct vg_info *info;
	const char *name = NULL;
	const char *uuid = NULL;
	const int64_t new_version = daemon_request_int(r, "version", -1);
	int64_t cache_version = -1;

	if (new_version == -1)
		goto out;

	if (!(uuid = daemon_request_str(r, "uuid", NULL)))
		goto use_name;

	if ((vg = dm_hash_lookup(s->vgid_to_metadata, uuid)))
		goto vers;

use_name:
	if (!(name = daemon_request_str(r, "name", NULL)))
		goto out;

	if (!(uuid = dm_hash_lookup(s->vgname_to_vgid, name)))
		goto out;

	/*
	 * FIXME: if we only have the name and multiple VGs have that name,
	 * then invalidate each of them.
	 */

	if (!(vg = dm_hash_lookup(s->vgid_to_metadata, uuid)))
		goto out;

vers:
	if (!new_version)
		goto inval;

	cache_version = dm_config_find_int64(vg->root, "metadata/seqno", -1);

	/* new_version cannot be -1 here; that case returned early above. */
	if (cache_version != -1 && cache_version >= new_version)
		goto out;

inval:
	/* Versions are 64-bit; print them in full rather than truncating to int. */
	DEBUGLOG(s, "set info VG name %s uuid %s cache_version %lld new_version %lld",
		 name ?: "none", uuid ?: "none",
		 (long long)cache_version, (long long)new_version);

	info = dm_hash_lookup(s->vgid_to_info, uuid);
	if (!info) {
		info = calloc(1, sizeof(*info));
		if (!info)
			goto bad;
		if (!dm_hash_insert(s->vgid_to_info, uuid, (void*)info)) {
			/* Not owned by the table: release it or it leaks. */
			free(info);
			goto bad;
		}
	}

	info->external_version = new_version;
	info->flags |= VGFL_INVALID;

out:
	return daemon_reply_simple("OK", NULL);
bad:
	return reply_fail("out of memory");
}
static void _dump_cft(struct buffer *buf, struct dm_hash_table *ht, const char *key_addr)
{
struct dm_hash_node *n;
dm_hash_iterate(n, ht) {
struct dm_config_tree *cft = dm_hash_get_data(ht, n);
const char *key_backup = cft->root->key;
cft->root->key = dm_config_find_str(cft->root, key_addr, "unknown");
(void) dm_config_write_node(cft->root, buffer_line, buf);
cft->root->key = key_backup;
}
}
/*
 * Append a "name { key = "value" ... }" section to 'buf' listing every
 * entry of 'ht', whose values are treated as strings.
 *
 * With 'int_key' set, the key bytes are read as an int and printed in
 * decimal; otherwise the key is printed as a string.  An entry whose line
 * cannot be formatted is emitted as an empty line.
 */
static void _dump_pairs(struct buffer *buf, struct dm_hash_table *ht, const char *name, int int_key)
{
	struct dm_hash_node *n = dm_hash_get_first(ht);
	char *line;

	buffer_append(buf, name);
	buffer_append(buf, " {\n");

	while (n) {
		const char *key = dm_hash_get_key(ht, n);
		const char *val = dm_hash_get_data(ht, n);

		buffer_append(buf, "    ");

		if (int_key)
			(void) dm_asprintf(&line, "%d = \"%s\"", *(const int*)key, val);
		else
			(void) dm_asprintf(&line, "%s = \"%s\"", key, val);

		if (line)
			buffer_append(buf, line);
		buffer_append(buf, "\n");
		dm_free(line);

		n = dm_hash_get_next(ht, n);
	}

	buffer_append(buf, "}\n");
}
/*
 * Append a "name { vgid = external_version ... }" section to 'buf' for
 * every vg_info entry in 'ht'.  'int_key' is accepted but not used.
 */
static void _dump_info_version(struct buffer *buf, struct dm_hash_table *ht, const char *name, int int_key)
{
	struct dm_hash_node *n;
	char *line;

	buffer_append(buf, name);
	buffer_append(buf, " {\n");

	for (n = dm_hash_get_first(ht); n; n = dm_hash_get_next(ht, n)) {
		const char *key = dm_hash_get_key(ht, n);
		struct vg_info *info = dm_hash_get_data(ht, n);

		buffer_append(buf, "    ");
		(void) dm_asprintf(&line, "%s = %lld", key, (long long)info->external_version);
		if (line)
			buffer_append(buf, line);
		buffer_append(buf, "\n");
		dm_free(line);
	}

	buffer_append(buf, "}\n");
}
/*
 * Append a "name { vgid = flags ... }" section to 'buf' for every vg_info
 * entry in 'ht', printing the flags in hexadecimal.  'int_key' is accepted
 * but not used.
 */
static void _dump_info_flags(struct buffer *buf, struct dm_hash_table *ht, const char *name, int int_key)
{
	char *append;
	struct dm_hash_node *n = dm_hash_get_first(ht);
	struct vg_info *info;

	buffer_append(buf, name);
	buffer_append(buf, " {\n");

	while (n) {
		const char *key = dm_hash_get_key(ht, n);
		info = dm_hash_get_data(ht, n);

		buffer_append(buf, "    ");
		/* %llx takes an unsigned long long; a signed argument is a type mismatch. */
		(void) dm_asprintf(&append, "%s = %llx", key, (unsigned long long)info->flags);
		if (append)
			buffer_append(buf, append);
		buffer_append(buf, "\n");
		dm_free(append);

		n = dm_hash_get_next(ht, n);
	}

	buffer_append(buf, "}\n");
}
/*
 * Build a textual dump of every cache table, one titled section per table,
 * and return it as the response buffer.
 *
 * No lock is taken here; handler() holds the cache lock for reading
 * while a "dump" request is being served.
 */
static response dump(lvmetad_state *s)
{
	response res = { 0 };
	struct buffer *out = &res.buffer;

	buffer_init(out);

	buffer_append(out, "# VG METADATA\n\n");
	_dump_cft(out, s->vgid_to_metadata, "metadata/id");

	buffer_append(out, "\n# PV METADATA\n\n");
	_dump_cft(out, s->pvid_to_pvmeta, "pvmeta/id");

	buffer_append(out, "\n# VGID to VGNAME mapping\n\n");
	_dump_pairs(out, s->vgid_to_vgname, "vgid_to_vgname", 0);

	buffer_append(out, "\n# VGID to outdated PVs mapping\n\n");
	_dump_cft(out, s->vgid_to_outdated_pvs, "outdated_pvs/vgid");

	buffer_append(out, "\n# VGNAME to VGID mapping\n\n");
	_dump_pairs(out, s->vgname_to_vgid, "vgname_to_vgid", 0);

	buffer_append(out, "\n# PVID to VGID mapping\n\n");
	_dump_pairs(out, s->pvid_to_vgid, "pvid_to_vgid", 0);

	/* Device keys are binary, hence the int_key form. */
	buffer_append(out, "\n# DEVICE to PVID mapping\n\n");
	_dump_pairs(out, s->device_to_pvid, "device_to_pvid", 1);

	buffer_append(out, "\n# VGID to INFO version mapping\n\n");
	_dump_info_version(out, s->vgid_to_info, "vgid_to_info", 0);

	buffer_append(out, "\n# VGID to INFO flags mapping\n\n");
	_dump_info_flags(out, s->vgid_to_info, "vgid_to_info", 0);

	return res;
}
static response handler(daemon_state s, client_handle h, request r)
{
response res = { 0 };
lvmetad_state *state = s.private;
char prev_token[128] = { 0 };
const char *rq;
const char *token;
const char *cmd;
int prev_in_progress, this_in_progress;
int update_timeout;
int pid;
int cache_lock = 0;
int info_lock = 0;
rq = daemon_request_str(r, "request", "NONE");
token = daemon_request_str(r, "token", "NONE");
pid = (int)daemon_request_int(r, "pid", 0);
cmd = daemon_request_str(r, "cmd", "NONE");
update_timeout = (int)daemon_request_int(r, "update_timeout", 0);
pthread_mutex_lock(&state->token_lock);
/*
* token_update: start populating the cache, i.e. a full update.
* To populate the lvmetad cache, a command does:
*
* - token_update, setting token to "update in progress"
* (further requests during the update continue using
* this same "update in progress" token)
* - pv_clear_all, to clear the current cache
* - pv_gone, for each PV
* - pv_found, for each PV to populate the cache
* - token_update, setting token to filter hash
*/
if (!strcmp(rq, "token_update")) {
prev_in_progress = !strcmp(state->token, LVMETAD_TOKEN_UPDATE_IN_PROGRESS);
this_in_progress = !strcmp(token, LVMETAD_TOKEN_UPDATE_IN_PROGRESS);
if (!prev_in_progress && this_in_progress) {
/* New update is starting (filter token is replaced by update token) */
memcpy(prev_token, state->token, 128);
strncpy(state->token, token, 128);
state->token[127] = 0;
state->update_begin = _monotonic_seconds();
state->update_timeout = update_timeout;
state->update_pid = pid;
strncpy(state->update_cmd, cmd, CMD_NAME_SIZE - 1);
DEBUGLOG(state, "token_update begin %llu timeout %d pid %d cmd %s",
(unsigned long long)state->update_begin,
state->update_timeout,
state->update_pid,
state->update_cmd);
} else if (prev_in_progress && this_in_progress) {
/* Current update is cancelled and replaced by a new update */
DEBUGLOG(state, "token_update replacing pid %d begin %llu len %d cmd %s",
state->update_pid,
(unsigned long long)state->update_begin,
(int)(_monotonic_seconds() - state->update_begin),
state->update_cmd);
memcpy(prev_token, state->token, 128);
strncpy(state->token, token, 128);
state->token[127] = 0;
state->update_begin = _monotonic_seconds();
state->update_timeout = update_timeout;
state->update_pid = pid;
strncpy(state->update_cmd, cmd, CMD_NAME_SIZE - 1);
DEBUGLOG(state, "token_update begin %llu timeout %d pid %d cmd %s",
(unsigned long long)state->update_begin,
state->update_timeout,
state->update_pid,
state->update_cmd);
} else if (prev_in_progress && !this_in_progress) {
/* Update is finished, update token is replaced by filter token */
if (state->update_pid != pid) {
/* If a pid doing update was cancelled, ignore its token update at the end. */
DEBUGLOG(state, "token_update ignored from cancelled update pid %d", pid);
pthread_mutex_unlock(&state->token_lock);
return daemon_reply_simple("token_mismatch",
"expected = %s", state->token,
"received = %s", token,
"update_pid = " FMTd64, (int64_t)state->update_pid,
"reason = %s", "another command has populated the cache");
}
DEBUGLOG(state, "token_update end len %d pid %d new token %s",
(int)(_monotonic_seconds() - state->update_begin),
state->update_pid, token);
memcpy(prev_token, state->token, 128);
strncpy(state->token, token, 128);
state->token[127] = 0;
state->update_begin = 0;
state->update_timeout = 0;
state->update_pid = 0;
memset(state->update_cmd, 0, CMD_NAME_SIZE);
}
pthread_mutex_unlock(&state->token_lock);
return daemon_reply_simple("OK",
"prev_token = %s", prev_token,
"update_pid = " FMTd64, (int64_t)state->update_pid,
NULL);
}
if (strcmp(token, state->token) && strcmp(rq, "dump") && strcmp(token, "skip")) {
pthread_mutex_unlock(&state->token_lock);
DEBUGLOG(state, "token_mismatch current \"%s\" got \"%s\" from pid %d cmd %s",
state->token, token, pid, cmd ?: "none");
return daemon_reply_simple("token_mismatch",
"expected = %s", state->token,
"received = %s", token,
"update_pid = " FMTd64, (int64_t)state->update_pid,
"reason = %s", "another command has populated the cache");
}
/* If a pid doing update was cancelled, ignore its update messages. */
if (!strcmp(token, LVMETAD_TOKEN_UPDATE_IN_PROGRESS) &&
state->update_pid && pid && (state->update_pid != pid)) {
pthread_mutex_unlock(&state->token_lock);
DEBUGLOG(state, "token_mismatch ignore update from pid %d current update pid %d",
pid, state->update_pid);
return daemon_reply_simple("token_mismatch",
"expected = %s", state->token,
"received = %s", token,
"update_pid = " FMTd64, (int64_t)state->update_pid,
"reason = %s", "another command has populated the lvmetad cache");
}
pthread_mutex_unlock(&state->token_lock);
if (!strcmp(rq, "pv_found") ||
!strcmp(rq, "pv_gone") ||
!strcmp(rq, "vg_update") ||
!strcmp(rq, "vg_remove") ||
!strcmp(rq, "set_vg_info") ||
!strcmp(rq, "pv_clear_all") ||
!strcmp(rq, "vg_clear_outdated_pvs")) {
pthread_rwlock_wrlock(&state->cache_lock);
cache_lock = 1;
goto do_rq;
}
if (!strcmp(rq, "pv_lookup") ||
!strcmp(rq, "vg_lookup") ||
!strcmp(rq, "pv_list") ||
!strcmp(rq, "vg_list") ||
!strcmp(rq, "dump")) {
pthread_rwlock_rdlock(&state->cache_lock);
cache_lock = 1;
goto do_rq;
}
if (!strcmp(rq, "set_global_info") ||
!strcmp(rq, "get_global_info")) {
pthread_mutex_lock(&state->info_lock);
info_lock = 1;
goto do_rq;
}
do_rq:
if (!strcmp(rq, "pv_found"))
res = pv_found(state, r);
else if (!strcmp(rq, "pv_gone"))
res = pv_gone(state, r);
else if (!strcmp(rq, "pv_clear_all"))
res = pv_clear_all(state, r);
else if (!strcmp(rq, "pv_lookup"))
res = pv_lookup(state, r);
else if (!strcmp(rq, "vg_update"))
res = vg_update(state, r);
else if (!strcmp(rq, "vg_clear_outdated_pvs"))
res = vg_clear_outdated_pvs(state, r);
else if (!strcmp(rq, "vg_remove"))
res = vg_remove(state, r);
else if (!strcmp(rq, "vg_lookup"))
res = vg_lookup(state, r);
else if (!strcmp(rq, "pv_list"))
res = pv_list(state, r);
else if (!strcmp(rq, "vg_list"))
res = vg_list(state, r);
else if (!strcmp(rq, "set_global_info"))
res = set_global_info(state, r);
else if (!strcmp(rq, "get_global_info"))
res = get_global_info(state, r);
else if (!strcmp(rq, "set_vg_info"))
res = set_vg_info(state, r);
else if (!strcmp(rq, "dump"))
res = dump(state);
else
res = reply_fail("request not implemented");
if (cache_lock)
pthread_rwlock_unlock(&state->cache_lock);
if (info_lock)
pthread_mutex_unlock(&state->info_lock);
return res;
}
/*
 * Daemon init callback: prepare the lvmetad private state.
 *
 * Sets up the three locks, creates the metadata hash tables, clears the
 * token and configures stderr logging from the -l option string.
 *
 * Returns 1 on success, 0 if the log configuration cannot be parsed or the
 * pvid_to_vgid / vgid_to_metadata tables were not created.
 */
static int init(daemon_state *s)
{
	lvmetad_state *state = s->private;

	state->log = s->log;

	pthread_mutex_init(&state->token_lock, NULL);
	pthread_mutex_init(&state->info_lock, NULL);
	pthread_rwlock_init(&state->cache_lock, NULL);

	create_metadata_hashes(state);

	state->token[0] = 0;

	/* Set up stderr logging depending on the -l option. */
	if (!daemon_log_parse(state->log, DAEMON_LOG_OUTLET_STDERR, state->log_config, 1))
		return 0;

	DEBUGLOG(s, "initialised state: vgid_to_metadata = %p", state->vgid_to_metadata);

	if (!state->pvid_to_vgid || !state->vgid_to_metadata)
		return 0;

	/* if (ls->initial_registrations)
	   _process_initial_registrations(ds->initial_registrations); */

	/* Idle tracking is only present when an idle timeout was configured. */
	if (state->idle)
		state->idle->is_idle = 1;

	return 1;
}
/*
 * Daemon shutdown callback: log the event and destroy the metadata hash
 * tables of the private state.  Always returns 1.
 */
static int fini(daemon_state *s)
{
	lvmetad_state *state = s->private;

	DEBUGLOG(s, "fini");

	destroy_metadata_hashes(state);

	return 1;
}
/*
 * Parse the argument of the -t option as a non-negative decimal number.
 *
 * str:          text to parse
 * max_timeouts: receives the parsed value on success, untouched on failure
 *
 * Returns 1 on success, 0 if 'str' contains no digits, has trailing
 * characters, is negative, is out of range for strtoul() or is not
 * smaller than UINT_MAX.
 */
static int process_timeout_arg(const char *str, unsigned *max_timeouts)
{
	char *endptr;
	unsigned long l;

	/*
	 * strtoul() accepts a leading '-' and returns the negated value as
	 * an unsigned number, so a sign has to be rejected up front.
	 */
	if (strchr(str, '-'))
		return 0;

	errno = 0;
	l = strtoul(str, &endptr, 10);

	/* endptr == str: nothing was converted (empty or blank string). */
	if (errno || endptr == str || *endptr || l >= UINT_MAX)
		return 0;

	*max_timeouts = (unsigned) l;

	return 1;
}
/*
 * Print the command line synopsis and option summary of the daemon.
 *
 * prog: program name to show in the synopsis
 * file: stream to write to
 */
static void usage(const char *prog, FILE *file)
{
	fprintf(file, "Usage:\n"
		"%s [-V] [-h] [-f] [-l level[,level ...]] [-p path] [-s path] [-t secs]\n\n"
		"   -V       Show version of lvmetad\n"
		"   -h       Show this help information\n"
		"   -f       Don't fork, run in the foreground\n"
		"   -l       Logging message levels (all,fatal,error,warn,info,wire,debug)\n"
		"   -p       Set path to the pidfile\n"
		"   -s       Set path to the socket to listen on\n"
		"   -t       Time to wait in seconds before shutdown on idle (missing or 0 = infinite)\n\n", prog);
}
int main(int argc, char *argv[])
{
signed char opt;
struct timeval timeout;
daemon_idle di = { .ptimeout = &timeout };
lvmetad_state ls = { .log_config = "" };
daemon_state s = {
.daemon_fini = fini,
.daemon_init = init,
.handler = handler,
.name = "lvmetad",
.pidfile = getenv("LVM_LVMETAD_PIDFILE") ? : LVMETAD_PIDFILE,
.private = &ls,
.protocol = "lvmetad",
.protocol_version = 1,
.socket_path = getenv("LVM_LVMETAD_SOCKET") ? : LVMETAD_SOCKET,
};
// use getopt_long
while ((opt = getopt(argc, argv, "?fhVl:p:s:t:")) != EOF) {
switch (opt) {
case 'h':
usage(argv[0], stdout);
exit(0);
case '?':
usage(argv[0], stderr);
exit(0);
case 'f':
s.foreground = 1;
break;
case 'l':
ls.log_config = optarg;
break;
case 'p':
s.pidfile = optarg;
break;
case 's': // --socket
s.socket_path = optarg;
break;
case 't':
if (!process_timeout_arg(optarg, &di.max_timeouts)) {
fprintf(stderr, "Invalid value of timeout parameter.\n");
exit(EXIT_FAILURE);
}
/* 0 equals to wait indefinitely */
if (di.max_timeouts)
s.idle = ls.idle = &di;
break;
case 'V':
printf("lvmetad version: " LVM_VERSION "\n");
exit(1);
}
}
daemon_start(s);
return 0;
}