1
0
mirror of git://sourceware.org/git/lvm2.git synced 2025-01-04 09:18:36 +03:00
lvm2/lib/cache/lvmetad.c

2844 lines
81 KiB
C
Raw Normal View History

/*
* Copyright (C) 2012 Red Hat, Inc.
*
* This file is part of LVM2.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU Lesser General Public License v.2.1.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "lib.h"
#include "toolcontext.h"
#include "metadata.h"
#include "device.h"
#include "lvmetad.h"
#include "lvmcache.h"
#include "lvmetad-client.h"
#include "format-text.h" // TODO for disk_locn, used as a DA representation
#include "crc.h"
#include "lvm-signal.h"
2015-03-05 23:00:44 +03:00
#include "lvmlockd.h"
#include "str_list.h"
#include <time.h>
/*
 * File-level lvmetad client state.
 */
/* Handle assigned from lvmetad_open() in lvmetad_connect(); closed by lvmetad_disconnect(). */
static daemon_handle _lvmetad = { .error = 0 };
/* Value reported by lvmetad_used(); 1 after a successful connect, 0 after disconnect or a failed connect. */
static int _lvmetad_use = 0;
/* Set to 1 by a successful lvmetad_connect(); checked before the handle is closed. */
static int _lvmetad_connected = 0;
/* "daemon_pid" field from the last get_global_info reply seen by lvmetad_token_matches(). */
static int _lvmetad_daemon_pid = 0;
/* Heap string "filter:<crc>" built by lvmetad_set_token(); freed by lvmetad_release_token(). May be NULL. */
static char *_lvmetad_token = NULL;
/* Socket path given to lvmetad_set_socket(); when NULL, LVMETAD_SOCKET is used for the presence check. */
static const char *_lvmetad_socket = NULL;
/* Command context recorded by lvmetad_connect(); its rand_seed is used to randomise retry delays. */
static struct cmd_context *_lvmetad_cmd = NULL;
/* Value of global_lvmetad_update_wait_time_CFG read at connect time; used as a number of seconds. */
static int64_t _lvmetad_update_timeout;
/* NOTE(review): not referenced in the part of the file visible here; presumably set during scanning - confirm. */
static int _found_lvm1_metadata = 0;
/* Forward declaration; called by lvmetad_vg_lookup() when a cached VG must be rescanned. */
static struct volume_group *lvmetad_pvscan_vg(struct cmd_context *cmd, struct volume_group *vg);
/*
 * Current CLOCK_MONOTONIC reading, truncated to whole seconds.
 *
 * Returns 0 when the clock cannot be read; callers in this file treat a
 * zero result as failure.
 */
static uint64_t _monotonic_seconds(void)
{
	struct timespec now;

	if (clock_gettime(CLOCK_MONOTONIC, &now) >= 0)
		return now.tv_sec;

	return 0;
}
static int _log_debug_inequality(const char *name, struct dm_config_node *a, struct dm_config_node *b)
{
int result = 0;
int final_result = 0;
if (a->v && b->v) {
result = compare_value(a->v, b->v);
if (result) {
struct dm_config_value *av = a->v;
struct dm_config_value *bv = b->v;
if (!strcmp(a->key, b->key)) {
if (a->v->type == DM_CFG_STRING && b->v->type == DM_CFG_STRING)
log_debug_lvmetad("VG %s metadata inequality at %s / %s: %s / %s",
name, a->key, b->key, av->v.str, bv->v.str);
else if (a->v->type == DM_CFG_INT && b->v->type == DM_CFG_INT)
2015-10-03 23:10:31 +03:00
log_debug_lvmetad("VG %s metadata inequality at %s / %s: " FMTi64 " / " FMTi64,
name, a->key, b->key, av->v.i, bv->v.i);
else
log_debug_lvmetad("VG %s metadata inequality at %s / %s: type %d / type %d",
name, a->key, b->key, av->type, bv->type);
} else {
log_debug_lvmetad("VG %s metadata inequality at %s / %s", name, a->key, b->key);
}
final_result = result;
}
}
if (a->v && !b->v) {
log_debug_lvmetad("VG %s metadata inequality at %s / %s", name, a->key, b->key);
final_result = 1;
}
if (!a->v && b->v) {
log_debug_lvmetad("VG %s metadata inequality at %s / %s", name, a->key, b->key);
final_result = -1;
}
if (a->child && b->child) {
result = _log_debug_inequality(name, a->child, b->child);
if (result)
final_result = result;
}
if (a->sib && b->sib) {
result = _log_debug_inequality(name, a->sib, b->sib);
if (result)
final_result = result;
}
if (a->sib && !b->sib) {
log_debug_lvmetad("VG %s metadata inequality at %s / %s", name, a->key, b->key);
final_result = 1;
}
if (!a->sib && b->sib) {
log_debug_lvmetad("VG %s metadata inequality at %s / %s", name, a->key, b->key);
final_result = -1;
}
return final_result;
}
/*
 * Close the connection to lvmetad when one is open, then reset the
 * file-level connection state so that lvmetad_used() reports 0.
 */
void lvmetad_disconnect(void)
{
	if (_lvmetad_connected) {
		daemon_close(_lvmetad);
	}

	_lvmetad_cmd = NULL;
	_lvmetad_use = 0;
	_lvmetad_connected = 0;
}
/*
 * Try to open a connection to lvmetad for this command.
 *
 * cmd: command context; used to read lvmetad_update_wait_time and recorded
 *      for later use when the connection succeeds.
 *
 * Returns 1 when connected (lvmetad_used() then reports 1), 0 when the
 * socket is absent or opening it failed.  On failure the connection state
 * is reset.
 */
int lvmetad_connect(struct cmd_context *cmd)
{
	if (!lvmetad_socket_present()) {
		log_debug_lvmetad("Failed to connect to lvmetad: socket not present.");
		goto not_connected;
	}

	_lvmetad_update_timeout = find_config_tree_int(cmd, global_lvmetad_update_wait_time_CFG, NULL);

	_lvmetad = lvmetad_open(_lvmetad_socket);

	if (_lvmetad.socket_fd < 0 || _lvmetad.error) {
		log_debug_lvmetad("Failed to connect to lvmetad: %s", strerror(_lvmetad.error));
		goto not_connected;
	}

	log_debug_lvmetad("Successfully connected to lvmetad on fd %d.",
			  _lvmetad.socket_fd);
	_lvmetad_connected = 1;
	_lvmetad_use = 1;
	_lvmetad_cmd = cmd;

	return 1;

not_connected:
	_lvmetad_connected = 0;
	_lvmetad_use = 0;
	_lvmetad_cmd = NULL;

	return 0;
}
/*
 * Report whether this command is currently using lvmetad: non-zero after a
 * successful lvmetad_connect(), 0 after lvmetad_disconnect() or a failed
 * connect.
 */
int lvmetad_used(void)
{
	const int in_use = _lvmetad_use;

	return in_use;
}
/*
 * Stop using lvmetad for the rest of this command: disconnect, then refresh
 * the command's filters when a command context is supplied.
 *
 * cmd: command context, or NULL to skip the filter refresh.
 */
void lvmetad_make_unused(struct cmd_context *cmd)
{
	lvmetad_disconnect();

	if (!cmd)
		return;

	/* A failed refresh is only traced, not reported to the caller. */
	if (!refresh_filters(cmd))
		stack;
}
/*
 * Return 1 when the lvmetad pid file exists, 0 otherwise.
 *
 * The path comes from the LVM_LVMETAD_PIDFILE environment variable when it
 * is set, otherwise from LVMETAD_PIDFILE.
 */
int lvmetad_pidfile_present(void)
{
	const char *path = getenv("LVM_LVMETAD_PIDFILE");

	if (!path)
		path = LVMETAD_PIDFILE;

	return access(path, F_OK) == 0;
}
/*
 * Return 1 when the lvmetad socket path exists, 0 otherwise.
 *
 * The path is the one given to lvmetad_set_socket(), or LVMETAD_SOCKET when
 * none was set.  An access() failure other than ENOENT is logged as a
 * system error and still reported as "not present".
 */
int lvmetad_socket_present(void)
{
	const char *path = _lvmetad_socket ? _lvmetad_socket : LVMETAD_SOCKET;
	int rc = access(path, F_OK);

	if (rc == 0)
		return 1;

	if (errno != ENOENT)
		log_sys_error("access", path);

	return 0;
}
/*
 * Record the socket path to use for lvmetad.
 *
 * sock: path string, or NULL to fall back to LVMETAD_SOCKET for the
 *       presence check.  Only the pointer is stored; the string is not
 *       copied, so it has to stay valid while it is in use.
 */
void lvmetad_set_socket(const char *sock)
{
	_lvmetad_socket = sock;
}
/*
 * Use a crc of the strings in the filter as the lvmetad token.
 *
 * filter: list of config values; the leading run of DM_CFG_STRING entries
 *         is folded into the crc.  May be NULL, in which case the crc is 0.
 *
 * Any previous token is freed.  On allocation failure a warning is logged
 * and the token is left NULL.
 */
void lvmetad_set_token(const struct dm_config_value *filter)
{
	int ft = 0;

	/*
	 * Clear the pointer together with the free, so that a failing
	 * dm_asprintf() below can never leave it pointing at freed memory.
	 */
	dm_free(_lvmetad_token);
	_lvmetad_token = NULL;

	while (filter && filter->type == DM_CFG_STRING) {
		ft = calc_crc(ft, (const uint8_t *) filter->v.str, strlen(filter->v.str));
		filter = filter->next;
	}

	/* The cast makes the argument type match the %u conversion. */
	if (dm_asprintf(&_lvmetad_token, "filter:%u", (unsigned int) ft) < 0) {
		_lvmetad_token = NULL;
		log_warn("WARNING: Failed to set lvmetad token. Out of memory?");
	}
}
/*
 * Free the current lvmetad token and leave the token pointer NULL.
 */
void lvmetad_release_token(void)
{
	char *old_token = _lvmetad_token;

	_lvmetad_token = NULL;
	dm_free(old_token);
}
/*
 * Check if lvmetad's token matches our token.  The token is a hash of the
 * global filter used to populate lvmetad.  The lvmetad token was set by the
 * last command to populate lvmetad, and it was set to the hash of the global
 * filter that command used when scanning to populate lvmetad.
 *
 * Our token is a hash of the global filter this command is using.
 *
 * If the lvmetad token is not set (or "none"), then lvmetad has not been
 * populated.  If the lvmetad token is "update in progress", then lvmetad is
 * currently being populated -- this should be temporary, so wait for a while
 * for the current update to finish and then compare our token with the new one
 * (hopefully it will match).  If the lvmetad token otherwise differs from
 * ours, then lvmetad was populated using a different global filter than we are
 * using.
 *
 * Return 1 if the lvmetad token matches ours.  We can use it as is.
 *
 * Return 0 if the lvmetad token does not match ours (lvmetad is empty or
 * populated using a different global filter), or if we have no token of our
 * own to compare with.  The caller will repopulate lvmetad (via
 * lvmetad_pvscan_all_devs) before using lvmetad.
 *
 * If we time out waiting for an lvmetad update to finish, or the query
 * itself fails, then disable this command's use of lvmetad and return 0.
 */
int lvmetad_token_matches(struct cmd_context *cmd)
{
	daemon_reply reply;
	const char *daemon_token;
	unsigned int delay_usec = 0;
	unsigned int wait_sec = 0;
	uint64_t now = 0, wait_start = 0;
	int ret = 1;

	wait_sec = (unsigned int)_lvmetad_update_timeout;

retry:
	log_debug_lvmetad("Sending lvmetad get_global_info");

	reply = daemon_send_simple(_lvmetad, "get_global_info",
				   "token = %s", "skip",
				   "pid = " FMTd64, (int64_t)getpid(),
				   "cmd = %s", get_cmd_name(),
				   NULL);

	if (reply.error) {
		log_warn("WARNING: Not using lvmetad after send error (%d).", reply.error);
		goto fail;
	}

	if (strcmp(daemon_reply_str(reply, "response", ""), "OK")) {
		log_warn("WARNING: Not using lvmetad after response error.");
		goto fail;
	}

	if (!(daemon_token = daemon_reply_str(reply, "token", NULL))) {
		log_warn("WARNING: Not using lvmetad with older version.");
		goto fail;
	}

	_lvmetad_daemon_pid = (int)daemon_reply_int(reply, "daemon_pid", 0);

	/*
	 * If lvmetad is being updated by another command, then sleep and retry
	 * until the token shows the update is done, and go on to the token
	 * comparison.
	 *
	 * Between retries, sleep for a random period between 1 and 2 seconds.
	 * Retry in this way for up to a configurable period of time.
	 */
	if (!strcmp(daemon_token, LVMETAD_TOKEN_UPDATE_IN_PROGRESS)) {
		if (!(now = _monotonic_seconds()))
			goto fail;

		if (!wait_start)
			wait_start = now;

		if (now - wait_start > wait_sec) {
			log_warn("WARNING: Not using lvmetad after %u sec lvmetad_update_wait_time.", wait_sec);
			goto fail;
		}

		log_warn("WARNING: lvmetad is being updated, retrying (setup) for %u more seconds.",
			 wait_sec - (unsigned int)(now - wait_start));

		/* Delay a random period between 1 and 2 seconds. */
		delay_usec = 1000000 + lvm_even_rand(&_lvmetad_cmd->rand_seed, 1000000);
		usleep(delay_usec);
		daemon_reply_destroy(reply);
		goto retry;
	}

	/*
	 * lvmetad is empty, not yet populated.
	 * The caller should do a disk scan to populate lvmetad.
	 */
	if (!strcmp(daemon_token, "none")) {
		ret = 0;
		goto out;
	}

	/*
	 * lvmetad has an unmatching token; it was last populated using
	 * a different global filter.
	 * The caller should do a disk scan to populate lvmetad with
	 * our global filter.
	 *
	 * Our own token is NULL when lvmetad_set_token() was never called or
	 * failed; that cannot match anything and must not reach strcmp().
	 */
	if (!_lvmetad_token || strcmp(daemon_token, _lvmetad_token)) {
		ret = 0;
		goto out;
	}

out:
	daemon_reply_destroy(reply);
	return ret;

fail:
	daemon_reply_destroy(reply);
	/* The command will not use lvmetad and will revert to scanning. */
	lvmetad_make_unused(cmd);
	return 0;
}
/*
 * Ask lvmetad whether its cache is currently being updated, optionally
 * polling once per second for up to lvmetad_update_wait_time until the
 * update is over.
 *
 * cmd:     not used by this function.
 * do_wait: when zero, query once and return immediately.
 *
 * Returns 1 when the last token seen was "update in progress", 0 when
 * lvmetad reported any other token.  A failed query on the first attempt
 * returns 0 (we cannot tell); a failed query after at least one
 * "update in progress" reply leaves the result at 1.
 */
static int _lvmetad_is_updating(struct cmd_context *cmd, int do_wait)
{
	const unsigned int limit_sec = (unsigned int)_lvmetad_update_timeout;
	uint64_t started = 0;
	uint64_t current = 0;
	const char *token;
	daemon_reply reply;
	int updating = 0;

	for (;;) {
		log_debug_lvmetad("Sending lvmetad get_global_info");

		reply = daemon_send_simple(_lvmetad, "get_global_info",
					   "token = %s", "skip",
					   "pid = " FMTd64, (int64_t)getpid(),
					   "cmd = %s", get_cmd_name(),
					   NULL);

		if (reply.error)
			break;

		if (strcmp(daemon_reply_str(reply, "response", ""), "OK"))
			break;

		if (!(token = daemon_reply_str(reply, "token", NULL)))
			break;

		if (strcmp(token, LVMETAD_TOKEN_UPDATE_IN_PROGRESS)) {
			updating = 0;
			break;
		}

		updating = 1;

		if (!do_wait)
			break;

		if (!(current = _monotonic_seconds()))
			break;

		if (!started)
			started = current;

		if (current - started >= limit_sec)
			break;

		log_warn("WARNING: lvmetad is being updated, waiting for %u more seconds.",
			 limit_sec - (unsigned int)(current - started));

		usleep(1000000);
		/* This reply is finished with; the next pass fetches a new one. */
		daemon_reply_destroy(reply);
	}

	daemon_reply_destroy(reply);
	return updating;
}
/*
 * Build and send one request to lvmetad, resending it for a while when
 * lvmetad answers that another command is updating its cache.
 *
 * cmd: not used by this function; the random seed for the retry delay is
 *      taken from the command context recorded by lvmetad_connect().
 * id:  request name.
 * ...: NULL-terminated request fields in the form accepted by
 *      daemon_request_extend_v().
 *
 * Every request also carries our token ("none" when we have none), the
 * update timeout, our pid and the command name.
 *
 * Returns the daemon's reply, which the caller must pass to
 * daemon_reply_destroy().  reply.error is ECONNRESET when lvmetad is not
 * connected/used, ENOMEM when the request could not be built.  Replies to
 * "token_update" are returned without any retry handling.
 */
static daemon_reply _lvmetad_send(struct cmd_context *cmd, const char *id, ...)
{
	va_list ap;
	daemon_reply reply = { 0 };
	daemon_request req;
	/* Our token may be NULL; use the same fallback for sending and comparing. */
	const char *our_token = _lvmetad_token ?: "none";
	const char *token_expected;
	unsigned int delay_usec;
	unsigned int wait_sec = 0;
	uint64_t now = 0, wait_start = 0;
	int daemon_in_update;
	int we_are_in_update;

	if (!_lvmetad_connected || !_lvmetad_use) {
		reply.error = ECONNRESET;
		return reply;
	}

	wait_sec = (unsigned int)_lvmetad_update_timeout;

retry:
	req = daemon_request_make(id);

	if (!daemon_request_extend(req,
				   "token = %s", our_token,
				   "update_timeout = " FMTd64, (int64_t)wait_sec,
				   "pid = " FMTd64, (int64_t)getpid(),
				   "cmd = %s", get_cmd_name(),
				   NULL)) {
		/* The partly built request must not be leaked. */
		daemon_request_destroy(req);
		reply.error = ENOMEM;
		return reply;
	}

	va_start(ap, id);
	daemon_request_extend_v(req, ap);
	va_end(ap);

	reply = daemon_send(_lvmetad, req);

	daemon_request_destroy(req);

	if (reply.error == ECONNRESET)
		log_warn("WARNING: lvmetad connection failed, cannot reconnect.");

	/*
	 * For the "token_update" message, the result is handled entirely
	 * by the _token_update() function, so return the reply immediately.
	 */
	if (!strcmp(id, "token_update"))
		return reply;

	/*
	 * For other messages it may be useful to retry and resend the
	 * message, so check for that case before returning the reply.
	 * The reply will be checked further in lvmetad_handle_reply.
	 */
	if (reply.error)
		return reply;

	if (!strcmp(daemon_reply_str(reply, "response", ""), "token_mismatch")) {
		token_expected = daemon_reply_str(reply, "expected", "");
		daemon_in_update = !strcmp(token_expected, LVMETAD_TOKEN_UPDATE_IN_PROGRESS);
		we_are_in_update = !strcmp(our_token, LVMETAD_TOKEN_UPDATE_IN_PROGRESS);

		if (daemon_in_update && !we_are_in_update) {
			/*
			 * Another command is updating lvmetad, and we cannot
			 * use lvmetad until the update is finished.  Retry our
			 * request for a while; the update should finish
			 * shortly.  This should not usually happen because
			 * this command already checked that the token is
			 * usable in lvmetad_token_matches(), but it's possible
			 * for another command's rescan to slip in between the
			 * time we call lvmetad_token_matches() and the time we
			 * get here to lvmetad_send().
			 */
			if (!(now = _monotonic_seconds()))
				goto out;

			if (!wait_start)
				wait_start = now;

			if (!wait_sec || (now - wait_start >= wait_sec)) {
				log_warn("WARNING: Cannot use lvmetad after %u sec lvmetad_update_wait_time.", wait_sec);
				goto out;
			}

			log_warn("WARNING: lvmetad is being updated, retrying (%s) for %u more seconds.",
				 id, wait_sec - (unsigned int)(now - wait_start));

			/* Delay a random period between 1 and 2 seconds. */
			delay_usec = 1000000 + lvm_even_rand(&_lvmetad_cmd->rand_seed, 1000000);
			usleep(delay_usec);
			daemon_reply_destroy(reply);
			/*
			 * Forget the destroyed contents: if building the next
			 * request fails, this reply is returned as it is and
			 * the caller destroys it again.
			 */
			memset(&reply, 0, sizeof(reply));
			goto retry;
		} else {
			/* See lvmetad_handle_reply for handling other cases. */
		}
	}

out:
	return reply;
}
/*
 * token_update happens when starting or ending an lvmetad update.
 * When starting we set the token to "update in progress".
 * When ending we set the token to our filter:<hash>.
 *
 * From the perspective of a command, the lvmetad state is one of:
 * "none" - the lvmetad cache is not populated and an update is required.
 * "filter:<matching_hash>" - the command can use the lvmetad cache.
 * "filter:<unmatching_hash>" - the lvmetad cache must be updated to be used.
 * "update in progress" - a command is updating the lvmetad cache.
 *
 * . If none, the command will update (scan and populate lvmetad),
 *   then use the cache.
 *
 * . If filter is matching, the command will use the cache.
 *
 * . If filter is unmatching, the command will update (scan and
 *   populate lvmetad), then use the cache.
 *
 * . If update in progress, the command will wait for a while for the state
 *   to become non-updating.  If it changes, see above, if it doesn't change,
 *   then the command either reverts to not using lvmetad, or does an update
 *   (scan and populate lvmetad) and then uses the cache.
 *
 * A command that is explicitly intended to update the cache will always do
 * that (it may wait for a while first to allow a current update to complete).
 * A command that is not explicitly intended to update the cache may choose
 * to revert to scanning and not use lvmetad.
 *
 * Because two different updates from two commands can potentially overlap,
 * lvmetad saves the pid of the latest update to start, so it can reject messages
 * from preempted updates.  This prevents an invalid mix of two different updates.
 * (The command makes use of the update_pid to print more informative messages.)
 *
 * If lvmetad detects that a command doing an update is taking too long, it will
 * change the token from "update in progress" to "none", which means a new update
 * is required, causing the next command to do an update.  This effectively
 * cancels/preempts a slow/stuck update, and helps to automatically resolve
 * some failure cases.
 *
 * replaced_update: optional output; set to 1 when the previous lvmetad token
 *                  was "update in progress" and that update belonged to
 *                  another pid, otherwise set to 0.
 *
 * Returns 1 when lvmetad accepted the token, 0 on any error or mismatch.
 */
static int _token_update(int *replaced_update)
{
	daemon_reply reply;
	/* Our token may be NULL; _lvmetad_send() sends "none" in that case too. */
	const char *our_token = _lvmetad_token ?: "none";
	const char *token_expected;
	const char *prev_token;
	int update_pid;
	int ending_our_update;

	log_debug_lvmetad("Sending lvmetad token_update %s", our_token);
	reply = _lvmetad_send(NULL, "token_update", NULL);

	if (replaced_update)
		*replaced_update = 0;

	if (reply.error) {
		log_warn("WARNING: lvmetad token update error: %s", strerror(reply.error));
		daemon_reply_destroy(reply);
		return 0;
	}

	update_pid = (int)daemon_reply_int(reply, "update_pid", 0);

	/*
	 * A mismatch can only happen when this command attempts to set the
	 * token to filter:<hash> at the end of its update, but the update has
	 * been preempted in lvmetad by a new one (from update_pid).
	 */
	if (!strcmp(daemon_reply_str(reply, "response", ""), "token_mismatch")) {
		token_expected = daemon_reply_str(reply, "expected", "");

		ending_our_update = strcmp(our_token, LVMETAD_TOKEN_UPDATE_IN_PROGRESS);

		log_debug_lvmetad("Received token update mismatch expected \"%s\" our token \"%s\" update_pid %d our pid %d",
				  token_expected, our_token, update_pid, getpid());

		if (ending_our_update && (update_pid != getpid())) {
			log_warn("WARNING: lvmetad was updated by another command (pid %d).", update_pid);
		} else {
			/*
			 * Shouldn't happen.
			 * If we're ending our update and our pid matches the update_pid,
			 * then there would not be a mismatch.
			 * If we're starting a new update, lvmetad never returns a
			 * token mismatch.
			 * In any case, it doesn't hurt to just return an error here.
			 */
			log_error(INTERNAL_ERROR "lvmetad token update mismatch pid %d matches our own pid %d", update_pid, getpid());
		}

		daemon_reply_destroy(reply);
		return 0;
	}

	if (strcmp(daemon_reply_str(reply, "response", ""), "OK")) {
		log_error("Failed response from lvmetad for token update.");
		daemon_reply_destroy(reply);
		return 0;
	}

	if ((prev_token = daemon_reply_str(reply, "prev_token", NULL))) {
		if (!strcmp(prev_token, LVMETAD_TOKEN_UPDATE_IN_PROGRESS))
			if (replaced_update && (update_pid != getpid()))
				*replaced_update = 1;
	}

	daemon_reply_destroy(reply);
	return 1;
}
/*
 * Helper; evaluate the reply from lvmetad, check for errors, print diagnostics
 * and return a summary success/failure exit code.
 *
 * reply:  reply to evaluate; it is not destroyed here.
 * id:     name of the request that produced the reply (may be NULL).
 * object: description of the object the request was about, used in messages.
 * found:  optional output.  If found is set, *found indicates whether or not
 *         the object exists (0 = unknown to lvmetad, 1 = found, 2 = multiple
 *         matches), and a missing object is not treated as an error.
 *
 * Returns 1 on success, 0 on failure.
 */
static int _lvmetad_handle_reply(daemon_reply reply, const char *id, const char *object, int *found)
{
	/* Our token may be NULL; _lvmetad_send() sends "none" in that case. */
	const char *our_token = _lvmetad_token ?: "none";
	const char *response;
	const char *token_expected;
	const char *action;
	int action_modifies = 0;
	int daemon_in_update;
	int we_are_in_update;
	int update_pid;

	if (!id)
		action = "<none>";
	else if (!strcmp(id, "pv_list"))
		action = "list PVs";
	else if (!strcmp(id, "vg_list"))
		action = "list VGs";
	else if (!strcmp(id, "vg_lookup"))
		action = "lookup VG";
	else if (!strcmp(id, "pv_lookup"))
		action = "lookup PV";
	else if (!strcmp(id, "pv_clear_all"))
		action = "clear info about all PVs";
	else if (!strcmp(id, "vg_clear_outdated_pvs"))
		action = "clear the list of outdated PVs";
	else if (!strcmp(id, "set_vg_info"))
		action = "set VG info";
	else if (!strcmp(id, "vg_update"))
		action = "update VG";
	else if (!strcmp(id, "vg_remove"))
		action = "remove VG";
	else if (!strcmp(id, "pv_found")) {
		action = "update PV";
		action_modifies = 1;
	} else if (!strcmp(id, "pv_gone")) {
		action = "drop PV";
		action_modifies = 1;
	} else {
		log_error(INTERNAL_ERROR "Unchecked lvmetad message %s.", id);
		action = "action unknown";
	}

	if (reply.error) {
		log_warn("WARNING: lvmetad cannot be used due to error: %s", strerror(reply.error));
		goto fail;
	}

	/* Only look inside the reply once it is known to be error-free. */
	response = daemon_reply_str(reply, "response", "");

	/*
	 * Errors related to token mismatch.
	 */
	if (!strcmp(response, "token_mismatch")) {
		token_expected = daemon_reply_str(reply, "expected", "");
		update_pid = (int)daemon_reply_int(reply, "update_pid", 0);

		log_debug("lvmetad token mismatch, expected \"%s\" our token \"%s\"",
			  token_expected, our_token);

		daemon_in_update = !strcmp(token_expected, LVMETAD_TOKEN_UPDATE_IN_PROGRESS);
		we_are_in_update = !strcmp(our_token, LVMETAD_TOKEN_UPDATE_IN_PROGRESS);

		if (daemon_in_update && we_are_in_update) {
			/*
			 * When we do not match the update_pid, it means our
			 * update was cancelled and another process is now
			 * updating the cache.
			 */
			if (update_pid != getpid()) {
				log_warn("WARNING: lvmetad is being updated by another command (pid %d).", update_pid);
			} else {
				/* Shouldn't happen */
				log_error(INTERNAL_ERROR "lvmetad update by pid %d matches our own pid %d", update_pid, getpid());
			}
			/* We don't care if the action was modifying during a token update. */
			action_modifies = 0;
			goto fail;

		} else if (daemon_in_update && !we_are_in_update) {
			/*
			 * Another command is updating lvmetad, and we cannot
			 * use lvmetad until the update is finished.
			 * lvmetad_send resent this message up to the limit and
			 * eventually gave up.  The caller may choose to not
			 * use lvmetad at this point and revert to scanning.
			 */
			log_warn("WARNING: lvmetad is being updated and cannot be used.");
			goto fail;

		} else if (!daemon_in_update && we_are_in_update) {
			/*
			 * We are updating lvmetad after setting the token to
			 * "update in progress", but lvmetad has a non-update
			 * token and is rejecting our update messages.  This
			 * must mean that lvmetad cancelled our update (we were
			 * probably too slow, taking longer than the timeout),
			 * so another command completed an update and set the
			 * token based on its filter.  Here we've attempted to
			 * continue our cache update, and find we've been
			 * preempted, so we should just abort our failed
			 * update.
			 */
			log_warn("WARNING: lvmetad was updated by another command.");
			/* We don't care if the action was modifying during a token update. */
			action_modifies = 0;
			goto fail;

		} else if (!daemon_in_update && !we_are_in_update) {
			/*
			 * Another command has updated the lvmetad cache, and
			 * has done so using a different device filter from our
			 * own, which has made the lvmetad token and our token
			 * not match.  This should not usually happen because
			 * this command has already checked for a matching token
			 * in lvmetad_token_matches(), but it's possible for
			 * another command's rescan to slip in between the time
			 * we call lvmetad_token_matches() and the time we get
			 * here to lvmetad_send().  With a mismatched token
			 * (different set of devices), we cannot use the lvmetad
			 * cache.
			 *
			 * FIXME: it would be nice to have this command ignore
			 * lvmetad at this point and revert to disk scanning,
			 * but the layers above lvmetad_send are not yet able
			 * to switch modes in the middle of processing.
			 *
			 * (The advantage of lvmetad_check_token is that it
			 * can rescan to get the token in sync, or if that
			 * fails it can make the command revert to scanning
			 * from the start.)
			 */
			log_warn("WARNING: Cannot use lvmetad while it caches different devices.");
			goto fail;
		}
	}

	/*
	 * Non-token-mismatch related error checking.
	 */

	/* All OK? */
	if (!strcmp(response, "OK")) {
		if (found)
			*found = 1;
		return 1;
	}

	/* Unknown device permitted? */
	if (found && !strcmp(response, "unknown")) {
		log_very_verbose("Request to %s %s%sin lvmetad did not find any matching object.",
				 action, object, *object ? " " : "");
		*found = 0;
		return 1;
	}

	/* Multiple VGs with the same name were found. */
	if (found && !strcmp(response, "multiple")) {
		log_very_verbose("Request to %s %s%sin lvmetad found multiple matching objects.",
				 action, object, *object ? " " : "");
		*found = 2;
		return 1;
	}

	/*
	 * Generic error message for error cases not specifically checked above.
	 */
	log_error("Request to %s %s%sin lvmetad gave response %s. Reason: %s",
		  action, object, *object ? " " : "",
		  daemon_reply_str(reply, "response", "<missing>"),
		  daemon_reply_str(reply, "reason", "<missing>"));

fail:
	/*
	 * If the failed lvmetad message was updating lvmetad with new metadata
	 * that has been changed by this command, it is important to restart
	 * lvmetad (or at least rescan.)  (An lvmetad update that is just
	 * scanning disks to populate the cache is not a problem, so we try to
	 * avoid printing a "corruption" warning in that case.)
	 */
	if (action_modifies) {
		/*
		 * FIXME: experiment with killing the lvmetad process here, e.g.
		 * kill(_lvmetad_daemon_pid, SIGKILL);
		 */
		log_warn("WARNING: To avoid corruption, restart lvmetad (or disable with use_lvmetad=0).");
	}

	return 0;
}
static int _read_mda(struct lvmcache_info *info,
struct format_type *fmt,
const struct dm_config_node *cn)
{
struct metadata_area_ops *ops;
dm_list_iterate_items(ops, &fmt->mda_ops)
if (ops->mda_import_text && ops->mda_import_text(info, cn))
return 1;
return 0;
}
/*
 * Add one PV, as described by lvmetad, to lvmcache.
 *
 * cmd:      command context (provides the device filter and format lookup).
 * cn:       config node for the PV; its children are read for "id", "vgid",
 *           "vgname", "format", "device", "dev_size", "label_sector",
 *           "ext_flags", "ext_version" and the mdaN / daN / baN sections.
 * fmt:      format to use, or NULL to look it up from the "format" field.
 * fallback: device number to try when the "device" one is not found;
 *           0 means no fallback.
 *
 * A PV without "vgid" / "vgname" is filed under the format's orphan VG.
 *
 * Returns 1 on success, 0 on failure.
 */
static int _pv_populate_lvmcache(struct cmd_context *cmd,
				 struct dm_config_node *cn,
				 struct format_type *fmt, dev_t fallback)
{
	struct device *dev;
	struct id pvid, vgid;
	char mda_id[32];
	char da_id[32];
	int i = 0;
	struct dm_config_node *mda, *da;
	uint64_t offset, size;
	struct lvmcache_info *info;

	const char *pvid_txt = dm_config_find_str(cn->child, "id", NULL),
		   *vgid_txt = dm_config_find_str(cn->child, "vgid", NULL),
		   *vgname = dm_config_find_str(cn->child, "vgname", NULL),
		   *fmt_name = dm_config_find_str(cn->child, "format", NULL);
	/* pvid_txt may be NULL; never hand NULL to a "%s" conversion. */
	const char *pvid_log = pvid_txt ? pvid_txt : "<unknown>";
	dev_t devt = dm_config_find_int(cn->child, "device", 0);
	uint64_t devsize = dm_config_find_int64(cn->child, "dev_size", 0),
		 label_sector = dm_config_find_int64(cn->child, "label_sector", 0);
	uint32_t ext_flags = (uint32_t) dm_config_find_int64(cn->child, "ext_flags", 0);
	uint32_t ext_version = (uint32_t) dm_config_find_int64(cn->child, "ext_version", 0);

	if (!fmt && fmt_name)
		fmt = get_format_by_name(cmd, fmt_name);

	if (!fmt) {
		log_error("PV %s not recognised. Is the device missing?", pvid_log);
		return 0;
	}

	dev = dev_cache_get_by_devt(devt, cmd->filter);
	if (!dev && fallback)
		dev = dev_cache_get_by_devt(fallback, cmd->filter);

	if (!dev) {
		log_warn("WARNING: Device for PV %s not found or rejected by a filter.", pvid_log);
		return 0;
	}

	if (!pvid_txt || !id_read_format(&pvid, pvid_txt)) {
		log_error("Missing or ill-formatted PVID for PV: %s.", pvid_log);
		return 0;
	}

	if (vgid_txt) {
		if (!id_read_format(&vgid, vgid_txt))
			return_0;
	} else
		/* Bounded copy: the orphan name must never overrun struct id. */
		snprintf((char *)&vgid, sizeof(vgid), "%s", fmt->orphan_vg_name);

	if (!vgname)
		vgname = fmt->orphan_vg_name;

	if (!(info = lvmcache_add(fmt->labeller, (const char *)&pvid, dev,
				  vgname, (const char *)&vgid, 0)))
		return_0;

	lvmcache_get_label(info)->sector = label_sector;
	lvmcache_get_label(info)->dev = dev;
	lvmcache_set_device_size(info, devsize);

	/* Replace whatever areas lvmcache already holds with lvmetad's view. */
	lvmcache_del_das(info);
	lvmcache_del_mdas(info);
	lvmcache_del_bas(info);

	/* Metadata areas: mda0, mda1, ... until the first missing index. */
	do {
		snprintf(mda_id, sizeof(mda_id), "mda%d", i);
		mda = dm_config_find_node(cn->child, mda_id);
		if (mda)
			_read_mda(info, fmt, mda);
		++i;
	} while (mda);

	/* Data areas: da0, da1, ... */
	i = 0;
	do {
		snprintf(da_id, sizeof(da_id), "da%d", i);
		da = dm_config_find_node(cn->child, da_id);
		if (da) {
			if (!dm_config_get_uint64(da->child, "offset", &offset))
				return_0;
			if (!dm_config_get_uint64(da->child, "size", &size))
				return_0;
			lvmcache_add_da(info, offset, size);
		}
		++i;
	} while (da);

	/* Bootloader areas: ba0, ba1, ... */
	i = 0;
	do {
		snprintf(da_id, sizeof(da_id), "ba%d", i);
		da = dm_config_find_node(cn->child, da_id);
		if (da) {
			if (!dm_config_get_uint64(da->child, "offset", &offset))
				return_0;
			if (!dm_config_get_uint64(da->child, "size", &size))
				return_0;
			lvmcache_add_ba(info, offset, size);
		}
		++i;
	} while (da);

	lvmcache_set_ext_flags(info, ext_flags);
	lvmcache_set_ext_version(info, ext_version);

	return 1;
}
/*
 * Fill in a struct physical_volume from the lvmcache entry for its PV id.
 *
 * With no cache entry the PV is only flagged MISSING_PV.  Otherwise the
 * label sector and device are copied from the cache (a NULL device also
 * sets MISSING_PV), the cached metadata areas are added to fid, and fid is
 * stored in the PV.
 *
 * Returns 0 only when adding the metadata areas to fid fails, else 1.
 */
static int _pv_update_struct_pv(struct physical_volume *pv, struct format_instance *fid)
{
	struct lvmcache_info *cached =
		lvmcache_info_from_pvid((const char *)&pv->id, pv->dev, 0);

	if (!cached) {
		pv->status |= MISSING_PV; /* probably missing */
		return 1;
	}

	pv->label_sector = lvmcache_get_label(cached)->sector;
	pv->dev = lvmcache_device(cached);

	if (!pv->dev)
		pv->status |= MISSING_PV;

	if (!lvmcache_fid_add_mdas_pv(cached, fid))
		return_0;

	pv->fid = fid;

	return 1;
}
struct volume_group *lvmetad_vg_lookup(struct cmd_context *cmd, const char *vgname, const char *vgid)
{
struct volume_group *vg = NULL;
struct volume_group *vg2 = NULL;
daemon_reply reply;
int found;
char uuid[64];
struct format_instance *fid = NULL;
struct format_instance_ctx fic;
struct dm_config_node *top;
const char *name, *diag_name;
const char *fmt_name;
struct format_type *fmt;
struct dm_config_node *pvcn;
struct pv_list *pvl;
int rescan = 0;
if (!lvmetad_used())
return NULL;
if (vgid) {
if (!id_write_format((const struct id*)vgid, uuid, sizeof(uuid)))
return_NULL;
}
if (vgid && vgname) {
log_debug_lvmetad("Asking lvmetad for VG %s %s", uuid, vgname);
reply = _lvmetad_send(cmd, "vg_lookup",
"uuid = %s", uuid,
"name = %s", vgname,
NULL);
diag_name = uuid;
} else if (vgid) {
log_debug_lvmetad("Asking lvmetad for VG vgid %s", uuid);
reply = _lvmetad_send(cmd, "vg_lookup", "uuid = %s", uuid, NULL);
diag_name = uuid;
} else if (vgname) {
log_debug_lvmetad("Asking lvmetad for VG %s", vgname);
reply = _lvmetad_send(cmd, "vg_lookup", "name = %s", vgname, NULL);
diag_name = vgname;
} else {
log_error(INTERNAL_ERROR "VG name required (VGID not available)");
return NULL;
}
if (_lvmetad_handle_reply(reply, "vg_lookup", diag_name, &found) && found) {
if ((found == 2) && vgname) {
log_error("Multiple VGs found with the same name: %s.", vgname);
log_error("See the --select option with VG UUID (vg_uuid).");
goto out;
}
if (!(top = dm_config_find_node(reply.cft->root, "metadata"))) {
log_error(INTERNAL_ERROR "metadata config node not found.");
goto out;
}
name = daemon_reply_str(reply, "name", NULL);
/* fall back to lvm2 if we don't know better */
fmt_name = dm_config_find_str(top, "metadata/format", "lvm2");
if (!(fmt = get_format_by_name(cmd, fmt_name))) {
log_error(INTERNAL_ERROR
"We do not know the format (%s) reported by lvmetad.",
fmt_name);
2012-03-03 01:24:37 +04:00
goto out;
}
fic.type = FMT_INSTANCE_MDAS | FMT_INSTANCE_AUX_MDAS;
fic.context.vg_ref.vg_name = name;
fic.context.vg_ref.vg_id = vgid;
if (!(fid = fmt->ops->create_instance(fmt, &fic)))
2012-03-03 01:24:37 +04:00
goto_out;
if ((pvcn = dm_config_find_node(top, "metadata/physical_volumes")))
for (pvcn = pvcn->child; pvcn; pvcn = pvcn->sib)
_pv_populate_lvmcache(cmd, pvcn, fmt, 0);
if ((pvcn = dm_config_find_node(top, "metadata/outdated_pvs")))
for (pvcn = pvcn->child; pvcn; pvcn = pvcn->sib)
_pv_populate_lvmcache(cmd, pvcn, fmt, 0);
top->key = name;
if (!(vg = import_vg_from_lvmetad_config_tree(reply.cft, fid)))
2012-03-03 01:24:37 +04:00
goto_out;
/*
* Read the VG from disk, ignoring the lvmetad copy in these
* cases:
*
* 1. The host is not using lvmlockd, but is reading lockd VGs
* using the --shared option. The shared option is meant to
* let hosts not running lvmlockd look at lockd VGs, like the
* foreign option allows hosts to look at foreign VGs. When
* --foreign is used, the code forces a rescan since the local
* lvmetad cache of foreign VGs is likely stale. Similarly,
* for --shared, have the code reading the shared VGs below
* not use the cached copy from lvmetad but to rescan the VG.
*
* 2. The host failed to acquire the VG lock from lvmlockd for
* the lockd VG. In this case, the usual mechanisms for
* updating the lvmetad copy of the VG have been missed. Since
* we don't know if the cached copy is valid, assume it's not.
*
* 3. lvmetad has returned the "vg_invalid" flag, which is the
* usual mechanism used by lvmlockd/lvmetad to cause a host to
* reread a VG from disk that has been modified from another
* host.
*/
if (is_lockd_type(vg->lock_type) && cmd->include_shared_vgs) {
log_debug_lvmetad("Rescan VG %s because including shared", vgname);
rescan = 1;
} else if (is_lockd_type(vg->lock_type) && cmd->lockd_vg_rescan) {
log_debug_lvmetad("Rescan VG %s because no lvmlockd lock is held", vgname);
rescan = 1;
} else if (dm_config_find_node(reply.cft->root, "vg_invalid")) {
lvmetad: two phase vg_update Previously, a command sent lvmetad new VG metadata in vg_commit(). In vg_commit(), devices are suspended, so any memory allocation done by the command while sending to lvmetad, or by lvmetad while updating its cache could deadlock if memory reclaim was triggered. Now lvmetad is updated in unlock_vg(), after devices are resumed. The new method for updating VG metadata in lvmetad is in two phases: 1. In vg_write(), before devices are suspended, the command sends lvmetad a short message ("set_vg_info") telling it what the new VG seqno will be. lvmetad sees that the seqno is newer than the seqno of its cached VG, so it sets the INVALID flag for the cached VG. If sending the message to lvmetad fails, the command fails before the metadata is committed and the change is not made. If sending the message succeeds, vg_commit() is called. 2. In unlock_vg(), after devices are resumed, the command sends lvmetad the standard vg_update message with the new metadata. lvmetad sees that the seqno in the new metadata matches the seqno it saved from set_vg_info, and knows it has the latest copy, so it clears the INVALID flag for the cached VG. If a command fails between 1 and 2 (after committing the VG on disk, but before sending lvmetad the new metadata), the cached VG retains the INVALID flag in lvmetad. A subsequent command will read the cached VG from lvmetad, see the INVALID flag, ignore the cached copy, read the VG from disk instead, update the lvmetad copy with the latest copy from disk, (this clears the INVALID flag in lvmetad), and use the correct VG metadata for the command. (This INVALID mechanism already existed for use by lvmlockd.)
2016-06-08 22:42:03 +03:00
if (!is_lockd_type(vg->lock_type)) {
/* Can happen if a previous command failed/crashed without updating lvmetad. */
log_warn("WARNING: Reading VG %s from disk because lvmetad metadata is invalid.", vgname);
} else {
/* This is normal when the VG was modified by another host. */
log_debug_lvmetad("Rescan VG %s because lvmetad returned invalid", vgname);
}
rescan = 1;
}
/*
* locking may have detected a newer vg version and
* invalidated the cached vg.
*/
if (rescan) {
if (!(vg2 = lvmetad_pvscan_vg(cmd, vg))) {
log_debug_lvmetad("VG %s from lvmetad not found during rescan.", vgname);
fid = NULL;
release_vg(vg);
vg = NULL;
goto out;
}
release_vg(vg);
vg = vg2;
fid = vg2->fid;
}
dm_list_iterate_items(pvl, &vg->pvs) {
if (!_pv_update_struct_pv(pvl->pv, fid)) {
vg = NULL;
goto_out; /* FIXME error path */
}
}
dm_list_iterate_items(pvl, &vg->pvs_outdated) {
if (!_pv_update_struct_pv(pvl->pv, fid)) {
vg = NULL;
goto_out; /* FIXME error path */
}
}
lvmcache_update_vg(vg, 0);
vg_mark_partial_lvs(vg, 1);
}
2012-03-03 01:24:37 +04:00
out:
if (!vg && fid)
fid->fmt->ops->destroy_instance(fid);
daemon_reply_destroy(reply);
2012-03-03 01:24:37 +04:00
return vg;
}
/*
 * Cursor state handed to _fixup_ignored() while it is called once per
 * metadata area of a PV.
 */
struct _fixup_baton {
int i; /* number of metadata areas visited so far */
int find; /* position of the metadata area whose ignored flag is to be set */
int ignore; /* value passed to mda_set_ignored() for that metadata area */
};
/*
 * Per-mda callback: when the running position stored in the baton equals
 * the requested one, copy the baton's ignore value onto this metadata area.
 * The position counter is advanced on every call.
 *
 * Always returns 1.
 */
static int _fixup_ignored(struct metadata_area *mda, void *baton)
{
	struct _fixup_baton *cursor = baton;
	const int position = cursor->i++;

	if (position == cursor->find)
		mda_set_ignored(mda, cursor->ignore);

	return 1;
}
lvmetad: two phase vg_update Previously, a command sent lvmetad new VG metadata in vg_commit(). In vg_commit(), devices are suspended, so any memory allocation done by the command while sending to lvmetad, or by lvmetad while updating its cache could deadlock if memory reclaim was triggered. Now lvmetad is updated in unlock_vg(), after devices are resumed. The new method for updating VG metadata in lvmetad is in two phases: 1. In vg_write(), before devices are suspended, the command sends lvmetad a short message ("set_vg_info") telling it what the new VG seqno will be. lvmetad sees that the seqno is newer than the seqno of its cached VG, so it sets the INVALID flag for the cached VG. If sending the message to lvmetad fails, the command fails before the metadata is committed and the change is not made. If sending the message succeeds, vg_commit() is called. 2. In unlock_vg(), after devices are resumed, the command sends lvmetad the standard vg_update message with the new metadata. lvmetad sees that the seqno in the new metadata matches the seqno it saved from set_vg_info, and knows it has the latest copy, so it clears the INVALID flag for the cached VG. If a command fails between 1 and 2 (after committing the VG on disk, but before sending lvmetad the new metadata), the cached VG retains the INVALID flag in lvmetad. A subsequent command will read the cached VG from lvmetad, see the INVALID flag, ignore the cached copy, read the VG from disk instead, update the lvmetad copy with the latest copy from disk, (this clears the INVALID flag in lvmetad), and use the correct VG metadata for the command. (This INVALID mechanism already existed for use by lvmlockd.)
2016-06-08 22:42:03 +03:00
/*
* After the VG is written to disk, but before it's committed,
* lvmetad is told the new seqno. lvmetad sets the INVALID
* flag on the cached VG and saves the new seqno.
*
* After the VG is committed on disk, the command sends the
* new VG metadata, containing the new seqno. lvmetad sees
* that it has the updated metadata and clears the INVALID
* flag on the cached VG.
*
* If the command fails after committing the metadata on disk
* but before sending the new metadata to lvmetad, then the
* next command that asks lvmetad for the metadata will get
* back the INVALID flag. That command will then read the
* VG metadata from disk to use, and will send the latest
* metadata from disk to lvmetad which will clear the
* INVALID flag.
*/
/*
 * First phase of the two-phase update: send "set_vg_info" with the VG
 * name, uuid and new seqno, and on success mark the VG as having an
 * update pending.
 *
 * Returns 1 on success (or when lvmetad is not used / in test mode),
 * 0 if the uuid cannot be formatted or the request fails.
 */
int lvmetad_vg_update_pending(struct volume_group *vg)
{
	char uuid[64] __attribute__((aligned(8)));
	daemon_reply reply;
	int accepted;

	if (!lvmetad_used() || test_mode())
		return 1; /* fake it */

	if (!id_write_format(&vg->id, uuid, sizeof(uuid)))
		return_0;

	log_debug_lvmetad("Sending lvmetad pending VG %s (seqno %" PRIu32 ")", vg->name, vg->seqno);

	reply = _lvmetad_send(vg->cmd, "set_vg_info",
			      "name = %s", vg->name,
			      "uuid = %s", uuid,
			      "version = %"PRId64, (int64_t)vg->seqno,
			      NULL);

	/* The reply is not needed beyond the status check. */
	accepted = _lvmetad_handle_reply(reply, "set_vg_info", vg->name, NULL);
	daemon_reply_destroy(reply);

	if (!accepted)
		return_0;

	vg->lvmetad_update_pending = 1;

	return 1;
}
int lvmetad_vg_update_finish(struct volume_group *vg)
{
lvmetad: two phase vg_update Previously, a command sent lvmetad new VG metadata in vg_commit(). In vg_commit(), devices are suspended, so any memory allocation done by the command while sending to lvmetad, or by lvmetad while updating its cache could deadlock if memory reclaim was triggered. Now lvmetad is updated in unlock_vg(), after devices are resumed. The new method for updating VG metadata in lvmetad is in two phases: 1. In vg_write(), before devices are suspended, the command sends lvmetad a short message ("set_vg_info") telling it what the new VG seqno will be. lvmetad sees that the seqno is newer than the seqno of its cached VG, so it sets the INVALID flag for the cached VG. If sending the message to lvmetad fails, the command fails before the metadata is committed and the change is not made. If sending the message succeeds, vg_commit() is called. 2. In unlock_vg(), after devices are resumed, the command sends lvmetad the standard vg_update message with the new metadata. lvmetad sees that the seqno in the new metadata matches the seqno it saved from set_vg_info, and knows it has the latest copy, so it clears the INVALID flag for the cached VG. If a command fails between 1 and 2 (after committing the VG on disk, but before sending lvmetad the new metadata), the cached VG retains the INVALID flag in lvmetad. A subsequent command will read the cached VG from lvmetad, see the INVALID flag, ignore the cached copy, read the VG from disk instead, update the lvmetad copy with the latest copy from disk, (this clears the INVALID flag in lvmetad), and use the correct VG metadata for the command. (This INVALID mechanism already existed for use by lvmlockd.)
2016-06-08 22:42:03 +03:00
char uuid[64] __attribute__((aligned(8)));
daemon_reply reply;
struct dm_hash_node *n;
struct metadata_area *mda;
char mda_id[128], *num;
lvmetad: two phase vg_update Previously, a command sent lvmetad new VG metadata in vg_commit(). In vg_commit(), devices are suspended, so any memory allocation done by the command while sending to lvmetad, or by lvmetad while updating its cache could deadlock if memory reclaim was triggered. Now lvmetad is updated in unlock_vg(), after devices are resumed. The new method for updating VG metadata in lvmetad is in two phases: 1. In vg_write(), before devices are suspended, the command sends lvmetad a short message ("set_vg_info") telling it what the new VG seqno will be. lvmetad sees that the seqno is newer than the seqno of its cached VG, so it sets the INVALID flag for the cached VG. If sending the message to lvmetad fails, the command fails before the metadata is committed and the change is not made. If sending the message succeeds, vg_commit() is called. 2. In unlock_vg(), after devices are resumed, the command sends lvmetad the standard vg_update message with the new metadata. lvmetad sees that the seqno in the new metadata matches the seqno it saved from set_vg_info, and knows it has the latest copy, so it clears the INVALID flag for the cached VG. If a command fails between 1 and 2 (after committing the VG on disk, but before sending lvmetad the new metadata), the cached VG retains the INVALID flag in lvmetad. A subsequent command will read the cached VG from lvmetad, see the INVALID flag, ignore the cached copy, read the VG from disk instead, update the lvmetad copy with the latest copy from disk, (this clears the INVALID flag in lvmetad), and use the correct VG metadata for the command. (This INVALID mechanism already existed for use by lvmlockd.)
2016-06-08 22:42:03 +03:00
struct dm_config_tree *vgmeta;
struct pv_list *pvl;
struct lvmcache_info *info;
struct _fixup_baton baton;
lvmetad: two phase vg_update Previously, a command sent lvmetad new VG metadata in vg_commit(). In vg_commit(), devices are suspended, so any memory allocation done by the command while sending to lvmetad, or by lvmetad while updating its cache could deadlock if memory reclaim was triggered. Now lvmetad is updated in unlock_vg(), after devices are resumed. The new method for updating VG metadata in lvmetad is in two phases: 1. In vg_write(), before devices are suspended, the command sends lvmetad a short message ("set_vg_info") telling it what the new VG seqno will be. lvmetad sees that the seqno is newer than the seqno of its cached VG, so it sets the INVALID flag for the cached VG. If sending the message to lvmetad fails, the command fails before the metadata is committed and the change is not made. If sending the message succeeds, vg_commit() is called. 2. In unlock_vg(), after devices are resumed, the command sends lvmetad the standard vg_update message with the new metadata. lvmetad sees that the seqno in the new metadata matches the seqno it saved from set_vg_info, and knows it has the latest copy, so it clears the INVALID flag for the cached VG. If a command fails between 1 and 2 (after committing the VG on disk, but before sending lvmetad the new metadata), the cached VG retains the INVALID flag in lvmetad. A subsequent command will read the cached VG from lvmetad, see the INVALID flag, ignore the cached copy, read the VG from disk instead, update the lvmetad copy with the latest copy from disk, (this clears the INVALID flag in lvmetad), and use the correct VG metadata for the command. (This INVALID mechanism already existed for use by lvmlockd.)
2016-06-08 22:42:03 +03:00
if (!vg->lvmetad_update_pending)
return 1;
if (!(vg->fid->fmt->features & FMT_PRECOMMIT))
return 1;
if (!lvmetad_used() || test_mode())
return 1; /* fake it */
lvmetad: two phase vg_update Previously, a command sent lvmetad new VG metadata in vg_commit(). In vg_commit(), devices are suspended, so any memory allocation done by the command while sending to lvmetad, or by lvmetad while updating its cache could deadlock if memory reclaim was triggered. Now lvmetad is updated in unlock_vg(), after devices are resumed. The new method for updating VG metadata in lvmetad is in two phases: 1. In vg_write(), before devices are suspended, the command sends lvmetad a short message ("set_vg_info") telling it what the new VG seqno will be. lvmetad sees that the seqno is newer than the seqno of its cached VG, so it sets the INVALID flag for the cached VG. If sending the message to lvmetad fails, the command fails before the metadata is committed and the change is not made. If sending the message succeeds, vg_commit() is called. 2. In unlock_vg(), after devices are resumed, the command sends lvmetad the standard vg_update message with the new metadata. lvmetad sees that the seqno in the new metadata matches the seqno it saved from set_vg_info, and knows it has the latest copy, so it clears the INVALID flag for the cached VG. If a command fails between 1 and 2 (after committing the VG on disk, but before sending lvmetad the new metadata), the cached VG retains the INVALID flag in lvmetad. A subsequent command will read the cached VG from lvmetad, see the INVALID flag, ignore the cached copy, read the VG from disk instead, update the lvmetad copy with the latest copy from disk, (this clears the INVALID flag in lvmetad), and use the correct VG metadata for the command. (This INVALID mechanism already existed for use by lvmlockd.)
2016-06-08 22:42:03 +03:00
if (!id_write_format(&vg->id, uuid, sizeof(uuid)))
return_0;
if (!(vgmeta = export_vg_to_config_tree(vg))) {
log_error("Failed to export VG to config tree.");
return 0;
}
lvmetad: two phase vg_update Previously, a command sent lvmetad new VG metadata in vg_commit(). In vg_commit(), devices are suspended, so any memory allocation done by the command while sending to lvmetad, or by lvmetad while updating its cache could deadlock if memory reclaim was triggered. Now lvmetad is updated in unlock_vg(), after devices are resumed. The new method for updating VG metadata in lvmetad is in two phases: 1. In vg_write(), before devices are suspended, the command sends lvmetad a short message ("set_vg_info") telling it what the new VG seqno will be. lvmetad sees that the seqno is newer than the seqno of its cached VG, so it sets the INVALID flag for the cached VG. If sending the message to lvmetad fails, the command fails before the metadata is committed and the change is not made. If sending the message succeeds, vg_commit() is called. 2. In unlock_vg(), after devices are resumed, the command sends lvmetad the standard vg_update message with the new metadata. lvmetad sees that the seqno in the new metadata matches the seqno it saved from set_vg_info, and knows it has the latest copy, so it clears the INVALID flag for the cached VG. If a command fails between 1 and 2 (after committing the VG on disk, but before sending lvmetad the new metadata), the cached VG retains the INVALID flag in lvmetad. A subsequent command will read the cached VG from lvmetad, see the INVALID flag, ignore the cached copy, read the VG from disk instead, update the lvmetad copy with the latest copy from disk, (this clears the INVALID flag in lvmetad), and use the correct VG metadata for the command. (This INVALID mechanism already existed for use by lvmlockd.)
2016-06-08 22:42:03 +03:00
log_debug_lvmetad("Sending lvmetad updated VG %s (seqno %" PRIu32 ")", vg->name, vg->seqno);
reply = _lvmetad_send(vg->cmd, "vg_update",
"vgname = %s", vg->name,
"metadata = %t", vgmeta,
NULL);
dm_config_destroy(vgmeta);
if (!_lvmetad_handle_reply(reply, "vg_update", vg->name, NULL)) {
lvmetad: two phase vg_update Previously, a command sent lvmetad new VG metadata in vg_commit(). In vg_commit(), devices are suspended, so any memory allocation done by the command while sending to lvmetad, or by lvmetad while updating its cache could deadlock if memory reclaim was triggered. Now lvmetad is updated in unlock_vg(), after devices are resumed. The new method for updating VG metadata in lvmetad is in two phases: 1. In vg_write(), before devices are suspended, the command sends lvmetad a short message ("set_vg_info") telling it what the new VG seqno will be. lvmetad sees that the seqno is newer than the seqno of its cached VG, so it sets the INVALID flag for the cached VG. If sending the message to lvmetad fails, the command fails before the metadata is committed and the change is not made. If sending the message succeeds, vg_commit() is called. 2. In unlock_vg(), after devices are resumed, the command sends lvmetad the standard vg_update message with the new metadata. lvmetad sees that the seqno in the new metadata matches the seqno it saved from set_vg_info, and knows it has the latest copy, so it clears the INVALID flag for the cached VG. If a command fails between 1 and 2 (after committing the VG on disk, but before sending lvmetad the new metadata), the cached VG retains the INVALID flag in lvmetad. A subsequent command will read the cached VG from lvmetad, see the INVALID flag, ignore the cached copy, read the VG from disk instead, update the lvmetad copy with the latest copy from disk, (this clears the INVALID flag in lvmetad), and use the correct VG metadata for the command. (This INVALID mechanism already existed for use by lvmlockd.)
2016-06-08 22:42:03 +03:00
/*
* In this failure case, the VG cached in lvmetad remains in
* the INVALID state (from lvmetad_vg_update_pending).
* A subsequent command will see INVALID, ignore the cached
* copy, read the VG from disk, and update the cached copy.
*/
daemon_reply_destroy(reply);
return 0;
}
2012-03-03 01:24:37 +04:00
daemon_reply_destroy(reply);
n = (vg->fid && vg->fid->metadata_areas_index) ?
dm_hash_get_first(vg->fid->metadata_areas_index) : NULL;
while (n) {
mda = dm_hash_get_data(vg->fid->metadata_areas_index, n);
strcpy(mda_id, dm_hash_get_key(vg->fid->metadata_areas_index, n));
if ((num = strchr(mda_id, '_'))) {
*num = 0;
++num;
if ((info = lvmcache_info_from_pvid(mda_id, NULL, 0))) {
memset(&baton, 0, sizeof(baton));
baton.find = atoi(num);
baton.ignore = mda_is_ignored(mda);
lvmcache_foreach_mda(info, _fixup_ignored, &baton);
}
}
n = dm_hash_get_next(vg->fid->metadata_areas_index, n);
}
dm_list_iterate_items(pvl, &vg->pvs) {
/* NB. the PV fmt pointer is sometimes wrong during vgconvert */
if (pvl->pv->dev && !lvmetad_pv_found(vg->cmd, &pvl->pv->id, pvl->pv->dev,
vg->fid ? vg->fid->fmt : pvl->pv->fmt,
pvl->pv->label_sector, NULL, NULL, NULL))
return 0;
}
lvmetad: two phase vg_update Previously, a command sent lvmetad new VG metadata in vg_commit(). In vg_commit(), devices are suspended, so any memory allocation done by the command while sending to lvmetad, or by lvmetad while updating its cache could deadlock if memory reclaim was triggered. Now lvmetad is updated in unlock_vg(), after devices are resumed. The new method for updating VG metadata in lvmetad is in two phases: 1. In vg_write(), before devices are suspended, the command sends lvmetad a short message ("set_vg_info") telling it what the new VG seqno will be. lvmetad sees that the seqno is newer than the seqno of its cached VG, so it sets the INVALID flag for the cached VG. If sending the message to lvmetad fails, the command fails before the metadata is committed and the change is not made. If sending the message succeeds, vg_commit() is called. 2. In unlock_vg(), after devices are resumed, the command sends lvmetad the standard vg_update message with the new metadata. lvmetad sees that the seqno in the new metadata matches the seqno it saved from set_vg_info, and knows it has the latest copy, so it clears the INVALID flag for the cached VG. If a command fails between 1 and 2 (after committing the VG on disk, but before sending lvmetad the new metadata), the cached VG retains the INVALID flag in lvmetad. A subsequent command will read the cached VG from lvmetad, see the INVALID flag, ignore the cached copy, read the VG from disk instead, update the lvmetad copy with the latest copy from disk, (this clears the INVALID flag in lvmetad), and use the correct VG metadata for the command. (This INVALID mechanism already existed for use by lvmlockd.)
2016-06-08 22:42:03 +03:00
vg->lvmetad_update_pending = 0;
return 1;
}
/*
 * Announce an upcoming VG removal to lvmetad by sending "set_vg_info"
 * with version 0.
 *
 * Returns 1 on success (or when lvmetad is not used / in test mode),
 * 0 if the uuid cannot be formatted or the request fails.
 */
int lvmetad_vg_remove_pending(struct volume_group *vg)
{
	char uuid[64] __attribute__((aligned(8)));
	daemon_reply reply;

	if (!lvmetad_used() || test_mode())
		return 1; /* fake it */

	if (!id_write_format(&vg->id, uuid, sizeof(uuid)))
		return_0;

	/* Sending version/seqno 0 in set_vg_info will set the INVALID flag. */

	log_debug_lvmetad("Sending lvmetad pending remove VG %s", vg->name);

	/*
	 * Integer fields are passed as int64_t with a PRId64 format, exactly
	 * as lvmetad_vg_update_pending() does for the same "version" field;
	 * a bare int literal would not have the width of the other integer
	 * arguments sent through this variadic interface.
	 */
	reply = _lvmetad_send(vg->cmd, "set_vg_info",
			      "name = %s", vg->name,
			      "uuid = %s", uuid,
			      "version = %" PRId64, (int64_t) 0,
			      NULL);

	if (!_lvmetad_handle_reply(reply, "set_vg_info", vg->name, NULL)) {
		daemon_reply_destroy(reply);
		return_0;
	}

	daemon_reply_destroy(reply);

	return 1;
}
/*
 * Ask lvmetad to drop the VG identified by vg->id ("vg_remove").
 * The VG's pending-update flag is cleared before the request is sent.
 *
 * Returns 1 when lvmetad is not used or in test mode, 0 if the uuid
 * cannot be formatted, otherwise the result of the reply handler.
 */
int lvmetad_vg_remove_finish(struct volume_group *vg)
{
	char uuid[64];
	daemon_reply reply;
	int removed;

	if (!lvmetad_used() || test_mode())
		return 1; /* just fake it */

	vg->lvmetad_update_pending = 0;

	if (!id_write_format(&vg->id, uuid, sizeof(uuid)))
		return_0;

	log_debug_lvmetad("Telling lvmetad to remove VGID %s (%s)", uuid, vg->name);

	reply = _lvmetad_send(vg->cmd, "vg_remove", "uuid = %s", uuid, NULL);
	removed = _lvmetad_handle_reply(reply, "vg_remove", vg->name, NULL);
	daemon_reply_destroy(reply);

	return removed;
}
/*
 * Look up a PV in lvmetad by its id and, if the reply carries a
 * "physical_volume" node, feed that node to _pv_populate_lvmcache().
 *
 * When 'found' is non-NULL and the reply handler leaves *found at 0,
 * the lookup counts as successful and nothing is populated.
 *
 * Returns 1 on success, 0 otherwise (including when lvmetad is unused).
 */
int lvmetad_pv_lookup(struct cmd_context *cmd, struct id pvid, int *found)
{
	char uuid[64];
	daemon_reply reply;
	struct dm_config_node *pv_cn;
	int result = 0;

	if (!lvmetad_used())
		return_0;

	if (!id_write_format(&pvid, uuid, sizeof(uuid)))
		return_0;

	log_debug_lvmetad("Asking lvmetad for PV %s", uuid);
	reply = _lvmetad_send(cmd, "pv_lookup", "uuid = %s", uuid, NULL);

	if (!_lvmetad_handle_reply(reply, "pv_lookup", "", found))
		goto_out;

	/* Only touch the reply body when the PV is not reported as missing. */
	if (!found || *found) {
		if (!(pv_cn = dm_config_find_node(reply.cft->root, "physical_volume")))
			goto_out;

		if (!_pv_populate_lvmcache(cmd, pv_cn, NULL, 0))
			goto_out;
	}

	result = 1;
out:
	daemon_reply_destroy(reply);

	return result;
}
/*
 * Look up a PV in lvmetad by device number and, if the reply carries a
 * "physical_volume" node, feed that node to _pv_populate_lvmcache()
 * together with dev->dev.
 *
 * When 'found' is non-NULL and the reply handler leaves *found at 0,
 * the lookup counts as successful and nothing is populated.
 *
 * Returns 1 on success, 0 otherwise (including when lvmetad is unused).
 */
int lvmetad_pv_lookup_by_dev(struct cmd_context *cmd, struct device *dev, int *found)
{
	daemon_reply reply;
	struct dm_config_node *pv_cn;
	int result = 0;

	if (!lvmetad_used())
		return_0;

	log_debug_lvmetad("Asking lvmetad for PV on %s", dev_name(dev));
	reply = _lvmetad_send(cmd, "pv_lookup", "device = %" PRId64, (int64_t) dev->dev, NULL);

	if (!_lvmetad_handle_reply(reply, "pv_lookup", dev_name(dev), found))
		goto_out;

	/* Only touch the reply body when the PV is not reported as missing. */
	if (!found || *found) {
		pv_cn = dm_config_find_node(reply.cft->root, "physical_volume");
		if (!(pv_cn && _pv_populate_lvmcache(cmd, pv_cn, NULL, dev->dev)))
			goto_out;
	}

	result = 1;
out:
	daemon_reply_destroy(reply);

	return result;
}
/*
 * Request "pv_list" from lvmetad and pass every child of the reply's
 * "physical_volumes" node to _pv_populate_lvmcache().  Failures for
 * individual PVs are not reported to the caller.
 *
 * Returns 1 on success or when lvmetad is not used, 0 if the request fails.
 */
int lvmetad_pv_list_to_lvmcache(struct cmd_context *cmd)
{
	daemon_reply reply;
	struct dm_config_node *pv_list, *pv_cn;

	if (!lvmetad_used())
		return 1;

	log_debug_lvmetad("Asking lvmetad for complete list of known PVs");
	reply = _lvmetad_send(cmd, "pv_list", NULL);

	if (!_lvmetad_handle_reply(reply, "pv_list", "", NULL)) {
		daemon_reply_destroy(reply);
		return_0;
	}

	pv_list = dm_config_find_node(reply.cft->root, "physical_volumes");
	if (pv_list) {
		pv_cn = pv_list->child;
		while (pv_cn) {
			_pv_populate_lvmcache(cmd, pv_cn, NULL, 0);
			pv_cn = pv_cn->sib;
		}
	}

	daemon_reply_destroy(reply);

	return 1;
}
int lvmetad_get_vgnameids(struct cmd_context *cmd, struct dm_list *vgnameids)
{
struct vgnameid_list *vgnl;
struct id vgid;
const char *vgid_txt;
const char *vg_name;
daemon_reply reply;
struct dm_config_node *cn;
log_debug_lvmetad("Asking lvmetad for complete list of known VG ids/names");
reply = _lvmetad_send(cmd, "vg_list", NULL);
if (!_lvmetad_handle_reply(reply, "vg_list", "", NULL)) {
daemon_reply_destroy(reply);
return_0;
}
if ((cn = dm_config_find_node(reply.cft->root, "volume_groups"))) {
for (cn = cn->child; cn; cn = cn->sib) {
vgid_txt = cn->key;
if (!id_read_format(&vgid, vgid_txt)) {
stack;
continue;
}
if (!(vgnl = dm_pool_alloc(cmd->mem, sizeof(*vgnl)))) {
log_error("vgnameid_list allocation failed.");
return 0;
}
if (!(vg_name = dm_config_find_str(cn->child, "name", NULL))) {
log_error("vg_list no name found.");
return 0;
}
vgnl->vgid = dm_pool_strdup(cmd->mem, (char *)&vgid);
vgnl->vg_name = dm_pool_strdup(cmd->mem, vg_name);
if (!vgnl->vgid || !vgnl->vg_name) {
log_error("vgnameid_list member allocation failed.");
return 0;
}
dm_list_add(vgnameids, &vgnl->list);
}
}
daemon_reply_destroy(reply);
return 1;
}
/*
 * Request "vg_list" from lvmetad and call lvmetad_vg_lookup() for every
 * child of the reply's "volume_groups" node whose key parses as an id.
 * Each VG returned by the lookup is released straight away.
 *
 * Returns 1 on success or when lvmetad is not used, 0 if the request fails.
 */
int lvmetad_vg_list_to_lvmcache(struct cmd_context *cmd)
{
	struct volume_group *looked_up;
	struct id vgid;
	daemon_reply reply;
	struct dm_config_node *vg_list, *cn;

	if (!lvmetad_used())
		return 1;

	log_debug_lvmetad("Asking lvmetad for complete list of known VGs");
	reply = _lvmetad_send(cmd, "vg_list", NULL);

	if (!_lvmetad_handle_reply(reply, "vg_list", "", NULL)) {
		daemon_reply_destroy(reply);
		return_0;
	}

	vg_list = dm_config_find_node(reply.cft->root, "volume_groups");

	for (cn = vg_list ? vg_list->child : NULL; cn; cn = cn->sib) {
		if (id_read_format(&vgid, cn->key)) {
			/* the call to lvmetad_vg_lookup will poke the VG into lvmcache */
			looked_up = lvmetad_vg_lookup(cmd, NULL, (const char*)&vgid);
			release_vg(looked_up);
		} else {
			stack;
		}
	}

	daemon_reply_destroy(reply);

	return 1;
}
struct _extract_dl_baton {
int i;
struct dm_config_tree *cft;
struct dm_config_node *pre_sib;
};
/*
 * Per-mda callback: create a node named "mda<N>" under the baton's tree
 * root and let the metadata area export itself into it.  Metadata areas
 * without an mda_export_text operation are skipped.
 *
 * Returns 1 on success or skip, 0 if node creation or export fails.
 */
static int _extract_mda(struct metadata_area *mda, void *baton)
{
	struct _extract_dl_baton *state = baton;
	struct dm_config_node *node;
	char key[32];

	if (!mda->ops->mda_export_text) /* do nothing */
		return 1;

	(void) dm_snprintf(key, sizeof(key), "mda%d", state->i);

	node = make_config_node(state->cft, key, state->cft->root, state->pre_sib);
	if (!node)
		return 0;

	if (!mda->ops->mda_export_text(mda, state->cft, node))
		return 0;

	state->pre_sib = node; /* for efficiency */
	state->i++;

	return 1;
}
/*
 * Create a node named "<name><N>" under the baton's tree root holding the
 * offset and size of one disk location.  A NULL location is accepted and
 * ignored.
 *
 * Returns 1 on success or when 'dl' is NULL, 0 if a node cannot be made.
 */
static int _extract_disk_location(const char *name, struct disk_locn *dl, void *baton)
{
	struct _extract_dl_baton *state = baton;
	struct dm_config_node *node;
	char key[32];

	if (!dl)
		return 1;

	(void) dm_snprintf(key, sizeof(key), "%s%d", name, state->i);

	node = make_config_node(state->cft, key, state->cft->root, state->pre_sib);
	if (!node)
		return 0;

	if (!config_make_nodes(state->cft, node, NULL,
			       "offset = %"PRId64, (int64_t) dl->offset,
			       "size = %"PRId64, (int64_t) dl->size,
			       NULL))
		return 0;

	state->pre_sib = node; /* for efficiency */
	state->i++;

	return 1;
}
/*
 * Callback given to lvmcache_foreach_da(): forwards the disk location to
 * _extract_disk_location() with the node name prefix "da".
 */
static int _extract_da(struct disk_locn *da, void *baton)
{
return _extract_disk_location("da", da, baton);
}
/*
 * Callback given to lvmcache_foreach_ba(): forwards the disk location to
 * _extract_disk_location() with the node name prefix "ba".
 */
static int _extract_ba(struct disk_locn *ba, void *baton)
{
return _extract_disk_location("ba", ba, baton);
}
/*
 * Append "mda<N>", "da<N>" and "ba<N>" nodes for everything lvmcache
 * knows about this PV to the root of 'cft'.  The index restarts at 0 for
 * each of the three kinds.
 *
 * NB. the 'pre_sib' argument is not used; the baton starts with a NULL
 * pre_sib.
 *
 * Returns 1 on success, 0 as soon as one of the three walks fails.
 */
static int _extract_mdas(struct lvmcache_info *info, struct dm_config_tree *cft,
			 struct dm_config_node *pre_sib)
{
	struct _extract_dl_baton baton = { .cft = cft };
	int ok;

	ok = lvmcache_foreach_mda(info, &_extract_mda, &baton);

	if (ok) {
		baton.i = 0;
		ok = lvmcache_foreach_da(info, &_extract_da, &baton);
	}

	if (ok) {
		baton.i = 0;
		ok = lvmcache_foreach_ba(info, &_extract_ba, &baton);
	}

	return ok ? 1 : 0;
}
/*
 * Tell lvmetad about a PV ("pv_found").
 *
 * A "pv" config tree is built from the arguments and from whatever
 * lvmcache holds for this PV, and sent to lvmetad.  When 'vg' is given,
 * the VG name and the exported VG metadata are sent along with it.
 *
 * cmd:             passed to _lvmetad_send() when 'vg' is given; its mem
 *                  pool is used for the names added to the output lists.
 * pvid, dev:       identify the PV; dev->dev is sent as the device number.
 * fmt:             its name is sent as the PV format.
 * label_sector:    sent as-is.
 * vg:              optional VG whose metadata was found on this PV.
 * found_vgnames:   optional list; receives the VG name from the reply when
 *                  the reply status is "complete".
 * changed_vgnames: optional list; additionally receives that VG name when
 *                  the reply's "changed" value is non-zero (only examined
 *                  when found_vgnames is also given and the status is
 *                  "complete").
 *
 * Returns 1 without doing anything when lvmetad is not used or in test
 * mode, 0 if the request cannot be built, otherwise the result of the
 * reply handler.
 */
int lvmetad_pv_found(struct cmd_context *cmd, const struct id *pvid, struct device *dev, const struct format_type *fmt,
uint64_t label_sector, struct volume_group *vg,
struct dm_list *found_vgnames,
struct dm_list *changed_vgnames)
{
char uuid[64];
daemon_reply reply;
struct lvmcache_info *info;
struct dm_config_tree *pvmeta, *vgmeta;
const char *status = NULL, *vgname = NULL;
int64_t changed = 0;
int result;
if (!lvmetad_used() || test_mode())
return 1;
if (!id_write_format(pvid, uuid, sizeof(uuid)))
return_0;
pvmeta = dm_config_create();
if (!pvmeta)
return_0;
/* info may be NULL; the fields taken from it are then sent as 0. */
info = lvmcache_info_from_pvid((const char *)pvid, dev, 0);
if (!(pvmeta->root = make_config_node(pvmeta, "pv", NULL, NULL))) {
dm_config_destroy(pvmeta);
return_0;
}
/* TODO: resolve what does it actually mean 'info == NULL'
 * missing info is likely an INTERNAL_ERROR */
if (!config_make_nodes(pvmeta, pvmeta->root, NULL,
"device = %"PRId64, (int64_t) dev->dev,
"dev_size = %"PRId64, (int64_t) (info ? lvmcache_device_size(info) : 0),
"format = %s", fmt->name,
"label_sector = %"PRId64, (int64_t) label_sector,
"id = %s", uuid,
"ext_version = %"PRId64, (int64_t) (info ? lvmcache_ext_version(info) : 0),
"ext_flags = %"PRId64, (int64_t) (info ? lvmcache_ext_flags(info) : 0),
NULL))
{
dm_config_destroy(pvmeta);
return_0;
}
if (info)
/* FIXME A more direct route would be much preferable. */
/* NB. the return value is not checked here. */
_extract_mdas(info, pvmeta, pvmeta->root);
if (vg) {
if (!(vgmeta = export_vg_to_config_tree(vg))) {
dm_config_destroy(pvmeta);
return_0;
}
log_debug_lvmetad("Telling lvmetad to store PV %s (%s) in VG %s", dev_name(dev), uuid, vg->name);
reply = _lvmetad_send(cmd, "pv_found",
"pvmeta = %t", pvmeta,
"vgname = %s", vg->name,
"metadata = %t", vgmeta,
NULL);
/* vgmeta is only needed to build the request. */
dm_config_destroy(vgmeta);
} else {
/*
 * There is no VG metadata stored on this PV.
 * It might or might not be an orphan.
 */
log_debug_lvmetad("Telling lvmetad to store PV %s (%s)", dev_name(dev), uuid);
/* NB. this request is sent with a NULL cmd, unlike the one above. */
reply = _lvmetad_send(NULL, "pv_found", "pvmeta = %t", pvmeta, NULL);
}
dm_config_destroy(pvmeta);
result = _lvmetad_handle_reply(reply, "pv_found", uuid, NULL);
/*
 * Warn if the seqno reported after the update differs from the seqno
 * of the VG that was sent, or from the seqno reported before the update.
 */
if (vg && result &&
(daemon_reply_int(reply, "seqno_after", -1) != vg->seqno ||
daemon_reply_int(reply, "seqno_after", -1) != daemon_reply_int(reply, "seqno_before", -1)))
log_warn("WARNING: Inconsistent metadata found for VG %s", vg->name);
/* The strings fetched here are used below, before the reply is destroyed. */
if (result && found_vgnames) {
status = daemon_reply_str(reply, "status", NULL);
vgname = daemon_reply_str(reply, "vgname", NULL);
changed = daemon_reply_int(reply, "changed", 0);
}
/*
 * If lvmetad now sees all PVs in the VG, it returned the
 * "complete" status string. Add this VG name to the list
 * of found VGs so that the caller can do autoactivation.
 *
 * If there was a problem notifying lvmetad about the new
 * PV, e.g. lvmetad was disabled due to a duplicate, then
 * no autoactivation is attempted.
 *
 * FIXME: there was a previous fixme indicating that
 * autoactivation might also be done for VGs with the
 * "partial" status.
 *
 * If the VG has "changed" by finding the PV, lvmetad returns
 * the "changed" flag. The names of "changed" VGs are saved
 * in the changed_vgnames list, which is used during autoactivation.
 * If a VG is changed, then autoactivation refreshes LVs in the VG.
 */
if (found_vgnames && vgname && status && !strcmp(status, "complete")) {
log_debug("VG %s is complete in lvmetad with dev %s.", vgname, dev_name(dev));
/* A failure to record the name is logged but does not change the result. */
if (!str_list_add(cmd->mem, found_vgnames, dm_pool_strdup(cmd->mem, vgname)))
log_error("str_list_add failed");
if (changed_vgnames && changed) {
log_debug("VG %s is changed in lvmetad.", vgname);
if (!str_list_add(cmd->mem, changed_vgnames, dm_pool_strdup(cmd->mem, vgname)))
log_error("str_list_add failed");
}
}
daemon_reply_destroy(reply);
return result;
}
int lvmetad_pv_gone(dev_t devno, const char *pv_name)
{
2012-03-14 21:15:22 +04:00
daemon_reply reply;
int result;
int found;
if (!lvmetad_used() || test_mode())
return 1;
/*
* TODO: automatic volume deactivation takes place here *before*
* all cached info is gone - call handler. Also, consider
* integrating existing deactivation script that deactivates
* the whole stack from top to bottom (not yet upstream).
*/
log_debug_lvmetad("Telling lvmetad to forget any PV on %s", pv_name);
reply = _lvmetad_send(NULL, "pv_gone", "device = %" PRId64, (int64_t) devno, NULL);
result = _lvmetad_handle_reply(reply, "pv_gone", pv_name, &found);
/* We don't care whether or not the daemon had the PV cached. */
daemon_reply_destroy(reply);
return result;
}
/*
 * Convenience wrapper: calls lvmetad_pv_gone() with the device number and
 * name taken from 'dev', and returns its result.
 */
int lvmetad_pv_gone_by_dev(struct device *dev)
{
return lvmetad_pv_gone(dev->dev, dev_name(dev));
}
/*
* The following code implements pvscan --cache.
*/
/*
 * State handed to _lvmetad_pvscan_single() while it is called for each
 * metadata area of a PV.
 */
struct _lvmetad_pvscan_baton {
struct volume_group *vg; /* VG with the highest seqno read so far, or NULL */
struct format_instance *fid; /* format instance passed to each vg_read() call */
};
static int _lvmetad_pvscan_single(struct metadata_area *mda, void *baton)
{
struct _lvmetad_pvscan_baton *b = baton;
struct volume_group *vg;
if (mda_is_ignored(mda) ||
!(vg = mda->ops->vg_read(b->fid, "", mda, NULL, NULL, 1)))
return 1;
2012-03-03 02:44:31 +04:00
/* FIXME Also ensure contents match etc. */
if (!b->vg || vg->seqno > b->vg->seqno)
b->vg = vg;
else if (b->vg)
release_vg(vg);
return 1;
}
/*
* The lock manager may detect that the vg cached in lvmetad is out of date,
* due to something like an lvcreate from another host.
* This is limited to changes that only affect the vg (not global state like
* orphan PVs), so we only need to reread mdas on the vg's existing pvs.
* But, a previous PV in the VG may have been removed since we last read
* the VG, and that PV may have been reused for another VG.
*/
static struct volume_group *lvmetad_pvscan_vg(struct cmd_context *cmd, struct volume_group *vg)
{
	char pvid_s[ID_LEN + 1] __attribute__((aligned(8)));
	char uuid[64] __attribute__((aligned(8)));
	struct label *label;
	struct volume_group *vg_ret = NULL;
	struct dm_config_tree *vgmeta_ret = NULL;
	struct dm_config_tree *vgmeta;
	struct pv_list *pvl, *pvl_new;
	struct device_list *devl, *devl_new, *devlsafe;
	struct dm_list pvs_scan;
	struct dm_list pvs_drop;
	struct dm_list pvs_new;
	struct lvmcache_info *info = NULL;
	struct format_instance *fid;
	struct format_instance_ctx fic = { .type = 0 };
	struct _lvmetad_pvscan_baton baton;
	struct device *save_dev = NULL;
	uint32_t save_seqno = 0;
	int missing_devs = 0;
	int check_new_pvs = 0;
	int found;

	/*
	 * Returns a VG built from the metadata re-read from disk, or NULL if
	 * the rescan failed or no metadata for this VG was found on any PV.
	 */

	dm_list_init(&pvs_scan);
	dm_list_init(&pvs_drop);
	dm_list_init(&pvs_new);

	log_debug_lvmetad("Rescanning VG %s (seqno %u).", vg->name, vg->seqno);

	/*
	 * Another host may have added a PV to the VG, and some
	 * commands do not always populate their lvmcache with
	 * all devs from lvmetad, so they would fail to find
	 * the new PV when scanning the VG.  So make sure this
	 * command knows about all PVs from lvmetad.
	 */
	lvmcache_seed_infos_from_lvmetad(cmd);

	/*
	 * Start with the list of PVs that we last saw in the VG.
	 * Some may now be gone, and some new PVs may have been added.
	 */
	dm_list_iterate_items(pvl, &vg->pvs) {
		if (!(devl = dm_pool_zalloc(cmd->mem, sizeof(*devl))))
			return_NULL;
		devl->dev = pvl->pv->dev;
		dm_list_add(&pvs_scan, &devl->list);
	}

scan_more:

	/*
	 * Run the equivalent of lvmetad_pvscan_single on each dev in the VG.
	 */
	dm_list_iterate_items_safe(devl, devlsafe, &pvs_scan) {
		if (!devl->dev)
			continue;

		log_debug_lvmetad("Rescan VG %s scanning %s.", vg->name, dev_name(devl->dev));

		if (!label_read(devl->dev, &label, 0)) {
			/* Another host removed this PV from the VG. */
			log_debug_lvmetad("Rescan VG %s found %s was removed.", vg->name, dev_name(devl->dev));

			if ((info = lvmcache_info_from_pvid(devl->dev->pvid, NULL, 0)))
				lvmcache_del(info);

			dm_list_move(&pvs_drop, &devl->list);
			continue;
		}

		info = (struct lvmcache_info *) label->info;

		baton.vg = NULL;
		baton.fid = lvmcache_fmt(info)->ops->create_instance(lvmcache_fmt(info), &fic);
		if (!baton.fid)
			return_NULL;

		if (baton.fid->fmt->features & FMT_OBSOLETE) {
			log_debug_lvmetad("Ignoring obsolete format on PV %s in VG %s.", dev_name(devl->dev), vg->name);
			lvmcache_fmt(info)->ops->destroy_instance(baton.fid);
			dm_list_move(&pvs_drop, &devl->list);
			continue;
		}

		/*
		 * Read VG metadata from this dev's mdas.
		 */
		lvmcache_foreach_mda(info, _lvmetad_pvscan_single, &baton);

		/*
		 * The PV may have been removed from the VG by another host
		 * since we last read the VG.
		 */
		if (!baton.vg) {
			log_debug_lvmetad("Rescan VG %s did not find %s.", vg->name, dev_name(devl->dev));
			lvmcache_fmt(info)->ops->destroy_instance(baton.fid);
			dm_list_move(&pvs_drop, &devl->list);
			continue;
		}

		/*
		 * The PV may have been removed from the VG and used for a
		 * different VG since we last read the VG.
		 */
		if (strcmp(baton.vg->name, vg->name)) {
			log_debug_lvmetad("Rescan VG %s found different VG %s on PV %s.",
					  vg->name, baton.vg->name, dev_name(devl->dev));
			release_vg(baton.vg);
			dm_list_move(&pvs_drop, &devl->list);
			continue;
		}

		/*
		 * The VG metadata read from each dev should match.  Save the
		 * metadata from the first dev, and compare it to the metadata
		 * read from each other dev.
		 */

		if (!save_seqno)
			save_seqno = baton.vg->seqno;

		if (!(vgmeta = export_vg_to_config_tree(baton.vg))) {
			log_error("VG export to config tree failed");
			/* Don't leak the reference copy saved from an earlier dev. */
			if (vgmeta_ret)
				dm_config_destroy(vgmeta_ret);
			release_vg(baton.vg);
			return NULL;
		}

		if (!vgmeta_ret) {
			vgmeta_ret = vgmeta;
			save_dev = devl->dev;
		} else {
			if (compare_config(vgmeta_ret->root, vgmeta->root)) {
				log_error("VG %s metadata comparison failed for device %s vs %s",
					  vg->name, dev_name(devl->dev), save_dev ? dev_name(save_dev) : "none");
				_log_debug_inequality(vg->name, vgmeta_ret->root, vgmeta->root);
				dm_config_destroy(vgmeta);
				dm_config_destroy(vgmeta_ret);
				release_vg(baton.vg);
				return NULL;
			}
			dm_config_destroy(vgmeta);
		}

		/*
		 * Look for any new PVs in the VG metadata that were not in our
		 * previous version of the VG.  Add them to pvs_new to be
		 * scanned in this loop just like the old PVs.
		 */
		if (!check_new_pvs) {
			/* Only the first successfully read copy is checked. */
			check_new_pvs = 1;

			dm_list_iterate_items(pvl_new, &baton.vg->pvs) {
				found = 0;
				dm_list_iterate_items(pvl, &vg->pvs) {
					if (pvl_new->pv->dev != pvl->pv->dev)
						continue;
					found = 1;
					break;
				}
				if (found)
					continue;
				if (!pvl_new->pv->dev) {
					strncpy(pvid_s, (char *) &pvl_new->pv->id, sizeof(pvid_s) - 1);
					if (!id_write_format((const struct id *)&pvid_s, uuid, sizeof(uuid)))
						stack;
					log_error("Device not found for PV %s in VG %s", uuid, vg->name);
					missing_devs++;
					continue;
				}
				if (!(devl_new = dm_pool_zalloc(cmd->mem, sizeof(*devl_new))))
					return_NULL;
				devl_new->dev = pvl_new->pv->dev;
				dm_list_add(&pvs_new, &devl_new->list);
				log_debug_lvmetad("Rescan VG %s found %s was added.", vg->name, dev_name(devl_new->dev));
			}
		}

		release_vg(baton.vg);
	}

	/*
	 * Do the same scanning above for any new PVs.
	 */
	if (!dm_list_empty(&pvs_new)) {
		dm_list_init(&pvs_scan);
		dm_list_splice(&pvs_scan, &pvs_new);
		dm_list_init(&pvs_new);
		log_debug_lvmetad("Rescan VG %s found new PVs to scan.", vg->name);
		goto scan_more;
	}

	if (missing_devs) {
		if (vgmeta_ret)
			dm_config_destroy(vgmeta_ret);
		return_NULL;
	}

	/*
	 * Remove pvs_drop entries from lvmetad.
	 */
	dm_list_iterate_items(devl, &pvs_drop) {
		if (!devl->dev)
			continue;
		log_debug_lvmetad("Rescan VG %s dropping %s.", vg->name, dev_name(devl->dev));
		if (!lvmetad_pv_gone_by_dev(devl->dev))
			return_NULL;
	}

	/*
	 * Update the VG in lvmetad.
	 */
	if (vgmeta_ret) {
		fid = lvmcache_fmt(info)->ops->create_instance(lvmcache_fmt(info), &fic);
		if (!(vg_ret = import_vg_from_config_tree(vgmeta_ret, fid))) {
			log_error("VG import from config tree failed");
			lvmcache_fmt(info)->ops->destroy_instance(fid);
			/* The config tree is no longer needed; don't leak it. */
			dm_config_destroy(vgmeta_ret);
			goto out;
		}

		/*
		 * Update lvmetad with the newly read version of the VG.
		 * When the seqno is unchanged the cached VG can be left.
		 */
		if (save_seqno != vg->seqno) {
			dm_list_iterate_items(devl, &pvs_scan) {
				if (!devl->dev)
					continue;
				log_debug_lvmetad("Rescan VG %s dropping to replace %s.", vg->name, dev_name(devl->dev));
				if (!lvmetad_pv_gone_by_dev(devl->dev))
					return_NULL;
			}

			log_debug_lvmetad("Rescan VG %s updating lvmetad from seqno %u to seqno %u.",
					  vg->name, vg->seqno, save_seqno);

			/*
			 * If this vg_update fails the cached metadata in
			 * lvmetad will remain invalid.
			 */
			vg_ret->lvmetad_update_pending = 1;
			if (!lvmetad_vg_update_finish(vg_ret))
				log_error("Failed to update lvmetad with new VG meta");
		}
		dm_config_destroy(vgmeta_ret);
	}
out:
	/* vg_ret is NULL when nothing was read or the import failed. */
	if (vg_ret)
		log_debug_lvmetad("Rescan VG %s done (seqno %u).", vg_ret->name, vg_ret->seqno);
	return vg_ret;
}
int lvmetad_pvscan_single(struct cmd_context *cmd, struct device *dev,
struct dm_list *found_vgnames,
struct dm_list *changed_vgnames)
{
struct label *label;
struct lvmcache_info *info;
struct _lvmetad_pvscan_baton baton;
/* Create a dummy instance. */
struct format_instance_ctx fic = { .type = 0 };
if (!lvmetad_used()) {
log_error("Cannot proceed since lvmetad is not active.");
return 0;
}
if (!label_read(dev, &label, 0)) {
log_print_unless_silent("No PV label found on %s.", dev_name(dev));
if (!lvmetad_pv_gone_by_dev(dev))
goto_bad;
return 1;
}
info = (struct lvmcache_info *) label->info;
baton.vg = NULL;
baton.fid = lvmcache_fmt(info)->ops->create_instance(lvmcache_fmt(info), &fic);
if (!baton.fid)
goto_bad;
if (baton.fid->fmt->features & FMT_OBSOLETE) {
lvmcache_fmt(info)->ops->destroy_instance(baton.fid);
log_warn("WARNING: Disabling lvmetad cache which does not support obsolete (lvm1) metadata.");
lvmetad_set_disabled(cmd, LVMETAD_DISABLE_REASON_LVM1);
_found_lvm1_metadata = 1;
/*
* return 1 (success) so that we'll continue to populate lvmetad
* instead of leaving the update incomplete.
*/
return 1;
}
lvmcache_foreach_mda(info, _lvmetad_pvscan_single, &baton);
if (!baton.vg)
lvmcache_fmt(info)->ops->destroy_instance(baton.fid);
if (!lvmetad_pv_found(cmd, (const struct id *) &dev->pvid, dev, lvmcache_fmt(info),
label->sector, baton.vg, found_vgnames, changed_vgnames)) {
release_vg(baton.vg);
goto_bad;
}
release_vg(baton.vg);
return 1;
bad:
return 0;
}
/*
* Update the lvmetad cache: clear the current lvmetad cache, and scan all
* devs, sending all info from the devs to lvmetad.
*
* We want only one command to be doing this at a time. When do_wait is set,
* this will first check if lvmetad is currently being updated by another
* command, and if so it will delay until that update is finished, or until a
* timeout, at which point it will go ahead and do the lvmetad update.
*
* Callers that have already checked and waited for the updating state, e.g. by
* using lvmetad_token_matches(), will generally set do_wait to 0. Callers that
* have not checked for the updating state yet will generally set do_wait to 1.
*
* If another command doing an update failed, it left lvmetad in the "update in
* progress" state, so we can't just wait until that state has cleared, but have
* to go ahead after a timeout.
*
* The _lvmetad_is_updating check avoids most races to update lvmetad from
* multiple commands (which shouldn't generally happen anyway) but does not
* eliminate them. If an update race happens, the second will see that the
* previous token was "update in progress" when it calls _token_update(). It
* will then fail, and the command calling lvmetad_pvscan_all_devs() will
* generally revert to disk scanning and not use lvmetad.
*/
int lvmetad_pvscan_all_devs(struct cmd_context *cmd, int do_wait)
{
struct dev_iter *iter;
struct device *dev;
daemon_reply reply;
char *future_token;
const char *reason;
int was_silent;
int replacing_other_update = 0;
int replaced_update = 0;
int retries = 0;
int ret = 1;
if (!lvmetad_used()) {
log_error("Cannot proceed since lvmetad is not active.");
return 0;
}
retry:
/*
* If another update is in progress, delay to allow it to finish,
* rather than interrupting it with our own update.
*/
if (do_wait && _lvmetad_is_updating(cmd, 1)) {
log_warn("WARNING: lvmetad update is interrupting another update in progress.");
replacing_other_update = 1;
}
log_verbose("Scanning all devices to update lvmetad.");
if (!(iter = dev_iter_create(cmd->lvmetad_filter, 1))) {
log_error("dev_iter creation failed");
return 0;
}
future_token = _lvmetad_token;
_lvmetad_token = (char *) LVMETAD_TOKEN_UPDATE_IN_PROGRESS;
if (!_token_update(&replaced_update)) {
log_error("Failed to update lvmetad which had an update in progress.");
dev_iter_destroy(iter);
_lvmetad_token = future_token;
return 0;
}
/*
* if _token_update() sets replaced_update to 1, it means that we set
* "update in progress" when the lvmetad was already set to "udpate in
* progress". This detects a race between two commands doing updates
* at once. The attempt above to avoid this race using
* _lvmetad_is_updating isn't perfect.
*/
if (!replacing_other_update && replaced_update) {
if (do_wait && !retries) {
retries = 1;
log_warn("WARNING: lvmetad update in progress, retrying update.");
dev_iter_destroy(iter);
_lvmetad_token = future_token;
goto retry;
}
log_warn("WARNING: lvmetad update in progress, skipping update.");
dev_iter_destroy(iter);
_lvmetad_token = future_token;
return 0;
}
log_debug_lvmetad("Telling lvmetad to clear its cache");
reply = _lvmetad_send(cmd, "pv_clear_all", NULL);
if (!_lvmetad_handle_reply(reply, "pv_clear_all", "", NULL))
ret = 0;
daemon_reply_destroy(reply);
was_silent = silent_mode();
init_silent(1);
while ((dev = dev_iter_get(iter))) {
if (sigint_caught()) {
ret = 0;
stack;
break;
}
if (!lvmetad_pvscan_single(cmd, dev, NULL, NULL)) {
ret = 0;
stack;
break;
}
}
init_silent(was_silent);
dev_iter_destroy(iter);
_lvmetad_token = future_token;
/*
* If we failed to fully and successfully populate lvmetad just leave
* the existing "update in progress" token in place so lvmetad will
* time out our update and force another command to do it.
* (We could try to set the token to empty here, but that doesn't
* help much.)
*/
if (!ret)
return 0;
if (!_token_update(NULL)) {
log_error("Failed to update lvmetad token after device scan.");
return 0;
}
/*
* If lvmetad is disabled, and no lvm1 metadata was seen and no
* duplicate PVs were seen, then re-enable lvmetad.
*/
if (lvmetad_is_disabled(cmd, &reason) &&
!lvmcache_found_duplicate_pvs() && !_found_lvm1_metadata) {
log_debug_lvmetad("Enabling lvmetad which was previously disabled.");
lvmetad_clear_disabled(cmd);
}
return ret;
}
/*
 * Send lvmetad a "vg_clear_outdated_pvs" request for the VG identified by
 * vg->id.
 *
 * Returns the result of handling the daemon's reply, or 0 if the VG id
 * cannot be formatted.
 */
int lvmetad_vg_clear_outdated_pvs(struct volume_group *vg)
{
	char vgid_str[64];
	daemon_reply reply;
	int handled;

	if (!id_write_format(&vg->id, vgid_str, sizeof(vgid_str)))
		return_0;

	log_debug_lvmetad("Sending lvmetad vg_clear_outdated_pvs");

	reply = _lvmetad_send(vg->cmd, "vg_clear_outdated_pvs", "vgid = %s", vgid_str, NULL);
	handled = _lvmetad_handle_reply(reply, "vg_clear_outdated_pvs", vg->name, NULL);
	daemon_reply_destroy(reply);

	return handled;
}
/*
* Records the state of cached PVs in lvmetad so we can look for changes
* after rescanning.
*/
struct pv_cache_list {
	struct dm_list list;		/* links this entry into a list of cached PVs */
	dev_t devt;			/* "device" value lvmetad reported for the PV */
	struct id pvid;			/* PV id parsed from the key of the pv_list entry */
	const char *vgid;		/* "vgid" string reported by lvmetad, or NULL if none */
	unsigned found : 1;		/* set once paired with an entry from the other list */
	unsigned update_udev : 1;	/* set when the device should be opened to refresh udev */
};
/*
* Get the list of PVs known to lvmetad.
*/
static int _lvmetad_get_pv_cache_list(struct cmd_context *cmd, struct dm_list *pvc_list)
{
	daemon_reply reply;
	struct dm_config_node *cn;
	struct pv_cache_list *pvcl;
	const char *pvid_txt;
	const char *vgid;

	/* Nothing to collect when lvmetad is not in use; not an error. */
	if (!lvmetad_used())
		return 1;

	log_debug_lvmetad("Asking lvmetad for complete list of known PVs");

	reply = _lvmetad_send(cmd, "pv_list", NULL);
	if (!_lvmetad_handle_reply(reply, "pv_list", "", NULL)) {
		daemon_reply_destroy(reply);
		return_0;
	}

	/* Each child of "physical_volumes" is keyed by the PV id text. */
	if ((cn = dm_config_find_node(reply.cft->root, "physical_volumes"))) {
		for (cn = cn->child; cn; cn = cn->sib) {
			if (!(pvcl = dm_pool_zalloc(cmd->mem, sizeof(*pvcl)))) {
				log_error("pv_cache_list allocation failed.");
				/* Release the reply on this error path too. */
				daemon_reply_destroy(reply);
				return 0;
			}

			pvid_txt = cn->key;
			if (!id_read_format(&pvcl->pvid, pvid_txt)) {
				/* Entries whose id cannot be parsed are skipped. */
				stack;
				continue;
			}

			pvcl->devt = dm_config_find_int(cn->child, "device", 0);

			/* The string is copied because the reply is destroyed below. */
			if ((vgid = dm_config_find_str(cn->child, "vgid", NULL)))
				pvcl->vgid = dm_pool_strdup(cmd->mem, vgid);

			dm_list_add(pvc_list, &pvcl->list);
		}
	}

	daemon_reply_destroy(reply);

	return 1;
}
/*
* Opening the device RDWR should trigger a udev db update.
* FIXME: is there a better way to update the udev db than
* doing an open/close of the device? - For example writing
* "change" to /sys/block/<device>/uevent?
*/
static void _update_pv_in_udev(struct cmd_context *cmd, dev_t devt)
{
struct device *dev;
log_debug_devs("device %d:%d open to update udev",
(int)MAJOR(devt), (int)MINOR(devt));
if (!(dev = dev_cache_get_by_devt(devt, cmd->lvmetad_filter))) {
log_error("_update_pv_in_udev no dev found");
return;
}
2015-07-09 16:15:15 +03:00
if (!dev_open(dev)) {
stack;
return;
2015-07-09 16:15:15 +03:00
}
if (!dev_close(dev))
stack;
}
/*
* Compare before and after PV lists from before/after rescanning,
* and update udev db for changes.
*
* For PVs that have changed pvid or vgid in lvmetad from rescanning,
* there may be information in the udev database to update, so open
* these devices to trigger a udev update.
*
* "before" refers to the list of pvs from lvmetad before rescanning
* "after" refers to the list of pvs from lvmetad after rescanning
*
* Comparing both lists, we can see which PVs changed (pvid or vgid),
* and trigger a udev db update for those.
*/
static void _update_changed_pvs_in_udev(struct cmd_context *cmd,
					struct dm_list *pvc_before,
					struct dm_list *pvc_after)
{
	struct pv_cache_list *old_pv;
	struct pv_cache_list *new_pv;
	char old_id[ID_LEN + 1];
	char new_id[ID_LEN + 1];
	int matched;
	int vg_changed;

	dm_list_iterate_items(old_pv, pvc_before) {
		matched = 0;

		dm_list_iterate_items(new_pv, pvc_after) {
			/* Skip entries already paired, or belonging to another device. */
			if (new_pv->found || old_pv->devt != new_pv->devt)
				continue;

			if (!id_equal(&old_pv->pvid, &new_pv->pvid)) {
				(void) dm_strncpy(old_id, (char *) &old_pv->pvid, sizeof(old_id));
				(void) dm_strncpy(new_id, (char *) &new_pv->pvid, sizeof(new_id));

				log_debug_devs("device %d:%d changed pvid from %s to %s",
					       (int)MAJOR(old_pv->devt), (int)MINOR(old_pv->devt),
					       old_id, new_id);

				old_pv->update_udev = 1;
			} else {
				/*
				 * Same pvid: the VG changed if exactly one side
				 * has a vgid, or both have one and they differ.
				 */
				if (old_pv->vgid && new_pv->vgid)
					vg_changed = strcmp(old_pv->vgid, new_pv->vgid) != 0;
				else
					vg_changed = old_pv->vgid || new_pv->vgid;

				if (vg_changed) {
					log_debug_devs("device %d:%d changed vg from %s to %s",
						       (int)MAJOR(old_pv->devt), (int)MINOR(old_pv->devt),
						       old_pv->vgid ? old_pv->vgid : "none",
						       new_pv->vgid ? new_pv->vgid : "none");

					old_pv->update_udev = 1;
				}
			}

			new_pv->found = 1;
			old_pv->found = 1;
			matched = 1;
			break;
		}

		if (matched)
			continue;

		/* No entry for this device after the rescan. */
		(void) dm_strncpy(old_id, (char *) &old_pv->pvid, sizeof(old_id));

		log_debug_devs("device %d:%d pvid %s vg %s is gone",
			       (int)MAJOR(old_pv->devt), (int)MINOR(old_pv->devt),
			       old_id, old_pv->vgid ? old_pv->vgid : "none");

		old_pv->update_udev = 1;
	}

	/* Open every flagged device, from both lists, to trigger the udev update. */
	dm_list_iterate_items(old_pv, pvc_before) {
		if (old_pv->update_udev)
			_update_pv_in_udev(cmd, old_pv->devt);
	}

	dm_list_iterate_items(new_pv, pvc_after) {
		if (new_pv->update_udev)
			_update_pv_in_udev(cmd, new_pv->devt);
	}
}
/*
* Before this command was run, some external entity may have
* invalidated lvmetad's cache of global information, e.g. lvmlockd.
*
* The global information includes things like a new VG, a
* VG that was removed, the assignment of a PV to a VG;
* any change that is not isolated within a single VG.
*
* The external entity, like a lock manager, would invalidate
* the lvmetad global cache if it detected that the global
* information had been changed on disk by something other
* than a local lvm command, e.g. an lvm command on another
* host with access to the same devices. (How it detects
* the change is specific to lock manager or other entity.)
*
* The effect is that metadata on disk is newer than the metadata
* in the local lvmetad daemon, and the local lvmetad's cache
* should be updated from disk before this command uses it.
*
* So, using this function, a command checks if lvmetad's global
* cache is valid. If so, it does nothing. If not, it rescans
* devices to update the lvmetad cache, then it notifies lvmetad
* that its cache is valid again (consistent with what's on disk.)
* This command can then go ahead and use the newly refreshed metadata.
*
* 1. Check if the lvmetad global cache is invalid.
* 2. If so, reread metadata from all devices and update the lvmetad cache.
* 3. Tell lvmetad that the global cache is now valid.
*/
void lvmetad_validate_global_cache(struct cmd_context *cmd, int force)
{
struct dm_list pvc_before; /* pv_cache_list */
struct dm_list pvc_after; /* pv_cache_list */
const char *reason = NULL;
daemon_reply reply;
int global_invalid;
dm_list_init(&pvc_before);
dm_list_init(&pvc_after);
2015-03-05 23:00:44 +03:00
if (!lvmlockd_use()) {
log_error(INTERNAL_ERROR "validate global cache without lvmlockd");
return;
}
if (!lvmetad_used())
return;
2015-03-05 23:00:44 +03:00
log_debug_lvmetad("Validating global lvmetad cache");
if (force)
goto do_scan;
log_debug_lvmetad("lvmetad validate send get_global_info");
reply = daemon_send_simple(_lvmetad, "get_global_info",
"token = %s", "skip",
"pid = " FMTd64, (int64_t)getpid(),
"cmd = %s", get_cmd_name(),
NULL);
if (reply.error) {
log_error("lvmetad_validate_global_cache get_global_info error %d", reply.error);
goto do_scan;
}
if (strcmp(daemon_reply_str(reply, "response", ""), "OK")) {
log_error("lvmetad_validate_global_cache get_global_info not ok");
goto do_scan;
}
global_invalid = daemon_reply_int(reply, "global_invalid", -1);
daemon_reply_destroy(reply);
if (!global_invalid)
return; /* cache is valid */
do_scan:
/*
* Save the current state of pvs from lvmetad so after devices are
* scanned, we can compare to the new state to see if pvs changed.
*/
_lvmetad_get_pv_cache_list(cmd, &pvc_before);
/*
* Update the local lvmetad cache so it correctly reflects any
* changes made on remote hosts. (It's possible that this command
* already refreshed the local lvmetad because of a token change,
* but we need to do it again here since we now hold the global
* lock. Another host may have changed things between the time
* we rescanned for the token, and the time we acquired the global
* lock.)
*/
if (!lvmetad_pvscan_all_devs(cmd, 1)) {
log_warn("WARNING: Not using lvmetad because cache update failed.");
lvmetad_make_unused(cmd);
return;
}
if (lvmetad_is_disabled(cmd, &reason)) {
log_warn("WARNING: Not using lvmetad because %s.", reason);
lvmetad_make_unused(cmd);
return;
}
/*
* Clear the global_invalid flag in lvmetad.
* Subsequent local commands that read global state
* from lvmetad will not see global_invalid until
* another host makes another global change.
*/
log_debug_lvmetad("lvmetad validate send set_global_info");
reply = daemon_send_simple(_lvmetad, "set_global_info",
"token = %s", "skip",
"global_invalid = " FMTd64, INT64_C(0),
"pid = " FMTd64, (int64_t)getpid(),
"cmd = %s", get_cmd_name(),
NULL);
if (reply.error)
log_error("lvmetad_validate_global_cache set_global_info error %d", reply.error);
if (strcmp(daemon_reply_str(reply, "response", ""), "OK"))
log_error("lvmetad_validate_global_cache set_global_info not ok");
daemon_reply_destroy(reply);
/*
* Populate this command's lvmcache structures from lvmetad.
*/
lvmcache_seed_infos_from_lvmetad(cmd);
/*
* Update the local udev database to reflect PV changes from
* other hosts.
*
* Compare the before and after PV lists, and if a PV's
* pvid or vgid has changed, then open that device to trigger
* a uevent to update the udev db.
*
* This has no direct benefit to lvm, but is just a best effort
* attempt to keep the udev db updated and reflecting current
* lvm information.
*
* FIXME: lvmcache_seed_infos_from_lvmetad() and _lvmetad_get_pv_cache_list()
* each get pv_list from lvmetad, and they could share a single pv_list reply.
*/
if (!dm_list_empty(&pvc_before)) {
_lvmetad_get_pv_cache_list(cmd, &pvc_after);
_update_changed_pvs_in_udev(cmd, &pvc_before, &pvc_after);
}
log_debug_lvmetad("Validating global lvmetad cache finished");
}
int lvmetad_vg_is_foreign(struct cmd_context *cmd, const char *vgname, const char *vgid)
{
daemon_reply reply;
struct dm_config_node *top;
const char *system_id = NULL;
char uuid[64];
int ret;
if (!id_write_format((const struct id*)vgid, uuid, sizeof(uuid)))
return_0;
log_debug_lvmetad("Sending lvmetad vg_clear_outdated_pvs");
reply = _lvmetad_send(cmd, "vg_lookup",
"uuid = %s", uuid,
"name = %s", vgname,
NULL);
if ((top = dm_config_find_node(reply.cft->root, "metadata")))
system_id = dm_config_find_str(top, "metadata/system_id", NULL);
ret = !is_system_id_allowed(cmd, system_id);
daemon_reply_destroy(reply);
return ret;
}
/*
* lvmetad has a disabled state in which it continues running,
* and returns the "disabled" flag in a get_global_info query.
*
* Case 1
* ------
* When "normal" commands start, (those not specifically
* intended to rescan devs) they begin by checking lvmetad's
* token and global info:
*
* - If the token doesn't match (should be uncommon), the
* command first rescans devices to repopulate lvmetad with
* the global_filter it is using. After rescanning, the
* lvmetad disabled state is set or cleared depending on
* what the scan saw.
*
* An unmatching token occurs when:
* . lvmetad was just started and has not been populated yet.
* . The global_filter has been changed in lvm.conf since the
* last command was run.
* . The global_filter is overridden on the command line.
* (There's little point in using lvmetad if global_filter
* is often changed/overridden.)
*
* - If the token does match (common case), the command and
* lvmetad are using the same global_filter and the command
* does not rescan devs to repopulate lvmetad, or change the
* lvmetad disabled state.
*
* - After the token check/sync, the command checks if the
* disabled flag is set in lvmetad. If it is, the command will
* not use the lvmetad cache and will revert to scanning, i.e.
* it runs the same as if use_lvmetad=0.
*
* So, "normal" commands try to use the lvmetad cache to avoid
* scanning devices. In the uncommon case when the token doesn't
* match, these commands will first rescan devs to repopulate the
* lvmetad cache, and then attempt to use the lvmetad cache.
* In the uncommon case where lvmetad is disabled (by a previous
* command), the common commands do not rescan devs to repopulate
* lvmetad, but revert the equivalent of use_lvmetad=0, reading
* from disk instead of the cache.
* The combination of those two uncommon cases means that a command
* could begin by rescanning devs because of a token mismatch, then
* disable lvmetad as a result of that scan, and continue without
* using lvmetad.
*
* Case 2
* ------
* Commands that are meant to scan devices to repopulate the
* lvmetad cache, e.g. pvscan --cache, will always rescan
* devices and then set/clear the disabled state according to
* what they found when scanning. The global_filter is always
* used when choosing which devices to scan to populate lvmetad.
* The command-specific filter is never used when choosing
* which devices to scan for repopulating the lvmetad cache.
*
* During a scan repopulating the lvmetad cache, a command looks
* for PVs with lvm1 metadata, or duplicate PVs (two devices with
* the same PVID). If either of those are found during the scan,
* the command sets the disabled state in lvmetad. If none are
* found, the command clears the disabled state in lvmetad.
* (Other problems scanning may also cause the command to set the
* disabled state.)
*
* Case 3
* ------
* The special command 'pvscan --cache <dev>' is meant to only
* scan the specified device and send info from the dev to
* lvmetad. This single-dev pvscan will not detect duplicate PVs
* since it only sees the one device. If lvmetad already knows
* about the same PV on another device, then lvmetad will be the
* first to discover that a duplicate PV exists. In this case,
* lvmetad sets the disabled state for itself.
*
* Duplicates
* ----------
* The most common reasons for duplicate PVs to exist are:
*
* 1. Multipath. When multipath is running, it creates a new
* mpath device for the underlying "duplicate" devs. lvm has
* built in, automatic filtering that will hide the duplicate
* devs of the underlying mpath dev, so the duplicates will
* be skipped during scanning (multipath_component_detection).
*
* If multipath_component_detection=0, or if multipathd is not
* running, or multipath is not set up to handle a particular
* set of devs, then lvm will see the multipath paths as
* duplicates. lvm will choose one of them to use, consider
* the other a duplicate, and disable lvmetad. multipathd
* should be configured and running to resolve these duplicates,
* and multipath_component_detection enabled.
*
* 2. Cloning by copying. One device is copied over another, e.g.
* with dd. This is a more concerning case because using the
* wrong device could lead to corruption. LVM will attempt to
* choose the best device as the PV, but it may not always
* be the right one. In this case, lvmetad is disabled.
* vgimportclone should be used on the new copy to resolve the
* duplicates.
*
* 3. Cloning by hardware. A LUN is cloned/snapshotted on
* a hardware device. The description here is the same as
* cloning by copying.
*
* 4. Creating LVM snapshots of LVs being used as PVs.
* If pvcreate is run on an LV, and lvcreate is used to
* create a snapshot of that LV, then the two LVs will
* appear to be duplicate PVs.
*
* Filtering duplicates
* --------------------
*
* If all but one copy of a PV is added to the global_filter,
* then duplicates will not be seen when scanning to populate
* the lvmetad cache. Neither common commands nor scanning
* commands will see the duplicates, and lvmetad will not be
* disabled.
*
* If the global_filter is *not* used to hide duplicates,
* then lvmetad will be disabled when they are scanned, but
* common commands can use the command filter to hide the
* duplicates and work with a selected instance of the PV.
* The command will not use lvmetad in this case, but will
* not see duplicate PVs itself because its command filter
* is more restrictive than the global_filter and has hidden
* the duplicates.
*/
/*
* FIXME: if we fail to disable lvmetad, then other commands could
* potentially use incorrect cache data from lvmetad. Should we
* do something more severe if the disable message fails, like
* sending SIGKILL to the lvmetad pid?
*
* FIXME: log something in syslog any time we disable lvmetad?
* At a minimum if we fail to disable lvmetad.
*/
void lvmetad_set_disabled(struct cmd_context *cmd, const char *reason)
{
	daemon_reply reply;

	/* Nothing to send when lvmetad is not in use. */
	if (!_lvmetad_use)
		return;

	log_debug_lvmetad("Sending lvmetad disabled %s", reason);

	reply = daemon_send_simple(_lvmetad, "set_global_info",
				   "token = %s", "skip",
				   "global_disable = " FMTd64, (int64_t)1,
				   "disable_reason = %s", reason,
				   "pid = " FMTd64, (int64_t)getpid(),
				   "cmd = %s", get_cmd_name(),
				   NULL);

	/*
	 * Only look at the response when the send itself succeeded, as
	 * lvmetad_is_disabled() does.  Failures are logged, not returned.
	 */
	if (reply.error)
		log_error("Failed to send message to lvmetad %d", reply.error);
	else if (strcmp(daemon_reply_str(reply, "response", ""), "OK"))
		log_error("Failed response from lvmetad.");

	daemon_reply_destroy(reply);
}
/*
 * Tell lvmetad to clear its disabled state by sending set_global_info with
 * global_disable = 0.  Does nothing when lvmetad is not in use.  Failures
 * are logged but not reported to the caller.
 */
void lvmetad_clear_disabled(struct cmd_context *cmd)
{
	daemon_reply reply;

	if (!_lvmetad_use)
		return;

	log_debug_lvmetad("Sending lvmetad disabled 0");

	reply = daemon_send_simple(_lvmetad, "set_global_info",
				   "token = %s", "skip",
				   "global_disable = " FMTd64, (int64_t)0,
				   "pid = " FMTd64, (int64_t)getpid(),
				   "cmd = %s", get_cmd_name(),
				   NULL);

	/*
	 * Only look at the response when the send itself succeeded, as
	 * lvmetad_is_disabled() does.
	 */
	if (reply.error)
		log_error("Failed to send message to lvmetad %d", reply.error);
	else if (strcmp(daemon_reply_str(reply, "response", ""), "OK"))
		log_error("Failed response from lvmetad.");

	daemon_reply_destroy(reply);
}
/*
 * Ask lvmetad (get_global_info) whether it is in the disabled state.
 *
 * Returns 1 if lvmetad reports global_disable, or if the query itself fails
 * (send error or a response other than "OK"); *reason is then set to a
 * constant string describing why.  Returns 0 otherwise, leaving *reason
 * untouched.
 */
int lvmetad_is_disabled(struct cmd_context *cmd, const char **reason)
{
	/* Known disable_reason substrings, tested in this order. */
	const char *const known_reasons[][2] = {
		{ LVMETAD_DISABLE_REASON_DIRECT, "the disable flag was set directly" },
		{ LVMETAD_DISABLE_REASON_LVM1, "LVM1 metadata was found" },
		{ LVMETAD_DISABLE_REASON_DUPLICATES, "duplicate PVs were found" },
		{ LVMETAD_DISABLE_REASON_VGRESTORE, "vgcfgrestore is restoring VG metadata" },
	};
	daemon_reply reply;
	const char *reply_reason;
	const char *text;
	unsigned i;
	int disabled = 0;

	reply = daemon_send_simple(_lvmetad, "get_global_info",
				   "token = %s", "skip",
				   "pid = " FMTd64, (int64_t)getpid(),
				   "cmd = %s", get_cmd_name(),
				   NULL);

	if (reply.error) {
		*reason = "send error";
		disabled = 1;
	} else if (strcmp(daemon_reply_str(reply, "response", ""), "OK")) {
		*reason = "response error";
		disabled = 1;
	} else if (daemon_reply_int(reply, "global_disable", 0)) {
		disabled = 1;

		reply_reason = daemon_reply_str(reply, "disable_reason", NULL);

		if (!reply_reason) {
			*reason = "<not set>";
		} else {
			/* First matching substring wins; fall back to unknown. */
			text = "<unknown>";
			for (i = 0; i < sizeof(known_reasons) / sizeof(known_reasons[0]); i++) {
				if (strstr(reply_reason, known_reasons[i][0])) {
					text = known_reasons[i][1];
					break;
				}
			}
			*reason = text;
		}
	}

	daemon_reply_destroy(reply);
	return disabled;
}