mirror of git://sourceware.org/git/lvm2.git

RAID (lvconvert/dmeventd): Cleanly handle primary failure during 'recover' op

Add the checks necessary to distinguish the state of a RAID when the primary
source for syncing fails during the "recover" process.

It has been possible to hit this condition before (like when converting from
2-way RAID1 to 3-way and having the first two devices die during the "recover"
process).  However, this condition is now more likely, since linear -> RAID1
conversions are now treated as a "recover" operation - so it is especially
important that we handle it cleanly.
Jonathan Brassow 2017-06-14 08:39:50 -05:00
parent d34d2068dd
commit 4c0e908b0a
2 changed files with 76 additions and 0 deletions
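
To make the new check easy to see at a glance, here is a minimal standalone sketch (editor's illustration, not LVM2 code; the helper name is hypothetical) of the detection rule the diffs below add on both the dmeventd and lvconvert paths: the "recover" operation has stalled on a primary failure when the kernel reports the sync action as "idle" while some device health characters are still 'a'.

#include <string.h>

/* Illustrative only: sync_action and dev_health are assumed to be the
 * already-parsed fields of the kernel's dm-raid status for the LV. */
static int recover_stalled_on_primary_failure(const char *sync_action,
					      const char *dev_health)
{
	return !strcmp(sync_action, "idle") && strchr(dev_health, 'a') != NULL;
}

/* Example: recover_stalled_on_primary_failure("idle", "Aa") returns 1 -
 * the 'A' is the failed primary the kernel cannot mark dead, the 'a' is
 * the new image that never finished syncing. */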


@@ -58,6 +58,22 @@ static int _process_raid_event(struct dso_state *state, char *params, const char
		dead = 1;
	}

	/*
	 * if we are converting from non-RAID to RAID (e.g. linear -> raid1)
	 * and too many original devices die, such that we cannot continue
	 * the "recover" operation, the sync action will go to "idle", the
	 * unsynced devs will remain at 'a', and the original devices will
	 * NOT SWITCH TO 'D', but will remain at 'A' - hoping to be revived.
	 *
	 * This is simply the way the kernel works...
	 */
	if (!strcmp(status->sync_action, "idle") &&
	    strchr(status->dev_health, 'a')) {
		log_error("Primary sources for new RAID, %s, have failed.",
			  device);
		dead = 1; /* run it through LVM repair */
	}

	if (dead) {
		if (status->insync_regions < status->total_regions) {
			if (!state->warned) {
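
For reference, the dm-raid device health characters used above are documented by the kernel as: 'A' = alive and in-sync, 'a' = alive but not in-sync, 'D' = dead/failed. A tiny decoding sketch (editor's illustration, not part of this commit):

/* Editor's sketch: decode a dm-raid device health character as
 * documented for the kernel's dm-raid target. */
static const char *raid_dev_health_str(char c)
{
	switch (c) {
	case 'A': return "alive, in-sync";
	case 'a': return "alive, not in-sync";
	case 'D': return "dead / failed";
	default:  return "unknown";
	}
}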


@@ -6407,6 +6407,39 @@ has_enough_space:
	return 1;
}
/*
 * _lv_raid_has_primary_failure_on_recover
 * @lv
 *
 * The kernel behaves strangely in the presence of a primary failure
 * during a "recover" sync operation.  It's not technically a bug, I
 * suppose, but the output of the status line can make it difficult
 * to determine that we are in this state.  The sync ratio will be
 * 100% and the sync action will be "idle", but the health characters
 * will be e.g. "Aaa" or "Aa", where the 'A' is the dead
 * primary source that cannot be marked dead by the kernel because
 * it is the only source for the remainder of the data.
 *
 * This function helps to detect that condition.
 *
 * Returns: 1 if the state is detected, 0 otherwise.
 * FIXME: would be better to return -1,0,1 to allow error reporting.
 */
int _lv_raid_has_primary_failure_on_recover(struct logical_volume *lv)
{
	char *tmp_dev_health;
	char *tmp_sync_action;

	if (!lv_raid_sync_action(lv, &tmp_sync_action) ||
	    !lv_raid_dev_health(lv, &tmp_dev_health))
		return_0;

	if (!strcmp(tmp_sync_action, "idle") && strchr(tmp_dev_health, 'a'))
		return 1;

	return 0;
}
/*
 * Helper:
 *
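
The FIXME in the new helper above notes that a -1/0/1 return would let callers report a failure to read RAID status separately from "condition not present". Purely as a sketch of that idea (editor's illustration, not part of this commit; the function name is hypothetical):

/* Hypothetical tri-state variant: -1 = could not read RAID status,
 * 0 = condition not present, 1 = primary failed during "recover". */
static int _raid_primary_failure_on_recover_tristate(struct logical_volume *lv)
{
	char *dev_health, *sync_action;

	if (!lv_raid_sync_action(lv, &sync_action) ||
	    !lv_raid_dev_health(lv, &dev_health))
		return -1;

	return (!strcmp(sync_action, "idle") &&
		strchr(dev_health, 'a')) ? 1 : 0;
}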
@@ -6458,11 +6491,38 @@ static int _lv_raid_rebuild_or_replace(struct logical_volume *lv,
	}

	if (!_raid_in_sync(lv)) {
		/*
		 * FIXME: There is a bug in the kernel that prevents 'rebuild'
		 *        from being specified when the array is not in-sync.
		 *        There are conditions where this should be allowed,
		 *        but only when we are doing a repair - as indicated by
		 *        'lv->vg->cmd->handles_missing_pvs'.  The above
		 *        conditional should be:
		 *        (!lv->vg->cmd->handles_missing_pvs && !_raid_in_sync(lv))
		 */
		log_error("Unable to replace devices in %s while it is "
			  "not in-sync.", display_lvname(lv));
		return 0;
	}
	if (_lv_raid_has_primary_failure_on_recover(lv)) {
		/*
		 * I hate having multiple error lines, but this
		 * seems to work best for syslog and CLI.
		 */
		log_error("Unable to repair %s/%s. Source devices failed"
			  " before the RAID could synchronize.",
			  lv->vg->name, lv->name);
		log_error("You should choose one of the following:");
		log_error("  1) Deactivate %s/%s, revive the failed "
			  "device, re-activate the LV, and proceed.",
			  lv->vg->name, lv->name);
		log_error("  2) Remove the LV (all data is lost).");
		log_error("  3) Seek expert advice to attempt to salvage any"
			  " data from remaining devices.");
		return 0;
	}
	/*
	 * How many sub-LVs are being removed?
	 */