path: root/daemons/dmeventd/plugins/raid/dmeventd_raid.c
/*
 * Copyright (C) 2005-2017 Red Hat, Inc. All rights reserved.
 *
 * This file is part of LVM2.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU Lesser General Public License v.2.1.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "lib.h"
#include "defaults.h"
#include "dmeventd_lvm.h"
#include "libdevmapper-event.h"

/* Hold enough 64-bit words for one bit per RAID image */
#define	RAID_DEVS_ELEMS	((DEFAULT_RAID_MAX_IMAGES + 63) / 64)

struct dso_state {
	struct dm_pool *mem;			/* Pool used to parse status lines */
	char cmd_lvconvert[512];		/* Preformatted repair command */
	uint64_t raid_devs[RAID_DEVS_ELEMS];	/* Bitmap of images already reported failed */
	int failed;				/* Repair has already been initiated */
	int warned;				/* Resynchronization warning already issued */
};

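/* Define this plugin's logging function with the "raid" prefix. */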
DM_EVENT_LOG_FN("raid")

static int _process_raid_event(struct dso_state *state, char *params,
			       const char *device)
{
	struct dm_status_raid *status;
	const char *d;
	int dead = 0, r = 1;
	uint32_t dev;

	if (!dm_get_status_raid(state->mem, params, &status)) {
		log_error("Failed to process status line for %s.", device);
		return 0;
	}

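	/*
	 * dev_health holds one character per RAID image: 'A' means alive and
	 * in-sync, 'a' alive but not yet in-sync, and 'D' dead/failed.
	 * Warn once for each failed image, using the raid_devs bitmap to
	 * suppress duplicate warnings across repeated status events.
	 */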
	d = status->dev_health;
	while ((d = strchr(d, 'D'))) {
		dev = (uint32_t)(d - status->dev_health);

		if (!(state->raid_devs[dev / 64] & (UINT64_C(1) << (dev % 64)))) {
			state->raid_devs[dev / 64] |= (UINT64_C(1) << (dev % 64));
			log_warn("WARNING: Device #%u of %s array, %s, has failed.",
				 dev, status->raid_type, device);
		}

		d++;
		dead = 1;
	}

	/*
	 * if we are converting from non-RAID to RAID (e.g. linear -> raid1)
	 * and too many original devices die, such that we cannot continue
	 * the "recover" operation, the sync action will go to "idle", the
	 * unsynced devs will remain at 'a', and the original devices will
	 * NOT SWITCH TO 'D', but will remain at 'A' - hoping to be revived.
	 *
	 * This is simply the way the kernel works...
	 */
	if (!strcmp(status->sync_action, "idle") &&
	    (status->dev_health[0] == 'a') &&
	    (status->insync_regions < status->total_regions)) {
		log_error("Primary sources for new RAID, %s, have failed.",
			  device);
		dead = 1; /* run it through LVM repair */
	}

	if (dead) {
		if (status->insync_regions < status->total_regions) {
			if (!state->warned) {
				state->warned = 1;
				log_warn("WARNING: waiting for resynchronization to finish "
					 "before initiating repair on RAID device %s.", device);
			}

			goto out; /* Not yet done syncing with accessible devices */
		}

		if (state->failed)
			goto out; /* already reported */

		state->failed = 1;

		/* Run the repair; report an error only if lvconvert itself fails. */
		if (!dmeventd_lvm2_run_with_lock(state->cmd_lvconvert)) {
			log_error("Repair of RAID device %s failed.", device);
			r = 0;
		}
	} else {
		state->failed = 0;
		if (status->insync_regions == status->total_regions)
			memset(&state->raid_devs, 0, sizeof(state->raid_devs));
		log_info("%s array, %s, is %s in-sync.",
			 status->raid_type, device,
			 (status->insync_regions == status->total_regions) ? "now" : "not");
	}
out:
	dm_pool_free(state->mem, status);

	return r;
}

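/*
 * dmeventd event callback: walk all targets of the device's table and
 * process the status of each raid segment.
 */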
void process_event(struct dm_task *dmt,
		   enum dm_event_mask event __attribute__((unused)),
		   void **user)
{
	struct dso_state *state = *user;
	void *next = NULL;
	uint64_t start, length;
	char *target_type = NULL;
	char *params;
	const char *device = dm_task_get_name(dmt);

	do {
		next = dm_get_next_target(dmt, next, &start, &length,
					  &target_type, &params);

		if (!target_type) {
			log_info("%s mapping lost.", device);
			continue;
		}

		if (strcmp(target_type, "raid")) {
			log_info("%s has non-raid portion.", device);
			continue;
		}

		if (!_process_raid_event(state, params, device))
			log_error("Failed to process event for %s.",
				  device);
	} while (next);
}

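/*
 * dmeventd registration callback: allocate per-device state and preformat
 * the lvconvert repair command so it is ready to run when a failure event
 * arrives.
 */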
int register_device(const char *device,
		    const char *uuid __attribute__((unused)),
		    int major __attribute__((unused)),
		    int minor __attribute__((unused)),
		    void **user)
{
	struct dso_state *state = NULL;	/* NULL so the error path can test it safely */

	if (!dmeventd_lvm2_init_with_pool("raid_state", state))
		goto_bad;

	if (!dmeventd_lvm2_command(state->mem, state->cmd_lvconvert,
				   sizeof(state->cmd_lvconvert),
				   "lvconvert --repair --use-policies", device))
		goto_bad;

	*user = state;

	log_info("Monitoring RAID device %s for events.", device);

	return 1;
bad:
	log_error("Failed to monitor RAID %s.", device);

	if (state)
		dmeventd_lvm2_exit_with_pool(state);

	return 0;
}

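/* dmeventd unregistration callback: release the per-device state. */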
int unregister_device(const char *device,
		      const char *uuid __attribute__((unused)),
		      int major __attribute__((unused)),
		      int minor __attribute__((unused)),
		      void **user)
{
	struct dso_state *state = *user;

	dmeventd_lvm2_exit_with_pool(state);
	log_info("No longer monitoring RAID device %s for events.",
		 device);

	return 1;
}