/* * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. * Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. * * This file is part of the device-mapper userspace tools. * * This copyrighted material is made available to anyone wishing to use, * modify, copy, or redistribute it subject to the terms and conditions * of the GNU Lesser General Public License v.2.1. * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software Foundation, * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include "dmlib.h" #include "libdm-targets.h" #include "libdm-common.h" #include #include #include #include #include #ifdef linux # include "kdev_t.h" # include #else # define MAJOR(x) major((x)) # define MINOR(x) minor((x)) # define MKDEV(x,y) makedev((x),(y)) #endif #include "dm-ioctl.h" /* * Ensure build compatibility. * The hard-coded versions here are the highest present * in the _cmd_data arrays. */ #if !((DM_VERSION_MAJOR == 4 && DM_VERSION_MINOR >= 6)) #error The version of dm-ioctl.h included is incompatible. #endif /* FIXME This should be exported in device-mapper.h */ #define DM_NAME "device-mapper" #define PROC_MISC "/proc/misc" #define PROC_DEVICES "/proc/devices" #define MISC_NAME "misc" #define NUMBER_OF_MAJORS 4096 /* * Static minor number assigned since kernel version 2.6.36. * The original definition is in kernel's include/linux/miscdevice.h. * This number is also visible in modules.devname exported by depmod * utility (support included in module-init-tools version >= 3.12). 
*/ #define MAPPER_CTRL_MINOR 236 #define MISC_MAJOR 10 /* dm major version no for running kernel */ static unsigned _dm_version = DM_VERSION_MAJOR; static unsigned _dm_version_minor = 0; static unsigned _dm_version_patchlevel = 0; static int _log_suppress = 0; static int _kernel_major = 0; static int _kernel_minor = 0; static int _kernel_release = 0; /* * If the kernel dm driver only supports one major number * we store it in _dm_device_major. Otherwise we indicate * which major numbers have been claimed by device-mapper * in _dm_bitset. */ static unsigned _dm_multiple_major_support = 1; static dm_bitset_t _dm_bitset = NULL; static uint32_t _dm_device_major = 0; static int _control_fd = -1; static int _version_checked = 0; static int _version_ok = 1; static unsigned _ioctl_buffer_double_factor = 0; const int _dm_compat = 0; /* *INDENT-OFF* */ static struct cmd_data _cmd_data_v4[] = { {"create", DM_DEV_CREATE, {4, 0, 0}}, {"reload", DM_TABLE_LOAD, {4, 0, 0}}, {"remove", DM_DEV_REMOVE, {4, 0, 0}}, {"remove_all", DM_REMOVE_ALL, {4, 0, 0}}, {"suspend", DM_DEV_SUSPEND, {4, 0, 0}}, {"resume", DM_DEV_SUSPEND, {4, 0, 0}}, {"info", DM_DEV_STATUS, {4, 0, 0}}, {"deps", DM_TABLE_DEPS, {4, 0, 0}}, {"rename", DM_DEV_RENAME, {4, 0, 0}}, {"version", DM_VERSION, {4, 0, 0}}, {"status", DM_TABLE_STATUS, {4, 0, 0}}, {"table", DM_TABLE_STATUS, {4, 0, 0}}, {"waitevent", DM_DEV_WAIT, {4, 0, 0}}, {"names", DM_LIST_DEVICES, {4, 0, 0}}, {"clear", DM_TABLE_CLEAR, {4, 0, 0}}, {"mknodes", DM_DEV_STATUS, {4, 0, 0}}, #ifdef DM_LIST_VERSIONS {"versions", DM_LIST_VERSIONS, {4, 1, 0}}, #endif #ifdef DM_TARGET_MSG {"message", DM_TARGET_MSG, {4, 2, 0}}, #endif #ifdef DM_DEV_SET_GEOMETRY {"setgeometry", DM_DEV_SET_GEOMETRY, {4, 6, 0}}, #endif }; /* *INDENT-ON* */ #define ALIGNMENT 8 /* FIXME Rejig library to record & use errno instead */ #ifndef DM_EXISTS_FLAG # define DM_EXISTS_FLAG 0x00000004 #endif static char *_align(char *ptr, unsigned int a) { register unsigned long agn = --a; return (char *) 
(((unsigned long) ptr + agn) & ~agn); } static int _uname(void) { static int _uts_set = 0; struct utsname _uts; int parts; if (_uts_set) return 1; if (uname(&_uts)) { log_error("uname failed: %s", strerror(errno)); return 0; } parts = sscanf(_uts.release, "%d.%d.%d", &_kernel_major, &_kernel_minor, &_kernel_release); /* Kernels with a major number of 2 always had 3 parts. */ if (parts < 1 || (_kernel_major < 3 && parts < 3)) { log_error("Could not determine kernel version used."); return 0; } _uts_set = 1; return 1; } #ifdef DM_IOCTLS /* * Set number to NULL to populate _dm_bitset - otherwise first * match is returned. */ static int _get_proc_number(const char *file, const char *name, uint32_t *number) { FILE *fl; char nm[256]; char *line = NULL; size_t len; uint32_t num; if (!(fl = fopen(file, "r"))) { log_sys_error("fopen", file); return 0; } while (getline(&line, &len, fl) != -1) { if (sscanf(line, "%d %255s\n", &num, &nm[0]) == 2) { if (!strcmp(name, nm)) { if (number) { *number = num; if (fclose(fl)) log_sys_error("fclose", file); free(line); return 1; } dm_bit_set(_dm_bitset, num); } } } if (fclose(fl)) log_sys_error("fclose", file); free(line); if (number) { log_error("%s: No entry for %s found", file, name); return 0; } return 1; } static int _control_device_number(uint32_t *major, uint32_t *minor) { if (!_get_proc_number(PROC_DEVICES, MISC_NAME, major) || !_get_proc_number(PROC_MISC, DM_NAME, minor)) { *major = 0; return 0; } return 1; } /* * Returns 1 if it exists on returning; 0 if it doesn't; -1 if it's wrong. 
*/ static int _control_exists(const char *control, uint32_t major, uint32_t minor) { struct stat buf; if (stat(control, &buf) < 0) { if (errno != ENOENT) log_sys_error("stat", control); return 0; } if (!S_ISCHR(buf.st_mode)) { log_verbose("%s: Wrong inode type", control); if (!unlink(control)) return 0; log_sys_error("unlink", control); return -1; } if (major && buf.st_rdev != MKDEV(major, minor)) { log_verbose("%s: Wrong device number: (%u, %u) instead of " "(%u, %u)", control, MAJOR(buf.st_mode), MINOR(buf.st_mode), major, minor); if (!unlink(control)) return 0; log_sys_error("unlink", control); return -1; } return 1; } static int _create_control(const char *control, uint32_t major, uint32_t minor) { int ret; mode_t old_umask; /* * Return if the control already exists with intended major/minor * or there's an error unlinking an apparently incorrect one. */ ret = _control_exists(control, major, minor); if (ret == -1) return 0; /* Failed to unlink existing incorrect node */ if (ret) return 1; /* Already exists and correct */ (void) dm_prepare_selinux_context(dm_dir(), S_IFDIR); old_umask = umask(DM_DEV_DIR_UMASK); ret = dm_create_dir(dm_dir()); umask(old_umask); (void) dm_prepare_selinux_context(NULL, 0); if (!ret) return 0; log_verbose("Creating device %s (%u, %u)", control, major, minor); (void) dm_prepare_selinux_context(control, S_IFCHR); old_umask = umask(DM_CONTROL_NODE_UMASK); if (mknod(control, S_IFCHR | S_IRUSR | S_IWUSR, MKDEV(major, minor)) < 0) { log_sys_error("mknod", control); (void) dm_prepare_selinux_context(NULL, 0); return 0; } umask(old_umask); (void) dm_prepare_selinux_context(NULL, 0); return 1; } #endif /* * FIXME Update bitset in long-running process if dm claims new major numbers. */ static int _create_dm_bitset(void) { #ifdef DM_IOCTLS if (_dm_bitset || _dm_device_major) return 1; if (!_uname()) return 0; /* * 2.6 kernels are limited to one major number. * Assume 2.4 kernels are patched not to. 
* FIXME Check _dm_version and _dm_version_minor if 2.6 changes this. */ if (KERNEL_VERSION(_kernel_major, _kernel_minor, _kernel_release) >= KERNEL_VERSION(2, 6, 0)) _dm_multiple_major_support = 0; if (!_dm_multiple_major_support) { if (!_get_proc_number(PROC_DEVICES, DM_NAME, &_dm_device_major)) return 0; return 1; } /* Multiple major numbers supported */ if (!(_dm_bitset = dm_bitset_create(NULL, NUMBER_OF_MAJORS))) return 0; if (!_get_proc_number(PROC_DEVICES, DM_NAME, NULL)) { dm_bitset_destroy(_dm_bitset); _dm_bitset = NULL; return 0; } return 1; #else return 0; #endif } int dm_is_dm_major(uint32_t major) { if (!_create_dm_bitset()) return 0; if (_dm_multiple_major_support) return dm_bit(_dm_bitset, major) ? 1 : 0; else return (major == _dm_device_major) ? 1 : 0; } static void _close_control_fd(void) { if (_control_fd != -1) { if (close(_control_fd) < 0) log_sys_error("close", "_control_fd"); _control_fd = -1; } } static int _open_and_assign_control_fd(const char *control) { if ((_control_fd = open(control, O_RDWR)) < 0) { log_sys_error("open", control); return 0; } return 1; } static int _open_control(void) { #ifdef DM_IOCTLS char control[PATH_MAX]; uint32_t major = MISC_MAJOR; uint32_t minor = MAPPER_CTRL_MINOR; if (_control_fd != -1) return 1; if (!_uname()) return 0; snprintf(control, sizeof(control), "%s/%s", dm_dir(), DM_CONTROL_NODE); /* * Prior to 2.6.36 the minor number should be looked up in /proc. */ if ((KERNEL_VERSION(_kernel_major, _kernel_minor, _kernel_release) < KERNEL_VERSION(2, 6, 36)) && !_control_device_number(&major, &minor)) goto_bad; /* * Create the node with correct major and minor if not already done. * Udev may already have created /dev/mapper/control * from the modules.devname file generated by depmod. */ if (!_create_control(control, major, minor)) goto_bad; /* * As of 2.6.36 kernels, the open can trigger autoloading dm-mod. 
*/ if (!_open_and_assign_control_fd(control)) goto_bad; if (!_create_dm_bitset()) { log_error("Failed to set up list of device-mapper major numbers"); return 0; } return 1; bad: log_error("Failure to communicate with kernel device-mapper driver."); if (!geteuid()) log_error("Check that device-mapper is available in the kernel."); return 0; #else return 1; #endif } static void _dm_zfree_string(char *string) { if (string) { memset(string, 0, strlen(string)); dm_free(string); } } static void _dm_zfree_dmi(struct dm_ioctl *dmi) { if (dmi) { memset(dmi, 0, dmi->data_size); dm_free(dmi); } } void dm_task_destroy(struct dm_task *dmt) { struct target *t, *n; for (t = dmt->head; t; t = n) { n = t->next; _dm_zfree_string(t->params); dm_free(t->type); dm_free(t); } _dm_zfree_dmi(dmt->dmi.v4); dm_free(dmt->dev_name); dm_free(dmt->newname); dm_free(dmt->message); dm_free(dmt->geometry); dm_free(dmt->uuid); dm_free(dmt); } /* * Protocol Version 4 functions. */ int dm_task_get_driver_version(struct dm_task *dmt, char *version, size_t size) { unsigned *v; if (!dmt->dmi.v4) { version[0] = '\0'; return 0; } v = dmt->dmi.v4->version; snprintf(version, size, "%u.%u.%u", v[0], v[1], v[2]); _dm_version_minor = v[1]; _dm_version_patchlevel = v[2]; return 1; } static int _check_version(char *version, size_t size, int log_suppress) { struct dm_task *task; int r; if (!(task = dm_task_create(DM_DEVICE_VERSION))) { log_error("Failed to get device-mapper version"); version[0] = '\0'; return 0; } if (log_suppress) _log_suppress = 1; r = dm_task_run(task); dm_task_get_driver_version(task, version, size); dm_task_destroy(task); _log_suppress = 0; return r; } /* * Find out device-mapper's major version number the first time * this is called and whether or not we support it. 
*/ int dm_check_version(void) { char libversion[64], dmversion[64]; const char *compat = ""; if (_version_checked) return _version_ok; _version_checked = 1; if (_check_version(dmversion, sizeof(dmversion), _dm_compat)) return 1; if (!_dm_compat) goto_bad; log_verbose("device-mapper ioctl protocol version %u failed. " "Trying protocol version 1.", _dm_version); _dm_version = 1; if (_check_version(dmversion, sizeof(dmversion), 0)) { log_verbose("Using device-mapper ioctl protocol version 1"); return 1; } compat = "(compat)"; dm_get_library_version(libversion, sizeof(libversion)); log_error("Incompatible libdevmapper %s%s and kernel driver %s", libversion, compat, dmversion); bad: _version_ok = 0; return 0; } int dm_cookie_supported(void) { return (dm_check_version() && _dm_version >= 4 && _dm_version_minor >= 15); } static int dm_inactive_supported(void) { int inactive_supported = 0; if (dm_check_version() && _dm_version >= 4) { if (_dm_version_minor >= 16) inactive_supported = 1; /* upstream */ else if (_dm_version_minor == 11 && (_dm_version_patchlevel >= 6 && _dm_version_patchlevel <= 40)) { inactive_supported = 1; /* RHEL 5.7 */ } } return inactive_supported; } void *dm_get_next_target(struct dm_task *dmt, void *next, uint64_t *start, uint64_t *length, char **target_type, char **params) { struct target *t = (struct target *) next; if (!t) t = dmt->head; if (!t) return NULL; *start = t->start; *length = t->length; *target_type = t->type; *params = t->params; return t->next; } /* Unmarshall the target info returned from a status call */ static int _unmarshal_status(struct dm_task *dmt, struct dm_ioctl *dmi) { char *outbuf = (char *) dmi + dmi->data_start; char *outptr = outbuf; uint32_t i; struct dm_target_spec *spec; for (i = 0; i < dmi->target_count; i++) { spec = (struct dm_target_spec *) outptr; if (!dm_task_add_target(dmt, spec->sector_start, spec->length, spec->target_type, outptr + sizeof(*spec))) { return 0; } outptr = outbuf + spec->next; } return 1; } int 
dm_format_dev(char *buf, int bufsize, uint32_t dev_major, uint32_t dev_minor) { int r; if (bufsize < 8) return 0; r = snprintf(buf, (size_t) bufsize, "%u:%u", dev_major, dev_minor); if (r < 0 || r > bufsize - 1) return 0; return 1; } int dm_task_get_info(struct dm_task *dmt, struct dm_info *info) { if (!dmt->dmi.v4) return 0; memset(info, 0, sizeof(*info)); info->exists = dmt->dmi.v4->flags & DM_EXISTS_FLAG ? 1 : 0; if (!info->exists) return 1; info->suspended = dmt->dmi.v4->flags & DM_SUSPEND_FLAG ? 1 : 0; info->read_only = dmt->dmi.v4->flags & DM_READONLY_FLAG ? 1 : 0; info->live_table = dmt->dmi.v4->flags & DM_ACTIVE_PRESENT_FLAG ? 1 : 0; info->inactive_table = dmt->dmi.v4->flags & DM_INACTIVE_PRESENT_FLAG ? 1 : 0; info->target_count = dmt->dmi.v4->target_count; info->open_count = dmt->dmi.v4->open_count; info->event_nr = dmt->dmi.v4->event_nr; info->major = MAJOR(dmt->dmi.v4->dev); info->minor = MINOR(dmt->dmi.v4->dev); return 1; } uint32_t dm_task_get_read_ahead(const struct dm_task *dmt, uint32_t *read_ahead) { const char *dev_name; *read_ahead = 0; if (!dmt->dmi.v4 || !(dmt->dmi.v4->flags & DM_EXISTS_FLAG)) return 0; if (*dmt->dmi.v4->name) dev_name = dmt->dmi.v4->name; else if (dmt->dev_name) dev_name = dmt->dev_name; else { log_error("Get read ahead request failed: device name unrecorded."); return 0; } return get_dev_node_read_ahead(dev_name, read_ahead); } const char *dm_task_get_name(const struct dm_task *dmt) { return (dmt->dmi.v4->name); } const char *dm_task_get_uuid(const struct dm_task *dmt) { return (dmt->dmi.v4->uuid); } struct dm_deps *dm_task_get_deps(struct dm_task *dmt) { return (struct dm_deps *) (((char *) dmt->dmi.v4) + dmt->dmi.v4->data_start); } struct dm_names *dm_task_get_names(struct dm_task *dmt) { return (struct dm_names *) (((char *) dmt->dmi.v4) + dmt->dmi.v4->data_start); } struct dm_versions *dm_task_get_versions(struct dm_task *dmt) { return (struct dm_versions *) (((char *) dmt->dmi.v4) + dmt->dmi.v4->data_start); } int 
dm_task_set_ro(struct dm_task *dmt) { dmt->read_only = 1; return 1; } int dm_task_set_read_ahead(struct dm_task *dmt, uint32_t read_ahead, uint32_t read_ahead_flags) { dmt->read_ahead = read_ahead; dmt->read_ahead_flags = read_ahead_flags; return 1; } int dm_task_suppress_identical_reload(struct dm_task *dmt) { dmt->suppress_identical_reload = 1; return 1; } int dm_task_set_add_node(struct dm_task *dmt, dm_add_node_t add_node) { switch (add_node) { case DM_ADD_NODE_ON_RESUME: case DM_ADD_NODE_ON_CREATE: dmt->add_node = add_node; return 1; default: log_error("Unknown add node parameter"); return 0; } } int dm_task_set_newuuid(struct dm_task *dmt, const char *newuuid) { if (strlen(newuuid) >= DM_UUID_LEN) { log_error("Uuid \"%s\" too long", newuuid); return 0; } if (!(dmt->newname = dm_strdup(newuuid))) { log_error("dm_task_set_newuuid: strdup(%s) failed", newuuid); return 0; } dmt->new_uuid = 1; return 1; } int dm_task_set_newname(struct dm_task *dmt, const char *newname) { if (strchr(newname, '/')) { log_error("Name \"%s\" invalid. 
It contains \"/\".", newname); return 0; } if (strlen(newname) >= DM_NAME_LEN) { log_error("Name \"%s\" too long", newname); return 0; } if (!(dmt->newname = dm_strdup(newname))) { log_error("dm_task_set_newname: strdup(%s) failed", newname); return 0; } dmt->new_uuid = 0; return 1; } int dm_task_set_message(struct dm_task *dmt, const char *message) { if (!(dmt->message = dm_strdup(message))) { log_error("dm_task_set_message: strdup failed"); return 0; } return 1; } int dm_task_set_sector(struct dm_task *dmt, uint64_t sector) { dmt->sector = sector; return 1; } int dm_task_set_geometry(struct dm_task *dmt, const char *cylinders, const char *heads, const char *sectors, const char *start) { size_t len = strlen(cylinders) + 1 + strlen(heads) + 1 + strlen(sectors) + 1 + strlen(start) + 1; if (!(dmt->geometry = dm_malloc(len))) { log_error("dm_task_set_geometry: dm_malloc failed"); return 0; } if (sprintf(dmt->geometry, "%s %s %s %s", cylinders, heads, sectors, start) < 0) { log_error("dm_task_set_geometry: sprintf failed"); return 0; } return 1; } int dm_task_no_flush(struct dm_task *dmt) { dmt->no_flush = 1; return 1; } int dm_task_no_open_count(struct dm_task *dmt) { dmt->no_open_count = 1; return 1; } int dm_task_skip_lockfs(struct dm_task *dmt) { dmt->skip_lockfs = 1; return 1; } int dm_task_secure_data(struct dm_task *dmt) { dmt->secure_data = 1; return 1; } int dm_task_retry_remove(struct dm_task *dmt) { dmt->retry_remove = 1; return 1; } int dm_task_query_inactive_table(struct dm_task *dmt) { dmt->query_inactive_table = 1; return 1; } int dm_task_set_event_nr(struct dm_task *dmt, uint32_t event_nr) { dmt->event_nr = event_nr; return 1; } struct target *create_target(uint64_t start, uint64_t len, const char *type, const char *params) { struct target *t; if (strlen(type) >= DM_MAX_TYPE_NAME) { log_error("Target type name %s is too long.", type); return NULL; } if (!(t = dm_zalloc(sizeof(*t)))) { log_error("create_target: malloc(%" PRIsize_t ") failed", 
sizeof(*t)); return NULL; } if (!(t->params = dm_strdup(params))) { log_error("create_target: strdup(params) failed"); goto bad; } if (!(t->type = dm_strdup(type))) { log_error("create_target: strdup(type) failed"); goto bad; } t->start = start; t->length = len; return t; bad: _dm_zfree_string(t->params); dm_free(t->type); dm_free(t); return NULL; } static char *_add_target(struct target *t, char *out, char *end) { char *out_sp = out; struct dm_target_spec sp; size_t sp_size = sizeof(struct dm_target_spec); int len; if (strlen(t->type) >= sizeof(sp.target_type)) { log_error("Target type name %s is too long.", t->type); return NULL; } sp.status = 0; sp.sector_start = t->start; sp.length = t->length; strncpy(sp.target_type, t->type, sizeof(sp.target_type) - 1); sp.target_type[sizeof(sp.target_type) - 1] = '\0'; out += sp_size; len = strlen(t->params); if ((out >= end) || (out + len + 1) >= end) { log_error("Ran out of memory building ioctl parameter"); return NULL; } strcpy(out, t->params); out += len + 1; /* align next block */ out = _align(out, ALIGNMENT); sp.next = out - out_sp; memcpy(out_sp, &sp, sp_size); return out; } static int _lookup_dev_name(uint64_t dev, char *buf, size_t len) { struct dm_names *names; unsigned next = 0; struct dm_task *dmt; int r = 0; if (!(dmt = dm_task_create(DM_DEVICE_LIST))) return 0; if (!dm_task_run(dmt)) goto out; if (!(names = dm_task_get_names(dmt))) goto out; if (!names->dev) goto out; do { names = (struct dm_names *)((char *) names + next); if (names->dev == dev) { strncpy(buf, names->name, len); r = 1; break; } next = names->next; } while (next); out: dm_task_destroy(dmt); return r; } static struct dm_ioctl *_flatten(struct dm_task *dmt, unsigned repeat_count) { const size_t min_size = 16 * 1024; const int (*version)[3]; struct dm_ioctl *dmi; struct target *t; struct dm_target_msg *tmsg; size_t len = sizeof(struct dm_ioctl); char *b, *e; int count = 0; for (t = dmt->head; t; t = t->next) { len += sizeof(struct 
dm_target_spec); len += strlen(t->params) + 1 + ALIGNMENT; count++; } if (count && (dmt->sector || dmt->message)) { log_error("targets and message are incompatible"); return NULL; } if (count && dmt->newname) { log_error("targets and rename are incompatible"); return NULL; } if (count && dmt->geometry) { log_error("targets and geometry are incompatible"); return NULL; } if (dmt->newname && (dmt->sector || dmt->message)) { log_error("message and rename are incompatible"); return NULL; } if (dmt->newname && dmt->geometry) { log_error("geometry and rename are incompatible"); return NULL; } if (dmt->geometry && (dmt->sector || dmt->message)) { log_error("geometry and message are incompatible"); return NULL; } if (dmt->sector && !dmt->message) { log_error("message is required with sector"); return NULL; } if (dmt->newname) len += strlen(dmt->newname) + 1; if (dmt->message) len += sizeof(struct dm_target_msg) + strlen(dmt->message) + 1; if (dmt->geometry) len += strlen(dmt->geometry) + 1; /* * Give len a minimum size so that we have space to store * dependencies or status information. 
*/ if (len < min_size) len = min_size; /* Increase buffer size if repeating because buffer was too small */ while (repeat_count--) len *= 2; if (!(dmi = dm_malloc(len))) return NULL; memset(dmi, 0, len); version = &_cmd_data_v4[dmt->type].version; dmi->version[0] = (*version)[0]; dmi->version[1] = (*version)[1]; dmi->version[2] = (*version)[2]; dmi->data_size = len; dmi->data_start = sizeof(struct dm_ioctl); if (dmt->minor >= 0) { if (dmt->major <= 0) { log_error("Missing major number for persistent device."); goto bad; } if (!_dm_multiple_major_support && dmt->allow_default_major_fallback && dmt->major != _dm_device_major) { log_verbose("Overriding major number of %" PRIu32 " with %" PRIu32 " for persistent device.", dmt->major, _dm_device_major); dmt->major = _dm_device_major; } dmi->flags |= DM_PERSISTENT_DEV_FLAG; dmi->dev = MKDEV(dmt->major, dmt->minor); } /* Does driver support device number referencing? */ if (_dm_version_minor < 3 && !dmt->dev_name && !dmt->uuid && dmi->dev) { if (!_lookup_dev_name(dmi->dev, dmi->name, sizeof(dmi->name))) { log_error("Unable to find name for device (%" PRIu32 ":%" PRIu32 ")", dmt->major, dmt->minor); goto bad; } log_verbose("device (%" PRIu32 ":%" PRIu32 ") is %s " "for compatibility with old kernel", dmt->major, dmt->minor, dmi->name); } /* FIXME Until resume ioctl supplies name, use dev_name for readahead */ if (dmt->dev_name && (dmt->type != DM_DEVICE_RESUME || dmt->minor < 0 || dmt->major < 0)) strncpy(dmi->name, dmt->dev_name, sizeof(dmi->name)); if (dmt->uuid) strncpy(dmi->uuid, dmt->uuid, sizeof(dmi->uuid)); if (dmt->type == DM_DEVICE_SUSPEND) dmi->flags |= DM_SUSPEND_FLAG; if (dmt->no_flush) dmi->flags |= DM_NOFLUSH_FLAG; if (dmt->read_only) dmi->flags |= DM_READONLY_FLAG; if (dmt->skip_lockfs) dmi->flags |= DM_SKIP_LOCKFS_FLAG; if (dmt->secure_data) { if (_dm_version_minor < 20) log_verbose("Secure data flag unsupported by kernel. 
" "Buffers will not be wiped after use."); dmi->flags |= DM_SECURE_DATA_FLAG; } if (dmt->query_inactive_table) { if (!dm_inactive_supported()) log_warn("WARNING: Inactive table query unsupported " "by kernel. It will use live table."); dmi->flags |= DM_QUERY_INACTIVE_TABLE_FLAG; } if (dmt->new_uuid) { if (_dm_version_minor < 19) { log_error("WARNING: Setting UUID unsupported by " "kernel. Aborting operation."); goto bad; } dmi->flags |= DM_UUID_FLAG; } dmi->target_count = count; dmi->event_nr = dmt->event_nr; b = (char *) (dmi + 1); e = (char *) dmi + len; for (t = dmt->head; t; t = t->next) if (!(b = _add_target(t, b, e))) goto_bad; if (dmt->newname) strcpy(b, dmt->newname); if (dmt->message) { tmsg = (struct dm_target_msg *) b; tmsg->sector = dmt->sector; strcpy(tmsg->message, dmt->message); } if (dmt->geometry) strcpy(b, dmt->geometry); return dmi; bad: _dm_zfree_dmi(dmi); return NULL; } static int _process_mapper_dir(struct dm_task *dmt) { struct dirent *dirent; DIR *d; const char *dir; int r = 1; dir = dm_dir(); if (!(d = opendir(dir))) { log_sys_error("opendir", dir); return 0; } while ((dirent = readdir(d))) { if (!strcmp(dirent->d_name, ".") || !strcmp(dirent->d_name, "..") || !strcmp(dirent->d_name, "control")) continue; if (!dm_task_set_name(dmt, dirent->d_name)) { r = 0; stack; continue; /* try next name */ } if (!dm_task_run(dmt)) { r = 0; stack; /* keep going */ } } if (closedir(d)) log_sys_error("closedir", dir); return r; } static int _process_all_v4(struct dm_task *dmt) { struct dm_task *task; struct dm_names *names; unsigned next = 0; int r = 1; if (!(task = dm_task_create(DM_DEVICE_LIST))) return 0; if (!dm_task_run(task)) { r = 0; goto out; } if (!(names = dm_task_get_names(task))) { r = 0; goto out; } if (!names->dev) goto out; do { names = (struct dm_names *)((char *) names + next); if (!dm_task_set_name(dmt, names->name)) { r = 0; goto out; } if (!dm_task_run(dmt)) r = 0; next = names->next; } while (next); out: dm_task_destroy(task); return 
r;
}

/*
 * mknodes: first run the task over the names found in the dm directory
 * (result ignored), then over every device the kernel lists.
 * Returns the result of the second pass only.
 */
static int _mknodes_v4(struct dm_task *dmt)
{
	(void) _process_mapper_dir(dmt);

	return _process_all_v4(dmt);
}

/*
 * If an operation that uses a cookie fails, decrement the
 * semaphore instead of udev.
 *
 * Returns 1 when there is nothing to do (no cookie set, or the cookie
 * part of event_nr is zero); otherwise returns dm_udev_complete()'s result.
 */
static int _udev_complete(struct dm_task *dmt)
{
	uint16_t base;

	if (dmt->cookie_set &&
	    (base = dmt->event_nr & ~DM_UDEV_FLAGS_MASK))
		/* strip flags from the cookie and use cookie magic instead */
		return dm_udev_complete(base | (DM_COOKIE_MAGIC <<
						DM_UDEV_FLAGS_SHIFT));

	return 1;
}

/*
 * Report whether the ioctl result says a uevent was generated.
 * Returns 1 when the driver version is too old to tell; otherwise
 * returns the raw flag bit (non-zero if set, not normalised to 1).
 */
static int _check_uevent_generated(struct dm_ioctl *dmi)
{
	if (!dm_check_version() ||
	    _dm_version < 4 ||
	    _dm_version_minor < 17)
		/* can't check, assume uevent is generated */
		return 1;

	return dmi->flags & DM_UEVENT_GENERATED_FLAG;
}

/*
 * Create a device and load its table in three steps: a separate CREATE
 * task, a separate RELOAD task that temporarily borrows dmt's target
 * list, and finally dmt itself re-typed as RESUME.
 *
 * If anything fails after the device was created, dmt is re-typed as
 * REMOVE and run to undo the creation.
 *
 * Returns 1 on success, 0 on failure.  Note that dmt->type is changed
 * and dmt->uuid is freed as a side effect.
 */
static int _create_and_load_v4(struct dm_task *dmt)
{
	struct dm_task *task;
	int r;
	uint32_t cookie;

	/* Use new task struct to create the device */
	if (!(task = dm_task_create(DM_DEVICE_CREATE))) {
		_udev_complete(dmt);
		return_0;
	}

	/* Copy across relevant fields */
	if (dmt->dev_name && !dm_task_set_name(task, dmt->dev_name))
		goto_bad;

	if (dmt->uuid && !dm_task_set_uuid(task, dmt->uuid))
		goto_bad;

	task->major = dmt->major;
	task->minor = dmt->minor;
	task->uid = dmt->uid;
	task->gid = dmt->gid;
	task->mode = dmt->mode;
	/* FIXME: Just for udev_check in dm_task_run. Can we avoid this? */
	task->event_nr = dmt->event_nr & DM_UDEV_FLAGS_MASK;
	task->cookie_set = dmt->cookie_set;
	task->add_node = dmt->add_node;

	if (!dm_task_run(task))
		goto_bad;

	dm_task_destroy(task);

	/* Next load the table */
	if (!(task = dm_task_create(DM_DEVICE_RELOAD))) {
		stack;
		_udev_complete(dmt);
		goto revert;
	}

	/* Copy across relevant fields */
	if (dmt->dev_name && !dm_task_set_name(task, dmt->dev_name)) {
		stack;
		dm_task_destroy(task);
		_udev_complete(dmt);
		goto revert;
	}

	/* Lend dmt's target list to the reload task for the duration of the run. */
	task->read_only = dmt->read_only;
	task->head = dmt->head;
	task->tail = dmt->tail;
	task->secure_data = dmt->secure_data;

	r = dm_task_run(task);

	/* Detach the borrowed list so dm_task_destroy() does not free it. */
	task->head = NULL;
	task->tail = NULL;
	dm_task_destroy(task);

	if (!r) {
		stack;
		_udev_complete(dmt);
		goto revert;
	}

	/* Use the original structure last so the info will be correct */
	dmt->type = DM_DEVICE_RESUME;
	dm_free(dmt->uuid);
	dmt->uuid = NULL;

	if (dm_task_run(dmt))
		return 1;

revert:
	dmt->type = DM_DEVICE_REMOVE;
	dm_free(dmt->uuid);
	dmt->uuid = NULL;

	/*
	 * Also udev-synchronize "remove" dm task that is a part of this revert!
	 * But only if the original dm task was supposed to be synchronized.
	 */
	if (dmt->cookie_set) {
		cookie = (dmt->event_nr & ~DM_UDEV_FLAGS_MASK) |
			 (DM_COOKIE_MAGIC << DM_UDEV_FLAGS_SHIFT);
		if (!dm_task_set_cookie(dmt, &cookie,
					(dmt->event_nr & DM_UDEV_FLAGS_MASK) >>
					DM_UDEV_FLAGS_SHIFT))
			stack; /* keep going */
	}

	if (!dm_task_run(dmt))
		log_error("Failed to revert device creation.");

	return 0;

bad:
	/* Only reached before the device was created: nothing to revert. */
	dm_task_destroy(task);
	_udev_complete(dmt);

	return 0;
}

/* Size recorded by _reload_with_suppression_v4() as start + length of the last existing target. */
uint64_t dm_task_get_existing_table_size(struct dm_task *dmt)
{
	return dmt->existing_table_size;
}

/*
 * Reload a table unless it matches the one fetched from the kernel.
 *
 * Fetches the current table with a DM_DEVICE_TABLE task, records its size
 * in dmt->existing_table_size, and compares the read-only flag and every
 * target with dmt's.  On a full match dmt takes over the fetched ioctl
 * buffer and 1 is returned without reloading; otherwise the original
 * reload is run with suppression switched off.
 */
static int _reload_with_suppression_v4(struct dm_task *dmt)
{
	struct dm_task *task;
	struct target *t1, *t2;
	size_t len;
	int r;

	/* New task to get existing table information */
	if (!(task = dm_task_create(DM_DEVICE_TABLE))) {
		log_error("Failed to create device-mapper task struct");
		return 0;
	}

	/* Copy across relevant fields */
	if (dmt->dev_name && !dm_task_set_name(task, dmt->dev_name)) {
		dm_task_destroy(task);
		return 0;
	}

	if (dmt->uuid && !dm_task_set_uuid(task, dmt->uuid)) {
		dm_task_destroy(task);
		return 0;
	}

	task->major = dmt->major;
	task->minor = dmt->minor;

	r = dm_task_run(task);

	if (!r) {
		dm_task_destroy(task);
		return r;
	}

	/* Store existing table size */
	t2 = task->head;
	while (t2 && t2->next)
		t2 = t2->next;
	dmt->existing_table_size = t2 ? t2->start + t2->length : 0;

	if (((task->dmi.v4->flags & DM_READONLY_FLAG) ?
1 : 0) != dmt->read_only)
		goto no_match;

	t1 = dmt->head;
	t2 = task->head;

	while (t1 && t2) {
		/* Strip trailing spaces from the existing params before comparing. */
		len = strlen(t2->params);
		while (len-- > 0 && t2->params[len] == ' ')
			t2->params[len] = '\0';
		if ((t1->start != t2->start) ||
		    (t1->length != t2->length) ||
		    (strcmp(t1->type, t2->type)) ||
		    (strcmp(t1->params, t2->params)))
			goto no_match;

		t1 = t1->next;
		t2 = t2->next;
	}

	/* Identical only if both lists ended together. */
	if (!t1 && !t2) {
		/* Hand the fetched ioctl buffer over to dmt; task must not free it. */
		dmt->dmi.v4 = task->dmi.v4;
		task->dmi.v4 = NULL;
		dm_task_destroy(task);
		return 1;
	}

no_match:
	dm_task_destroy(task);

	/* Now do the original reload */
	dmt->suppress_identical_reload = 0;
	r = dm_task_run(dmt);

	return r;
}

/*
 * Recursively check the devices a device depends on for ones that are
 * already suspended.
 *
 * With device == 0 the device is identified by dmt's name/uuid/major/minor;
 * otherwise by the major/minor encoded in 'device' (used when recursing).
 *
 * A suspended device is only reported (debug message at the top level,
 * internal error when found through recursion); it stops further recursion
 * but still yields 1.
 *
 * Returns 0 if the DEPS task cannot be created, set up or run.
 * NOTE(review): after a successful dm_task_run() r holds its non-zero
 * result, so the later "goto out" paths (device does not exist, no deps,
 * failed recursion) also return non-zero - confirm this is intended.
 */
static int _check_children_not_suspended_v4(struct dm_task *dmt, uint64_t device)
{
	struct dm_task *task;
	struct dm_info info;
	struct dm_deps *deps;
	int r = 0;
	uint32_t i;

	/* Find dependencies */
	if (!(task = dm_task_create(DM_DEVICE_DEPS)))
		return 0;

	/* Copy across or set relevant fields */
	if (device) {
		task->major = MAJOR(device);
		task->minor = MINOR(device);
	} else {
		if (dmt->dev_name && !dm_task_set_name(task, dmt->dev_name))
			goto out;

		if (dmt->uuid && !dm_task_set_uuid(task, dmt->uuid))
			goto out;

		task->major = dmt->major;
		task->minor = dmt->minor;
	}

	task->uid = dmt->uid;
	task->gid = dmt->gid;
	task->mode = dmt->mode;
	/* FIXME: Just for udev_check in dm_task_run. Can we avoid this? */
	task->event_nr = dmt->event_nr & DM_UDEV_FLAGS_MASK;
	task->cookie_set = dmt->cookie_set;
	task->add_node = dmt->add_node;

	if (!(r = dm_task_run(task)))
		goto out;

	if (!dm_task_get_info(task, &info) || !info.exists)
		goto out;

	/*
	 * Warn if any of the devices this device depends upon are already
	 * suspended: I/O could become trapped between the two devices.
	 */
	if (info.suspended) {
		if (!device)
			log_debug("Attempting to suspend a device that is already suspended "
				  "(%u:%u)", info.major, info.minor);
		else
			log_error(INTERNAL_ERROR "Attempt to suspend device %s%s%s%.0d%s%.0d%s%s"
				  "that uses already-suspended device (%u:%u)",
				  dmt->dev_name ? : "", dmt->uuid ? : "",
				  dmt->major > 0 ? "(" : "",
				  dmt->major > 0 ? dmt->major : 0,
				  dmt->major > 0 ? ":" : "",
				  dmt->minor > 0 ? dmt->minor : 0,
				  dmt->major > 0 && dmt->minor == 0 ? "0" : "",
				  dmt->major > 0 ? ") " : "",
				  info.major, info.minor);

		/* No need for further recursion */
		r = 1;
		goto out;
	}

	if (!(deps = dm_task_get_deps(task)))
		goto out;

	for (i = 0; i < deps->count; i++) {
		/* Only recurse with dm devices */
		if (MAJOR(deps->device[i]) != _dm_device_major)
			continue;

		if (!_check_children_not_suspended_v4(task, deps->device[i]))
			goto out;
	}

	r = 1;

out:
	dm_task_destroy(task);

	return r;
}

/*
 * Suspend with the dependency check above performed first.
 * Clears dmt->enable_checks before running the task.
 * Returns 0 if the check fails, otherwise the result of dm_task_run().
 */
static int _suspend_with_validation_v4(struct dm_task *dmt)
{
	/* Avoid recursion */
	dmt->enable_checks = 0;

	/*
	 * Ensure we can't leave any I/O trapped between suspended devices.
	 */
	if (!_check_children_not_suspended_v4(dmt, 0))
		return 0;

	/* Finally, perform the original suspend. */
	return dm_task_run(dmt);
}

/*
 * Return a version of 'message' that is safe to log: "" for NULL, just
 * "key set" for any message starting with that text (compared
 * case-insensitively, so whatever follows is not shown), and the
 * message itself otherwise.
 */
static const char *_sanitise_message(char *message)
{
	const char *sanitised_message = message ?: "";

	/* FIXME: Check for whitespace variations. */
	/* This traps what cryptsetup sends us. */
	if (message && !strncasecmp(message, "key set", 7))
		sanitised_message = "key set";

	return sanitised_message;
}

static struct dm_ioctl *_do_dm_ioctl(struct dm_task *dmt, unsigned command,
				     unsigned buffer_repeat_count,
				     unsigned retry_repeat_count,
				     int *retryable)
{
	struct dm_ioctl *dmi;
	int ioctl_with_uevent;

	dmi = _flatten(dmt, buffer_repeat_count);
	if (!dmi) {
		log_error("Couldn't create ioctl argument.");
		return NULL;
	}

	if (dmt->type == DM_DEVICE_TABLE)
		dmi->flags |= DM_STATUS_TABLE_FLAG;

	dmi->flags |= DM_EXISTS_FLAG;	/* FIXME */

	if (dmt->no_open_count)
		dmi->flags |= DM_SKIP_BDGET_FLAG;

	ioctl_with_uevent = dmt->type == DM_DEVICE_RESUME ||
			    dmt->type == DM_DEVICE_REMOVE ||
			    dmt->type == DM_DEVICE_RENAME;

	if (ioctl_with_uevent && dm_cookie_supported()) {
		/*
		 * Always mark events coming from libdevmapper as
		 * "primary sourced". This is needed to distinguish
		 * any spurious events so we can act appropriately.
* This needs to be applied even when udev_sync is * not used because udev flags could be used alone. */ dmi->event_nr |= DM_UDEV_PRIMARY_SOURCE_FLAG << DM_UDEV_FLAGS_SHIFT; /* * Prevent udev vs. libdevmapper race when processing nodes * and symlinks. This can happen when the udev rules are * installed and udev synchronisation code is enabled in * libdevmapper but the software using libdevmapper does not * make use of it (by not calling dm_task_set_cookie before). * We need to instruct the udev rules not to be applied at * all in this situation so we can gracefully fallback to * libdevmapper's node and symlink creation code. */ if (!dmt->cookie_set && dm_udev_get_sync_support()) { log_debug("Cookie value is not set while trying to call %s " "ioctl. Please, consider using libdevmapper's udev " "synchronisation interface or disable it explicitly " "by calling dm_udev_set_sync_support(0).", dmt->type == DM_DEVICE_RESUME ? "DM_DEVICE_RESUME" : dmt->type == DM_DEVICE_REMOVE ? "DM_DEVICE_REMOVE" : "DM_DEVICE_RENAME"); log_debug("Switching off device-mapper and all subsystem related " "udev rules. Falling back to libdevmapper node creation."); /* * Disable general dm and subsystem rules but keep * dm disk rules if not flagged out explicitly before. * We need /dev/disk content for the software that expects it. */ dmi->event_nr |= (DM_UDEV_DISABLE_DM_RULES_FLAG | DM_UDEV_DISABLE_SUBSYSTEM_RULES_FLAG) << DM_UDEV_FLAGS_SHIFT; } } log_debug("dm %s %s%s %s%s%s %s%.0d%s%.0d%s" "%s%c%c%s%s%s%s%s%s %.0" PRIu64 " %s [%u] (*%u)", _cmd_data_v4[dmt->type].name, dmt->new_uuid ? "UUID " : "", dmi->name, dmi->uuid, dmt->newname ? " " : "", dmt->newname ? dmt->newname : "", dmt->major > 0 ? "(" : "", dmt->major > 0 ? dmt->major : 0, dmt->major > 0 ? ":" : "", dmt->minor > 0 ? dmt->minor : 0, dmt->major > 0 && dmt->minor == 0 ? "0" : "", dmt->major > 0 ? ") " : "", dmt->no_open_count ? 'N' : 'O', dmt->no_flush ? 'N' : 'F', dmt->read_only ? "R" : "", dmt->skip_lockfs ? 
"S " : "", dmt->retry_remove ? "T " : "", dmt->secure_data ? "W " : "", dmt->query_inactive_table ? "I " : "", dmt->enable_checks ? "C" : "", dmt->sector, _sanitise_message(dmt->message), dmi->data_size, retry_repeat_count); #ifdef DM_IOCTLS if (ioctl(_control_fd, command, dmi) < 0 && dmt->expected_errno != errno) { if (errno == ENXIO && ((dmt->type == DM_DEVICE_INFO) || (dmt->type == DM_DEVICE_MKNODES) || (dmt->type == DM_DEVICE_STATUS))) dmi->flags &= ~DM_EXISTS_FLAG; /* FIXME */ else { if (_log_suppress) log_verbose("device-mapper: %s ioctl " "failed: %s", _cmd_data_v4[dmt->type].name, strerror(errno)); else log_error("device-mapper: %s ioctl " "failed: %s", _cmd_data_v4[dmt->type].name, strerror(errno)); /* * It's sometimes worth retrying after EBUSY in case * it's a transient failure caused by an asynchronous * process quickly scanning the device. */ *retryable = errno == EBUSY; _dm_zfree_dmi(dmi); return NULL; } } if (ioctl_with_uevent && dm_udev_get_sync_support() && !_check_uevent_generated(dmi)) { log_debug("Uevent not generated! 
Calling udev_complete " "internally to avoid process lock-up."); _udev_complete(dmt); } #else /* Userspace alternative for testing */ #endif return dmi; } void dm_task_update_nodes(void) { update_devs(); } #define DM_IOCTL_RETRIES 25 #define DM_RETRY_USLEEP_DELAY 200000 int dm_task_run(struct dm_task *dmt) { struct dm_ioctl *dmi; unsigned command; int check_udev; int rely_on_udev; int suspended_counter; unsigned ioctl_retry = 1; int retryable = 0; if ((unsigned) dmt->type >= (sizeof(_cmd_data_v4) / sizeof(*_cmd_data_v4))) { log_error(INTERNAL_ERROR "unknown device-mapper task %d", dmt->type); return 0; } command = _cmd_data_v4[dmt->type].cmd; /* Old-style creation had a table supplied */ if (dmt->type == DM_DEVICE_CREATE && dmt->head) return _create_and_load_v4(dmt); if (dmt->type == DM_DEVICE_MKNODES && !dmt->dev_name && !dmt->uuid && dmt->major <= 0) return _mknodes_v4(dmt); if ((dmt->type == DM_DEVICE_RELOAD) && dmt->suppress_identical_reload) return _reload_with_suppression_v4(dmt); if ((dmt->type == DM_DEVICE_SUSPEND) && dmt->enable_checks) return _suspend_with_validation_v4(dmt); if (!_open_control()) { _udev_complete(dmt); return_0; } if ((suspended_counter = dm_get_suspended_counter()) && dmt->type == DM_DEVICE_RELOAD) log_error(INTERNAL_ERROR "Performing unsafe table load while %d device(s) " "are known to be suspended: " "%s%s%s %s%.0d%s%.0d%s%s", suspended_counter, dmt->dev_name ? : "", dmt->uuid ? " UUID " : "", dmt->uuid ? : "", dmt->major > 0 ? "(" : "", dmt->major > 0 ? dmt->major : 0, dmt->major > 0 ? ":" : "", dmt->minor > 0 ? dmt->minor : 0, dmt->major > 0 && dmt->minor == 0 ? "0" : "", dmt->major > 0 ? ") " : ""); /* FIXME Detect and warn if cookie set but should not be. */ repeat_ioctl: if (!(dmi = _do_dm_ioctl(dmt, command, _ioctl_buffer_double_factor, ioctl_retry, &retryable))) { /* * Async udev rules that scan devices commonly cause transient * failures. 
Normally you'd expect the user to have made sure * nothing was using the device before issuing REMOVE, so it's * worth retrying in case the failure is indeed transient. */ if (retryable && dmt->type == DM_DEVICE_REMOVE && dmt->retry_remove && ++ioctl_retry <= DM_IOCTL_RETRIES) { usleep(DM_RETRY_USLEEP_DELAY); goto repeat_ioctl; } _udev_complete(dmt); return 0; } if (dmi->flags & DM_BUFFER_FULL_FLAG) { switch (dmt->type) { case DM_DEVICE_LIST_VERSIONS: case DM_DEVICE_LIST: case DM_DEVICE_DEPS: case DM_DEVICE_STATUS: case DM_DEVICE_TABLE: case DM_DEVICE_WAITEVENT: _ioctl_buffer_double_factor++; _dm_zfree_dmi(dmi); goto repeat_ioctl; default: log_error("WARNING: libdevmapper buffer too small for data"); } } /* * Are we expecting a udev operation to occur that we need to check for? */ check_udev = dmt->cookie_set && !(dmt->event_nr >> DM_UDEV_FLAGS_SHIFT & DM_UDEV_DISABLE_DM_RULES_FLAG); rely_on_udev = dmt->cookie_set ? (dmt->event_nr >> DM_UDEV_FLAGS_SHIFT & DM_UDEV_DISABLE_LIBRARY_FALLBACK) : 0; switch (dmt->type) { case DM_DEVICE_CREATE: if ((dmt->add_node == DM_ADD_NODE_ON_CREATE) && dmt->dev_name && *dmt->dev_name && !rely_on_udev) add_dev_node(dmt->dev_name, MAJOR(dmi->dev), MINOR(dmi->dev), dmt->uid, dmt->gid, dmt->mode, check_udev, rely_on_udev); break; case DM_DEVICE_REMOVE: /* FIXME Kernel needs to fill in dmi->name */ if (dmt->dev_name && !rely_on_udev) rm_dev_node(dmt->dev_name, check_udev, rely_on_udev); break; case DM_DEVICE_RENAME: /* FIXME Kernel needs to fill in dmi->name */ if (!dmt->new_uuid && dmt->dev_name) rename_dev_node(dmt->dev_name, dmt->newname, check_udev, rely_on_udev); break; case DM_DEVICE_RESUME: if ((dmt->add_node == DM_ADD_NODE_ON_RESUME) && dmt->dev_name && *dmt->dev_name) add_dev_node(dmt->dev_name, MAJOR(dmi->dev), MINOR(dmi->dev), dmt->uid, dmt->gid, dmt->mode, check_udev, rely_on_udev); /* FIXME Kernel needs to fill in dmi->name */ set_dev_node_read_ahead(dmt->dev_name, dmt->read_ahead, dmt->read_ahead_flags); break; case 
DM_DEVICE_MKNODES: if (dmi->flags & DM_EXISTS_FLAG) add_dev_node(dmi->name, MAJOR(dmi->dev), MINOR(dmi->dev), dmt->uid, dmt->gid, dmt->mode, 0, rely_on_udev); else if (dmt->dev_name) rm_dev_node(dmt->dev_name, 0, rely_on_udev); break; case DM_DEVICE_STATUS: case DM_DEVICE_TABLE: case DM_DEVICE_WAITEVENT: if (!_unmarshal_status(dmt, dmi)) goto bad; break; } /* Was structure reused? */ _dm_zfree_dmi(dmt->dmi.v4); dmt->dmi.v4 = dmi; return 1; bad: _dm_zfree_dmi(dmi); return 0; } void dm_lib_release(void) { _close_control_fd(); update_devs(); } void dm_pools_check_leaks(void); void dm_lib_exit(void) { int suspended_counter; static unsigned _exited = 0; if (_exited++) return; if ((suspended_counter = dm_get_suspended_counter())) log_error("libdevmapper exiting with %d device(s) still suspended.", suspended_counter); dm_lib_release(); selinux_release(); if (_dm_bitset) dm_bitset_destroy(_dm_bitset); _dm_bitset = NULL; dm_pools_check_leaks(); dm_dump_memory(); _version_ok = 1; _version_checked = 0; }