mirror of
git://sourceware.org/git/lvm2.git
synced 2025-01-25 10:04:17 +03:00
8b78982297
Add a daemon that can be launched to monitor a group of regions corresponding to the extents of a file, and to update the regions as the file's allocation changes. The daemon is intended to be started from a library interface, but can also be run from the command line: dmfilemapd <fd> <group_id> <path> <mode> [<foreground>[<log_level>]] Where fd is a file descriptor open on the mapped file, group_id is the group identifier of the mapped group and mode is either "inode" or "path". E.g.: # dmfilemapd 3 0 vm.img inode 1 3 3<vm.img ... If foreground is non-zero, the daemon will not fork to run in the background. If verbose is non-zero, libdm and daemon log messages will be printed. It is possible for the group identifier to change when regions are re-mapped: this occurs when the group leader is deleted (regroup=1 in dm_stats_update_regions_from_fd()), and another region is created before the daemon has a chance to recreate the leader region. The operation is inherently racey since there is currently no way to atomically move or resize a dm_stats region while retaining its region_id. Detect this condition and update the group_id value stored in the filemap monitor. A function is also provided in the the stats API to launch the filemap monitoring daemon: int dm_stats_start_filemapd(int fd, uint64_t group_id, const char *path, dm_filemapd_mode_t mode, unsigned foreground, unsigned verbose); This carries out the first fork and execs dmfilemapd with the arguments specified. A dm_filemapd_mode_t value is specified by the mode argument: either DM_FILEMAPD_FOLLOW_INODE, or DM_FILEMAPD_FOLLOW_PATH. A helper function, dm_filemapd_mode_from_string(), is provided to parse a string containing a valid mode name into the appropriate dm_filemapd_mode_t value.
3738 lines
131 KiB
C
3738 lines
131 KiB
C
/*
|
|
* Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
|
|
* Copyright (C) 2004-2017 Red Hat, Inc. All rights reserved.
|
|
* Copyright (C) 2006 Rackable Systems All rights reserved.
|
|
*
|
|
* This file is part of the device-mapper userspace tools.
|
|
*
|
|
* This copyrighted material is made available to anyone wishing to use,
|
|
* modify, copy, or redistribute it subject to the terms and conditions
|
|
* of the GNU Lesser General Public License v.2.1.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public License
|
|
* along with this program; if not, write to the Free Software Foundation,
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
*/
|
|
|
|
#ifndef LIB_DEVICE_MAPPER_H
|
|
#define LIB_DEVICE_MAPPER_H
|
|
|
|
#include <inttypes.h>
|
|
#include <stdarg.h>
|
|
#include <sys/types.h>
|
|
#include <sys/stat.h>
|
|
|
|
#ifdef __linux__
|
|
# include <linux/types.h>
|
|
#endif
|
|
|
|
#include <limits.h>
|
|
#include <string.h>
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
|
|
#ifndef __GNUC__
|
|
# define __typeof__ typeof
|
|
#endif
|
|
|
|
/* Macros to make string defines */
|
|
#define DM_TO_STRING_EXP(A) #A
|
|
#define DM_TO_STRING(A) DM_TO_STRING_EXP(A)
|
|
|
|
#define DM_ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
|
|
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
|
|
/*****************************************************************
|
|
* The first section of this file provides direct access to the
|
|
* individual device-mapper ioctls. Since it is quite laborious to
|
|
* build the ioctl arguments for the device-mapper, people are
|
|
* encouraged to use this library.
|
|
****************************************************************/
|
|
|
|
/*
|
|
* The library user may wish to register their own
|
|
* logging function. By default errors go to stderr.
|
|
* Use dm_log_with_errno_init(NULL) to restore the default log fn.
|
|
* Error messages may have a non-zero errno.
|
|
* Debug messages may have a non-zero class.
|
|
* Aborts on internal error when env DM_ABORT_ON_INTERNAL_ERRORS is 1
|
|
*/
|
|
|
|
typedef void (*dm_log_with_errno_fn) (int level, const char *file, int line,
|
|
int dm_errno_or_class, const char *f, ...)
|
|
__attribute__ ((format(printf, 5, 6)));
|
|
|
|
void dm_log_with_errno_init(dm_log_with_errno_fn fn);
|
|
void dm_log_init_verbose(int level);
|
|
|
|
/*
|
|
* Original version of this function.
|
|
* dm_errno is set to 0.
|
|
*
|
|
* Deprecated: Use the _with_errno_ versions above instead.
|
|
*/
|
|
typedef void (*dm_log_fn) (int level, const char *file, int line,
|
|
const char *f, ...)
|
|
__attribute__ ((format(printf, 4, 5)));
|
|
|
|
void dm_log_init(dm_log_fn fn);
|
|
/*
|
|
* For backward-compatibility, indicate that dm_log_init() was used
|
|
* to set a non-default value of dm_log().
|
|
*/
|
|
int dm_log_is_non_default(void);
|
|
|
|
/*
|
|
* Number of devices currently in suspended state (via the library).
|
|
*/
|
|
int dm_get_suspended_counter(void);
|
|
|
|
enum {
|
|
DM_DEVICE_CREATE,
|
|
DM_DEVICE_RELOAD,
|
|
DM_DEVICE_REMOVE,
|
|
DM_DEVICE_REMOVE_ALL,
|
|
|
|
DM_DEVICE_SUSPEND,
|
|
DM_DEVICE_RESUME,
|
|
|
|
DM_DEVICE_INFO,
|
|
DM_DEVICE_DEPS,
|
|
DM_DEVICE_RENAME,
|
|
|
|
DM_DEVICE_VERSION,
|
|
|
|
DM_DEVICE_STATUS,
|
|
DM_DEVICE_TABLE,
|
|
DM_DEVICE_WAITEVENT,
|
|
|
|
DM_DEVICE_LIST,
|
|
|
|
DM_DEVICE_CLEAR,
|
|
|
|
DM_DEVICE_MKNODES,
|
|
|
|
DM_DEVICE_LIST_VERSIONS,
|
|
|
|
DM_DEVICE_TARGET_MSG,
|
|
|
|
DM_DEVICE_SET_GEOMETRY
|
|
};
|
|
|
|
/*
|
|
* You will need to build a struct dm_task for
|
|
* each ioctl command you want to execute.
|
|
*/
|
|
|
|
struct dm_pool;
|
|
struct dm_task;
|
|
struct dm_timestamp;
|
|
|
|
struct dm_task *dm_task_create(int type);
|
|
void dm_task_destroy(struct dm_task *dmt);
|
|
|
|
int dm_task_set_name(struct dm_task *dmt, const char *name);
|
|
int dm_task_set_uuid(struct dm_task *dmt, const char *uuid);
|
|
|
|
/*
|
|
* Retrieve attributes after an info.
|
|
*/
|
|
struct dm_info {
|
|
int exists;
|
|
int suspended;
|
|
int live_table;
|
|
int inactive_table;
|
|
int32_t open_count;
|
|
uint32_t event_nr;
|
|
uint32_t major;
|
|
uint32_t minor; /* minor device number */
|
|
int read_only; /* 0:read-write; 1:read-only */
|
|
|
|
int32_t target_count;
|
|
|
|
int deferred_remove;
|
|
int internal_suspend;
|
|
};
|
|
|
|
struct dm_deps {
|
|
uint32_t count;
|
|
uint32_t filler;
|
|
uint64_t device[0];
|
|
};
|
|
|
|
struct dm_names {
|
|
uint64_t dev;
|
|
uint32_t next; /* Offset to next struct from start of this struct */
|
|
char name[0];
|
|
};
|
|
|
|
struct dm_versions {
|
|
uint32_t next; /* Offset to next struct from start of this struct */
|
|
uint32_t version[3];
|
|
|
|
char name[0];
|
|
};
|
|
|
|
int dm_get_library_version(char *version, size_t size);
|
|
int dm_task_get_driver_version(struct dm_task *dmt, char *version, size_t size);
|
|
int dm_task_get_info(struct dm_task *dmt, struct dm_info *dmi);
|
|
|
|
/*
|
|
* This function returns dm device's UUID based on the value
|
|
* of the mangling mode set during preceding dm_task_run call:
|
|
* - unmangled UUID for DM_STRING_MANGLING_{AUTO, HEX},
|
|
* - UUID without any changes for DM_STRING_MANGLING_NONE.
|
|
*
|
|
* To get mangled or unmangled form of the UUID directly, use
|
|
* dm_task_get_uuid_mangled or dm_task_get_uuid_unmangled function.
|
|
*/
|
|
const char *dm_task_get_uuid(const struct dm_task *dmt);
|
|
|
|
struct dm_deps *dm_task_get_deps(struct dm_task *dmt);
|
|
struct dm_versions *dm_task_get_versions(struct dm_task *dmt);
|
|
const char *dm_task_get_message_response(struct dm_task *dmt);
|
|
|
|
/*
|
|
* These functions return device-mapper names based on the value
|
|
* of the mangling mode set during preceding dm_task_run call:
|
|
* - unmangled name for DM_STRING_MANGLING_{AUTO, HEX},
|
|
* - name without any changes for DM_STRING_MANGLING_NONE.
|
|
*
|
|
* To get mangled or unmangled form of the name directly, use
|
|
* dm_task_get_name_mangled or dm_task_get_name_unmangled function.
|
|
*/
|
|
const char *dm_task_get_name(const struct dm_task *dmt);
|
|
struct dm_names *dm_task_get_names(struct dm_task *dmt);
|
|
|
|
int dm_task_set_ro(struct dm_task *dmt);
|
|
int dm_task_set_newname(struct dm_task *dmt, const char *newname);
|
|
int dm_task_set_newuuid(struct dm_task *dmt, const char *newuuid);
|
|
int dm_task_set_minor(struct dm_task *dmt, int minor);
|
|
int dm_task_set_major(struct dm_task *dmt, int major);
|
|
int dm_task_set_major_minor(struct dm_task *dmt, int major, int minor, int allow_default_major_fallback);
|
|
int dm_task_set_uid(struct dm_task *dmt, uid_t uid);
|
|
int dm_task_set_gid(struct dm_task *dmt, gid_t gid);
|
|
int dm_task_set_mode(struct dm_task *dmt, mode_t mode);
|
|
/* See also description for DM_UDEV_DISABLE_LIBRARY_FALLBACK flag! */
|
|
int dm_task_set_cookie(struct dm_task *dmt, uint32_t *cookie, uint16_t flags);
|
|
int dm_task_set_event_nr(struct dm_task *dmt, uint32_t event_nr);
|
|
int dm_task_set_geometry(struct dm_task *dmt, const char *cylinders, const char *heads, const char *sectors, const char *start);
|
|
int dm_task_set_message(struct dm_task *dmt, const char *message);
|
|
int dm_task_set_sector(struct dm_task *dmt, uint64_t sector);
|
|
int dm_task_no_flush(struct dm_task *dmt);
|
|
int dm_task_no_open_count(struct dm_task *dmt);
|
|
int dm_task_skip_lockfs(struct dm_task *dmt);
|
|
int dm_task_query_inactive_table(struct dm_task *dmt);
|
|
int dm_task_suppress_identical_reload(struct dm_task *dmt);
|
|
int dm_task_secure_data(struct dm_task *dmt);
|
|
int dm_task_retry_remove(struct dm_task *dmt);
|
|
int dm_task_deferred_remove(struct dm_task *dmt);
|
|
|
|
/*
|
|
* Record timestamp immediately after the ioctl returns.
|
|
*/
|
|
int dm_task_set_record_timestamp(struct dm_task *dmt);
|
|
struct dm_timestamp *dm_task_get_ioctl_timestamp(struct dm_task *dmt);
|
|
|
|
/*
|
|
* Enable checks for common mistakes such as issuing ioctls in an unsafe order.
|
|
*/
|
|
int dm_task_enable_checks(struct dm_task *dmt);
|
|
|
|
typedef enum {
|
|
DM_ADD_NODE_ON_RESUME, /* add /dev/mapper node with dmsetup resume */
|
|
DM_ADD_NODE_ON_CREATE /* add /dev/mapper node with dmsetup create */
|
|
} dm_add_node_t;
|
|
int dm_task_set_add_node(struct dm_task *dmt, dm_add_node_t add_node);
|
|
|
|
/*
|
|
* Control read_ahead.
|
|
*/
|
|
#define DM_READ_AHEAD_AUTO UINT32_MAX /* Use kernel default readahead */
|
|
#define DM_READ_AHEAD_NONE 0 /* Disable readahead */
|
|
|
|
#define DM_READ_AHEAD_MINIMUM_FLAG 0x1 /* Value supplied is minimum */
|
|
|
|
/*
|
|
* Read ahead is set with DM_DEVICE_CREATE with a table or DM_DEVICE_RESUME.
|
|
*/
|
|
int dm_task_set_read_ahead(struct dm_task *dmt, uint32_t read_ahead,
|
|
uint32_t read_ahead_flags);
|
|
uint32_t dm_task_get_read_ahead(const struct dm_task *dmt,
|
|
uint32_t *read_ahead);
|
|
|
|
/*
|
|
* Use these to prepare for a create or reload.
|
|
*/
|
|
int dm_task_add_target(struct dm_task *dmt,
|
|
uint64_t start,
|
|
uint64_t size, const char *ttype, const char *params);
|
|
|
|
/*
|
|
* Format major/minor numbers correctly for input to driver.
|
|
*/
|
|
#define DM_FORMAT_DEV_BUFSIZE 13 /* Minimum bufsize to handle worst case. */
|
|
int dm_format_dev(char *buf, int bufsize, uint32_t dev_major, uint32_t dev_minor);
|
|
|
|
/* Use this to retrive target information returned from a STATUS call */
|
|
void *dm_get_next_target(struct dm_task *dmt,
|
|
void *next, uint64_t *start, uint64_t *length,
|
|
char **target_type, char **params);
|
|
|
|
/*
|
|
* Following dm_get_status_* functions will allocate approriate status structure
|
|
* from passed mempool together with the necessary character arrays.
|
|
* Destroying the mempool will release all asociated allocation.
|
|
*/
|
|
|
|
/* Parse params from STATUS call for mirror target */
|
|
typedef enum {
|
|
DM_STATUS_MIRROR_ALIVE = 'A',/* No failures */
|
|
DM_STATUS_MIRROR_FLUSH_FAILED = 'F',/* Mirror out-of-sync */
|
|
DM_STATUS_MIRROR_WRITE_FAILED = 'D',/* Mirror out-of-sync */
|
|
DM_STATUS_MIRROR_SYNC_FAILED = 'S',/* Mirror out-of-sync */
|
|
DM_STATUS_MIRROR_READ_FAILED = 'R',/* Mirror data unaffected */
|
|
DM_STATUS_MIRROR_UNCLASSIFIED = 'U' /* Bug */
|
|
} dm_status_mirror_health_t;
|
|
|
|
struct dm_status_mirror {
|
|
uint64_t total_regions;
|
|
uint64_t insync_regions;
|
|
uint32_t dev_count; /* # of devs[] elements (<= 8) */
|
|
struct {
|
|
dm_status_mirror_health_t health;
|
|
uint32_t major;
|
|
uint32_t minor;
|
|
} *devs; /* array with individual legs */
|
|
const char *log_type; /* core, disk,.... */
|
|
uint32_t log_count; /* # of logs[] elements */
|
|
struct {
|
|
dm_status_mirror_health_t health;
|
|
uint32_t major;
|
|
uint32_t minor;
|
|
} *logs; /* array with individual logs */
|
|
};
|
|
|
|
int dm_get_status_mirror(struct dm_pool *mem, const char *params,
|
|
struct dm_status_mirror **status);
|
|
|
|
/* Parse params from STATUS call for raid target */
|
|
struct dm_status_raid {
|
|
uint64_t reserved;
|
|
uint64_t total_regions; /* sectors */
|
|
uint64_t insync_regions; /* sectors */
|
|
uint64_t mismatch_count;
|
|
uint32_t dev_count;
|
|
char *raid_type;
|
|
/* A - alive, a - alive not in-sync, D - dead/failed */
|
|
char *dev_health;
|
|
/* idle, frozen, resync, recover, check, repair */
|
|
char *sync_action;
|
|
uint64_t data_offset; /* RAID out-of-place reshaping */
|
|
};
|
|
|
|
int dm_get_status_raid(struct dm_pool *mem, const char *params,
|
|
struct dm_status_raid **status);
|
|
|
|
/* Parse params from STATUS call for cache target */
|
|
struct dm_status_cache {
|
|
uint64_t version; /* zero for now */
|
|
|
|
uint32_t metadata_block_size; /* in 512B sectors */
|
|
uint32_t block_size; /* AKA 'chunk_size' */
|
|
|
|
uint64_t metadata_used_blocks;
|
|
uint64_t metadata_total_blocks;
|
|
|
|
uint64_t used_blocks;
|
|
uint64_t dirty_blocks;
|
|
uint64_t total_blocks;
|
|
|
|
uint64_t read_hits;
|
|
uint64_t read_misses;
|
|
uint64_t write_hits;
|
|
uint64_t write_misses;
|
|
|
|
uint64_t demotions;
|
|
uint64_t promotions;
|
|
|
|
uint64_t feature_flags;
|
|
|
|
int core_argc;
|
|
char **core_argv;
|
|
|
|
char *policy_name;
|
|
int policy_argc;
|
|
char **policy_argv;
|
|
|
|
unsigned error : 1; /* detected error (switches to fail soon) */
|
|
unsigned fail : 1; /* all I/O fails */
|
|
unsigned needs_check : 1; /* metadata needs check */
|
|
unsigned read_only : 1; /* metadata may not be changed */
|
|
uint32_t reserved : 28;
|
|
};
|
|
|
|
int dm_get_status_cache(struct dm_pool *mem, const char *params,
|
|
struct dm_status_cache **status);
|
|
|
|
/*
|
|
* Parse params from STATUS call for snapshot target
|
|
*
|
|
* Snapshot target's format:
|
|
* <= 1.7.0: <used_sectors>/<total_sectors>
|
|
* >= 1.8.0: <used_sectors>/<total_sectors> <metadata_sectors>
|
|
*/
|
|
struct dm_status_snapshot {
|
|
uint64_t used_sectors; /* in 512b units */
|
|
uint64_t total_sectors;
|
|
uint64_t metadata_sectors;
|
|
unsigned has_metadata_sectors : 1; /* set when metadata_sectors is present */
|
|
unsigned invalid : 1; /* set when snapshot is invalidated */
|
|
unsigned merge_failed : 1; /* set when snapshot merge failed */
|
|
unsigned overflow : 1; /* set when snapshot overflows */
|
|
};
|
|
|
|
int dm_get_status_snapshot(struct dm_pool *mem, const char *params,
|
|
struct dm_status_snapshot **status);
|
|
|
|
/* Parse params from STATUS call for thin_pool target */
|
|
typedef enum {
|
|
DM_THIN_DISCARDS_IGNORE,
|
|
DM_THIN_DISCARDS_NO_PASSDOWN,
|
|
DM_THIN_DISCARDS_PASSDOWN
|
|
} dm_thin_discards_t;
|
|
|
|
struct dm_status_thin_pool {
|
|
uint64_t transaction_id;
|
|
uint64_t used_metadata_blocks;
|
|
uint64_t total_metadata_blocks;
|
|
uint64_t used_data_blocks;
|
|
uint64_t total_data_blocks;
|
|
uint64_t held_metadata_root;
|
|
uint32_t read_only; /* metadata may not be changed */
|
|
dm_thin_discards_t discards;
|
|
uint32_t fail : 1; /* all I/O fails */
|
|
uint32_t error_if_no_space : 1; /* otherwise queue_if_no_space */
|
|
uint32_t out_of_data_space : 1; /* metadata may be changed, but data may not be allocated (no rw) */
|
|
uint32_t needs_check : 1; /* metadata needs check */
|
|
uint32_t error : 1; /* detected error (switches to fail soon) */
|
|
uint32_t reserved : 27;
|
|
};
|
|
|
|
int dm_get_status_thin_pool(struct dm_pool *mem, const char *params,
|
|
struct dm_status_thin_pool **status);
|
|
|
|
/* Parse params from STATUS call for thin target */
|
|
struct dm_status_thin {
|
|
uint64_t mapped_sectors;
|
|
uint64_t highest_mapped_sector;
|
|
uint32_t fail : 1; /* Thin volume fails I/O */
|
|
uint32_t reserved : 31;
|
|
};
|
|
|
|
int dm_get_status_thin(struct dm_pool *mem, const char *params,
|
|
struct dm_status_thin **status);
|
|
|
|
/*
|
|
* device-mapper statistics support
|
|
*/
|
|
|
|
/*
|
|
* Statistics handle.
|
|
*
|
|
* Operations on dm_stats objects include managing statistics regions
|
|
* and obtaining and manipulating current counter values from the
|
|
* kernel. Methods are provided to return baisc count values and to
|
|
* derive time-based metrics when a suitable interval estimate is
|
|
* provided.
|
|
*
|
|
* Internally the dm_stats handle contains a pointer to a table of one
|
|
* or more dm_stats_region objects representing the regions registered
|
|
* with the dm_stats_create_region() method. These in turn point to a
|
|
* table of one or more dm_stats_counters objects containing the
|
|
* counter sets for each defined area within the region:
|
|
*
|
|
* dm_stats->dm_stats_region[nr_regions]->dm_stats_counters[nr_areas]
|
|
*
|
|
* This structure is private to the library and may change in future
|
|
* versions: all users should make use of the public interface and treat
|
|
* the dm_stats type as an opaque handle.
|
|
*
|
|
* Regions and counter sets are stored in order of increasing region_id.
|
|
* Depending on region specifications and the sequence of create and
|
|
* delete operations this may not correspond to increasing sector
|
|
* number: users of the library should not assume that this is the case
|
|
* unless region creation is deliberately managed to ensure this (by
|
|
* always creating regions in strict order of ascending sector address).
|
|
*
|
|
* Regions may also overlap so the same sector range may be included in
|
|
* more than one region or area: applications should be prepared to deal
|
|
* with this or manage regions such that it does not occur.
|
|
*/
|
|
struct dm_stats;
|
|
|
|
/*
|
|
* Histogram handle.
|
|
*
|
|
* A histogram object represents the latency histogram values and bin
|
|
* boundaries of the histogram associated with a particular area.
|
|
*
|
|
* Operations on the handle allow the number of bins, bin boundaries,
|
|
* counts and relative proportions to be obtained as well as the
|
|
* conversion of a histogram or its bounds to a compact string
|
|
* representation.
|
|
*/
|
|
struct dm_histogram;
|
|
|
|
/*
|
|
* Allocate a dm_stats handle to use for subsequent device-mapper
|
|
* statistics operations. A program_id may be specified and will be
|
|
* used by default for subsequent operations on this handle.
|
|
*
|
|
* If program_id is NULL or the empty string a program_id will be
|
|
* automatically set to the value contained in /proc/self/comm.
|
|
*/
|
|
struct dm_stats *dm_stats_create(const char *program_id);
|
|
|
|
/*
|
|
* Bind a dm_stats handle to the specified device major and minor
|
|
* values. Any previous binding is cleared and any preexisting counter
|
|
* data contained in the handle is released.
|
|
*/
|
|
int dm_stats_bind_devno(struct dm_stats *dms, int major, int minor);
|
|
|
|
/*
|
|
* Bind a dm_stats handle to the specified device name.
|
|
* Any previous binding is cleared and any preexisting counter
|
|
* data contained in the handle is released.
|
|
*/
|
|
int dm_stats_bind_name(struct dm_stats *dms, const char *name);
|
|
|
|
/*
|
|
* Bind a dm_stats handle to the specified device UUID.
|
|
* Any previous binding is cleared and any preexisting counter
|
|
* data contained in the handle is released.
|
|
*/
|
|
int dm_stats_bind_uuid(struct dm_stats *dms, const char *uuid);
|
|
|
|
/*
|
|
* Bind a dm_stats handle to the device backing the file referenced
|
|
* by the specified file descriptor.
|
|
*
|
|
* File descriptor fd must reference a regular file, open for reading,
|
|
* in a local file system, backed by a device-mapper device, that
|
|
* supports the FIEMAP ioctl, and that returns data describing the
|
|
* physical location of extents.
|
|
*/
|
|
int dm_stats_bind_from_fd(struct dm_stats *dms, int fd);
|
|
/*
|
|
* Test whether the running kernel supports the precise_timestamps
|
|
* feature. Presence of this feature also implies histogram support.
|
|
* The library will check this call internally and fails any attempt
|
|
* to use nanosecond counters or histograms on kernels that fail to
|
|
* meet this check.
|
|
*/
|
|
int dm_message_supports_precise_timestamps(void);
|
|
|
|
/*
|
|
* Precise timetamps and histogram support.
|
|
*
|
|
* Test for the presence of precise_timestamps and histogram support.
|
|
*/
|
|
int dm_stats_driver_supports_precise(void);
|
|
int dm_stats_driver_supports_histogram(void);
|
|
|
|
/*
|
|
* Returns 1 if the specified region has the precise_timestamps feature
|
|
* enabled (i.e. produces nanosecond-precision counter values) or 0 for
|
|
* a region using the default milisecond precision.
|
|
*/
|
|
int dm_stats_get_region_precise_timestamps(const struct dm_stats *dms,
|
|
uint64_t region_id);
|
|
|
|
/*
|
|
* Returns 1 if the region at the current cursor location has the
|
|
* precise_timestamps feature enabled (i.e. produces
|
|
* nanosecond-precision counter values) or 0 for a region using the
|
|
* default milisecond precision.
|
|
*/
|
|
int dm_stats_get_current_region_precise_timestamps(const struct dm_stats *dms);
|
|
|
|
#define DM_STATS_ALL_PROGRAMS ""
|
|
/*
|
|
* Parse the response from a @stats_list message. dm_stats_list will
|
|
* allocate the necessary dm_stats and dm_stats region structures from
|
|
* the embedded dm_pool. No counter data will be obtained (the counters
|
|
* members of dm_stats_region objects are set to NULL).
|
|
*
|
|
* A program_id may optionally be supplied; if the argument is non-NULL
|
|
* only regions with a matching program_id value will be considered. If
|
|
* the argument is NULL then the default program_id associated with the
|
|
* dm_stats handle will be used. Passing the special value
|
|
* DM_STATS_ALL_PROGRAMS will cause all regions to be queried
|
|
* regardless of region program_id.
|
|
*/
|
|
int dm_stats_list(struct dm_stats *dms, const char *program_id);
|
|
|
|
#define DM_STATS_REGIONS_ALL UINT64_MAX
|
|
/*
|
|
* Populate a dm_stats object with statistics for one or more regions of
|
|
* the specified device.
|
|
*
|
|
* A program_id may optionally be supplied; if the argument is non-NULL
|
|
* only regions with a matching program_id value will be considered. If
|
|
* the argument is NULL then the default program_id associated with the
|
|
* dm_stats handle will be used. Passing the special value
|
|
* DM_STATS_ALL_PROGRAMS will cause all regions to be queried
|
|
* regardless of region program_id.
|
|
*
|
|
* Passing the special value DM_STATS_REGIONS_ALL as the region_id
|
|
* argument will attempt to retrieve all regions selected by the
|
|
* program_id argument.
|
|
*
|
|
* If region_id is used to request a single region_id to be populated
|
|
* the program_id is ignored.
|
|
*/
|
|
int dm_stats_populate(struct dm_stats *dms, const char *program_id,
|
|
uint64_t region_id);
|
|
|
|
/*
|
|
* Create a new statistics region on the device bound to dms.
|
|
*
|
|
* start and len specify the region start and length in 512b sectors.
|
|
* Passing zero for both start and len will create a region spanning
|
|
* the entire device.
|
|
*
|
|
* Step determines how to subdivide the region into discrete counter
|
|
* sets: a positive value specifies the size of areas into which the
|
|
* region should be split while a negative value will split the region
|
|
* into a number of areas equal to the absolute value of step:
|
|
*
|
|
* - a region with one area spanning the entire device:
|
|
*
|
|
* dm_stats_create_region(dms, 0, 0, -1, p, a);
|
|
*
|
|
* - a region with areas of 1MiB:
|
|
*
|
|
* dm_stats_create_region(dms, 0, 0, 1 << 11, p, a);
|
|
*
|
|
* - one 1MiB region starting at 1024 sectors with two areas:
|
|
*
|
|
* dm_stats_create_region(dms, 1024, 1 << 11, -2, p, a);
|
|
*
|
|
* If precise is non-zero attempt to create a region with nanosecond
|
|
* precision counters using the kernel precise_timestamps feature.
|
|
*
|
|
* precise - A flag to request nanosecond precision counters
|
|
* to be used for this region.
|
|
*
|
|
* histogram_bounds - specify the boundaries of a latency histogram to
|
|
* be tracked for the region. The values are expressed as an array of
|
|
* uint64_t terminated with a zero. Values must be in order of ascending
|
|
* magnitude and specify the upper bounds of successive histogram bins
|
|
* in nanoseconds (with an implicit lower bound of zero on the first bin
|
|
* and an implicit upper bound of infinity on the final bin). For
|
|
* example:
|
|
*
|
|
* uint64_t bounds_ary[] = { 1000, 2000, 3000, 0 };
|
|
*
|
|
* Specifies a histogram with four bins: 0-1000ns, 1000-2000ns,
|
|
* 2000-3000ns and >3000ns.
|
|
*
|
|
* The smallest latency value that can be tracked for a region not using
|
|
* precise_timestamps is 1ms: attempting to create a region with
|
|
* histogram boundaries < 1ms will cause the precise_timestamps feature
|
|
* to be enabled for that region automatically if it was not requested
|
|
* explicitly.
|
|
*
|
|
* program_id is an optional string argument that identifies the
|
|
* program creating the region. If program_id is NULL or the empty
|
|
* string the default program_id stored in the handle will be used.
|
|
*
|
|
* user_data is an optional string argument that is added to the
|
|
* content of the aux_data field stored with the statistics region by
|
|
* the kernel.
|
|
*
|
|
* The library may also use this space internally, for example, to
|
|
* store a group descriptor or other metadata: in this case the
|
|
* library will strip any internal data fields from the value before
|
|
* it is returned via a call to dm_stats_get_region_aux_data().
|
|
*
|
|
* The user data stored is not accessed by the library or kernel and
|
|
* may be used to store an arbitrary data word (embedded whitespace is
|
|
* not permitted).
|
|
*
|
|
* An application using both the library and direct access to the
|
|
* @stats_list device-mapper message may see the internal values stored
|
|
* in this field by the library. In such cases any string up to and
|
|
* including the first '#' in the field must be treated as an opaque
|
|
* value and preserved across any external modification of aux_data.
|
|
*
|
|
* The region_id of the newly-created region is returned in *region_id
|
|
* if it is non-NULL.
|
|
*/
|
|
int dm_stats_create_region(struct dm_stats *dms, uint64_t *region_id,
|
|
uint64_t start, uint64_t len, int64_t step,
|
|
int precise, struct dm_histogram *bounds,
|
|
const char *program_id, const char *user_data);
|
|
|
|
/*
|
|
* Delete the specified statistics region. This will also mark the
|
|
* region as not-present and discard any existing statistics data.
|
|
*/
|
|
int dm_stats_delete_region(struct dm_stats *dms, uint64_t region_id);
|
|
|
|
/*
|
|
* Clear the specified statistics region. This requests the kernel to
|
|
* zero all counter values (except in-flight I/O). Note that this
|
|
* operation is not atomic with respect to reads of the counters; any IO
|
|
* events occurring between the last print operation and the clear will
|
|
* be lost. This can be avoided by using the atomic print-and-clear
|
|
* function of the dm_stats_print_region() call or by using the higher
|
|
* level dm_stats_populate() interface.
|
|
*/
|
|
int dm_stats_clear_region(struct dm_stats *dms, uint64_t region_id);
|
|
|
|
/*
|
|
* Print the current counter values for the specified statistics region
|
|
* and return them as a string. The memory for the string buffer will
|
|
* be allocated from the dm_stats handle's private pool and should be
|
|
* returned by calling dm_stats_buffer_destroy() when no longer
|
|
* required. The pointer will become invalid following any call that
|
|
* clears or reinitializes the handle (destroy, list, populate, bind).
|
|
*
|
|
* This allows applications that wish to access the raw message response
|
|
* to obtain it via a dm_stats handle; no parsing of the textual counter
|
|
* data is carried out by this function.
|
|
*
|
|
* Most users are recommended to use the dm_stats_populate() call
|
|
* instead since this will automatically parse the statistics data into
|
|
* numeric form accessible via the dm_stats_get_*() counter access
|
|
* methods.
|
|
*
|
|
* A subset of the data lines may be requested by setting the
|
|
* start_line and num_lines parameters. If both are zero all data
|
|
* lines are returned.
|
|
*
|
|
* If the clear parameter is non-zero the operation will also
|
|
* atomically reset all counter values to zero (except in-flight IO).
|
|
*/
|
|
char *dm_stats_print_region(struct dm_stats *dms, uint64_t region_id,
|
|
unsigned start_line, unsigned num_lines,
|
|
unsigned clear);
|
|
|
|
/*
|
|
* Destroy a statistics response buffer obtained from a call to
|
|
* dm_stats_print_region().
|
|
*/
|
|
void dm_stats_buffer_destroy(struct dm_stats *dms, char *buffer);
|
|
|
|
/*
|
|
* Determine the number of regions contained in a dm_stats handle
|
|
* following a dm_stats_list() or dm_stats_populate() call.
|
|
*
|
|
* The value returned is the number of registered regions visible with the
|
|
* progam_id value used for the list or populate operation and may not be
|
|
* equal to the highest present region_id (either due to program_id
|
|
* filtering or gaps in the sequence of region_id values).
|
|
*
|
|
* Always returns zero on an empty handle.
|
|
*/
|
|
uint64_t dm_stats_get_nr_regions(const struct dm_stats *dms);
|
|
|
|
/*
|
|
* Determine the number of groups contained in a dm_stats handle
|
|
* following a dm_stats_list() or dm_stats_populate() call.
|
|
*
|
|
* The value returned is the number of registered groups visible with the
|
|
* progam_id value used for the list or populate operation and may not be
|
|
* equal to the highest present group_id (either due to program_id
|
|
* filtering or gaps in the sequence of group_id values).
|
|
*
|
|
* Always returns zero on an empty handle.
|
|
*/
|
|
uint64_t dm_stats_get_nr_groups(const struct dm_stats *dms);
|
|
|
|
/*
|
|
* Test whether region_id is present in this dm_stats handle.
|
|
*/
|
|
int dm_stats_region_present(const struct dm_stats *dms, uint64_t region_id);
|
|
|
|
/*
|
|
* Returns the number of areas (counter sets) contained in the specified
|
|
* region_id of the supplied dm_stats handle.
|
|
*/
|
|
uint64_t dm_stats_get_region_nr_areas(const struct dm_stats *dms,
|
|
uint64_t region_id);
|
|
|
|
/*
|
|
* Returns the total number of areas (counter sets) in all regions of the
|
|
* given dm_stats object.
|
|
*/
|
|
uint64_t dm_stats_get_nr_areas(const struct dm_stats *dms);
|
|
|
|
/*
|
|
* Test whether group_id is present in this dm_stats handle.
|
|
*/
|
|
int dm_stats_group_present(const struct dm_stats *dms, uint64_t group_id);
|
|
|
|
/*
|
|
* Return the number of bins in the histogram configuration for the
|
|
* specified region or zero if no histogram specification is configured.
|
|
* Valid following a dm_stats_list() or dm_stats_populate() operation.
|
|
*/
|
|
int dm_stats_get_region_nr_histogram_bins(const struct dm_stats *dms,
|
|
uint64_t region_id);
|
|
|
|
/*
|
|
* Parse a histogram string with optional unit suffixes into a
|
|
* dm_histogram bounds description.
|
|
*
|
|
* A histogram string is a string of numbers "n1,n2,n3,..." that
|
|
* represent the boundaries of a histogram. The first and final bins
|
|
* have implicit lower and upper bounds of zero and infinity
|
|
* respectively and boundary values must occur in order of ascending
|
|
* magnitude. Unless a unit suffix is given all values are specified in
|
|
* nanoseconds.
|
|
*
|
|
* For example, if bounds_str="300,600,900", the region will be created
|
|
* with a histogram containing four bins. Each report will include four
|
|
* numbers a:b:c:d. a is the number of requests that took between 0 and
|
|
* 300ns to complete, b is the number of requests that took 300-600ns to
|
|
* complete, c is the number of requests that took 600-900ns to complete
|
|
* and d is the number of requests that took more than 900ns to
|
|
* complete.
|
|
*
|
|
* An optional unit suffix of 's', 'ms', 'us', or 'ns' may be used to
|
|
* specify units of seconds, miliseconds, microseconds, or nanoseconds:
|
|
*
|
|
* bounds_str="1ns,1us,1ms,1s"
|
|
* bounds_str="500us,1ms,1500us,2ms"
|
|
* bounds_str="200ms,400ms,600ms,800ms,1s"
|
|
*
|
|
* The smallest valid unit of time for a histogram specification depends
|
|
* on whether the region uses precise timestamps: for a region with the
|
|
* default milisecond precision the smallest possible histogram boundary
|
|
* magnitude is one milisecond: attempting to use a histogram with a
|
|
* boundary less than one milisecond when creating a region will cause
|
|
* the region to be created with the precise_timestamps feature enabled.
|
|
*
|
|
* On sucess a pointer to the struct dm_histogram representing the
|
|
* bounds values is returned, or NULL in the case of error. The returned
|
|
* pointer should be freed using dm_free() when no longer required.
|
|
*/
|
|
struct dm_histogram *dm_histogram_bounds_from_string(const char *bounds_str);
|
|
|
|
/*
|
|
* Parse a zero terminated array of uint64_t into a dm_histogram bounds
|
|
* description.
|
|
*
|
|
* Each value in the array specifies the upper bound of a bin in the
|
|
* latency histogram in nanoseconds. Values must appear in ascending
|
|
* order of magnitude.
|
|
*
|
|
* The smallest valid unit of time for a histogram specification depends
|
|
* on whether the region uses precise timestamps: for a region with the
|
|
* default milisecond precision the smallest possible histogram boundary
|
|
* magnitude is one milisecond: attempting to use a histogram with a
|
|
* boundary less than one milisecond when creating a region will cause
|
|
* the region to be created with the precise_timestamps feature enabled.
|
|
*/
|
|
struct dm_histogram *dm_histogram_bounds_from_uint64(const uint64_t *bounds);
|
|
|
|
/*
|
|
* Destroy the histogram bounds array obtained from a call to
|
|
* dm_histogram_bounds_from_string().
|
|
*/
|
|
void dm_histogram_bounds_destroy(struct dm_histogram *bounds);
|
|
|
|
/*
|
|
* Destroy a dm_stats object and all associated regions, counter
|
|
* sets and histograms.
|
|
*/
|
|
void dm_stats_destroy(struct dm_stats *dms);
|
|
|
|
/*
|
|
* Counter sampling interval
|
|
*/
|
|
|
|
/*
|
|
* Set the sampling interval for counter data to the specified value in
|
|
* either nanoseconds or milliseconds.
|
|
*
|
|
* The interval is used to calculate time-based metrics from the basic
|
|
* counter data: an interval must be set before calling any of the
|
|
* metric methods.
|
|
*
|
|
* For best accuracy the duration should be measured and updated at the
|
|
* end of each interval.
|
|
*
|
|
* All values are stored internally with nanosecond precision and are
|
|
* converted to or from ms when the millisecond interfaces are used.
|
|
*/
|
|
void dm_stats_set_sampling_interval_ns(struct dm_stats *dms,
|
|
uint64_t interval_ns);
|
|
|
|
void dm_stats_set_sampling_interval_ms(struct dm_stats *dms,
|
|
uint64_t interval_ms);
|
|
|
|
/*
|
|
* Retrieve the configured sampling interval in either nanoseconds or
|
|
* milliseconds.
|
|
*/
|
|
uint64_t dm_stats_get_sampling_interval_ns(const struct dm_stats *dms);
|
|
uint64_t dm_stats_get_sampling_interval_ms(const struct dm_stats *dms);
|
|
|
|
/*
|
|
* Override program_id. This may be used to change the default
|
|
* program_id value for an existing handle. If the allow_empty argument
|
|
* is non-zero a NULL or empty program_id is permitted.
|
|
*
|
|
* Use with caution! Most users of the library should set a valid,
|
|
* non-NULL program_id for every statistics region created. Failing to
|
|
* do so may result in confusing state when multiple programs are
|
|
* creating and managing statistics regions.
|
|
*
|
|
* All users of the library are encouraged to choose an unambiguous,
|
|
* unique program_id: this could be based on PID (for programs that
|
|
* create, report, and delete regions in a single process), session id,
|
|
* executable name, or some other distinguishing string.
|
|
*
|
|
* Use of the empty string as a program_id does not simplify use of the
|
|
* library or the command line tools and use of this value is strongly
|
|
* discouraged.
|
|
*/
|
|
int dm_stats_set_program_id(struct dm_stats *dms, int allow_empty,
|
|
const char *program_id);
|
|
|
|
/*
|
|
* Region properties: size, length & area_len.
|
|
*
|
|
* Region start and length are returned in units of 512b as specified
|
|
* at region creation time. The area_len value gives the size of areas
|
|
* into which the region has been subdivided. For regions with a single
|
|
* area spanning the range this value is equal to the region length.
|
|
*
|
|
* For regions created with a specified number of areas the value
|
|
* represents the size of the areas into which the kernel divided the
|
|
* region excluding any rounding of the last area size. The number of
|
|
* areas may be obtained using the dm_stats_nr_areas_region() call.
|
|
*
|
|
* All values are returned in units of 512b sectors.
|
|
*/
|
|
int dm_stats_get_region_start(const struct dm_stats *dms, uint64_t *start,
|
|
uint64_t region_id);
|
|
|
|
int dm_stats_get_region_len(const struct dm_stats *dms, uint64_t *len,
|
|
uint64_t region_id);
|
|
|
|
int dm_stats_get_region_area_len(const struct dm_stats *dms,
|
|
uint64_t *len, uint64_t region_id);
|
|
|
|
/*
|
|
* Area properties: start, offset and length.
|
|
*
|
|
* The area length is always equal to the area length of the region
|
|
* that contains it and is obtained from dm_stats_get_region_area_len().
|
|
*
|
|
* The start of an area is a function of the area_id and the containing
|
|
* region's start and area length: it gives the absolute offset into the
|
|
* containing device of the beginning of the area.
|
|
*
|
|
* The offset expresses the area's relative offset into the current
|
|
* region. I.e. the area start minus the start offset of the containing
|
|
* region.
|
|
*
|
|
* All values are returned in units of 512b sectors.
|
|
*/
|
|
int dm_stats_get_area_start(const struct dm_stats *dms, uint64_t *start,
|
|
uint64_t region_id, uint64_t area_id);
|
|
|
|
int dm_stats_get_area_offset(const struct dm_stats *dms, uint64_t *offset,
|
|
uint64_t region_id, uint64_t area_id);
|
|
|
|
/*
|
|
* Retrieve program_id and user aux_data for a specific region.
|
|
*
|
|
* Only valid following a call to dm_stats_list().
|
|
*/
|
|
|
|
/*
|
|
* Retrieve program_id for the specified region.
|
|
*
|
|
* The returned pointer does not need to be freed separately from the
|
|
* dm_stats handle but will become invalid after a dm_stats_destroy(),
|
|
* dm_stats_list(), dm_stats_populate(), or dm_stats_bind*() of the
|
|
* handle from which it was obtained.
|
|
*/
|
|
const char *dm_stats_get_region_program_id(const struct dm_stats *dms,
|
|
uint64_t region_id);
|
|
|
|
/*
|
|
* Retrieve user aux_data set for the specified region. This function
|
|
* will return any stored user aux_data as a string in the memory
|
|
* pointed to by the aux_data argument.
|
|
*
|
|
* Any library internal aux_data fields, such as DMS_GROUP descriptors,
|
|
* are stripped before the value is returned.
|
|
*
|
|
* The returned pointer does not need to be freed separately from the
|
|
* dm_stats handle but will become invalid after a dm_stats_destroy(),
|
|
* dm_stats_list(), dm_stats_populate(), or dm_stats_bind*() of the
|
|
* handle from which it was obtained.
|
|
*/
|
|
const char *dm_stats_get_region_aux_data(const struct dm_stats *dms,
|
|
uint64_t region_id);
|
|
|
|
typedef enum {
|
|
DM_STATS_OBJECT_TYPE_NONE,
|
|
DM_STATS_OBJECT_TYPE_AREA,
|
|
DM_STATS_OBJECT_TYPE_REGION,
|
|
DM_STATS_OBJECT_TYPE_GROUP
|
|
} dm_stats_obj_type_t;
|
|
|
|
/*
|
|
* Statistics cursor
|
|
*
|
|
* A dm_stats handle maintains an optional cursor into the statistics
|
|
* tables that it stores. Iterators are provided to visit each region,
|
|
* area, or group in a handle and accessor methods are provided to
|
|
* obtain properties and values for the object at the current cursor
|
|
* position.
|
|
*
|
|
* Using the cursor simplifies walking all regions or groups when
|
|
* the tables are sparse (i.e. contains some present and some
|
|
* non-present region_id or group_id values either due to program_id
|
|
* filtering or the ordering of region and group creation and deletion).
|
|
*
|
|
* Simple macros are provided to visit each area, region, or group,
|
|
* contained in a handle and applications are encouraged to use these
|
|
* where possible.
|
|
*/
|
|
|
|
/*
|
|
* Walk flags are used to initialise a dm_stats handle's cursor control
|
|
* and to select region or group aggregation when calling a metric or
|
|
* counter property method with immediate group, region, and area ID
|
|
* values.
|
|
*
|
|
* Walk flags are stored in the uppermost word of a uint64_t so that
|
|
* a region_id or group_id may be encoded in the lower bits. This
|
|
* allows an aggregate region_id or group_id to be specified when
|
|
* retrieving counter or metric values.
|
|
*
|
|
* Flags may be ORred together when used to initialise a dm_stats_walk:
|
|
* the resulting walk will visit instance of each type specified by
|
|
* the flag combination.
|
|
*/
|
|
#define DM_STATS_WALK_AREA 0x1000000000000ULL
|
|
#define DM_STATS_WALK_REGION 0x2000000000000ULL
|
|
#define DM_STATS_WALK_GROUP 0x4000000000000ULL
|
|
|
|
#define DM_STATS_WALK_ALL 0x7000000000000ULL
|
|
#define DM_STATS_WALK_DEFAULT (DM_STATS_WALK_AREA | DM_STATS_WALK_REGION)
|
|
|
|
/*
|
|
* Skip regions from a DM_STATS_WALK_REGION that contain only a single
|
|
* area: in this case the region's aggregate values are identical to
|
|
* the values of the single contained area. Setting this flag will
|
|
* suppress these duplicate entries during a dm_stats_walk_* with the
|
|
* DM_STATS_WALK_REGION flag set.
|
|
*/
|
|
#define DM_STATS_WALK_SKIP_SINGLE_AREA 0x8000000000000ULL
|
|
|
|
/*
|
|
* Initialise the cursor control of a dm_stats handle for the specified
|
|
* walk type(s). Including a walk flag in the flags argument will cause
|
|
* any subsequent walk to visit that type of object (until the next
|
|
* call to dm_stats_walk_init()).
|
|
*/
|
|
int dm_stats_walk_init(struct dm_stats *dms, uint64_t flags);
|
|
|
|
/*
|
|
* Set the cursor of a dm_stats handle to address the first present
|
|
* group, region, or area of the currently configured walk. It is
|
|
* valid to attempt to walk a NULL stats handle or a handle containing
|
|
* no present regions; in this case any call to dm_stats_walk_next()
|
|
* becomes a no-op and all calls to dm_stats_walk_end() return true.
|
|
*/
|
|
void dm_stats_walk_start(struct dm_stats *dms);
|
|
|
|
/*
|
|
* Advance the statistics cursor to the next area, or to the next
|
|
* present region if at the end of the current region. If the end of
|
|
* the region, area, or group tables is reached a subsequent call to
|
|
* dm_stats_walk_end() will return 1 and dm_stats_object_type() called
|
|
* on the location will return DM_STATS_OBJECT_TYPE_NONE,
|
|
*/
|
|
void dm_stats_walk_next(struct dm_stats *dms);
|
|
|
|
/*
|
|
* Force the statistics cursor to advance to the next region. This will
|
|
* stop any in-progress area walk (by clearing DM_STATS_WALK_AREA) and
|
|
* advance the cursor to the next present region, the first present
|
|
* group (if DM_STATS_GROUP_WALK is set), or to the end. In this case a
|
|
* subsequent call to dm_stats_walk_end() will return 1 and a call to
|
|
* dm_stats_object_type() for the location will return
|
|
* DM_STATS_OBJECT_TYPE_NONE.
|
|
*/
|
|
void dm_stats_walk_next_region(struct dm_stats *dms);
|
|
|
|
/*
|
|
* Test whether the end of a statistics walk has been reached.
|
|
*/
|
|
int dm_stats_walk_end(struct dm_stats *dms);
|
|
|
|
/*
|
|
* Return the type of object at the location specified by region_id
|
|
* and area_id. If either region_id or area_id uses one of the special
|
|
* values DM_STATS_REGION_CURRENT or DM_STATS_AREA_CURRENT the
|
|
* corresponding region or area identifier will be taken from the
|
|
* current cursor location. If the cursor location or the value encoded
|
|
* by region_id and area_id indicates an aggregate region or group,
|
|
* this will be reflected in the value returned.
|
|
*/
|
|
dm_stats_obj_type_t dm_stats_object_type(const struct dm_stats *dms,
|
|
uint64_t region_id,
|
|
uint64_t area_id);
|
|
|
|
/*
|
|
* Return the type of object at the current stats cursor location.
|
|
*/
|
|
dm_stats_obj_type_t dm_stats_current_object_type(const struct dm_stats *dms);
|
|
|
|
/*
|
|
* Stats iterators
|
|
*
|
|
* C 'for' and 'do'/'while' style iterators for dm_stats data.
|
|
*
|
|
* It is not safe to call any function that modifies the region table
|
|
* within the loop body (i.e. dm_stats_list(), dm_stats_populate(),
|
|
* dm_stats_init(), or dm_stats_destroy()).
|
|
*
|
|
* All counter and property (dm_stats_get_*) access methods, as well as
|
|
* dm_stats_populate_region() can be safely called from loops.
|
|
*
|
|
*/
|
|
|
|
/*
|
|
* Iterate over the regions table visiting each region.
|
|
*
|
|
* If the region table is empty or unpopulated the loop body will not be
|
|
* executed.
|
|
*/
|
|
#define dm_stats_foreach_region(dms) \
|
|
for (dm_stats_walk_init((dms), DM_STATS_WALK_REGION), \
|
|
dm_stats_walk_start((dms)); \
|
|
!dm_stats_walk_end((dms)); dm_stats_walk_next_region((dms)))
|
|
|
|
/*
|
|
* Iterate over the regions table visiting each area.
|
|
*
|
|
* If the region table is empty or unpopulated the loop body will not
|
|
* be executed.
|
|
*/
|
|
#define dm_stats_foreach_area(dms) \
|
|
for (dm_stats_walk_init((dms), DM_STATS_WALK_AREA), \
|
|
dm_stats_walk_start((dms)); \
|
|
!dm_stats_walk_end((dms)); dm_stats_walk_next((dms)))
|
|
|
|
/*
|
|
* Iterate over the regions table visiting each group. Metric and
|
|
* counter methods will return values for the group.
|
|
*
|
|
* If the group table is empty or unpopulated the loop body will not
|
|
* be executed.
|
|
*/
|
|
#define dm_stats_foreach_group(dms) \
|
|
for (dm_stats_walk_init((dms), DM_STATS_WALK_GROUP), \
|
|
dm_stats_walk_start(dms); \
|
|
!dm_stats_walk_end(dms); \
|
|
dm_stats_walk_next(dms))
|
|
|
|
/*
|
|
* Start a walk iterating over the regions contained in dm_stats handle
|
|
* 'dms'.
|
|
*
|
|
* The body of the loop should call dm_stats_walk_next() or
|
|
* dm_stats_walk_next_region() to advance to the next element.
|
|
*
|
|
* The loop body is executed at least once even if the stats handle is
|
|
* empty.
|
|
*/
|
|
#define dm_stats_walk_do(dms) \
|
|
do { \
|
|
dm_stats_walk_start((dms)); \
|
|
do
|
|
|
|
/*
|
|
* Start a 'while' style loop or end a 'do..while' loop iterating over the
|
|
* regions contained in dm_stats handle 'dms'.
|
|
*/
|
|
#define dm_stats_walk_while(dms) \
|
|
while(!dm_stats_walk_end((dms))); \
|
|
} while (0)
|
|
|
|
/*
|
|
* Cursor relative property methods
|
|
*
|
|
* Calls with the prefix dm_stats_get_current_* operate relative to the
|
|
* current cursor location, returning properties for the current region
|
|
* or area of the supplied dm_stats handle.
|
|
*
|
|
*/
|
|
|
|
/*
|
|
* Returns the number of areas (counter sets) contained in the current
|
|
* region of the supplied dm_stats handle.
|
|
*/
|
|
uint64_t dm_stats_get_current_nr_areas(const struct dm_stats *dms);
|
|
|
|
/*
|
|
* Retrieve the current values of the stats cursor.
|
|
*/
|
|
uint64_t dm_stats_get_current_region(const struct dm_stats *dms);
|
|
uint64_t dm_stats_get_current_area(const struct dm_stats *dms);
|
|
|
|
/*
|
|
* Current region properties: size, length & area_len.
|
|
*
|
|
* See the comments for the equivalent dm_stats_get_* versions for a
|
|
* complete description of these methods.
|
|
*
|
|
* All values are returned in units of 512b sectors.
|
|
*/
|
|
int dm_stats_get_current_region_start(const struct dm_stats *dms,
|
|
uint64_t *start);
|
|
|
|
int dm_stats_get_current_region_len(const struct dm_stats *dms,
|
|
uint64_t *len);
|
|
|
|
int dm_stats_get_current_region_area_len(const struct dm_stats *dms,
|
|
uint64_t *area_len);
|
|
|
|
/*
|
|
* Current area properties: start and length.
|
|
*
|
|
* See the comments for the equivalent dm_stats_get_* versions for a
|
|
* complete description of these methods.
|
|
*
|
|
* All values are returned in units of 512b sectors.
|
|
*/
|
|
int dm_stats_get_current_area_start(const struct dm_stats *dms,
|
|
uint64_t *start);
|
|
|
|
int dm_stats_get_current_area_offset(const struct dm_stats *dms,
|
|
uint64_t *offset);
|
|
|
|
int dm_stats_get_current_area_len(const struct dm_stats *dms,
|
|
uint64_t *start);
|
|
|
|
/*
|
|
* Return a pointer to the program_id string for region at the current
|
|
* cursor location.
|
|
*/
|
|
const char *dm_stats_get_current_region_program_id(const struct dm_stats *dms);
|
|
|
|
/*
|
|
* Return a pointer to the user aux_data string for the region at the
|
|
* current cursor location.
|
|
*/
|
|
const char *dm_stats_get_current_region_aux_data(const struct dm_stats *dms);
|
|
|
|
/*
|
|
* Statistics groups and data aggregation.
|
|
*/
|
|
|
|
/*
|
|
* Create a new group in stats handle dms from the group descriptor
|
|
* passed in group. The group descriptor is a string containing a list
|
|
* of region_id values that will be included in the group. The first
|
|
* region_id found will be the group leader. Ranges of identifiers may
|
|
* be expressed as "M-N", where M and N are the start and end region_id
|
|
* values for the range.
|
|
*/
|
|
int dm_stats_create_group(struct dm_stats *dms, const char *group,
|
|
const char *alias, uint64_t *group_id);
|
|
|
|
/*
|
|
* Remove the specified group_id. If the remove argument is zero the
|
|
* group will be removed but the regions that it contained will remain.
|
|
* If remove is non-zero then all regions that belong to the group will
|
|
* also be removed.
|
|
*/
|
|
int dm_stats_delete_group(struct dm_stats *dms, uint64_t group_id, int remove);
|
|
|
|
/*
|
|
* Set an alias for this group or region. The alias will be returned
|
|
* instead of the normal dm-stats name for this region or group.
|
|
*/
|
|
int dm_stats_set_alias(struct dm_stats *dms, uint64_t group_id,
|
|
const char *alias);
|
|
|
|
/*
|
|
* Returns a pointer to the currently configured alias for id, or the
|
|
* name of the dm device the handle is bound to if no alias has been
|
|
* set. The pointer will be freed automatically when a new alias is set
|
|
* or when the stats handle is cleared.
|
|
*/
|
|
const char *dm_stats_get_alias(const struct dm_stats *dms, uint64_t id);
|
|
|
|
#define DM_STATS_GROUP_NONE UINT64_MAX
|
|
/*
|
|
* Return the group_id that the specified region_id belongs to, or the
|
|
* special value DM_STATS_GROUP_NONE if the region does not belong
|
|
* to any group.
|
|
*/
|
|
uint64_t dm_stats_get_group_id(const struct dm_stats *dms, uint64_t region_id);
|
|
|
|
/*
|
|
* Store a pointer to a string describing the regions that are members
|
|
* of the group specified by group_id in the memory pointed to by buf.
|
|
* The string is in the same format as the 'group' argument to
|
|
* dm_stats_create_group().
|
|
*
|
|
* The pointer does not need to be freed explicitly by the caller: it
|
|
* will become invalid following a subsequent dm_stats_list(),
|
|
* dm_stats_populate() or dm_stats_destroy() of the corresponding
|
|
* dm_stats handle.
|
|
*/
|
|
int dm_stats_get_group_descriptor(const struct dm_stats *dms,
|
|
uint64_t group_id, char **buf);
|
|
|
|
/*
|
|
* Create regions that correspond to the extents of a file in the
|
|
* filesystem and optionally place them into a group.
|
|
*
|
|
* File descriptor fd must reference a regular file, open for reading,
|
|
* in a local file system that supports the FIEMAP ioctl, and that
|
|
* returns data describing the physical location of extents.
|
|
*
|
|
* The file descriptor can be closed by the caller following the call
|
|
* to dm_stats_create_regions_from_fd().
|
|
*
|
|
* Unless nogroup is non-zero the regions will be placed into a group
|
|
* and the group alias set to the value supplied (if alias is NULL no
|
|
* group alias will be assigned).
|
|
*
|
|
* On success the function returns a pointer to an array of uint64_t
|
|
* containing the IDs of the newly created regions. The region_id
|
|
* array is terminated by the value DM_STATS_REGION_NOT_PRESENT and
|
|
* should be freed using dm_free() when no longer required.
|
|
*
|
|
* On error NULL is returned.
|
|
*
|
|
* Following a call to dm_stats_create_regions_from_fd() the handle
|
|
* is guaranteed to be in a listed state, and to contain any region
|
|
* and group identifiers created by the operation.
|
|
*
|
|
* The group_id for the new group is equal to the region_id value in
|
|
* the first array element.
|
|
*/
|
|
uint64_t *dm_stats_create_regions_from_fd(struct dm_stats *dms, int fd,
|
|
int group, int precise,
|
|
struct dm_histogram *bounds,
|
|
const char *alias);
|
|
/*
|
|
* Update a group of regions that correspond to the extents of a file
|
|
* in the filesystem, adding and removing regions to account for
|
|
* allocation changes in the underlying file.
|
|
*
|
|
* File descriptor fd must reference a regular file, open for reading,
|
|
* in a local file system that supports the FIEMAP ioctl, and that
|
|
* returns data describing the physical location of extents.
|
|
*
|
|
* The file descriptor can be closed by the caller following the call
|
|
* to dm_stats_update_regions_from_fd().
|
|
*
|
|
* On success the function returns a pointer to an array of uint64_t
|
|
* containing the IDs of the updated regions (including any existing
|
|
* regions that were not modified by the call).
|
|
*
|
|
* The region_id array is terminated by the special value
|
|
* DM_STATS_REGION_NOT_PRESENT and should be freed using dm_free()
|
|
* when no longer required.
|
|
*
|
|
* On error NULL is returned.
|
|
*
|
|
* Following a call to dm_stats_update_regions_from_fd() the handle
|
|
* is guaranteed to be in a listed state, and to contain any region
|
|
* and group identifiers created by the operation.
|
|
*
|
|
* This function cannot be used with file mapped regions that are
|
|
* not members of a group: either group the regions, or remove them
|
|
* and re-map them with dm_stats_create_regions_from_fd().
|
|
*/
|
|
uint64_t *dm_stats_update_regions_from_fd(struct dm_stats *dms, int fd,
|
|
uint64_t group_id);
|
|
|
|
|
|
/*
|
|
* The file map monitoring daemon can monitor files in two distinct
|
|
* ways: the mode affects the behaviour of the daemon when a file
|
|
* under monitoring is renamed or unlinked, and the conditions which
|
|
* cause the daemon to terminate.
|
|
*
|
|
* In both modes, the daemon will always shut down when the group
|
|
* being monitored is deleted.
|
|
*
|
|
* Follow inode:
|
|
* The daemon follows the inode of the file, as it was at the time the
|
|
* daemon started. The file descriptor referencing the file is kept
|
|
* open at all times, and the daemon will exit when it detects that
|
|
* the file has been unlinked and it is the last holder of a reference
|
|
* to the file.
|
|
*
|
|
* This mode is useful if the file is expected to be renamed, or moved
|
|
* within the file system, while it is being monitored.
|
|
*
|
|
* Follow path:
|
|
* The daemon follows the path that was given on the daemon command
|
|
* line. The file descriptor referencing the file is re-opened on each
|
|
* iteration of the daemon, and the daemon will exit if no file exists
|
|
* at this location (a tolerance is allowed so that a brief delay
|
|
* between unlink() and creat() is permitted).
|
|
*
|
|
* This mode is useful if the file is updated by unlinking the original
|
|
* and placing a new file at the same path.
|
|
*/
|
|
|
|
typedef enum {
|
|
DM_FILEMAPD_FOLLOW_INODE,
|
|
DM_FILEMAPD_FOLLOW_PATH,
|
|
DM_FILEMAPD_FOLLOW_NONE
|
|
} dm_filemapd_mode_t;
|
|
|
|
/*
|
|
* Parse a string representation of a dmfilemapd mode.
|
|
*
|
|
* Returns a valid dm_filemapd_mode_t value on success, or
|
|
* DM_FILEMAPD_FOLLOW_NONE on error.
|
|
*/
|
|
dm_filemapd_mode_t dm_filemapd_mode_from_string(const char *mode_str);
|
|
|
|
/*
|
|
* Start the dmfilemapd filemap monitoring daemon for the specified
|
|
* file descriptor, group, and file system path. The daemon will
|
|
* monitor the file for allocation changes, and when a change is
|
|
* detected, call dm_stats_update_regions_from_fd() to update the
|
|
* mapped regions for the file.
|
|
*
|
|
* The mode parameter controls the behaviour of the daemon when the
|
|
* file being monitored is unlinked or moved: see the comments for
|
|
* dm_filemapd_mode_t for a full description and possible values.
|
|
*
|
|
* The daemon can be stopped at any time by sending SIGTERM to the
|
|
* daemon pid.
|
|
*/
|
|
int dm_stats_start_filemapd(int fd, uint64_t group_id, const char *path,
|
|
dm_filemapd_mode_t mode, unsigned foreground,
|
|
unsigned verbose);
|
|
|
|
/*
|
|
* Call this to actually run the ioctl.
|
|
*/
|
|
int dm_task_run(struct dm_task *dmt);
|
|
|
|
/*
|
|
* The errno from the last device-mapper ioctl performed by dm_task_run.
|
|
*/
|
|
int dm_task_get_errno(struct dm_task *dmt);
|
|
|
|
/*
|
|
* Call this to make or remove the device nodes associated with previously
|
|
* issued commands.
|
|
*/
|
|
void dm_task_update_nodes(void);
|
|
|
|
/*
|
|
* Mangling support
|
|
*
|
|
* Character whitelist: 0-9, A-Z, a-z, #+-.:=@_
|
|
* HEX mangling format: \xNN, NN being the hex value of the character.
|
|
* (whitelist and format supported by udev)
|
|
*/
|
|
typedef enum {
|
|
DM_STRING_MANGLING_NONE, /* do not mangle at all */
|
|
DM_STRING_MANGLING_AUTO, /* mangle only if not already mangled with hex, error when mixed */
|
|
DM_STRING_MANGLING_HEX /* always mangle with hex encoding, no matter what the input is */
|
|
} dm_string_mangling_t;
|
|
|
|
/*
|
|
* Set/get mangling mode used for device-mapper names and uuids.
|
|
*/
|
|
int dm_set_name_mangling_mode(dm_string_mangling_t name_mangling);
|
|
dm_string_mangling_t dm_get_name_mangling_mode(void);
|
|
|
|
/*
|
|
* Get mangled/unmangled form of the device-mapper name or uuid
|
|
* irrespective of the global setting (set by dm_set_name_mangling_mode).
|
|
* The name or uuid returned needs to be freed after use by calling dm_free!
|
|
*/
|
|
char *dm_task_get_name_mangled(const struct dm_task *dmt);
|
|
char *dm_task_get_name_unmangled(const struct dm_task *dmt);
|
|
char *dm_task_get_uuid_mangled(const struct dm_task *dmt);
|
|
char *dm_task_get_uuid_unmangled(const struct dm_task *dmt);
|
|
|
|
/*
|
|
* Configure the device-mapper directory
|
|
*/
|
|
int dm_set_dev_dir(const char *dir);
|
|
const char *dm_dir(void);
|
|
|
|
/*
|
|
* Configure sysfs directory, /sys by default
|
|
*/
|
|
int dm_set_sysfs_dir(const char *dir);
|
|
const char *dm_sysfs_dir(void);
|
|
|
|
/*
|
|
* Configure default UUID prefix string.
|
|
* Conventionally this is a short capitalised prefix indicating the subsystem
|
|
* that is managing the devices, e.g. "LVM-" or "MPATH-".
|
|
* To support stacks of devices from different subsystems, recursive functions
|
|
* stop recursing if they reach a device with a different prefix.
|
|
*/
|
|
int dm_set_uuid_prefix(const char *uuid_prefix);
|
|
const char *dm_uuid_prefix(void);
|
|
|
|
/*
|
|
* Determine whether a major number belongs to device-mapper or not.
|
|
*/
|
|
int dm_is_dm_major(uint32_t major);
|
|
|
|
/*
|
|
* Get associated device name for given major and minor number by reading
|
|
* the sysfs content. If this is a dm device, get associated dm name, the one
|
|
* that appears in /dev/mapper. DM names could be resolved this way only if
|
|
* kernel used >= 2.6.29, kernel name is found otherwise (e.g. dm-0).
|
|
* If prefer_kernel_name is set, the kernel name is always preferred over
|
|
* device-mapper name for dm devices no matter what the kernel version is.
|
|
* For non-dm devices, we always get associated kernel name, e.g sda, md0 etc.
|
|
* Returns 0 on error or if sysfs is not used (or configured incorrectly),
|
|
* otherwise returns 1 and the supplied buffer holds the device name.
|
|
*/
|
|
int dm_device_get_name(uint32_t major, uint32_t minor,
|
|
int prefer_kernel_name,
|
|
char *buf, size_t buf_size);
|
|
|
|
/*
|
|
* Determine whether a device has any holders (devices
|
|
* using this device). If sysfs is not used (or configured
|
|
* incorrectly), returns 0.
|
|
*/
|
|
int dm_device_has_holders(uint32_t major, uint32_t minor);
|
|
|
|
/*
|
|
* Determine whether a device contains mounted filesystem.
|
|
* If sysfs is not used (or configured incorrectly), returns 0.
|
|
*/
|
|
int dm_device_has_mounted_fs(uint32_t major, uint32_t minor);
|
|
|
|
|
|
/*
|
|
* Callback is invoked for individal mountinfo lines,
|
|
* minor, major and mount target are parsed and unmangled.
|
|
*/
|
|
typedef int (*dm_mountinfo_line_callback_fn) (char *line, unsigned maj, unsigned min,
|
|
char *target, void *cb_data);
|
|
|
|
/*
|
|
* Read all lines from /proc/self/mountinfo,
|
|
* for each line calls read_fn callback.
|
|
*/
|
|
int dm_mountinfo_read(dm_mountinfo_line_callback_fn read_fn, void *cb_data);
|
|
|
|
/*
|
|
* Initialise library
|
|
*/
|
|
void dm_lib_init(void) __attribute__((constructor));
|
|
|
|
/*
|
|
* Release library resources
|
|
*/
|
|
void dm_lib_release(void);
|
|
void dm_lib_exit(void) __attribute__((destructor));
|
|
|
|
/* An optimisation for clients making repeated calls involving dm ioctls */
|
|
void dm_hold_control_dev(int hold_open);
|
|
|
|
/*
|
|
* Use NULL for all devices.
|
|
*/
|
|
int dm_mknodes(const char *name);
|
|
int dm_driver_version(char *version, size_t size);
|
|
|
|
/******************************************************
|
|
* Functions to build and manipulate trees of devices *
|
|
******************************************************/
|
|
struct dm_tree;
|
|
struct dm_tree_node;
|
|
|
|
/*
|
|
* Initialise an empty dependency tree.
|
|
*
|
|
* The tree consists of a root node together with one node for each mapped
|
|
* device which has child nodes for each device referenced in its table.
|
|
*
|
|
* Every node in the tree has one or more children and one or more parents.
|
|
*
|
|
* The root node is the parent/child of every node that doesn't have other
|
|
* parents/children.
|
|
*/
|
|
struct dm_tree *dm_tree_create(void);
|
|
void dm_tree_free(struct dm_tree *tree);
|
|
|
|
/*
|
|
* List of suffixes to be ignored when matching uuids against existing devices.
|
|
*/
|
|
void dm_tree_set_optional_uuid_suffixes(struct dm_tree *dtree, const char **optional_uuid_suffixes);
|
|
|
|
/*
|
|
* Add nodes to the tree for a given device and all the devices it uses.
|
|
*/
|
|
int dm_tree_add_dev(struct dm_tree *tree, uint32_t major, uint32_t minor);
|
|
int dm_tree_add_dev_with_udev_flags(struct dm_tree *tree, uint32_t major,
|
|
uint32_t minor, uint16_t udev_flags);
|
|
|
|
/*
|
|
* Add a new node to the tree if it doesn't already exist.
|
|
*/
|
|
struct dm_tree_node *dm_tree_add_new_dev(struct dm_tree *tree,
|
|
const char *name,
|
|
const char *uuid,
|
|
uint32_t major, uint32_t minor,
|
|
int read_only,
|
|
int clear_inactive,
|
|
void *context);
|
|
struct dm_tree_node *dm_tree_add_new_dev_with_udev_flags(struct dm_tree *tree,
|
|
const char *name,
|
|
const char *uuid,
|
|
uint32_t major,
|
|
uint32_t minor,
|
|
int read_only,
|
|
int clear_inactive,
|
|
void *context,
|
|
uint16_t udev_flags);
|
|
|
|
/*
|
|
* Search for a node in the tree.
|
|
* Set major and minor to 0 or uuid to NULL to get the root node.
|
|
*/
|
|
struct dm_tree_node *dm_tree_find_node(struct dm_tree *tree,
|
|
uint32_t major,
|
|
uint32_t minor);
|
|
struct dm_tree_node *dm_tree_find_node_by_uuid(struct dm_tree *tree,
|
|
const char *uuid);
|
|
|
|
/*
|
|
* Use this to walk through all children of a given node.
|
|
* Set handle to NULL in first call.
|
|
* Returns NULL after the last child.
|
|
* Set inverted to use inverted tree.
|
|
*/
|
|
struct dm_tree_node *dm_tree_next_child(void **handle,
|
|
const struct dm_tree_node *parent,
|
|
uint32_t inverted);
|
|
|
|
/*
|
|
* Get properties of a node.
|
|
*/
|
|
const char *dm_tree_node_get_name(const struct dm_tree_node *node);
|
|
const char *dm_tree_node_get_uuid(const struct dm_tree_node *node);
|
|
const struct dm_info *dm_tree_node_get_info(const struct dm_tree_node *node);
|
|
void *dm_tree_node_get_context(const struct dm_tree_node *node);
|
|
/*
|
|
* Returns 0 when node size and its children is unchanged.
|
|
* Returns 1 when node or any of its children has increased size.
|
|
* Rerurns -1 when node or any of its children has reduced size.
|
|
*/
|
|
int dm_tree_node_size_changed(const struct dm_tree_node *dnode);
|
|
|
|
/*
|
|
* Returns the number of children of the given node (excluding the root node).
|
|
* Set inverted for the number of parents.
|
|
*/
|
|
int dm_tree_node_num_children(const struct dm_tree_node *node, uint32_t inverted);
|
|
|
|
/*
|
|
* Deactivate a device plus all dependencies.
|
|
* Ignores devices that don't have a uuid starting with uuid_prefix.
|
|
*/
|
|
int dm_tree_deactivate_children(struct dm_tree_node *dnode,
|
|
const char *uuid_prefix,
|
|
size_t uuid_prefix_len);
|
|
/*
|
|
* Preload/create a device plus all dependencies.
|
|
* Ignores devices that don't have a uuid starting with uuid_prefix.
|
|
*/
|
|
int dm_tree_preload_children(struct dm_tree_node *dnode,
|
|
const char *uuid_prefix,
|
|
size_t uuid_prefix_len);
|
|
|
|
/*
|
|
* Resume a device plus all dependencies.
|
|
* Ignores devices that don't have a uuid starting with uuid_prefix.
|
|
*/
|
|
int dm_tree_activate_children(struct dm_tree_node *dnode,
|
|
const char *uuid_prefix,
|
|
size_t uuid_prefix_len);
|
|
|
|
/*
|
|
* Suspend a device plus all dependencies.
|
|
* Ignores devices that don't have a uuid starting with uuid_prefix.
|
|
*/
|
|
int dm_tree_suspend_children(struct dm_tree_node *dnode,
|
|
const char *uuid_prefix,
|
|
size_t uuid_prefix_len);
|
|
|
|
/*
|
|
* Skip the filesystem sync when suspending.
|
|
* Does nothing with other functions.
|
|
* Use this when no snapshots are involved.
|
|
*/
|
|
void dm_tree_skip_lockfs(struct dm_tree_node *dnode);
|
|
|
|
/*
|
|
* Set the 'noflush' flag when suspending devices.
|
|
* If the kernel supports it, instead of erroring outstanding I/O that
|
|
* cannot be completed, the I/O is queued and resubmitted when the
|
|
* device is resumed. This affects multipath devices when all paths
|
|
* have failed and queue_if_no_path is set, and mirror devices when
|
|
* block_on_error is set and the mirror log has failed.
|
|
*/
|
|
void dm_tree_use_no_flush_suspend(struct dm_tree_node *dnode);
|
|
|
|
/*
|
|
* Retry removal of each device if not successful.
|
|
*/
|
|
void dm_tree_retry_remove(struct dm_tree_node *dnode);
|
|
|
|
/*
|
|
* Is the uuid prefix present in the tree?
|
|
* Only returns 0 if every node was checked successfully.
|
|
* Returns 1 if the tree walk has to be aborted.
|
|
*/
|
|
int dm_tree_children_use_uuid(struct dm_tree_node *dnode,
|
|
const char *uuid_prefix,
|
|
size_t uuid_prefix_len);
|
|
|
|
/*
|
|
* Construct tables for new nodes before activating them.
|
|
*/
|
|
int dm_tree_node_add_snapshot_origin_target(struct dm_tree_node *dnode,
|
|
uint64_t size,
|
|
const char *origin_uuid);
|
|
int dm_tree_node_add_snapshot_target(struct dm_tree_node *node,
|
|
uint64_t size,
|
|
const char *origin_uuid,
|
|
const char *cow_uuid,
|
|
int persistent,
|
|
uint32_t chunk_size);
|
|
int dm_tree_node_add_snapshot_merge_target(struct dm_tree_node *node,
|
|
uint64_t size,
|
|
const char *origin_uuid,
|
|
const char *cow_uuid,
|
|
const char *merge_uuid,
|
|
uint32_t chunk_size);
|
|
int dm_tree_node_add_error_target(struct dm_tree_node *node,
|
|
uint64_t size);
|
|
int dm_tree_node_add_zero_target(struct dm_tree_node *node,
|
|
uint64_t size);
|
|
int dm_tree_node_add_linear_target(struct dm_tree_node *node,
|
|
uint64_t size);
|
|
int dm_tree_node_add_striped_target(struct dm_tree_node *node,
|
|
uint64_t size,
|
|
uint32_t stripe_size);
|
|
|
|
#define DM_CRYPT_IV_DEFAULT UINT64_C(-1) /* iv_offset == seg offset */
|
|
/*
|
|
* Function accepts one string in cipher specification
|
|
* (chainmode and iv should be NULL because included in cipher string)
|
|
* or
|
|
* separate arguments which will be joined to "cipher-chainmode-iv"
|
|
*/
|
|
int dm_tree_node_add_crypt_target(struct dm_tree_node *node,
|
|
uint64_t size,
|
|
const char *cipher,
|
|
const char *chainmode,
|
|
const char *iv,
|
|
uint64_t iv_offset,
|
|
const char *key);
|
|
int dm_tree_node_add_mirror_target(struct dm_tree_node *node,
|
|
uint64_t size);
|
|
|
|
/* Mirror log flags */
|
|
#define DM_NOSYNC 0x00000001 /* Known already in sync */
|
|
#define DM_FORCESYNC 0x00000002 /* Force resync */
|
|
#define DM_BLOCK_ON_ERROR 0x00000004 /* On error, suspend I/O */
|
|
#define DM_CORELOG 0x00000008 /* In-memory log */
|
|
|
|
int dm_tree_node_add_mirror_target_log(struct dm_tree_node *node,
|
|
uint32_t region_size,
|
|
unsigned clustered,
|
|
const char *log_uuid,
|
|
unsigned area_count,
|
|
uint32_t flags);
|
|
|
|
int dm_tree_node_add_raid_target(struct dm_tree_node *node,
|
|
uint64_t size,
|
|
const char *raid_type,
|
|
uint32_t region_size,
|
|
uint32_t stripe_size,
|
|
uint64_t rebuilds,
|
|
uint64_t flags);
|
|
|
|
/*
|
|
* Defines below are based on kernel's dm-cache.c defines
|
|
* DM_CACHE_MIN_DATA_BLOCK_SIZE (32 * 1024 >> SECTOR_SHIFT)
|
|
* DM_CACHE_MAX_DATA_BLOCK_SIZE (1024 * 1024 * 1024 >> SECTOR_SHIFT)
|
|
*/
|
|
#define DM_CACHE_MIN_DATA_BLOCK_SIZE (UINT32_C(64))
|
|
#define DM_CACHE_MAX_DATA_BLOCK_SIZE (UINT32_C(2097152))
|
|
/*
|
|
* Max supported size for cache pool metadata device.
|
|
* Limitation is hardcoded into the kernel and bigger device sizes
|
|
* are not accepted.
|
|
*
|
|
* Limit defined in drivers/md/dm-cache-metadata.h
|
|
*/
|
|
#define DM_CACHE_METADATA_MAX_SECTORS DM_THIN_METADATA_MAX_SECTORS
|
|
|
|
/*
|
|
* Define number of elements in rebuild and writemostly arrays
|
|
* 'of struct dm_tree_node_raid_params'.
|
|
*/
|
|
|
|
struct dm_tree_node_raid_params {
|
|
const char *raid_type;
|
|
|
|
uint32_t stripes;
|
|
uint32_t mirrors;
|
|
uint32_t region_size;
|
|
uint32_t stripe_size;
|
|
|
|
/*
|
|
* 'rebuilds' and 'writemostly' are bitfields that signify
|
|
* which devices in the array are to be rebuilt or marked
|
|
* writemostly. The kernel supports up to 253 legs.
|
|
* We limit ourselves by choosing a lower value
|
|
* for DEFAULT_RAID{1}_MAX_IMAGES in defaults.h.
|
|
*/
|
|
uint64_t rebuilds;
|
|
uint64_t writemostly;
|
|
uint32_t writebehind; /* I/Os (kernel default COUNTER_MAX / 2) */
|
|
uint32_t sync_daemon_sleep; /* ms (kernel default = 5sec) */
|
|
uint32_t max_recovery_rate; /* kB/sec/disk */
|
|
uint32_t min_recovery_rate; /* kB/sec/disk */
|
|
uint32_t stripe_cache; /* sectors */
|
|
|
|
uint64_t flags; /* [no]sync */
|
|
uint32_t reserved2;
|
|
};
|
|
|
|
/*
|
|
* Version 2 of above node raid params struct to keeep API compatibility.
|
|
*
|
|
* Extended for more than 64 legs (max 253 in the MD kernel runtime!),
|
|
* delta_disks for disk add/remove reshaping,
|
|
* data_offset for out-of-place reshaping
|
|
* and data_copies for odd number of raid10 legs.
|
|
*/
|
|
#define RAID_BITMAP_SIZE 4 /* 4 * 64 bit elements in rebuilds/writemostly arrays */
|
|
struct dm_tree_node_raid_params_v2 {
|
|
const char *raid_type;
|
|
|
|
uint32_t stripes;
|
|
uint32_t mirrors;
|
|
uint32_t region_size;
|
|
uint32_t stripe_size;
|
|
|
|
int delta_disks; /* +/- number of disks to add/remove (reshaping) */
|
|
int data_offset; /* data offset to set (out-of-place reshaping) */
|
|
|
|
/*
|
|
* 'rebuilds' and 'writemostly' are bitfields that signify
|
|
* which devices in the array are to be rebuilt or marked
|
|
* writemostly. The kernel supports up to 253 legs.
|
|
* We limit ourselvs by choosing a lower value
|
|
* for DEFAULT_RAID_MAX_IMAGES.
|
|
*/
|
|
uint64_t rebuilds[RAID_BITMAP_SIZE];
|
|
uint64_t writemostly[RAID_BITMAP_SIZE];
|
|
uint32_t writebehind; /* I/Os (kernel default COUNTER_MAX / 2) */
|
|
uint32_t data_copies; /* RAID # of data copies */
|
|
uint32_t sync_daemon_sleep; /* ms (kernel default = 5sec) */
|
|
uint32_t max_recovery_rate; /* kB/sec/disk */
|
|
uint32_t min_recovery_rate; /* kB/sec/disk */
|
|
uint32_t stripe_cache; /* sectors */
|
|
|
|
uint64_t flags; /* [no]sync */
|
|
};
|
|
|
|
int dm_tree_node_add_raid_target_with_params(struct dm_tree_node *node,
|
|
uint64_t size,
|
|
const struct dm_tree_node_raid_params *p);
|
|
|
|
/* Version 2 API function taking dm_tree_node_raid_params_v2 for aforementioned extensions. */
|
|
int dm_tree_node_add_raid_target_with_params_v2(struct dm_tree_node *node,
|
|
uint64_t size,
|
|
const struct dm_tree_node_raid_params_v2 *p);
|
|
|
|
/* Cache feature_flags */
|
|
#define DM_CACHE_FEATURE_WRITEBACK 0x00000001
|
|
#define DM_CACHE_FEATURE_WRITETHROUGH 0x00000002
|
|
#define DM_CACHE_FEATURE_PASSTHROUGH 0x00000004
|
|
|
|
struct dm_config_node;
|
|
/*
|
|
* Use for passing cache policy and all its args e.g.:
|
|
*
|
|
* policy_settings {
|
|
* migration_threshold=2048
|
|
* sequention_threashold=100
|
|
* ...
|
|
* }
|
|
*
|
|
* For policy without any parameters use NULL.
|
|
*/
|
|
int dm_tree_node_add_cache_target(struct dm_tree_node *node,
|
|
uint64_t size,
|
|
uint64_t feature_flags, /* DM_CACHE_FEATURE_* */
|
|
const char *metadata_uuid,
|
|
const char *data_uuid,
|
|
const char *origin_uuid,
|
|
const char *policy_name,
|
|
const struct dm_config_node *policy_settings,
|
|
uint32_t data_block_size);
|
|
|
|
/*
|
|
* FIXME Add individual cache policy pairs <key> = value, like:
|
|
* int dm_tree_node_add_cache_policy_arg(struct dm_tree_node *dnode,
|
|
* const char *key, uint64_t value);
|
|
*/
|
|
|
|
/*
|
|
* Replicator operation mode
|
|
* Note: API for Replicator is not yet stable
|
|
*/
|
|
typedef enum {
|
|
DM_REPLICATOR_SYNC, /* Synchronous replication */
|
|
DM_REPLICATOR_ASYNC_WARN, /* Warn if async replicator is slow */
|
|
DM_REPLICATOR_ASYNC_STALL, /* Stall replicator if not fast enough */
|
|
DM_REPLICATOR_ASYNC_DROP, /* Drop sites out of sync */
|
|
DM_REPLICATOR_ASYNC_FAIL, /* Fail replicator if slow */
|
|
NUM_DM_REPLICATOR_MODES
|
|
} dm_replicator_mode_t;
|
|
|
|
int dm_tree_node_add_replicator_target(struct dm_tree_node *node,
|
|
uint64_t size,
|
|
const char *rlog_uuid,
|
|
const char *rlog_type,
|
|
unsigned rsite_index,
|
|
dm_replicator_mode_t mode,
|
|
uint32_t async_timeout,
|
|
uint64_t fall_behind_data,
|
|
uint32_t fall_behind_ios);
|
|
|
|
int dm_tree_node_add_replicator_dev_target(struct dm_tree_node *node,
|
|
uint64_t size,
|
|
const char *replicator_uuid, /* Replicator control device */
|
|
uint64_t rdevice_index,
|
|
const char *rdev_uuid, /* Rimage device name/uuid */
|
|
unsigned rsite_index,
|
|
const char *slog_uuid,
|
|
uint32_t slog_flags, /* Mirror log flags */
|
|
uint32_t slog_region_size);
|
|
/* End of Replicator API */
|
|
|
|
/*
|
|
* FIXME: Defines bellow are based on kernel's dm-thin.c defines
|
|
* DATA_DEV_BLOCK_SIZE_MIN_SECTORS (64 * 1024 >> SECTOR_SHIFT)
|
|
* DATA_DEV_BLOCK_SIZE_MAX_SECTORS (1024 * 1024 * 1024 >> SECTOR_SHIFT)
|
|
*/
|
|
#define DM_THIN_MIN_DATA_BLOCK_SIZE (UINT32_C(128))
|
|
#define DM_THIN_MAX_DATA_BLOCK_SIZE (UINT32_C(2097152))
|
|
/*
|
|
* Max supported size for thin pool metadata device (17112760320 bytes)
|
|
* Limitation is hardcoded into the kernel and bigger device size
|
|
* is not accepted.
|
|
* drivers/md/dm-thin-metadata.h THIN_METADATA_MAX_SECTORS
|
|
*/
|
|
#define DM_THIN_MAX_METADATA_SIZE (UINT64_C(255) * (1 << 14) * (4096 / (1 << 9)) - 256 * 1024)
|
|
|
|
int dm_tree_node_add_thin_pool_target(struct dm_tree_node *node,
|
|
uint64_t size,
|
|
uint64_t transaction_id,
|
|
const char *metadata_uuid,
|
|
const char *pool_uuid,
|
|
uint32_t data_block_size,
|
|
uint64_t low_water_mark,
|
|
unsigned skip_block_zeroing);
|
|
|
|
/* Supported messages for thin provision target */
|
|
typedef enum {
|
|
DM_THIN_MESSAGE_CREATE_SNAP, /* device_id, origin_id */
|
|
DM_THIN_MESSAGE_CREATE_THIN, /* device_id */
|
|
DM_THIN_MESSAGE_DELETE, /* device_id */
|
|
DM_THIN_MESSAGE_SET_TRANSACTION_ID, /* current_id, new_id */
|
|
DM_THIN_MESSAGE_RESERVE_METADATA_SNAP, /* target version >= 1.1 */
|
|
DM_THIN_MESSAGE_RELEASE_METADATA_SNAP, /* target version >= 1.1 */
|
|
} dm_thin_message_t;
|
|
|
|
int dm_tree_node_add_thin_pool_message(struct dm_tree_node *node,
|
|
dm_thin_message_t type,
|
|
uint64_t id1, uint64_t id2);
|
|
|
|
/*
|
|
* Set thin pool discard features
|
|
* ignore - Disable support for discards
|
|
* no_passdown - Don't pass discards down to underlying data device,
|
|
* just remove the mapping
|
|
* Feature is available since version 1.1 of the thin target.
|
|
*/
|
|
int dm_tree_node_set_thin_pool_discard(struct dm_tree_node *node,
|
|
unsigned ignore,
|
|
unsigned no_passdown);
|
|
/*
|
|
* Set error if no space, instead of queueing for thin pool.
|
|
*/
|
|
int dm_tree_node_set_thin_pool_error_if_no_space(struct dm_tree_node *node,
|
|
unsigned error_if_no_space);
|
|
/* Start thin pool with metadata in read-only mode */
|
|
int dm_tree_node_set_thin_pool_read_only(struct dm_tree_node *node,
|
|
unsigned read_only);
|
|
/*
|
|
* FIXME: Defines bellow are based on kernel's dm-thin.c defines
|
|
* MAX_DEV_ID ((1 << 24) - 1)
|
|
*/
|
|
#define DM_THIN_MAX_DEVICE_ID (UINT32_C((1 << 24) - 1))
|
|
int dm_tree_node_add_thin_target(struct dm_tree_node *node,
|
|
uint64_t size,
|
|
const char *pool_uuid,
|
|
uint32_t device_id);
|
|
|
|
int dm_tree_node_set_thin_external_origin(struct dm_tree_node *node,
|
|
const char *external_uuid);
|
|
|
|
void dm_tree_node_set_udev_flags(struct dm_tree_node *node, uint16_t udev_flags);
|
|
|
|
void dm_tree_node_set_presuspend_node(struct dm_tree_node *node,
|
|
struct dm_tree_node *presuspend_node);
|
|
|
|
int dm_tree_node_add_target_area(struct dm_tree_node *node,
|
|
const char *dev_name,
|
|
const char *dlid,
|
|
uint64_t offset);
|
|
|
|
/*
|
|
* Only for temporarily-missing raid devices where changes are tracked.
|
|
*/
|
|
int dm_tree_node_add_null_area(struct dm_tree_node *node, uint64_t offset);
|
|
|
|
/*
|
|
* Set readahead (in sectors) after loading the node.
|
|
*/
|
|
void dm_tree_node_set_read_ahead(struct dm_tree_node *dnode,
|
|
uint32_t read_ahead,
|
|
uint32_t read_ahead_flags);
|
|
|
|
/*
|
|
* Set node callback hook before de/activation.
|
|
* Callback is called before 'activation' of node for activation tree,
|
|
* or 'deactivation' of node for deactivation tree.
|
|
*/
|
|
typedef enum {
|
|
DM_NODE_CALLBACK_PRELOADED, /* Node has preload deps */
|
|
DM_NODE_CALLBACK_DEACTIVATED, /* Node is deactivated */
|
|
} dm_node_callback_t;
|
|
typedef int (*dm_node_callback_fn) (struct dm_tree_node *node,
|
|
dm_node_callback_t type, void *cb_data);
|
|
void dm_tree_node_set_callback(struct dm_tree_node *node,
|
|
dm_node_callback_fn cb, void *cb_data);
|
|
|
|
void dm_tree_set_cookie(struct dm_tree_node *node, uint32_t cookie);
|
|
uint32_t dm_tree_get_cookie(struct dm_tree_node *node);
|
|
|
|
/*****************************************************************************
|
|
* Library functions
|
|
*****************************************************************************/
|
|
|
|
/*******************
|
|
* Memory management
|
|
*******************/
|
|
|
|
/*
|
|
* Never use these functions directly - use the macros following instead.
|
|
*/
|
|
void *dm_malloc_wrapper(size_t s, const char *file, int line)
|
|
__attribute__((__malloc__)) __attribute__((__warn_unused_result__));
|
|
void *dm_zalloc_wrapper(size_t s, const char *file, int line)
|
|
__attribute__((__malloc__)) __attribute__((__warn_unused_result__));
|
|
void *dm_realloc_wrapper(void *p, unsigned int s, const char *file, int line)
|
|
__attribute__((__warn_unused_result__));
|
|
void dm_free_wrapper(void *ptr);
|
|
char *dm_strdup_wrapper(const char *s, const char *file, int line)
|
|
__attribute__((__warn_unused_result__));
|
|
int dm_dump_memory_wrapper(void);
|
|
void dm_bounds_check_wrapper(void);
|
|
|
|
#define dm_malloc(s) dm_malloc_wrapper((s), __FILE__, __LINE__)
|
|
#define dm_zalloc(s) dm_zalloc_wrapper((s), __FILE__, __LINE__)
|
|
#define dm_strdup(s) dm_strdup_wrapper((s), __FILE__, __LINE__)
|
|
#define dm_free(p) dm_free_wrapper(p)
|
|
#define dm_realloc(p, s) dm_realloc_wrapper((p), (s), __FILE__, __LINE__)
|
|
#define dm_dump_memory() dm_dump_memory_wrapper()
|
|
#define dm_bounds_check() dm_bounds_check_wrapper()
|
|
|
|
/*
|
|
* The pool allocator is useful when you are going to allocate
|
|
* lots of memory, use the memory for a bit, and then free the
|
|
* memory in one go. A surprising amount of code has this usage
|
|
* profile.
|
|
*
|
|
* You should think of the pool as an infinite, contiguous chunk
|
|
* of memory. The front of this chunk of memory contains
|
|
* allocated objects, the second half is free. dm_pool_alloc grabs
|
|
* the next 'size' bytes from the free half, in effect moving it
|
|
* into the allocated half. This operation is very efficient.
|
|
*
|
|
* dm_pool_free frees the allocated object *and* all objects
|
|
* allocated after it. It is important to note this semantic
|
|
* difference from malloc/free. This is also extremely
|
|
* efficient, since a single dm_pool_free can dispose of a large
|
|
* complex object.
|
|
*
|
|
* dm_pool_destroy frees all allocated memory.
|
|
*
|
|
* eg, If you are building a binary tree in your program, and
|
|
* know that you are only ever going to insert into your tree,
|
|
* and not delete (eg, maintaining a symbol table for a
|
|
* compiler). You can create yourself a pool, allocate the nodes
|
|
* from it, and when the tree becomes redundant call dm_pool_destroy
|
|
* (no nasty iterating through the tree to free nodes).
|
|
*
|
|
* eg, On the other hand if you wanted to repeatedly insert and
|
|
* remove objects into the tree, you would be better off
|
|
* allocating the nodes from a free list; you cannot free a
|
|
* single arbitrary node with pool.
|
|
*/
|
|
|
|
struct dm_pool;
|
|
|
|
/* constructor and destructor */
|
|
struct dm_pool *dm_pool_create(const char *name, size_t chunk_hint)
|
|
__attribute__((__warn_unused_result__));
|
|
void dm_pool_destroy(struct dm_pool *p);
|
|
|
|
/* simple allocation/free routines */
|
|
void *dm_pool_alloc(struct dm_pool *p, size_t s)
|
|
__attribute__((__warn_unused_result__));
|
|
void *dm_pool_alloc_aligned(struct dm_pool *p, size_t s, unsigned alignment)
|
|
__attribute__((__warn_unused_result__));
|
|
void dm_pool_empty(struct dm_pool *p);
|
|
void dm_pool_free(struct dm_pool *p, void *ptr);
|
|
|
|
/*
|
|
* To aid debugging, a pool can be locked. Any modifications made
|
|
* to the content of the pool while it is locked can be detected.
|
|
* Default compilation is using a crc checksum to notice modifications.
|
|
* The pool locking is using the mprotect with the compilation flag
|
|
* DEBUG_ENFORCE_POOL_LOCKING to enforce the memory protection.
|
|
*/
|
|
/* query pool lock status */
|
|
int dm_pool_locked(struct dm_pool *p);
|
|
/* mark pool as locked */
|
|
int dm_pool_lock(struct dm_pool *p, int crc)
|
|
__attribute__((__warn_unused_result__));
|
|
/* mark pool as unlocked */
|
|
int dm_pool_unlock(struct dm_pool *p, int crc)
|
|
__attribute__((__warn_unused_result__));
|
|
|
|
/*
|
|
* Object building routines:
|
|
*
|
|
* These allow you to 'grow' an object, useful for
|
|
* building strings, or filling in dynamic
|
|
* arrays.
|
|
*
|
|
* It's probably best explained with an example:
|
|
*
|
|
* char *build_string(struct dm_pool *mem)
|
|
* {
|
|
* int i;
|
|
* char buffer[16];
|
|
*
|
|
* if (!dm_pool_begin_object(mem, 128))
|
|
* return NULL;
|
|
*
|
|
* for (i = 0; i < 50; i++) {
|
|
* snprintf(buffer, sizeof(buffer), "%d, ", i);
|
|
* if (!dm_pool_grow_object(mem, buffer, 0))
|
|
* goto bad;
|
|
* }
|
|
*
|
|
* // add null
|
|
* if (!dm_pool_grow_object(mem, "\0", 1))
|
|
* goto bad;
|
|
*
|
|
* return dm_pool_end_object(mem);
|
|
*
|
|
* bad:
|
|
*
|
|
* dm_pool_abandon_object(mem);
|
|
* return NULL;
|
|
*}
|
|
*
|
|
* So start an object by calling dm_pool_begin_object
|
|
* with a guess at the final object size - if in
|
|
* doubt make the guess too small.
|
|
*
|
|
* Then append chunks of data to your object with
|
|
* dm_pool_grow_object. Finally get your object with
|
|
* a call to dm_pool_end_object.
|
|
*
|
|
* Setting delta to 0 means it will use strlen(extra).
|
|
*/
|
|
int dm_pool_begin_object(struct dm_pool *p, size_t hint);
|
|
int dm_pool_grow_object(struct dm_pool *p, const void *extra, size_t delta);
|
|
void *dm_pool_end_object(struct dm_pool *p);
|
|
void dm_pool_abandon_object(struct dm_pool *p);
|
|
|
|
/* utilities */
|
|
char *dm_pool_strdup(struct dm_pool *p, const char *str)
|
|
__attribute__((__warn_unused_result__));
|
|
char *dm_pool_strndup(struct dm_pool *p, const char *str, size_t n)
|
|
__attribute__((__warn_unused_result__));
|
|
void *dm_pool_zalloc(struct dm_pool *p, size_t s)
|
|
__attribute__((__warn_unused_result__));
|
|
|
|
/******************
|
|
* bitset functions
|
|
******************/
|
|
|
|
typedef uint32_t *dm_bitset_t;
|
|
|
|
dm_bitset_t dm_bitset_create(struct dm_pool *mem, unsigned num_bits);
|
|
void dm_bitset_destroy(dm_bitset_t bs);
|
|
|
|
int dm_bitset_equal(dm_bitset_t in1, dm_bitset_t in2);
|
|
|
|
void dm_bit_and(dm_bitset_t out, dm_bitset_t in1, dm_bitset_t in2);
|
|
void dm_bit_union(dm_bitset_t out, dm_bitset_t in1, dm_bitset_t in2);
|
|
int dm_bit_get_first(dm_bitset_t bs);
|
|
int dm_bit_get_next(dm_bitset_t bs, int last_bit);
|
|
int dm_bit_get_last(dm_bitset_t bs);
|
|
int dm_bit_get_prev(dm_bitset_t bs, int last_bit);
|
|
|
|
#define DM_BITS_PER_INT (sizeof(int) * CHAR_BIT)
|
|
|
|
#define dm_bit(bs, i) \
|
|
((bs)[((i) / DM_BITS_PER_INT) + 1] & (0x1 << ((i) & (DM_BITS_PER_INT - 1))))
|
|
|
|
#define dm_bit_set(bs, i) \
|
|
((bs)[((i) / DM_BITS_PER_INT) + 1] |= (0x1 << ((i) & (DM_BITS_PER_INT - 1))))
|
|
|
|
#define dm_bit_clear(bs, i) \
|
|
((bs)[((i) / DM_BITS_PER_INT) + 1] &= ~(0x1 << ((i) & (DM_BITS_PER_INT - 1))))
|
|
|
|
#define dm_bit_set_all(bs) \
|
|
memset((bs) + 1, -1, ((*(bs) / DM_BITS_PER_INT) + 1) * sizeof(int))
|
|
|
|
#define dm_bit_clear_all(bs) \
|
|
memset((bs) + 1, 0, ((*(bs) / DM_BITS_PER_INT) + 1) * sizeof(int))
|
|
|
|
#define dm_bit_copy(bs1, bs2) \
|
|
memcpy((bs1) + 1, (bs2) + 1, ((*(bs2) / DM_BITS_PER_INT) + 1) * sizeof(int))
|
|
|
|
/*
|
|
* Parse a string representation of a bitset into a dm_bitset_t. The
|
|
* notation used is identical to the kernel bitmap parser (cpuset etc.)
|
|
* and supports both lists ("1,2,3") and ranges ("1-2,5-8"). If the mem
|
|
* parameter is NULL memory for the bitset will be allocated using
|
|
* dm_malloc(). Otherwise the bitset will be allocated using the supplied
|
|
* dm_pool.
|
|
*/
|
|
dm_bitset_t dm_bitset_parse_list(const char *str, struct dm_pool *mem,
|
|
size_t min_num_bits);
|
|
|
|
/* Returns number of set bits */
|
|
static inline unsigned hweight32(uint32_t i)
|
|
{
|
|
unsigned r = (i & 0x55555555) + ((i >> 1) & 0x55555555);
|
|
|
|
r = (r & 0x33333333) + ((r >> 2) & 0x33333333);
|
|
r = (r & 0x0F0F0F0F) + ((r >> 4) & 0x0F0F0F0F);
|
|
r = (r & 0x00FF00FF) + ((r >> 8) & 0x00FF00FF);
|
|
return (r & 0x0000FFFF) + ((r >> 16) & 0x0000FFFF);
|
|
}
|
|
|
|
/****************
|
|
* hash functions
|
|
****************/
|
|
|
|
struct dm_hash_table;
|
|
struct dm_hash_node;
|
|
|
|
typedef void (*dm_hash_iterate_fn) (void *data);
|
|
|
|
struct dm_hash_table *dm_hash_create(unsigned size_hint)
|
|
__attribute__((__warn_unused_result__));
|
|
void dm_hash_destroy(struct dm_hash_table *t);
|
|
void dm_hash_wipe(struct dm_hash_table *t);
|
|
|
|
void *dm_hash_lookup(struct dm_hash_table *t, const char *key);
|
|
int dm_hash_insert(struct dm_hash_table *t, const char *key, void *data);
|
|
void dm_hash_remove(struct dm_hash_table *t, const char *key);
|
|
|
|
void *dm_hash_lookup_binary(struct dm_hash_table *t, const void *key, uint32_t len);
|
|
int dm_hash_insert_binary(struct dm_hash_table *t, const void *key, uint32_t len,
|
|
void *data);
|
|
void dm_hash_remove_binary(struct dm_hash_table *t, const void *key, uint32_t len);
|
|
|
|
unsigned dm_hash_get_num_entries(struct dm_hash_table *t);
|
|
void dm_hash_iter(struct dm_hash_table *t, dm_hash_iterate_fn f);
|
|
|
|
char *dm_hash_get_key(struct dm_hash_table *t, struct dm_hash_node *n);
|
|
void *dm_hash_get_data(struct dm_hash_table *t, struct dm_hash_node *n);
|
|
struct dm_hash_node *dm_hash_get_first(struct dm_hash_table *t);
|
|
struct dm_hash_node *dm_hash_get_next(struct dm_hash_table *t, struct dm_hash_node *n);
|
|
|
|
/*
|
|
* dm_hash_insert() replaces the value of an existing
|
|
* entry with a matching key if one exists. Otherwise
|
|
* it adds a new entry.
|
|
*
|
|
* dm_hash_insert_with_val() inserts a new entry if
|
|
* another entry with the same key already exists.
|
|
* val_len is the size of the data being inserted.
|
|
*
|
|
* If two entries with the same key exist,
|
|
* (added using dm_hash_insert_allow_multiple), then:
|
|
* . dm_hash_lookup() returns the first one it finds, and
|
|
* dm_hash_lookup_with_val() returns the one with a matching
|
|
* val_len/val.
|
|
* . dm_hash_remove() removes the first one it finds, and
|
|
* dm_hash_remove_with_val() removes the one with a matching
|
|
* val_len/val.
|
|
*
|
|
* If a single entry with a given key exists, and it has
|
|
* zero val_len, then:
|
|
* . dm_hash_lookup() returns it
|
|
* . dm_hash_lookup_with_val(val_len=0) returns it
|
|
* . dm_hash_remove() removes it
|
|
* . dm_hash_remove_with_val(val_len=0) removes it
|
|
*
|
|
* dm_hash_lookup_with_count() is a single call that will
|
|
* both lookup a key's value and check if there is more
|
|
* than one entry with the given key.
|
|
*
|
|
* (It is not meant to retrieve all the entries with the
|
|
* given key. In the common case where a single entry exists
|
|
* for the key, it is useful to have a single call that will
|
|
* both look up the value and indicate if multiple values
|
|
* exist for the key.)
|
|
*
|
|
* dm_hash_lookup_with_count:
|
|
* . If no entries exist, the function returns NULL, and
|
|
* the count is set to 0.
|
|
* . If only one entry exists, the value of that entry is
|
|
* returned and count is set to 1.
|
|
* . If N entries exists, the value of the first entry is
|
|
* returned and count is set to N.
|
|
*/
|
|
|
|
void *dm_hash_lookup_with_val(struct dm_hash_table *t, const char *key,
|
|
const void *val, uint32_t val_len);
|
|
void dm_hash_remove_with_val(struct dm_hash_table *t, const char *key,
|
|
const void *val, uint32_t val_len);
|
|
int dm_hash_insert_allow_multiple(struct dm_hash_table *t, const char *key,
|
|
const void *val, uint32_t val_len);
|
|
void *dm_hash_lookup_with_count(struct dm_hash_table *t, const char *key, int *count);
|
|
|
|
|
|
#define dm_hash_iterate(v, h) \
|
|
for (v = dm_hash_get_first((h)); v; \
|
|
v = dm_hash_get_next((h), v))
|
|
|
|
/****************
|
|
* list functions
|
|
****************/
|
|
|
|
/*
|
|
* A list consists of a list head plus elements.
|
|
* Each element has 'next' and 'previous' pointers.
|
|
* The list head's pointers point to the first and the last element.
|
|
*/
|
|
|
|
struct dm_list {
|
|
struct dm_list *n, *p;
|
|
};
|
|
|
|
/*
|
|
* String list.
|
|
*/
|
|
struct dm_str_list {
|
|
struct dm_list list;
|
|
const char *str;
|
|
};
|
|
|
|
/*
|
|
* Initialise a list before use.
|
|
* The list head's next and previous pointers point back to itself.
|
|
*/
|
|
#define DM_LIST_HEAD_INIT(name) { &(name), &(name) }
|
|
#define DM_LIST_INIT(name) struct dm_list name = DM_LIST_HEAD_INIT(name)
|
|
void dm_list_init(struct dm_list *head);
|
|
|
|
/*
|
|
* Insert an element before 'head'.
|
|
* If 'head' is the list head, this adds an element to the end of the list.
|
|
*/
|
|
void dm_list_add(struct dm_list *head, struct dm_list *elem);
|
|
|
|
/*
|
|
* Insert an element after 'head'.
|
|
* If 'head' is the list head, this adds an element to the front of the list.
|
|
*/
|
|
void dm_list_add_h(struct dm_list *head, struct dm_list *elem);
|
|
|
|
/*
|
|
* Delete an element from its list.
|
|
* Note that this doesn't change the element itself - it may still be safe
|
|
* to follow its pointers.
|
|
*/
|
|
void dm_list_del(struct dm_list *elem);
|
|
|
|
/*
|
|
* Remove an element from existing list and insert before 'head'.
|
|
*/
|
|
void dm_list_move(struct dm_list *head, struct dm_list *elem);
|
|
|
|
/*
|
|
* Join 'head1' to the end of 'head'.
|
|
*/
|
|
void dm_list_splice(struct dm_list *head, struct dm_list *head1);
|
|
|
|
/*
|
|
* Is the list empty?
|
|
*/
|
|
int dm_list_empty(const struct dm_list *head);
|
|
|
|
/*
|
|
* Is this the first element of the list?
|
|
*/
|
|
int dm_list_start(const struct dm_list *head, const struct dm_list *elem);
|
|
|
|
/*
|
|
* Is this the last element of the list?
|
|
*/
|
|
int dm_list_end(const struct dm_list *head, const struct dm_list *elem);
|
|
|
|
/*
|
|
* Return first element of the list or NULL if empty
|
|
*/
|
|
struct dm_list *dm_list_first(const struct dm_list *head);
|
|
|
|
/*
|
|
* Return last element of the list or NULL if empty
|
|
*/
|
|
struct dm_list *dm_list_last(const struct dm_list *head);
|
|
|
|
/*
|
|
* Return the previous element of the list, or NULL if we've reached the start.
|
|
*/
|
|
struct dm_list *dm_list_prev(const struct dm_list *head, const struct dm_list *elem);
|
|
|
|
/*
|
|
* Return the next element of the list, or NULL if we've reached the end.
|
|
*/
|
|
struct dm_list *dm_list_next(const struct dm_list *head, const struct dm_list *elem);
|
|
|
|
/*
|
|
* Given the address v of an instance of 'struct dm_list' called 'head'
|
|
* contained in a structure of type t, return the containing structure.
|
|
*/
|
|
#define dm_list_struct_base(v, t, head) \
|
|
((t *)((const char *)(v) - (const char *)&((t *) 0)->head))
|
|
|
|
/*
|
|
* Given the address v of an instance of 'struct dm_list list' contained in
|
|
* a structure of type t, return the containing structure.
|
|
*/
|
|
#define dm_list_item(v, t) dm_list_struct_base((v), t, list)
|
|
|
|
/*
|
|
* Given the address v of one known element e in a known structure of type t,
|
|
* return another element f.
|
|
*/
|
|
#define dm_struct_field(v, t, e, f) \
|
|
(((t *)((uintptr_t)(v) - (uintptr_t)&((t *) 0)->e))->f)
|
|
|
|
/*
|
|
* Given the address v of a known element e in a known structure of type t,
|
|
* return the list head 'list'
|
|
*/
|
|
#define dm_list_head(v, t, e) dm_struct_field(v, t, e, list)
|
|
|
|
/*
|
|
* Set v to each element of a list in turn.
|
|
*/
|
|
#define dm_list_iterate(v, head) \
|
|
for (v = (head)->n; v != head; v = v->n)
|
|
|
|
/*
|
|
* Set v to each element in a list in turn, starting from the element
|
|
* in front of 'start'.
|
|
* You can use this to 'unwind' a list_iterate and back out actions on
|
|
* already-processed elements.
|
|
* If 'start' is 'head' it walks the list backwards.
|
|
*/
|
|
#define dm_list_uniterate(v, head, start) \
|
|
for (v = (start)->p; v != head; v = v->p)
|
|
|
|
/*
|
|
* A safe way to walk a list and delete and free some elements along
|
|
* the way.
|
|
* t must be defined as a temporary variable of the same type as v.
|
|
*/
|
|
#define dm_list_iterate_safe(v, t, head) \
|
|
for (v = (head)->n, t = v->n; v != head; v = t, t = v->n)
|
|
|
|
/*
|
|
* Walk a list, setting 'v' in turn to the containing structure of each item.
|
|
* The containing structure should be the same type as 'v'.
|
|
* The 'struct dm_list' variable within the containing structure is 'field'.
|
|
*/
|
|
#define dm_list_iterate_items_gen(v, head, field) \
|
|
for (v = dm_list_struct_base((head)->n, __typeof__(*v), field); \
|
|
&v->field != (head); \
|
|
v = dm_list_struct_base(v->field.n, __typeof__(*v), field))
|
|
|
|
/*
|
|
* Walk a list, setting 'v' in turn to the containing structure of each item.
|
|
* The containing structure should be the same type as 'v'.
|
|
* The list should be 'struct dm_list list' within the containing structure.
|
|
*/
|
|
#define dm_list_iterate_items(v, head) dm_list_iterate_items_gen(v, (head), list)
|
|
|
|
/*
|
|
* Walk a list, setting 'v' in turn to the containing structure of each item.
|
|
* The containing structure should be the same type as 'v'.
|
|
* The 'struct dm_list' variable within the containing structure is 'field'.
|
|
* t must be defined as a temporary variable of the same type as v.
|
|
*/
|
|
#define dm_list_iterate_items_gen_safe(v, t, head, field) \
|
|
for (v = dm_list_struct_base((head)->n, __typeof__(*v), field), \
|
|
t = dm_list_struct_base(v->field.n, __typeof__(*v), field); \
|
|
&v->field != (head); \
|
|
v = t, t = dm_list_struct_base(v->field.n, __typeof__(*v), field))
|
|
/*
|
|
* Walk a list, setting 'v' in turn to the containing structure of each item.
|
|
* The containing structure should be the same type as 'v'.
|
|
* The list should be 'struct dm_list list' within the containing structure.
|
|
* t must be defined as a temporary variable of the same type as v.
|
|
*/
|
|
#define dm_list_iterate_items_safe(v, t, head) \
|
|
dm_list_iterate_items_gen_safe(v, t, (head), list)
|
|
|
|
/*
|
|
* Walk a list backwards, setting 'v' in turn to the containing structure
|
|
* of each item.
|
|
* The containing structure should be the same type as 'v'.
|
|
* The 'struct dm_list' variable within the containing structure is 'field'.
|
|
*/
|
|
#define dm_list_iterate_back_items_gen(v, head, field) \
|
|
for (v = dm_list_struct_base((head)->p, __typeof__(*v), field); \
|
|
&v->field != (head); \
|
|
v = dm_list_struct_base(v->field.p, __typeof__(*v), field))
|
|
|
|
/*
|
|
* Walk a list backwards, setting 'v' in turn to the containing structure
|
|
* of each item.
|
|
* The containing structure should be the same type as 'v'.
|
|
* The list should be 'struct dm_list list' within the containing structure.
|
|
*/
|
|
#define dm_list_iterate_back_items(v, head) dm_list_iterate_back_items_gen(v, (head), list)
|
|
|
|
/*
|
|
* Return the number of elements in a list by walking it.
|
|
*/
|
|
unsigned int dm_list_size(const struct dm_list *head);
|
|
|
|
/*********
|
|
* selinux
|
|
*********/
|
|
|
|
/*
|
|
* Obtain SELinux security context assigned for the path and set this
|
|
* context for creating a new file system object. This security context
|
|
* is global and it is used until reset to default policy behaviour
|
|
* by calling 'dm_prepare_selinux_context(NULL, 0)'.
|
|
*/
|
|
int dm_prepare_selinux_context(const char *path, mode_t mode);
|
|
/*
|
|
* Set SELinux context for existing file system object.
|
|
*/
|
|
int dm_set_selinux_context(const char *path, mode_t mode);
|
|
|
|
/*********************
|
|
* string manipulation
|
|
*********************/
|
|
|
|
/*
|
|
* Break up the name of a mapped device into its constituent
|
|
* Volume Group, Logical Volume and Layer (if present).
|
|
* If mem is supplied, the result is allocated from the mempool.
|
|
* Otherwise the strings are changed in situ.
|
|
*/
|
|
int dm_split_lvm_name(struct dm_pool *mem, const char *dmname,
|
|
char **vgname, char **lvname, char **layer);
|
|
|
|
/*
|
|
* Destructively split buffer into NULL-separated words in argv.
|
|
* Returns number of words.
|
|
*/
|
|
int dm_split_words(char *buffer, unsigned max,
|
|
unsigned ignore_comments, /* Not implemented */
|
|
char **argv);
|
|
|
|
/*
|
|
* Returns -1 if buffer too small
|
|
*/
|
|
int dm_snprintf(char *buf, size_t bufsize, const char *format, ...)
|
|
__attribute__ ((format(printf, 3, 4)));
|
|
|
|
/*
|
|
* Returns pointer to the last component of the path.
|
|
*/
|
|
const char *dm_basename(const char *path);
|
|
|
|
/*
|
|
* Returns number of occurrences of 'c' in 'str' of length 'size'.
|
|
*/
|
|
unsigned dm_count_chars(const char *str, size_t len, const int c);
|
|
|
|
/*
|
|
* Length of string after escaping double quotes and backslashes.
|
|
*/
|
|
size_t dm_escaped_len(const char *str);
|
|
|
|
/*
|
|
* <vg>-<lv>-<layer> or if !layer just <vg>-<lv>.
|
|
*/
|
|
char *dm_build_dm_name(struct dm_pool *mem, const char *vgname,
|
|
const char *lvname, const char *layer);
|
|
char *dm_build_dm_uuid(struct dm_pool *mem, const char *prefix, const char *lvid, const char *layer);
|
|
|
|
/*
|
|
* Copies a string, quoting double quotes with backslashes.
|
|
*/
|
|
char *dm_escape_double_quotes(char *out, const char *src);
|
|
|
|
/*
|
|
* Undo quoting in situ.
|
|
*/
|
|
void dm_unescape_double_quotes(char *src);
|
|
|
|
/*
|
|
* Unescape colons and "at" signs in situ and save the substrings
|
|
* starting at the position of the first unescaped colon and the
|
|
* first unescaped "at" sign. This is normally used to unescape
|
|
* device names used as PVs.
|
|
*/
|
|
void dm_unescape_colons_and_at_signs(char *src,
|
|
char **substr_first_unquoted_colon,
|
|
char **substr_first_unquoted_at_sign);
|
|
|
|
/*
|
|
* Replacement for strncpy() function.
|
|
*
|
|
* Copies no more than n bytes from string pointed by src to the buffer
|
|
* pointed by dest and ensure string is finished with '\0'.
|
|
* Returns 0 if the whole string does not fit.
|
|
*/
|
|
int dm_strncpy(char *dest, const char *src, size_t n);
|
|
|
|
/*
|
|
* Recognize unit specifier in the 'units' arg and return a factor
|
|
* representing that unit. If the 'units' contains a prefix with digits,
|
|
* the 'units' is considered to be a custom unit.
|
|
*
|
|
* Also, set 'unit_type' output arg to the character that represents
|
|
* the unit specified. The 'unit_type' character equals to the unit
|
|
* character itself recognized in the 'units' arg for canonical units.
|
|
* Otherwise, the 'unit_type' character is set to 'U' for custom unit.
|
|
*
|
|
* An example for k/K canonical units and 8k/8K custom units:
|
|
*
|
|
* units unit_type return value (factor)
|
|
* k k 1024
|
|
* K K 1000
|
|
* 8k U 1024*8
|
|
* 8K U 1000*8
|
|
* etc...
|
|
*
|
|
* Recognized units:
|
|
*
|
|
* h/H - human readable (returns 1 for both)
|
|
* b/B - byte (returns 1 for both)
|
|
* s/S - sector (returns 512 for both)
|
|
* k/K - kilo (returns 1024/1000 respectively)
|
|
* m/M - mega (returns 1024^2/1000^2 respectively)
|
|
* g/G - giga (returns 1024^3/1000^3 respectively)
|
|
* t/T - tera (returns 1024^4/1000^4 respectively)
|
|
* p/P - peta (returns 1024^5/1000^5 respectively)
|
|
* e/E - exa (returns 1024^6/1000^6 respectively)
|
|
*
|
|
* Only one units character is allowed in the 'units' arg
|
|
* if strict mode is enabled by 'strict' arg.
|
|
*
|
|
* The 'endptr' output arg, if not NULL, saves the pointer
|
|
* in the 'units' string which follows the unit specifier
|
|
* recognized (IOW the position where the parsing of the
|
|
* unit specifier stopped).
|
|
*
|
|
* Returns the unit factor or 0 if no unit is recognized.
|
|
*/
|
|
uint64_t dm_units_to_factor(const char *units, char *unit_type,
|
|
int strict, const char **endptr);
|
|
|
|
/*
|
|
* Type of unit specifier used by dm_size_to_string().
|
|
*/
|
|
typedef enum {
|
|
DM_SIZE_LONG = 0, /* Megabyte */
|
|
DM_SIZE_SHORT = 1, /* MB or MiB */
|
|
DM_SIZE_UNIT = 2 /* M or m */
|
|
} dm_size_suffix_t;
|
|
|
|
/*
|
|
* Convert a size (in 512-byte sectors) into a printable string using units of unit_type.
|
|
* An upper-case unit_type indicates output units based on powers of 1000 are
|
|
* required; a lower-case unit_type indicates powers of 1024.
|
|
* For correct operation, unit_factor must be one of:
|
|
* 0 - the correct value will be calculated internally;
|
|
* or the output from dm_units_to_factor() corresponding to unit_type;
|
|
* or 'u' or 'U', an arbitrary number of bytes to use as the power base.
|
|
* Set include_suffix to 1 to include a suffix of suffix_type.
|
|
* Set use_si_units to 0 for suffixes that don't distinguish between 1000 and 1024.
|
|
* Set use_si_units to 1 for a suffix that does distinguish.
|
|
*/
|
|
const char *dm_size_to_string(struct dm_pool *mem, uint64_t size,
|
|
char unit_type, int use_si_units,
|
|
uint64_t unit_factor, int include_suffix,
|
|
dm_size_suffix_t suffix_type);
|
|
|
|
/**************************
|
|
* file/stream manipulation
|
|
**************************/
|
|
|
|
/*
|
|
* Create a directory (with parent directories if necessary).
|
|
* Returns 1 on success, 0 on failure.
|
|
*/
|
|
int dm_create_dir(const char *dir);
|
|
|
|
int dm_is_empty_dir(const char *dir);
|
|
|
|
/*
|
|
* Close a stream, with nicer error checking than fclose's.
|
|
* Derived from gnulib's close-stream.c.
|
|
*
|
|
* Close "stream". Return 0 if successful, and EOF (setting errno)
|
|
* otherwise. Upon failure, set errno to 0 if the error number
|
|
* cannot be determined. Useful mainly for writable streams.
|
|
*/
|
|
int dm_fclose(FILE *stream);
|
|
|
|
/*
|
|
* Returns size of a buffer which is allocated with dm_malloc.
|
|
* Pointer to the buffer is stored in *buf.
|
|
* Returns -1 on failure leaving buf undefined.
|
|
*/
|
|
int dm_asprintf(char **buf, const char *format, ...)
|
|
__attribute__ ((format(printf, 2, 3)));
|
|
int dm_vasprintf(char **buf, const char *format, va_list ap)
|
|
__attribute__ ((format(printf, 2, 0)));
|
|
|
|
/*
|
|
* create lockfile (pidfile) - create and lock a lock file
|
|
* @lockfile: location of lock file
|
|
*
|
|
* Returns: 1 on success, 0 otherwise, errno is handled internally
|
|
*/
|
|
int dm_create_lockfile(const char* lockfile);
|
|
|
|
/*
|
|
* Query whether a daemon is running based on its lockfile
|
|
*
|
|
* Returns: 1 if running, 0 if not
|
|
*/
|
|
int dm_daemon_is_running(const char* lockfile);
|
|
|
|
/*********************
|
|
* regular expressions
|
|
*********************/
|
|
struct dm_regex;
|
|
|
|
/*
|
|
* Initialise an array of num patterns for matching.
|
|
* Uses memory from mem.
|
|
*/
|
|
struct dm_regex *dm_regex_create(struct dm_pool *mem, const char * const *patterns,
|
|
unsigned num_patterns);
|
|
|
|
/*
|
|
* Match string s against the patterns.
|
|
* Returns the index of the highest pattern in the array that matches,
|
|
* or -1 if none match.
|
|
*/
|
|
int dm_regex_match(struct dm_regex *regex, const char *s);
|
|
|
|
/*
|
|
* This is useful for regression testing only. The idea is if two
|
|
* fingerprints are different, then the two dfas are certainly not
|
|
* isomorphic. If two fingerprints _are_ the same then it's very likely
|
|
* that the dfas are isomorphic.
|
|
*
|
|
* This function must be called before any matching is done.
|
|
*/
|
|
uint32_t dm_regex_fingerprint(struct dm_regex *regex);
|
|
|
|
/******************
|
|
* percent handling
|
|
******************/
|
|
/*
|
|
* A fixed-point representation of percent values. One percent equals to
|
|
* DM_PERCENT_1 as defined below. Values that are not multiples of DM_PERCENT_1
|
|
* represent fractions, with precision of 1/1000000 of a percent. See
|
|
* dm_percent_to_float for a conversion to a floating-point representation.
|
|
*
|
|
* You should always use dm_make_percent when building dm_percent_t values. The
|
|
* implementation of dm_make_percent is biased towards the middle: it ensures that
|
|
* the result is DM_PERCENT_0 or DM_PERCENT_100 if and only if this is the actual
|
|
* value -- it never rounds any intermediate value (> 0 or < 100) to either 0
|
|
* or 100.
|
|
*/
|
|
#define DM_PERCENT_CHAR '%'
|
|
|
|
typedef enum {
|
|
DM_PERCENT_0 = 0,
|
|
DM_PERCENT_1 = 1000000,
|
|
DM_PERCENT_100 = 100 * DM_PERCENT_1,
|
|
DM_PERCENT_INVALID = -1,
|
|
DM_PERCENT_FAILED = -2
|
|
} dm_percent_range_t;
|
|
|
|
typedef int32_t dm_percent_t;
|
|
|
|
float dm_percent_to_float(dm_percent_t percent);
|
|
dm_percent_t dm_make_percent(uint64_t numerator, uint64_t denominator);
|
|
|
|
/********************
|
|
* timestamp handling
|
|
********************/
|
|
|
|
/*
|
|
* Create a dm_timestamp object to use with dm_timestamp_get.
|
|
*/
|
|
struct dm_timestamp *dm_timestamp_alloc(void);
|
|
|
|
/*
|
|
* Update dm_timestamp object to represent the current time.
|
|
*/
|
|
int dm_timestamp_get(struct dm_timestamp *ts);
|
|
|
|
/*
|
|
* Copy a timestamp from ts_old to ts_new.
|
|
*/
|
|
void dm_timestamp_copy(struct dm_timestamp *ts_new, struct dm_timestamp *ts_old);
|
|
|
|
/*
|
|
* Compare two timestamps.
|
|
*
|
|
* Return: -1 if ts1 is less than ts2
|
|
* 0 if ts1 is equal to ts2
|
|
* 1 if ts1 is greater than ts2
|
|
*/
|
|
int dm_timestamp_compare(struct dm_timestamp *ts1, struct dm_timestamp *ts2);
|
|
|
|
/*
|
|
* Return the absolute difference in nanoseconds between
|
|
* the dm_timestamp objects ts1 and ts2.
|
|
*
|
|
* Callers that need to know whether ts1 is before, equal to, or after ts2
|
|
* in addition to the magnitude should use dm_timestamp_compare.
|
|
*/
|
|
uint64_t dm_timestamp_delta(struct dm_timestamp *ts1, struct dm_timestamp *ts2);
|
|
|
|
/*
|
|
* Destroy a dm_timestamp object.
|
|
*/
|
|
void dm_timestamp_destroy(struct dm_timestamp *ts);
|
|
|
|
/*********************
|
|
* reporting functions
|
|
*********************/
|
|
|
|
struct dm_report_object_type {
|
|
uint32_t id; /* Powers of 2 */
|
|
const char *desc;
|
|
const char *prefix; /* field id string prefix (optional) */
|
|
/* FIXME: convert to proper usage of const pointers here */
|
|
void *(*data_fn)(void *object); /* callback from report_object() */
|
|
};
|
|
|
|
struct dm_report_field;
|
|
|
|
/*
|
|
* dm_report_field_type flags
|
|
*/
|
|
#define DM_REPORT_FIELD_MASK 0x00000FFF
|
|
#define DM_REPORT_FIELD_ALIGN_MASK 0x0000000F
|
|
#define DM_REPORT_FIELD_ALIGN_LEFT 0x00000001
|
|
#define DM_REPORT_FIELD_ALIGN_RIGHT 0x00000002
|
|
#define DM_REPORT_FIELD_TYPE_MASK 0x00000FF0
|
|
#define DM_REPORT_FIELD_TYPE_NONE 0x00000000
|
|
#define DM_REPORT_FIELD_TYPE_STRING 0x00000010
|
|
#define DM_REPORT_FIELD_TYPE_NUMBER 0x00000020
|
|
#define DM_REPORT_FIELD_TYPE_SIZE 0x00000040
|
|
#define DM_REPORT_FIELD_TYPE_PERCENT 0x00000080
|
|
#define DM_REPORT_FIELD_TYPE_STRING_LIST 0x00000100
|
|
#define DM_REPORT_FIELD_TYPE_TIME 0x00000200
|
|
|
|
/* For use with reserved values only! */
|
|
#define DM_REPORT_FIELD_RESERVED_VALUE_MASK 0x0000000F
|
|
#define DM_REPORT_FIELD_RESERVED_VALUE_NAMED 0x00000001 /* only named value, less strict form of reservation */
|
|
#define DM_REPORT_FIELD_RESERVED_VALUE_RANGE 0x00000002 /* value is range - low and high value defined */
|
|
#define DM_REPORT_FIELD_RESERVED_VALUE_DYNAMIC_VALUE 0x00000004 /* value is computed in runtime */
|
|
#define DM_REPORT_FIELD_RESERVED_VALUE_FUZZY_NAMES 0x00000008 /* value names are recognized in runtime */
|
|
|
|
#define DM_REPORT_FIELD_TYPE_ID_LEN 32
|
|
#define DM_REPORT_FIELD_TYPE_HEADING_LEN 32
|
|
|
|
struct dm_report;
|
|
struct dm_report_field_type {
|
|
uint32_t type; /* object type id */
|
|
uint32_t flags; /* DM_REPORT_FIELD_* */
|
|
uint32_t offset; /* byte offset in the object */
|
|
int32_t width; /* default width */
|
|
/* string used to specify the field */
|
|
const char id[DM_REPORT_FIELD_TYPE_ID_LEN];
|
|
/* string printed in header */
|
|
const char heading[DM_REPORT_FIELD_TYPE_HEADING_LEN];
|
|
int (*report_fn)(struct dm_report *rh, struct dm_pool *mem,
|
|
struct dm_report_field *field, const void *data,
|
|
void *private_data);
|
|
const char *desc; /* description of the field */
|
|
};
|
|
|
|
/*
|
|
* Per-field reserved value.
|
|
*/
|
|
struct dm_report_field_reserved_value {
|
|
/* field_num is the position of the field in 'fields'
|
|
array passed to dm_report_init_with_selection */
|
|
uint32_t field_num;
|
|
/* the value is of the same type as the field
|
|
identified by field_num */
|
|
const void *value;
|
|
};
|
|
|
|
/*
|
|
* Reserved value is a 'value' that is used directly if any of the 'names' is hit
|
|
* or in case of fuzzy names, if such fuzzy name matches.
|
|
*
|
|
* If type is any of DM_REPORT_FIELD_TYPE_*, the reserved value is recognized
|
|
* for all fields of that type.
|
|
*
|
|
* If type is DM_REPORT_FIELD_TYPE_NONE, the reserved value is recognized
|
|
* for the exact field specified - hence the type of the value is automatically
|
|
* the same as the type of the field itself.
|
|
*
|
|
* The array of reserved values is used to initialize reporting with
|
|
* selection enabled (see also dm_report_init_with_selection function).
|
|
*/
|
|
struct dm_report_reserved_value {
|
|
const uint32_t type; /* DM_REPORT_FIELD_RESERVED_VALUE_* and DM_REPORT_FIELD_TYPE_* */
|
|
const void *value; /* reserved value:
|
|
uint64_t for DM_REPORT_FIELD_TYPE_NUMBER
|
|
uint64_t for DM_REPORT_FIELD_TYPE_SIZE (number of 512-byte sectors)
|
|
uint64_t for DM_REPORT_FIELD_TYPE_PERCENT
|
|
const char* for DM_REPORT_FIELD_TYPE_STRING
|
|
struct dm_report_field_reserved_value for DM_REPORT_FIELD_TYPE_NONE
|
|
dm_report_reserved_handler* if DM_REPORT_FIELD_RESERVED_VALUE_{DYNAMIC_VALUE,FUZZY_NAMES} is used */
|
|
const char **names; /* null-terminated array of static names for this reserved value */
|
|
const char *description; /* description of the reserved value */
|
|
};
|
|
|
|
/*
|
|
* Available actions for dm_report_reserved_value_handler.
|
|
*/
|
|
typedef enum {
|
|
DM_REPORT_RESERVED_PARSE_FUZZY_NAME,
|
|
DM_REPORT_RESERVED_GET_DYNAMIC_VALUE,
|
|
} dm_report_reserved_action_t;
|
|
|
|
/*
|
|
* Generic reserved value handler to process reserved value names and/or values.
|
|
*
|
|
* Actions and their input/output:
|
|
*
|
|
* DM_REPORT_RESERVED_PARSE_FUZZY_NAME
|
|
* data_in: const char *fuzzy_name
|
|
* data_out: const char *canonical_name, NULL if fuzzy_name not recognized
|
|
*
|
|
* DM_REPORT_RESERVED_GET_DYNAMIC_VALUE
|
|
* data_in: const char *canonical_name
|
|
* data_out: void *value, NULL if canonical_name not recognized
|
|
*
|
|
* All actions return:
|
|
*
|
|
* -1 if action not implemented
|
|
* 0 on error
|
|
* 1 on success
|
|
*/
|
|
typedef int (*dm_report_reserved_handler) (struct dm_report *rh,
|
|
struct dm_pool *mem,
|
|
uint32_t field_num,
|
|
dm_report_reserved_action_t action,
|
|
const void *data_in,
|
|
const void **data_out);
|
|
|
|
/*
|
|
* The dm_report_value_cache_{set,get} are helper functions to store and retrieve
|
|
* various values used during reporting (dm_report_field_type.report_fn) and/or
|
|
* selection processing (dm_report_reserved_handler instances) to avoid
|
|
* recalculation of these values or to share values among calls.
|
|
*/
|
|
int dm_report_value_cache_set(struct dm_report *rh, const char *name, const void *data);
|
|
const void *dm_report_value_cache_get(struct dm_report *rh, const char *name);
|
|
/*
|
|
* dm_report_init output_flags
|
|
*/
|
|
#define DM_REPORT_OUTPUT_MASK 0x000000FF
|
|
#define DM_REPORT_OUTPUT_ALIGNED 0x00000001
|
|
#define DM_REPORT_OUTPUT_BUFFERED 0x00000002
|
|
#define DM_REPORT_OUTPUT_HEADINGS 0x00000004
|
|
#define DM_REPORT_OUTPUT_FIELD_NAME_PREFIX 0x00000008
|
|
#define DM_REPORT_OUTPUT_FIELD_UNQUOTED 0x00000010
|
|
#define DM_REPORT_OUTPUT_COLUMNS_AS_ROWS 0x00000020
|
|
#define DM_REPORT_OUTPUT_MULTIPLE_TIMES 0x00000040
|
|
|
|
struct dm_report *dm_report_init(uint32_t *report_types,
|
|
const struct dm_report_object_type *types,
|
|
const struct dm_report_field_type *fields,
|
|
const char *output_fields,
|
|
const char *output_separator,
|
|
uint32_t output_flags,
|
|
const char *sort_keys,
|
|
void *private_data);
|
|
struct dm_report *dm_report_init_with_selection(uint32_t *report_types,
|
|
const struct dm_report_object_type *types,
|
|
const struct dm_report_field_type *fields,
|
|
const char *output_fields,
|
|
const char *output_separator,
|
|
uint32_t output_flags,
|
|
const char *sort_keys,
|
|
const char *selection,
|
|
const struct dm_report_reserved_value reserved_values[],
|
|
void *private_data);
|
|
/*
|
|
* Report an object, pass it through the selection criteria if they
|
|
* are present and display the result on output if it passes the criteria.
|
|
*/
|
|
int dm_report_object(struct dm_report *rh, void *object);
|
|
/*
|
|
* The same as dm_report_object, but display the result on output only if
|
|
* 'do_output' arg is set. Also, save the result of selection in 'selected'
|
|
* arg if it's not NULL (either 1 if the object passes, otherwise 0).
|
|
*/
|
|
int dm_report_object_is_selected(struct dm_report *rh, void *object, int do_output, int *selected);
|
|
|
|
/*
|
|
* Compact report output so that if field value is empty for all rows in
|
|
* the report, drop the field from output completely (including headers).
|
|
* Compact output is applicable only if report is buffered, otherwise
|
|
* this function has no effect.
|
|
*/
|
|
int dm_report_compact_fields(struct dm_report *rh);
|
|
|
|
/*
|
|
* The same as dm_report_compact_fields, but for selected fields only.
|
|
* The "fields" arg is comma separated list of field names (the same format
|
|
* as used for "output_fields" arg in dm_report_init fn).
|
|
*/
|
|
int dm_report_compact_given_fields(struct dm_report *rh, const char *fields);
|
|
|
|
/*
|
|
* Returns 1 if there is no data waiting to be output.
|
|
*/
|
|
int dm_report_is_empty(struct dm_report *rh);
|
|
|
|
/*
|
|
* Destroy report content without doing output.
|
|
*/
|
|
void dm_report_destroy_rows(struct dm_report *rh);
|
|
|
|
int dm_report_output(struct dm_report *rh);
|
|
|
|
/*
|
|
* Output the report headings for a columns-based report, even if they
|
|
* have already been shown. Useful for repeating reports that wish to
|
|
* issue a periodic reminder of the column headings.
|
|
*/
|
|
int dm_report_column_headings(struct dm_report *rh);
|
|
|
|
void dm_report_free(struct dm_report *rh);
|
|
|
|
/*
|
|
* Prefix added to each field name with DM_REPORT_OUTPUT_FIELD_NAME_PREFIX
|
|
*/
|
|
int dm_report_set_output_field_name_prefix(struct dm_report *rh,
|
|
const char *report_prefix);
|
|
|
|
int dm_report_set_selection(struct dm_report *rh, const char *selection);
|
|
|
|
/*
|
|
* Report functions are provided for simple data types.
|
|
* They take care of allocating copies of the data.
|
|
*/
|
|
int dm_report_field_string(struct dm_report *rh, struct dm_report_field *field,
|
|
const char *const *data);
|
|
int dm_report_field_string_list(struct dm_report *rh, struct dm_report_field *field,
|
|
const struct dm_list *data, const char *delimiter);
|
|
int dm_report_field_string_list_unsorted(struct dm_report *rh, struct dm_report_field *field,
|
|
const struct dm_list *data, const char *delimiter);
|
|
int dm_report_field_int32(struct dm_report *rh, struct dm_report_field *field,
|
|
const int32_t *data);
|
|
int dm_report_field_uint32(struct dm_report *rh, struct dm_report_field *field,
|
|
const uint32_t *data);
|
|
int dm_report_field_int(struct dm_report *rh, struct dm_report_field *field,
|
|
const int *data);
|
|
int dm_report_field_uint64(struct dm_report *rh, struct dm_report_field *field,
|
|
const uint64_t *data);
|
|
int dm_report_field_percent(struct dm_report *rh, struct dm_report_field *field,
|
|
const dm_percent_t *data);
|
|
|
|
/*
|
|
* For custom fields, allocate the data in 'mem' and use
|
|
* dm_report_field_set_value().
|
|
* 'sortvalue' may be NULL if it matches 'value'
|
|
*/
|
|
void dm_report_field_set_value(struct dm_report_field *field, const void *value,
|
|
const void *sortvalue);
|
|
|
|
/*
|
|
* Report group support.
|
|
*/
|
|
struct dm_report_group;
|
|
|
|
typedef enum {
|
|
DM_REPORT_GROUP_SINGLE,
|
|
DM_REPORT_GROUP_BASIC,
|
|
DM_REPORT_GROUP_JSON
|
|
} dm_report_group_type_t;
|
|
|
|
struct dm_report_group *dm_report_group_create(dm_report_group_type_t type, void *data);
|
|
int dm_report_group_push(struct dm_report_group *group, struct dm_report *report, void *data);
|
|
int dm_report_group_pop(struct dm_report_group *group);
|
|
int dm_report_group_output_and_pop_all(struct dm_report_group *group);
|
|
int dm_report_group_destroy(struct dm_report_group *group);
|
|
|
|
/*
|
|
* Stats counter access methods
|
|
*
|
|
* Each method returns the corresponding stats counter value from the
|
|
* supplied dm_stats handle for the specified region_id and area_id.
|
|
* If either region_id or area_id uses one of the special values
|
|
* DM_STATS_REGION_CURRENT or DM_STATS_AREA_CURRENT then the region
|
|
* or area is selected according to the current state of the dm_stats
|
|
* handle's embedded cursor.
|
|
*
|
|
* Two methods are provided to access counter values: a named function
|
|
* for each available counter field and a single function that accepts
|
|
* an enum value specifying the required field. New code is encouraged
|
|
* to use the enum based interface as calls to the named functions are
|
|
* implemented using the enum method internally.
|
|
*
|
|
* See the kernel documentation for complete descriptions of each
|
|
* counter field:
|
|
*
|
|
* Documentation/device-mapper/statistics.txt
|
|
* Documentation/iostats.txt
|
|
*
|
|
* reads: the number of reads completed
|
|
* reads_merged: the number of reads merged
|
|
* read_sectors: the number of sectors read
|
|
* read_nsecs: the number of nanoseconds spent reading
|
|
* writes: the number of writes completed
|
|
* writes_merged: the number of writes merged
|
|
* write_sectors: the number of sectors written
|
|
* write_nsecs: the number of nanoseconds spent writing
|
|
* io_in_progress: the number of I/Os currently in progress
|
|
* io_nsecs: the number of nanoseconds spent doing I/Os
|
|
* weighted_io_nsecs: the weighted number of nanoseconds spent doing I/Os
|
|
* total_read_nsecs: the total time spent reading in nanoseconds
|
|
* total_write_nsecs: the total time spent writing in nanoseconds
|
|
*/
|
|
|
|
#define DM_STATS_REGION_CURRENT UINT64_MAX
|
|
#define DM_STATS_AREA_CURRENT UINT64_MAX
|
|
|
|
typedef enum {
|
|
DM_STATS_READS_COUNT,
|
|
DM_STATS_READS_MERGED_COUNT,
|
|
DM_STATS_READ_SECTORS_COUNT,
|
|
DM_STATS_READ_NSECS,
|
|
DM_STATS_WRITES_COUNT,
|
|
DM_STATS_WRITES_MERGED_COUNT,
|
|
DM_STATS_WRITE_SECTORS_COUNT,
|
|
DM_STATS_WRITE_NSECS,
|
|
DM_STATS_IO_IN_PROGRESS_COUNT,
|
|
DM_STATS_IO_NSECS,
|
|
DM_STATS_WEIGHTED_IO_NSECS,
|
|
DM_STATS_TOTAL_READ_NSECS,
|
|
DM_STATS_TOTAL_WRITE_NSECS,
|
|
DM_STATS_NR_COUNTERS
|
|
} dm_stats_counter_t;
|
|
|
|
uint64_t dm_stats_get_counter(const struct dm_stats *dms,
|
|
dm_stats_counter_t counter,
|
|
uint64_t region_id, uint64_t area_id);
|
|
|
|
uint64_t dm_stats_get_reads(const struct dm_stats *dms,
|
|
uint64_t region_id, uint64_t area_id);
|
|
|
|
uint64_t dm_stats_get_reads_merged(const struct dm_stats *dms,
|
|
uint64_t region_id, uint64_t area_id);
|
|
|
|
uint64_t dm_stats_get_read_sectors(const struct dm_stats *dms,
|
|
uint64_t region_id, uint64_t area_id);
|
|
|
|
uint64_t dm_stats_get_read_nsecs(const struct dm_stats *dms,
|
|
uint64_t region_id, uint64_t area_id);
|
|
|
|
uint64_t dm_stats_get_writes(const struct dm_stats *dms,
|
|
uint64_t region_id, uint64_t area_id);
|
|
|
|
uint64_t dm_stats_get_writes_merged(const struct dm_stats *dms,
|
|
uint64_t region_id, uint64_t area_id);
|
|
|
|
uint64_t dm_stats_get_write_sectors(const struct dm_stats *dms,
|
|
uint64_t region_id, uint64_t area_id);
|
|
|
|
uint64_t dm_stats_get_write_nsecs(const struct dm_stats *dms,
|
|
uint64_t region_id, uint64_t area_id);
|
|
|
|
uint64_t dm_stats_get_io_in_progress(const struct dm_stats *dms,
|
|
uint64_t region_id, uint64_t area_id);
|
|
|
|
uint64_t dm_stats_get_io_nsecs(const struct dm_stats *dms,
|
|
uint64_t region_id, uint64_t area_id);
|
|
|
|
uint64_t dm_stats_get_weighted_io_nsecs(const struct dm_stats *dms,
|
|
uint64_t region_id, uint64_t area_id);
|
|
|
|
uint64_t dm_stats_get_total_read_nsecs(const struct dm_stats *dms,
|
|
uint64_t region_id, uint64_t area_id);
|
|
|
|
uint64_t dm_stats_get_total_write_nsecs(const struct dm_stats *dms,
|
|
uint64_t region_id, uint64_t area_id);
|
|
|
|
/*
|
|
* Derived statistics access methods
|
|
*
|
|
* Each method returns the corresponding value calculated from the
|
|
* counters stored in the supplied dm_stats handle for the specified
|
|
* region_id and area_id. If either region_id or area_id uses one of the
|
|
* special values DM_STATS_REGION_CURRENT or DM_STATS_AREA_CURRENT then
|
|
* the region or area is selected according to the current state of the
|
|
* dm_stats handle's embedded cursor.
|
|
*
|
|
* The set of metrics is based on the fields provided by the Linux
|
|
* iostats program.
|
|
*
|
|
* rd_merges_per_sec: the number of reads merged per second
|
|
* wr_merges_per_sec: the number of writes merged per second
|
|
* reads_per_sec: the number of reads completed per second
|
|
* writes_per_sec: the number of writes completed per second
|
|
* read_sectors_per_sec: the number of sectors read per second
|
|
* write_sectors_per_sec: the number of sectors written per second
|
|
* average_request_size: the average size of requests submitted
|
|
* service_time: the average service time (in ns) for requests issued
|
|
* average_queue_size: the average queue length
|
|
* average_wait_time: the average time for requests to be served (in ns)
|
|
* average_rd_wait_time: the average read wait time
|
|
* average_wr_wait_time: the average write wait time
|
|
*/
|
|
|
|
typedef enum {
|
|
DM_STATS_RD_MERGES_PER_SEC,
|
|
DM_STATS_WR_MERGES_PER_SEC,
|
|
DM_STATS_READS_PER_SEC,
|
|
DM_STATS_WRITES_PER_SEC,
|
|
DM_STATS_READ_SECTORS_PER_SEC,
|
|
DM_STATS_WRITE_SECTORS_PER_SEC,
|
|
DM_STATS_AVERAGE_REQUEST_SIZE,
|
|
DM_STATS_AVERAGE_QUEUE_SIZE,
|
|
DM_STATS_AVERAGE_WAIT_TIME,
|
|
DM_STATS_AVERAGE_RD_WAIT_TIME,
|
|
DM_STATS_AVERAGE_WR_WAIT_TIME,
|
|
DM_STATS_SERVICE_TIME,
|
|
DM_STATS_THROUGHPUT,
|
|
DM_STATS_UTILIZATION,
|
|
DM_STATS_NR_METRICS
|
|
} dm_stats_metric_t;
|
|
|
|
int dm_stats_get_metric(const struct dm_stats *dms, int metric,
|
|
uint64_t region_id, uint64_t area_id, double *value);
|
|
|
|
int dm_stats_get_rd_merges_per_sec(const struct dm_stats *dms, double *rrqm,
|
|
uint64_t region_id, uint64_t area_id);
|
|
|
|
int dm_stats_get_wr_merges_per_sec(const struct dm_stats *dms, double *rrqm,
|
|
uint64_t region_id, uint64_t area_id);
|
|
|
|
int dm_stats_get_reads_per_sec(const struct dm_stats *dms, double *rd_s,
|
|
uint64_t region_id, uint64_t area_id);
|
|
|
|
int dm_stats_get_writes_per_sec(const struct dm_stats *dms, double *wr_s,
|
|
uint64_t region_id, uint64_t area_id);
|
|
|
|
int dm_stats_get_read_sectors_per_sec(const struct dm_stats *dms,
|
|
double *rsec_s, uint64_t region_id,
|
|
uint64_t area_id);
|
|
|
|
int dm_stats_get_write_sectors_per_sec(const struct dm_stats *dms,
|
|
double *wr_s, uint64_t region_id,
|
|
uint64_t area_id);
|
|
|
|
int dm_stats_get_average_request_size(const struct dm_stats *dms,
|
|
double *arqsz, uint64_t region_id,
|
|
uint64_t area_id);
|
|
|
|
int dm_stats_get_service_time(const struct dm_stats *dms, double *svctm,
|
|
uint64_t region_id, uint64_t area_id);
|
|
|
|
int dm_stats_get_average_queue_size(const struct dm_stats *dms, double *qusz,
|
|
uint64_t region_id, uint64_t area_id);
|
|
|
|
int dm_stats_get_average_wait_time(const struct dm_stats *dms, double *await,
|
|
uint64_t region_id, uint64_t area_id);
|
|
|
|
int dm_stats_get_average_rd_wait_time(const struct dm_stats *dms,
|
|
double *await, uint64_t region_id,
|
|
uint64_t area_id);
|
|
|
|
int dm_stats_get_average_wr_wait_time(const struct dm_stats *dms,
|
|
double *await, uint64_t region_id,
|
|
uint64_t area_id);
|
|
|
|
int dm_stats_get_throughput(const struct dm_stats *dms, double *tput,
|
|
uint64_t region_id, uint64_t area_id);
|
|
|
|
int dm_stats_get_utilization(const struct dm_stats *dms, dm_percent_t *util,
|
|
uint64_t region_id, uint64_t area_id);
|
|
|
|
/*
|
|
* Statistics histogram access methods.
|
|
*
|
|
* Methods to access latency histograms for regions that have them
|
|
* enabled. Each histogram contains a configurable number of bins
|
|
* spanning a user defined latency interval.
|
|
*
|
|
* The bin count, upper and lower bin bounds, and bin values are
|
|
* made available via the following area methods.
|
|
*
|
|
* Methods to obtain a simple string representation of the histogram
|
|
* and its bounds are also provided.
|
|
*/
|
|
|
|
/*
|
|
* Retrieve a pointer to the histogram associated with the specified
|
|
* area. If the area does not have a histogram configured this function
|
|
* returns NULL.
|
|
*
|
|
* The pointer does not need to be freed explicitly by the caller: it
|
|
* will become invalid following a subsequent dm_stats_list(),
|
|
* dm_stats_populate() or dm_stats_destroy() of the corresponding
|
|
* dm_stats handle.
|
|
*
|
|
* If region_id or area_id is one of the special values
|
|
* DM_STATS_REGION_CURRENT or DM_STATS_AREA_CURRENT the current cursor
|
|
* value is used to select the region or area.
|
|
*/
|
|
struct dm_histogram *dm_stats_get_histogram(const struct dm_stats *dms,
|
|
uint64_t region_id,
|
|
uint64_t area_id);
|
|
|
|
/*
|
|
* Return the number of bins in the specified histogram handle.
|
|
*/
|
|
int dm_histogram_get_nr_bins(const struct dm_histogram *dmh);
|
|
|
|
/*
|
|
* Get the lower bound of the specified bin of the histogram for the
|
|
* area specified by region_id and area_id. The value is returned in
|
|
* nanoseconds.
|
|
*/
|
|
uint64_t dm_histogram_get_bin_lower(const struct dm_histogram *dmh, int bin);
|
|
|
|
/*
|
|
* Get the upper bound of the specified bin of the histogram for the
|
|
* area specified by region_id and area_id. The value is returned in
|
|
* nanoseconds.
|
|
*/
|
|
uint64_t dm_histogram_get_bin_upper(const struct dm_histogram *dmh, int bin);
|
|
|
|
/*
|
|
* Get the width of the specified bin of the histogram for the area
|
|
* specified by region_id and area_id. The width is equal to the bin
|
|
* upper bound minus the lower bound and yields the range of latency
|
|
* values covered by this bin. The value is returned in nanoseconds.
|
|
*/
|
|
uint64_t dm_histogram_get_bin_width(const struct dm_histogram *dmh, int bin);
|
|
|
|
/*
|
|
* Get the value of the specified bin of the histogram for the area
|
|
* specified by region_id and area_id.
|
|
*/
|
|
uint64_t dm_histogram_get_bin_count(const struct dm_histogram *dmh, int bin);
|
|
|
|
/*
|
|
* Get the percentage (relative frequency) of the specified bin of the
|
|
* histogram for the area specified by region_id and area_id.
|
|
*/
|
|
dm_percent_t dm_histogram_get_bin_percent(const struct dm_histogram *dmh,
|
|
int bin);
|
|
|
|
/*
|
|
* Return the total observations (sum of bin counts) for the histogram
|
|
* of the area specified by region_id and area_id.
|
|
*/
|
|
uint64_t dm_histogram_get_sum(const struct dm_histogram *dmh);
|
|
|
|
/*
|
|
* Histogram formatting flags.
|
|
*/
|
|
#define DM_HISTOGRAM_SUFFIX 0x1
|
|
#define DM_HISTOGRAM_VALUES 0x2
|
|
#define DM_HISTOGRAM_PERCENT 0X4
|
|
#define DM_HISTOGRAM_BOUNDS_LOWER 0x10
|
|
#define DM_HISTOGRAM_BOUNDS_UPPER 0x20
|
|
#define DM_HISTOGRAM_BOUNDS_RANGE 0x30
|
|
|
|
/*
|
|
* Return a string representation of the supplied histogram's values and
|
|
* bin boundaries.
|
|
*
|
|
* The bin argument selects the bin to format. If this argument is less
|
|
* than zero all bins will be included in the resulting string.
|
|
*
|
|
* width specifies a minimum width for the field in characters; if it is
|
|
* zero the width will be determined automatically based on the options
|
|
* selected for formatting. A value less than zero disables field width
|
|
* control: bin boundaries and values will be output with a minimum
|
|
* amount of whitespace.
|
|
*
|
|
* flags is a collection of flag arguments that control the string format:
|
|
*
|
|
* DM_HISTOGRAM_VALUES - Include bin values in the string.
|
|
* DM_HISTOGRAM_SUFFIX - Include time unit suffixes when printing bounds.
|
|
* DM_HISTOGRAM_PERCENT - Format bin values as a percentage.
|
|
*
|
|
* DM_HISTOGRAM_BOUNDS_LOWER - Include the lower bound of each bin.
|
|
* DM_HISTOGRAM_BOUNDS_UPPER - Include the upper bound of each bin.
|
|
* DM_HISTOGRAM_BOUNDS_RANGE - Show the span of each bin as "lo-up".
|
|
*
|
|
* The returned pointer does not need to be freed explicitly by the
|
|
* caller: it will become invalid following a subsequent
|
|
* dm_stats_list(), dm_stats_populate() or dm_stats_destroy() of the
|
|
* corresponding dm_stats handle.
|
|
*/
|
|
const char *dm_histogram_to_string(const struct dm_histogram *dmh, int bin,
|
|
int width, int flags);
|
|
|
|
/*************************
|
|
* config file parse/print
|
|
*************************/
|
|
typedef enum {
|
|
DM_CFG_INT,
|
|
DM_CFG_FLOAT,
|
|
DM_CFG_STRING,
|
|
DM_CFG_EMPTY_ARRAY
|
|
} dm_config_value_type_t;
|
|
|
|
struct dm_config_value {
|
|
dm_config_value_type_t type;
|
|
|
|
union {
|
|
int64_t i;
|
|
float f;
|
|
double d; /* Unused. */
|
|
const char *str;
|
|
} v;
|
|
|
|
struct dm_config_value *next; /* For arrays */
|
|
uint32_t format_flags;
|
|
};
|
|
|
|
struct dm_config_node {
|
|
const char *key;
|
|
struct dm_config_node *parent, *sib, *child;
|
|
struct dm_config_value *v;
|
|
int id;
|
|
};
|
|
|
|
struct dm_config_tree {
|
|
struct dm_config_node *root;
|
|
struct dm_config_tree *cascade;
|
|
struct dm_pool *mem;
|
|
void *custom;
|
|
};
|
|
|
|
struct dm_config_tree *dm_config_create(void);
|
|
struct dm_config_tree *dm_config_from_string(const char *config_settings);
|
|
int dm_config_parse(struct dm_config_tree *cft, const char *start, const char *end);
|
|
int dm_config_parse_without_dup_node_check(struct dm_config_tree *cft, const char *start, const char *end);
|
|
|
|
void *dm_config_get_custom(struct dm_config_tree *cft);
|
|
void dm_config_set_custom(struct dm_config_tree *cft, void *custom);
|
|
|
|
/*
|
|
* When searching, first_cft is checked before second_cft.
|
|
*/
|
|
struct dm_config_tree *dm_config_insert_cascaded_tree(struct dm_config_tree *first_cft, struct dm_config_tree *second_cft);
|
|
|
|
/*
|
|
* If there's a cascaded dm_config_tree, remove the top layer
|
|
* and return the layer below. Otherwise return NULL.
|
|
*/
|
|
struct dm_config_tree *dm_config_remove_cascaded_tree(struct dm_config_tree *cft);
|
|
|
|
/*
|
|
* Create a new, uncascaded config tree equivalent to the input cascade.
|
|
*/
|
|
struct dm_config_tree *dm_config_flatten(struct dm_config_tree *cft);
|
|
|
|
void dm_config_destroy(struct dm_config_tree *cft);
|
|
|
|
/* Simple output line by line. */
|
|
typedef int (*dm_putline_fn)(const char *line, void *baton);
|
|
/* More advaced output with config node reference. */
|
|
typedef int (*dm_config_node_out_fn)(const struct dm_config_node *cn, const char *line, void *baton);
|
|
|
|
/*
|
|
* Specification for advanced config node output.
|
|
*/
|
|
struct dm_config_node_out_spec {
|
|
dm_config_node_out_fn prefix_fn; /* called before processing config node lines */
|
|
dm_config_node_out_fn line_fn; /* called for each config node line */
|
|
dm_config_node_out_fn suffix_fn; /* called after processing config node lines */
|
|
};
|
|
|
|
/* Write the node and any subsequent siblings it has. */
|
|
int dm_config_write_node(const struct dm_config_node *cn, dm_putline_fn putline, void *baton);
|
|
int dm_config_write_node_out(const struct dm_config_node *cn, const struct dm_config_node_out_spec *out_spec, void *baton);
|
|
|
|
/* Write given node only without subsequent siblings. */
|
|
int dm_config_write_one_node(const struct dm_config_node *cn, dm_putline_fn putline, void *baton);
|
|
int dm_config_write_one_node_out(const struct dm_config_node *cn, const struct dm_config_node_out_spec *out_spec, void *baton);
|
|
|
|
struct dm_config_node *dm_config_find_node(const struct dm_config_node *cn, const char *path);
|
|
int dm_config_has_node(const struct dm_config_node *cn, const char *path);
|
|
int dm_config_remove_node(struct dm_config_node *parent, struct dm_config_node *remove);
|
|
const char *dm_config_find_str(const struct dm_config_node *cn, const char *path, const char *fail);
|
|
const char *dm_config_find_str_allow_empty(const struct dm_config_node *cn, const char *path, const char *fail);
|
|
int dm_config_find_int(const struct dm_config_node *cn, const char *path, int fail);
|
|
int64_t dm_config_find_int64(const struct dm_config_node *cn, const char *path, int64_t fail);
|
|
float dm_config_find_float(const struct dm_config_node *cn, const char *path, float fail);
|
|
|
|
const struct dm_config_node *dm_config_tree_find_node(const struct dm_config_tree *cft, const char *path);
|
|
const char *dm_config_tree_find_str(const struct dm_config_tree *cft, const char *path, const char *fail);
|
|
const char *dm_config_tree_find_str_allow_empty(const struct dm_config_tree *cft, const char *path, const char *fail);
|
|
int dm_config_tree_find_int(const struct dm_config_tree *cft, const char *path, int fail);
|
|
int64_t dm_config_tree_find_int64(const struct dm_config_tree *cft, const char *path, int64_t fail);
|
|
float dm_config_tree_find_float(const struct dm_config_tree *cft, const char *path, float fail);
|
|
int dm_config_tree_find_bool(const struct dm_config_tree *cft, const char *path, int fail);
|
|
|
|
/*
|
|
* Understands (0, ~0), (y, n), (yes, no), (on,
|
|
* off), (true, false).
|
|
*/
|
|
int dm_config_find_bool(const struct dm_config_node *cn, const char *path, int fail);
|
|
int dm_config_value_is_bool(const struct dm_config_value *v);
|
|
|
|
int dm_config_get_uint32(const struct dm_config_node *cn, const char *path, uint32_t *result);
|
|
int dm_config_get_uint64(const struct dm_config_node *cn, const char *path, uint64_t *result);
|
|
int dm_config_get_str(const struct dm_config_node *cn, const char *path, const char **result);
|
|
int dm_config_get_list(const struct dm_config_node *cn, const char *path, const struct dm_config_value **result);
|
|
int dm_config_get_section(const struct dm_config_node *cn, const char *path, const struct dm_config_node **result);
|
|
|
|
unsigned dm_config_maybe_section(const char *str, unsigned len);
|
|
|
|
const char *dm_config_parent_name(const struct dm_config_node *n);
|
|
|
|
struct dm_config_node *dm_config_clone_node_with_mem(struct dm_pool *mem, const struct dm_config_node *node, int siblings);
|
|
struct dm_config_node *dm_config_create_node(struct dm_config_tree *cft, const char *key);
|
|
struct dm_config_value *dm_config_create_value(struct dm_config_tree *cft);
|
|
struct dm_config_node *dm_config_clone_node(struct dm_config_tree *cft, const struct dm_config_node *cn, int siblings);
|
|
|
|
/*
|
|
* Common formatting flags applicable to all config node types (lower 16 bits).
|
|
*/
|
|
#define DM_CONFIG_VALUE_FMT_COMMON_ARRAY 0x00000001 /* value is array */
|
|
#define DM_CONFIG_VALUE_FMT_COMMON_EXTRA_SPACES 0x00000002 /* add spaces in "key = value" pairs in constrast to "key=value" for better readability */
|
|
|
|
/*
|
|
* Type-related config node formatting flags (higher 16 bits).
|
|
*/
|
|
/* int-related formatting flags */
|
|
#define DM_CONFIG_VALUE_FMT_INT_OCTAL 0x00010000 /* print number in octal form */
|
|
|
|
/* string-related formatting flags */
|
|
#define DM_CONFIG_VALUE_FMT_STRING_NO_QUOTES 0x00010000 /* do not print quotes around string value */
|
|
|
|
void dm_config_value_set_format_flags(struct dm_config_value *cv, uint32_t format_flags);
|
|
uint32_t dm_config_value_get_format_flags(struct dm_config_value *cv);
|
|
|
|
struct dm_pool *dm_config_memory(struct dm_config_tree *cft);
|
|
|
|
/* Udev device directory. */
|
|
#define DM_UDEV_DEV_DIR "/dev/"
|
|
|
|
/* Cookie prefixes.
|
|
*
|
|
* The cookie value consists of a prefix (16 bits) and a base (16 bits).
|
|
* We can use the prefix to store the flags. These flags are sent to
|
|
* kernel within given dm task. When returned back to userspace in
|
|
* DM_COOKIE udev environment variable, we can control several aspects
|
|
* of udev rules we use by decoding the cookie prefix. When doing the
|
|
* notification, we replace the cookie prefix with DM_COOKIE_MAGIC,
|
|
* so we notify the right semaphore.
|
|
*
|
|
* It is still possible to use cookies for passing the flags to udev
|
|
* rules even when udev_sync is disabled. The base part of the cookie
|
|
* will be zero (there's no notification semaphore) and prefix will be
|
|
* set then. However, having udev_sync enabled is highly recommended.
|
|
*/
|
|
#define DM_COOKIE_MAGIC 0x0D4D
|
|
#define DM_UDEV_FLAGS_MASK 0xFFFF0000
|
|
#define DM_UDEV_FLAGS_SHIFT 16
|
|
|
|
/*
|
|
* DM_UDEV_DISABLE_DM_RULES_FLAG is set in case we need to disable
|
|
* basic device-mapper udev rules that create symlinks in /dev/<DM_DIR>
|
|
* directory. However, we can't reliably prevent creating default
|
|
* nodes by udev (commonly /dev/dm-X, where X is a number).
|
|
*/
|
|
#define DM_UDEV_DISABLE_DM_RULES_FLAG 0x0001
|
|
/*
|
|
* DM_UDEV_DISABLE_SUBSYTEM_RULES_FLAG is set in case we need to disable
|
|
* subsystem udev rules, but still we need the general DM udev rules to
|
|
* be applied (to create the nodes and symlinks under /dev and /dev/disk).
|
|
*/
|
|
#define DM_UDEV_DISABLE_SUBSYSTEM_RULES_FLAG 0x0002
|
|
/*
|
|
* DM_UDEV_DISABLE_DISK_RULES_FLAG is set in case we need to disable
|
|
* general DM rules that set symlinks in /dev/disk directory.
|
|
*/
|
|
#define DM_UDEV_DISABLE_DISK_RULES_FLAG 0x0004
|
|
/*
|
|
* DM_UDEV_DISABLE_OTHER_RULES_FLAG is set in case we need to disable
|
|
* all the other rules that are not general device-mapper nor subsystem
|
|
* related (the rules belong to other software or packages). All foreign
|
|
* rules should check this flag directly and they should ignore further
|
|
* rule processing for such event.
|
|
*/
|
|
#define DM_UDEV_DISABLE_OTHER_RULES_FLAG 0x0008
|
|
/*
|
|
* DM_UDEV_LOW_PRIORITY_FLAG is set in case we need to instruct the
|
|
* udev rules to give low priority to the device that is currently
|
|
* processed. For example, this provides a way to select which symlinks
|
|
* could be overwritten by high priority ones if their names are equal.
|
|
* Common situation is a name based on FS UUID while using origin and
|
|
* snapshot devices.
|
|
*/
|
|
#define DM_UDEV_LOW_PRIORITY_FLAG 0x0010
|
|
/*
|
|
* DM_UDEV_DISABLE_LIBRARY_FALLBACK is set in case we need to disable
|
|
* libdevmapper's node management. We will rely on udev completely
|
|
* and there will be no fallback action provided by libdevmapper if
|
|
* udev does something improperly. Using the library fallback code has
|
|
* a consequence that you need to take into account: any device node
|
|
* or symlink created without udev is not recorded in udev database
|
|
* which other applications may read to get complete list of devices.
|
|
* For this reason, use of DM_UDEV_DISABLE_LIBRARY_FALLBACK is
|
|
* recommended on systems where udev is used. Keep library fallback
|
|
* enabled just for exceptional cases where you need to debug udev-related
|
|
* problems. If you hit such problems, please contact us through upstream
|
|
* LVM2 development mailing list (see also README file). This flag is
|
|
* currently not set by default in libdevmapper so you need to set it
|
|
* explicitly if you're sure that udev is behaving correctly on your
|
|
* setups.
|
|
*/
|
|
#define DM_UDEV_DISABLE_LIBRARY_FALLBACK 0x0020
|
|
/*
|
|
* DM_UDEV_PRIMARY_SOURCE_FLAG is automatically appended by
|
|
* libdevmapper for all ioctls generating udev uevents. Once used in
|
|
* udev rules, we know if this is a real "primary sourced" event or not.
|
|
* We need to distinguish real events originated in libdevmapper from
|
|
* any spurious events to gather all missing information (e.g. events
|
|
* generated as a result of "udevadm trigger" command or as a result
|
|
* of the "watch" udev rule).
|
|
*/
|
|
#define DM_UDEV_PRIMARY_SOURCE_FLAG 0x0040
|
|
|
|
/*
|
|
* Udev flags reserved for use by any device-mapper subsystem.
|
|
*/
|
|
#define DM_SUBSYSTEM_UDEV_FLAG0 0x0100
|
|
#define DM_SUBSYSTEM_UDEV_FLAG1 0x0200
|
|
#define DM_SUBSYSTEM_UDEV_FLAG2 0x0400
|
|
#define DM_SUBSYSTEM_UDEV_FLAG3 0x0800
|
|
#define DM_SUBSYSTEM_UDEV_FLAG4 0x1000
|
|
#define DM_SUBSYSTEM_UDEV_FLAG5 0x2000
|
|
#define DM_SUBSYSTEM_UDEV_FLAG6 0x4000
|
|
#define DM_SUBSYSTEM_UDEV_FLAG7 0x8000
|
|
|
|
int dm_cookie_supported(void);
|
|
|
|
/*
|
|
* Udev synchronisation functions.
|
|
*/
|
|
void dm_udev_set_sync_support(int sync_with_udev);
|
|
int dm_udev_get_sync_support(void);
|
|
void dm_udev_set_checking(int checking);
|
|
int dm_udev_get_checking(void);
|
|
|
|
/*
|
|
* Default value to get new auto generated cookie created
|
|
*/
|
|
#define DM_COOKIE_AUTO_CREATE 0
|
|
int dm_udev_create_cookie(uint32_t *cookie);
|
|
int dm_udev_complete(uint32_t cookie);
|
|
int dm_udev_wait(uint32_t cookie);
|
|
|
|
/*
|
|
* dm_dev_wait_immediate
|
|
* If *ready is 1 on return, the wait is complete.
|
|
* If *ready is 0 on return, the wait is incomplete and either
|
|
* this function or dm_udev_wait() must be called again.
|
|
* Returns 0 on error, when neither function should be called again.
|
|
*/
|
|
int dm_udev_wait_immediate(uint32_t cookie, int *ready);
|
|
|
|
#define DM_DEV_DIR_UMASK 0022
|
|
#define DM_CONTROL_NODE_UMASK 0177
|
|
|
|
#ifdef __cplusplus
|
|
}
|
|
#endif
|
|
#endif /* LIB_DEVICE_MAPPER_H */
|