Mirror of git://sourceware.org/git/lvm2.git (synced 2024-10-06 22:19:30 +03:00)

commit 00f1b208a1 (parent: d51429254f)
[io paths] Unpick agk's aio stuff
@@ -59,22 +59,6 @@ devices {

        # This configuration option is advanced.
        scan = [ "/dev" ]

        # Configuration option devices/use_aio.
        # Use linux asynchronous I/O for parallel device access where possible.
        # This configuration option has an automatic default value.
        # use_aio = 1

        # Configuration option devices/aio_max.
        # Maximum number of asynchronous I/Os to issue concurrently.
        # This configuration option has an automatic default value.
        # aio_max = 128

        # Configuration option devices/aio_memory.
        # Approximate maximum total amount of memory (in MB) used
        # for asynchronous I/O buffers.
        # This configuration option has an automatic default value.
        # aio_memory = 10

        # Configuration option devices/obtain_device_list_from_udev.
        # Obtain the list of available devices from udev.
        # This avoids opening or using any inapplicable non-block devices or
configure.in (22 lines changed)
@@ -39,7 +39,6 @@ case "$host_os" in
                LDDEPS="$LDDEPS .export.sym"
                LIB_SUFFIX=so
                DEVMAPPER=yes
                AIO=yes
                BUILD_LVMETAD=no
                BUILD_LVMPOLLD=no
                LOCKDSANLOCK=no
@@ -59,7 +58,6 @@ case "$host_os" in
                CLDNOWHOLEARCHIVE=
                LIB_SUFFIX=dylib
                DEVMAPPER=yes
                AIO=no
                ODIRECT=no
                DM_IOCTLS=no
                SELINUX=no
@@ -1124,24 +1122,6 @@ if test "$DEVMAPPER" = yes; then
        AC_DEFINE([DEVMAPPER_SUPPORT], 1, [Define to 1 to enable LVM2 device-mapper interaction.])
fi

################################################################################
dnl -- Disable aio
AC_MSG_CHECKING(whether to use asynchronous I/O)
AC_ARG_ENABLE(aio,
              AC_HELP_STRING([--disable-aio],
                             [disable asynchronous I/O]),
              AIO=$enableval)
AC_MSG_RESULT($AIO)

if test "$AIO" = yes; then
        AC_CHECK_LIB(aio, io_setup,
                     [AC_DEFINE([AIO_SUPPORT], 1, [Define to 1 if aio is available.])
                      AIO_LIBS="-laio"
                      AIO_SUPPORT=yes],
                     [AIO_LIBS=
                      AIO_SUPPORT=no ])
fi

################################################################################
dnl -- Build lvmetad
AC_MSG_CHECKING(whether to build LVMetaD)
@@ -2081,11 +2061,9 @@ AC_SUBST(DEFAULT_USE_LVMETAD)
AC_SUBST(DEFAULT_USE_LVMPOLLD)
AC_SUBST(DEFAULT_USE_LVMLOCKD)
AC_SUBST(DEVMAPPER)
AC_SUBST(AIO)
AC_SUBST(DLM_CFLAGS)
AC_SUBST(DLM_LIBS)
AC_SUBST(DL_LIBS)
AC_SUBST(AIO_LIBS)
AC_SUBST(DMEVENTD_PATH)
AC_SUBST(DM_LIB_PATCHLEVEL)
AC_SUBST(ELDFLAGS)
@@ -1,215 +0,0 @@
Introducing asynchronous I/O to LVM
===================================

Issuing I/O asynchronously means instructing the kernel to perform specific
I/O and return immediately without waiting for it to complete.  The data
is collected from the kernel later.
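
For reference, the kernel interface in question is the libaio one this patch
had wired in: describe the I/O in an iocb, submit it, carry on with other
work, and collect the completion later.  A minimal sketch of that pattern
(error handling omitted; fd, buf, size and offset are assumed to be set up
by the caller, with buf suitably aligned for O_DIRECT):

    #include <libaio.h>

    io_context_t ctx = 0;
    struct iocb cb;
    struct iocb *cbs[1] = { &cb };
    struct io_event ev;

    io_setup(128, &ctx);                        /* reserve capacity for 128 events */
    io_prep_pread(&cb, fd, buf, size, offset);  /* describe the read */
    io_submit(ctx, 1, cbs);                     /* returns without waiting */
    /* ... perform other work here ... */
    io_getevents(ctx, 1, 1, &ev, NULL);         /* collect the completion later */
    io_destroy(ctx);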

Advantages
----------

A1. While waiting for the I/O to happen, the program could perform other
operations.

A2. When LVM is searching for its Physical Volumes, it issues a small amount of
I/O to a large number of disks.  If this were issued in parallel the overall
runtime might be shorter while there should be little effect on the CPU time.

A3. If more than one device times out, the waits can be taken in parallel,
again reducing the runtime.  This applies globally, not just while the code
is searching for Physical Volumes, so reading, writing and committing the
metadata may occasionally benefit to some extent as well, and there are
probably maintenance advantages in using the same method of I/O throughout
the main body of the code.

A4. By introducing a simple callback function mechanism, the conversion can be
performed largely incrementally by first refactoring and continuing to
use synchronous I/O with the callbacks performed immediately.  This allows the
callbacks to be introduced without changing the running sequence of the code
initially.  Future projects could refactor some of the calling sites to
simplify the code structure and even eliminate some of the nesting.
This allows each part of what might ultimately amount to a large change to be
introduced and tested independently.
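
The callback mechanism can be sketched as follows.  The signature matches the
lvm_callback_fn_t type used in the hunks below (a failure flag, the I/O
flags, a caller context and the data read); _dev_read_with_callback() and
_dev_read_sync() are illustrative names only, not the real interfaces:

    typedef void (*lvm_callback_fn_t)(int failed, unsigned ioflags,
                                      void *context, const void *data);

    /* Synchronous shim: perform the read now, then fire the callback
     * immediately, so callers cannot tell which path was taken. */
    static int _dev_read_with_callback(struct device *dev, uint64_t offset,
                                       size_t len, char *buf, unsigned ioflags,
                                       lvm_callback_fn_t fn, void *context)
    {
            int failed = !_dev_read_sync(dev, offset, len, buf);

            if (fn)
                    fn(failed, ioflags, context, failed ? NULL : buf);

            return !failed;
    }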

Disadvantages
-------------

D1. The resulting code may be more complex with more failure modes to
handle.  Mitigate by thorough auditing and testing, rolling out
gradually, and offering a simple switch to revert to the old behaviour.

D2. The Linux asynchronous I/O implementation is less mature than
its synchronous I/O implementation and might expose problems that
depend on the version of the kernel or library used.  Fixes or
workarounds for some of these might require kernel changes.  For
example, there are suggestions that despite being supposedly async,
there are still cases where system calls can block.  There might be
resource dependencies on other processes running on the system that make
it unsuitable for use while any devices are suspended.  Mitigation
as for D1.

D3. The error handling within callbacks becomes more complicated.
However, we know that existing call paths can already sometimes discard
errors, sometimes deliberately, sometimes not, so this aspect is in need
of a complete review anyway and the new approach will make the error
handling more transparent.  Aim initially for overall behaviour that is
no worse than that of the existing code, then work on improving it
later.

D4. The work will take a few weeks to code and test.  This leads to a
significant opportunity cost when compared against other enhancements
that could be achieved in that time.  However, the proof-of-concept work
performed while writing this design has satisfied me that the work could
proceed and be committed incrementally as a background task.

Observations regarding LVM's I/O Architecture
---------------------------------------------

H1. All device, metadata and config file I/O is constrained to pass through a
single route in lib/device.

H2. The first step of the analysis was to instrument this code path with
log_debug messages.  I/O is split into the following categories:

        "dev signatures",
        "PV labels",
        "VG metadata header",
        "VG metadata content",
        "extra VG metadata header",
        "extra VG metadata content",
        "LVM1 metadata",
        "pool metadata",
        "LV content",
        "logging",

H3. A bounce buffer is used for most I/O.

H4. Most callers finish using the supplied data before any further I/O is
issued.  The few that don't could be converted trivially to do so.

H5. There is one stream of I/O per metadata area on each device.

H6. Some reads fall at offsets close to immediately preceding reads, so it's
possible to avoid these by caching one "block" per metadata area I/O stream.

H7. Simple analysis suggests a minimum aligned read size of 8k would deliver
immediate gains from this caching.  A larger size might perform worse because
almost all the time the extra data read would not be used, but this can be
re-examined and tuned after the code is in place.
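
To make H6 and H7 concrete, here is a minimal sketch of the caching (names
are illustrative, and _dev_read_sync is the same placeholder as in the
earlier sketch; the real implementation kept its cached buffers in
struct device_buffer in lib/device):

    #define MIN_READ_SIZE (8 * 1024)    /* the 8k minimum from H7 */

    struct block_cache {
            uint64_t start;             /* device offset of the cached block */
            int valid;
            char buf[MIN_READ_SIZE];
    };

    /* Return a read-only pointer into the cached block, refilling the
     * cache only when the request is not already covered by it. */
    static const void *_cached_read(struct device *dev, struct block_cache *bc,
                                    uint64_t offset, size_t len)
    {
            uint64_t block = offset & ~(uint64_t) (MIN_READ_SIZE - 1);

            if (offset - block + len > MIN_READ_SIZE)
                    return NULL;        /* request spans two blocks */

            if (!bc->valid || bc->start != block) {
                    if (!_dev_read_sync(dev, block, MIN_READ_SIZE, bc->buf))
                            return NULL;
                    bc->start = block;
                    bc->valid = 1;
            }

            return bc->buf + (offset - block);
    }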

Proposal
--------

P1. Retain the "single I/O path" but offer an asynchronous option.

P2. Eliminate the bounce buffer in most cases by improving alignment.

P3. Reduce the number of reads by always reading a minimum of an aligned
8k block.

P4. Eliminate repeated reads by caching the last block read and changing
the lib/device interface to return a pointer to read-only data within
this block.

P5. Only perform these interface changes for code on the critical path
for now by converting other code sites to use wrappers around the new
interface.

P6. Treat asynchronous I/O as the interface of choice and optimise only
for this case.

P7. Convert the callers on the critical path to pass callback functions
to the device layer.  These functions will be called later with the
read-only data, a context pointer and a success/failure indicator.
Where an existing function performs a sequence of I/O, this has the
advantage of breaking up the large function into smaller ones and
wrapping the parameters used into structures.  While this might look
rather messy and ad hoc in the short term, it's a first step towards
breaking up confusingly long functions into component parts, wrapping
the existing long parameter lists into more appropriate structures and
refactoring these parts of the code.
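
To illustrate P7 (hypothetical names throughout, except dev_read_callback()
and the callback signature, which appear in the hunks below): a function that
read a VG header and then parsed it splits into an issuing half and a
continuation, with the old parameter list carried in a small context struct:

    struct vg_read_baton {                  /* wraps the old parameter list */
            struct volume_group *vg;
            struct device *dev;
    };

    static void _vg_header_received(int failed, unsigned ioflags,
                                    void *context, const void *data)
    {
            struct vg_read_baton *b = context;

            if (failed) {                   /* errors surface in the callback */
                    log_error("Failed to read VG header from %s.",
                              dev_name(b->dev));
                    return;
            }

            _parse_vg_header(b->vg, data);  /* second half of the old function */
    }

    /* First half: issue the read and return.  The continuation runs when
     * the data arrives, or immediately on the synchronous fallback. */
    dev_read_callback(dev, offset, len, reason, ioflags,
                      _vg_header_received, baton);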

P8. Limit the resources used by the asynchronous I/O by using two
tunable parameters, one limiting the number of outstanding I/Os issued
and another limiting the total amount of memory used.
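(These tunables became devices/aio_max and devices/aio_memory in the
configuration hunks below.  The implementation this commit removes tracked
both limits in a single helper in the device layer, reproduced here:)

    /*
     * Track the amount of in-flight async I/O.
     * If it exceeds the defined threshold set _aio_must_queue.
     */
    static void _update_aio_counters(int nr, ssize_t bytes)
    {
            static int64_t aio_bytes = 0;
            static int aio_count = 0;

            aio_bytes += bytes;         /* negative when an I/O completes */
            aio_count += nr;

            if (aio_count >= _aio_max || aio_bytes > _aio_memory_max)
                    _aio_must_queue = 1;
            else
                    _aio_must_queue = 0;
    }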

P9. Provide a fallback option if asynchronous I/O is unavailable by
sharing the code paths but issuing the I/O synchronously and calling the
callback immediately.

P10. Only allocate the buffer for the I/O at the point where the I/O is
about to be issued.

P11. If the thresholds are exceeded, add the request to a simple queue,
and process it later after some I/O has completed.
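(Again from the removed implementation: queueing was a single list
operation, and the queue was drained from dev_async_getevents() once
completions freed up capacity:)

    static void _queue_aio(struct device_buffer *devbuf)
    {
            dm_list_add(&_aio_queue, &devbuf->aio_queued);
            log_debug_io("Queueing aio.");
    }

    /* In dev_async_getevents(), after reaping completed events: */
    dm_list_iterate_items_gen_safe(devbuf, tmp, &_aio_queue, aio_queued) {
            if (_aio_must_queue)
                    break;              /* still at the limits; stop draining */
            dm_list_del(&devbuf->aio_queued);
            _io(devbuf, 1);             /* issue the deferred request */
    }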

Future work
-----------

F1. Perform a complete review of the error tracking so that device
failures are handled and reported more cleanly, extending the existing
basic error counting mechanism.

F2. Consider whether some of the nested callbacks can be eliminated,
which would allow for additional simplifications.

F3. Adjust the contents of the ad hoc context structs into more logical
arrangements and use them more widely.

F4. Perform wider refactoring of these areas of code.


Testing considerations
----------------------

T1. The changes touch code on the device path, so a thorough re-test of
the device layer is required.  The new code needs a full audit down
through the library layer into the kernel to check that all the error
conditions that are currently implemented (such as EAGAIN) are handled
sensibly.  (LVM's I/O layer needs to remain as solid as we can make it.)

T2. The current test suite provides a reasonably broad range of coverage
of this area but is far from comprehensive.


Acceptance criteria
-------------------

A1. The current test suite should pass to the same extent as before the
changes.

A2. When all debugging and logging is disabled, strace -c must show
improvements, e.g. the expected reduction in the number of reads.

A3. Running a range of commands under valgrind must not reveal any
new leaks due to the changes.

A4. All new Coverity reports from the change must be addressed.

A5. CPU time should be similar to that before, as the same work
is being done overall, just in a different order.

A6. Tests need to show improved behaviour in targeted areas.  For example,
if several devices are slow and time out, the delays should occur
in parallel and the elapsed time should be less than before.


Release considerations
----------------------

R1. Async I/O should be widely available and largely reliable on Linux
nowadays (even though parts of its interface and implementation remain a
matter of controversy) so we should try to make its use the default
wherever it is supported.  If certain types of systems have problems we
should try to detect those cases and disable it automatically there.

R2. Because the implications of an unexpected problem in the new code
could be severe for the people affected, the roll-out needs to be gentle,
without a deadline, to allow us plenty of time to gain confidence in the
new code.  Our own testing will only be able to cover a tiny fraction of
the different setups our users have, so we need to look out proactively
for problems caused by this and encourage people to test it on their own
systems and report back.  It must go into the tree near the start of a
release cycle rather than at the end to provide time for our confidence
in it to grow.
@@ -1,8 +1,5 @@
/* include/configure.h.in. Generated from configure.in by autoheader. */

/* Define to 1 if aio is available. */
#undef AIO_SUPPORT

/* Define to 1 to use libblkid detection of signatures when wiping. */
#undef BLKID_WIPING_SUPPORT

lib/cache/lvmcache.c (51 lines changed)
@@ -141,8 +141,6 @@ void lvmcache_seed_infos_from_lvmetad(struct cmd_context *cmd)
/* Volume Group metadata cache functions */
static void _free_cached_vgmetadata(struct lvmcache_vginfo *vginfo)
{
        struct lvmcache_info *info;

        if (!vginfo || !vginfo->vgmetadata)
                return;

@@ -156,10 +154,6 @@ static void _free_cached_vgmetadata(struct lvmcache_vginfo *vginfo)
                vginfo->cft = NULL;
        }

        /* Invalidate any cached device buffers */
        dm_list_iterate_items(info, &vginfo->infos)
                devbufs_release(info->dev);

        log_debug_cache("lvmcache: VG %s wiped.", vginfo->vgname);

        release_vg(vginfo->cached_vg);
@@ -548,6 +542,7 @@ const struct format_type *lvmcache_fmt_from_vgname(struct cmd_context *cmd,
{
        struct lvmcache_vginfo *vginfo;
        struct lvmcache_info *info;
        struct label *label;
        struct dm_list *devh, *tmp;
        struct dm_list devs;
        struct device_list *devl;
@@ -592,7 +587,7 @@ const struct format_type *lvmcache_fmt_from_vgname(struct cmd_context *cmd,

        dm_list_iterate_safe(devh, tmp, &devs) {
                devl = dm_list_item(devh, struct device_list);
                (void) label_read(devl->dev, NULL, UINT64_C(0));
                (void) label_read(devl->dev, &label, UINT64_C(0));
                dm_list_del(&devl->list);
                dm_free(devl);
        }
@@ -773,8 +768,10 @@ char *lvmcache_vgname_from_pvid(struct cmd_context *cmd, const char *pvid)

static void _rescan_entry(struct lvmcache_info *info)
{
        struct label *label;

        if (info->status & CACHE_INVALID)
                (void) label_read(info->dev, NULL, UINT64_C(0));
                (void) label_read(info->dev, &label, UINT64_C(0));
}

static int _scan_invalid(void)
@@ -1096,31 +1093,17 @@ next:
                goto next;
        }

/* Track the number of outstanding label reads */
/* FIXME Switch to struct and also track failed */
static void _process_label_data(int failed, unsigned ioflags, void *context, const void *data)
{
        int *nr_labels_outstanding = context;

        if (!*nr_labels_outstanding) {
                log_error(INTERNAL_ERROR "_process_label_data called too many times");
                return;
        }

        (*nr_labels_outstanding)--;
}

int lvmcache_label_scan(struct cmd_context *cmd)
{
        struct dm_list del_cache_devs;
        struct dm_list add_cache_devs;
        struct lvmcache_info *info;
        struct device_list *devl;
        struct label *label;
        struct dev_iter *iter;
        struct device *dev;
        struct format_type *fmt;
        int dev_count = 0;
        int nr_labels_outstanding = 0;

        int r = 0;

@@ -1159,22 +1142,13 @@ int lvmcache_label_scan(struct cmd_context *cmd)
        _destroy_duplicate_device_list(&_found_duplicate_devs);

        while ((dev = dev_iter_get(iter))) {
                log_debug_io("Scanning device %s", dev_name(dev));
                nr_labels_outstanding++;
                if (!label_read_callback(dev, UINT64_C(0), AIO_SUPPORTED_CODE_PATH, _process_label_data, &nr_labels_outstanding))
                        nr_labels_outstanding--;
                (void) label_read(dev, &label, UINT64_C(0));
                dev_count++;
        }

        dev_iter_destroy(iter);

        while (nr_labels_outstanding) {
                log_very_verbose("Scanned %d device labels (%d outstanding)", dev_count, nr_labels_outstanding);
                if (!dev_async_getevents())
                        return_0;
        }

        log_very_verbose("Scanned %d device labels (%d outstanding)", dev_count, nr_labels_outstanding);
        log_very_verbose("Scanned %d device labels", dev_count);

        /*
         * _choose_preferred_devs() returns:
@@ -1208,7 +1182,7 @@ int lvmcache_label_scan(struct cmd_context *cmd)

        dm_list_iterate_items(devl, &add_cache_devs) {
                log_debug_cache("Rescan preferred device %s for lvmcache", dev_name(devl->dev));
                (void) label_read(devl->dev, NULL, UINT64_C(0));
                (void) label_read(devl->dev, &label, UINT64_C(0));
        }

        dm_list_splice(&_unused_duplicate_devs, &del_cache_devs);
@@ -1228,7 +1202,7 @@ int lvmcache_label_scan(struct cmd_context *cmd)
         */
        if (_force_label_scan && cmd->is_long_lived &&
            cmd->dump_filter && cmd->full_filter && cmd->full_filter->dump &&
            !cmd->full_filter->dump(cmd->full_filter, cmd->mem, 0))
            !cmd->full_filter->dump(cmd->full_filter, 0))
                stack;

        r = 1;

@@ -1529,6 +1503,7 @@ const char *lvmcache_pvid_from_devname(struct cmd_context *cmd,
                                       const char *devname)
{
        struct device *dev;
        struct label *label;

        if (!(dev = dev_cache_get(devname, cmd->filter))) {
                log_error("%s: Couldn't find device. Check your filters?",
@@ -1536,7 +1511,7 @@ const char *lvmcache_pvid_from_devname(struct cmd_context *cmd,
                return NULL;
        }

        if (!(label_read(dev, NULL, UINT64_C(0))))
        if (!(label_read(dev, &label, UINT64_C(0))))
                return NULL;

        return dev->pvid;
@@ -2001,7 +1976,7 @@ int lvmcache_add_orphan_vginfo(const char *vgname, struct format_type *fmt)
        return _lvmcache_update_vgname(NULL, vgname, vgname, 0, "", fmt);
}

int lvmcache_update_vgname_and_id(struct lvmcache_info *info, const struct lvmcache_vgsummary *vgsummary)
int lvmcache_update_vgname_and_id(struct lvmcache_info *info, struct lvmcache_vgsummary *vgsummary)
{
        const char *vgname = vgsummary->vgname;
        const char *vgid = (char *)&vgsummary->vgid;

lib/cache/lvmcache.h (2 lines changed)

@@ -85,7 +85,7 @@ void lvmcache_del(struct lvmcache_info *info);

/* Update things */
int lvmcache_update_vgname_and_id(struct lvmcache_info *info,
                                  const struct lvmcache_vgsummary *vgsummary);
                                  struct lvmcache_vgsummary *vgsummary);
int lvmcache_update_vg(struct volume_group *vg, unsigned precommitted);

void lvmcache_lock_vgname(const char *vgname, int read_only);

lib/cache/lvmetad.c (2 lines changed)

@@ -1771,7 +1771,7 @@ static int _lvmetad_pvscan_single(struct metadata_area *mda, void *baton)
        struct volume_group *vg;

        if (mda_is_ignored(mda) ||
            !(vg = mda->ops->vg_read(b->fid, "", mda, NULL, NULL, 1, 0)))
            !(vg = mda->ops->vg_read(b->fid, "", mda, NULL, NULL, 1)))
                return 1;

        /* FIXME Also ensure contents match etc. */
@@ -636,16 +636,6 @@ static int _process_config(struct cmd_context *cmd)
         */
        cmd->default_settings.udev_fallback = udev_disabled ? 1 : -1;

#ifdef AIO_SUPPORT
        cmd->use_aio = find_config_tree_bool(cmd, devices_use_aio_CFG, NULL);
#else
        cmd->use_aio = 0;
#endif
        if (cmd->use_aio && !dev_async_setup(cmd))
                cmd->use_aio = 0;

        log_debug_io("%ssing asynchronous I/O.", cmd->use_aio ? "U" : "Not u");

        init_retry_deactivation(find_config_tree_bool(cmd, activation_retry_deactivation_CFG, NULL));

        init_activation_checks(find_config_tree_bool(cmd, activation_checks_CFG, NULL));
@@ -1298,7 +1288,7 @@ int init_filters(struct cmd_context *cmd, unsigned load_persistent_cache)
        lvm_stat_ctim(&ts, &st);
        cts = config_file_timestamp(cmd->cft);
        if (timespeccmp(&ts, &cts, >) &&
            !persistent_filter_load(cmd->mem, cmd->filter, NULL))
            !persistent_filter_load(cmd->filter, NULL))
                log_verbose("Failed to load existing device cache from %s",
                            dev_cache);
}
@@ -2160,8 +2150,6 @@ int refresh_toolcontext(struct cmd_context *cmd)

        cmd->lib_dir = NULL;

        label_init();

        if (!_init_lvm_conf(cmd))
                return_0;

@@ -2249,7 +2237,7 @@ void destroy_toolcontext(struct cmd_context *cmd)
        int flags;

        if (cmd->dump_filter && cmd->filter && cmd->filter->dump &&
            !cmd->filter->dump(cmd->filter, cmd->mem, 1))
            !cmd->filter->dump(cmd->filter, 1))
                stack;

        archive_exit(cmd);
@@ -165,7 +165,6 @@ struct cmd_context {
        unsigned vg_notify:1;
        unsigned lv_notify:1;
        unsigned pv_notify:1;
        unsigned use_aio:1;
        unsigned activate_component:1;          /* command activates component LV */
        unsigned process_component_lvs:1;       /* command processes also component LVs */
@@ -1,6 +1,6 @@
/*
 * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
 * Copyright (C) 2004-2018 Red Hat, Inc. All rights reserved.
 * Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
 *
 * This file is part of LVM2.
 *
@@ -279,7 +279,7 @@ struct dm_config_tree *config_file_open_and_read(const char *config_file,
        }

        log_very_verbose("Loading config file: %s", config_file);
        if (!config_file_read(cmd->mem, cft)) {
        if (!config_file_read(cft)) {
                log_error("Failed to load config file %s", config_file);
                goto bad;
        }
@@ -489,102 +489,32 @@ int override_config_tree_from_profile(struct cmd_context *cmd,
        return 0;
}

struct process_config_file_params {
        struct dm_config_tree *cft;
        struct device *dev;
        off_t offset;
        size_t size;
        off_t offset2;
        size_t size2;
        checksum_fn_t checksum_fn;
        uint32_t checksum;
        int checksum_only;
        int no_dup_node_check;
        lvm_callback_fn_t config_file_read_fd_callback;
        void *config_file_read_fd_context;
        int ret;
};

static void _process_config_file_buffer(int failed, unsigned ioflags, void *context, const void *data)
{
        struct process_config_file_params *pcfp = context;
        const char *fb = data, *fe;

        if (failed) {
                pcfp->ret = 0;
                goto_out;
        }

        if (pcfp->checksum_fn && pcfp->checksum !=
            (pcfp->checksum_fn(pcfp->checksum_fn(INITIAL_CRC, (const uint8_t *)fb, pcfp->size),
                               (const uint8_t *)(fb + pcfp->size), pcfp->size2))) {
                log_error("%s: Checksum error at offset %" PRIu64, dev_name(pcfp->dev), (uint64_t) pcfp->offset);
                pcfp->ret = 0;
                goto out;
        }

        if (!pcfp->checksum_only) {
                fe = fb + pcfp->size + pcfp->size2;
                if (pcfp->no_dup_node_check) {
                        if (!dm_config_parse_without_dup_node_check(pcfp->cft, fb, fe))
                                pcfp->ret = 0;
                } else if (!dm_config_parse(pcfp->cft, fb, fe))
                        pcfp->ret = 0;
        }

out:
        if (pcfp->config_file_read_fd_callback)
                pcfp->config_file_read_fd_callback(!pcfp->ret, ioflags, pcfp->config_file_read_fd_context, NULL);
}

/*
 * When checksum_only is set, the checksum of buffer is only matched
 * and function avoids parsing of mda into config tree which
 * remains unmodified and should not be used.
 */
int config_file_read_fd(struct dm_pool *mem, struct dm_config_tree *cft, struct device *dev, dev_io_reason_t reason,
int config_file_read_fd(struct dm_config_tree *cft, struct device *dev, dev_io_reason_t reason,
                        off_t offset, size_t size, off_t offset2, size_t size2,
                        checksum_fn_t checksum_fn, uint32_t checksum,
                        int checksum_only, int no_dup_node_check, unsigned ioflags,
                        lvm_callback_fn_t config_file_read_fd_callback, void *config_file_read_fd_context)
                        int checksum_only, int no_dup_node_check)
{
        char *fb;
        char *fb, *fe;
        int r = 0;
        off_t mmap_offset = 0;
        int use_mmap = 1;
        const char *buf = NULL;
        unsigned circular = size2 ? 1 : 0;      /* Wrapped around end of disk metadata buffer? */
        off_t mmap_offset = 0;
        char *buf = NULL;
        struct config_source *cs = dm_config_get_custom(cft);
        struct process_config_file_params *pcfp;

        if (!_is_file_based_config_source(cs->type)) {
                log_error(INTERNAL_ERROR "config_file_read_fd: expected file, special file "
                          "or profile config source, found %s config source.",
                          _config_source_names[cs->type]);
                goto bad;
                return 0;
        }

        if (!(pcfp = dm_pool_zalloc(mem, sizeof(*pcfp)))) {
                log_debug("config_file_read_fd: process_config_file_params struct allocation failed");
                goto bad;
        }

        pcfp->cft = cft;
        pcfp->dev = dev;
        pcfp->offset = offset;
        pcfp->size = size;
        pcfp->offset2 = offset2;
        pcfp->size2 = size2;
        pcfp->checksum_fn = checksum_fn;
        pcfp->checksum = checksum;
        pcfp->checksum_only = checksum_only;
        pcfp->no_dup_node_check = no_dup_node_check;
        pcfp->config_file_read_fd_callback = config_file_read_fd_callback;
        pcfp->config_file_read_fd_context = config_file_read_fd_context;
        pcfp->ret = 1;

        /* Only use mmap with regular files */
        if (!(dev->flags & DEV_REGULAR) || circular)
        if (!(dev->flags & DEV_REGULAR) || size2)
                use_mmap = 0;

        if (use_mmap) {
@@ -594,40 +524,56 @@ int config_file_read_fd(struct dm_pool *mem, struct dm_config_tree *cft, struct
                          MAP_PRIVATE, dev_fd(dev), offset - mmap_offset);
                if (fb == (caddr_t) (-1)) {
                        log_sys_error("mmap", dev_name(dev));
                        goto bad;
                        goto out;
                }
                _process_config_file_buffer(0, ioflags, pcfp, fb + mmap_offset);
                r = pcfp->ret;
                fb = fb + mmap_offset;
        } else {
                if (!(buf = dm_malloc(size + size2))) {
                        log_error("Failed to allocate circular buffer.");
                        return 0;
                }
                if (!dev_read_circular(dev, (uint64_t) offset, size,
                                       (uint64_t) offset2, size2, reason, buf)) {
                        goto out;
                }
                fb = buf;
        }

        if (checksum_fn && checksum !=
            (checksum_fn(checksum_fn(INITIAL_CRC, (const uint8_t *)fb, size),
                         (const uint8_t *)(fb + size), size2))) {
                log_error("%s: Checksum error at offset %" PRIu64, dev_name(dev), (uint64_t) offset);
                goto out;
        }

        if (!checksum_only) {
                fe = fb + size + size2;
                if (no_dup_node_check) {
                        if (!dm_config_parse_without_dup_node_check(cft, fb, fe))
                                goto_out;
                } else {
                        if (!dm_config_parse(cft, fb, fe))
                                goto_out;
                }
        }

        r = 1;

out:
        if (!use_mmap)
                dm_free(buf);
        else {
                /* unmap the file */
                if (munmap(fb, size + mmap_offset)) {
                if (munmap(fb - mmap_offset, size + mmap_offset)) {
                        log_sys_error("munmap", dev_name(dev));
                        r = 0;
                }
        } else {
                if (circular) {
                        if (!(buf = dev_read_circular(dev, (uint64_t) offset, size, (uint64_t) offset2, size2, reason)))
                                goto_out;
                        _process_config_file_buffer(0, ioflags, pcfp, buf);
                        dm_free((void *)buf);
                } else {
                        dev_read_callback(dev, (uint64_t) offset, size, reason, ioflags, _process_config_file_buffer, pcfp);
                        if (config_file_read_fd_callback)
                                return 1;
                }
                r = pcfp->ret;
        }

out:
        return r;

bad:
        if (config_file_read_fd_callback)
                config_file_read_fd_callback(1, ioflags, config_file_read_fd_context, NULL);

        return 0;
}

int config_file_read(struct dm_pool *mem, struct dm_config_tree *cft)
int config_file_read(struct dm_config_tree *cft)
{
        const char *filename = NULL;
        struct config_source *cs = dm_config_get_custom(cft);
@@ -655,8 +601,8 @@ int config_file_read(struct dm_pool *mem, struct dm_config_tree *cft)
                }
        }

        r = config_file_read_fd(mem, cft, cf->dev, DEV_IO_MDA_CONTENT, 0, (size_t) info.st_size, 0, 0,
                                (checksum_fn_t) NULL, 0, 0, 0, 0, NULL, NULL);
        r = config_file_read_fd(cft, cf->dev, DEV_IO_MDA_CONTENT, 0, (size_t) info.st_size, 0, 0,
                                (checksum_fn_t) NULL, 0, 0, 0);

        if (!cf->keep_open) {
                if (!dev_close(cf->dev))
@@ -239,13 +239,11 @@ config_source_t config_get_source_type(struct dm_config_tree *cft);
typedef uint32_t (*checksum_fn_t) (uint32_t initial, const uint8_t *buf, uint32_t size);

struct dm_config_tree *config_open(config_source_t source, const char *filename, int keep_open);
int config_file_read_fd(struct dm_pool *mem, struct dm_config_tree *cft, struct device *dev, dev_io_reason_t reason,
int config_file_read_fd(struct dm_config_tree *cft, struct device *dev, dev_io_reason_t reason,
                        off_t offset, size_t size, off_t offset2, size_t size2,
                        checksum_fn_t checksum_fn, uint32_t checksum,
                        int skip_parse, int no_dup_node_check, unsigned ioflags,
                        lvm_callback_fn_t config_file_read_fd_callback, void *config_file_read_fd_context);

int config_file_read(struct dm_pool *mem, struct dm_config_tree *cft);
                        int skip_parse, int no_dup_node_check);
int config_file_read(struct dm_config_tree *cft);
struct dm_config_tree *config_file_open_and_read(const char *config_file, config_source_t source,
                                                 struct cmd_context *cmd);
int config_write(struct dm_config_tree *cft, struct config_def_tree_spec *tree_spec,
@@ -226,16 +226,6 @@ cfg(devices_dir_CFG, "dir", devices_CFG_SECTION, CFG_ADVANCED, CFG_TYPE_STRING,
cfg_array(devices_scan_CFG, "scan", devices_CFG_SECTION, CFG_ADVANCED, CFG_TYPE_STRING, "#S/dev", vsn(1, 0, 0), NULL, 0, NULL,
        "Directories containing device nodes to use with LVM.\n")

cfg(devices_use_aio_CFG, "use_aio", devices_CFG_SECTION, CFG_DEFAULT_COMMENTED, CFG_TYPE_BOOL, DEFAULT_USE_AIO, vsn(2, 2, 178), NULL, 0, NULL,
        "Use linux asynchronous I/O for parallel device access where possible.\n")

cfg(devices_aio_max_CFG, "aio_max", devices_CFG_SECTION, CFG_DEFAULT_COMMENTED, CFG_TYPE_INT, DEFAULT_AIO_MAX, vsn(2, 2, 178), NULL, 0, NULL,
        "Maximum number of asynchronous I/Os to issue concurrently.\n")

cfg(devices_aio_memory_CFG, "aio_memory", devices_CFG_SECTION, CFG_DEFAULT_COMMENTED, CFG_TYPE_INT, DEFAULT_AIO_MEMORY, vsn(2, 2, 178), NULL, 0, NULL,
        "Approximate maximum total amount of memory (in MB) used\n"
        "for asynchronous I/O buffers.\n")

cfg_array(devices_loopfiles_CFG, "loopfiles", devices_CFG_SECTION, CFG_DEFAULT_UNDEFINED | CFG_UNSUPPORTED, CFG_TYPE_STRING, NULL, vsn(1, 2, 0), NULL, 0, NULL, NULL)

cfg(devices_obtain_device_list_from_udev_CFG, "obtain_device_list_from_udev", devices_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_OBTAIN_DEVICE_LIST_FROM_UDEV, vsn(2, 2, 85), NULL, 0, NULL,
@@ -32,9 +32,6 @@
#define DEFAULT_SYSTEM_ID_SOURCE "none"
#define DEFAULT_OBTAIN_DEVICE_LIST_FROM_UDEV 1
#define DEFAULT_EXTERNAL_DEVICE_INFO_SOURCE "none"
#define DEFAULT_USE_AIO 1
#define DEFAULT_AIO_MAX 128
#define DEFAULT_AIO_MEMORY 10
#define DEFAULT_SYSFS_SCAN 1
#define DEFAULT_MD_COMPONENT_DETECTION 1
#define DEFAULT_FW_RAID_COMPONENT_DETECTION 0
@@ -1245,24 +1245,12 @@ int dev_cache_check_for_open_devices(void)

int dev_cache_exit(void)
{
        struct btree_iter *b;
        int num_open = 0;

        dev_async_exit();

        if (_cache.names)
                if ((num_open = _check_for_open_devices(1)) > 0)
                        log_error(INTERNAL_ERROR "%d device(s) were left open and have been closed.", num_open);

        if (_cache.devices) {
                /* FIXME Replace with structured devbuf cache */
                b = btree_first(_cache.devices);
                while (b) {
                        devbufs_release(btree_get_data(b));
                        b = btree_next(b);
                }
        }

        if (_cache.mem)
                dm_pool_destroy(_cache.mem);

@@ -23,10 +23,10 @@
 * predicate for devices.
 */
struct dev_filter {
        int (*passes_filter) (struct dev_filter *f, struct device *dev);
        void (*destroy) (struct dev_filter *f);
        void (*wipe) (struct dev_filter *f);
        int (*dump) (struct dev_filter *f, struct dm_pool *mem, int merge_existing);
        int (*passes_filter) (struct dev_filter * f, struct device * dev);
        void (*destroy) (struct dev_filter * f);
        void (*wipe) (struct dev_filter * f);
        int (*dump) (struct dev_filter * f, int merge_existing);
        void *private;
        unsigned use_count;
};

@@ -53,12 +53,6 @@
# endif
#endif

/*
 * Always read at least 8k from disk.
 * This seems to be a good compromise for the existing LVM2 metadata layout.
 */
#define MIN_READ_SIZE (8 * 1024)

static DM_LIST_INIT(_open_devices);
static unsigned _dev_size_seqno = 1;

@@ -80,319 +74,38 @@ static const char *_reason_text(dev_io_reason_t reason)
        return _reasons[(unsigned) reason];
}

/*
 * Release the memory holding the last data we read
 */
static void _release_devbuf(struct device_buffer *devbuf)
{
        dm_free(devbuf->malloc_address);
        devbuf->malloc_address = NULL;
}

void devbufs_release(struct device *dev)
{
        if ((dev->flags & DEV_REGULAR))
                return;

        _release_devbuf(&dev->last_devbuf);
        _release_devbuf(&dev->last_extra_devbuf);
}

#ifdef AIO_SUPPORT

#  include <libaio.h>

static io_context_t _aio_ctx = 0;
static struct io_event *_aio_events = NULL;
static int _aio_max = 0;
static int64_t _aio_memory_max = 0;
static int _aio_must_queue = 0;         /* Have we reached AIO capacity? */

static DM_LIST_INIT(_aio_queue);

#define DEFAULT_AIO_COLLECTION_EVENTS 32

int dev_async_setup(struct cmd_context *cmd)
{
        int r;

        _aio_max = find_config_tree_int(cmd, devices_aio_max_CFG, NULL);
        _aio_memory_max = find_config_tree_int(cmd, devices_aio_memory_CFG, NULL) * INT64_C(1024 * 1024);

        /* Threshold is zero? */
        if (!_aio_max || !_aio_memory_max) {
                if (_aio_ctx)
                        dev_async_exit();
                return 1;
        }

        /* Already set up? */
        if (_aio_ctx)
                return 1;

        log_debug_io("Setting up aio context for up to %" PRId64 " MB across %d events.", _aio_memory_max, _aio_max);

        if (!_aio_events && !(_aio_events = dm_zalloc(sizeof(*_aio_events) * DEFAULT_AIO_COLLECTION_EVENTS))) {
                log_error("Failed to allocate io_event array for asynchronous I/O.");
                return 0;
        }

        if ((r = io_setup(_aio_max, &_aio_ctx)) < 0) {
                /*
                 * Possible errors:
                 * ENOSYS - aio not available in current kernel
                 * EAGAIN - _aio_max is too big
                 * EFAULT - invalid pointer
                 * EINVAL - _aio_ctx != 0 or kernel aio limits exceeded
                 * ENOMEM
                 */
                log_warn("WARNING: Asynchronous I/O setup for %d events failed: %s", _aio_max, strerror(-r));
                log_warn("WARNING: Using only synchronous I/O.");
                dm_free(_aio_events);
                _aio_events = NULL;
                _aio_ctx = 0;
                return 0;
        }

        return 1;
}

/* Reset aio context after fork */
int dev_async_reset(struct cmd_context *cmd)
{
        log_debug_io("Resetting asynchronous I/O context.");
        _aio_ctx = 0;
        dm_free(_aio_events);
        _aio_events = NULL;

        return dev_async_setup(cmd);
}

/*
 * Track the amount of in-flight async I/O.
 * If it exceeds the defined threshold set _aio_must_queue.
 */
static void _update_aio_counters(int nr, ssize_t bytes)
{
        static int64_t aio_bytes = 0;
        static int aio_count = 0;

        aio_bytes += bytes;
        aio_count += nr;

        if (aio_count >= _aio_max || aio_bytes > _aio_memory_max)
                _aio_must_queue = 1;
        else
                _aio_must_queue = 0;
}

static int _io(struct device_buffer *devbuf, unsigned ioflags);

int dev_async_getevents(void)
{
        struct device_buffer *devbuf, *tmp;
        lvm_callback_fn_t dev_read_callback_fn;
        void *dev_read_callback_context;
        int r, event_nr;

        if (!_aio_ctx)
                return 1;

        do {
                /* FIXME Add timeout - currently NULL - waits for ever for at least 1 item */
                r = io_getevents(_aio_ctx, 1, DEFAULT_AIO_COLLECTION_EVENTS, _aio_events, NULL);
                if (r > 0)
                        break;
                if (!r)
                        return 1; /* Timeout elapsed */
                if (r == -EINTR)
                        continue;
                if (r == -EAGAIN) {
                        usleep(100);
                        return 1; /* Give the caller the opportunity to do other work before repeating */
                }
                /*
                 * ENOSYS - not supported by kernel
                 * EFAULT - memory invalid
                 * EINVAL - _aio_ctx invalid or min_nr/nr/timeout out of range
                 */
                log_error("Asynchronous event collection failed: %s", strerror(-r));
                return 0;
        } while (1);

        for (event_nr = 0; event_nr < r; event_nr++) {
                devbuf = _aio_events[event_nr].obj->data;
                dm_free(_aio_events[event_nr].obj);

                _update_aio_counters(-1, -devbuf->where.size);

                dev_read_callback_fn = devbuf->dev_read_callback_fn;
                dev_read_callback_context = devbuf->dev_read_callback_context;

                /* Clear the callbacks as a precaution */
                devbuf->dev_read_callback_context = NULL;
                devbuf->dev_read_callback_fn = NULL;

                if (_aio_events[event_nr].res == devbuf->where.size) {
                        if (dev_read_callback_fn)
                                dev_read_callback_fn(0, AIO_SUPPORTED_CODE_PATH, dev_read_callback_context, (char *)devbuf->buf + devbuf->data_offset);
                } else {
                        /* FIXME If partial read is possible, resubmit remainder */
                        log_error("%s: asynchronous read only I/O failed (" FMTd64 ") of " FMTu64 " bytes at " FMTu64 " (for %s): %s",
                                  dev_name(devbuf->where.dev), _aio_events[event_nr].res,
                                  (uint64_t) devbuf->where.size, (uint64_t) devbuf->where.start,
                                  _reason_text(devbuf->reason),
                                  (((int64_t)_aio_events[event_nr].res) < 0) ? strerror(-(int64_t)_aio_events[event_nr].res) : 0);
                        _release_devbuf(devbuf);
                        if (dev_read_callback_fn)
                                dev_read_callback_fn(1, AIO_SUPPORTED_CODE_PATH, dev_read_callback_context, NULL);
                        else
                                r = 0;
                }
        }

        /* Submit further queued events if we can */
        dm_list_iterate_items_gen_safe(devbuf, tmp, &_aio_queue, aio_queued) {
                if (_aio_must_queue)
                        break;
                dm_list_del(&devbuf->aio_queued);
                _io(devbuf, 1);
        }

        return 1;
}

static int _io_async(struct device_buffer *devbuf)
{
        struct device_area *where = &devbuf->where;
        struct iocb *iocb;
        int r;

        _update_aio_counters(1, devbuf->where.size);

        if (!(iocb = dm_malloc(sizeof(*iocb)))) {
                log_error("Failed to allocate I/O control block array for asynchronous I/O.");
                return 0;
        }

        io_prep_pread(iocb, dev_fd(where->dev), devbuf->buf, where->size, where->start);
        iocb->data = devbuf;

        do {
                r = io_submit(_aio_ctx, 1L, &iocb);
                if (r == 1)
                        break;  /* Success */
                if (r == -EAGAIN) {
                        /* Try to release some resources then retry */
                        usleep(100);
                        if (dev_async_getevents())
                                return_0;
                        /* FIXME Add counter/timeout so we can't get stuck here for ever */
                        continue;
                }
                /*
                 * Possible errors:
                 * EFAULT - invalid data
                 * ENOSYS - no aio support in kernel
                 * EBADF - bad file descriptor in iocb
                 * EINVAL - invalid _aio_ctx / iocb not initialised / invalid operation for this fd
                 */
                log_error("Asynchronous event submission failed: %s", strerror(-r));
                return 0;
        } while (1);

        return 1;
}

void dev_async_exit(void)
{
        struct device_buffer *devbuf, *tmp;
        lvm_callback_fn_t dev_read_callback_fn;
        void *dev_read_callback_context;
        int r;

        if (!_aio_ctx)
                return;

        /* Discard any queued requests */
        dm_list_iterate_items_gen_safe(devbuf, tmp, &_aio_queue, aio_queued) {
                dm_list_del(&devbuf->aio_queued);

                _update_aio_counters(-1, -devbuf->where.size);

                dev_read_callback_fn = devbuf->dev_read_callback_fn;
                dev_read_callback_context = devbuf->dev_read_callback_context;

                _release_devbuf(devbuf);

                if (dev_read_callback_fn)
                        dev_read_callback_fn(1, AIO_SUPPORTED_CODE_PATH, dev_read_callback_context, NULL);
        }

        log_debug_io("Destroying aio context.");
        if ((r = io_destroy(_aio_ctx)) < 0)
                /* Returns -ENOSYS if aio not in kernel or -EINVAL if _aio_ctx invalid */
                log_error("Failed to destroy asynchronous I/O context: %s", strerror(-r));

        dm_free(_aio_events);
        _aio_events = NULL;

        _aio_ctx = 0;
}

static void _queue_aio(struct device_buffer *devbuf)
{
        dm_list_add(&_aio_queue, &devbuf->aio_queued);
        log_debug_io("Queueing aio.");
}

#else

static int _aio_ctx = 0;
static int _aio_must_queue = 0;

int dev_async_setup(struct cmd_context *cmd)
{
        return 1;
}

int dev_async_reset(struct cmd_context *cmd)
{
        return 1;
}

int dev_async_getevents(void)
{
        return 1;
}

void dev_async_exit(void)
{
}

static int _io_async(struct device_buffer *devbuf)
{
        return 0;
}

static void _queue_aio(struct device_buffer *devbuf)
{
}

#endif /* AIO_SUPPORT */

/*-----------------------------------------------------------------
 * The standard io loop that keeps submitting an io until it's
 * all gone.
 *---------------------------------------------------------------*/
static int _io_sync(struct device_buffer *devbuf)
static int _io(struct device_area *where, char *buffer, int should_write, dev_io_reason_t reason)
{
        struct device_area *where = &devbuf->where;
        int fd = dev_fd(where->dev);
        char *buffer = devbuf->buf;
        ssize_t n = 0;
        size_t total = 0;

        if (fd < 0) {
                log_error("Attempt to read an unopened device (%s).",
                          dev_name(where->dev));
                return 0;
        }

        log_debug_io("%s %s:%8" PRIu64 " bytes (sync) at %" PRIu64 "%s (for %s)",
                     should_write ? "Write" : "Read ", dev_name(where->dev),
                     where->size, (uint64_t) where->start,
                     (should_write && test_mode()) ? " (test mode - suppressed)" : "", _reason_text(reason));

        /*
         * Skip all writes in test mode.
         */
        if (should_write && test_mode())
                return 1;

        if (where->size > SSIZE_MAX) {
                log_error("Read size too large: %" PRIu64, where->size);
                return 0;
        }

        if (lseek(fd, (off_t) where->start, SEEK_SET) == (off_t) -1) {
                log_error("%s: lseek %" PRIu64 " failed: %s",
                          dev_name(where->dev), (uint64_t) where->start,
@@ -402,19 +115,18 @@ static int _io_sync(struct device_buffer *devbuf)

        while (total < (size_t) where->size) {
                do
                        n = devbuf->write ?
                        n = should_write ?
                            write(fd, buffer, (size_t) where->size - total) :
                            read(fd, buffer, (size_t) where->size - total);
                while ((n < 0) && ((errno == EINTR) || (errno == EAGAIN)));

                if (n < 0)
                        log_error("%s: synchronous %s failed after %" PRIu64 " of %" PRIu64
                                  " at %" PRIu64 " (for %s): %s", dev_name(where->dev),
                                  devbuf->write ? "write" : "read",
                                  (uint64_t) total,
                                  (uint64_t) where->size, (uint64_t) where->start,
                                  _reason_text(devbuf->reason),
                                  strerror(errno));
                        log_error_once("%s: %s failed after %" PRIu64 " of %" PRIu64
                                       " at %" PRIu64 ": %s", dev_name(where->dev),
                                       should_write ? "write" : "read",
                                       (uint64_t) total,
                                       (uint64_t) where->size,
                                       (uint64_t) where->start, strerror(errno));

                if (n <= 0)
                        break;
@@ -426,42 +138,6 @@ static int _io_sync(struct device_buffer *devbuf)
        return (total == (size_t) where->size);
}

static int _io(struct device_buffer *devbuf, unsigned ioflags)
{
        struct device_area *where = &devbuf->where;
        int fd = dev_fd(where->dev);
        int async = (!devbuf->write && _aio_ctx && aio_supported_code_path(ioflags) && devbuf->dev_read_callback_fn) ? 1 : 0;

        if (fd < 0) {
                log_error("Attempt to read an unopened device (%s).",
                          dev_name(where->dev));
                return 0;
        }

        if (!devbuf->buf && !(devbuf->malloc_address = devbuf->buf = dm_malloc_aligned((size_t) devbuf->where.size, 0))) {
                log_error("I/O buffer malloc failed");
                return 0;
        }

        log_debug_io("%s %s(fd %d):%8" PRIu64 " bytes (%ssync) at %" PRIu64 "%s (for %s)",
                     devbuf->write ? "Write" : "Read ", dev_name(where->dev), fd,
                     where->size, async ? "a" : "", (uint64_t) where->start,
                     (devbuf->write && test_mode()) ? " (test mode - suppressed)" : "", _reason_text(devbuf->reason));

        /*
         * Skip all writes in test mode.
         */
        if (devbuf->write && test_mode())
                return 1;

        if (where->size > SSIZE_MAX) {
                log_error("Read size too large: %" PRIu64, where->size);
                return 0;
        }

        return async ? _io_async(devbuf) : _io_sync(devbuf);
}

/*-----------------------------------------------------------------
 * LVM2 uses O_DIRECT when performing metadata io, which requires
 * block size aligned accesses.  If any io is not aligned we have
@@ -551,16 +227,15 @@ static void _widen_region(unsigned int block_size, struct device_area *region,
                result->size += block_size - delta;
}

static int _aligned_io(struct device_area *where, char *write_buffer,
                       int should_write, dev_io_reason_t reason,
                       unsigned ioflags, lvm_callback_fn_t dev_read_callback_fn, void *dev_read_callback_context)
static int _aligned_io(struct device_area *where, char *buffer,
                       int should_write, dev_io_reason_t reason)
{
        char *bounce, *bounce_buf;
        unsigned int physical_block_size = 0;
        unsigned int block_size = 0;
        unsigned buffer_was_widened = 0;
        uintptr_t mask;
        struct device_area widened;
        struct device_buffer *devbuf;
        int r = 0;

        if (!(where->dev->flags & DEV_REGULAR) &&
@@ -569,11 +244,6 @@ static int _aligned_io(struct device_area *where, char *write_buffer,

        if (!block_size)
                block_size = lvm_getpagesize();

        /* Apply minimum read size */
        if (!should_write && block_size < MIN_READ_SIZE)
                block_size = MIN_READ_SIZE;

        mask = block_size - 1;

        _widen_region(block_size, where, &widened);
@@ -583,75 +253,50 @@ static int _aligned_io(struct device_area *where, char *write_buffer,
                buffer_was_widened = 1;
                log_debug_io("Widening request for %" PRIu64 " bytes at %" PRIu64 " to %" PRIu64 " bytes at %" PRIu64 " on %s (for %s)",
                             where->size, (uint64_t) where->start, widened.size, (uint64_t) widened.start, dev_name(where->dev), _reason_text(reason));
        }

        devbuf = DEV_DEVBUF(where->dev, reason);
        _release_devbuf(devbuf);
        devbuf->where.dev = where->dev;
        devbuf->where.start = widened.start;
        devbuf->where.size = widened.size;
        devbuf->write = should_write;
        devbuf->reason = reason;
        devbuf->dev_read_callback_fn = dev_read_callback_fn;
        devbuf->dev_read_callback_context = dev_read_callback_context;

        /* Store location of requested data relative to start of buf */
        devbuf->data_offset = where->start - devbuf->where.start;

        if (should_write && !buffer_was_widened && !((uintptr_t) write_buffer & mask))
        } else if (!((uintptr_t) buffer & mask))
                /* Perform the I/O directly. */
                devbuf->buf = write_buffer;
        else if (!should_write)
                /* Postpone buffer allocation until we're about to issue the I/O */
                devbuf->buf = NULL;
        else {
                /* Allocate a bounce buffer with an extra block */
                if (!(devbuf->malloc_address = devbuf->buf = dm_malloc((size_t) devbuf->where.size + block_size))) {
                        log_error("Bounce buffer malloc failed");
                        return 0;
                }
                return _io(where, buffer, should_write, reason);

        /*
         * Realign start of bounce buffer (using the extra sector)
         */
        if (((uintptr_t) devbuf->buf) & mask)
                devbuf->buf = (char *) ((((uintptr_t) devbuf->buf) + mask) & ~mask);
        /* Allocate a bounce buffer with an extra block */
        if (!(bounce_buf = bounce = dm_malloc((size_t) widened.size + block_size))) {
                log_error("Bounce buffer malloc failed");
                return 0;
        }

        /* If we've reached our concurrent AIO limit, add this request to the queue */