The interface consists of:

- A context struct, one for the entire command.
- An io struct, one per io operation (read).
- dev_async_context_setup() creates an aio context.
- dev_async_context_destroy() destroys an aio context.
- dev_async_alloc_ios() allocates a specified number of io structs, along with an associated buffer for the data.
- dev_async_free_ios() frees all the allocated io structs+buffers.
- dev_async_io_get() gets an available io struct from those allocated in alloc_ios. If none are available, it will allocate a new io struct if under the limit.
- dev_async_io_put() puts a used io struct back into the set of unused io structs, making it available for get.
- dev_async_read_submit() starts an async read io.
- dev_async_getevents() collects async io completions.
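A minimal usage sketch of the call sequence (illustrative only; the counts, buffer sizes, device pointer `dev`, and the single submit/collect round are assumptions, not taken from this commit's callers):

	struct dev_async_context *ac;
	struct dev_async_io *aio;
	struct timespec ts = { .tv_sec = 1 };
	int available, nospace, done;

	if (!(ac = dev_async_context_setup(64, 64, 0, 128 * 1024)))
		return;	/* fall back to synchronous reads */

	dev_async_alloc_ios(ac, 64, 128 * 1024, &available);

	if ((aio = dev_async_io_get(ac, 128 * 1024))) {
		if (dev_async_read_submit(ac, aio, dev, 4096, 0, &nospace))
			dev_async_getevents(ac, 1, &ts, &done);
		dev_async_io_put(ac, aio);
	}

	dev_async_free_ios(ac);
	dev_async_context_destroy(ac);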
/*
 * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
 * Copyright (C) 2004-2012 Red Hat, Inc. All rights reserved.
 *
 * This file is part of LVM2.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU Lesser General Public License v.2.1.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "lib.h"
#include "device.h"
#include "metadata.h"
#include "lvmcache.h"
#include "memlock.h"
#include "locking.h"

#include <limits.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>

#ifdef __linux__
#  define u64 uint64_t		/* Missing without __KERNEL__ */
#  undef WNOHANG		/* Avoid redefinition */
#  undef WUNTRACED		/* Avoid redefinition */
#  include <linux/fs.h>		/* For block ioctl definitions */
#  define BLKSIZE_SHIFT SECTOR_SHIFT
#  ifndef BLKGETSIZE64		/* fs.h out-of-date */
#    define BLKGETSIZE64 _IOR(0x12, 114, size_t)
#  endif /* BLKGETSIZE64 */
#  ifndef BLKDISCARD
#    define BLKDISCARD	_IO(0x12,119)
#  endif
#else
#  include <sys/disk.h>
#  define BLKBSZGET DKIOCGETBLOCKSIZE
#  define BLKSSZGET DKIOCGETBLOCKSIZE
#  define BLKGETSIZE64 DKIOCGETBLOCKCOUNT
#  define BLKFLSBUF DKIOCSYNCHRONIZECACHE
#  define BLKSIZE_SHIFT 0
#endif

#ifdef O_DIRECT_SUPPORT
#  ifndef O_DIRECT
#    error O_DIRECT support configured but O_DIRECT definition not found in headers
#  endif
#endif

static DM_LIST_INIT(_open_devices);
static unsigned _dev_size_seqno = 1;

/*-----------------------------------------------------------------
 * The standard io loop that keeps submitting an io until it's
 * all gone.
 *---------------------------------------------------------------*/
static int _io(struct device_area *where, char *buffer, int should_write)
{
	int fd = dev_fd(where->dev);
	ssize_t n = 0;
	size_t total = 0;

	if (fd < 0) {
		log_error("Attempt to read an unopened device (%s).",
			  dev_name(where->dev));
		return 0;
	}

	/*
	 * Skip all writes in test mode.
	 */
	if (should_write && test_mode())
		return 1;

	if (where->size > SSIZE_MAX) {
		log_error("Read size too large: %" PRIu64, where->size);
		return 0;
	}

	if (lseek(fd, (off_t) where->start, SEEK_SET) == (off_t) -1) {
		log_error("%s: lseek %" PRIu64 " failed: %s",
			  dev_name(where->dev), (uint64_t) where->start,
			  strerror(errno));
		return 0;
	}

	while (total < (size_t) where->size) {
		do
			n = should_write ?
			    write(fd, buffer, (size_t) where->size - total) :
			    read(fd, buffer, (size_t) where->size - total);
		while ((n < 0) && ((errno == EINTR) || (errno == EAGAIN)));

		if (n < 0)
			log_error_once("%s: %s failed after %" PRIu64 " of %" PRIu64
				       " at %" PRIu64 ": %s", dev_name(where->dev),
				       should_write ? "write" : "read",
				       (uint64_t) total,
				       (uint64_t) where->size,
				       (uint64_t) where->start, strerror(errno));

		if (n <= 0)
			break;

		total += n;
		buffer += n;
	}

	return (total == (size_t) where->size);
}

/*-----------------------------------------------------------------
 * LVM2 uses O_DIRECT when performing metadata io, which requires
 * block size aligned accesses.  If any io is not aligned we have
 * to perform the io via a bounce buffer, obviously this is quite
 * inefficient.
 *---------------------------------------------------------------*/

/*
 * Get the physical and logical block size for a device.
 */
int dev_get_block_size(struct device *dev, unsigned int *physical_block_size, unsigned int *block_size)
{
	const char *name = dev_name(dev);
	int needs_open;
	int r = 1;

	needs_open = (!dev->open_count && (dev->phys_block_size == -1 || dev->block_size == -1));

	if (needs_open && !dev_open_readonly(dev))
		return_0;

	if (dev->block_size == -1) {
		if (ioctl(dev_fd(dev), BLKBSZGET, &dev->block_size) < 0) {
			log_sys_error("ioctl BLKBSZGET", name);
			r = 0;
			goto out;
		}
		log_debug_devs("%s: block size is %u bytes", name, dev->block_size);
	}

#ifdef BLKPBSZGET
	/* BLKPBSZGET is available in kernel >= 2.6.32 only */
	if (dev->phys_block_size == -1) {
		if (ioctl(dev_fd(dev), BLKPBSZGET, &dev->phys_block_size) < 0) {
			log_sys_error("ioctl BLKPBSZGET", name);
			r = 0;
			goto out;
		}
		log_debug_devs("%s: physical block size is %u bytes", name, dev->phys_block_size);
	}
#elif defined (BLKSSZGET)
	/* if we can't get physical block size, just use logical block size instead */
	if (dev->phys_block_size == -1) {
		if (ioctl(dev_fd(dev), BLKSSZGET, &dev->phys_block_size) < 0) {
			log_sys_error("ioctl BLKSSZGET", name);
			r = 0;
			goto out;
		}
		log_debug_devs("%s: physical block size can't be determined, using logical "
			       "block size of %u bytes", name, dev->phys_block_size);
	}
#else
	/* if even BLKSSZGET is not available, use default 512b */
	if (dev->phys_block_size == -1) {
		dev->phys_block_size = 512;
		log_debug_devs("%s: physical block size can't be determined, using block "
			       "size of %u bytes instead", name, dev->phys_block_size);
	}
#endif

	*physical_block_size = (unsigned int) dev->phys_block_size;
	*block_size = (unsigned int) dev->block_size;
out:
	if (needs_open && !dev_close(dev))
		stack;

	return r;
}

/*
 * Widens a region to be an aligned region.
 */
static void _widen_region(unsigned int block_size, struct device_area *region,
			  struct device_area *result)
{
	uint64_t mask = block_size - 1, delta;
	memcpy(result, region, sizeof(*result));

	/* adjust the start */
	delta = result->start & mask;
	if (delta) {
		result->start -= delta;
		result->size += delta;
	}

	/* adjust the end */
	delta = (result->start + result->size) & mask;
	if (delta)
		result->size += block_size - delta;
}
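
/*
 * Worked example (not from the original source): with block_size 4096
 * (mask 0xfff), a region starting at byte 5000 with size 1000 widens to
 * start 4096 (delta 904, size 1904), and the end delta of 1904 then grows
 * the size to 4096, so the widened region is the aligned [4096, 8192)
 * and fully covers the original [5000, 6000).
 */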

static int _aligned_io(struct device_area *where, char *buffer,
		       int should_write)
{
	char *bounce, *bounce_buf;
	unsigned int physical_block_size = 0;
	unsigned int block_size = 0;
	uintptr_t mask;
	struct device_area widened;
	int r = 0;

	if (!(where->dev->flags & DEV_REGULAR) &&
	    !dev_get_block_size(where->dev, &physical_block_size, &block_size))
		return_0;

	if (!block_size)
		block_size = lvm_getpagesize();

	_widen_region(block_size, where, &widened);

	/* Do we need to use a bounce buffer? */
	mask = block_size - 1;
	if (!memcmp(where, &widened, sizeof(widened)) &&
	    !((uintptr_t) buffer & mask))
		return _io(where, buffer, should_write);

	/* Allocate a bounce buffer with an extra block */
	if (!(bounce_buf = bounce = dm_malloc((size_t) widened.size + block_size))) {
		log_error("Bounce buffer malloc failed");
		return 0;
	}

	/*
	 * Realign start of bounce buffer (using the extra sector)
	 */
	if (((uintptr_t) bounce) & mask)
		bounce = (char *) ((((uintptr_t) bounce) + mask) & ~mask);

	/* channel the io through the bounce buffer */
	if (!_io(&widened, bounce, 0)) {
		if (!should_write)
			goto_out;
		/* FIXME pre-extend the file */
		memset(bounce, '\n', widened.size);
	}

	if (should_write) {
		memcpy(bounce + (where->start - widened.start), buffer,
		       (size_t) where->size);

		/* ... then we write */
		if (!(r = _io(&widened, bounce, 1)))
			stack;

		goto out;
	}

	memcpy(buffer, bounce + (where->start - widened.start),
	       (size_t) where->size);

	r = 1;

out:
	dm_free(bounce_buf);
	return r;
}

static int _dev_get_size_file(struct device *dev, uint64_t *size)
{
	const char *name = dev_name(dev);
	struct stat info;

	if (dev->size_seqno == _dev_size_seqno) {
		log_very_verbose("%s: using cached size %" PRIu64 " sectors",
				 name, dev->size);
		*size = dev->size;
		return 1;
	}

	if (stat(name, &info)) {
		log_sys_error("stat", name);
		return 0;
	}

	*size = info.st_size;
	*size >>= SECTOR_SHIFT;	/* Convert to sectors */
	dev->size = *size;
	dev->size_seqno = _dev_size_seqno;

	log_very_verbose("%s: size is %" PRIu64 " sectors", name, *size);

	return 1;
}

static int _dev_get_size_dev(struct device *dev, uint64_t *size)
{
	const char *name = dev_name(dev);

	if (dev->size_seqno == _dev_size_seqno) {
		log_very_verbose("%s: using cached size %" PRIu64 " sectors",
				 name, dev->size);
		*size = dev->size;
		return 1;
	}

	if (!dev_open_readonly(dev))
		return_0;

	if (ioctl(dev_fd(dev), BLKGETSIZE64, size) < 0) {
		log_sys_error("ioctl BLKGETSIZE64", name);
		if (!dev_close(dev))
			log_sys_error("close", name);
		return 0;
	}

	*size >>= BLKSIZE_SHIFT;	/* Convert to sectors */
	dev->size = *size;
	dev->size_seqno = _dev_size_seqno;

	if (!dev_close(dev))
		log_sys_error("close", name);

	log_very_verbose("%s: size is %" PRIu64 " sectors", name, *size);

	return 1;
}

static int _dev_read_ahead_dev(struct device *dev, uint32_t *read_ahead)
{
	long read_ahead_long;

	if (dev->read_ahead != -1) {
		*read_ahead = (uint32_t) dev->read_ahead;
		return 1;
	}

	if (!dev_open_readonly(dev))
		return_0;

	if (ioctl(dev->fd, BLKRAGET, &read_ahead_long) < 0) {
		log_sys_error("ioctl BLKRAGET", dev_name(dev));
		if (!dev_close(dev))
			stack;
		return 0;
	}

	*read_ahead = (uint32_t) read_ahead_long;
	dev->read_ahead = read_ahead_long;

	log_very_verbose("%s: read_ahead is %u sectors",
			 dev_name(dev), *read_ahead);

	if (!dev_close(dev))
		stack;

	return 1;
}

static int _dev_discard_blocks(struct device *dev, uint64_t offset_bytes, uint64_t size_bytes)
{
	uint64_t discard_range[2];

	if (!dev_open(dev))
		return_0;

	discard_range[0] = offset_bytes;
	discard_range[1] = size_bytes;

	log_debug_devs("Discarding %" PRIu64 " bytes offset %" PRIu64 " bytes on %s.",
		       size_bytes, offset_bytes, dev_name(dev));
	if (ioctl(dev->fd, BLKDISCARD, &discard_range) < 0) {
		log_error("%s: BLKDISCARD ioctl at offset %" PRIu64 " size %" PRIu64 " failed: %s.",
			  dev_name(dev), offset_bytes, size_bytes, strerror(errno));
		if (!dev_close(dev))
			stack;
		/* It doesn't matter if discard failed, so return success. */
		return 1;
	}

	if (!dev_close(dev))
		stack;

	return 1;
}

/*-----------------------------------------------------------------
 * Public functions
 *---------------------------------------------------------------*/
void dev_size_seqno_inc(void)
{
	_dev_size_seqno++;
}

int dev_get_size(struct device *dev, uint64_t *size)
{
	if (!dev)
		return 0;

	if ((dev->flags & DEV_REGULAR))
		return _dev_get_size_file(dev, size);

	return _dev_get_size_dev(dev, size);
}

int dev_get_read_ahead(struct device *dev, uint32_t *read_ahead)
{
	if (!dev)
		return 0;

	if (dev->flags & DEV_REGULAR) {
		*read_ahead = 0;
		return 1;
	}

	return _dev_read_ahead_dev(dev, read_ahead);
}

int dev_discard_blocks(struct device *dev, uint64_t offset_bytes, uint64_t size_bytes)
{
	if (!dev)
		return 0;

	if (dev->flags & DEV_REGULAR)
		return 1;

	return _dev_discard_blocks(dev, offset_bytes, size_bytes);
}

void dev_flush(struct device *dev)
{
	if (!(dev->flags & DEV_REGULAR) && ioctl(dev->fd, BLKFLSBUF, 0) >= 0)
		return;

	if (fsync(dev->fd) >= 0)
		return;

	sync();
}

int dev_open_flags(struct device *dev, int flags, int direct, int quiet)
{
	struct stat buf;
	const char *name;
	int need_excl = 0, need_rw = 0;

	if ((flags & O_ACCMODE) == O_RDWR)
		need_rw = 1;

	if ((flags & O_EXCL))
		need_excl = 1;

	if (dev->fd >= 0) {
		if (((dev->flags & DEV_OPENED_RW) || !need_rw) &&
		    ((dev->flags & DEV_OPENED_EXCL) || !need_excl)) {
			dev->open_count++;
			return 1;
		}

		if (dev->open_count && !need_excl) {
			log_debug_devs("%s already opened read-only. Upgrading "
				       "to read-write.", dev_name(dev));
			dev->open_count++;
		}

		dev_close_immediate(dev);
		// FIXME: dev with DEV_ALLOCED is released
		// but code is referencing it
	}

	if (critical_section())
		/* FIXME Make this log_error */
		log_verbose("dev_open(%s) called while suspended",
			    dev_name(dev));

	if (!(name = dev_name_confirmed(dev, quiet)))
		return_0;

#ifdef O_DIRECT_SUPPORT
	if (direct) {
		if (!(dev->flags & DEV_O_DIRECT_TESTED))
			dev->flags |= DEV_O_DIRECT;

		if ((dev->flags & DEV_O_DIRECT))
			flags |= O_DIRECT;
	}
#endif

#ifdef O_NOATIME
	/* Don't update atime on device inodes */
	if (!(dev->flags & DEV_REGULAR) && !(dev->flags & DEV_NOT_O_NOATIME))
		flags |= O_NOATIME;
#endif

	if ((dev->fd = open(name, flags, 0777)) < 0) {
#ifdef O_NOATIME
		if ((errno == EPERM) && (flags & O_NOATIME)) {
			flags &= ~O_NOATIME;
			dev->flags |= DEV_NOT_O_NOATIME;
			if ((dev->fd = open(name, flags, 0777)) >= 0) {
				log_debug_devs("%s: Not using O_NOATIME", name);
				goto opened;
			}
		}
#endif

#ifdef O_DIRECT_SUPPORT
		if (direct && !(dev->flags & DEV_O_DIRECT_TESTED)) {
			flags &= ~O_DIRECT;
			if ((dev->fd = open(name, flags, 0777)) >= 0) {
				dev->flags &= ~DEV_O_DIRECT;
				log_debug_devs("%s: Not using O_DIRECT", name);
				goto opened;
			}
		}
#endif
		if (quiet)
			log_sys_debug("open", name);
		else
			log_sys_error("open", name);

		dev->flags |= DEV_OPEN_FAILURE;
		return 0;
	}

#ifdef O_DIRECT_SUPPORT
opened:
	if (direct)
		dev->flags |= DEV_O_DIRECT_TESTED;
#endif
	dev->open_count++;
	dev->flags &= ~DEV_ACCESSED_W;

	if (need_rw)
		dev->flags |= DEV_OPENED_RW;
	else
		dev->flags &= ~DEV_OPENED_RW;

	if (need_excl)
		dev->flags |= DEV_OPENED_EXCL;
	else
		dev->flags &= ~DEV_OPENED_EXCL;

	if (!(dev->flags & DEV_REGULAR) &&
	    ((fstat(dev->fd, &buf) < 0) || (buf.st_rdev != dev->dev))) {
		log_error("%s: fstat failed: Has device name changed?", name);
		dev_close_immediate(dev);
		return 0;
	}

#ifndef O_DIRECT_SUPPORT
	if (!(dev->flags & DEV_REGULAR))
		dev_flush(dev);
#endif

	if ((flags & O_CREAT) && !(flags & O_TRUNC))
		dev->end = lseek(dev->fd, (off_t) 0, SEEK_END);

	dm_list_add(&_open_devices, &dev->open_list);

	log_debug_devs("Opened %s %s%s%s", dev_name(dev),
		       dev->flags & DEV_OPENED_RW ? "RW" : "RO",
		       dev->flags & DEV_OPENED_EXCL ? " O_EXCL" : "",
		       dev->flags & DEV_O_DIRECT ? " O_DIRECT" : "");

	dev->flags &= ~DEV_OPEN_FAILURE;
	return 1;
}

int dev_open_quiet(struct device *dev)
{
	return dev_open_flags(dev, O_RDWR, 1, 1);
}

int dev_open(struct device *dev)
{
	return dev_open_flags(dev, O_RDWR, 1, 0);
}

int dev_open_readonly(struct device *dev)
{
	return dev_open_flags(dev, O_RDONLY, 1, 0);
}

int dev_open_readonly_buffered(struct device *dev)
{
	return dev_open_flags(dev, O_RDONLY, 0, 0);
}

int dev_open_readonly_quiet(struct device *dev)
{
	return dev_open_flags(dev, O_RDONLY, 1, 1);
}

int dev_test_excl(struct device *dev)
{
	int flags;
	int r;

	flags = vg_write_lock_held() ? O_RDWR : O_RDONLY;
	flags |= O_EXCL;

	r = dev_open_flags(dev, flags, 1, 1);
	if (r)
		dev_close_immediate(dev);

	return r;
}

static void _close(struct device *dev)
{
	if (close(dev->fd))
		log_sys_error("close", dev_name(dev));
	dev->fd = -1;
	dev->phys_block_size = -1;
	dev->block_size = -1;
	dm_list_del(&dev->open_list);

	log_debug_devs("Closed %s", dev_name(dev));

	if (dev->flags & DEV_ALLOCED)
		dev_destroy_file(dev);
}

static int _dev_close(struct device *dev, int immediate)
{
	if (dev->fd < 0) {
		log_error("Attempt to close device '%s' "
			  "which is not open.", dev_name(dev));
		return 0;
	}

#ifndef O_DIRECT_SUPPORT
	if (dev->flags & DEV_ACCESSED_W)
		dev_flush(dev);
#endif

	if (dev->open_count > 0)
		dev->open_count--;

	if (immediate && dev->open_count)
		log_debug_devs("%s: Immediate close attempt while still referenced",
			       dev_name(dev));

	/* Close unless device is known to belong to a locked VG */
	if (immediate ||
	    (dev->open_count < 1 && !lvmcache_pvid_is_locked(dev->pvid)))
		_close(dev);

	return 1;
}

int dev_close(struct device *dev)
{
	return _dev_close(dev, 0);
}

int dev_close_immediate(struct device *dev)
{
	return _dev_close(dev, 1);
}

void dev_close_all(void)
{
	struct dm_list *doh, *doht;
	struct device *dev;

	dm_list_iterate_safe(doh, doht, &_open_devices) {
		dev = dm_list_struct_base(doh, struct device, open_list);
		if (dev->open_count < 1)
			_close(dev);
	}
}

static inline int _dev_is_valid(struct device *dev)
{
	return (dev->max_error_count == NO_DEV_ERROR_COUNT_LIMIT ||
		dev->error_count < dev->max_error_count);
}

static void _dev_inc_error_count(struct device *dev)
{
	if (++dev->error_count == dev->max_error_count)
		log_warn("WARNING: Error counts reached a limit of %d. "
			 "Device %s was disabled",
			 dev->max_error_count, dev_name(dev));
}

int dev_read(struct device *dev, uint64_t offset, size_t len, void *buffer)
{
	struct device_area where;
	int ret;

	if (!dev->open_count)
		return_0;

	if (!_dev_is_valid(dev))
		return 0;

	where.dev = dev;
	where.start = offset;
	where.size = len;

	// fprintf(stderr, "READ: %s, %lld, %d\n", dev_name(dev), offset, len);

	ret = _aligned_io(&where, buffer, 0);
	if (!ret)
		_dev_inc_error_count(dev);

	return ret;
}

/*
 * Read from 'dev' into 'buf', possibly in 2 distinct regions, denoted
 * by (offset,len) and (offset2,len2).  Thus, the total size of
 * 'buf' should be len+len2.
 */
int dev_read_circular(struct device *dev, uint64_t offset, size_t len,
		      uint64_t offset2, size_t len2, char *buf)
{
	if (!dev_read(dev, offset, len, buf)) {
		log_error("Read from %s failed", dev_name(dev));
		return 0;
	}

	/*
	 * The second region is optional, and allows for
	 * a circular buffer on the device.
	 */
	if (!len2)
		return 1;

	if (!dev_read(dev, offset2, len2, buf + len)) {
		log_error("Circular read from %s failed",
			  dev_name(dev));
		return 0;
	}

	return 1;
}
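
/*
 * Usage sketch (not from the original source; offsets are hypothetical):
 * reading metadata that wraps from the tail of an area back to its head,
 * e.g. 768 bytes at the end of an area starting at 4096, then 256 bytes
 * from the area's start, into one contiguous buffer:
 *
 *	char buf[1024];
 *
 *	if (!dev_read_circular(dev, 4096 + 3328, 768, 4096, 256, buf))
 *		stack;
 */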

/* FIXME If O_DIRECT can't extend file, dev_extend first; dev_truncate after.
 * But fails if concurrent processes writing
 */

/* FIXME pre-extend the file */
int dev_append(struct device *dev, size_t len, char *buffer)
{
	int r;

	if (!dev->open_count)
		return_0;

	r = dev_write(dev, dev->end, len, buffer);
	dev->end += (uint64_t) len;

#ifndef O_DIRECT_SUPPORT
	dev_flush(dev);
#endif
	return r;
}

int dev_write(struct device *dev, uint64_t offset, size_t len, void *buffer)
{
	struct device_area where;
	int ret;

	if (!dev->open_count)
		return_0;

	if (!_dev_is_valid(dev))
		return 0;

	where.dev = dev;
	where.start = offset;
	where.size = len;

	dev->flags |= DEV_ACCESSED_W;

	ret = _aligned_io(&where, buffer, 1);
	if (!ret)
		_dev_inc_error_count(dev);

	return ret;
}

int dev_set(struct device *dev, uint64_t offset, size_t len, int value)
{
	size_t s;
	char buffer[4096] __attribute__((aligned(8)));

	if (!dev_open(dev))
		return_0;

	if ((offset % SECTOR_SIZE) || (len % SECTOR_SIZE))
		log_debug_devs("Wiping %s at %" PRIu64 " length %" PRIsize_t,
			       dev_name(dev), offset, len);
	else
		log_debug_devs("Wiping %s at sector %" PRIu64 " length %" PRIsize_t
			       " sectors", dev_name(dev), offset >> SECTOR_SHIFT,
			       len >> SECTOR_SHIFT);

	memset(buffer, value, sizeof(buffer));
	while (1) {
		s = len > sizeof(buffer) ? sizeof(buffer) : len;
		if (!dev_write(dev, offset, s, buffer))
			break;

		len -= s;
		if (!len)
			break;

		offset += s;
	}

	dev->flags |= DEV_ACCESSED_W;

	if (!dev_close(dev))
		stack;

	return (len == 0);
}

#ifdef AIO_SUPPORT

/*
 * io_setup() wrapper:
 * async_event_count is the max number of concurrent async
 * i/os, i.e. the number of devices that can be read at once
 *
 * max_io_alloc_count: max number of aio structs to allocate,
 * each with a buf_len size buffer.
 *
 * max_buf_alloc_bytes: max number of bytes to use for buffers
 * attached to all aio structs; each aio struct gets a
 * buf_len size buffer.
 *
 * When only max_io_alloc_count is set, it is used directly.
 *
 * When only max_buf_alloc_bytes is set, the number of aio
 * structs is determined by this number divided by buf_len.
 *
 * When both are set, max_io_alloc_count is reduced, if needed,
 * to whatever value max_buf_alloc_bytes would allow.
 *
 * When both are zero, there is no limit on the number of aio
 * structs.  If allocation fails for an aio struct or its buffer,
 * the code should revert to synchronous io.
 */
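
/*
 * Worked example (not from the original source): with buf_len 131072
 * (128 KiB), max_io_alloc_count 64 and max_buf_alloc_bytes 4 MiB,
 * 64 * 131072 = 8 MiB exceeds the buffer limit, so the count is reduced
 * to 4194304 / 131072 = 32 aio structs.
 */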

struct dev_async_context *dev_async_context_setup(unsigned async_event_count,
						  unsigned max_io_alloc_count,
						  unsigned max_buf_alloc_bytes,
						  int buf_len)
{
	struct dev_async_context *ac;
	unsigned nr_events = DEFAULT_ASYNC_EVENTS;
	int count;
	int error;

	if (async_event_count)
		nr_events = async_event_count;

	if (!(ac = malloc(sizeof(struct dev_async_context))))
		return_NULL;

	memset(ac, 0, sizeof(struct dev_async_context));

	dm_list_init(&ac->unused_ios);

	error = io_setup(nr_events, &ac->aio_ctx);

	if (error < 0) {
		log_warn("WARNING: async io setup error %d with %u events.", error, nr_events);
		free(ac);
		return_NULL;
	}

	if (!max_io_alloc_count && !max_buf_alloc_bytes)
		count = 0;
	else if (!max_io_alloc_count && max_buf_alloc_bytes)
		count = max_buf_alloc_bytes / buf_len;
	else if (max_io_alloc_count && max_buf_alloc_bytes) {
		if (max_io_alloc_count * buf_len > max_buf_alloc_bytes)
			count = max_buf_alloc_bytes / buf_len;
		else
			count = max_io_alloc_count;
	} else
		count = max_io_alloc_count;

	ac->max_ios = count;
	return ac;
}

void dev_async_context_destroy(struct dev_async_context *ac)
{
	io_destroy(ac->aio_ctx);
	free(ac);
}

static struct dev_async_io *_async_io_alloc(int buf_len)
{
	struct dev_async_io *aio;
	char *buf;
	char **p_buf;

	/*
	 * mem pool doesn't seem to work for this, probably because
	 * of the memalign that follows.
	 */
	if (!(aio = malloc(sizeof(struct dev_async_io))))
		return_NULL;

	memset(aio, 0, sizeof(struct dev_async_io));

	buf = NULL;
	p_buf = &buf;

	if (posix_memalign((void *)p_buf, getpagesize(), buf_len)) {
		free(aio);
		return_NULL;
	}

	memset(buf, 0, buf_len);

	aio->buf = buf;
	aio->buf_len = buf_len;
	return aio;
}

static void _async_io_free(struct dev_async_io *aio)
{
	if (aio->buf)
		free(aio->buf);
	free(aio);
}

int dev_async_alloc_ios(struct dev_async_context *ac, int num, int buf_len, int *available)
{
	struct dev_async_io *aio;
	int count;
	int i;

	/* FIXME: check if num wants more pre allocated? */
	if (!dm_list_empty(&ac->unused_ios))
		return 1;

	/*
	 * When no limit is used and no pre-alloc number is set,
	 * then no ios are allocated up front, but they are
	 * allocated as needed in get().
	 */
	if (!ac->max_ios && !num) {
		*available = 0;
		return 1;
	}

	if (num && !ac->max_ios)
		count = num;
	else if (!num && ac->max_ios)
		count = ac->max_ios;
	else if (num > ac->max_ios)
		count = ac->max_ios;
	else if (num < ac->max_ios)
		count = num;
	else
		count = ac->max_ios;

	for (i = 0; i < count; i++) {
		if (!(aio = _async_io_alloc(buf_len))) {
			ac->num_ios = i;
			*available = i;
			return 1;
		}
		dm_list_add(&ac->unused_ios, &aio->list);
	}

	ac->num_ios = count;
	*available = count;
	return 1;
}

void dev_async_free_ios(struct dev_async_context *ac)
{
	struct dev_async_io *aio, *aio2;

	dm_list_iterate_items_safe(aio, aio2, &ac->unused_ios) {
		dm_list_del(&aio->list);
		_async_io_free(aio);
	}
}

struct dev_async_io *dev_async_io_get(struct dev_async_context *ac, int buf_len)
{
	struct dm_list *aio_item;
	struct dev_async_io *aio;

	if (!(aio_item = dm_list_first(&ac->unused_ios)))
		goto alloc_new;

	aio = dm_list_item(aio_item, struct dev_async_io);
	dm_list_del(&aio->list);
	return aio;

alloc_new:
	/* alloc on demand if there is no max or we have used less than max */
	if (!ac->max_ios || (ac->num_ios < ac->max_ios)) {
		if ((aio = _async_io_alloc(buf_len))) {
			ac->num_ios++;
			return aio;
		}
	}

	return NULL;
}

void dev_async_io_put(struct dev_async_context *ac, struct dev_async_io *aio)
{
	if (!ac)
		_async_io_free(aio);
	else {
		memset(aio->buf, 0, aio->buf_len);
		aio->dev = NULL;
		aio->len = 0;
		aio->done = 0;
		aio->result = 0;
		dm_list_add(&ac->unused_ios, &aio->list);
	}
}

/* io_submit() wrapper */

int dev_async_read_submit(struct dev_async_context *ac, struct dev_async_io *aio,
			  struct device *dev, uint32_t len, uint64_t offset, int *nospace)
{
	struct iocb *iocb = &aio->iocb;
	int error;

	*nospace = 0;

	if (len > aio->buf_len)
		return_0;

	aio->len = len;

	iocb->data = aio;
	iocb->aio_fildes = dev_fd(dev);
	iocb->aio_lio_opcode = IO_CMD_PREAD;
	iocb->u.c.buf = aio->buf;
	iocb->u.c.nbytes = len;
	iocb->u.c.offset = offset;

	error = io_submit(ac->aio_ctx, 1, &iocb);
	if (error == -EAGAIN)
		*nospace = 1;
	if (error < 0)
		return 0;
	return 1;
}
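
/*
 * Note (not from the original source): io_submit() returns -EAGAIN when
 * the kernel cannot queue further iocbs for this context (the event ring
 * sized by async_event_count is full); *nospace lets a caller distinguish
 * that case and fall back to a synchronous dev_read() for this device
 * instead of treating it as a hard failure.
 */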

/* io_getevents() wrapper */

int dev_async_getevents(struct dev_async_context *ac, int wait_count, struct timespec *timeout,
			int *done_count)
{
	int wait_nr;
	int rv;
	int i;

	*done_count = 0;

retry:
	memset(&ac->events, 0, sizeof(ac->events));

	if (wait_count >= MAX_GET_EVENTS)
		wait_nr = MAX_GET_EVENTS;
	else
		wait_nr = wait_count;

	rv = io_getevents(ac->aio_ctx, 1, wait_nr, (struct io_event *)&ac->events, timeout);

	if (rv == -EINTR)
		goto retry;
	if (rv < 0)
		return 0;
	if (!rv)
		return 1;

	for (i = 0; i < rv; i++) {
		struct iocb *iocb = ac->events[i].obj;
		struct dev_async_io *aio = iocb->data;
		aio->result = ac->events[i].res;
		aio->done = 1;
	}

	*done_count = rv;
	return 1;
}

#else /* AIO_SUPPORT */

struct dev_async_context *dev_async_context_setup(unsigned async_event_count,
						  unsigned max_io_alloc_count,
						  unsigned max_buf_alloc_bytes,
						  int buf_len)
{
	return NULL;
}

void dev_async_context_destroy(struct dev_async_context *ac)
{
}

int dev_async_alloc_ios(struct dev_async_context *ac, int num, int buf_len, int *available)
{
	return 0;
}

void dev_async_free_ios(struct dev_async_context *ac)
{
}

struct dev_async_io *dev_async_io_get(struct dev_async_context *ac, int buf_len)
{
	return NULL;
}

void dev_async_io_put(struct dev_async_context *ac, struct dev_async_io *aio)
{
}

int dev_async_read_submit(struct dev_async_context *ac, struct dev_async_io *aio,
			  struct device *dev, uint32_t len, uint64_t offset, int *nospace)
{
	return 0;
}

int dev_async_getevents(struct dev_async_context *ac, int wait_count, struct timespec *timeout,
			int *done_count)
{
	return 0;
}

#endif /* AIO_SUPPORT */