Merge branch 'for-4.5/drivers' of git://git.kernel.dk/linux-block
Pull block driver updates from Jens Axboe: "This is the block driver pull request for 4.5, with the exception of NVMe, which is in a separate branch and will be posted after this one. This pull request contains: - A set of bcache stability fixes, which have been acked by Kent. These have been used and tested for more than a year by the community, so it's about time that they got in. - A set of drbd updates from the drbd team (Andreas, Lars, Philipp) and Markus Elfring, Oleg Drokin. - A set of fixes for xen blkback/front from the usual suspects, (Bob, Konrad) as well as community based fixes from Kiri, Julien, and Peng. - A 2038 time fix for sx8 from Shraddha, with a fix from me. - A small mtip32xx cleanup from Zhu Yanjun. - A null_blk division fix from Arnd" * 'for-4.5/drivers' of git://git.kernel.dk/linux-block: (71 commits) null_blk: use sector_div instead of do_div mtip32xx: restrict variables visible in current code module xen/blkfront: Fix crash if backend doesn't follow the right states. xen/blkback: Fix two memory leaks. xen/blkback: make st_ statistics per ring xen/blkfront: Handle non-indirect grant with 64KB pages xen-blkfront: Introduce blkif_ring_get_request xen-blkback: clear PF_NOFREEZE for xen_blkif_schedule() xen/blkback: Free resources if connect_ring failed. xen/blocks: Return -EXX instead of -1 xen/blkback: make pool of persistent grants and free pages per-queue xen/blkback: get the number of hardware queues/rings from blkfront xen/blkback: pseudo support for multi hardware queues/rings xen/blkback: separate ring information out of struct xen_blkif xen/blkfront: correct setting for xen_blkif_max_ring_order xen/blkfront: make persistent grants pool per-queue xen/blkfront: Remove duplicate setting of ->xbdev. xen/blkfront: Cleanup of comments, fix unaligned variables, and syntax errors. xen/blkfront: negotiate number of queues/rings to be used with backend xen/blkfront: split per device io_lock ...
This commit is contained in:
commit
641203549a
11
MAINTAINERS
11
MAINTAINERS
@ -3665,13 +3665,12 @@ F: drivers/scsi/dpt*
|
||||
F: drivers/scsi/dpt/
|
||||
|
||||
DRBD DRIVER
|
||||
P: Philipp Reisner
|
||||
P: Lars Ellenberg
|
||||
M: drbd-dev@lists.linbit.com
|
||||
L: drbd-user@lists.linbit.com
|
||||
M: Philipp Reisner <philipp.reisner@linbit.com>
|
||||
M: Lars Ellenberg <lars.ellenberg@linbit.com>
|
||||
L: drbd-dev@lists.linbit.com
|
||||
W: http://www.drbd.org
|
||||
T: git git://git.drbd.org/linux-2.6-drbd.git drbd
|
||||
T: git git://git.drbd.org/drbd-8.3.git
|
||||
T: git git://git.linbit.com/linux-drbd.git
|
||||
T: git git://git.linbit.com/drbd-8.4.git
|
||||
S: Supported
|
||||
F: drivers/block/drbd/
|
||||
F: lib/lru_cache.c
|
||||
|
@ -288,7 +288,162 @@ bool drbd_al_begin_io_prepare(struct drbd_device *device, struct drbd_interval *
|
||||
return need_transaction;
|
||||
}
|
||||
|
||||
static int al_write_transaction(struct drbd_device *device);
|
||||
#if (PAGE_SHIFT + 3) < (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT)
|
||||
/* Currently BM_BLOCK_SHIFT, BM_EXT_SHIFT and AL_EXTENT_SHIFT
|
||||
* are still coupled, or assume too much about their relation.
|
||||
* Code below will not work if this is violated.
|
||||
* Will be cleaned up with some followup patch.
|
||||
*/
|
||||
# error FIXME
|
||||
#endif
|
||||
|
||||
static unsigned int al_extent_to_bm_page(unsigned int al_enr)
|
||||
{
|
||||
return al_enr >>
|
||||
/* bit to page */
|
||||
((PAGE_SHIFT + 3) -
|
||||
/* al extent number to bit */
|
||||
(AL_EXTENT_SHIFT - BM_BLOCK_SHIFT));
|
||||
}
|
||||
|
||||
static sector_t al_tr_number_to_on_disk_sector(struct drbd_device *device)
|
||||
{
|
||||
const unsigned int stripes = device->ldev->md.al_stripes;
|
||||
const unsigned int stripe_size_4kB = device->ldev->md.al_stripe_size_4k;
|
||||
|
||||
/* transaction number, modulo on-disk ring buffer wrap around */
|
||||
unsigned int t = device->al_tr_number % (device->ldev->md.al_size_4k);
|
||||
|
||||
/* ... to aligned 4k on disk block */
|
||||
t = ((t % stripes) * stripe_size_4kB) + t/stripes;
|
||||
|
||||
/* ... to 512 byte sector in activity log */
|
||||
t *= 8;
|
||||
|
||||
/* ... plus offset to the on disk position */
|
||||
return device->ldev->md.md_offset + device->ldev->md.al_offset + t;
|
||||
}
|
||||
|
||||
static int __al_write_transaction(struct drbd_device *device, struct al_transaction_on_disk *buffer)
|
||||
{
|
||||
struct lc_element *e;
|
||||
sector_t sector;
|
||||
int i, mx;
|
||||
unsigned extent_nr;
|
||||
unsigned crc = 0;
|
||||
int err = 0;
|
||||
|
||||
memset(buffer, 0, sizeof(*buffer));
|
||||
buffer->magic = cpu_to_be32(DRBD_AL_MAGIC);
|
||||
buffer->tr_number = cpu_to_be32(device->al_tr_number);
|
||||
|
||||
i = 0;
|
||||
|
||||
/* Even though no one can start to change this list
|
||||
* once we set the LC_LOCKED -- from drbd_al_begin_io(),
|
||||
* lc_try_lock_for_transaction() --, someone may still
|
||||
* be in the process of changing it. */
|
||||
spin_lock_irq(&device->al_lock);
|
||||
list_for_each_entry(e, &device->act_log->to_be_changed, list) {
|
||||
if (i == AL_UPDATES_PER_TRANSACTION) {
|
||||
i++;
|
||||
break;
|
||||
}
|
||||
buffer->update_slot_nr[i] = cpu_to_be16(e->lc_index);
|
||||
buffer->update_extent_nr[i] = cpu_to_be32(e->lc_new_number);
|
||||
if (e->lc_number != LC_FREE)
|
||||
drbd_bm_mark_for_writeout(device,
|
||||
al_extent_to_bm_page(e->lc_number));
|
||||
i++;
|
||||
}
|
||||
spin_unlock_irq(&device->al_lock);
|
||||
BUG_ON(i > AL_UPDATES_PER_TRANSACTION);
|
||||
|
||||
buffer->n_updates = cpu_to_be16(i);
|
||||
for ( ; i < AL_UPDATES_PER_TRANSACTION; i++) {
|
||||
buffer->update_slot_nr[i] = cpu_to_be16(-1);
|
||||
buffer->update_extent_nr[i] = cpu_to_be32(LC_FREE);
|
||||
}
|
||||
|
||||
buffer->context_size = cpu_to_be16(device->act_log->nr_elements);
|
||||
buffer->context_start_slot_nr = cpu_to_be16(device->al_tr_cycle);
|
||||
|
||||
mx = min_t(int, AL_CONTEXT_PER_TRANSACTION,
|
||||
device->act_log->nr_elements - device->al_tr_cycle);
|
||||
for (i = 0; i < mx; i++) {
|
||||
unsigned idx = device->al_tr_cycle + i;
|
||||
extent_nr = lc_element_by_index(device->act_log, idx)->lc_number;
|
||||
buffer->context[i] = cpu_to_be32(extent_nr);
|
||||
}
|
||||
for (; i < AL_CONTEXT_PER_TRANSACTION; i++)
|
||||
buffer->context[i] = cpu_to_be32(LC_FREE);
|
||||
|
||||
device->al_tr_cycle += AL_CONTEXT_PER_TRANSACTION;
|
||||
if (device->al_tr_cycle >= device->act_log->nr_elements)
|
||||
device->al_tr_cycle = 0;
|
||||
|
||||
sector = al_tr_number_to_on_disk_sector(device);
|
||||
|
||||
crc = crc32c(0, buffer, 4096);
|
||||
buffer->crc32c = cpu_to_be32(crc);
|
||||
|
||||
if (drbd_bm_write_hinted(device))
|
||||
err = -EIO;
|
||||
else {
|
||||
bool write_al_updates;
|
||||
rcu_read_lock();
|
||||
write_al_updates = rcu_dereference(device->ldev->disk_conf)->al_updates;
|
||||
rcu_read_unlock();
|
||||
if (write_al_updates) {
|
||||
if (drbd_md_sync_page_io(device, device->ldev, sector, WRITE)) {
|
||||
err = -EIO;
|
||||
drbd_chk_io_error(device, 1, DRBD_META_IO_ERROR);
|
||||
} else {
|
||||
device->al_tr_number++;
|
||||
device->al_writ_cnt++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int al_write_transaction(struct drbd_device *device)
|
||||
{
|
||||
struct al_transaction_on_disk *buffer;
|
||||
int err;
|
||||
|
||||
if (!get_ldev(device)) {
|
||||
drbd_err(device, "disk is %s, cannot start al transaction\n",
|
||||
drbd_disk_str(device->state.disk));
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
/* The bitmap write may have failed, causing a state change. */
|
||||
if (device->state.disk < D_INCONSISTENT) {
|
||||
drbd_err(device,
|
||||
"disk is %s, cannot write al transaction\n",
|
||||
drbd_disk_str(device->state.disk));
|
||||
put_ldev(device);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
/* protects md_io_buffer, al_tr_cycle, ... */
|
||||
buffer = drbd_md_get_buffer(device, __func__);
|
||||
if (!buffer) {
|
||||
drbd_err(device, "disk failed while waiting for md_io buffer\n");
|
||||
put_ldev(device);
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
err = __al_write_transaction(device, buffer);
|
||||
|
||||
drbd_md_put_buffer(device);
|
||||
put_ldev(device);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
void drbd_al_begin_io_commit(struct drbd_device *device)
|
||||
{
|
||||
@ -420,153 +575,6 @@ void drbd_al_complete_io(struct drbd_device *device, struct drbd_interval *i)
|
||||
wake_up(&device->al_wait);
|
||||
}
|
||||
|
||||
#if (PAGE_SHIFT + 3) < (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT)
|
||||
/* Currently BM_BLOCK_SHIFT, BM_EXT_SHIFT and AL_EXTENT_SHIFT
|
||||
* are still coupled, or assume too much about their relation.
|
||||
* Code below will not work if this is violated.
|
||||
* Will be cleaned up with some followup patch.
|
||||
*/
|
||||
# error FIXME
|
||||
#endif
|
||||
|
||||
static unsigned int al_extent_to_bm_page(unsigned int al_enr)
|
||||
{
|
||||
return al_enr >>
|
||||
/* bit to page */
|
||||
((PAGE_SHIFT + 3) -
|
||||
/* al extent number to bit */
|
||||
(AL_EXTENT_SHIFT - BM_BLOCK_SHIFT));
|
||||
}
|
||||
|
||||
static sector_t al_tr_number_to_on_disk_sector(struct drbd_device *device)
|
||||
{
|
||||
const unsigned int stripes = device->ldev->md.al_stripes;
|
||||
const unsigned int stripe_size_4kB = device->ldev->md.al_stripe_size_4k;
|
||||
|
||||
/* transaction number, modulo on-disk ring buffer wrap around */
|
||||
unsigned int t = device->al_tr_number % (device->ldev->md.al_size_4k);
|
||||
|
||||
/* ... to aligned 4k on disk block */
|
||||
t = ((t % stripes) * stripe_size_4kB) + t/stripes;
|
||||
|
||||
/* ... to 512 byte sector in activity log */
|
||||
t *= 8;
|
||||
|
||||
/* ... plus offset to the on disk position */
|
||||
return device->ldev->md.md_offset + device->ldev->md.al_offset + t;
|
||||
}
|
||||
|
||||
int al_write_transaction(struct drbd_device *device)
|
||||
{
|
||||
struct al_transaction_on_disk *buffer;
|
||||
struct lc_element *e;
|
||||
sector_t sector;
|
||||
int i, mx;
|
||||
unsigned extent_nr;
|
||||
unsigned crc = 0;
|
||||
int err = 0;
|
||||
|
||||
if (!get_ldev(device)) {
|
||||
drbd_err(device, "disk is %s, cannot start al transaction\n",
|
||||
drbd_disk_str(device->state.disk));
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
/* The bitmap write may have failed, causing a state change. */
|
||||
if (device->state.disk < D_INCONSISTENT) {
|
||||
drbd_err(device,
|
||||
"disk is %s, cannot write al transaction\n",
|
||||
drbd_disk_str(device->state.disk));
|
||||
put_ldev(device);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
/* protects md_io_buffer, al_tr_cycle, ... */
|
||||
buffer = drbd_md_get_buffer(device, __func__);
|
||||
if (!buffer) {
|
||||
drbd_err(device, "disk failed while waiting for md_io buffer\n");
|
||||
put_ldev(device);
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
memset(buffer, 0, sizeof(*buffer));
|
||||
buffer->magic = cpu_to_be32(DRBD_AL_MAGIC);
|
||||
buffer->tr_number = cpu_to_be32(device->al_tr_number);
|
||||
|
||||
i = 0;
|
||||
|
||||
/* Even though no one can start to change this list
|
||||
* once we set the LC_LOCKED -- from drbd_al_begin_io(),
|
||||
* lc_try_lock_for_transaction() --, someone may still
|
||||
* be in the process of changing it. */
|
||||
spin_lock_irq(&device->al_lock);
|
||||
list_for_each_entry(e, &device->act_log->to_be_changed, list) {
|
||||
if (i == AL_UPDATES_PER_TRANSACTION) {
|
||||
i++;
|
||||
break;
|
||||
}
|
||||
buffer->update_slot_nr[i] = cpu_to_be16(e->lc_index);
|
||||
buffer->update_extent_nr[i] = cpu_to_be32(e->lc_new_number);
|
||||
if (e->lc_number != LC_FREE)
|
||||
drbd_bm_mark_for_writeout(device,
|
||||
al_extent_to_bm_page(e->lc_number));
|
||||
i++;
|
||||
}
|
||||
spin_unlock_irq(&device->al_lock);
|
||||
BUG_ON(i > AL_UPDATES_PER_TRANSACTION);
|
||||
|
||||
buffer->n_updates = cpu_to_be16(i);
|
||||
for ( ; i < AL_UPDATES_PER_TRANSACTION; i++) {
|
||||
buffer->update_slot_nr[i] = cpu_to_be16(-1);
|
||||
buffer->update_extent_nr[i] = cpu_to_be32(LC_FREE);
|
||||
}
|
||||
|
||||
buffer->context_size = cpu_to_be16(device->act_log->nr_elements);
|
||||
buffer->context_start_slot_nr = cpu_to_be16(device->al_tr_cycle);
|
||||
|
||||
mx = min_t(int, AL_CONTEXT_PER_TRANSACTION,
|
||||
device->act_log->nr_elements - device->al_tr_cycle);
|
||||
for (i = 0; i < mx; i++) {
|
||||
unsigned idx = device->al_tr_cycle + i;
|
||||
extent_nr = lc_element_by_index(device->act_log, idx)->lc_number;
|
||||
buffer->context[i] = cpu_to_be32(extent_nr);
|
||||
}
|
||||
for (; i < AL_CONTEXT_PER_TRANSACTION; i++)
|
||||
buffer->context[i] = cpu_to_be32(LC_FREE);
|
||||
|
||||
device->al_tr_cycle += AL_CONTEXT_PER_TRANSACTION;
|
||||
if (device->al_tr_cycle >= device->act_log->nr_elements)
|
||||
device->al_tr_cycle = 0;
|
||||
|
||||
sector = al_tr_number_to_on_disk_sector(device);
|
||||
|
||||
crc = crc32c(0, buffer, 4096);
|
||||
buffer->crc32c = cpu_to_be32(crc);
|
||||
|
||||
if (drbd_bm_write_hinted(device))
|
||||
err = -EIO;
|
||||
else {
|
||||
bool write_al_updates;
|
||||
rcu_read_lock();
|
||||
write_al_updates = rcu_dereference(device->ldev->disk_conf)->al_updates;
|
||||
rcu_read_unlock();
|
||||
if (write_al_updates) {
|
||||
if (drbd_md_sync_page_io(device, device->ldev, sector, WRITE)) {
|
||||
err = -EIO;
|
||||
drbd_chk_io_error(device, 1, DRBD_META_IO_ERROR);
|
||||
} else {
|
||||
device->al_tr_number++;
|
||||
device->al_writ_cnt++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
drbd_md_put_buffer(device);
|
||||
put_ldev(device);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int _try_lc_del(struct drbd_device *device, struct lc_element *al_ext)
|
||||
{
|
||||
int rv;
|
||||
@ -606,21 +614,24 @@ void drbd_al_shrink(struct drbd_device *device)
|
||||
wake_up(&device->al_wait);
|
||||
}
|
||||
|
||||
int drbd_initialize_al(struct drbd_device *device, void *buffer)
|
||||
int drbd_al_initialize(struct drbd_device *device, void *buffer)
|
||||
{
|
||||
struct al_transaction_on_disk *al = buffer;
|
||||
struct drbd_md *md = &device->ldev->md;
|
||||
sector_t al_base = md->md_offset + md->al_offset;
|
||||
int al_size_4k = md->al_stripes * md->al_stripe_size_4k;
|
||||
int i;
|
||||
|
||||
memset(al, 0, 4096);
|
||||
al->magic = cpu_to_be32(DRBD_AL_MAGIC);
|
||||
al->transaction_type = cpu_to_be16(AL_TR_INITIALIZED);
|
||||
al->crc32c = cpu_to_be32(crc32c(0, al, 4096));
|
||||
__al_write_transaction(device, al);
|
||||
/* There may or may not have been a pending transaction. */
|
||||
spin_lock_irq(&device->al_lock);
|
||||
lc_committed(device->act_log);
|
||||
spin_unlock_irq(&device->al_lock);
|
||||
|
||||
for (i = 0; i < al_size_4k; i++) {
|
||||
int err = drbd_md_sync_page_io(device, device->ldev, al_base + i * 8, WRITE);
|
||||
/* The rest of the transactions will have an empty "updates" list, and
|
||||
* are written out only to provide the context, and to initialize the
|
||||
* on-disk ring buffer. */
|
||||
for (i = 1; i < al_size_4k; i++) {
|
||||
int err = __al_write_transaction(device, al);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
@ -24,7 +24,7 @@
|
||||
|
||||
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
||||
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/bitmap.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/drbd.h>
|
||||
@ -479,8 +479,14 @@ void drbd_bm_cleanup(struct drbd_device *device)
|
||||
* this masks out the remaining bits.
|
||||
* Returns the number of bits cleared.
|
||||
*/
|
||||
#ifndef BITS_PER_PAGE
|
||||
#define BITS_PER_PAGE (1UL << (PAGE_SHIFT + 3))
|
||||
#define BITS_PER_PAGE_MASK (BITS_PER_PAGE - 1)
|
||||
#else
|
||||
# if BITS_PER_PAGE != (1UL << (PAGE_SHIFT + 3))
|
||||
# error "ambiguous BITS_PER_PAGE"
|
||||
# endif
|
||||
#endif
|
||||
#define BITS_PER_LONG_MASK (BITS_PER_LONG - 1)
|
||||
static int bm_clear_surplus(struct drbd_bitmap *b)
|
||||
{
|
||||
@ -559,21 +565,19 @@ static unsigned long bm_count_bits(struct drbd_bitmap *b)
|
||||
unsigned long *p_addr;
|
||||
unsigned long bits = 0;
|
||||
unsigned long mask = (1UL << (b->bm_bits & BITS_PER_LONG_MASK)) -1;
|
||||
int idx, i, last_word;
|
||||
int idx, last_word;
|
||||
|
||||
/* all but last page */
|
||||
for (idx = 0; idx < b->bm_number_of_pages - 1; idx++) {
|
||||
p_addr = __bm_map_pidx(b, idx);
|
||||
for (i = 0; i < LWPP; i++)
|
||||
bits += hweight_long(p_addr[i]);
|
||||
bits += bitmap_weight(p_addr, BITS_PER_PAGE);
|
||||
__bm_unmap(p_addr);
|
||||
cond_resched();
|
||||
}
|
||||
/* last (or only) page */
|
||||
last_word = ((b->bm_bits - 1) & BITS_PER_PAGE_MASK) >> LN2_BPL;
|
||||
p_addr = __bm_map_pidx(b, idx);
|
||||
for (i = 0; i < last_word; i++)
|
||||
bits += hweight_long(p_addr[i]);
|
||||
bits += bitmap_weight(p_addr, last_word * BITS_PER_LONG);
|
||||
p_addr[last_word] &= cpu_to_lel(mask);
|
||||
bits += hweight_long(p_addr[last_word]);
|
||||
/* 32bit arch, may have an unused padding long */
|
||||
@ -1419,6 +1423,9 @@ static inline void bm_set_full_words_within_one_page(struct drbd_bitmap *b,
|
||||
int bits;
|
||||
int changed = 0;
|
||||
unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr]);
|
||||
|
||||
/* I think it is more cache line friendly to hweight_long then set to ~0UL,
|
||||
* than to first bitmap_weight() all words, then bitmap_fill() all words */
|
||||
for (i = first_word; i < last_word; i++) {
|
||||
bits = hweight_long(paddr[i]);
|
||||
paddr[i] = ~0UL;
|
||||
@ -1628,8 +1635,7 @@ int drbd_bm_e_weight(struct drbd_device *device, unsigned long enr)
|
||||
int n = e-s;
|
||||
p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, s));
|
||||
bm = p_addr + MLPP(s);
|
||||
while (n--)
|
||||
count += hweight_long(*bm++);
|
||||
count += bitmap_weight(bm, n * BITS_PER_LONG);
|
||||
bm_unmap(p_addr);
|
||||
} else {
|
||||
drbd_err(device, "start offset (%d) too large in drbd_bm_e_weight\n", s);
|
||||
|
@ -771,6 +771,13 @@ static int device_data_gen_id_show(struct seq_file *m, void *ignored)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int device_ed_gen_id_show(struct seq_file *m, void *ignored)
|
||||
{
|
||||
struct drbd_device *device = m->private;
|
||||
seq_printf(m, "0x%016llX\n", (unsigned long long)device->ed_uuid);
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define drbd_debugfs_device_attr(name) \
|
||||
static int device_ ## name ## _open(struct inode *inode, struct file *file) \
|
||||
{ \
|
||||
@ -796,6 +803,7 @@ drbd_debugfs_device_attr(oldest_requests)
|
||||
drbd_debugfs_device_attr(act_log_extents)
|
||||
drbd_debugfs_device_attr(resync_extents)
|
||||
drbd_debugfs_device_attr(data_gen_id)
|
||||
drbd_debugfs_device_attr(ed_gen_id)
|
||||
|
||||
void drbd_debugfs_device_add(struct drbd_device *device)
|
||||
{
|
||||
@ -839,6 +847,7 @@ void drbd_debugfs_device_add(struct drbd_device *device)
|
||||
DCF(act_log_extents);
|
||||
DCF(resync_extents);
|
||||
DCF(data_gen_id);
|
||||
DCF(ed_gen_id);
|
||||
#undef DCF
|
||||
return;
|
||||
|
||||
@ -854,6 +863,7 @@ void drbd_debugfs_device_cleanup(struct drbd_device *device)
|
||||
drbd_debugfs_remove(&device->debugfs_vol_act_log_extents);
|
||||
drbd_debugfs_remove(&device->debugfs_vol_resync_extents);
|
||||
drbd_debugfs_remove(&device->debugfs_vol_data_gen_id);
|
||||
drbd_debugfs_remove(&device->debugfs_vol_ed_gen_id);
|
||||
drbd_debugfs_remove(&device->debugfs_vol);
|
||||
}
|
||||
|
||||
|
@ -77,13 +77,6 @@ extern int fault_devs;
|
||||
extern char usermode_helper[];
|
||||
|
||||
|
||||
/* I don't remember why XCPU ...
|
||||
* This is used to wake the asender,
|
||||
* and to interrupt sending the sending task
|
||||
* on disconnect.
|
||||
*/
|
||||
#define DRBD_SIG SIGXCPU
|
||||
|
||||
/* This is used to stop/restart our threads.
|
||||
* Cannot use SIGTERM nor SIGKILL, since these
|
||||
* are sent out by init on runlevel changes
|
||||
@ -292,6 +285,9 @@ struct drbd_device_work {
|
||||
|
||||
extern int drbd_wait_misc(struct drbd_device *, struct drbd_interval *);
|
||||
|
||||
extern void lock_all_resources(void);
|
||||
extern void unlock_all_resources(void);
|
||||
|
||||
struct drbd_request {
|
||||
struct drbd_work w;
|
||||
struct drbd_device *device;
|
||||
@ -504,7 +500,6 @@ enum {
|
||||
|
||||
MD_NO_FUA, /* Users wants us to not use FUA/FLUSH on meta data dev */
|
||||
|
||||
SUSPEND_IO, /* suspend application io */
|
||||
BITMAP_IO, /* suspend application io;
|
||||
once no more io in flight, start bitmap io */
|
||||
BITMAP_IO_QUEUED, /* Started bitmap IO */
|
||||
@ -632,12 +627,6 @@ struct bm_io_work {
|
||||
void (*done)(struct drbd_device *device, int rv);
|
||||
};
|
||||
|
||||
enum write_ordering_e {
|
||||
WO_none,
|
||||
WO_drain_io,
|
||||
WO_bdev_flush,
|
||||
};
|
||||
|
||||
struct fifo_buffer {
|
||||
unsigned int head_index;
|
||||
unsigned int size;
|
||||
@ -650,8 +639,7 @@ extern struct fifo_buffer *fifo_alloc(int fifo_size);
|
||||
enum {
|
||||
NET_CONGESTED, /* The data socket is congested */
|
||||
RESOLVE_CONFLICTS, /* Set on one node, cleared on the peer! */
|
||||
SEND_PING, /* whether asender should send a ping asap */
|
||||
SIGNAL_ASENDER, /* whether asender wants to be interrupted */
|
||||
SEND_PING,
|
||||
GOT_PING_ACK, /* set when we receive a ping_ack packet, ping_wait gets woken */
|
||||
CONN_WD_ST_CHG_REQ, /* A cluster wide state change on the connection is active */
|
||||
CONN_WD_ST_CHG_OKAY,
|
||||
@ -670,6 +658,8 @@ enum {
|
||||
DEVICE_WORK_PENDING, /* tell worker that some device has pending work */
|
||||
};
|
||||
|
||||
enum which_state { NOW, OLD = NOW, NEW };
|
||||
|
||||
struct drbd_resource {
|
||||
char *name;
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
@ -755,7 +745,8 @@ struct drbd_connection {
|
||||
unsigned long last_reconnect_jif;
|
||||
struct drbd_thread receiver;
|
||||
struct drbd_thread worker;
|
||||
struct drbd_thread asender;
|
||||
struct drbd_thread ack_receiver;
|
||||
struct workqueue_struct *ack_sender;
|
||||
|
||||
/* cached pointers,
|
||||
* so we can look up the oldest pending requests more quickly.
|
||||
@ -774,6 +765,8 @@ struct drbd_connection {
|
||||
struct drbd_thread_timing_details r_timing_details[DRBD_THREAD_DETAILS_HIST];
|
||||
|
||||
struct {
|
||||
unsigned long last_sent_barrier_jif;
|
||||
|
||||
/* whether this sender thread
|
||||
* has processed a single write yet. */
|
||||
bool seen_any_write_yet;
|
||||
@ -788,6 +781,17 @@ struct drbd_connection {
|
||||
} send;
|
||||
};
|
||||
|
||||
static inline bool has_net_conf(struct drbd_connection *connection)
|
||||
{
|
||||
bool has_net_conf;
|
||||
|
||||
rcu_read_lock();
|
||||
has_net_conf = rcu_dereference(connection->net_conf);
|
||||
rcu_read_unlock();
|
||||
|
||||
return has_net_conf;
|
||||
}
|
||||
|
||||
void __update_timing_details(
|
||||
struct drbd_thread_timing_details *tdp,
|
||||
unsigned int *cb_nr,
|
||||
@ -811,6 +815,7 @@ struct drbd_peer_device {
|
||||
struct list_head peer_devices;
|
||||
struct drbd_device *device;
|
||||
struct drbd_connection *connection;
|
||||
struct work_struct send_acks_work;
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
struct dentry *debugfs_peer_dev;
|
||||
#endif
|
||||
@ -829,6 +834,7 @@ struct drbd_device {
|
||||
struct dentry *debugfs_vol_act_log_extents;
|
||||
struct dentry *debugfs_vol_resync_extents;
|
||||
struct dentry *debugfs_vol_data_gen_id;
|
||||
struct dentry *debugfs_vol_ed_gen_id;
|
||||
#endif
|
||||
|
||||
unsigned int vnr; /* volume number within the connection */
|
||||
@ -873,6 +879,7 @@ struct drbd_device {
|
||||
atomic_t rs_pending_cnt; /* RS request/data packets on the wire */
|
||||
atomic_t unacked_cnt; /* Need to send replies for */
|
||||
atomic_t local_cnt; /* Waiting for local completion */
|
||||
atomic_t suspend_cnt;
|
||||
|
||||
/* Interval tree of pending local requests */
|
||||
struct rb_root read_requests;
|
||||
@ -1020,6 +1027,12 @@ static inline struct drbd_peer_device *first_peer_device(struct drbd_device *dev
|
||||
return list_first_entry_or_null(&device->peer_devices, struct drbd_peer_device, peer_devices);
|
||||
}
|
||||
|
||||
static inline struct drbd_peer_device *
|
||||
conn_peer_device(struct drbd_connection *connection, int volume_number)
|
||||
{
|
||||
return idr_find(&connection->peer_devices, volume_number);
|
||||
}
|
||||
|
||||
#define for_each_resource(resource, _resources) \
|
||||
list_for_each_entry(resource, _resources, resources)
|
||||
|
||||
@ -1113,7 +1126,7 @@ extern int drbd_send_ov_request(struct drbd_peer_device *, sector_t sector, int
|
||||
extern int drbd_send_bitmap(struct drbd_device *device);
|
||||
extern void drbd_send_sr_reply(struct drbd_peer_device *, enum drbd_state_rv retcode);
|
||||
extern void conn_send_sr_reply(struct drbd_connection *connection, enum drbd_state_rv retcode);
|
||||
extern void drbd_free_ldev(struct drbd_backing_dev *ldev);
|
||||
extern void drbd_backing_dev_free(struct drbd_device *device, struct drbd_backing_dev *ldev);
|
||||
extern void drbd_device_cleanup(struct drbd_device *device);
|
||||
void drbd_print_uuids(struct drbd_device *device, const char *text);
|
||||
|
||||
@ -1424,7 +1437,7 @@ extern struct bio_set *drbd_md_io_bio_set;
|
||||
/* to allocate from that set */
|
||||
extern struct bio *bio_alloc_drbd(gfp_t gfp_mask);
|
||||
|
||||
extern rwlock_t global_state_lock;
|
||||
extern struct mutex resources_mutex;
|
||||
|
||||
extern int conn_lowest_minor(struct drbd_connection *connection);
|
||||
extern enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsigned int minor);
|
||||
@ -1454,6 +1467,9 @@ extern int is_valid_ar_handle(struct drbd_request *, sector_t);
|
||||
|
||||
|
||||
/* drbd_nl.c */
|
||||
|
||||
extern struct mutex notification_mutex;
|
||||
|
||||
extern void drbd_suspend_io(struct drbd_device *device);
|
||||
extern void drbd_resume_io(struct drbd_device *device);
|
||||
extern char *ppsize(char *buf, unsigned long long size);
|
||||
@ -1536,7 +1552,9 @@ extern void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req);
|
||||
|
||||
/* drbd_receiver.c */
|
||||
extern int drbd_receiver(struct drbd_thread *thi);
|
||||
extern int drbd_asender(struct drbd_thread *thi);
|
||||
extern int drbd_ack_receiver(struct drbd_thread *thi);
|
||||
extern void drbd_send_ping_wf(struct work_struct *ws);
|
||||
extern void drbd_send_acks_wf(struct work_struct *ws);
|
||||
extern bool drbd_rs_c_min_rate_throttle(struct drbd_device *device);
|
||||
extern bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector,
|
||||
bool throttle_if_app_is_waiting);
|
||||
@ -1649,7 +1667,7 @@ extern int __drbd_change_sync(struct drbd_device *device, sector_t sector, int s
|
||||
#define drbd_rs_failed_io(device, sector, size) \
|
||||
__drbd_change_sync(device, sector, size, RECORD_RS_FAILED)
|
||||
extern void drbd_al_shrink(struct drbd_device *device);
|
||||
extern int drbd_initialize_al(struct drbd_device *, void *);
|
||||
extern int drbd_al_initialize(struct drbd_device *, void *);
|
||||
|
||||
/* drbd_nl.c */
|
||||
/* state info broadcast */
|
||||
@ -1668,6 +1686,29 @@ struct sib_info {
|
||||
};
|
||||
void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib);
|
||||
|
||||
extern void notify_resource_state(struct sk_buff *,
|
||||
unsigned int,
|
||||
struct drbd_resource *,
|
||||
struct resource_info *,
|
||||
enum drbd_notification_type);
|
||||
extern void notify_device_state(struct sk_buff *,
|
||||
unsigned int,
|
||||
struct drbd_device *,
|
||||
struct device_info *,
|
||||
enum drbd_notification_type);
|
||||
extern void notify_connection_state(struct sk_buff *,
|
||||
unsigned int,
|
||||
struct drbd_connection *,
|
||||
struct connection_info *,
|
||||
enum drbd_notification_type);
|
||||
extern void notify_peer_device_state(struct sk_buff *,
|
||||
unsigned int,
|
||||
struct drbd_peer_device *,
|
||||
struct peer_device_info *,
|
||||
enum drbd_notification_type);
|
||||
extern void notify_helper(enum drbd_notification_type, struct drbd_device *,
|
||||
struct drbd_connection *, const char *, int);
|
||||
|
||||
/*
|
||||
* inline helper functions
|
||||
*************************/
|
||||
@ -1694,19 +1735,6 @@ static inline int drbd_peer_req_has_active_page(struct drbd_peer_request *peer_r
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline enum drbd_state_rv
|
||||
_drbd_set_state(struct drbd_device *device, union drbd_state ns,
|
||||
enum chg_state_flags flags, struct completion *done)
|
||||
{
|
||||
enum drbd_state_rv rv;
|
||||
|
||||
read_lock(&global_state_lock);
|
||||
rv = __drbd_set_state(device, ns, flags, done);
|
||||
read_unlock(&global_state_lock);
|
||||
|
||||
return rv;
|
||||
}
|
||||
|
||||
static inline union drbd_state drbd_read_state(struct drbd_device *device)
|
||||
{
|
||||
struct drbd_resource *resource = device->resource;
|
||||
@ -1937,16 +1965,21 @@ drbd_device_post_work(struct drbd_device *device, int work_bit)
|
||||
|
||||
extern void drbd_flush_workqueue(struct drbd_work_queue *work_queue);
|
||||
|
||||
static inline void wake_asender(struct drbd_connection *connection)
|
||||
/* To get the ack_receiver out of the blocking network stack,
|
||||
* so it can change its sk_rcvtimeo from idle- to ping-timeout,
|
||||
* and send a ping, we need to send a signal.
|
||||
* Which signal we send is irrelevant. */
|
||||
static inline void wake_ack_receiver(struct drbd_connection *connection)
|
||||
{
|
||||
if (test_bit(SIGNAL_ASENDER, &connection->flags))
|
||||
force_sig(DRBD_SIG, connection->asender.task);
|
||||
struct task_struct *task = connection->ack_receiver.task;
|
||||
if (task && get_t_state(&connection->ack_receiver) == RUNNING)
|
||||
force_sig(SIGXCPU, task);
|
||||
}
|
||||
|
||||
static inline void request_ping(struct drbd_connection *connection)
|
||||
{
|
||||
set_bit(SEND_PING, &connection->flags);
|
||||
wake_asender(connection);
|
||||
wake_ack_receiver(connection);
|
||||
}
|
||||
|
||||
extern void *conn_prepare_command(struct drbd_connection *, struct drbd_socket *);
|
||||
@ -2230,7 +2263,7 @@ static inline bool may_inc_ap_bio(struct drbd_device *device)
|
||||
|
||||
if (drbd_suspended(device))
|
||||
return false;
|
||||
if (test_bit(SUSPEND_IO, &device->flags))
|
||||
if (atomic_read(&device->suspend_cnt))
|
||||
return false;
|
||||
|
||||
/* to avoid potential deadlock or bitmap corruption,
|
||||
|
@ -117,6 +117,7 @@ module_param_string(usermode_helper, usermode_helper, sizeof(usermode_helper), 0
|
||||
*/
|
||||
struct idr drbd_devices;
|
||||
struct list_head drbd_resources;
|
||||
struct mutex resources_mutex;
|
||||
|
||||
struct kmem_cache *drbd_request_cache;
|
||||
struct kmem_cache *drbd_ee_cache; /* peer requests */
|
||||
@ -1435,8 +1436,8 @@ static int we_should_drop_the_connection(struct drbd_connection *connection, str
|
||||
/* long elapsed = (long)(jiffies - device->last_received); */
|
||||
|
||||
drop_it = connection->meta.socket == sock
|
||||
|| !connection->asender.task
|
||||
|| get_t_state(&connection->asender) != RUNNING
|
||||
|| !connection->ack_receiver.task
|
||||
|| get_t_state(&connection->ack_receiver) != RUNNING
|
||||
|| connection->cstate < C_WF_REPORT_PARAMS;
|
||||
|
||||
if (drop_it)
|
||||
@ -1793,15 +1794,6 @@ int drbd_send(struct drbd_connection *connection, struct socket *sock,
|
||||
drbd_update_congested(connection);
|
||||
}
|
||||
do {
|
||||
/* STRANGE
|
||||
* tcp_sendmsg does _not_ use its size parameter at all ?
|
||||
*
|
||||
* -EAGAIN on timeout, -EINTR on signal.
|
||||
*/
|
||||
/* THINK
|
||||
* do we need to block DRBD_SIG if sock == &meta.socket ??
|
||||
* otherwise wake_asender() might interrupt some send_*Ack !
|
||||
*/
|
||||
rv = kernel_sendmsg(sock, &msg, &iov, 1, size);
|
||||
if (rv == -EAGAIN) {
|
||||
if (we_should_drop_the_connection(connection, sock))
|
||||
@ -2000,7 +1992,7 @@ void drbd_device_cleanup(struct drbd_device *device)
|
||||
drbd_bm_cleanup(device);
|
||||
}
|
||||
|
||||
drbd_free_ldev(device->ldev);
|
||||
drbd_backing_dev_free(device, device->ldev);
|
||||
device->ldev = NULL;
|
||||
|
||||
clear_bit(AL_SUSPENDED, &device->flags);
|
||||
@ -2179,7 +2171,7 @@ void drbd_destroy_device(struct kref *kref)
|
||||
if (device->this_bdev)
|
||||
bdput(device->this_bdev);
|
||||
|
||||
drbd_free_ldev(device->ldev);
|
||||
drbd_backing_dev_free(device, device->ldev);
|
||||
device->ldev = NULL;
|
||||
|
||||
drbd_release_all_peer_reqs(device);
|
||||
@ -2563,7 +2555,7 @@ int set_resource_options(struct drbd_resource *resource, struct res_opts *res_op
|
||||
cpumask_copy(resource->cpu_mask, new_cpu_mask);
|
||||
for_each_connection_rcu(connection, resource) {
|
||||
connection->receiver.reset_cpu_mask = 1;
|
||||
connection->asender.reset_cpu_mask = 1;
|
||||
connection->ack_receiver.reset_cpu_mask = 1;
|
||||
connection->worker.reset_cpu_mask = 1;
|
||||
}
|
||||
}
|
||||
@ -2590,7 +2582,7 @@ struct drbd_resource *drbd_create_resource(const char *name)
|
||||
kref_init(&resource->kref);
|
||||
idr_init(&resource->devices);
|
||||
INIT_LIST_HEAD(&resource->connections);
|
||||
resource->write_ordering = WO_bdev_flush;
|
||||
resource->write_ordering = WO_BDEV_FLUSH;
|
||||
list_add_tail_rcu(&resource->resources, &drbd_resources);
|
||||
mutex_init(&resource->conf_update);
|
||||
mutex_init(&resource->adm_mutex);
|
||||
@ -2652,8 +2644,8 @@ struct drbd_connection *conn_create(const char *name, struct res_opts *res_opts)
|
||||
connection->receiver.connection = connection;
|
||||
drbd_thread_init(resource, &connection->worker, drbd_worker, "worker");
|
||||
connection->worker.connection = connection;
|
||||
drbd_thread_init(resource, &connection->asender, drbd_asender, "asender");
|
||||
connection->asender.connection = connection;
|
||||
drbd_thread_init(resource, &connection->ack_receiver, drbd_ack_receiver, "ack_recv");
|
||||
connection->ack_receiver.connection = connection;
|
||||
|
||||
kref_init(&connection->kref);
|
||||
|
||||
@ -2702,8 +2694,8 @@ static int init_submitter(struct drbd_device *device)
|
||||
{
|
||||
/* opencoded create_singlethread_workqueue(),
|
||||
* to be able to say "drbd%d", ..., minor */
|
||||
device->submit.wq = alloc_workqueue("drbd%u_submit",
|
||||
WQ_UNBOUND | WQ_MEM_RECLAIM, 1, device->minor);
|
||||
device->submit.wq =
|
||||
alloc_ordered_workqueue("drbd%u_submit", WQ_MEM_RECLAIM, device->minor);
|
||||
if (!device->submit.wq)
|
||||
return -ENOMEM;
|
||||
|
||||
@ -2820,6 +2812,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
|
||||
goto out_idr_remove_from_resource;
|
||||
}
|
||||
kref_get(&connection->kref);
|
||||
INIT_WORK(&peer_device->send_acks_work, drbd_send_acks_wf);
|
||||
}
|
||||
|
||||
if (init_submitter(device)) {
|
||||
@ -2923,7 +2916,7 @@ static int __init drbd_init(void)
|
||||
drbd_proc = NULL; /* play safe for drbd_cleanup */
|
||||
idr_init(&drbd_devices);
|
||||
|
||||
rwlock_init(&global_state_lock);
|
||||
mutex_init(&resources_mutex);
|
||||
INIT_LIST_HEAD(&drbd_resources);
|
||||
|
||||
err = drbd_genl_register();
|
||||
@ -2971,18 +2964,6 @@ fail:
|
||||
return err;
|
||||
}
|
||||
|
||||
void drbd_free_ldev(struct drbd_backing_dev *ldev)
|
||||
{
|
||||
if (ldev == NULL)
|
||||
return;
|
||||
|
||||
blkdev_put(ldev->backing_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
|
||||
blkdev_put(ldev->md_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
|
||||
|
||||
kfree(ldev->disk_conf);
|
||||
kfree(ldev);
|
||||
}
|
||||
|
||||
static void drbd_free_one_sock(struct drbd_socket *ds)
|
||||
{
|
||||
struct socket *s;
|
||||
@ -3277,6 +3258,10 @@ int drbd_md_read(struct drbd_device *device, struct drbd_backing_dev *bdev)
|
||||
* and read it. */
|
||||
bdev->md.meta_dev_idx = bdev->disk_conf->meta_dev_idx;
|
||||
bdev->md.md_offset = drbd_md_ss(bdev);
|
||||
/* Even for (flexible or indexed) external meta data,
|
||||
* initially restrict us to the 4k superblock for now.
|
||||
* Affects the paranoia out-of-range access check in drbd_md_sync_page_io(). */
|
||||
bdev->md.md_size_sect = 8;
|
||||
|
||||
if (drbd_md_sync_page_io(device, bdev, bdev->md.md_offset, READ)) {
|
||||
/* NOTE: can't do normal error processing here as this is
|
||||
@ -3578,7 +3563,9 @@ void drbd_queue_bitmap_io(struct drbd_device *device,
|
||||
|
||||
spin_lock_irq(&device->resource->req_lock);
|
||||
set_bit(BITMAP_IO, &device->flags);
|
||||
if (atomic_read(&device->ap_bio_cnt) == 0) {
|
||||
/* don't wait for pending application IO if the caller indicates that
|
||||
* application IO does not conflict anyways. */
|
||||
if (flags == BM_LOCKED_CHANGE_ALLOWED || atomic_read(&device->ap_bio_cnt) == 0) {
|
||||
if (!test_and_set_bit(BITMAP_IO_QUEUED, &device->flags))
|
||||
drbd_queue_work(&first_peer_device(device)->connection->sender_work,
|
||||
&device->bm_io_work.w);
|
||||
@ -3746,6 +3733,27 @@ int drbd_wait_misc(struct drbd_device *device, struct drbd_interval *i)
|
||||
return 0;
|
||||
}
|
||||
|
||||
void lock_all_resources(void)
|
||||
{
|
||||
struct drbd_resource *resource;
|
||||
int __maybe_unused i = 0;
|
||||
|
||||
mutex_lock(&resources_mutex);
|
||||
local_irq_disable();
|
||||
for_each_resource(resource, &drbd_resources)
|
||||
spin_lock_nested(&resource->req_lock, i++);
|
||||
}
|
||||
|
||||
void unlock_all_resources(void)
|
||||
{
|
||||
struct drbd_resource *resource;
|
||||
|
||||
for_each_resource(resource, &drbd_resources)
|
||||
spin_unlock(&resource->req_lock);
|
||||
local_irq_enable();
|
||||
mutex_unlock(&resources_mutex);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_DRBD_FAULT_INJECTION
|
||||
/* Fault insertion support including random number generator shamelessly
|
||||
* stolen from kernel/rcutorture.c */
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -245,9 +245,9 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
|
||||
char wp;
|
||||
|
||||
static char write_ordering_chars[] = {
|
||||
[WO_none] = 'n',
|
||||
[WO_drain_io] = 'd',
|
||||
[WO_bdev_flush] = 'f',
|
||||
[WO_NONE] = 'n',
|
||||
[WO_DRAIN_IO] = 'd',
|
||||
[WO_BDEV_FLUSH] = 'f',
|
||||
};
|
||||
|
||||
seq_printf(seq, "version: " REL_VERSION " (api:%d/proto:%d-%d)\n%s\n",
|
||||
|
@ -23,7 +23,7 @@ enum drbd_packet {
|
||||
P_AUTH_RESPONSE = 0x11,
|
||||
P_STATE_CHG_REQ = 0x12,
|
||||
|
||||
/* asender (meta socket */
|
||||
/* (meta socket) */
|
||||
P_PING = 0x13,
|
||||
P_PING_ACK = 0x14,
|
||||
P_RECV_ACK = 0x15, /* Used in protocol B */
|
||||
|
@ -215,7 +215,7 @@ static void reclaim_finished_net_peer_reqs(struct drbd_device *device,
|
||||
}
|
||||
}
|
||||
|
||||
static void drbd_kick_lo_and_reclaim_net(struct drbd_device *device)
|
||||
static void drbd_reclaim_net_peer_reqs(struct drbd_device *device)
|
||||
{
|
||||
LIST_HEAD(reclaimed);
|
||||
struct drbd_peer_request *peer_req, *t;
|
||||
@ -223,11 +223,30 @@ static void drbd_kick_lo_and_reclaim_net(struct drbd_device *device)
|
||||
spin_lock_irq(&device->resource->req_lock);
|
||||
reclaim_finished_net_peer_reqs(device, &reclaimed);
|
||||
spin_unlock_irq(&device->resource->req_lock);
|
||||
|
||||
list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
|
||||
drbd_free_net_peer_req(device, peer_req);
|
||||
}
|
||||
|
||||
static void conn_reclaim_net_peer_reqs(struct drbd_connection *connection)
|
||||
{
|
||||
struct drbd_peer_device *peer_device;
|
||||
int vnr;
|
||||
|
||||
rcu_read_lock();
|
||||
idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
|
||||
struct drbd_device *device = peer_device->device;
|
||||
if (!atomic_read(&device->pp_in_use_by_net))
|
||||
continue;
|
||||
|
||||
kref_get(&device->kref);
|
||||
rcu_read_unlock();
|
||||
drbd_reclaim_net_peer_reqs(device);
|
||||
kref_put(&device->kref, drbd_destroy_device);
|
||||
rcu_read_lock();
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
/**
|
||||
* drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled)
|
||||
* @device: DRBD device.
|
||||
@ -265,10 +284,15 @@ struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int
|
||||
if (atomic_read(&device->pp_in_use) < mxb)
|
||||
page = __drbd_alloc_pages(device, number);
|
||||
|
||||
/* Try to keep the fast path fast, but occasionally we need
|
||||
* to reclaim the pages we lended to the network stack. */
|
||||
if (page && atomic_read(&device->pp_in_use_by_net) > 512)
|
||||
drbd_reclaim_net_peer_reqs(device);
|
||||
|
||||
while (page == NULL) {
|
||||
prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);
|
||||
|
||||
drbd_kick_lo_and_reclaim_net(device);
|
||||
drbd_reclaim_net_peer_reqs(device);
|
||||
|
||||
if (atomic_read(&device->pp_in_use) < mxb) {
|
||||
page = __drbd_alloc_pages(device, number);
|
||||
@ -1099,7 +1123,15 @@ randomize:
|
||||
return 0;
|
||||
}
|
||||
|
||||
drbd_thread_start(&connection->asender);
|
||||
drbd_thread_start(&connection->ack_receiver);
|
||||
/* opencoded create_singlethread_workqueue(),
|
||||
* to be able to use format string arguments */
|
||||
connection->ack_sender =
|
||||
alloc_ordered_workqueue("drbd_as_%s", WQ_MEM_RECLAIM, connection->resource->name);
|
||||
if (!connection->ack_sender) {
|
||||
drbd_err(connection, "Failed to create workqueue ack_sender\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
mutex_lock(&connection->resource->conf_update);
|
||||
/* The discard_my_data flag is a single-shot modifier to the next
|
||||
@ -1178,7 +1210,7 @@ static void drbd_flush(struct drbd_connection *connection)
|
||||
struct drbd_peer_device *peer_device;
|
||||
int vnr;
|
||||
|
||||
if (connection->resource->write_ordering >= WO_bdev_flush) {
|
||||
if (connection->resource->write_ordering >= WO_BDEV_FLUSH) {
|
||||
rcu_read_lock();
|
||||
idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
|
||||
struct drbd_device *device = peer_device->device;
|
||||
@ -1203,7 +1235,7 @@ static void drbd_flush(struct drbd_connection *connection)
|
||||
/* would rather check on EOPNOTSUPP, but that is not reliable.
|
||||
* don't try again for ANY return value != 0
|
||||
* if (rv == -EOPNOTSUPP) */
|
||||
drbd_bump_write_ordering(connection->resource, NULL, WO_drain_io);
|
||||
drbd_bump_write_ordering(connection->resource, NULL, WO_DRAIN_IO);
|
||||
}
|
||||
put_ldev(device);
|
||||
kref_put(&device->kref, drbd_destroy_device);
|
||||
@ -1299,10 +1331,10 @@ max_allowed_wo(struct drbd_backing_dev *bdev, enum write_ordering_e wo)
|
||||
|
||||
dc = rcu_dereference(bdev->disk_conf);
|
||||
|
||||
if (wo == WO_bdev_flush && !dc->disk_flushes)
|
||||
wo = WO_drain_io;
|
||||
if (wo == WO_drain_io && !dc->disk_drain)
|
||||
wo = WO_none;
|
||||
if (wo == WO_BDEV_FLUSH && !dc->disk_flushes)
|
||||
wo = WO_DRAIN_IO;
|
||||
if (wo == WO_DRAIN_IO && !dc->disk_drain)
|
||||
wo = WO_NONE;
|
||||
|
||||
return wo;
|
||||
}
|
||||
@ -1319,13 +1351,13 @@ void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backin
|
||||
enum write_ordering_e pwo;
|
||||
int vnr;
|
||||
static char *write_ordering_str[] = {
|
||||
[WO_none] = "none",
|
||||
[WO_drain_io] = "drain",
|
||||
[WO_bdev_flush] = "flush",
|
||||
[WO_NONE] = "none",
|
||||
[WO_DRAIN_IO] = "drain",
|
||||
[WO_BDEV_FLUSH] = "flush",
|
||||
};
|
||||
|
||||
pwo = resource->write_ordering;
|
||||
if (wo != WO_bdev_flush)
|
||||
if (wo != WO_BDEV_FLUSH)
|
||||
wo = min(pwo, wo);
|
||||
rcu_read_lock();
|
||||
idr_for_each_entry(&resource->devices, device, vnr) {
|
||||
@ -1343,7 +1375,7 @@ void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backin
|
||||
rcu_read_unlock();
|
||||
|
||||
resource->write_ordering = wo;
|
||||
if (pwo != resource->write_ordering || wo == WO_bdev_flush)
|
||||
if (pwo != resource->write_ordering || wo == WO_BDEV_FLUSH)
|
||||
drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]);
|
||||
}
|
||||
|
||||
@ -1380,7 +1412,7 @@ int drbd_submit_peer_request(struct drbd_device *device,
|
||||
if (peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) {
|
||||
/* wait for all pending IO completions, before we start
|
||||
* zeroing things out. */
|
||||
conn_wait_active_ee_empty(first_peer_device(device)->connection);
|
||||
conn_wait_active_ee_empty(peer_req->peer_device->connection);
|
||||
/* add it to the active list now,
|
||||
* so we can find it to present it in debugfs */
|
||||
peer_req->submit_jif = jiffies;
|
||||
@ -1508,12 +1540,6 @@ static void conn_wait_active_ee_empty(struct drbd_connection *connection)
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
static struct drbd_peer_device *
|
||||
conn_peer_device(struct drbd_connection *connection, int volume_number)
|
||||
{
|
||||
return idr_find(&connection->peer_devices, volume_number);
|
||||
}
|
||||
|
||||
static int receive_Barrier(struct drbd_connection *connection, struct packet_info *pi)
|
||||
{
|
||||
int rv;
|
||||
@ -1533,7 +1559,7 @@ static int receive_Barrier(struct drbd_connection *connection, struct packet_inf
|
||||
* Therefore we must send the barrier_ack after the barrier request was
|
||||
* completed. */
|
||||
switch (connection->resource->write_ordering) {
|
||||
case WO_none:
|
||||
case WO_NONE:
|
||||
if (rv == FE_RECYCLED)
|
||||
return 0;
|
||||
|
||||
@ -1546,8 +1572,8 @@ static int receive_Barrier(struct drbd_connection *connection, struct packet_inf
|
||||
drbd_warn(connection, "Allocation of an epoch failed, slowing down\n");
|
||||
/* Fall through */
|
||||
|
||||
case WO_bdev_flush:
|
||||
case WO_drain_io:
|
||||
case WO_BDEV_FLUSH:
|
||||
case WO_DRAIN_IO:
|
||||
conn_wait_active_ee_empty(connection);
|
||||
drbd_flush(connection);
|
||||
|
||||
@ -1752,7 +1778,7 @@ static int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_req
|
||||
}
|
||||
|
||||
/*
|
||||
* e_end_resync_block() is called in asender context via
|
||||
* e_end_resync_block() is called in ack_sender context via
|
||||
* drbd_finish_peer_reqs().
|
||||
*/
|
||||
static int e_end_resync_block(struct drbd_work *w, int unused)
|
||||
@ -1926,7 +1952,7 @@ static void restart_conflicting_writes(struct drbd_device *device,
|
||||
}
|
||||
|
||||
/*
|
||||
* e_end_block() is called in asender context via drbd_finish_peer_reqs().
|
||||
* e_end_block() is called in ack_sender context via drbd_finish_peer_reqs().
|
||||
*/
|
||||
static int e_end_block(struct drbd_work *w, int cancel)
|
||||
{
|
||||
@ -1966,7 +1992,7 @@ static int e_end_block(struct drbd_work *w, int cancel)
|
||||
} else
|
||||
D_ASSERT(device, drbd_interval_empty(&peer_req->i));
|
||||
|
||||
drbd_may_finish_epoch(first_peer_device(device)->connection, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
|
||||
drbd_may_finish_epoch(peer_device->connection, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
|
||||
|
||||
return err;
|
||||
}
|
||||
@ -2098,7 +2124,7 @@ static int wait_for_and_update_peer_seq(struct drbd_peer_device *peer_device, co
|
||||
}
|
||||
|
||||
rcu_read_lock();
|
||||
tp = rcu_dereference(first_peer_device(device)->connection->net_conf)->two_primaries;
|
||||
tp = rcu_dereference(peer_device->connection->net_conf)->two_primaries;
|
||||
rcu_read_unlock();
|
||||
|
||||
if (!tp)
|
||||
@ -2217,7 +2243,7 @@ static int handle_write_conflicts(struct drbd_device *device,
|
||||
peer_req->w.cb = superseded ? e_send_superseded :
|
||||
e_send_retry_write;
|
||||
list_add_tail(&peer_req->w.list, &device->done_ee);
|
||||
wake_asender(connection);
|
||||
queue_work(connection->ack_sender, &peer_req->peer_device->send_acks_work);
|
||||
|
||||
err = -ENOENT;
|
||||
goto out;
|
||||
@ -2364,7 +2390,7 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
|
||||
if (dp_flags & DP_SEND_RECEIVE_ACK) {
|
||||
/* I really don't like it that the receiver thread
|
||||
* sends on the msock, but anyways */
|
||||
drbd_send_ack(first_peer_device(device), P_RECV_ACK, peer_req);
|
||||
drbd_send_ack(peer_device, P_RECV_ACK, peer_req);
|
||||
}
|
||||
|
||||
if (tp) {
|
||||
@ -4056,7 +4082,7 @@ static int receive_state(struct drbd_connection *connection, struct packet_info
|
||||
os = ns = drbd_read_state(device);
|
||||
spin_unlock_irq(&device->resource->req_lock);
|
||||
|
||||
/* If some other part of the code (asender thread, timeout)
|
||||
/* If some other part of the code (ack_receiver thread, timeout)
|
||||
* already decided to close the connection again,
|
||||
* we must not "re-establish" it here. */
|
||||
if (os.conn <= C_TEAR_DOWN)
|
||||
@ -4661,8 +4687,12 @@ static void conn_disconnect(struct drbd_connection *connection)
|
||||
*/
|
||||
conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
|
||||
|
||||
/* asender does not clean up anything. it must not interfere, either */
|
||||
drbd_thread_stop(&connection->asender);
|
||||
/* ack_receiver does not clean up anything. it must not interfere, either */
|
||||
drbd_thread_stop(&connection->ack_receiver);
|
||||
if (connection->ack_sender) {
|
||||
destroy_workqueue(connection->ack_sender);
|
||||
connection->ack_sender = NULL;
|
||||
}
|
||||
drbd_free_sock(connection);
|
||||
|
||||
rcu_read_lock();
|
||||
@ -5431,49 +5461,39 @@ static int got_skip(struct drbd_connection *connection, struct packet_info *pi)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int connection_finish_peer_reqs(struct drbd_connection *connection)
|
||||
{
|
||||
struct drbd_peer_device *peer_device;
|
||||
int vnr, not_empty = 0;
|
||||
|
||||
do {
|
||||
clear_bit(SIGNAL_ASENDER, &connection->flags);
|
||||
flush_signals(current);
|
||||
|
||||
rcu_read_lock();
|
||||
idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
|
||||
struct drbd_device *device = peer_device->device;
|
||||
kref_get(&device->kref);
|
||||
rcu_read_unlock();
|
||||
if (drbd_finish_peer_reqs(device)) {
|
||||
kref_put(&device->kref, drbd_destroy_device);
|
||||
return 1;
|
||||
}
|
||||
kref_put(&device->kref, drbd_destroy_device);
|
||||
rcu_read_lock();
|
||||
}
|
||||
set_bit(SIGNAL_ASENDER, &connection->flags);
|
||||
|
||||
spin_lock_irq(&connection->resource->req_lock);
|
||||
idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
|
||||
struct drbd_device *device = peer_device->device;
|
||||
not_empty = !list_empty(&device->done_ee);
|
||||
if (not_empty)
|
||||
break;
|
||||
}
|
||||
spin_unlock_irq(&connection->resource->req_lock);
|
||||
rcu_read_unlock();
|
||||
} while (not_empty);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct asender_cmd {
|
||||
struct meta_sock_cmd {
|
||||
size_t pkt_size;
|
||||
int (*fn)(struct drbd_connection *connection, struct packet_info *);
|
||||
};
|
||||
|
||||
static struct asender_cmd asender_tbl[] = {
|
||||
static void set_rcvtimeo(struct drbd_connection *connection, bool ping_timeout)
|
||||
{
|
||||
long t;
|
||||
struct net_conf *nc;
|
||||
|
||||
rcu_read_lock();
|
||||
nc = rcu_dereference(connection->net_conf);
|
||||
t = ping_timeout ? nc->ping_timeo : nc->ping_int;
|
||||
rcu_read_unlock();
|
||||
|
||||
t *= HZ;
|
||||
if (ping_timeout)
|
||||
t /= 10;
|
||||
|
||||
connection->meta.socket->sk->sk_rcvtimeo = t;
|
||||
}
|
||||
|
||||
static void set_ping_timeout(struct drbd_connection *connection)
|
||||
{
|
||||
set_rcvtimeo(connection, 1);
|
||||
}
|
||||
|
||||
static void set_idle_timeout(struct drbd_connection *connection)
|
||||
{
|
||||
set_rcvtimeo(connection, 0);
|
||||
}
|
||||
|
||||
static struct meta_sock_cmd ack_receiver_tbl[] = {
|
||||
[P_PING] = { 0, got_Ping },
|
||||
[P_PING_ACK] = { 0, got_PingAck },
|
||||
[P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
|
||||
@ -5493,64 +5513,40 @@ static struct asender_cmd asender_tbl[] = {
|
||||
[P_RETRY_WRITE] = { sizeof(struct p_block_ack), got_BlockAck },
|
||||
};
|
||||
|
||||
int drbd_asender(struct drbd_thread *thi)
|
||||
int drbd_ack_receiver(struct drbd_thread *thi)
|
||||
{
|
||||
struct drbd_connection *connection = thi->connection;
|
||||
struct asender_cmd *cmd = NULL;
|
||||
struct meta_sock_cmd *cmd = NULL;
|
||||
struct packet_info pi;
|
||||
unsigned long pre_recv_jif;
|
||||
int rv;
|
||||
void *buf = connection->meta.rbuf;
|
||||
int received = 0;
|
||||
unsigned int header_size = drbd_header_size(connection);
|
||||
int expect = header_size;
|
||||
bool ping_timeout_active = false;
|
||||
struct net_conf *nc;
|
||||
int ping_timeo, tcp_cork, ping_int;
|
||||
struct sched_param param = { .sched_priority = 2 };
|
||||
|
||||
rv = sched_setscheduler(current, SCHED_RR, ¶m);
|
||||
if (rv < 0)
|
||||
drbd_err(connection, "drbd_asender: ERROR set priority, ret=%d\n", rv);
|
||||
drbd_err(connection, "drbd_ack_receiver: ERROR set priority, ret=%d\n", rv);
|
||||
|
||||
while (get_t_state(thi) == RUNNING) {
|
||||
drbd_thread_current_set_cpu(thi);
|
||||
|
||||
rcu_read_lock();
|
||||
nc = rcu_dereference(connection->net_conf);
|
||||
ping_timeo = nc->ping_timeo;
|
||||
tcp_cork = nc->tcp_cork;
|
||||
ping_int = nc->ping_int;
|
||||
rcu_read_unlock();
|
||||
conn_reclaim_net_peer_reqs(connection);
|
||||
|
||||
if (test_and_clear_bit(SEND_PING, &connection->flags)) {
|
||||
if (drbd_send_ping(connection)) {
|
||||
drbd_err(connection, "drbd_send_ping has failed\n");
|
||||
goto reconnect;
|
||||
}
|
||||
connection->meta.socket->sk->sk_rcvtimeo = ping_timeo * HZ / 10;
|
||||
set_ping_timeout(connection);
|
||||
ping_timeout_active = true;
|
||||
}
|
||||
|
||||
/* TODO: conditionally cork; it may hurt latency if we cork without
|
||||
much to send */
|
||||
if (tcp_cork)
|
||||
drbd_tcp_cork(connection->meta.socket);
|
||||
if (connection_finish_peer_reqs(connection)) {
|
||||
drbd_err(connection, "connection_finish_peer_reqs() failed\n");
|
||||
goto reconnect;
|
||||
}
|
||||
/* but unconditionally uncork unless disabled */
|
||||
if (tcp_cork)
|
||||
drbd_tcp_uncork(connection->meta.socket);
|
||||
|
||||
/* short circuit, recv_msg would return EINTR anyways. */
|
||||
if (signal_pending(current))
|
||||
continue;
|
||||
|
||||
pre_recv_jif = jiffies;
|
||||
rv = drbd_recv_short(connection->meta.socket, buf, expect-received, 0);
|
||||
clear_bit(SIGNAL_ASENDER, &connection->flags);
|
||||
|
||||
flush_signals(current);
|
||||
|
||||
/* Note:
|
||||
* -EINTR (on meta) we got a signal
|
||||
@ -5562,7 +5558,6 @@ int drbd_asender(struct drbd_thread *thi)
|
||||
* rv < expected: "woken" by signal during receive
|
||||
* rv == 0 : "connection shut down by peer"
|
||||
*/
|
||||
received_more:
|
||||
if (likely(rv > 0)) {
|
||||
received += rv;
|
||||
buf += rv;
|
||||
@ -5584,8 +5579,7 @@ received_more:
|
||||
} else if (rv == -EAGAIN) {
|
||||
/* If the data socket received something meanwhile,
|
||||
* that is good enough: peer is still alive. */
|
||||
if (time_after(connection->last_received,
|
||||
jiffies - connection->meta.socket->sk->sk_rcvtimeo))
|
||||
if (time_after(connection->last_received, pre_recv_jif))
|
||||
continue;
|
||||
if (ping_timeout_active) {
|
||||
drbd_err(connection, "PingAck did not arrive in time.\n");
|
||||
@ -5594,6 +5588,10 @@ received_more:
|
||||
set_bit(SEND_PING, &connection->flags);
|
||||
continue;
|
||||
} else if (rv == -EINTR) {
|
||||
/* maybe drbd_thread_stop(): the while condition will notice.
|
||||
* maybe woken for send_ping: we'll send a ping above,
|
||||
* and change the rcvtimeo */
|
||||
flush_signals(current);
|
||||
continue;
|
||||
} else {
|
||||
drbd_err(connection, "sock_recvmsg returned %d\n", rv);
|
||||
@ -5603,8 +5601,8 @@ received_more:
|
||||
if (received == expect && cmd == NULL) {
|
||||
if (decode_header(connection, connection->meta.rbuf, &pi))
|
||||
goto reconnect;
|
||||
cmd = &asender_tbl[pi.cmd];
|
||||
if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !cmd->fn) {
|
||||
cmd = &ack_receiver_tbl[pi.cmd];
|
||||
if (pi.cmd >= ARRAY_SIZE(ack_receiver_tbl) || !cmd->fn) {
|
||||
drbd_err(connection, "Unexpected meta packet %s (0x%04x)\n",
|
||||
cmdname(pi.cmd), pi.cmd);
|
||||
goto disconnect;
|
||||
@ -5627,9 +5625,8 @@ received_more:
|
||||
|
||||
connection->last_received = jiffies;
|
||||
|
||||
if (cmd == &asender_tbl[P_PING_ACK]) {
|
||||
/* restore idle timeout */
|
||||
connection->meta.socket->sk->sk_rcvtimeo = ping_int * HZ;
|
||||
if (cmd == &ack_receiver_tbl[P_PING_ACK]) {
|
||||
set_idle_timeout(connection);
|
||||
ping_timeout_active = false;
|
||||
}
|
||||
|
||||
@ -5638,11 +5635,6 @@ received_more:
|
||||
expect = header_size;
|
||||
cmd = NULL;
|
||||
}
|
||||
if (test_bit(SEND_PING, &connection->flags))
|
||||
continue;
|
||||
rv = drbd_recv_short(connection->meta.socket, buf, expect-received, MSG_DONTWAIT);
|
||||
if (rv > 0)
|
||||
goto received_more;
|
||||
}
|
||||
|
||||
if (0) {
|
||||
@ -5654,9 +5646,41 @@ reconnect:
|
||||
disconnect:
|
||||
conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
|
||||
}
|
||||
clear_bit(SIGNAL_ASENDER, &connection->flags);
|
||||
|
||||
drbd_info(connection, "asender terminated\n");
|
||||
drbd_info(connection, "ack_receiver terminated\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void drbd_send_acks_wf(struct work_struct *ws)
|
||||
{
|
||||
struct drbd_peer_device *peer_device =
|
||||
container_of(ws, struct drbd_peer_device, send_acks_work);
|
||||
struct drbd_connection *connection = peer_device->connection;
|
||||
struct drbd_device *device = peer_device->device;
|
||||
struct net_conf *nc;
|
||||
int tcp_cork, err;
|
||||
|
||||
rcu_read_lock();
|
||||
nc = rcu_dereference(connection->net_conf);
|
||||
tcp_cork = nc->tcp_cork;
|
||||
rcu_read_unlock();
|
||||
|
||||
if (tcp_cork)
|
||||
drbd_tcp_cork(connection->meta.socket);
|
||||
|
||||
err = drbd_finish_peer_reqs(device);
|
||||
kref_put(&device->kref, drbd_destroy_device);
|
||||
/* get is in drbd_endio_write_sec_final(). That is necessary to keep the
|
||||
struct work_struct send_acks_work alive, which is in the peer_device object */
|
||||
|
||||
if (err) {
|
||||
conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
|
||||
return;
|
||||
}
|
||||
|
||||
if (tcp_cork)
|
||||
drbd_tcp_uncork(connection->meta.socket);
|
||||
|
||||
return;
|
||||
}
|
||||
|
@ -453,12 +453,12 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m,
|
||||
kref_get(&req->kref); /* wait for the DONE */
|
||||
|
||||
if (!(s & RQ_NET_SENT) && (set & RQ_NET_SENT)) {
|
||||
/* potentially already completed in the asender thread */
|
||||
/* potentially already completed in the ack_receiver thread */
|
||||
if (!(s & RQ_NET_DONE)) {
|
||||
atomic_add(req->i.size >> 9, &device->ap_in_flight);
|
||||
set_if_null_req_not_net_done(peer_device, req);
|
||||
}
|
||||
if (s & RQ_NET_PENDING)
|
||||
if (req->rq_state & RQ_NET_PENDING)
|
||||
set_if_null_req_ack_pending(peer_device, req);
|
||||
}
|
||||
|
||||
@ -1095,6 +1095,24 @@ static bool do_remote_read(struct drbd_request *req)
|
||||
return false;
|
||||
}
|
||||
|
||||
bool drbd_should_do_remote(union drbd_dev_state s)
|
||||
{
|
||||
return s.pdsk == D_UP_TO_DATE ||
|
||||
(s.pdsk >= D_INCONSISTENT &&
|
||||
s.conn >= C_WF_BITMAP_T &&
|
||||
s.conn < C_AHEAD);
|
||||
/* Before proto 96 that was >= CONNECTED instead of >= C_WF_BITMAP_T.
|
||||
That is equivalent since before 96 IO was frozen in the C_WF_BITMAP*
|
||||
states. */
|
||||
}
|
||||
|
||||
static bool drbd_should_send_out_of_sync(union drbd_dev_state s)
|
||||
{
|
||||
return s.conn == C_AHEAD || s.conn == C_WF_BITMAP_S;
|
||||
/* pdsk = D_INCONSISTENT as a consequence. Protocol 96 check not necessary
|
||||
since we enter state C_AHEAD only if proto >= 96 */
|
||||
}
|
||||
|
||||
/* returns number of connections (== 1, for drbd 8.4)
|
||||
* expected to actually write this data,
|
||||
* which does NOT include those that we are L_AHEAD for. */
|
||||
@ -1149,7 +1167,6 @@ drbd_submit_req_private_bio(struct drbd_request *req)
|
||||
* stable storage, and this is a WRITE, we may not even submit
|
||||
* this bio. */
|
||||
if (get_ldev(device)) {
|
||||
req->pre_submit_jif = jiffies;
|
||||
if (drbd_insert_fault(device,
|
||||
rw == WRITE ? DRBD_FAULT_DT_WR
|
||||
: rw == READ ? DRBD_FAULT_DT_RD
|
||||
@ -1293,6 +1310,7 @@ static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request
|
||||
&device->pending_master_completion[rw == WRITE]);
|
||||
if (req->private_bio) {
|
||||
/* needs to be marked within the same spinlock */
|
||||
req->pre_submit_jif = jiffies;
|
||||
list_add_tail(&req->req_pending_local,
|
||||
&device->pending_completion[rw == WRITE]);
|
||||
_req_mod(req, TO_BE_SUBMITTED);
|
||||
@ -1513,6 +1531,78 @@ blk_qc_t drbd_make_request(struct request_queue *q, struct bio *bio)
|
||||
return BLK_QC_T_NONE;
|
||||
}
|
||||
|
||||
static bool net_timeout_reached(struct drbd_request *net_req,
|
||||
struct drbd_connection *connection,
|
||||
unsigned long now, unsigned long ent,
|
||||
unsigned int ko_count, unsigned int timeout)
|
||||
{
|
||||
struct drbd_device *device = net_req->device;
|
||||
|
||||
if (!time_after(now, net_req->pre_send_jif + ent))
|
||||
return false;
|
||||
|
||||
if (time_in_range(now, connection->last_reconnect_jif, connection->last_reconnect_jif + ent))
|
||||
return false;
|
||||
|
||||
if (net_req->rq_state & RQ_NET_PENDING) {
|
||||
drbd_warn(device, "Remote failed to finish a request within %ums > ko-count (%u) * timeout (%u * 0.1s)\n",
|
||||
jiffies_to_msecs(now - net_req->pre_send_jif), ko_count, timeout);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* We received an ACK already (or are using protocol A),
|
||||
* but are waiting for the epoch closing barrier ack.
|
||||
* Check if we sent the barrier already. We should not blame the peer
|
||||
* for being unresponsive, if we did not even ask it yet. */
|
||||
if (net_req->epoch == connection->send.current_epoch_nr) {
|
||||
drbd_warn(device,
|
||||
"We did not send a P_BARRIER for %ums > ko-count (%u) * timeout (%u * 0.1s); drbd kernel thread blocked?\n",
|
||||
jiffies_to_msecs(now - net_req->pre_send_jif), ko_count, timeout);
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Worst case: we may have been blocked for whatever reason, then
|
||||
* suddenly are able to send a lot of requests (and epoch separating
|
||||
* barriers) in quick succession.
|
||||
* The timestamp of the net_req may be much too old and not correspond
|
||||
* to the sending time of the relevant unack'ed barrier packet, so
|
||||
* would trigger a spurious timeout. The latest barrier packet may
|
||||
* have a too recent timestamp to trigger the timeout, potentially miss
|
||||
* a timeout. Right now we don't have a place to conveniently store
|
||||
* these timestamps.
|
||||
* But in this particular situation, the application requests are still
|
||||
* completed to upper layers, DRBD should still "feel" responsive.
|
||||
* No need yet to kill this connection, it may still recover.
|
||||
* If not, eventually we will have queued enough into the network for
|
||||
* us to block. From that point of view, the timestamp of the last sent
|
||||
* barrier packet is relevant enough.
|
||||
*/
|
||||
if (time_after(now, connection->send.last_sent_barrier_jif + ent)) {
|
||||
drbd_warn(device, "Remote failed to answer a P_BARRIER (sent at %lu jif; now=%lu jif) within %ums > ko-count (%u) * timeout (%u * 0.1s)\n",
|
||||
connection->send.last_sent_barrier_jif, now,
|
||||
jiffies_to_msecs(now - connection->send.last_sent_barrier_jif), ko_count, timeout);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/* A request is considered timed out, if
|
||||
* - we have some effective timeout from the configuration,
|
||||
* with some state restrictions applied,
|
||||
* - the oldest request is waiting for a response from the network
|
||||
* resp. the local disk,
|
||||
* - the oldest request is in fact older than the effective timeout,
|
||||
* - the connection was established (resp. disk was attached)
|
||||
* for longer than the timeout already.
|
||||
* Note that for 32bit jiffies and very stable connections/disks,
|
||||
* we may have a wrap around, which is catched by
|
||||
* !time_in_range(now, last_..._jif, last_..._jif + timeout).
|
||||
*
|
||||
* Side effect: once per 32bit wrap-around interval, which means every
|
||||
* ~198 days with 250 HZ, we have a window where the timeout would need
|
||||
* to expire twice (worst case) to become effective. Good enough.
|
||||
*/
|
||||
|
||||
void request_timer_fn(unsigned long data)
|
||||
{
|
||||
struct drbd_device *device = (struct drbd_device *) data;
|
||||
@ -1522,11 +1612,14 @@ void request_timer_fn(unsigned long data)
|
||||
unsigned long oldest_submit_jif;
|
||||
unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */
|
||||
unsigned long now;
|
||||
unsigned int ko_count = 0, timeout = 0;
|
||||
|
||||
rcu_read_lock();
|
||||
nc = rcu_dereference(connection->net_conf);
|
||||
if (nc && device->state.conn >= C_WF_REPORT_PARAMS)
|
||||
ent = nc->timeout * HZ/10 * nc->ko_count;
|
||||
if (nc && device->state.conn >= C_WF_REPORT_PARAMS) {
|
||||
ko_count = nc->ko_count;
|
||||
timeout = nc->timeout;
|
||||
}
|
||||
|
||||
if (get_ldev(device)) { /* implicit state.disk >= D_INCONSISTENT */
|
||||
dt = rcu_dereference(device->ldev->disk_conf)->disk_timeout * HZ / 10;
|
||||
@ -1534,6 +1627,8 @@ void request_timer_fn(unsigned long data)
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
|
||||
ent = timeout * HZ/10 * ko_count;
|
||||
et = min_not_zero(dt, ent);
|
||||
|
||||
if (!et)
|
||||
@ -1545,11 +1640,22 @@ void request_timer_fn(unsigned long data)
|
||||
spin_lock_irq(&device->resource->req_lock);
|
||||
req_read = list_first_entry_or_null(&device->pending_completion[0], struct drbd_request, req_pending_local);
|
||||
req_write = list_first_entry_or_null(&device->pending_completion[1], struct drbd_request, req_pending_local);
|
||||
req_peer = connection->req_not_net_done;
|
||||
|
||||
/* maybe the oldest request waiting for the peer is in fact still
|
||||
* blocking in tcp sendmsg */
|
||||
if (!req_peer && connection->req_next && connection->req_next->pre_send_jif)
|
||||
req_peer = connection->req_next;
|
||||
* blocking in tcp sendmsg. That's ok, though, that's handled via the
|
||||
* socket send timeout, requesting a ping, and bumping ko-count in
|
||||
* we_should_drop_the_connection().
|
||||
*/
|
||||
|
||||
/* check the oldest request we did successfully send,
|
||||
* but which is still waiting for an ACK. */
|
||||
req_peer = connection->req_ack_pending;
|
||||
|
||||
/* if we don't have such a request (e.g. protocol A)
|
||||
* check the oldest request which is still waiting on its epoch
|
||||
* closing barrier ack. */
|
||||
if (!req_peer)
|
||||
req_peer = connection->req_not_net_done;
|
||||
|
||||
/* evaluate the oldest peer request only in one timer! */
|
||||
if (req_peer && req_peer->device != device)
|
||||
@ -1566,28 +1672,9 @@ void request_timer_fn(unsigned long data)
|
||||
: req_write ? req_write->pre_submit_jif
|
||||
: req_read ? req_read->pre_submit_jif : now;
|
||||
|
||||
/* The request is considered timed out, if
|
||||
* - we have some effective timeout from the configuration,
|
||||
* with above state restrictions applied,
|
||||
* - the oldest request is waiting for a response from the network
|
||||
* resp. the local disk,
|
||||
* - the oldest request is in fact older than the effective timeout,
|
||||
* - the connection was established (resp. disk was attached)
|
||||
* for longer than the timeout already.
|
||||
* Note that for 32bit jiffies and very stable connections/disks,
|
||||
* we may have a wrap around, which is caught by
|
||||
* !time_in_range(now, last_..._jif, last_..._jif + timeout).
|
||||
*
|
||||
* Side effect: once per 32bit wrap-around interval, which means every
|
||||
* ~198 days with 250 HZ, we have a window where the timeout would need
|
||||
* to expire twice (worst case) to become effective. Good enough.
|
||||
*/
|
||||
if (ent && req_peer &&
|
||||
time_after(now, req_peer->pre_send_jif + ent) &&
|
||||
!time_in_range(now, connection->last_reconnect_jif, connection->last_reconnect_jif + ent)) {
|
||||
drbd_warn(device, "Remote failed to finish a request within ko-count * timeout\n");
|
||||
if (ent && req_peer && net_timeout_reached(req_peer, connection, now, ent, ko_count, timeout))
|
||||
_conn_request_state(connection, NS(conn, C_TIMEOUT), CS_VERBOSE | CS_HARD);
|
||||
}
|
||||
|
||||
if (dt && oldest_submit_jif != now &&
|
||||
time_after(now, oldest_submit_jif + dt) &&
|
||||
!time_in_range(now, device->last_reattach_jif, device->last_reattach_jif + dt)) {
|
||||
|
@ -331,21 +331,6 @@ static inline int req_mod(struct drbd_request *req,
|
||||
return rv;
|
||||
}
|
||||
|
||||
static inline bool drbd_should_do_remote(union drbd_dev_state s)
|
||||
{
|
||||
return s.pdsk == D_UP_TO_DATE ||
|
||||
(s.pdsk >= D_INCONSISTENT &&
|
||||
s.conn >= C_WF_BITMAP_T &&
|
||||
s.conn < C_AHEAD);
|
||||
/* Before proto 96 that was >= CONNECTED instead of >= C_WF_BITMAP_T.
|
||||
That is equivalent since before 96 IO was frozen in the C_WF_BITMAP*
|
||||
states. */
|
||||
}
|
||||
static inline bool drbd_should_send_out_of_sync(union drbd_dev_state s)
|
||||
{
|
||||
return s.conn == C_AHEAD || s.conn == C_WF_BITMAP_S;
|
||||
/* pdsk = D_INCONSISTENT as a consequence. Protocol 96 check not necessary
|
||||
since we enter state C_AHEAD only if proto >= 96 */
|
||||
}
|
||||
extern bool drbd_should_do_remote(union drbd_dev_state);
|
||||
|
||||
#endif
|
||||
|
@ -29,6 +29,7 @@
|
||||
#include "drbd_int.h"
|
||||
#include "drbd_protocol.h"
|
||||
#include "drbd_req.h"
|
||||
#include "drbd_state_change.h"
|
||||
|
||||
struct after_state_chg_work {
|
||||
struct drbd_work w;
|
||||
@ -37,6 +38,7 @@ struct after_state_chg_work {
|
||||
union drbd_state ns;
|
||||
enum chg_state_flags flags;
|
||||
struct completion *done;
|
||||
struct drbd_state_change *state_change;
|
||||
};
|
||||
|
||||
enum sanitize_state_warnings {
|
||||
@ -48,9 +50,248 @@ enum sanitize_state_warnings {
|
||||
IMPLICITLY_UPGRADED_PDSK,
|
||||
};
|
||||
|
||||
static void count_objects(struct drbd_resource *resource,
|
||||
unsigned int *n_devices,
|
||||
unsigned int *n_connections)
|
||||
{
|
||||
struct drbd_device *device;
|
||||
struct drbd_connection *connection;
|
||||
int vnr;
|
||||
|
||||
*n_devices = 0;
|
||||
*n_connections = 0;
|
||||
|
||||
idr_for_each_entry(&resource->devices, device, vnr)
|
||||
(*n_devices)++;
|
||||
for_each_connection(connection, resource)
|
||||
(*n_connections)++;
|
||||
}
|
||||
|
||||
/**
 * alloc_state_change() - Allocate a state change record in one chunk
 * @n_devices:     number of device slots to reserve
 * @n_connections: number of connection slots to reserve
 * @gfp:           allocation flags handed to kmalloc()
 *
 * The header, the device array, the connection array and the
 * n_devices * n_connections peer device array share a single allocation.
 * The object pointers that forget_state_change() tests before dropping a
 * reference (resource, devices, connections) start out as NULL.
 *
 * Returns the new record, or NULL if the allocation failed.
 */
static struct drbd_state_change *alloc_state_change(unsigned int n_devices, unsigned int n_connections, gfp_t gfp)
{
	struct drbd_state_change *sc;
	unsigned int bytes, i;

	bytes = sizeof(struct drbd_state_change);
	bytes += n_devices * sizeof(struct drbd_device_state_change);
	bytes += n_connections * sizeof(struct drbd_connection_state_change);
	bytes += n_devices * n_connections * sizeof(struct drbd_peer_device_state_change);

	sc = kmalloc(bytes, gfp);
	if (!sc)
		return NULL;

	sc->n_devices = n_devices;
	sc->n_connections = n_connections;

	/* The three arrays follow the header back to back. */
	sc->devices = (void *)(sc + 1);
	sc->connections = (void *)&sc->devices[n_devices];
	sc->peer_devices = (void *)&sc->connections[n_connections];

	sc->resource[0].resource = NULL;
	for (i = 0; i < n_devices; i++)
		sc->devices[i].device = NULL;
	for (i = 0; i < n_connections; i++)
		sc->connections[i].connection = NULL;

	return sc;
}
|
||||
|
||||
/**
 * remember_old_state() - Snapshot the current state of a whole resource
 * @resource: resource to snapshot
 * @gfp:      allocation flags for the snapshot
 *
 * Fills the [OLD] half of a freshly allocated state change record from the
 * resource, all of its connections, all of its devices, and every
 * (device, connection) pair.  A kref is taken on the resource, on each
 * connection and on each device; forget_state_change() drops them again.
 *
 * Returns the record, or NULL if it could not be allocated.
 */
struct drbd_state_change *remember_old_state(struct drbd_resource *resource, gfp_t gfp)
{
	struct drbd_state_change *sc;
	struct drbd_connection_state_change *conn_sc;
	struct drbd_device_state_change *dev_sc;
	struct drbd_peer_device_state_change *peer_sc;
	struct drbd_connection *connection;
	struct drbd_device *device;
	unsigned int n_devices, n_connections;
	unsigned int conn_idx = 0, dev_idx = 0, peer_idx = 0;
	int vnr;

	/* Caller holds req_lock spinlock.
	 * No state, no device IDR, no connections lists can change. */
	count_objects(resource, &n_devices, &n_connections);
	sc = alloc_state_change(n_devices, n_connections, gfp);
	if (!sc)
		return NULL;

	kref_get(&resource->kref);
	sc->resource[0].resource = resource;
	sc->resource[0].role[OLD] = conn_highest_role(first_connection(resource));
	sc->resource[0].susp[OLD] = resource->susp;
	sc->resource[0].susp_nod[OLD] = resource->susp_nod;
	sc->resource[0].susp_fen[OLD] = resource->susp_fen;

	for_each_connection(connection, resource) {
		conn_sc = &sc->connections[conn_idx++];

		kref_get(&connection->kref);
		conn_sc->connection = connection;
		conn_sc->cstate[OLD] = connection->cstate;
		conn_sc->peer_role[OLD] = conn_highest_peer(connection);
	}

	idr_for_each_entry(&resource->devices, device, vnr) {
		dev_sc = &sc->devices[dev_idx++];

		kref_get(&device->kref);
		dev_sc->device = device;
		dev_sc->disk_state[OLD] = device->state.disk;

		/* The peer_devices for each device have to be enumerated in
		   the order of the connections. We may not use for_each_peer_device() here. */
		for_each_connection(connection, resource) {
			peer_sc = &sc->peer_devices[peer_idx++];

			peer_sc->peer_device = conn_peer_device(connection, device->vnr);
			peer_sc->disk_state[OLD] = device->state.pdsk;
			/* never report a replication state below C_WF_REPORT_PARAMS */
			peer_sc->repl_state[OLD] =
				max_t(enum drbd_conns,
				      C_WF_REPORT_PARAMS, device->state.conn);
			peer_sc->resync_susp_user[OLD] = device->state.user_isp;
			peer_sc->resync_susp_peer[OLD] = device->state.peer_isp;
			peer_sc->resync_susp_dependency[OLD] = device->state.aftr_isp;
		}
	}

	return sc;
}
|
||||
|
||||
static void remember_new_state(struct drbd_state_change *state_change)
|
||||
{
|
||||
struct drbd_resource_state_change *resource_state_change;
|
||||
struct drbd_resource *resource;
|
||||
unsigned int n;
|
||||
|
||||
if (!state_change)
|
||||
return;
|
||||
|
||||
resource_state_change = &state_change->resource[0];
|
||||
resource = resource_state_change->resource;
|
||||
|
||||
resource_state_change->role[NEW] =
|
||||
conn_highest_role(first_connection(resource));
|
||||
resource_state_change->susp[NEW] = resource->susp;
|
||||
resource_state_change->susp_nod[NEW] = resource->susp_nod;
|
||||
resource_state_change->susp_fen[NEW] = resource->susp_fen;
|
||||
|
||||
for (n = 0; n < state_change->n_devices; n++) {
|
||||
struct drbd_device_state_change *device_state_change =
|
||||
&state_change->devices[n];
|
||||
struct drbd_device *device = device_state_change->device;
|
||||
|
||||
device_state_change->disk_state[NEW] = device->state.disk;
|
||||
}
|
||||
|
||||
for (n = 0; n < state_change->n_connections; n++) {
|
||||
struct drbd_connection_state_change *connection_state_change =
|
||||
&state_change->connections[n];
|
||||
struct drbd_connection *connection =
|
||||
connection_state_change->connection;
|
||||
|
||||
connection_state_change->cstate[NEW] = connection->cstate;
|
||||
connection_state_change->peer_role[NEW] =
|
||||
conn_highest_peer(connection);
|
||||
}
|
||||
|
||||
for (n = 0; n < state_change->n_devices * state_change->n_connections; n++) {
|
||||
struct drbd_peer_device_state_change *peer_device_state_change =
|
||||
&state_change->peer_devices[n];
|
||||
struct drbd_device *device =
|
||||
peer_device_state_change->peer_device->device;
|
||||
union drbd_dev_state state = device->state;
|
||||
|
||||
peer_device_state_change->disk_state[NEW] = state.pdsk;
|
||||
peer_device_state_change->repl_state[NEW] =
|
||||
max_t(enum drbd_conns, C_WF_REPORT_PARAMS, state.conn);
|
||||
peer_device_state_change->resync_susp_user[NEW] =
|
||||
state.user_isp;
|
||||
peer_device_state_change->resync_susp_peer[NEW] =
|
||||
state.peer_isp;
|
||||
peer_device_state_change->resync_susp_dependency[NEW] =
|
||||
state.aftr_isp;
|
||||
}
|
||||
}
|
||||
|
||||
void copy_old_to_new_state_change(struct drbd_state_change *state_change)
|
||||
{
|
||||
struct drbd_resource_state_change *resource_state_change = &state_change->resource[0];
|
||||
unsigned int n_device, n_connection, n_peer_device, n_peer_devices;
|
||||
|
||||
#define OLD_TO_NEW(x) \
|
||||
(x[NEW] = x[OLD])
|
||||
|
||||
OLD_TO_NEW(resource_state_change->role);
|
||||
OLD_TO_NEW(resource_state_change->susp);
|
||||
OLD_TO_NEW(resource_state_change->susp_nod);
|
||||
OLD_TO_NEW(resource_state_change->susp_fen);
|
||||
|
||||
for (n_connection = 0; n_connection < state_change->n_connections; n_connection++) {
|
||||
struct drbd_connection_state_change *connection_state_change =
|
||||
&state_change->connections[n_connection];
|
||||
|
||||
OLD_TO_NEW(connection_state_change->peer_role);
|
||||
OLD_TO_NEW(connection_state_change->cstate);
|
||||
}
|
||||
|
||||
for (n_device = 0; n_device < state_change->n_devices; n_device++) {
|
||||
struct drbd_device_state_change *device_state_change =
|
||||
&state_change->devices[n_device];
|
||||
|
||||
OLD_TO_NEW(device_state_change->disk_state);
|
||||
}
|
||||
|
||||
n_peer_devices = state_change->n_devices * state_change->n_connections;
|
||||
for (n_peer_device = 0; n_peer_device < n_peer_devices; n_peer_device++) {
|
||||
struct drbd_peer_device_state_change *p =
|
||||
&state_change->peer_devices[n_peer_device];
|
||||
|
||||
OLD_TO_NEW(p->disk_state);
|
||||
OLD_TO_NEW(p->repl_state);
|
||||
OLD_TO_NEW(p->resync_susp_user);
|
||||
OLD_TO_NEW(p->resync_susp_peer);
|
||||
OLD_TO_NEW(p->resync_susp_dependency);
|
||||
}
|
||||
|
||||
#undef OLD_TO_NEW
|
||||
}
|
||||
|
||||
void forget_state_change(struct drbd_state_change *state_change)
|
||||
{
|
||||
unsigned int n;
|
||||
|
||||
if (!state_change)
|
||||
return;
|
||||
|
||||
if (state_change->resource->resource)
|
||||
kref_put(&state_change->resource->resource->kref, drbd_destroy_resource);
|
||||
for (n = 0; n < state_change->n_devices; n++) {
|
||||
struct drbd_device *device = state_change->devices[n].device;
|
||||
|
||||
if (device)
|
||||
kref_put(&device->kref, drbd_destroy_device);
|
||||
}
|
||||
for (n = 0; n < state_change->n_connections; n++) {
|
||||
struct drbd_connection *connection =
|
||||
state_change->connections[n].connection;
|
||||
|
||||
if (connection)
|
||||
kref_put(&connection->kref, drbd_destroy_connection);
|
||||
}
|
||||
kfree(state_change);
|
||||
}
|
||||
|
||||
static int w_after_state_ch(struct drbd_work *w, int unused);
|
||||
static void after_state_ch(struct drbd_device *device, union drbd_state os,
|
||||
union drbd_state ns, enum chg_state_flags flags);
|
||||
union drbd_state ns, enum chg_state_flags flags,
|
||||
struct drbd_state_change *);
|
||||
static enum drbd_state_rv is_valid_state(struct drbd_device *, union drbd_state);
|
||||
static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state, struct drbd_connection *);
|
||||
static enum drbd_state_rv is_valid_transition(union drbd_state os, union drbd_state ns);
|
||||
@ -93,6 +334,7 @@ static enum drbd_role max_role(enum drbd_role role1, enum drbd_role role2)
|
||||
return R_SECONDARY;
|
||||
return R_UNKNOWN;
|
||||
}
|
||||
|
||||
static enum drbd_role min_role(enum drbd_role role1, enum drbd_role role2)
|
||||
{
|
||||
if (role1 == R_UNKNOWN || role2 == R_UNKNOWN)
|
||||
@ -937,7 +1179,7 @@ void drbd_resume_al(struct drbd_device *device)
|
||||
drbd_info(device, "Resumed AL updates\n");
|
||||
}
|
||||
|
||||
/* helper for __drbd_set_state */
|
||||
/* helper for _drbd_set_state */
|
||||
static void set_ov_position(struct drbd_device *device, enum drbd_conns cs)
|
||||
{
|
||||
if (first_peer_device(device)->connection->agreed_pro_version < 90)
|
||||
@ -965,17 +1207,17 @@ static void set_ov_position(struct drbd_device *device, enum drbd_conns cs)
|
||||
}
|
||||
|
||||
/**
|
||||
* __drbd_set_state() - Set a new DRBD state
|
||||
* _drbd_set_state() - Set a new DRBD state
|
||||
* @device: DRBD device.
|
||||
* @ns: new state.
|
||||
* @flags: Flags
|
||||
* @done: Optional completion, that will get completed after the after_state_ch() finished
|
||||
*
|
||||
* Caller needs to hold req_lock, and global_state_lock. Do not call directly.
|
||||
* Caller needs to hold req_lock. Do not call directly.
|
||||
*/
|
||||
enum drbd_state_rv
|
||||
__drbd_set_state(struct drbd_device *device, union drbd_state ns,
|
||||
enum chg_state_flags flags, struct completion *done)
|
||||
_drbd_set_state(struct drbd_device *device, union drbd_state ns,
|
||||
enum chg_state_flags flags, struct completion *done)
|
||||
{
|
||||
struct drbd_peer_device *peer_device = first_peer_device(device);
|
||||
struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
|
||||
@ -983,6 +1225,7 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns,
|
||||
enum drbd_state_rv rv = SS_SUCCESS;
|
||||
enum sanitize_state_warnings ssw;
|
||||
struct after_state_chg_work *ascw;
|
||||
struct drbd_state_change *state_change;
|
||||
|
||||
os = drbd_read_state(device);
|
||||
|
||||
@ -1037,6 +1280,9 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns,
|
||||
if (!is_sync_state(os.conn) && is_sync_state(ns.conn))
|
||||
clear_bit(RS_DONE, &device->flags);
|
||||
|
||||
/* FIXME: Have any flags been set earlier in this function already? */
|
||||
state_change = remember_old_state(device->resource, GFP_ATOMIC);
|
||||
|
||||
/* changes to local_cnt and device flags should be visible before
|
||||
* changes to state, which again should be visible before anything else
|
||||
* depending on that change happens. */
|
||||
@ -1047,6 +1293,8 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns,
|
||||
device->resource->susp_fen = ns.susp_fen;
|
||||
smp_wmb();
|
||||
|
||||
remember_new_state(state_change);
|
||||
|
||||
/* put replicated vs not-replicated requests in separate epochs */
|
||||
if (drbd_should_do_remote((union drbd_dev_state)os.i) !=
|
||||
drbd_should_do_remote((union drbd_dev_state)ns.i))
|
||||
@ -1184,6 +1432,7 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns,
|
||||
ascw->w.cb = w_after_state_ch;
|
||||
ascw->device = device;
|
||||
ascw->done = done;
|
||||
ascw->state_change = state_change;
|
||||
drbd_queue_work(&connection->sender_work,
|
||||
&ascw->w);
|
||||
} else {
|
||||
@ -1199,7 +1448,8 @@ static int w_after_state_ch(struct drbd_work *w, int unused)
|
||||
container_of(w, struct after_state_chg_work, w);
|
||||
struct drbd_device *device = ascw->device;
|
||||
|
||||
after_state_ch(device, ascw->os, ascw->ns, ascw->flags);
|
||||
after_state_ch(device, ascw->os, ascw->ns, ascw->flags, ascw->state_change);
|
||||
forget_state_change(ascw->state_change);
|
||||
if (ascw->flags & CS_WAIT_COMPLETE)
|
||||
complete(ascw->done);
|
||||
kfree(ascw);
|
||||
@ -1234,7 +1484,7 @@ int drbd_bitmap_io_from_worker(struct drbd_device *device,
|
||||
D_ASSERT(device, current == first_peer_device(device)->connection->worker.task);
|
||||
|
||||
/* open coded non-blocking drbd_suspend_io(device); */
|
||||
set_bit(SUSPEND_IO, &device->flags);
|
||||
atomic_inc(&device->suspend_cnt);
|
||||
|
||||
drbd_bm_lock(device, why, flags);
|
||||
rv = io_fn(device);
|
||||
@ -1245,6 +1495,139 @@ int drbd_bitmap_io_from_worker(struct drbd_device *device,
|
||||
return rv;
|
||||
}
|
||||
|
||||
void notify_resource_state_change(struct sk_buff *skb,
|
||||
unsigned int seq,
|
||||
struct drbd_resource_state_change *resource_state_change,
|
||||
enum drbd_notification_type type)
|
||||
{
|
||||
struct drbd_resource *resource = resource_state_change->resource;
|
||||
struct resource_info resource_info = {
|
||||
.res_role = resource_state_change->role[NEW],
|
||||
.res_susp = resource_state_change->susp[NEW],
|
||||
.res_susp_nod = resource_state_change->susp_nod[NEW],
|
||||
.res_susp_fen = resource_state_change->susp_fen[NEW],
|
||||
};
|
||||
|
||||
notify_resource_state(skb, seq, resource, &resource_info, type);
|
||||
}
|
||||
|
||||
void notify_connection_state_change(struct sk_buff *skb,
|
||||
unsigned int seq,
|
||||
struct drbd_connection_state_change *connection_state_change,
|
||||
enum drbd_notification_type type)
|
||||
{
|
||||
struct drbd_connection *connection = connection_state_change->connection;
|
||||
struct connection_info connection_info = {
|
||||
.conn_connection_state = connection_state_change->cstate[NEW],
|
||||
.conn_role = connection_state_change->peer_role[NEW],
|
||||
};
|
||||
|
||||
notify_connection_state(skb, seq, connection, &connection_info, type);
|
||||
}
|
||||
|
||||
void notify_device_state_change(struct sk_buff *skb,
|
||||
unsigned int seq,
|
||||
struct drbd_device_state_change *device_state_change,
|
||||
enum drbd_notification_type type)
|
||||
{
|
||||
struct drbd_device *device = device_state_change->device;
|
||||
struct device_info device_info = {
|
||||
.dev_disk_state = device_state_change->disk_state[NEW],
|
||||
};
|
||||
|
||||
notify_device_state(skb, seq, device, &device_info, type);
|
||||
}
|
||||
|
||||
void notify_peer_device_state_change(struct sk_buff *skb,
|
||||
unsigned int seq,
|
||||
struct drbd_peer_device_state_change *p,
|
||||
enum drbd_notification_type type)
|
||||
{
|
||||
struct drbd_peer_device *peer_device = p->peer_device;
|
||||
struct peer_device_info peer_device_info = {
|
||||
.peer_repl_state = p->repl_state[NEW],
|
||||
.peer_disk_state = p->disk_state[NEW],
|
||||
.peer_resync_susp_user = p->resync_susp_user[NEW],
|
||||
.peer_resync_susp_peer = p->resync_susp_peer[NEW],
|
||||
.peer_resync_susp_dependency = p->resync_susp_dependency[NEW],
|
||||
};
|
||||
|
||||
notify_peer_device_state(skb, seq, peer_device, &peer_device_info, type);
|
||||
}
|
||||
|
||||
static void broadcast_state_change(struct drbd_state_change *state_change)
|
||||
{
|
||||
struct drbd_resource_state_change *resource_state_change = &state_change->resource[0];
|
||||
bool resource_state_has_changed;
|
||||
unsigned int n_device, n_connection, n_peer_device, n_peer_devices;
|
||||
void (*last_func)(struct sk_buff *, unsigned int, void *,
|
||||
enum drbd_notification_type) = NULL;
|
||||
void *uninitialized_var(last_arg);
|
||||
|
||||
#define HAS_CHANGED(state) ((state)[OLD] != (state)[NEW])
|
||||
#define FINAL_STATE_CHANGE(type) \
|
||||
({ if (last_func) \
|
||||
last_func(NULL, 0, last_arg, type); \
|
||||
})
|
||||
#define REMEMBER_STATE_CHANGE(func, arg, type) \
|
||||
({ FINAL_STATE_CHANGE(type | NOTIFY_CONTINUES); \
|
||||
last_func = (typeof(last_func))func; \
|
||||
last_arg = arg; \
|
||||
})
|
||||
|
||||
mutex_lock(¬ification_mutex);
|
||||
|
||||
resource_state_has_changed =
|
||||
HAS_CHANGED(resource_state_change->role) ||
|
||||
HAS_CHANGED(resource_state_change->susp) ||
|
||||
HAS_CHANGED(resource_state_change->susp_nod) ||
|
||||
HAS_CHANGED(resource_state_change->susp_fen);
|
||||
|
||||
if (resource_state_has_changed)
|
||||
REMEMBER_STATE_CHANGE(notify_resource_state_change,
|
||||
resource_state_change, NOTIFY_CHANGE);
|
||||
|
||||
for (n_connection = 0; n_connection < state_change->n_connections; n_connection++) {
|
||||
struct drbd_connection_state_change *connection_state_change =
|
||||
&state_change->connections[n_connection];
|
||||
|
||||
if (HAS_CHANGED(connection_state_change->peer_role) ||
|
||||
HAS_CHANGED(connection_state_change->cstate))
|
||||
REMEMBER_STATE_CHANGE(notify_connection_state_change,
|
||||
connection_state_change, NOTIFY_CHANGE);
|
||||
}
|
||||
|
||||
for (n_device = 0; n_device < state_change->n_devices; n_device++) {
|
||||
struct drbd_device_state_change *device_state_change =
|
||||
&state_change->devices[n_device];
|
||||
|
||||
if (HAS_CHANGED(device_state_change->disk_state))
|
||||
REMEMBER_STATE_CHANGE(notify_device_state_change,
|
||||
device_state_change, NOTIFY_CHANGE);
|
||||
}
|
||||
|
||||
n_peer_devices = state_change->n_devices * state_change->n_connections;
|
||||
for (n_peer_device = 0; n_peer_device < n_peer_devices; n_peer_device++) {
|
||||
struct drbd_peer_device_state_change *p =
|
||||
&state_change->peer_devices[n_peer_device];
|
||||
|
||||
if (HAS_CHANGED(p->disk_state) ||
|
||||
HAS_CHANGED(p->repl_state) ||
|
||||
HAS_CHANGED(p->resync_susp_user) ||
|
||||
HAS_CHANGED(p->resync_susp_peer) ||
|
||||
HAS_CHANGED(p->resync_susp_dependency))
|
||||
REMEMBER_STATE_CHANGE(notify_peer_device_state_change,
|
||||
p, NOTIFY_CHANGE);
|
||||
}
|
||||
|
||||
FINAL_STATE_CHANGE(NOTIFY_CHANGE);
|
||||
mutex_unlock(¬ification_mutex);
|
||||
|
||||
#undef HAS_CHANGED
|
||||
#undef FINAL_STATE_CHANGE
|
||||
#undef REMEMBER_STATE_CHANGE
|
||||
}
|
||||
|
||||
/**
|
||||
* after_state_ch() - Perform after state change actions that may sleep
|
||||
* @device: DRBD device.
|
||||
@ -1253,13 +1636,16 @@ int drbd_bitmap_io_from_worker(struct drbd_device *device,
|
||||
* @flags: Flags
|
||||
*/
|
||||
static void after_state_ch(struct drbd_device *device, union drbd_state os,
|
||||
union drbd_state ns, enum chg_state_flags flags)
|
||||
union drbd_state ns, enum chg_state_flags flags,
|
||||
struct drbd_state_change *state_change)
|
||||
{
|
||||
struct drbd_resource *resource = device->resource;
|
||||
struct drbd_peer_device *peer_device = first_peer_device(device);
|
||||
struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
|
||||
struct sib_info sib;
|
||||
|
||||
broadcast_state_change(state_change);
|
||||
|
||||
sib.sib_reason = SIB_STATE_CHANGE;
|
||||
sib.os = os;
|
||||
sib.ns = ns;
|
||||
@ -1377,7 +1763,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
|
||||
}
|
||||
|
||||
if (ns.pdsk < D_INCONSISTENT && get_ldev(device)) {
|
||||
if (os.peer == R_SECONDARY && ns.peer == R_PRIMARY &&
|
||||
if (os.peer != R_PRIMARY && ns.peer == R_PRIMARY &&
|
||||
device->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
|
||||
drbd_uuid_new_current(device);
|
||||
drbd_send_uuids(peer_device);
|
||||
@ -1444,7 +1830,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
|
||||
if (os.disk != D_FAILED && ns.disk == D_FAILED) {
|
||||
enum drbd_io_error_p eh = EP_PASS_ON;
|
||||
int was_io_error = 0;
|
||||
/* corresponding get_ldev was in __drbd_set_state, to serialize
|
||||
/* corresponding get_ldev was in _drbd_set_state, to serialize
|
||||
* our cleanup here with the transition to D_DISKLESS.
|
||||
* But it is still not safe to dereference ldev here, since
|
||||
* we might come from a failed Attach before ldev was set. */
|
||||
@ -1455,6 +1841,10 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
|
||||
|
||||
was_io_error = test_and_clear_bit(WAS_IO_ERROR, &device->flags);
|
||||
|
||||
/* Intentionally call this handler first, before drbd_send_state().
|
||||
* See: 2932204 drbd: call local-io-error handler early
|
||||
* People may choose to hard-reset the box from this handler.
|
||||
* It is useful if this looks like a "regular node crash". */
|
||||
if (was_io_error && eh == EP_CALL_HELPER)
|
||||
drbd_khelper(device, "local-io-error");
|
||||
|
||||
@ -1572,6 +1962,7 @@ struct after_conn_state_chg_work {
|
||||
union drbd_state ns_max; /* new, max state, over all devices */
|
||||
enum chg_state_flags flags;
|
||||
struct drbd_connection *connection;
|
||||
struct drbd_state_change *state_change;
|
||||
};
|
||||
|
||||
static int w_after_conn_state_ch(struct drbd_work *w, int unused)
|
||||
@ -1584,6 +1975,8 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused)
|
||||
struct drbd_peer_device *peer_device;
|
||||
int vnr;
|
||||
|
||||
broadcast_state_change(acscw->state_change);
|
||||
forget_state_change(acscw->state_change);
|
||||
kfree(acscw);
|
||||
|
||||
/* Upon network configuration, we need to start the receiver */
|
||||
@ -1593,6 +1986,13 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused)
|
||||
if (oc == C_DISCONNECTING && ns_max.conn == C_STANDALONE) {
|
||||
struct net_conf *old_conf;
|
||||
|
||||
mutex_lock(&notification_mutex);
|
||||
idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
|
||||
notify_peer_device_state(NULL, 0, peer_device, NULL,
|
||||
NOTIFY_DESTROY | NOTIFY_CONTINUES);
|
||||
notify_connection_state(NULL, 0, connection, NULL, NOTIFY_DESTROY);
|
||||
mutex_unlock(&notification_mutex);
|
||||
|
||||
mutex_lock(&connection->resource->conf_update);
|
||||
old_conf = connection->net_conf;
|
||||
connection->my_addr_len = 0;
|
||||
@ -1759,7 +2159,7 @@ conn_set_state(struct drbd_connection *connection, union drbd_state mask, union
|
||||
if (flags & CS_IGN_OUTD_FAIL && ns.disk == D_OUTDATED && os.disk < D_OUTDATED)
|
||||
ns.disk = os.disk;
|
||||
|
||||
rv = __drbd_set_state(device, ns, flags, NULL);
|
||||
rv = _drbd_set_state(device, ns, flags, NULL);
|
||||
if (rv < SS_SUCCESS)
|
||||
BUG();
|
||||
|
||||
@ -1823,6 +2223,7 @@ _conn_request_state(struct drbd_connection *connection, union drbd_state mask, u
|
||||
enum drbd_conns oc = connection->cstate;
|
||||
union drbd_state ns_max, ns_min, os;
|
||||
bool have_mutex = false;
|
||||
struct drbd_state_change *state_change;
|
||||
|
||||
if (mask.conn) {
|
||||
rv = is_valid_conn_transition(oc, val.conn);
|
||||
@ -1868,10 +2269,12 @@ _conn_request_state(struct drbd_connection *connection, union drbd_state mask, u
|
||||
goto abort;
|
||||
}
|
||||
|
||||
state_change = remember_old_state(connection->resource, GFP_ATOMIC);
|
||||
conn_old_common_state(connection, &os, &flags);
|
||||
flags |= CS_DC_SUSP;
|
||||
conn_set_state(connection, mask, val, &ns_min, &ns_max, flags);
|
||||
conn_pr_state_change(connection, os, ns_max, flags);
|
||||
remember_new_state(state_change);
|
||||
|
||||
acscw = kmalloc(sizeof(*acscw), GFP_ATOMIC);
|
||||
if (acscw) {
|
||||
@ -1882,6 +2285,7 @@ _conn_request_state(struct drbd_connection *connection, union drbd_state mask, u
|
||||
acscw->w.cb = w_after_conn_state_ch;
|
||||
kref_get(&connection->kref);
|
||||
acscw->connection = connection;
|
||||
acscw->state_change = state_change;
|
||||
drbd_queue_work(&connection->sender_work, &acscw->w);
|
||||
} else {
|
||||
drbd_err(connection, "Could not kmalloc an acscw\n");
|
||||
|
@ -122,9 +122,9 @@ extern enum drbd_state_rv
|
||||
_drbd_request_state_holding_state_mutex(struct drbd_device *, union drbd_state,
|
||||
union drbd_state, enum chg_state_flags);
|
||||
|
||||
extern enum drbd_state_rv __drbd_set_state(struct drbd_device *, union drbd_state,
|
||||
enum chg_state_flags,
|
||||
struct completion *done);
|
||||
extern enum drbd_state_rv _drbd_set_state(struct drbd_device *, union drbd_state,
|
||||
enum chg_state_flags,
|
||||
struct completion *done);
|
||||
extern void print_st_err(struct drbd_device *, union drbd_state,
|
||||
union drbd_state, int);
|
||||
|
||||
|
63
drivers/block/drbd/drbd_state_change.h
Normal file
63
drivers/block/drbd/drbd_state_change.h
Normal file
@ -0,0 +1,63 @@
|
||||
#ifndef DRBD_STATE_CHANGE_H
|
||||
#define DRBD_STATE_CHANGE_H
|
||||
|
||||
struct drbd_resource_state_change {
|
||||
struct drbd_resource *resource;
|
||||
enum drbd_role role[2];
|
||||
bool susp[2];
|
||||
bool susp_nod[2];
|
||||
bool susp_fen[2];
|
||||
};
|
||||
|
||||
struct drbd_device_state_change {
|
||||
struct drbd_device *device;
|
||||
enum drbd_disk_state disk_state[2];
|
||||
};
|
||||
|
||||
/*
 * Per-connection part of a recorded state change: the connection it refers
 * to, plus two-element arrays for the connection state and the peer's role.
 * NOTE(review): slots presumably hold the old and new values -- confirm.
 */
struct drbd_connection_state_change {
|
||||
struct drbd_connection *connection;
|
||||
enum drbd_conns cstate[2]; /* drbd9: enum drbd_conn_state */
|
||||
enum drbd_role peer_role[2];
|
||||
};
|
||||
|
||||
/*
 * Per-peer-device part of a recorded state change: the peer device it
 * refers to, plus two-element arrays for its disk state, replication state
 * and the three resync-suspended flags (user, peer, dependency).
 * NOTE(review): slots presumably hold the old and new values -- confirm.
 */
struct drbd_peer_device_state_change {
|
||||
struct drbd_peer_device *peer_device;
|
||||
enum drbd_disk_state disk_state[2];
|
||||
enum drbd_conns repl_state[2]; /* drbd9: enum drbd_repl_state */
|
||||
bool resync_susp_user[2];
|
||||
bool resync_susp_peer[2];
|
||||
bool resync_susp_dependency[2];
|
||||
};
|
||||
|
||||
/*
 * Container for one recorded state change. It embeds the single
 * per-resource record and points to arrays of per-device, per-connection
 * and per-peer-device records; n_devices and n_connections hold the counts.
 * NOTE(review): the length of peer_devices is not stated here (presumably
 * n_devices * n_connections) -- confirm against the allocator.
 */
struct drbd_state_change {
|
||||
struct list_head list; /* list linkage; the owning list is not visible here */
|
||||
unsigned int n_devices;
|
||||
unsigned int n_connections;
|
||||
struct drbd_resource_state_change resource[1]; /* one-element array, so it is used with pointer syntax like the arrays below */
|
||||
struct drbd_device_state_change *devices;
|
||||
struct drbd_connection_state_change *connections;
|
||||
struct drbd_peer_device_state_change *peer_devices;
|
||||
};
|
||||
|
||||
extern struct drbd_state_change *remember_old_state(struct drbd_resource *, gfp_t);
|
||||
extern void copy_old_to_new_state_change(struct drbd_state_change *);
|
||||
extern void forget_state_change(struct drbd_state_change *);
|
||||
|
||||
extern void notify_resource_state_change(struct sk_buff *,
|
||||
unsigned int,
|
||||
struct drbd_resource_state_change *,
|
||||
enum drbd_notification_type type);
|
||||
extern void notify_connection_state_change(struct sk_buff *,
|
||||
unsigned int,
|
||||
struct drbd_connection_state_change *,
|
||||
enum drbd_notification_type type);
|
||||
extern void notify_device_state_change(struct sk_buff *,
|
||||
unsigned int,
|
||||
struct drbd_device_state_change *,
|
||||
enum drbd_notification_type type);
|
||||
extern void notify_peer_device_state_change(struct sk_buff *,
|
||||
unsigned int,
|
||||
struct drbd_peer_device_state_change *,
|
||||
enum drbd_notification_type type);
|
||||
|
||||
#endif /* DRBD_STATE_CHANGE_H */
|
@ -55,13 +55,6 @@ static int make_resync_request(struct drbd_device *, int);
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
/* About the global_state_lock
|
||||
Each state transition on an device holds a read lock. In case we have
|
||||
to evaluate the resync after dependencies, we grab a write lock, because
|
||||
we need stable states on all devices for that. */
|
||||
rwlock_t global_state_lock;
|
||||
|
||||
/* used for synchronous meta data and bitmap IO
|
||||
* submitted by drbd_md_sync_page_io()
|
||||
*/
|
||||
@ -120,6 +113,7 @@ void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(l
|
||||
unsigned long flags = 0;
|
||||
struct drbd_peer_device *peer_device = peer_req->peer_device;
|
||||
struct drbd_device *device = peer_device->device;
|
||||
struct drbd_connection *connection = peer_device->connection;
|
||||
struct drbd_interval i;
|
||||
int do_wake;
|
||||
u64 block_id;
|
||||
@ -152,6 +146,12 @@ void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(l
|
||||
* ((peer_req->flags & (EE_WAS_ERROR|EE_IS_TRIM)) == EE_WAS_ERROR) */
|
||||
if (peer_req->flags & EE_WAS_ERROR)
|
||||
__drbd_chk_io_error(device, DRBD_WRITE_ERROR);
|
||||
|
||||
if (connection->cstate >= C_WF_REPORT_PARAMS) {
|
||||
kref_get(&device->kref); /* put is in drbd_send_acks_wf() */
|
||||
if (!queue_work(connection->ack_sender, &peer_device->send_acks_work))
|
||||
kref_put(&device->kref, drbd_destroy_device);
|
||||
}
|
||||
spin_unlock_irqrestore(&device->resource->req_lock, flags);
|
||||
|
||||
if (block_id == ID_SYNCER)
|
||||
@ -163,7 +163,6 @@ void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(l
|
||||
if (do_al_complete_io)
|
||||
drbd_al_complete_io(device, &i);
|
||||
|
||||
wake_asender(peer_device->connection);
|
||||
put_ldev(device);
|
||||
}
|
||||
|
||||
@ -195,6 +194,12 @@ void drbd_peer_request_endio(struct bio *bio)
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Panic the kernel with a message that identifies the affected device by
 * its minor number, resource name and vnr, warning of potential random
 * memory corruption after a delayed completion of an aborted local request.
 */
void drbd_panic_after_delayed_completion_of_aborted_request(struct drbd_device *device)
|
||||
{
|
||||
panic("drbd%u %s/%u potential random memory corruption caused by delayed completion of aborted local request\n",
|
||||
device->minor, device->resource->name, device->vnr);
|
||||
}
|
||||
|
||||
/* read, readA or write requests on R_PRIMARY coming from drbd_make_request
|
||||
*/
|
||||
void drbd_request_endio(struct bio *bio)
|
||||
@ -238,7 +243,7 @@ void drbd_request_endio(struct bio *bio)
|
||||
drbd_emerg(device, "delayed completion of aborted local request; disk-timeout may be too aggressive\n");
|
||||
|
||||
if (!bio->bi_error)
|
||||
panic("possible random memory corruption caused by delayed completion of aborted local request\n");
|
||||
drbd_panic_after_delayed_completion_of_aborted_request(device);
|
||||
}
|
||||
|
||||
/* to avoid recursion in __req_mod */
|
||||
@ -1291,6 +1296,7 @@ static int drbd_send_barrier(struct drbd_connection *connection)
|
||||
p->barrier = connection->send.current_epoch_nr;
|
||||
p->pad = 0;
|
||||
connection->send.current_epoch_writes = 0;
|
||||
connection->send.last_sent_barrier_jif = jiffies;
|
||||
|
||||
return conn_send_command(connection, sock, P_BARRIER, sizeof(*p), NULL, 0);
|
||||
}
|
||||
@ -1315,6 +1321,7 @@ static void re_init_if_first_write(struct drbd_connection *connection, unsigned
|
||||
connection->send.seen_any_write_yet = true;
|
||||
connection->send.current_epoch_nr = epoch;
|
||||
connection->send.current_epoch_writes = 0;
|
||||
connection->send.last_sent_barrier_jif = jiffies;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1456,70 +1463,73 @@ static int _drbd_may_sync_now(struct drbd_device *device)
|
||||
}
|
||||
|
||||
/**
|
||||
* _drbd_pause_after() - Pause resync on all devices that may not resync now
|
||||
* drbd_pause_after() - Pause resync on all devices that may not resync now
|
||||
* @device: DRBD device.
|
||||
*
|
||||
* Called from process context only (admin command and after_state_ch).
|
||||
*/
|
||||
static int _drbd_pause_after(struct drbd_device *device)
|
||||
static bool drbd_pause_after(struct drbd_device *device)
|
||||
{
|
||||
bool changed = false;
|
||||
struct drbd_device *odev;
|
||||
int i, rv = 0;
|
||||
int i;
|
||||
|
||||
rcu_read_lock();
|
||||
idr_for_each_entry(&drbd_devices, odev, i) {
|
||||
if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
|
||||
continue;
|
||||
if (!_drbd_may_sync_now(odev))
|
||||
rv |= (__drbd_set_state(_NS(odev, aftr_isp, 1), CS_HARD, NULL)
|
||||
!= SS_NOTHING_TO_DO);
|
||||
if (!_drbd_may_sync_now(odev) &&
|
||||
_drbd_set_state(_NS(odev, aftr_isp, 1),
|
||||
CS_HARD, NULL) != SS_NOTHING_TO_DO)
|
||||
changed = true;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
return rv;
|
||||
return changed;
|
||||
}
|
||||
|
||||
/**
|
||||
* _drbd_resume_next() - Resume resync on all devices that may resync now
|
||||
* drbd_resume_next() - Resume resync on all devices that may resync now
|
||||
* @device: DRBD device.
|
||||
*
|
||||
* Called from process context only (admin command and worker).
|
||||
*/
|
||||
static int _drbd_resume_next(struct drbd_device *device)
|
||||
static bool drbd_resume_next(struct drbd_device *device)
|
||||
{
|
||||
bool changed = false;
|
||||
struct drbd_device *odev;
|
||||
int i, rv = 0;
|
||||
int i;
|
||||
|
||||
rcu_read_lock();
|
||||
idr_for_each_entry(&drbd_devices, odev, i) {
|
||||
if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
|
||||
continue;
|
||||
if (odev->state.aftr_isp) {
|
||||
if (_drbd_may_sync_now(odev))
|
||||
rv |= (__drbd_set_state(_NS(odev, aftr_isp, 0),
|
||||
CS_HARD, NULL)
|
||||
!= SS_NOTHING_TO_DO) ;
|
||||
if (_drbd_may_sync_now(odev) &&
|
||||
_drbd_set_state(_NS(odev, aftr_isp, 0),
|
||||
CS_HARD, NULL) != SS_NOTHING_TO_DO)
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
rcu_read_unlock();
|
||||
return rv;
|
||||
return changed;
|
||||
}
|
||||
|
||||
void resume_next_sg(struct drbd_device *device)
|
||||
{
|
||||
write_lock_irq(&global_state_lock);
|
||||
_drbd_resume_next(device);
|
||||
write_unlock_irq(&global_state_lock);
|
||||
lock_all_resources();
|
||||
drbd_resume_next(device);
|
||||
unlock_all_resources();
|
||||
}
|
||||
|
||||
void suspend_other_sg(struct drbd_device *device)
|
||||
{
|
||||
write_lock_irq(&global_state_lock);
|
||||
_drbd_pause_after(device);
|
||||
write_unlock_irq(&global_state_lock);
|
||||
lock_all_resources();
|
||||
drbd_pause_after(device);
|
||||
unlock_all_resources();
|
||||
}
|
||||
|
||||
/* caller must hold global_state_lock */
|
||||
/* caller must lock_all_resources() */
|
||||
enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_minor)
|
||||
{
|
||||
struct drbd_device *odev;
|
||||
@ -1557,15 +1567,15 @@ enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_min
|
||||
}
|
||||
}
|
||||
|
||||
/* caller must hold global_state_lock */
|
||||
/* caller must lock_all_resources() */
|
||||
void drbd_resync_after_changed(struct drbd_device *device)
|
||||
{
|
||||
int changes;
|
||||
int changed;
|
||||
|
||||
do {
|
||||
changes = _drbd_pause_after(device);
|
||||
changes |= _drbd_resume_next(device);
|
||||
} while (changes);
|
||||
changed = drbd_pause_after(device);
|
||||
changed |= drbd_resume_next(device);
|
||||
} while (changed);
|
||||
}
|
||||
|
||||
void drbd_rs_controller_reset(struct drbd_device *device)
|
||||
@ -1685,19 +1695,14 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
|
||||
} else {
|
||||
mutex_lock(device->state_mutex);
|
||||
}
|
||||
clear_bit(B_RS_H_DONE, &device->flags);
|
||||
|
||||
/* req_lock: serialize with drbd_send_and_submit() and others
|
||||
* global_state_lock: for stable sync-after dependencies */
|
||||
spin_lock_irq(&device->resource->req_lock);
|
||||
write_lock(&global_state_lock);
|
||||
lock_all_resources();
|
||||
clear_bit(B_RS_H_DONE, &device->flags);
|
||||
/* Did some connection breakage or IO error race with us? */
|
||||
if (device->state.conn < C_CONNECTED
|
||||
|| !get_ldev_if_state(device, D_NEGOTIATING)) {
|
||||
write_unlock(&global_state_lock);
|
||||
spin_unlock_irq(&device->resource->req_lock);
|
||||
mutex_unlock(device->state_mutex);
|
||||
return;
|
||||
unlock_all_resources();
|
||||
goto out;
|
||||
}
|
||||
|
||||
ns = drbd_read_state(device);
|
||||
@ -1711,7 +1716,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
|
||||
else /* side == C_SYNC_SOURCE */
|
||||
ns.pdsk = D_INCONSISTENT;
|
||||
|
||||
r = __drbd_set_state(device, ns, CS_VERBOSE, NULL);
|
||||
r = _drbd_set_state(device, ns, CS_VERBOSE, NULL);
|
||||
ns = drbd_read_state(device);
|
||||
|
||||
if (ns.conn < C_CONNECTED)
|
||||
@ -1732,7 +1737,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
|
||||
device->rs_mark_left[i] = tw;
|
||||
device->rs_mark_time[i] = now;
|
||||
}
|
||||
_drbd_pause_after(device);
|
||||
drbd_pause_after(device);
|
||||
/* Forget potentially stale cached per resync extent bit-counts.
|
||||
* Open coded drbd_rs_cancel_all(device), we already have IRQs
|
||||
* disabled, and know the disk state is ok. */
|
||||
@ -1742,8 +1747,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
|
||||
device->resync_wenr = LC_FREE;
|
||||
spin_unlock(&device->al_lock);
|
||||
}
|
||||
write_unlock(&global_state_lock);
|
||||
spin_unlock_irq(&device->resource->req_lock);
|
||||
unlock_all_resources();
|
||||
|
||||
if (r == SS_SUCCESS) {
|
||||
wake_up(&device->al_wait); /* for lc_reset() above */
|
||||
@ -1807,6 +1811,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
|
||||
drbd_md_sync(device);
|
||||
}
|
||||
put_ldev(device);
|
||||
out:
|
||||
mutex_unlock(device->state_mutex);
|
||||
}
|
||||
|
||||
@ -1836,7 +1841,7 @@ static void drbd_ldev_destroy(struct drbd_device *device)
|
||||
device->act_log = NULL;
|
||||
|
||||
__acquire(local);
|
||||
drbd_free_ldev(device->ldev);
|
||||
drbd_backing_dev_free(device, device->ldev);
|
||||
device->ldev = NULL;
|
||||
__release(local);
|
||||
|
||||
|
@ -104,9 +104,9 @@
|
||||
/* Device instance number, incremented each time a device is probed. */
|
||||
static int instance;
|
||||
|
||||
struct list_head online_list;
|
||||
struct list_head removing_list;
|
||||
spinlock_t dev_lock;
|
||||
static struct list_head online_list;
|
||||
static struct list_head removing_list;
|
||||
static spinlock_t dev_lock;
|
||||
|
||||
/*
|
||||
* Global variable used to hold the major block device number
|
||||
|
@ -495,17 +495,17 @@ static int null_lnvm_id(struct nvm_dev *dev, struct nvm_id *id)
|
||||
id->ppaf.ch_offset = 56;
|
||||
id->ppaf.ch_len = 8;
|
||||
|
||||
do_div(size, bs); /* convert size to pages */
|
||||
do_div(size, 256); /* concert size to pgs pr blk */
|
||||
sector_div(size, bs); /* convert size to pages */
|
||||
size >>= 8; /* convert size to pages per block */
|
||||
grp = &id->groups[0];
|
||||
grp->mtype = 0;
|
||||
grp->fmtype = 0;
|
||||
grp->num_ch = 1;
|
||||
grp->num_pg = 256;
|
||||
blksize = size;
|
||||
do_div(size, (1 << 16));
|
||||
size >>= 16;
|
||||
grp->num_lun = size + 1;
|
||||
do_div(blksize, grp->num_lun);
|
||||
sector_div(blksize, grp->num_lun);
|
||||
grp->num_blk = blksize;
|
||||
grp->num_pln = 1;
|
||||
|
||||
|
@ -23,7 +23,7 @@
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/time.h>
|
||||
#include <linux/ktime.h>
|
||||
#include <linux/hdreg.h>
|
||||
#include <linux/dma-mapping.h>
|
||||
#include <linux/completion.h>
|
||||
@ -671,16 +671,15 @@ static int carm_send_special (struct carm_host *host, carm_sspc_t func)
|
||||
static unsigned int carm_fill_sync_time(struct carm_host *host,
|
||||
unsigned int idx, void *mem)
|
||||
{
|
||||
struct timeval tv;
|
||||
struct carm_msg_sync_time *st = mem;
|
||||
|
||||
do_gettimeofday(&tv);
|
||||
time64_t tv = ktime_get_real_seconds();
|
||||
|
||||
memset(st, 0, sizeof(*st));
|
||||
st->type = CARM_MSG_MISC;
|
||||
st->subtype = MISC_SET_TIME;
|
||||
st->handle = cpu_to_le32(TAG_ENCODE(idx));
|
||||
st->timestamp = cpu_to_le32(tv.tv_sec);
|
||||
st->timestamp = cpu_to_le32(tv);
|
||||
|
||||
return sizeof(struct carm_msg_sync_time);
|
||||
}
|
||||
|
@ -83,6 +83,16 @@ module_param_named(max_persistent_grants, xen_blkif_max_pgrants, int, 0644);
|
||||
MODULE_PARM_DESC(max_persistent_grants,
|
||||
"Maximum number of grants to map persistently");
|
||||
|
||||
/*
|
||||
* Maximum number of rings/queues blkback supports. Allow as many queues as there
|
||||
* are CPUs if the user has not specified a value.
|
||||
*/
|
||||
/* Exposed as module parameter "max_queues" with mode 0644. */
unsigned int xenblk_max_queues;
|
||||
module_param_named(max_queues, xenblk_max_queues, uint, 0644);
|
||||
MODULE_PARM_DESC(max_queues,
|
||||
/* Trailing space: the two literals are concatenated into one description. */
"Maximum number of hardware queues per virtual disk. " \
|
||||
"By default it is the number of online CPUs.");
|
||||
|
||||
/*
|
||||
* Maximum order of pages to be used for the shared ring between front and
|
||||
* backend, 4KB page granularity is used.
|
||||
@ -113,71 +123,71 @@ module_param(log_stats, int, 0644);
|
||||
/* Number of free pages to remove on each call to gnttab_free_pages */
|
||||
#define NUM_BATCH_FREE_PAGES 10
|
||||
|
||||
static inline int get_free_page(struct xen_blkif *blkif, struct page **page)
|
||||
static inline int get_free_page(struct xen_blkif_ring *ring, struct page **page)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&blkif->free_pages_lock, flags);
|
||||
if (list_empty(&blkif->free_pages)) {
|
||||
BUG_ON(blkif->free_pages_num != 0);
|
||||
spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
|
||||
spin_lock_irqsave(&ring->free_pages_lock, flags);
|
||||
if (list_empty(&ring->free_pages)) {
|
||||
BUG_ON(ring->free_pages_num != 0);
|
||||
spin_unlock_irqrestore(&ring->free_pages_lock, flags);
|
||||
return gnttab_alloc_pages(1, page);
|
||||
}
|
||||
BUG_ON(blkif->free_pages_num == 0);
|
||||
page[0] = list_first_entry(&blkif->free_pages, struct page, lru);
|
||||
BUG_ON(ring->free_pages_num == 0);
|
||||
page[0] = list_first_entry(&ring->free_pages, struct page, lru);
|
||||
list_del(&page[0]->lru);
|
||||
blkif->free_pages_num--;
|
||||
spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
|
||||
ring->free_pages_num--;
|
||||
spin_unlock_irqrestore(&ring->free_pages_lock, flags);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void put_free_pages(struct xen_blkif *blkif, struct page **page,
|
||||
static inline void put_free_pages(struct xen_blkif_ring *ring, struct page **page,
|
||||
int num)
|
||||
{
|
||||
unsigned long flags;
|
||||
int i;
|
||||
|
||||
spin_lock_irqsave(&blkif->free_pages_lock, flags);
|
||||
spin_lock_irqsave(&ring->free_pages_lock, flags);
|
||||
for (i = 0; i < num; i++)
|
||||
list_add(&page[i]->lru, &blkif->free_pages);
|
||||
blkif->free_pages_num += num;
|
||||
spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
|
||||
list_add(&page[i]->lru, &ring->free_pages);
|
||||
ring->free_pages_num += num;
|
||||
spin_unlock_irqrestore(&ring->free_pages_lock, flags);
|
||||
}
|
||||
|
||||
static inline void shrink_free_pagepool(struct xen_blkif *blkif, int num)
|
||||
static inline void shrink_free_pagepool(struct xen_blkif_ring *ring, int num)
|
||||
{
|
||||
/* Remove requested pages in batches of NUM_BATCH_FREE_PAGES */
|
||||
struct page *page[NUM_BATCH_FREE_PAGES];
|
||||
unsigned int num_pages = 0;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&blkif->free_pages_lock, flags);
|
||||
while (blkif->free_pages_num > num) {
|
||||
BUG_ON(list_empty(&blkif->free_pages));
|
||||
page[num_pages] = list_first_entry(&blkif->free_pages,
|
||||
spin_lock_irqsave(&ring->free_pages_lock, flags);
|
||||
while (ring->free_pages_num > num) {
|
||||
BUG_ON(list_empty(&ring->free_pages));
|
||||
page[num_pages] = list_first_entry(&ring->free_pages,
|
||||
struct page, lru);
|
||||
list_del(&page[num_pages]->lru);
|
||||
blkif->free_pages_num--;
|
||||
ring->free_pages_num--;
|
||||
if (++num_pages == NUM_BATCH_FREE_PAGES) {
|
||||
spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
|
||||
spin_unlock_irqrestore(&ring->free_pages_lock, flags);
|
||||
gnttab_free_pages(num_pages, page);
|
||||
spin_lock_irqsave(&blkif->free_pages_lock, flags);
|
||||
spin_lock_irqsave(&ring->free_pages_lock, flags);
|
||||
num_pages = 0;
|
||||
}
|
||||
}
|
||||
spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
|
||||
spin_unlock_irqrestore(&ring->free_pages_lock, flags);
|
||||
if (num_pages != 0)
|
||||
gnttab_free_pages(num_pages, page);
|
||||
}
|
||||
|
||||
#define vaddr(page) ((unsigned long)pfn_to_kaddr(page_to_pfn(page)))
|
||||
|
||||
static int do_block_io_op(struct xen_blkif *blkif);
|
||||
static int dispatch_rw_block_io(struct xen_blkif *blkif,
|
||||
static int do_block_io_op(struct xen_blkif_ring *ring);
|
||||
static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
|
||||
struct blkif_request *req,
|
||||
struct pending_req *pending_req);
|
||||
static void make_response(struct xen_blkif *blkif, u64 id,
|
||||
static void make_response(struct xen_blkif_ring *ring, u64 id,
|
||||
unsigned short op, int st);
|
||||
|
||||
#define foreach_grant_safe(pos, n, rbtree, node) \
|
||||
@ -190,7 +200,7 @@ static void make_response(struct xen_blkif *blkif, u64 id,
|
||||
|
||||
/*
|
||||
* We don't need locking around the persistent grant helpers
|
||||
* because blkback uses a single-thread for each backed, so we
|
||||
* because blkback uses a single-thread for each backend, so we
|
||||
* can be sure that these functions will never be called recursively.
|
||||
*
|
||||
* The only exception to that is put_persistent_grant, that can be called
|
||||
@ -198,19 +208,20 @@ static void make_response(struct xen_blkif *blkif, u64 id,
|
||||
* bit operations to modify the flags of a persistent grant and to count
|
||||
* the number of used grants.
|
||||
*/
|
||||
static int add_persistent_gnt(struct xen_blkif *blkif,
|
||||
static int add_persistent_gnt(struct xen_blkif_ring *ring,
|
||||
struct persistent_gnt *persistent_gnt)
|
||||
{
|
||||
struct rb_node **new = NULL, *parent = NULL;
|
||||
struct persistent_gnt *this;
|
||||
struct xen_blkif *blkif = ring->blkif;
|
||||
|
||||
if (blkif->persistent_gnt_c >= xen_blkif_max_pgrants) {
|
||||
if (ring->persistent_gnt_c >= xen_blkif_max_pgrants) {
|
||||
if (!blkif->vbd.overflow_max_grants)
|
||||
blkif->vbd.overflow_max_grants = 1;
|
||||
return -EBUSY;
|
||||
}
|
||||
/* Figure out where to put new node */
|
||||
new = &blkif->persistent_gnts.rb_node;
|
||||
new = &ring->persistent_gnts.rb_node;
|
||||
while (*new) {
|
||||
this = container_of(*new, struct persistent_gnt, node);
|
||||
|
||||
@ -229,19 +240,19 @@ static int add_persistent_gnt(struct xen_blkif *blkif,
|
||||
set_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags);
|
||||
/* Add new node and rebalance tree. */
|
||||
rb_link_node(&(persistent_gnt->node), parent, new);
|
||||
rb_insert_color(&(persistent_gnt->node), &blkif->persistent_gnts);
|
||||
blkif->persistent_gnt_c++;
|
||||
atomic_inc(&blkif->persistent_gnt_in_use);
|
||||
rb_insert_color(&(persistent_gnt->node), &ring->persistent_gnts);
|
||||
ring->persistent_gnt_c++;
|
||||
atomic_inc(&ring->persistent_gnt_in_use);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct persistent_gnt *get_persistent_gnt(struct xen_blkif *blkif,
|
||||
static struct persistent_gnt *get_persistent_gnt(struct xen_blkif_ring *ring,
|
||||
grant_ref_t gref)
|
||||
{
|
||||
struct persistent_gnt *data;
|
||||
struct rb_node *node = NULL;
|
||||
|
||||
node = blkif->persistent_gnts.rb_node;
|
||||
node = ring->persistent_gnts.rb_node;
|
||||
while (node) {
|
||||
data = container_of(node, struct persistent_gnt, node);
|
||||
|
||||
@ -255,24 +266,24 @@ static struct persistent_gnt *get_persistent_gnt(struct xen_blkif *blkif,
|
||||
return NULL;
|
||||
}
|
||||
set_bit(PERSISTENT_GNT_ACTIVE, data->flags);
|
||||
atomic_inc(&blkif->persistent_gnt_in_use);
|
||||
atomic_inc(&ring->persistent_gnt_in_use);
|
||||
return data;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void put_persistent_gnt(struct xen_blkif *blkif,
|
||||
static void put_persistent_gnt(struct xen_blkif_ring *ring,
|
||||
struct persistent_gnt *persistent_gnt)
|
||||
{
|
||||
if(!test_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags))
|
||||
pr_alert_ratelimited("freeing a grant already unused\n");
|
||||
set_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags);
|
||||
clear_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags);
|
||||
atomic_dec(&blkif->persistent_gnt_in_use);
|
||||
atomic_dec(&ring->persistent_gnt_in_use);
|
||||
}
|
||||
|
||||
static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
|
||||
static void free_persistent_gnts(struct xen_blkif_ring *ring, struct rb_root *root,
|
||||
unsigned int num)
|
||||
{
|
||||
struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
|
||||
@ -303,7 +314,7 @@ static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
|
||||
unmap_data.count = segs_to_unmap;
|
||||
BUG_ON(gnttab_unmap_refs_sync(&unmap_data));
|
||||
|
||||
put_free_pages(blkif, pages, segs_to_unmap);
|
||||
put_free_pages(ring, pages, segs_to_unmap);
|
||||
segs_to_unmap = 0;
|
||||
}
|
||||
|
||||
@ -320,15 +331,15 @@ void xen_blkbk_unmap_purged_grants(struct work_struct *work)
|
||||
struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
|
||||
struct persistent_gnt *persistent_gnt;
|
||||
int segs_to_unmap = 0;
|
||||
struct xen_blkif *blkif = container_of(work, typeof(*blkif), persistent_purge_work);
|
||||
struct xen_blkif_ring *ring = container_of(work, typeof(*ring), persistent_purge_work);
|
||||
struct gntab_unmap_queue_data unmap_data;
|
||||
|
||||
unmap_data.pages = pages;
|
||||
unmap_data.unmap_ops = unmap;
|
||||
unmap_data.kunmap_ops = NULL;
|
||||
|
||||
while(!list_empty(&blkif->persistent_purge_list)) {
|
||||
persistent_gnt = list_first_entry(&blkif->persistent_purge_list,
|
||||
while(!list_empty(&ring->persistent_purge_list)) {
|
||||
persistent_gnt = list_first_entry(&ring->persistent_purge_list,
|
||||
struct persistent_gnt,
|
||||
remove_node);
|
||||
list_del(&persistent_gnt->remove_node);
|
||||
@ -343,7 +354,7 @@ void xen_blkbk_unmap_purged_grants(struct work_struct *work)
|
||||
if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST) {
|
||||
unmap_data.count = segs_to_unmap;
|
||||
BUG_ON(gnttab_unmap_refs_sync(&unmap_data));
|
||||
put_free_pages(blkif, pages, segs_to_unmap);
|
||||
put_free_pages(ring, pages, segs_to_unmap);
|
||||
segs_to_unmap = 0;
|
||||
}
|
||||
kfree(persistent_gnt);
|
||||
@ -351,11 +362,11 @@ void xen_blkbk_unmap_purged_grants(struct work_struct *work)
|
||||
if (segs_to_unmap > 0) {
|
||||
unmap_data.count = segs_to_unmap;
|
||||
BUG_ON(gnttab_unmap_refs_sync(&unmap_data));
|
||||
put_free_pages(blkif, pages, segs_to_unmap);
|
||||
put_free_pages(ring, pages, segs_to_unmap);
|
||||
}
|
||||
}
|
||||
|
||||
static void purge_persistent_gnt(struct xen_blkif *blkif)
|
||||
static void purge_persistent_gnt(struct xen_blkif_ring *ring)
|
||||
{
|
||||
struct persistent_gnt *persistent_gnt;
|
||||
struct rb_node *n;
|
||||
@ -363,23 +374,23 @@ static void purge_persistent_gnt(struct xen_blkif *blkif)
|
||||
bool scan_used = false, clean_used = false;
|
||||
struct rb_root *root;
|
||||
|
||||
if (blkif->persistent_gnt_c < xen_blkif_max_pgrants ||
|
||||
(blkif->persistent_gnt_c == xen_blkif_max_pgrants &&
|
||||
!blkif->vbd.overflow_max_grants)) {
|
||||
return;
|
||||
if (ring->persistent_gnt_c < xen_blkif_max_pgrants ||
|
||||
(ring->persistent_gnt_c == xen_blkif_max_pgrants &&
|
||||
!ring->blkif->vbd.overflow_max_grants)) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (work_busy(&blkif->persistent_purge_work)) {
|
||||
if (work_busy(&ring->persistent_purge_work)) {
|
||||
pr_alert_ratelimited("Scheduled work from previous purge is still busy, cannot purge list\n");
|
||||
return;
|
||||
goto out;
|
||||
}
|
||||
|
||||
num_clean = (xen_blkif_max_pgrants / 100) * LRU_PERCENT_CLEAN;
|
||||
num_clean = blkif->persistent_gnt_c - xen_blkif_max_pgrants + num_clean;
|
||||
num_clean = min(blkif->persistent_gnt_c, num_clean);
|
||||
num_clean = ring->persistent_gnt_c - xen_blkif_max_pgrants + num_clean;
|
||||
num_clean = min(ring->persistent_gnt_c, num_clean);
|
||||
if ((num_clean == 0) ||
|
||||
(num_clean > (blkif->persistent_gnt_c - atomic_read(&blkif->persistent_gnt_in_use))))
|
||||
return;
|
||||
(num_clean > (ring->persistent_gnt_c - atomic_read(&ring->persistent_gnt_in_use))))
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* At this point, we can assure that there will be no calls
|
||||
@ -394,8 +405,8 @@ static void purge_persistent_gnt(struct xen_blkif *blkif)
|
||||
|
||||
pr_debug("Going to purge %u persistent grants\n", num_clean);
|
||||
|
||||
BUG_ON(!list_empty(&blkif->persistent_purge_list));
|
||||
root = &blkif->persistent_gnts;
|
||||
BUG_ON(!list_empty(&ring->persistent_purge_list));
|
||||
root = &ring->persistent_gnts;
|
||||
purge_list:
|
||||
foreach_grant_safe(persistent_gnt, n, root, node) {
|
||||
BUG_ON(persistent_gnt->handle ==
|
||||
@ -414,7 +425,7 @@ purge_list:
|
||||
|
||||
rb_erase(&persistent_gnt->node, root);
|
||||
list_add(&persistent_gnt->remove_node,
|
||||
&blkif->persistent_purge_list);
|
||||
&ring->persistent_purge_list);
|
||||
if (--num_clean == 0)
|
||||
goto finished;
|
||||
}
|
||||
@ -435,30 +446,32 @@ finished:
|
||||
goto purge_list;
|
||||
}
|
||||
|
||||
blkif->persistent_gnt_c -= (total - num_clean);
|
||||
blkif->vbd.overflow_max_grants = 0;
|
||||
ring->persistent_gnt_c -= (total - num_clean);
|
||||
ring->blkif->vbd.overflow_max_grants = 0;
|
||||
|
||||
/* We can defer this work */
|
||||
schedule_work(&blkif->persistent_purge_work);
|
||||
schedule_work(&ring->persistent_purge_work);
|
||||
pr_debug("Purged %u/%u\n", (total - num_clean), total);
|
||||
|
||||
out:
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Retrieve from the 'pending_reqs' a free pending_req structure to be used.
|
||||
*/
|
||||
static struct pending_req *alloc_req(struct xen_blkif *blkif)
|
||||
static struct pending_req *alloc_req(struct xen_blkif_ring *ring)
|
||||
{
|
||||
struct pending_req *req = NULL;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&blkif->pending_free_lock, flags);
|
||||
if (!list_empty(&blkif->pending_free)) {
|
||||
req = list_entry(blkif->pending_free.next, struct pending_req,
|
||||
spin_lock_irqsave(&ring->pending_free_lock, flags);
|
||||
if (!list_empty(&ring->pending_free)) {
|
||||
req = list_entry(ring->pending_free.next, struct pending_req,
|
||||
free_list);
|
||||
list_del(&req->free_list);
|
||||
}
|
||||
spin_unlock_irqrestore(&blkif->pending_free_lock, flags);
|
||||
spin_unlock_irqrestore(&ring->pending_free_lock, flags);
|
||||
return req;
|
||||
}
|
||||
|
||||
@ -466,17 +479,17 @@ static struct pending_req *alloc_req(struct xen_blkif *blkif)
|
||||
* Return the 'pending_req' structure back to the freepool. We also
|
||||
* wake up the thread if it was waiting for a free page.
|
||||
*/
|
||||
static void free_req(struct xen_blkif *blkif, struct pending_req *req)
|
||||
static void free_req(struct xen_blkif_ring *ring, struct pending_req *req)
|
||||
{
|
||||
unsigned long flags;
|
||||
int was_empty;
|
||||
|
||||
spin_lock_irqsave(&blkif->pending_free_lock, flags);
|
||||
was_empty = list_empty(&blkif->pending_free);
|
||||
list_add(&req->free_list, &blkif->pending_free);
|
||||
spin_unlock_irqrestore(&blkif->pending_free_lock, flags);
|
||||
spin_lock_irqsave(&ring->pending_free_lock, flags);
|
||||
was_empty = list_empty(&ring->pending_free);
|
||||
list_add(&req->free_list, &ring->pending_free);
|
||||
spin_unlock_irqrestore(&ring->pending_free_lock, flags);
|
||||
if (was_empty)
|
||||
wake_up(&blkif->pending_free_wq);
|
||||
wake_up(&ring->pending_free_wq);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -556,10 +569,10 @@ abort:
|
||||
/*
|
||||
* Notification from the guest OS.
|
||||
*/
|
||||
static void blkif_notify_work(struct xen_blkif *blkif)
|
||||
static void blkif_notify_work(struct xen_blkif_ring *ring)
|
||||
{
|
||||
blkif->waiting_reqs = 1;
|
||||
wake_up(&blkif->wq);
|
||||
ring->waiting_reqs = 1;
|
||||
wake_up(&ring->wq);
|
||||
}
|
||||
|
||||
irqreturn_t xen_blkif_be_int(int irq, void *dev_id)
|
||||
@ -572,31 +585,33 @@ irqreturn_t xen_blkif_be_int(int irq, void *dev_id)
|
||||
* SCHEDULER FUNCTIONS
|
||||
*/
|
||||
|
||||
static void print_stats(struct xen_blkif *blkif)
|
||||
static void print_stats(struct xen_blkif_ring *ring)
|
||||
{
|
||||
pr_info("(%s): oo %3llu | rd %4llu | wr %4llu | f %4llu"
|
||||
" | ds %4llu | pg: %4u/%4d\n",
|
||||
current->comm, blkif->st_oo_req,
|
||||
blkif->st_rd_req, blkif->st_wr_req,
|
||||
blkif->st_f_req, blkif->st_ds_req,
|
||||
blkif->persistent_gnt_c,
|
||||
current->comm, ring->st_oo_req,
|
||||
ring->st_rd_req, ring->st_wr_req,
|
||||
ring->st_f_req, ring->st_ds_req,
|
||||
ring->persistent_gnt_c,
|
||||
xen_blkif_max_pgrants);
|
||||
blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
|
||||
blkif->st_rd_req = 0;
|
||||
blkif->st_wr_req = 0;
|
||||
blkif->st_oo_req = 0;
|
||||
blkif->st_ds_req = 0;
|
||||
ring->st_print = jiffies + msecs_to_jiffies(10 * 1000);
|
||||
ring->st_rd_req = 0;
|
||||
ring->st_wr_req = 0;
|
||||
ring->st_oo_req = 0;
|
||||
ring->st_ds_req = 0;
|
||||
}
|
||||
|
||||
int xen_blkif_schedule(void *arg)
|
||||
{
|
||||
struct xen_blkif *blkif = arg;
|
||||
struct xen_blkif_ring *ring = arg;
|
||||
struct xen_blkif *blkif = ring->blkif;
|
||||
struct xen_vbd *vbd = &blkif->vbd;
|
||||
unsigned long timeout;
|
||||
int ret;
|
||||
|
||||
xen_blkif_get(blkif);
|
||||
|
||||
set_freezable();
|
||||
while (!kthread_should_stop()) {
|
||||
if (try_to_freeze())
|
||||
continue;
|
||||
@ -606,50 +621,50 @@ int xen_blkif_schedule(void *arg)
|
||||
timeout = msecs_to_jiffies(LRU_INTERVAL);
|
||||
|
||||
timeout = wait_event_interruptible_timeout(
|
||||
blkif->wq,
|
||||
blkif->waiting_reqs || kthread_should_stop(),
|
||||
ring->wq,
|
||||
ring->waiting_reqs || kthread_should_stop(),
|
||||
timeout);
|
||||
if (timeout == 0)
|
||||
goto purge_gnt_list;
|
||||
timeout = wait_event_interruptible_timeout(
|
||||
blkif->pending_free_wq,
|
||||
!list_empty(&blkif->pending_free) ||
|
||||
ring->pending_free_wq,
|
||||
!list_empty(&ring->pending_free) ||
|
||||
kthread_should_stop(),
|
||||
timeout);
|
||||
if (timeout == 0)
|
||||
goto purge_gnt_list;
|
||||
|
||||
blkif->waiting_reqs = 0;
|
||||
ring->waiting_reqs = 0;
|
||||
smp_mb(); /* clear flag *before* checking for work */
|
||||
|
||||
ret = do_block_io_op(blkif);
|
||||
ret = do_block_io_op(ring);
|
||||
if (ret > 0)
|
||||
blkif->waiting_reqs = 1;
|
||||
ring->waiting_reqs = 1;
|
||||
if (ret == -EACCES)
|
||||
wait_event_interruptible(blkif->shutdown_wq,
|
||||
wait_event_interruptible(ring->shutdown_wq,
|
||||
kthread_should_stop());
|
||||
|
||||
purge_gnt_list:
|
||||
if (blkif->vbd.feature_gnt_persistent &&
|
||||
time_after(jiffies, blkif->next_lru)) {
|
||||
purge_persistent_gnt(blkif);
|
||||
blkif->next_lru = jiffies + msecs_to_jiffies(LRU_INTERVAL);
|
||||
time_after(jiffies, ring->next_lru)) {
|
||||
purge_persistent_gnt(ring);
|
||||
ring->next_lru = jiffies + msecs_to_jiffies(LRU_INTERVAL);
|
||||
}
|
||||
|
||||
/* Shrink if we have more than xen_blkif_max_buffer_pages */
|
||||
shrink_free_pagepool(blkif, xen_blkif_max_buffer_pages);
|
||||
shrink_free_pagepool(ring, xen_blkif_max_buffer_pages);
|
||||
|
||||
if (log_stats && time_after(jiffies, blkif->st_print))
|
||||
print_stats(blkif);
|
||||
if (log_stats && time_after(jiffies, ring->st_print))
|
||||
print_stats(ring);
|
||||
}
|
||||
|
||||
/* Drain pending purge work */
|
||||
flush_work(&blkif->persistent_purge_work);
|
||||
flush_work(&ring->persistent_purge_work);
|
||||
|
||||
if (log_stats)
|
||||
print_stats(blkif);
|
||||
print_stats(ring);
|
||||
|
||||
blkif->xenblkd = NULL;
|
||||
ring->xenblkd = NULL;
|
||||
xen_blkif_put(blkif);
|
||||
|
||||
return 0;
|
||||
@ -658,22 +673,22 @@ purge_gnt_list:
|
||||
/*
|
||||
* Remove persistent grants and empty the pool of free pages
|
||||
*/
|
||||
void xen_blkbk_free_caches(struct xen_blkif *blkif)
|
||||
void xen_blkbk_free_caches(struct xen_blkif_ring *ring)
|
||||
{
|
||||
/* Free all persistent grant pages */
|
||||
if (!RB_EMPTY_ROOT(&blkif->persistent_gnts))
|
||||
free_persistent_gnts(blkif, &blkif->persistent_gnts,
|
||||
blkif->persistent_gnt_c);
|
||||
if (!RB_EMPTY_ROOT(&ring->persistent_gnts))
|
||||
free_persistent_gnts(ring, &ring->persistent_gnts,
|
||||
ring->persistent_gnt_c);
|
||||
|
||||
BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));
|
||||
blkif->persistent_gnt_c = 0;
|
||||
BUG_ON(!RB_EMPTY_ROOT(&ring->persistent_gnts));
|
||||
ring->persistent_gnt_c = 0;
|
||||
|
||||
/* Since we are shutting down remove all pages from the buffer */
|
||||
shrink_free_pagepool(blkif, 0 /* All */);
|
||||
shrink_free_pagepool(ring, 0 /* All */);
|
||||
}
|
||||
|
||||
static unsigned int xen_blkbk_unmap_prepare(
|
||||
struct xen_blkif *blkif,
|
||||
struct xen_blkif_ring *ring,
|
||||
struct grant_page **pages,
|
||||
unsigned int num,
|
||||
struct gnttab_unmap_grant_ref *unmap_ops,
|
||||
@ -683,7 +698,7 @@ static unsigned int xen_blkbk_unmap_prepare(
|
||||
|
||||
for (i = 0; i < num; i++) {
|
||||
if (pages[i]->persistent_gnt != NULL) {
|
||||
put_persistent_gnt(blkif, pages[i]->persistent_gnt);
|
||||
put_persistent_gnt(ring, pages[i]->persistent_gnt);
|
||||
continue;
|
||||
}
|
||||
if (pages[i]->handle == BLKBACK_INVALID_HANDLE)
|
||||
@ -700,17 +715,18 @@ static unsigned int xen_blkbk_unmap_prepare(
|
||||
|
||||
static void xen_blkbk_unmap_and_respond_callback(int result, struct gntab_unmap_queue_data *data)
|
||||
{
|
||||
struct pending_req* pending_req = (struct pending_req*) (data->data);
|
||||
struct xen_blkif *blkif = pending_req->blkif;
|
||||
struct pending_req *pending_req = (struct pending_req *)(data->data);
|
||||
struct xen_blkif_ring *ring = pending_req->ring;
|
||||
struct xen_blkif *blkif = ring->blkif;
|
||||
|
||||
/* BUG_ON used to reproduce existing behaviour,
|
||||
but is this the best way to deal with this? */
|
||||
BUG_ON(result);
|
||||
|
||||
put_free_pages(blkif, data->pages, data->count);
|
||||
make_response(blkif, pending_req->id,
|
||||
put_free_pages(ring, data->pages, data->count);
|
||||
make_response(ring, pending_req->id,
|
||||
pending_req->operation, pending_req->status);
|
||||
free_req(blkif, pending_req);
|
||||
free_req(ring, pending_req);
|
||||
/*
|
||||
* Make sure the request is freed before releasing blkif,
|
||||
* or there could be a race between free_req and the
|
||||
@ -723,7 +739,7 @@ static void xen_blkbk_unmap_and_respond_callback(int result, struct gntab_unmap_
|
||||
* pending_free_wq if there's a drain going on, but it has
|
||||
* to be taken into account if the current model is changed.
|
||||
*/
|
||||
if (atomic_dec_and_test(&blkif->inflight) && atomic_read(&blkif->drain)) {
|
||||
if (atomic_dec_and_test(&ring->inflight) && atomic_read(&blkif->drain)) {
|
||||
complete(&blkif->drain_complete);
|
||||
}
|
||||
xen_blkif_put(blkif);
|
||||
@ -732,11 +748,11 @@ static void xen_blkbk_unmap_and_respond_callback(int result, struct gntab_unmap_
|
||||
static void xen_blkbk_unmap_and_respond(struct pending_req *req)
|
||||
{
|
||||
struct gntab_unmap_queue_data* work = &req->gnttab_unmap_data;
|
||||
struct xen_blkif *blkif = req->blkif;
|
||||
struct xen_blkif_ring *ring = req->ring;
|
||||
struct grant_page **pages = req->segments;
|
||||
unsigned int invcount;
|
||||
|
||||
invcount = xen_blkbk_unmap_prepare(blkif, pages, req->nr_segs,
|
||||
invcount = xen_blkbk_unmap_prepare(ring, pages, req->nr_segs,
|
||||
req->unmap, req->unmap_pages);
|
||||
|
||||
work->data = req;
|
||||
@ -757,7 +773,7 @@ static void xen_blkbk_unmap_and_respond(struct pending_req *req)
|
||||
* of hypercalls, but since this is only used in error paths there's
|
||||
* no real need.
|
||||
*/
|
||||
static void xen_blkbk_unmap(struct xen_blkif *blkif,
|
||||
static void xen_blkbk_unmap(struct xen_blkif_ring *ring,
|
||||
struct grant_page *pages[],
|
||||
int num)
|
||||
{
|
||||
@ -768,20 +784,20 @@ static void xen_blkbk_unmap(struct xen_blkif *blkif,
|
||||
|
||||
while (num) {
|
||||
unsigned int batch = min(num, BLKIF_MAX_SEGMENTS_PER_REQUEST);
|
||||
|
||||
invcount = xen_blkbk_unmap_prepare(blkif, pages, batch,
|
||||
|
||||
invcount = xen_blkbk_unmap_prepare(ring, pages, batch,
|
||||
unmap, unmap_pages);
|
||||
if (invcount) {
|
||||
ret = gnttab_unmap_refs(unmap, NULL, unmap_pages, invcount);
|
||||
BUG_ON(ret);
|
||||
put_free_pages(blkif, unmap_pages, invcount);
|
||||
put_free_pages(ring, unmap_pages, invcount);
|
||||
}
|
||||
pages += batch;
|
||||
num -= batch;
|
||||
}
|
||||
}
|
||||
|
||||
static int xen_blkbk_map(struct xen_blkif *blkif,
|
||||
static int xen_blkbk_map(struct xen_blkif_ring *ring,
|
||||
struct grant_page *pages[],
|
||||
int num, bool ro)
|
||||
{
|
||||
@ -794,6 +810,7 @@ static int xen_blkbk_map(struct xen_blkif *blkif,
|
||||
int ret = 0;
|
||||
int last_map = 0, map_until = 0;
|
||||
int use_persistent_gnts;
|
||||
struct xen_blkif *blkif = ring->blkif;
|
||||
|
||||
use_persistent_gnts = (blkif->vbd.feature_gnt_persistent);
|
||||
|
||||
@ -806,10 +823,11 @@ again:
|
||||
for (i = map_until; i < num; i++) {
|
||||
uint32_t flags;
|
||||
|
||||
if (use_persistent_gnts)
|
||||
if (use_persistent_gnts) {
|
||||
persistent_gnt = get_persistent_gnt(
|
||||
blkif,
|
||||
ring,
|
||||
pages[i]->gref);
|
||||
}
|
||||
|
||||
if (persistent_gnt) {
|
||||
/*
|
||||
@ -819,7 +837,7 @@ again:
|
||||
pages[i]->page = persistent_gnt->page;
|
||||
pages[i]->persistent_gnt = persistent_gnt;
|
||||
} else {
|
||||
if (get_free_page(blkif, &pages[i]->page))
|
||||
if (get_free_page(ring, &pages[i]->page))
|
||||
goto out_of_memory;
|
||||
addr = vaddr(pages[i]->page);
|
||||
pages_to_gnt[segs_to_map] = pages[i]->page;
|
||||
@ -852,7 +870,7 @@ again:
|
||||
BUG_ON(new_map_idx >= segs_to_map);
|
||||
if (unlikely(map[new_map_idx].status != 0)) {
|
||||
pr_debug("invalid buffer -- could not remap it\n");
|
||||
put_free_pages(blkif, &pages[seg_idx]->page, 1);
|
||||
put_free_pages(ring, &pages[seg_idx]->page, 1);
|
||||
pages[seg_idx]->handle = BLKBACK_INVALID_HANDLE;
|
||||
ret |= 1;
|
||||
goto next;
|
||||
@ -862,7 +880,7 @@ again:
|
||||
continue;
|
||||
}
|
||||
if (use_persistent_gnts &&
|
||||
blkif->persistent_gnt_c < xen_blkif_max_pgrants) {
|
||||
ring->persistent_gnt_c < xen_blkif_max_pgrants) {
|
||||
/*
|
||||
* We are using persistent grants, the grant is
|
||||
* not mapped but we might have room for it.
|
||||
@ -880,7 +898,7 @@ again:
|
||||
persistent_gnt->gnt = map[new_map_idx].ref;
|
||||
persistent_gnt->handle = map[new_map_idx].handle;
|
||||
persistent_gnt->page = pages[seg_idx]->page;
|
||||
if (add_persistent_gnt(blkif,
|
||||
if (add_persistent_gnt(ring,
|
||||
persistent_gnt)) {
|
||||
kfree(persistent_gnt);
|
||||
persistent_gnt = NULL;
|
||||
@ -888,7 +906,7 @@ again:
|
||||
}
|
||||
pages[seg_idx]->persistent_gnt = persistent_gnt;
|
||||
pr_debug("grant %u added to the tree of persistent grants, using %u/%u\n",
|
||||
persistent_gnt->gnt, blkif->persistent_gnt_c,
|
||||
persistent_gnt->gnt, ring->persistent_gnt_c,
|
||||
xen_blkif_max_pgrants);
|
||||
goto next;
|
||||
}
|
||||
@ -913,7 +931,7 @@ next:
|
||||
|
||||
out_of_memory:
|
||||
pr_alert("%s: out of memory\n", __func__);
|
||||
put_free_pages(blkif, pages_to_gnt, segs_to_map);
|
||||
put_free_pages(ring, pages_to_gnt, segs_to_map);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
@ -921,7 +939,7 @@ static int xen_blkbk_map_seg(struct pending_req *pending_req)
|
||||
{
|
||||
int rc;
|
||||
|
||||
rc = xen_blkbk_map(pending_req->blkif, pending_req->segments,
|
||||
rc = xen_blkbk_map(pending_req->ring, pending_req->segments,
|
||||
pending_req->nr_segs,
|
||||
(pending_req->operation != BLKIF_OP_READ));
|
||||
|
||||
@ -934,7 +952,7 @@ static int xen_blkbk_parse_indirect(struct blkif_request *req,
|
||||
struct phys_req *preq)
|
||||
{
|
||||
struct grant_page **pages = pending_req->indirect_pages;
|
||||
struct xen_blkif *blkif = pending_req->blkif;
|
||||
struct xen_blkif_ring *ring = pending_req->ring;
|
||||
int indirect_grefs, rc, n, nseg, i;
|
||||
struct blkif_request_segment *segments = NULL;
|
||||
|
||||
@ -945,7 +963,7 @@ static int xen_blkbk_parse_indirect(struct blkif_request *req,
|
||||
for (i = 0; i < indirect_grefs; i++)
|
||||
pages[i]->gref = req->u.indirect.indirect_grefs[i];
|
||||
|
||||
rc = xen_blkbk_map(blkif, pages, indirect_grefs, true);
|
||||
rc = xen_blkbk_map(ring, pages, indirect_grefs, true);
|
||||
if (rc)
|
||||
goto unmap;
|
||||
|
||||
@ -977,15 +995,16 @@ static int xen_blkbk_parse_indirect(struct blkif_request *req,
|
||||
unmap:
|
||||
if (segments)
|
||||
kunmap_atomic(segments);
|
||||
xen_blkbk_unmap(blkif, pages, indirect_grefs);
|
||||
xen_blkbk_unmap(ring, pages, indirect_grefs);
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int dispatch_discard_io(struct xen_blkif *blkif,
|
||||
static int dispatch_discard_io(struct xen_blkif_ring *ring,
|
||||
struct blkif_request *req)
|
||||
{
|
||||
int err = 0;
|
||||
int status = BLKIF_RSP_OKAY;
|
||||
struct xen_blkif *blkif = ring->blkif;
|
||||
struct block_device *bdev = blkif->vbd.bdev;
|
||||
unsigned long secure;
|
||||
struct phys_req preq;
|
||||
@ -1002,7 +1021,7 @@ static int dispatch_discard_io(struct xen_blkif *blkif,
|
||||
preq.sector_number + preq.nr_sects, blkif->vbd.pdevice);
|
||||
goto fail_response;
|
||||
}
|
||||
blkif->st_ds_req++;
|
||||
ring->st_ds_req++;
|
||||
|
||||
secure = (blkif->vbd.discard_secure &&
|
||||
(req->u.discard.flag & BLKIF_DISCARD_SECURE)) ?
|
||||
@ -1018,26 +1037,28 @@ fail_response:
|
||||
} else if (err)
|
||||
status = BLKIF_RSP_ERROR;
|
||||
|
||||
make_response(blkif, req->u.discard.id, req->operation, status);
|
||||
make_response(ring, req->u.discard.id, req->operation, status);
|
||||
xen_blkif_put(blkif);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int dispatch_other_io(struct xen_blkif *blkif,
|
||||
static int dispatch_other_io(struct xen_blkif_ring *ring,
|
||||
struct blkif_request *req,
|
||||
struct pending_req *pending_req)
|
||||
{
|
||||
free_req(blkif, pending_req);
|
||||
make_response(blkif, req->u.other.id, req->operation,
|
||||
free_req(ring, pending_req);
|
||||
make_response(ring, req->u.other.id, req->operation,
|
||||
BLKIF_RSP_EOPNOTSUPP);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
static void xen_blk_drain_io(struct xen_blkif *blkif)
|
||||
static void xen_blk_drain_io(struct xen_blkif_ring *ring)
|
||||
{
|
||||
struct xen_blkif *blkif = ring->blkif;
|
||||
|
||||
atomic_set(&blkif->drain, 1);
|
||||
do {
|
||||
if (atomic_read(&blkif->inflight) == 0)
|
||||
if (atomic_read(&ring->inflight) == 0)
|
||||
break;
|
||||
wait_for_completion_interruptible_timeout(
|
||||
&blkif->drain_complete, HZ);
|
||||
@ -1058,12 +1079,12 @@ static void __end_block_io_op(struct pending_req *pending_req, int error)
|
||||
if ((pending_req->operation == BLKIF_OP_FLUSH_DISKCACHE) &&
|
||||
(error == -EOPNOTSUPP)) {
|
||||
pr_debug("flush diskcache op failed, not supported\n");
|
||||
xen_blkbk_flush_diskcache(XBT_NIL, pending_req->blkif->be, 0);
|
||||
xen_blkbk_flush_diskcache(XBT_NIL, pending_req->ring->blkif->be, 0);
|
||||
pending_req->status = BLKIF_RSP_EOPNOTSUPP;
|
||||
} else if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) &&
|
||||
(error == -EOPNOTSUPP)) {
|
||||
pr_debug("write barrier op failed, not supported\n");
|
||||
xen_blkbk_barrier(XBT_NIL, pending_req->blkif->be, 0);
|
||||
xen_blkbk_barrier(XBT_NIL, pending_req->ring->blkif->be, 0);
|
||||
pending_req->status = BLKIF_RSP_EOPNOTSUPP;
|
||||
} else if (error) {
|
||||
pr_debug("Buffer not up-to-date at end of operation,"
|
||||
@ -1097,9 +1118,9 @@ static void end_block_io_op(struct bio *bio)
|
||||
* and transmute it to the block API to hand it over to the proper block disk.
|
||||
*/
|
||||
static int
|
||||
__do_block_io_op(struct xen_blkif *blkif)
|
||||
__do_block_io_op(struct xen_blkif_ring *ring)
|
||||
{
|
||||
union blkif_back_rings *blk_rings = &blkif->blk_rings;
|
||||
union blkif_back_rings *blk_rings = &ring->blk_rings;
|
||||
struct blkif_request req;
|
||||
struct pending_req *pending_req;
|
||||
RING_IDX rc, rp;
|
||||
@ -1112,7 +1133,7 @@ __do_block_io_op(struct xen_blkif *blkif)
|
||||
if (RING_REQUEST_PROD_OVERFLOW(&blk_rings->common, rp)) {
|
||||
rc = blk_rings->common.rsp_prod_pvt;
|
||||
pr_warn("Frontend provided bogus ring requests (%d - %d = %d). Halting ring processing on dev=%04x\n",
|
||||
rp, rc, rp - rc, blkif->vbd.pdevice);
|
||||
rp, rc, rp - rc, ring->blkif->vbd.pdevice);
|
||||
return -EACCES;
|
||||
}
|
||||
while (rc != rp) {
|
||||
@ -1125,14 +1146,14 @@ __do_block_io_op(struct xen_blkif *blkif)
|
||||
break;
|
||||
}
|
||||
|
||||
pending_req = alloc_req(blkif);
|
||||
pending_req = alloc_req(ring);
|
||||
if (NULL == pending_req) {
|
||||
blkif->st_oo_req++;
|
||||
ring->st_oo_req++;
|
||||
more_to_do = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
switch (blkif->blk_protocol) {
|
||||
switch (ring->blkif->blk_protocol) {
|
||||
case BLKIF_PROTOCOL_NATIVE:
|
||||
memcpy(&req, RING_GET_REQUEST(&blk_rings->native, rc), sizeof(req));
|
||||
break;
|
||||
@ -1156,16 +1177,16 @@ __do_block_io_op(struct xen_blkif *blkif)
|
||||
case BLKIF_OP_WRITE_BARRIER:
|
||||
case BLKIF_OP_FLUSH_DISKCACHE:
|
||||
case BLKIF_OP_INDIRECT:
|
||||
if (dispatch_rw_block_io(blkif, &req, pending_req))
|
||||
if (dispatch_rw_block_io(ring, &req, pending_req))
|
||||
goto done;
|
||||
break;
|
||||
case BLKIF_OP_DISCARD:
|
||||
free_req(blkif, pending_req);
|
||||
if (dispatch_discard_io(blkif, &req))
|
||||
free_req(ring, pending_req);
|
||||
if (dispatch_discard_io(ring, &req))
|
||||
goto done;
|
||||
break;
|
||||
default:
|
||||
if (dispatch_other_io(blkif, &req, pending_req))
|
||||
if (dispatch_other_io(ring, &req, pending_req))
|
||||
goto done;
|
||||
break;
|
||||
}
|
||||
@ -1178,13 +1199,13 @@ done:
|
||||
}
|
||||
|
||||
static int
|
||||
do_block_io_op(struct xen_blkif *blkif)
|
||||
do_block_io_op(struct xen_blkif_ring *ring)
|
||||
{
|
||||
union blkif_back_rings *blk_rings = &blkif->blk_rings;
|
||||
union blkif_back_rings *blk_rings = &ring->blk_rings;
|
||||
int more_to_do;
|
||||
|
||||
do {
|
||||
more_to_do = __do_block_io_op(blkif);
|
||||
more_to_do = __do_block_io_op(ring);
|
||||
if (more_to_do)
|
||||
break;
|
||||
|
||||
@ -1197,7 +1218,7 @@ do_block_io_op(struct xen_blkif *blkif)
|
||||
* Transmutation of the 'struct blkif_request' to a proper 'struct bio'
|
||||
* and call the 'submit_bio' to pass it to the underlying storage.
|
||||
*/
|
||||
static int dispatch_rw_block_io(struct xen_blkif *blkif,
|
||||
static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
|
||||
struct blkif_request *req,
|
||||
struct pending_req *pending_req)
|
||||
{
|
||||
@ -1225,17 +1246,17 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
|
||||
|
||||
switch (req_operation) {
|
||||
case BLKIF_OP_READ:
|
||||
blkif->st_rd_req++;
|
||||
ring->st_rd_req++;
|
||||
operation = READ;
|
||||
break;
|
||||
case BLKIF_OP_WRITE:
|
||||
blkif->st_wr_req++;
|
||||
ring->st_wr_req++;
|
||||
operation = WRITE_ODIRECT;
|
||||
break;
|
||||
case BLKIF_OP_WRITE_BARRIER:
|
||||
drain = true;
|
||||
case BLKIF_OP_FLUSH_DISKCACHE:
|
||||
blkif->st_f_req++;
|
||||
ring->st_f_req++;
|
||||
operation = WRITE_FLUSH;
|
||||
break;
|
||||
default:
|
||||
@ -1260,7 +1281,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
|
||||
|
||||
preq.nr_sects = 0;
|
||||
|
||||
pending_req->blkif = blkif;
|
||||
pending_req->ring = ring;
|
||||
pending_req->id = req->u.rw.id;
|
||||
pending_req->operation = req_operation;
|
||||
pending_req->status = BLKIF_RSP_OKAY;
|
||||
@ -1287,12 +1308,12 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
|
||||
goto fail_response;
|
||||
}
|
||||
|
||||
if (xen_vbd_translate(&preq, blkif, operation) != 0) {
|
||||
if (xen_vbd_translate(&preq, ring->blkif, operation) != 0) {
|
||||
pr_debug("access denied: %s of [%llu,%llu] on dev=%04x\n",
|
||||
operation == READ ? "read" : "write",
|
||||
preq.sector_number,
|
||||
preq.sector_number + preq.nr_sects,
|
||||
blkif->vbd.pdevice);
|
||||
ring->blkif->vbd.pdevice);
|
||||
goto fail_response;
|
||||
}
|
||||
|
||||
@ -1304,7 +1325,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
|
||||
if (((int)preq.sector_number|(int)seg[i].nsec) &
|
||||
((bdev_logical_block_size(preq.bdev) >> 9) - 1)) {
|
||||
pr_debug("Misaligned I/O request from domain %d\n",
|
||||
blkif->domid);
|
||||
ring->blkif->domid);
|
||||
goto fail_response;
|
||||
}
|
||||
}
|
||||
@ -1313,7 +1334,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
|
||||
* issue the WRITE_FLUSH.
|
||||
*/
|
||||
if (drain)
|
||||
xen_blk_drain_io(pending_req->blkif);
|
||||
xen_blk_drain_io(pending_req->ring);
|
||||
|
||||
/*
|
||||
* If we have failed at this point, we need to undo the M2P override,
|
||||
@ -1328,8 +1349,8 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
|
||||
* This corresponding xen_blkif_put is done in __end_block_io_op, or
|
||||
* below (in "!bio") if we are handling a BLKIF_OP_DISCARD.
|
||||
*/
|
||||
xen_blkif_get(blkif);
|
||||
atomic_inc(&blkif->inflight);
|
||||
xen_blkif_get(ring->blkif);
|
||||
atomic_inc(&ring->inflight);
|
||||
|
||||
for (i = 0; i < nseg; i++) {
|
||||
while ((bio == NULL) ||
|
||||
@ -1377,19 +1398,19 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
|
||||
blk_finish_plug(&plug);
|
||||
|
||||
if (operation == READ)
|
||||
blkif->st_rd_sect += preq.nr_sects;
|
||||
ring->st_rd_sect += preq.nr_sects;
|
||||
else if (operation & WRITE)
|
||||
blkif->st_wr_sect += preq.nr_sects;
|
||||
ring->st_wr_sect += preq.nr_sects;
|
||||
|
||||
return 0;
|
||||
|
||||
fail_flush:
|
||||
xen_blkbk_unmap(blkif, pending_req->segments,
|
||||
xen_blkbk_unmap(ring, pending_req->segments,
|
||||
pending_req->nr_segs);
|
||||
fail_response:
|
||||
/* Haven't submitted any bio's yet. */
|
||||
make_response(blkif, req->u.rw.id, req_operation, BLKIF_RSP_ERROR);
|
||||
free_req(blkif, pending_req);
|
||||
make_response(ring, req->u.rw.id, req_operation, BLKIF_RSP_ERROR);
|
||||
free_req(ring, pending_req);
|
||||
msleep(1); /* back off a bit */
|
||||
return -EIO;
|
||||
|
||||
@ -1407,21 +1428,22 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
|
||||
/*
|
||||
* Put a response on the ring on how the operation fared.
|
||||
*/
|
||||
static void make_response(struct xen_blkif *blkif, u64 id,
|
||||
static void make_response(struct xen_blkif_ring *ring, u64 id,
|
||||
unsigned short op, int st)
|
||||
{
|
||||
struct blkif_response resp;
|
||||
unsigned long flags;
|
||||
union blkif_back_rings *blk_rings = &blkif->blk_rings;
|
||||
union blkif_back_rings *blk_rings;
|
||||
int notify;
|
||||
|
||||
resp.id = id;
|
||||
resp.operation = op;
|
||||
resp.status = st;
|
||||
|
||||
spin_lock_irqsave(&blkif->blk_ring_lock, flags);
|
||||
spin_lock_irqsave(&ring->blk_ring_lock, flags);
|
||||
blk_rings = &ring->blk_rings;
|
||||
/* Place on the response ring for the relevant domain. */
|
||||
switch (blkif->blk_protocol) {
|
||||
switch (ring->blkif->blk_protocol) {
|
||||
case BLKIF_PROTOCOL_NATIVE:
|
||||
memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt),
|
||||
&resp, sizeof(resp));
|
||||
@ -1439,9 +1461,9 @@ static void make_response(struct xen_blkif *blkif, u64 id,
|
||||
}
|
||||
blk_rings->common.rsp_prod_pvt++;
|
||||
RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify);
|
||||
spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);
|
||||
spin_unlock_irqrestore(&ring->blk_ring_lock, flags);
|
||||
if (notify)
|
||||
notify_remote_via_irq(blkif->irq);
|
||||
notify_remote_via_irq(ring->irq);
|
||||
}
|
||||
|
||||
static int __init xen_blkif_init(void)
|
||||
@ -1457,6 +1479,9 @@ static int __init xen_blkif_init(void)
|
||||
xen_blkif_max_ring_order = XENBUS_MAX_RING_GRANT_ORDER;
|
||||
}
|
||||
|
||||
if (xenblk_max_queues == 0)
|
||||
xenblk_max_queues = num_online_cpus();
|
||||
|
||||
rc = xen_blkif_interface_init();
|
||||
if (rc)
|
||||
goto failed_init;
|
||||
|
@ -46,6 +46,7 @@
|
||||
#include <xen/interface/io/protocols.h>
|
||||
|
||||
extern unsigned int xen_blkif_max_ring_order;
|
||||
extern unsigned int xenblk_max_queues;
|
||||
/*
|
||||
* This is the maximum number of segments that would be allowed in indirect
|
||||
* requests. This value will also be passed to the frontend.
|
||||
@ -269,68 +270,79 @@ struct persistent_gnt {
|
||||
struct list_head remove_node;
|
||||
};
|
||||
|
||||
struct xen_blkif {
|
||||
/* Unique identifier for this interface. */
|
||||
domid_t domid;
|
||||
unsigned int handle;
|
||||
/* Per-ring information. */
|
||||
struct xen_blkif_ring {
|
||||
/* Physical parameters of the comms window. */
|
||||
unsigned int irq;
|
||||
/* Comms information. */
|
||||
enum blkif_protocol blk_protocol;
|
||||
union blkif_back_rings blk_rings;
|
||||
void *blk_ring;
|
||||
/* The VBD attached to this interface. */
|
||||
struct xen_vbd vbd;
|
||||
/* Back pointer to the backend_info. */
|
||||
struct backend_info *be;
|
||||
/* Private fields. */
|
||||
spinlock_t blk_ring_lock;
|
||||
atomic_t refcnt;
|
||||
|
||||
wait_queue_head_t wq;
|
||||
/* for barrier (drain) requests */
|
||||
struct completion drain_complete;
|
||||
atomic_t drain;
|
||||
atomic_t inflight;
|
||||
/* One thread per one blkif. */
|
||||
/* One thread per blkif ring. */
|
||||
struct task_struct *xenblkd;
|
||||
unsigned int waiting_reqs;
|
||||
|
||||
/* tree to store persistent grants */
|
||||
struct rb_root persistent_gnts;
|
||||
unsigned int persistent_gnt_c;
|
||||
atomic_t persistent_gnt_in_use;
|
||||
unsigned long next_lru;
|
||||
|
||||
/* used by the kworker that offload work from the persistent purge */
|
||||
struct list_head persistent_purge_list;
|
||||
struct work_struct persistent_purge_work;
|
||||
|
||||
/* buffer of free pages to map grant refs */
|
||||
spinlock_t free_pages_lock;
|
||||
int free_pages_num;
|
||||
struct list_head free_pages;
|
||||
|
||||
/* List of all 'pending_req' available */
|
||||
struct list_head pending_free;
|
||||
/* And its spinlock. */
|
||||
spinlock_t pending_free_lock;
|
||||
wait_queue_head_t pending_free_wq;
|
||||
|
||||
/* statistics */
|
||||
/* Tree to store persistent grants. */
|
||||
spinlock_t pers_gnts_lock;
|
||||
struct rb_root persistent_gnts;
|
||||
unsigned int persistent_gnt_c;
|
||||
atomic_t persistent_gnt_in_use;
|
||||
unsigned long next_lru;
|
||||
|
||||
/* Statistics. */
|
||||
unsigned long st_print;
|
||||
unsigned long long st_rd_req;
|
||||
unsigned long long st_wr_req;
|
||||
unsigned long long st_oo_req;
|
||||
unsigned long long st_f_req;
|
||||
unsigned long long st_ds_req;
|
||||
unsigned long long st_rd_sect;
|
||||
unsigned long long st_wr_sect;
|
||||
unsigned long long st_rd_req;
|
||||
unsigned long long st_wr_req;
|
||||
unsigned long long st_oo_req;
|
||||
unsigned long long st_f_req;
|
||||
unsigned long long st_ds_req;
|
||||
unsigned long long st_rd_sect;
|
||||
unsigned long long st_wr_sect;
|
||||
|
||||
/* Used by the kworker that offload work from the persistent purge. */
|
||||
struct list_head persistent_purge_list;
|
||||
struct work_struct persistent_purge_work;
|
||||
|
||||
/* Buffer of free pages to map grant refs. */
|
||||
spinlock_t free_pages_lock;
|
||||
int free_pages_num;
|
||||
struct list_head free_pages;
|
||||
|
||||
struct work_struct free_work;
|
||||
/* Thread shutdown wait queue. */
|
||||
wait_queue_head_t shutdown_wq;
|
||||
unsigned int nr_ring_pages;
|
||||
struct xen_blkif *blkif;
|
||||
};
|
||||
|
||||
struct xen_blkif {
|
||||
/* Unique identifier for this interface. */
|
||||
domid_t domid;
|
||||
unsigned int handle;
|
||||
/* Comms information. */
|
||||
enum blkif_protocol blk_protocol;
|
||||
/* The VBD attached to this interface. */
|
||||
struct xen_vbd vbd;
|
||||
/* Back pointer to the backend_info. */
|
||||
struct backend_info *be;
|
||||
atomic_t refcnt;
|
||||
/* for barrier (drain) requests */
|
||||
struct completion drain_complete;
|
||||
atomic_t drain;
|
||||
|
||||
struct work_struct free_work;
|
||||
unsigned int nr_ring_pages;
|
||||
/* All rings for this device. */
|
||||
struct xen_blkif_ring *rings;
|
||||
unsigned int nr_rings;
|
||||
};
|
||||
|
||||
struct seg_buf {
|
||||
@ -352,7 +364,7 @@ struct grant_page {
|
||||
* response queued for it, with the saved 'id' passed back.
|
||||
*/
|
||||
struct pending_req {
|
||||
struct xen_blkif *blkif;
|
||||
struct xen_blkif_ring *ring;
|
||||
u64 id;
|
||||
int nr_segs;
|
||||
atomic_t pendcnt;
|
||||
@ -394,7 +406,7 @@ int xen_blkif_xenbus_init(void);
|
||||
irqreturn_t xen_blkif_be_int(int irq, void *dev_id);
|
||||
int xen_blkif_schedule(void *arg);
|
||||
int xen_blkif_purge_persistent(void *arg);
|
||||
void xen_blkbk_free_caches(struct xen_blkif *blkif);
|
||||
void xen_blkbk_free_caches(struct xen_blkif_ring *ring);
|
||||
|
||||
int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
|
||||
struct backend_info *be, int state);
|
||||
|
@ -86,9 +86,11 @@ static void xen_update_blkif_status(struct xen_blkif *blkif)
|
||||
{
|
||||
int err;
|
||||
char name[BLKBACK_NAME_LEN];
|
||||
struct xen_blkif_ring *ring;
|
||||
int i;
|
||||
|
||||
/* Not ready to connect? */
|
||||
if (!blkif->irq || !blkif->vbd.bdev)
|
||||
if (!blkif->rings || !blkif->rings[0].irq || !blkif->vbd.bdev)
|
||||
return;
|
||||
|
||||
/* Already connected? */
|
||||
@ -113,13 +115,55 @@ static void xen_update_blkif_status(struct xen_blkif *blkif)
|
||||
}
|
||||
invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping);
|
||||
|
||||
blkif->xenblkd = kthread_run(xen_blkif_schedule, blkif, "%s", name);
|
||||
if (IS_ERR(blkif->xenblkd)) {
|
||||
err = PTR_ERR(blkif->xenblkd);
|
||||
blkif->xenblkd = NULL;
|
||||
xenbus_dev_error(blkif->be->dev, err, "start xenblkd");
|
||||
return;
|
||||
for (i = 0; i < blkif->nr_rings; i++) {
|
||||
ring = &blkif->rings[i];
|
||||
ring->xenblkd = kthread_run(xen_blkif_schedule, ring, "%s-%d", name, i);
|
||||
if (IS_ERR(ring->xenblkd)) {
|
||||
err = PTR_ERR(ring->xenblkd);
|
||||
ring->xenblkd = NULL;
|
||||
xenbus_dev_fatal(blkif->be->dev, err,
|
||||
"start %s-%d xenblkd", name, i);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
return;
|
||||
|
||||
out:
|
||||
while (--i >= 0) {
|
||||
ring = &blkif->rings[i];
|
||||
kthread_stop(ring->xenblkd);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
static int xen_blkif_alloc_rings(struct xen_blkif *blkif)
|
||||
{
|
||||
unsigned int r;
|
||||
|
||||
blkif->rings = kzalloc(blkif->nr_rings * sizeof(struct xen_blkif_ring), GFP_KERNEL);
|
||||
if (!blkif->rings)
|
||||
return -ENOMEM;
|
||||
|
||||
for (r = 0; r < blkif->nr_rings; r++) {
|
||||
struct xen_blkif_ring *ring = &blkif->rings[r];
|
||||
|
||||
spin_lock_init(&ring->blk_ring_lock);
|
||||
init_waitqueue_head(&ring->wq);
|
||||
INIT_LIST_HEAD(&ring->pending_free);
|
||||
INIT_LIST_HEAD(&ring->persistent_purge_list);
|
||||
INIT_WORK(&ring->persistent_purge_work, xen_blkbk_unmap_purged_grants);
|
||||
spin_lock_init(&ring->free_pages_lock);
|
||||
INIT_LIST_HEAD(&ring->free_pages);
|
||||
|
||||
spin_lock_init(&ring->pending_free_lock);
|
||||
init_waitqueue_head(&ring->pending_free_wq);
|
||||
init_waitqueue_head(&ring->shutdown_wq);
|
||||
ring->blkif = blkif;
|
||||
ring->st_print = jiffies;
|
||||
xen_blkif_get(blkif);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct xen_blkif *xen_blkif_alloc(domid_t domid)
|
||||
@ -133,41 +177,25 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
blkif->domid = domid;
|
||||
spin_lock_init(&blkif->blk_ring_lock);
|
||||
atomic_set(&blkif->refcnt, 1);
|
||||
init_waitqueue_head(&blkif->wq);
|
||||
init_completion(&blkif->drain_complete);
|
||||
atomic_set(&blkif->drain, 0);
|
||||
blkif->st_print = jiffies;
|
||||
blkif->persistent_gnts.rb_node = NULL;
|
||||
spin_lock_init(&blkif->free_pages_lock);
|
||||
INIT_LIST_HEAD(&blkif->free_pages);
|
||||
INIT_LIST_HEAD(&blkif->persistent_purge_list);
|
||||
blkif->free_pages_num = 0;
|
||||
atomic_set(&blkif->persistent_gnt_in_use, 0);
|
||||
atomic_set(&blkif->inflight, 0);
|
||||
INIT_WORK(&blkif->persistent_purge_work, xen_blkbk_unmap_purged_grants);
|
||||
|
||||
INIT_LIST_HEAD(&blkif->pending_free);
|
||||
INIT_WORK(&blkif->free_work, xen_blkif_deferred_free);
|
||||
spin_lock_init(&blkif->pending_free_lock);
|
||||
init_waitqueue_head(&blkif->pending_free_wq);
|
||||
init_waitqueue_head(&blkif->shutdown_wq);
|
||||
|
||||
return blkif;
|
||||
}
|
||||
|
||||
static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t *gref,
|
||||
static int xen_blkif_map(struct xen_blkif_ring *ring, grant_ref_t *gref,
|
||||
unsigned int nr_grefs, unsigned int evtchn)
|
||||
{
|
||||
int err;
|
||||
struct xen_blkif *blkif = ring->blkif;
|
||||
|
||||
/* Already connected through? */
|
||||
if (blkif->irq)
|
||||
if (ring->irq)
|
||||
return 0;
|
||||
|
||||
err = xenbus_map_ring_valloc(blkif->be->dev, gref, nr_grefs,
|
||||
&blkif->blk_ring);
|
||||
&ring->blk_ring);
|
||||
if (err < 0)
|
||||
return err;
|
||||
|
||||
@ -175,24 +203,24 @@ static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t *gref,
|
||||
case BLKIF_PROTOCOL_NATIVE:
|
||||
{
|
||||
struct blkif_sring *sring;
|
||||
sring = (struct blkif_sring *)blkif->blk_ring;
|
||||
BACK_RING_INIT(&blkif->blk_rings.native, sring,
|
||||
sring = (struct blkif_sring *)ring->blk_ring;
|
||||
BACK_RING_INIT(&ring->blk_rings.native, sring,
|
||||
XEN_PAGE_SIZE * nr_grefs);
|
||||
break;
|
||||
}
|
||||
case BLKIF_PROTOCOL_X86_32:
|
||||
{
|
||||
struct blkif_x86_32_sring *sring_x86_32;
|
||||
sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring;
|
||||
BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32,
|
||||
sring_x86_32 = (struct blkif_x86_32_sring *)ring->blk_ring;
|
||||
BACK_RING_INIT(&ring->blk_rings.x86_32, sring_x86_32,
|
||||
XEN_PAGE_SIZE * nr_grefs);
|
||||
break;
|
||||
}
|
||||
case BLKIF_PROTOCOL_X86_64:
|
||||
{
|
||||
struct blkif_x86_64_sring *sring_x86_64;
|
||||
sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring;
|
||||
BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64,
|
||||
sring_x86_64 = (struct blkif_x86_64_sring *)ring->blk_ring;
|
||||
BACK_RING_INIT(&ring->blk_rings.x86_64, sring_x86_64,
|
||||
XEN_PAGE_SIZE * nr_grefs);
|
||||
break;
|
||||
}
|
||||
@ -202,13 +230,13 @@ static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t *gref,
|
||||
|
||||
err = bind_interdomain_evtchn_to_irqhandler(blkif->domid, evtchn,
|
||||
xen_blkif_be_int, 0,
|
||||
"blkif-backend", blkif);
|
||||
"blkif-backend", ring);
|
||||
if (err < 0) {
|
||||
xenbus_unmap_ring_vfree(blkif->be->dev, blkif->blk_ring);
|
||||
blkif->blk_rings.common.sring = NULL;
|
||||
xenbus_unmap_ring_vfree(blkif->be->dev, ring->blk_ring);
|
||||
ring->blk_rings.common.sring = NULL;
|
||||
return err;
|
||||
}
|
||||
blkif->irq = err;
|
||||
ring->irq = err;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -216,50 +244,69 @@ static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t *gref,
|
||||
static int xen_blkif_disconnect(struct xen_blkif *blkif)
|
||||
{
|
||||
struct pending_req *req, *n;
|
||||
int i = 0, j;
|
||||
unsigned int j, r;
|
||||
|
||||
if (blkif->xenblkd) {
|
||||
kthread_stop(blkif->xenblkd);
|
||||
wake_up(&blkif->shutdown_wq);
|
||||
blkif->xenblkd = NULL;
|
||||
for (r = 0; r < blkif->nr_rings; r++) {
|
||||
struct xen_blkif_ring *ring = &blkif->rings[r];
|
||||
unsigned int i = 0;
|
||||
|
||||
if (ring->xenblkd) {
|
||||
kthread_stop(ring->xenblkd);
|
||||
wake_up(&ring->shutdown_wq);
|
||||
ring->xenblkd = NULL;
|
||||
}
|
||||
|
||||
/* The above kthread_stop() guarantees that at this point we
|
||||
* don't have any discard_io or other_io requests. So, checking
|
||||
* for inflight IO is enough.
|
||||
*/
|
||||
if (atomic_read(&ring->inflight) > 0)
|
||||
return -EBUSY;
|
||||
|
||||
if (ring->irq) {
|
||||
unbind_from_irqhandler(ring->irq, ring);
|
||||
ring->irq = 0;
|
||||
}
|
||||
|
||||
if (ring->blk_rings.common.sring) {
|
||||
xenbus_unmap_ring_vfree(blkif->be->dev, ring->blk_ring);
|
||||
ring->blk_rings.common.sring = NULL;
|
||||
}
|
||||
|
||||
/* Remove all persistent grants and the cache of ballooned pages. */
|
||||
xen_blkbk_free_caches(ring);
|
||||
|
||||
/* Check that there is no request in use */
|
||||
list_for_each_entry_safe(req, n, &ring->pending_free, free_list) {
|
||||
list_del(&req->free_list);
|
||||
|
||||
for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++)
|
||||
kfree(req->segments[j]);
|
||||
|
||||
for (j = 0; j < MAX_INDIRECT_PAGES; j++)
|
||||
kfree(req->indirect_pages[j]);
|
||||
|
||||
kfree(req);
|
||||
i++;
|
||||
}
|
||||
|
||||
BUG_ON(atomic_read(&ring->persistent_gnt_in_use) != 0);
|
||||
BUG_ON(!list_empty(&ring->persistent_purge_list));
|
||||
BUG_ON(!RB_EMPTY_ROOT(&ring->persistent_gnts));
|
||||
BUG_ON(!list_empty(&ring->free_pages));
|
||||
BUG_ON(ring->free_pages_num != 0);
|
||||
BUG_ON(ring->persistent_gnt_c != 0);
|
||||
WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
|
||||
xen_blkif_put(blkif);
|
||||
}
|
||||
|
||||
/* The above kthread_stop() guarantees that at this point we
|
||||
* don't have any discard_io or other_io requests. So, checking
|
||||
* for inflight IO is enough.
|
||||
*/
|
||||
if (atomic_read(&blkif->inflight) > 0)
|
||||
return -EBUSY;
|
||||
|
||||
if (blkif->irq) {
|
||||
unbind_from_irqhandler(blkif->irq, blkif);
|
||||
blkif->irq = 0;
|
||||
}
|
||||
|
||||
if (blkif->blk_rings.common.sring) {
|
||||
xenbus_unmap_ring_vfree(blkif->be->dev, blkif->blk_ring);
|
||||
blkif->blk_rings.common.sring = NULL;
|
||||
}
|
||||
|
||||
/* Remove all persistent grants and the cache of ballooned pages. */
|
||||
xen_blkbk_free_caches(blkif);
|
||||
|
||||
/* Check that there is no request in use */
|
||||
list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) {
|
||||
list_del(&req->free_list);
|
||||
|
||||
for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++)
|
||||
kfree(req->segments[j]);
|
||||
|
||||
for (j = 0; j < MAX_INDIRECT_PAGES; j++)
|
||||
kfree(req->indirect_pages[j]);
|
||||
|
||||
kfree(req);
|
||||
i++;
|
||||
}
|
||||
|
||||
WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
|
||||
blkif->nr_ring_pages = 0;
|
||||
/*
|
||||
* blkif->rings was allocated in connect_ring, so we should free it
|
||||
* here.
|
||||
*/
|
||||
kfree(blkif->rings);
|
||||
blkif->rings = NULL;
|
||||
blkif->nr_rings = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -271,13 +318,6 @@ static void xen_blkif_free(struct xen_blkif *blkif)
|
||||
xen_vbd_free(&blkif->vbd);
|
||||
|
||||
/* Make sure everything is drained before shutting down */
|
||||
BUG_ON(blkif->persistent_gnt_c != 0);
|
||||
BUG_ON(atomic_read(&blkif->persistent_gnt_in_use) != 0);
|
||||
BUG_ON(blkif->free_pages_num != 0);
|
||||
BUG_ON(!list_empty(&blkif->persistent_purge_list));
|
||||
BUG_ON(!list_empty(&blkif->free_pages));
|
||||
BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));
|
||||
|
||||
kmem_cache_free(xen_blkif_cachep, blkif);
|
||||
}
|
||||
|
||||
@ -296,25 +336,38 @@ int __init xen_blkif_interface_init(void)
|
||||
* sysfs interface for VBD I/O requests
|
||||
*/
|
||||
|
||||
#define VBD_SHOW(name, format, args...) \
|
||||
#define VBD_SHOW_ALLRING(name, format) \
|
||||
static ssize_t show_##name(struct device *_dev, \
|
||||
struct device_attribute *attr, \
|
||||
char *buf) \
|
||||
{ \
|
||||
struct xenbus_device *dev = to_xenbus_device(_dev); \
|
||||
struct backend_info *be = dev_get_drvdata(&dev->dev); \
|
||||
struct xen_blkif *blkif = be->blkif; \
|
||||
unsigned int i; \
|
||||
unsigned long long result = 0; \
|
||||
\
|
||||
return sprintf(buf, format, ##args); \
|
||||
if (!blkif->rings) \
|
||||
goto out; \
|
||||
\
|
||||
for (i = 0; i < blkif->nr_rings; i++) { \
|
||||
struct xen_blkif_ring *ring = &blkif->rings[i]; \
|
||||
\
|
||||
result += ring->st_##name; \
|
||||
} \
|
||||
\
|
||||
out: \
|
||||
return sprintf(buf, format, result); \
|
||||
} \
|
||||
static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
|
||||
|
||||
VBD_SHOW(oo_req, "%llu\n", be->blkif->st_oo_req);
|
||||
VBD_SHOW(rd_req, "%llu\n", be->blkif->st_rd_req);
|
||||
VBD_SHOW(wr_req, "%llu\n", be->blkif->st_wr_req);
|
||||
VBD_SHOW(f_req, "%llu\n", be->blkif->st_f_req);
|
||||
VBD_SHOW(ds_req, "%llu\n", be->blkif->st_ds_req);
|
||||
VBD_SHOW(rd_sect, "%llu\n", be->blkif->st_rd_sect);
|
||||
VBD_SHOW(wr_sect, "%llu\n", be->blkif->st_wr_sect);
|
||||
VBD_SHOW_ALLRING(oo_req, "%llu\n");
|
||||
VBD_SHOW_ALLRING(rd_req, "%llu\n");
|
||||
VBD_SHOW_ALLRING(wr_req, "%llu\n");
|
||||
VBD_SHOW_ALLRING(f_req, "%llu\n");
|
||||
VBD_SHOW_ALLRING(ds_req, "%llu\n");
|
||||
VBD_SHOW_ALLRING(rd_sect, "%llu\n");
|
||||
VBD_SHOW_ALLRING(wr_sect, "%llu\n");
|
||||
|
||||
static struct attribute *xen_vbdstat_attrs[] = {
|
||||
&dev_attr_oo_req.attr,
|
||||
@ -332,6 +385,18 @@ static struct attribute_group xen_vbdstat_group = {
|
||||
.attrs = xen_vbdstat_attrs,
|
||||
};
|
||||
|
||||
/*
 * VBD_SHOW(name, format, args...) - generate a read-only sysfs attribute.
 *
 * Expands to a show_<name>() callback plus a DEVICE_ATTR() declaration with
 * mode S_IRUGO and no store callback (NULL). The callback looks up the
 * xenbus device and its backend_info (available to @args as "be") and
 * formats @args into @buf with sprintf(), returning sprintf()'s result.
 */
#define VBD_SHOW(name, format, args...) \
|
||||
static ssize_t show_##name(struct device *_dev, \
|
||||
struct device_attribute *attr, \
|
||||
char *buf) \
|
||||
{ \
|
||||
struct xenbus_device *dev = to_xenbus_device(_dev); \
|
||||
struct backend_info *be = dev_get_drvdata(&dev->dev); \
|
||||
\
|
||||
return sprintf(buf, format, ##args); \
|
||||
} \
|
||||
static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
|
||||
|
||||
VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor);
|
||||
VBD_SHOW(mode, "%s\n", be->mode);
|
||||
|
||||
@ -440,11 +505,11 @@ static int xen_blkbk_remove(struct xenbus_device *dev)
|
||||
|
||||
dev_set_drvdata(&dev->dev, NULL);
|
||||
|
||||
if (be->blkif) {
|
||||
if (be->blkif)
|
||||
xen_blkif_disconnect(be->blkif);
|
||||
xen_blkif_put(be->blkif);
|
||||
}
|
||||
|
||||
/* Put the reference we set in xen_blkif_alloc(). */
|
||||
xen_blkif_put(be->blkif);
|
||||
kfree(be->mode);
|
||||
kfree(be);
|
||||
return 0;
|
||||
@ -553,6 +618,12 @@ static int xen_blkbk_probe(struct xenbus_device *dev,
|
||||
goto fail;
|
||||
}
|
||||
|
||||
/* Multi-queue: advertise how many queues we support. */
|
||||
err = xenbus_printf(XBT_NIL, dev->nodename,
|
||||
"multi-queue-max-queues", "%u", xenblk_max_queues);
|
||||
if (err)
|
||||
pr_warn("Error writing multi-queue-max-queues\n");
|
||||
|
||||
/* setup back pointer */
|
||||
be->blkif->be = be;
|
||||
|
||||
@ -708,8 +779,14 @@ static void frontend_changed(struct xenbus_device *dev,
|
||||
}
|
||||
|
||||
err = connect_ring(be);
|
||||
if (err)
|
||||
if (err) {
|
||||
/*
|
||||
* Clean up so that memory resources can be used by
|
||||
* other devices. connect_ring has already reported the error.
|
||||
*/
|
||||
xen_blkif_disconnect(be->blkif);
|
||||
break;
|
||||
}
|
||||
xen_update_blkif_status(be->blkif);
|
||||
break;
|
||||
|
||||
@ -825,50 +902,43 @@ again:
|
||||
xenbus_transaction_end(xbt, 1);
|
||||
}
|
||||
|
||||
|
||||
static int connect_ring(struct backend_info *be)
|
||||
/*
|
||||
* Each ring may have multiple pages, depending on "ring-page-order".
|
||||
*/
|
||||
static int read_per_ring_refs(struct xen_blkif_ring *ring, const char *dir)
|
||||
{
|
||||
struct xenbus_device *dev = be->dev;
|
||||
unsigned int ring_ref[XENBUS_MAX_RING_GRANTS];
|
||||
unsigned int evtchn, nr_grefs, ring_page_order;
|
||||
unsigned int pers_grants;
|
||||
char protocol[64] = "";
|
||||
struct pending_req *req, *n;
|
||||
int err, i, j;
|
||||
struct xen_blkif *blkif = ring->blkif;
|
||||
struct xenbus_device *dev = blkif->be->dev;
|
||||
unsigned int ring_page_order, nr_grefs, evtchn;
|
||||
|
||||
pr_debug("%s %s\n", __func__, dev->otherend);
|
||||
|
||||
err = xenbus_scanf(XBT_NIL, dev->otherend, "event-channel", "%u",
|
||||
err = xenbus_scanf(XBT_NIL, dir, "event-channel", "%u",
|
||||
&evtchn);
|
||||
if (err != 1) {
|
||||
err = -EINVAL;
|
||||
xenbus_dev_fatal(dev, err, "reading %s/event-channel",
|
||||
dev->otherend);
|
||||
xenbus_dev_fatal(dev, err, "reading %s/event-channel", dir);
|
||||
return err;
|
||||
}
|
||||
pr_info("event-channel %u\n", evtchn);
|
||||
|
||||
err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-page-order", "%u",
|
||||
&ring_page_order);
|
||||
if (err != 1) {
|
||||
err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-ref",
|
||||
"%u", &ring_ref[0]);
|
||||
err = xenbus_scanf(XBT_NIL, dir, "ring-ref", "%u", &ring_ref[0]);
|
||||
if (err != 1) {
|
||||
err = -EINVAL;
|
||||
xenbus_dev_fatal(dev, err, "reading %s/ring-ref",
|
||||
dev->otherend);
|
||||
xenbus_dev_fatal(dev, err, "reading %s/ring-ref", dir);
|
||||
return err;
|
||||
}
|
||||
nr_grefs = 1;
|
||||
pr_info("%s:using single page: ring-ref %d\n", dev->otherend,
|
||||
ring_ref[0]);
|
||||
} else {
|
||||
unsigned int i;
|
||||
|
||||
if (ring_page_order > xen_blkif_max_ring_order) {
|
||||
err = -EINVAL;
|
||||
xenbus_dev_fatal(dev, err, "%s/request %d ring page order exceed max:%d",
|
||||
dev->otherend, ring_page_order,
|
||||
dir, ring_page_order,
|
||||
xen_blkif_max_ring_order);
|
||||
return err;
|
||||
}
|
||||
@ -878,52 +948,23 @@ static int connect_ring(struct backend_info *be)
|
||||
char ring_ref_name[RINGREF_NAME_LEN];
|
||||
|
||||
snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
|
||||
err = xenbus_scanf(XBT_NIL, dev->otherend, ring_ref_name,
|
||||
err = xenbus_scanf(XBT_NIL, dir, ring_ref_name,
|
||||
"%u", &ring_ref[i]);
|
||||
if (err != 1) {
|
||||
err = -EINVAL;
|
||||
xenbus_dev_fatal(dev, err, "reading %s/%s",
|
||||
dev->otherend, ring_ref_name);
|
||||
dir, ring_ref_name);
|
||||
return err;
|
||||
}
|
||||
pr_info("ring-ref%u: %u\n", i, ring_ref[i]);
|
||||
}
|
||||
}
|
||||
|
||||
be->blkif->blk_protocol = BLKIF_PROTOCOL_DEFAULT;
|
||||
err = xenbus_gather(XBT_NIL, dev->otherend, "protocol",
|
||||
"%63s", protocol, NULL);
|
||||
if (err)
|
||||
strcpy(protocol, "unspecified, assuming default");
|
||||
else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE))
|
||||
be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
|
||||
else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32))
|
||||
be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32;
|
||||
else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64))
|
||||
be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64;
|
||||
else {
|
||||
xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
|
||||
return -1;
|
||||
}
|
||||
err = xenbus_gather(XBT_NIL, dev->otherend,
|
||||
"feature-persistent", "%u",
|
||||
&pers_grants, NULL);
|
||||
if (err)
|
||||
pers_grants = 0;
|
||||
|
||||
be->blkif->vbd.feature_gnt_persistent = pers_grants;
|
||||
be->blkif->vbd.overflow_max_grants = 0;
|
||||
be->blkif->nr_ring_pages = nr_grefs;
|
||||
|
||||
pr_info("ring-pages:%d, event-channel %d, protocol %d (%s) %s\n",
|
||||
nr_grefs, evtchn, be->blkif->blk_protocol, protocol,
|
||||
pers_grants ? "persistent grants" : "");
|
||||
blkif->nr_ring_pages = nr_grefs;
|
||||
|
||||
for (i = 0; i < nr_grefs * XEN_BLKIF_REQS_PER_PAGE; i++) {
|
||||
req = kzalloc(sizeof(*req), GFP_KERNEL);
|
||||
if (!req)
|
||||
goto fail;
|
||||
list_add_tail(&req->free_list, &be->blkif->pending_free);
|
||||
list_add_tail(&req->free_list, &ring->pending_free);
|
||||
for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
|
||||
req->segments[j] = kzalloc(sizeof(*req->segments[0]), GFP_KERNEL);
|
||||
if (!req->segments[j])
|
||||
@ -938,7 +979,7 @@ static int connect_ring(struct backend_info *be)
|
||||
}
|
||||
|
||||
/* Map the shared frame, irq etc. */
|
||||
err = xen_blkif_map(be->blkif, ring_ref, nr_grefs, evtchn);
|
||||
err = xen_blkif_map(ring, ring_ref, nr_grefs, evtchn);
|
||||
if (err) {
|
||||
xenbus_dev_fatal(dev, err, "mapping ring-ref port %u", evtchn);
|
||||
return err;
|
||||
@ -947,7 +988,7 @@ static int connect_ring(struct backend_info *be)
|
||||
return 0;
|
||||
|
||||
fail:
|
||||
list_for_each_entry_safe(req, n, &be->blkif->pending_free, free_list) {
|
||||
list_for_each_entry_safe(req, n, &ring->pending_free, free_list) {
|
||||
list_del(&req->free_list);
|
||||
for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
|
||||
if (!req->segments[j])
|
||||
@ -962,6 +1003,93 @@ fail:
|
||||
kfree(req);
|
||||
}
|
||||
return -ENOMEM;
|
||||
|
||||
}
|
||||
|
||||
/*
 * connect_ring - read the frontend's connection parameters and set up rings.
 *
 * Reads the following keys from the frontend's xenstore directory
 * (dev->otherend):
 *   "protocol"               - mapped to one of the BLKIF_PROTOCOL_* values;
 *                              if it cannot be read, BLKIF_PROTOCOL_DEFAULT
 *                              is kept; an unrecognised string is fatal.
 *   "feature-persistent"     - stored in vbd.feature_gnt_persistent; treated
 *                              as 0 if it cannot be read.
 *   "multi-queue-num-queues" - number of rings; 1 if the read returns a
 *                              negative value, otherwise it must be in the
 *                              range 1..xenblk_max_queues.
 *
 * It then sets blkif->nr_rings, calls xen_blkif_alloc_rings() and calls
 * read_per_ring_refs() once per ring: with dev->otherend itself for a single
 * ring, or with "<otherend>/queue-<i>" for each ring otherwise.
 *
 * Returns 0 on success, -ENOSYS for an unknown protocol or an out-of-range
 * queue count, -ENOMEM on allocation failure, or the non-zero value returned
 * by read_per_ring_refs(). Nothing is torn down here on failure; the visible
 * caller in frontend_changed() calls xen_blkif_disconnect() when this
 * returns non-zero.
 */
static int connect_ring(struct backend_info *be)
|
||||
{
|
||||
struct xenbus_device *dev = be->dev;
|
||||
unsigned int pers_grants;
|
||||
char protocol[64] = "";
|
||||
int err, i;
|
||||
char *xspath;
|
||||
size_t xspathsize;
|
||||
const size_t xenstore_path_ext_size = 11; /* sufficient for "/queue-NNN" */
|
||||
unsigned int requested_num_queues = 0;
|
||||
|
||||
pr_debug("%s %s\n", __func__, dev->otherend);
|
||||
|
||||
be->blkif->blk_protocol = BLKIF_PROTOCOL_DEFAULT;
|
||||
err = xenbus_gather(XBT_NIL, dev->otherend, "protocol",
|
||||
"%63s", protocol, NULL);
|
||||
if (err)
|
||||
strcpy(protocol, "unspecified, assuming default");
|
||||
else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE))
|
||||
be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
|
||||
else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32))
|
||||
be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32;
|
||||
else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64))
|
||||
be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64;
|
||||
else {
|
||||
xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
|
||||
return -ENOSYS;
|
||||
}
|
||||
err = xenbus_gather(XBT_NIL, dev->otherend,
|
||||
"feature-persistent", "%u",
|
||||
&pers_grants, NULL);
|
||||
if (err)
|
||||
pers_grants = 0;
|
||||
|
||||
be->blkif->vbd.feature_gnt_persistent = pers_grants;
|
||||
be->blkif->vbd.overflow_max_grants = 0;
|
||||
|
||||
/*
|
||||
* Read the number of hardware queues from frontend.
|
||||
|
||||
*/
|
||||
err = xenbus_scanf(XBT_NIL, dev->otherend, "multi-queue-num-queues",
|
||||
"%u", &requested_num_queues);
|
||||
if (err < 0) {
|
||||
requested_num_queues = 1;
|
||||
} else {
|
||||
/*
 * NOTE(review): on this branch err is the non-negative xenbus_scanf()
 * result, and that is what gets passed to xenbus_dev_fatal() below; the
 * message also says "exceeding the maximum" when the guest wrote 0.
 */
if (requested_num_queues > xenblk_max_queues
|
||||
|| requested_num_queues == 0) {
|
||||
/* Buggy or malicious guest. */
|
||||
xenbus_dev_fatal(dev, err,
|
||||
"guest requested %u queues, exceeding the maximum of %u.",
|
||||
requested_num_queues, xenblk_max_queues);
|
||||
return -ENOSYS;
|
||||
}
|
||||
}
|
||||
be->blkif->nr_rings = requested_num_queues;
|
||||
if (xen_blkif_alloc_rings(be->blkif))
|
||||
return -ENOMEM;
|
||||
|
||||
pr_info("%s: using %d queues, protocol %d (%s) %s\n", dev->nodename,
|
||||
be->blkif->nr_rings, be->blkif->blk_protocol, protocol,
|
||||
pers_grants ? "persistent grants" : "");
|
||||
|
||||
/* A single ring keeps its keys directly under the frontend directory. */
if (be->blkif->nr_rings == 1)
|
||||
return read_per_ring_refs(&be->blkif->rings[0], dev->otherend);
|
||||
else {
|
||||
/* Scratch buffer reused for each "<otherend>/queue-<i>" path. */
xspathsize = strlen(dev->otherend) + xenstore_path_ext_size;
|
||||
xspath = kmalloc(xspathsize, GFP_KERNEL);
|
||||
if (!xspath) {
|
||||
xenbus_dev_fatal(dev, -ENOMEM, "reading ring references");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
for (i = 0; i < be->blkif->nr_rings; i++) {
|
||||
memset(xspath, 0, xspathsize);
|
||||
snprintf(xspath, xspathsize, "%s/queue-%u", dev->otherend, i);
|
||||
err = read_per_ring_refs(&be->blkif->rings[i], xspath);
|
||||
if (err) {
|
||||
kfree(xspath);
|
||||
return err;
|
||||
}
|
||||
}
|
||||
kfree(xspath);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct xenbus_device_id xen_blkbk_ids[] = {
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1741,6 +1741,7 @@ static void bch_btree_gc(struct cache_set *c)
|
||||
do {
|
||||
ret = btree_root(gc_root, c, &op, &writes, &stats);
|
||||
closure_sync(&writes);
|
||||
cond_resched();
|
||||
|
||||
if (ret && ret != -EAGAIN)
|
||||
pr_warn("gc failed!");
|
||||
@ -2162,8 +2163,10 @@ int bch_btree_insert_check_key(struct btree *b, struct btree_op *op,
|
||||
rw_lock(true, b, b->level);
|
||||
|
||||
if (b->key.ptr[0] != btree_ptr ||
|
||||
b->seq != seq + 1)
|
||||
b->seq != seq + 1) {
|
||||
op->lock = b->level;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
SET_KEY_PTRS(check_key, 1);
|
||||
|
@ -685,6 +685,8 @@ static void bcache_device_link(struct bcache_device *d, struct cache_set *c,
|
||||
WARN(sysfs_create_link(&d->kobj, &c->kobj, "cache") ||
|
||||
sysfs_create_link(&c->kobj, &d->kobj, d->name),
|
||||
"Couldn't create device <-> cache set symlinks");
|
||||
|
||||
clear_bit(BCACHE_DEV_UNLINK_DONE, &d->flags);
|
||||
}
|
||||
|
||||
static void bcache_device_detach(struct bcache_device *d)
|
||||
@ -847,8 +849,11 @@ void bch_cached_dev_run(struct cached_dev *dc)
|
||||
buf[SB_LABEL_SIZE] = '\0';
|
||||
env[2] = kasprintf(GFP_KERNEL, "CACHED_LABEL=%s", buf);
|
||||
|
||||
if (atomic_xchg(&dc->running, 1))
|
||||
if (atomic_xchg(&dc->running, 1)) {
|
||||
kfree(env[1]);
|
||||
kfree(env[2]);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!d->c &&
|
||||
BDEV_STATE(&dc->sb) != BDEV_STATE_NONE) {
|
||||
@ -1933,6 +1938,8 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
|
||||
else
|
||||
err = "device busy";
|
||||
mutex_unlock(&bch_register_lock);
|
||||
if (attr == &ksysfs_register_quiet)
|
||||
goto out;
|
||||
}
|
||||
goto err;
|
||||
}
|
||||
@ -1971,8 +1978,7 @@ out:
|
||||
err_close:
|
||||
blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
|
||||
err:
|
||||
if (attr != &ksysfs_register_quiet)
|
||||
pr_info("error opening %s: %s", path, err);
|
||||
pr_info("error opening %s: %s", path, err);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
@ -2066,8 +2072,10 @@ static int __init bcache_init(void)
|
||||
closure_debug_init();
|
||||
|
||||
bcache_major = register_blkdev(0, "bcache");
|
||||
if (bcache_major < 0)
|
||||
if (bcache_major < 0) {
|
||||
unregister_reboot_notifier(&reboot);
|
||||
return bcache_major;
|
||||
}
|
||||
|
||||
if (!(bcache_wq = create_workqueue("bcache")) ||
|
||||
!(bcache_kobj = kobject_create_and_add("bcache", fs_kobj)) ||
|
||||
|
@ -323,6 +323,10 @@ void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned inode,
|
||||
|
||||
/*
 * Keybuf predicate: accept key @k only if KEY_DIRTY(k) is set.
 *
 * @buf must be the writeback_keys keybuf embedded in a struct cached_dev
 * (it is recovered with container_of()). BUG()s if the key's inode does not
 * match that device's disk id.
 */
static bool dirty_pred(struct keybuf *buf, struct bkey *k)
|
||||
{
|
||||
struct cached_dev *dc = container_of(buf, struct cached_dev, writeback_keys);
|
||||
|
||||
BUG_ON(KEY_INODE(k) != dc->disk.id);
|
||||
|
||||
return KEY_DIRTY(k);
|
||||
}
|
||||
|
||||
@ -372,11 +376,24 @@ next:
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns true if we scanned the entire disk
|
||||
*/
|
||||
static bool refill_dirty(struct cached_dev *dc)
|
||||
{
|
||||
struct keybuf *buf = &dc->writeback_keys;
|
||||
struct bkey start = KEY(dc->disk.id, 0, 0);
|
||||
struct bkey end = KEY(dc->disk.id, MAX_KEY_OFFSET, 0);
|
||||
bool searched_from_start = false;
|
||||
struct bkey start_pos;
|
||||
|
||||
/*
|
||||
* make sure keybuf pos is inside the range for this disk - at bringup
|
||||
* we might not be attached yet so this disk's inode nr isn't
|
||||
* initialized then
|
||||
*/
|
||||
if (bkey_cmp(&buf->last_scanned, &start) < 0 ||
|
||||
bkey_cmp(&buf->last_scanned, &end) > 0)
|
||||
buf->last_scanned = start;
|
||||
|
||||
if (dc->partial_stripes_expensive) {
|
||||
refill_full_stripes(dc);
|
||||
@ -384,14 +401,20 @@ static bool refill_dirty(struct cached_dev *dc)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (bkey_cmp(&buf->last_scanned, &end) >= 0) {
|
||||
buf->last_scanned = KEY(dc->disk.id, 0, 0);
|
||||
searched_from_start = true;
|
||||
}
|
||||
|
||||
start_pos = buf->last_scanned;
|
||||
bch_refill_keybuf(dc->disk.c, buf, &end, dirty_pred);
|
||||
|
||||
return bkey_cmp(&buf->last_scanned, &end) >= 0 && searched_from_start;
|
||||
if (bkey_cmp(&buf->last_scanned, &end) < 0)
|
||||
return false;
|
||||
|
||||
/*
|
||||
* If we get to the end, start scanning again from the beginning, and
|
||||
* only scan up to where we initially started scanning from:
|
||||
*/
|
||||
buf->last_scanned = start;
|
||||
bch_refill_keybuf(dc->disk.c, buf, &start_pos, dirty_pred);
|
||||
|
||||
return bkey_cmp(&buf->last_scanned, &start_pos) >= 0;
|
||||
}
|
||||
|
||||
static int bch_writeback_thread(void *arg)
|
||||
|
@ -63,7 +63,8 @@ static inline bool should_writeback(struct cached_dev *dc, struct bio *bio,
|
||||
|
||||
static inline void bch_writeback_queue(struct cached_dev *dc)
|
||||
{
|
||||
wake_up_process(dc->writeback_thread);
|
||||
if (!IS_ERR_OR_NULL(dc->writeback_thread))
|
||||
wake_up_process(dc->writeback_thread);
|
||||
}
|
||||
|
||||
static inline void bch_writeback_add(struct cached_dev *dc)
|
||||
|
@ -25,7 +25,6 @@
|
||||
*/
|
||||
#ifndef DRBD_H
|
||||
#define DRBD_H
|
||||
#include <linux/connector.h>
|
||||
#include <asm/types.h>
|
||||
|
||||
#ifdef __KERNEL__
|
||||
@ -52,7 +51,7 @@
|
||||
#endif
|
||||
|
||||
extern const char *drbd_buildtag(void);
|
||||
#define REL_VERSION "8.4.5"
|
||||
#define REL_VERSION "8.4.6"
|
||||
#define API_VERSION 1
|
||||
#define PRO_VERSION_MIN 86
|
||||
#define PRO_VERSION_MAX 101
|
||||
@ -339,6 +338,8 @@ enum drbd_state_rv {
|
||||
#define MDF_AL_CLEAN (1 << 7)
|
||||
#define MDF_AL_DISABLED (1 << 8)
|
||||
|
||||
#define MAX_PEERS 32
|
||||
|
||||
enum drbd_uuid_index {
|
||||
UI_CURRENT,
|
||||
UI_BITMAP,
|
||||
@ -349,14 +350,35 @@ enum drbd_uuid_index {
|
||||
UI_EXTENDED_SIZE /* Everything. */
|
||||
};
|
||||
|
||||
#define HISTORY_UUIDS MAX_PEERS
|
||||
|
||||
enum drbd_timeout_flag {
|
||||
UT_DEFAULT = 0,
|
||||
UT_DEGRADED = 1,
|
||||
UT_PEER_OUTDATED = 2,
|
||||
};
|
||||
|
||||
enum drbd_notification_type {
|
||||
NOTIFY_EXISTS,
|
||||
NOTIFY_CREATE,
|
||||
NOTIFY_CHANGE,
|
||||
NOTIFY_DESTROY,
|
||||
NOTIFY_CALL,
|
||||
NOTIFY_RESPONSE,
|
||||
|
||||
NOTIFY_CONTINUES = 0x8000,
|
||||
NOTIFY_FLAGS = NOTIFY_CONTINUES,
|
||||
};
|
||||
|
||||
#define UUID_JUST_CREATED ((__u64)4)
|
||||
|
||||
enum write_ordering_e {
|
||||
WO_NONE,
|
||||
WO_DRAIN_IO,
|
||||
WO_BDEV_FLUSH,
|
||||
WO_BIO_BARRIER
|
||||
};
|
||||
|
||||
/* magic numbers used in meta data and network packets */
|
||||
#define DRBD_MAGIC 0x83740267
|
||||
#define DRBD_MAGIC_BIG 0x835a
|
||||
|
@ -250,6 +250,76 @@ GENL_struct(DRBD_NLA_DETACH_PARMS, 13, detach_parms,
|
||||
__flg_field(1, DRBD_GENLA_F_MANDATORY, force_detach)
|
||||
)
|
||||
|
||||
GENL_struct(DRBD_NLA_RESOURCE_INFO, 15, resource_info,
|
||||
__u32_field(1, 0, res_role)
|
||||
__flg_field(2, 0, res_susp)
|
||||
__flg_field(3, 0, res_susp_nod)
|
||||
__flg_field(4, 0, res_susp_fen)
|
||||
/* __flg_field(5, 0, res_weak) */
|
||||
)
|
||||
|
||||
GENL_struct(DRBD_NLA_DEVICE_INFO, 16, device_info,
|
||||
__u32_field(1, 0, dev_disk_state)
|
||||
)
|
||||
|
||||
GENL_struct(DRBD_NLA_CONNECTION_INFO, 17, connection_info,
|
||||
__u32_field(1, 0, conn_connection_state)
|
||||
__u32_field(2, 0, conn_role)
|
||||
)
|
||||
|
||||
GENL_struct(DRBD_NLA_PEER_DEVICE_INFO, 18, peer_device_info,
|
||||
__u32_field(1, 0, peer_repl_state)
|
||||
__u32_field(2, 0, peer_disk_state)
|
||||
__u32_field(3, 0, peer_resync_susp_user)
|
||||
__u32_field(4, 0, peer_resync_susp_peer)
|
||||
__u32_field(5, 0, peer_resync_susp_dependency)
|
||||
)
|
||||
|
||||
GENL_struct(DRBD_NLA_RESOURCE_STATISTICS, 19, resource_statistics,
|
||||
__u32_field(1, 0, res_stat_write_ordering)
|
||||
)
|
||||
|
||||
GENL_struct(DRBD_NLA_DEVICE_STATISTICS, 20, device_statistics,
|
||||
__u64_field(1, 0, dev_size) /* (sectors) */
|
||||
__u64_field(2, 0, dev_read) /* (sectors) */
|
||||
__u64_field(3, 0, dev_write) /* (sectors) */
|
||||
__u64_field(4, 0, dev_al_writes) /* activity log writes (count) */
|
||||
__u64_field(5, 0, dev_bm_writes) /* bitmap writes (count) */
|
||||
__u32_field(6, 0, dev_upper_pending) /* application requests in progress */
|
||||
__u32_field(7, 0, dev_lower_pending) /* backing device requests in progress */
|
||||
__flg_field(8, 0, dev_upper_blocked)
|
||||
__flg_field(9, 0, dev_lower_blocked)
|
||||
__flg_field(10, 0, dev_al_suspended) /* activity log suspended */
|
||||
__u64_field(11, 0, dev_exposed_data_uuid)
|
||||
__u64_field(12, 0, dev_current_uuid)
|
||||
__u32_field(13, 0, dev_disk_flags)
|
||||
__bin_field(14, 0, history_uuids, HISTORY_UUIDS * sizeof(__u64))
|
||||
)
|
||||
|
||||
GENL_struct(DRBD_NLA_CONNECTION_STATISTICS, 21, connection_statistics,
|
||||
__flg_field(1, 0, conn_congested)
|
||||
)
|
||||
|
||||
GENL_struct(DRBD_NLA_PEER_DEVICE_STATISTICS, 22, peer_device_statistics,
|
||||
__u64_field(1, 0, peer_dev_received) /* sectors */
|
||||
__u64_field(2, 0, peer_dev_sent) /* sectors */
|
||||
__u32_field(3, 0, peer_dev_pending) /* number of requests */
|
||||
__u32_field(4, 0, peer_dev_unacked) /* number of requests */
|
||||
__u64_field(5, 0, peer_dev_out_of_sync) /* sectors */
|
||||
__u64_field(6, 0, peer_dev_resync_failed) /* sectors */
|
||||
__u64_field(7, 0, peer_dev_bitmap_uuid)
|
||||
__u32_field(9, 0, peer_dev_flags)
|
||||
)
|
||||
|
||||
GENL_struct(DRBD_NLA_NOTIFICATION_HEADER, 23, drbd_notification_header,
|
||||
__u32_field(1, DRBD_GENLA_F_MANDATORY, nh_type)
|
||||
)
|
||||
|
||||
GENL_struct(DRBD_NLA_HELPER, 24, drbd_helper_info,
|
||||
__str_field(1, DRBD_GENLA_F_MANDATORY, helper_name, 32)
|
||||
__u32_field(2, DRBD_GENLA_F_MANDATORY, helper_status)
|
||||
)
|
||||
|
||||
/*
|
||||
* Notifications and commands (genlmsghdr->cmd)
|
||||
*/
|
||||
@ -382,3 +452,82 @@ GENL_op(DRBD_ADM_GET_TIMEOUT_TYPE, 26, GENL_doit(drbd_adm_get_timeout_type),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
|
||||
GENL_op(DRBD_ADM_DOWN, 27, GENL_doit(drbd_adm_down),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
|
||||
|
||||
GENL_op(DRBD_ADM_GET_RESOURCES, 30,
|
||||
GENL_op_init(
|
||||
.dumpit = drbd_adm_dump_resources,
|
||||
),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY)
|
||||
GENL_tla_expected(DRBD_NLA_RESOURCE_INFO, DRBD_GENLA_F_MANDATORY)
|
||||
GENL_tla_expected(DRBD_NLA_RESOURCE_STATISTICS, DRBD_GENLA_F_MANDATORY))
|
||||
|
||||
GENL_op(DRBD_ADM_GET_DEVICES, 31,
|
||||
GENL_op_init(
|
||||
.dumpit = drbd_adm_dump_devices,
|
||||
.done = drbd_adm_dump_devices_done,
|
||||
),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY)
|
||||
GENL_tla_expected(DRBD_NLA_DEVICE_INFO, DRBD_GENLA_F_MANDATORY)
|
||||
GENL_tla_expected(DRBD_NLA_DEVICE_STATISTICS, DRBD_GENLA_F_MANDATORY))
|
||||
|
||||
GENL_op(DRBD_ADM_GET_CONNECTIONS, 32,
|
||||
GENL_op_init(
|
||||
.dumpit = drbd_adm_dump_connections,
|
||||
.done = drbd_adm_dump_connections_done,
|
||||
),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY)
|
||||
GENL_tla_expected(DRBD_NLA_CONNECTION_INFO, DRBD_GENLA_F_MANDATORY)
|
||||
GENL_tla_expected(DRBD_NLA_CONNECTION_STATISTICS, DRBD_GENLA_F_MANDATORY))
|
||||
|
||||
GENL_op(DRBD_ADM_GET_PEER_DEVICES, 33,
|
||||
GENL_op_init(
|
||||
.dumpit = drbd_adm_dump_peer_devices,
|
||||
.done = drbd_adm_dump_peer_devices_done,
|
||||
),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY)
|
||||
GENL_tla_expected(DRBD_NLA_PEER_DEVICE_INFO, DRBD_GENLA_F_MANDATORY)
|
||||
GENL_tla_expected(DRBD_NLA_PEER_DEVICE_STATISTICS, DRBD_GENLA_F_MANDATORY))
|
||||
|
||||
GENL_notification(
|
||||
DRBD_RESOURCE_STATE, 34, events,
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
|
||||
GENL_tla_expected(DRBD_NLA_NOTIFICATION_HEADER, DRBD_F_REQUIRED)
|
||||
GENL_tla_expected(DRBD_NLA_RESOURCE_INFO, DRBD_F_REQUIRED)
|
||||
GENL_tla_expected(DRBD_NLA_RESOURCE_STATISTICS, DRBD_F_REQUIRED))
|
||||
|
||||
GENL_notification(
|
||||
DRBD_DEVICE_STATE, 35, events,
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
|
||||
GENL_tla_expected(DRBD_NLA_NOTIFICATION_HEADER, DRBD_F_REQUIRED)
|
||||
GENL_tla_expected(DRBD_NLA_DEVICE_INFO, DRBD_F_REQUIRED)
|
||||
GENL_tla_expected(DRBD_NLA_DEVICE_STATISTICS, DRBD_F_REQUIRED))
|
||||
|
||||
GENL_notification(
|
||||
DRBD_CONNECTION_STATE, 36, events,
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
|
||||
GENL_tla_expected(DRBD_NLA_NOTIFICATION_HEADER, DRBD_F_REQUIRED)
|
||||
GENL_tla_expected(DRBD_NLA_CONNECTION_INFO, DRBD_F_REQUIRED)
|
||||
GENL_tla_expected(DRBD_NLA_CONNECTION_STATISTICS, DRBD_F_REQUIRED))
|
||||
|
||||
GENL_notification(
|
||||
DRBD_PEER_DEVICE_STATE, 37, events,
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
|
||||
GENL_tla_expected(DRBD_NLA_NOTIFICATION_HEADER, DRBD_F_REQUIRED)
|
||||
GENL_tla_expected(DRBD_NLA_PEER_DEVICE_INFO, DRBD_F_REQUIRED)
|
||||
GENL_tla_expected(DRBD_NLA_PEER_DEVICE_STATISTICS, DRBD_F_REQUIRED))
|
||||
|
||||
GENL_op(
|
||||
DRBD_ADM_GET_INITIAL_STATE, 38,
|
||||
GENL_op_init(
|
||||
.dumpit = drbd_adm_get_initial_state,
|
||||
),
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY))
|
||||
|
||||
GENL_notification(
|
||||
DRBD_HELPER, 40, events,
|
||||
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
|
||||
GENL_tla_expected(DRBD_NLA_HELPER, DRBD_F_REQUIRED))
|
||||
|
||||
GENL_notification(
|
||||
DRBD_INITIAL_STATE_DONE, 41, events,
|
||||
GENL_tla_expected(DRBD_NLA_NOTIFICATION_HEADER, DRBD_F_REQUIRED))
|
||||
|
@ -135,6 +135,20 @@ static inline void *idr_find(struct idr *idr, int id)
|
||||
#define idr_for_each_entry(idp, entry, id) \
|
||||
for (id = 0; ((entry) = idr_get_next(idp, &(id))) != NULL; ++id)
|
||||
|
||||
/**
|
||||
* idr_for_each_entry_continue - continue iteration over an idr's elements of a given type
|
||||
* @idp: idr handle
|
||||
* @entry: the type * to use as cursor
|
||||
* @id: id entry's key
|
||||
*
|
||||
* Continue to iterate over the idr's entries of the given type, continuing after
|
||||
* the current position.
|
||||
*/
|
||||
#define idr_for_each_entry_continue(idp, entry, id) \
|
||||
for ((entry) = idr_get_next((idp), &(id)); \
|
||||
entry; \
|
||||
++id, (entry) = idr_get_next((idp), &(id)))
|
||||
|
||||
/*
|
||||
* IDA - IDR based id allocator, use when translation from id to
|
||||
* pointer isn't necessary.
|
||||
|
@ -264,7 +264,7 @@ extern unsigned int lc_put(struct lru_cache *lc, struct lc_element *e);
|
||||
extern void lc_committed(struct lru_cache *lc);
|
||||
|
||||
struct seq_file;
|
||||
extern size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc);
|
||||
extern void lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc);
|
||||
|
||||
extern void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char *utext,
|
||||
void (*detail) (struct seq_file *, struct lc_element *));
|
||||
|
@ -27,6 +27,54 @@
|
||||
typedef uint16_t blkif_vdev_t;
|
||||
typedef uint64_t blkif_sector_t;
|
||||
|
||||
/*
|
||||
* Multiple hardware queues/rings:
|
||||
* If supported, the backend will write the key "multi-queue-max-queues" to
|
||||
* the directory for that vbd, and set its value to the maximum supported
|
||||
* number of queues.
|
||||
* Frontends that are aware of this feature and wish to use it can write the
|
||||
* key "multi-queue-num-queues" with the number they wish to use, which must be
|
||||
* greater than zero, and no more than the value reported by the backend in
|
||||
* "multi-queue-max-queues".
|
||||
*
|
||||
* For frontends requesting just one queue, the usual event-channel and
|
||||
* ring-ref keys are written as before, simplifying the backend processing
|
||||
* to avoid distinguishing between a frontend that doesn't understand the
|
||||
* multi-queue feature, and one that does, but requested only one queue.
|
||||
*
|
||||
* Frontends requesting two or more queues must not write the toplevel
|
||||
* event-channel and ring-ref keys, instead writing those keys under sub-keys
|
||||
* having the name "queue-N" where N is the integer ID of the queue/ring to
|
||||
* which those keys belong. Queues are indexed from zero.
|
||||
* For example, a frontend with two queues must write the following set of
|
||||
* queue-related keys:
|
||||
*
|
||||
* /local/domain/1/device/vbd/0/multi-queue-num-queues = "2"
|
||||
* /local/domain/1/device/vbd/0/queue-0 = ""
|
||||
* /local/domain/1/device/vbd/0/queue-0/ring-ref = "<ring-ref#0>"
|
||||
* /local/domain/1/device/vbd/0/queue-0/event-channel = "<evtchn#0>"
|
||||
* /local/domain/1/device/vbd/0/queue-1 = ""
|
||||
* /local/domain/1/device/vbd/0/queue-1/ring-ref = "<ring-ref#1>"
|
||||
* /local/domain/1/device/vbd/0/queue-1/event-channel = "<evtchn#1>"
|
||||
*
|
||||
* It is also possible to use multiple queues/rings together with
|
||||
* the multi-page ring buffer feature.
|
||||
* For example, a frontend requests two queues/rings and the size of each ring
|
||||
* buffer is two pages; it must write the following set of related keys:
|
||||
*
|
||||
* /local/domain/1/device/vbd/0/multi-queue-num-queues = "2"
|
||||
* /local/domain/1/device/vbd/0/ring-page-order = "1"
|
||||
* /local/domain/1/device/vbd/0/queue-0 = ""
|
||||
* /local/domain/1/device/vbd/0/queue-0/ring-ref0 = "<ring-ref#0>"
|
||||
* /local/domain/1/device/vbd/0/queue-0/ring-ref1 = "<ring-ref#1>"
|
||||
* /local/domain/1/device/vbd/0/queue-0/event-channel = "<evtchn#0>"
|
||||
* /local/domain/1/device/vbd/0/queue-1 = ""
|
||||
* /local/domain/1/device/vbd/0/queue-1/ring-ref0 = "<ring-ref#2>"
|
||||
* /local/domain/1/device/vbd/0/queue-1/ring-ref1 = "<ring-ref#3>"
|
||||
* /local/domain/1/device/vbd/0/queue-1/event-channel = "<evtchn#1>"
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* REQUEST CODES.
|
||||
*/
|
||||
|
@ -238,7 +238,7 @@ void lc_reset(struct lru_cache *lc)
|
||||
* @seq: the seq_file to print into
|
||||
* @lc: the lru cache to print statistics of
|
||||
*/
|
||||
size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc)
|
||||
void lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc)
|
||||
{
|
||||
/* NOTE:
|
||||
* total calls to lc_get are
|
||||
@ -250,8 +250,6 @@ size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc)
|
||||
seq_printf(seq, "\t%s: used:%u/%u hits:%lu misses:%lu starving:%lu locked:%lu changed:%lu\n",
|
||||
lc->name, lc->used, lc->nr_elements,
|
||||
lc->hits, lc->misses, lc->starving, lc->locked, lc->changed);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr)
|
||||
|
Loading…
x
Reference in New Issue
Block a user