Merge tag 'md-next-20230814-resend' into loongarch-next
LoongArch architecture changes for 6.5 (raid5/6 optimization) depend on the md changes to fix build and work, so merge them to create a base.
This commit is contained in:
commit
9d1785590b
@ -5,6 +5,7 @@
|
||||
menuconfig BLOCK
|
||||
bool "Enable the block layer" if EXPERT
|
||||
default y
|
||||
select FS_IOMAP
|
||||
select SBITMAP
|
||||
help
|
||||
Provide block layer support for the kernel.
|
||||
|
@ -123,20 +123,38 @@ void bio_integrity_free(struct bio *bio)
|
||||
int bio_integrity_add_page(struct bio *bio, struct page *page,
|
||||
unsigned int len, unsigned int offset)
|
||||
{
|
||||
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
|
||||
struct bio_integrity_payload *bip = bio_integrity(bio);
|
||||
|
||||
if (bip->bip_vcnt >= bip->bip_max_vcnt) {
|
||||
printk(KERN_ERR "%s: bip_vec full\n", __func__);
|
||||
if (((bip->bip_iter.bi_size + len) >> SECTOR_SHIFT) >
|
||||
queue_max_hw_sectors(q))
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (bip->bip_vcnt &&
|
||||
bvec_gap_to_prev(&bdev_get_queue(bio->bi_bdev)->limits,
|
||||
&bip->bip_vec[bip->bip_vcnt - 1], offset))
|
||||
return 0;
|
||||
if (bip->bip_vcnt > 0) {
|
||||
struct bio_vec *bv = &bip->bip_vec[bip->bip_vcnt - 1];
|
||||
bool same_page = false;
|
||||
|
||||
if (bvec_try_merge_hw_page(q, bv, page, len, offset,
|
||||
&same_page)) {
|
||||
bip->bip_iter.bi_size += len;
|
||||
return len;
|
||||
}
|
||||
|
||||
if (bip->bip_vcnt >=
|
||||
min(bip->bip_max_vcnt, queue_max_integrity_segments(q)))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* If the queue doesn't support SG gaps and adding this segment
|
||||
* would create a gap, disallow it.
|
||||
*/
|
||||
if (bvec_gap_to_prev(&q->limits, bv, offset))
|
||||
return 0;
|
||||
}
|
||||
|
||||
bvec_set_page(&bip->bip_vec[bip->bip_vcnt], page, len, offset);
|
||||
bip->bip_vcnt++;
|
||||
bip->bip_iter.bi_size += len;
|
||||
|
||||
return len;
|
||||
}
|
||||
@ -199,8 +217,6 @@ bool bio_integrity_prep(struct bio *bio)
|
||||
unsigned long start, end;
|
||||
unsigned int len, nr_pages;
|
||||
unsigned int bytes, offset, i;
|
||||
unsigned int intervals;
|
||||
blk_status_t status;
|
||||
|
||||
if (!bi)
|
||||
return true;
|
||||
@ -224,12 +240,10 @@ bool bio_integrity_prep(struct bio *bio)
|
||||
!(bi->flags & BLK_INTEGRITY_GENERATE))
|
||||
return true;
|
||||
}
|
||||
intervals = bio_integrity_intervals(bi, bio_sectors(bio));
|
||||
|
||||
/* Allocate kernel buffer for protection data */
|
||||
len = intervals * bi->tuple_size;
|
||||
len = bio_integrity_bytes(bi, bio_sectors(bio));
|
||||
buf = kmalloc(len, GFP_NOIO);
|
||||
status = BLK_STS_RESOURCE;
|
||||
if (unlikely(buf == NULL)) {
|
||||
printk(KERN_ERR "could not allocate integrity buffer\n");
|
||||
goto err_end_io;
|
||||
@ -244,12 +258,10 @@ bool bio_integrity_prep(struct bio *bio)
|
||||
if (IS_ERR(bip)) {
|
||||
printk(KERN_ERR "could not allocate data integrity bioset\n");
|
||||
kfree(buf);
|
||||
status = BLK_STS_RESOURCE;
|
||||
goto err_end_io;
|
||||
}
|
||||
|
||||
bip->bip_flags |= BIP_BLOCK_INTEGRITY;
|
||||
bip->bip_iter.bi_size = len;
|
||||
bip_set_seed(bip, bio->bi_iter.bi_sector);
|
||||
|
||||
if (bi->flags & BLK_INTEGRITY_IP_CHECKSUM)
|
||||
@ -257,28 +269,18 @@ bool bio_integrity_prep(struct bio *bio)
|
||||
|
||||
/* Map it */
|
||||
offset = offset_in_page(buf);
|
||||
for (i = 0 ; i < nr_pages ; i++) {
|
||||
int ret;
|
||||
for (i = 0; i < nr_pages && len > 0; i++) {
|
||||
bytes = PAGE_SIZE - offset;
|
||||
|
||||
if (len <= 0)
|
||||
break;
|
||||
|
||||
if (bytes > len)
|
||||
bytes = len;
|
||||
|
||||
ret = bio_integrity_add_page(bio, virt_to_page(buf),
|
||||
bytes, offset);
|
||||
|
||||
if (ret == 0) {
|
||||
if (bio_integrity_add_page(bio, virt_to_page(buf),
|
||||
bytes, offset) < bytes) {
|
||||
printk(KERN_ERR "could not attach integrity payload\n");
|
||||
status = BLK_STS_RESOURCE;
|
||||
goto err_end_io;
|
||||
}
|
||||
|
||||
if (ret < bytes)
|
||||
break;
|
||||
|
||||
buf += bytes;
|
||||
len -= bytes;
|
||||
offset = 0;
|
||||
@ -294,10 +296,9 @@ bool bio_integrity_prep(struct bio *bio)
|
||||
return true;
|
||||
|
||||
err_end_io:
|
||||
bio->bi_status = status;
|
||||
bio->bi_status = BLK_STS_RESOURCE;
|
||||
bio_endio(bio);
|
||||
return false;
|
||||
|
||||
}
|
||||
EXPORT_SYMBOL(bio_integrity_prep);
|
||||
|
||||
|
142
block/bio.c
142
block/bio.c
@ -606,15 +606,15 @@ struct bio *bio_kmalloc(unsigned short nr_vecs, gfp_t gfp_mask)
|
||||
}
|
||||
EXPORT_SYMBOL(bio_kmalloc);
|
||||
|
||||
void zero_fill_bio(struct bio *bio)
|
||||
void zero_fill_bio_iter(struct bio *bio, struct bvec_iter start)
|
||||
{
|
||||
struct bio_vec bv;
|
||||
struct bvec_iter iter;
|
||||
|
||||
bio_for_each_segment(bv, bio, iter)
|
||||
__bio_for_each_segment(bv, bio, iter, start)
|
||||
memzero_bvec(&bv);
|
||||
}
|
||||
EXPORT_SYMBOL(zero_fill_bio);
|
||||
EXPORT_SYMBOL(zero_fill_bio_iter);
|
||||
|
||||
/**
|
||||
* bio_truncate - truncate the bio to small size of @new_size
|
||||
@ -903,9 +903,8 @@ static inline bool bio_full(struct bio *bio, unsigned len)
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool page_is_mergeable(const struct bio_vec *bv,
|
||||
struct page *page, unsigned int len, unsigned int off,
|
||||
bool *same_page)
|
||||
static bool bvec_try_merge_page(struct bio_vec *bv, struct page *page,
|
||||
unsigned int len, unsigned int off, bool *same_page)
|
||||
{
|
||||
size_t bv_end = bv->bv_offset + bv->bv_len;
|
||||
phys_addr_t vec_end_addr = page_to_phys(bv->bv_page) + bv_end - 1;
|
||||
@ -919,49 +918,15 @@ static inline bool page_is_mergeable(const struct bio_vec *bv,
|
||||
return false;
|
||||
|
||||
*same_page = ((vec_end_addr & PAGE_MASK) == page_addr);
|
||||
if (*same_page)
|
||||
return true;
|
||||
else if (IS_ENABLED(CONFIG_KMSAN))
|
||||
return false;
|
||||
return (bv->bv_page + bv_end / PAGE_SIZE) == (page + off / PAGE_SIZE);
|
||||
}
|
||||
|
||||
/**
|
||||
* __bio_try_merge_page - try appending data to an existing bvec.
|
||||
* @bio: destination bio
|
||||
* @page: start page to add
|
||||
* @len: length of the data to add
|
||||
* @off: offset of the data relative to @page
|
||||
* @same_page: return if the segment has been merged inside the same page
|
||||
*
|
||||
* Try to add the data at @page + @off to the last bvec of @bio. This is a
|
||||
* useful optimisation for file systems with a block size smaller than the
|
||||
* page size.
|
||||
*
|
||||
* Warn if (@len, @off) crosses pages in case that @same_page is true.
|
||||
*
|
||||
* Return %true on success or %false on failure.
|
||||
*/
|
||||
static bool __bio_try_merge_page(struct bio *bio, struct page *page,
|
||||
unsigned int len, unsigned int off, bool *same_page)
|
||||
{
|
||||
if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
|
||||
return false;
|
||||
|
||||
if (bio->bi_vcnt > 0) {
|
||||
struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
|
||||
|
||||
if (page_is_mergeable(bv, page, len, off, same_page)) {
|
||||
if (bio->bi_iter.bi_size > UINT_MAX - len) {
|
||||
*same_page = false;
|
||||
return false;
|
||||
}
|
||||
bv->bv_len += len;
|
||||
bio->bi_iter.bi_size += len;
|
||||
return true;
|
||||
}
|
||||
if (!*same_page) {
|
||||
if (IS_ENABLED(CONFIG_KMSAN))
|
||||
return false;
|
||||
if (bv->bv_page + bv_end / PAGE_SIZE != page + off / PAGE_SIZE)
|
||||
return false;
|
||||
}
|
||||
return false;
|
||||
|
||||
bv->bv_len += len;
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -969,11 +934,10 @@ static bool __bio_try_merge_page(struct bio *bio, struct page *page,
|
||||
* size limit. This is not for normal read/write bios, but for passthrough
|
||||
* or Zone Append operations that we can't split.
|
||||
*/
|
||||
static bool bio_try_merge_hw_seg(struct request_queue *q, struct bio *bio,
|
||||
struct page *page, unsigned len,
|
||||
unsigned offset, bool *same_page)
|
||||
bool bvec_try_merge_hw_page(struct request_queue *q, struct bio_vec *bv,
|
||||
struct page *page, unsigned len, unsigned offset,
|
||||
bool *same_page)
|
||||
{
|
||||
struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
|
||||
unsigned long mask = queue_segment_boundary(q);
|
||||
phys_addr_t addr1 = page_to_phys(bv->bv_page) + bv->bv_offset;
|
||||
phys_addr_t addr2 = page_to_phys(page) + offset + len - 1;
|
||||
@ -982,7 +946,7 @@ static bool bio_try_merge_hw_seg(struct request_queue *q, struct bio *bio,
|
||||
return false;
|
||||
if (bv->bv_len + len > queue_max_segment_size(q))
|
||||
return false;
|
||||
return __bio_try_merge_page(bio, page, len, offset, same_page);
|
||||
return bvec_try_merge_page(bv, page, len, offset, same_page);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1002,33 +966,33 @@ int bio_add_hw_page(struct request_queue *q, struct bio *bio,
|
||||
struct page *page, unsigned int len, unsigned int offset,
|
||||
unsigned int max_sectors, bool *same_page)
|
||||
{
|
||||
struct bio_vec *bvec;
|
||||
|
||||
if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
|
||||
return 0;
|
||||
|
||||
if (((bio->bi_iter.bi_size + len) >> 9) > max_sectors)
|
||||
if (((bio->bi_iter.bi_size + len) >> SECTOR_SHIFT) > max_sectors)
|
||||
return 0;
|
||||
|
||||
if (bio->bi_vcnt > 0) {
|
||||
if (bio_try_merge_hw_seg(q, bio, page, len, offset, same_page))
|
||||
struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
|
||||
|
||||
if (bvec_try_merge_hw_page(q, bv, page, len, offset,
|
||||
same_page)) {
|
||||
bio->bi_iter.bi_size += len;
|
||||
return len;
|
||||
}
|
||||
|
||||
if (bio->bi_vcnt >=
|
||||
min(bio->bi_max_vecs, queue_max_segments(q)))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* If the queue doesn't support SG gaps and adding this segment
|
||||
* would create a gap, disallow it.
|
||||
*/
|
||||
bvec = &bio->bi_io_vec[bio->bi_vcnt - 1];
|
||||
if (bvec_gap_to_prev(&q->limits, bvec, offset))
|
||||
if (bvec_gap_to_prev(&q->limits, bv, offset))
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (bio_full(bio, len))
|
||||
return 0;
|
||||
|
||||
if (bio->bi_vcnt >= queue_max_segments(q))
|
||||
return 0;
|
||||
|
||||
bvec_set_page(&bio->bi_io_vec[bio->bi_vcnt], page, len, offset);
|
||||
bio->bi_vcnt++;
|
||||
bio->bi_iter.bi_size += len;
|
||||
@ -1129,11 +1093,21 @@ int bio_add_page(struct bio *bio, struct page *page,
|
||||
{
|
||||
bool same_page = false;
|
||||
|
||||
if (!__bio_try_merge_page(bio, page, len, offset, &same_page)) {
|
||||
if (bio_full(bio, len))
|
||||
return 0;
|
||||
__bio_add_page(bio, page, len, offset);
|
||||
if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
|
||||
return 0;
|
||||
if (bio->bi_iter.bi_size > UINT_MAX - len)
|
||||
return 0;
|
||||
|
||||
if (bio->bi_vcnt > 0 &&
|
||||
bvec_try_merge_page(&bio->bi_io_vec[bio->bi_vcnt - 1],
|
||||
page, len, offset, &same_page)) {
|
||||
bio->bi_iter.bi_size += len;
|
||||
return len;
|
||||
}
|
||||
|
||||
if (bio->bi_vcnt >= bio->bi_max_vecs)
|
||||
return 0;
|
||||
__bio_add_page(bio, page, len, offset);
|
||||
return len;
|
||||
}
|
||||
EXPORT_SYMBOL(bio_add_page);
|
||||
@ -1207,13 +1181,18 @@ static int bio_iov_add_page(struct bio *bio, struct page *page,
|
||||
{
|
||||
bool same_page = false;
|
||||
|
||||
if (!__bio_try_merge_page(bio, page, len, offset, &same_page)) {
|
||||
__bio_add_page(bio, page, len, offset);
|
||||
if (WARN_ON_ONCE(bio->bi_iter.bi_size > UINT_MAX - len))
|
||||
return -EIO;
|
||||
|
||||
if (bio->bi_vcnt > 0 &&
|
||||
bvec_try_merge_page(&bio->bi_io_vec[bio->bi_vcnt - 1],
|
||||
page, len, offset, &same_page)) {
|
||||
bio->bi_iter.bi_size += len;
|
||||
if (same_page)
|
||||
bio_release_page(bio, page);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (same_page)
|
||||
bio_release_page(bio, page);
|
||||
__bio_add_page(bio, page, len, offset);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1252,7 +1231,7 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
|
||||
struct page **pages = (struct page **)bv;
|
||||
ssize_t size, left;
|
||||
unsigned len, i = 0;
|
||||
size_t offset, trim;
|
||||
size_t offset;
|
||||
int ret = 0;
|
||||
|
||||
/*
|
||||
@ -1281,10 +1260,12 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
|
||||
|
||||
nr_pages = DIV_ROUND_UP(offset + size, PAGE_SIZE);
|
||||
|
||||
trim = size & (bdev_logical_block_size(bio->bi_bdev) - 1);
|
||||
iov_iter_revert(iter, trim);
|
||||
if (bio->bi_bdev) {
|
||||
size_t trim = size & (bdev_logical_block_size(bio->bi_bdev) - 1);
|
||||
iov_iter_revert(iter, trim);
|
||||
size -= trim;
|
||||
}
|
||||
|
||||
size -= trim;
|
||||
if (unlikely(!size)) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
@ -1337,6 +1318,9 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
|
||||
return -EIO;
|
||||
|
||||
if (iov_iter_is_bvec(iter)) {
|
||||
bio_iov_bvec_set(bio, iter);
|
||||
iov_iter_advance(iter, bio->bi_iter.bi_size);
|
||||
@ -1490,6 +1474,7 @@ void bio_set_pages_dirty(struct bio *bio)
|
||||
set_page_dirty_lock(bvec->bv_page);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bio_set_pages_dirty);
|
||||
|
||||
/*
|
||||
* bio_check_pages_dirty() will check that all the BIO's pages are still dirty.
|
||||
@ -1549,6 +1534,7 @@ defer:
|
||||
spin_unlock_irqrestore(&bio_dirty_lock, flags);
|
||||
schedule_work(&bio_dirty_work);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bio_check_pages_dirty);
|
||||
|
||||
static inline bool bio_remaining_done(struct bio *bio)
|
||||
{
|
||||
|
@ -208,6 +208,7 @@ const char *blk_status_to_str(blk_status_t status)
|
||||
return "<null>";
|
||||
return blk_errors[idx].name;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_status_to_str);
|
||||
|
||||
/**
|
||||
* blk_sync_queue - cancel any pending callbacks on a queue
|
||||
|
@ -183,13 +183,13 @@ static void blk_flush_complete_seq(struct request *rq,
|
||||
/* queue for flush */
|
||||
if (list_empty(pending))
|
||||
fq->flush_pending_since = jiffies;
|
||||
list_move_tail(&rq->flush.list, pending);
|
||||
list_move_tail(&rq->queuelist, pending);
|
||||
break;
|
||||
|
||||
case REQ_FSEQ_DATA:
|
||||
list_move_tail(&rq->flush.list, &fq->flush_data_in_flight);
|
||||
fq->flush_data_in_flight++;
|
||||
spin_lock(&q->requeue_lock);
|
||||
list_add(&rq->queuelist, &q->requeue_list);
|
||||
list_move(&rq->queuelist, &q->requeue_list);
|
||||
spin_unlock(&q->requeue_lock);
|
||||
blk_mq_kick_requeue_list(q);
|
||||
break;
|
||||
@ -201,7 +201,7 @@ static void blk_flush_complete_seq(struct request *rq,
|
||||
* flush data request completion path. Restore @rq for
|
||||
* normal completion and end it.
|
||||
*/
|
||||
list_del_init(&rq->flush.list);
|
||||
list_del_init(&rq->queuelist);
|
||||
blk_flush_restore_request(rq);
|
||||
blk_mq_end_request(rq, error);
|
||||
break;
|
||||
@ -257,7 +257,7 @@ static enum rq_end_io_ret flush_end_io(struct request *flush_rq,
|
||||
fq->flush_running_idx ^= 1;
|
||||
|
||||
/* and push the waiting requests to the next stage */
|
||||
list_for_each_entry_safe(rq, n, running, flush.list) {
|
||||
list_for_each_entry_safe(rq, n, running, queuelist) {
|
||||
unsigned int seq = blk_flush_cur_seq(rq);
|
||||
|
||||
BUG_ON(seq != REQ_FSEQ_PREFLUSH && seq != REQ_FSEQ_POSTFLUSH);
|
||||
@ -291,7 +291,7 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq,
|
||||
{
|
||||
struct list_head *pending = &fq->flush_queue[fq->flush_pending_idx];
|
||||
struct request *first_rq =
|
||||
list_first_entry(pending, struct request, flush.list);
|
||||
list_first_entry(pending, struct request, queuelist);
|
||||
struct request *flush_rq = fq->flush_rq;
|
||||
|
||||
/* C1 described at the top of this file */
|
||||
@ -299,7 +299,7 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq,
|
||||
return;
|
||||
|
||||
/* C2 and C3 */
|
||||
if (!list_empty(&fq->flush_data_in_flight) &&
|
||||
if (fq->flush_data_in_flight &&
|
||||
time_before(jiffies,
|
||||
fq->flush_pending_since + FLUSH_PENDING_TIMEOUT))
|
||||
return;
|
||||
@ -374,6 +374,12 @@ static enum rq_end_io_ret mq_flush_data_end_io(struct request *rq,
|
||||
* the comment in flush_end_io().
|
||||
*/
|
||||
spin_lock_irqsave(&fq->mq_flush_lock, flags);
|
||||
fq->flush_data_in_flight--;
|
||||
/*
|
||||
* May have been corrupted by rq->rq_next reuse, we need to
|
||||
* re-initialize rq->queuelist before reusing it here.
|
||||
*/
|
||||
INIT_LIST_HEAD(&rq->queuelist);
|
||||
blk_flush_complete_seq(rq, fq, REQ_FSEQ_DATA, error);
|
||||
spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
|
||||
|
||||
@ -384,7 +390,6 @@ static enum rq_end_io_ret mq_flush_data_end_io(struct request *rq,
|
||||
static void blk_rq_init_flush(struct request *rq)
|
||||
{
|
||||
rq->flush.seq = 0;
|
||||
INIT_LIST_HEAD(&rq->flush.list);
|
||||
rq->rq_flags |= RQF_FLUSH_SEQ;
|
||||
rq->flush.saved_end_io = rq->end_io; /* Usually NULL */
|
||||
rq->end_io = mq_flush_data_end_io;
|
||||
@ -443,9 +448,9 @@ bool blk_insert_flush(struct request *rq)
|
||||
* the post flush, and then just pass the command on.
|
||||
*/
|
||||
blk_rq_init_flush(rq);
|
||||
rq->flush.seq |= REQ_FSEQ_POSTFLUSH;
|
||||
rq->flush.seq |= REQ_FSEQ_PREFLUSH;
|
||||
spin_lock_irq(&fq->mq_flush_lock);
|
||||
list_move_tail(&rq->flush.list, &fq->flush_data_in_flight);
|
||||
fq->flush_data_in_flight++;
|
||||
spin_unlock_irq(&fq->mq_flush_lock);
|
||||
return false;
|
||||
default:
|
||||
@ -496,7 +501,6 @@ struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size,
|
||||
|
||||
INIT_LIST_HEAD(&fq->flush_queue[0]);
|
||||
INIT_LIST_HEAD(&fq->flush_queue[1]);
|
||||
INIT_LIST_HEAD(&fq->flush_data_in_flight);
|
||||
|
||||
return fq;
|
||||
|
||||
|
@ -824,29 +824,6 @@ static void iolatency_clear_scaling(struct blkcg_gq *blkg)
|
||||
}
|
||||
}
|
||||
|
||||
static int blk_iolatency_try_init(struct blkg_conf_ctx *ctx)
|
||||
{
|
||||
static DEFINE_MUTEX(init_mutex);
|
||||
int ret;
|
||||
|
||||
ret = blkg_conf_open_bdev(ctx);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* blk_iolatency_init() may fail after rq_qos_add() succeeds which can
|
||||
* confuse iolat_rq_qos() test. Make the test and init atomic.
|
||||
*/
|
||||
mutex_lock(&init_mutex);
|
||||
|
||||
if (!iolat_rq_qos(ctx->bdev->bd_queue))
|
||||
ret = blk_iolatency_init(ctx->bdev->bd_disk);
|
||||
|
||||
mutex_unlock(&init_mutex);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
|
||||
size_t nbytes, loff_t off)
|
||||
{
|
||||
@ -861,7 +838,17 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
|
||||
|
||||
blkg_conf_init(&ctx, buf);
|
||||
|
||||
ret = blk_iolatency_try_init(&ctx);
|
||||
ret = blkg_conf_open_bdev(&ctx);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* blk_iolatency_init() may fail after rq_qos_add() succeeds which can
|
||||
* confuse iolat_rq_qos() test. Make the test and init atomic.
|
||||
*/
|
||||
lockdep_assert_held(&ctx.bdev->bd_queue->rq_qos_mutex);
|
||||
if (!iolat_rq_qos(ctx.bdev->bd_queue))
|
||||
ret = blk_iolatency_init(ctx.bdev->bd_disk);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
|
@ -43,6 +43,7 @@
|
||||
#include "blk-ioprio.h"
|
||||
|
||||
static DEFINE_PER_CPU(struct llist_head, blk_cpu_done);
|
||||
static DEFINE_PER_CPU(call_single_data_t, blk_cpu_csd);
|
||||
|
||||
static void blk_mq_insert_request(struct request *rq, blk_insert_t flags);
|
||||
static void blk_mq_request_bypass_insert(struct request *rq,
|
||||
@ -1174,15 +1175,11 @@ static inline bool blk_mq_complete_need_ipi(struct request *rq)
|
||||
|
||||
static void blk_mq_complete_send_ipi(struct request *rq)
|
||||
{
|
||||
struct llist_head *list;
|
||||
unsigned int cpu;
|
||||
|
||||
cpu = rq->mq_ctx->cpu;
|
||||
list = &per_cpu(blk_cpu_done, cpu);
|
||||
if (llist_add(&rq->ipi_list, list)) {
|
||||
INIT_CSD(&rq->csd, __blk_mq_complete_request_remote, rq);
|
||||
smp_call_function_single_async(cpu, &rq->csd);
|
||||
}
|
||||
if (llist_add(&rq->ipi_list, &per_cpu(blk_cpu_done, cpu)))
|
||||
smp_call_function_single_async(cpu, &per_cpu(blk_cpu_csd, cpu));
|
||||
}
|
||||
|
||||
static void blk_mq_raise_softirq(struct request *rq)
|
||||
@ -1343,7 +1340,7 @@ void blk_execute_rq_nowait(struct request *rq, bool at_head)
|
||||
}
|
||||
|
||||
blk_mq_insert_request(rq, at_head ? BLK_MQ_INSERT_AT_HEAD : 0);
|
||||
blk_mq_run_hw_queue(hctx, false);
|
||||
blk_mq_run_hw_queue(hctx, hctx->flags & BLK_MQ_F_BLOCKING);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);
|
||||
|
||||
@ -2242,6 +2239,8 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
|
||||
*/
|
||||
WARN_ON_ONCE(!async && in_interrupt());
|
||||
|
||||
might_sleep_if(!async && hctx->flags & BLK_MQ_F_BLOCKING);
|
||||
|
||||
/*
|
||||
* When queue is quiesced, we may be switching io scheduler, or
|
||||
* updating nr_hw_queues, or other things, and we can't run queue
|
||||
@ -2257,8 +2256,7 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
|
||||
if (!need_run)
|
||||
return;
|
||||
|
||||
if (async || (hctx->flags & BLK_MQ_F_BLOCKING) ||
|
||||
!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask)) {
|
||||
if (async || !cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask)) {
|
||||
blk_mq_delay_run_hw_queue(hctx, 0);
|
||||
return;
|
||||
}
|
||||
@ -2393,7 +2391,7 @@ void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
|
||||
|
||||
blk_mq_run_hw_queue(hctx, false);
|
||||
blk_mq_run_hw_queue(hctx, hctx->flags & BLK_MQ_F_BLOCKING);
|
||||
}
|
||||
EXPORT_SYMBOL(blk_mq_start_hw_queue);
|
||||
|
||||
@ -2423,7 +2421,8 @@ void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async)
|
||||
unsigned long i;
|
||||
|
||||
queue_for_each_hw_ctx(q, hctx, i)
|
||||
blk_mq_start_stopped_hw_queue(hctx, async);
|
||||
blk_mq_start_stopped_hw_queue(hctx, async ||
|
||||
(hctx->flags & BLK_MQ_F_BLOCKING));
|
||||
}
|
||||
EXPORT_SYMBOL(blk_mq_start_stopped_hw_queues);
|
||||
|
||||
@ -2481,6 +2480,8 @@ static void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx,
|
||||
list_for_each_entry(rq, list, queuelist) {
|
||||
BUG_ON(rq->mq_ctx != ctx);
|
||||
trace_block_rq_insert(rq);
|
||||
if (rq->cmd_flags & REQ_NOWAIT)
|
||||
run_queue_async = true;
|
||||
}
|
||||
|
||||
spin_lock(&ctx->lock);
|
||||
@ -2641,7 +2642,7 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
|
||||
|
||||
if ((rq->rq_flags & RQF_USE_SCHED) || !blk_mq_get_budget_and_tag(rq)) {
|
||||
blk_mq_insert_request(rq, 0);
|
||||
blk_mq_run_hw_queue(hctx, false);
|
||||
blk_mq_run_hw_queue(hctx, rq->cmd_flags & REQ_NOWAIT);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -4853,6 +4854,9 @@ static int __init blk_mq_init(void)
|
||||
|
||||
for_each_possible_cpu(i)
|
||||
init_llist_head(&per_cpu(blk_cpu_done, i));
|
||||
for_each_possible_cpu(i)
|
||||
INIT_CSD(&per_cpu(blk_cpu_csd, i),
|
||||
__blk_mq_complete_request_remote, NULL);
|
||||
open_softirq(BLOCK_SOFTIRQ, blk_done_softirq);
|
||||
|
||||
cpuhp_setup_state_nocalls(CPUHP_BLOCK_SOFTIRQ_DEAD,
|
||||
|
@ -830,10 +830,13 @@ EXPORT_SYMBOL(blk_set_queue_depth);
|
||||
*/
|
||||
void blk_queue_write_cache(struct request_queue *q, bool wc, bool fua)
|
||||
{
|
||||
if (wc)
|
||||
if (wc) {
|
||||
blk_queue_flag_set(QUEUE_FLAG_HW_WC, q);
|
||||
blk_queue_flag_set(QUEUE_FLAG_WC, q);
|
||||
else
|
||||
} else {
|
||||
blk_queue_flag_clear(QUEUE_FLAG_HW_WC, q);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_WC, q);
|
||||
}
|
||||
if (fua)
|
||||
blk_queue_flag_set(QUEUE_FLAG_FUA, q);
|
||||
else
|
||||
|
@ -449,21 +449,16 @@ static ssize_t queue_wc_show(struct request_queue *q, char *page)
|
||||
static ssize_t queue_wc_store(struct request_queue *q, const char *page,
|
||||
size_t count)
|
||||
{
|
||||
int set = -1;
|
||||
|
||||
if (!strncmp(page, "write back", 10))
|
||||
set = 1;
|
||||
else if (!strncmp(page, "write through", 13) ||
|
||||
!strncmp(page, "none", 4))
|
||||
set = 0;
|
||||
|
||||
if (set == -1)
|
||||
return -EINVAL;
|
||||
|
||||
if (set)
|
||||
if (!strncmp(page, "write back", 10)) {
|
||||
if (!test_bit(QUEUE_FLAG_HW_WC, &q->queue_flags))
|
||||
return -EINVAL;
|
||||
blk_queue_flag_set(QUEUE_FLAG_WC, q);
|
||||
else
|
||||
} else if (!strncmp(page, "write through", 13) ||
|
||||
!strncmp(page, "none", 4)) {
|
||||
blk_queue_flag_clear(QUEUE_FLAG_WC, q);
|
||||
} else {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
10
block/blk.h
10
block/blk.h
@ -15,15 +15,14 @@ struct elevator_type;
|
||||
extern struct dentry *blk_debugfs_root;
|
||||
|
||||
struct blk_flush_queue {
|
||||
spinlock_t mq_flush_lock;
|
||||
unsigned int flush_pending_idx:1;
|
||||
unsigned int flush_running_idx:1;
|
||||
blk_status_t rq_status;
|
||||
unsigned long flush_pending_since;
|
||||
struct list_head flush_queue[2];
|
||||
struct list_head flush_data_in_flight;
|
||||
unsigned long flush_data_in_flight;
|
||||
struct request *flush_rq;
|
||||
|
||||
spinlock_t mq_flush_lock;
|
||||
};
|
||||
|
||||
bool is_flush_rq(struct request *req);
|
||||
@ -76,6 +75,10 @@ struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs,
|
||||
gfp_t gfp_mask);
|
||||
void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned short nr_vecs);
|
||||
|
||||
bool bvec_try_merge_hw_page(struct request_queue *q, struct bio_vec *bv,
|
||||
struct page *page, unsigned len, unsigned offset,
|
||||
bool *same_page);
|
||||
|
||||
static inline bool biovec_phys_mergeable(struct request_queue *q,
|
||||
struct bio_vec *vec1, struct bio_vec *vec2)
|
||||
{
|
||||
@ -251,7 +254,6 @@ static inline void bio_integrity_free(struct bio *bio)
|
||||
|
||||
unsigned long blk_rq_timeout(unsigned long timeout);
|
||||
void blk_add_timer(struct request *req);
|
||||
const char *blk_status_to_str(blk_status_t status);
|
||||
|
||||
bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
|
||||
unsigned int nr_segs);
|
||||
|
143
block/fops.c
143
block/fops.c
@ -15,6 +15,7 @@
|
||||
#include <linux/falloc.h>
|
||||
#include <linux/suspend.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/iomap.h>
|
||||
#include <linux/module.h>
|
||||
#include "blk.h"
|
||||
|
||||
@ -23,15 +24,6 @@ static inline struct inode *bdev_file_inode(struct file *file)
|
||||
return file->f_mapping->host;
|
||||
}
|
||||
|
||||
static int blkdev_get_block(struct inode *inode, sector_t iblock,
|
||||
struct buffer_head *bh, int create)
|
||||
{
|
||||
bh->b_bdev = I_BDEV(inode);
|
||||
bh->b_blocknr = iblock;
|
||||
set_buffer_mapped(bh);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static blk_opf_t dio_bio_write_op(struct kiocb *iocb)
|
||||
{
|
||||
blk_opf_t opf = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;
|
||||
@ -387,6 +379,37 @@ static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
|
||||
return __blkdev_direct_IO(iocb, iter, bio_max_segs(nr_pages));
|
||||
}
|
||||
|
||||
static int blkdev_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
|
||||
unsigned int flags, struct iomap *iomap, struct iomap *srcmap)
|
||||
{
|
||||
struct block_device *bdev = I_BDEV(inode);
|
||||
loff_t isize = i_size_read(inode);
|
||||
|
||||
iomap->bdev = bdev;
|
||||
iomap->offset = ALIGN_DOWN(offset, bdev_logical_block_size(bdev));
|
||||
if (iomap->offset >= isize)
|
||||
return -EIO;
|
||||
iomap->type = IOMAP_MAPPED;
|
||||
iomap->addr = iomap->offset;
|
||||
iomap->length = isize - iomap->offset;
|
||||
iomap->flags |= IOMAP_F_BUFFER_HEAD; /* noop for !CONFIG_BUFFER_HEAD */
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct iomap_ops blkdev_iomap_ops = {
|
||||
.iomap_begin = blkdev_iomap_begin,
|
||||
};
|
||||
|
||||
#ifdef CONFIG_BUFFER_HEAD
|
||||
static int blkdev_get_block(struct inode *inode, sector_t iblock,
|
||||
struct buffer_head *bh, int create)
|
||||
{
|
||||
bh->b_bdev = I_BDEV(inode);
|
||||
bh->b_blocknr = iblock;
|
||||
set_buffer_mapped(bh);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
|
||||
{
|
||||
return block_write_full_page(page, blkdev_get_block, wbc);
|
||||
@ -429,10 +452,58 @@ const struct address_space_operations def_blk_aops = {
|
||||
.writepage = blkdev_writepage,
|
||||
.write_begin = blkdev_write_begin,
|
||||
.write_end = blkdev_write_end,
|
||||
.direct_IO = blkdev_direct_IO,
|
||||
.migrate_folio = buffer_migrate_folio_norefs,
|
||||
.is_dirty_writeback = buffer_check_dirty_writeback,
|
||||
};
|
||||
#else /* CONFIG_BUFFER_HEAD */
|
||||
static int blkdev_read_folio(struct file *file, struct folio *folio)
|
||||
{
|
||||
return iomap_read_folio(folio, &blkdev_iomap_ops);
|
||||
}
|
||||
|
||||
static void blkdev_readahead(struct readahead_control *rac)
|
||||
{
|
||||
iomap_readahead(rac, &blkdev_iomap_ops);
|
||||
}
|
||||
|
||||
static int blkdev_map_blocks(struct iomap_writepage_ctx *wpc,
|
||||
struct inode *inode, loff_t offset)
|
||||
{
|
||||
loff_t isize = i_size_read(inode);
|
||||
|
||||
if (WARN_ON_ONCE(offset >= isize))
|
||||
return -EIO;
|
||||
if (offset >= wpc->iomap.offset &&
|
||||
offset < wpc->iomap.offset + wpc->iomap.length)
|
||||
return 0;
|
||||
return blkdev_iomap_begin(inode, offset, isize - offset,
|
||||
IOMAP_WRITE, &wpc->iomap, NULL);
|
||||
}
|
||||
|
||||
static const struct iomap_writeback_ops blkdev_writeback_ops = {
|
||||
.map_blocks = blkdev_map_blocks,
|
||||
};
|
||||
|
||||
static int blkdev_writepages(struct address_space *mapping,
|
||||
struct writeback_control *wbc)
|
||||
{
|
||||
struct iomap_writepage_ctx wpc = { };
|
||||
|
||||
return iomap_writepages(mapping, wbc, &wpc, &blkdev_writeback_ops);
|
||||
}
|
||||
|
||||
const struct address_space_operations def_blk_aops = {
|
||||
.dirty_folio = filemap_dirty_folio,
|
||||
.release_folio = iomap_release_folio,
|
||||
.invalidate_folio = iomap_invalidate_folio,
|
||||
.read_folio = blkdev_read_folio,
|
||||
.readahead = blkdev_readahead,
|
||||
.writepages = blkdev_writepages,
|
||||
.is_partially_uptodate = iomap_is_partially_uptodate,
|
||||
.error_remove_page = generic_error_remove_page,
|
||||
.migrate_folio = filemap_migrate_folio,
|
||||
};
|
||||
#endif /* CONFIG_BUFFER_HEAD */
|
||||
|
||||
/*
|
||||
* for a block special file file_inode(file)->i_size is zero
|
||||
@ -506,7 +577,7 @@ static int blkdev_open(struct inode *inode, struct file *filp)
|
||||
* during an unstable branch.
|
||||
*/
|
||||
filp->f_flags |= O_LARGEFILE;
|
||||
filp->f_mode |= FMODE_BUF_RASYNC;
|
||||
filp->f_mode |= FMODE_BUF_RASYNC | FMODE_CAN_ODIRECT;
|
||||
|
||||
/*
|
||||
* Use the file private data to store the holder for exclusive openes.
|
||||
@ -534,6 +605,35 @@ static int blkdev_release(struct inode *inode, struct file *filp)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
blkdev_direct_write(struct kiocb *iocb, struct iov_iter *from)
|
||||
{
|
||||
size_t count = iov_iter_count(from);
|
||||
ssize_t written;
|
||||
|
||||
written = kiocb_invalidate_pages(iocb, count);
|
||||
if (written) {
|
||||
if (written == -EBUSY)
|
||||
return 0;
|
||||
return written;
|
||||
}
|
||||
|
||||
written = blkdev_direct_IO(iocb, from);
|
||||
if (written > 0) {
|
||||
kiocb_invalidate_post_direct_write(iocb, count);
|
||||
iocb->ki_pos += written;
|
||||
count -= written;
|
||||
}
|
||||
if (written != -EIOCBQUEUED)
|
||||
iov_iter_revert(from, count - iov_iter_count(from));
|
||||
return written;
|
||||
}
|
||||
|
||||
static ssize_t blkdev_buffered_write(struct kiocb *iocb, struct iov_iter *from)
|
||||
{
|
||||
return iomap_file_buffered_write(iocb, from, &blkdev_iomap_ops);
|
||||
}
|
||||
|
||||
/*
|
||||
* Write data to the block device. Only intended for the block device itself
|
||||
* and the raw driver which basically is a fake block device.
|
||||
@ -543,7 +643,8 @@ static int blkdev_release(struct inode *inode, struct file *filp)
|
||||
*/
|
||||
static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
|
||||
{
|
||||
struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host);
|
||||
struct file *file = iocb->ki_filp;
|
||||
struct block_device *bdev = I_BDEV(file->f_mapping->host);
|
||||
struct inode *bd_inode = bdev->bd_inode;
|
||||
loff_t size = bdev_nr_bytes(bdev);
|
||||
size_t shorted = 0;
|
||||
@ -570,7 +671,23 @@ static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
|
||||
iov_iter_truncate(from, size);
|
||||
}
|
||||
|
||||
ret = __generic_file_write_iter(iocb, from);
|
||||
ret = file_remove_privs(file);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = file_update_time(file);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (iocb->ki_flags & IOCB_DIRECT) {
|
||||
ret = blkdev_direct_write(iocb, from);
|
||||
if (ret >= 0 && iov_iter_count(from))
|
||||
ret = direct_write_fallback(iocb, from, ret,
|
||||
blkdev_buffered_write(iocb, from));
|
||||
} else {
|
||||
ret = blkdev_buffered_write(iocb, from);
|
||||
}
|
||||
|
||||
if (ret > 0)
|
||||
ret = generic_write_sync(iocb, ret);
|
||||
iov_iter_reexpand(from, iov_iter_count(from) + shorted);
|
||||
|
@ -646,8 +646,9 @@ static void dd_depth_updated(struct blk_mq_hw_ctx *hctx)
|
||||
struct request_queue *q = hctx->queue;
|
||||
struct deadline_data *dd = q->elevator->elevator_data;
|
||||
struct blk_mq_tags *tags = hctx->sched_tags;
|
||||
unsigned int shift = tags->bitmap_tags.sb.shift;
|
||||
|
||||
dd->async_depth = max(1UL, 3 * q->nr_requests / 4);
|
||||
dd->async_depth = max(1U, 3 * (1U << shift) / 4);
|
||||
|
||||
sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, dd->async_depth);
|
||||
}
|
||||
|
@ -2336,6 +2336,7 @@ static struct genl_family nbd_genl_family __ro_after_init = {
|
||||
.mcgrps = nbd_mcast_grps,
|
||||
.n_mcgrps = ARRAY_SIZE(nbd_mcast_grps),
|
||||
};
|
||||
MODULE_ALIAS_GENL_FAMILY(NBD_GENL_FAMILY_NAME);
|
||||
|
||||
static int populate_nbd_status(struct nbd_device *nbd, struct sk_buff *reply)
|
||||
{
|
||||
|
@ -1277,7 +1277,7 @@ static struct macio_driver swim3_driver =
|
||||
};
|
||||
|
||||
|
||||
int swim3_init(void)
|
||||
static int swim3_init(void)
|
||||
{
|
||||
macio_register_driver(&swim3_driver);
|
||||
return 0;
|
||||
|
@ -56,16 +56,21 @@
|
||||
| UBLK_F_USER_RECOVERY_REISSUE \
|
||||
| UBLK_F_UNPRIVILEGED_DEV \
|
||||
| UBLK_F_CMD_IOCTL_ENCODE \
|
||||
| UBLK_F_USER_COPY)
|
||||
| UBLK_F_USER_COPY \
|
||||
| UBLK_F_ZONED)
|
||||
|
||||
/* All UBLK_PARAM_TYPE_* should be included here */
|
||||
#define UBLK_PARAM_TYPE_ALL (UBLK_PARAM_TYPE_BASIC | \
|
||||
UBLK_PARAM_TYPE_DISCARD | UBLK_PARAM_TYPE_DEVT)
|
||||
#define UBLK_PARAM_TYPE_ALL \
|
||||
(UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DISCARD | \
|
||||
UBLK_PARAM_TYPE_DEVT | UBLK_PARAM_TYPE_ZONED)
|
||||
|
||||
struct ublk_rq_data {
|
||||
struct llist_node node;
|
||||
|
||||
struct kref ref;
|
||||
__u64 sector;
|
||||
__u32 operation;
|
||||
__u32 nr_zones;
|
||||
};
|
||||
|
||||
struct ublk_uring_cmd_pdu {
|
||||
@ -185,6 +190,263 @@ struct ublk_params_header {
|
||||
__u32 types;
|
||||
};
|
||||
|
||||
static inline unsigned int ublk_req_build_flags(struct request *req);
|
||||
static inline struct ublksrv_io_desc *ublk_get_iod(struct ublk_queue *ubq,
|
||||
int tag);
|
||||
|
||||
static inline bool ublk_dev_is_user_copy(const struct ublk_device *ub)
|
||||
{
|
||||
return ub->dev_info.flags & UBLK_F_USER_COPY;
|
||||
}
|
||||
|
||||
static inline bool ublk_dev_is_zoned(const struct ublk_device *ub)
|
||||
{
|
||||
return ub->dev_info.flags & UBLK_F_ZONED;
|
||||
}
|
||||
|
||||
static inline bool ublk_queue_is_zoned(struct ublk_queue *ubq)
|
||||
{
|
||||
return ubq->flags & UBLK_F_ZONED;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_ZONED
|
||||
|
||||
static int ublk_get_nr_zones(const struct ublk_device *ub)
|
||||
{
|
||||
const struct ublk_param_basic *p = &ub->params.basic;
|
||||
|
||||
/* Zone size is a power of 2 */
|
||||
return p->dev_sectors >> ilog2(p->chunk_sectors);
|
||||
}
|
||||
|
||||
static int ublk_revalidate_disk_zones(struct ublk_device *ub)
|
||||
{
|
||||
return blk_revalidate_disk_zones(ub->ub_disk, NULL);
|
||||
}
|
||||
|
||||
static int ublk_dev_param_zoned_validate(const struct ublk_device *ub)
|
||||
{
|
||||
const struct ublk_param_zoned *p = &ub->params.zoned;
|
||||
int nr_zones;
|
||||
|
||||
if (!ublk_dev_is_zoned(ub))
|
||||
return -EINVAL;
|
||||
|
||||
if (!p->max_zone_append_sectors)
|
||||
return -EINVAL;
|
||||
|
||||
nr_zones = ublk_get_nr_zones(ub);
|
||||
|
||||
if (p->max_active_zones > nr_zones)
|
||||
return -EINVAL;
|
||||
|
||||
if (p->max_open_zones > nr_zones)
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ublk_dev_param_zoned_apply(struct ublk_device *ub)
|
||||
{
|
||||
const struct ublk_param_zoned *p = &ub->params.zoned;
|
||||
|
||||
disk_set_zoned(ub->ub_disk, BLK_ZONED_HM);
|
||||
blk_queue_required_elevator_features(ub->ub_disk->queue,
|
||||
ELEVATOR_F_ZBD_SEQ_WRITE);
|
||||
disk_set_max_active_zones(ub->ub_disk, p->max_active_zones);
|
||||
disk_set_max_open_zones(ub->ub_disk, p->max_open_zones);
|
||||
blk_queue_max_zone_append_sectors(ub->ub_disk->queue, p->max_zone_append_sectors);
|
||||
|
||||
ub->ub_disk->nr_zones = ublk_get_nr_zones(ub);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Based on virtblk_alloc_report_buffer */
|
||||
static void *ublk_alloc_report_buffer(struct ublk_device *ublk,
|
||||
unsigned int nr_zones, size_t *buflen)
|
||||
{
|
||||
struct request_queue *q = ublk->ub_disk->queue;
|
||||
size_t bufsize;
|
||||
void *buf;
|
||||
|
||||
nr_zones = min_t(unsigned int, nr_zones,
|
||||
ublk->ub_disk->nr_zones);
|
||||
|
||||
bufsize = nr_zones * sizeof(struct blk_zone);
|
||||
bufsize =
|
||||
min_t(size_t, bufsize, queue_max_hw_sectors(q) << SECTOR_SHIFT);
|
||||
|
||||
while (bufsize >= sizeof(struct blk_zone)) {
|
||||
buf = kvmalloc(bufsize, GFP_KERNEL | __GFP_NORETRY);
|
||||
if (buf) {
|
||||
*buflen = bufsize;
|
||||
return buf;
|
||||
}
|
||||
bufsize >>= 1;
|
||||
}
|
||||
|
||||
*buflen = 0;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
 * Report zones of a ublk disk, starting at @sector.
 *
 * The report is obtained by sending one or more REQ_OP_DRV_IN requests
 * carrying UBLK_IO_OP_REPORT_ZONES; each request asks for as many zones as
 * fit in the report buffer.  Every zone returned is handed to @cb together
 * with its index within the whole report.
 *
 * @disk:     disk to report on
 * @sector:   first sector of the report
 * @nr_zones: maximum number of zones to report
 * @cb:       callback invoked for each reported zone
 * @data:     opaque argument passed to @cb
 *
 * Returns the number of zones reported, or a negative errno.
 */
static int ublk_report_zones(struct gendisk *disk, sector_t sector,
		unsigned int nr_zones, report_zones_cb cb, void *data)
{
	struct ublk_device *ub = disk->private_data;
	unsigned int zone_size_sectors = disk->queue->limits.chunk_sectors;
	unsigned int first_zone = sector >> ilog2(zone_size_sectors);
	unsigned int done_zones = 0;
	unsigned int max_zones_per_request;
	int ret;
	struct blk_zone *buffer;
	size_t buffer_length;

	nr_zones = min_t(unsigned int, ub->ub_disk->nr_zones - first_zone,
			 nr_zones);

	buffer = ublk_alloc_report_buffer(ub, nr_zones, &buffer_length);
	if (!buffer)
		return -ENOMEM;

	max_zones_per_request = buffer_length / sizeof(struct blk_zone);

	while (done_zones < nr_zones) {
		unsigned int remaining_zones = nr_zones - done_zones;
		unsigned int zones_in_request =
			min_t(unsigned int, remaining_zones, max_zones_per_request);
		unsigned int zones_before = done_zones;
		struct request *req;
		struct ublk_rq_data *pdu;
		blk_status_t status;

		/* A zero length marks the end of a response, so start clean */
		memset(buffer, 0, buffer_length);

		req = blk_mq_alloc_request(disk->queue, REQ_OP_DRV_IN, 0);
		if (IS_ERR(req)) {
			ret = PTR_ERR(req);
			goto out;
		}

		pdu = blk_mq_rq_to_pdu(req);
		pdu->operation = UBLK_IO_OP_REPORT_ZONES;
		pdu->sector = sector;
		pdu->nr_zones = zones_in_request;

		ret = blk_rq_map_kern(disk->queue, req, buffer, buffer_length,
					GFP_KERNEL);
		if (ret) {
			blk_mq_free_request(req);
			goto out;
		}

		status = blk_execute_rq(req, 0);
		ret = blk_status_to_errno(status);
		blk_mq_free_request(req);
		if (ret)
			goto out;

		for (unsigned int i = 0; i < zones_in_request; i++) {
			struct blk_zone *zone = buffer + i;

			/* A zero length zone means no more zones in this response */
			if (!zone->len)
				break;

			/*
			 * Pass the index within the whole report, not within
			 * this request: the report may span several requests.
			 */
			ret = cb(zone, done_zones, data);
			if (ret)
				goto out;

			done_zones++;
			sector += zone_size_sectors;
		}

		/*
		 * The response carried no zones at all.  Asking again for the
		 * same sector could loop forever, so return what we have.
		 */
		if (done_zones == zones_before)
			break;
	}

	ret = done_zones;

out:
	kvfree(buffer);
	return ret;
}
|
||||
|
||||
static blk_status_t ublk_setup_iod_zoned(struct ublk_queue *ubq,
|
||||
struct request *req)
|
||||
{
|
||||
struct ublksrv_io_desc *iod = ublk_get_iod(ubq, req->tag);
|
||||
struct ublk_io *io = &ubq->ios[req->tag];
|
||||
struct ublk_rq_data *pdu = blk_mq_rq_to_pdu(req);
|
||||
u32 ublk_op;
|
||||
|
||||
switch (req_op(req)) {
|
||||
case REQ_OP_ZONE_OPEN:
|
||||
ublk_op = UBLK_IO_OP_ZONE_OPEN;
|
||||
break;
|
||||
case REQ_OP_ZONE_CLOSE:
|
||||
ublk_op = UBLK_IO_OP_ZONE_CLOSE;
|
||||
break;
|
||||
case REQ_OP_ZONE_FINISH:
|
||||
ublk_op = UBLK_IO_OP_ZONE_FINISH;
|
||||
break;
|
||||
case REQ_OP_ZONE_RESET:
|
||||
ublk_op = UBLK_IO_OP_ZONE_RESET;
|
||||
break;
|
||||
case REQ_OP_ZONE_APPEND:
|
||||
ublk_op = UBLK_IO_OP_ZONE_APPEND;
|
||||
break;
|
||||
case REQ_OP_DRV_IN:
|
||||
ublk_op = pdu->operation;
|
||||
switch (ublk_op) {
|
||||
case UBLK_IO_OP_REPORT_ZONES:
|
||||
iod->op_flags = ublk_op | ublk_req_build_flags(req);
|
||||
iod->nr_zones = pdu->nr_zones;
|
||||
iod->start_sector = pdu->sector;
|
||||
return BLK_STS_OK;
|
||||
default:
|
||||
return BLK_STS_IOERR;
|
||||
}
|
||||
case REQ_OP_ZONE_RESET_ALL:
|
||||
case REQ_OP_DRV_OUT:
|
||||
/* We do not support reset_all and drv_out */
|
||||
return BLK_STS_NOTSUPP;
|
||||
default:
|
||||
return BLK_STS_IOERR;
|
||||
}
|
||||
|
||||
iod->op_flags = ublk_op | ublk_req_build_flags(req);
|
||||
iod->nr_sectors = blk_rq_sectors(req);
|
||||
iod->start_sector = blk_rq_pos(req);
|
||||
iod->addr = io->addr;
|
||||
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#define ublk_report_zones (NULL)
|
||||
|
||||
static int ublk_dev_param_zoned_validate(const struct ublk_device *ub)
|
||||
{
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
static int ublk_dev_param_zoned_apply(struct ublk_device *ub)
|
||||
{
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
/* Stub used without CONFIG_BLK_DEV_ZONED: no zones to revalidate, succeed. */
static int ublk_revalidate_disk_zones(struct ublk_device *ub)
{
	const int ok = 0;

	return ok;
}
|
||||
|
||||
static blk_status_t ublk_setup_iod_zoned(struct ublk_queue *ubq,
|
||||
struct request *req)
|
||||
{
|
||||
return BLK_STS_NOTSUPP;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
static inline void __ublk_complete_rq(struct request *req);
|
||||
static void ublk_complete_rq(struct kref *ref);
|
||||
|
||||
@ -281,6 +543,9 @@ static int ublk_validate_params(const struct ublk_device *ub)
|
||||
|
||||
if (p->max_sectors > (ub->dev_info.max_io_buf_bytes >> 9))
|
||||
return -EINVAL;
|
||||
|
||||
if (ublk_dev_is_zoned(ub) && !p->chunk_sectors)
|
||||
return -EINVAL;
|
||||
} else
|
||||
return -EINVAL;
|
||||
|
||||
@ -299,6 +564,11 @@ static int ublk_validate_params(const struct ublk_device *ub)
|
||||
if (ub->params.types & UBLK_PARAM_TYPE_DEVT)
|
||||
return -EINVAL;
|
||||
|
||||
if (ub->params.types & UBLK_PARAM_TYPE_ZONED)
|
||||
return ublk_dev_param_zoned_validate(ub);
|
||||
else if (ublk_dev_is_zoned(ub))
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -312,6 +582,9 @@ static int ublk_apply_params(struct ublk_device *ub)
|
||||
if (ub->params.types & UBLK_PARAM_TYPE_DISCARD)
|
||||
ublk_dev_param_discard_apply(ub);
|
||||
|
||||
if (ub->params.types & UBLK_PARAM_TYPE_ZONED)
|
||||
return ublk_dev_param_zoned_apply(ub);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -482,6 +755,7 @@ static const struct block_device_operations ub_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.open = ublk_open,
|
||||
.free_disk = ublk_free_disk,
|
||||
.report_zones = ublk_report_zones,
|
||||
};
|
||||
|
||||
#define UBLK_MAX_PIN_PAGES 32
|
||||
@ -596,7 +870,8 @@ static inline bool ublk_need_map_req(const struct request *req)
|
||||
|
||||
static inline bool ublk_need_unmap_req(const struct request *req)
|
||||
{
|
||||
return ublk_rq_has_data(req) && req_op(req) == REQ_OP_READ;
|
||||
return ublk_rq_has_data(req) &&
|
||||
(req_op(req) == REQ_OP_READ || req_op(req) == REQ_OP_DRV_IN);
|
||||
}
|
||||
|
||||
static int ublk_map_io(const struct ublk_queue *ubq, const struct request *req,
|
||||
@ -680,8 +955,13 @@ static blk_status_t ublk_setup_iod(struct ublk_queue *ubq, struct request *req)
|
||||
{
|
||||
struct ublksrv_io_desc *iod = ublk_get_iod(ubq, req->tag);
|
||||
struct ublk_io *io = &ubq->ios[req->tag];
|
||||
enum req_op op = req_op(req);
|
||||
u32 ublk_op;
|
||||
|
||||
if (!ublk_queue_is_zoned(ubq) &&
|
||||
(op_is_zone_mgmt(op) || op == REQ_OP_ZONE_APPEND))
|
||||
return BLK_STS_IOERR;
|
||||
|
||||
switch (req_op(req)) {
|
||||
case REQ_OP_READ:
|
||||
ublk_op = UBLK_IO_OP_READ;
|
||||
@ -699,6 +979,8 @@ static blk_status_t ublk_setup_iod(struct ublk_queue *ubq, struct request *req)
|
||||
ublk_op = UBLK_IO_OP_WRITE_ZEROES;
|
||||
break;
|
||||
default:
|
||||
if (ublk_queue_is_zoned(ubq))
|
||||
return ublk_setup_iod_zoned(ubq, req);
|
||||
return BLK_STS_IOERR;
|
||||
}
|
||||
|
||||
@ -751,7 +1033,8 @@ static inline void __ublk_complete_rq(struct request *req)
|
||||
*
|
||||
* Both the two needn't unmap.
|
||||
*/
|
||||
if (req_op(req) != REQ_OP_READ && req_op(req) != REQ_OP_WRITE)
|
||||
if (req_op(req) != REQ_OP_READ && req_op(req) != REQ_OP_WRITE &&
|
||||
req_op(req) != REQ_OP_DRV_IN)
|
||||
goto exit;
|
||||
|
||||
/* for READ request, writing data in iod->addr to rq buffers */
|
||||
@ -1114,8 +1397,13 @@ static void ublk_commit_completion(struct ublk_device *ub,
|
||||
|
||||
/* find the io request and complete */
|
||||
req = blk_mq_tag_to_rq(ub->tag_set.tags[qid], tag);
|
||||
if (WARN_ON_ONCE(unlikely(!req)))
|
||||
return;
|
||||
|
||||
if (req && likely(!blk_should_fake_timeout(req->q)))
|
||||
if (req_op(req) == REQ_OP_ZONE_APPEND)
|
||||
req->__sector = ub_cmd->zone_append_lba;
|
||||
|
||||
if (likely(!blk_should_fake_timeout(req->q)))
|
||||
ublk_put_req_ref(ubq, req);
|
||||
}
|
||||
|
||||
@ -1414,11 +1702,6 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
|
||||
^ (_IOC_NR(cmd_op) == UBLK_IO_NEED_GET_DATA))
|
||||
goto out;
|
||||
|
||||
if (ublk_support_user_copy(ubq) && ub_cmd->addr) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = ublk_check_cmd_op(cmd_op);
|
||||
if (ret)
|
||||
goto out;
|
||||
@ -1445,6 +1728,10 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
|
||||
*/
|
||||
if (!ub_cmd->addr && !ublk_need_get_data(ubq))
|
||||
goto out;
|
||||
} else if (ub_cmd->addr) {
|
||||
/* User copy requires addr to be unset */
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ublk_fill_io_cmd(io, cmd, ub_cmd->addr);
|
||||
@ -1464,7 +1751,15 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
|
||||
if (!ub_cmd->addr && (!ublk_need_get_data(ubq) ||
|
||||
req_op(req) == REQ_OP_READ))
|
||||
goto out;
|
||||
} else if (req_op(req) != REQ_OP_ZONE_APPEND && ub_cmd->addr) {
|
||||
/*
|
||||
* User copy requires addr to be unset when command is
|
||||
* not zone append
|
||||
*/
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ublk_fill_io_cmd(io, cmd, ub_cmd->addr);
|
||||
ublk_commit_completion(ub, ub_cmd);
|
||||
break;
|
||||
@ -1537,11 +1832,14 @@ static inline bool ublk_check_ubuf_dir(const struct request *req,
|
||||
int ubuf_dir)
|
||||
{
|
||||
/* copy ubuf to request pages */
|
||||
if (req_op(req) == REQ_OP_READ && ubuf_dir == ITER_SOURCE)
|
||||
if ((req_op(req) == REQ_OP_READ || req_op(req) == REQ_OP_DRV_IN) &&
|
||||
ubuf_dir == ITER_SOURCE)
|
||||
return true;
|
||||
|
||||
/* copy request pages to ubuf */
|
||||
if (req_op(req) == REQ_OP_WRITE && ubuf_dir == ITER_DEST)
|
||||
if ((req_op(req) == REQ_OP_WRITE ||
|
||||
req_op(req) == REQ_OP_ZONE_APPEND) &&
|
||||
ubuf_dir == ITER_DEST)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
@ -1881,17 +2179,24 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd)
|
||||
|
||||
get_device(&ub->cdev_dev);
|
||||
ub->dev_info.state = UBLK_S_DEV_LIVE;
|
||||
|
||||
if (ublk_dev_is_zoned(ub)) {
|
||||
ret = ublk_revalidate_disk_zones(ub);
|
||||
if (ret)
|
||||
goto out_put_cdev;
|
||||
}
|
||||
|
||||
ret = add_disk(disk);
|
||||
if (ret)
|
||||
goto out_put_cdev;
|
||||
|
||||
set_bit(UB_STATE_USED, &ub->state);
|
||||
|
||||
out_put_cdev:
|
||||
if (ret) {
|
||||
/*
|
||||
* Has to drop the reference since ->free_disk won't be
|
||||
* called in case of add_disk failure.
|
||||
*/
|
||||
ub->dev_info.state = UBLK_S_DEV_DEAD;
|
||||
ublk_put_device(ub);
|
||||
goto out_put_disk;
|
||||
}
|
||||
set_bit(UB_STATE_USED, &ub->state);
|
||||
out_put_disk:
|
||||
if (ret)
|
||||
put_disk(disk);
|
||||
@ -2038,9 +2343,16 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
|
||||
UBLK_F_URING_CMD_COMP_IN_TASK;
|
||||
|
||||
/* GET_DATA isn't needed any more with USER_COPY */
|
||||
if (ub->dev_info.flags & UBLK_F_USER_COPY)
|
||||
if (ublk_dev_is_user_copy(ub))
|
||||
ub->dev_info.flags &= ~UBLK_F_NEED_GET_DATA;
|
||||
|
||||
/* Zoned storage support requires user copy feature */
|
||||
if (ublk_dev_is_zoned(ub) &&
|
||||
(!IS_ENABLED(CONFIG_BLK_DEV_ZONED) || !ublk_dev_is_user_copy(ub))) {
|
||||
ret = -EINVAL;
|
||||
goto out_free_dev_number;
|
||||
}
|
||||
|
||||
/* We are not ready to support zero copy */
|
||||
ub->dev_info.flags &= ~UBLK_F_SUPPORT_ZERO_COPY;
|
||||
|
||||
@ -2433,14 +2745,9 @@ static int ublk_ctrl_uring_cmd_permission(struct ublk_device *ub,
|
||||
if (header->len < header->dev_path_len)
|
||||
return -EINVAL;
|
||||
|
||||
dev_path = kmalloc(header->dev_path_len + 1, GFP_KERNEL);
|
||||
if (!dev_path)
|
||||
return -ENOMEM;
|
||||
|
||||
ret = -EFAULT;
|
||||
if (copy_from_user(dev_path, argp, header->dev_path_len))
|
||||
goto exit;
|
||||
dev_path[header->dev_path_len] = 0;
|
||||
dev_path = memdup_user_nul(argp, header->dev_path_len);
|
||||
if (IS_ERR(dev_path))
|
||||
return PTR_ERR(dev_path);
|
||||
|
||||
ret = -EINVAL;
|
||||
switch (_IOC_NR(cmd->cmd_op)) {
|
||||
|
@ -15,6 +15,7 @@ if MD
|
||||
config BLK_DEV_MD
|
||||
tristate "RAID support"
|
||||
select BLOCK_HOLDER_DEPRECATED if SYSFS
|
||||
select BUFFER_HEAD
|
||||
# BLOCK_LEGACY_AUTOLOAD requirement should be removed
|
||||
# after relevant mdadm enhancements - to make "names=yes"
|
||||
# the default - are widely available.
|
||||
@ -50,6 +51,16 @@ config MD_AUTODETECT
|
||||
|
||||
If unsure, say Y.
|
||||
|
||||
config MD_BITMAP_FILE
|
||||
bool "MD bitmap file support (deprecated)"
|
||||
default y
|
||||
help
|
||||
If you say Y here, support for write intent bitmaps in files on an
|
||||
external file system is enabled. This is an alternative to the internal
|
||||
bitmaps near the MD superblock, and very problematic code that abuses
|
||||
various kernel APIs and can only work with files on a file system not
|
||||
actually sitting on the MD device.
|
||||
|
||||
config MD_LINEAR
|
||||
tristate "Linear (append) mode (deprecated)"
|
||||
depends on BLK_DEV_MD
|
||||
|
@ -1160,7 +1160,6 @@ static int dm_crypt_integrity_io_alloc(struct dm_crypt_io *io, struct bio *bio)
|
||||
|
||||
tag_len = io->cc->on_disk_tag_size * (bio_sectors(bio) >> io->cc->sector_shift);
|
||||
|
||||
bip->bip_iter.bi_size = tag_len;
|
||||
bip->bip_iter.bi_sector = io->cc->start + io->sector;
|
||||
|
||||
ret = bio_integrity_add_page(bio, virt_to_page(io->integrity_metadata),
|
||||
|
@ -3723,7 +3723,6 @@ static int raid_message(struct dm_target *ti, unsigned int argc, char **argv,
|
||||
if (!strcasecmp(argv[0], "idle") || !strcasecmp(argv[0], "frozen")) {
|
||||
if (mddev->sync_thread) {
|
||||
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
|
||||
md_unregister_thread(&mddev->sync_thread);
|
||||
md_reap_sync_thread(mddev);
|
||||
}
|
||||
} else if (decipher_sync_action(mddev, mddev->recovery) != st_idle)
|
||||
|
@ -139,29 +139,26 @@ static void md_bitmap_checkfree(struct bitmap_counts *bitmap, unsigned long page
|
||||
*/
|
||||
|
||||
/* IO operations when bitmap is stored near all superblocks */
|
||||
static int read_sb_page(struct mddev *mddev, loff_t offset,
|
||||
struct page *page,
|
||||
unsigned long index, int size)
|
||||
{
|
||||
/* choose a good rdev and read the page from there */
|
||||
|
||||
/* choose a good rdev and read the page from there */
|
||||
static int read_sb_page(struct mddev *mddev, loff_t offset,
|
||||
struct page *page, unsigned long index, int size)
|
||||
{
|
||||
|
||||
sector_t sector = mddev->bitmap_info.offset + offset +
|
||||
index * (PAGE_SIZE / SECTOR_SIZE);
|
||||
struct md_rdev *rdev;
|
||||
sector_t target;
|
||||
|
||||
rdev_for_each(rdev, mddev) {
|
||||
if (! test_bit(In_sync, &rdev->flags)
|
||||
|| test_bit(Faulty, &rdev->flags)
|
||||
|| test_bit(Bitmap_sync, &rdev->flags))
|
||||
u32 iosize = roundup(size, bdev_logical_block_size(rdev->bdev));
|
||||
|
||||
if (!test_bit(In_sync, &rdev->flags) ||
|
||||
test_bit(Faulty, &rdev->flags) ||
|
||||
test_bit(Bitmap_sync, &rdev->flags))
|
||||
continue;
|
||||
|
||||
target = offset + index * (PAGE_SIZE/512);
|
||||
|
||||
if (sync_page_io(rdev, target,
|
||||
roundup(size, bdev_logical_block_size(rdev->bdev)),
|
||||
page, REQ_OP_READ, true)) {
|
||||
page->index = index;
|
||||
if (sync_page_io(rdev, sector, iosize, page, REQ_OP_READ, true))
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
return -EIO;
|
||||
}
|
||||
@ -225,18 +222,19 @@ static unsigned int bitmap_io_size(unsigned int io_size, unsigned int opt_size,
|
||||
}
|
||||
|
||||
static int __write_sb_page(struct md_rdev *rdev, struct bitmap *bitmap,
|
||||
struct page *page)
|
||||
unsigned long pg_index, struct page *page)
|
||||
{
|
||||
struct block_device *bdev;
|
||||
struct mddev *mddev = bitmap->mddev;
|
||||
struct bitmap_storage *store = &bitmap->storage;
|
||||
loff_t sboff, offset = mddev->bitmap_info.offset;
|
||||
sector_t ps, doff;
|
||||
sector_t ps = pg_index * PAGE_SIZE / SECTOR_SIZE;
|
||||
unsigned int size = PAGE_SIZE;
|
||||
unsigned int opt_size = PAGE_SIZE;
|
||||
sector_t doff;
|
||||
|
||||
bdev = (rdev->meta_bdev) ? rdev->meta_bdev : rdev->bdev;
|
||||
if (page->index == store->file_pages - 1) {
|
||||
if (pg_index == store->file_pages - 1) {
|
||||
unsigned int last_page_size = store->bytes & (PAGE_SIZE - 1);
|
||||
|
||||
if (last_page_size == 0)
|
||||
@ -245,7 +243,6 @@ static int __write_sb_page(struct md_rdev *rdev, struct bitmap *bitmap,
|
||||
opt_size = optimal_io_size(bdev, last_page_size, size);
|
||||
}
|
||||
|
||||
ps = page->index * PAGE_SIZE / SECTOR_SIZE;
|
||||
sboff = rdev->sb_start + offset;
|
||||
doff = rdev->data_offset;
|
||||
|
||||
@ -279,55 +276,41 @@ static int __write_sb_page(struct md_rdev *rdev, struct bitmap *bitmap,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
|
||||
static void write_sb_page(struct bitmap *bitmap, unsigned long pg_index,
|
||||
struct page *page, bool wait)
|
||||
{
|
||||
struct md_rdev *rdev;
|
||||
struct mddev *mddev = bitmap->mddev;
|
||||
int ret;
|
||||
|
||||
do {
|
||||
rdev = NULL;
|
||||
struct md_rdev *rdev = NULL;
|
||||
|
||||
while ((rdev = next_active_rdev(rdev, mddev)) != NULL) {
|
||||
ret = __write_sb_page(rdev, bitmap, page);
|
||||
if (ret)
|
||||
return ret;
|
||||
if (__write_sb_page(rdev, bitmap, pg_index, page) < 0) {
|
||||
set_bit(BITMAP_WRITE_ERROR, &bitmap->flags);
|
||||
return;
|
||||
}
|
||||
}
|
||||
} while (wait && md_super_wait(mddev) < 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void md_bitmap_file_kick(struct bitmap *bitmap);
|
||||
/*
|
||||
* write out a page to a file
|
||||
*/
|
||||
static void write_page(struct bitmap *bitmap, struct page *page, int wait)
|
||||
|
||||
#ifdef CONFIG_MD_BITMAP_FILE
|
||||
static void write_file_page(struct bitmap *bitmap, struct page *page, int wait)
|
||||
{
|
||||
struct buffer_head *bh;
|
||||
struct buffer_head *bh = page_buffers(page);
|
||||
|
||||
if (bitmap->storage.file == NULL) {
|
||||
switch (write_sb_page(bitmap, page, wait)) {
|
||||
case -EINVAL:
|
||||
set_bit(BITMAP_WRITE_ERROR, &bitmap->flags);
|
||||
}
|
||||
} else {
|
||||
|
||||
bh = page_buffers(page);
|
||||
|
||||
while (bh && bh->b_blocknr) {
|
||||
atomic_inc(&bitmap->pending_writes);
|
||||
set_buffer_locked(bh);
|
||||
set_buffer_mapped(bh);
|
||||
submit_bh(REQ_OP_WRITE | REQ_SYNC, bh);
|
||||
bh = bh->b_this_page;
|
||||
}
|
||||
|
||||
if (wait)
|
||||
wait_event(bitmap->write_wait,
|
||||
atomic_read(&bitmap->pending_writes)==0);
|
||||
while (bh && bh->b_blocknr) {
|
||||
atomic_inc(&bitmap->pending_writes);
|
||||
set_buffer_locked(bh);
|
||||
set_buffer_mapped(bh);
|
||||
submit_bh(REQ_OP_WRITE | REQ_SYNC, bh);
|
||||
bh = bh->b_this_page;
|
||||
}
|
||||
if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
|
||||
md_bitmap_file_kick(bitmap);
|
||||
|
||||
if (wait)
|
||||
wait_event(bitmap->write_wait,
|
||||
atomic_read(&bitmap->pending_writes) == 0);
|
||||
}
|
||||
|
||||
static void end_bitmap_write(struct buffer_head *bh, int uptodate)
|
||||
@ -364,10 +347,8 @@ static void free_buffers(struct page *page)
|
||||
* This usage is similar to how swap files are handled, and allows us
|
||||
* to write to a file with no concerns of memory allocation failing.
|
||||
*/
|
||||
static int read_page(struct file *file, unsigned long index,
|
||||
struct bitmap *bitmap,
|
||||
unsigned long count,
|
||||
struct page *page)
|
||||
static int read_file_page(struct file *file, unsigned long index,
|
||||
struct bitmap *bitmap, unsigned long count, struct page *page)
|
||||
{
|
||||
int ret = 0;
|
||||
struct inode *inode = file_inode(file);
|
||||
@ -415,7 +396,6 @@ static int read_page(struct file *file, unsigned long index,
|
||||
blk_cur++;
|
||||
bh = bh->b_this_page;
|
||||
}
|
||||
page->index = index;
|
||||
|
||||
wait_event(bitmap->write_wait,
|
||||
atomic_read(&bitmap->pending_writes)==0);
|
||||
@ -429,11 +409,45 @@ out:
|
||||
ret);
|
||||
return ret;
|
||||
}
|
||||
#else /* CONFIG_MD_BITMAP_FILE */
|
||||
/* Stub used without CONFIG_MD_BITMAP_FILE: there is no file to write to. */
static void write_file_page(struct bitmap *bitmap, struct page *page, int wait)
{
	return;
}
|
||||
static int read_file_page(struct file *file, unsigned long index,
|
||||
struct bitmap *bitmap, unsigned long count, struct page *page)
|
||||
{
|
||||
return -EIO;
|
||||
}
|
||||
/*
 * Variant used without CONFIG_MD_BITMAP_FILE: only the page reference is
 * dropped.
 */
static void free_buffers(struct page *page)
{
	put_page(page);
	return;
}
|
||||
#endif /* CONFIG_MD_BITMAP_FILE */
|
||||
|
||||
/*
|
||||
* bitmap file superblock operations
|
||||
*/
|
||||
|
||||
/*
|
||||
* write out a page to a file
|
||||
*/
|
||||
static void filemap_write_page(struct bitmap *bitmap, unsigned long pg_index,
|
||||
bool wait)
|
||||
{
|
||||
struct bitmap_storage *store = &bitmap->storage;
|
||||
struct page *page = store->filemap[pg_index];
|
||||
|
||||
if (mddev_is_clustered(bitmap->mddev)) {
|
||||
pg_index += bitmap->cluster_slot *
|
||||
DIV_ROUND_UP(store->bytes, PAGE_SIZE);
|
||||
}
|
||||
|
||||
if (store->file)
|
||||
write_file_page(bitmap, page, wait);
|
||||
else
|
||||
write_sb_page(bitmap, pg_index, page, wait);
|
||||
}
|
||||
|
||||
/*
|
||||
* md_bitmap_wait_writes() should be called before writing any bitmap
|
||||
* blocks, to ensure previous writes, particularly from
|
||||
@ -488,7 +502,12 @@ void md_bitmap_update_sb(struct bitmap *bitmap)
|
||||
sb->sectors_reserved = cpu_to_le32(bitmap->mddev->
|
||||
bitmap_info.space);
|
||||
kunmap_atomic(sb);
|
||||
write_page(bitmap, bitmap->storage.sb_page, 1);
|
||||
|
||||
if (bitmap->storage.file)
|
||||
write_file_page(bitmap, bitmap->storage.sb_page, 1);
|
||||
else
|
||||
write_sb_page(bitmap, bitmap->storage.sb_index,
|
||||
bitmap->storage.sb_page, 1);
|
||||
}
|
||||
EXPORT_SYMBOL(md_bitmap_update_sb);
|
||||
|
||||
@ -540,7 +559,7 @@ static int md_bitmap_new_disk_sb(struct bitmap *bitmap)
|
||||
bitmap->storage.sb_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
|
||||
if (bitmap->storage.sb_page == NULL)
|
||||
return -ENOMEM;
|
||||
bitmap->storage.sb_page->index = 0;
|
||||
bitmap->storage.sb_index = 0;
|
||||
|
||||
sb = kmap_atomic(bitmap->storage.sb_page);
|
||||
|
||||
@ -601,7 +620,7 @@ static int md_bitmap_read_sb(struct bitmap *bitmap)
|
||||
unsigned long sectors_reserved = 0;
|
||||
int err = -EINVAL;
|
||||
struct page *sb_page;
|
||||
loff_t offset = bitmap->mddev->bitmap_info.offset;
|
||||
loff_t offset = 0;
|
||||
|
||||
if (!bitmap->storage.file && !bitmap->mddev->bitmap_info.offset) {
|
||||
chunksize = 128 * 1024 * 1024;
|
||||
@ -628,7 +647,7 @@ re_read:
|
||||
bm_blocks = ((bm_blocks+7) >> 3) + sizeof(bitmap_super_t);
|
||||
/* to 4k blocks */
|
||||
bm_blocks = DIV_ROUND_UP_SECTOR_T(bm_blocks, 4096);
|
||||
offset = bitmap->mddev->bitmap_info.offset + (bitmap->cluster_slot * (bm_blocks << 3));
|
||||
offset = bitmap->cluster_slot * (bm_blocks << 3);
|
||||
pr_debug("%s:%d bm slot: %d offset: %llu\n", __func__, __LINE__,
|
||||
bitmap->cluster_slot, offset);
|
||||
}
|
||||
@ -637,13 +656,11 @@ re_read:
|
||||
loff_t isize = i_size_read(bitmap->storage.file->f_mapping->host);
|
||||
int bytes = isize > PAGE_SIZE ? PAGE_SIZE : isize;
|
||||
|
||||
err = read_page(bitmap->storage.file, 0,
|
||||
err = read_file_page(bitmap->storage.file, 0,
|
||||
bitmap, bytes, sb_page);
|
||||
} else {
|
||||
err = read_sb_page(bitmap->mddev,
|
||||
offset,
|
||||
sb_page,
|
||||
0, sizeof(bitmap_super_t));
|
||||
err = read_sb_page(bitmap->mddev, offset, sb_page, 0,
|
||||
sizeof(bitmap_super_t));
|
||||
}
|
||||
if (err)
|
||||
return err;
|
||||
@ -819,7 +836,7 @@ static int md_bitmap_storage_alloc(struct bitmap_storage *store,
|
||||
if (store->sb_page) {
|
||||
store->filemap[0] = store->sb_page;
|
||||
pnum = 1;
|
||||
store->sb_page->index = offset;
|
||||
store->sb_index = offset;
|
||||
}
|
||||
|
||||
for ( ; pnum < num_pages; pnum++) {
|
||||
@ -828,7 +845,6 @@ static int md_bitmap_storage_alloc(struct bitmap_storage *store,
|
||||
store->file_pages = pnum;
|
||||
return -ENOMEM;
|
||||
}
|
||||
store->filemap[pnum]->index = pnum + offset;
|
||||
}
|
||||
store->file_pages = pnum;
|
||||
|
||||
@ -847,14 +863,10 @@ static int md_bitmap_storage_alloc(struct bitmap_storage *store,
|
||||
|
||||
static void md_bitmap_file_unmap(struct bitmap_storage *store)
|
||||
{
|
||||
struct page **map, *sb_page;
|
||||
int pages;
|
||||
struct file *file;
|
||||
|
||||
file = store->file;
|
||||
map = store->filemap;
|
||||
pages = store->file_pages;
|
||||
sb_page = store->sb_page;
|
||||
struct file *file = store->file;
|
||||
struct page *sb_page = store->sb_page;
|
||||
struct page **map = store->filemap;
|
||||
int pages = store->file_pages;
|
||||
|
||||
while (pages--)
|
||||
if (map[pages] != sb_page) /* 0 is sb_page, release it below */
|
||||
@ -879,21 +891,13 @@ static void md_bitmap_file_unmap(struct bitmap_storage *store)
|
||||
*/
|
||||
static void md_bitmap_file_kick(struct bitmap *bitmap)
|
||||
{
|
||||
char *path, *ptr = NULL;
|
||||
|
||||
if (!test_and_set_bit(BITMAP_STALE, &bitmap->flags)) {
|
||||
md_bitmap_update_sb(bitmap);
|
||||
|
||||
if (bitmap->storage.file) {
|
||||
path = kmalloc(PAGE_SIZE, GFP_KERNEL);
|
||||
if (path)
|
||||
ptr = file_path(bitmap->storage.file,
|
||||
path, PAGE_SIZE);
|
||||
pr_warn("%s: kicking failed bitmap file %pD4 from array!\n",
|
||||
bmname(bitmap), bitmap->storage.file);
|
||||
|
||||
pr_warn("%s: kicking failed bitmap file %s from array!\n",
|
||||
bmname(bitmap), IS_ERR(ptr) ? "" : ptr);
|
||||
|
||||
kfree(path);
|
||||
} else
|
||||
pr_warn("%s: disabling internal bitmap due to errors\n",
|
||||
bmname(bitmap));
|
||||
@ -945,6 +949,7 @@ static void md_bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
|
||||
void *kaddr;
|
||||
unsigned long chunk = block >> bitmap->counts.chunkshift;
|
||||
struct bitmap_storage *store = &bitmap->storage;
|
||||
unsigned long index = file_page_index(store, chunk);
|
||||
unsigned long node_offset = 0;
|
||||
|
||||
if (mddev_is_clustered(bitmap->mddev))
|
||||
@ -962,9 +967,9 @@ static void md_bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
|
||||
else
|
||||
set_bit_le(bit, kaddr);
|
||||
kunmap_atomic(kaddr);
|
||||
pr_debug("set file bit %lu page %lu\n", bit, page->index);
|
||||
pr_debug("set file bit %lu page %lu\n", bit, index);
|
||||
/* record page number so it gets flushed to disk when unplug occurs */
|
||||
set_page_attr(bitmap, page->index - node_offset, BITMAP_PAGE_DIRTY);
|
||||
set_page_attr(bitmap, index - node_offset, BITMAP_PAGE_DIRTY);
|
||||
}
|
||||
|
||||
static void md_bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block)
|
||||
@ -974,6 +979,7 @@ static void md_bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block)
|
||||
void *paddr;
|
||||
unsigned long chunk = block >> bitmap->counts.chunkshift;
|
||||
struct bitmap_storage *store = &bitmap->storage;
|
||||
unsigned long index = file_page_index(store, chunk);
|
||||
unsigned long node_offset = 0;
|
||||
|
||||
if (mddev_is_clustered(bitmap->mddev))
|
||||
@ -989,8 +995,8 @@ static void md_bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block)
|
||||
else
|
||||
clear_bit_le(bit, paddr);
|
||||
kunmap_atomic(paddr);
|
||||
if (!test_page_attr(bitmap, page->index - node_offset, BITMAP_PAGE_NEEDWRITE)) {
|
||||
set_page_attr(bitmap, page->index - node_offset, BITMAP_PAGE_PENDING);
|
||||
if (!test_page_attr(bitmap, index - node_offset, BITMAP_PAGE_NEEDWRITE)) {
|
||||
set_page_attr(bitmap, index - node_offset, BITMAP_PAGE_PENDING);
|
||||
bitmap->allclean = 0;
|
||||
}
|
||||
}
|
||||
@ -1042,7 +1048,7 @@ void md_bitmap_unplug(struct bitmap *bitmap)
|
||||
"md bitmap_unplug");
|
||||
}
|
||||
clear_page_attr(bitmap, i, BITMAP_PAGE_PENDING);
|
||||
write_page(bitmap, bitmap->storage.filemap[i], 0);
|
||||
filemap_write_page(bitmap, i, false);
|
||||
writing = 1;
|
||||
}
|
||||
}
|
||||
@ -1084,33 +1090,31 @@ void md_bitmap_unplug_async(struct bitmap *bitmap)
|
||||
EXPORT_SYMBOL(md_bitmap_unplug_async);
|
||||
|
||||
static void md_bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed);
|
||||
/* * bitmap_init_from_disk -- called at bitmap_create time to initialize
|
||||
* the in-memory bitmap from the on-disk bitmap -- also, sets up the
|
||||
* memory mapping of the bitmap file
|
||||
* Special cases:
|
||||
* if there's no bitmap file, or if the bitmap file had been
|
||||
* previously kicked from the array, we mark all the bits as
|
||||
* 1's in order to cause a full resync.
|
||||
|
||||
/*
|
||||
* Initialize the in-memory bitmap from the on-disk bitmap and set up the memory
|
||||
* mapping of the bitmap file.
|
||||
*
|
||||
* Special case: If there's no bitmap file, or if the bitmap file had been
|
||||
* previously kicked from the array, we mark all the bits as 1's in order to
|
||||
* cause a full resync.
|
||||
*
|
||||
* We ignore all bits for sectors that end earlier than 'start'.
|
||||
* This is used when reading an out-of-date bitmap...
|
||||
* This is used when reading an out-of-date bitmap.
|
||||
*/
|
||||
static int md_bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
|
||||
{
|
||||
unsigned long i, chunks, index, oldindex, bit, node_offset = 0;
|
||||
struct page *page = NULL;
|
||||
unsigned long bit_cnt = 0;
|
||||
struct file *file;
|
||||
unsigned long offset;
|
||||
int outofdate;
|
||||
int ret = -ENOSPC;
|
||||
void *paddr;
|
||||
bool outofdate = test_bit(BITMAP_STALE, &bitmap->flags);
|
||||
struct mddev *mddev = bitmap->mddev;
|
||||
unsigned long chunks = bitmap->counts.chunks;
|
||||
struct bitmap_storage *store = &bitmap->storage;
|
||||
struct file *file = store->file;
|
||||
unsigned long node_offset = 0;
|
||||
unsigned long bit_cnt = 0;
|
||||
unsigned long i;
|
||||
int ret;
|
||||
|
||||
chunks = bitmap->counts.chunks;
|
||||
file = store->file;
|
||||
|
||||
if (!file && !bitmap->mddev->bitmap_info.offset) {
|
||||
if (!file && !mddev->bitmap_info.offset) {
|
||||
/* No permanent bitmap - fill with '1s'. */
|
||||
store->filemap = NULL;
|
||||
store->file_pages = 0;
|
||||
@ -1125,77 +1129,79 @@ static int md_bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
|
||||
return 0;
|
||||
}
|
||||
|
||||
outofdate = test_bit(BITMAP_STALE, &bitmap->flags);
|
||||
if (outofdate)
|
||||
pr_warn("%s: bitmap file is out of date, doing full recovery\n", bmname(bitmap));
|
||||
|
||||
if (file && i_size_read(file->f_mapping->host) < store->bytes) {
|
||||
pr_warn("%s: bitmap file too short %lu < %lu\n",
|
||||
bmname(bitmap),
|
||||
(unsigned long) i_size_read(file->f_mapping->host),
|
||||
store->bytes);
|
||||
ret = -ENOSPC;
|
||||
goto err;
|
||||
}
|
||||
|
||||
oldindex = ~0L;
|
||||
offset = 0;
|
||||
if (!bitmap->mddev->bitmap_info.external)
|
||||
offset = sizeof(bitmap_super_t);
|
||||
|
||||
if (mddev_is_clustered(bitmap->mddev))
|
||||
if (mddev_is_clustered(mddev))
|
||||
node_offset = bitmap->cluster_slot * (DIV_ROUND_UP(store->bytes, PAGE_SIZE));
|
||||
|
||||
for (i = 0; i < chunks; i++) {
|
||||
int b;
|
||||
index = file_page_index(&bitmap->storage, i);
|
||||
bit = file_page_offset(&bitmap->storage, i);
|
||||
if (index != oldindex) { /* this is a new page, read it in */
|
||||
int count;
|
||||
/* unmap the old page, we're done with it */
|
||||
if (index == store->file_pages-1)
|
||||
count = store->bytes - index * PAGE_SIZE;
|
||||
else
|
||||
count = PAGE_SIZE;
|
||||
page = store->filemap[index];
|
||||
if (file)
|
||||
ret = read_page(file, index, bitmap,
|
||||
count, page);
|
||||
else
|
||||
ret = read_sb_page(
|
||||
bitmap->mddev,
|
||||
bitmap->mddev->bitmap_info.offset,
|
||||
page,
|
||||
index + node_offset, count);
|
||||
for (i = 0; i < store->file_pages; i++) {
|
||||
struct page *page = store->filemap[i];
|
||||
int count;
|
||||
|
||||
if (ret)
|
||||
goto err;
|
||||
/* unmap the old page, we're done with it */
|
||||
if (i == store->file_pages - 1)
|
||||
count = store->bytes - i * PAGE_SIZE;
|
||||
else
|
||||
count = PAGE_SIZE;
|
||||
|
||||
oldindex = index;
|
||||
if (file)
|
||||
ret = read_file_page(file, i, bitmap, count, page);
|
||||
else
|
||||
ret = read_sb_page(mddev, 0, page, i + node_offset,
|
||||
count);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (outofdate) {
|
||||
/*
|
||||
* if bitmap is out of date, dirty the
|
||||
* whole page and write it out
|
||||
*/
|
||||
paddr = kmap_atomic(page);
|
||||
memset(paddr + offset, 0xff,
|
||||
PAGE_SIZE - offset);
|
||||
kunmap_atomic(paddr);
|
||||
write_page(bitmap, page, 1);
|
||||
if (outofdate) {
|
||||
pr_warn("%s: bitmap file is out of date, doing full recovery\n",
|
||||
bmname(bitmap));
|
||||
|
||||
for (i = 0; i < store->file_pages; i++) {
|
||||
struct page *page = store->filemap[i];
|
||||
unsigned long offset = 0;
|
||||
void *paddr;
|
||||
|
||||
if (i == 0 && !mddev->bitmap_info.external)
|
||||
offset = sizeof(bitmap_super_t);
|
||||
|
||||
/*
|
||||
* If the bitmap is out of date, dirty the whole page
|
||||
* and write it out
|
||||
*/
|
||||
paddr = kmap_atomic(page);
|
||||
memset(paddr + offset, 0xff, PAGE_SIZE - offset);
|
||||
kunmap_atomic(paddr);
|
||||
|
||||
filemap_write_page(bitmap, i, true);
|
||||
if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags)) {
|
||||
ret = -EIO;
|
||||
if (test_bit(BITMAP_WRITE_ERROR,
|
||||
&bitmap->flags))
|
||||
goto err;
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < chunks; i++) {
|
||||
struct page *page = filemap_get_page(&bitmap->storage, i);
|
||||
unsigned long bit = file_page_offset(&bitmap->storage, i);
|
||||
void *paddr;
|
||||
bool was_set;
|
||||
|
||||
paddr = kmap_atomic(page);
|
||||
if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
|
||||
b = test_bit(bit, paddr);
|
||||
was_set = test_bit(bit, paddr);
|
||||
else
|
||||
b = test_bit_le(bit, paddr);
|
||||
was_set = test_bit_le(bit, paddr);
|
||||
kunmap_atomic(paddr);
|
||||
if (b) {
|
||||
|
||||
if (was_set) {
|
||||
/* if the disk bit is set, set the memory bit */
|
||||
int needed = ((sector_t)(i+1) << bitmap->counts.chunkshift
|
||||
>= start);
|
||||
@ -1204,7 +1210,6 @@ static int md_bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
|
||||
needed);
|
||||
bit_cnt++;
|
||||
}
|
||||
offset = 0;
|
||||
}
|
||||
|
||||
pr_debug("%s: bitmap initialized from disk: read %lu pages, set %lu of %lu bits\n",
|
||||
@ -1396,9 +1401,8 @@ void md_bitmap_daemon_work(struct mddev *mddev)
|
||||
break;
|
||||
if (bitmap->storage.filemap &&
|
||||
test_and_clear_page_attr(bitmap, j,
|
||||
BITMAP_PAGE_NEEDWRITE)) {
|
||||
write_page(bitmap, bitmap->storage.filemap[j], 0);
|
||||
}
|
||||
BITMAP_PAGE_NEEDWRITE))
|
||||
filemap_write_page(bitmap, j, false);
|
||||
}
|
||||
|
||||
done:
|
||||
@ -2542,6 +2546,10 @@ backlog_store(struct mddev *mddev, const char *buf, size_t len)
|
||||
if (backlog > COUNTER_MAX)
|
||||
return -EINVAL;
|
||||
|
||||
rv = mddev_lock(mddev);
|
||||
if (rv)
|
||||
return rv;
|
||||
|
||||
/*
|
||||
* Without write mostly device, it doesn't make sense to set
|
||||
* backlog for max_write_behind.
|
||||
@ -2555,6 +2563,7 @@ backlog_store(struct mddev *mddev, const char *buf, size_t len)
|
||||
if (!has_write_mostly) {
|
||||
pr_warn_ratelimited("%s: can't set backlog, no write mostly device available\n",
|
||||
mdname(mddev));
|
||||
mddev_unlock(mddev);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
@ -2565,13 +2574,13 @@ backlog_store(struct mddev *mddev, const char *buf, size_t len)
|
||||
mddev_destroy_serial_pool(mddev, NULL, false);
|
||||
} else if (backlog && !mddev->serial_info_pool) {
|
||||
/* serial_info_pool is needed since backlog is not zero */
|
||||
struct md_rdev *rdev;
|
||||
|
||||
rdev_for_each(rdev, mddev)
|
||||
mddev_create_serial_pool(mddev, rdev, false);
|
||||
}
|
||||
if (old_mwb != backlog)
|
||||
md_bitmap_update_sb(mddev->bitmap);
|
||||
|
||||
mddev_unlock(mddev);
|
||||
return len;
|
||||
}
|
||||
|
||||
|
@ -201,6 +201,7 @@ struct bitmap {
|
||||
struct file *file; /* backing disk file */
|
||||
struct page *sb_page; /* cached copy of the bitmap
|
||||
* file superblock */
|
||||
unsigned long sb_index;
|
||||
struct page **filemap; /* list of cache pages for
|
||||
* the file */
|
||||
unsigned long *filemap_attr; /* attributes associated
|
||||
|
@ -952,8 +952,8 @@ static int join(struct mddev *mddev, int nodes)
|
||||
return 0;
|
||||
err:
|
||||
set_bit(MD_CLUSTER_HOLDING_MUTEX_FOR_RECVD, &cinfo->state);
|
||||
md_unregister_thread(&cinfo->recovery_thread);
|
||||
md_unregister_thread(&cinfo->recv_thread);
|
||||
md_unregister_thread(mddev, &cinfo->recovery_thread);
|
||||
md_unregister_thread(mddev, &cinfo->recv_thread);
|
||||
lockres_free(cinfo->message_lockres);
|
||||
lockres_free(cinfo->token_lockres);
|
||||
lockres_free(cinfo->ack_lockres);
|
||||
@ -1015,8 +1015,8 @@ static int leave(struct mddev *mddev)
|
||||
resync_bitmap(mddev);
|
||||
|
||||
set_bit(MD_CLUSTER_HOLDING_MUTEX_FOR_RECVD, &cinfo->state);
|
||||
md_unregister_thread(&cinfo->recovery_thread);
|
||||
md_unregister_thread(&cinfo->recv_thread);
|
||||
md_unregister_thread(mddev, &cinfo->recovery_thread);
|
||||
md_unregister_thread(mddev, &cinfo->recv_thread);
|
||||
lockres_free(cinfo->message_lockres);
|
||||
lockres_free(cinfo->token_lockres);
|
||||
lockres_free(cinfo->ack_lockres);
|
||||
|
@ -204,6 +204,8 @@ static bool faulty_make_request(struct mddev *mddev, struct bio *bio)
|
||||
failit = 1;
|
||||
}
|
||||
}
|
||||
|
||||
md_account_bio(mddev, &bio);
|
||||
if (failit) {
|
||||
struct bio *b = bio_alloc_clone(conf->rdev->bdev, bio, GFP_NOIO,
|
||||
&mddev->bio_set);
|
||||
|
@ -238,6 +238,7 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio)
|
||||
bio = split;
|
||||
}
|
||||
|
||||
md_account_bio(mddev, &bio);
|
||||
bio_set_dev(bio, tmp_dev->rdev->bdev);
|
||||
bio->bi_iter.bi_sector = bio->bi_iter.bi_sector -
|
||||
start_sector + data_offset;
|
||||
|
@ -107,6 +107,7 @@ static bool multipath_make_request(struct mddev *mddev, struct bio * bio)
|
||||
&& md_flush_request(mddev, bio))
|
||||
return true;
|
||||
|
||||
md_account_bio(mddev, &bio);
|
||||
mp_bh = mempool_alloc(&conf->pool, GFP_NOIO);
|
||||
|
||||
mp_bh->master_bio = bio;
|
||||
|
226
drivers/md/md.c
226
drivers/md/md.c
@ -453,7 +453,6 @@ void mddev_suspend(struct mddev *mddev)
|
||||
mddev->pers->prepare_suspend(mddev);
|
||||
|
||||
wait_event(mddev->sb_wait, percpu_ref_is_zero(&mddev->active_io));
|
||||
mddev->pers->quiesce(mddev, 1);
|
||||
clear_bit_unlock(MD_ALLOW_SB_UPDATE, &mddev->flags);
|
||||
wait_event(mddev->sb_wait, !test_bit(MD_UPDATING_SB, &mddev->flags));
|
||||
|
||||
@ -465,14 +464,15 @@ EXPORT_SYMBOL_GPL(mddev_suspend);
|
||||
|
||||
void mddev_resume(struct mddev *mddev)
|
||||
{
|
||||
/* entred the memalloc scope from mddev_suspend() */
|
||||
memalloc_noio_restore(mddev->noio_flag);
|
||||
lockdep_assert_held(&mddev->reconfig_mutex);
|
||||
if (--mddev->suspended)
|
||||
return;
|
||||
|
||||
/* entred the memalloc scope from mddev_suspend() */
|
||||
memalloc_noio_restore(mddev->noio_flag);
|
||||
|
||||
percpu_ref_resurrect(&mddev->active_io);
|
||||
wake_up(&mddev->sb_wait);
|
||||
mddev->pers->quiesce(mddev, 0);
|
||||
|
||||
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
|
||||
md_wakeup_thread(mddev->thread);
|
||||
@ -643,6 +643,7 @@ void mddev_init(struct mddev *mddev)
|
||||
{
|
||||
mutex_init(&mddev->open_mutex);
|
||||
mutex_init(&mddev->reconfig_mutex);
|
||||
mutex_init(&mddev->sync_mutex);
|
||||
mutex_init(&mddev->bitmap_info.mutex);
|
||||
INIT_LIST_HEAD(&mddev->disks);
|
||||
INIT_LIST_HEAD(&mddev->all_mddevs);
|
||||
@ -650,6 +651,7 @@ void mddev_init(struct mddev *mddev)
|
||||
timer_setup(&mddev->safemode_timer, md_safemode_timeout, 0);
|
||||
atomic_set(&mddev->active, 1);
|
||||
atomic_set(&mddev->openers, 0);
|
||||
atomic_set(&mddev->sync_seq, 0);
|
||||
spin_lock_init(&mddev->lock);
|
||||
atomic_set(&mddev->flush_pending, 0);
|
||||
init_waitqueue_head(&mddev->sb_wait);
|
||||
@ -2304,7 +2306,7 @@ int md_integrity_register(struct mddev *mddev)
|
||||
pr_debug("md: data integrity enabled on %s\n", mdname(mddev));
|
||||
if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE) ||
|
||||
(mddev->level != 1 && mddev->level != 10 &&
|
||||
bioset_integrity_create(&mddev->io_acct_set, BIO_POOL_SIZE))) {
|
||||
bioset_integrity_create(&mddev->io_clone_set, BIO_POOL_SIZE))) {
|
||||
/*
|
||||
* No need to handle the failure of bioset_integrity_create,
|
||||
* because the function is called by md_run() -> pers->run(),
|
||||
@ -4747,6 +4749,62 @@ action_show(struct mddev *mddev, char *page)
|
||||
return sprintf(page, "%s\n", type);
|
||||
}
|
||||
|
||||
static void stop_sync_thread(struct mddev *mddev)
|
||||
{
|
||||
if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
|
||||
return;
|
||||
|
||||
if (mddev_lock(mddev))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Check again in case MD_RECOVERY_RUNNING is cleared before lock is
|
||||
* held.
|
||||
*/
|
||||
if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
|
||||
mddev_unlock(mddev);
|
||||
return;
|
||||
}
|
||||
|
||||
if (work_pending(&mddev->del_work))
|
||||
flush_workqueue(md_misc_wq);
|
||||
|
||||
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
|
||||
/*
|
||||
* Thread might be blocked waiting for metadata update which will now
|
||||
* never happen
|
||||
*/
|
||||
md_wakeup_thread_directly(mddev->sync_thread);
|
||||
|
||||
mddev_unlock(mddev);
|
||||
}
|
||||
|
||||
static void idle_sync_thread(struct mddev *mddev)
|
||||
{
|
||||
int sync_seq = atomic_read(&mddev->sync_seq);
|
||||
|
||||
mutex_lock(&mddev->sync_mutex);
|
||||
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
||||
stop_sync_thread(mddev);
|
||||
|
||||
wait_event(resync_wait, sync_seq != atomic_read(&mddev->sync_seq) ||
|
||||
!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
|
||||
|
||||
mutex_unlock(&mddev->sync_mutex);
|
||||
}
|
||||
|
||||
static void frozen_sync_thread(struct mddev *mddev)
|
||||
{
|
||||
mutex_lock(&mddev->sync_mutex);
|
||||
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
||||
stop_sync_thread(mddev);
|
||||
|
||||
wait_event(resync_wait, mddev->sync_thread == NULL &&
|
||||
!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
|
||||
|
||||
mutex_unlock(&mddev->sync_mutex);
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
action_store(struct mddev *mddev, const char *page, size_t len)
|
||||
{
|
||||
@ -4754,35 +4812,11 @@ action_store(struct mddev *mddev, const char *page, size_t len)
|
||||
return -EINVAL;
|
||||
|
||||
|
||||
if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
|
||||
if (cmd_match(page, "frozen"))
|
||||
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
||||
else
|
||||
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
||||
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
|
||||
mddev_lock(mddev) == 0) {
|
||||
if (work_pending(&mddev->del_work))
|
||||
flush_workqueue(md_misc_wq);
|
||||
if (mddev->sync_thread) {
|
||||
sector_t save_rp = mddev->reshape_position;
|
||||
|
||||
mddev_unlock(mddev);
|
||||
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
|
||||
md_unregister_thread(&mddev->sync_thread);
|
||||
mddev_lock_nointr(mddev);
|
||||
/*
|
||||
* set RECOVERY_INTR again and restore reshape
|
||||
* position in case others changed them after
|
||||
* got lock, eg, reshape_position_store and
|
||||
* md_check_recovery.
|
||||
*/
|
||||
mddev->reshape_position = save_rp;
|
||||
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
|
||||
md_reap_sync_thread(mddev);
|
||||
}
|
||||
mddev_unlock(mddev);
|
||||
}
|
||||
} else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
|
||||
if (cmd_match(page, "idle"))
|
||||
idle_sync_thread(mddev);
|
||||
else if (cmd_match(page, "frozen"))
|
||||
frozen_sync_thread(mddev);
|
||||
else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
|
||||
return -EBUSY;
|
||||
else if (cmd_match(page, "resync"))
|
||||
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
||||
@ -5842,6 +5876,13 @@ int md_run(struct mddev *mddev)
|
||||
goto exit_bio_set;
|
||||
}
|
||||
|
||||
if (!bioset_initialized(&mddev->io_clone_set)) {
|
||||
err = bioset_init(&mddev->io_clone_set, BIO_POOL_SIZE,
|
||||
offsetof(struct md_io_clone, bio_clone), 0);
|
||||
if (err)
|
||||
goto exit_sync_set;
|
||||
}
|
||||
|
||||
spin_lock(&pers_lock);
|
||||
pers = find_pers(mddev->level, mddev->clevel);
|
||||
if (!pers || !try_module_get(pers->owner)) {
|
||||
@ -6019,6 +6060,8 @@ bitmap_abort:
|
||||
module_put(pers->owner);
|
||||
md_bitmap_destroy(mddev);
|
||||
abort:
|
||||
bioset_exit(&mddev->io_clone_set);
|
||||
exit_sync_set:
|
||||
bioset_exit(&mddev->sync_set);
|
||||
exit_bio_set:
|
||||
bioset_exit(&mddev->bio_set);
|
||||
@ -6176,7 +6219,6 @@ static void __md_stop_writes(struct mddev *mddev)
|
||||
flush_workqueue(md_misc_wq);
|
||||
if (mddev->sync_thread) {
|
||||
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
|
||||
md_unregister_thread(&mddev->sync_thread);
|
||||
md_reap_sync_thread(mddev);
|
||||
}
|
||||
|
||||
@ -6216,7 +6258,7 @@ static void mddev_detach(struct mddev *mddev)
|
||||
mddev->pers->quiesce(mddev, 1);
|
||||
mddev->pers->quiesce(mddev, 0);
|
||||
}
|
||||
md_unregister_thread(&mddev->thread);
|
||||
md_unregister_thread(mddev, &mddev->thread);
|
||||
if (mddev->queue)
|
||||
blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
|
||||
}
|
||||
@ -6243,6 +6285,7 @@ static void __md_stop(struct mddev *mddev)
|
||||
percpu_ref_exit(&mddev->active_io);
|
||||
bioset_exit(&mddev->bio_set);
|
||||
bioset_exit(&mddev->sync_set);
|
||||
bioset_exit(&mddev->io_clone_set);
|
||||
}
|
||||
|
||||
void md_stop(struct mddev *mddev)
|
||||
@ -7012,6 +7055,15 @@ static int set_bitmap_file(struct mddev *mddev, int fd)
|
||||
|
||||
if (mddev->bitmap || mddev->bitmap_info.file)
|
||||
return -EEXIST; /* cannot add when bitmap is present */
|
||||
|
||||
if (!IS_ENABLED(CONFIG_MD_BITMAP_FILE)) {
|
||||
pr_warn("%s: bitmap files not supported by this kernel\n",
|
||||
mdname(mddev));
|
||||
return -EINVAL;
|
||||
}
|
||||
pr_warn("%s: using deprecated bitmap file support\n",
|
||||
mdname(mddev));
|
||||
|
||||
f = fget(fd);
|
||||
|
||||
if (f == NULL) {
|
||||
@ -7940,9 +7992,10 @@ struct md_thread *md_register_thread(void (*run) (struct md_thread *),
|
||||
}
|
||||
EXPORT_SYMBOL(md_register_thread);
|
||||
|
||||
void md_unregister_thread(struct md_thread __rcu **threadp)
|
||||
void md_unregister_thread(struct mddev *mddev, struct md_thread __rcu **threadp)
|
||||
{
|
||||
struct md_thread *thread = rcu_dereference_protected(*threadp, true);
|
||||
struct md_thread *thread = rcu_dereference_protected(*threadp,
|
||||
lockdep_is_held(&mddev->reconfig_mutex));
|
||||
|
||||
if (!thread)
|
||||
return;
|
||||
@ -8601,63 +8654,45 @@ void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(md_submit_discard_bio);
|
||||
|
||||
int acct_bioset_init(struct mddev *mddev)
|
||||
static void md_end_clone_io(struct bio *bio)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
if (!bioset_initialized(&mddev->io_acct_set))
|
||||
err = bioset_init(&mddev->io_acct_set, BIO_POOL_SIZE,
|
||||
offsetof(struct md_io_acct, bio_clone), 0);
|
||||
return err;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(acct_bioset_init);
|
||||
|
||||
void acct_bioset_exit(struct mddev *mddev)
|
||||
{
|
||||
bioset_exit(&mddev->io_acct_set);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(acct_bioset_exit);
|
||||
|
||||
static void md_end_io_acct(struct bio *bio)
|
||||
{
|
||||
struct md_io_acct *md_io_acct = bio->bi_private;
|
||||
struct bio *orig_bio = md_io_acct->orig_bio;
|
||||
struct mddev *mddev = md_io_acct->mddev;
|
||||
struct md_io_clone *md_io_clone = bio->bi_private;
|
||||
struct bio *orig_bio = md_io_clone->orig_bio;
|
||||
struct mddev *mddev = md_io_clone->mddev;
|
||||
|
||||
orig_bio->bi_status = bio->bi_status;
|
||||
|
||||
bio_end_io_acct(orig_bio, md_io_acct->start_time);
|
||||
if (md_io_clone->start_time)
|
||||
bio_end_io_acct(orig_bio, md_io_clone->start_time);
|
||||
|
||||
bio_put(bio);
|
||||
bio_endio(orig_bio);
|
||||
|
||||
percpu_ref_put(&mddev->active_io);
|
||||
}
|
||||
|
||||
/*
|
||||
* Used by personalities that don't already clone the bio and thus can't
|
||||
* easily add the timestamp to their extended bio structure.
|
||||
*/
|
||||
void md_account_bio(struct mddev *mddev, struct bio **bio)
|
||||
static void md_clone_bio(struct mddev *mddev, struct bio **bio)
|
||||
{
|
||||
struct block_device *bdev = (*bio)->bi_bdev;
|
||||
struct md_io_acct *md_io_acct;
|
||||
struct bio *clone;
|
||||
struct md_io_clone *md_io_clone;
|
||||
struct bio *clone =
|
||||
bio_alloc_clone(bdev, *bio, GFP_NOIO, &mddev->io_clone_set);
|
||||
|
||||
if (!blk_queue_io_stat(bdev->bd_disk->queue))
|
||||
return;
|
||||
md_io_clone = container_of(clone, struct md_io_clone, bio_clone);
|
||||
md_io_clone->orig_bio = *bio;
|
||||
md_io_clone->mddev = mddev;
|
||||
if (blk_queue_io_stat(bdev->bd_disk->queue))
|
||||
md_io_clone->start_time = bio_start_io_acct(*bio);
|
||||
|
||||
percpu_ref_get(&mddev->active_io);
|
||||
|
||||
clone = bio_alloc_clone(bdev, *bio, GFP_NOIO, &mddev->io_acct_set);
|
||||
md_io_acct = container_of(clone, struct md_io_acct, bio_clone);
|
||||
md_io_acct->orig_bio = *bio;
|
||||
md_io_acct->start_time = bio_start_io_acct(*bio);
|
||||
md_io_acct->mddev = mddev;
|
||||
|
||||
clone->bi_end_io = md_end_io_acct;
|
||||
clone->bi_private = md_io_acct;
|
||||
clone->bi_end_io = md_end_clone_io;
|
||||
clone->bi_private = md_io_clone;
|
||||
*bio = clone;
|
||||
}
|
||||
|
||||
void md_account_bio(struct mddev *mddev, struct bio **bio)
|
||||
{
|
||||
percpu_ref_get(&mddev->active_io);
|
||||
md_clone_bio(mddev, bio);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(md_account_bio);
|
||||
|
||||
/* md_allow_write(mddev)
|
||||
@ -9329,7 +9364,6 @@ void md_check_recovery(struct mddev *mddev)
|
||||
* ->spare_active and clear saved_raid_disk
|
||||
*/
|
||||
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
|
||||
md_unregister_thread(&mddev->sync_thread);
|
||||
md_reap_sync_thread(mddev);
|
||||
clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
|
||||
clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
|
||||
@ -9358,17 +9392,24 @@ void md_check_recovery(struct mddev *mddev)
|
||||
if (mddev->sb_flags)
|
||||
md_update_sb(mddev, 0);
|
||||
|
||||
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
|
||||
!test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
|
||||
/* resync/recovery still happening */
|
||||
clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
|
||||
goto unlock;
|
||||
}
|
||||
if (mddev->sync_thread) {
|
||||
md_unregister_thread(&mddev->sync_thread);
|
||||
/*
|
||||
* Never start a new sync thread if MD_RECOVERY_RUNNING is
|
||||
* still set.
|
||||
*/
|
||||
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
|
||||
if (!test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
|
||||
/* resync/recovery still happening */
|
||||
clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
if (WARN_ON_ONCE(!mddev->sync_thread))
|
||||
goto unlock;
|
||||
|
||||
md_reap_sync_thread(mddev);
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
/* Set RUNNING before clearing NEEDED to avoid
|
||||
* any transients in the value of "sync_action".
|
||||
*/
|
||||
@ -9445,7 +9486,10 @@ void md_reap_sync_thread(struct mddev *mddev)
|
||||
sector_t old_dev_sectors = mddev->dev_sectors;
|
||||
bool is_reshaped = false;
|
||||
|
||||
/* sync_thread should be unregistered, collect result */
|
||||
/* resync has finished, collect result */
|
||||
md_unregister_thread(mddev, &mddev->sync_thread);
|
||||
atomic_inc(&mddev->sync_seq);
|
||||
|
||||
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
|
||||
!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
|
||||
mddev->degraded != mddev->raid_disks) {
|
||||
@ -9490,7 +9534,6 @@ void md_reap_sync_thread(struct mddev *mddev)
|
||||
if (mddev_is_clustered(mddev) && is_reshaped
|
||||
&& !test_bit(MD_CLOSING, &mddev->flags))
|
||||
md_cluster_ops->update_size(mddev, old_dev_sectors);
|
||||
wake_up(&resync_wait);
|
||||
/* flag recovery needed just to double check */
|
||||
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
|
||||
sysfs_notify_dirent_safe(mddev->sysfs_completed);
|
||||
@ -9498,6 +9541,7 @@ void md_reap_sync_thread(struct mddev *mddev)
|
||||
md_new_event();
|
||||
if (mddev->event_work.func)
|
||||
queue_work(md_misc_wq, &mddev->event_work);
|
||||
wake_up(&resync_wait);
|
||||
}
|
||||
EXPORT_SYMBOL(md_reap_sync_thread);
|
||||
|
||||
|
@ -510,7 +510,7 @@ struct mddev {
|
||||
struct bio_set sync_set; /* for sync operations like
|
||||
* metadata and bitmap writes
|
||||
*/
|
||||
struct bio_set io_acct_set; /* for raid0 and raid5 io accounting */
|
||||
struct bio_set io_clone_set;
|
||||
|
||||
/* Generic flush handling.
|
||||
* The last to finish preflush schedules a worker to submit
|
||||
@ -535,6 +535,11 @@ struct mddev {
|
||||
*/
|
||||
struct list_head deleting;
|
||||
|
||||
/* Used to synchronize idle and frozen for action_store() */
|
||||
struct mutex sync_mutex;
|
||||
/* The sequence number for sync thread */
|
||||
atomic_t sync_seq;
|
||||
|
||||
bool has_superblocks:1;
|
||||
bool fail_last_dev:1;
|
||||
bool serialize_policy:1;
|
||||
@ -731,7 +736,7 @@ struct md_thread {
|
||||
void *private;
|
||||
};
|
||||
|
||||
struct md_io_acct {
|
||||
struct md_io_clone {
|
||||
struct mddev *mddev;
|
||||
struct bio *orig_bio;
|
||||
unsigned long start_time;
|
||||
@ -756,7 +761,7 @@ extern struct md_thread *md_register_thread(
|
||||
void (*run)(struct md_thread *thread),
|
||||
struct mddev *mddev,
|
||||
const char *name);
|
||||
extern void md_unregister_thread(struct md_thread __rcu **threadp);
|
||||
extern void md_unregister_thread(struct mddev *mddev, struct md_thread __rcu **threadp);
|
||||
extern void md_wakeup_thread(struct md_thread __rcu *thread);
|
||||
extern void md_check_recovery(struct mddev *mddev);
|
||||
extern void md_reap_sync_thread(struct mddev *mddev);
|
||||
@ -769,8 +774,6 @@ extern void md_error(struct mddev *mddev, struct md_rdev *rdev);
|
||||
extern void md_finish_reshape(struct mddev *mddev);
|
||||
void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
|
||||
struct bio *bio, sector_t start, sector_t size);
|
||||
int acct_bioset_init(struct mddev *mddev);
|
||||
void acct_bioset_exit(struct mddev *mddev);
|
||||
void md_account_bio(struct mddev *mddev, struct bio **bio);
|
||||
|
||||
extern bool __must_check md_flush_request(struct mddev *mddev, struct bio *bio);
|
||||
|
@ -377,7 +377,6 @@ static void raid0_free(struct mddev *mddev, void *priv)
|
||||
struct r0conf *conf = priv;
|
||||
|
||||
free_conf(mddev, conf);
|
||||
acct_bioset_exit(mddev);
|
||||
}
|
||||
|
||||
static int raid0_run(struct mddev *mddev)
|
||||
@ -392,16 +391,11 @@ static int raid0_run(struct mddev *mddev)
|
||||
if (md_check_no_bitmap(mddev))
|
||||
return -EINVAL;
|
||||
|
||||
if (acct_bioset_init(mddev)) {
|
||||
pr_err("md/raid0:%s: alloc acct bioset failed.\n", mdname(mddev));
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/* if private is not null, we are here after takeover */
|
||||
if (mddev->private == NULL) {
|
||||
ret = create_strip_zones(mddev, &conf);
|
||||
if (ret < 0)
|
||||
goto exit_acct_set;
|
||||
return ret;
|
||||
mddev->private = conf;
|
||||
}
|
||||
conf = mddev->private;
|
||||
@ -432,15 +426,9 @@ static int raid0_run(struct mddev *mddev)
|
||||
|
||||
ret = md_integrity_register(mddev);
|
||||
if (ret)
|
||||
goto free;
|
||||
free_conf(mddev, conf);
|
||||
|
||||
return ret;
|
||||
|
||||
free:
|
||||
free_conf(mddev, conf);
|
||||
exit_acct_set:
|
||||
acct_bioset_exit(mddev);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -304,8 +304,6 @@ static void call_bio_endio(struct r1bio *r1_bio)
|
||||
if (!test_bit(R1BIO_Uptodate, &r1_bio->state))
|
||||
bio->bi_status = BLK_STS_IOERR;
|
||||
|
||||
if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
|
||||
bio_end_io_acct(bio, r1_bio->start_time);
|
||||
bio_endio(bio);
|
||||
}
|
||||
|
||||
@ -791,11 +789,17 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
|
||||
return best_disk;
|
||||
}
|
||||
|
||||
static void wake_up_barrier(struct r1conf *conf)
|
||||
{
|
||||
if (wq_has_sleeper(&conf->wait_barrier))
|
||||
wake_up(&conf->wait_barrier);
|
||||
}
|
||||
|
||||
static void flush_bio_list(struct r1conf *conf, struct bio *bio)
|
||||
{
|
||||
/* flush any pending bitmap writes to disk before proceeding w/ I/O */
|
||||
raid1_prepare_flush_writes(conf->mddev->bitmap);
|
||||
wake_up(&conf->wait_barrier);
|
||||
wake_up_barrier(conf);
|
||||
|
||||
while (bio) { /* submit pending writes */
|
||||
struct bio *next = bio->bi_next;
|
||||
@ -972,7 +976,7 @@ static bool _wait_barrier(struct r1conf *conf, int idx, bool nowait)
|
||||
* In case freeze_array() is waiting for
|
||||
* get_unqueued_pending() == extra
|
||||
*/
|
||||
wake_up(&conf->wait_barrier);
|
||||
wake_up_barrier(conf);
|
||||
/* Wait for the barrier in same barrier unit bucket to drop. */
|
||||
|
||||
/* Return false when nowait flag is set */
|
||||
@ -1015,7 +1019,7 @@ static bool wait_read_barrier(struct r1conf *conf, sector_t sector_nr, bool nowa
|
||||
* In case freeze_array() is waiting for
|
||||
* get_unqueued_pending() == extra
|
||||
*/
|
||||
wake_up(&conf->wait_barrier);
|
||||
wake_up_barrier(conf);
|
||||
/* Wait for array to be unfrozen */
|
||||
|
||||
/* Return false when nowait flag is set */
|
||||
@ -1044,7 +1048,7 @@ static bool wait_barrier(struct r1conf *conf, sector_t sector_nr, bool nowait)
|
||||
/*
 * Decrement conf->nr_pending[idx] and then wake anyone waiting on
 * conf->wait_barrier.
 *
 * NOTE(review): this text is a rendered diff, so the unconditional
 * wake_up() line and the wake_up_barrier() line both appear here;
 * presumably only the wake_up_barrier() call exists in the resulting
 * tree -- confirm against the actual file.
 */
static void _allow_barrier(struct r1conf *conf, int idx)
|
||||
{
|
||||
atomic_dec(&conf->nr_pending[idx]);
|
||||
wake_up(&conf->wait_barrier);
|
||||
wake_up_barrier(conf);
|
||||
}
|
||||
|
||||
static void allow_barrier(struct r1conf *conf, sector_t sector_nr)
|
||||
@ -1173,7 +1177,7 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule)
|
||||
spin_lock_irq(&conf->device_lock);
|
||||
bio_list_merge(&conf->pending_bio_list, &plug->pending);
|
||||
spin_unlock_irq(&conf->device_lock);
|
||||
wake_up(&conf->wait_barrier);
|
||||
wake_up_barrier(conf);
|
||||
md_wakeup_thread(mddev->thread);
|
||||
kfree(plug);
|
||||
return;
|
||||
@ -1303,10 +1307,10 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
|
||||
}
|
||||
|
||||
r1_bio->read_disk = rdisk;
|
||||
|
||||
if (!r1bio_existed && blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
|
||||
r1_bio->start_time = bio_start_io_acct(bio);
|
||||
|
||||
if (!r1bio_existed) {
|
||||
md_account_bio(mddev, &bio);
|
||||
r1_bio->master_bio = bio;
|
||||
}
|
||||
read_bio = bio_alloc_clone(mirror->rdev->bdev, bio, gfp,
|
||||
&mddev->bio_set);
|
||||
|
||||
@ -1500,8 +1504,8 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
|
||||
r1_bio->sectors = max_sectors;
|
||||
}
|
||||
|
||||
if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
|
||||
r1_bio->start_time = bio_start_io_acct(bio);
|
||||
md_account_bio(mddev, &bio);
|
||||
r1_bio->master_bio = bio;
|
||||
atomic_set(&r1_bio->remaining, 1);
|
||||
atomic_set(&r1_bio->behind_remaining, 0);
|
||||
|
||||
@ -1576,7 +1580,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
|
||||
r1_bio_write_done(r1_bio);
|
||||
|
||||
/* In case raid1d snuck in to freeze_array */
|
||||
wake_up(&conf->wait_barrier);
|
||||
wake_up_barrier(conf);
|
||||
}
|
||||
|
||||
static bool raid1_make_request(struct mddev *mddev, struct bio *bio)
|
||||
@ -1766,7 +1770,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
|
||||
{
|
||||
struct r1conf *conf = mddev->private;
|
||||
int err = -EEXIST;
|
||||
int mirror = 0;
|
||||
int mirror = 0, repl_slot = -1;
|
||||
struct raid1_info *p;
|
||||
int first = 0;
|
||||
int last = conf->raid_disks - 1;
|
||||
@ -1809,17 +1813,21 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
|
||||
break;
|
||||
}
|
||||
if (test_bit(WantReplacement, &p->rdev->flags) &&
|
||||
p[conf->raid_disks].rdev == NULL) {
|
||||
/* Add this device as a replacement */
|
||||
clear_bit(In_sync, &rdev->flags);
|
||||
set_bit(Replacement, &rdev->flags);
|
||||
rdev->raid_disk = mirror;
|
||||
err = 0;
|
||||
conf->fullsync = 1;
|
||||
rcu_assign_pointer(p[conf->raid_disks].rdev, rdev);
|
||||
break;
|
||||
}
|
||||
p[conf->raid_disks].rdev == NULL && repl_slot < 0)
|
||||
repl_slot = mirror;
|
||||
}
|
||||
|
||||
if (err && repl_slot >= 0) {
|
||||
/* Add this device as a replacement */
|
||||
p = conf->mirrors + repl_slot;
|
||||
clear_bit(In_sync, &rdev->flags);
|
||||
set_bit(Replacement, &rdev->flags);
|
||||
rdev->raid_disk = repl_slot;
|
||||
err = 0;
|
||||
conf->fullsync = 1;
|
||||
rcu_assign_pointer(p[conf->raid_disks].rdev, rdev);
|
||||
}
|
||||
|
||||
print_conf(conf);
|
||||
return err;
|
||||
}
|
||||
@ -1829,6 +1837,10 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
|
||||
struct r1conf *conf = mddev->private;
|
||||
int err = 0;
|
||||
int number = rdev->raid_disk;
|
||||
|
||||
if (unlikely(number >= conf->raid_disks))
|
||||
goto abort;
|
||||
|
||||
struct raid1_info *p = conf->mirrors + number;
|
||||
|
||||
if (rdev != p->rdev)
|
||||
@ -2299,7 +2311,7 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
|
||||
d++;
|
||||
if (d == conf->raid_disks * 2)
|
||||
d = 0;
|
||||
} while (!success && d != read_disk);
|
||||
} while (d != read_disk);
|
||||
|
||||
if (!success) {
|
||||
/* Cannot read from anywhere - mark it bad */
|
||||
@ -3144,7 +3156,7 @@ static int raid1_run(struct mddev *mddev)
|
||||
* RAID1 needs at least one disk in active
|
||||
*/
|
||||
if (conf->raid_disks - mddev->degraded < 1) {
|
||||
md_unregister_thread(&conf->thread);
|
||||
md_unregister_thread(mddev, &conf->thread);
|
||||
ret = -EINVAL;
|
||||
goto abort;
|
||||
}
|
||||
@ -3171,7 +3183,7 @@ static int raid1_run(struct mddev *mddev)
|
||||
|
||||
ret = md_integrity_register(mddev);
|
||||
if (ret) {
|
||||
md_unregister_thread(&mddev->thread);
|
||||
md_unregister_thread(mddev, &mddev->thread);
|
||||
goto abort;
|
||||
}
|
||||
return 0;
|
||||
|
@ -157,7 +157,6 @@ struct r1bio {
|
||||
sector_t sector;
|
||||
int sectors;
|
||||
unsigned long state;
|
||||
unsigned long start_time;
|
||||
struct mddev *mddev;
|
||||
/*
|
||||
* original bio going to /dev/mdx
|
||||
|
@ -325,8 +325,6 @@ static void raid_end_bio_io(struct r10bio *r10_bio)
|
||||
if (!test_bit(R10BIO_Uptodate, &r10_bio->state))
|
||||
bio->bi_status = BLK_STS_IOERR;
|
||||
|
||||
if (r10_bio->start_time)
|
||||
bio_end_io_acct(bio, r10_bio->start_time);
|
||||
bio_endio(bio);
|
||||
/*
|
||||
* Wake up any possible resync thread that waits for the device
|
||||
@ -1172,7 +1170,7 @@ static bool regular_request_wait(struct mddev *mddev, struct r10conf *conf,
|
||||
}
|
||||
|
||||
static void raid10_read_request(struct mddev *mddev, struct bio *bio,
|
||||
struct r10bio *r10_bio)
|
||||
struct r10bio *r10_bio, bool io_accounting)
|
||||
{
|
||||
struct r10conf *conf = mddev->private;
|
||||
struct bio *read_bio;
|
||||
@ -1243,9 +1241,10 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
|
||||
}
|
||||
slot = r10_bio->read_slot;
|
||||
|
||||
if (!r10_bio->start_time &&
|
||||
blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
|
||||
r10_bio->start_time = bio_start_io_acct(bio);
|
||||
if (io_accounting) {
|
||||
md_account_bio(mddev, &bio);
|
||||
r10_bio->master_bio = bio;
|
||||
}
|
||||
read_bio = bio_alloc_clone(rdev->bdev, bio, gfp, &mddev->bio_set);
|
||||
|
||||
r10_bio->devs[slot].bio = read_bio;
|
||||
@ -1322,6 +1321,25 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Load both device pointers of @mirror with rcu_dereference().
 *
 * mirror->replacement is read first, then smp_mb() is issued, then
 * mirror->rdev is read; the order of these three statements must not be
 * changed. If both loads return the same device, the replacement is
 * reported as NULL so the device is not handled twice.
 *
 * Returns the rdev pointer (may be NULL) and stores the replacement
 * pointer (may be NULL) in *@prrdev.
 *
 * NOTE(review): the callers visible in this diff invoke this between
 * rcu_read_lock()/rcu_read_unlock() -- confirm that holds for every caller.
 */
static struct md_rdev *dereference_rdev_and_rrdev(struct raid10_info *mirror,
|
||||
struct md_rdev **prrdev)
|
||||
{
|
||||
struct md_rdev *rdev, *rrdev;
|
||||
|
||||
rrdev = rcu_dereference(mirror->replacement);
|
||||
/*
|
||||
* Read replacement first to prevent reading both rdev and
|
||||
* replacement as NULL while the replacement is replacing rdev.
|
||||
*/
|
||||
smp_mb();
|
||||
rdev = rcu_dereference(mirror->rdev);
|
||||
if (rdev == rrdev)
|
||||
rrdev = NULL;
|
||||
|
||||
*prrdev = rrdev;
|
||||
return rdev;
|
||||
}
|
||||
|
||||
static void wait_blocked_dev(struct mddev *mddev, struct r10bio *r10_bio)
|
||||
{
|
||||
int i;
|
||||
@ -1332,11 +1350,9 @@ retry_wait:
|
||||
blocked_rdev = NULL;
|
||||
rcu_read_lock();
|
||||
for (i = 0; i < conf->copies; i++) {
|
||||
struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
|
||||
struct md_rdev *rrdev = rcu_dereference(
|
||||
conf->mirrors[i].replacement);
|
||||
if (rdev == rrdev)
|
||||
rrdev = NULL;
|
||||
struct md_rdev *rdev, *rrdev;
|
||||
|
||||
rdev = dereference_rdev_and_rrdev(&conf->mirrors[i], &rrdev);
|
||||
if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
|
||||
atomic_inc(&rdev->nr_pending);
|
||||
blocked_rdev = rdev;
|
||||
@ -1465,15 +1481,7 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
|
||||
int d = r10_bio->devs[i].devnum;
|
||||
struct md_rdev *rdev, *rrdev;
|
||||
|
||||
rrdev = rcu_dereference(conf->mirrors[d].replacement);
|
||||
/*
|
||||
* Read replacement first to prevent reading both rdev and
|
||||
* replacement as NULL during replacement replace rdev.
|
||||
*/
|
||||
smp_mb();
|
||||
rdev = rcu_dereference(conf->mirrors[d].rdev);
|
||||
if (rdev == rrdev)
|
||||
rrdev = NULL;
|
||||
rdev = dereference_rdev_and_rrdev(&conf->mirrors[d], &rrdev);
|
||||
if (rdev && (test_bit(Faulty, &rdev->flags)))
|
||||
rdev = NULL;
|
||||
if (rrdev && (test_bit(Faulty, &rrdev->flags)))
|
||||
@ -1543,8 +1551,8 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
|
||||
r10_bio->master_bio = bio;
|
||||
}
|
||||
|
||||
if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
|
||||
r10_bio->start_time = bio_start_io_acct(bio);
|
||||
md_account_bio(mddev, &bio);
|
||||
r10_bio->master_bio = bio;
|
||||
atomic_set(&r10_bio->remaining, 1);
|
||||
md_bitmap_startwrite(mddev->bitmap, r10_bio->sector, r10_bio->sectors, 0);
|
||||
|
||||
@ -1571,12 +1579,11 @@ static void __make_request(struct mddev *mddev, struct bio *bio, int sectors)
|
||||
r10_bio->sector = bio->bi_iter.bi_sector;
|
||||
r10_bio->state = 0;
|
||||
r10_bio->read_slot = -1;
|
||||
r10_bio->start_time = 0;
|
||||
memset(r10_bio->devs, 0, sizeof(r10_bio->devs[0]) *
|
||||
conf->geo.raid_disks);
|
||||
|
||||
if (bio_data_dir(bio) == READ)
|
||||
raid10_read_request(mddev, bio, r10_bio);
|
||||
raid10_read_request(mddev, bio, r10_bio, true);
|
||||
else
|
||||
raid10_write_request(mddev, bio, r10_bio);
|
||||
}
|
||||
@ -1780,10 +1787,9 @@ retry_discard:
|
||||
*/
|
||||
rcu_read_lock();
|
||||
for (disk = 0; disk < geo->raid_disks; disk++) {
|
||||
struct md_rdev *rdev = rcu_dereference(conf->mirrors[disk].rdev);
|
||||
struct md_rdev *rrdev = rcu_dereference(
|
||||
conf->mirrors[disk].replacement);
|
||||
struct md_rdev *rdev, *rrdev;
|
||||
|
||||
rdev = dereference_rdev_and_rrdev(&conf->mirrors[disk], &rrdev);
|
||||
r10_bio->devs[disk].bio = NULL;
|
||||
r10_bio->devs[disk].repl_bio = NULL;
|
||||
|
||||
@ -2720,10 +2726,10 @@ static int r10_sync_page_io(struct md_rdev *rdev, sector_t sector,
|
||||
static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10bio *r10_bio)
|
||||
{
|
||||
int sect = 0; /* Offset from r10_bio->sector */
|
||||
int sectors = r10_bio->sectors;
|
||||
int sectors = r10_bio->sectors, slot = r10_bio->read_slot;
|
||||
struct md_rdev *rdev;
|
||||
int max_read_errors = atomic_read(&mddev->max_corr_read_errors);
|
||||
int d = r10_bio->devs[r10_bio->read_slot].devnum;
|
||||
int d = r10_bio->devs[slot].devnum;
|
||||
|
||||
/* still own a reference to this rdev, so it cannot
|
||||
* have been cleared recently.
|
||||
@ -2744,13 +2750,13 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
|
||||
pr_notice("md/raid10:%s: %pg: Failing raid device\n",
|
||||
mdname(mddev), rdev->bdev);
|
||||
md_error(mddev, rdev);
|
||||
r10_bio->devs[r10_bio->read_slot].bio = IO_BLOCKED;
|
||||
r10_bio->devs[slot].bio = IO_BLOCKED;
|
||||
return;
|
||||
}
|
||||
|
||||
while(sectors) {
|
||||
int s = sectors;
|
||||
int sl = r10_bio->read_slot;
|
||||
int sl = slot;
|
||||
int success = 0;
|
||||
int start;
|
||||
|
||||
@ -2785,7 +2791,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
|
||||
sl++;
|
||||
if (sl == conf->copies)
|
||||
sl = 0;
|
||||
} while (!success && sl != r10_bio->read_slot);
|
||||
} while (sl != slot);
|
||||
rcu_read_unlock();
|
||||
|
||||
if (!success) {
|
||||
@ -2793,16 +2799,16 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
|
||||
* as bad on the first device to discourage future
|
||||
* reads.
|
||||
*/
|
||||
int dn = r10_bio->devs[r10_bio->read_slot].devnum;
|
||||
int dn = r10_bio->devs[slot].devnum;
|
||||
rdev = conf->mirrors[dn].rdev;
|
||||
|
||||
if (!rdev_set_badblocks(
|
||||
rdev,
|
||||
r10_bio->devs[r10_bio->read_slot].addr
|
||||
r10_bio->devs[slot].addr
|
||||
+ sect,
|
||||
s, 0)) {
|
||||
md_error(mddev, rdev);
|
||||
r10_bio->devs[r10_bio->read_slot].bio
|
||||
r10_bio->devs[slot].bio
|
||||
= IO_BLOCKED;
|
||||
}
|
||||
break;
|
||||
@ -2811,7 +2817,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
|
||||
start = sl;
|
||||
/* write it back and re-read */
|
||||
rcu_read_lock();
|
||||
while (sl != r10_bio->read_slot) {
|
||||
while (sl != slot) {
|
||||
if (sl==0)
|
||||
sl = conf->copies;
|
||||
sl--;
|
||||
@ -2845,7 +2851,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
|
||||
rcu_read_lock();
|
||||
}
|
||||
sl = start;
|
||||
while (sl != r10_bio->read_slot) {
|
||||
while (sl != slot) {
|
||||
if (sl==0)
|
||||
sl = conf->copies;
|
||||
sl--;
|
||||
@ -2985,7 +2991,7 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
|
||||
|
||||
rdev_dec_pending(rdev, mddev);
|
||||
r10_bio->state = 0;
|
||||
raid10_read_request(mddev, r10_bio->master_bio, r10_bio);
|
||||
raid10_read_request(mddev, r10_bio->master_bio, r10_bio, false);
|
||||
/*
|
||||
* allow_barrier after re-submit to ensure no sync io
|
||||
* can be issued while regular io pending.
|
||||
@ -4314,7 +4320,7 @@ static int raid10_run(struct mddev *mddev)
|
||||
return 0;
|
||||
|
||||
out_free_conf:
|
||||
md_unregister_thread(&mddev->thread);
|
||||
md_unregister_thread(mddev, &mddev->thread);
|
||||
raid10_free_conf(conf);
|
||||
mddev->private = NULL;
|
||||
out:
|
||||
@ -4411,7 +4417,6 @@ static void *raid10_takeover_raid0(struct mddev *mddev, sector_t size, int devs)
|
||||
rdev->new_raid_disk = rdev->raid_disk * 2;
|
||||
rdev->sectors = size;
|
||||
}
|
||||
WRITE_ONCE(conf->barrier, 1);
|
||||
}
|
||||
|
||||
return conf;
|
||||
|
@ -123,7 +123,6 @@ struct r10bio {
|
||||
sector_t sector; /* virtual sector number */
|
||||
int sectors;
|
||||
unsigned long state;
|
||||
unsigned long start_time;
|
||||
struct mddev *mddev;
|
||||
/*
|
||||
* original bio going to /dev/mdx
|
||||
|
@ -1260,14 +1260,13 @@ static void r5l_log_flush_endio(struct bio *bio)
|
||||
|
||||
if (bio->bi_status)
|
||||
md_error(log->rdev->mddev, log->rdev);
|
||||
bio_uninit(bio);
|
||||
|
||||
spin_lock_irqsave(&log->io_list_lock, flags);
|
||||
list_for_each_entry(io, &log->flushing_ios, log_sibling)
|
||||
r5l_io_run_stripes(io);
|
||||
list_splice_tail_init(&log->flushing_ios, &log->finished_ios);
|
||||
spin_unlock_irqrestore(&log->io_list_lock, flags);
|
||||
|
||||
bio_uninit(bio);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -3168,12 +3167,15 @@ void r5l_exit_log(struct r5conf *conf)
|
||||
{
|
||||
struct r5l_log *log = conf->log;
|
||||
|
||||
/* Ensure disable_writeback_work wakes up and exits */
|
||||
md_unregister_thread(conf->mddev, &log->reclaim_thread);
|
||||
|
||||
/*
|
||||
* 'reconfig_mutex' is held by caller, set 'conf->log' to NULL to
|
||||
* ensure disable_writeback_work wakes up and exits.
|
||||
*/
|
||||
conf->log = NULL;
|
||||
wake_up(&conf->mddev->sb_wait);
|
||||
flush_work(&log->disable_writeback_work);
|
||||
md_unregister_thread(&log->reclaim_thread);
|
||||
|
||||
conf->log = NULL;
|
||||
|
||||
mempool_exit(&log->meta_pool);
|
||||
bioset_exit(&log->bs);
|
||||
|
@ -5468,26 +5468,17 @@ static struct bio *remove_bio_from_retry(struct r5conf *conf,
|
||||
*/
|
||||
static void raid5_align_endio(struct bio *bi)
|
||||
{
|
||||
struct md_io_acct *md_io_acct = bi->bi_private;
|
||||
struct bio *raid_bi = md_io_acct->orig_bio;
|
||||
struct mddev *mddev;
|
||||
struct r5conf *conf;
|
||||
struct md_rdev *rdev;
|
||||
struct bio *raid_bi = bi->bi_private;
|
||||
struct md_rdev *rdev = (void *)raid_bi->bi_next;
|
||||
struct mddev *mddev = rdev->mddev;
|
||||
struct r5conf *conf = mddev->private;
|
||||
blk_status_t error = bi->bi_status;
|
||||
unsigned long start_time = md_io_acct->start_time;
|
||||
|
||||
bio_put(bi);
|
||||
|
||||
rdev = (void*)raid_bi->bi_next;
|
||||
raid_bi->bi_next = NULL;
|
||||
mddev = rdev->mddev;
|
||||
conf = mddev->private;
|
||||
|
||||
rdev_dec_pending(rdev, conf->mddev);
|
||||
|
||||
if (!error) {
|
||||
if (blk_queue_io_stat(raid_bi->bi_bdev->bd_disk->queue))
|
||||
bio_end_io_acct(raid_bi, start_time);
|
||||
bio_endio(raid_bi);
|
||||
if (atomic_dec_and_test(&conf->active_aligned_reads))
|
||||
wake_up(&conf->wait_for_quiescent);
|
||||
@ -5506,7 +5497,6 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
|
||||
struct md_rdev *rdev;
|
||||
sector_t sector, end_sector, first_bad;
|
||||
int bad_sectors, dd_idx;
|
||||
struct md_io_acct *md_io_acct;
|
||||
bool did_inc;
|
||||
|
||||
if (!in_chunk_boundary(mddev, raid_bio)) {
|
||||
@ -5543,16 +5533,13 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
|
||||
return 0;
|
||||
}
|
||||
|
||||
align_bio = bio_alloc_clone(rdev->bdev, raid_bio, GFP_NOIO,
|
||||
&mddev->io_acct_set);
|
||||
md_io_acct = container_of(align_bio, struct md_io_acct, bio_clone);
|
||||
md_account_bio(mddev, &raid_bio);
|
||||
raid_bio->bi_next = (void *)rdev;
|
||||
if (blk_queue_io_stat(raid_bio->bi_bdev->bd_disk->queue))
|
||||
md_io_acct->start_time = bio_start_io_acct(raid_bio);
|
||||
md_io_acct->orig_bio = raid_bio;
|
||||
|
||||
align_bio = bio_alloc_clone(rdev->bdev, raid_bio, GFP_NOIO,
|
||||
&mddev->bio_set);
|
||||
align_bio->bi_end_io = raid5_align_endio;
|
||||
align_bio->bi_private = md_io_acct;
|
||||
align_bio->bi_private = raid_bio;
|
||||
align_bio->bi_iter.bi_sector = sector;
|
||||
|
||||
/* No reshape active, so we can trust rdev->data_offset */
|
||||
@ -7787,19 +7774,12 @@ static int raid5_run(struct mddev *mddev)
|
||||
struct md_rdev *rdev;
|
||||
struct md_rdev *journal_dev = NULL;
|
||||
sector_t reshape_offset = 0;
|
||||
int i, ret = 0;
|
||||
int i;
|
||||
long long min_offset_diff = 0;
|
||||
int first = 1;
|
||||
|
||||
if (acct_bioset_init(mddev)) {
|
||||
pr_err("md/raid456:%s: alloc acct bioset failed.\n", mdname(mddev));
|
||||
if (mddev_init_writes_pending(mddev) < 0)
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
if (mddev_init_writes_pending(mddev) < 0) {
|
||||
ret = -ENOMEM;
|
||||
goto exit_acct_set;
|
||||
}
|
||||
|
||||
if (mddev->recovery_cp != MaxSector)
|
||||
pr_notice("md/raid:%s: not clean -- starting background reconstruction\n",
|
||||
@ -7830,8 +7810,7 @@ static int raid5_run(struct mddev *mddev)
|
||||
(mddev->bitmap_info.offset || mddev->bitmap_info.file)) {
|
||||
pr_notice("md/raid:%s: array cannot have both journal and bitmap\n",
|
||||
mdname(mddev));
|
||||
ret = -EINVAL;
|
||||
goto exit_acct_set;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (mddev->reshape_position != MaxSector) {
|
||||
@ -7856,15 +7835,13 @@ static int raid5_run(struct mddev *mddev)
|
||||
if (journal_dev) {
|
||||
pr_warn("md/raid:%s: don't support reshape with journal - aborting.\n",
|
||||
mdname(mddev));
|
||||
ret = -EINVAL;
|
||||
goto exit_acct_set;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (mddev->new_level != mddev->level) {
|
||||
pr_warn("md/raid:%s: unsupported reshape required - aborting.\n",
|
||||
mdname(mddev));
|
||||
ret = -EINVAL;
|
||||
goto exit_acct_set;
|
||||
return -EINVAL;
|
||||
}
|
||||
old_disks = mddev->raid_disks - mddev->delta_disks;
|
||||
/* reshape_position must be on a new-stripe boundary, and one
|
||||
@ -7880,8 +7857,7 @@ static int raid5_run(struct mddev *mddev)
|
||||
if (sector_div(here_new, chunk_sectors * new_data_disks)) {
|
||||
pr_warn("md/raid:%s: reshape_position not on a stripe boundary\n",
|
||||
mdname(mddev));
|
||||
ret = -EINVAL;
|
||||
goto exit_acct_set;
|
||||
return -EINVAL;
|
||||
}
|
||||
reshape_offset = here_new * chunk_sectors;
|
||||
/* here_new is the stripe we will write to */
|
||||
@ -7903,8 +7879,7 @@ static int raid5_run(struct mddev *mddev)
|
||||
else if (mddev->ro == 0) {
|
||||
pr_warn("md/raid:%s: in-place reshape must be started in read-only mode - aborting\n",
|
||||
mdname(mddev));
|
||||
ret = -EINVAL;
|
||||
goto exit_acct_set;
|
||||
return -EINVAL;
|
||||
}
|
||||
} else if (mddev->reshape_backwards
|
||||
? (here_new * chunk_sectors + min_offset_diff <=
|
||||
@ -7914,8 +7889,7 @@ static int raid5_run(struct mddev *mddev)
|
||||
/* Reading from the same stripe as writing to - bad */
|
||||
pr_warn("md/raid:%s: reshape_position too early for auto-recovery - aborting.\n",
|
||||
mdname(mddev));
|
||||
ret = -EINVAL;
|
||||
goto exit_acct_set;
|
||||
return -EINVAL;
|
||||
}
|
||||
pr_debug("md/raid:%s: reshape will continue\n", mdname(mddev));
|
||||
/* OK, we should be able to continue; */
|
||||
@ -7939,10 +7913,8 @@ static int raid5_run(struct mddev *mddev)
|
||||
else
|
||||
conf = mddev->private;
|
||||
|
||||
if (IS_ERR(conf)) {
|
||||
ret = PTR_ERR(conf);
|
||||
goto exit_acct_set;
|
||||
}
|
||||
if (IS_ERR(conf))
|
||||
return PTR_ERR(conf);
|
||||
|
||||
if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
|
||||
if (!journal_dev) {
|
||||
@ -8135,15 +8107,12 @@ static int raid5_run(struct mddev *mddev)
|
||||
|
||||
return 0;
|
||||
abort:
|
||||
md_unregister_thread(&mddev->thread);
|
||||
md_unregister_thread(mddev, &mddev->thread);
|
||||
print_raid5_conf(conf);
|
||||
free_conf(conf);
|
||||
mddev->private = NULL;
|
||||
pr_warn("md/raid:%s: failed to run raid set.\n", mdname(mddev));
|
||||
ret = -EIO;
|
||||
exit_acct_set:
|
||||
acct_bioset_exit(mddev);
|
||||
return ret;
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
static void raid5_free(struct mddev *mddev, void *priv)
|
||||
@ -8151,7 +8120,6 @@ static void raid5_free(struct mddev *mddev, void *priv)
|
||||
struct r5conf *conf = priv;
|
||||
|
||||
free_conf(conf);
|
||||
acct_bioset_exit(mddev);
|
||||
mddev->to_remove = &raid5_attrs_group;
|
||||
}
|
||||
|
||||
|
@ -118,7 +118,6 @@ static void *nvme_add_user_metadata(struct request *req, void __user *ubuf,
|
||||
goto out_free_meta;
|
||||
}
|
||||
|
||||
bip->bip_iter.bi_size = len;
|
||||
bip->bip_iter.bi_sector = seed;
|
||||
ret = bio_integrity_add_page(bio, virt_to_page(buf), len,
|
||||
offset_in_page(buf));
|
||||
|
@ -206,12 +206,11 @@ static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio,
|
||||
return PTR_ERR(bip);
|
||||
}
|
||||
|
||||
bip->bip_iter.bi_size = bio_integrity_bytes(bi, bio_sectors(bio));
|
||||
/* virtual start sector must be in integrity interval units */
|
||||
bip_set_seed(bip, bio->bi_iter.bi_sector >>
|
||||
(bi->interval_exp - SECTOR_SHIFT));
|
||||
|
||||
resid = bip->bip_iter.bi_size;
|
||||
resid = bio_integrity_bytes(bi, bio_sectors(bio));
|
||||
while (resid > 0 && sg_miter_next(miter)) {
|
||||
len = min_t(size_t, miter->length, resid);
|
||||
rc = bio_integrity_add_page(bio, miter->page, len,
|
||||
|
@ -300,11 +300,6 @@ void scsi_device_unbusy(struct scsi_device *sdev, struct scsi_cmnd *cmd)
|
||||
cmd->budget_token = -1;
|
||||
}
|
||||
|
||||
/*
 * Thin wrapper: run the hardware queues of @q by calling
 * blk_mq_run_hw_queues() with its second argument fixed to false.
 * NOTE(review): the second argument is presumably the "async" flag --
 * confirm against the blk-mq API.
 */
static void scsi_kick_queue(struct request_queue *q)
|
||||
{
|
||||
blk_mq_run_hw_queues(q, false);
|
||||
}
|
||||
|
||||
/*
|
||||
* Kick the queue of SCSI device @sdev if @sdev != current_sdev. Called with
|
||||
* interrupts disabled.
|
||||
@ -340,7 +335,8 @@ static void scsi_single_lun_run(struct scsi_device *current_sdev)
|
||||
* but in most cases, we will be first. Ideally, each LU on the
|
||||
* target would get some limited time or requests on the target.
|
||||
*/
|
||||
scsi_kick_queue(current_sdev->request_queue);
|
||||
blk_mq_run_hw_queues(current_sdev->request_queue,
|
||||
shost->queuecommand_may_block);
|
||||
|
||||
spin_lock_irqsave(shost->host_lock, flags);
|
||||
if (!starget->starget_sdev_user)
|
||||
@ -427,7 +423,7 @@ static void scsi_starved_list_run(struct Scsi_Host *shost)
|
||||
continue;
|
||||
spin_unlock_irqrestore(shost->host_lock, flags);
|
||||
|
||||
scsi_kick_queue(slq);
|
||||
blk_mq_run_hw_queues(slq, false);
|
||||
blk_put_queue(slq);
|
||||
|
||||
spin_lock_irqsave(shost->host_lock, flags);
|
||||
@ -452,8 +448,8 @@ static void scsi_run_queue(struct request_queue *q)
|
||||
if (!list_empty(&sdev->host->starved_list))
|
||||
scsi_starved_list_run(sdev->host);
|
||||
|
||||
/* Note: blk_mq_kick_requeue_list() runs the queue asynchronously. */
|
||||
blk_mq_kick_requeue_list(q);
|
||||
blk_mq_run_hw_queues(q, false);
|
||||
}
|
||||
|
||||
void scsi_requeue_run_queue(struct work_struct *work)
|
||||
|
@ -689,7 +689,6 @@ iblock_alloc_bip(struct se_cmd *cmd, struct bio *bio,
|
||||
return PTR_ERR(bip);
|
||||
}
|
||||
|
||||
bip->bip_iter.bi_size = bio_integrity_bytes(bi, bio_sectors(bio));
|
||||
/* virtual start sector must be in integrity interval units */
|
||||
bip_set_seed(bip, bio->bi_iter.bi_sector >>
|
||||
(bi->interval_exp - SECTOR_SHIFT));
|
||||
@ -697,7 +696,7 @@ iblock_alloc_bip(struct se_cmd *cmd, struct bio *bio,
|
||||
pr_debug("IBLOCK BIP Size: %u Sector: %llu\n", bip->bip_iter.bi_size,
|
||||
(unsigned long long)bip->bip_iter.bi_sector);
|
||||
|
||||
resid = bip->bip_iter.bi_size;
|
||||
resid = bio_integrity_bytes(bi, bio_sectors(bio));
|
||||
while (resid > 0 && sg_miter_next(miter)) {
|
||||
|
||||
len = min_t(size_t, miter->length, resid);
|
||||
|
@ -18,8 +18,12 @@ config VALIDATE_FS_PARSER
|
||||
config FS_IOMAP
|
||||
bool
|
||||
|
||||
config BUFFER_HEAD
|
||||
bool
|
||||
|
||||
# old blockdev_direct_IO implementation. Use iomap for new code instead
|
||||
config LEGACY_DIRECT_IO
|
||||
depends on BUFFER_HEAD
|
||||
bool
|
||||
|
||||
if BLOCK
|
||||
|
@ -17,7 +17,7 @@ obj-y := open.o read_write.o file_table.o super.o \
|
||||
fs_types.o fs_context.o fs_parser.o fsopen.o init.o \
|
||||
kernel_read_file.o mnt_idmapping.o remap_range.o
|
||||
|
||||
obj-$(CONFIG_BLOCK) += buffer.o mpage.o
|
||||
obj-$(CONFIG_BUFFER_HEAD) += buffer.o mpage.o
|
||||
obj-$(CONFIG_PROC_FS) += proc_namespace.o
|
||||
obj-$(CONFIG_LEGACY_DIRECT_IO) += direct-io.o
|
||||
obj-y += notify/
|
||||
|
@ -2,6 +2,7 @@
|
||||
config ADFS_FS
|
||||
tristate "ADFS file system support"
|
||||
depends on BLOCK
|
||||
select BUFFER_HEAD
|
||||
help
|
||||
The Acorn Disc Filing System is the standard file system of the
|
||||
RiscOS operating system which runs on Acorn's ARM-based Risc PC
|
||||
|
@ -2,6 +2,7 @@
|
||||
config AFFS_FS
|
||||
tristate "Amiga FFS file system support"
|
||||
depends on BLOCK
|
||||
select BUFFER_HEAD
|
||||
select LEGACY_DIRECT_IO
|
||||
help
|
||||
The Fast File System (FFS) is the common file system used on hard
|
||||
|
@ -2,6 +2,7 @@
|
||||
config BEFS_FS
|
||||
tristate "BeOS file system (BeFS) support (read only)"
|
||||
depends on BLOCK
|
||||
select BUFFER_HEAD
|
||||
select NLS
|
||||
help
|
||||
The BeOS File System (BeFS) is the native file system of Be, Inc's
|
||||
|
@ -2,6 +2,7 @@
|
||||
config BFS_FS
|
||||
tristate "BFS file system support"
|
||||
depends on BLOCK
|
||||
select BUFFER_HEAD
|
||||
help
|
||||
Boot File System (BFS) is a file system used under SCO UnixWare to
|
||||
allow the bootloader access to the kernel image and other important
|
||||
|
@ -562,12 +562,6 @@ repeat:
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
 * Call thaw_bdev() on sb->s_bdev over and over for as long as @sb has a
 * block device and thaw_bdev() returns 0, printing a KERN_WARNING
 * message after each such call. The loop ends once sb->s_bdev is NULL
 * or thaw_bdev() returns non-zero.
 * NOTE(review): presumably each successful call undoes one level of
 * freezing and the non-zero return means nothing is left to thaw --
 * confirm against thaw_bdev().
 */
void emergency_thaw_bdev(struct super_block *sb)
|
||||
{
|
||||
while (sb->s_bdev && !thaw_bdev(sb->s_bdev))
|
||||
printk(KERN_WARNING "Emergency Thaw on %pg\n", sb->s_bdev);
|
||||
}
|
||||
|
||||
/**
|
||||
* sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers
|
||||
* @mapping: the mapping which wants those buffers written
|
||||
|
@ -2,6 +2,7 @@
|
||||
config EFS_FS
|
||||
tristate "EFS file system support (read only)"
|
||||
depends on BLOCK
|
||||
select BUFFER_HEAD
|
||||
help
|
||||
EFS is an older file system used for non-ISO9660 CD-ROMs and hard
|
||||
disk partitions by SGI's IRIX operating system (IRIX 6.0 and newer
|
||||
|
@ -2,6 +2,7 @@
|
||||
|
||||
config EXFAT_FS
|
||||
tristate "exFAT filesystem support"
|
||||
select BUFFER_HEAD
|
||||
select NLS
|
||||
select LEGACY_DIRECT_IO
|
||||
help
|
||||
|
@ -1,6 +1,7 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
config EXT2_FS
|
||||
tristate "Second extended fs support"
|
||||
select BUFFER_HEAD
|
||||
select FS_IOMAP
|
||||
select LEGACY_DIRECT_IO
|
||||
help
|
||||
|
@ -28,6 +28,7 @@ config EXT3_FS_SECURITY
|
||||
|
||||
config EXT4_FS
|
||||
tristate "The Extended 4 (ext4) filesystem"
|
||||
select BUFFER_HEAD
|
||||
select JBD2
|
||||
select CRC16
|
||||
select CRYPTO
|
||||
|
@ -6140,7 +6140,7 @@ retry_alloc:
|
||||
if (err == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
|
||||
goto retry_alloc;
|
||||
out_ret:
|
||||
ret = block_page_mkwrite_return(err);
|
||||
ret = vmf_fs_error(err);
|
||||
out:
|
||||
filemap_invalidate_unlock_shared(mapping);
|
||||
sb_end_pagefault(inode->i_sb);
|
||||
|
@ -2,6 +2,7 @@
|
||||
config F2FS_FS
|
||||
tristate "F2FS filesystem support"
|
||||
depends on BLOCK
|
||||
select BUFFER_HEAD
|
||||
select NLS
|
||||
select CRYPTO
|
||||
select CRYPTO_CRC32
|
||||
|
@ -159,7 +159,7 @@ out_sem:
|
||||
|
||||
sb_end_pagefault(inode->i_sb);
|
||||
err:
|
||||
return block_page_mkwrite_return(err);
|
||||
return vmf_fs_error(err);
|
||||
}
|
||||
|
||||
static const struct vm_operations_struct f2fs_file_vm_ops = {
|
||||
|
@ -1,6 +1,7 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
config FAT_FS
|
||||
tristate
|
||||
select BUFFER_HEAD
|
||||
select NLS
|
||||
select LEGACY_DIRECT_IO
|
||||
help
|
||||
|
@ -2,6 +2,7 @@
|
||||
config VXFS_FS
|
||||
tristate "FreeVxFS file system support (VERITAS VxFS(TM) compatible)"
|
||||
depends on BLOCK
|
||||
select BUFFER_HEAD
|
||||
help
|
||||
FreeVxFS is a file system driver that support the VERITAS VxFS(TM)
|
||||
file system format. VERITAS VxFS(TM) is the standard file system
|
||||
|
@ -1,6 +1,7 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
config GFS2_FS
|
||||
tristate "GFS2 file system support"
|
||||
select BUFFER_HEAD
|
||||
select FS_POSIX_ACL
|
||||
select CRC32
|
||||
select LIBCRC32C
|
||||
|
@ -432,7 +432,7 @@ static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf)
|
||||
gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
|
||||
err = gfs2_glock_nq(&gh);
|
||||
if (err) {
|
||||
ret = block_page_mkwrite_return(err);
|
||||
ret = vmf_fs_error(err);
|
||||
goto out_uninit;
|
||||
}
|
||||
|
||||
@ -474,7 +474,7 @@ static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf)
|
||||
|
||||
err = gfs2_rindex_update(sdp);
|
||||
if (err) {
|
||||
ret = block_page_mkwrite_return(err);
|
||||
ret = vmf_fs_error(err);
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
@ -482,12 +482,12 @@ static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf)
|
||||
ap.target = data_blocks + ind_blocks;
|
||||
err = gfs2_quota_lock_check(ip, &ap);
|
||||
if (err) {
|
||||
ret = block_page_mkwrite_return(err);
|
||||
ret = vmf_fs_error(err);
|
||||
goto out_unlock;
|
||||
}
|
||||
err = gfs2_inplace_reserve(ip, &ap);
|
||||
if (err) {
|
||||
ret = block_page_mkwrite_return(err);
|
||||
ret = vmf_fs_error(err);
|
||||
goto out_quota_unlock;
|
||||
}
|
||||
|
||||
@ -500,7 +500,7 @@ static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf)
|
||||
}
|
||||
err = gfs2_trans_begin(sdp, rblocks, 0);
|
||||
if (err) {
|
||||
ret = block_page_mkwrite_return(err);
|
||||
ret = vmf_fs_error(err);
|
||||
goto out_trans_fail;
|
||||
}
|
||||
|
||||
@ -508,7 +508,7 @@ static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf)
|
||||
if (gfs2_is_stuffed(ip)) {
|
||||
err = gfs2_unstuff_dinode(ip);
|
||||
if (err) {
|
||||
ret = block_page_mkwrite_return(err);
|
||||
ret = vmf_fs_error(err);
|
||||
goto out_trans_end;
|
||||
}
|
||||
}
|
||||
@ -524,7 +524,7 @@ static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf)
|
||||
|
||||
err = gfs2_allocate_page_backing(page, length);
|
||||
if (err)
|
||||
ret = block_page_mkwrite_return(err);
|
||||
ret = vmf_fs_error(err);
|
||||
|
||||
out_page_locked:
|
||||
if (ret != VM_FAULT_LOCKED)
|
||||
@ -558,7 +558,7 @@ static vm_fault_t gfs2_fault(struct vm_fault *vmf)
|
||||
gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
|
||||
err = gfs2_glock_nq(&gh);
|
||||
if (err) {
|
||||
ret = block_page_mkwrite_return(err);
|
||||
ret = vmf_fs_error(err);
|
||||
goto out_uninit;
|
||||
}
|
||||
ret = filemap_fault(vmf);
|
||||
|
@ -2,6 +2,7 @@
|
||||
config HFS_FS
|
||||
tristate "Apple Macintosh file system support"
|
||||
depends on BLOCK
|
||||
select BUFFER_HEAD
|
||||
select NLS
|
||||
select LEGACY_DIRECT_IO
|
||||
help
|
||||
|
@ -2,6 +2,7 @@
|
||||
config HFSPLUS_FS
|
||||
tristate "Apple Extended HFS file system support"
|
||||
depends on BLOCK
|
||||
select BUFFER_HEAD
|
||||
select NLS
|
||||
select NLS_UTF8
|
||||
select LEGACY_DIRECT_IO
|
||||
|
@ -2,6 +2,7 @@
|
||||
config HPFS_FS
|
||||
tristate "OS/2 HPFS file system support"
|
||||
depends on BLOCK
|
||||
select BUFFER_HEAD
|
||||
select FS_IOMAP
|
||||
help
|
||||
OS/2 is IBM's operating system for PC's, the same as Warp, and HPFS
|
||||
|
@ -23,16 +23,10 @@ struct mnt_idmap;
|
||||
*/
|
||||
#ifdef CONFIG_BLOCK
|
||||
extern void __init bdev_cache_init(void);
|
||||
|
||||
void emergency_thaw_bdev(struct super_block *sb);
|
||||
#else
|
||||
static inline void bdev_cache_init(void)
|
||||
{
|
||||
}
|
||||
static inline int emergency_thaw_bdev(struct super_block *sb)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif /* CONFIG_BLOCK */
|
||||
|
||||
/*
|
||||
|
@ -1286,7 +1286,7 @@ vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops)
|
||||
return VM_FAULT_LOCKED;
|
||||
out_unlock:
|
||||
folio_unlock(folio);
|
||||
return block_page_mkwrite_return(ret);
|
||||
return vmf_fs_error(ret);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(iomap_page_mkwrite);
|
||||
|
||||
|
@ -1,6 +1,7 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
config ISO9660_FS
|
||||
tristate "ISO 9660 CDROM file system support"
|
||||
select BUFFER_HEAD
|
||||
help
|
||||
This is the standard file system used on CD-ROMs. It was previously
|
||||
known as "High Sierra File System" and is called "hsfs" on other
|
||||
|
@ -1,6 +1,7 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
config JFS_FS
|
||||
tristate "JFS filesystem support"
|
||||
select BUFFER_HEAD
|
||||
select NLS
|
||||
select CRC32
|
||||
select LEGACY_DIRECT_IO
|
||||
|
@ -2,6 +2,7 @@
|
||||
config MINIX_FS
|
||||
tristate "Minix file system support"
|
||||
depends on BLOCK
|
||||
select BUFFER_HEAD
|
||||
help
|
||||
Minix is a simple operating system used in many classes about OS's.
|
||||
The minix file system (method to organize files on a hard disk
|
||||
|
@ -1,6 +1,7 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
config NILFS2_FS
|
||||
tristate "NILFS2 file system support"
|
||||
select BUFFER_HEAD
|
||||
select CRC32
|
||||
select LEGACY_DIRECT_IO
|
||||
help
|
||||
|
@ -108,7 +108,7 @@ static vm_fault_t nilfs_page_mkwrite(struct vm_fault *vmf)
|
||||
wait_for_stable_page(page);
|
||||
out:
|
||||
sb_end_pagefault(inode->i_sb);
|
||||
return block_page_mkwrite_return(ret);
|
||||
return vmf_fs_error(ret);
|
||||
}
|
||||
|
||||
static const struct vm_operations_struct nilfs_file_vm_ops = {
|
||||
|
@ -1,6 +1,7 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
config NTFS_FS
|
||||
tristate "NTFS file system support"
|
||||
select BUFFER_HEAD
|
||||
select NLS
|
||||
help
|
||||
NTFS is the file system of Microsoft Windows NT, 2000, XP and 2003.
|
||||
|
@ -1,6 +1,7 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
config NTFS3_FS
|
||||
tristate "NTFS Read-Write file system support"
|
||||
select BUFFER_HEAD
|
||||
select NLS
|
||||
select LEGACY_DIRECT_IO
|
||||
help
|
||||
|
@ -2,6 +2,7 @@
|
||||
config OCFS2_FS
|
||||
tristate "OCFS2 file system support"
|
||||
depends on INET && SYSFS && CONFIGFS_FS
|
||||
select BUFFER_HEAD
|
||||
select JBD2
|
||||
select CRC32
|
||||
select QUOTA
|
||||
|
@ -2,6 +2,7 @@
|
||||
config OMFS_FS
|
||||
tristate "SonicBlue Optimized MPEG File System support"
|
||||
depends on BLOCK
|
||||
select BUFFER_HEAD
|
||||
select CRC_ITU_T
|
||||
help
|
||||
This is the proprietary file system used by the Rio Karma music
|
||||
|
@ -2,6 +2,7 @@
|
||||
config QNX4FS_FS
|
||||
tristate "QNX4 file system support (read only)"
|
||||
depends on BLOCK
|
||||
select BUFFER_HEAD
|
||||
help
|
||||
This is the file system used by the real-time operating systems
|
||||
QNX 4 and QNX 6 (the latter is also called QNX RTP).
|
||||
|
@ -2,6 +2,7 @@
|
||||
config QNX6FS_FS
|
||||
tristate "QNX6 file system support (read only)"
|
||||
depends on BLOCK && CRC32
|
||||
select BUFFER_HEAD
|
||||
help
|
||||
This is the file system used by the real-time operating systems
|
||||
QNX 6 (also called QNX RTP).
|
||||
|
@ -1,6 +1,7 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
config REISERFS_FS
|
||||
tristate "Reiserfs support (deprecated)"
|
||||
select BUFFER_HEAD
|
||||
select CRC32
|
||||
select LEGACY_DIRECT_IO
|
||||
help
|
||||
|
@ -57,6 +57,7 @@ endchoice
|
||||
config ROMFS_ON_BLOCK
|
||||
bool
|
||||
default y if ROMFS_BACKED_BY_BLOCK || ROMFS_BACKED_BY_BOTH
|
||||
select BUFFER_HEAD
|
||||
|
||||
config ROMFS_ON_MTD
|
||||
bool
|
||||
|
@ -1029,7 +1029,9 @@ static void do_thaw_all_callback(struct super_block *sb)
|
||||
{
|
||||
down_write(&sb->s_umount);
|
||||
if (sb->s_root && sb->s_flags & SB_BORN) {
|
||||
emergency_thaw_bdev(sb);
|
||||
if (IS_ENABLED(CONFIG_BLOCK))
|
||||
while (sb->s_bdev && !thaw_bdev(sb->s_bdev))
|
||||
pr_warn("Emergency Thaw on %pg\n", sb->s_bdev);
|
||||
thaw_super_locked(sb);
|
||||
} else {
|
||||
up_write(&sb->s_umount);
|
||||
|
@ -2,6 +2,7 @@
|
||||
config SYSV_FS
|
||||
tristate "System V/Xenix/V7/Coherent file system support"
|
||||
depends on BLOCK
|
||||
select BUFFER_HEAD
|
||||
help
|
||||
SCO, Xenix and Coherent are commercial Unix systems for Intel
|
||||
machines, and Version 7 was used on the DEC PDP-11. Saying Y
|
||||
|
@ -1,6 +1,7 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
config UDF_FS
|
||||
tristate "UDF file system support"
|
||||
select BUFFER_HEAD
|
||||
select CRC_ITU_T
|
||||
select NLS
|
||||
select LEGACY_DIRECT_IO
|
||||
|
@ -67,7 +67,7 @@ static vm_fault_t udf_page_mkwrite(struct vm_fault *vmf)
|
||||
err = block_commit_write(page, 0, end);
|
||||
if (err < 0) {
|
||||
unlock_page(page);
|
||||
ret = block_page_mkwrite_return(err);
|
||||
ret = vmf_fs_error(err);
|
||||
goto out_unlock;
|
||||
}
|
||||
out_dirty:
|
||||
|
@ -2,6 +2,7 @@
|
||||
config UFS_FS
|
||||
tristate "UFS file system support (read only)"
|
||||
depends on BLOCK
|
||||
select BUFFER_HEAD
|
||||
help
|
||||
BSD and derivate versions of Unix (such as SunOS, FreeBSD, NetBSD,
|
||||
OpenBSD and NeXTstep) use a file system called UFS. Some System V
|
||||
|
@ -488,7 +488,12 @@ extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
|
||||
extern void bio_copy_data(struct bio *dst, struct bio *src);
|
||||
extern void bio_free_pages(struct bio *bio);
|
||||
void guard_bio_eod(struct bio *bio);
|
||||
void zero_fill_bio(struct bio *bio);
|
||||
void zero_fill_bio_iter(struct bio *bio, struct bvec_iter iter);
|
||||
|
||||
static inline void zero_fill_bio(struct bio *bio)
|
||||
{
|
||||
zero_fill_bio_iter(bio, bio->bi_iter);
|
||||
}
|
||||
|
||||
static inline void bio_release_pages(struct bio *bio, bool mark_dirty)
|
||||
{
|
||||
|
@ -178,14 +178,10 @@ struct request {
|
||||
|
||||
struct {
|
||||
unsigned int seq;
|
||||
struct list_head list;
|
||||
rq_end_io_fn *saved_end_io;
|
||||
} flush;
|
||||
|
||||
union {
|
||||
struct __call_single_data csd;
|
||||
u64 fifo_time;
|
||||
};
|
||||
u64 fifo_time;
|
||||
|
||||
/*
|
||||
* completion callback.
|
||||
|
@ -538,6 +538,7 @@ struct request_queue {
|
||||
#define QUEUE_FLAG_ADD_RANDOM 10 /* Contributes to random pool */
|
||||
#define QUEUE_FLAG_SYNCHRONOUS 11 /* always completes in submit context */
|
||||
#define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */
|
||||
#define QUEUE_FLAG_HW_WC 18 /* Write back caching supported */
|
||||
#define QUEUE_FLAG_INIT_DONE 14 /* queue is initialized */
|
||||
#define QUEUE_FLAG_STABLE_WRITES 15 /* don't modify blks until WB is done */
|
||||
#define QUEUE_FLAG_POLL 16 /* IO polling enabled if set */
|
||||
@ -846,6 +847,7 @@ extern const char *blk_op_str(enum req_op op);
|
||||
|
||||
int blk_status_to_errno(blk_status_t status);
|
||||
blk_status_t errno_to_blk_status(int errno);
|
||||
const char *blk_status_to_str(blk_status_t status);
|
||||
|
||||
/* only poll the hardware once, don't continue until a completion was found */
|
||||
#define BLK_POLL_ONESHOT (1 << 0)
|
||||
|
@ -16,8 +16,6 @@
|
||||
#include <linux/wait.h>
|
||||
#include <linux/atomic.h>
|
||||
|
||||
#ifdef CONFIG_BLOCK
|
||||
|
||||
enum bh_state_bits {
|
||||
BH_Uptodate, /* Contains valid data */
|
||||
BH_Dirty, /* Is dirty */
|
||||
@ -198,7 +196,6 @@ void set_bh_page(struct buffer_head *bh,
|
||||
struct page *page, unsigned long offset);
|
||||
void folio_set_bh(struct buffer_head *bh, struct folio *folio,
|
||||
unsigned long offset);
|
||||
bool try_to_free_buffers(struct folio *);
|
||||
struct buffer_head *folio_alloc_buffers(struct folio *folio, unsigned long size,
|
||||
bool retry);
|
||||
struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
|
||||
@ -213,10 +210,6 @@ void end_buffer_async_write(struct buffer_head *bh, int uptodate);
|
||||
|
||||
/* Things to do with buffers at mapping->private_list */
|
||||
void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode);
|
||||
int inode_has_buffers(struct inode *);
|
||||
void invalidate_inode_buffers(struct inode *);
|
||||
int remove_inode_buffers(struct inode *inode);
|
||||
int sync_mapping_buffers(struct address_space *mapping);
|
||||
int generic_buffers_fsync_noflush(struct file *file, loff_t start, loff_t end,
|
||||
bool datasync);
|
||||
int generic_buffers_fsync(struct file *file, loff_t start, loff_t end,
|
||||
@ -240,9 +233,6 @@ void __bforget(struct buffer_head *);
|
||||
void __breadahead(struct block_device *, sector_t block, unsigned int size);
|
||||
struct buffer_head *__bread_gfp(struct block_device *,
|
||||
sector_t block, unsigned size, gfp_t gfp);
|
||||
void invalidate_bh_lrus(void);
|
||||
void invalidate_bh_lrus_cpu(void);
|
||||
bool has_bh_in_lru(int cpu, void *dummy);
|
||||
struct buffer_head *alloc_buffer_head(gfp_t gfp_flags);
|
||||
void free_buffer_head(struct buffer_head * bh);
|
||||
void unlock_buffer(struct buffer_head *bh);
|
||||
@ -258,8 +248,6 @@ int __bh_read(struct buffer_head *bh, blk_opf_t op_flags, bool wait);
|
||||
void __bh_read_batch(int nr, struct buffer_head *bhs[],
|
||||
blk_opf_t op_flags, bool force_lock);
|
||||
|
||||
extern int buffer_heads_over_limit;
|
||||
|
||||
/*
|
||||
* Generic address_space_operations implementations for buffer_head-backed
|
||||
* address_spaces.
|
||||
@ -291,18 +279,6 @@ int generic_cont_expand_simple(struct inode *inode, loff_t size);
|
||||
int block_commit_write(struct page *page, unsigned from, unsigned to);
|
||||
int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
|
||||
get_block_t get_block);
|
||||
/* Convert errno to return value from ->page_mkwrite() call */
|
||||
static inline vm_fault_t block_page_mkwrite_return(int err)
|
||||
{
|
||||
if (err == 0)
|
||||
return VM_FAULT_LOCKED;
|
||||
if (err == -EFAULT || err == -EAGAIN)
|
||||
return VM_FAULT_NOPAGE;
|
||||
if (err == -ENOMEM)
|
||||
return VM_FAULT_OOM;
|
||||
/* -ENOSPC, -EDQUOT, -EIO ... */
|
||||
return VM_FAULT_SIGBUS;
|
||||
}
|
||||
sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *);
|
||||
int block_truncate_page(struct address_space *, loff_t, get_block_t *);
|
||||
|
||||
@ -316,8 +292,6 @@ extern int buffer_migrate_folio_norefs(struct address_space *,
|
||||
#define buffer_migrate_folio_norefs NULL
|
||||
#endif
|
||||
|
||||
void buffer_init(void);
|
||||
|
||||
/*
|
||||
* inline definitions
|
||||
*/
|
||||
@ -477,7 +451,20 @@ __bread(struct block_device *bdev, sector_t block, unsigned size)
|
||||
|
||||
bool block_dirty_folio(struct address_space *mapping, struct folio *folio);
|
||||
|
||||
#else /* CONFIG_BLOCK */
|
||||
#ifdef CONFIG_BUFFER_HEAD
|
||||
|
||||
void buffer_init(void);
|
||||
bool try_to_free_buffers(struct folio *folio);
|
||||
int inode_has_buffers(struct inode *inode);
|
||||
void invalidate_inode_buffers(struct inode *inode);
|
||||
int remove_inode_buffers(struct inode *inode);
|
||||
int sync_mapping_buffers(struct address_space *mapping);
|
||||
void invalidate_bh_lrus(void);
|
||||
void invalidate_bh_lrus_cpu(void);
|
||||
bool has_bh_in_lru(int cpu, void *dummy);
|
||||
extern int buffer_heads_over_limit;
|
||||
|
||||
#else /* CONFIG_BUFFER_HEAD */
|
||||
|
||||
static inline void buffer_init(void) {}
|
||||
static inline bool try_to_free_buffers(struct folio *folio) { return true; }
|
||||
@ -485,9 +472,10 @@ static inline int inode_has_buffers(struct inode *inode) { return 0; }
|
||||
static inline void invalidate_inode_buffers(struct inode *inode) {}
|
||||
static inline int remove_inode_buffers(struct inode *inode) { return 1; }
|
||||
static inline int sync_mapping_buffers(struct address_space *mapping) { return 0; }
|
||||
static inline void invalidate_bh_lrus(void) {}
|
||||
static inline void invalidate_bh_lrus_cpu(void) {}
|
||||
static inline bool has_bh_in_lru(int cpu, void *dummy) { return false; }
|
||||
#define buffer_heads_over_limit 0
|
||||
|
||||
#endif /* CONFIG_BLOCK */
|
||||
#endif /* CONFIG_BUFFER_HEAD */
|
||||
#endif /* _LINUX_BUFFER_HEAD_H */
|
||||
|
@ -58,7 +58,11 @@ struct vm_fault;
|
||||
#define IOMAP_F_DIRTY (1U << 1)
|
||||
#define IOMAP_F_SHARED (1U << 2)
|
||||
#define IOMAP_F_MERGED (1U << 3)
|
||||
#ifdef CONFIG_BUFFER_HEAD
|
||||
#define IOMAP_F_BUFFER_HEAD (1U << 4)
|
||||
#else
|
||||
#define IOMAP_F_BUFFER_HEAD 0
|
||||
#endif /* CONFIG_BUFFER_HEAD */
|
||||
#define IOMAP_F_XATTR (1U << 5)
|
||||
|
||||
/*
|
||||
|
@ -3403,6 +3403,24 @@ static inline vm_fault_t vmf_error(int err)
|
||||
return VM_FAULT_SIGBUS;
|
||||
}
|
||||
|
||||
/*
|
||||
* Convert errno to return value for ->page_mkwrite() calls.
|
||||
*
|
||||
* This should eventually be merged with vmf_error() above, but will need a
|
||||
* careful audit of all vmf_error() callers.
|
||||
*/
|
||||
static inline vm_fault_t vmf_fs_error(int err)
|
||||
{
|
||||
if (err == 0)
|
||||
return VM_FAULT_LOCKED;
|
||||
if (err == -EFAULT || err == -EAGAIN)
|
||||
return VM_FAULT_NOPAGE;
|
||||
if (err == -ENOMEM)
|
||||
return VM_FAULT_OOM;
|
||||
/* -ENOSPC, -EDQUOT, -EIO ... */
|
||||
return VM_FAULT_SIGBUS;
|
||||
}
|
||||
|
||||
struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
|
||||
unsigned int foll_flags);
|
||||
|
||||
|
@ -12,6 +12,7 @@
|
||||
|
||||
#define RWBS_LEN 8
|
||||
|
||||
#ifdef CONFIG_BUFFER_HEAD
|
||||
DECLARE_EVENT_CLASS(block_buffer,
|
||||
|
||||
TP_PROTO(struct buffer_head *bh),
|
||||
@ -61,6 +62,7 @@ DEFINE_EVENT(block_buffer, block_dirty_buffer,
|
||||
|
||||
TP_ARGS(bh)
|
||||
);
|
||||
#endif /* CONFIG_BUFFER_HEAD */
|
||||
|
||||
/**
|
||||
* block_rq_requeue - place block IO request back on a queue
|
||||
|
@ -31,8 +31,8 @@ TRACE_EVENT(kyber_latency,
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->dev = dev;
|
||||
strlcpy(__entry->domain, domain, sizeof(__entry->domain));
|
||||
strlcpy(__entry->type, type, sizeof(__entry->type));
|
||||
strscpy(__entry->domain, domain, sizeof(__entry->domain));
|
||||
strscpy(__entry->type, type, sizeof(__entry->type));
|
||||
__entry->percentile = percentile;
|
||||
__entry->numerator = numerator;
|
||||
__entry->denominator = denominator;
|
||||
@ -59,7 +59,7 @@ TRACE_EVENT(kyber_adjust,
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->dev = dev;
|
||||
strlcpy(__entry->domain, domain, sizeof(__entry->domain));
|
||||
strscpy(__entry->domain, domain, sizeof(__entry->domain));
|
||||
__entry->depth = depth;
|
||||
),
|
||||
|
||||
@ -81,7 +81,7 @@ TRACE_EVENT(kyber_throttled,
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->dev = dev;
|
||||
strlcpy(__entry->domain, domain, sizeof(__entry->domain));
|
||||
strscpy(__entry->domain, domain, sizeof(__entry->domain));
|
||||
),
|
||||
|
||||
TP_printk("%d,%d %s", MAJOR(__entry->dev), MINOR(__entry->dev),
|
||||
|
@ -33,7 +33,7 @@ TRACE_EVENT(wbt_stat,
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
strlcpy(__entry->name, bdi_dev_name(bdi),
|
||||
strscpy(__entry->name, bdi_dev_name(bdi),
|
||||
ARRAY_SIZE(__entry->name));
|
||||
__entry->rmean = stat[0].mean;
|
||||
__entry->rmin = stat[0].min;
|
||||
@ -68,7 +68,7 @@ TRACE_EVENT(wbt_lat,
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
strlcpy(__entry->name, bdi_dev_name(bdi),
|
||||
strscpy(__entry->name, bdi_dev_name(bdi),
|
||||
ARRAY_SIZE(__entry->name));
|
||||
__entry->lat = div_u64(lat, 1000);
|
||||
),
|
||||
@ -105,7 +105,7 @@ TRACE_EVENT(wbt_step,
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
strlcpy(__entry->name, bdi_dev_name(bdi),
|
||||
strscpy(__entry->name, bdi_dev_name(bdi),
|
||||
ARRAY_SIZE(__entry->name));
|
||||
__entry->msg = msg;
|
||||
__entry->step = step;
|
||||
@ -141,7 +141,7 @@ TRACE_EVENT(wbt_timer,
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
strlcpy(__entry->name, bdi_dev_name(bdi),
|
||||
strscpy(__entry->name, bdi_dev_name(bdi),
|
||||
ARRAY_SIZE(__entry->name));
|
||||
__entry->status = status;
|
||||
__entry->step = step;
|
||||
|
@ -107,20 +107,21 @@ enum {
|
||||
/*
|
||||
* Return an I/O priority value based on a class, a level and a hint.
|
||||
*/
|
||||
static __always_inline __u16 ioprio_value(int class, int level, int hint)
|
||||
static __always_inline __u16 ioprio_value(int prioclass, int priolevel,
|
||||
int priohint)
|
||||
{
|
||||
if (IOPRIO_BAD_VALUE(class, IOPRIO_NR_CLASSES) ||
|
||||
IOPRIO_BAD_VALUE(level, IOPRIO_NR_LEVELS) ||
|
||||
IOPRIO_BAD_VALUE(hint, IOPRIO_NR_HINTS))
|
||||
if (IOPRIO_BAD_VALUE(prioclass, IOPRIO_NR_CLASSES) ||
|
||||
IOPRIO_BAD_VALUE(priolevel, IOPRIO_NR_LEVELS) ||
|
||||
IOPRIO_BAD_VALUE(priohint, IOPRIO_NR_HINTS))
|
||||
return IOPRIO_CLASS_INVALID << IOPRIO_CLASS_SHIFT;
|
||||
|
||||
return (class << IOPRIO_CLASS_SHIFT) |
|
||||
(hint << IOPRIO_HINT_SHIFT) | level;
|
||||
return (prioclass << IOPRIO_CLASS_SHIFT) |
|
||||
(priohint << IOPRIO_HINT_SHIFT) | priolevel;
|
||||
}
|
||||
|
||||
#define IOPRIO_PRIO_VALUE(class, level) \
|
||||
ioprio_value(class, level, IOPRIO_HINT_NONE)
|
||||
#define IOPRIO_PRIO_VALUE_HINT(class, level, hint) \
|
||||
ioprio_value(class, level, hint)
|
||||
#define IOPRIO_PRIO_VALUE(prioclass, priolevel) \
|
||||
ioprio_value(prioclass, priolevel, IOPRIO_HINT_NONE)
|
||||
#define IOPRIO_PRIO_VALUE_HINT(prioclass, priolevel, priohint) \
|
||||
ioprio_value(prioclass, priolevel, priohint)
|
||||
|
||||
#endif /* _UAPI_LINUX_IOPRIO_H */
|
||||
|
@ -176,6 +176,12 @@
|
||||
/* Copy between request and user buffer by pread()/pwrite() */
|
||||
#define UBLK_F_USER_COPY (1UL << 7)
|
||||
|
||||
/*
|
||||
* User space sets this flag when setting up the device to request zoned storage support. Kernel may
|
||||
* deny the request by returning an error.
|
||||
*/
|
||||
#define UBLK_F_ZONED (1ULL << 8)
|
||||
|
||||
/* device state */
|
||||
#define UBLK_S_DEV_DEAD 0
|
||||
#define UBLK_S_DEV_LIVE 1
|
||||
@ -232,9 +238,26 @@ struct ublksrv_ctrl_dev_info {
|
||||
#define UBLK_IO_OP_READ 0
|
||||
#define UBLK_IO_OP_WRITE 1
|
||||
#define UBLK_IO_OP_FLUSH 2
|
||||
#define UBLK_IO_OP_DISCARD 3
|
||||
#define UBLK_IO_OP_WRITE_SAME 4
|
||||
#define UBLK_IO_OP_WRITE_ZEROES 5
|
||||
#define UBLK_IO_OP_DISCARD 3
|
||||
#define UBLK_IO_OP_WRITE_SAME 4
|
||||
#define UBLK_IO_OP_WRITE_ZEROES 5
|
||||
#define UBLK_IO_OP_ZONE_OPEN 10
|
||||
#define UBLK_IO_OP_ZONE_CLOSE 11
|
||||
#define UBLK_IO_OP_ZONE_FINISH 12
|
||||
#define UBLK_IO_OP_ZONE_APPEND 13
|
||||
#define UBLK_IO_OP_ZONE_RESET 15
|
||||
/*
|
||||
* Construct a zone report. The report request is carried in `struct
|
||||
* ublksrv_io_desc`. The `start_sector` field must be the first sector of a zone
|
||||
* and shall indicate the first zone of the report. The `nr_zones` shall
|
||||
* indicate how many zones should be reported at most. The report shall be
|
||||
* delivered as a `struct blk_zone` array. To report fewer zones than requested,
|
||||
* zero the last entry of the returned array.
|
||||
*
|
||||
* Related definitions(blk_zone, blk_zone_cond, blk_zone_type, ...) in
|
||||
* include/uapi/linux/blkzoned.h are part of ublk UAPI.
|
||||
*/
|
||||
#define UBLK_IO_OP_REPORT_ZONES 18
|
||||
|
||||
#define UBLK_IO_F_FAILFAST_DEV (1U << 8)
|
||||
#define UBLK_IO_F_FAILFAST_TRANSPORT (1U << 9)
|
||||
@ -255,7 +278,10 @@ struct ublksrv_io_desc {
|
||||
/* op: bit 0-7, flags: bit 8-31 */
|
||||
__u32 op_flags;
|
||||
|
||||
__u32 nr_sectors;
|
||||
union {
|
||||
__u32 nr_sectors;
|
||||
__u32 nr_zones; /* for UBLK_IO_OP_REPORT_ZONES */
|
||||
};
|
||||
|
||||
/* start sector for this io */
|
||||
__u64 start_sector;
|
||||
@ -284,11 +310,21 @@ struct ublksrv_io_cmd {
|
||||
/* io result, it is valid for COMMIT* command only */
|
||||
__s32 result;
|
||||
|
||||
/*
|
||||
* userspace buffer address in ublksrv daemon process, valid for
|
||||
* FETCH* command only
|
||||
*/
|
||||
__u64 addr;
|
||||
union {
|
||||
/*
|
||||
* userspace buffer address in ublksrv daemon process, valid for
|
||||
* FETCH* command only
|
||||
*
|
||||
* `addr` should not be used when UBLK_F_USER_COPY is enabled,
|
||||
* because userspace handles data copy by pread()/pwrite() over
|
||||
* /dev/ublkcN. But in case of UBLK_F_ZONED, this union is
|
||||
* re-used to pass back the allocated LBA for
|
||||
* UBLK_IO_OP_ZONE_APPEND which actually depends on
|
||||
* UBLK_F_USER_COPY
|
||||
*/
|
||||
__u64 addr;
|
||||
__u64 zone_append_lba;
|
||||
};
|
||||
};
|
||||
|
||||
struct ublk_param_basic {
|
||||
@ -331,6 +367,13 @@ struct ublk_param_devt {
|
||||
__u32 disk_minor;
|
||||
};
|
||||
|
||||
struct ublk_param_zoned {
|
||||
__u32 max_open_zones;
|
||||
__u32 max_active_zones;
|
||||
__u32 max_zone_append_sectors;
|
||||
__u8 reserved[20];
|
||||
};
|
||||
|
||||
struct ublk_params {
|
||||
/*
|
||||
* Total length of parameters, userspace has to set 'len' for both
|
||||
@ -342,11 +385,13 @@ struct ublk_params {
|
||||
#define UBLK_PARAM_TYPE_BASIC (1 << 0)
|
||||
#define UBLK_PARAM_TYPE_DISCARD (1 << 1)
|
||||
#define UBLK_PARAM_TYPE_DEVT (1 << 2)
|
||||
#define UBLK_PARAM_TYPE_ZONED (1 << 3)
|
||||
__u32 types; /* types of parameter included */
|
||||
|
||||
struct ublk_param_basic basic;
|
||||
struct ublk_param_discard discard;
|
||||
struct ublk_param_devt devt;
|
||||
struct ublk_param_zoned zoned;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -56,7 +56,9 @@ int main(int argc, char *argv[])
|
||||
uint8_t v;
|
||||
uint8_t exptbl[256], invtbl[256];
|
||||
|
||||
printf("#ifdef __KERNEL__\n");
|
||||
printf("#include <linux/export.h>\n");
|
||||
printf("#endif\n");
|
||||
printf("#include <linux/raid/pq.h>\n");
|
||||
|
||||
/* Compute multiplication table */
|
||||
|
@ -13,7 +13,6 @@
|
||||
* the syndrome.)
|
||||
*/
|
||||
|
||||
#include <linux/export.h>
|
||||
#include <linux/raid/pq.h>
|
||||
|
||||
/* Recover two failed data blocks. */
|
||||
|
3
lib/raid6/test/.gitignore
vendored
Normal file
3
lib/raid6/test/.gitignore
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
/int.uc
|
||||
/neon.uc
|
||||
/raid6test
|
@ -6,14 +6,15 @@
|
||||
|
||||
pound := \#
|
||||
|
||||
CC = gcc
|
||||
OPTFLAGS = -O2 # Adjust as desired
|
||||
CFLAGS = -I.. -I ../../../include -g $(OPTFLAGS)
|
||||
LD = ld
|
||||
AWK = awk -f
|
||||
AR = ar
|
||||
RANLIB = ranlib
|
||||
OBJS = int1.o int2.o int4.o int8.o int16.o int32.o recov.o algos.o tables.o
|
||||
# Adjust as desired
|
||||
CC = gcc
|
||||
OPTFLAGS = -O2
|
||||
CFLAGS = -I.. -I ../../../include -g $(OPTFLAGS)
|
||||
LD = ld
|
||||
AWK = awk -f
|
||||
AR = ar
|
||||
RANLIB = ranlib
|
||||
OBJS = int1.o int2.o int4.o int8.o int16.o int32.o recov.o algos.o tables.o
|
||||
|
||||
ARCH := $(shell uname -m 2>/dev/null | sed -e /s/i.86/i386/)
|
||||
ifeq ($(ARCH),i386)
|
||||
@ -34,24 +35,25 @@ ifeq ($(ARCH),aarch64)
|
||||
HAS_NEON = yes
|
||||
endif
|
||||
|
||||
ifeq ($(findstring ppc,$(ARCH)),ppc)
|
||||
CFLAGS += -I../../../arch/powerpc/include
|
||||
HAS_ALTIVEC := $(shell printf '$(pound)include <altivec.h>\nvector int a;\n' |\
|
||||
gcc -c -x c - >/dev/null && rm ./-.o && echo yes)
|
||||
endif
|
||||
|
||||
ifeq ($(IS_X86),yes)
|
||||
OBJS += mmx.o sse1.o sse2.o avx2.o recov_ssse3.o recov_avx2.o avx512.o recov_avx512.o
|
||||
CFLAGS += -DCONFIG_X86
|
||||
CFLAGS += $(shell echo "vpmovm2b %k1, %zmm5" | \
|
||||
gcc -c -x assembler - >/dev/null 2>&1 && \
|
||||
rm ./-.o && echo -DCONFIG_AS_AVX512=1)
|
||||
CFLAGS += $(shell echo "vpmovm2b %k1, %zmm5" | \
|
||||
gcc -c -x assembler - >/dev/null 2>&1 && \
|
||||
rm ./-.o && echo -DCONFIG_AS_AVX512=1)
|
||||
else ifeq ($(HAS_NEON),yes)
|
||||
OBJS += neon.o neon1.o neon2.o neon4.o neon8.o recov_neon.o recov_neon_inner.o
|
||||
CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1
|
||||
else
|
||||
HAS_ALTIVEC := $(shell printf '$(pound)include <altivec.h>\nvector int a;\n' |\
|
||||
gcc -c -x c - >/dev/null && rm ./-.o && echo yes)
|
||||
ifeq ($(HAS_ALTIVEC),yes)
|
||||
CFLAGS += -I../../../arch/powerpc/include
|
||||
CFLAGS += -DCONFIG_ALTIVEC
|
||||
OBJS += altivec1.o altivec2.o altivec4.o altivec8.o \
|
||||
vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
|
||||
endif
|
||||
else ifeq ($(HAS_ALTIVEC),yes)
|
||||
CFLAGS += -DCONFIG_ALTIVEC
|
||||
OBJS += altivec1.o altivec2.o altivec4.o altivec8.o \
|
||||
vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
|
||||
endif
|
||||
|
||||
.c.o:
|
||||
@ -63,12 +65,12 @@ endif
|
||||
%.uc: ../%.uc
|
||||
cp -f $< $@
|
||||
|
||||
all: raid6.a raid6test
|
||||
all: raid6.a raid6test
|
||||
|
||||
raid6.a: $(OBJS)
|
||||
rm -f $@
|
||||
$(AR) cq $@ $^
|
||||
$(RANLIB) $@
|
||||
rm -f $@
|
||||
$(AR) cq $@ $^
|
||||
$(RANLIB) $@
|
||||
|
||||
raid6test: test.c raid6.a
|
||||
$(CC) $(CFLAGS) -o raid6test $^
|
||||
|
@ -684,7 +684,7 @@ int migrate_folio(struct address_space *mapping, struct folio *dst,
|
||||
}
|
||||
EXPORT_SYMBOL(migrate_folio);
|
||||
|
||||
#ifdef CONFIG_BLOCK
|
||||
#ifdef CONFIG_BUFFER_HEAD
|
||||
/* Returns true if all buffers are successfully locked */
|
||||
static bool buffer_migrate_lock_buffers(struct buffer_head *head,
|
||||
enum migrate_mode mode)
|
||||
@ -837,7 +837,7 @@ int buffer_migrate_folio_norefs(struct address_space *mapping,
|
||||
return __buffer_migrate_folio(mapping, dst, src, mode, true);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(buffer_migrate_folio_norefs);
|
||||
#endif
|
||||
#endif /* CONFIG_BUFFER_HEAD */
|
||||
|
||||
int filemap_migrate_folio(struct address_space *mapping,
|
||||
struct folio *dst, struct folio *src, enum migrate_mode mode)
|
||||
|
@ -100,6 +100,7 @@ class IocStat:
|
||||
self.period_at = ioc.period_at.value_() / 1_000_000
|
||||
self.vperiod_at = ioc.period_at_vtime.value_() / VTIME_PER_SEC
|
||||
self.vrate_pct = ioc.vtime_base_rate.value_() * 100 / VTIME_PER_USEC
|
||||
self.ivrate_pct = ioc.vtime_rate.counter.value_() * 100 / VTIME_PER_USEC
|
||||
self.busy_level = ioc.busy_level.value_()
|
||||
self.autop_idx = ioc.autop_idx.value_()
|
||||
self.user_cost_model = ioc.user_cost_model.value_()
|
||||
@ -119,7 +120,9 @@ class IocStat:
|
||||
'period_at' : self.period_at,
|
||||
'period_vtime_at' : self.vperiod_at,
|
||||
'busy_level' : self.busy_level,
|
||||
'vrate_pct' : self.vrate_pct, }
|
||||
'vrate_pct' : self.vrate_pct,
|
||||
'ivrate_pct' : self.ivrate_pct,
|
||||
}
|
||||
|
||||
def table_preamble_str(self):
|
||||
state = ('RUN' if self.running else 'IDLE') if self.enabled else 'OFF'
|
||||
@ -127,7 +130,7 @@ class IocStat:
|
||||
f'per={self.period_ms}ms ' \
|
||||
f'cur_per={self.period_at:.3f}:v{self.vperiod_at:.3f} ' \
|
||||
f'busy={self.busy_level:+3} ' \
|
||||
f'vrate={self.vrate_pct:6.2f}% ' \
|
||||
f'vrate={self.vrate_pct:6.2f}%:{self.ivrate_pct:6.2f}% ' \
|
||||
f'params={self.autop_name}'
|
||||
if self.user_cost_model or self.user_qos_params:
|
||||
output += f'({"C" if self.user_cost_model else ""}{"Q" if self.user_qos_params else ""})'
|
||||
@ -135,7 +138,7 @@ class IocStat:
|
||||
|
||||
def table_header_str(self):
|
||||
return f'{"":25} active {"weight":>9} {"hweight%":>13} {"inflt%":>6} ' \
|
||||
f'{"debt":>7} {"delay":>7} {"usage%"}'
|
||||
f'{"usage%":>6} {"wait":>7} {"debt":>7} {"delay":>7}'
|
||||
|
||||
class IocgStat:
|
||||
def __init__(self, iocg):
|
||||
@ -161,6 +164,8 @@ class IocgStat:
|
||||
|
||||
self.usage = (100 * iocg.usage_delta_us.value_() /
|
||||
ioc.period_us.value_()) if self.active else 0
|
||||
self.wait_ms = (iocg.stat.wait_us.value_() -
|
||||
iocg.last_stat.wait_us.value_()) / 1000
|
||||
self.debt_ms = iocg.abs_vdebt.value_() / VTIME_PER_USEC / 1000
|
||||
if blkg.use_delay.counter.value_() != 0:
|
||||
self.delay_ms = blkg.delay_nsec.counter.value_() / 1_000_000
|
||||
@ -177,9 +182,10 @@ class IocgStat:
|
||||
'hweight_active_pct' : self.hwa_pct,
|
||||
'hweight_inuse_pct' : self.hwi_pct,
|
||||
'inflight_pct' : self.inflight_pct,
|
||||
'usage_pct' : self.usage,
|
||||
'wait_ms' : self.wait_ms,
|
||||
'debt_ms' : self.debt_ms,
|
||||
'delay_ms' : self.delay_ms,
|
||||
'usage_pct' : self.usage,
|
||||
'address' : self.address }
|
||||
return out
|
||||
|
||||
@ -189,9 +195,10 @@ class IocgStat:
|
||||
f'{round(self.inuse):5}/{round(self.active):5} ' \
|
||||
f'{self.hwi_pct:6.2f}/{self.hwa_pct:6.2f} ' \
|
||||
f'{self.inflight_pct:6.2f} ' \
|
||||
f'{min(self.usage, 999):6.2f} ' \
|
||||
f'{self.wait_ms:7.2f} ' \
|
||||
f'{self.debt_ms:7.2f} ' \
|
||||
f'{self.delay_ms:7.2f} '\
|
||||
f'{min(self.usage, 999):6.2f}'
|
||||
f'{self.delay_ms:7.2f}'
|
||||
out = out.rstrip(':')
|
||||
return out
|
||||
|
||||
@ -221,7 +228,7 @@ ioc = None
|
||||
for i, ptr in radix_tree_for_each(blkcg_root.blkg_tree.address_of_()):
|
||||
blkg = drgn.Object(prog, 'struct blkcg_gq', address=ptr)
|
||||
try:
|
||||
if devname == blkg.q.kobj.parent.name.string_().decode('utf-8'):
|
||||
if devname == blkg.q.mq_kobj.parent.name.string_().decode('utf-8'):
|
||||
q_id = blkg.q.id.value_()
|
||||
if blkg.pd[plid]:
|
||||
root_iocg = container_of(blkg.pd[plid], 'struct ioc_gq', 'pd')
|
||||
|
Loading…
Reference in New Issue
Block a user