diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index e311bb8e647b..021dcec6f194 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -38,7 +38,6 @@ * - add a mode to also read unallocated space */ -struct scrub_block; struct scrub_ctx; /* @@ -183,19 +182,6 @@ struct scrub_stripe { struct work_struct work; }; -struct scrub_sector { - struct scrub_block *sblock; - struct list_head list; - u64 flags; /* extent flags */ - u64 generation; - /* Offset in bytes to @sblock. */ - u32 offset; - atomic_t refs; - unsigned int have_csum:1; - unsigned int io_error:1; - u8 csum[BTRFS_CSUM_SIZE]; -}; - struct scrub_bio { int index; struct scrub_ctx *sctx; @@ -204,45 +190,11 @@ struct scrub_bio { blk_status_t status; u64 logical; u64 physical; - struct scrub_sector *sectors[SCRUB_SECTORS_PER_BIO]; int sector_count; int next_free; struct work_struct work; }; -struct scrub_block { - /* - * Each page will have its page::private used to record the logical - * bytenr. - */ - struct page *pages[SCRUB_MAX_PAGES]; - struct scrub_sector *sectors[SCRUB_MAX_SECTORS_PER_BLOCK]; - struct btrfs_device *dev; - /* Logical bytenr of the sblock */ - u64 logical; - u64 physical; - u64 physical_for_dev_replace; - /* Length of sblock in bytes */ - u32 len; - int sector_count; - int mirror_num; - - atomic_t outstanding_sectors; - refcount_t refs; /* free mem on transition to zero */ - struct scrub_ctx *sctx; - struct { - unsigned int header_error:1; - unsigned int checksum_error:1; - unsigned int no_io_error_seen:1; - unsigned int generation_error:1; /* also sets header_error */ - - /* The following is for the data used to check parity */ - /* It is for the data with checksum */ - unsigned int data_corrected:1; - }; - struct work_struct work; -}; - struct scrub_ctx { struct scrub_bio *bios[SCRUB_BIOS_PER_SCTX]; struct scrub_stripe stripes[SCRUB_STRIPES_PER_SCTX]; @@ -295,44 +247,6 @@ struct scrub_warning { struct btrfs_device *dev; }; -#ifndef CONFIG_64BIT -/* This structure is for architectures 
whose (void *) is smaller than u64 */ -struct scrub_page_private { - u64 logical; -}; -#endif - -static int attach_scrub_page_private(struct page *page, u64 logical) -{ -#ifdef CONFIG_64BIT - attach_page_private(page, (void *)logical); - return 0; -#else - struct scrub_page_private *spp; - - spp = kmalloc(sizeof(*spp), GFP_KERNEL); - if (!spp) - return -ENOMEM; - spp->logical = logical; - attach_page_private(page, (void *)spp); - return 0; -#endif -} - -static void detach_scrub_page_private(struct page *page) -{ -#ifdef CONFIG_64BIT - detach_page_private(page); - return; -#else - struct scrub_page_private *spp; - - spp = detach_page_private(page); - kfree(spp); - return; -#endif -} - static void release_scrub_stripe(struct scrub_stripe *stripe) { if (!stripe) @@ -391,141 +305,7 @@ static void wait_scrub_stripe_io(struct scrub_stripe *stripe) wait_event(stripe->io_wait, atomic_read(&stripe->pending_io) == 0); } -struct scrub_block *alloc_scrub_block(struct scrub_ctx *sctx, - struct btrfs_device *dev, - u64 logical, u64 physical, - u64 physical_for_dev_replace, - int mirror_num) -{ - struct scrub_block *sblock; - - sblock = kzalloc(sizeof(*sblock), GFP_KERNEL); - if (!sblock) - return NULL; - refcount_set(&sblock->refs, 1); - sblock->sctx = sctx; - sblock->logical = logical; - sblock->physical = physical; - sblock->physical_for_dev_replace = physical_for_dev_replace; - sblock->dev = dev; - sblock->mirror_num = mirror_num; - sblock->no_io_error_seen = 1; - /* - * Scrub_block::pages will be allocated at alloc_scrub_sector() when - * the corresponding page is not allocated. - */ - return sblock; -} - -/* - * Allocate a new scrub sector and attach it to @sblock. - * - * Will also allocate new pages for @sblock if needed. 
- */ -struct scrub_sector *alloc_scrub_sector(struct scrub_block *sblock, u64 logical) -{ - const pgoff_t page_index = (logical - sblock->logical) >> PAGE_SHIFT; - struct scrub_sector *ssector; - - /* We must never have scrub_block exceed U32_MAX in size. */ - ASSERT(logical - sblock->logical < U32_MAX); - - ssector = kzalloc(sizeof(*ssector), GFP_KERNEL); - if (!ssector) - return NULL; - - /* Allocate a new page if the slot is not allocated */ - if (!sblock->pages[page_index]) { - int ret; - - sblock->pages[page_index] = alloc_page(GFP_KERNEL); - if (!sblock->pages[page_index]) { - kfree(ssector); - return NULL; - } - ret = attach_scrub_page_private(sblock->pages[page_index], - sblock->logical + (page_index << PAGE_SHIFT)); - if (ret < 0) { - kfree(ssector); - __free_page(sblock->pages[page_index]); - sblock->pages[page_index] = NULL; - return NULL; - } - } - - atomic_set(&ssector->refs, 1); - ssector->sblock = sblock; - /* The sector to be added should not be used */ - ASSERT(sblock->sectors[sblock->sector_count] == NULL); - ssector->offset = logical - sblock->logical; - - /* The sector count must be smaller than the limit */ - ASSERT(sblock->sector_count < SCRUB_MAX_SECTORS_PER_BLOCK); - - sblock->sectors[sblock->sector_count] = ssector; - sblock->sector_count++; - sblock->len += sblock->sctx->fs_info->sectorsize; - - return ssector; -} - -static struct page *scrub_sector_get_page(struct scrub_sector *ssector) -{ - struct scrub_block *sblock = ssector->sblock; - pgoff_t index; - /* - * When calling this function, ssector must be alreaday attached to the - * parent sblock. 
- */ - ASSERT(sblock); - - /* The range should be inside the sblock range */ - ASSERT(ssector->offset < sblock->len); - - index = ssector->offset >> PAGE_SHIFT; - ASSERT(index < SCRUB_MAX_PAGES); - ASSERT(sblock->pages[index]); - ASSERT(PagePrivate(sblock->pages[index])); - return sblock->pages[index]; -} - -static unsigned int scrub_sector_get_page_offset(struct scrub_sector *ssector) -{ - struct scrub_block *sblock = ssector->sblock; - - /* - * When calling this function, ssector must be already attached to the - * parent sblock. - */ - ASSERT(sblock); - - /* The range should be inside the sblock range */ - ASSERT(ssector->offset < sblock->len); - - return offset_in_page(ssector->offset); -} - -static char *scrub_sector_get_kaddr(struct scrub_sector *ssector) -{ - return page_address(scrub_sector_get_page(ssector)) + - scrub_sector_get_page_offset(ssector); -} - -static int bio_add_scrub_sector(struct bio *bio, struct scrub_sector *ssector, - unsigned int len) -{ - return bio_add_page(bio, scrub_sector_get_page(ssector), len, - scrub_sector_get_page_offset(ssector)); -} - -static int scrub_checksum_data(struct scrub_block *sblock); -static int scrub_checksum_tree_block(struct scrub_block *sblock); -static int scrub_checksum_super(struct scrub_block *sblock); -static void scrub_block_put(struct scrub_block *sblock); -static void scrub_sector_put(struct scrub_sector *sector); -static void scrub_bio_end_io(struct bio *bio); static void scrub_bio_end_io_worker(struct work_struct *work); -static void scrub_block_complete(struct scrub_block *sblock); static void scrub_put_ctx(struct scrub_ctx *sctx); static void scrub_pending_bio_inc(struct scrub_ctx *sctx) @@ -595,8 +375,6 @@ static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx) if (sctx->curr != -1) { struct scrub_bio *sbio = sctx->bios[sctx->curr]; - for (i = 0; i < sbio->sector_count; i++) - scrub_block_put(sbio->sectors[i]->sblock); bio_put(sbio->bio); } @@ -893,15 +671,6 @@ static inline void 
scrub_stripe_index_and_offset(u64 logical, u64 map_type, } } -static inline int scrub_check_fsid(u8 fsid[], struct scrub_sector *sector) -{ - struct btrfs_fs_devices *fs_devices = sector->sblock->dev->fs_devices; - int ret; - - ret = memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE); - return !ret; -} - static int fill_writer_pointer_gap(struct scrub_ctx *sctx, u64 physical) { int ret = 0; @@ -924,68 +693,6 @@ static int fill_writer_pointer_gap(struct scrub_ctx *sctx, u64 physical) return ret; } -static void scrub_block_get(struct scrub_block *sblock) -{ - refcount_inc(&sblock->refs); -} - -static int scrub_checksum(struct scrub_block *sblock) -{ - u64 flags; - int ret; - - /* - * No need to initialize these stats currently, - * because this function only use return value - * instead of these stats value. - * - * Todo: - * always use stats - */ - sblock->header_error = 0; - sblock->generation_error = 0; - sblock->checksum_error = 0; - - WARN_ON(sblock->sector_count < 1); - flags = sblock->sectors[0]->flags; - ret = 0; - if (flags & BTRFS_EXTENT_FLAG_DATA) - ret = scrub_checksum_data(sblock); - else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) - ret = scrub_checksum_tree_block(sblock); - else if (flags & BTRFS_EXTENT_FLAG_SUPER) - ret = scrub_checksum_super(sblock); - else - WARN_ON(1); - return ret; -} - -static int scrub_checksum_data(struct scrub_block *sblock) -{ - struct scrub_ctx *sctx = sblock->sctx; - struct btrfs_fs_info *fs_info = sctx->fs_info; - SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); - u8 csum[BTRFS_CSUM_SIZE]; - struct scrub_sector *sector; - char *kaddr; - - BUG_ON(sblock->sector_count < 1); - sector = sblock->sectors[0]; - if (!sector->have_csum) - return 0; - - kaddr = scrub_sector_get_kaddr(sector); - - shash->tfm = fs_info->csum_shash; - crypto_shash_init(shash); - - crypto_shash_digest(shash, kaddr, fs_info->sectorsize, csum); - - if (memcmp(csum, sector->csum, fs_info->csum_size)) - sblock->checksum_error = 1; - return 
sblock->checksum_error; -} - static struct page *scrub_stripe_get_page(struct scrub_stripe *stripe, int sector_nr) { struct btrfs_fs_info *fs_info = stripe->bg->fs_info; @@ -1579,168 +1286,6 @@ static void scrub_write_sectors(struct scrub_ctx *sctx, struct scrub_stripe *str } } -static int scrub_checksum_tree_block(struct scrub_block *sblock) -{ - struct scrub_ctx *sctx = sblock->sctx; - struct btrfs_header *h; - struct btrfs_fs_info *fs_info = sctx->fs_info; - SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); - u8 calculated_csum[BTRFS_CSUM_SIZE]; - u8 on_disk_csum[BTRFS_CSUM_SIZE]; - /* - * This is done in sectorsize steps even for metadata as there's a - * constraint for nodesize to be aligned to sectorsize. This will need - * to change so we don't misuse data and metadata units like that. - */ - const u32 sectorsize = sctx->fs_info->sectorsize; - const int num_sectors = fs_info->nodesize >> fs_info->sectorsize_bits; - int i; - struct scrub_sector *sector; - char *kaddr; - - BUG_ON(sblock->sector_count < 1); - - /* Each member in sectors is just one sector */ - ASSERT(sblock->sector_count == num_sectors); - - sector = sblock->sectors[0]; - kaddr = scrub_sector_get_kaddr(sector); - h = (struct btrfs_header *)kaddr; - memcpy(on_disk_csum, h->csum, sctx->fs_info->csum_size); - - /* - * we don't use the getter functions here, as we - * a) don't have an extent buffer and - * b) the page is already kmapped - */ - if (sblock->logical != btrfs_stack_header_bytenr(h)) { - sblock->header_error = 1; - btrfs_warn_rl(fs_info, - "tree block %llu mirror %u has bad bytenr, has %llu want %llu", - sblock->logical, sblock->mirror_num, - btrfs_stack_header_bytenr(h), - sblock->logical); - goto out; - } - - if (!scrub_check_fsid(h->fsid, sector)) { - sblock->header_error = 1; - btrfs_warn_rl(fs_info, - "tree block %llu mirror %u has bad fsid, has %pU want %pU", - sblock->logical, sblock->mirror_num, - h->fsid, sblock->dev->fs_devices->fsid); - goto out; - } - - if 
(memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid, BTRFS_UUID_SIZE)) { - sblock->header_error = 1; - btrfs_warn_rl(fs_info, - "tree block %llu mirror %u has bad chunk tree uuid, has %pU want %pU", - sblock->logical, sblock->mirror_num, - h->chunk_tree_uuid, fs_info->chunk_tree_uuid); - goto out; - } - - shash->tfm = fs_info->csum_shash; - crypto_shash_init(shash); - crypto_shash_update(shash, kaddr + BTRFS_CSUM_SIZE, - sectorsize - BTRFS_CSUM_SIZE); - - for (i = 1; i < num_sectors; i++) { - kaddr = scrub_sector_get_kaddr(sblock->sectors[i]); - crypto_shash_update(shash, kaddr, sectorsize); - } - - crypto_shash_final(shash, calculated_csum); - if (memcmp(calculated_csum, on_disk_csum, sctx->fs_info->csum_size)) { - sblock->checksum_error = 1; - btrfs_warn_rl(fs_info, - "tree block %llu mirror %u has bad csum, has " CSUM_FMT " want " CSUM_FMT, - sblock->logical, sblock->mirror_num, - CSUM_FMT_VALUE(fs_info->csum_size, on_disk_csum), - CSUM_FMT_VALUE(fs_info->csum_size, calculated_csum)); - goto out; - } - - if (sector->generation != btrfs_stack_header_generation(h)) { - sblock->header_error = 1; - sblock->generation_error = 1; - btrfs_warn_rl(fs_info, - "tree block %llu mirror %u has bad generation, has %llu want %llu", - sblock->logical, sblock->mirror_num, - btrfs_stack_header_generation(h), - sector->generation); - } - -out: - return sblock->header_error || sblock->checksum_error; -} - -static int scrub_checksum_super(struct scrub_block *sblock) -{ - struct btrfs_super_block *s; - struct scrub_ctx *sctx = sblock->sctx; - struct btrfs_fs_info *fs_info = sctx->fs_info; - SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); - u8 calculated_csum[BTRFS_CSUM_SIZE]; - struct scrub_sector *sector; - char *kaddr; - int fail_gen = 0; - int fail_cor = 0; - - BUG_ON(sblock->sector_count < 1); - sector = sblock->sectors[0]; - kaddr = scrub_sector_get_kaddr(sector); - s = (struct btrfs_super_block *)kaddr; - - if (sblock->logical != btrfs_super_bytenr(s)) - ++fail_cor; - - if 
(sector->generation != btrfs_super_generation(s)) - ++fail_gen; - - if (!scrub_check_fsid(s->fsid, sector)) - ++fail_cor; - - shash->tfm = fs_info->csum_shash; - crypto_shash_init(shash); - crypto_shash_digest(shash, kaddr + BTRFS_CSUM_SIZE, - BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE, calculated_csum); - - if (memcmp(calculated_csum, s->csum, sctx->fs_info->csum_size)) - ++fail_cor; - - return fail_cor + fail_gen; -} - -static void scrub_block_put(struct scrub_block *sblock) -{ - if (refcount_dec_and_test(&sblock->refs)) { - int i; - - for (i = 0; i < sblock->sector_count; i++) - scrub_sector_put(sblock->sectors[i]); - for (i = 0; i < DIV_ROUND_UP(sblock->len, PAGE_SIZE); i++) { - if (sblock->pages[i]) { - detach_scrub_page_private(sblock->pages[i]); - __free_page(sblock->pages[i]); - } - } - kfree(sblock); - } -} - -void scrub_sector_get(struct scrub_sector *sector) -{ - atomic_inc(&sector->refs); -} - -static void scrub_sector_put(struct scrub_sector *sector) -{ - if (atomic_dec_and_test(&sector->refs)) - kfree(sector); -} - static void scrub_throttle_dev_io(struct scrub_ctx *sctx, struct btrfs_device *device, unsigned int bio_size) { @@ -1820,109 +1365,12 @@ static void scrub_submit(struct scrub_ctx *sctx) submit_bio(sbio->bio); } -int scrub_add_sector_to_rd_bio(struct scrub_ctx *sctx, struct scrub_sector *sector) -{ - struct scrub_block *sblock = sector->sblock; - struct scrub_bio *sbio; - const u32 sectorsize = sctx->fs_info->sectorsize; - int ret; - -again: - /* - * grab a fresh bio or wait for one to become available - */ - while (sctx->curr == -1) { - spin_lock(&sctx->list_lock); - sctx->curr = sctx->first_free; - if (sctx->curr != -1) { - sctx->first_free = sctx->bios[sctx->curr]->next_free; - sctx->bios[sctx->curr]->next_free = -1; - sctx->bios[sctx->curr]->sector_count = 0; - spin_unlock(&sctx->list_lock); - } else { - spin_unlock(&sctx->list_lock); - wait_event(sctx->list_wait, sctx->first_free != -1); - } - } - sbio = sctx->bios[sctx->curr]; - if 
(sbio->sector_count == 0) { - sbio->physical = sblock->physical + sector->offset; - sbio->logical = sblock->logical + sector->offset; - sbio->dev = sblock->dev; - if (!sbio->bio) { - sbio->bio = bio_alloc(sbio->dev->bdev, sctx->sectors_per_bio, - REQ_OP_READ, GFP_NOFS); - } - sbio->bio->bi_private = sbio; - sbio->bio->bi_end_io = scrub_bio_end_io; - sbio->bio->bi_iter.bi_sector = sbio->physical >> 9; - sbio->status = 0; - } else if (sbio->physical + sbio->sector_count * sectorsize != - sblock->physical + sector->offset || - sbio->logical + sbio->sector_count * sectorsize != - sblock->logical + sector->offset || - sbio->dev != sblock->dev) { - scrub_submit(sctx); - goto again; - } - - sbio->sectors[sbio->sector_count] = sector; - ret = bio_add_scrub_sector(sbio->bio, sector, sectorsize); - if (ret != sectorsize) { - if (sbio->sector_count < 1) { - bio_put(sbio->bio); - sbio->bio = NULL; - return -EIO; - } - scrub_submit(sctx); - goto again; - } - - scrub_block_get(sblock); /* one for the page added to the bio */ - atomic_inc(&sblock->outstanding_sectors); - sbio->sector_count++; - if (sbio->sector_count == sctx->sectors_per_bio) - scrub_submit(sctx); - - return 0; -} - -static void scrub_bio_end_io(struct bio *bio) -{ - struct scrub_bio *sbio = bio->bi_private; - struct btrfs_fs_info *fs_info = sbio->dev->fs_info; - - sbio->status = bio->bi_status; - sbio->bio = bio; - - queue_work(fs_info->scrub_workers, &sbio->work); -} - static void scrub_bio_end_io_worker(struct work_struct *work) { struct scrub_bio *sbio = container_of(work, struct scrub_bio, work); struct scrub_ctx *sctx = sbio->sctx; - int i; ASSERT(sbio->sector_count <= SCRUB_SECTORS_PER_BIO); - if (sbio->status) { - for (i = 0; i < sbio->sector_count; i++) { - struct scrub_sector *sector = sbio->sectors[i]; - - sector->io_error = 1; - sector->sblock->no_io_error_seen = 0; - } - } - - /* Now complete the scrub_block items that have all pages completed */ - for (i = 0; i < sbio->sector_count; i++) { - struct 
scrub_sector *sector = sbio->sectors[i]; - struct scrub_block *sblock = sector->sblock; - - if (atomic_dec_and_test(&sblock->outstanding_sectors)) - scrub_block_complete(sblock); - scrub_block_put(sblock); - } bio_put(sbio->bio); sbio->bio = NULL; @@ -1934,17 +1382,6 @@ static void scrub_bio_end_io_worker(struct work_struct *work) scrub_pending_bio_dec(sctx); } -static void scrub_block_complete(struct scrub_block *sblock) -{ - if (sblock->no_io_error_seen) - /* - * if has checksum error, write via repair mechanism in - * dev replace case, otherwise write here in dev replace - * case. - */ - scrub_checksum(sblock); -} - static void drop_csum_range(struct scrub_ctx *sctx, struct btrfs_ordered_sum *sum) { sctx->stat.csum_discards += sum->len >> sctx->fs_info->sectorsize_bits; diff --git a/fs/btrfs/scrub.h b/fs/btrfs/scrub.h index 7d1982893363..1fa4d26e8122 100644 --- a/fs/btrfs/scrub.h +++ b/fs/btrfs/scrub.h @@ -15,17 +15,7 @@ int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid, /* Temporary declaration, would be deleted later. */ struct scrub_ctx; -struct scrub_sector; struct scrub_block; int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u8 *csum); -int scrub_add_sector_to_rd_bio(struct scrub_ctx *sctx, - struct scrub_sector *sector); -void scrub_sector_get(struct scrub_sector *sector); -struct scrub_sector *alloc_scrub_sector(struct scrub_block *sblock, u64 logical); -struct scrub_block *alloc_scrub_block(struct scrub_ctx *sctx, - struct btrfs_device *dev, - u64 logical, u64 physical, - u64 physical_for_dev_replace, - int mirror_num); #endif