353767e4aa
-----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEE8rQSAMVO+zA4DBdWxWXV+ddtWDsFAmLnyNUACgkQxWXV+ddt WDt9vA/9HcF+v5EkknyW07tatTap/Hm/ZB86Z5OZi6ikwIEcHsWhp3rUICejm88e GecDPIluDtCtyD6x4stuqkwOm22aDP5q2T9H6+gyw92ozyb436OV1Z8IrmftzXKY EpZO70PHZT+E6E/WYvyoTmmoCrjib7YlqCWZZhSLUFpsqqlOInmHEH49PW6KvM4r acUZ/RxHurKdmI3kNY6ECbAQl6CASvtTdYcVCx8fT2zN0azoLIQxpYa7n/9ca1R6 8WnYilCbLbNGtcUXvO2M3tMZ4/5kvxrwQsUn93ccCJYuiN0ASiDXbLZ2g4LZ+n56 JGu+y5v5oBwjpVf+46cuvnENP5BQ61594WPseiVjrqODWnPjN28XkcVC0XmPsiiZ lszeHO2cuIrIFoCah8ELMl8usu8+qxfXmPxIXtPu9rEyKsDtOjxVYc8SMXqLp0qQ qYtBoFm0JcZHqtZRpB+dhQ37/xXtH4ljUi/mI6x8iALVujeR273URs7yO9zgIdeW uZoFtbwpHFLUk+TL7Ku82/zOXp3fCwtDpNmlYbxeMbea/be3ShjncM4+mYzvHYri dYON2LFrq+mnRDqtIXTCaAYwX7zU8Y18Ev9QwlNll8dKlKwS89+jpqLoa+eVYy3c /HitHFza70KxmOj4dvDVZlzDpPvl7kW1UBkmskg4u3jnNWzedkM= =sS1q -----END PGP SIGNATURE----- Merge tag 'for-5.20-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux Pull btrfs updates from David Sterba: "This brings some long awaited changes, the send protocol bump, otherwise lots of small improvements and fixes. The main core part is reworking bio handling, cleaning up the submission and endio and improving error handling. There are some changes outside of btrfs adding helpers or updating API, listed at the end of the changelog. Features: - sysfs: - export chunk size, in debug mode add tunable for setting its size - show zoned among features (was only in debug mode) - show commit stats (number, last/max/total duration) - send protocol updated to 2 - new commands: - ability write larger data chunks than 64K - send raw compressed extents (uses the encoded data ioctls), ie. 
no decompression on send side, no compression needed on receive side if supported - send 'otime' (inode creation time) among other timestamps - send file attributes (a.k.a file flags and xflags) - this is first version bump, backward compatibility on send and receive side is provided - there are still some known and wanted commands that will be implemented in the near future, another version bump will be needed, however we want to minimize that to avoid causing usability issues - print checksum type and implementation at mount time - don't print some messages at mount (mentioned as people asked about it), we want to print messages namely for new features so let's make some space for that - big metadata - this has been supported for a long time and is not a feature that's worth mentioning - skinny metadata - same reason, set by default by mkfs Performance improvements: - reduced amount of reserved metadata for delayed items - when inserted items can be batched into one leaf - when deleting batched directory index items - when deleting delayed items used for deletion - overall improved count of files/sec, decreased subvolume lock contention - metadata item access bounds checker micro-optimized, with a few percent of improved runtime for metadata-heavy operations - increase direct io limit for read to 256 sectors, improved throughput by 3x on sample workload Notable fixes: - raid56 - reduce parity writes, skip sectors of stripe when there are no data updates - restore reading from on-disk data instead of using stripe cache, this reduces chances to damage correct data due to RMW cycle - refuse to replay log with unknown incompat read-only feature bit set - zoned - fix page locking when COW fails in the middle of allocation - improved tracking of active zones, ZNS drives may limit the number and there are ENOSPC errors due to that limit and not actual lack of space - adjust maximum extent size for zone append so it does not cause late ENOSPC due to underreservation - 
mirror reading error messages show the mirror number - don't fallback to buffered IO for NOWAIT direct IO writes, we don't have the NOWAIT semantics for buffered io yet - send, fix sending link commands for existing file paths when there are deleted and created hardlinks for same files - repair all mirrors for profiles with more than 1 copy (raid1c34) - fix repair of compressed extents, unify where error detection and repair happen Core changes: - bio completion cleanups - don't double defer compression bios - simplify endio workqueues - add more data to btrfs_bio to avoid allocation for read requests - rework bio error handling so it's same what block layer does, the submission works and errors are consumed in endio - when asynchronous bio offload fails fall back to synchronous checksum calculation to avoid errors under writeback or memory pressure - new trace points - raid56 events - ordered extent operations - super block log_root_transid deprecated (never used) - mixed_backref and big_metadata sysfs feature files removed, they've been default for sufficiently long time, there are no known users and mixed_backref could be confused with mixed_groups Non-btrfs changes, API updates: - minor highmem API update to cover const arguments - switch all kmap/kmap_atomic to kmap_local - remove redundant flush_dcache_page() - address_space_operations::writepage callback removed - add bdev_max_segments() helper" * tag 'for-5.20-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (163 commits) btrfs: don't call btrfs_page_set_checked in finish_compressed_bio_read btrfs: fix repair of compressed extents btrfs: remove the start argument to check_data_csum and export btrfs: pass a btrfs_bio to btrfs_repair_one_sector btrfs: simplify the pending I/O counting in struct compressed_bio btrfs: repair all known bad mirrors btrfs: merge btrfs_dev_stat_print_on_error with its only caller btrfs: join running log transaction when logging new name btrfs: simplify error 
handling in btrfs_lookup_dentry btrfs: send: always use the rbtree based inode ref management infrastructure btrfs: send: fix sending link commands for existing file paths btrfs: send: introduce recorded_ref_alloc and recorded_ref_free btrfs: zoned: wait until zone is finished when allocation didn't progress btrfs: zoned: write out partially allocated region btrfs: zoned: activate necessary block group btrfs: zoned: activate metadata block group on flush_space btrfs: zoned: disable metadata overcommit for zoned btrfs: zoned: introduce space_info->active_total_bytes btrfs: zoned: finish least available block group on data bg allocation btrfs: let can_allocate_chunk return error ...
184 lines
6.0 KiB
C
184 lines
6.0 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
/*
|
|
* Copyright (C) 2008 Oracle. All rights reserved.
|
|
*/
|
|
|
|
#ifndef BTRFS_COMPRESSION_H
|
|
#define BTRFS_COMPRESSION_H
|
|
|
|
#include <linux/sizes.h>
|
|
|
|
struct btrfs_inode;
|
|
|
|
/*
|
|
* We want to make sure that amount of RAM required to uncompress an extent is
|
|
* reasonable, so we limit the total size in ram of a compressed extent to
|
|
* 128k. This is a crucial number because it also controls how easily we can
|
|
* spread reads across cpus for decompression.
|
|
*
|
|
* We also want to make sure the amount of IO required to do a random read is
|
|
* reasonably small, so we limit the size of a compressed extent to 128k.
|
|
*/
|
|
|
|
/* Maximum length of compressed data stored on disk */
|
|
#define BTRFS_MAX_COMPRESSED (SZ_128K)
|
|
/* Build-time check: BTRFS_MAX_COMPRESSED must be a whole multiple of PAGE_SIZE */
static_assert((BTRFS_MAX_COMPRESSED % PAGE_SIZE) == 0);
|
|
|
|
/* Maximum size of data before compression */
|
|
#define BTRFS_MAX_UNCOMPRESSED (SZ_128K)
|
|
|
|
/*
 * Default zlib level.
 * NOTE(review): presumably applied when no explicit level is requested;
 * confirm at the use sites.
 */
#define BTRFS_ZLIB_DEFAULT_LEVEL 3
|
|
|
|
/*
 * Bookkeeping for one compressed bio: the pages that hold the compressed
 * bytes, the inode range they belong to, and the completion state.
 *
 * A pointer to this structure is taken by btrfs_decompress_buf2page() and by
 * the zlib/lzo/zstd *_decompress_bio() functions declared in this header.
 */
struct compressed_bio {
|
|
/* Number of outstanding bios */
|
|
refcount_t pending_ios;
|
|
|
|
/* Number of compressed pages in the array */
|
|
unsigned int nr_pages;
|
|
|
|
/* The pages with the compressed data on them (nr_pages entries) */
|
|
struct page **compressed_pages;
|
|
|
|
/* Inode that owns this data */
|
|
struct inode *inode;
|
|
|
|
/* Starting offset in the inode for our pages */
|
|
u64 start;
|
|
|
|
/* Number of bytes in the inode we're working on */
|
|
unsigned int len;
|
|
|
|
/* Number of bytes on disk */
|
|
unsigned int compressed_len;
|
|
|
|
/*
 * The compression algorithm for this bio.
 * NOTE(review): presumably an enum btrfs_compression_type value; confirm.
 */
|
|
u8 compress_type;
|
|
|
|
/* Whether this is a write for writeback. */
|
|
bool writeback;
|
|
|
|
/* IO errors */
|
|
blk_status_t status;
|
|
|
|
/* The two members below share storage; only one is valid at a time. */
union {
|
|
/* For reads, this is the bio we are copying the data into */
|
|
struct bio *orig_bio;
|
|
/*
 * NOTE(review): presumably the work item that completes a write,
 * i.e. the write-side counterpart of orig_bio; confirm in
 * compression.c.
 */
struct work_struct write_end_work;
|
|
};
|
|
};
|
|
|
|
/*
 * Extract the compression type from a combined type/level value.
 *
 * The type occupies the low nibble (bits 0-3) of @type_level; the next
 * nibble carries the level and is read with btrfs_compress_level().
 *
 * Return: bits 0-3 of @type_level.
 */
static inline unsigned int btrfs_compress_type(unsigned int type_level)
{
	return type_level & 0xF;
}
|
|
|
|
/*
 * Extract the compression level from a combined type/level value.
 *
 * The level occupies bits 4-7 of @type_level; the low nibble carries the
 * compression type and is read with btrfs_compress_type(). Bits above bit 7
 * are ignored.
 *
 * Return: bits 4-7 of @type_level, shifted down to the range 0-15.
 */
static inline unsigned int btrfs_compress_level(unsigned int type_level)
{
	return (type_level & 0xF0) >> 4;
}
|
|
|
|
/*
 * Setup and teardown entry points of the compression code.
 * NOTE(review): presumably called once each at module load and unload;
 * confirm against the callers.
 */
void __init btrfs_init_compress(void);
|
|
void __cold btrfs_exit_compress(void);
|
|
|
|
/*
 * Algorithm-independent compression entry point.
 *
 * @type_level is a combined value; NOTE(review): presumably decoded with
 * btrfs_compress_type() and btrfs_compress_level() — confirm in
 * compression.c. @out_pages, @total_in and @total_out are pointers, so the
 * callee can report results through them.
 */
int btrfs_compress_pages(unsigned int type_level, struct address_space *mapping,
|
|
u64 start, struct page **pages,
|
|
unsigned long *out_pages,
|
|
unsigned long *total_in,
|
|
unsigned long *total_out);
|
|
/*
 * Decompress from the buffer @data_in (@srclen bytes) into @dest_page.
 * NOTE(review): @type is presumably an enum btrfs_compression_type value and
 * @start_byte/@destlen presumably select the wanted part of the output;
 * confirm in compression.c.
 */
int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
|
|
unsigned long start_byte, size_t srclen, size_t destlen);
|
|
/*
 * NOTE(review): judging by the name, copies decompressed bytes from @buf
 * (@buf_len bytes) into the pages of the bio tracked by @cb; the meaning of
 * @decompressed is not visible here — confirm in compression.c.
 */
int btrfs_decompress_buf2page(const char *buf, u32 buf_len,
|
|
struct compressed_bio *cb, u32 decompressed);
|
|
|
|
/*
 * Submit a compressed write for @inode.
 *
 * Several parameters share names with struct compressed_bio fields (start,
 * len, compressed_len, compressed_pages, nr_pages, writeback).
 * NOTE(review): presumably they are stored there — confirm in compression.c.
 *
 * Returns a blk_status_t.
 */
blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
|
|
unsigned int len, u64 disk_start,
|
|
unsigned int compressed_len,
|
|
struct page **compressed_pages,
|
|
unsigned int nr_pages,
|
|
blk_opf_t write_flags,
|
|
struct cgroup_subsys_state *blkcg_css,
|
|
bool writeback);
|
|
/*
 * Submit a compressed read for @bio on @inode. Returns nothing.
 * NOTE(review): errors are presumably reported through the bio; confirm.
 */
void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
|
|
int mirror_num);
|
|
|
|
/*
 * NOTE(review): judging by the name, parses a compression level for
 * algorithm @type from the string @str; the accepted syntax and the result
 * for invalid input are not visible here.
 */
unsigned int btrfs_compress_str2level(unsigned int type, const char *str);
|
|
|
|
/*
 * Identifiers of the supported compression algorithms.
 *
 * Every value is assigned explicitly. NOTE(review): the numbers presumably
 * match values stored elsewhere (e.g. on disk), so do not renumber without
 * checking the users.
 *
 * BTRFS_NR_COMPRESS_TYPES is the number of identifiers, counting
 * BTRFS_COMPRESS_NONE; it also defines BTRFS_NR_WORKSPACE_MANAGERS.
 */
enum btrfs_compression_type {
	BTRFS_COMPRESS_NONE = 0,
	BTRFS_COMPRESS_ZLIB = 1,
	BTRFS_COMPRESS_LZO = 2,
	BTRFS_COMPRESS_ZSTD = 3,
	BTRFS_NR_COMPRESS_TYPES = 4,
};
|
|
|
|
/*
 * Bookkeeping for a pool of workspaces: a list head, a spinlock, two counters
 * and a wait queue. struct btrfs_compress_op holds a pointer to one of these.
 */
struct workspace_manager {
|
|
/* NOTE(review): presumably the list of currently idle workspaces; confirm */
struct list_head idle_ws;
|
|
/* NOTE(review): presumably protects idle_ws and free_ws; confirm */
spinlock_t ws_lock;
|
|
/* Number of free workspaces */
|
|
int free_ws;
|
|
/* Total number of allocated workspaces */
|
|
atomic_t total_ws;
|
|
/* Waiters for a free workspace */
|
|
wait_queue_head_t ws_wait;
|
|
};
|
|
|
|
/*
 * Acquire and release a workspace for compression algorithm @type.
 * A workspace is handed around as a struct list_head pointer.
 * NOTE(review): presumably every successful btrfs_get_workspace() must be
 * paired with btrfs_put_workspace() using the same @type; confirm.
 */
struct list_head *btrfs_get_workspace(int type, unsigned int level);
|
|
void btrfs_put_workspace(int type, struct list_head *ws);
|
|
|
|
/*
 * Per-algorithm descriptor: the workspace manager the algorithm uses and the
 * compression levels it accepts.
 *
 * One constant instance per backend is declared in this header:
 * btrfs_heuristic_compress, btrfs_zlib_compress, btrfs_lzo_compress and
 * btrfs_zstd_compress.
 */
struct btrfs_compress_op {
	/* Workspace pool used by this algorithm */
	struct workspace_manager *workspace_manager;
	/* Maximum level supported by the compression algorithm */
	unsigned int max_level;
	/*
	 * NOTE(review): presumably the level used when the caller does not
	 * request one; confirm in compression.c.
	 */
	unsigned int default_level;
};
|
|
|
|
/*
 * The heuristic workspaces are managed via the 0th workspace manager, so the
 * number of managers equals the number of compression type identifiers.
 */
|
|
#define BTRFS_NR_WORKSPACE_MANAGERS BTRFS_NR_COMPRESS_TYPES
|
|
|
|
/* Per-backend descriptors; only declared here, the definitions live elsewhere. */
extern const struct btrfs_compress_op btrfs_heuristic_compress;
|
|
extern const struct btrfs_compress_op btrfs_zlib_compress;
|
|
extern const struct btrfs_compress_op btrfs_lzo_compress;
|
|
extern const struct btrfs_compress_op btrfs_zstd_compress;
|
|
|
|
/*
 * Map a compression type identifier to a string.
 * NOTE(review): the result for out-of-range values is not visible here.
 */
const char* btrfs_compress_type2str(enum btrfs_compression_type type);
|
|
/*
 * Takes @str with an explicit length @len.
 * NOTE(review): presumably reports whether it names a known compression
 * type, and @str presumably need not be NUL-terminated; confirm.
 */
bool btrfs_compress_is_valid_type(const char *str, size_t len);
|
|
|
|
/*
 * Run the compression heuristic on a range of @inode, given by @start and
 * @end.
 * NOTE(review): the meaning of the return value and whether @end is
 * inclusive are not visible here.
 */
int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end);
|
|
|
|
/*
 * zlib backend entry points. The compress/decompress signatures are the same
 * as those of the lzo_* and zstd_* declarations in this header. @ws is a
 * workspace handle, passed as a struct list_head pointer.
 */
int zlib_compress_pages(struct list_head *ws, struct address_space *mapping,
|
|
u64 start, struct page **pages, unsigned long *out_pages,
|
|
unsigned long *total_in, unsigned long *total_out);
|
|
/* Decompress the data tracked by @cb */
int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb);
|
|
/* Decompress from the buffer @data_in (@srclen bytes) into @dest_page */
int zlib_decompress(struct list_head *ws, unsigned char *data_in,
|
|
struct page *dest_page, unsigned long start_byte, size_t srclen,
|
|
size_t destlen);
|
|
/*
 * Workspace lifetime helpers.
 * NOTE(review): presumably alloc creates a new workspace for @level and get
 * picks one from the pool; confirm in zlib.c.
 */
struct list_head *zlib_alloc_workspace(unsigned int level);
|
|
void zlib_free_workspace(struct list_head *ws);
|
|
struct list_head *zlib_get_workspace(unsigned int level);
|
|
|
|
/*
 * lzo backend entry points. The compress/decompress signatures are the same
 * as those of the zlib_* and zstd_* declarations in this header. @ws is a
 * workspace handle, passed as a struct list_head pointer.
 */
int lzo_compress_pages(struct list_head *ws, struct address_space *mapping,
|
|
u64 start, struct page **pages, unsigned long *out_pages,
|
|
unsigned long *total_in, unsigned long *total_out);
|
|
/* Decompress the data tracked by @cb */
int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb);
|
|
/* Decompress from the buffer @data_in (@srclen bytes) into @dest_page */
int lzo_decompress(struct list_head *ws, unsigned char *data_in,
|
|
struct page *dest_page, unsigned long start_byte, size_t srclen,
|
|
size_t destlen);
|
|
/*
 * Workspace lifetime helpers. Unlike zlib and zstd, no lzo_get_workspace()
 * is declared in this header.
 */
struct list_head *lzo_alloc_workspace(unsigned int level);
|
|
void lzo_free_workspace(struct list_head *ws);
|
|
|
|
/*
 * zstd backend entry points. The compress/decompress signatures are the same
 * as those of the zlib_* and lzo_* declarations in this header. @ws is a
 * workspace handle, passed as a struct list_head pointer.
 */
int zstd_compress_pages(struct list_head *ws, struct address_space *mapping,
|
|
u64 start, struct page **pages, unsigned long *out_pages,
|
|
unsigned long *total_in, unsigned long *total_out);
|
|
/* Decompress the data tracked by @cb */
int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb);
|
|
/* Decompress from the buffer @data_in (@srclen bytes) into @dest_page */
int zstd_decompress(struct list_head *ws, unsigned char *data_in,
|
|
struct page *dest_page, unsigned long start_byte, size_t srclen,
|
|
size_t destlen);
|
|
/*
 * zstd is the only backend in this header with its own workspace-manager
 * init/cleanup functions and its own put function.
 * NOTE(review): presumably because it keeps a private pool; confirm in zstd.c.
 */
void zstd_init_workspace_manager(void);
|
|
void zstd_cleanup_workspace_manager(void);
|
|
struct list_head *zstd_alloc_workspace(unsigned int level);
|
|
void zstd_free_workspace(struct list_head *ws);
|
|
struct list_head *zstd_get_workspace(unsigned int level);
|
|
void zstd_put_workspace(struct list_head *ws);
|
|
|
|
#endif
|