dm vdo: implement the block map page cache

The set of leaf pages of the block map tree is too large to fit in memory,
so each block map zone maintains a cache of leaf pages. This patch adds the
implementation of that cache.

Co-developed-by: J. corwin Coburn <corwin@hurlbutnet.net>
Signed-off-by: J. corwin Coburn <corwin@hurlbutnet.net>
Co-developed-by: Michael Sclafani <dm-devel@lists.linux.dev>
Signed-off-by: Michael Sclafani <dm-devel@lists.linux.dev>
Co-developed-by: Sweet Tea Dorminy <sweettea-kernel@dorminy.me>
Signed-off-by: Sweet Tea Dorminy <sweettea-kernel@dorminy.me>
Signed-off-by: Matthew Sakai <msakai@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@kernel.org>
This commit is contained in:
Matthew Sakai 2023-11-16 21:06:33 -05:00 committed by Mike Snitzer
parent ddb12d6714
commit 14d531d7b7
2 changed files with 1389 additions and 0 deletions

File diff suppressed because it is too large Load Diff

View File

@ -30,6 +30,140 @@ typedef u32 vdo_page_generation;
/*
 * A constant block map entry, declared here and defined outside this header.
 * NOTE(review): presumably the entry value used to represent an unmapped block -- confirm
 * against the definition.
 */
extern const struct block_map_entry UNMAPPED_BLOCK_MAP_ENTRY;
/*
 * The VDO Page Cache abstraction.
 *
 * A cache of block map pages belonging to a single block map zone (see the zone field). It holds
 * the page buffers and their page_info entries, the lists used to track them, counters for
 * pending I/O and flushes, and the queue of waiters who need a free page.
 */
struct vdo_page_cache {
/* the VDO which owns this cache */
struct vdo *vdo;
/* number of pages in cache */
page_count_t page_count;
/* number of pages to write in the current batch */
page_count_t pages_in_batch;
/* Whether the VDO is doing a read-only rebuild */
bool rebuilding;
/*
 * array of page information entries
 * NOTE(review): presumably one struct page_info per cache page (page_count entries) -- confirm
 */
struct page_info *infos;
/* raw memory for pages */
char *pages;
/* the most recently found page info, cached here */
struct page_info *last_found;
/* map of page number to info */
struct int_map *page_map;
/*
 * main LRU list (all infos)
 * NOTE(review): presumably linked through page_info.lru_entry -- confirm
 */
struct list_head lru_list;
/* free page list (oldest first) */
struct list_head free_list;
/* outgoing page list */
struct list_head outgoing_list;
/* number of read I/O operations pending */
page_count_t outstanding_reads;
/* number of write I/O operations pending */
page_count_t outstanding_writes;
/* number of pages covered by the current flush */
page_count_t pages_in_flush;
/* number of pages waiting to be included in the next flush */
page_count_t pages_to_flush;
/* number of discards in progress */
unsigned int discard_count;
/*
 * number of VPCs waiting for a free page
 * NOTE(review): "VPC" presumably abbreviates vdo_page_completion -- confirm
 */
unsigned int waiter_count;
/* queue of waiters who want a free page */
struct wait_queue free_waiters;
/*
 * Statistics are only updated on the logical zone thread, but are accessed from other
 * threads.
 */
struct block_map_statistics stats;
/* counter for pressure reports */
u32 pressure_report;
/* the block map zone to which this cache belongs */
struct block_map_zone *zone;
};
/*
 * The state of a page buffer. If the page buffer is free, no particular page is bound to it;
 * otherwise the page buffer is bound to a particular page whose absolute pbn is in the pbn field.
 * If the page is resident or dirty, the page data is stable and may be accessed. Otherwise the
 * page is in flight (incoming or outgoing) and its data should not be accessed.
 *
 * The enumeration is declared __packed, which asks the compiler to use the smallest integer type
 * able to hold its values.
 *
 * @note Update the static data in get_page_state_name() if you change this enumeration.
 */
enum vdo_page_buffer_state {
/* this page buffer is not being used */
PS_FREE,
/* this page is being read from store */
PS_INCOMING,
/* attempt to load this page failed */
PS_FAILED,
/* this page is valid and un-modified */
PS_RESIDENT,
/* this page is valid and modified */
PS_DIRTY,
/* this page is being written and should not be used */
PS_OUTGOING,
/* not a state; as the last enumerator, its value is the number of states above */
PAGE_STATE_COUNT,
} __packed;
/*
 * The write status of a page.
 *
 * NOTE(review): the meaning of each value is not documented in this header; the per-value notes
 * below are inferred from the names only and should be confirmed against the implementation.
 */
enum vdo_page_write_status {
/* NOTE(review): presumably no special write handling is pending -- confirm */
WRITE_STATUS_NORMAL,
/* NOTE(review): presumably related to discarding the page (cf. discard_count) -- confirm */
WRITE_STATUS_DISCARD,
/* NOTE(review): presumably a write of the page has been deferred -- confirm */
WRITE_STATUS_DEFERRED,
} __packed;
/*
 * Per-page-slot information: the bookkeeping for one page buffer of a vdo_page_cache, including
 * which page (pbn) is bound to it, its buffer state and write status, and its list linkage.
 */
struct page_info {
/* Preallocated page struct vio */
struct vio *vio;
/* back-link for references */
struct vdo_page_cache *cache;
/* the pbn of the page */
physical_block_number_t pbn;
/*
 * page is busy (temporarily locked)
 * NOTE(review): stored as a u16 rather than a bool, so presumably a count -- confirm
 */
u16 busy;
/* the write status of the page */
enum vdo_page_write_status write_status;
/* page state */
enum vdo_page_buffer_state state;
/* queue of completions awaiting this item */
struct wait_queue waiting;
/* state linked list entry */
struct list_head state_entry;
/* LRU entry */
struct list_head lru_entry;
/*
 * The earliest recovery journal block containing uncommitted updates to the block map page
 * associated with this page_info. A reference (lock) is held on that block to prevent it
 * from being reaped. When this value changes, the reference on the old value must be
 * released and a reference on the new value must be acquired.
 */
sequence_number_t recovery_lock;
};
/*
 * A completion awaiting a specific page. Also a live reference into the page once completed, until
 * freed.
 *
 * The generic vdo_completion is embedded as the completion field, so a pointer to that field can
 * be converted back to the enclosing vdo_page_completion with container_of().
 */
struct vdo_page_completion {
/* The generic completion */
struct vdo_completion completion;
/* The cache involved */
struct vdo_page_cache *cache;
/* The waiter for the pending list */
struct waiter waiter;
/* The absolute physical block number of the page on disk */
physical_block_number_t pbn;
/* Whether the page may be modified */
bool writable;
/* Whether the page is available */
bool ready;
/* The info structure for the page; only valid when ready is set */
struct page_info *info;
};
struct forest;
struct tree_page {
@ -141,6 +275,26 @@ struct block_map {
/*
 * Type of a callback function which takes a physical block number and a completion and returns
 * an int.
 * NOTE(review): presumably invoked for block map entries, with the int being a status code --
 * confirm against the callers.
 */
typedef int (*vdo_entry_callback_fn)(physical_block_number_t pbn,
struct vdo_completion *completion);
/*
 * Convert a generic completion into the vdo_page_completion which embeds it. The completion's
 * type is first asserted to be VDO_PAGE_COMPLETION.
 */
static inline struct vdo_page_completion *as_vdo_page_completion(struct vdo_completion *completion)
{
	struct vdo_page_completion *page_completion;

	vdo_assert_completion_type(completion, VDO_PAGE_COMPLETION);
	/* Step back from the embedded completion field to its enclosing structure. */
	page_completion = container_of(completion, struct vdo_page_completion, completion);
	return page_completion;
}
/*
 * Page cache interface. Only the declarations are visible here; statements marked NOTE(review)
 * are inferred from names and signatures and should be confirmed against the implementations.
 */
/*
 * NOTE(review): presumably releases the page reference held by the vdo_page_completion which
 * embeds @completion -- confirm.
 */
void vdo_release_page_completion(struct vdo_completion *completion);
/*
 * NOTE(review): presumably requests the block map page at @pbn from the cache of @zone, using
 * @page_completion to await it -- confirm.
 *
 * @page_completion: the page completion to use
 * @zone: the block map zone
 * @pbn: the physical block number of the page
 * @writable: whether the page may be modified
 * @parent, @callback, @error_handler: NOTE(review): presumably the completion's parent and the
 *	actions to run on success and on error -- confirm
 * @requeue: NOTE(review): meaning not evident from this declaration -- confirm
 */
void vdo_get_page(struct vdo_page_completion *page_completion,
struct block_map_zone *zone, physical_block_number_t pbn,
bool writable, void *parent, vdo_action_fn callback,
vdo_action_fn error_handler, bool requeue);
/*
 * NOTE(review): presumably requests that the page referenced by @completion be written out --
 * confirm.
 */
void vdo_request_page_write(struct vdo_completion *completion);
/*
 * Returns an int which callers must check (__must_check); @page_ptr is a pointer through which a
 * struct block_map_page pointer can be returned.
 * NOTE(review): presumably the int is a status code and *page_ptr is set to the cached page for
 * @completion on success -- confirm.
 */
int __must_check vdo_get_cached_page(struct vdo_completion *completion,
struct block_map_page **page_ptr);
/*
 * Returns an int which callers must check (__must_check).
 * NOTE(review): presumably invalidates the pages held in @cache and returns a status code --
 * confirm.
 */
int __must_check vdo_invalidate_page_cache(struct vdo_page_cache *cache);
static inline struct block_map_page * __must_check
vdo_as_block_map_page(struct tree_page *tree_page)
{