mirror of
git://sourceware.org/git/lvm2.git
synced 2025-01-02 01:18:26 +03:00
Merge branch '2018-05-30-bcache-radix-tree'
This commit is contained in:
commit
cb379c86c4
@ -21,6 +21,7 @@ ifeq ("@CLUSTER@", "shared")
|
|||||||
endif
|
endif
|
||||||
|
|
||||||
SOURCES =\
|
SOURCES =\
|
||||||
|
../base/data-struct/radix-tree.c \
|
||||||
activate/activate.c \
|
activate/activate.c \
|
||||||
cache/lvmcache.c \
|
cache/lvmcache.c \
|
||||||
cache_segtype/cache.c \
|
cache_segtype/cache.c \
|
||||||
|
@ -15,6 +15,8 @@
|
|||||||
#define _GNU_SOURCE
|
#define _GNU_SOURCE
|
||||||
|
|
||||||
#include "bcache.h"
|
#include "bcache.h"
|
||||||
|
|
||||||
|
#include "base/data-struct/radix-tree.h"
|
||||||
#include "lvm-logging.h"
|
#include "lvm-logging.h"
|
||||||
#include "log.h"
|
#include "log.h"
|
||||||
|
|
||||||
@ -453,12 +455,7 @@ struct bcache {
|
|||||||
struct dm_list clean;
|
struct dm_list clean;
|
||||||
struct dm_list io_pending;
|
struct dm_list io_pending;
|
||||||
|
|
||||||
/*
|
struct radix_tree *rtree;
|
||||||
* Hash table.
|
|
||||||
*/
|
|
||||||
unsigned nr_buckets;
|
|
||||||
unsigned hash_mask;
|
|
||||||
struct dm_list *buckets;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Statistics
|
* Statistics
|
||||||
@ -473,75 +470,50 @@ struct bcache {
|
|||||||
|
|
||||||
//----------------------------------------------------------------
|
//----------------------------------------------------------------
|
||||||
|
|
||||||
/* 2^63 + 2^61 - 2^57 + 2^54 - 2^51 - 2^18 + 1 */
|
struct key_parts {
|
||||||
#define GOLDEN_RATIO_PRIME_64 0x9e37fffffffc0001ULL
|
uint32_t fd;
|
||||||
|
uint64_t b;
|
||||||
|
} __attribute__ ((packed));
|
||||||
|
|
||||||
static unsigned _hash(struct bcache *cache, int fd, uint64_t i)
|
union key {
|
||||||
|
struct key_parts parts;
|
||||||
|
uint8_t bytes[12];
|
||||||
|
};
|
||||||
|
|
||||||
|
static struct block *_block_lookup(struct bcache *cache, int fd, uint64_t i)
|
||||||
{
|
{
|
||||||
uint64_t h = (i << 10) & fd;
|
union key k;
|
||||||
h *= GOLDEN_RATIO_PRIME_64;
|
union radix_value v;
|
||||||
return h & cache->hash_mask;
|
|
||||||
}
|
|
||||||
|
|
||||||
static struct block *_hash_lookup(struct bcache *cache, int fd, uint64_t i)
|
k.parts.fd = fd;
|
||||||
{
|
k.parts.b = i;
|
||||||
struct block *b;
|
|
||||||
unsigned h = _hash(cache, fd, i);
|
|
||||||
|
|
||||||
dm_list_iterate_items_gen (b, cache->buckets + h, hash)
|
if (radix_tree_lookup(cache->rtree, k.bytes, k.bytes + sizeof(k.bytes), &v))
|
||||||
if (b->fd == fd && b->index == i)
|
return v.ptr;
|
||||||
return b;
|
|
||||||
|
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void _hash_insert(struct block *b)
|
static bool _block_insert(struct block *b)
|
||||||
{
|
{
|
||||||
unsigned h = _hash(b->cache, b->fd, b->index);
|
union key k;
|
||||||
dm_list_add_h(b->cache->buckets + h, &b->hash);
|
union radix_value v;
|
||||||
|
|
||||||
|
k.parts.fd = b->fd;
|
||||||
|
k.parts.b = b->index;
|
||||||
|
v.ptr = b;
|
||||||
|
|
||||||
|
return radix_tree_insert(b->cache->rtree, k.bytes, k.bytes + sizeof(k.bytes), v);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void _hash_remove(struct block *b)
|
static void _block_remove(struct block *b)
|
||||||
{
|
{
|
||||||
dm_list_del(&b->hash);
|
union key k;
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
k.parts.fd = b->fd;
|
||||||
* Must return a power of 2.
|
k.parts.b = b->index;
|
||||||
*/
|
|
||||||
static unsigned _calc_nr_buckets(unsigned nr_blocks)
|
|
||||||
{
|
|
||||||
unsigned r = 8;
|
|
||||||
unsigned n = nr_blocks / 4;
|
|
||||||
|
|
||||||
if (n < 8)
|
radix_tree_remove(b->cache->rtree, k.bytes, k.bytes + sizeof(k.bytes));
|
||||||
n = 8;
|
|
||||||
|
|
||||||
while (r < n)
|
|
||||||
r <<= 1;
|
|
||||||
|
|
||||||
return r;
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool _hash_table_init(struct bcache *cache, unsigned nr_entries)
|
|
||||||
{
|
|
||||||
unsigned i;
|
|
||||||
|
|
||||||
cache->nr_buckets = _calc_nr_buckets(nr_entries);
|
|
||||||
cache->hash_mask = cache->nr_buckets - 1;
|
|
||||||
cache->buckets = dm_malloc(cache->nr_buckets * sizeof(*cache->buckets));
|
|
||||||
if (!cache->buckets)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
for (i = 0; i < cache->nr_buckets; i++)
|
|
||||||
dm_list_init(cache->buckets + i);
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void _hash_table_exit(struct bcache *cache)
|
|
||||||
{
|
|
||||||
dm_free(cache->buckets);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//----------------------------------------------------------------
|
//----------------------------------------------------------------
|
||||||
@ -587,6 +559,11 @@ static struct block *_alloc_block(struct bcache *cache)
|
|||||||
return dm_list_struct_base(_list_pop(&cache->free), struct block, list);
|
return dm_list_struct_base(_list_pop(&cache->free), struct block, list);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void _free_block(struct block *b)
|
||||||
|
{
|
||||||
|
dm_list_add(&b->cache->free, &b->list);
|
||||||
|
}
|
||||||
|
|
||||||
/*----------------------------------------------------------------
|
/*----------------------------------------------------------------
|
||||||
* Clean/dirty list management.
|
* Clean/dirty list management.
|
||||||
* Always use these methods to ensure nr_dirty_ is correct.
|
* Always use these methods to ensure nr_dirty_ is correct.
|
||||||
@ -742,7 +719,7 @@ static struct block *_find_unused_clean_block(struct bcache *cache)
|
|||||||
dm_list_iterate_items (b, &cache->clean) {
|
dm_list_iterate_items (b, &cache->clean) {
|
||||||
if (!b->ref_count) {
|
if (!b->ref_count) {
|
||||||
_unlink_block(b);
|
_unlink_block(b);
|
||||||
_hash_remove(b);
|
_block_remove(b);
|
||||||
return b;
|
return b;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -779,22 +756,12 @@ static struct block *_new_block(struct bcache *cache, int fd, block_address i, b
|
|||||||
b->ref_count = 0;
|
b->ref_count = 0;
|
||||||
b->error = 0;
|
b->error = 0;
|
||||||
|
|
||||||
_hash_insert(b);
|
if (!_block_insert(b)) {
|
||||||
|
log_error("bcache unable to insert block in radix tree (OOM?)");
|
||||||
|
_free_block(b);
|
||||||
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if 0
|
|
||||||
if (!b) {
|
|
||||||
log_error("bcache no new blocks for fd %d index %u "
|
|
||||||
"clean %u free %u dirty %u pending %u nr_data_blocks %u nr_cache_blocks %u",
|
|
||||||
fd, (uint32_t) i,
|
|
||||||
dm_list_size(&cache->clean),
|
|
||||||
dm_list_size(&cache->free),
|
|
||||||
dm_list_size(&cache->dirty),
|
|
||||||
dm_list_size(&cache->io_pending),
|
|
||||||
(uint32_t)cache->nr_data_blocks,
|
|
||||||
(uint32_t)cache->nr_cache_blocks);
|
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
return b;
|
return b;
|
||||||
}
|
}
|
||||||
@ -833,7 +800,7 @@ static struct block *_lookup_or_read_block(struct bcache *cache,
|
|||||||
int fd, block_address i,
|
int fd, block_address i,
|
||||||
unsigned flags)
|
unsigned flags)
|
||||||
{
|
{
|
||||||
struct block *b = _hash_lookup(cache, fd, i);
|
struct block *b = _block_lookup(cache, fd, i);
|
||||||
|
|
||||||
if (b) {
|
if (b) {
|
||||||
// FIXME: this is insufficient. We need to also catch a read
|
// FIXME: this is insufficient. We need to also catch a read
|
||||||
@ -937,7 +904,8 @@ struct bcache *bcache_create(sector_t block_sectors, unsigned nr_cache_blocks,
|
|||||||
dm_list_init(&cache->clean);
|
dm_list_init(&cache->clean);
|
||||||
dm_list_init(&cache->io_pending);
|
dm_list_init(&cache->io_pending);
|
||||||
|
|
||||||
if (!_hash_table_init(cache, nr_cache_blocks)) {
|
cache->rtree = radix_tree_create(NULL, NULL);
|
||||||
|
if (!cache->rtree) {
|
||||||
cache->engine->destroy(cache->engine);
|
cache->engine->destroy(cache->engine);
|
||||||
dm_free(cache);
|
dm_free(cache);
|
||||||
return NULL;
|
return NULL;
|
||||||
@ -952,7 +920,7 @@ struct bcache *bcache_create(sector_t block_sectors, unsigned nr_cache_blocks,
|
|||||||
|
|
||||||
if (!_init_free_list(cache, nr_cache_blocks, pgsize)) {
|
if (!_init_free_list(cache, nr_cache_blocks, pgsize)) {
|
||||||
cache->engine->destroy(cache->engine);
|
cache->engine->destroy(cache->engine);
|
||||||
_hash_table_exit(cache);
|
radix_tree_destroy(cache->rtree);
|
||||||
dm_free(cache);
|
dm_free(cache);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
@ -968,7 +936,7 @@ void bcache_destroy(struct bcache *cache)
|
|||||||
bcache_flush(cache);
|
bcache_flush(cache);
|
||||||
_wait_all(cache);
|
_wait_all(cache);
|
||||||
_exit_free_list(cache);
|
_exit_free_list(cache);
|
||||||
_hash_table_exit(cache);
|
radix_tree_destroy(cache->rtree);
|
||||||
cache->engine->destroy(cache->engine);
|
cache->engine->destroy(cache->engine);
|
||||||
dm_free(cache);
|
dm_free(cache);
|
||||||
}
|
}
|
||||||
@ -990,7 +958,7 @@ unsigned bcache_max_prefetches(struct bcache *cache)
|
|||||||
|
|
||||||
void bcache_prefetch(struct bcache *cache, int fd, block_address i)
|
void bcache_prefetch(struct bcache *cache, int fd, block_address i)
|
||||||
{
|
{
|
||||||
struct block *b = _hash_lookup(cache, fd, i);
|
struct block *b = _block_lookup(cache, fd, i);
|
||||||
|
|
||||||
if (!b) {
|
if (!b) {
|
||||||
if (cache->nr_io_pending < cache->max_io) {
|
if (cache->nr_io_pending < cache->max_io) {
|
||||||
@ -1003,11 +971,13 @@ void bcache_prefetch(struct bcache *cache, int fd, block_address i)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//----------------------------------------------------------------
|
||||||
|
|
||||||
static void _recycle_block(struct bcache *cache, struct block *b)
|
static void _recycle_block(struct bcache *cache, struct block *b)
|
||||||
{
|
{
|
||||||
_unlink_block(b);
|
_unlink_block(b);
|
||||||
_hash_remove(b);
|
_block_remove(b);
|
||||||
dm_list_add(&cache->free, &b->list);
|
_free_block(b);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool bcache_get(struct bcache *cache, int fd, block_address i,
|
bool bcache_get(struct bcache *cache, int fd, block_address i,
|
||||||
@ -1041,6 +1011,8 @@ bool bcache_get(struct bcache *cache, int fd, block_address i,
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//----------------------------------------------------------------
|
||||||
|
|
||||||
static void _put_ref(struct block *b)
|
static void _put_ref(struct block *b)
|
||||||
{
|
{
|
||||||
if (!b->ref_count) {
|
if (!b->ref_count) {
|
||||||
@ -1061,6 +1033,8 @@ void bcache_put(struct block *b)
|
|||||||
_preemptive_writeback(b->cache);
|
_preemptive_writeback(b->cache);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//----------------------------------------------------------------
|
||||||
|
|
||||||
bool bcache_flush(struct bcache *cache)
|
bool bcache_flush(struct bcache *cache)
|
||||||
{
|
{
|
||||||
// Only dirty data is on the errored list, since bad read blocks get
|
// Only dirty data is on the errored list, since bad read blocks get
|
||||||
@ -1083,6 +1057,7 @@ bool bcache_flush(struct bcache *cache)
|
|||||||
return dm_list_empty(&cache->errored);
|
return dm_list_empty(&cache->errored);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//----------------------------------------------------------------
|
||||||
/*
|
/*
|
||||||
* You can safely call this with a NULL block.
|
* You can safely call this with a NULL block.
|
||||||
*/
|
*/
|
||||||
@ -1115,29 +1090,72 @@ static bool _invalidate_block(struct bcache *cache, struct block *b)
|
|||||||
|
|
||||||
bool bcache_invalidate(struct bcache *cache, int fd, block_address i)
|
bool bcache_invalidate(struct bcache *cache, int fd, block_address i)
|
||||||
{
|
{
|
||||||
return _invalidate_block(cache, _hash_lookup(cache, fd, i));
|
return _invalidate_block(cache, _block_lookup(cache, fd, i));
|
||||||
}
|
}
|
||||||
|
|
||||||
// FIXME: switch to a trie, or maybe 1 hash table per fd? To save iterating
|
//----------------------------------------------------------------
|
||||||
// through the whole cache.
|
|
||||||
bool bcache_invalidate_fd(struct bcache *cache, int fd)
|
struct invalidate_iterator {
|
||||||
{
|
bool success;
|
||||||
struct block *b, *tmp;
|
struct radix_tree_iterator it;
|
||||||
bool r = true;
|
};
|
||||||
|
|
||||||
// Start writing back any dirty blocks on this fd.
|
static bool _writeback_v(struct radix_tree_iterator *it,
|
||||||
dm_list_iterate_items_safe (b, tmp, &cache->dirty)
|
uint8_t *kb, uint8_t *ke, union radix_value v)
|
||||||
if (b->fd == fd)
|
{
|
||||||
_issue_write(b);
|
struct block *b = v.ptr;
|
||||||
|
|
||||||
_wait_all(cache);
|
if (_test_flags(b, BF_DIRTY))
|
||||||
|
_issue_write(b);
|
||||||
// Everything should be in the clean list now.
|
|
||||||
dm_list_iterate_items_safe (b, tmp, &cache->clean)
|
return true;
|
||||||
if (b->fd == fd)
|
}
|
||||||
r = _invalidate_block(cache, b) && r;
|
|
||||||
|
static bool _invalidate_v(struct radix_tree_iterator *it,
|
||||||
return r;
|
uint8_t *kb, uint8_t *ke, union radix_value v)
|
||||||
|
{
|
||||||
|
struct block *b = v.ptr;
|
||||||
|
struct invalidate_iterator *iit = container_of(it, struct invalidate_iterator, it);
|
||||||
|
|
||||||
|
if (b->error || _test_flags(b, BF_DIRTY)) {
|
||||||
|
log_warn("bcache_invalidate: block (%d, %llu) still dirty",
|
||||||
|
b->fd, (unsigned long long) b->index);
|
||||||
|
iit->success = false;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (b->ref_count) {
|
||||||
|
log_warn("bcache_invalidate: block (%d, %llu) still held",
|
||||||
|
b->fd, (unsigned long long) b->index);
|
||||||
|
iit->success = false;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
_unlink_block(b);
|
||||||
|
_free_block(b);
|
||||||
|
|
||||||
|
// We can't remove the block from the radix tree yet because
|
||||||
|
// we're in the middle of an iteration.
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool bcache_invalidate_fd(struct bcache *cache, int fd)
|
||||||
|
{
|
||||||
|
union key k;
|
||||||
|
struct invalidate_iterator it;
|
||||||
|
|
||||||
|
k.parts.fd = fd;
|
||||||
|
|
||||||
|
it.it.visit = _writeback_v;
|
||||||
|
radix_tree_iterate(cache->rtree, k.bytes, k.bytes + sizeof(k.parts.fd), &it.it);
|
||||||
|
|
||||||
|
_wait_all(cache);
|
||||||
|
|
||||||
|
it.success = true;
|
||||||
|
it.it.visit = _invalidate_v;
|
||||||
|
radix_tree_iterate(cache->rtree, k.bytes, k.bytes + sizeof(k.parts.fd), &it.it);
|
||||||
|
radix_tree_remove_prefix(cache->rtree, k.bytes, k.bytes + sizeof(k.parts.fd));
|
||||||
|
return it.success;
|
||||||
}
|
}
|
||||||
|
|
||||||
//----------------------------------------------------------------
|
//----------------------------------------------------------------
|
||||||
|
@ -306,7 +306,7 @@ LIB_VERSION_DM := $(shell $(AWK) -F '.' '{printf "%s.%s",$$1,$$2}' $(top_srcdir)
|
|||||||
|
|
||||||
LIB_VERSION_APP := $(shell $(AWK) -F '[(). ]' '{printf "%s.%s",$$1,$$4}' $(top_srcdir)/VERSION)
|
LIB_VERSION_APP := $(shell $(AWK) -F '[(). ]' '{printf "%s.%s",$$1,$$4}' $(top_srcdir)/VERSION)
|
||||||
|
|
||||||
INCLUDES += -I$(srcdir) -I$(top_builddir)/include
|
INCLUDES += -I$(top_srcdir) -I$(srcdir) -I$(top_builddir)/include
|
||||||
|
|
||||||
INC_LNS = $(top_builddir)/include/.symlinks_created
|
INC_LNS = $(top_builddir)/include/.symlinks_created
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user