From e18db9e984e69e3802868bac10ce0703ce31e7d0 Mon Sep 17 00:00:00 2001
From: Willy Tarreau <w@1wt.eu>
Date: Tue, 16 Oct 2018 10:28:54 +0200
Subject: [PATCH] MEDIUM: pools: implement a thread-local cache for pool
 entries

Each thread now keeps the last ~512 kB of freed objects in a local
cache. There are some heuristics involved so that a specific pool cannot
use more than 1/8 of the total cache in number of objects. Tests have
shown that 512 kB is an optimal size on a 24-thread test running on a
dual-socket machine, resulting in an overall 7.5% performance increase
and a cache miss ratio dropping from 19.2% to 17.7%. Anyway it seems
pointless to keep more than an L2's worth of cached data, which probably
explains why sizes between 256 and 512 kB are optimal.

Cached objects appear in two lists: one per pool, and one common LRU
list to help with fair eviction. Currently there is no way to check each
thread's cache state nor to flush it. This cache cannot be disabled, and
is enabled as soon as the lockless pools are enabled (i.e. threads are
enabled, no pool debugging is in use and the CPU supports a double-word
CAS).
---
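Not part of the patch: the put-side policy implemented below (see
pool_put_to_cache() in include/common/memory.h) can be read in isolation
with the following standalone sketch. It mirrors the thresholds only:
refuse to cache once more than 3/4 of the budget is used and the pool
already holds more than 1/8 of the cached objects. All names here
(CACHE_SIZE, should_cache, pool_count, ...) are illustrative, not
HAProxy symbols, and __thread stands in for HAProxy's THREAD_LOCAL.

#include <stddef.h>
#include <stdio.h>

#define CACHE_SIZE 524288 /* stands in for CONFIG_HAP_POOL_CACHE_SIZE */
#define MAX_POOLS  32     /* stands in for MAX_BASE_POOLS */

static __thread size_t cache_bytes;           /* bytes currently cached   */
static __thread size_t cache_count;           /* objects currently cached */
static __thread size_t pool_count[MAX_POOLS]; /* per-pool cached objects  */

/* decides whether a freed object from pool <idx> should go to the local
 * cache (1) or back to the shared pool (0). Mirrors pool_put_to_cache():
 * refuse once more than 3/4 of the cache is used and this pool already
 * holds more than 1/8 of the cached objects (with a floor of 16).
 */
static int should_cache(int idx)
{
	if (idx < 0) /* pool not eligible for local caching */
		return 0;
	if (cache_bytes > CACHE_SIZE * 3 / 4 &&
	    pool_count[idx] >= 16 + cache_count / 8)
		return 0;
	return 1;
}

int main(void)
{
	int i;

	/* free 5000 1kB objects from pool 0: caching stops right after the
	 * 3/4 watermark because this single pool exceeds its fair share.
	 */
	for (i = 0; i < 5000; i++) {
		if (should_cache(0)) {
			cache_bytes += 1024;
			cache_count++;
			pool_count[0]++;
		}
	}
	printf("cached %zu objects, %zu bytes\n", cache_count, cache_bytes);
	return 0;
}

Built with gcc, this prints "cached 385 objects, 394240 bytes": the
385th free crosses the 3/4 watermark (393216 bytes), after which this
pool is refused because it alone exceeds the per-pool share.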
 include/common/buffer.h |  9 +++++-
 include/common/config.h |  3 ++
 include/common/memory.h | 69 ++++++++++++++++++++++++++++++++++++++++-
 src/memory.c            | 47 ++++++++++++++++++++++++++++
 4 files changed, 126 insertions(+), 2 deletions(-)

diff --git a/include/common/buffer.h b/include/common/buffer.h
index ed9f90b31..1c7963fd4 100644
--- a/include/common/buffer.h
+++ b/include/common/buffer.h
@@ -141,10 +141,17 @@ static inline void b_free(struct buffer *buf)
 static inline struct buffer *b_alloc_margin(struct buffer *buf, int margin)
 {
 	char *area;
+	ssize_t idx;
+	unsigned int cached;
 
 	if (buf->size)
 		return buf;
 
+	cached = 0;
+	idx = pool_get_index(pool_head_buffer);
+	if (idx >= 0)
+		cached = pool_cache[idx].count;
+
 	*buf = BUF_WANTED;
 
 #ifndef CONFIG_HAP_LOCKLESS_POOLS
@@ -152,7 +159,7 @@ static inline struct buffer *b_alloc_margin(struct buffer *buf, int margin)
 #endif
 
 	/* fast path */
-	if ((pool_head_buffer->allocated - pool_head_buffer->used) > margin) {
+	if ((pool_head_buffer->allocated - pool_head_buffer->used + cached) > margin) {
 		area = __pool_get_first(pool_head_buffer);
 		if (likely(area)) {
 #ifndef CONFIG_HAP_LOCKLESS_POOLS
diff --git a/include/common/config.h b/include/common/config.h
index 47ff18c03..ff4a50cef 100644
--- a/include/common/config.h
+++ b/include/common/config.h
@@ -60,6 +60,9 @@
  */
 #if defined(USE_THREAD) && defined(HA_HAVE_CAS_DW) && !defined(DEBUG_NO_LOCKLESS_POOLS) && !defined(DEBUG_UAF)
 #define CONFIG_HAP_LOCKLESS_POOLS
+#ifndef CONFIG_HAP_POOL_CACHE_SIZE
+#define CONFIG_HAP_POOL_CACHE_SIZE 524288
+#endif
 #endif
 
 /* CONFIG_HAP_INLINE_FD_SET
diff --git a/include/common/memory.h b/include/common/memory.h
index af1764988..a0b6d6203 100644
--- a/include/common/memory.h
+++ b/include/common/memory.h
@@ -50,6 +50,22 @@
 
 #define MAX_BASE_POOLS 32
 
+struct pool_cache_head {
+	struct list list;    /* head of objects in this pool */
+	size_t size;         /* size of an object */
+	unsigned int count;  /* number of objects in this pool */
+};
+
+struct pool_cache_item {
+	struct list by_pool; /* link to objects in this pool */
+	struct list by_lru;  /* link to objects by LRU order */
+};
+
+extern THREAD_LOCAL struct pool_cache_head pool_cache[MAX_BASE_POOLS];
+extern THREAD_LOCAL struct list pool_lru_head; /* oldest objects   */
+extern THREAD_LOCAL size_t pool_cache_bytes;   /* total cache size */
+extern THREAD_LOCAL size_t pool_cache_count;   /* #cache objects   */
+
 #ifdef CONFIG_HAP_LOCKLESS_POOLS
 struct pool_free_list {
 	void **free_list;
@@ -141,6 +157,32 @@ static inline ssize_t pool_get_index(const struct pool_head *pool)
 }
 
 #ifdef CONFIG_HAP_LOCKLESS_POOLS
+
+/* Tries to retrieve an object from the local pool cache corresponding to pool
+ * <pool>. Returns NULL if none is available.
+ */
+static inline void *__pool_get_from_cache(struct pool_head *pool)
+{
+	ssize_t idx = pool_get_index(pool);
+	struct pool_cache_item *item;
+
+	/* pool not in cache */
+	if (idx < 0)
+		return NULL;
+
+	/* never allocated or empty */
+	if (pool_cache[idx].list.n == NULL || LIST_ISEMPTY(&pool_cache[idx].list))
+		return NULL;
+
+	item = LIST_NEXT(&pool_cache[idx].list, typeof(item), by_pool);
+	pool_cache[idx].count--;
+	pool_cache_bytes -= pool_cache[idx].size;
+	pool_cache_count--;
+	LIST_DEL(&item->by_pool);
+	LIST_DEL(&item->by_lru);
+	return item;
+}
+
 /*
  * Returns a pointer to type <type> taken from the pool <pool> if
  * available, otherwise returns NULL. No malloc() is attempted, and poisonning
@@ -149,6 +191,10 @@ static inline ssize_t pool_get_index(const struct pool_head *pool)
 static inline void *__pool_get_first(struct pool_head *pool)
 {
 	struct pool_free_list cmp, new;
+	void *ret = __pool_get_from_cache(pool);
+
+	if (ret)
+		return ret;
 
 	cmp.seq = pool->seq;
 	__ha_barrier_load();
@@ -230,6 +276,27 @@ static inline void __pool_free(struct pool_head *pool, void *ptr)
 	HA_ATOMIC_SUB(&pool->used, 1);
 }
 
+/* frees an object to the local cache, possibly pushing oldest objects to the
+ * global pool.
+ */
+void __pool_put_to_cache(struct pool_head *pool, void *ptr, ssize_t idx);
+static inline void pool_put_to_cache(struct pool_head *pool, void *ptr)
+{
+	ssize_t idx = pool_get_index(pool);
+
+	/* pool not in cache, or too many objects for this pool (more than
+	 * 3/4 of the cache is used and this pool uses more than 1/8 of
+	 * the cached objects).
+	 */
+	if (idx < 0 ||
+	    (pool_cache_bytes > CONFIG_HAP_POOL_CACHE_SIZE * 3 / 4 &&
+	     pool_cache[idx].count >= 16 + pool_cache_count / 8)) {
+		__pool_free(pool, ptr);
+		return;
+	}
+	__pool_put_to_cache(pool, ptr, idx);
+}
+
 /*
  * Puts a memory area back to the corresponding pool.
  * Items are chained directly through a pointer that
@@ -247,7 +314,7 @@ static inline void pool_free(struct pool_head *pool, void *ptr)
 		if (*POOL_LINK(pool, ptr) != (void *)pool)
 			*(volatile int *)0 = 0;
 #endif
-		__pool_free(pool, ptr);
+		pool_put_to_cache(pool, ptr);
 	}
 }
 
diff --git a/src/memory.c b/src/memory.c
index c24339616..51c623a85 100644
--- a/src/memory.c
+++ b/src/memory.c
@@ -17,6 +17,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -34,6 +35,11 @@
 struct pool_head pool_base_start[MAX_BASE_POOLS] = { };
 unsigned int pool_base_count = 0;
 
+THREAD_LOCAL struct pool_cache_head pool_cache[MAX_BASE_POOLS] = { };
+THREAD_LOCAL struct list pool_lru_head = { }; /* oldest objects   */
+THREAD_LOCAL size_t pool_cache_bytes = 0;     /* total cache size */
+THREAD_LOCAL size_t pool_cache_count = 0;     /* #cache objects   */
+
 static struct list pools = LIST_HEAD_INIT(pools);
 int mem_poison_byte = -1;
 
@@ -242,6 +248,47 @@ void pool_gc(struct pool_head *pool_ctx)
 	HA_ATOMIC_STORE(&recurse, 0);
 }
 
+
+/* frees an object to the local cache, possibly pushing oldest objects to the
+ * global pool. Must not be called directly.
+ */
+void __pool_put_to_cache(struct pool_head *pool, void *ptr, ssize_t idx)
+{
+	struct pool_cache_item *item = (struct pool_cache_item *)ptr;
+	struct pool_cache_head *ph = &pool_cache[idx];
+
+	/* never allocated or empty */
+	if (unlikely(ph->list.n == NULL)) {
+		LIST_INIT(&ph->list);
+		ph->size = pool->size;
+		if (pool_lru_head.n == NULL)
+			LIST_INIT(&pool_lru_head);
+	}
+
+	LIST_ADD(&ph->list, &item->by_pool);
+	LIST_ADD(&pool_lru_head, &item->by_lru);
+	ph->count++;
+	pool_cache_count++;
+	pool_cache_bytes += ph->size;
+
+	if (pool_cache_bytes <= CONFIG_HAP_POOL_CACHE_SIZE)
+		return;
+
+	do {
+		item = LIST_PREV(&pool_lru_head, struct pool_cache_item *, by_lru);
+		/* note: by definition we remove oldest objects so they also are the
+		 * oldest in their own pools, thus their next is the pool's head.
+		 */
+		ph = LIST_NEXT(&item->by_pool, struct pool_cache_head *, list);
+		LIST_DEL(&item->by_pool);
+		LIST_DEL(&item->by_lru);
+		ph->count--;
+		pool_cache_count--;
+		pool_cache_bytes -= ph->size;
+		__pool_free(pool_base_start + (ph - pool_cache), item);
+	} while (pool_cache_bytes > CONFIG_HAP_POOL_CACHE_SIZE * 7 / 8);
+}
+
 #else /* CONFIG_HAP_LOCKLESS_POOLS */
 
 /* Allocates new entries for pool <pool> until there are at least <avail> + 1
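Not part of the patch either: the eviction side, the do/while loop in
__pool_put_to_cache() above, can be modelled the same way. This
standalone sketch replaces HAProxy's intrusive mini-clist with a minimal
self-linked list and __pool_free() with free(); the names and the tiny
4 kB budget with 512-byte objects are illustrative only. Freed objects
are pushed at the head of the LRU list; once the budget is exceeded, the
tail (the oldest objects) is drained until usage falls back under 7/8 of
the budget.

#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>

#define CACHE_SIZE 4096 /* tiny budget so eviction shows up quickly */

struct item {
	struct item *prev, *next; /* LRU links: head = newest, tail = oldest */
	size_t size;
};

static struct item lru = { &lru, &lru, 0 }; /* self-linked empty list head */
static size_t cache_bytes;

/* insert a freed object at the head of the LRU list (newest position) */
static void lru_push(struct item *it)
{
	it->next = lru.next;
	it->prev = &lru;
	lru.next->prev = it;
	lru.next = it;
	cache_bytes += it->size;
}

/* drain the tail (oldest objects) until usage falls under 7/8 of the
 * budget, as the do/while loop in __pool_put_to_cache() does; free()
 * stands in for __pool_free() returning the object to the shared pool.
 */
static void lru_evict(void)
{
	while (cache_bytes > CACHE_SIZE * 7 / 8) {
		struct item *old = lru.prev;

		old->prev->next = &lru;
		lru.prev = old->prev;
		cache_bytes -= old->size;
		free(old);
	}
}

int main(void)
{
	int i;

	for (i = 0; i < 100; i++) {
		struct item *it = malloc(sizeof(*it));

		it->size = 512;
		lru_push(it);
		if (cache_bytes > CACHE_SIZE) /* over budget: evict oldest */
			lru_evict();
	}
	/* the few items still cached are deliberately leaked at exit */
	printf("%zu bytes cached after 100 frees\n", cache_bytes);
	return 0;
}

Built with gcc, this prints "4096 bytes cached after 100 frees": the
cache oscillates between the 4096-byte ceiling and the 3584-byte (7/8)
low-water mark, the same hysteresis the patch relies on to avoid
evicting exactly one object on every subsequent free.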