3584240b9c
Rename all uds_log_* to vdo_log_*. Signed-off-by: Mike Snitzer <snitzer@kernel.org> Signed-off-by: Chung Chung <cchung@redhat.com> Signed-off-by: Matthew Sakai <msakai@redhat.com>
439 lines
13 KiB
C
439 lines
13 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/*
|
|
* Copyright 2023 Red Hat
|
|
*/
|
|
|
|
#include <linux/delay.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/sched/mm.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/vmalloc.h>
|
|
|
|
#include "logger.h"
|
|
#include "memory-alloc.h"
|
|
#include "permassert.h"
|
|
|
|
/*
|
|
* UDS and VDO keep track of which threads are allowed to allocate memory freely, and which threads
|
|
* must be careful to not do a memory allocation that does an I/O request. The 'allocating_threads'
|
|
* thread_registry and its associated methods implement this tracking.
|
|
*/
|
|
static struct thread_registry allocating_threads;
|
|
|
|
static inline bool allocations_allowed(void)
|
|
{
|
|
return vdo_lookup_thread(&allocating_threads) != NULL;
|
|
}
|
|
|
|
/*
|
|
* Register the current thread as an allocating thread.
|
|
*
|
|
* An optional flag location can be supplied indicating whether, at any given point in time, the
|
|
* threads associated with that flag should be allocating storage. If the flag is false, a message
|
|
* will be logged.
|
|
*
|
|
* If no flag is supplied, the thread is always allowed to allocate storage without complaint.
|
|
*
|
|
* @new_thread: registered_thread structure to use for the current thread
|
|
* @flag_ptr: Location of the allocation-allowed flag
|
|
*/
|
|
void vdo_register_allocating_thread(struct registered_thread *new_thread,
|
|
const bool *flag_ptr)
|
|
{
|
|
if (flag_ptr == NULL) {
|
|
static const bool allocation_always_allowed = true;
|
|
|
|
flag_ptr = &allocation_always_allowed;
|
|
}
|
|
|
|
vdo_register_thread(&allocating_threads, new_thread, flag_ptr);
|
|
}
|
|
|
|
/* Unregister the current thread as an allocating thread. */
|
|
void vdo_unregister_allocating_thread(void)
|
|
{
|
|
vdo_unregister_thread(&allocating_threads);
|
|
}
|
|
|
|
/*
 * We track how much memory has been allocated and freed. When we unload the module, we log an
 * error if we have not freed all the memory that we allocated. Nearly all memory allocation and
 * freeing is done using this module.
 *
 * We do not use kernel functions such as kvasprintf(), which allocate memory indirectly using
 * kmalloc.
 *
 * These data structures and functions are used to track the amount of memory used.
 */
|
|
|
|
/*
 * Bookkeeping record for one tracked vmalloc allocation. We allocate very few large objects, and
 * they are not allocated or released in a performance-critical stage, so keeping these records on
 * a singly linked list is good enough.
 */
struct vmalloc_block_info {
	/* Address of the tracked allocation; used as the lookup key on removal */
	void *ptr;
	/* Number of bytes this allocation contributes to the vmalloc byte count */
	size_t size;
	/* Next record in the list, or NULL for the last one */
	struct vmalloc_block_info *next;
};
|
|
|
|
static struct {
|
|
spinlock_t lock;
|
|
size_t kmalloc_blocks;
|
|
size_t kmalloc_bytes;
|
|
size_t vmalloc_blocks;
|
|
size_t vmalloc_bytes;
|
|
size_t peak_bytes;
|
|
struct vmalloc_block_info *vmalloc_list;
|
|
} memory_stats __cacheline_aligned;
|
|
|
|
static void update_peak_usage(void)
|
|
{
|
|
size_t total_bytes = memory_stats.kmalloc_bytes + memory_stats.vmalloc_bytes;
|
|
|
|
if (total_bytes > memory_stats.peak_bytes)
|
|
memory_stats.peak_bytes = total_bytes;
|
|
}
|
|
|
|
static void add_kmalloc_block(size_t size)
|
|
{
|
|
unsigned long flags;
|
|
|
|
spin_lock_irqsave(&memory_stats.lock, flags);
|
|
memory_stats.kmalloc_blocks++;
|
|
memory_stats.kmalloc_bytes += size;
|
|
update_peak_usage();
|
|
spin_unlock_irqrestore(&memory_stats.lock, flags);
|
|
}
|
|
|
|
static void remove_kmalloc_block(size_t size)
|
|
{
|
|
unsigned long flags;
|
|
|
|
spin_lock_irqsave(&memory_stats.lock, flags);
|
|
memory_stats.kmalloc_blocks--;
|
|
memory_stats.kmalloc_bytes -= size;
|
|
spin_unlock_irqrestore(&memory_stats.lock, flags);
|
|
}
|
|
|
|
static void add_vmalloc_block(struct vmalloc_block_info *block)
|
|
{
|
|
unsigned long flags;
|
|
|
|
spin_lock_irqsave(&memory_stats.lock, flags);
|
|
block->next = memory_stats.vmalloc_list;
|
|
memory_stats.vmalloc_list = block;
|
|
memory_stats.vmalloc_blocks++;
|
|
memory_stats.vmalloc_bytes += block->size;
|
|
update_peak_usage();
|
|
spin_unlock_irqrestore(&memory_stats.lock, flags);
|
|
}
|
|
|
|
static void remove_vmalloc_block(void *ptr)
|
|
{
|
|
struct vmalloc_block_info *block;
|
|
struct vmalloc_block_info **block_ptr;
|
|
unsigned long flags;
|
|
|
|
spin_lock_irqsave(&memory_stats.lock, flags);
|
|
for (block_ptr = &memory_stats.vmalloc_list;
|
|
(block = *block_ptr) != NULL;
|
|
block_ptr = &block->next) {
|
|
if (block->ptr == ptr) {
|
|
*block_ptr = block->next;
|
|
memory_stats.vmalloc_blocks--;
|
|
memory_stats.vmalloc_bytes -= block->size;
|
|
break;
|
|
}
|
|
}
|
|
|
|
spin_unlock_irqrestore(&memory_stats.lock, flags);
|
|
if (block != NULL)
|
|
vdo_free(block);
|
|
else
|
|
vdo_log_info("attempting to remove ptr %px not found in vmalloc list", ptr);
|
|
}
|
|
|
|
/*
|
|
* Determine whether allocating a memory block should use kmalloc or __vmalloc.
|
|
*
|
|
* vmalloc can allocate any integral number of pages.
|
|
*
|
|
* kmalloc can allocate any number of bytes up to a configured limit, which defaults to 8 megabytes
|
|
* on some systems. kmalloc is especially good when memory is being both allocated and freed, and
|
|
* it does this efficiently in a multi CPU environment.
|
|
*
|
|
* kmalloc usually rounds the size of the block up to the next power of two, so when the requested
|
|
* block is bigger than PAGE_SIZE / 2 bytes, kmalloc will never give you less space than the
|
|
* corresponding vmalloc allocation. Sometimes vmalloc will use less overhead than kmalloc.
|
|
*
|
|
* The advantages of kmalloc do not help out UDS or VDO, because we allocate all our memory up
|
|
* front and do not free and reallocate it. Sometimes we have problems using kmalloc, because the
|
|
* Linux memory page map can become so fragmented that kmalloc will not give us a 32KB chunk. We
|
|
* have used vmalloc as a backup to kmalloc in the past, and a follow-up vmalloc of 32KB will work.
|
|
* But there is no strong case to be made for using kmalloc over vmalloc for these size chunks.
|
|
*
|
|
* The kmalloc/vmalloc boundary is set at 4KB, and kmalloc gets the 4KB requests. There is no
|
|
* strong reason for favoring either kmalloc or vmalloc for 4KB requests, except that tracking
|
|
* vmalloc statistics uses a linked list implementation. Using a simple test, this choice of
|
|
* boundary results in 132 vmalloc calls. Using vmalloc for requests of exactly 4KB results in an
|
|
* additional 6374 vmalloc calls, which is much less efficient for tracking.
|
|
*
|
|
* @size: How many bytes to allocate
|
|
*/
|
|
static inline bool use_kmalloc(size_t size)
|
|
{
|
|
return size <= PAGE_SIZE;
|
|
}
|
|
|
|
/*
|
|
* Allocate storage based on memory size and alignment, logging an error if the allocation fails.
|
|
* The memory will be zeroed.
|
|
*
|
|
* @size: The size of an object
|
|
* @align: The required alignment
|
|
* @what: What is being allocated (for error logging)
|
|
* @ptr: A pointer to hold the allocated memory
|
|
*
|
|
* Return: VDO_SUCCESS or an error code
|
|
*/
|
|
int vdo_allocate_memory(size_t size, size_t align, const char *what, void *ptr)
|
|
{
|
|
/*
|
|
* The __GFP_RETRY_MAYFAIL flag means the VM implementation will retry memory reclaim
|
|
* procedures that have previously failed if there is some indication that progress has
|
|
* been made elsewhere. It can wait for other tasks to attempt high level approaches to
|
|
* freeing memory such as compaction (which removes fragmentation) and page-out. There is
|
|
* still a definite limit to the number of retries, but it is a larger limit than with
|
|
* __GFP_NORETRY. Allocations with this flag may fail, but only when there is genuinely
|
|
* little unused memory. While these allocations do not directly trigger the OOM killer,
|
|
* their failure indicates that the system is likely to need to use the OOM killer soon.
|
|
* The caller must handle failure, but can reasonably do so by failing a higher-level
|
|
* request, or completing it only in a much less efficient manner.
|
|
*/
|
|
const gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | __GFP_RETRY_MAYFAIL;
|
|
unsigned int noio_flags;
|
|
bool allocations_restricted = !allocations_allowed();
|
|
unsigned long start_time;
|
|
void *p = NULL;
|
|
|
|
if (unlikely(ptr == NULL))
|
|
return -EINVAL;
|
|
|
|
if (size == 0) {
|
|
*((void **) ptr) = NULL;
|
|
return VDO_SUCCESS;
|
|
}
|
|
|
|
if (allocations_restricted)
|
|
noio_flags = memalloc_noio_save();
|
|
|
|
start_time = jiffies;
|
|
if (use_kmalloc(size) && (align < PAGE_SIZE)) {
|
|
p = kmalloc(size, gfp_flags | __GFP_NOWARN);
|
|
if (p == NULL) {
|
|
/*
|
|
* It is possible for kmalloc to fail to allocate memory because there is
|
|
* no page available. A short sleep may allow the page reclaimer to
|
|
* free a page.
|
|
*/
|
|
fsleep(1000);
|
|
p = kmalloc(size, gfp_flags);
|
|
}
|
|
|
|
if (p != NULL)
|
|
add_kmalloc_block(ksize(p));
|
|
} else {
|
|
struct vmalloc_block_info *block;
|
|
|
|
if (vdo_allocate(1, struct vmalloc_block_info, __func__, &block) == VDO_SUCCESS) {
|
|
/*
|
|
* It is possible for __vmalloc to fail to allocate memory because there
|
|
* are no pages available. A short sleep may allow the page reclaimer
|
|
* to free enough pages for a small allocation.
|
|
*
|
|
* For larger allocations, the page_alloc code is racing against the page
|
|
* reclaimer. If the page reclaimer can stay ahead of page_alloc, the
|
|
* __vmalloc will succeed. But if page_alloc overtakes the page reclaimer,
|
|
* the allocation fails. It is possible that more retries will succeed.
|
|
*/
|
|
for (;;) {
|
|
p = __vmalloc(size, gfp_flags | __GFP_NOWARN);
|
|
if (p != NULL)
|
|
break;
|
|
|
|
if (jiffies_to_msecs(jiffies - start_time) > 1000) {
|
|
/* Try one more time, logging a failure for this call. */
|
|
p = __vmalloc(size, gfp_flags);
|
|
break;
|
|
}
|
|
|
|
fsleep(1000);
|
|
}
|
|
|
|
if (p == NULL) {
|
|
vdo_free(block);
|
|
} else {
|
|
block->ptr = p;
|
|
block->size = PAGE_ALIGN(size);
|
|
add_vmalloc_block(block);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (allocations_restricted)
|
|
memalloc_noio_restore(noio_flags);
|
|
|
|
if (unlikely(p == NULL)) {
|
|
vdo_log_error("Could not allocate %zu bytes for %s in %u msecs",
|
|
size, what, jiffies_to_msecs(jiffies - start_time));
|
|
return -ENOMEM;
|
|
}
|
|
|
|
*((void **) ptr) = p;
|
|
return VDO_SUCCESS;
|
|
}
|
|
|
|
/*
|
|
* Allocate storage based on memory size, failing immediately if the required memory is not
|
|
* available. The memory will be zeroed.
|
|
*
|
|
* @size: The size of an object.
|
|
* @what: What is being allocated (for error logging)
|
|
*
|
|
* Return: pointer to the allocated memory, or NULL if the required space is not available.
|
|
*/
|
|
void *vdo_allocate_memory_nowait(size_t size, const char *what __maybe_unused)
|
|
{
|
|
void *p = kmalloc(size, GFP_NOWAIT | __GFP_ZERO);
|
|
|
|
if (p != NULL)
|
|
add_kmalloc_block(ksize(p));
|
|
|
|
return p;
|
|
}
|
|
|
|
/*
 * Free memory and remove it from the usage statistics. vmalloc addresses are untracked from the
 * vmalloc list and released with vfree(); anything else is treated as a kmalloc allocation and
 * released with kfree(). A NULL pointer is ignored.
 *
 * @ptr: The memory to free, or NULL
 */
void vdo_free(void *ptr)
{
	if (ptr == NULL)
		return;

	if (!is_vmalloc_addr(ptr)) {
		/* The size must be queried before the memory is handed back. */
		remove_kmalloc_block(ksize(ptr));
		kfree(ptr);
		return;
	}

	remove_vmalloc_block(ptr);
	vfree(ptr);
}
|
|
|
|
/*
|
|
* Reallocate dynamically allocated memory. There are no alignment guarantees for the reallocated
|
|
* memory. If the new memory is larger than the old memory, the new space will be zeroed.
|
|
*
|
|
* @ptr: The memory to reallocate.
|
|
* @old_size: The old size of the memory
|
|
* @size: The new size to allocate
|
|
* @what: What is being allocated (for error logging)
|
|
* @new_ptr: A pointer to hold the reallocated pointer
|
|
*
|
|
* Return: VDO_SUCCESS or an error code
|
|
*/
|
|
int vdo_reallocate_memory(void *ptr, size_t old_size, size_t size, const char *what,
|
|
void *new_ptr)
|
|
{
|
|
int result;
|
|
|
|
if (size == 0) {
|
|
vdo_free(ptr);
|
|
*(void **) new_ptr = NULL;
|
|
return VDO_SUCCESS;
|
|
}
|
|
|
|
result = vdo_allocate(size, char, what, new_ptr);
|
|
if (result != VDO_SUCCESS)
|
|
return result;
|
|
|
|
if (ptr != NULL) {
|
|
if (old_size < size)
|
|
size = old_size;
|
|
|
|
memcpy(*((void **) new_ptr), ptr, size);
|
|
vdo_free(ptr);
|
|
}
|
|
|
|
return VDO_SUCCESS;
|
|
}
|
|
|
|
int vdo_duplicate_string(const char *string, const char *what, char **new_string)
|
|
{
|
|
int result;
|
|
u8 *dup;
|
|
|
|
result = vdo_allocate(strlen(string) + 1, u8, what, &dup);
|
|
if (result != VDO_SUCCESS)
|
|
return result;
|
|
|
|
memcpy(dup, string, strlen(string) + 1);
|
|
*new_string = dup;
|
|
return VDO_SUCCESS;
|
|
}
|
|
|
|
void vdo_memory_init(void)
|
|
{
|
|
spin_lock_init(&memory_stats.lock);
|
|
vdo_initialize_thread_registry(&allocating_threads);
|
|
}
|
|
|
|
void vdo_memory_exit(void)
|
|
{
|
|
VDO_ASSERT_LOG_ONLY(memory_stats.kmalloc_bytes == 0,
|
|
"kmalloc memory used (%zd bytes in %zd blocks) is returned to the kernel",
|
|
memory_stats.kmalloc_bytes, memory_stats.kmalloc_blocks);
|
|
VDO_ASSERT_LOG_ONLY(memory_stats.vmalloc_bytes == 0,
|
|
"vmalloc memory used (%zd bytes in %zd blocks) is returned to the kernel",
|
|
memory_stats.vmalloc_bytes, memory_stats.vmalloc_blocks);
|
|
vdo_log_debug("peak usage %zd bytes", memory_stats.peak_bytes);
|
|
}
|
|
|
|
/*
 * Fetch the current and peak tracked memory usage as one consistent pair.
 *
 * @bytes_used: Receives the sum of the tracked kmalloc and vmalloc bytes
 * @peak_bytes_used: Receives the recorded peak of that sum
 */
void vdo_get_memory_stats(u64 *bytes_used, u64 *peak_bytes_used)
{
	unsigned long irq_state;
	size_t in_use;
	size_t peak;

	/* Sample both values under the lock so they belong to the same instant. */
	spin_lock_irqsave(&memory_stats.lock, irq_state);
	in_use = memory_stats.kmalloc_bytes + memory_stats.vmalloc_bytes;
	peak = memory_stats.peak_bytes;
	spin_unlock_irqrestore(&memory_stats.lock, irq_state);

	*bytes_used = in_use;
	*peak_bytes_used = peak;
}
|
|
|
|
/*
|
|
* Report stats on any allocated memory that we're tracking. Not all allocation types are
|
|
* guaranteed to be tracked in bytes (e.g., bios).
|
|
*/
|
|
void vdo_report_memory_usage(void)
|
|
{
|
|
unsigned long flags;
|
|
u64 kmalloc_blocks;
|
|
u64 kmalloc_bytes;
|
|
u64 vmalloc_blocks;
|
|
u64 vmalloc_bytes;
|
|
u64 peak_usage;
|
|
u64 total_bytes;
|
|
|
|
spin_lock_irqsave(&memory_stats.lock, flags);
|
|
kmalloc_blocks = memory_stats.kmalloc_blocks;
|
|
kmalloc_bytes = memory_stats.kmalloc_bytes;
|
|
vmalloc_blocks = memory_stats.vmalloc_blocks;
|
|
vmalloc_bytes = memory_stats.vmalloc_bytes;
|
|
peak_usage = memory_stats.peak_bytes;
|
|
spin_unlock_irqrestore(&memory_stats.lock, flags);
|
|
total_bytes = kmalloc_bytes + vmalloc_bytes;
|
|
vdo_log_info("current module memory tracking (actual allocation sizes, not requested):");
|
|
vdo_log_info(" %llu bytes in %llu kmalloc blocks",
|
|
(unsigned long long) kmalloc_bytes,
|
|
(unsigned long long) kmalloc_blocks);
|
|
vdo_log_info(" %llu bytes in %llu vmalloc blocks",
|
|
(unsigned long long) vmalloc_bytes,
|
|
(unsigned long long) vmalloc_blocks);
|
|
vdo_log_info(" total %llu bytes, peak usage %llu bytes",
|
|
(unsigned long long) total_bytes, (unsigned long long) peak_usage);
|
|
}
|