mirror of
git://sourceware.org/git/lvm2.git
synced 2024-12-22 17:35:59 +03:00
d1be66ba37
When compiled with valgrind pool support, don't waste time preallocating memory - it is just a waste of CPU cycles to trace access to this memory. We may also get a slightly better estimate of real memory usage during command processing.
595 lines
15 KiB
C
595 lines
15 KiB
C
/*
|
|
* Copyright (C) 2003-2004 Sistina Software, Inc. All rights reserved.
|
|
* Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
|
|
*
|
|
* This file is part of LVM2.
|
|
*
|
|
* This copyrighted material is made available to anyone wishing to use,
|
|
* modify, copy, or redistribute it subject to the terms and conditions
|
|
* of the GNU Lesser General Public License v.2.1.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public License
|
|
* along with this program; if not, write to the Free Software Foundation,
|
|
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
*/
|
|
|
|
#include "lib.h"
|
|
#include "memlock.h"
|
|
#include "defaults.h"
|
|
#include "config.h"
|
|
#include "toolcontext.h"
|
|
|
|
#include <limits.h>
|
|
#include <fcntl.h>
|
|
#include <unistd.h>
|
|
#include <sys/mman.h>
|
|
#include <sys/time.h>
|
|
#include <sys/resource.h>
|
|
|
|
#ifndef DEVMAPPER_SUPPORT
|
|
|
|
/* No-op variant built when DEVMAPPER_SUPPORT is not defined. */
void memlock_inc_daemon(struct cmd_context *cmd)
{
	(void) cmd;
}
|
|
|
|
/* No-op variant built when DEVMAPPER_SUPPORT is not defined. */
void memlock_dec_daemon(struct cmd_context *cmd)
{
	(void) cmd;
}
|
|
|
|
/* No-op variant built when DEVMAPPER_SUPPORT is not defined. */
void critical_section_inc(struct cmd_context *cmd, const char *reason)
{
	(void) cmd;
	(void) reason;
}
|
|
|
|
/* No-op variant built when DEVMAPPER_SUPPORT is not defined. */
void critical_section_dec(struct cmd_context *cmd, const char *reason)
{
	(void) cmd;
	(void) reason;
}
|
|
|
|
/*
 * Variant built when DEVMAPPER_SUPPORT is not defined:
 * always reports "not in a critical section" (0).
 */
int critical_section(void)
{
	const int in_critical_section = 0;

	return in_critical_section;
}
|
|
/* No-op variant built when DEVMAPPER_SUPPORT is not defined. */
void memlock_init(struct cmd_context *cmd)
{
	(void) cmd;
}
|
|
|
|
/* No-op variant built when DEVMAPPER_SUPPORT is not defined. */
void memlock_unlock(struct cmd_context *cmd)
{
	(void) cmd;
}
|
|
|
|
/* No-op variant built when DEVMAPPER_SUPPORT is not defined. */
void memlock_reset(void)
{
}
|
|
|
|
#else /* DEVMAPPER_SUPPORT */
|
|
|
|
/* Sizes (in bytes) of the areas touched by _allocate_memory(). */
static size_t _size_stack;		/* set by memlock_init() */
static size_t _size_malloc_tmp;		/* set by memlock_init() */
static size_t _size_malloc = 2000000;

/* Block allocated in _allocate_memory() and freed by _release_memory(). */
static void *_malloc_mem = NULL;

/* Locking state; all three are cleared by memlock_reset(). */
static int _mem_locked = 0;
static int _critical_section = 0;
static int _memlock_count_daemon = 0;

/* Priority read in _lock_mem() and put back in _unlock_mem(). */
static int _priority;
/* Priority applied while memory is locked; set by memlock_init(). */
static int _default_priority;
|
|
|
|
/*
 * Maps that are unconditionally ignored: a maps line containing any of
 * these substrings is skipped by _maps_line() regardless of the filter.
 */
static const char * const _ignore_maps[] = {
	"[vdso]",
	"[vsyscall]",
	"[vectors]",
};
|
|
|
|
/*
 * Default blacklist for maps, used by _maps_line() when no mlock filter
 * is configured.  Each entry is matched as a substring of the maps line.
 */
static const char * const _blacklist_maps[] = {
	/* locale data */
	"locale/locale-archive",
	"/LC_MESSAGES/",
	"gconv/gconv-modules.cache",
	/* libraries not used during mlock */
	"/libblkid.so.",	/* blkid */
	"/liblzma.so.",		/* lzma (selinux) */
	"/libncurses.so.",	/* ncurses */
	"/libpcre.so.",		/* pcre (selinux) */
	"/libreadline.so.",	/* readline */
	"/libselinux.so.",	/* selinux */
	"/libsepol.so.",	/* sepol */
	"/libtinfo.so.",	/* tinfo */
	"/libudev.so.",		/* udev */
	"/libuuid.so.",		/* uuid (blkid) */
	"/libdl-",		/* dlopen, dlsym */
	"/etc/selinux",		/* selinux */
	/* "/libdevmapper-event.so" */
};
|
|
|
|
/* Operation selector passed to _maps_line() and _memlock_maps(). */
typedef enum {
	LVM_MLOCK,	/* lock the memory areas */
	LVM_MUNLOCK	/* unlock the memory areas */
} lvmlock_t;
|
|
|
|
/* Non-zero when mlockall()/munlockall() is used instead of per-map locking. */
static unsigned _use_mlockall;

/* State for reading the process maps file (see _memlock_maps()). */
#define SELF_MAPS "/self/maps"
static char _procselfmaps[PATH_MAX] = "";	/* path, built in _lock_mem() */
static int _maps_fd;				/* descriptor opened in _lock_mem() */
static char *_maps_buffer;			/* holds the whole maps file */
static size_t _maps_len = 8192; /* Initial buffer size for reading /proc/self/maps */

static size_t _mstats; /* statistic for maps locking */
|
|
|
|
/*
 * Write to one long in every page of the given area so that the pages
 * get instantiated.
 *
 * mem:  start of the area
 * size: size of the area in bytes
 *
 * Areas that are NULL or smaller than sizeof(long) are left untouched;
 * this also avoids computing an end pointer that lies before the area.
 */
static void _touch_memory(void *mem, size_t size)
{
	size_t pagesize = lvm_getpagesize();
	char *pos = mem;
	char *end;

	if (!mem || size < sizeof(long))
		return;

	/* Last position where a whole long still fits (exclusive bound). */
	end = pos + size - sizeof(long);

	while (pos < end) {
		*(long *) pos = 1;
		pos += pagesize;
	}
}
|
|
|
|
static void _allocate_memory(void)
|
|
{
|
|
#ifndef VALGRIND_POOL
|
|
/* With Valgrind don't waste time in with preallocating memory */
|
|
void *stack_mem, *temp_malloc_mem;
|
|
struct rlimit limit;
|
|
|
|
/* Check if we could preallocate requested stack */
|
|
if ((getrlimit (RLIMIT_STACK, &limit) == 0) &&
|
|
((_size_stack * 2) < limit.rlim_cur) &&
|
|
((stack_mem = alloca(_size_stack))))
|
|
_touch_memory(stack_mem, _size_stack);
|
|
/* FIXME else warn user setting got ignored */
|
|
|
|
if ((temp_malloc_mem = malloc(_size_malloc_tmp)))
|
|
_touch_memory(temp_malloc_mem, _size_malloc_tmp);
|
|
|
|
if ((_malloc_mem = malloc(_size_malloc)))
|
|
_touch_memory(_malloc_mem, _size_malloc);
|
|
|
|
free(temp_malloc_mem);
|
|
#endif
|
|
}
|
|
|
|
static void _release_memory(void)
|
|
{
|
|
free(_malloc_mem);
|
|
}
|
|
|
|
/*
|
|
* mlock/munlock memory areas from /proc/self/maps
|
|
* format described in kernel/Documentation/filesystem/proc.txt
|
|
*/
|
|
static int _maps_line(const struct dm_config_node *cn, lvmlock_t lock,
|
|
const char *line, size_t *mstats)
|
|
{
|
|
const struct dm_config_value *cv;
|
|
long from, to;
|
|
int pos;
|
|
unsigned i;
|
|
char fr, fw, fx, fp;
|
|
size_t sz;
|
|
const char *lock_str = (lock == LVM_MLOCK) ? "mlock" : "munlock";
|
|
|
|
if (sscanf(line, "%lx-%lx %c%c%c%c%n",
|
|
&from, &to, &fr, &fw, &fx, &fp, &pos) != 6) {
|
|
log_error("Failed to parse maps line: %s", line);
|
|
return 0;
|
|
}
|
|
|
|
/* Select readable maps */
|
|
if (fr != 'r') {
|
|
log_debug_mem("%s area unreadable %s : Skipping.", lock_str, line);
|
|
return 1;
|
|
}
|
|
|
|
/* always ignored areas */
|
|
for (i = 0; i < DM_ARRAY_SIZE(_ignore_maps); ++i)
|
|
if (strstr(line + pos, _ignore_maps[i])) {
|
|
log_debug_mem("%s ignore filter '%s' matches '%s': Skipping.",
|
|
lock_str, _ignore_maps[i], line);
|
|
return 1;
|
|
}
|
|
|
|
sz = to - from;
|
|
if (!cn) {
|
|
/* If no blacklist configured, use an internal set */
|
|
for (i = 0; i < DM_ARRAY_SIZE(_blacklist_maps); ++i)
|
|
if (strstr(line + pos, _blacklist_maps[i])) {
|
|
log_debug_mem("%s default filter '%s' matches '%s': Skipping.",
|
|
lock_str, _blacklist_maps[i], line);
|
|
return 1;
|
|
}
|
|
} else {
|
|
for (cv = cn->v; cv; cv = cv->next) {
|
|
if ((cv->type != DM_CFG_STRING) || !cv->v.str[0])
|
|
continue;
|
|
if (strstr(line + pos, cv->v.str)) {
|
|
log_debug_mem("%s_filter '%s' matches '%s': Skipping.",
|
|
lock_str, cv->v.str, line);
|
|
return 1;
|
|
}
|
|
}
|
|
}
|
|
|
|
#ifdef VALGRIND_POOL
|
|
/*
|
|
* Valgrind is continually eating memory while executing code
|
|
* so we need to deactivate check of locked memory size
|
|
*/
|
|
sz -= sz; /* = 0, but avoids getting warning about dead assigment */
|
|
|
|
#endif
|
|
*mstats += sz;
|
|
log_debug_mem("%s %10ldKiB %12lx - %12lx %c%c%c%c%s", lock_str,
|
|
((long)sz + 1023) / 1024, from, to, fr, fw, fx, fp, line + pos);
|
|
|
|
if (lock == LVM_MLOCK) {
|
|
if (mlock((const void*)from, sz) < 0) {
|
|
log_sys_error("mlock", line);
|
|
return 0;
|
|
}
|
|
} else {
|
|
if (munlock((const void*)from, sz) < 0) {
|
|
log_sys_error("munlock", line);
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
/*
 * Lock or unlock the memory of the process.
 *
 * With _use_mlockall set, mlockall()/munlockall() does the whole job
 * (returning 0 when MCL_CURRENT is not available at compile time).
 * Otherwise the maps file behind _maps_fd is read into _maps_buffer in
 * one piece and every line is handed to _maps_line().
 *
 * mstats is reset and then accumulates the size of processed areas
 * (per-map mode only).
 *
 * Returns 1 on success, 0 if anything failed.
 */
static int _memlock_maps(struct cmd_context *cmd, lvmlock_t lock, size_t *mstats)
{
	const struct dm_config_node *filter_cn;
	char *cur, *eol;
	size_t used = 0;
	ssize_t got;
	int ok = 1;

	if (_use_mlockall) {
#ifdef MCL_CURRENT
		if (lock == LVM_MLOCK) {
			if (mlockall(MCL_CURRENT | MCL_FUTURE)) {
				log_sys_error("mlockall", "");
				return 0;
			}
		} else if (munlockall()) {
			log_sys_error("munlockall", "");
			return 0;
		}
		return 1;
#else
		return 0;
#endif
	}

	/* Reset statistic counters */
	*mstats = 0;

	/* read mapping into a single memory chunk without reallocation
	 * in the middle of reading maps file */
	for (;;) {
		/* First pass allocates; a pass that filled the buffer doubles it. */
		if (!_maps_buffer || used >= _maps_len) {
			if (_maps_buffer)
				_maps_len *= 2;
			cur = dm_realloc(_maps_buffer, _maps_len);
			if (!cur) {
				log_error("Allocation of maps buffer failed.");
				return 0;
			}
			_maps_buffer = cur;
		}

		/* Always re-read the file from its beginning. */
		if (lseek(_maps_fd, 0, SEEK_SET))
			log_sys_error("lseek", _procselfmaps);

		used = 0;
		while (used < _maps_len) {
			got = read(_maps_fd, _maps_buffer + used, _maps_len - used);
			if (!got)
				break; /* EOF */
			if (got == -1) {
				log_sys_error("read", _procselfmaps);
				return 0;
			}
			used += got;
		}

		if (used < _maps_len) { /* fits in buffer */
			_maps_buffer[used] = '\0';
			break;
		}
	}

	filter_cn = find_config_tree_node(cmd, activation_mlock_filter_CFG, NULL);

	/* Walk the buffer line by line; only '\n'-terminated lines are used. */
	for (cur = _maps_buffer; (eol = strchr(cur, '\n')); cur = eol + 1) {
		*eol = '\0'; /* remove \n */
		if (!_maps_line(filter_cn, lock, cur, mstats))
			ok = 0;
	}

	log_debug_mem("%socked %ld bytes",
		      (lock == LVM_MLOCK) ? "L" : "Unl", (long)*mstats);

	return ok;
}
|
|
|
|
#ifdef DEBUG_MEMLOCK
/*
 * LVM is not supposed to use mmap while devices are suspended.
 * This code causes a core dump if it gets called.
 *
 * ARCH_X86 enables it on i386 and x86_64 builds.
 */
# if (defined(__i386__) || defined(__x86_64__)) && !defined(ARCH_X86)
#  define ARCH_X86
# endif

#endif /* DEBUG_MEMLOCK */
|
|
|
|
#ifdef ARCH_X86
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <dlfcn.h>
/*
 * Opcode written over the first byte of mmap (see _disable_mmap()).
 * 0xF4 is the x86 HLT instruction; the previous value 0x94 encodes
 * XCHG EAX,ESP rather than HLT.
 */
static const unsigned char INSTRUCTION_HLT = 0xF4;
/* Saved first byte and patched address of mmap; unsigned to match the bytes read. */
static unsigned char _mmap_orig;
static unsigned char *_mmap_addr;
#ifdef __i386__
/* Same pair for mmap64, which is patched on i386 only. */
static unsigned char _mmap64_orig;
static unsigned char *_mmap64_addr;
#endif /* __i386__ */
#endif /* ARCH_X86 */
|
|
|
|
/*
 * On ARCH_X86 builds, overwrite the first byte of mmap (and of mmap64
 * on i386) with INSTRUCTION_HLT after saving the original byte, so the
 * change can be undone by _restore_mmap().  Does nothing otherwise.
 *
 * Returns 1 on success (or when not built for ARCH_X86), 0 when a
 * symbol cannot be resolved or its page cannot be made writable.
 */
static int _disable_mmap(void)
{
#ifdef ARCH_X86
	volatile unsigned char *abs_addr;

	if (!_mmap_addr) {
		_mmap_addr = (unsigned char *) dlsym(RTLD_NEXT, "mmap");
		/* dlsym() returns NULL when the symbol is not found */
		if (!_mmap_addr) {
			log_error("Failed to resolve mmap symbol.");
			return 0;
		}
		if (_mmap_addr[0] == 0xff && _mmap_addr[1] == 0x25) { /* plt */
#ifdef __x86_64__
			abs_addr = _mmap_addr + 6 + *(int32_t *)(_mmap_addr + 2);
#endif /* __x86_64__ */
#ifdef __i386__
			abs_addr = *(void **)(_mmap_addr + 2);
#endif /* __i386__ */
			_mmap_addr = *(void **)abs_addr;
		} else
			log_debug_mem("Can't find PLT jump entry assuming -fPIE linkage.");
		/* Make the 4096-byte page holding the entry writable */
		if (mprotect((void *)((unsigned long)_mmap_addr & ~4095UL), 4096, PROT_READ|PROT_WRITE|PROT_EXEC)) {
			log_sys_error("mprotect", "");
			_mmap_addr = NULL;
			return 0;
		}
		_mmap_orig = *_mmap_addr;
	}
	log_debug_mem("Remapping mmap entry %02x to %02x.", _mmap_orig, INSTRUCTION_HLT);
	*_mmap_addr = INSTRUCTION_HLT;

#ifdef __i386__
	if (!_mmap64_addr) {
		_mmap64_addr = (unsigned char *) dlsym(RTLD_NEXT, "mmap64");
		/* dlsym() returns NULL when the symbol is not found */
		if (!_mmap64_addr) {
			log_error("Failed to resolve mmap64 symbol.");
			return 0;
		}
		if (_mmap64_addr[0] == 0xff && _mmap64_addr[1] == 0x25) {
			abs_addr = *(void **)(_mmap64_addr + 2);
			_mmap64_addr = *(void **)abs_addr;
		} /* Can't find PLT jump entry assuming -fPIE linkage */
		if (mprotect((void *)((unsigned long)_mmap64_addr & ~4095UL), 4096, PROT_READ|PROT_WRITE|PROT_EXEC)) {
			log_sys_error("mprotect", "");
			_mmap64_addr = NULL;
			return 0;
		}
		_mmap64_orig = *_mmap64_addr;
	}
	*_mmap64_addr = INSTRUCTION_HLT;
#endif /* __i386__ */
#endif /* ARCH_X86 */
	return 1;
}
|
|
|
|
/*
 * Put back the bytes saved by _disable_mmap() (ARCH_X86 builds only).
 * Always returns 1.
 */
static int _restore_mmap(void)
{
#ifdef ARCH_X86
	if (_mmap_addr != NULL) {
		*_mmap_addr = _mmap_orig;
	}
#ifdef __i386__
	if (_mmap64_addr != NULL) {
		*_mmap64_addr = _mmap64_orig;
	}
#endif /* __i386__ */
	log_debug_mem("Restored mmap entry.");
#endif /* ARCH_X86 */
	return 1;
}
|
|
|
|
/* Stop memory getting swapped out */
|
|
static void _lock_mem(struct cmd_context *cmd)
|
|
{
|
|
_allocate_memory();
|
|
(void)strerror(0); /* Force libc.mo load */
|
|
(void)dm_udev_get_sync_support(); /* udev is initialized */
|
|
log_very_verbose("Locking memory");
|
|
|
|
/*
|
|
* For daemon we need to use mlockall()
|
|
* so even future adition of thread which may not even use lvm lib
|
|
* will not block memory locked thread
|
|
* Note: assuming _memlock_count_daemon is updated before _memlock_count
|
|
*/
|
|
_use_mlockall = _memlock_count_daemon ? 1 :
|
|
find_config_tree_bool(cmd, activation_use_mlockall_CFG, NULL);
|
|
|
|
if (!_use_mlockall) {
|
|
if (!*_procselfmaps &&
|
|
dm_snprintf(_procselfmaps, sizeof(_procselfmaps),
|
|
"%s" SELF_MAPS, cmd->proc_dir) < 0) {
|
|
log_error("proc_dir too long");
|
|
return;
|
|
}
|
|
|
|
if (!(_maps_fd = open(_procselfmaps, O_RDONLY))) {
|
|
log_sys_error("open", _procselfmaps);
|
|
return;
|
|
}
|
|
|
|
if (!_disable_mmap())
|
|
stack;
|
|
}
|
|
|
|
if (!_memlock_maps(cmd, LVM_MLOCK, &_mstats))
|
|
stack;
|
|
|
|
errno = 0;
|
|
if (((_priority = getpriority(PRIO_PROCESS, 0)) == -1) && errno)
|
|
log_sys_error("getpriority", "");
|
|
else
|
|
if (setpriority(PRIO_PROCESS, 0, _default_priority))
|
|
log_error("setpriority %d failed: %s",
|
|
_default_priority, strerror(errno));
|
|
}
|
|
|
|
static void _unlock_mem(struct cmd_context *cmd)
|
|
{
|
|
size_t unlock_mstats;
|
|
|
|
log_very_verbose("Unlocking memory");
|
|
|
|
if (!_memlock_maps(cmd, LVM_MUNLOCK, &unlock_mstats))
|
|
stack;
|
|
|
|
if (!_use_mlockall) {
|
|
_restore_mmap();
|
|
if (close(_maps_fd))
|
|
log_sys_error("close", _procselfmaps);
|
|
dm_free(_maps_buffer);
|
|
_maps_buffer = NULL;
|
|
if (_mstats < unlock_mstats) {
|
|
if ((_mstats + lvm_getpagesize()) < unlock_mstats)
|
|
log_error(INTERNAL_ERROR
|
|
"Reserved memory (%ld) not enough: used %ld. Increase activation/reserved_memory?",
|
|
(long)_mstats, (long)unlock_mstats);
|
|
else
|
|
/* FIXME Believed due to incorrect use of yes_no_prompt while locks held */
|
|
log_debug_mem("Suppressed internal error: Maps lock %ld < unlock %ld, a one-page difference.",
|
|
(long)_mstats, (long)unlock_mstats);
|
|
}
|
|
}
|
|
|
|
if (setpriority(PRIO_PROCESS, 0, _priority))
|
|
log_error("setpriority %u failed: %s", _priority,
|
|
strerror(errno));
|
|
_release_memory();
|
|
}
|
|
|
|
static void _lock_mem_if_needed(struct cmd_context *cmd)
|
|
{
|
|
log_debug_mem("Lock: Memlock counters: locked:%d critical:%d daemon:%d suspended:%d",
|
|
_mem_locked, _critical_section, _memlock_count_daemon, dm_get_suspended_counter());
|
|
if (!_mem_locked &&
|
|
((_critical_section + _memlock_count_daemon) == 1)) {
|
|
_mem_locked = 1;
|
|
_lock_mem(cmd);
|
|
}
|
|
}
|
|
|
|
static void _unlock_mem_if_possible(struct cmd_context *cmd)
|
|
{
|
|
log_debug_mem("Unlock: Memlock counters: locked:%d critical:%d daemon:%d suspended:%d",
|
|
_mem_locked, _critical_section, _memlock_count_daemon, dm_get_suspended_counter());
|
|
if (_mem_locked &&
|
|
!_critical_section &&
|
|
!_memlock_count_daemon) {
|
|
_unlock_mem(cmd);
|
|
_mem_locked = 0;
|
|
}
|
|
}
|
|
|
|
void critical_section_inc(struct cmd_context *cmd, const char *reason)
|
|
{
|
|
/*
|
|
* Profiles are loaded on-demand so make sure that before
|
|
* entering the critical section all needed profiles are
|
|
* loaded to avoid the disk access later.
|
|
*/
|
|
(void) load_pending_profiles(cmd);
|
|
|
|
if (!_critical_section) {
|
|
_critical_section = 1;
|
|
log_debug_mem("Entering critical section (%s).", reason);
|
|
}
|
|
|
|
_lock_mem_if_needed(cmd);
|
|
}
|
|
|
|
void critical_section_dec(struct cmd_context *cmd, const char *reason)
|
|
{
|
|
if (_critical_section && !dm_get_suspended_counter()) {
|
|
_critical_section = 0;
|
|
log_debug_mem("Leaving critical section (%s).", reason);
|
|
}
|
|
}
|
|
|
|
int critical_section(void)
|
|
{
|
|
return _critical_section;
|
|
}
|
|
|
|
/*
|
|
* The memlock_*_daemon functions will force the mlockall() call that we need
|
|
* to stay in memory, but they will have no effect on device scans (unlike
|
|
* normal critical_section_inc/dec). Memory is kept locked as long as either
|
|
* of critical_section or memlock_daemon is in effect.
|
|
*/
|
|
|
|
void memlock_inc_daemon(struct cmd_context *cmd)
|
|
{
|
|
++_memlock_count_daemon;
|
|
if (_memlock_count_daemon == 1 && _critical_section > 0)
|
|
log_error(INTERNAL_ERROR "_memlock_inc_daemon used in critical section.");
|
|
log_debug_mem("memlock_count_daemon inc to %d", _memlock_count_daemon);
|
|
_lock_mem_if_needed(cmd);
|
|
}
|
|
|
|
void memlock_dec_daemon(struct cmd_context *cmd)
|
|
{
|
|
if (!_memlock_count_daemon)
|
|
log_error(INTERNAL_ERROR "_memlock_count_daemon has dropped below 0.");
|
|
--_memlock_count_daemon;
|
|
log_debug_mem("memlock_count_daemon dec to %d", _memlock_count_daemon);
|
|
if (!_memlock_count_daemon && _critical_section && _mem_locked) {
|
|
log_error("Unlocking daemon memory in critical section.");
|
|
_unlock_mem(cmd);
|
|
_mem_locked = 0;
|
|
}
|
|
_unlock_mem_if_possible(cmd);
|
|
}
|
|
|
|
void memlock_init(struct cmd_context *cmd)
|
|
{
|
|
/* When threaded, caller already limited stack size so just use the default. */
|
|
_size_stack = 1024ULL * (cmd->threaded ? DEFAULT_RESERVED_STACK :
|
|
find_config_tree_int(cmd, activation_reserved_stack_CFG, NULL));
|
|
_size_malloc_tmp = find_config_tree_int(cmd, activation_reserved_memory_CFG, NULL) * 1024ULL;
|
|
_default_priority = find_config_tree_int(cmd, activation_process_priority_CFG, NULL);
|
|
}
|
|
|
|
void memlock_reset(void)
|
|
{
|
|
log_debug_mem("memlock reset.");
|
|
_mem_locked = 0;
|
|
_critical_section = 0;
|
|
_memlock_count_daemon = 0;
|
|
}
|
|
|
|
/*
 * Public entry point: unlock memory if neither a critical section nor
 * a daemon reference still requires it to stay locked.
 */
void memlock_unlock(struct cmd_context *cmd)
{
	_unlock_mem_if_possible(cmd);
}
|
|
|
|
#endif
|