Merge branch 'akpm' (patches from Andrew)
Merge fourth patch-bomb from Andrew Morton: "A lot more stuff than expected, sorry. A bunch of ocfs2 reviewing was finished off. - mhocko's oom-reaper out-of-memory-handler changes - ocfs2 fixes and features - KASAN feature work - various fixes" * emailed patches from Andrew Morton <akpm@linux-foundation.org>: (42 commits) thp: fix typo in khugepaged_scan_pmd() MAINTAINERS: fill entries for KASAN mm/filemap: generic_file_read_iter(): check for zero reads unconditionally kasan: test fix: warn if the UAF could not be detected in kmalloc_uaf2 mm, kasan: stackdepot implementation. Enable stackdepot for SLAB arch, ftrace: for KASAN put hard/soft IRQ entries into separate sections mm, kasan: add GFP flags to KASAN API mm, kasan: SLAB support kasan: modify kmalloc_large_oob_right(), add kmalloc_pagealloc_oob_right() include/linux/oom.h: remove undefined oom_kills_count()/note_oom_kill() mm/page_alloc: prevent merging between isolated and other pageblocks drivers/memstick/host/r592.c: avoid gcc-6 warning ocfs2: extend enough credits for freeing one truncate record while replaying truncate records ocfs2: extend transaction for ocfs2_remove_rightmost_path() and ocfs2_update_edge_lengths() before to avoid inconsistency between inode and et ocfs2/dlm: move lock to the tail of grant queue while doing in-place convert ocfs2: solve a problem of crossing the boundary in updating backups ocfs2: fix occurring deadlock by changing ocfs2_wq from global to local ocfs2/dlm: fix BUG in dlm_move_lockres_to_recovery_list ocfs2/dlm: fix race between convert and recovery ocfs2: fix a deadlock issue in ocfs2_dio_end_io_write() ...
This commit is contained in:
commit
606c61a057
@ -12,8 +12,7 @@ KASAN uses compile-time instrumentation for checking every memory access,
|
||||
therefore you will need a GCC version 4.9.2 or later. GCC 5.0 or later is
|
||||
required for detection of out-of-bounds accesses to stack or global variables.
|
||||
|
||||
Currently KASAN is supported only for x86_64 architecture and requires the
|
||||
kernel to be built with the SLUB allocator.
|
||||
Currently KASAN is supported only for x86_64 architecture.
|
||||
|
||||
1. Usage
|
||||
========
|
||||
@ -27,7 +26,7 @@ inline are compiler instrumentation types. The former produces smaller binary
|
||||
the latter is 1.1 - 2 times faster. Inline instrumentation requires a GCC
|
||||
version 5.0 or later.
|
||||
|
||||
Currently KASAN works only with the SLUB memory allocator.
|
||||
KASAN works with both SLUB and SLAB memory allocators.
|
||||
For better bug detection and nicer reporting, enable CONFIG_STACKTRACE.
|
||||
|
||||
To disable instrumentation for specific files or directories, add a line
|
||||
|
14
MAINTAINERS
14
MAINTAINERS
@ -6165,6 +6165,20 @@ S: Maintained
|
||||
F: Documentation/hwmon/k8temp
|
||||
F: drivers/hwmon/k8temp.c
|
||||
|
||||
KASAN
|
||||
M: Andrey Ryabinin <aryabinin@virtuozzo.com>
|
||||
R: Alexander Potapenko <glider@google.com>
|
||||
R: Dmitry Vyukov <dvyukov@google.com>
|
||||
L: kasan-dev@googlegroups.com
|
||||
S: Maintained
|
||||
F: arch/*/include/asm/kasan.h
|
||||
F: arch/*/mm/kasan_init*
|
||||
F: Documentation/kasan.txt
|
||||
F: include/linux/kasan.h
|
||||
F: lib/test_kasan.c
|
||||
F: mm/kasan/
|
||||
F: scripts/Makefile.kasan
|
||||
|
||||
KCONFIG
|
||||
M: "Yann E. MORIN" <yann.morin.1998@free.fr>
|
||||
L: linux-kbuild@vger.kernel.org
|
||||
|
@ -7,7 +7,7 @@
|
||||
#ifndef __ASM_ARM_EXCEPTION_H
|
||||
#define __ASM_ARM_EXCEPTION_H
|
||||
|
||||
#include <linux/ftrace.h>
|
||||
#include <linux/interrupt.h>
|
||||
|
||||
#define __exception __attribute__((section(".exception.text")))
|
||||
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
||||
|
@ -108,6 +108,7 @@ SECTIONS
|
||||
*(.exception.text)
|
||||
__exception_text_end = .;
|
||||
IRQENTRY_TEXT
|
||||
SOFTIRQENTRY_TEXT
|
||||
TEXT_TEXT
|
||||
SCHED_TEXT
|
||||
LOCK_TEXT
|
||||
|
@ -18,7 +18,7 @@
|
||||
#ifndef __ASM_EXCEPTION_H
|
||||
#define __ASM_EXCEPTION_H
|
||||
|
||||
#include <linux/ftrace.h>
|
||||
#include <linux/interrupt.h>
|
||||
|
||||
#define __exception __attribute__((section(".exception.text")))
|
||||
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
||||
|
@ -103,6 +103,7 @@ SECTIONS
|
||||
*(.exception.text)
|
||||
__exception_text_end = .;
|
||||
IRQENTRY_TEXT
|
||||
SOFTIRQENTRY_TEXT
|
||||
TEXT_TEXT
|
||||
SCHED_TEXT
|
||||
LOCK_TEXT
|
||||
|
@ -35,6 +35,7 @@ SECTIONS
|
||||
#endif
|
||||
LOCK_TEXT
|
||||
IRQENTRY_TEXT
|
||||
SOFTIRQENTRY_TEXT
|
||||
KPROBES_TEXT
|
||||
#ifdef CONFIG_ROMKERNEL
|
||||
__sinittext = .;
|
||||
|
@ -72,6 +72,7 @@ SECTIONS
|
||||
SCHED_TEXT
|
||||
LOCK_TEXT
|
||||
IRQENTRY_TEXT
|
||||
SOFTIRQENTRY_TEXT
|
||||
KPROBES_TEXT
|
||||
*(.fixup)
|
||||
*(.gnu.warning)
|
||||
|
@ -24,6 +24,7 @@ SECTIONS
|
||||
LOCK_TEXT
|
||||
KPROBES_TEXT
|
||||
IRQENTRY_TEXT
|
||||
SOFTIRQENTRY_TEXT
|
||||
*(.text.*)
|
||||
*(.gnu.warning)
|
||||
}
|
||||
|
@ -36,6 +36,7 @@ SECTIONS {
|
||||
LOCK_TEXT
|
||||
KPROBES_TEXT
|
||||
IRQENTRY_TEXT
|
||||
SOFTIRQENTRY_TEXT
|
||||
. = ALIGN (4) ;
|
||||
_etext = . ;
|
||||
}
|
||||
|
@ -58,6 +58,7 @@ SECTIONS
|
||||
LOCK_TEXT
|
||||
KPROBES_TEXT
|
||||
IRQENTRY_TEXT
|
||||
SOFTIRQENTRY_TEXT
|
||||
*(.text.*)
|
||||
*(.fixup)
|
||||
*(.gnu.warning)
|
||||
|
@ -39,6 +39,7 @@ SECTIONS
|
||||
SCHED_TEXT
|
||||
LOCK_TEXT
|
||||
IRQENTRY_TEXT
|
||||
SOFTIRQENTRY_TEXT
|
||||
KPROBES_TEXT
|
||||
} =0
|
||||
_etext = .;
|
||||
|
@ -50,6 +50,7 @@ SECTIONS
|
||||
LOCK_TEXT
|
||||
KPROBES_TEXT
|
||||
IRQENTRY_TEXT
|
||||
SOFTIRQENTRY_TEXT
|
||||
*(.fixup)
|
||||
*(.text.__*)
|
||||
_etext = .;
|
||||
|
@ -72,6 +72,7 @@ SECTIONS
|
||||
LOCK_TEXT
|
||||
KPROBES_TEXT
|
||||
IRQENTRY_TEXT
|
||||
SOFTIRQENTRY_TEXT
|
||||
*(.text.do_softirq)
|
||||
*(.text.sys_exit)
|
||||
*(.text.do_sigaltstack)
|
||||
|
@ -55,6 +55,7 @@ SECTIONS
|
||||
LOCK_TEXT
|
||||
KPROBES_TEXT
|
||||
IRQENTRY_TEXT
|
||||
SOFTIRQENTRY_TEXT
|
||||
|
||||
#ifdef CONFIG_PPC32
|
||||
*(.got1)
|
||||
|
@ -28,6 +28,7 @@ SECTIONS
|
||||
LOCK_TEXT
|
||||
KPROBES_TEXT
|
||||
IRQENTRY_TEXT
|
||||
SOFTIRQENTRY_TEXT
|
||||
*(.fixup)
|
||||
*(.gnu.warning)
|
||||
} :text = 0x0700
|
||||
|
@ -39,6 +39,7 @@ SECTIONS
|
||||
LOCK_TEXT
|
||||
KPROBES_TEXT
|
||||
IRQENTRY_TEXT
|
||||
SOFTIRQENTRY_TEXT
|
||||
*(.fixup)
|
||||
*(.gnu.warning)
|
||||
_etext = .; /* End of text section */
|
||||
|
@ -48,6 +48,7 @@ SECTIONS
|
||||
LOCK_TEXT
|
||||
KPROBES_TEXT
|
||||
IRQENTRY_TEXT
|
||||
SOFTIRQENTRY_TEXT
|
||||
*(.gnu.warning)
|
||||
} = 0
|
||||
_etext = .;
|
||||
|
@ -45,6 +45,7 @@ SECTIONS
|
||||
LOCK_TEXT
|
||||
KPROBES_TEXT
|
||||
IRQENTRY_TEXT
|
||||
SOFTIRQENTRY_TEXT
|
||||
__fix_text_end = .; /* tile-cpack won't rearrange before this */
|
||||
ALIGN_FUNCTION();
|
||||
*(.hottext*)
|
||||
|
@ -19,6 +19,7 @@ endif
|
||||
KASAN_SANITIZE_head$(BITS).o := n
|
||||
KASAN_SANITIZE_dumpstack.o := n
|
||||
KASAN_SANITIZE_dumpstack_$(BITS).o := n
|
||||
KASAN_SANITIZE_stacktrace.o := n
|
||||
|
||||
OBJECT_FILES_NON_STANDARD_head_$(BITS).o := y
|
||||
OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y
|
||||
|
@ -101,6 +101,7 @@ SECTIONS
|
||||
KPROBES_TEXT
|
||||
ENTRY_TEXT
|
||||
IRQENTRY_TEXT
|
||||
SOFTIRQENTRY_TEXT
|
||||
*(.fixup)
|
||||
*(.gnu.warning)
|
||||
/* End of text section */
|
||||
|
@ -17,7 +17,7 @@
|
||||
int input_event_from_user(const char __user *buffer,
|
||||
struct input_event *event)
|
||||
{
|
||||
if (INPUT_COMPAT_TEST && !COMPAT_USE_64BIT_TIME) {
|
||||
if (in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
|
||||
struct input_event_compat compat_event;
|
||||
|
||||
if (copy_from_user(&compat_event, buffer,
|
||||
@ -41,7 +41,7 @@ int input_event_from_user(const char __user *buffer,
|
||||
int input_event_to_user(char __user *buffer,
|
||||
const struct input_event *event)
|
||||
{
|
||||
if (INPUT_COMPAT_TEST && !COMPAT_USE_64BIT_TIME) {
|
||||
if (in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
|
||||
struct input_event_compat compat_event;
|
||||
|
||||
compat_event.time.tv_sec = event->time.tv_sec;
|
||||
@ -65,7 +65,7 @@ int input_event_to_user(char __user *buffer,
|
||||
int input_ff_effect_from_user(const char __user *buffer, size_t size,
|
||||
struct ff_effect *effect)
|
||||
{
|
||||
if (INPUT_COMPAT_TEST) {
|
||||
if (in_compat_syscall()) {
|
||||
struct ff_effect_compat *compat_effect;
|
||||
|
||||
if (size != sizeof(struct ff_effect_compat))
|
||||
|
@ -17,8 +17,6 @@
|
||||
|
||||
#ifdef CONFIG_COMPAT
|
||||
|
||||
#define INPUT_COMPAT_TEST in_compat_syscall()
|
||||
|
||||
struct input_event_compat {
|
||||
struct compat_timeval time;
|
||||
__u16 type;
|
||||
@ -57,7 +55,7 @@ struct ff_effect_compat {
|
||||
|
||||
static inline size_t input_event_size(void)
|
||||
{
|
||||
return (INPUT_COMPAT_TEST && !COMPAT_USE_64BIT_TIME) ?
|
||||
return (in_compat_syscall() && !COMPAT_USE_64BIT_TIME) ?
|
||||
sizeof(struct input_event_compat) : sizeof(struct input_event);
|
||||
}
|
||||
|
||||
|
@ -1015,7 +1015,7 @@ static int input_bits_to_string(char *buf, int buf_size,
|
||||
{
|
||||
int len = 0;
|
||||
|
||||
if (INPUT_COMPAT_TEST) {
|
||||
if (in_compat_syscall()) {
|
||||
u32 dword = bits >> 32;
|
||||
if (dword || !skip_empty)
|
||||
len += snprintf(buf, buf_size, "%x ", dword);
|
||||
|
@ -664,7 +664,7 @@ struct uinput_ff_upload_compat {
|
||||
static int uinput_ff_upload_to_user(char __user *buffer,
|
||||
const struct uinput_ff_upload *ff_up)
|
||||
{
|
||||
if (INPUT_COMPAT_TEST) {
|
||||
if (in_compat_syscall()) {
|
||||
struct uinput_ff_upload_compat ff_up_compat;
|
||||
|
||||
ff_up_compat.request_id = ff_up->request_id;
|
||||
@ -695,7 +695,7 @@ static int uinput_ff_upload_to_user(char __user *buffer,
|
||||
static int uinput_ff_upload_from_user(const char __user *buffer,
|
||||
struct uinput_ff_upload *ff_up)
|
||||
{
|
||||
if (INPUT_COMPAT_TEST) {
|
||||
if (in_compat_syscall()) {
|
||||
struct uinput_ff_upload_compat ff_up_compat;
|
||||
|
||||
if (copy_from_user(&ff_up_compat, buffer,
|
||||
|
@ -298,8 +298,7 @@ static int r592_transfer_fifo_dma(struct r592_device *dev)
|
||||
sg_count = dma_map_sg(&dev->pci_dev->dev, &dev->req->sg, 1, is_write ?
|
||||
PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE);
|
||||
|
||||
if (sg_count != 1 ||
|
||||
(sg_dma_len(&dev->req->sg) < dev->req->sg.length)) {
|
||||
if (sg_count != 1 || sg_dma_len(&dev->req->sg) < R592_LFIFO_SIZE) {
|
||||
message("problem in dma_map_sg");
|
||||
return -EIO;
|
||||
}
|
||||
|
105
fs/ocfs2/alloc.c
105
fs/ocfs2/alloc.c
@ -2516,21 +2516,6 @@ static int ocfs2_update_edge_lengths(handle_t *handle,
|
||||
struct ocfs2_extent_block *eb;
|
||||
u32 range;
|
||||
|
||||
/*
|
||||
* In normal tree rotation process, we will never touch the
|
||||
* tree branch above subtree_index and ocfs2_extend_rotate_transaction
|
||||
* doesn't reserve the credits for them either.
|
||||
*
|
||||
* But we do have a special case here which will update the rightmost
|
||||
* records for all the bh in the path.
|
||||
* So we have to allocate extra credits and access them.
|
||||
*/
|
||||
ret = ocfs2_extend_trans(handle, subtree_index);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = ocfs2_journal_access_path(et->et_ci, handle, path);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
@ -2956,7 +2941,7 @@ static int __ocfs2_rotate_tree_left(handle_t *handle,
|
||||
right_path->p_node[subtree_root].bh->b_blocknr,
|
||||
right_path->p_tree_depth);
|
||||
|
||||
ret = ocfs2_extend_rotate_transaction(handle, subtree_root,
|
||||
ret = ocfs2_extend_rotate_transaction(handle, 0,
|
||||
orig_credits, left_path);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
@ -3029,21 +3014,9 @@ static int ocfs2_remove_rightmost_path(handle_t *handle,
|
||||
struct ocfs2_extent_block *eb;
|
||||
struct ocfs2_extent_list *el;
|
||||
|
||||
|
||||
ret = ocfs2_et_sanity_check(et);
|
||||
if (ret)
|
||||
goto out;
|
||||
/*
|
||||
* There's two ways we handle this depending on
|
||||
* whether path is the only existing one.
|
||||
*/
|
||||
ret = ocfs2_extend_rotate_transaction(handle, 0,
|
||||
handle->h_buffer_credits,
|
||||
path);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = ocfs2_journal_access_path(et->et_ci, handle, path);
|
||||
if (ret) {
|
||||
@ -3641,6 +3614,14 @@ static int ocfs2_merge_rec_left(struct ocfs2_path *right_path,
|
||||
*/
|
||||
if (le16_to_cpu(right_rec->e_leaf_clusters) == 0 &&
|
||||
le16_to_cpu(el->l_next_free_rec) == 1) {
|
||||
/* extend credit for ocfs2_remove_rightmost_path */
|
||||
ret = ocfs2_extend_rotate_transaction(handle, 0,
|
||||
handle->h_buffer_credits,
|
||||
right_path);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = ocfs2_remove_rightmost_path(handle, et,
|
||||
right_path,
|
||||
@ -3679,6 +3660,14 @@ static int ocfs2_try_to_merge_extent(handle_t *handle,
|
||||
BUG_ON(ctxt->c_contig_type == CONTIG_NONE);
|
||||
|
||||
if (ctxt->c_split_covers_rec && ctxt->c_has_empty_extent) {
|
||||
/* extend credit for ocfs2_remove_rightmost_path */
|
||||
ret = ocfs2_extend_rotate_transaction(handle, 0,
|
||||
handle->h_buffer_credits,
|
||||
path);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
}
|
||||
/*
|
||||
* The merge code will need to create an empty
|
||||
* extent to take the place of the newly
|
||||
@ -3727,6 +3716,15 @@ static int ocfs2_try_to_merge_extent(handle_t *handle,
|
||||
*/
|
||||
BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0]));
|
||||
|
||||
/* extend credit for ocfs2_remove_rightmost_path */
|
||||
ret = ocfs2_extend_rotate_transaction(handle, 0,
|
||||
handle->h_buffer_credits,
|
||||
path);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* The merge left us with an empty extent, remove it. */
|
||||
ret = ocfs2_rotate_tree_left(handle, et, path, dealloc);
|
||||
if (ret) {
|
||||
@ -3748,6 +3746,15 @@ static int ocfs2_try_to_merge_extent(handle_t *handle,
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* extend credit for ocfs2_remove_rightmost_path */
|
||||
ret = ocfs2_extend_rotate_transaction(handle, 0,
|
||||
handle->h_buffer_credits,
|
||||
path);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = ocfs2_rotate_tree_left(handle, et, path, dealloc);
|
||||
/*
|
||||
* Error from this last rotate is not critical, so
|
||||
@ -3783,6 +3790,16 @@ static int ocfs2_try_to_merge_extent(handle_t *handle,
|
||||
}
|
||||
|
||||
if (ctxt->c_split_covers_rec) {
|
||||
/* extend credit for ocfs2_remove_rightmost_path */
|
||||
ret = ocfs2_extend_rotate_transaction(handle, 0,
|
||||
handle->h_buffer_credits,
|
||||
path);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* The merge may have left an empty extent in
|
||||
* our leaf. Try to rotate it away.
|
||||
@ -5342,6 +5359,15 @@ static int ocfs2_truncate_rec(handle_t *handle,
|
||||
struct ocfs2_extent_block *eb;
|
||||
|
||||
if (ocfs2_is_empty_extent(&el->l_recs[0]) && index > 0) {
|
||||
/* extend credit for ocfs2_remove_rightmost_path */
|
||||
ret = ocfs2_extend_rotate_transaction(handle, 0,
|
||||
handle->h_buffer_credits,
|
||||
path);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = ocfs2_rotate_tree_left(handle, et, path, dealloc);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
@ -5928,16 +5954,6 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
|
||||
|
||||
ocfs2_journal_dirty(handle, tl_bh);
|
||||
|
||||
/* TODO: Perhaps we can calculate the bulk of the
|
||||
* credits up front rather than extending like
|
||||
* this. */
|
||||
status = ocfs2_extend_trans(handle,
|
||||
OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC);
|
||||
if (status < 0) {
|
||||
mlog_errno(status);
|
||||
goto bail;
|
||||
}
|
||||
|
||||
rec = tl->tl_recs[i];
|
||||
start_blk = ocfs2_clusters_to_blocks(data_alloc_inode->i_sb,
|
||||
le32_to_cpu(rec.t_start));
|
||||
@ -5958,6 +5974,13 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
|
||||
goto bail;
|
||||
}
|
||||
}
|
||||
|
||||
status = ocfs2_extend_trans(handle,
|
||||
OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC);
|
||||
if (status < 0) {
|
||||
mlog_errno(status);
|
||||
goto bail;
|
||||
}
|
||||
i--;
|
||||
}
|
||||
|
||||
@ -6016,7 +6039,7 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
|
||||
goto out_mutex;
|
||||
}
|
||||
|
||||
handle = ocfs2_start_trans(osb, OCFS2_TRUNCATE_LOG_UPDATE);
|
||||
handle = ocfs2_start_trans(osb, OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC);
|
||||
if (IS_ERR(handle)) {
|
||||
status = PTR_ERR(handle);
|
||||
mlog_errno(status);
|
||||
@ -6079,7 +6102,7 @@ void ocfs2_schedule_truncate_log_flush(struct ocfs2_super *osb,
|
||||
if (cancel)
|
||||
cancel_delayed_work(&osb->osb_truncate_log_wq);
|
||||
|
||||
queue_delayed_work(ocfs2_wq, &osb->osb_truncate_log_wq,
|
||||
queue_delayed_work(osb->ocfs2_wq, &osb->osb_truncate_log_wq,
|
||||
OCFS2_TRUNCATE_LOG_FLUSH_INTERVAL);
|
||||
}
|
||||
}
|
||||
@ -6253,7 +6276,7 @@ void ocfs2_truncate_log_shutdown(struct ocfs2_super *osb)
|
||||
|
||||
if (tl_inode) {
|
||||
cancel_delayed_work(&osb->osb_truncate_log_wq);
|
||||
flush_workqueue(ocfs2_wq);
|
||||
flush_workqueue(osb->ocfs2_wq);
|
||||
|
||||
status = ocfs2_flush_truncate_log(osb);
|
||||
if (status < 0)
|
||||
|
1144
fs/ocfs2/aops.c
1144
fs/ocfs2/aops.c
File diff suppressed because it is too large
Load Diff
@ -47,9 +47,14 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
|
||||
loff_t pos, unsigned len, unsigned copied,
|
||||
struct page *page, void *fsdata);
|
||||
|
||||
int ocfs2_write_begin_nolock(struct file *filp,
|
||||
struct address_space *mapping,
|
||||
loff_t pos, unsigned len, unsigned flags,
|
||||
typedef enum {
|
||||
OCFS2_WRITE_BUFFER = 0,
|
||||
OCFS2_WRITE_DIRECT,
|
||||
OCFS2_WRITE_MMAP,
|
||||
} ocfs2_write_type_t;
|
||||
|
||||
int ocfs2_write_begin_nolock(struct address_space *mapping,
|
||||
loff_t pos, unsigned len, ocfs2_write_type_t type,
|
||||
struct page **pagep, void **fsdata,
|
||||
struct buffer_head *di_bh, struct page *mmap_page);
|
||||
|
||||
@ -79,7 +84,6 @@ static inline void ocfs2_iocb_set_rw_locked(struct kiocb *iocb, int level)
|
||||
enum ocfs2_iocb_lock_bits {
|
||||
OCFS2_IOCB_RW_LOCK = 0,
|
||||
OCFS2_IOCB_RW_LOCK_LEVEL,
|
||||
OCFS2_IOCB_UNALIGNED_IO,
|
||||
OCFS2_IOCB_NUM_LOCKS
|
||||
};
|
||||
|
||||
@ -88,11 +92,4 @@ enum ocfs2_iocb_lock_bits {
|
||||
#define ocfs2_iocb_rw_locked_level(iocb) \
|
||||
test_bit(OCFS2_IOCB_RW_LOCK_LEVEL, (unsigned long *)&iocb->private)
|
||||
|
||||
#define ocfs2_iocb_set_unaligned_aio(iocb) \
|
||||
set_bit(OCFS2_IOCB_UNALIGNED_IO, (unsigned long *)&iocb->private)
|
||||
#define ocfs2_iocb_clear_unaligned_aio(iocb) \
|
||||
clear_bit(OCFS2_IOCB_UNALIGNED_IO, (unsigned long *)&iocb->private)
|
||||
#define ocfs2_iocb_is_unaligned_aio(iocb) \
|
||||
test_bit(OCFS2_IOCB_UNALIGNED_IO, (unsigned long *)&iocb->private)
|
||||
|
||||
#endif /* OCFS2_FILE_H */
|
||||
|
@ -1444,8 +1444,8 @@ static void o2hb_region_release(struct config_item *item)
|
||||
debugfs_remove(reg->hr_debug_dir);
|
||||
kfree(reg->hr_db_livenodes);
|
||||
kfree(reg->hr_db_regnum);
|
||||
kfree(reg->hr_debug_elapsed_time);
|
||||
kfree(reg->hr_debug_pinned);
|
||||
kfree(reg->hr_db_elapsed_time);
|
||||
kfree(reg->hr_db_pinned);
|
||||
|
||||
spin_lock(&o2hb_live_lock);
|
||||
list_del(&reg->hr_all_item);
|
||||
|
@ -212,6 +212,12 @@ grant:
|
||||
if (lock->lksb->flags & DLM_LKSB_PUT_LVB)
|
||||
memcpy(res->lvb, lock->lksb->lvb, DLM_LVB_LEN);
|
||||
|
||||
/*
|
||||
* Move the lock to the tail because it may be the only lock which has
|
||||
* an invalid lvb.
|
||||
*/
|
||||
list_move_tail(&lock->list, &res->granted);
|
||||
|
||||
status = DLM_NORMAL;
|
||||
*call_ast = 1;
|
||||
goto unlock_exit;
|
||||
@ -262,6 +268,7 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock *lock, int flags, int type)
|
||||
{
|
||||
enum dlm_status status;
|
||||
u8 old_owner = res->owner;
|
||||
|
||||
mlog(0, "type=%d, convert_type=%d, busy=%d\n", lock->ml.type,
|
||||
lock->ml.convert_type, res->state & DLM_LOCK_RES_IN_PROGRESS);
|
||||
@ -287,6 +294,19 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm,
|
||||
status = DLM_DENIED;
|
||||
goto bail;
|
||||
}
|
||||
|
||||
if (lock->ml.type == type && lock->ml.convert_type == LKM_IVMODE) {
|
||||
mlog(0, "last convert request returned DLM_RECOVERING, but "
|
||||
"owner has already queued and sent ast to me. res %.*s, "
|
||||
"(cookie=%u:%llu, type=%d, conv=%d)\n",
|
||||
res->lockname.len, res->lockname.name,
|
||||
dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
|
||||
dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
|
||||
lock->ml.type, lock->ml.convert_type);
|
||||
status = DLM_NORMAL;
|
||||
goto bail;
|
||||
}
|
||||
|
||||
res->state |= DLM_LOCK_RES_IN_PROGRESS;
|
||||
/* move lock to local convert queue */
|
||||
/* do not alter lock refcount. switching lists. */
|
||||
@ -316,11 +336,19 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm,
|
||||
spin_lock(&res->spinlock);
|
||||
res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
|
||||
lock->convert_pending = 0;
|
||||
/* if it failed, move it back to granted queue */
|
||||
/* if it failed, move it back to granted queue.
|
||||
* if master returns DLM_NORMAL and then down before sending ast,
|
||||
* it may have already been moved to granted queue, reset to
|
||||
* DLM_RECOVERING and retry convert */
|
||||
if (status != DLM_NORMAL) {
|
||||
if (status != DLM_NOTQUEUED)
|
||||
dlm_error(status);
|
||||
dlm_revert_pending_convert(res, lock);
|
||||
} else if ((res->state & DLM_LOCK_RES_RECOVERING) ||
|
||||
(old_owner != res->owner)) {
|
||||
mlog(0, "res %.*s is in recovering or has been recovered.\n",
|
||||
res->lockname.len, res->lockname.name);
|
||||
status = DLM_RECOVERING;
|
||||
}
|
||||
bail:
|
||||
spin_unlock(&res->spinlock);
|
||||
|
@ -2083,7 +2083,6 @@ void dlm_move_lockres_to_recovery_list(struct dlm_ctxt *dlm,
|
||||
dlm_lock_get(lock);
|
||||
if (lock->convert_pending) {
|
||||
/* move converting lock back to granted */
|
||||
BUG_ON(i != DLM_CONVERTING_LIST);
|
||||
mlog(0, "node died with convert pending "
|
||||
"on %.*s. move back to granted list.\n",
|
||||
res->lockname.len, res->lockname.name);
|
||||
|
165
fs/ocfs2/file.c
165
fs/ocfs2/file.c
@ -1381,44 +1381,6 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Will look for holes and unwritten extents in the range starting at
|
||||
* pos for count bytes (inclusive).
|
||||
*/
|
||||
static int ocfs2_check_range_for_holes(struct inode *inode, loff_t pos,
|
||||
size_t count)
|
||||
{
|
||||
int ret = 0;
|
||||
unsigned int extent_flags;
|
||||
u32 cpos, clusters, extent_len, phys_cpos;
|
||||
struct super_block *sb = inode->i_sb;
|
||||
|
||||
cpos = pos >> OCFS2_SB(sb)->s_clustersize_bits;
|
||||
clusters = ocfs2_clusters_for_bytes(sb, pos + count) - cpos;
|
||||
|
||||
while (clusters) {
|
||||
ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, &extent_len,
|
||||
&extent_flags);
|
||||
if (ret < 0) {
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (phys_cpos == 0 || (extent_flags & OCFS2_EXT_UNWRITTEN)) {
|
||||
ret = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
if (extent_len > clusters)
|
||||
extent_len = clusters;
|
||||
|
||||
clusters -= extent_len;
|
||||
cpos += extent_len;
|
||||
}
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int ocfs2_write_remove_suid(struct inode *inode)
|
||||
{
|
||||
int ret;
|
||||
@ -2129,18 +2091,12 @@ out:
|
||||
|
||||
static int ocfs2_prepare_inode_for_write(struct file *file,
|
||||
loff_t pos,
|
||||
size_t count,
|
||||
int appending,
|
||||
int *direct_io,
|
||||
int *has_refcount)
|
||||
size_t count)
|
||||
{
|
||||
int ret = 0, meta_level = 0;
|
||||
struct dentry *dentry = file->f_path.dentry;
|
||||
struct inode *inode = d_inode(dentry);
|
||||
loff_t end;
|
||||
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
|
||||
int full_coherency = !(osb->s_mount_opt &
|
||||
OCFS2_MOUNT_COHERENCY_BUFFERED);
|
||||
|
||||
/*
|
||||
* We start with a read level meta lock and only jump to an ex
|
||||
@ -2189,10 +2145,6 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
|
||||
pos,
|
||||
count,
|
||||
&meta_level);
|
||||
if (has_refcount)
|
||||
*has_refcount = 1;
|
||||
if (direct_io)
|
||||
*direct_io = 0;
|
||||
}
|
||||
|
||||
if (ret < 0) {
|
||||
@ -2200,67 +2152,12 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
/*
|
||||
* Skip the O_DIRECT checks if we don't need
|
||||
* them.
|
||||
*/
|
||||
if (!direct_io || !(*direct_io))
|
||||
break;
|
||||
|
||||
/*
|
||||
* There's no sane way to do direct writes to an inode
|
||||
* with inline data.
|
||||
*/
|
||||
if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
|
||||
*direct_io = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* Allowing concurrent direct writes means
|
||||
* i_size changes wouldn't be synchronized, so
|
||||
* one node could wind up truncating another
|
||||
* nodes writes.
|
||||
*/
|
||||
if (end > i_size_read(inode) && !full_coherency) {
|
||||
*direct_io = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* Fallback to old way if the feature bit is not set.
|
||||
*/
|
||||
if (end > i_size_read(inode) &&
|
||||
!ocfs2_supports_append_dio(osb)) {
|
||||
*direct_io = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* We don't fill holes during direct io, so
|
||||
* check for them here. If any are found, the
|
||||
* caller will have to retake some cluster
|
||||
* locks and initiate the io as buffered.
|
||||
*/
|
||||
ret = ocfs2_check_range_for_holes(inode, pos, count);
|
||||
if (ret == 1) {
|
||||
/*
|
||||
* Fallback to old way if the feature bit is not set.
|
||||
* Otherwise try dio first and then complete the rest
|
||||
* request through buffer io.
|
||||
*/
|
||||
if (!ocfs2_supports_append_dio(osb))
|
||||
*direct_io = 0;
|
||||
ret = 0;
|
||||
} else if (ret < 0)
|
||||
mlog_errno(ret);
|
||||
break;
|
||||
}
|
||||
|
||||
out_unlock:
|
||||
trace_ocfs2_prepare_inode_for_write(OCFS2_I(inode)->ip_blkno,
|
||||
pos, appending, count,
|
||||
direct_io, has_refcount);
|
||||
pos, count);
|
||||
|
||||
if (meta_level >= 0)
|
||||
ocfs2_inode_unlock(inode, meta_level);
|
||||
@ -2272,18 +2169,16 @@ out:
|
||||
static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
|
||||
struct iov_iter *from)
|
||||
{
|
||||
int direct_io, appending, rw_level;
|
||||
int can_do_direct, has_refcount = 0;
|
||||
int direct_io, rw_level;
|
||||
ssize_t written = 0;
|
||||
ssize_t ret;
|
||||
size_t count = iov_iter_count(from), orig_count;
|
||||
size_t count = iov_iter_count(from);
|
||||
struct file *file = iocb->ki_filp;
|
||||
struct inode *inode = file_inode(file);
|
||||
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
|
||||
int full_coherency = !(osb->s_mount_opt &
|
||||
OCFS2_MOUNT_COHERENCY_BUFFERED);
|
||||
int unaligned_dio = 0;
|
||||
int dropped_dio = 0;
|
||||
void *saved_ki_complete = NULL;
|
||||
int append_write = ((iocb->ki_pos + count) >=
|
||||
i_size_read(inode) ? 1 : 0);
|
||||
|
||||
@ -2296,12 +2191,10 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
|
||||
if (count == 0)
|
||||
return 0;
|
||||
|
||||
appending = iocb->ki_flags & IOCB_APPEND ? 1 : 0;
|
||||
direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0;
|
||||
|
||||
inode_lock(inode);
|
||||
|
||||
relock:
|
||||
/*
|
||||
* Concurrent O_DIRECT writes are allowed with
|
||||
* mount_option "coherency=buffered".
|
||||
@ -2334,7 +2227,6 @@ relock:
|
||||
ocfs2_inode_unlock(inode, 1);
|
||||
}
|
||||
|
||||
orig_count = iov_iter_count(from);
|
||||
ret = generic_write_checks(iocb, from);
|
||||
if (ret <= 0) {
|
||||
if (ret)
|
||||
@ -2343,41 +2235,18 @@ relock:
|
||||
}
|
||||
count = ret;
|
||||
|
||||
can_do_direct = direct_io;
|
||||
ret = ocfs2_prepare_inode_for_write(file, iocb->ki_pos, count, appending,
|
||||
&can_do_direct, &has_refcount);
|
||||
ret = ocfs2_prepare_inode_for_write(file, iocb->ki_pos, count);
|
||||
if (ret < 0) {
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (direct_io && !is_sync_kiocb(iocb))
|
||||
unaligned_dio = ocfs2_is_io_unaligned(inode, count, iocb->ki_pos);
|
||||
|
||||
/*
|
||||
* We can't complete the direct I/O as requested, fall back to
|
||||
* buffered I/O.
|
||||
*/
|
||||
if (direct_io && !can_do_direct) {
|
||||
ocfs2_rw_unlock(inode, rw_level);
|
||||
|
||||
rw_level = -1;
|
||||
|
||||
direct_io = 0;
|
||||
iocb->ki_flags &= ~IOCB_DIRECT;
|
||||
iov_iter_reexpand(from, orig_count);
|
||||
dropped_dio = 1;
|
||||
goto relock;
|
||||
}
|
||||
|
||||
if (unaligned_dio) {
|
||||
if (direct_io && !is_sync_kiocb(iocb) &&
|
||||
ocfs2_is_io_unaligned(inode, count, iocb->ki_pos)) {
|
||||
/*
|
||||
* Wait on previous unaligned aio to complete before
|
||||
* proceeding.
|
||||
* Make it a sync io if it's an unaligned aio.
|
||||
*/
|
||||
mutex_lock(&OCFS2_I(inode)->ip_unaligned_aio);
|
||||
/* Mark the iocb as needing an unlock in ocfs2_dio_end_io */
|
||||
ocfs2_iocb_set_unaligned_aio(iocb);
|
||||
saved_ki_complete = xchg(&iocb->ki_complete, NULL);
|
||||
}
|
||||
|
||||
/* communicate with ocfs2_dio_end_io */
|
||||
@ -2398,14 +2267,13 @@ relock:
|
||||
*/
|
||||
if ((written == -EIOCBQUEUED) || (!ocfs2_iocb_is_rw_locked(iocb))) {
|
||||
rw_level = -1;
|
||||
unaligned_dio = 0;
|
||||
}
|
||||
|
||||
if (unlikely(written <= 0))
|
||||
goto no_sync;
|
||||
goto out;
|
||||
|
||||
if (((file->f_flags & O_DSYNC) && !direct_io) ||
|
||||
IS_SYNC(inode) || dropped_dio) {
|
||||
IS_SYNC(inode)) {
|
||||
ret = filemap_fdatawrite_range(file->f_mapping,
|
||||
iocb->ki_pos - written,
|
||||
iocb->ki_pos - 1);
|
||||
@ -2424,13 +2292,10 @@ relock:
|
||||
iocb->ki_pos - 1);
|
||||
}
|
||||
|
||||
no_sync:
|
||||
if (unaligned_dio && ocfs2_iocb_is_unaligned_aio(iocb)) {
|
||||
ocfs2_iocb_clear_unaligned_aio(iocb);
|
||||
mutex_unlock(&OCFS2_I(inode)->ip_unaligned_aio);
|
||||
}
|
||||
|
||||
out:
|
||||
if (saved_ki_complete)
|
||||
xchg(&iocb->ki_complete, saved_ki_complete);
|
||||
|
||||
if (rw_level != -1)
|
||||
ocfs2_rw_unlock(inode, rw_level);
|
||||
|
||||
|
@ -1170,6 +1170,9 @@ static void ocfs2_clear_inode(struct inode *inode)
|
||||
mlog_bug_on_msg(!list_empty(&oi->ip_io_markers),
|
||||
"Clear inode of %llu, inode has io markers\n",
|
||||
(unsigned long long)oi->ip_blkno);
|
||||
mlog_bug_on_msg(!list_empty(&oi->ip_unwritten_list),
|
||||
"Clear inode of %llu, inode has unwritten extents\n",
|
||||
(unsigned long long)oi->ip_blkno);
|
||||
|
||||
ocfs2_extent_map_trunc(inode, 0);
|
||||
|
||||
|
@ -43,9 +43,6 @@ struct ocfs2_inode_info
|
||||
/* protects extended attribute changes on this inode */
|
||||
struct rw_semaphore ip_xattr_sem;
|
||||
|
||||
/* Number of outstanding AIO's which are not page aligned */
|
||||
struct mutex ip_unaligned_aio;
|
||||
|
||||
/* These fields are protected by ip_lock */
|
||||
spinlock_t ip_lock;
|
||||
u32 ip_open_count;
|
||||
@ -57,6 +54,9 @@ struct ocfs2_inode_info
|
||||
u32 ip_flags; /* see below */
|
||||
u32 ip_attr; /* inode attributes */
|
||||
|
||||
/* Record unwritten extents during direct io. */
|
||||
struct list_head ip_unwritten_list;
|
||||
|
||||
/* protected by recovery_lock. */
|
||||
struct inode *ip_next_orphan;
|
||||
|
||||
|
@ -231,7 +231,7 @@ void ocfs2_recovery_exit(struct ocfs2_super *osb)
|
||||
/* At this point, we know that no more recovery threads can be
|
||||
* launched, so wait for any recovery completion work to
|
||||
* complete. */
|
||||
flush_workqueue(ocfs2_wq);
|
||||
flush_workqueue(osb->ocfs2_wq);
|
||||
|
||||
/*
|
||||
* Now that recovery is shut down, and the osb is about to be
|
||||
@ -1326,7 +1326,7 @@ static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
|
||||
|
||||
spin_lock(&journal->j_lock);
|
||||
list_add_tail(&item->lri_list, &journal->j_la_cleanups);
|
||||
queue_work(ocfs2_wq, &journal->j_recovery_work);
|
||||
queue_work(journal->j_osb->ocfs2_wq, &journal->j_recovery_work);
|
||||
spin_unlock(&journal->j_lock);
|
||||
}
|
||||
|
||||
@ -1968,7 +1968,7 @@ static void ocfs2_orphan_scan_work(struct work_struct *work)
|
||||
mutex_lock(&os->os_lock);
|
||||
ocfs2_queue_orphan_scan(osb);
|
||||
if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE)
|
||||
queue_delayed_work(ocfs2_wq, &os->os_orphan_scan_work,
|
||||
queue_delayed_work(osb->ocfs2_wq, &os->os_orphan_scan_work,
|
||||
ocfs2_orphan_scan_timeout());
|
||||
mutex_unlock(&os->os_lock);
|
||||
}
|
||||
@ -2008,7 +2008,7 @@ void ocfs2_orphan_scan_start(struct ocfs2_super *osb)
|
||||
atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
|
||||
else {
|
||||
atomic_set(&os->os_state, ORPHAN_SCAN_ACTIVE);
|
||||
queue_delayed_work(ocfs2_wq, &os->os_orphan_scan_work,
|
||||
queue_delayed_work(osb->ocfs2_wq, &os->os_orphan_scan_work,
|
||||
ocfs2_orphan_scan_timeout());
|
||||
}
|
||||
}
|
||||
|
@ -386,7 +386,7 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
|
||||
struct ocfs2_dinode *alloc = NULL;
|
||||
|
||||
cancel_delayed_work(&osb->la_enable_wq);
|
||||
flush_workqueue(ocfs2_wq);
|
||||
flush_workqueue(osb->ocfs2_wq);
|
||||
|
||||
if (osb->local_alloc_state == OCFS2_LA_UNUSED)
|
||||
goto out;
|
||||
@ -1085,7 +1085,7 @@ static int ocfs2_recalc_la_window(struct ocfs2_super *osb,
|
||||
} else {
|
||||
osb->local_alloc_state = OCFS2_LA_DISABLED;
|
||||
}
|
||||
queue_delayed_work(ocfs2_wq, &osb->la_enable_wq,
|
||||
queue_delayed_work(osb->ocfs2_wq, &osb->la_enable_wq,
|
||||
OCFS2_LA_ENABLE_INTERVAL);
|
||||
goto out_unlock;
|
||||
}
|
||||
|
@ -104,8 +104,8 @@ static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh,
|
||||
if (page->index == last_index)
|
||||
len = ((size - 1) & ~PAGE_CACHE_MASK) + 1;
|
||||
|
||||
ret = ocfs2_write_begin_nolock(file, mapping, pos, len, 0, &locked_page,
|
||||
&fsdata, di_bh, page);
|
||||
ret = ocfs2_write_begin_nolock(mapping, pos, len, OCFS2_WRITE_MMAP,
|
||||
&locked_page, &fsdata, di_bh, page);
|
||||
if (ret) {
|
||||
if (ret != -ENOSPC)
|
||||
mlog_errno(ret);
|
||||
|
@ -464,6 +464,14 @@ struct ocfs2_super
|
||||
struct ocfs2_refcount_tree *osb_ref_tree_lru;
|
||||
|
||||
struct mutex system_file_mutex;
|
||||
|
||||
/*
|
||||
* OCFS2 needs to schedule several different types of work which
|
||||
* require cluster locking, disk I/O, recovery waits, etc. Since these
|
||||
* types of work tend to be heavy we avoid using the kernel events
|
||||
* workqueue and schedule on our own.
|
||||
*/
|
||||
struct workqueue_struct *ocfs2_wq;
|
||||
};
|
||||
|
||||
#define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info)
|
||||
|
@ -1450,28 +1450,20 @@ DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_remove_inode_range);
|
||||
|
||||
TRACE_EVENT(ocfs2_prepare_inode_for_write,
|
||||
TP_PROTO(unsigned long long ino, unsigned long long saved_pos,
|
||||
int appending, unsigned long count,
|
||||
int *direct_io, int *has_refcount),
|
||||
TP_ARGS(ino, saved_pos, appending, count, direct_io, has_refcount),
|
||||
unsigned long count),
|
||||
TP_ARGS(ino, saved_pos, count),
|
||||
TP_STRUCT__entry(
|
||||
__field(unsigned long long, ino)
|
||||
__field(unsigned long long, saved_pos)
|
||||
__field(int, appending)
|
||||
__field(unsigned long, count)
|
||||
__field(int, direct_io)
|
||||
__field(int, has_refcount)
|
||||
),
|
||||
TP_fast_assign(
|
||||
__entry->ino = ino;
|
||||
__entry->saved_pos = saved_pos;
|
||||
__entry->appending = appending;
|
||||
__entry->count = count;
|
||||
__entry->direct_io = direct_io ? *direct_io : -1;
|
||||
__entry->has_refcount = has_refcount ? *has_refcount : -1;
|
||||
),
|
||||
TP_printk("%llu %llu %d %lu %d %d", __entry->ino,
|
||||
__entry->saved_pos, __entry->appending, __entry->count,
|
||||
__entry->direct_io, __entry->has_refcount)
|
||||
TP_printk("%llu %llu %lu", __entry->ino,
|
||||
__entry->saved_pos, __entry->count)
|
||||
);
|
||||
|
||||
DEFINE_OCFS2_INT_EVENT(generic_file_aio_read_ret);
|
||||
|
@ -726,7 +726,7 @@ static int ocfs2_release_dquot(struct dquot *dquot)
|
||||
dqgrab(dquot);
|
||||
/* First entry on list -> queue work */
|
||||
if (llist_add(&OCFS2_DQUOT(dquot)->list, &osb->dquot_drop_list))
|
||||
queue_work(ocfs2_wq, &osb->dquot_drop_work);
|
||||
queue_work(osb->ocfs2_wq, &osb->dquot_drop_work);
|
||||
goto out;
|
||||
}
|
||||
status = ocfs2_lock_global_qf(oinfo, 1);
|
||||
|
@ -196,7 +196,7 @@ static int update_backups(struct inode * inode, u32 clusters, char *data)
|
||||
for (i = 0; i < OCFS2_MAX_BACKUP_SUPERBLOCKS; i++) {
|
||||
blkno = ocfs2_backup_super_blkno(inode->i_sb, i);
|
||||
cluster = ocfs2_blocks_to_clusters(inode->i_sb, blkno);
|
||||
if (cluster > clusters)
|
||||
if (cluster >= clusters)
|
||||
break;
|
||||
|
||||
ret = ocfs2_read_blocks_sync(osb, blkno, 1, &backup);
|
||||
|
@ -80,12 +80,6 @@ static struct kmem_cache *ocfs2_inode_cachep;
|
||||
struct kmem_cache *ocfs2_dquot_cachep;
|
||||
struct kmem_cache *ocfs2_qf_chunk_cachep;
|
||||
|
||||
/* OCFS2 needs to schedule several different types of work which
|
||||
* require cluster locking, disk I/O, recovery waits, etc. Since these
|
||||
* types of work tend to be heavy we avoid using the kernel events
|
||||
* workqueue and schedule on our own. */
|
||||
struct workqueue_struct *ocfs2_wq = NULL;
|
||||
|
||||
static struct dentry *ocfs2_debugfs_root;
|
||||
|
||||
MODULE_AUTHOR("Oracle");
|
||||
@ -1613,33 +1607,25 @@ static int __init ocfs2_init(void)
|
||||
if (status < 0)
|
||||
goto out2;
|
||||
|
||||
ocfs2_wq = create_singlethread_workqueue("ocfs2_wq");
|
||||
if (!ocfs2_wq) {
|
||||
status = -ENOMEM;
|
||||
goto out3;
|
||||
}
|
||||
|
||||
ocfs2_debugfs_root = debugfs_create_dir("ocfs2", NULL);
|
||||
if (!ocfs2_debugfs_root) {
|
||||
status = -ENOMEM;
|
||||
mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n");
|
||||
goto out4;
|
||||
goto out3;
|
||||
}
|
||||
|
||||
ocfs2_set_locking_protocol();
|
||||
|
||||
status = register_quota_format(&ocfs2_quota_format);
|
||||
if (status < 0)
|
||||
goto out4;
|
||||
goto out3;
|
||||
status = register_filesystem(&ocfs2_fs_type);
|
||||
if (!status)
|
||||
return 0;
|
||||
|
||||
unregister_quota_format(&ocfs2_quota_format);
|
||||
out4:
|
||||
destroy_workqueue(ocfs2_wq);
|
||||
debugfs_remove(ocfs2_debugfs_root);
|
||||
out3:
|
||||
debugfs_remove(ocfs2_debugfs_root);
|
||||
ocfs2_free_mem_caches();
|
||||
out2:
|
||||
exit_ocfs2_uptodate_cache();
|
||||
@ -1650,11 +1636,6 @@ out1:
|
||||
|
||||
static void __exit ocfs2_exit(void)
|
||||
{
|
||||
if (ocfs2_wq) {
|
||||
flush_workqueue(ocfs2_wq);
|
||||
destroy_workqueue(ocfs2_wq);
|
||||
}
|
||||
|
||||
unregister_quota_format(&ocfs2_quota_format);
|
||||
|
||||
debugfs_remove(ocfs2_debugfs_root);
|
||||
@ -1745,8 +1726,8 @@ static void ocfs2_inode_init_once(void *data)
|
||||
spin_lock_init(&oi->ip_lock);
|
||||
ocfs2_extent_map_init(&oi->vfs_inode);
|
||||
INIT_LIST_HEAD(&oi->ip_io_markers);
|
||||
INIT_LIST_HEAD(&oi->ip_unwritten_list);
|
||||
oi->ip_dir_start_lookup = 0;
|
||||
mutex_init(&oi->ip_unaligned_aio);
|
||||
init_rwsem(&oi->ip_alloc_sem);
|
||||
init_rwsem(&oi->ip_xattr_sem);
|
||||
mutex_init(&oi->ip_io_mutex);
|
||||
@ -2349,6 +2330,12 @@ static int ocfs2_initialize_super(struct super_block *sb,
|
||||
}
|
||||
cleancache_init_shared_fs(sb);
|
||||
|
||||
osb->ocfs2_wq = create_singlethread_workqueue("ocfs2_wq");
|
||||
if (!osb->ocfs2_wq) {
|
||||
status = -ENOMEM;
|
||||
mlog_errno(status);
|
||||
}
|
||||
|
||||
bail:
|
||||
return status;
|
||||
}
|
||||
@ -2536,6 +2523,12 @@ static void ocfs2_delete_osb(struct ocfs2_super *osb)
|
||||
{
|
||||
/* This function assumes that the caller has the main osb resource */
|
||||
|
||||
/* ocfs2_initialize_super() has already created this workqueue */
|
||||
if (osb->ocfs2_wq) {
|
||||
flush_workqueue(osb->ocfs2_wq);
|
||||
destroy_workqueue(osb->ocfs2_wq);
|
||||
}
|
||||
|
||||
ocfs2_free_slot_info(osb);
|
||||
|
||||
kfree(osb->osb_orphan_wipes);
|
||||
|
@ -26,8 +26,6 @@
|
||||
#ifndef OCFS2_SUPER_H
|
||||
#define OCFS2_SUPER_H
|
||||
|
||||
extern struct workqueue_struct *ocfs2_wq;
|
||||
|
||||
int ocfs2_publish_get_mount_state(struct ocfs2_super *osb,
|
||||
int node_num);
|
||||
|
||||
|
@ -456,7 +456,7 @@
|
||||
*(.entry.text) \
|
||||
VMLINUX_SYMBOL(__entry_text_end) = .;
|
||||
|
||||
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
||||
#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN)
|
||||
#define IRQENTRY_TEXT \
|
||||
ALIGN_FUNCTION(); \
|
||||
VMLINUX_SYMBOL(__irqentry_text_start) = .; \
|
||||
@ -466,6 +466,16 @@
|
||||
#define IRQENTRY_TEXT
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN)
|
||||
#define SOFTIRQENTRY_TEXT \
|
||||
ALIGN_FUNCTION(); \
|
||||
VMLINUX_SYMBOL(__softirqentry_text_start) = .; \
|
||||
*(.softirqentry.text) \
|
||||
VMLINUX_SYMBOL(__softirqentry_text_end) = .;
|
||||
#else
|
||||
#define SOFTIRQENTRY_TEXT
|
||||
#endif
|
||||
|
||||
/* Section used for early init (in .S files) */
|
||||
#define HEAD_TEXT *(.head.text)
|
||||
|
||||
|
@ -811,16 +811,6 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
|
||||
*/
|
||||
#define __notrace_funcgraph notrace
|
||||
|
||||
/*
|
||||
* We want to which function is an entrypoint of a hardirq.
|
||||
* That will help us to put a signal on output.
|
||||
*/
|
||||
#define __irq_entry __attribute__((__section__(".irqentry.text")))
|
||||
|
||||
/* Limits of hardirq entrypoints */
|
||||
extern char __irqentry_text_start[];
|
||||
extern char __irqentry_text_end[];
|
||||
|
||||
#define FTRACE_NOTRACE_DEPTH 65536
|
||||
#define FTRACE_RETFUNC_DEPTH 50
|
||||
#define FTRACE_RETSTACK_ALLOC_SIZE 32
|
||||
@ -857,7 +847,6 @@ static inline void unpause_graph_tracing(void)
|
||||
#else /* !CONFIG_FUNCTION_GRAPH_TRACER */
|
||||
|
||||
#define __notrace_funcgraph
|
||||
#define __irq_entry
|
||||
#define INIT_FTRACE_GRAPH
|
||||
|
||||
static inline void ftrace_graph_init_task(struct task_struct *t) { }
|
||||
|
@ -683,4 +683,24 @@ extern int early_irq_init(void);
|
||||
extern int arch_probe_nr_irqs(void);
|
||||
extern int arch_early_irq_init(void);
|
||||
|
||||
#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN)
|
||||
/*
|
||||
* We want to know which function is an entrypoint of a hardirq or a softirq.
|
||||
*/
|
||||
#define __irq_entry __attribute__((__section__(".irqentry.text")))
|
||||
#define __softirq_entry \
|
||||
__attribute__((__section__(".softirqentry.text")))
|
||||
|
||||
/* Limits of hardirq entrypoints */
|
||||
extern char __irqentry_text_start[];
|
||||
extern char __irqentry_text_end[];
|
||||
/* Limits of softirq entrypoints */
|
||||
extern char __softirqentry_text_start[];
|
||||
extern char __softirqentry_text_end[];
|
||||
|
||||
#else
|
||||
#define __irq_entry
|
||||
#define __softirq_entry
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
@ -48,19 +48,28 @@ void kasan_unpoison_task_stack(struct task_struct *task);
|
||||
void kasan_alloc_pages(struct page *page, unsigned int order);
|
||||
void kasan_free_pages(struct page *page, unsigned int order);
|
||||
|
||||
void kasan_cache_create(struct kmem_cache *cache, size_t *size,
|
||||
unsigned long *flags);
|
||||
|
||||
void kasan_poison_slab(struct page *page);
|
||||
void kasan_unpoison_object_data(struct kmem_cache *cache, void *object);
|
||||
void kasan_poison_object_data(struct kmem_cache *cache, void *object);
|
||||
|
||||
void kasan_kmalloc_large(const void *ptr, size_t size);
|
||||
void kasan_kmalloc_large(const void *ptr, size_t size, gfp_t flags);
|
||||
void kasan_kfree_large(const void *ptr);
|
||||
void kasan_kfree(void *ptr);
|
||||
void kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size);
|
||||
void kasan_krealloc(const void *object, size_t new_size);
|
||||
void kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size,
|
||||
gfp_t flags);
|
||||
void kasan_krealloc(const void *object, size_t new_size, gfp_t flags);
|
||||
|
||||
void kasan_slab_alloc(struct kmem_cache *s, void *object);
|
||||
void kasan_slab_alloc(struct kmem_cache *s, void *object, gfp_t flags);
|
||||
void kasan_slab_free(struct kmem_cache *s, void *object);
|
||||
|
||||
struct kasan_cache {
|
||||
int alloc_meta_offset;
|
||||
int free_meta_offset;
|
||||
};
|
||||
|
||||
int kasan_module_alloc(void *addr, size_t size);
|
||||
void kasan_free_shadow(const struct vm_struct *vm);
|
||||
|
||||
@ -76,20 +85,26 @@ static inline void kasan_disable_current(void) {}
|
||||
static inline void kasan_alloc_pages(struct page *page, unsigned int order) {}
|
||||
static inline void kasan_free_pages(struct page *page, unsigned int order) {}
|
||||
|
||||
static inline void kasan_cache_create(struct kmem_cache *cache,
|
||||
size_t *size,
|
||||
unsigned long *flags) {}
|
||||
|
||||
static inline void kasan_poison_slab(struct page *page) {}
|
||||
static inline void kasan_unpoison_object_data(struct kmem_cache *cache,
|
||||
void *object) {}
|
||||
static inline void kasan_poison_object_data(struct kmem_cache *cache,
|
||||
void *object) {}
|
||||
|
||||
static inline void kasan_kmalloc_large(void *ptr, size_t size) {}
|
||||
static inline void kasan_kmalloc_large(void *ptr, size_t size, gfp_t flags) {}
|
||||
static inline void kasan_kfree_large(const void *ptr) {}
|
||||
static inline void kasan_kfree(void *ptr) {}
|
||||
static inline void kasan_kmalloc(struct kmem_cache *s, const void *object,
|
||||
size_t size) {}
|
||||
static inline void kasan_krealloc(const void *object, size_t new_size) {}
|
||||
size_t size, gfp_t flags) {}
|
||||
static inline void kasan_krealloc(const void *object, size_t new_size,
|
||||
gfp_t flags) {}
|
||||
|
||||
static inline void kasan_slab_alloc(struct kmem_cache *s, void *object) {}
|
||||
static inline void kasan_slab_alloc(struct kmem_cache *s, void *object,
|
||||
gfp_t flags) {}
|
||||
static inline void kasan_slab_free(struct kmem_cache *s, void *object) {}
|
||||
|
||||
static inline int kasan_module_alloc(void *addr, size_t size) { return 0; }
|
||||
|
@ -1132,6 +1132,8 @@ struct zap_details {
|
||||
struct address_space *check_mapping; /* Check page->mapping if set */
|
||||
pgoff_t first_index; /* Lowest page->index to unmap */
|
||||
pgoff_t last_index; /* Highest page->index to unmap */
|
||||
bool ignore_dirty; /* Ignore dirty pages */
|
||||
bool check_swap_entries; /* Check also swap entries */
|
||||
};
|
||||
|
||||
struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
|
||||
|
@ -76,8 +76,6 @@ extern unsigned long oom_badness(struct task_struct *p,
|
||||
struct mem_cgroup *memcg, const nodemask_t *nodemask,
|
||||
unsigned long totalpages);
|
||||
|
||||
extern int oom_kills_count(void);
|
||||
extern void note_oom_kill(void);
|
||||
extern void oom_kill_process(struct oom_control *oc, struct task_struct *p,
|
||||
unsigned int points, unsigned long totalpages,
|
||||
struct mem_cgroup *memcg, const char *message);
|
||||
@ -91,7 +89,7 @@ extern enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
|
||||
|
||||
extern bool out_of_memory(struct oom_control *oc);
|
||||
|
||||
extern void exit_oom_victim(void);
|
||||
extern void exit_oom_victim(struct task_struct *tsk);
|
||||
|
||||
extern int register_oom_notifier(struct notifier_block *nb);
|
||||
extern int unregister_oom_notifier(struct notifier_block *nb);
|
||||
|
@ -426,6 +426,7 @@ extern signed long schedule_timeout(signed long timeout);
|
||||
extern signed long schedule_timeout_interruptible(signed long timeout);
|
||||
extern signed long schedule_timeout_killable(signed long timeout);
|
||||
extern signed long schedule_timeout_uninterruptible(signed long timeout);
|
||||
extern signed long schedule_timeout_idle(signed long timeout);
|
||||
asmlinkage void schedule(void);
|
||||
extern void schedule_preempt_disabled(void);
|
||||
|
||||
@ -1848,6 +1849,9 @@ struct task_struct {
|
||||
unsigned long task_state_change;
|
||||
#endif
|
||||
int pagefault_disabled;
|
||||
#ifdef CONFIG_MMU
|
||||
struct task_struct *oom_reaper_list;
|
||||
#endif
|
||||
/* CPU-specific state of this task */
|
||||
struct thread_struct thread;
|
||||
/*
|
||||
|
@ -92,6 +92,12 @@
|
||||
# define SLAB_ACCOUNT 0x00000000UL
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_KASAN
|
||||
#define SLAB_KASAN 0x08000000UL
|
||||
#else
|
||||
#define SLAB_KASAN 0x00000000UL
|
||||
#endif
|
||||
|
||||
/* The following flags affect the page allocator grouping pages by mobility */
|
||||
#define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* Objects are reclaimable */
|
||||
#define SLAB_TEMPORARY SLAB_RECLAIM_ACCOUNT /* Objects are short-lived */
|
||||
@ -370,7 +376,7 @@ static __always_inline void *kmem_cache_alloc_trace(struct kmem_cache *s,
|
||||
{
|
||||
void *ret = kmem_cache_alloc(s, flags);
|
||||
|
||||
kasan_kmalloc(s, ret, size);
|
||||
kasan_kmalloc(s, ret, size, flags);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -381,7 +387,7 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s,
|
||||
{
|
||||
void *ret = kmem_cache_alloc_node(s, gfpflags, node);
|
||||
|
||||
kasan_kmalloc(s, ret, size);
|
||||
kasan_kmalloc(s, ret, size, gfpflags);
|
||||
return ret;
|
||||
}
|
||||
#endif /* CONFIG_TRACING */
|
||||
|
@ -76,8 +76,22 @@ struct kmem_cache {
|
||||
#ifdef CONFIG_MEMCG
|
||||
struct memcg_cache_params memcg_params;
|
||||
#endif
|
||||
#ifdef CONFIG_KASAN
|
||||
struct kasan_cache kasan_info;
|
||||
#endif
|
||||
|
||||
struct kmem_cache_node *node[MAX_NUMNODES];
|
||||
};
|
||||
|
||||
static inline void *nearest_obj(struct kmem_cache *cache, struct page *page,
|
||||
void *x) {
|
||||
void *object = x - (x - page->s_mem) % cache->size;
|
||||
void *last_object = page->s_mem + (cache->num - 1) * cache->size;
|
||||
|
||||
if (unlikely(object > last_object))
|
||||
return last_object;
|
||||
else
|
||||
return object;
|
||||
}
|
||||
|
||||
#endif /* _LINUX_SLAB_DEF_H */
|
||||
|
@ -130,4 +130,15 @@ static inline void *virt_to_obj(struct kmem_cache *s,
|
||||
void object_err(struct kmem_cache *s, struct page *page,
|
||||
u8 *object, char *reason);
|
||||
|
||||
static inline void *nearest_obj(struct kmem_cache *cache, struct page *page,
|
||||
void *x) {
|
||||
void *object = x - (x - page_address(page)) % cache->size;
|
||||
void *last_object = page_address(page) +
|
||||
(page->objects - 1) * cache->size;
|
||||
if (unlikely(object > last_object))
|
||||
return last_object;
|
||||
else
|
||||
return object;
|
||||
}
|
||||
|
||||
#endif /* _LINUX_SLUB_DEF_H */
|
||||
|
32
include/linux/stackdepot.h
Normal file
32
include/linux/stackdepot.h
Normal file
@ -0,0 +1,32 @@
|
||||
/*
|
||||
* A generic stack depot implementation
|
||||
*
|
||||
* Author: Alexander Potapenko <glider@google.com>
|
||||
* Copyright (C) 2016 Google, Inc.
|
||||
*
|
||||
* Based on code by Dmitry Chernenkov.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _LINUX_STACKDEPOT_H
|
||||
#define _LINUX_STACKDEPOT_H
|
||||
|
||||
typedef u32 depot_stack_handle_t;
|
||||
|
||||
struct stack_trace;
|
||||
|
||||
depot_stack_handle_t depot_save_stack(struct stack_trace *trace, gfp_t flags);
|
||||
|
||||
void depot_fetch_stack(depot_stack_handle_t handle, struct stack_trace *trace);
|
||||
|
||||
#endif
|
@ -435,7 +435,7 @@ static void exit_mm(struct task_struct *tsk)
|
||||
mm_update_next_owner(mm);
|
||||
mmput(mm);
|
||||
if (test_thread_flag(TIF_MEMDIE))
|
||||
exit_oom_victim();
|
||||
exit_oom_victim(tsk);
|
||||
}
|
||||
|
||||
static struct task_struct *find_alive_thread(struct task_struct *p)
|
||||
|
@ -227,7 +227,7 @@ static inline bool lockdep_softirq_start(void) { return false; }
|
||||
static inline void lockdep_softirq_end(bool in_hardirq) { }
|
||||
#endif
|
||||
|
||||
asmlinkage __visible void __do_softirq(void)
|
||||
asmlinkage __visible void __softirq_entry __do_softirq(void)
|
||||
{
|
||||
unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
|
||||
unsigned long old_flags = current->flags;
|
||||
|
@ -1566,6 +1566,17 @@ signed long __sched schedule_timeout_uninterruptible(signed long timeout)
|
||||
}
|
||||
EXPORT_SYMBOL(schedule_timeout_uninterruptible);
|
||||
|
||||
/*
|
||||
* Like schedule_timeout_uninterruptible(), except this task will not contribute
|
||||
* to load average.
|
||||
*/
|
||||
signed long __sched schedule_timeout_idle(signed long timeout)
|
||||
{
|
||||
__set_current_state(TASK_IDLE);
|
||||
return schedule_timeout(timeout);
|
||||
}
|
||||
EXPORT_SYMBOL(schedule_timeout_idle);
|
||||
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
static void migrate_timer_list(struct tvec_base *new_base, struct hlist_head *head)
|
||||
{
|
||||
|
@ -8,6 +8,7 @@
|
||||
*/
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/ftrace.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/fs.h>
|
||||
|
||||
|
@ -536,4 +536,8 @@ config ARCH_HAS_PMEM_API
|
||||
config ARCH_HAS_MMIO_FLUSH
|
||||
bool
|
||||
|
||||
config STACKDEPOT
|
||||
bool
|
||||
select STACKTRACE
|
||||
|
||||
endmenu
|
||||
|
@ -5,8 +5,9 @@ if HAVE_ARCH_KASAN
|
||||
|
||||
config KASAN
|
||||
bool "KASan: runtime memory debugger"
|
||||
depends on SLUB_DEBUG
|
||||
depends on SLUB_DEBUG || (SLAB && !DEBUG_SLAB)
|
||||
select CONSTRUCTORS
|
||||
select STACKDEPOT if SLAB
|
||||
help
|
||||
Enables kernel address sanitizer - runtime memory debugger,
|
||||
designed to find out-of-bounds accesses and use-after-free bugs.
|
||||
@ -16,6 +17,8 @@ config KASAN
|
||||
This feature consumes about 1/8 of available memory and brings about
|
||||
~x3 performance slowdown.
|
||||
For better error detection enable CONFIG_STACKTRACE.
|
||||
Currently CONFIG_KASAN doesn't work with CONFIG_DEBUG_SLAB
|
||||
(the resulting kernel does not boot).
|
||||
|
||||
choice
|
||||
prompt "Instrumentation type"
|
||||
|
@ -181,6 +181,9 @@ obj-$(CONFIG_SG_SPLIT) += sg_split.o
|
||||
obj-$(CONFIG_STMP_DEVICE) += stmp_device.o
|
||||
obj-$(CONFIG_IRQ_POLL) += irq_poll.o
|
||||
|
||||
obj-$(CONFIG_STACKDEPOT) += stackdepot.o
|
||||
KASAN_SANITIZE_stackdepot.o := n
|
||||
|
||||
libfdt_files = fdt.o fdt_ro.o fdt_wip.o fdt_rw.o fdt_sw.o fdt_strerror.o \
|
||||
fdt_empty_tree.o
|
||||
$(foreach file, $(libfdt_files), \
|
||||
|
284
lib/stackdepot.c
Normal file
284
lib/stackdepot.c
Normal file
@ -0,0 +1,284 @@
|
||||
/*
|
||||
* Generic stack depot for storing stack traces.
|
||||
*
|
||||
* Some debugging tools need to save stack traces of certain events which can
|
||||
* be later presented to the user. For example, KASAN needs to save alloc and
|
||||
* free stacks for each object, but storing two stack traces per object
|
||||
* requires too much memory (e.g. SLUB_DEBUG needs 256 bytes per object for
|
||||
* that).
|
||||
*
|
||||
* Instead, stack depot maintains a hashtable of unique stacktraces. Since alloc
|
||||
* and free stacks repeat a lot, we save about 100x space.
|
||||
* Stacks are never removed from depot, so we store them contiguously one after
|
||||
* another in a contiguous memory allocation.
|
||||
*
|
||||
* Author: Alexander Potapenko <glider@google.com>
|
||||
* Copyright (C) 2016 Google, Inc.
|
||||
*
|
||||
* Based on code by Dmitry Chernenkov.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* version 2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/gfp.h>
|
||||
#include <linux/jhash.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/printk.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/stacktrace.h>
|
||||
#include <linux/stackdepot.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
#define DEPOT_STACK_BITS (sizeof(depot_stack_handle_t) * 8)
|
||||
|
||||
#define STACK_ALLOC_ORDER 2 /* 'Slab' size order for stack depot, 4 pages */
|
||||
#define STACK_ALLOC_SIZE (1LL << (PAGE_SHIFT + STACK_ALLOC_ORDER))
|
||||
#define STACK_ALLOC_ALIGN 4
|
||||
#define STACK_ALLOC_OFFSET_BITS (STACK_ALLOC_ORDER + PAGE_SHIFT - \
|
||||
STACK_ALLOC_ALIGN)
|
||||
#define STACK_ALLOC_INDEX_BITS (DEPOT_STACK_BITS - STACK_ALLOC_OFFSET_BITS)
|
||||
#define STACK_ALLOC_SLABS_CAP 1024
|
||||
#define STACK_ALLOC_MAX_SLABS \
|
||||
(((1LL << (STACK_ALLOC_INDEX_BITS)) < STACK_ALLOC_SLABS_CAP) ? \
|
||||
(1LL << (STACK_ALLOC_INDEX_BITS)) : STACK_ALLOC_SLABS_CAP)
|
||||
|
||||
/* The compact structure to store the reference to stacks. */
|
||||
union handle_parts {
|
||||
depot_stack_handle_t handle;
|
||||
struct {
|
||||
u32 slabindex : STACK_ALLOC_INDEX_BITS;
|
||||
u32 offset : STACK_ALLOC_OFFSET_BITS;
|
||||
};
|
||||
};
|
||||
|
||||
struct stack_record {
|
||||
struct stack_record *next; /* Link in the hashtable */
|
||||
u32 hash; /* Hash in the hastable */
|
||||
u32 size; /* Number of frames in the stack */
|
||||
union handle_parts handle;
|
||||
unsigned long entries[1]; /* Variable-sized array of entries. */
|
||||
};
|
||||
|
||||
static void *stack_slabs[STACK_ALLOC_MAX_SLABS];
|
||||
|
||||
static int depot_index;
|
||||
static int next_slab_inited;
|
||||
static size_t depot_offset;
|
||||
static DEFINE_SPINLOCK(depot_lock);
|
||||
|
||||
static bool init_stack_slab(void **prealloc)
|
||||
{
|
||||
if (!*prealloc)
|
||||
return false;
|
||||
/*
|
||||
* This smp_load_acquire() pairs with smp_store_release() to
|
||||
* |next_slab_inited| below and in depot_alloc_stack().
|
||||
*/
|
||||
if (smp_load_acquire(&next_slab_inited))
|
||||
return true;
|
||||
if (stack_slabs[depot_index] == NULL) {
|
||||
stack_slabs[depot_index] = *prealloc;
|
||||
} else {
|
||||
stack_slabs[depot_index + 1] = *prealloc;
|
||||
/*
|
||||
* This smp_store_release pairs with smp_load_acquire() from
|
||||
* |next_slab_inited| above and in depot_save_stack().
|
||||
*/
|
||||
smp_store_release(&next_slab_inited, 1);
|
||||
}
|
||||
*prealloc = NULL;
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Allocation of a new stack in raw storage */
|
||||
static struct stack_record *depot_alloc_stack(unsigned long *entries, int size,
|
||||
u32 hash, void **prealloc, gfp_t alloc_flags)
|
||||
{
|
||||
int required_size = offsetof(struct stack_record, entries) +
|
||||
sizeof(unsigned long) * size;
|
||||
struct stack_record *stack;
|
||||
|
||||
required_size = ALIGN(required_size, 1 << STACK_ALLOC_ALIGN);
|
||||
|
||||
if (unlikely(depot_offset + required_size > STACK_ALLOC_SIZE)) {
|
||||
if (unlikely(depot_index + 1 >= STACK_ALLOC_MAX_SLABS)) {
|
||||
WARN_ONCE(1, "Stack depot reached limit capacity");
|
||||
return NULL;
|
||||
}
|
||||
depot_index++;
|
||||
depot_offset = 0;
|
||||
/*
|
||||
* smp_store_release() here pairs with smp_load_acquire() from
|
||||
* |next_slab_inited| in depot_save_stack() and
|
||||
* init_stack_slab().
|
||||
*/
|
||||
if (depot_index + 1 < STACK_ALLOC_MAX_SLABS)
|
||||
smp_store_release(&next_slab_inited, 0);
|
||||
}
|
||||
init_stack_slab(prealloc);
|
||||
if (stack_slabs[depot_index] == NULL)
|
||||
return NULL;
|
||||
|
||||
stack = stack_slabs[depot_index] + depot_offset;
|
||||
|
||||
stack->hash = hash;
|
||||
stack->size = size;
|
||||
stack->handle.slabindex = depot_index;
|
||||
stack->handle.offset = depot_offset >> STACK_ALLOC_ALIGN;
|
||||
memcpy(stack->entries, entries, size * sizeof(unsigned long));
|
||||
depot_offset += required_size;
|
||||
|
||||
return stack;
|
||||
}
|
||||
|
||||
#define STACK_HASH_ORDER 20
|
||||
#define STACK_HASH_SIZE (1L << STACK_HASH_ORDER)
|
||||
#define STACK_HASH_MASK (STACK_HASH_SIZE - 1)
|
||||
#define STACK_HASH_SEED 0x9747b28c
|
||||
|
||||
static struct stack_record *stack_table[STACK_HASH_SIZE] = {
|
||||
[0 ... STACK_HASH_SIZE - 1] = NULL
|
||||
};
|
||||
|
||||
/* Calculate hash for a stack */
|
||||
static inline u32 hash_stack(unsigned long *entries, unsigned int size)
|
||||
{
|
||||
return jhash2((u32 *)entries,
|
||||
size * sizeof(unsigned long) / sizeof(u32),
|
||||
STACK_HASH_SEED);
|
||||
}
|
||||
|
||||
/* Find a stack that is equal to the one stored in entries in the hash */
|
||||
static inline struct stack_record *find_stack(struct stack_record *bucket,
|
||||
unsigned long *entries, int size,
|
||||
u32 hash)
|
||||
{
|
||||
struct stack_record *found;
|
||||
|
||||
for (found = bucket; found; found = found->next) {
|
||||
if (found->hash == hash &&
|
||||
found->size == size &&
|
||||
!memcmp(entries, found->entries,
|
||||
size * sizeof(unsigned long))) {
|
||||
return found;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
 * depot_fetch_stack - look up a stored stack trace by its depot handle.
 * @handle: handle as returned by depot_save_stack().
 * @trace: filled in to describe the stored entries; the entries are not
 *         copied, @trace->entries points into the depot's storage.
 *
 * If the slab index encoded in @handle lies outside stack_slabs[] or refers
 * to a slab that has no memory, @trace is set to an empty trace instead of
 * reading past the array or dereferencing a NULL-based pointer.
 */
void depot_fetch_stack(depot_stack_handle_t handle, struct stack_trace *trace)
{
	union handle_parts parts = { .handle = handle };
	size_t offset = parts.offset << STACK_ALLOC_ALIGN;
	struct stack_record *stack;
	void *slab = NULL;

	trace->skip = 0;

	/*
	 * The slabindex bitfield may encode values beyond
	 * STACK_ALLOC_MAX_SLABS, so validate it before indexing.
	 */
	if (parts.slabindex < STACK_ALLOC_MAX_SLABS)
		slab = stack_slabs[parts.slabindex];

	if (!slab) {
		trace->nr_entries = trace->max_entries = 0;
		trace->entries = NULL;
		return;
	}

	stack = slab + offset;
	trace->nr_entries = trace->max_entries = stack->size;
	trace->entries = stack->entries;
}
|
||||
|
||||
/**
|
||||
* depot_save_stack - save stack in a stack depot.
|
||||
* @trace - the stacktrace to save.
|
||||
* @alloc_flags - flags for allocating additional memory if required.
|
||||
*
|
||||
* Returns the handle of the stack struct stored in depot.
|
||||
*/
|
||||
depot_stack_handle_t depot_save_stack(struct stack_trace *trace,
|
||||
gfp_t alloc_flags)
|
||||
{
|
||||
u32 hash;
|
||||
depot_stack_handle_t retval = 0;
|
||||
struct stack_record *found = NULL, **bucket;
|
||||
unsigned long flags;
|
||||
struct page *page = NULL;
|
||||
void *prealloc = NULL;
|
||||
|
||||
if (unlikely(trace->nr_entries == 0))
|
||||
goto fast_exit;
|
||||
|
||||
hash = hash_stack(trace->entries, trace->nr_entries);
|
||||
/* Bad luck, we won't store this stack. */
|
||||
if (hash == 0)
|
||||
goto exit;
|
||||
|
||||
bucket = &stack_table[hash & STACK_HASH_MASK];
|
||||
|
||||
/*
|
||||
* Fast path: look the stack trace up without locking.
|
||||
* The smp_load_acquire() here pairs with smp_store_release() to
|
||||
* |bucket| below.
|
||||
*/
|
||||
found = find_stack(smp_load_acquire(bucket), trace->entries,
|
||||
trace->nr_entries, hash);
|
||||
if (found)
|
||||
goto exit;
|
||||
|
||||
/*
|
||||
* Check if the current or the next stack slab need to be initialized.
|
||||
* If so, allocate the memory - we won't be able to do that under the
|
||||
* lock.
|
||||
*
|
||||
* The smp_load_acquire() here pairs with smp_store_release() to
|
||||
* |next_slab_inited| in depot_alloc_stack() and init_stack_slab().
|
||||
*/
|
||||
if (unlikely(!smp_load_acquire(&next_slab_inited))) {
|
||||
/*
|
||||
* Zero out zone modifiers, as we don't have specific zone
|
||||
* requirements. Keep the flags related to allocation in atomic
|
||||
* contexts and I/O.
|
||||
*/
|
||||
alloc_flags &= ~GFP_ZONEMASK;
|
||||
alloc_flags &= (GFP_ATOMIC | GFP_KERNEL);
|
||||
page = alloc_pages(alloc_flags, STACK_ALLOC_ORDER);
|
||||
if (page)
|
||||
prealloc = page_address(page);
|
||||
}
|
||||
|
||||
spin_lock_irqsave(&depot_lock, flags);
|
||||
|
||||
found = find_stack(*bucket, trace->entries, trace->nr_entries, hash);
|
||||
if (!found) {
|
||||
struct stack_record *new =
|
||||
depot_alloc_stack(trace->entries, trace->nr_entries,
|
||||
hash, &prealloc, alloc_flags);
|
||||
if (new) {
|
||||
new->next = *bucket;
|
||||
/*
|
||||
* This smp_store_release() pairs with
|
||||
* smp_load_acquire() from |bucket| above.
|
||||
*/
|
||||
smp_store_release(bucket, new);
|
||||
found = new;
|
||||
}
|
||||
} else if (prealloc) {
|
||||
/*
|
||||
* We didn't need to store this stack trace, but let's keep
|
||||
* the preallocated memory for the future.
|
||||
*/
|
||||
WARN_ON(!init_stack_slab(&prealloc));
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(&depot_lock, flags);
|
||||
exit:
|
||||
if (prealloc) {
|
||||
/* Nobody used this memory, ok to free it. */
|
||||
free_pages((unsigned long)prealloc, STACK_ALLOC_ORDER);
|
||||
}
|
||||
if (found)
|
||||
retval = found->handle.handle;
|
||||
fast_exit:
|
||||
return retval;
|
||||
}
|
@ -65,11 +65,34 @@ static noinline void __init kmalloc_node_oob_right(void)
|
||||
kfree(ptr);
|
||||
}
|
||||
|
||||
static noinline void __init kmalloc_large_oob_right(void)
|
||||
#ifdef CONFIG_SLUB
|
||||
static noinline void __init kmalloc_pagealloc_oob_right(void)
{
	/*
	 * Ten bytes more than the largest kmalloc cache: the request does
	 * not fit into a SLUB cache, which triggers the page allocator
	 * fallback.
	 */
	size_t len = KMALLOC_MAX_CACHE_SIZE + 10;
	char *buf;

	pr_info("kmalloc pagealloc allocation: out-of-bounds to right\n");
	buf = kmalloc(len, GFP_KERNEL);
	if (buf) {
		/* Deliberate write one byte past the end of the buffer. */
		buf[len] = 0;
		kfree(buf);
	} else {
		pr_err("Allocation failed\n");
	}
}
|
||||
#endif
|
||||
|
||||
static noinline void __init kmalloc_large_oob_right(void)
|
||||
{
|
||||
char *ptr;
|
||||
size_t size = KMALLOC_MAX_CACHE_SIZE - 256;
|
||||
/* Allocate a chunk that is large enough, but still fits into a slab
|
||||
* and does not trigger the page allocator fallback in SLUB.
|
||||
*/
|
||||
pr_info("kmalloc large allocation: out-of-bounds to right\n");
|
||||
ptr = kmalloc(size, GFP_KERNEL);
|
||||
if (!ptr) {
|
||||
@ -271,6 +294,8 @@ static noinline void __init kmalloc_uaf2(void)
|
||||
}
|
||||
|
||||
ptr1[40] = 'x';
|
||||
if (ptr1 == ptr2)
|
||||
pr_err("Could not detect use-after-free: ptr1 == ptr2\n");
|
||||
kfree(ptr2);
|
||||
}
|
||||
|
||||
@ -324,6 +349,9 @@ static int __init kmalloc_tests_init(void)
|
||||
kmalloc_oob_right();
|
||||
kmalloc_oob_left();
|
||||
kmalloc_node_oob_right();
|
||||
#ifdef CONFIG_SLUB
|
||||
kmalloc_pagealloc_oob_right();
|
||||
#endif
|
||||
kmalloc_large_oob_right();
|
||||
kmalloc_oob_krealloc_more();
|
||||
kmalloc_oob_krealloc_less();
|
||||
|
@ -3,6 +3,7 @@
|
||||
#
|
||||
|
||||
KASAN_SANITIZE_slab_common.o := n
|
||||
KASAN_SANITIZE_slab.o := n
|
||||
KASAN_SANITIZE_slub.o := n
|
||||
|
||||
# These files are disabled because they produce non-interesting and/or
|
||||
|
@ -1840,15 +1840,16 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
|
||||
ssize_t retval = 0;
|
||||
loff_t *ppos = &iocb->ki_pos;
|
||||
loff_t pos = *ppos;
|
||||
size_t count = iov_iter_count(iter);
|
||||
|
||||
if (!count)
|
||||
goto out; /* skip atime */
|
||||
|
||||
if (iocb->ki_flags & IOCB_DIRECT) {
|
||||
struct address_space *mapping = file->f_mapping;
|
||||
struct inode *inode = mapping->host;
|
||||
size_t count = iov_iter_count(iter);
|
||||
loff_t size;
|
||||
|
||||
if (!count)
|
||||
goto out; /* skip atime */
|
||||
size = i_size_read(inode);
|
||||
retval = filemap_write_and_wait_range(mapping, pos,
|
||||
pos + count - 1);
|
||||
|
@ -2578,7 +2578,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
|
||||
}
|
||||
khugepaged_node_load[node]++;
|
||||
if (!PageLRU(page)) {
|
||||
result = SCAN_SCAN_ABORT;
|
||||
result = SCAN_PAGE_LRU;
|
||||
goto out_unmap;
|
||||
}
|
||||
if (PageLocked(page)) {
|
||||
|
@ -38,6 +38,11 @@
|
||||
void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
|
||||
unsigned long floor, unsigned long ceiling);
|
||||
|
||||
void unmap_page_range(struct mmu_gather *tlb,
|
||||
struct vm_area_struct *vma,
|
||||
unsigned long addr, unsigned long end,
|
||||
struct zap_details *details);
|
||||
|
||||
extern int __do_page_cache_readahead(struct address_space *mapping,
|
||||
struct file *filp, pgoff_t offset, unsigned long nr_to_read,
|
||||
unsigned long lookahead_size);
|
||||
|
162
mm/kasan/kasan.c
162
mm/kasan/kasan.c
@ -17,7 +17,9 @@
|
||||
#define DISABLE_BRANCH_PROFILING
|
||||
|
||||
#include <linux/export.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/kasan.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/kmemleak.h>
|
||||
#include <linux/linkage.h>
|
||||
@ -32,7 +34,6 @@
|
||||
#include <linux/string.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/kasan.h>
|
||||
|
||||
#include "kasan.h"
|
||||
#include "../slab.h"
|
||||
@ -334,6 +335,59 @@ void kasan_free_pages(struct page *page, unsigned int order)
|
||||
KASAN_FREE_PAGE);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SLAB
|
||||
/*
 * Pick a redzone size for an object of @object_size bytes.
 *
 * The policy is adaptive and taken from the userspace AddressSanitizer
 * runtime: the larger the allocation, the larger the redzone.  Sizes step
 * from 16 bytes up to 2048 bytes; each threshold is a total-size limit
 * minus the redzone used at that step.
 */
static size_t optimal_redzone(size_t object_size)
{
	if (object_size <= 64 - 16)
		return 16;
	if (object_size <= 128 - 32)
		return 32;
	if (object_size <= 512 - 64)
		return 64;
	if (object_size <= 4096 - 128)
		return 128;
	if (object_size <= (1 << 14) - 256)
		return 256;
	if (object_size <= (1 << 15) - 512)
		return 512;
	if (object_size <= (1 << 16) - 1024)
		return 1024;

	return 2048;
}
|
||||
|
||||
/*
 * Reserve room in a cache's objects for KASAN metadata and redzone.
 *
 * @cache: cache being created
 * @size:  in/out object size; grown by the alloc meta, optionally the free
 *         meta, and enough padding to reach the optimal redzone
 * @flags: in/out cache flags; SLAB_KASAN is set when metadata is added
 *
 * Caches whose size is already too close to KMALLOC_MAX_CACHE_SIZE are left
 * untouched (no metadata, SLAB_KASAN not set).
 */
void kasan_cache_create(struct kmem_cache *cache, size_t *size,
			unsigned long *flags)
{
	const size_t rz = optimal_redzone(cache->object_size);
	size_t total = *size;
	int redzone_adjust;

	/*
	 * Make sure the adjusted size is still less than
	 * KMALLOC_MAX_CACHE_SIZE.
	 * TODO: this check is only useful for SLAB, but not SLUB. We'll need
	 * to skip it for SLUB when it starts using kasan_cache_create().
	 */
	if (total > KMALLOC_MAX_CACHE_SIZE -
		    sizeof(struct kasan_alloc_meta) -
		    sizeof(struct kasan_free_meta))
		return;

	*flags |= SLAB_KASAN;

	/* The alloc meta goes right after the current end of the object. */
	cache->kasan_info.alloc_meta_offset = total;
	total += sizeof(struct kasan_alloc_meta);

	/*
	 * A separate free meta slot is added for RCU-destroyed caches,
	 * caches with a constructor, and objects smaller than the free meta.
	 */
	if (cache->flags & SLAB_DESTROY_BY_RCU || cache->ctor ||
	    cache->object_size < sizeof(struct kasan_free_meta)) {
		cache->kasan_info.free_meta_offset = total;
		total += sizeof(struct kasan_free_meta);
	}

	/* Pad up to the optimal redzone if the metadata falls short of it. */
	redzone_adjust = rz - (total - cache->object_size);
	if (redzone_adjust > 0)
		total += redzone_adjust;

	*size = min(KMALLOC_MAX_CACHE_SIZE,
		    max(total, cache->object_size + rz));
}
|
||||
#endif
|
||||
|
||||
void kasan_poison_slab(struct page *page)
|
||||
{
|
||||
kasan_poison_shadow(page_address(page),
|
||||
@ -351,11 +405,81 @@ void kasan_poison_object_data(struct kmem_cache *cache, void *object)
|
||||
kasan_poison_shadow(object,
|
||||
round_up(cache->object_size, KASAN_SHADOW_SCALE_SIZE),
|
||||
KASAN_KMALLOC_REDZONE);
|
||||
#ifdef CONFIG_SLAB
|
||||
if (cache->flags & SLAB_KASAN) {
|
||||
struct kasan_alloc_meta *alloc_info =
|
||||
get_alloc_info(cache, object);
|
||||
alloc_info->state = KASAN_STATE_INIT;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void kasan_slab_alloc(struct kmem_cache *cache, void *object)
|
||||
#ifdef CONFIG_SLAB
|
||||
static inline int in_irqentry_text(unsigned long ptr)
|
||||
{
|
||||
kasan_kmalloc(cache, object, cache->object_size);
|
||||
return (ptr >= (unsigned long)&__irqentry_text_start &&
|
||||
ptr < (unsigned long)&__irqentry_text_end) ||
|
||||
(ptr >= (unsigned long)&__softirqentry_text_start &&
|
||||
ptr < (unsigned long)&__softirqentry_text_end);
|
||||
}
|
||||
|
||||
static inline void filter_irq_stacks(struct stack_trace *trace)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (!trace->nr_entries)
|
||||
return;
|
||||
for (i = 0; i < trace->nr_entries; i++)
|
||||
if (in_irqentry_text(trace->entries[i])) {
|
||||
/* Include the irqentry function into the stack. */
|
||||
trace->nr_entries = i + 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Capture the current stack trace (at most KASAN_STACK_DEPTH entries), cut
 * it at the first IRQ entry function, drop a trailing ULONG_MAX entry if
 * there is one, and store the result in the stack depot.
 *
 * @flags is passed through to depot_save_stack().
 * Returns the handle produced by depot_save_stack().
 */
static inline depot_stack_handle_t save_stack(gfp_t flags)
{
	unsigned long entries[KASAN_STACK_DEPTH];
	struct stack_trace trace = {
		.entries = entries,
		.max_entries = KASAN_STACK_DEPTH,
		.nr_entries = 0,
		.skip = 0
	};

	save_stack_trace(&trace);
	filter_irq_stacks(&trace);

	if (trace.nr_entries != 0) {
		unsigned long last = trace.entries[trace.nr_entries - 1];

		/* Strip the trailing ULONG_MAX entry, if present. */
		if (last == ULONG_MAX)
			trace.nr_entries--;
	}

	return depot_save_stack(&trace, flags);
}
|
||||
|
||||
static inline void set_track(struct kasan_track *track, gfp_t flags)
|
||||
{
|
||||
track->pid = current->pid;
|
||||
track->stack = save_stack(flags);
|
||||
}
|
||||
|
||||
struct kasan_alloc_meta *get_alloc_info(struct kmem_cache *cache,
|
||||
const void *object)
|
||||
{
|
||||
BUILD_BUG_ON(sizeof(struct kasan_alloc_meta) > 32);
|
||||
return (void *)object + cache->kasan_info.alloc_meta_offset;
|
||||
}
|
||||
|
||||
struct kasan_free_meta *get_free_info(struct kmem_cache *cache,
|
||||
const void *object)
|
||||
{
|
||||
BUILD_BUG_ON(sizeof(struct kasan_free_meta) > 32);
|
||||
return (void *)object + cache->kasan_info.free_meta_offset;
|
||||
}
|
||||
#endif
|
||||
|
||||
void kasan_slab_alloc(struct kmem_cache *cache, void *object, gfp_t flags)
|
||||
{
|
||||
kasan_kmalloc(cache, object, cache->object_size, flags);
|
||||
}
|
||||
|
||||
void kasan_slab_free(struct kmem_cache *cache, void *object)
|
||||
@ -367,10 +491,22 @@ void kasan_slab_free(struct kmem_cache *cache, void *object)
|
||||
if (unlikely(cache->flags & SLAB_DESTROY_BY_RCU))
|
||||
return;
|
||||
|
||||
#ifdef CONFIG_SLAB
	if (cache->flags & SLAB_KASAN) {
		struct kasan_free_meta *free_info =
			get_free_info(cache, object);
		struct kasan_alloc_meta *alloc_info =
			get_alloc_info(cache, object);

		/* Mark the object freed and record who freed it. */
		alloc_info->state = KASAN_STATE_FREE;
		/*
		 * set_track() takes the gfp mask used when the stack is
		 * saved into the depot.  The free path has no caller-supplied
		 * mask, so pass one that never sleeps.
		 */
		set_track(&free_info->track, GFP_NOWAIT);
	}
#endif
|
||||
|
||||
kasan_poison_shadow(object, rounded_up_size, KASAN_KMALLOC_FREE);
|
||||
}
|
||||
|
||||
void kasan_kmalloc(struct kmem_cache *cache, const void *object, size_t size)
|
||||
void kasan_kmalloc(struct kmem_cache *cache, const void *object, size_t size,
|
||||
gfp_t flags)
|
||||
{
|
||||
unsigned long redzone_start;
|
||||
unsigned long redzone_end;
|
||||
@ -386,10 +522,20 @@ void kasan_kmalloc(struct kmem_cache *cache, const void *object, size_t size)
|
||||
kasan_unpoison_shadow(object, size);
|
||||
kasan_poison_shadow((void *)redzone_start, redzone_end - redzone_start,
|
||||
KASAN_KMALLOC_REDZONE);
|
||||
#ifdef CONFIG_SLAB
|
||||
if (cache->flags & SLAB_KASAN) {
|
||||
struct kasan_alloc_meta *alloc_info =
|
||||
get_alloc_info(cache, object);
|
||||
|
||||
alloc_info->state = KASAN_STATE_ALLOC;
|
||||
alloc_info->alloc_size = size;
|
||||
set_track(&alloc_info->track, flags);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
EXPORT_SYMBOL(kasan_kmalloc);
|
||||
|
||||
void kasan_kmalloc_large(const void *ptr, size_t size)
|
||||
void kasan_kmalloc_large(const void *ptr, size_t size, gfp_t flags)
|
||||
{
|
||||
struct page *page;
|
||||
unsigned long redzone_start;
|
||||
@ -408,7 +554,7 @@ void kasan_kmalloc_large(const void *ptr, size_t size)
|
||||
KASAN_PAGE_REDZONE);
|
||||
}
|
||||
|
||||
void kasan_krealloc(const void *object, size_t size)
|
||||
void kasan_krealloc(const void *object, size_t size, gfp_t flags)
|
||||
{
|
||||
struct page *page;
|
||||
|
||||
@ -418,9 +564,9 @@ void kasan_krealloc(const void *object, size_t size)
|
||||
page = virt_to_head_page(object);
|
||||
|
||||
if (unlikely(!PageSlab(page)))
|
||||
kasan_kmalloc_large(object, size);
|
||||
kasan_kmalloc_large(object, size, flags);
|
||||
else
|
||||
kasan_kmalloc(page->slab_cache, object, size);
|
||||
kasan_kmalloc(page->slab_cache, object, size, flags);
|
||||
}
|
||||
|
||||
void kasan_kfree(void *ptr)
|
||||
|
@ -2,6 +2,7 @@
|
||||
#define __MM_KASAN_KASAN_H
|
||||
|
||||
#include <linux/kasan.h>
|
||||
#include <linux/stackdepot.h>
|
||||
|
||||
#define KASAN_SHADOW_SCALE_SIZE (1UL << KASAN_SHADOW_SCALE_SHIFT)
|
||||
#define KASAN_SHADOW_MASK (KASAN_SHADOW_SCALE_SIZE - 1)
|
||||
@ -54,6 +55,42 @@ struct kasan_global {
|
||||
#endif
|
||||
};
|
||||
|
||||
/**
|
||||
* Structures to keep alloc and free tracks *
|
||||
*/
|
||||
|
||||
enum kasan_state {
|
||||
KASAN_STATE_INIT,
|
||||
KASAN_STATE_ALLOC,
|
||||
KASAN_STATE_FREE
|
||||
};
|
||||
|
||||
#define KASAN_STACK_DEPTH 64
|
||||
|
||||
struct kasan_track {
|
||||
u32 pid;
|
||||
depot_stack_handle_t stack;
|
||||
};
|
||||
|
||||
struct kasan_alloc_meta {
|
||||
struct kasan_track track;
|
||||
u32 state : 2; /* enum kasan_state */
|
||||
u32 alloc_size : 30;
|
||||
u32 reserved;
|
||||
};
|
||||
|
||||
struct kasan_free_meta {
|
||||
/* Allocator freelist pointer, unused by KASAN. */
|
||||
void **freelist;
|
||||
struct kasan_track track;
|
||||
};
|
||||
|
||||
struct kasan_alloc_meta *get_alloc_info(struct kmem_cache *cache,
|
||||
const void *object);
|
||||
struct kasan_free_meta *get_free_info(struct kmem_cache *cache,
|
||||
const void *object);
|
||||
|
||||
|
||||
static inline const void *kasan_shadow_to_mem(const void *shadow_addr)
|
||||
{
|
||||
return (void *)(((unsigned long)shadow_addr - KASAN_SHADOW_OFFSET)
|
||||
|
@ -18,6 +18,7 @@
|
||||
#include <linux/printk.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/stackdepot.h>
|
||||
#include <linux/stacktrace.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/types.h>
|
||||
@ -115,6 +116,53 @@ static inline bool init_task_stack_addr(const void *addr)
|
||||
sizeof(init_thread_union.stack));
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SLAB
|
||||
static void print_track(struct kasan_track *track)
|
||||
{
|
||||
pr_err("PID = %u\n", track->pid);
|
||||
if (track->stack) {
|
||||
struct stack_trace trace;
|
||||
|
||||
depot_fetch_stack(track->stack, &trace);
|
||||
print_stack_trace(&trace, 0);
|
||||
} else {
|
||||
pr_err("(stack is not available)\n");
|
||||
}
|
||||
}
|
||||
|
||||
static void object_err(struct kmem_cache *cache, struct page *page,
|
||||
void *object, char *unused_reason)
|
||||
{
|
||||
struct kasan_alloc_meta *alloc_info = get_alloc_info(cache, object);
|
||||
struct kasan_free_meta *free_info;
|
||||
|
||||
dump_stack();
|
||||
pr_err("Object at %p, in cache %s\n", object, cache->name);
|
||||
if (!(cache->flags & SLAB_KASAN))
|
||||
return;
|
||||
switch (alloc_info->state) {
|
||||
case KASAN_STATE_INIT:
|
||||
pr_err("Object not allocated yet\n");
|
||||
break;
|
||||
case KASAN_STATE_ALLOC:
|
||||
pr_err("Object allocated with size %u bytes.\n",
|
||||
alloc_info->alloc_size);
|
||||
pr_err("Allocation:\n");
|
||||
print_track(&alloc_info->track);
|
||||
break;
|
||||
case KASAN_STATE_FREE:
|
||||
pr_err("Object freed, allocated with size %u bytes\n",
|
||||
alloc_info->alloc_size);
|
||||
free_info = get_free_info(cache, object);
|
||||
pr_err("Allocation:\n");
|
||||
print_track(&alloc_info->track);
|
||||
pr_err("Deallocation:\n");
|
||||
print_track(&free_info->track);
|
||||
break;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
static void print_address_description(struct kasan_access_info *info)
|
||||
{
|
||||
const void *addr = info->access_addr;
|
||||
@ -126,17 +174,10 @@ static void print_address_description(struct kasan_access_info *info)
|
||||
if (PageSlab(page)) {
|
||||
void *object;
|
||||
struct kmem_cache *cache = page->slab_cache;
|
||||
void *last_object;
|
||||
|
||||
object = virt_to_obj(cache, page_address(page), addr);
|
||||
last_object = page_address(page) +
|
||||
page->objects * cache->size;
|
||||
|
||||
if (unlikely(object > last_object))
|
||||
object = last_object; /* we hit into padding */
|
||||
|
||||
object = nearest_obj(cache, page,
|
||||
(void *)info->access_addr);
|
||||
object_err(cache, page, object,
|
||||
"kasan: bad access detected");
|
||||
"kasan: bad access detected");
|
||||
return;
|
||||
}
|
||||
dump_page(page, "kasan: bad access detected");
|
||||
@ -146,7 +187,6 @@ static void print_address_description(struct kasan_access_info *info)
|
||||
if (!init_task_stack_addr(addr))
|
||||
pr_err("Address belongs to variable %pS\n", addr);
|
||||
}
|
||||
|
||||
dump_stack();
|
||||
}
|
||||
|
||||
|
17
mm/memory.c
17
mm/memory.c
@ -1102,6 +1102,12 @@ again:
|
||||
|
||||
if (!PageAnon(page)) {
|
||||
if (pte_dirty(ptent)) {
|
||||
/*
|
||||
* oom_reaper cannot tear down dirty
|
||||
* pages
|
||||
*/
|
||||
if (unlikely(details && details->ignore_dirty))
|
||||
continue;
|
||||
force_flush = 1;
|
||||
set_page_dirty(page);
|
||||
}
|
||||
@ -1120,8 +1126,8 @@ again:
|
||||
}
|
||||
continue;
|
||||
}
|
||||
/* If details->check_mapping, we leave swap entries. */
|
||||
if (unlikely(details))
|
||||
/* only check swap_entries if explicitly asked for in details */
|
||||
if (unlikely(details && !details->check_swap_entries))
|
||||
continue;
|
||||
|
||||
entry = pte_to_swp_entry(ptent);
|
||||
@ -1226,7 +1232,7 @@ static inline unsigned long zap_pud_range(struct mmu_gather *tlb,
|
||||
return addr;
|
||||
}
|
||||
|
||||
static void unmap_page_range(struct mmu_gather *tlb,
|
||||
void unmap_page_range(struct mmu_gather *tlb,
|
||||
struct vm_area_struct *vma,
|
||||
unsigned long addr, unsigned long end,
|
||||
struct zap_details *details)
|
||||
@ -1234,9 +1240,6 @@ static void unmap_page_range(struct mmu_gather *tlb,
|
||||
pgd_t *pgd;
|
||||
unsigned long next;
|
||||
|
||||
if (details && !details->check_mapping)
|
||||
details = NULL;
|
||||
|
||||
BUG_ON(addr >= end);
|
||||
tlb_start_vma(tlb, vma);
|
||||
pgd = pgd_offset(vma->vm_mm, addr);
|
||||
@ -2432,7 +2435,7 @@ static inline void unmap_mapping_range_tree(struct rb_root *root,
|
||||
void unmap_mapping_range(struct address_space *mapping,
|
||||
loff_t const holebegin, loff_t const holelen, int even_cows)
|
||||
{
|
||||
struct zap_details details;
|
||||
struct zap_details details = { };
|
||||
pgoff_t hba = holebegin >> PAGE_SHIFT;
|
||||
pgoff_t hlen = (holelen + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
||||
|
||||
|
16
mm/mempool.c
16
mm/mempool.c
@ -112,12 +112,12 @@ static void kasan_poison_element(mempool_t *pool, void *element)
|
||||
kasan_free_pages(element, (unsigned long)pool->pool_data);
|
||||
}
|
||||
|
||||
static void kasan_unpoison_element(mempool_t *pool, void *element)
|
||||
static void kasan_unpoison_element(mempool_t *pool, void *element, gfp_t flags)
|
||||
{
|
||||
if (pool->alloc == mempool_alloc_slab)
|
||||
kasan_slab_alloc(pool->pool_data, element);
|
||||
kasan_slab_alloc(pool->pool_data, element, flags);
|
||||
if (pool->alloc == mempool_kmalloc)
|
||||
kasan_krealloc(element, (size_t)pool->pool_data);
|
||||
kasan_krealloc(element, (size_t)pool->pool_data, flags);
|
||||
if (pool->alloc == mempool_alloc_pages)
|
||||
kasan_alloc_pages(element, (unsigned long)pool->pool_data);
|
||||
}
|
||||
@ -130,12 +130,12 @@ static void add_element(mempool_t *pool, void *element)
|
||||
pool->elements[pool->curr_nr++] = element;
|
||||
}
|
||||
|
||||
static void *remove_element(mempool_t *pool)
|
||||
static void *remove_element(mempool_t *pool, gfp_t flags)
|
||||
{
|
||||
void *element = pool->elements[--pool->curr_nr];
|
||||
|
||||
BUG_ON(pool->curr_nr < 0);
|
||||
kasan_unpoison_element(pool, element);
|
||||
kasan_unpoison_element(pool, element, flags);
|
||||
check_element(pool, element);
|
||||
return element;
|
||||
}
|
||||
@ -154,7 +154,7 @@ void mempool_destroy(mempool_t *pool)
|
||||
return;
|
||||
|
||||
while (pool->curr_nr) {
|
||||
void *element = remove_element(pool);
|
||||
void *element = remove_element(pool, GFP_KERNEL);
|
||||
pool->free(element, pool->pool_data);
|
||||
}
|
||||
kfree(pool->elements);
|
||||
@ -250,7 +250,7 @@ int mempool_resize(mempool_t *pool, int new_min_nr)
|
||||
spin_lock_irqsave(&pool->lock, flags);
|
||||
if (new_min_nr <= pool->min_nr) {
|
||||
while (new_min_nr < pool->curr_nr) {
|
||||
element = remove_element(pool);
|
||||
element = remove_element(pool, GFP_KERNEL);
|
||||
spin_unlock_irqrestore(&pool->lock, flags);
|
||||
pool->free(element, pool->pool_data);
|
||||
spin_lock_irqsave(&pool->lock, flags);
|
||||
@ -347,7 +347,7 @@ repeat_alloc:
|
||||
|
||||
spin_lock_irqsave(&pool->lock, flags);
|
||||
if (likely(pool->curr_nr)) {
|
||||
element = remove_element(pool);
|
||||
element = remove_element(pool, gfp_temp);
|
||||
spin_unlock_irqrestore(&pool->lock, flags);
|
||||
/* paired with rmb in mempool_free(), read comment there */
|
||||
smp_wmb();
|
||||
|
196
mm/oom_kill.c
196
mm/oom_kill.c
@ -35,6 +35,11 @@
|
||||
#include <linux/freezer.h>
|
||||
#include <linux/ftrace.h>
|
||||
#include <linux/ratelimit.h>
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/init.h>
|
||||
|
||||
#include <asm/tlb.h>
|
||||
#include "internal.h"
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include <trace/events/oom.h>
|
||||
@ -405,6 +410,172 @@ static DECLARE_WAIT_QUEUE_HEAD(oom_victims_wait);
|
||||
|
||||
bool oom_killer_disabled __read_mostly;
|
||||
|
||||
#define K(x) ((x) << (PAGE_SHIFT-10))
|
||||
|
||||
#ifdef CONFIG_MMU
|
||||
/*
|
||||
* OOM Reaper kernel thread which tries to reap the memory used by the OOM
|
||||
* victim (if that is possible) to help the OOM killer to move on.
|
||||
*/
|
||||
static struct task_struct *oom_reaper_th;
|
||||
static DECLARE_WAIT_QUEUE_HEAD(oom_reaper_wait);
|
||||
static struct task_struct *oom_reaper_list;
|
||||
static DEFINE_SPINLOCK(oom_reaper_lock);
|
||||
|
||||
|
||||
static bool __oom_reap_task(struct task_struct *tsk)
|
||||
{
|
||||
struct mmu_gather tlb;
|
||||
struct vm_area_struct *vma;
|
||||
struct mm_struct *mm;
|
||||
struct task_struct *p;
|
||||
struct zap_details details = {.check_swap_entries = true,
|
||||
.ignore_dirty = true};
|
||||
bool ret = true;
|
||||
|
||||
/*
|
||||
* Make sure we find the associated mm_struct even when the particular
|
||||
* thread has already terminated and cleared its mm.
|
||||
* We might have race with exit path so consider our work done if there
|
||||
* is no mm.
|
||||
*/
|
||||
p = find_lock_task_mm(tsk);
|
||||
if (!p)
|
||||
return true;
|
||||
|
||||
mm = p->mm;
|
||||
if (!atomic_inc_not_zero(&mm->mm_users)) {
|
||||
task_unlock(p);
|
||||
return true;
|
||||
}
|
||||
|
||||
task_unlock(p);
|
||||
|
||||
if (!down_read_trylock(&mm->mmap_sem)) {
|
||||
ret = false;
|
||||
goto out;
|
||||
}
|
||||
|
||||
tlb_gather_mmu(&tlb, mm, 0, -1);
|
||||
for (vma = mm->mmap ; vma; vma = vma->vm_next) {
|
||||
if (is_vm_hugetlb_page(vma))
|
||||
continue;
|
||||
|
||||
/*
|
||||
* mlocked VMAs require explicit munlocking before unmap.
|
||||
* Let's keep it simple here and skip such VMAs.
|
||||
*/
|
||||
if (vma->vm_flags & VM_LOCKED)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* Only anonymous pages have a good chance to be dropped
|
||||
* without additional steps which we cannot afford as we
|
||||
* are OOM already.
|
||||
*
|
||||
* We do not even care about fs backed pages because all
|
||||
* which are reclaimable have already been reclaimed and
|
||||
* we do not want to block exit_mmap by keeping mm ref
|
||||
* count elevated without a good reason.
|
||||
*/
|
||||
if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED))
|
||||
unmap_page_range(&tlb, vma, vma->vm_start, vma->vm_end,
|
||||
&details);
|
||||
}
|
||||
tlb_finish_mmu(&tlb, 0, -1);
|
||||
pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n",
|
||||
task_pid_nr(tsk), tsk->comm,
|
||||
K(get_mm_counter(mm, MM_ANONPAGES)),
|
||||
K(get_mm_counter(mm, MM_FILEPAGES)),
|
||||
K(get_mm_counter(mm, MM_SHMEMPAGES)));
|
||||
up_read(&mm->mmap_sem);
|
||||
|
||||
/*
|
||||
* Clear TIF_MEMDIE because the task shouldn't be sitting on a
|
||||
* reasonably reclaimable memory anymore. OOM killer can continue
|
||||
* by selecting other victim if unmapping hasn't led to any
|
||||
* improvements. This also means that selecting this task doesn't
|
||||
* make any sense.
|
||||
*/
|
||||
tsk->signal->oom_score_adj = OOM_SCORE_ADJ_MIN;
|
||||
exit_oom_victim(tsk);
|
||||
out:
|
||||
mmput(mm);
|
||||
return ret;
|
||||
}
|
||||
|
||||
#define MAX_OOM_REAP_RETRIES 10
|
||||
static void oom_reap_task(struct task_struct *tsk)
|
||||
{
|
||||
int attempts = 0;
|
||||
|
||||
/* Retry the down_read_trylock(mmap_sem) a few times */
|
||||
while (attempts++ < MAX_OOM_REAP_RETRIES && !__oom_reap_task(tsk))
|
||||
schedule_timeout_idle(HZ/10);
|
||||
|
||||
if (attempts > MAX_OOM_REAP_RETRIES) {
|
||||
pr_info("oom_reaper: unable to reap pid:%d (%s)\n",
|
||||
task_pid_nr(tsk), tsk->comm);
|
||||
debug_show_all_locks();
|
||||
}
|
||||
|
||||
/* Drop a reference taken by wake_oom_reaper */
|
||||
put_task_struct(tsk);
|
||||
}
|
||||
|
||||
static int oom_reaper(void *unused)
|
||||
{
|
||||
set_freezable();
|
||||
|
||||
while (true) {
|
||||
struct task_struct *tsk = NULL;
|
||||
|
||||
wait_event_freezable(oom_reaper_wait, oom_reaper_list != NULL);
|
||||
spin_lock(&oom_reaper_lock);
|
||||
if (oom_reaper_list != NULL) {
|
||||
tsk = oom_reaper_list;
|
||||
oom_reaper_list = tsk->oom_reaper_list;
|
||||
}
|
||||
spin_unlock(&oom_reaper_lock);
|
||||
|
||||
if (tsk)
|
||||
oom_reap_task(tsk);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void wake_oom_reaper(struct task_struct *tsk)
|
||||
{
|
||||
if (!oom_reaper_th || tsk->oom_reaper_list)
|
||||
return;
|
||||
|
||||
get_task_struct(tsk);
|
||||
|
||||
spin_lock(&oom_reaper_lock);
|
||||
tsk->oom_reaper_list = oom_reaper_list;
|
||||
oom_reaper_list = tsk;
|
||||
spin_unlock(&oom_reaper_lock);
|
||||
wake_up(&oom_reaper_wait);
|
||||
}
|
||||
|
||||
/*
 * Start the "oom_reaper" kernel thread.  A failure to start it is logged
 * and otherwise ignored: oom_reaper_th is reset to NULL and 0 is returned
 * in either case.
 */
static int __init oom_init(void)
{
	oom_reaper_th = kthread_run(oom_reaper, NULL, "oom_reaper");
	if (!IS_ERR(oom_reaper_th))
		return 0;

	pr_err("Unable to start OOM reaper %ld. Continuing regardless\n",
			PTR_ERR(oom_reaper_th));
	/* Leave a plain NULL behind so the thread reads as "not running". */
	oom_reaper_th = NULL;

	return 0;
}
|
||||
subsys_initcall(oom_init)
|
||||
#else
|
||||
static void wake_oom_reaper(struct task_struct *tsk)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* mark_oom_victim - mark the given task as OOM victim
|
||||
* @tsk: task to mark
|
||||
@ -431,9 +602,10 @@ void mark_oom_victim(struct task_struct *tsk)
|
||||
/**
|
||||
* exit_oom_victim - note the exit of an OOM victim
|
||||
*/
|
||||
void exit_oom_victim(void)
|
||||
void exit_oom_victim(struct task_struct *tsk)
|
||||
{
|
||||
clear_thread_flag(TIF_MEMDIE);
|
||||
if (!test_and_clear_tsk_thread_flag(tsk, TIF_MEMDIE))
|
||||
return;
|
||||
|
||||
if (!atomic_dec_return(&oom_victims))
|
||||
wake_up_all(&oom_victims_wait);
|
||||
@ -494,7 +666,6 @@ static bool process_shares_mm(struct task_struct *p, struct mm_struct *mm)
|
||||
return false;
|
||||
}
|
||||
|
||||
#define K(x) ((x) << (PAGE_SHIFT-10))
|
||||
/*
|
||||
* Must be called while holding a reference to p, which will be released upon
|
||||
* returning.
|
||||
@ -510,6 +681,7 @@ void oom_kill_process(struct oom_control *oc, struct task_struct *p,
|
||||
unsigned int victim_points = 0;
|
||||
static DEFINE_RATELIMIT_STATE(oom_rs, DEFAULT_RATELIMIT_INTERVAL,
|
||||
DEFAULT_RATELIMIT_BURST);
|
||||
bool can_oom_reap = true;
|
||||
|
||||
/*
|
||||
* If the task is already exiting, don't alarm the sysadmin or kill
|
||||
@ -600,17 +772,23 @@ void oom_kill_process(struct oom_control *oc, struct task_struct *p,
|
||||
continue;
|
||||
if (same_thread_group(p, victim))
|
||||
continue;
|
||||
if (unlikely(p->flags & PF_KTHREAD))
|
||||
if (unlikely(p->flags & PF_KTHREAD) || is_global_init(p) ||
|
||||
p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) {
|
||||
/*
|
||||
* We cannot use oom_reaper for the mm shared by this
|
||||
* process because it wouldn't get killed and so the
|
||||
* memory might be still used.
|
||||
*/
|
||||
can_oom_reap = false;
|
||||
continue;
|
||||
if (is_global_init(p))
|
||||
continue;
|
||||
if (p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
|
||||
continue;
|
||||
|
||||
}
|
||||
do_send_sig_info(SIGKILL, SEND_SIG_FORCED, p, true);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
if (can_oom_reap)
|
||||
wake_oom_reaper(victim);
|
||||
|
||||
mmdrop(mm);
|
||||
put_task_struct(victim);
|
||||
}
|
||||
|
@ -692,34 +692,28 @@ static inline void __free_one_page(struct page *page,
|
||||
unsigned long combined_idx;
|
||||
unsigned long uninitialized_var(buddy_idx);
|
||||
struct page *buddy;
|
||||
unsigned int max_order = MAX_ORDER;
|
||||
unsigned int max_order;
|
||||
|
||||
max_order = min_t(unsigned int, MAX_ORDER, pageblock_order + 1);
|
||||
|
||||
VM_BUG_ON(!zone_is_initialized(zone));
|
||||
VM_BUG_ON_PAGE(page->flags & PAGE_FLAGS_CHECK_AT_PREP, page);
|
||||
|
||||
VM_BUG_ON(migratetype == -1);
|
||||
if (is_migrate_isolate(migratetype)) {
|
||||
/*
|
||||
* We restrict max order of merging to prevent merge
|
||||
* between freepages on isolate pageblock and normal
|
||||
* pageblock. Without this, pageblock isolation
|
||||
* could cause incorrect freepage accounting.
|
||||
*/
|
||||
max_order = min_t(unsigned int, MAX_ORDER, pageblock_order + 1);
|
||||
} else {
|
||||
if (likely(!is_migrate_isolate(migratetype)))
|
||||
__mod_zone_freepage_state(zone, 1 << order, migratetype);
|
||||
}
|
||||
|
||||
page_idx = pfn & ((1 << max_order) - 1);
|
||||
page_idx = pfn & ((1 << MAX_ORDER) - 1);
|
||||
|
||||
VM_BUG_ON_PAGE(page_idx & ((1 << order) - 1), page);
|
||||
VM_BUG_ON_PAGE(bad_range(zone, page), page);
|
||||
|
||||
continue_merging:
|
||||
while (order < max_order - 1) {
|
||||
buddy_idx = __find_buddy_index(page_idx, order);
|
||||
buddy = page + (buddy_idx - page_idx);
|
||||
if (!page_is_buddy(page, buddy, order))
|
||||
break;
|
||||
goto done_merging;
|
||||
/*
|
||||
* Our buddy is free or it is CONFIG_DEBUG_PAGEALLOC guard page,
|
||||
* merge with it and move up one order.
|
||||
@ -736,6 +730,32 @@ static inline void __free_one_page(struct page *page,
|
||||
page_idx = combined_idx;
|
||||
order++;
|
||||
}
|
||||
if (max_order < MAX_ORDER) {
|
||||
/* If we are here, it means order is >= pageblock_order.
|
||||
* We want to prevent merge between freepages on isolate
|
||||
* pageblock and normal pageblock. Without this, pageblock
|
||||
* isolation could cause incorrect freepage or CMA accounting.
|
||||
*
|
||||
* We don't want to hit this code for the more frequent
|
||||
* low-order merging.
|
||||
*/
|
||||
if (unlikely(has_isolate_pageblock(zone))) {
|
||||
int buddy_mt;
|
||||
|
||||
buddy_idx = __find_buddy_index(page_idx, order);
|
||||
buddy = page + (buddy_idx - page_idx);
|
||||
buddy_mt = get_pageblock_migratetype(buddy);
|
||||
|
||||
if (migratetype != buddy_mt
|
||||
&& (is_migrate_isolate(migratetype) ||
|
||||
is_migrate_isolate(buddy_mt)))
|
||||
goto done_merging;
|
||||
}
|
||||
max_order++;
|
||||
goto continue_merging;
|
||||
}
|
||||
|
||||
done_merging:
|
||||
set_page_order(page, order);
|
||||
|
||||
/*
|
||||
|
42
mm/slab.c
42
mm/slab.c
@ -2086,6 +2086,8 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
|
||||
}
|
||||
#endif
|
||||
|
||||
kasan_cache_create(cachep, &size, &flags);
|
||||
|
||||
size = ALIGN(size, cachep->align);
|
||||
/*
|
||||
* We should restrict the number of objects in a slab to implement
|
||||
@ -2387,8 +2389,13 @@ static void cache_init_objs_debug(struct kmem_cache *cachep, struct page *page)
|
||||
* cache which they are a constructor for. Otherwise, deadlock.
|
||||
* They must also be threaded.
|
||||
*/
|
||||
if (cachep->ctor && !(cachep->flags & SLAB_POISON))
|
||||
if (cachep->ctor && !(cachep->flags & SLAB_POISON)) {
|
||||
kasan_unpoison_object_data(cachep,
|
||||
objp + obj_offset(cachep));
|
||||
cachep->ctor(objp + obj_offset(cachep));
|
||||
kasan_poison_object_data(
|
||||
cachep, objp + obj_offset(cachep));
|
||||
}
|
||||
|
||||
if (cachep->flags & SLAB_RED_ZONE) {
|
||||
if (*dbg_redzone2(cachep, objp) != RED_INACTIVE)
|
||||
@ -2409,6 +2416,7 @@ static void cache_init_objs(struct kmem_cache *cachep,
|
||||
struct page *page)
|
||||
{
|
||||
int i;
|
||||
void *objp;
|
||||
|
||||
cache_init_objs_debug(cachep, page);
|
||||
|
||||
@ -2419,8 +2427,12 @@ static void cache_init_objs(struct kmem_cache *cachep,
|
||||
|
||||
for (i = 0; i < cachep->num; i++) {
|
||||
/* constructor could break poison info */
|
||||
if (DEBUG == 0 && cachep->ctor)
|
||||
cachep->ctor(index_to_obj(cachep, page, i));
|
||||
if (DEBUG == 0 && cachep->ctor) {
|
||||
objp = index_to_obj(cachep, page, i);
|
||||
kasan_unpoison_object_data(cachep, objp);
|
||||
cachep->ctor(objp);
|
||||
kasan_poison_object_data(cachep, objp);
|
||||
}
|
||||
|
||||
set_free_obj(page, i, i);
|
||||
}
|
||||
@ -2550,6 +2562,7 @@ static int cache_grow(struct kmem_cache *cachep,
|
||||
|
||||
slab_map_pages(cachep, page, freelist);
|
||||
|
||||
kasan_poison_slab(page);
|
||||
cache_init_objs(cachep, page);
|
||||
|
||||
if (gfpflags_allow_blocking(local_flags))
|
||||
@ -3316,6 +3329,8 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp,
|
||||
{
|
||||
struct array_cache *ac = cpu_cache_get(cachep);
|
||||
|
||||
kasan_slab_free(cachep, objp);
|
||||
|
||||
check_irq_off();
|
||||
kmemleak_free_recursive(objp, cachep->flags);
|
||||
objp = cache_free_debugcheck(cachep, objp, caller);
|
||||
@ -3363,6 +3378,7 @@ void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
|
||||
{
|
||||
void *ret = slab_alloc(cachep, flags, _RET_IP_);
|
||||
|
||||
kasan_slab_alloc(cachep, ret, flags);
|
||||
trace_kmem_cache_alloc(_RET_IP_, ret,
|
||||
cachep->object_size, cachep->size, flags);
|
||||
|
||||
@ -3428,6 +3444,7 @@ kmem_cache_alloc_trace(struct kmem_cache *cachep, gfp_t flags, size_t size)
|
||||
|
||||
ret = slab_alloc(cachep, flags, _RET_IP_);
|
||||
|
||||
kasan_kmalloc(cachep, ret, size, flags);
|
||||
trace_kmalloc(_RET_IP_, ret,
|
||||
size, cachep->size, flags);
|
||||
return ret;
|
||||
@ -3451,6 +3468,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
|
||||
{
|
||||
void *ret = slab_alloc_node(cachep, flags, nodeid, _RET_IP_);
|
||||
|
||||
kasan_slab_alloc(cachep, ret, flags);
|
||||
trace_kmem_cache_alloc_node(_RET_IP_, ret,
|
||||
cachep->object_size, cachep->size,
|
||||
flags, nodeid);
|
||||
@ -3469,6 +3487,7 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *cachep,
|
||||
|
||||
ret = slab_alloc_node(cachep, flags, nodeid, _RET_IP_);
|
||||
|
||||
kasan_kmalloc(cachep, ret, size, flags);
|
||||
trace_kmalloc_node(_RET_IP_, ret,
|
||||
size, cachep->size,
|
||||
flags, nodeid);
|
||||
@ -3481,11 +3500,15 @@ static __always_inline void *
|
||||
__do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller)
|
||||
{
|
||||
struct kmem_cache *cachep;
|
||||
void *ret;
|
||||
|
||||
cachep = kmalloc_slab(size, flags);
|
||||
if (unlikely(ZERO_OR_NULL_PTR(cachep)))
|
||||
return cachep;
|
||||
return kmem_cache_alloc_node_trace(cachep, flags, node, size);
|
||||
ret = kmem_cache_alloc_node_trace(cachep, flags, node, size);
|
||||
kasan_kmalloc(cachep, ret, size, flags);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void *__kmalloc_node(size_t size, gfp_t flags, int node)
|
||||
@ -3519,6 +3542,7 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
|
||||
return cachep;
|
||||
ret = slab_alloc(cachep, flags, caller);
|
||||
|
||||
kasan_kmalloc(cachep, ret, size, flags);
|
||||
trace_kmalloc(caller, ret,
|
||||
size, cachep->size, flags);
|
||||
|
||||
@ -4290,10 +4314,18 @@ module_init(slab_proc_init);
|
||||
*/
|
||||
size_t ksize(const void *objp)
|
||||
{
|
||||
size_t size;
|
||||
|
||||
BUG_ON(!objp);
|
||||
if (unlikely(objp == ZERO_SIZE_PTR))
|
||||
return 0;
|
||||
|
||||
return virt_to_cache(objp)->object_size;
|
||||
size = virt_to_cache(objp)->object_size;
|
||||
/* We assume that ksize callers could use the whole allocated area,
|
||||
* so we need to unpoison this area.
|
||||
*/
|
||||
kasan_krealloc(objp, size, GFP_NOWAIT);
|
||||
|
||||
return size;
|
||||
}
|
||||
EXPORT_SYMBOL(ksize);
|
||||
|
@ -405,7 +405,7 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
|
||||
kmemcheck_slab_alloc(s, flags, object, slab_ksize(s));
|
||||
kmemleak_alloc_recursive(object, s->object_size, 1,
|
||||
s->flags, flags);
|
||||
kasan_slab_alloc(s, object);
|
||||
kasan_slab_alloc(s, object, flags);
|
||||
}
|
||||
memcg_kmem_put_cache(s);
|
||||
}
|
||||
|
@ -35,7 +35,7 @@ struct kmem_cache *kmem_cache;
|
||||
*/
|
||||
#define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
|
||||
SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | \
|
||||
SLAB_FAILSLAB)
|
||||
SLAB_FAILSLAB | SLAB_KASAN)
|
||||
|
||||
#define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \
|
||||
SLAB_NOTRACK | SLAB_ACCOUNT)
|
||||
@ -1013,7 +1013,7 @@ void *kmalloc_order(size_t size, gfp_t flags, unsigned int order)
|
||||
page = alloc_kmem_pages(flags, order);
|
||||
ret = page ? page_address(page) : NULL;
|
||||
kmemleak_alloc(ret, size, 1, flags);
|
||||
kasan_kmalloc_large(ret, size);
|
||||
kasan_kmalloc_large(ret, size, flags);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(kmalloc_order);
|
||||
@ -1192,7 +1192,7 @@ static __always_inline void *__do_krealloc(const void *p, size_t new_size,
|
||||
ks = ksize(p);
|
||||
|
||||
if (ks >= new_size) {
|
||||
kasan_krealloc((void *)p, new_size);
|
||||
kasan_krealloc((void *)p, new_size, flags);
|
||||
return (void *)p;
|
||||
}
|
||||
|
||||
|
15
mm/slub.c
15
mm/slub.c
@ -1313,7 +1313,7 @@ static inline void dec_slabs_node(struct kmem_cache *s, int node,
|
||||
static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
|
||||
{
|
||||
kmemleak_alloc(ptr, size, 1, flags);
|
||||
kasan_kmalloc_large(ptr, size);
|
||||
kasan_kmalloc_large(ptr, size, flags);
|
||||
}
|
||||
|
||||
static inline void kfree_hook(const void *x)
|
||||
@ -2596,7 +2596,7 @@ void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
|
||||
{
|
||||
void *ret = slab_alloc(s, gfpflags, _RET_IP_);
|
||||
trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
|
||||
kasan_kmalloc(s, ret, size);
|
||||
kasan_kmalloc(s, ret, size, gfpflags);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(kmem_cache_alloc_trace);
|
||||
@ -2624,7 +2624,7 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
|
||||
trace_kmalloc_node(_RET_IP_, ret,
|
||||
size, s->size, gfpflags, node);
|
||||
|
||||
kasan_kmalloc(s, ret, size);
|
||||
kasan_kmalloc(s, ret, size, gfpflags);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
|
||||
@ -3182,7 +3182,8 @@ static void early_kmem_cache_node_alloc(int node)
|
||||
init_object(kmem_cache_node, n, SLUB_RED_ACTIVE);
|
||||
init_tracking(kmem_cache_node, n);
|
||||
#endif
|
||||
kasan_kmalloc(kmem_cache_node, n, sizeof(struct kmem_cache_node));
|
||||
kasan_kmalloc(kmem_cache_node, n, sizeof(struct kmem_cache_node),
|
||||
GFP_KERNEL);
|
||||
init_kmem_cache_node(n);
|
||||
inc_slabs_node(kmem_cache_node, node, page->objects);
|
||||
|
||||
@ -3561,7 +3562,7 @@ void *__kmalloc(size_t size, gfp_t flags)
|
||||
|
||||
trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
|
||||
|
||||
kasan_kmalloc(s, ret, size);
|
||||
kasan_kmalloc(s, ret, size, flags);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -3606,7 +3607,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
|
||||
|
||||
trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
|
||||
|
||||
kasan_kmalloc(s, ret, size);
|
||||
kasan_kmalloc(s, ret, size, flags);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -3635,7 +3636,7 @@ size_t ksize(const void *object)
|
||||
size_t size = __ksize(object);
|
||||
/* We assume that ksize callers could use whole allocated area,
|
||||
so we need unpoison this area. */
|
||||
kasan_krealloc(object, size);
|
||||
kasan_krealloc(object, size, GFP_NOWAIT);
|
||||
return size;
|
||||
}
|
||||
EXPORT_SYMBOL(ksize);
|
||||
|
Loading…
x
Reference in New Issue
Block a user