Merge branch 'akpm' (patches from Andrew Morton)
Merge a bunch of fixes from Andrew Morton:

 "Commit 579f82901f ("swap: add a simple detector for inappropriate
  swapin readahead") is a feature.  No probs if you decide to defer it
  until the next merge window.

  It has been sitting in my tree for over a year because of my dislike
  of all the magic numbers, but recent discussion with Hugh has made me
  give up"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
mm: __set_page_dirty uses spin_lock_irqsave instead of spin_lock_irq
arch/x86/mm/numa.c: fix array index overflow when synchronizing nid to memblock.reserved.
arch/x86/mm/numa.c: initialize numa_kernel_nodes in numa_clear_kernel_node_hotplug()
mm: __set_page_dirty_nobuffers() uses spin_lock_irqsave() instead of spin_lock_irq()
mm/swap: fix race on swap_info reuse between swapoff and swapon
swap: add a simple detector for inappropriate swapin readahead
ocfs2: free allocated clusters if error occurs after ocfs2_claim_clusters
Documentation/kernel-parameters.txt: fix memmap= language
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1726,16 +1726,16 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			option description.
 
 	memmap=nn[KMG]@ss[KMG]
-			[KNL] Force usage of a specific region of memory
-			Region of memory to be used, from ss to ss+nn.
+			[KNL] Force usage of a specific region of memory.
+			Region of memory to be used is from ss to ss+nn.
 
 	memmap=nn[KMG]#ss[KMG]
 			[KNL,ACPI] Mark specific memory as ACPI data.
-			Region of memory to be used, from ss to ss+nn.
+			Region of memory to be marked is from ss to ss+nn.
 
 	memmap=nn[KMG]$ss[KMG]
 			[KNL,ACPI] Mark specific memory as reserved.
-			Region of memory to be used, from ss to ss+nn.
+			Region of memory to be reserved is from ss to ss+nn.
 			Example: Exclude memory from 0x18690000-0x1869ffff
 			         memmap=64K$0x18690000
 			         or
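A worked example of the reserved form, for illustration (not part of the patch): booting with memmap=64K$0x18690000 tells the kernel to treat the 64 KiB region 0x18690000-0x1869ffff as reserved and never allocate from it. Note that the '$' character typically needs escaping when the option is written into a boot-loader configuration file such as GRUB's, since those parsers treat it specially.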
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -493,14 +493,6 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
 		struct numa_memblk *mb = &mi->blk[i];
 		memblock_set_node(mb->start, mb->end - mb->start,
 				  &memblock.memory, mb->nid);
-
-		/*
-		 * At this time, all memory regions reserved by memblock are
-		 * used by the kernel. Set the nid in memblock.reserved will
-		 * mark out all the nodes the kernel resides in.
-		 */
-		memblock_set_node(mb->start, mb->end - mb->start,
-				  &memblock.reserved, mb->nid);
 	}
 
 	/*
@@ -565,10 +557,21 @@ static void __init numa_init_array(void)
 static void __init numa_clear_kernel_node_hotplug(void)
 {
 	int i, nid;
-	nodemask_t numa_kernel_nodes;
+	nodemask_t numa_kernel_nodes = NODE_MASK_NONE;
 	unsigned long start, end;
 	struct memblock_type *type = &memblock.reserved;
 
+	/*
+	 * At this time, all memory regions reserved by memblock are
+	 * used by the kernel. Set the nid in memblock.reserved will
+	 * mark out all the nodes the kernel resides in.
+	 */
+	for (i = 0; i < numa_meminfo.nr_blks; i++) {
+		struct numa_memblk *mb = &numa_meminfo.blk[i];
+		memblock_set_node(mb->start, mb->end - mb->start,
+				  &memblock.reserved, mb->nid);
+	}
+
 	/* Mark all kernel nodes. */
 	for (i = 0; i < type->cnt; i++)
 		node_set(type->regions[i].nid, numa_kernel_nodes);
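Why the NODE_MASK_NONE initializer matters: numa_kernel_nodes lives on the stack, and an uninitialized stack bitmap can carry stray bits, so node_set() would OR into garbage and nodes the kernel never touched could be treated as kernel nodes. A minimal userspace sketch of the same bug class (mask_t and MASK_NONE are illustrative stand-ins, not the kernel's nodemask types):

    #include <stdio.h>

    #define MAX_NODES 64
    typedef struct {
            unsigned long bits[MAX_NODES / (8 * sizeof(unsigned long))];
    } mask_t;
    #define MASK_NONE ((mask_t){ { 0 } })   /* analogous to NODE_MASK_NONE */

    static void node_set(int node, mask_t *m)
    {
            m->bits[node / (8 * sizeof(unsigned long))] |=
                    1UL << (node % (8 * sizeof(unsigned long)));
    }

    static int node_isset(int node, const mask_t *m)
    {
            return !!(m->bits[node / (8 * sizeof(unsigned long))] &
                      (1UL << (node % (8 * sizeof(unsigned long)))));
    }

    int main(void)
    {
            mask_t kernel_nodes = MASK_NONE;  /* the fix: start from all-clear */

            node_set(0, &kernel_nodes);       /* only node 0 hosts kernel data */
            for (int nid = 0; nid < 4; nid++)
                    printf("node %d: %s\n", nid,
                           node_isset(nid, &kernel_nodes) ? "kernel" : "clear");
            return 0;
    }

Without the initializer, the later pass that clears the hotplug flag on "kernel" nodes could act on whatever nodes the leftover stack contents happened to name.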
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -654,14 +654,16 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode);
 static void __set_page_dirty(struct page *page,
 			struct address_space *mapping, int warn)
 {
-	spin_lock_irq(&mapping->tree_lock);
+	unsigned long flags;
+
+	spin_lock_irqsave(&mapping->tree_lock, flags);
 	if (page->mapping) {	/* Race with truncate? */
 		WARN_ON_ONCE(warn && !PageUptodate(page));
 		account_page_dirtied(page, mapping);
 		radix_tree_tag_set(&mapping->page_tree,
 				page_index(page), PAGECACHE_TAG_DIRTY);
 	}
-	spin_unlock_irq(&mapping->tree_lock);
+	spin_unlock_irqrestore(&mapping->tree_lock, flags);
 	__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
 }
 
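The semantic difference between the two primitives, sketched in userspace with a simulated interrupt-enable flag (spin_lock_irq()/spin_lock_irqsave() are the real kernel APIs; everything below is a toy model): spin_unlock_irq() unconditionally re-enables interrupts, which is wrong if the caller had already disabled them, while the irqsave/irqrestore pair puts the state back exactly as it was found.

    #include <stdio.h>

    static int irqs_enabled = 1;  /* toy stand-in for the CPU interrupt flag */

    static void lock_irq(void)               { irqs_enabled = 0; }
    static void unlock_irq(void)             { irqs_enabled = 1; }  /* unconditional! */
    static void lock_irqsave(int *flags)     { *flags = irqs_enabled; irqs_enabled = 0; }
    static void unlock_irqrestore(int flags) { irqs_enabled = flags; }

    int main(void)
    {
            int flags;

            irqs_enabled = 0;  /* caller already disabled interrupts */
            lock_irq();
            unlock_irq();      /* bug: interrupts re-enabled behind the caller's back */
            printf("after irq variant:     irqs_enabled=%d (expected 0)\n", irqs_enabled);

            irqs_enabled = 0;  /* caller disabled interrupts again */
            lock_irqsave(&flags);
            unlock_irqrestore(flags);  /* restores the caller's state */
            printf("after irqsave variant: irqs_enabled=%d (expected 0)\n", irqs_enabled);
            return 0;
    }

Presumably some call sites can reach __set_page_dirty() with interrupts already disabled, which is why the unconditional re-enable had to go.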
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -4742,6 +4742,7 @@ int ocfs2_add_clusters_in_btree(handle_t *handle,
 			       enum ocfs2_alloc_restarted *reason_ret)
 {
 	int status = 0, err = 0;
+	int need_free = 0;
 	int free_extents;
 	enum ocfs2_alloc_restarted reason = RESTART_NONE;
 	u32 bit_off, num_bits;
@@ -4796,7 +4797,8 @@ int ocfs2_add_clusters_in_btree(handle_t *handle,
 					      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
-		goto leave;
+		need_free = 1;
+		goto bail;
 	}
 
 	block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
@@ -4807,7 +4809,8 @@ int ocfs2_add_clusters_in_btree(handle_t *handle,
 					     num_bits, flags, meta_ac);
 	if (status < 0) {
 		mlog_errno(status);
-		goto leave;
+		need_free = 1;
+		goto bail;
 	}
 
 	ocfs2_journal_dirty(handle, et->et_root_bh);
@@ -4821,6 +4824,19 @@ int ocfs2_add_clusters_in_btree(handle_t *handle,
 		reason = RESTART_TRANS;
 	}
 
+bail:
+	if (need_free) {
+		if (data_ac->ac_which == OCFS2_AC_USE_LOCAL)
+			ocfs2_free_local_alloc_bits(osb, handle, data_ac,
+					bit_off, num_bits);
+		else
+			ocfs2_free_clusters(handle,
+					data_ac->ac_inode,
+					data_ac->ac_bh,
+					ocfs2_clusters_to_blocks(osb->sb, bit_off),
+					num_bits);
+	}
+
 leave:
 	if (reason_ret)
 		*reason_ret = reason;
@@ -6805,6 +6821,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
 					 struct buffer_head *di_bh)
 {
 	int ret, i, has_data, num_pages = 0;
+	int need_free = 0;
+	u32 bit_off, num;
 	handle_t *handle;
 	u64 uninitialized_var(block);
 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
@@ -6850,7 +6868,6 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
 	}
 
 	if (has_data) {
-		u32 bit_off, num;
 		unsigned int page_end;
 		u64 phys;
 
@@ -6886,6 +6903,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
 		ret = ocfs2_grab_eof_pages(inode, 0, end, pages, &num_pages);
 		if (ret) {
 			mlog_errno(ret);
+			need_free = 1;
 			goto out_commit;
 		}
 
@@ -6896,6 +6914,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
 		ret = ocfs2_read_inline_data(inode, pages[0], di_bh);
 		if (ret) {
 			mlog_errno(ret);
+			need_free = 1;
 			goto out_commit;
 		}
 
@@ -6927,6 +6946,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
 	ret = ocfs2_insert_extent(handle, &et, 0, block, 1, 0, NULL);
 	if (ret) {
 		mlog_errno(ret);
+		need_free = 1;
 		goto out_commit;
 	}
 
@@ -6938,6 +6958,18 @@ out_commit:
 		dquot_free_space_nodirty(inode,
 					  ocfs2_clusters_to_bytes(osb->sb, 1));
 
+	if (need_free) {
+		if (data_ac->ac_which == OCFS2_AC_USE_LOCAL)
+			ocfs2_free_local_alloc_bits(osb, handle, data_ac,
+					bit_off, num);
+		else
+			ocfs2_free_clusters(handle,
+					data_ac->ac_inode,
+					data_ac->ac_bh,
+					ocfs2_clusters_to_blocks(osb->sb, bit_off),
+					num);
+	}
+
 	ocfs2_commit_trans(osb, handle);
 
 out_unlock:
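The pattern the ocfs2 fix applies is the kernel's usual staged-cleanup idiom: once clusters have been claimed, any later failure jumps to a label that rolls the claim back before falling through to the common exit. A self-contained sketch with malloc standing in for the cluster claim (all names hypothetical, not ocfs2 APIs):

    #include <stdio.h>
    #include <stdlib.h>

    static int add_clusters(int fail_after_claim)
    {
            int status = 0;
            int need_free = 0;
            char *clusters;

            clusters = malloc(4096);        /* stands in for ocfs2_claim_clusters() */
            if (!clusters)
                    return -1;

            if (fail_after_claim) {         /* e.g. a later journal access fails */
                    status = -5;
                    need_free = 1;          /* the claim must be rolled back */
                    goto bail;
            }
            /*
             * Success: the clusters now belong to the file, so there is
             * nothing to undo (this sketch leaks the buffer deliberately;
             * the error path is the point).
             */

    bail:
            if (need_free)
                    free(clusters);  /* stands in for ocfs2_free_clusters() */
            return status;
    }

    int main(void)
    {
            printf("success path -> %d\n", add_clusters(0));
            printf("failure path -> %d\n", add_clusters(1));
            return 0;
    }

Before the patch, the error paths jumped straight to leave: and the claimed clusters were simply leaked.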
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -781,6 +781,48 @@ bail:
 	return status;
 }
 
+int ocfs2_free_local_alloc_bits(struct ocfs2_super *osb,
+				handle_t *handle,
+				struct ocfs2_alloc_context *ac,
+				u32 bit_off,
+				u32 num_bits)
+{
+	int status, start;
+	u32 clear_bits;
+	struct inode *local_alloc_inode;
+	void *bitmap;
+	struct ocfs2_dinode *alloc;
+	struct ocfs2_local_alloc *la;
+
+	BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL);
+
+	local_alloc_inode = ac->ac_inode;
+	alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
+	la = OCFS2_LOCAL_ALLOC(alloc);
+
+	bitmap = la->la_bitmap;
+	start = bit_off - le32_to_cpu(la->la_bm_off);
+	clear_bits = num_bits;
+
+	status = ocfs2_journal_access_di(handle,
+			INODE_CACHE(local_alloc_inode),
+			osb->local_alloc_bh,
+			OCFS2_JOURNAL_ACCESS_WRITE);
+	if (status < 0) {
+		mlog_errno(status);
+		goto bail;
+	}
+
+	while (clear_bits--)
+		ocfs2_clear_bit(start++, bitmap);
+
+	le32_add_cpu(&alloc->id1.bitmap1.i_used, -num_bits);
+	ocfs2_journal_dirty(handle, osb->local_alloc_bh);
+
+bail:
+	return status;
+}
+
 static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc)
 {
 	u32 count;
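The core of the new helper is the bit-clearing walk over the contiguous window that was handed out. A toy byte-granular version of that walk (the real ocfs2_clear_bit() works on the local alloc window offset by la_bm_off and journals the change, both of which this sketch omits):

    #include <stdio.h>
    #include <limits.h>

    /* toy equivalents of the ocfs2 bitmap helpers */
    static void set_bit(int nr, unsigned char *bm)
    {
            bm[nr / CHAR_BIT] |= (1u << (nr % CHAR_BIT));
    }
    static void clear_bit(int nr, unsigned char *bm)
    {
            bm[nr / CHAR_BIT] &= ~(1u << (nr % CHAR_BIT));
    }

    int main(void)
    {
            unsigned char bitmap[4] = { 0 };
            unsigned int bit_off = 5, num_bits = 7;
            unsigned int start, clear_bits;

            for (unsigned int i = 0; i < 32; i++)  /* allocator handed out all bits */
                    set_bit(i, bitmap);

            /* the core of the rollback: walk the window and clear it again */
            start = bit_off;
            clear_bits = num_bits;
            while (clear_bits--)
                    clear_bit(start++, bitmap);

            for (int i = 3; i >= 0; i--)
                    printf("%02x", bitmap[i]);  /* bits 5..11 are clear again */
            printf("\n");
            return 0;
    }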
--- a/fs/ocfs2/localalloc.h
+++ b/fs/ocfs2/localalloc.h
@@ -55,6 +55,12 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
 				 u32 *bit_off,
 				 u32 *num_bits);
 
+int ocfs2_free_local_alloc_bits(struct ocfs2_super *osb,
+				handle_t *handle,
+				struct ocfs2_alloc_context *ac,
+				u32 bit_off,
+				u32 num_bits);
+
 void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb,
 				      unsigned int num_clusters);
 void ocfs2_la_enable_worker(struct work_struct *work);
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -228,9 +228,9 @@ PAGEFLAG(OwnerPriv1, owner_priv_1) TESTCLEARFLAG(OwnerPriv1, owner_priv_1)
 TESTPAGEFLAG(Writeback, writeback) TESTSCFLAG(Writeback, writeback)
 PAGEFLAG(MappedToDisk, mappedtodisk)
 
-/* PG_readahead is only used for file reads; PG_reclaim is only for writes */
+/* PG_readahead is only used for reads; PG_reclaim is only for writes */
 PAGEFLAG(Reclaim, reclaim) TESTCLEARFLAG(Reclaim, reclaim)
-PAGEFLAG(Readahead, reclaim)	/* Reminder to do async read-ahead */
+PAGEFLAG(Readahead, reclaim) TESTCLEARFLAG(Readahead, reclaim)
 
 #ifdef CONFIG_HIGHMEM
 /*
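The TESTCLEARFLAG() macro generates TestClearPageReadahead(): an atomic "read the bit and clear it" in one step, which is what lets lookup_swap_cache() count each readahead hit exactly once even if two lookups race. A hedged userspace analogue using the GCC/Clang atomic builtins (the bit number and the flags word are illustrative, not the kernel's layout):

    #include <stdio.h>

    static unsigned long page_flags;  /* stand-in for struct page->flags */
    #define PG_readahead 0            /* illustrative bit number */

    /* analogue of TestClearPageReadahead(): returns old value, clears bit */
    static int test_and_clear_readahead(unsigned long *flags)
    {
            unsigned long old = __atomic_fetch_and(flags,
                            ~(1UL << PG_readahead), __ATOMIC_SEQ_CST);
            return !!(old & (1UL << PG_readahead));
    }

    int main(void)
    {
            page_flags = 1UL << PG_readahead;  /* SetPageReadahead() happened */

            /* first lookup observes the hit and consumes the flag ... */
            printf("first:  %d\n", test_and_clear_readahead(&page_flags));
            /* ... a second lookup must not count it again */
            printf("second: %d\n", test_and_clear_readahead(&page_flags));
            return 0;
    }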
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2173,11 +2173,12 @@ int __set_page_dirty_nobuffers(struct page *page)
 	if (!TestSetPageDirty(page)) {
 		struct address_space *mapping = page_mapping(page);
 		struct address_space *mapping2;
+		unsigned long flags;
 
 		if (!mapping)
 			return 1;
 
-		spin_lock_irq(&mapping->tree_lock);
+		spin_lock_irqsave(&mapping->tree_lock, flags);
 		mapping2 = page_mapping(page);
 		if (mapping2) { /* Race with truncate? */
 			BUG_ON(mapping2 != mapping);
@@ -2186,7 +2187,7 @@ int __set_page_dirty_nobuffers(struct page *page)
 			radix_tree_tag_set(&mapping->page_tree,
 				page_index(page), PAGECACHE_TAG_DIRTY);
 		}
-		spin_unlock_irq(&mapping->tree_lock);
+		spin_unlock_irqrestore(&mapping->tree_lock, flags);
 		if (mapping->host) {
 			/* !PageAnon && !swapper_space */
 			__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -63,6 +63,8 @@ unsigned long total_swapcache_pages(void)
 	return ret;
 }
 
+static atomic_t swapin_readahead_hits = ATOMIC_INIT(4);
+
 void show_swap_cache_info(void)
 {
 	printk("%lu pages in swap cache\n", total_swapcache_pages());
@@ -286,8 +288,11 @@ struct page * lookup_swap_cache(swp_entry_t entry)
 
 	page = find_get_page(swap_address_space(entry), entry.val);
 
-	if (page)
+	if (page) {
 		INC_CACHE_INFO(find_success);
+		if (TestClearPageReadahead(page))
+			atomic_inc(&swapin_readahead_hits);
+	}
 
 	INC_CACHE_INFO(find_total);
 	return page;
@@ -389,6 +394,50 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 	return found_page;
 }
 
+static unsigned long swapin_nr_pages(unsigned long offset)
+{
+	static unsigned long prev_offset;
+	unsigned int pages, max_pages, last_ra;
+	static atomic_t last_readahead_pages;
+
+	max_pages = 1 << ACCESS_ONCE(page_cluster);
+	if (max_pages <= 1)
+		return 1;
+
+	/*
+	 * This heuristic has been found to work well on both sequential and
+	 * random loads, swapping to hard disk or to SSD: please don't ask
+	 * what the "+ 2" means, it just happens to work well, that's all.
+	 */
+	pages = atomic_xchg(&swapin_readahead_hits, 0) + 2;
+	if (pages == 2) {
+		/*
+		 * We can have no readahead hits to judge by: but must not get
+		 * stuck here forever, so check for an adjacent offset instead
+		 * (and don't even bother to check whether swap type is same).
+		 */
+		if (offset != prev_offset + 1 && offset != prev_offset - 1)
+			pages = 1;
+		prev_offset = offset;
+	} else {
+		unsigned int roundup = 4;
+		while (roundup < pages)
+			roundup <<= 1;
+		pages = roundup;
+	}
+
+	if (pages > max_pages)
+		pages = max_pages;
+
+	/* Don't shrink readahead too fast */
+	last_ra = atomic_read(&last_readahead_pages) / 2;
+	if (pages < last_ra)
+		pages = last_ra;
+	atomic_set(&last_readahead_pages, pages);
+
+	return pages;
+}
+
 /**
  * swapin_readahead - swap in pages in hope we need them soon
  * @entry: swap entry of this memory
@@ -412,11 +461,16 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
 			struct vm_area_struct *vma, unsigned long addr)
 {
 	struct page *page;
-	unsigned long offset = swp_offset(entry);
+	unsigned long entry_offset = swp_offset(entry);
+	unsigned long offset = entry_offset;
 	unsigned long start_offset, end_offset;
-	unsigned long mask = (1UL << page_cluster) - 1;
+	unsigned long mask;
 	struct blk_plug plug;
 
+	mask = swapin_nr_pages(offset) - 1;
+	if (!mask)
+		goto skip;
+
 	/* Read a page_cluster sized and aligned cluster around offset. */
 	start_offset = offset & ~mask;
 	end_offset = offset | mask;
@@ -430,10 +484,13 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
 					gfp_mask, vma, addr);
 		if (!page)
 			continue;
+		if (offset != entry_offset)
+			SetPageReadahead(page);
 		page_cache_release(page);
 	}
 	blk_finish_plug(&plug);
 
 	lru_add_drain();	/* Push any new pages onto the LRU now */
+skip:
 	return read_swap_cache_async(entry, gfp_mask, vma, addr);
 }
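The heuristic is easy to trace by hand: each window reads hits+2 pages rounded up to a power of two, capped at 1 << page_cluster, and never allowed to drop below half of the previous window. Below is a minimal userspace reimplementation for experimentation; it is single-threaded, so the atomics become plain ints, and page_cluster is fixed at 3 (an assumed value, giving an 8-page cap):

    #include <stdio.h>

    static unsigned long prev_offset;
    static unsigned int readahead_hits = 4;      /* mirrors ATOMIC_INIT(4) */
    static unsigned int last_readahead_pages;
    static const unsigned int page_cluster = 3;  /* assumed tunable value */

    static unsigned long swapin_nr_pages(unsigned long offset)
    {
            unsigned int pages, max_pages, last_ra;

            max_pages = 1u << page_cluster;
            if (max_pages <= 1)
                    return 1;

            pages = readahead_hits + 2;  /* hits + 2, the admittedly magic baseline */
            readahead_hits = 0;
            if (pages == 2) {
                    /* no hits: read one page unless the fault looks sequential */
                    if (offset != prev_offset + 1 && offset != prev_offset - 1)
                            pages = 1;
                    prev_offset = offset;
            } else {
                    unsigned int roundup = 4;
                    while (roundup < pages)  /* round up to a power of two */
                            roundup <<= 1;
                    pages = roundup;
            }

            if (pages > max_pages)
                    pages = max_pages;

            last_ra = last_readahead_pages / 2;  /* don't shrink too fast */
            if (pages < last_ra)
                    pages = last_ra;
            last_readahead_pages = pages;

            return pages;
    }

    int main(void)
    {
            /* random faults with no hits shrink the window toward 1 page */
            unsigned long offsets[] = { 100, 5000, 9, 700, 42 };

            for (int i = 0; i < 5; i++)
                    printf("fault at %5lu -> window %lu pages\n",
                           offsets[i], swapin_nr_pages(offsets[i]));
            return 0;
    }

Run against this random fault pattern it prints windows of 8, 4, 2, 1, 1 pages: with no readahead hits the window halves on each fault, which is exactly the gradual decay the "don't shrink readahead too fast" clamp enforces.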
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1923,7 +1923,6 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 	p->swap_map = NULL;
 	cluster_info = p->cluster_info;
 	p->cluster_info = NULL;
-	p->flags = 0;
 	frontswap_map = frontswap_map_get(p);
 	spin_unlock(&p->lock);
 	spin_unlock(&swap_lock);
@@ -1949,6 +1948,16 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 		mutex_unlock(&inode->i_mutex);
 	}
 	filp_close(swap_file, NULL);
+
+	/*
+	 * Clear the SWP_USED flag after all resources are freed so that swapon
+	 * can reuse this swap_info in alloc_swap_info() safely.  It is ok to
+	 * not hold p->lock after we cleared its SWP_WRITEOK.
+	 */
+	spin_lock(&swap_lock);
+	p->flags = 0;
+	spin_unlock(&swap_lock);
+
 	err = 0;
 	atomic_inc(&proc_poll_event);
 	wake_up_interruptible(&proc_poll_wait);
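The race this closes: p->flags = 0 is what marks the swap_info slot free for swapon to grab, so it must be the last step, after swap_map and friends have been torn down; otherwise a concurrent swapon can start reusing a slot whose resources swapoff is still freeing. A toy model of the corrected ordering (a pthread mutex stands in for swap_lock, and the structure is purely illustrative):

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    static pthread_mutex_t swap_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct { int flags; char *swap_map; } si = { 1, NULL };  /* in use */

    static void *swapoff_thread(void *arg)
    {
            (void)arg;
            /* tear down resources first, outside the slot-allocation lock */
            free(si.swap_map);
            si.swap_map = NULL;

            /* only now publish the slot as reusable */
            pthread_mutex_lock(&swap_lock);
            si.flags = 0;
            pthread_mutex_unlock(&swap_lock);
            return NULL;
    }

    static void *swapon_thread(void *arg)
    {
            (void)arg;
            for (;;) {
                    pthread_mutex_lock(&swap_lock);
                    if (si.flags == 0) {  /* slot free: safe to reuse */
                            si.flags = 1;
                            si.swap_map = calloc(1, 4096);
                            pthread_mutex_unlock(&swap_lock);
                            printf("swapon reused slot; old map already freed\n");
                            return NULL;
                    }
                    pthread_mutex_unlock(&swap_lock);
            }
    }

    int main(void)
    {
            pthread_t off, on;

            si.swap_map = calloc(1, 4096);
            pthread_create(&on, NULL, swapon_thread, NULL);
            pthread_create(&off, NULL, swapoff_thread, NULL);
            pthread_join(off, NULL);
            pthread_join(on, NULL);
            free(si.swap_map);
            return 0;
    }

With the buggy ordering (flags cleared before the teardown), swapon could observe a "free" slot while swap_map was still live and about to be freed under it.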