merge mm-hotfixes-stable into mm-nonmm-stable to pick up needed changes
commit 5d1bc76058

3 .mailmap
@@ -446,7 +446,8 @@ Mythri P K <mythripk@ti.com>
Nadav Amit <nadav.amit@gmail.com> <namit@vmware.com>
Nadav Amit <nadav.amit@gmail.com> <namit@cs.technion.ac.il>
Nadia Yvette Chambers <nyc@holomorphy.com> William Lee Irwin III <wli@holomorphy.com>
Naoya Horiguchi <naoya.horiguchi@nec.com> <n-horiguchi@ah.jp.nec.com>
Naoya Horiguchi <nao.horiguchi@gmail.com> <n-horiguchi@ah.jp.nec.com>
Naoya Horiguchi <nao.horiguchi@gmail.com> <naoya.horiguchi@nec.com>
Nathan Chancellor <nathan@kernel.org> <natechancellor@gmail.com>
Neeraj Upadhyay <quic_neeraju@quicinc.com> <neeraju@codeaurora.org>
Neil Armstrong <neil.armstrong@linaro.org> <narmstrong@baylibre.com>
@@ -24,10 +24,10 @@ fragmentation statistics can be obtained through gfp flag information of
each page. It is already implemented and activated if page owner is
enabled. Other usages are more than welcome.

It can also be used to show all the stacks and their outstanding
allocations, which gives us a quick overview of where the memory is going
without the need to screen through all the pages and match the allocation
and free operation.
It can also be used to show all the stacks and their current number of
allocated base pages, which gives us a quick overview of where the memory
is going without the need to screen through all the pages and match the
allocation and free operation.

page owner is disabled by default. So, if you'd like to use it, you need
to add "page_owner=on" to your boot cmdline. If the kernel is built
@@ -75,42 +75,45 @@ Usage

cat /sys/kernel/debug/page_owner_stacks/show_stacks > stacks.txt
cat stacks.txt
prep_new_page+0xa9/0x120
get_page_from_freelist+0x7e6/0x2140
__alloc_pages+0x18a/0x370
new_slab+0xc8/0x580
___slab_alloc+0x1f2/0xaf0
__slab_alloc.isra.86+0x22/0x40
kmem_cache_alloc+0x31b/0x350
__khugepaged_enter+0x39/0x100
dup_mmap+0x1c7/0x5ce
copy_process+0x1afe/0x1c90
kernel_clone+0x9a/0x3c0
__do_sys_clone+0x66/0x90
do_syscall_64+0x7f/0x160
entry_SYSCALL_64_after_hwframe+0x6c/0x74
stack_count: 234
post_alloc_hook+0x177/0x1a0
get_page_from_freelist+0xd01/0xd80
__alloc_pages+0x39e/0x7e0
allocate_slab+0xbc/0x3f0
___slab_alloc+0x528/0x8a0
kmem_cache_alloc+0x224/0x3b0
sk_prot_alloc+0x58/0x1a0
sk_alloc+0x32/0x4f0
inet_create+0x427/0xb50
__sock_create+0x2e4/0x650
inet_ctl_sock_create+0x30/0x180
igmp_net_init+0xc1/0x130
ops_init+0x167/0x410
setup_net+0x304/0xa60
copy_net_ns+0x29b/0x4a0
create_new_namespaces+0x4a1/0x820
nr_base_pages: 16
...
...
echo 7000 > /sys/kernel/debug/page_owner_stacks/count_threshold
cat /sys/kernel/debug/page_owner_stacks/show_stacks > stacks_7000.txt
cat stacks_7000.txt
prep_new_page+0xa9/0x120
get_page_from_freelist+0x7e6/0x2140
__alloc_pages+0x18a/0x370
alloc_pages_mpol+0xdf/0x1e0
folio_alloc+0x14/0x50
filemap_alloc_folio+0xb0/0x100
page_cache_ra_unbounded+0x97/0x180
filemap_fault+0x4b4/0x1200
__do_fault+0x2d/0x110
do_pte_missing+0x4b0/0xa30
__handle_mm_fault+0x7fa/0xb70
handle_mm_fault+0x125/0x300
do_user_addr_fault+0x3c9/0x840
exc_page_fault+0x68/0x150
asm_exc_page_fault+0x22/0x30
stack_count: 8248
post_alloc_hook+0x177/0x1a0
get_page_from_freelist+0xd01/0xd80
__alloc_pages+0x39e/0x7e0
alloc_pages_mpol+0x22e/0x490
folio_alloc+0xd5/0x110
filemap_alloc_folio+0x78/0x230
page_cache_ra_order+0x287/0x6f0
filemap_get_pages+0x517/0x1160
filemap_read+0x304/0x9f0
xfs_file_buffered_read+0xe6/0x1d0 [xfs]
xfs_file_read_iter+0x1f0/0x380 [xfs]
__kernel_read+0x3b9/0x730
kernel_read_file+0x309/0x4d0
__do_sys_finit_module+0x381/0x730
do_syscall_64+0x8d/0x150
entry_SYSCALL_64_after_hwframe+0x62/0x6a
nr_base_pages: 20824
...

cat /sys/kernel/debug/page_owner > page_owner_full.txt
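Taken together, the documented commands above form a small workflow. A minimal sketch (assuming debugfs is mounted at /sys/kernel/debug and the kernel was booted with page_owner=on, as the text above requires):

    cat /sys/kernel/debug/page_owner_stacks/show_stacks > stacks.txt       # every stack with its nr_base_pages
    echo 7000 > /sys/kernel/debug/page_owner_stacks/count_threshold        # hide stacks below 7000 base pages
    cat /sys/kernel/debug/page_owner_stacks/show_stacks > stacks_7000.txt
    cat /sys/kernel/debug/page_owner > page_owner_full.txt                 # full per-page report for cross-checking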
@@ -10024,7 +10024,7 @@ F: drivers/media/platform/st/sti/hva

HWPOISON MEMORY FAILURE HANDLING
M: Miaohe Lin <linmiaohe@huawei.com>
R: Naoya Horiguchi <naoya.horiguchi@nec.com>
R: Naoya Horiguchi <nao.horiguchi@gmail.com>
L: linux-mm@kvack.org
S: Maintained
F: mm/hwpoison-inject.c
@@ -240,7 +240,7 @@ nilfs_filetype_table[NILFS_FT_MAX] = {

#define S_SHIFT 12
static unsigned char
nilfs_type_by_mode[S_IFMT >> S_SHIFT] = {
nilfs_type_by_mode[(S_IFMT >> S_SHIFT) + 1] = {
[S_IFREG >> S_SHIFT] = NILFS_FT_REG_FILE,
[S_IFDIR >> S_SHIFT] = NILFS_FT_DIR,
[S_IFCHR >> S_SHIFT] = NILFS_FT_CHRDEV,
@@ -67,7 +67,7 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf,
*/
ppage = pfn_to_online_page(pfn);

if (!ppage || PageSlab(ppage) || page_has_type(ppage))
if (!ppage)
pcount = 0;
else
pcount = page_mapcount(ppage);
@@ -124,11 +124,8 @@ u64 stable_page_flags(struct page *page)

/*
* pseudo flags for the well known (anonymous) memory mapped pages
*
* Note that page->_mapcount is overloaded in SLAB, so the
* simple test in page_mapped() is not enough.
*/
if (!PageSlab(page) && page_mapped(page))
if (page_mapped(page))
u |= 1 << KPF_MMAP;
if (PageAnon(page))
u |= 1 << KPF_ANON;
@@ -48,6 +48,10 @@ static int squashfs_new_inode(struct super_block *sb, struct inode *inode,
gid_t i_gid;
int err;

inode->i_ino = le32_to_cpu(sqsh_ino->inode_number);
if (inode->i_ino == 0)
return -EINVAL;

err = squashfs_get_id(sb, le16_to_cpu(sqsh_ino->uid), &i_uid);
if (err)
return err;

@@ -58,7 +62,6 @@ static int squashfs_new_inode(struct super_block *sb, struct inode *inode,

i_uid_write(inode, i_uid);
i_gid_write(inode, i_gid);
inode->i_ino = le32_to_cpu(sqsh_ino->inode_number);
inode_set_mtime(inode, le32_to_cpu(sqsh_ino->mtime), 0);
inode_set_atime(inode, inode_get_mtime_sec(inode), 0);
inode_set_ctime(inode, inode_get_mtime_sec(inode), 0);
@@ -1223,14 +1223,16 @@ static inline void page_mapcount_reset(struct page *page)
* a large folio, it includes the number of times this page is mapped
* as part of that folio.
*
* The result is undefined for pages which cannot be mapped into userspace.
* For example SLAB or special types of pages. See function page_has_type().
* They use this field in struct page differently.
* Will report 0 for pages which cannot be mapped into userspace, eg
* slab, page tables and similar.
*/
static inline int page_mapcount(struct page *page)
{
int mapcount = atomic_read(&page->_mapcount) + 1;

/* Handle page_has_type() pages */
if (mapcount < 0)
mapcount = 0;
if (unlikely(PageCompound(page)))
mapcount += folio_entire_mapcount(page_folio(page));

@@ -190,7 +190,6 @@ enum pageflags {

/* At least one page in this folio has the hwpoison flag set */
PG_has_hwpoisoned = PG_error,
PG_hugetlb = PG_active,
PG_large_rmappable = PG_workingset, /* anon or file-backed */
};

@@ -458,30 +457,51 @@ static __always_inline int TestClearPage##uname(struct page *page) \
TESTSETFLAG(uname, lname, policy) \
TESTCLEARFLAG(uname, lname, policy)

#define FOLIO_TEST_FLAG_FALSE(name) \
static inline bool folio_test_##name(const struct folio *folio) \
{ return false; }
#define FOLIO_SET_FLAG_NOOP(name) \
static inline void folio_set_##name(struct folio *folio) { }
#define FOLIO_CLEAR_FLAG_NOOP(name) \
static inline void folio_clear_##name(struct folio *folio) { }
#define __FOLIO_SET_FLAG_NOOP(name) \
static inline void __folio_set_##name(struct folio *folio) { }
#define __FOLIO_CLEAR_FLAG_NOOP(name) \
static inline void __folio_clear_##name(struct folio *folio) { }
#define FOLIO_TEST_SET_FLAG_FALSE(name) \
static inline bool folio_test_set_##name(struct folio *folio) \
{ return false; }
#define FOLIO_TEST_CLEAR_FLAG_FALSE(name) \
static inline bool folio_test_clear_##name(struct folio *folio) \
{ return false; }

#define FOLIO_FLAG_FALSE(name) \
FOLIO_TEST_FLAG_FALSE(name) \
FOLIO_SET_FLAG_NOOP(name) \
FOLIO_CLEAR_FLAG_NOOP(name)

#define TESTPAGEFLAG_FALSE(uname, lname) \
static inline bool folio_test_##lname(const struct folio *folio) { return false; } \
FOLIO_TEST_FLAG_FALSE(lname) \
static inline int Page##uname(const struct page *page) { return 0; }

#define SETPAGEFLAG_NOOP(uname, lname) \
static inline void folio_set_##lname(struct folio *folio) { } \
FOLIO_SET_FLAG_NOOP(lname) \
static inline void SetPage##uname(struct page *page) { }

#define CLEARPAGEFLAG_NOOP(uname, lname) \
static inline void folio_clear_##lname(struct folio *folio) { } \
FOLIO_CLEAR_FLAG_NOOP(lname) \
static inline void ClearPage##uname(struct page *page) { }

#define __CLEARPAGEFLAG_NOOP(uname, lname) \
static inline void __folio_clear_##lname(struct folio *folio) { } \
__FOLIO_CLEAR_FLAG_NOOP(lname) \
static inline void __ClearPage##uname(struct page *page) { }

#define TESTSETFLAG_FALSE(uname, lname) \
static inline bool folio_test_set_##lname(struct folio *folio) \
{ return 0; } \
FOLIO_TEST_SET_FLAG_FALSE(lname) \
static inline int TestSetPage##uname(struct page *page) { return 0; }

#define TESTCLEARFLAG_FALSE(uname, lname) \
static inline bool folio_test_clear_##lname(struct folio *folio) \
{ return 0; } \
FOLIO_TEST_CLEAR_FLAG_FALSE(lname) \
static inline int TestClearPage##uname(struct page *page) { return 0; }

#define PAGEFLAG_FALSE(uname, lname) TESTPAGEFLAG_FALSE(uname, lname) \
@@ -855,29 +875,6 @@ TESTPAGEFLAG_FALSE(LargeRmappable, large_rmappable)

#define PG_head_mask ((1UL << PG_head))

#ifdef CONFIG_HUGETLB_PAGE
int PageHuge(const struct page *page);
SETPAGEFLAG(HugeTLB, hugetlb, PF_SECOND)
CLEARPAGEFLAG(HugeTLB, hugetlb, PF_SECOND)

/**
* folio_test_hugetlb - Determine if the folio belongs to hugetlbfs
* @folio: The folio to test.
*
* Context: Any context. Caller should have a reference on the folio to
* prevent it from being turned into a tail page.
* Return: True for hugetlbfs folios, false for anon folios or folios
* belonging to other filesystems.
*/
static inline bool folio_test_hugetlb(const struct folio *folio)
{
return folio_test_large(folio) &&
test_bit(PG_hugetlb, const_folio_flags(folio, 1));
}
#else
TESTPAGEFLAG_FALSE(Huge, hugetlb)
#endif

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
* PageHuge() only returns true for hugetlbfs pages, but not for
@@ -933,34 +930,23 @@ PAGEFLAG_FALSE(HasHWPoisoned, has_hwpoisoned)
TESTSCFLAG_FALSE(HasHWPoisoned, has_hwpoisoned)
#endif

/*
* Check if a page is currently marked HWPoisoned. Note that this check is
* best effort only and inherently racy: there is no way to synchronize with
* failing hardware.
*/
static inline bool is_page_hwpoison(struct page *page)
{
if (PageHWPoison(page))
return true;
return PageHuge(page) && PageHWPoison(compound_head(page));
}

/*
* For pages that are never mapped to userspace (and aren't PageSlab),
* page_type may be used. Because it is initialised to -1, we invert the
* sense of the bit, so __SetPageFoo *clears* the bit used for PageFoo, and
* __ClearPageFoo *sets* the bit used for PageFoo. We reserve a few high and
* low bits so that an underflow or overflow of page_mapcount() won't be
* low bits so that an underflow or overflow of _mapcount won't be
* mistaken for a page type value.
*/

#define PAGE_TYPE_BASE 0xf0000000
/* Reserve 0x0000007f to catch underflows of page_mapcount */
/* Reserve 0x0000007f to catch underflows of _mapcount */
#define PAGE_MAPCOUNT_RESERVE -128
#define PG_buddy 0x00000080
#define PG_offline 0x00000100
#define PG_table 0x00000200
#define PG_guard 0x00000400
#define PG_hugetlb 0x00000800

#define PageType(page, flag) \
((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE)
@@ -977,35 +963,38 @@ static inline int page_has_type(const struct page *page)
return page_type_has_type(page->page_type);
}

#define PAGE_TYPE_OPS(uname, lname, fname) \
static __always_inline int Page##uname(const struct page *page) \
{ \
return PageType(page, PG_##lname); \
} \
static __always_inline int folio_test_##fname(const struct folio *folio)\
#define FOLIO_TYPE_OPS(lname, fname) \
static __always_inline bool folio_test_##fname(const struct folio *folio)\
{ \
return folio_test_type(folio, PG_##lname); \
} \
static __always_inline void __SetPage##uname(struct page *page) \
{ \
VM_BUG_ON_PAGE(!PageType(page, 0), page); \
page->page_type &= ~PG_##lname; \
} \
static __always_inline void __folio_set_##fname(struct folio *folio) \
{ \
VM_BUG_ON_FOLIO(!folio_test_type(folio, 0), folio); \
folio->page.page_type &= ~PG_##lname; \
} \
static __always_inline void __ClearPage##uname(struct page *page) \
{ \
VM_BUG_ON_PAGE(!Page##uname(page), page); \
page->page_type |= PG_##lname; \
} \
static __always_inline void __folio_clear_##fname(struct folio *folio) \
{ \
VM_BUG_ON_FOLIO(!folio_test_##fname(folio), folio); \
folio->page.page_type |= PG_##lname; \
}

#define PAGE_TYPE_OPS(uname, lname, fname) \
FOLIO_TYPE_OPS(lname, fname) \
static __always_inline int Page##uname(const struct page *page) \
{ \
return PageType(page, PG_##lname); \
} \
static __always_inline void __SetPage##uname(struct page *page) \
{ \
VM_BUG_ON_PAGE(!PageType(page, 0), page); \
page->page_type &= ~PG_##lname; \
} \
static __always_inline void __ClearPage##uname(struct page *page) \
{ \
VM_BUG_ON_PAGE(!Page##uname(page), page); \
page->page_type |= PG_##lname; \
}

/*
* PageBuddy() indicates that the page is free and in the buddy system
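For readers following the macro rework above, a hand expansion of the new FOLIO_TYPE_OPS() helper for the hugetlb page type (which the next hunk instantiates) looks roughly like the sketch below. It is derived from the macro body quoted above for illustration only, not additional patched code:

    /* FOLIO_TYPE_OPS(hugetlb, hugetlb), expanded by hand for illustration. */
    static __always_inline bool folio_test_hugetlb(const struct folio *folio)
    {
    	return folio_test_type(folio, PG_hugetlb);
    }
    static __always_inline void __folio_set_hugetlb(struct folio *folio)
    {
    	/* The folio must not already carry a page type. */
    	VM_BUG_ON_FOLIO(!folio_test_type(folio, 0), folio);
    	folio->page.page_type &= ~PG_hugetlb;
    }
    static __always_inline void __folio_clear_hugetlb(struct folio *folio)
    {
    	VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio);
    	folio->page.page_type |= PG_hugetlb;
    }

This is why the mm/hugetlb.c hunks further down switch from folio_set_hugetlb()/folio_clear_hugetlb() to the double-underscore variants generated here.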
@@ -1052,6 +1041,37 @@ PAGE_TYPE_OPS(Table, table, pgtable)
*/
PAGE_TYPE_OPS(Guard, guard, guard)

#ifdef CONFIG_HUGETLB_PAGE
FOLIO_TYPE_OPS(hugetlb, hugetlb)
#else
FOLIO_TEST_FLAG_FALSE(hugetlb)
#endif

/**
* PageHuge - Determine if the page belongs to hugetlbfs
* @page: The page to test.
*
* Context: Any context.
* Return: True for hugetlbfs pages, false for anon pages or pages
* belonging to other filesystems.
*/
static inline bool PageHuge(const struct page *page)
{
return folio_test_hugetlb(page_folio(page));
}

/*
* Check if a page is currently marked HWPoisoned. Note that this check is
* best effort only and inherently racy: there is no way to synchronize with
* failing hardware.
*/
static inline bool is_page_hwpoison(struct page *page)
{
if (PageHWPoison(page))
return true;
return PageHuge(page) && PageHWPoison(compound_head(page));
}

extern bool is_free_buddy_page(struct page *page);

PAGEFLAG(Isolated, isolated, PF_ANY);

@@ -1118,7 +1138,7 @@ static __always_inline void __ClearPageAnonExclusive(struct page *page)
*/
#define PAGE_FLAGS_SECOND \
(0xffUL /* order */ | 1UL << PG_has_hwpoisoned | \
1UL << PG_hugetlb | 1UL << PG_large_rmappable)
1UL << PG_large_rmappable)

#define PAGE_FLAGS_PRIVATE \
(1UL << PG_private | 1UL << PG_private_2)
@@ -110,8 +110,17 @@ extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end);
int shmem_unuse(unsigned int type);

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force,
struct mm_struct *mm, unsigned long vm_flags);
#else
static __always_inline bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force,
struct mm_struct *mm, unsigned long vm_flags)
{
return false;
}
#endif

#ifdef CONFIG_SHMEM
extern unsigned long shmem_swap_usage(struct vm_area_struct *vma);
#else
@@ -390,6 +390,35 @@ static inline bool is_migration_entry_dirty(swp_entry_t entry)
}
#endif /* CONFIG_MIGRATION */

#ifdef CONFIG_MEMORY_FAILURE

/*
* Support for hardware poisoned pages
*/
static inline swp_entry_t make_hwpoison_entry(struct page *page)
{
BUG_ON(!PageLocked(page));
return swp_entry(SWP_HWPOISON, page_to_pfn(page));
}

static inline int is_hwpoison_entry(swp_entry_t entry)
{
return swp_type(entry) == SWP_HWPOISON;
}

#else

static inline swp_entry_t make_hwpoison_entry(struct page *page)
{
return swp_entry(0, 0);
}

static inline int is_hwpoison_entry(swp_entry_t swp)
{
return 0;
}
#endif

typedef unsigned long pte_marker;

#define PTE_MARKER_UFFD_WP BIT(0)

@@ -483,8 +512,9 @@ static inline struct folio *pfn_swap_entry_folio(swp_entry_t entry)

/*
* A pfn swap entry is a special type of swap entry that always has a pfn stored
* in the swap offset. They are used to represent unaddressable device memory
* and to restrict access to a page undergoing migration.
* in the swap offset. They can either be used to represent unaddressable device
* memory, to restrict access to a page undergoing migration or to represent a
* pfn which has been hwpoisoned and unmapped.
*/
static inline bool is_pfn_swap_entry(swp_entry_t entry)
{

@@ -492,7 +522,7 @@ static inline bool is_pfn_swap_entry(swp_entry_t entry)
BUILD_BUG_ON(SWP_TYPE_SHIFT < SWP_PFN_BITS);

return is_migration_entry(entry) || is_device_private_entry(entry) ||
is_device_exclusive_entry(entry);
is_device_exclusive_entry(entry) || is_hwpoison_entry(entry);
}

struct page_vma_mapped_walk;

@@ -561,35 +591,6 @@ static inline int is_pmd_migration_entry(pmd_t pmd)
}
#endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */

#ifdef CONFIG_MEMORY_FAILURE

/*
* Support for hardware poisoned pages
*/
static inline swp_entry_t make_hwpoison_entry(struct page *page)
{
BUG_ON(!PageLocked(page));
return swp_entry(SWP_HWPOISON, page_to_pfn(page));
}

static inline int is_hwpoison_entry(swp_entry_t entry)
{
return swp_type(entry) == SWP_HWPOISON;
}

#else

static inline swp_entry_t make_hwpoison_entry(struct page *page)
{
return swp_entry(0, 0);
}

static inline int is_hwpoison_entry(swp_entry_t swp)
{
return 0;
}
#endif

static inline int non_swap_entry(swp_entry_t entry)
{
return swp_type(entry) >= MAX_SWAPFILES;
@@ -135,6 +135,7 @@ IF_HAVE_PG_ARCH_X(arch_3)
#define DEF_PAGETYPE_NAME(_name) { PG_##_name, __stringify(_name) }

#define __def_pagetype_names \
DEF_PAGETYPE_NAME(hugetlb), \
DEF_PAGETYPE_NAME(offline), \
DEF_PAGETYPE_NAME(guard), \
DEF_PAGETYPE_NAME(table), \
@@ -714,6 +714,23 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
} else if (anon_vma_fork(tmp, mpnt))
goto fail_nomem_anon_vma_fork;
vm_flags_clear(tmp, VM_LOCKED_MASK);
/*
* Copy/update hugetlb private vma information.
*/
if (is_vm_hugetlb_page(tmp))
hugetlb_dup_vma_private(tmp);

/*
* Link the vma into the MT. After using __mt_dup(), memory
* allocation is not necessary here, so it cannot fail.
*/
vma_iter_bulk_store(&vmi, tmp);

mm->map_count++;

if (tmp->vm_ops && tmp->vm_ops->open)
tmp->vm_ops->open(tmp);

file = tmp->vm_file;
if (file) {
struct address_space *mapping = file->f_mapping;

@@ -730,25 +747,9 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
i_mmap_unlock_write(mapping);
}

/*
* Copy/update hugetlb private vma information.
*/
if (is_vm_hugetlb_page(tmp))
hugetlb_dup_vma_private(tmp);

/*
* Link the vma into the MT. After using __mt_dup(), memory
* allocation is not necessary here, so it cannot fail.
*/
vma_iter_bulk_store(&vmi, tmp);

mm->map_count++;
if (!(tmp->vm_flags & VM_WIPEONFORK))
retval = copy_page_range(tmp, mpnt);

if (tmp->vm_ops && tmp->vm_ops->open)
tmp->vm_ops->open(tmp);

if (retval) {
mpnt = vma_next(&vmi);
goto loop_out;
@@ -205,11 +205,10 @@ static int __init crash_save_vmcoreinfo_init(void)
VMCOREINFO_NUMBER(PG_head_mask);
#define PAGE_BUDDY_MAPCOUNT_VALUE (~PG_buddy)
VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE);
#ifdef CONFIG_HUGETLB_PAGE
VMCOREINFO_NUMBER(PG_hugetlb);
#define PAGE_HUGETLB_MAPCOUNT_VALUE (~PG_hugetlb)
VMCOREINFO_NUMBER(PAGE_HUGETLB_MAPCOUNT_VALUE);
#define PAGE_OFFLINE_MAPCOUNT_VALUE (~PG_offline)
VMCOREINFO_NUMBER(PAGE_OFFLINE_MAPCOUNT_VALUE);
#endif

#ifdef CONFIG_KALLSYMS
VMCOREINFO_SYMBOL(kallsyms_names);
@@ -627,10 +627,10 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries,
/*
* Zero out zone modifiers, as we don't have specific zone
* requirements. Keep the flags related to allocation in atomic
* contexts and I/O.
* contexts, I/O, nolockdep.
*/
alloc_flags &= ~GFP_ZONEMASK;
alloc_flags &= (GFP_ATOMIC | GFP_KERNEL);
alloc_flags &= (GFP_ATOMIC | GFP_KERNEL | __GFP_NOLOCKDEP);
alloc_flags |= __GFP_NOWARN;
page = alloc_pages(alloc_flags, DEPOT_POOL_ORDER);
if (page)
54 mm/gup.c
@@ -1206,6 +1206,22 @@ static long __get_user_pages(struct mm_struct *mm,

/* first iteration or cross vma bound */
if (!vma || start >= vma->vm_end) {
/*
* MADV_POPULATE_(READ|WRITE) wants to handle VMA
* lookups+error reporting differently.
*/
if (gup_flags & FOLL_MADV_POPULATE) {
vma = vma_lookup(mm, start);
if (!vma) {
ret = -ENOMEM;
goto out;
}
if (check_vma_flags(vma, gup_flags)) {
ret = -EINVAL;
goto out;
}
goto retry;
}
vma = gup_vma_lookup(mm, start);
if (!vma && in_gate_area(mm, start)) {
ret = get_gate_page(mm, start & PAGE_MASK,

@@ -1685,35 +1701,35 @@ long populate_vma_page_range(struct vm_area_struct *vma,
}

/*
* faultin_vma_page_range() - populate (prefault) page tables inside the
* given VMA range readable/writable
* faultin_page_range() - populate (prefault) page tables inside the
* given range readable/writable
*
* This takes care of mlocking the pages, too, if VM_LOCKED is set.
*
* @vma: target vma
* @mm: the mm to populate page tables in
* @start: start address
* @end: end address
* @write: whether to prefault readable or writable
* @locked: whether the mmap_lock is still held
*
* Returns either number of processed pages in the vma, or a negative error
* code on error (see __get_user_pages()).
* Returns either number of processed pages in the MM, or a negative error
* code on error (see __get_user_pages()). Note that this function reports
* errors related to VMAs, such as incompatible mappings, as expected by
* MADV_POPULATE_(READ|WRITE).
*
* vma->vm_mm->mmap_lock must be held. The range must be page-aligned and
* covered by the VMA. If it's released, *@locked will be set to 0.
* The range must be page-aligned.
*
* mm->mmap_lock must be held. If it's released, *@locked will be set to 0.
*/
long faultin_vma_page_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end, bool write, int *locked)
long faultin_page_range(struct mm_struct *mm, unsigned long start,
unsigned long end, bool write, int *locked)
{
struct mm_struct *mm = vma->vm_mm;
unsigned long nr_pages = (end - start) / PAGE_SIZE;
int gup_flags;
long ret;

VM_BUG_ON(!PAGE_ALIGNED(start));
VM_BUG_ON(!PAGE_ALIGNED(end));
VM_BUG_ON_VMA(start < vma->vm_start, vma);
VM_BUG_ON_VMA(end > vma->vm_end, vma);
mmap_assert_locked(mm);

/*

@@ -1725,19 +1741,13 @@ long faultin_vma_page_range(struct vm_area_struct *vma, unsigned long start,
* a poisoned page.
* !FOLL_FORCE: Require proper access permissions.
*/
gup_flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_UNLOCKABLE;
gup_flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_UNLOCKABLE |
FOLL_MADV_POPULATE;
if (write)
gup_flags |= FOLL_WRITE;

/*
* We want to report -EINVAL instead of -EFAULT for any permission
* problems or incompatible mappings.
*/
if (check_vma_flags(vma, gup_flags))
return -EINVAL;

ret = __get_user_pages(mm, start, nr_pages, gup_flags,
NULL, locked);
ret = __get_user_pages_locked(mm, start, nr_pages, NULL, locked,
gup_flags);
lru_add_drain();
return ret;
}
@@ -2259,9 +2259,6 @@ int move_pages_huge_pmd(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, pm
goto unlock_ptls;
}

folio_move_anon_rmap(src_folio, dst_vma);
WRITE_ONCE(src_folio->index, linear_page_index(dst_vma, dst_addr));

src_pmdval = pmdp_huge_clear_flush(src_vma, src_addr, src_pmd);
/* Folio got pinned from under us. Put it back and fail the move. */
if (folio_maybe_dma_pinned(src_folio)) {

@@ -2270,6 +2267,9 @@ int move_pages_huge_pmd(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, pm
goto unlock_ptls;
}

folio_move_anon_rmap(src_folio, dst_vma);
WRITE_ONCE(src_folio->index, linear_page_index(dst_vma, dst_addr));

_dst_pmd = mk_huge_pmd(&src_folio->page, dst_vma->vm_page_prot);
/* Follow mremap() behavior and treat the entry dirty after the move */
_dst_pmd = pmd_mkwrite(pmd_mkdirty(_dst_pmd), dst_vma);
50 mm/hugetlb.c
@@ -1624,7 +1624,7 @@ static inline void __clear_hugetlb_destructor(struct hstate *h,
{
lockdep_assert_held(&hugetlb_lock);

folio_clear_hugetlb(folio);
__folio_clear_hugetlb(folio);
}

/*

@@ -1711,7 +1711,7 @@ static void add_hugetlb_folio(struct hstate *h, struct folio *folio,
h->surplus_huge_pages_node[nid]++;
}

folio_set_hugetlb(folio);
__folio_set_hugetlb(folio);
folio_change_private(folio, NULL);
/*
* We have to set hugetlb_vmemmap_optimized again as above

@@ -1781,7 +1781,7 @@ static void __update_and_free_hugetlb_folio(struct hstate *h,
* If vmemmap pages were allocated above, then we need to clear the
* hugetlb destructor under the hugetlb lock.
*/
if (clear_dtor) {
if (folio_test_hugetlb(folio)) {
spin_lock_irq(&hugetlb_lock);
__clear_hugetlb_destructor(h, folio);
spin_unlock_irq(&hugetlb_lock);

@@ -2049,7 +2049,7 @@ static void __prep_account_new_huge_page(struct hstate *h, int nid)

static void init_new_hugetlb_folio(struct hstate *h, struct folio *folio)
{
folio_set_hugetlb(folio);
__folio_set_hugetlb(folio);
INIT_LIST_HEAD(&folio->lru);
hugetlb_set_folio_subpool(folio, NULL);
set_hugetlb_cgroup(folio, NULL);

@@ -2159,22 +2159,6 @@ static bool prep_compound_gigantic_folio_for_demote(struct folio *folio,
return __prep_compound_gigantic_folio(folio, order, true);
}

/*
* PageHuge() only returns true for hugetlbfs pages, but not for normal or
* transparent huge pages. See the PageTransHuge() documentation for more
* details.
*/
int PageHuge(const struct page *page)
{
const struct folio *folio;

if (!PageCompound(page))
return 0;
folio = page_folio(page);
return folio_test_hugetlb(folio);
}
EXPORT_SYMBOL_GPL(PageHuge);

/*
* Find and lock address space (mapping) in write mode.
*

@@ -3268,9 +3252,12 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,

rsv_adjust = hugepage_subpool_put_pages(spool, 1);
hugetlb_acct_memory(h, -rsv_adjust);
if (deferred_reserve)
if (deferred_reserve) {
spin_lock_irq(&hugetlb_lock);
hugetlb_cgroup_uncharge_folio_rsvd(hstate_index(h),
pages_per_huge_page(h), folio);
spin_unlock_irq(&hugetlb_lock);
}
}

if (!memcg_charge_ret)

@@ -6274,6 +6261,12 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
VM_UFFD_MISSING);
}

if (!(vma->vm_flags & VM_MAYSHARE)) {
ret = vmf_anon_prepare(vmf);
if (unlikely(ret))
goto out;
}

folio = alloc_hugetlb_folio(vma, haddr, 0);
if (IS_ERR(folio)) {
/*

@@ -6310,15 +6303,12 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
*/
restore_reserve_on_error(h, vma, haddr, folio);
folio_put(folio);
ret = VM_FAULT_SIGBUS;
goto out;
}
new_pagecache_folio = true;
} else {
folio_lock(folio);

ret = vmf_anon_prepare(vmf);
if (unlikely(ret))
goto backout_unlocked;
anon_rmap = 1;
}
} else {
@@ -7044,9 +7034,13 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
if (!pte_same(pte, newpte))
set_huge_pte_at(mm, address, ptep, newpte, psize);
} else if (unlikely(is_pte_marker(pte))) {
/* No other markers apply for now. */
WARN_ON_ONCE(!pte_marker_uffd_wp(pte));
if (uffd_wp_resolve)
/*
* Do nothing on a poison marker; page is
* corrupted, permissions do not apply. Here
* pte_marker_uffd_wp()==true implies !poison
* because they're mutually exclusive.
*/
if (pte_marker_uffd_wp(pte) && uffd_wp_resolve)
/* Safe to modify directly (non-present->none). */
huge_pte_clear(mm, address, ptep, psize);
} else if (!huge_pte_none(pte)) {
@@ -686,9 +686,8 @@ struct anon_vma *folio_anon_vma(struct folio *folio);
void unmap_mapping_folio(struct folio *folio);
extern long populate_vma_page_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end, int *locked);
extern long faultin_vma_page_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end,
bool write, int *locked);
extern long faultin_page_range(struct mm_struct *mm, unsigned long start,
unsigned long end, bool write, int *locked);
extern bool mlock_future_ok(struct mm_struct *mm, unsigned long flags,
unsigned long bytes);

@@ -1127,10 +1126,13 @@ enum {
FOLL_FAST_ONLY = 1 << 20,
/* allow unlocking the mmap lock */
FOLL_UNLOCKABLE = 1 << 21,
/* VMA lookup+checks compatible with MADV_POPULATE_(READ|WRITE) */
FOLL_MADV_POPULATE = 1 << 22,
};

#define INTERNAL_GUP_FLAGS (FOLL_TOUCH | FOLL_TRIED | FOLL_REMOTE | FOLL_PIN | \
FOLL_FAST_ONLY | FOLL_UNLOCKABLE)
FOLL_FAST_ONLY | FOLL_UNLOCKABLE | \
FOLL_MADV_POPULATE)

/*
* Indicates for which pages that are write-protected in the page table,
17 mm/madvise.c
@@ -908,27 +908,14 @@ static long madvise_populate(struct vm_area_struct *vma,
{
const bool write = behavior == MADV_POPULATE_WRITE;
struct mm_struct *mm = vma->vm_mm;
unsigned long tmp_end;
int locked = 1;
long pages;

*prev = vma;

while (start < end) {
/*
* We might have temporarily dropped the lock. For example,
* our VMA might have been split.
*/
if (!vma || start >= vma->vm_end) {
vma = vma_lookup(mm, start);
if (!vma)
return -ENOMEM;
}

tmp_end = min_t(unsigned long, end, vma->vm_end);
/* Populate (prefault) page tables readable/writable. */
pages = faultin_vma_page_range(vma, start, tmp_end, write,
&locked);
pages = faultin_page_range(mm, start, end, write, &locked);
if (!locked) {
mmap_read_lock(mm);
locked = 1;

@@ -949,7 +936,7 @@ static long madvise_populate(struct vm_area_struct *vma,
pr_warn_once("%s: unhandled return value: %ld\n",
__func__, pages);
fallthrough;
case -ENOMEM:
case -ENOMEM: /* No VMA or out of memory. */
return -ENOMEM;
}
}
@@ -154,11 +154,23 @@ static int __page_handle_poison(struct page *page)
{
int ret;

zone_pcp_disable(page_zone(page));
/*
* zone_pcp_disable() can't be used here. It will
* hold pcp_batch_high_lock and dissolve_free_huge_page() might hold
* cpu_hotplug_lock via static_key_slow_dec() when hugetlb vmemmap
* optimization is enabled. This will break current lock dependency
* chain and leads to deadlock.
* Disabling pcp before dissolving the page was a deterministic
* approach because we made sure that those pages cannot end up in any
* PCP list. Draining PCP lists expels those pages to the buddy system,
* but nothing guarantees that those pages do not get back to a PCP
* queue if we need to refill those.
*/
ret = dissolve_free_huge_page(page);
if (!ret)
if (!ret) {
drain_all_pages(page_zone(page));
ret = take_page_off_buddy(page);
zone_pcp_enable(page_zone(page));
}

return ret;
}
190 mm/page_owner.c
@@ -118,7 +118,6 @@ static __init void init_page_owner(void)
register_dummy_stack();
register_failure_stack();
register_early_stack();
static_branch_enable(&page_owner_inited);
init_early_allocated_pages();
/* Initialize dummy and failure stacks and link them to stack_list */
dummy_stack.stack_record = __stack_depot_get_stack_record(dummy_handle);

@@ -129,6 +128,7 @@ static __init void init_page_owner(void)
refcount_set(&failure_stack.stack_record->count, 1);
dummy_stack.next = &failure_stack;
stack_list = &dummy_stack;
static_branch_enable(&page_owner_inited);
}

struct page_ext_operations page_owner_ops = {

@@ -196,7 +196,8 @@ static void add_stack_record_to_list(struct stack_record *stack_record,
spin_unlock_irqrestore(&stack_list_lock, flags);
}

static void inc_stack_record_count(depot_stack_handle_t handle, gfp_t gfp_mask)
static void inc_stack_record_count(depot_stack_handle_t handle, gfp_t gfp_mask,
int nr_base_pages)
{
struct stack_record *stack_record = __stack_depot_get_stack_record(handle);

@@ -217,20 +218,74 @@ static void inc_stack_record_count(depot_stack_handle_t handle, gfp_t gfp_mask)
/* Add the new stack_record to our list */
add_stack_record_to_list(stack_record, gfp_mask);
}
refcount_inc(&stack_record->count);
refcount_add(nr_base_pages, &stack_record->count);
}

static void dec_stack_record_count(depot_stack_handle_t handle)
static void dec_stack_record_count(depot_stack_handle_t handle,
int nr_base_pages)
{
struct stack_record *stack_record = __stack_depot_get_stack_record(handle);

if (stack_record)
refcount_dec(&stack_record->count);
if (!stack_record)
return;

if (refcount_sub_and_test(nr_base_pages, &stack_record->count))
pr_warn("%s: refcount went to 0 for %u handle\n", __func__,
handle);
}

static inline void __update_page_owner_handle(struct page_ext *page_ext,
depot_stack_handle_t handle,
unsigned short order,
gfp_t gfp_mask,
short last_migrate_reason, u64 ts_nsec,
pid_t pid, pid_t tgid, char *comm)
{
int i;
struct page_owner *page_owner;

for (i = 0; i < (1 << order); i++) {
page_owner = get_page_owner(page_ext);
page_owner->handle = handle;
page_owner->order = order;
page_owner->gfp_mask = gfp_mask;
page_owner->last_migrate_reason = last_migrate_reason;
page_owner->pid = pid;
page_owner->tgid = tgid;
page_owner->ts_nsec = ts_nsec;
strscpy(page_owner->comm, comm,
sizeof(page_owner->comm));
__set_bit(PAGE_EXT_OWNER, &page_ext->flags);
__set_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
page_ext = page_ext_next(page_ext);
}
}

static inline void __update_page_owner_free_handle(struct page_ext *page_ext,
depot_stack_handle_t handle,
unsigned short order,
pid_t pid, pid_t tgid,
u64 free_ts_nsec)
{
int i;
struct page_owner *page_owner;

for (i = 0; i < (1 << order); i++) {
page_owner = get_page_owner(page_ext);
/* Only __reset_page_owner() wants to clear the bit */
if (handle) {
__clear_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
page_owner->free_handle = handle;
}
page_owner->free_ts_nsec = free_ts_nsec;
page_owner->free_pid = current->pid;
page_owner->free_tgid = current->tgid;
page_ext = page_ext_next(page_ext);
}
}

void __reset_page_owner(struct page *page, unsigned short order)
{
int i;
struct page_ext *page_ext;
depot_stack_handle_t handle;
depot_stack_handle_t alloc_handle;

@@ -245,16 +300,10 @@ void __reset_page_owner(struct page *page, unsigned short order)
alloc_handle = page_owner->handle;

handle = save_stack(GFP_NOWAIT | __GFP_NOWARN);
for (i = 0; i < (1 << order); i++) {
__clear_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
page_owner->free_handle = handle;
page_owner->free_ts_nsec = free_ts_nsec;
page_owner->free_pid = current->pid;
page_owner->free_tgid = current->tgid;
page_ext = page_ext_next(page_ext);
page_owner = get_page_owner(page_ext);
}
__update_page_owner_free_handle(page_ext, handle, order, current->pid,
current->tgid, free_ts_nsec);
page_ext_put(page_ext);

if (alloc_handle != early_handle)
/*
* early_handle is being set as a handle for all those

@@ -263,39 +312,14 @@ void __reset_page_owner(struct page *page, unsigned short order)
* the machinery is not ready yet, we cannot decrement
* their refcount either.
*/
dec_stack_record_count(alloc_handle);
}

static inline void __set_page_owner_handle(struct page_ext *page_ext,
depot_stack_handle_t handle,
unsigned short order, gfp_t gfp_mask)
{
struct page_owner *page_owner;
int i;
u64 ts_nsec = local_clock();

for (i = 0; i < (1 << order); i++) {
page_owner = get_page_owner(page_ext);
page_owner->handle = handle;
page_owner->order = order;
page_owner->gfp_mask = gfp_mask;
page_owner->last_migrate_reason = -1;
page_owner->pid = current->pid;
page_owner->tgid = current->tgid;
page_owner->ts_nsec = ts_nsec;
strscpy(page_owner->comm, current->comm,
sizeof(page_owner->comm));
__set_bit(PAGE_EXT_OWNER, &page_ext->flags);
__set_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);

page_ext = page_ext_next(page_ext);
}
dec_stack_record_count(alloc_handle, 1 << order);
}

noinline void __set_page_owner(struct page *page, unsigned short order,
gfp_t gfp_mask)
{
struct page_ext *page_ext;
u64 ts_nsec = local_clock();
depot_stack_handle_t handle;

handle = save_stack(gfp_mask);

@@ -303,9 +327,11 @@ noinline void __set_page_owner(struct page *page, unsigned short order,
page_ext = page_ext_get(page);
if (unlikely(!page_ext))
return;
__set_page_owner_handle(page_ext, handle, order, gfp_mask);
__update_page_owner_handle(page_ext, handle, order, gfp_mask, -1,
current->pid, current->tgid, ts_nsec,
current->comm);
page_ext_put(page_ext);
inc_stack_record_count(handle, gfp_mask);
inc_stack_record_count(handle, gfp_mask, 1 << order);
}

void __set_page_owner_migrate_reason(struct page *page, int reason)
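A quick worked example of the accounting change above (illustrative numbers only, not part of the patch): for a single order-3 allocation, __set_page_owner() now feeds 1 << 3 = 8 base pages into the stack record, and the matching __reset_page_owner() path subtracts the same amount, so the debugfs output reports pages that are still outstanding rather than allocation events:

    /* Illustrative sketch of the new per-base-page accounting. */
    unsigned short order = 3;
    inc_stack_record_count(handle, gfp_mask, 1 << order);	/* stack record gains 8 base pages */
    /* ...while the pages are live, stack_print() shows "nr_base_pages: 8" for this stack... */
    dec_stack_record_count(handle, 1 << order);			/* stack record drops back by 8 on free */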
@@ -340,9 +366,12 @@ void __split_page_owner(struct page *page, int old_order, int new_order)

void __folio_copy_owner(struct folio *newfolio, struct folio *old)
{
int i;
struct page_ext *old_ext;
struct page_ext *new_ext;
struct page_owner *old_page_owner, *new_page_owner;
struct page_owner *old_page_owner;
struct page_owner *new_page_owner;
depot_stack_handle_t migrate_handle;

old_ext = page_ext_get(&old->page);
if (unlikely(!old_ext))

@@ -356,30 +385,32 @@ void __folio_copy_owner(struct folio *newfolio, struct folio *old)

old_page_owner = get_page_owner(old_ext);
new_page_owner = get_page_owner(new_ext);
new_page_owner->order = old_page_owner->order;
new_page_owner->gfp_mask = old_page_owner->gfp_mask;
new_page_owner->last_migrate_reason =
old_page_owner->last_migrate_reason;
new_page_owner->handle = old_page_owner->handle;
new_page_owner->pid = old_page_owner->pid;
new_page_owner->tgid = old_page_owner->tgid;
new_page_owner->free_pid = old_page_owner->free_pid;
new_page_owner->free_tgid = old_page_owner->free_tgid;
new_page_owner->ts_nsec = old_page_owner->ts_nsec;
new_page_owner->free_ts_nsec = old_page_owner->ts_nsec;
strcpy(new_page_owner->comm, old_page_owner->comm);

migrate_handle = new_page_owner->handle;
__update_page_owner_handle(new_ext, old_page_owner->handle,
old_page_owner->order, old_page_owner->gfp_mask,
old_page_owner->last_migrate_reason,
old_page_owner->ts_nsec, old_page_owner->pid,
old_page_owner->tgid, old_page_owner->comm);
/*
* We don't clear the bit on the old folio as it's going to be freed
* after migration. Until then, the info can be useful in case of
* a bug, and the overall stats will be off a bit only temporarily.
* Also, migrate_misplaced_transhuge_page() can still fail the
* migration and then we want the old folio to retain the info. But
* in that case we also don't need to explicitly clear the info from
* the new page, which will be freed.
* Do not proactively clear PAGE_EXT_OWNER{_ALLOCATED} bits as the folio
* will be freed after migration. Keep them until then as they may be
* useful.
*/
__set_bit(PAGE_EXT_OWNER, &new_ext->flags);
__set_bit(PAGE_EXT_OWNER_ALLOCATED, &new_ext->flags);
__update_page_owner_free_handle(new_ext, 0, old_page_owner->order,
old_page_owner->free_pid,
old_page_owner->free_tgid,
old_page_owner->free_ts_nsec);
/*
* We linked the original stack to the new folio, we need to do the same
* for the new one and the old folio otherwise there will be an imbalance
* when subtracting those pages from the stack.
*/
for (i = 0; i < (1 << new_page_owner->order); i++) {
old_page_owner->handle = migrate_handle;
old_ext = page_ext_next(old_ext);
old_page_owner = get_page_owner(old_ext);
}

page_ext_put(new_ext);
page_ext_put(old_ext);
}

@@ -787,8 +818,9 @@ static void init_pages_in_zone(pg_data_t *pgdat, struct zone *zone)
goto ext_put_continue;

/* Found early allocated page */
__set_page_owner_handle(page_ext, early_handle,
0, 0);
__update_page_owner_handle(page_ext, early_handle, 0, 0,
-1, local_clock(), current->pid,
current->tgid, current->comm);
count++;
ext_put_continue:
page_ext_put(page_ext);

@@ -840,13 +872,11 @@ static void *stack_start(struct seq_file *m, loff_t *ppos)
* value of stack_list.
*/
stack = smp_load_acquire(&stack_list);
m->private = stack;
} else {
stack = m->private;
stack = stack->next;
}

m->private = stack;

return stack;
}

@@ -861,11 +891,11 @@ static void *stack_next(struct seq_file *m, void *v, loff_t *ppos)
return stack;
}

static unsigned long page_owner_stack_threshold;
static unsigned long page_owner_pages_threshold;

static int stack_print(struct seq_file *m, void *v)
{
int i, stack_count;
int i, nr_base_pages;
struct stack *stack = v;
unsigned long *entries;
unsigned long nr_entries;

@@ -876,14 +906,14 @@ static int stack_print(struct seq_file *m, void *v)

nr_entries = stack_record->size;
entries = stack_record->entries;
stack_count = refcount_read(&stack_record->count) - 1;
nr_base_pages = refcount_read(&stack_record->count) - 1;

if (stack_count < 1 || stack_count < page_owner_stack_threshold)
if (nr_base_pages < 1 || nr_base_pages < page_owner_pages_threshold)
return 0;

for (i = 0; i < nr_entries; i++)
seq_printf(m, " %pS\n", (void *)entries[i]);
seq_printf(m, "stack_count: %d\n\n", stack_count);
seq_printf(m, "nr_base_pages: %d\n\n", nr_base_pages);

return 0;
}

@@ -913,13 +943,13 @@ static const struct file_operations page_owner_stack_operations = {

static int page_owner_threshold_get(void *data, u64 *val)
{
*val = READ_ONCE(page_owner_stack_threshold);
*val = READ_ONCE(page_owner_pages_threshold);
return 0;
}

static int page_owner_threshold_set(void *data, u64 val)
{
WRITE_ONCE(page_owner_stack_threshold, val);
WRITE_ONCE(page_owner_pages_threshold, val);
return 0;
}
@@ -748,12 +748,6 @@ static long shmem_unused_huge_count(struct super_block *sb,

#define shmem_huge SHMEM_HUGE_DENY

bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force,
struct mm_struct *mm, unsigned long vm_flags)
{
return false;
}

static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
struct shrink_control *sc, unsigned long nr_to_split)
{
25 mm/zswap.c
@@ -1331,15 +1331,22 @@ static unsigned long zswap_shrinker_count(struct shrinker *shrinker,
if (!gfp_has_io_fs(sc->gfp_mask))
return 0;

#ifdef CONFIG_MEMCG_KMEM
mem_cgroup_flush_stats(memcg);
nr_backing = memcg_page_state(memcg, MEMCG_ZSWAP_B) >> PAGE_SHIFT;
nr_stored = memcg_page_state(memcg, MEMCG_ZSWAPPED);
#else
/* use pool stats instead of memcg stats */
nr_backing = zswap_pool_total_size >> PAGE_SHIFT;
nr_stored = atomic_read(&zswap_nr_stored);
#endif
/*
* For memcg, use the cgroup-wide ZSWAP stats since we don't
* have them per-node and thus per-lruvec. Careful if memcg is
* runtime-disabled: we can get sc->memcg == NULL, which is ok
* for the lruvec, but not for memcg_page_state().
*
* Without memcg, use the zswap pool-wide metrics.
*/
if (!mem_cgroup_disabled()) {
mem_cgroup_flush_stats(memcg);
nr_backing = memcg_page_state(memcg, MEMCG_ZSWAP_B) >> PAGE_SHIFT;
nr_stored = memcg_page_state(memcg, MEMCG_ZSWAPPED);
} else {
nr_backing = zswap_pool_total_size >> PAGE_SHIFT;
nr_stored = atomic_read(&zswap_nr_stored);
}

if (!nr_stored)
return 0;
|
||||
#include <asm/types.h>
|
||||
#include <ctype.h>
|
||||
#include <errno.h>
|
||||
#include <limits.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
@ -1156,7 +1155,7 @@ void __run_test(struct __fixture_metadata *f,
|
||||
struct __test_metadata *t)
|
||||
{
|
||||
struct __test_xfail *xfail;
|
||||
char test_name[LINE_MAX];
|
||||
char *test_name;
|
||||
const char *diagnostic;
|
||||
|
||||
/* reset test struct */
|
||||
@ -1164,8 +1163,12 @@ void __run_test(struct __fixture_metadata *f,
|
||||
t->trigger = 0;
|
||||
memset(t->results->reason, 0, sizeof(t->results->reason));
|
||||
|
||||
snprintf(test_name, sizeof(test_name), "%s%s%s.%s",
|
||||
f->name, variant->name[0] ? "." : "", variant->name, t->name);
|
||||
if (asprintf(&test_name, "%s%s%s.%s", f->name,
|
||||
variant->name[0] ? "." : "", variant->name, t->name) == -1) {
|
||||
ksft_print_msg("ERROR ALLOCATING MEMORY\n");
|
||||
t->exit_code = KSFT_FAIL;
|
||||
_exit(t->exit_code);
|
||||
}
|
||||
|
||||
ksft_print_msg(" RUN %s ...\n", test_name);
|
||||
|
||||
@ -1203,6 +1206,7 @@ void __run_test(struct __fixture_metadata *f,
|
||||
|
||||
ksft_test_result_code(t->exit_code, test_name,
|
||||
diagnostic ? "%s" : "", diagnostic);
|
||||
free(test_name);
|
||||
}
|
||||
|
||||
static int test_harness_run(int argc, char **argv)
|
||||
|
@@ -7,6 +7,7 @@
#include <linux/mman.h>
#include <linux/prctl.h>

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <sys/auxv.h>
@@ -54,7 +54,6 @@ int test_nr;

u64 shadow_pkey_reg;
int dprint_in_signal;
char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
char buf[256];

void cat_into_file(char *str, char *file)
{

@@ -1745,42 +1744,6 @@ void pkey_setup_shadow(void)
shadow_pkey_reg = __read_pkey_reg();
}

pid_t parent_pid;

void restore_settings_atexit(void)
{
if (parent_pid == getpid())
cat_into_file(buf, "/proc/sys/vm/nr_hugepages");
}

void save_settings(void)
{
int fd;
int err;

if (geteuid())
return;

fd = open("/proc/sys/vm/nr_hugepages", O_RDONLY);
if (fd < 0) {
fprintf(stderr, "error opening\n");
perror("error: ");
exit(__LINE__);
}

/* -1 to guarantee leaving the trailing \0 */
err = read(fd, buf, sizeof(buf)-1);
if (err < 0) {
fprintf(stderr, "error reading\n");
perror("error: ");
exit(__LINE__);
}

parent_pid = getpid();
atexit(restore_settings_atexit);
close(fd);
}

int main(void)
{
int nr_iterations = 22;

@@ -1788,7 +1751,6 @@ int main(void)

srand((unsigned int)time(NULL));

save_settings();
setup_handlers();

printf("has pkeys: %d\n", pkeys_supported);
@@ -385,6 +385,7 @@ CATEGORY="ksm_numa" run_test ./ksm_tests -N -m 0
CATEGORY="ksm" run_test ./ksm_functional_tests

# protection_keys tests
nr_hugepgs=$(cat /proc/sys/vm/nr_hugepages)
if [ -x ./protection_keys_32 ]
then
CATEGORY="pkey" run_test ./protection_keys_32

@@ -394,6 +395,7 @@ if [ -x ./protection_keys_64 ]
then
CATEGORY="pkey" run_test ./protection_keys_64
fi
echo "$nr_hugepgs" > /proc/sys/vm/nr_hugepages

if [ -x ./soft-dirty ]
then
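Together with the protection_keys.c hunk above, the nr_hugepages bookkeeping moves from the C test into the driver script. The save/restore idiom it relies on, shown standalone (a sketch only, paths as in the hunk):

    nr_hugepgs=$(cat /proc/sys/vm/nr_hugepages)     # remember the current setting
    # ... run tests that may change nr_hugepages (the pkey tests above) ...
    echo "$nr_hugepgs" > /proc/sys/vm/nr_hugepages  # put it back afterwards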
@@ -300,7 +300,7 @@ int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd,
char **addr)
{
size_t i;
int dummy;
int __attribute__((unused)) dummy = 0;

srand(time(NULL));