mm/pgtable: delete pmd_trans_unstable() and friends
Delete pmd_trans_unstable, pmd_none_or_trans_huge_or_clear_bad() and pmd_devmap_trans_unstable(), all now unused. With mixed feelings, delete all the comments on pmd_trans_unstable(). That was very good documentation of a subtle state, and this series does not even eliminate that state: but rather, normalizes and extends it, asking pte_offset_map[_lock]() callers to anticipate failure, without regard for whether mmap_read_lock() or mmap_write_lock() is held. Retain pud_trans_unstable(), which has one use in __handle_mm_fault(), but delete its equivalent pud_none_or_trans_huge_or_dev_or_clear_bad(). While there, move the default arch_needs_pgtable_deposit() definition up near where pgtable_trans_huge_deposit() and withdraw() are declared. Link: https://lkml.kernel.org/r/5abdab3-3136-b42e-274d-9c6281bfb79@google.com Signed-off-by: Hugh Dickins <hughd@google.com> Cc: Alistair Popple <apopple@nvidia.com> Cc: Anshuman Khandual <anshuman.khandual@arm.com> Cc: Axel Rasmussen <axelrasmussen@google.com> Cc: Christophe Leroy <christophe.leroy@csgroup.eu> Cc: Christoph Hellwig <hch@infradead.org> Cc: David Hildenbrand <david@redhat.com> Cc: "Huang, Ying" <ying.huang@intel.com> Cc: Ira Weiny <ira.weiny@intel.com> Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Cc: Lorenzo Stoakes <lstoakes@gmail.com> Cc: Matthew Wilcox <willy@infradead.org> Cc: Mel Gorman <mgorman@techsingularity.net> Cc: Miaohe Lin <linmiaohe@huawei.com> Cc: Mike Kravetz <mike.kravetz@oracle.com> Cc: Mike Rapoport (IBM) <rppt@kernel.org> Cc: Minchan Kim <minchan@kernel.org> Cc: Naoya Horiguchi <naoya.horiguchi@nec.com> Cc: Pavel Tatashin <pasha.tatashin@soleen.com> Cc: Peter Xu <peterx@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Qi Zheng <zhengqi.arch@bytedance.com> Cc: Ralph Campbell <rcampbell@nvidia.com> Cc: Ryan Roberts <ryan.roberts@arm.com> Cc: SeongJae Park <sj@kernel.org> Cc: Song Liu <song@kernel.org> Cc: Steven Price <steven.price@arm.com> Cc: Suren Baghdasaryan <surenb@google.com> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com> Cc: Will Deacon <will@kernel.org> Cc: Yang Shi <shy828301@gmail.com> Cc: Yu Zhao <yuzhao@google.com> Cc: Zack Rusin <zackr@vmware.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
parent
c7ad08804f
commit
feda5c393a
@ -599,6 +599,10 @@ extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
|
||||
extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
|
||||
#endif
|
||||
|
||||
#ifndef arch_needs_pgtable_deposit
|
||||
#define arch_needs_pgtable_deposit() (false)
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
/*
|
||||
* This is an implementation of pmdp_establish() that is only suitable for an
|
||||
@ -1300,9 +1304,10 @@ static inline int pud_trans_huge(pud_t pud)
|
||||
}
|
||||
#endif
|
||||
|
||||
/* See pmd_none_or_trans_huge_or_clear_bad for discussion. */
|
||||
static inline int pud_none_or_trans_huge_or_dev_or_clear_bad(pud_t *pud)
|
||||
static inline int pud_trans_unstable(pud_t *pud)
|
||||
{
|
||||
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
|
||||
defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
|
||||
pud_t pudval = READ_ONCE(*pud);
|
||||
|
||||
if (pud_none(pudval) || pud_trans_huge(pudval) || pud_devmap(pudval))
|
||||
@ -1311,104 +1316,10 @@ static inline int pud_none_or_trans_huge_or_dev_or_clear_bad(pud_t *pud)
|
||||
pud_clear_bad(pud);
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* See pmd_trans_unstable for discussion. */
|
||||
static inline int pud_trans_unstable(pud_t *pud)
|
||||
{
|
||||
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
|
||||
defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
|
||||
return pud_none_or_trans_huge_or_dev_or_clear_bad(pud);
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifndef arch_needs_pgtable_deposit
|
||||
#define arch_needs_pgtable_deposit() (false)
|
||||
#endif
|
||||
/*
|
||||
* This function is meant to be used by sites walking pagetables with
|
||||
* the mmap_lock held in read mode to protect against MADV_DONTNEED and
|
||||
* transhuge page faults. MADV_DONTNEED can convert a transhuge pmd
|
||||
* into a null pmd and the transhuge page fault can convert a null pmd
|
||||
* into an hugepmd or into a regular pmd (if the hugepage allocation
|
||||
* fails). While holding the mmap_lock in read mode the pmd becomes
|
||||
* stable and stops changing under us only if it's not null and not a
|
||||
* transhuge pmd. When those races occurs and this function makes a
|
||||
* difference vs the standard pmd_none_or_clear_bad, the result is
|
||||
* undefined so behaving like if the pmd was none is safe (because it
|
||||
* can return none anyway). The compiler level barrier() is critically
|
||||
* important to compute the two checks atomically on the same pmdval.
|
||||
*
|
||||
* For 32bit kernels with a 64bit large pmd_t this automatically takes
|
||||
* care of reading the pmd atomically to avoid SMP race conditions
|
||||
* against pmd_populate() when the mmap_lock is hold for reading by the
|
||||
* caller (a special atomic read not done by "gcc" as in the generic
|
||||
* version above, is also needed when THP is disabled because the page
|
||||
* fault can populate the pmd from under us).
|
||||
*/
|
||||
static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd)
|
||||
{
|
||||
pmd_t pmdval = pmdp_get_lockless(pmd);
|
||||
/*
|
||||
* !pmd_present() checks for pmd migration entries
|
||||
*
|
||||
* The complete check uses is_pmd_migration_entry() in linux/swapops.h
|
||||
* But using that requires moving current function and pmd_trans_unstable()
|
||||
* to linux/swapops.h to resolve dependency, which is too much code move.
|
||||
*
|
||||
* !pmd_present() is equivalent to is_pmd_migration_entry() currently,
|
||||
* because !pmd_present() pages can only be under migration not swapped
|
||||
* out.
|
||||
*
|
||||
* pmd_none() is preserved for future condition checks on pmd migration
|
||||
* entries and not confusing with this function name, although it is
|
||||
* redundant with !pmd_present().
|
||||
*/
|
||||
if (pmd_none(pmdval) || pmd_trans_huge(pmdval) ||
|
||||
(IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION) && !pmd_present(pmdval)))
|
||||
return 1;
|
||||
if (unlikely(pmd_bad(pmdval))) {
|
||||
pmd_clear_bad(pmd);
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* This is a noop if Transparent Hugepage Support is not built into
|
||||
* the kernel. Otherwise it is equivalent to
|
||||
* pmd_none_or_trans_huge_or_clear_bad(), and shall only be called in
|
||||
* places that already verified the pmd is not none and they want to
|
||||
* walk ptes while holding the mmap sem in read mode (write mode don't
|
||||
* need this). If THP is not enabled, the pmd can't go away under the
|
||||
* code even if MADV_DONTNEED runs, but if THP is enabled we need to
|
||||
* run a pmd_trans_unstable before walking the ptes after
|
||||
* split_huge_pmd returns (because it may have run when the pmd become
|
||||
* null, but then a page fault can map in a THP and not a regular page).
|
||||
*/
|
||||
static inline int pmd_trans_unstable(pmd_t *pmd)
|
||||
{
|
||||
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
return pmd_none_or_trans_huge_or_clear_bad(pmd);
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* the ordering of these checks is important for pmds with _page_devmap set.
|
||||
* if we check pmd_trans_unstable() first we will trip the bad_pmd() check
|
||||
* inside of pmd_none_or_trans_huge_or_clear_bad(). this will end up correctly
|
||||
* returning 1 but not before it spams dmesg with the pmd_clear_bad() output.
|
||||
*/
|
||||
static inline int pmd_devmap_trans_unstable(pmd_t *pmd)
|
||||
{
|
||||
return pmd_devmap(*pmd) || pmd_trans_unstable(pmd);
|
||||
}
|
||||
|
||||
#ifndef CONFIG_NUMA_BALANCING
|
||||
/*
|
||||
* Technically a PTE can be PROTNONE even when not doing NUMA balancing but
|
||||
|
@ -944,10 +944,6 @@ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address,
|
||||
return SCAN_SUCCEED;
|
||||
}
|
||||
|
||||
/*
|
||||
* See pmd_trans_unstable() for how the result may change out from
|
||||
* underneath us, even if we hold mmap_lock in read.
|
||||
*/
|
||||
static int find_pmd_or_thp_or_none(struct mm_struct *mm,
|
||||
unsigned long address,
|
||||
pmd_t **pmd)
|
||||
|
Loading…
Reference in New Issue
Block a user