26aa2d199d
This is mostly derived from a patch from Yang Shi: https://lore.kernel.org/linux-mm/1560468577-101178-10-git-send-email-yang.shi@linux.alibaba.com/ Add code to the reclaim path (shrink_page_list()) to "demote" data to another NUMA node instead of discarding the data. This always avoids the cost of I/O needed to read the page back in and sometimes avoids the writeout cost when the page is dirty. A second pass through shrink_page_list() will be made if any demotions fail. This essentially falls back to normal reclaim behavior in the case that demotions fail. Previous versions of this patch may have simply failed to reclaim pages which were eligible for demotion but were unable to be demoted in practice. In some cases, for example MADV_PAGEOUT, the pages are always discarded instead of demoted, to follow the kernel API definition: MADV_PAGEOUT is defined as freeing the specified pages regardless of which tier they are in. Note: This just adds the start of infrastructure for migration. It is actually disabled next to the FIXME in migrate_demote_page_ok(). [dave.hansen@linux.intel.com: v11] Link: https://lkml.kernel.org/r/20210715055145.195411-5-ying.huang@intel.com Link: https://lkml.kernel.org/r/20210721063926.3024591-4-ying.huang@intel.com Link: https://lkml.kernel.org/r/20210715055145.195411-5-ying.huang@intel.com Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com> Signed-off-by: "Huang, Ying" <ying.huang@intel.com> Reviewed-by: Yang Shi <shy828301@gmail.com> Reviewed-by: Wei Xu <weixugc@google.com> Reviewed-by: Oscar Salvador <osalvador@suse.de> Reviewed-by: Zi Yan <ziy@nvidia.com> Cc: Michal Hocko <mhocko@suse.com> Cc: David Rientjes <rientjes@google.com> Cc: Dan Williams <dan.j.williams@intel.com> Cc: David Hildenbrand <david@redhat.com> Cc: Greg Thelen <gthelen@google.com> Cc: Keith Busch <kbusch@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
112 lines
2.8 KiB
C
112 lines
2.8 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
/* Name of the trace system that the events in this header belong to. */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM migrate

/*
 * The include guard is bypassed whenever TRACE_HEADER_MULTI_READ is defined,
 * so the body of this header can be processed more than once.
 */
#if !defined(_TRACE_MIGRATE_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_MIGRATE_H

#include <linux/tracepoint.h>
|
|
|
|
/*
 * List of migrate modes, each paired with the string used to display it.
 *
 * Every entry except the last is wrapped in EM(); the last one uses EMe().
 * Both macros must be defined by the user of this list before it is
 * expanded, which lets the same list be expanded in several different ways.
 */
#define MIGRATE_MODE						\
	EM( MIGRATE_ASYNC,	"MIGRATE_ASYNC")		\
	EM( MIGRATE_SYNC_LIGHT,	"MIGRATE_SYNC_LIGHT")		\
	EMe(MIGRATE_SYNC,	"MIGRATE_SYNC")
|
|
|
|
|
|
/*
 * List of migrate reasons, each paired with the string used to display it.
 *
 * Every entry except the last is wrapped in EM(); the last one uses EMe().
 * Both macros must be defined by the user of this list before it is
 * expanded, which lets the same list be expanded in several different ways.
 */
#define MIGRATE_REASON						\
	EM( MR_COMPACTION,	"compaction")			\
	EM( MR_MEMORY_FAILURE,	"memory_failure")		\
	EM( MR_MEMORY_HOTPLUG,	"memory_hotplug")		\
	EM( MR_SYSCALL,		"syscall_or_cpuset")		\
	EM( MR_MEMPOLICY_MBIND,	"mempolicy_mbind")		\
	EM( MR_NUMA_MISPLACED,	"numa_misplaced")		\
	EM( MR_CONTIG_RANGE,	"contig_range")			\
	EM( MR_LONGTERM_PIN,	"longterm_pin")			\
	EMe(MR_DEMOTION,	"demotion")
|
|
|
|
/*
|
|
* First define the enums in the above macros to be exported to userspace
|
|
* via TRACE_DEFINE_ENUM().
|
|
*/
|
|
#undef EM
|
|
#undef EMe
|
|
#define EM(a, b) TRACE_DEFINE_ENUM(a);
|
|
#define EMe(a, b) TRACE_DEFINE_ENUM(a);
|
|
|
|
MIGRATE_MODE
|
|
MIGRATE_REASON
|
|
|
|
/*
 * Now redefine the EM() and EMe() macros to map the enums to the strings
 * that will be printed in the output.
 *
 * Each entry becomes a {value, string} initializer. EM() appends a trailing
 * comma and EMe() does not, so an expanded list is a well-formed sequence of
 * initializers with no comma after the final entry.
 */
#undef EM
#undef EMe
#define EM(a, b)	{a, b},
#define EMe(a, b)	{a, b}
|
|
|
|
TRACE_EVENT(mm_migrate_pages,
|
|
|
|
TP_PROTO(unsigned long succeeded, unsigned long failed,
|
|
unsigned long thp_succeeded, unsigned long thp_failed,
|
|
unsigned long thp_split, enum migrate_mode mode, int reason),
|
|
|
|
TP_ARGS(succeeded, failed, thp_succeeded, thp_failed,
|
|
thp_split, mode, reason),
|
|
|
|
TP_STRUCT__entry(
|
|
__field( unsigned long, succeeded)
|
|
__field( unsigned long, failed)
|
|
__field( unsigned long, thp_succeeded)
|
|
__field( unsigned long, thp_failed)
|
|
__field( unsigned long, thp_split)
|
|
__field( enum migrate_mode, mode)
|
|
__field( int, reason)
|
|
),
|
|
|
|
TP_fast_assign(
|
|
__entry->succeeded = succeeded;
|
|
__entry->failed = failed;
|
|
__entry->thp_succeeded = thp_succeeded;
|
|
__entry->thp_failed = thp_failed;
|
|
__entry->thp_split = thp_split;
|
|
__entry->mode = mode;
|
|
__entry->reason = reason;
|
|
),
|
|
|
|
TP_printk("nr_succeeded=%lu nr_failed=%lu nr_thp_succeeded=%lu nr_thp_failed=%lu nr_thp_split=%lu mode=%s reason=%s",
|
|
__entry->succeeded,
|
|
__entry->failed,
|
|
__entry->thp_succeeded,
|
|
__entry->thp_failed,
|
|
__entry->thp_split,
|
|
__print_symbolic(__entry->mode, MIGRATE_MODE),
|
|
__print_symbolic(__entry->reason, MIGRATE_REASON))
|
|
);
|
|
|
|
TRACE_EVENT(mm_migrate_pages_start,
|
|
|
|
TP_PROTO(enum migrate_mode mode, int reason),
|
|
|
|
TP_ARGS(mode, reason),
|
|
|
|
TP_STRUCT__entry(
|
|
__field(enum migrate_mode, mode)
|
|
__field(int, reason)
|
|
),
|
|
|
|
TP_fast_assign(
|
|
__entry->mode = mode;
|
|
__entry->reason = reason;
|
|
),
|
|
|
|
TP_printk("mode=%s reason=%s",
|
|
__print_symbolic(__entry->mode, MIGRATE_MODE),
|
|
__print_symbolic(__entry->reason, MIGRATE_REASON))
|
|
);
|
|
|
|
#endif /* _TRACE_MIGRATE_H */

/* This part must be outside protection */
#include <trace/define_trace.h>
|