Dave Hansen 26aa2d199d mm/migrate: demote pages during reclaim
This is mostly derived from a patch from Yang Shi:

	https://lore.kernel.org/linux-mm/1560468577-101178-10-git-send-email-yang.shi@linux.alibaba.com/

Add code to the reclaim path (shrink_page_list()) to "demote" data to
another NUMA node instead of discarding the data.  This always avoids the
cost of I/O needed to read the page back in and sometimes avoids the
writeout cost when the page is dirty.

A second pass through shrink_page_list() will be made if any demotions
fail.  This essentially falls back to normal reclaim behavior in the case
that demotions fail.  Previous versions of this patch may have simply
failed to reclaim pages which were eligible for demotion but were unable
to be demoted in practice.

For some cases, for example, MADV_PAGEOUT, the pages are always discarded
instead of demoted to follow the kernel API definition.  Because
MADV_PAGEOUT is defined as freeing specified pages regardless in which
tier they are.

Note: This just adds the start of infrastructure for migration.  It is
actually disabled next to the FIXME in migrate_demote_page_ok().

[dave.hansen@linux.intel.com: v11]
  Link: https://lkml.kernel.org/r/20210715055145.195411-5-ying.huang@intel.com
  Link: https://lkml.kernel.org/r/20210721063926.3024591-4-ying.huang@intel.com

Link: https://lkml.kernel.org/r/20210715055145.195411-5-ying.huang@intel.com
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Reviewed-by: Yang Shi <shy828301@gmail.com>
Reviewed-by: Wei Xu <weixugc@google.com>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Reviewed-by: Zi Yan <ziy@nvidia.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Keith Busch <kbusch@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2021-09-03 09:58:16 -07:00

112 lines
2.8 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM migrate
#if !defined(_TRACE_MIGRATE_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_MIGRATE_H
#include <linux/tracepoint.h>
#define MIGRATE_MODE \
EM( MIGRATE_ASYNC, "MIGRATE_ASYNC") \
EM( MIGRATE_SYNC_LIGHT, "MIGRATE_SYNC_LIGHT") \
EMe(MIGRATE_SYNC, "MIGRATE_SYNC")
#define MIGRATE_REASON \
EM( MR_COMPACTION, "compaction") \
EM( MR_MEMORY_FAILURE, "memory_failure") \
EM( MR_MEMORY_HOTPLUG, "memory_hotplug") \
EM( MR_SYSCALL, "syscall_or_cpuset") \
EM( MR_MEMPOLICY_MBIND, "mempolicy_mbind") \
EM( MR_NUMA_MISPLACED, "numa_misplaced") \
EM( MR_CONTIG_RANGE, "contig_range") \
EM( MR_LONGTERM_PIN, "longterm_pin") \
EMe(MR_DEMOTION, "demotion")
/*
* First define the enums in the above macros to be exported to userspace
* via TRACE_DEFINE_ENUM().
*/
#undef EM
#undef EMe
#define EM(a, b) TRACE_DEFINE_ENUM(a);
#define EMe(a, b) TRACE_DEFINE_ENUM(a);
MIGRATE_MODE
MIGRATE_REASON
/*
* Now redefine the EM() and EMe() macros to map the enums to the strings
* that will be printed in the output.
*/
#undef EM
#undef EMe
#define EM(a, b) {a, b},
#define EMe(a, b) {a, b}
TRACE_EVENT(mm_migrate_pages,
TP_PROTO(unsigned long succeeded, unsigned long failed,
unsigned long thp_succeeded, unsigned long thp_failed,
unsigned long thp_split, enum migrate_mode mode, int reason),
TP_ARGS(succeeded, failed, thp_succeeded, thp_failed,
thp_split, mode, reason),
TP_STRUCT__entry(
__field( unsigned long, succeeded)
__field( unsigned long, failed)
__field( unsigned long, thp_succeeded)
__field( unsigned long, thp_failed)
__field( unsigned long, thp_split)
__field( enum migrate_mode, mode)
__field( int, reason)
),
TP_fast_assign(
__entry->succeeded = succeeded;
__entry->failed = failed;
__entry->thp_succeeded = thp_succeeded;
__entry->thp_failed = thp_failed;
__entry->thp_split = thp_split;
__entry->mode = mode;
__entry->reason = reason;
),
TP_printk("nr_succeeded=%lu nr_failed=%lu nr_thp_succeeded=%lu nr_thp_failed=%lu nr_thp_split=%lu mode=%s reason=%s",
__entry->succeeded,
__entry->failed,
__entry->thp_succeeded,
__entry->thp_failed,
__entry->thp_split,
__print_symbolic(__entry->mode, MIGRATE_MODE),
__print_symbolic(__entry->reason, MIGRATE_REASON))
);
TRACE_EVENT(mm_migrate_pages_start,
TP_PROTO(enum migrate_mode mode, int reason),
TP_ARGS(mode, reason),
TP_STRUCT__entry(
__field(enum migrate_mode, mode)
__field(int, reason)
),
TP_fast_assign(
__entry->mode = mode;
__entry->reason = reason;
),
TP_printk("mode=%s reason=%s",
__print_symbolic(__entry->mode, MIGRATE_MODE),
__print_symbolic(__entry->reason, MIGRATE_REASON))
);
#endif /* _TRACE_MIGRATE_H */
/* This part must be outside protection */
#include <trace/define_trace.h>