d7911ef30c
While testing the memcg-aware swap token, I observed that the swap token was often grabbed by an intermittently running process (e.g. init, auditd) which then never released it. Why? Some processes (e.g. init, auditd, audispd) wake up when another process exits, and when an exiting process leaves the swap token without an owner, the first process to page in gets the token. Thus such intermittently running processes often get the token. Currently, swap token priority is only decreased in the page fault path, so if a process goes to sleep immediately after grabbing the swap token, its token priority is never decreased. That's obviously undesirable. This patch implements a very simple (and lightweight) priority aging. It only affects the corner case above and doesn't change the performance of swap-heavy workloads (e.g. multi-process qsbench load). Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Reviewed-by: Rik van Riel <riel@redhat.com> Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
401 lines
9.4 KiB
C
401 lines
9.4 KiB
C
#undef TRACE_SYSTEM
|
|
#define TRACE_SYSTEM vmscan
|
|
|
|
#if !defined(_TRACE_VMSCAN_H) || defined(TRACE_HEADER_MULTI_READ)
|
|
#define _TRACE_VMSCAN_H
|
|
|
|
#include <linux/types.h>
|
|
#include <linux/tracepoint.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/memcontrol.h>
|
|
#include "gfpflags.h"
|
|
|
|
/*
 * Bit flags carried in the "reclaim_flags" field of the vmscan trace events.
 * show_reclaim_flags() maps each bit to its name; trace_reclaim_flags() and
 * trace_shrink_flags() combine them into a flag word.
 *
 * Note that RECLAIM_WB_MIXED (0x0010) is numerically above the SYNC/ASYNC
 * bits even though it is listed with the ANON/FILE bits.
 */
#define RECLAIM_WB_ANON		0x0001u
#define RECLAIM_WB_FILE		0x0002u
#define RECLAIM_WB_MIXED	0x0010u
#define RECLAIM_WB_SYNC		0x0004u
#define RECLAIM_WB_ASYNC	0x0008u
|
|
|
|
/*
 * Render a RECLAIM_WB_* flag word as text for use as a TP_printk() argument.
 *
 * A zero value yields the literal "RECLAIM_WB_NONE"; any other value is
 * handed to __print_flags() with "|" as the delimiter and the name table
 * below.
 *
 * The expansion is a bare conditional expression (not wrapped in outer
 * parentheses) and mentions @flags twice, so it is only suitable where it
 * forms a complete argument and @flags has no side effects.
 */
#define show_reclaim_flags(flags)				\
	(flags) ? __print_flags(flags, "|",			\
		{RECLAIM_WB_ANON,	"RECLAIM_WB_ANON"},	\
		{RECLAIM_WB_FILE,	"RECLAIM_WB_FILE"},	\
		{RECLAIM_WB_MIXED,	"RECLAIM_WB_MIXED"},	\
		{RECLAIM_WB_SYNC,	"RECLAIM_WB_SYNC"},	\
		{RECLAIM_WB_ASYNC,	"RECLAIM_WB_ASYNC"}	\
		) : "RECLAIM_WB_NONE"
|
|
|
|
/*
 * Build the RECLAIM_WB_* flag word for a single page.
 *
 * @page: passed to page_is_file_cache(); a non-zero result selects
 *        RECLAIM_WB_FILE, otherwise RECLAIM_WB_ANON.
 * @sync: reclaim mode bits; RECLAIM_WB_SYNC is chosen when the
 *        RECLAIM_MODE_SYNC bit is set, RECLAIM_WB_ASYNC otherwise.
 *
 * @sync is parenthesized before masking so that a compound argument such as
 * "a | b" is tested as a whole instead of being re-associated by operator
 * precedence ("a | (b & RECLAIM_MODE_SYNC)").
 */
#define trace_reclaim_flags(page, sync) ( \
	(page_is_file_cache(page) ? RECLAIM_WB_FILE : RECLAIM_WB_ANON) | \
	(((sync) & RECLAIM_MODE_SYNC) ? RECLAIM_WB_SYNC : RECLAIM_WB_ASYNC) \
	)
|
|
|
|
/*
 * Build the RECLAIM_WB_* flag word for a shrink operation.
 *
 * @file: non-zero selects RECLAIM_WB_FILE, zero selects RECLAIM_WB_ANON;
 *        only consulted when RECLAIM_MODE_SYNC is not set in @sync.
 * @sync: reclaim mode bits. When RECLAIM_MODE_SYNC is set the result is
 *        RECLAIM_WB_MIXED | RECLAIM_WB_SYNC; otherwise it is the FILE/ANON
 *        bit chosen by @file combined with RECLAIM_WB_ASYNC.
 *
 * Both arguments are parenthesized so that compound expressions passed by a
 * caller (e.g. "a | b" for @sync) are evaluated as a unit rather than being
 * split by the precedence of '&' and '?:'.
 */
#define trace_shrink_flags(file, sync) ( \
	(((sync) & RECLAIM_MODE_SYNC) ? RECLAIM_WB_MIXED : \
			((file) ? RECLAIM_WB_FILE : RECLAIM_WB_ANON)) | \
	(((sync) & RECLAIM_MODE_SYNC) ? RECLAIM_WB_SYNC : RECLAIM_WB_ASYNC) \
	)
|
|
|
|
TRACE_EVENT(mm_vmscan_kswapd_sleep,
|
|
|
|
TP_PROTO(int nid),
|
|
|
|
TP_ARGS(nid),
|
|
|
|
TP_STRUCT__entry(
|
|
__field( int, nid )
|
|
),
|
|
|
|
TP_fast_assign(
|
|
__entry->nid = nid;
|
|
),
|
|
|
|
TP_printk("nid=%d", __entry->nid)
|
|
);
|
|
|
|
TRACE_EVENT(mm_vmscan_kswapd_wake,
|
|
|
|
TP_PROTO(int nid, int order),
|
|
|
|
TP_ARGS(nid, order),
|
|
|
|
TP_STRUCT__entry(
|
|
__field( int, nid )
|
|
__field( int, order )
|
|
),
|
|
|
|
TP_fast_assign(
|
|
__entry->nid = nid;
|
|
__entry->order = order;
|
|
),
|
|
|
|
TP_printk("nid=%d order=%d", __entry->nid, __entry->order)
|
|
);
|
|
|
|
TRACE_EVENT(mm_vmscan_wakeup_kswapd,
|
|
|
|
TP_PROTO(int nid, int zid, int order),
|
|
|
|
TP_ARGS(nid, zid, order),
|
|
|
|
TP_STRUCT__entry(
|
|
__field( int, nid )
|
|
__field( int, zid )
|
|
__field( int, order )
|
|
),
|
|
|
|
TP_fast_assign(
|
|
__entry->nid = nid;
|
|
__entry->zid = zid;
|
|
__entry->order = order;
|
|
),
|
|
|
|
TP_printk("nid=%d zid=%d order=%d",
|
|
__entry->nid,
|
|
__entry->zid,
|
|
__entry->order)
|
|
);
|
|
|
|
DECLARE_EVENT_CLASS(mm_vmscan_direct_reclaim_begin_template,
|
|
|
|
TP_PROTO(int order, int may_writepage, gfp_t gfp_flags),
|
|
|
|
TP_ARGS(order, may_writepage, gfp_flags),
|
|
|
|
TP_STRUCT__entry(
|
|
__field( int, order )
|
|
__field( int, may_writepage )
|
|
__field( gfp_t, gfp_flags )
|
|
),
|
|
|
|
TP_fast_assign(
|
|
__entry->order = order;
|
|
__entry->may_writepage = may_writepage;
|
|
__entry->gfp_flags = gfp_flags;
|
|
),
|
|
|
|
TP_printk("order=%d may_writepage=%d gfp_flags=%s",
|
|
__entry->order,
|
|
__entry->may_writepage,
|
|
show_gfp_flags(__entry->gfp_flags))
|
|
);
|
|
|
|
DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_direct_reclaim_begin,
|
|
|
|
TP_PROTO(int order, int may_writepage, gfp_t gfp_flags),
|
|
|
|
TP_ARGS(order, may_writepage, gfp_flags)
|
|
);
|
|
|
|
DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_memcg_reclaim_begin,
|
|
|
|
TP_PROTO(int order, int may_writepage, gfp_t gfp_flags),
|
|
|
|
TP_ARGS(order, may_writepage, gfp_flags)
|
|
);
|
|
|
|
DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_memcg_softlimit_reclaim_begin,
|
|
|
|
TP_PROTO(int order, int may_writepage, gfp_t gfp_flags),
|
|
|
|
TP_ARGS(order, may_writepage, gfp_flags)
|
|
);
|
|
|
|
DECLARE_EVENT_CLASS(mm_vmscan_direct_reclaim_end_template,
|
|
|
|
TP_PROTO(unsigned long nr_reclaimed),
|
|
|
|
TP_ARGS(nr_reclaimed),
|
|
|
|
TP_STRUCT__entry(
|
|
__field( unsigned long, nr_reclaimed )
|
|
),
|
|
|
|
TP_fast_assign(
|
|
__entry->nr_reclaimed = nr_reclaimed;
|
|
),
|
|
|
|
TP_printk("nr_reclaimed=%lu", __entry->nr_reclaimed)
|
|
);
|
|
|
|
DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_direct_reclaim_end,
|
|
|
|
TP_PROTO(unsigned long nr_reclaimed),
|
|
|
|
TP_ARGS(nr_reclaimed)
|
|
);
|
|
|
|
DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_memcg_reclaim_end,
|
|
|
|
TP_PROTO(unsigned long nr_reclaimed),
|
|
|
|
TP_ARGS(nr_reclaimed)
|
|
);
|
|
|
|
DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_memcg_softlimit_reclaim_end,
|
|
|
|
TP_PROTO(unsigned long nr_reclaimed),
|
|
|
|
TP_ARGS(nr_reclaimed)
|
|
);
|
|
|
|
|
|
DECLARE_EVENT_CLASS(mm_vmscan_lru_isolate_template,
|
|
|
|
TP_PROTO(int order,
|
|
unsigned long nr_requested,
|
|
unsigned long nr_scanned,
|
|
unsigned long nr_taken,
|
|
unsigned long nr_lumpy_taken,
|
|
unsigned long nr_lumpy_dirty,
|
|
unsigned long nr_lumpy_failed,
|
|
int isolate_mode),
|
|
|
|
TP_ARGS(order, nr_requested, nr_scanned, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode),
|
|
|
|
TP_STRUCT__entry(
|
|
__field(int, order)
|
|
__field(unsigned long, nr_requested)
|
|
__field(unsigned long, nr_scanned)
|
|
__field(unsigned long, nr_taken)
|
|
__field(unsigned long, nr_lumpy_taken)
|
|
__field(unsigned long, nr_lumpy_dirty)
|
|
__field(unsigned long, nr_lumpy_failed)
|
|
__field(int, isolate_mode)
|
|
),
|
|
|
|
TP_fast_assign(
|
|
__entry->order = order;
|
|
__entry->nr_requested = nr_requested;
|
|
__entry->nr_scanned = nr_scanned;
|
|
__entry->nr_taken = nr_taken;
|
|
__entry->nr_lumpy_taken = nr_lumpy_taken;
|
|
__entry->nr_lumpy_dirty = nr_lumpy_dirty;
|
|
__entry->nr_lumpy_failed = nr_lumpy_failed;
|
|
__entry->isolate_mode = isolate_mode;
|
|
),
|
|
|
|
TP_printk("isolate_mode=%d order=%d nr_requested=%lu nr_scanned=%lu nr_taken=%lu contig_taken=%lu contig_dirty=%lu contig_failed=%lu",
|
|
__entry->isolate_mode,
|
|
__entry->order,
|
|
__entry->nr_requested,
|
|
__entry->nr_scanned,
|
|
__entry->nr_taken,
|
|
__entry->nr_lumpy_taken,
|
|
__entry->nr_lumpy_dirty,
|
|
__entry->nr_lumpy_failed)
|
|
);
|
|
|
|
DEFINE_EVENT(mm_vmscan_lru_isolate_template, mm_vmscan_lru_isolate,
|
|
|
|
TP_PROTO(int order,
|
|
unsigned long nr_requested,
|
|
unsigned long nr_scanned,
|
|
unsigned long nr_taken,
|
|
unsigned long nr_lumpy_taken,
|
|
unsigned long nr_lumpy_dirty,
|
|
unsigned long nr_lumpy_failed,
|
|
int isolate_mode),
|
|
|
|
TP_ARGS(order, nr_requested, nr_scanned, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode)
|
|
|
|
);
|
|
|
|
DEFINE_EVENT(mm_vmscan_lru_isolate_template, mm_vmscan_memcg_isolate,
|
|
|
|
TP_PROTO(int order,
|
|
unsigned long nr_requested,
|
|
unsigned long nr_scanned,
|
|
unsigned long nr_taken,
|
|
unsigned long nr_lumpy_taken,
|
|
unsigned long nr_lumpy_dirty,
|
|
unsigned long nr_lumpy_failed,
|
|
int isolate_mode),
|
|
|
|
TP_ARGS(order, nr_requested, nr_scanned, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode)
|
|
|
|
);
|
|
|
|
TRACE_EVENT(mm_vmscan_writepage,
|
|
|
|
TP_PROTO(struct page *page,
|
|
int reclaim_flags),
|
|
|
|
TP_ARGS(page, reclaim_flags),
|
|
|
|
TP_STRUCT__entry(
|
|
__field(struct page *, page)
|
|
__field(int, reclaim_flags)
|
|
),
|
|
|
|
TP_fast_assign(
|
|
__entry->page = page;
|
|
__entry->reclaim_flags = reclaim_flags;
|
|
),
|
|
|
|
TP_printk("page=%p pfn=%lu flags=%s",
|
|
__entry->page,
|
|
page_to_pfn(__entry->page),
|
|
show_reclaim_flags(__entry->reclaim_flags))
|
|
);
|
|
|
|
TRACE_EVENT(mm_vmscan_lru_shrink_inactive,
|
|
|
|
TP_PROTO(int nid, int zid,
|
|
unsigned long nr_scanned, unsigned long nr_reclaimed,
|
|
int priority, int reclaim_flags),
|
|
|
|
TP_ARGS(nid, zid, nr_scanned, nr_reclaimed, priority, reclaim_flags),
|
|
|
|
TP_STRUCT__entry(
|
|
__field(int, nid)
|
|
__field(int, zid)
|
|
__field(unsigned long, nr_scanned)
|
|
__field(unsigned long, nr_reclaimed)
|
|
__field(int, priority)
|
|
__field(int, reclaim_flags)
|
|
),
|
|
|
|
TP_fast_assign(
|
|
__entry->nid = nid;
|
|
__entry->zid = zid;
|
|
__entry->nr_scanned = nr_scanned;
|
|
__entry->nr_reclaimed = nr_reclaimed;
|
|
__entry->priority = priority;
|
|
__entry->reclaim_flags = reclaim_flags;
|
|
),
|
|
|
|
TP_printk("nid=%d zid=%d nr_scanned=%ld nr_reclaimed=%ld priority=%d flags=%s",
|
|
__entry->nid, __entry->zid,
|
|
__entry->nr_scanned, __entry->nr_reclaimed,
|
|
__entry->priority,
|
|
show_reclaim_flags(__entry->reclaim_flags))
|
|
);
|
|
|
|
TRACE_EVENT(replace_swap_token,
|
|
TP_PROTO(struct mm_struct *old_mm,
|
|
struct mm_struct *new_mm),
|
|
|
|
TP_ARGS(old_mm, new_mm),
|
|
|
|
TP_STRUCT__entry(
|
|
__field(struct mm_struct*, old_mm)
|
|
__field(unsigned int, old_prio)
|
|
__field(struct mm_struct*, new_mm)
|
|
__field(unsigned int, new_prio)
|
|
),
|
|
|
|
TP_fast_assign(
|
|
__entry->old_mm = old_mm;
|
|
__entry->old_prio = old_mm ? old_mm->token_priority : 0;
|
|
__entry->new_mm = new_mm;
|
|
__entry->new_prio = new_mm->token_priority;
|
|
),
|
|
|
|
TP_printk("old_token_mm=%p old_prio=%u new_token_mm=%p new_prio=%u",
|
|
__entry->old_mm, __entry->old_prio,
|
|
__entry->new_mm, __entry->new_prio)
|
|
);
|
|
|
|
DECLARE_EVENT_CLASS(put_swap_token_template,
|
|
TP_PROTO(struct mm_struct *swap_token_mm),
|
|
|
|
TP_ARGS(swap_token_mm),
|
|
|
|
TP_STRUCT__entry(
|
|
__field(struct mm_struct*, swap_token_mm)
|
|
),
|
|
|
|
TP_fast_assign(
|
|
__entry->swap_token_mm = swap_token_mm;
|
|
),
|
|
|
|
TP_printk("token_mm=%p", __entry->swap_token_mm)
|
|
);
|
|
|
|
DEFINE_EVENT(put_swap_token_template, put_swap_token,
|
|
TP_PROTO(struct mm_struct *swap_token_mm),
|
|
TP_ARGS(swap_token_mm)
|
|
);
|
|
|
|
DEFINE_EVENT_CONDITION(put_swap_token_template, disable_swap_token,
|
|
TP_PROTO(struct mm_struct *swap_token_mm),
|
|
TP_ARGS(swap_token_mm),
|
|
TP_CONDITION(swap_token_mm != NULL)
|
|
);
|
|
|
|
TRACE_EVENT_CONDITION(update_swap_token_priority,
|
|
TP_PROTO(struct mm_struct *mm,
|
|
unsigned int old_prio,
|
|
struct mm_struct *swap_token_mm),
|
|
|
|
TP_ARGS(mm, old_prio, swap_token_mm),
|
|
|
|
TP_CONDITION(mm->token_priority != old_prio),
|
|
|
|
TP_STRUCT__entry(
|
|
__field(struct mm_struct*, mm)
|
|
__field(unsigned int, old_prio)
|
|
__field(unsigned int, new_prio)
|
|
__field(struct mm_struct*, swap_token_mm)
|
|
__field(unsigned int, swap_token_prio)
|
|
),
|
|
|
|
TP_fast_assign(
|
|
__entry->mm = mm;
|
|
__entry->old_prio = old_prio;
|
|
__entry->new_prio = mm->token_priority;
|
|
__entry->swap_token_mm = swap_token_mm;
|
|
__entry->swap_token_prio = swap_token_mm ? swap_token_mm->token_priority : 0;
|
|
),
|
|
|
|
TP_printk("mm=%p old_prio=%u new_prio=%u swap_token_mm=%p token_prio=%u",
|
|
__entry->mm, __entry->old_prio, __entry->new_prio,
|
|
__entry->swap_token_mm, __entry->swap_token_prio)
|
|
);
|
|
|
|
#endif /* _TRACE_VMSCAN_H */
|
|
|
|
/* This part must be outside protection */
|
|
#include <trace/define_trace.h>
|