2018-06-06 05:42:14 +03:00
// SPDX-License-Identifier: GPL-2.0
2009-12-15 02:14:59 +03:00
/*
* Copyright ( c ) 2009 , Christoph Hellwig
* All Rights Reserved .
*/
# undef TRACE_SYSTEM
# define TRACE_SYSTEM xfs
# if !defined(_TRACE_XFS_H) || defined(TRACE_HEADER_MULTI_READ)
# define _TRACE_XFS_H
# include <linux/tracepoint.h>
struct xfs_agf ;
struct xfs_alloc_arg ;
struct xfs_attr_list_context ;
struct xfs_buf_log_item ;
struct xfs_da_args ;
struct xfs_da_node_entry ;
struct xfs_dquot ;
2011-10-11 19:14:11 +04:00
struct xfs_log_item ;
2012-06-14 18:22:15 +04:00
struct xlog ;
2013-11-01 08:27:18 +04:00
struct xlog_ticket ;
2010-04-13 09:06:46 +04:00
struct xlog_recover ;
struct xlog_recover_item ;
2019-08-26 22:08:10 +03:00
struct xlog_rec_header ;
2021-06-18 21:57:05 +03:00
struct xlog_in_core ;
2010-04-13 09:06:46 +04:00
struct xfs_buf_log_format ;
struct xfs_inode_log_format ;
2012-08-01 18:56:49 +04:00
struct xfs_bmbt_irec ;
xfs: support btrees with overlapping intervals for keys
On a filesystem with both reflink and reverse mapping enabled, it's
possible to have multiple rmap records referring to the same blocks on
disk. When overlapping intervals are possible, querying a classic
btree to find all records intersecting a given interval is inefficient
because we cannot use the left side of the search interval to filter
out non-matching records the same way that we can use the existing
btree key to filter out records coming after the right side of the
search interval. This will become important once we want to use the
rmap btree to rebuild BMBTs, or implement the (future) fsmap ioctl.
(For the non-overlapping case, we can perform such queries trivially
by starting at the left side of the interval and walking the tree
until we pass the right side.)
Therefore, extend the btree code to come closer to supporting
intervals as a first-class record attribute. This involves widening
the btree node's key space to store both the lowest key reachable via
the node pointer (as the btree does now) and the highest key reachable
via the same pointer and teaching the btree modifying functions to
keep the highest-key records up to date.
This behavior can be turned on via a new btree ops flag so that btrees
that cannot store overlapping intervals don't pay the overhead costs
in terms of extra code and disk format changes.
When we're deleting a record in a btree that supports overlapped
interval records and the deletion results in two btree blocks being
joined, we defer updating the high/low keys until after all possible
joining (at higher levels in the tree) has finished. At this point,
the btree pointers at all levels have been updated to remove the empty
blocks and we can update the low and high keys.
When we're doing this, we must be careful to update the keys of all
node pointers up to the root instead of stopping at the first set of
keys that don't need updating. This is because it's possible for a
single deletion to cause joining of multiple levels of tree, and so
we need to update everything going back to the root.
The diff_two_keys functions return < 0, 0, or > 0 if key1 is less than,
equal to, or greater than key2, respectively. This is consistent
with the rest of the kernel and the C library.
In btree_updkeys(), we need to evaluate the force_all parameter before
running the key diff to avoid reading uninitialized memory when we're
forcing a key update. This happens when we've allocated an empty slot
at level N + 1 to point to a new block at level N and we're in the
process of filling out the new keys.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-08-03 04:08:36 +03:00
struct xfs_btree_cur ;
2016-10-03 19:11:18 +03:00
struct xfs_refcount_irec ;
2017-03-29 00:56:37 +03:00
struct xfs_fsmap ;
struct xfs_rmap_irec ;
2019-08-26 22:08:10 +03:00
struct xfs_icreate_log ;
struct xfs_owner_info ;
struct xfs_trans_res ;
struct xfs_inobt_rec_incore ;
2020-03-11 20:51:50 +03:00
union xfs_btree_ptr ;
2020-07-14 20:37:35 +03:00
struct xfs_dqtrx ;
2021-06-07 19:34:51 +03:00
struct xfs_icwalk ;
2009-12-15 02:14:59 +03:00
2020-02-27 04:30:42 +03:00
/*
 * Flag/name pairs for the attr_filter bits; passed to __print_flags() by the
 * attr list trace events.
 */
#define XFS_ATTR_FILTER_FLAGS \
	{ XFS_ATTR_ROOT,	" ROOT " }, \
	{ XFS_ATTR_SECURE,	" SECURE " }, \
	{ XFS_ATTR_INCOMPLETE,	" INCOMPLETE " }
2020-02-27 04:30:42 +03:00
2009-12-21 17:03:03 +03:00
/*
 * Event class for attribute list tracing.
 *
 * Takes the attr list context and records the device and inode number of
 * ctx->dp, the cursor position (hashval/blkno/offset), the output buffer
 * pointer and size, count, firstu, dupcnt and the attr_filter flags.
 */
DECLARE_EVENT_CLASS(xfs_attr_list_class,
	TP_PROTO(struct xfs_attr_list_context *ctx),
	TP_ARGS(ctx),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_ino_t, ino)
		__field(u32, hashval)
		__field(u32, blkno)
		__field(u32, offset)
		__field(void *, buffer)
		__field(int, bufsize)
		__field(int, count)
		__field(int, firstu)
		__field(int, dupcnt)
		__field(unsigned int, attr_filter)
	),
	TP_fast_assign(
		__entry->dev = VFS_I(ctx->dp)->i_sb->s_dev;
		__entry->ino = ctx->dp->i_ino;
		__entry->hashval = ctx->cursor.hashval;
		__entry->blkno = ctx->cursor.blkno;
		__entry->offset = ctx->cursor.offset;
		__entry->buffer = ctx->buffer;
		__entry->bufsize = ctx->bufsize;
		__entry->count = ctx->count;
		__entry->firstu = ctx->firstu;
		/*
		 * dupcnt is printed below, so it must be assigned here;
		 * previously it was declared and printed but never set.
		 * NOTE(review): assumes struct xfs_attr_list_context has a
		 * dupcnt member -- confirm against its definition.
		 */
		__entry->dupcnt = ctx->dupcnt;
		__entry->attr_filter = ctx->attr_filter;
	),
	TP_printk(" dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u "
		  " buffer %p size %u count %u firstu %u filter %s ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->ino,
		  __entry->hashval,
		  __entry->blkno,
		  __entry->offset,
		  __entry->dupcnt,
		  __entry->buffer,
		  __entry->bufsize,
		  __entry->count,
		  __entry->firstu,
		  __print_flags(__entry->attr_filter, " | ",
				XFS_ATTR_FILTER_FLAGS)
	)
)
2009-12-15 02:14:59 +03:00
# define DEFINE_ATTR_LIST_EVENT(name) \
2009-12-21 17:03:03 +03:00
DEFINE_EVENT ( xfs_attr_list_class , name , \
2009-12-15 02:14:59 +03:00
TP_PROTO ( struct xfs_attr_list_context * ctx ) , \
2009-12-21 17:03:03 +03:00
TP_ARGS ( ctx ) )
2009-12-15 02:14:59 +03:00
DEFINE_ATTR_LIST_EVENT ( xfs_attr_list_sf ) ;
DEFINE_ATTR_LIST_EVENT ( xfs_attr_list_sf_all ) ;
DEFINE_ATTR_LIST_EVENT ( xfs_attr_list_leaf ) ;
DEFINE_ATTR_LIST_EVENT ( xfs_attr_list_leaf_end ) ;
DEFINE_ATTR_LIST_EVENT ( xfs_attr_list_full ) ;
DEFINE_ATTR_LIST_EVENT ( xfs_attr_list_add ) ;
DEFINE_ATTR_LIST_EVENT ( xfs_attr_list_wrong_blk ) ;
DEFINE_ATTR_LIST_EVENT ( xfs_attr_list_notfound ) ;
2012-11-12 15:53:53 +04:00
DEFINE_ATTR_LIST_EVENT ( xfs_attr_leaf_list ) ;
DEFINE_ATTR_LIST_EVENT ( xfs_attr_node_list ) ;
2009-12-15 02:14:59 +03:00
2020-11-30 03:33:39 +03:00
TRACE_EVENT ( xlog_intent_recovery_failed ,
TP_PROTO ( struct xfs_mount * mp , int error , void * function ) ,
TP_ARGS ( mp , error , function ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( int , error )
__field ( void * , function )
) ,
TP_fast_assign (
__entry - > dev = mp - > m_super - > s_dev ;
__entry - > error = error ;
__entry - > function = function ;
) ,
TP_printk ( " dev %d:%d error %d function %pS " ,
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > error , __entry - > function )
) ;
2010-05-24 12:25:57 +04:00
/*
 * Event class for per-AG reference tracing: records the device, the AG
 * number, the reference count passed in by the caller, and the caller's
 * instruction pointer.
 */
DECLARE_EVENT_CLASS(xfs_perag_class,
	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount,
		 unsigned long caller_ip),
	TP_ARGS(mp, agno, refcount, caller_ip),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_agnumber_t, agno)
		__field(int, refcount)
		__field(unsigned long, caller_ip)
	),
	TP_fast_assign(
		__entry->dev = mp->m_super->s_dev;
		__entry->agno = agno;
		__entry->refcount = refcount;
		__entry->caller_ip = caller_ip;
	),
	TP_printk(" dev %d:%d agno %u refcount %d caller %pS ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->agno,
		  __entry->refcount,
		  (char *)__entry->caller_ip)
);
/* Declare a trace event @name that uses xfs_perag_class. */
#define DEFINE_PERAG_REF_EVENT(name)	\
DEFINE_EVENT(xfs_perag_class, name,	\
	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount,	\
		 unsigned long caller_ip),					\
	TP_ARGS(mp, agno, refcount, caller_ip))

/* Events sharing the xfs_perag_class layout. */
DEFINE_PERAG_REF_EVENT(xfs_perag_get);
DEFINE_PERAG_REF_EVENT(xfs_perag_get_tag);
DEFINE_PERAG_REF_EVENT(xfs_perag_put);
DEFINE_PERAG_REF_EVENT(xfs_perag_set_inode_tag);
DEFINE_PERAG_REF_EVENT(xfs_perag_clear_inode_tag);
2010-05-24 12:25:57 +04:00
2021-08-06 21:05:43 +03:00
/*
 * Trace event recording the mount's device and the shrinker_hits value
 * supplied by the caller.
 */
TRACE_EVENT(xfs_inodegc_worker,
	TP_PROTO(struct xfs_mount *mp, unsigned int shrinker_hits),
	TP_ARGS(mp, shrinker_hits),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(unsigned int, shrinker_hits)
	),
	TP_fast_assign(
		__entry->dev = mp->m_super->s_dev;
		__entry->shrinker_hits = shrinker_hits;
	),
	TP_printk(" dev %d:%d shrinker_hits %u ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->shrinker_hits)
);
2021-08-06 21:05:39 +03:00
/*
 * Event class for filesystem-wide events.
 *
 * Records the device, mp->m_flags, mp->m_opstate, the superblock s_flags
 * and the caller address.  @mp may be NULL; in that case the mount-derived
 * fields are recorded as zero.
 */
DECLARE_EVENT_CLASS(xfs_fs_class,
	TP_PROTO(struct xfs_mount *mp, void *caller_ip),
	TP_ARGS(mp, caller_ip),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(unsigned long long, mflags)
		__field(unsigned long, opstate)
		__field(unsigned long, sbflags)
		__field(void *, caller_ip)
	),
	TP_fast_assign(
		if (mp) {
			__entry->dev = mp->m_super->s_dev;
			__entry->mflags = mp->m_flags;
			__entry->opstate = mp->m_opstate;
			__entry->sbflags = mp->m_super->s_flags;
		} else {
			/*
			 * TP_printk() prints these fields unconditionally, so
			 * give them defined values instead of leaving them
			 * unassigned when there is no mount.
			 */
			__entry->dev = 0;
			__entry->mflags = 0;
			__entry->opstate = 0;
			__entry->sbflags = 0;
		}
		__entry->caller_ip = caller_ip;
	),
	TP_printk(" dev %d:%d m_flags 0x%llx opstate (%s) s_flags 0x%lx caller %pS ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->mflags,
		  __print_flags(__entry->opstate, " | ", XFS_OPSTATE_STRINGS),
		  __entry->sbflags,
		  __entry->caller_ip)
);
/* Declare a trace event @name that uses xfs_fs_class. */
#define DEFINE_FS_EVENT(name)	\
DEFINE_EVENT(xfs_fs_class, name,				\
	TP_PROTO(struct xfs_mount *mp, void *caller_ip),	\
	TP_ARGS(mp, caller_ip))

/* Events sharing the xfs_fs_class layout. */
DEFINE_FS_EVENT(xfs_inodegc_flush);
DEFINE_FS_EVENT(xfs_inodegc_start);
DEFINE_FS_EVENT(xfs_inodegc_stop);
DEFINE_FS_EVENT(xfs_inodegc_queue);
DEFINE_FS_EVENT(xfs_inodegc_throttle);
DEFINE_FS_EVENT(xfs_fs_sync_fs);
DEFINE_FS_EVENT(xfs_blockgc_start);
DEFINE_FS_EVENT(xfs_blockgc_stop);
DEFINE_FS_EVENT(xfs_blockgc_worker);
DEFINE_FS_EVENT(xfs_blockgc_flush_all);
2021-08-06 21:05:39 +03:00
2021-08-06 21:05:43 +03:00
/*
 * Trace event recording the mount's device, sc->nr_to_scan from the shrink
 * control, and the caller address.
 */
TRACE_EVENT(xfs_inodegc_shrinker_scan,
	TP_PROTO(struct xfs_mount *mp, struct shrink_control *sc,
		 void *caller_ip),
	TP_ARGS(mp, sc, caller_ip),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(unsigned long, nr_to_scan)
		__field(void *, caller_ip)
	),
	TP_fast_assign(
		__entry->dev = mp->m_super->s_dev;
		__entry->nr_to_scan = sc->nr_to_scan;
		__entry->caller_ip = caller_ip;
	),
	TP_printk(" dev %d:%d nr_to_scan %lu caller %pS ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->nr_to_scan,
		  __entry->caller_ip)
);
2013-11-01 08:27:19 +04:00
/* Event class recording only the mount's device and an AG number. */
DECLARE_EVENT_CLASS(xfs_ag_class,
	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno),
	TP_ARGS(mp, agno),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_agnumber_t, agno)
	),
	TP_fast_assign(
		__entry->dev = mp->m_super->s_dev;
		__entry->agno = agno;
	),
	TP_printk(" dev %d:%d agno %u ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->agno)
);
/* Declare a trace event @name that uses xfs_ag_class. */
#define DEFINE_AG_EVENT(name)	\
DEFINE_EVENT(xfs_ag_class, name,				\
	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno),	\
	TP_ARGS(mp, agno))

/* Events sharing the xfs_ag_class layout. */
DEFINE_AG_EVENT(xfs_read_agf);
DEFINE_AG_EVENT(xfs_alloc_read_agf);
DEFINE_AG_EVENT(xfs_read_agi);
DEFINE_AG_EVENT(xfs_ialloc_read_agi);
2009-12-15 02:14:59 +03:00
/*
 * Trace event recording the attr list context state plus one da node entry.
 *
 * Captures the same context fields as the attr list event class (device,
 * inode, cursor, buffer, sizes, attr_filter) and additionally the node
 * entry's hashval and before fields, converted from big-endian.
 */
TRACE_EVENT(xfs_attr_list_node_descend,
	TP_PROTO(struct xfs_attr_list_context *ctx,
		 struct xfs_da_node_entry *btree),
	TP_ARGS(ctx, btree),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_ino_t, ino)
		__field(u32, hashval)
		__field(u32, blkno)
		__field(u32, offset)
		__field(void *, buffer)
		__field(int, bufsize)
		__field(int, count)
		__field(int, firstu)
		__field(int, dupcnt)
		__field(unsigned int, attr_filter)
		__field(u32, bt_hashval)
		__field(u32, bt_before)
	),
	TP_fast_assign(
		__entry->dev = VFS_I(ctx->dp)->i_sb->s_dev;
		__entry->ino = ctx->dp->i_ino;
		__entry->hashval = ctx->cursor.hashval;
		__entry->blkno = ctx->cursor.blkno;
		__entry->offset = ctx->cursor.offset;
		__entry->buffer = ctx->buffer;
		__entry->bufsize = ctx->bufsize;
		__entry->count = ctx->count;
		__entry->firstu = ctx->firstu;
		/*
		 * dupcnt is printed below, so it must be assigned here;
		 * previously it was declared and printed but never set.
		 * NOTE(review): assumes struct xfs_attr_list_context has a
		 * dupcnt member -- confirm against its definition.
		 */
		__entry->dupcnt = ctx->dupcnt;
		__entry->attr_filter = ctx->attr_filter;
		__entry->bt_hashval = be32_to_cpu(btree->hashval);
		__entry->bt_before = be32_to_cpu(btree->before);
	),
	TP_printk(" dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u "
		  " buffer %p size %u count %u firstu %u filter %s "
		  " node hashval %u, node before %u ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->ino,
		  __entry->hashval,
		  __entry->blkno,
		  __entry->offset,
		  __entry->dupcnt,
		  __entry->buffer,
		  __entry->bufsize,
		  __entry->count,
		  __entry->firstu,
		  __print_flags(__entry->attr_filter, " | ",
				XFS_ATTR_FILTER_FLAGS),
		  __entry->bt_hashval,
		  __entry->bt_before)
);
2009-12-21 17:03:03 +03:00
DECLARE_EVENT_CLASS ( xfs_bmap_class ,
2017-11-03 20:34:43 +03:00
TP_PROTO ( struct xfs_inode * ip , struct xfs_iext_cursor * cur , int state ,
2009-12-21 17:03:03 +03:00
unsigned long caller_ip ) ,
2017-11-03 20:34:43 +03:00
TP_ARGS ( ip , cur , state , caller_ip ) ,
2009-12-21 17:03:03 +03:00
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_ino_t , ino )
2019-10-24 23:26:59 +03:00
__field ( void * , leaf )
__field ( int , pos )
2009-12-21 17:03:03 +03:00
__field ( xfs_fileoff_t , startoff )
__field ( xfs_fsblock_t , startblock )
__field ( xfs_filblks_t , blockcount )
__field ( xfs_exntst_t , state )
__field ( int , bmap_state )
__field ( unsigned long , caller_ip )
) ,
TP_fast_assign (
2016-10-03 19:11:32 +03:00
struct xfs_ifork * ifp ;
2009-12-21 17:03:03 +03:00
struct xfs_bmbt_irec r ;
2016-10-03 19:11:32 +03:00
ifp = xfs_iext_state_to_fork ( ip , state ) ;
2017-11-03 20:34:43 +03:00
xfs_iext_get_extent ( ifp , cur , & r ) ;
2009-12-21 17:03:03 +03:00
__entry - > dev = VFS_I ( ip ) - > i_sb - > s_dev ;
__entry - > ino = ip - > i_ino ;
2017-11-03 20:34:46 +03:00
__entry - > leaf = cur - > leaf ;
__entry - > pos = cur - > pos ;
2009-12-21 17:03:03 +03:00
__entry - > startoff = r . br_startoff ;
__entry - > startblock = r . br_startblock ;
__entry - > blockcount = r . br_blockcount ;
__entry - > state = r . br_state ;
__entry - > bmap_state = state ;
__entry - > caller_ip = caller_ip ;
) ,
2018-01-09 22:43:36 +03:00
TP_printk ( " dev %d:%d ino 0x%llx state %s cur %p/%d "
2018-01-09 22:46:05 +03:00
" offset %lld block %lld count %lld flag %d caller %pS " ,
2009-12-21 17:03:03 +03:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > ino ,
__print_flags ( __entry - > bmap_state , " | " , XFS_BMAP_EXT_FLAGS ) ,
2017-11-03 20:34:46 +03:00
__entry - > leaf ,
__entry - > pos ,
2009-12-21 17:03:03 +03:00
__entry - > startoff ,
2017-06-16 21:00:05 +03:00
( int64_t ) __entry - > startblock ,
2009-12-21 17:03:03 +03:00
__entry - > blockcount ,
__entry - > state ,
( char * ) __entry - > caller_ip )
)
2009-12-15 02:14:59 +03:00
# define DEFINE_BMAP_EVENT(name) \
2009-12-21 17:03:03 +03:00
DEFINE_EVENT ( xfs_bmap_class , name , \
2017-11-03 20:34:43 +03:00
TP_PROTO ( struct xfs_inode * ip , struct xfs_iext_cursor * cur , int state , \
2009-12-15 02:14:59 +03:00
unsigned long caller_ip ) , \
2017-11-03 20:34:43 +03:00
TP_ARGS ( ip , cur , state , caller_ip ) )
2017-11-03 20:34:46 +03:00
DEFINE_BMAP_EVENT ( xfs_iext_insert ) ;
2009-12-15 02:14:59 +03:00
DEFINE_BMAP_EVENT ( xfs_iext_remove ) ;
DEFINE_BMAP_EVENT ( xfs_bmap_pre_update ) ;
DEFINE_BMAP_EVENT ( xfs_bmap_post_update ) ;
2017-10-19 21:06:29 +03:00
DEFINE_BMAP_EVENT ( xfs_read_extent ) ;
DEFINE_BMAP_EVENT ( xfs_write_extent ) ;
2009-12-15 02:14:59 +03:00
2009-12-21 17:03:03 +03:00
/*
 * Event class for buffer tracing.
 *
 * Records the buffer target's device, the buffer's block number and length
 * in blocks (b_length), hold and pin counts, the b_sema count, b_flags and
 * the caller's instruction pointer.
 */
DECLARE_EVENT_CLASS(xfs_buf_class,
	TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip),
	TP_ARGS(bp, caller_ip),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_daddr_t, bno)
		__field(int, nblks)
		__field(int, hold)
		__field(int, pincount)
		__field(unsigned, lockval)
		__field(unsigned, flags)
		__field(unsigned long, caller_ip)
	),
	TP_fast_assign(
		__entry->dev = bp->b_target->bt_dev;
		/* Fall back to the first map's bm_bn when b_bn is the null daddr. */
		__entry->bno = (bp->b_bn == XFS_BUF_DADDR_NULL) ?
				bp->b_maps[0].bm_bn : bp->b_bn;
		__entry->nblks = bp->b_length;
		__entry->hold = atomic_read(&bp->b_hold);
		__entry->pincount = atomic_read(&bp->b_pin_count);
		__entry->lockval = bp->b_sema.count;
		__entry->flags = bp->b_flags;
		__entry->caller_ip = caller_ip;
	),
	TP_printk(" dev %d:%d bno 0x%llx nblks 0x%x hold %d pincount %d "
		  " lock %d flags %s caller %pS ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  (unsigned long long)__entry->bno,
		  __entry->nblks,
		  __entry->hold,
		  __entry->pincount,
		  __entry->lockval,
		  __print_flags(__entry->flags, " | ", XFS_BUF_FLAGS),
		  (void *)__entry->caller_ip)
)
2009-12-21 17:03:03 +03:00
/* Declare a trace event @name that uses xfs_buf_class. */
#define DEFINE_BUF_EVENT(name) \
DEFINE_EVENT(xfs_buf_class, name, \
	TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip), \
	TP_ARGS(bp, caller_ip))

/* Events sharing the xfs_buf_class layout. */
DEFINE_BUF_EVENT(xfs_buf_init);
DEFINE_BUF_EVENT(xfs_buf_free);
DEFINE_BUF_EVENT(xfs_buf_hold);
DEFINE_BUF_EVENT(xfs_buf_rele);
DEFINE_BUF_EVENT(xfs_buf_iodone);
DEFINE_BUF_EVENT(xfs_buf_submit);
DEFINE_BUF_EVENT(xfs_buf_lock);
DEFINE_BUF_EVENT(xfs_buf_lock_done);
DEFINE_BUF_EVENT(xfs_buf_trylock_fail);
DEFINE_BUF_EVENT(xfs_buf_trylock);
DEFINE_BUF_EVENT(xfs_buf_unlock);
DEFINE_BUF_EVENT(xfs_buf_iowait);
DEFINE_BUF_EVENT(xfs_buf_iowait_done);
DEFINE_BUF_EVENT(xfs_buf_delwri_queue);
xfs: on-stack delayed write buffer lists
Queue delwri buffers on a local on-stack list instead of a per-buftarg one,
and write back the buffers per-process instead of by waking up xfsbufd.
This is now easily doable given that we have very few places left that write
delwri buffers:
- log recovery:
Only done at mount time, and already forcing out the buffers
synchronously using xfs_flush_buftarg
- quotacheck:
Same story.
- dquot reclaim:
Writes out dirty dquots on the LRU under memory pressure. We might
want to look into doing more of this via xfsaild, but it's already
more optimal than the synchronous inode reclaim that writes each
buffer synchronously.
- xfsaild:
This is the main beneficiary of the change. By keeping a local list
of buffers to write we reduce latency of writing out buffers, and
more importantly we can remove all the delwri list promotions which
were hitting the buffer cache hard under sustained metadata loads.
The implementation is very straightforward - xfs_buf_delwri_queue now gets
a new list_head pointer that it adds the delwri buffers to, and all callers
need to eventually submit the list using xfs_buf_delwri_submit or
xfs_buf_delwri_submit_nowait. Buffers that already are on a delwri list are
skipped in xfs_buf_delwri_queue, assuming they already are on another delwri
list. The biggest change to pass down the buffer list was done to the AIL
pushing. Now that we operate on buffers the trylock, push and pushbuf log
item methods are merged into a single push routine, which tries to lock the
item, and if possible add the buffer that needs writeback to the buffer list.
This leads to much simpler code than the previous split but requires the
individual IOP_PUSH instances to unlock and reacquire the AIL around calls
to blocking routines.
Given that xfsailds now also handle writing out buffers, the conditions for
log forcing and the sleep times needed some small changes. The most
important one is that we consider an AIL busy as long as we still have buffers
to push, and the other one is that we do increment the pushed LSN for
buffers that are under flushing at this moment, but still count them towards
the stuck items for restart purposes. Without this we could hammer on stuck
items without ever forcing the log and not make progress under heavy random
delete workloads on fast flash storage devices.
[ Dave Chinner:
- rebase on previous patches.
- improved comments for XBF_DELWRI_Q handling
- fix XBF_ASYNC handling in queue submission (test 106 failure)
- rename delwri submit function buffer list parameters for clarity
- xfs_efd_item_push() should return XFS_ITEM_PINNED ]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
2012-04-23 09:58:39 +04:00
/* Further events using the xfs_buf_class layout. */
DEFINE_BUF_EVENT(xfs_buf_delwri_queued);
DEFINE_BUF_EVENT(xfs_buf_delwri_split);
DEFINE_BUF_EVENT(xfs_buf_delwri_pushbuf);
DEFINE_BUF_EVENT(xfs_buf_get_uncached);
DEFINE_BUF_EVENT(xfs_buf_item_relse);
DEFINE_BUF_EVENT(xfs_buf_iodone_async);
DEFINE_BUF_EVENT(xfs_buf_error_relse);
DEFINE_BUF_EVENT(xfs_buf_drain_buftarg);
DEFINE_BUF_EVENT(xfs_trans_read_buf_shut);

/* not really buffer traces, but the buf provides useful information */
DEFINE_BUF_EVENT(xfs_btree_corrupt);
DEFINE_BUF_EVENT(xfs_reset_dqcounts);
/* pass flags explicitly */
2009-12-21 17:03:03 +03:00
/*
 * Event class for buffer tracing where the flags are supplied by the caller
 * rather than read from the buffer.
 *
 * Records the device, bp->b_bn, the buffer length converted with BBTOB(),
 * hold and pin counts, the b_sema count, @flags and the caller's
 * instruction pointer.
 */
DECLARE_EVENT_CLASS(xfs_buf_flags_class,
	TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip),
	TP_ARGS(bp, flags, caller_ip),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_daddr_t, bno)
		__field(size_t, buffer_length)
		__field(int, hold)
		__field(int, pincount)
		__field(unsigned, lockval)
		__field(unsigned, flags)
		__field(unsigned long, caller_ip)
	),
	TP_fast_assign(
		__entry->dev = bp->b_target->bt_dev;
		__entry->bno = bp->b_bn;
		__entry->buffer_length = BBTOB(bp->b_length);
		__entry->flags = flags;
		__entry->hold = atomic_read(&bp->b_hold);
		__entry->pincount = atomic_read(&bp->b_pin_count);
		__entry->lockval = bp->b_sema.count;
		__entry->caller_ip = caller_ip;
	),
	TP_printk(" dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
		  " lock %d flags %s caller %pS ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  (unsigned long long)__entry->bno,
		  __entry->buffer_length,
		  __entry->hold,
		  __entry->pincount,
		  __entry->lockval,
		  __print_flags(__entry->flags, " | ", XFS_BUF_FLAGS),
		  (void *)__entry->caller_ip)
)
2009-12-21 17:03:03 +03:00
/* Declare a trace event @name that uses xfs_buf_flags_class. */
#define DEFINE_BUF_FLAGS_EVENT(name) \
DEFINE_EVENT(xfs_buf_flags_class, name, \
	TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip), \
	TP_ARGS(bp, flags, caller_ip))

/* Events sharing the xfs_buf_flags_class layout. */
DEFINE_BUF_FLAGS_EVENT(xfs_buf_find);
DEFINE_BUF_FLAGS_EVENT(xfs_buf_get);
DEFINE_BUF_FLAGS_EVENT(xfs_buf_read);
TRACE_EVENT ( xfs_buf_ioerror ,
2018-01-08 21:51:02 +03:00
TP_PROTO ( struct xfs_buf * bp , int error , xfs_failaddr_t caller_ip ) ,
2009-12-15 02:14:59 +03:00
TP_ARGS ( bp , error , caller_ip ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_daddr_t , bno )
__field ( size_t , buffer_length )
__field ( unsigned , flags )
__field ( int , hold )
__field ( int , pincount )
__field ( unsigned , lockval )
__field ( int , error )
2018-01-08 21:51:02 +03:00
__field ( xfs_failaddr_t , caller_ip )
2009-12-15 02:14:59 +03:00
) ,
TP_fast_assign (
__entry - > dev = bp - > b_target - > bt_dev ;
__entry - > bno = bp - > b_bn ;
2012-04-23 09:58:51 +04:00
__entry - > buffer_length = BBTOB ( bp - > b_length ) ;
2009-12-15 02:14:59 +03:00
__entry - > hold = atomic_read ( & bp - > b_hold ) ;
__entry - > pincount = atomic_read ( & bp - > b_pin_count ) ;
2011-07-08 16:36:19 +04:00
__entry - > lockval = bp - > b_sema . count ;
2009-12-15 02:14:59 +03:00
__entry - > error = error ;
__entry - > flags = bp - > b_flags ;
__entry - > caller_ip = caller_ip ;
) ,
TP_printk ( " dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
2018-01-08 21:51:02 +03:00
" lock %d error %d flags %s caller %pS " ,
2009-12-15 02:14:59 +03:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
( unsigned long long ) __entry - > bno ,
__entry - > buffer_length ,
__entry - > hold ,
__entry - > pincount ,
__entry - > lockval ,
__entry - > error ,
__print_flags ( __entry - > flags , " | " , XFS_BUF_FLAGS ) ,
( void * ) __entry - > caller_ip )
) ;
2009-12-21 17:03:03 +03:00
/*
 * Event class for buffer log item tracing.
 *
 * Records state from both the log item (bli_flags, bli_recur, bli_refcount,
 * li_flags of the embedded bli_item) and the buffer it refers to via
 * bli_buf (device, b_bn, length converted with BBTOB(), b_flags, hold and
 * pin counts, b_sema count).
 */
DECLARE_EVENT_CLASS(xfs_buf_item_class,
	TP_PROTO(struct xfs_buf_log_item *bip),
	TP_ARGS(bip),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_daddr_t, buf_bno)
		__field(size_t, buf_len)
		__field(int, buf_hold)
		__field(int, buf_pincount)
		__field(int, buf_lockval)
		__field(unsigned, buf_flags)
		__field(unsigned, bli_recur)
		__field(int, bli_refcount)
		__field(unsigned, bli_flags)
		__field(unsigned long, li_flags)
	),
	TP_fast_assign(
		struct xfs_buf	*bp = bip->bli_buf;

		__entry->dev = bp->b_target->bt_dev;
		__entry->bli_flags = bip->bli_flags;
		__entry->bli_recur = bip->bli_recur;
		__entry->bli_refcount = atomic_read(&bip->bli_refcount);
		__entry->buf_bno = bp->b_bn;
		__entry->buf_len = BBTOB(bp->b_length);
		__entry->buf_flags = bp->b_flags;
		__entry->buf_hold = atomic_read(&bp->b_hold);
		__entry->buf_pincount = atomic_read(&bp->b_pin_count);
		__entry->buf_lockval = bp->b_sema.count;
		__entry->li_flags = bip->bli_item.li_flags;
	),
	TP_printk(" dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
		  " lock %d flags %s recur %d refcount %d bliflags %s "
		  " liflags %s ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  (unsigned long long)__entry->buf_bno,
		  __entry->buf_len,
		  __entry->buf_hold,
		  __entry->buf_pincount,
		  __entry->buf_lockval,
		  __print_flags(__entry->buf_flags, " | ", XFS_BUF_FLAGS),
		  __entry->bli_recur,
		  __entry->bli_refcount,
		  __print_flags(__entry->bli_flags, " | ", XFS_BLI_FLAGS),
		  __print_flags(__entry->li_flags, " | ", XFS_LI_FLAGS))
)
2009-12-21 17:03:03 +03:00
/* Declare a trace event @name that uses xfs_buf_item_class. */
#define DEFINE_BUF_ITEM_EVENT(name) \
DEFINE_EVENT(xfs_buf_item_class, name, \
	TP_PROTO(struct xfs_buf_log_item *bip), \
	TP_ARGS(bip))

/* Events sharing the xfs_buf_item_class layout. */
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size);
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size_ordered);
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size_stale);
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format);
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format_stale);
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_ordered);
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pin);
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin);
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin_stale);
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_release);
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_committed);
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_push);
DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf);
DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf_recur);
DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb);
DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb_recur);
DEFINE_BUF_ITEM_EVENT(xfs_trans_read_buf);
DEFINE_BUF_ITEM_EVENT(xfs_trans_read_buf_recur);
DEFINE_BUF_ITEM_EVENT(xfs_trans_log_buf);
DEFINE_BUF_ITEM_EVENT(xfs_trans_brelse);
DEFINE_BUF_ITEM_EVENT(xfs_trans_bjoin);
DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold);
DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release);
DEFINE_BUF_ITEM_EVENT(xfs_trans_binval);
2014-04-23 01:11:52 +04:00
DECLARE_EVENT_CLASS ( xfs_filestream_class ,
2018-04-09 20:23:39 +03:00
TP_PROTO ( struct xfs_mount * mp , xfs_ino_t ino , xfs_agnumber_t agno ) ,
TP_ARGS ( mp , ino , agno ) ,
2014-04-23 01:11:52 +04:00
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_ino_t , ino )
__field ( xfs_agnumber_t , agno )
__field ( int , streams )
) ,
TP_fast_assign (
2018-04-09 20:23:39 +03:00
__entry - > dev = mp - > m_super - > s_dev ;
__entry - > ino = ino ;
2014-04-23 01:11:52 +04:00
__entry - > agno = agno ;
2018-04-09 20:23:39 +03:00
__entry - > streams = xfs_filestream_peek_ag ( mp , agno ) ;
2014-04-23 01:11:52 +04:00
) ,
TP_printk ( " dev %d:%d ino 0x%llx agno %u streams %d " ,
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > ino ,
__entry - > agno ,
__entry - > streams )
)
# define DEFINE_FILESTREAM_EVENT(name) \
DEFINE_EVENT ( xfs_filestream_class , name , \
2018-04-09 20:23:39 +03:00
TP_PROTO ( struct xfs_mount * mp , xfs_ino_t ino , xfs_agnumber_t agno ) , \
TP_ARGS ( mp , ino , agno ) )
2014-04-23 01:11:52 +04:00
DEFINE_FILESTREAM_EVENT ( xfs_filestream_free ) ;
DEFINE_FILESTREAM_EVENT ( xfs_filestream_lookup ) ;
DEFINE_FILESTREAM_EVENT ( xfs_filestream_scan ) ;
/*
 * Trace event recording the inode's device and number, an AG number, the
 * value returned by xfs_filestream_peek_ag() for that AG, and the caller's
 * free and nscan values.
 */
TRACE_EVENT(xfs_filestream_pick,
	TP_PROTO(struct xfs_inode *ip, xfs_agnumber_t agno,
		 xfs_extlen_t free, int nscan),
	TP_ARGS(ip, agno, free, nscan),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_ino_t, ino)
		__field(xfs_agnumber_t, agno)
		__field(int, streams)
		__field(xfs_extlen_t, free)
		__field(int, nscan)
	),
	TP_fast_assign(
		__entry->dev = VFS_I(ip)->i_sb->s_dev;
		__entry->ino = ip->i_ino;
		__entry->agno = agno;
		__entry->streams = xfs_filestream_peek_ag(ip->i_mount, agno);
		__entry->free = free;
		__entry->nscan = nscan;
	),
	TP_printk(" dev %d:%d ino 0x%llx agno %u streams %d free %d nscan %d ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->ino,
		  __entry->agno,
		  __entry->streams,
		  __entry->free,
		  __entry->nscan)
);
2009-12-21 17:03:03 +03:00
/*
 * Event class for inode lock tracing: records the inode's device and
 * number, the lock flags passed by the caller (decoded with
 * XFS_LOCK_FLAGS when printed) and the caller's instruction pointer.
 */
DECLARE_EVENT_CLASS(xfs_lock_class,
	TP_PROTO(struct xfs_inode *ip, unsigned lock_flags,
		 unsigned long caller_ip),
	TP_ARGS(ip, lock_flags, caller_ip),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_ino_t, ino)
		__field(int, lock_flags)
		__field(unsigned long, caller_ip)
	),
	TP_fast_assign(
		__entry->dev = VFS_I(ip)->i_sb->s_dev;
		__entry->ino = ip->i_ino;
		__entry->lock_flags = lock_flags;
		__entry->caller_ip = caller_ip;
	),
	TP_printk(" dev %d:%d ino 0x%llx flags %s caller %pS ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->ino,
		  __print_flags(__entry->lock_flags, " | ", XFS_LOCK_FLAGS),
		  (void *)__entry->caller_ip)
)
2009-12-15 02:14:59 +03:00
# define DEFINE_LOCK_EVENT(name) \
2009-12-21 17:03:03 +03:00
DEFINE_EVENT ( xfs_lock_class , name , \
2009-12-15 02:14:59 +03:00
TP_PROTO ( struct xfs_inode * ip , unsigned lock_flags , \
unsigned long caller_ip ) , \
2009-12-21 17:03:03 +03:00
TP_ARGS ( ip , lock_flags , caller_ip ) )
2009-12-15 02:14:59 +03:00
DEFINE_LOCK_EVENT ( xfs_ilock ) ;
DEFINE_LOCK_EVENT ( xfs_ilock_nowait ) ;
DEFINE_LOCK_EVENT ( xfs_ilock_demote ) ;
DEFINE_LOCK_EVENT ( xfs_iunlock ) ;
2010-06-24 05:57:09 +04:00
DECLARE_EVENT_CLASS ( xfs_inode_class ,
2009-12-21 17:03:03 +03:00
TP_PROTO ( struct xfs_inode * ip ) ,
TP_ARGS ( ip ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_ino_t , ino )
2021-08-06 21:05:39 +03:00
__field ( unsigned long , iflags )
2009-12-21 17:03:03 +03:00
) ,
TP_fast_assign (
__entry - > dev = VFS_I ( ip ) - > i_sb - > s_dev ;
__entry - > ino = ip - > i_ino ;
2021-08-06 21:05:39 +03:00
__entry - > iflags = ip - > i_flags ;
2009-12-21 17:03:03 +03:00
) ,
2021-08-06 21:05:39 +03:00
TP_printk ( " dev %d:%d ino 0x%llx iflags 0x%lx " ,
2009-12-21 17:03:03 +03:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
2021-08-06 21:05:39 +03:00
__entry - > ino ,
__entry - > iflags )
2009-12-21 17:03:03 +03:00
)
2010-06-24 05:57:09 +04:00
# define DEFINE_INODE_EVENT(name) \
DEFINE_EVENT ( xfs_inode_class , name , \
2009-12-15 02:14:59 +03:00
TP_PROTO ( struct xfs_inode * ip ) , \
2009-12-21 17:03:03 +03:00
TP_ARGS ( ip ) )
2010-06-24 05:57:09 +04:00
DEFINE_INODE_EVENT ( xfs_iget_skip ) ;
2021-06-18 21:57:05 +03:00
DEFINE_INODE_EVENT ( xfs_iget_recycle ) ;
DEFINE_INODE_EVENT ( xfs_iget_recycle_fail ) ;
2010-06-24 05:57:09 +04:00
DEFINE_INODE_EVENT ( xfs_iget_hit ) ;
DEFINE_INODE_EVENT ( xfs_iget_miss ) ;
2009-12-15 02:14:59 +03:00
2010-06-24 05:57:09 +04:00
DEFINE_INODE_EVENT ( xfs_getattr ) ;
DEFINE_INODE_EVENT ( xfs_setattr ) ;
DEFINE_INODE_EVENT ( xfs_readlink ) ;
2013-06-18 00:35:57 +04:00
DEFINE_INODE_EVENT ( xfs_inactive_symlink ) ;
2010-06-24 05:57:09 +04:00
DEFINE_INODE_EVENT ( xfs_alloc_file_space ) ;
DEFINE_INODE_EVENT ( xfs_free_file_space ) ;
2014-04-14 12:15:11 +04:00
DEFINE_INODE_EVENT ( xfs_zero_file_space ) ;
2014-02-24 03:58:19 +04:00
DEFINE_INODE_EVENT ( xfs_collapse_file_space ) ;
2015-03-25 07:08:56 +03:00
DEFINE_INODE_EVENT ( xfs_insert_file_space ) ;
2010-06-24 05:57:09 +04:00
DEFINE_INODE_EVENT ( xfs_readdir ) ;
2010-07-20 11:54:41 +04:00
# ifdef CONFIG_XFS_POSIX_ACL
2011-07-23 19:37:31 +04:00
DEFINE_INODE_EVENT ( xfs_get_acl ) ;
2010-07-20 11:54:41 +04:00
# endif
2010-06-24 05:57:09 +04:00
DEFINE_INODE_EVENT ( xfs_vm_bmap ) ;
DEFINE_INODE_EVENT ( xfs_file_ioctl ) ;
DEFINE_INODE_EVENT ( xfs_file_compat_ioctl ) ;
DEFINE_INODE_EVENT ( xfs_ioctl_setattr ) ;
2011-10-02 18:25:16 +04:00
DEFINE_INODE_EVENT ( xfs_dir_fsync ) ;
2010-06-24 05:57:09 +04:00
DEFINE_INODE_EVENT ( xfs_file_fsync ) ;
DEFINE_INODE_EVENT ( xfs_destroy_inode ) ;
2012-06-07 01:01:28 +04:00
DEFINE_INODE_EVENT ( xfs_update_time ) ;
2010-06-24 05:57:09 +04:00
DEFINE_INODE_EVENT ( xfs_dquot_dqalloc ) ;
DEFINE_INODE_EVENT ( xfs_dquot_dqdetach ) ;
2012-11-06 18:50:38 +04:00
DEFINE_INODE_EVENT ( xfs_inode_set_eofblocks_tag ) ;
DEFINE_INODE_EVENT ( xfs_inode_clear_eofblocks_tag ) ;
2012-11-06 18:50:42 +04:00
DEFINE_INODE_EVENT ( xfs_inode_free_eofblocks_invalid ) ;
2016-10-03 19:11:46 +03:00
DEFINE_INODE_EVENT ( xfs_inode_set_cowblocks_tag ) ;
DEFINE_INODE_EVENT ( xfs_inode_clear_cowblocks_tag ) ;
DEFINE_INODE_EVENT ( xfs_inode_free_cowblocks_invalid ) ;
2021-08-06 21:05:39 +03:00
DEFINE_INODE_EVENT ( xfs_inode_set_reclaimable ) ;
DEFINE_INODE_EVENT ( xfs_inode_reclaiming ) ;
DEFINE_INODE_EVENT ( xfs_inode_set_need_inactive ) ;
DEFINE_INODE_EVENT ( xfs_inode_inactivating ) ;
2012-11-06 18:50:38 +04:00
2018-12-19 01:32:29 +03:00
/*
 * ftrace's __print_symbolic requires that all enum values be wrapped in the
 * TRACE_DEFINE_ENUM macro so that the enum value can be encoded in the ftrace
 * ring buffer.  Somehow this was only worth mentioning in the ftrace sample
 * code.
 *
 * The three PE_SIZE_* values below are the ones passed to __print_symbolic
 * by the xfs_filemap_fault tracepoint.
 */
TRACE_DEFINE_ENUM ( PE_SIZE_PTE ) ;
TRACE_DEFINE_ENUM ( PE_SIZE_PMD ) ;
TRACE_DEFINE_ENUM ( PE_SIZE_PUD ) ;
2017-08-29 20:08:41 +03:00
/*
 * Tracepoint for page faults on an XFS file mapping.  Records the inode,
 * the page entry size (printed symbolically as PTE/PMD/PUD) and whether the
 * caller flagged the fault as a write fault.
 */
TRACE_EVENT(xfs_filemap_fault,
	TP_PROTO(struct xfs_inode *ip, enum page_entry_size pe_size,
		 bool write_fault),
	TP_ARGS(ip, pe_size, write_fault),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_ino_t, ino)
		__field(enum page_entry_size, pe_size)
		__field(bool, write_fault)
	),
	TP_fast_assign(
		__entry->dev = VFS_I(ip)->i_sb->s_dev;
		__entry->ino = ip->i_ino;
		__entry->pe_size = pe_size;
		__entry->write_fault = write_fault;
	),
	TP_printk(" dev %d:%d ino 0x%llx %s write_fault %d ",
		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino,
		  __print_symbolic(__entry->pe_size,
				   { PE_SIZE_PTE, " PTE " },
				   { PE_SIZE_PMD, " PMD " },
				   { PE_SIZE_PUD, " PUD " }),
		  __entry->write_fault)
)
2010-06-24 05:57:09 +04:00
DECLARE_EVENT_CLASS ( xfs_iref_class ,
2009-12-21 17:03:03 +03:00
TP_PROTO ( struct xfs_inode * ip , unsigned long caller_ip ) ,
TP_ARGS ( ip , caller_ip ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_ino_t , ino )
__field ( int , count )
2010-03-08 03:24:07 +03:00
__field ( int , pincount )
2009-12-21 17:03:03 +03:00
__field ( unsigned long , caller_ip )
) ,
TP_fast_assign (
__entry - > dev = VFS_I ( ip ) - > i_sb - > s_dev ;
__entry - > ino = ip - > i_ino ;
__entry - > count = atomic_read ( & VFS_I ( ip ) - > i_count ) ;
2010-03-08 03:24:07 +03:00
__entry - > pincount = atomic_read ( & ip - > i_pincount ) ;
2009-12-21 17:03:03 +03:00
__entry - > caller_ip = caller_ip ;
) ,
2018-01-09 22:46:05 +03:00
TP_printk ( " dev %d:%d ino 0x%llx count %d pincount %d caller %pS " ,
2009-12-21 17:03:03 +03:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > ino ,
__entry - > count ,
2010-03-08 03:24:07 +03:00
__entry - > pincount ,
2009-12-21 17:03:03 +03:00
( char * ) __entry - > caller_ip )
2013-03-18 18:51:48 +04:00
)
/*
 * Tracepoint for the preallocation size computation.  Records the inode,
 * the preallocation size in blocks, the shift value and @writeio_blocks.
 * Note that @writeio_blocks is labelled "m_allocsize_blocks" in the output.
 */
TRACE_EVENT(xfs_iomap_prealloc_size,
	TP_PROTO(struct xfs_inode *ip, xfs_fsblock_t blocks, int shift,
		 unsigned int writeio_blocks),
	TP_ARGS(ip, blocks, shift, writeio_blocks),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_ino_t, ino)
		__field(xfs_fsblock_t, blocks)
		__field(int, shift)
		__field(unsigned int, writeio_blocks)
	),
	TP_fast_assign(
		__entry->dev = VFS_I(ip)->i_sb->s_dev;
		__entry->ino = ip->i_ino;
		__entry->blocks = blocks;
		__entry->shift = shift;
		__entry->writeio_blocks = writeio_blocks;
	),
	TP_printk(" dev %d:%d ino 0x%llx prealloc blocks %llu shift %d "
		  " m_allocsize_blocks %u ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->ino,
		  __entry->blocks,
		  __entry->shift,
		  __entry->writeio_blocks)
)
2015-05-29 02:18:32 +03:00
/*
 * Tracepoint emitted before two inode records are merged.  Records the
 * existing inobt record as (@agino:@holemask) and the new record as
 * (@nagino:@nholemask).
 */
TRACE_EVENT(xfs_irec_merge_pre,
	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agino_t agino,
		 uint16_t holemask, xfs_agino_t nagino, uint16_t nholemask),
	TP_ARGS(mp, agno, agino, holemask, nagino, nholemask),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_agnumber_t, agno)
		__field(xfs_agino_t, agino)
		__field(uint16_t, holemask)
		__field(xfs_agino_t, nagino)
		__field(uint16_t, nholemask)
	),
	TP_fast_assign(
		__entry->dev = mp->m_super->s_dev;
		__entry->agno = agno;
		__entry->agino = agino;
		__entry->holemask = holemask;
		__entry->nagino = nagino;
		/*
		 * Record the new record's hole mask.  This previously copied
		 * @holemask, so the "new" pair always showed the old mask.
		 */
		__entry->nholemask = nholemask;
	),
	TP_printk(" dev %d:%d agno %d inobt (%u:0x%x) new (%u:0x%x) ",
		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno,
		  __entry->agino, __entry->holemask, __entry->nagino,
		  __entry->nholemask)
)
/*
 * Tracepoint emitted after an inode record merge.  Records the resulting
 * inobt record as (@agino:@holemask) for the given AG.
 */
TRACE_EVENT(xfs_irec_merge_post,
	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agino_t agino,
		 uint16_t holemask),
	TP_ARGS(mp, agno, agino, holemask),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_agnumber_t, agno)
		__field(xfs_agino_t, agino)
		__field(uint16_t, holemask)
	),
	TP_fast_assign(
		__entry->dev = mp->m_super->s_dev;
		__entry->agno = agno;
		__entry->agino = agino;
		__entry->holemask = holemask;
	),
	TP_printk(" dev %d:%d agno %d inobt (%u:0x%x) ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->agno,
		  __entry->agino,
		  __entry->holemask)
)
2010-06-24 05:57:09 +04:00
# define DEFINE_IREF_EVENT(name) \
DEFINE_EVENT ( xfs_iref_class , name , \
2009-12-15 02:14:59 +03:00
TP_PROTO ( struct xfs_inode * ip , unsigned long caller_ip ) , \
2009-12-21 17:03:03 +03:00
TP_ARGS ( ip , caller_ip ) )
2010-06-24 05:57:09 +04:00
DEFINE_IREF_EVENT ( xfs_irele ) ;
DEFINE_IREF_EVENT ( xfs_inode_pin ) ;
DEFINE_IREF_EVENT ( xfs_inode_unpin ) ;
DEFINE_IREF_EVENT ( xfs_inode_unpin_nowait ) ;
/*
 * Event class for directory namespace operations.  Records the directory
 * inode number and a copy of the entry name.
 *
 * Only name->len bytes are copied into the dynamic array, with no
 * terminator added here, so the name is printed with %.*s bounded by the
 * recorded length.
 */
DECLARE_EVENT_CLASS(xfs_namespace_class,
	TP_PROTO(struct xfs_inode *dp, struct xfs_name *name),
	TP_ARGS(dp, name),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_ino_t, dp_ino)
		__field(int, namelen)
		__dynamic_array(char, name, name->len)
	),
	TP_fast_assign(
		__entry->dev = VFS_I(dp)->i_sb->s_dev;
		__entry->dp_ino = dp->i_ino;
		__entry->namelen = name->len;
		memcpy(__get_str(name), name->name, name->len);
	),
	TP_printk(" dev %d:%d dp ino 0x%llx name %.*s ",
		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->dp_ino,
		  __entry->namelen, __get_str(name))
)
/* Instantiate one tracepoint of xfs_namespace_class. */
#define DEFINE_NAMESPACE_EVENT(name)					\
DEFINE_EVENT(xfs_namespace_class, name,					\
	TP_PROTO(struct xfs_inode *dp, struct xfs_name *name),		\
	TP_ARGS(dp, name))

DEFINE_NAMESPACE_EVENT(xfs_remove);
DEFINE_NAMESPACE_EVENT(xfs_link);
DEFINE_NAMESPACE_EVENT(xfs_lookup);
DEFINE_NAMESPACE_EVENT(xfs_create);
DEFINE_NAMESPACE_EVENT(xfs_symlink);
2010-03-08 03:24:07 +03:00
2010-06-24 05:57:09 +04:00
/*
 * Tracepoint for rename.  Records the source and target directory inode
 * numbers plus copies of both names.  The device is taken from the source
 * directory.
 *
 * Each name is copied for exactly its own length, with no terminator added
 * here, and printed with %.*s bounded by the recorded length.
 */
TRACE_EVENT(xfs_rename,
	TP_PROTO(struct xfs_inode *src_dp, struct xfs_inode *target_dp,
		 struct xfs_name *src_name, struct xfs_name *target_name),
	TP_ARGS(src_dp, target_dp, src_name, target_name),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_ino_t, src_dp_ino)
		__field(xfs_ino_t, target_dp_ino)
		__field(int, src_namelen)
		__field(int, target_namelen)
		__dynamic_array(char, src_name, src_name->len)
		__dynamic_array(char, target_name, target_name->len)
	),
	TP_fast_assign(
		__entry->dev = VFS_I(src_dp)->i_sb->s_dev;
		__entry->src_dp_ino = src_dp->i_ino;
		__entry->target_dp_ino = target_dp->i_ino;
		__entry->src_namelen = src_name->len;
		__entry->target_namelen = target_name->len;
		memcpy(__get_str(src_name), src_name->name, src_name->len);
		memcpy(__get_str(target_name), target_name->name,
		       target_name->len);
	),
	TP_printk(" dev %d:%d src dp ino 0x%llx target dp ino 0x%llx "
		  " src name %.*s target name %.*s ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->src_dp_ino, __entry->target_dp_ino,
		  __entry->src_namelen, __get_str(src_name),
		  __entry->target_namelen, __get_str(target_name))
)
2009-12-15 02:14:59 +03:00
2009-12-21 17:03:03 +03:00
/*
 * Event class for dquot tracing.  Snapshots the dquot's id, type, flags and
 * reference count, followed by the reserved amount, current count and
 * hard/soft limits for each of the three resources tracked in the dquot:
 * blocks (q_blk), realtime blocks (q_rtb) and inodes (q_ino).
 *
 * The format string used to end in a stray, unbalanced ']'; it has been
 * dropped so the output no longer carries trailing garbage.
 */
DECLARE_EVENT_CLASS(xfs_dquot_class,
	TP_PROTO(struct xfs_dquot *dqp),
	TP_ARGS(dqp),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(u32, id)
		__field(xfs_dqtype_t, type)
		__field(unsigned, flags)
		__field(unsigned, nrefs)

		__field(unsigned long long, res_bcount)
		__field(unsigned long long, res_rtbcount)
		__field(unsigned long long, res_icount)

		__field(unsigned long long, bcount)
		__field(unsigned long long, rtbcount)
		__field(unsigned long long, icount)

		__field(unsigned long long, blk_hardlimit)
		__field(unsigned long long, blk_softlimit)
		__field(unsigned long long, rtb_hardlimit)
		__field(unsigned long long, rtb_softlimit)
		__field(unsigned long long, ino_hardlimit)
		__field(unsigned long long, ino_softlimit)
	),
	TP_fast_assign(
		__entry->dev = dqp->q_mount->m_super->s_dev;
		__entry->id = dqp->q_id;
		__entry->type = dqp->q_type;
		__entry->flags = dqp->q_flags;
		__entry->nrefs = dqp->q_nrefs;

		/* Reserved amounts per resource. */
		__entry->res_bcount = dqp->q_blk.reserved;
		__entry->res_rtbcount = dqp->q_rtb.reserved;
		__entry->res_icount = dqp->q_ino.reserved;

		/* Current counts per resource. */
		__entry->bcount = dqp->q_blk.count;
		__entry->rtbcount = dqp->q_rtb.count;
		__entry->icount = dqp->q_ino.count;

		/* Hard and soft limits per resource. */
		__entry->blk_hardlimit = dqp->q_blk.hardlimit;
		__entry->blk_softlimit = dqp->q_blk.softlimit;
		__entry->rtb_hardlimit = dqp->q_rtb.hardlimit;
		__entry->rtb_softlimit = dqp->q_rtb.softlimit;
		__entry->ino_hardlimit = dqp->q_ino.hardlimit;
		__entry->ino_softlimit = dqp->q_ino.softlimit;
	),
	TP_printk(" dev %d:%d id 0x%x type %s flags %s nrefs %u "
		  " res_bc 0x%llx res_rtbc 0x%llx res_ic 0x%llx "
		  " bcnt 0x%llx bhardlimit 0x%llx bsoftlimit 0x%llx "
		  " rtbcnt 0x%llx rtbhardlimit 0x%llx rtbsoftlimit 0x%llx "
		  " icnt 0x%llx ihardlimit 0x%llx isoftlimit 0x%llx ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->id,
		  __print_flags(__entry->type, " | ", XFS_DQTYPE_STRINGS),
		  __print_flags(__entry->flags, " | ", XFS_DQFLAG_STRINGS),
		  __entry->nrefs,
		  __entry->res_bcount,
		  __entry->res_rtbcount,
		  __entry->res_icount,
		  __entry->bcount,
		  __entry->blk_hardlimit,
		  __entry->blk_softlimit,
		  __entry->rtbcount,
		  __entry->rtb_hardlimit,
		  __entry->rtb_softlimit,
		  __entry->icount,
		  __entry->ino_hardlimit,
		  __entry->ino_softlimit)
)
2009-12-21 17:03:03 +03:00
/* Instantiate one tracepoint of xfs_dquot_class; each takes only the dquot. */
#define DEFINE_DQUOT_EVENT(name)					\
DEFINE_EVENT(xfs_dquot_class, name,					\
	TP_PROTO(struct xfs_dquot *dqp),				\
	TP_ARGS(dqp))

DEFINE_DQUOT_EVENT(xfs_dqadjust);
DEFINE_DQUOT_EVENT(xfs_dqreclaim_want);
DEFINE_DQUOT_EVENT(xfs_dqreclaim_dirty);
DEFINE_DQUOT_EVENT(xfs_dqreclaim_busy);
DEFINE_DQUOT_EVENT(xfs_dqreclaim_done);
DEFINE_DQUOT_EVENT(xfs_dqattach_found);
DEFINE_DQUOT_EVENT(xfs_dqattach_get);
DEFINE_DQUOT_EVENT(xfs_dqalloc);
DEFINE_DQUOT_EVENT(xfs_dqtobp_read);
DEFINE_DQUOT_EVENT(xfs_dqread);
DEFINE_DQUOT_EVENT(xfs_dqread_fail);
DEFINE_DQUOT_EVENT(xfs_dqget_hit);
DEFINE_DQUOT_EVENT(xfs_dqget_miss);
DEFINE_DQUOT_EVENT(xfs_dqget_freeing);
DEFINE_DQUOT_EVENT(xfs_dqget_dup);
DEFINE_DQUOT_EVENT(xfs_dqput);
DEFINE_DQUOT_EVENT(xfs_dqput_free);
DEFINE_DQUOT_EVENT(xfs_dqrele);
DEFINE_DQUOT_EVENT(xfs_dqflush);
DEFINE_DQUOT_EVENT(xfs_dqflush_force);
DEFINE_DQUOT_EVENT(xfs_dqflush_done);
DEFINE_DQUOT_EVENT(xfs_trans_apply_dquot_deltas_before);
DEFINE_DQUOT_EVENT(xfs_trans_apply_dquot_deltas_after);
/*
 * Flag-to-name table for __print_flags().  The xfs_trans_mod_dquot
 * tracepoint uses it to decode its "field" argument.
 */
#define XFS_QMOPT_FLAGS							\
	{ XFS_QMOPT_UQUOTA,		" UQUOTA " },			\
	{ XFS_QMOPT_PQUOTA,		" PQUOTA " },			\
	{ XFS_QMOPT_FORCE_RES,		" FORCE_RES " },		\
	{ XFS_QMOPT_SBVERSION,		" SBVERSION " },		\
	{ XFS_QMOPT_GQUOTA,		" GQUOTA " },			\
	{ XFS_QMOPT_INHERIT,		" INHERIT " },			\
	{ XFS_QMOPT_RES_REGBLKS,	" RES_REGBLKS " },		\
	{ XFS_QMOPT_RES_RTBLKS,		" RES_RTBLKS " },		\
	{ XFS_QMOPT_BCOUNT,		" BCOUNT " },			\
	{ XFS_QMOPT_ICOUNT,		" ICOUNT " },			\
	{ XFS_QMOPT_RTBCOUNT,		" RTBCOUNT " },			\
	{ XFS_QMOPT_DELBCOUNT,		" DELBCOUNT " },		\
	{ XFS_QMOPT_DELRTBCOUNT,	" DELRTBCOUNT " },		\
	{ XFS_QMOPT_RES_INOS,		" RES_INOS " }
/*
 * Tracepoint for a transactional dquot modification.  Records the dquot's
 * id, type and flags, which field is being modified (decoded through
 * XFS_QMOPT_FLAGS) and the signed delta applied to it.  The device comes
 * from the transaction's mount.
 */
TRACE_EVENT(xfs_trans_mod_dquot,
	TP_PROTO(struct xfs_trans *tp, struct xfs_dquot *dqp,
		 unsigned int field, int64_t delta),
	TP_ARGS(tp, dqp, field, delta),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_dqtype_t, type)
		__field(unsigned int, flags)
		__field(unsigned int, dqid)
		__field(unsigned int, field)
		__field(int64_t, delta)
	),
	TP_fast_assign(
		__entry->dev = tp->t_mountp->m_super->s_dev;
		__entry->type = dqp->q_type;
		__entry->flags = dqp->q_flags;
		__entry->dqid = dqp->q_id;
		__entry->field = field;
		__entry->delta = delta;
	),
	TP_printk(" dev %d:%d dquot id 0x%x type %s flags %s field %s delta %lld ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->dqid,
		  __print_flags(__entry->type, " | ", XFS_DQTYPE_STRINGS),
		  __print_flags(__entry->flags, " | ", XFS_DQFLAG_STRINGS),
		  __print_flags(__entry->field, " | ", XFS_QMOPT_FLAGS),
		  __entry->delta)
);
/*
 * Event class for per-transaction dquot tracking (struct xfs_dqtrx).
 * Records the identity of the attached dquot (id, type, flags) and copies
 * the qt_* reservation and delta counters for blocks, realtime blocks and
 * inodes.
 */
DECLARE_EVENT_CLASS(xfs_dqtrx_class,
	TP_PROTO(struct xfs_dqtrx *qtrx),
	TP_ARGS(qtrx),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_dqtype_t, type)
		__field(unsigned int, flags)
		__field(u32, dqid)

		__field(uint64_t, blk_res)
		__field(int64_t, bcount_delta)
		__field(int64_t, delbcnt_delta)

		__field(uint64_t, rtblk_res)
		__field(uint64_t, rtblk_res_used)
		__field(int64_t, rtbcount_delta)
		__field(int64_t, delrtb_delta)

		__field(uint64_t, ino_res)
		__field(uint64_t, ino_res_used)
		__field(int64_t, icount_delta)
	),
	TP_fast_assign(
		/* Identity of the dquot this entry refers to. */
		__entry->dev = qtrx->qt_dquot->q_mount->m_super->s_dev;
		__entry->type = qtrx->qt_dquot->q_type;
		__entry->flags = qtrx->qt_dquot->q_flags;
		__entry->dqid = qtrx->qt_dquot->q_id;

		/* Block counters. */
		__entry->blk_res = qtrx->qt_blk_res;
		__entry->bcount_delta = qtrx->qt_bcount_delta;
		__entry->delbcnt_delta = qtrx->qt_delbcnt_delta;

		/* Realtime block counters. */
		__entry->rtblk_res = qtrx->qt_rtblk_res;
		__entry->rtblk_res_used = qtrx->qt_rtblk_res_used;
		__entry->rtbcount_delta = qtrx->qt_rtbcount_delta;
		__entry->delrtb_delta = qtrx->qt_delrtb_delta;

		/* Inode counters. */
		__entry->ino_res = qtrx->qt_ino_res;
		__entry->ino_res_used = qtrx->qt_ino_res_used;
		__entry->icount_delta = qtrx->qt_icount_delta;
	),
	TP_printk(" dev %d:%d dquot id 0x%x type %s flags %s "
		  " blk_res %llu bcount_delta %lld delbcnt_delta %lld "
		  " rtblk_res %llu rtblk_res_used %llu rtbcount_delta %lld delrtb_delta %lld "
		  " ino_res %llu ino_res_used %llu icount_delta %lld ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->dqid,
		  __print_flags(__entry->type, " | ", XFS_DQTYPE_STRINGS),
		  __print_flags(__entry->flags, " | ", XFS_DQFLAG_STRINGS),
		  __entry->blk_res,
		  __entry->bcount_delta,
		  __entry->delbcnt_delta,
		  __entry->rtblk_res,
		  __entry->rtblk_res_used,
		  __entry->rtbcount_delta,
		  __entry->delrtb_delta,
		  __entry->ino_res,
		  __entry->ino_res_used,
		  __entry->icount_delta)
)
/* Instantiate one tracepoint of xfs_dqtrx_class. */
#define DEFINE_DQTRX_EVENT(name)					\
DEFINE_EVENT(xfs_dqtrx_class, name,					\
	TP_PROTO(struct xfs_dqtrx *qtrx),				\
	TP_ARGS(qtrx))

DEFINE_DQTRX_EVENT(xfs_trans_apply_dquot_deltas);
DEFINE_DQTRX_EVENT(xfs_trans_mod_dquot_before);
DEFINE_DQTRX_EVENT(xfs_trans_mod_dquot_after);
2009-12-15 02:14:59 +03:00
2009-12-21 17:03:03 +03:00
DECLARE_EVENT_CLASS ( xfs_loggrant_class ,
2012-06-14 18:22:15 +04:00
TP_PROTO ( struct xlog * log , struct xlog_ticket * tic ) ,
2009-12-21 17:03:03 +03:00
TP_ARGS ( log , tic ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( char , ocnt )
__field ( char , cnt )
__field ( int , curr_res )
__field ( int , unit_res )
__field ( unsigned int , flags )
2010-12-21 04:02:25 +03:00
__field ( int , reserveq )
__field ( int , writeq )
2009-12-21 17:03:03 +03:00
__field ( int , grant_reserve_cycle )
__field ( int , grant_reserve_bytes )
__field ( int , grant_write_cycle )
__field ( int , grant_write_bytes )
__field ( int , curr_cycle )
__field ( int , curr_block )
__field ( xfs_lsn_t , tail_lsn )
) ,
TP_fast_assign (
__entry - > dev = log - > l_mp - > m_super - > s_dev ;
__entry - > ocnt = tic - > t_ocnt ;
__entry - > cnt = tic - > t_cnt ;
__entry - > curr_res = tic - > t_curr_res ;
__entry - > unit_res = tic - > t_unit_res ;
__entry - > flags = tic - > t_flags ;
2012-02-20 06:31:25 +04:00
__entry - > reserveq = list_empty ( & log - > l_reserve_head . waiters ) ;
__entry - > writeq = list_empty ( & log - > l_write_head . waiters ) ;
xlog_crack_grant_head ( & log - > l_reserve_head . grant ,
2010-12-21 04:08:20 +03:00
& __entry - > grant_reserve_cycle ,
& __entry - > grant_reserve_bytes ) ;
2012-02-20 06:31:25 +04:00
xlog_crack_grant_head ( & log - > l_write_head . grant ,
2010-12-21 04:08:20 +03:00
& __entry - > grant_write_cycle ,
& __entry - > grant_write_bytes ) ;
2009-12-21 17:03:03 +03:00
__entry - > curr_cycle = log - > l_curr_cycle ;
__entry - > curr_block = log - > l_curr_block ;
2010-12-21 04:28:39 +03:00
__entry - > tail_lsn = atomic64_read ( & log - > l_tail_lsn ) ;
2009-12-21 17:03:03 +03:00
) ,
2016-04-06 02:20:36 +03:00
TP_printk ( " dev %d:%d t_ocnt %u t_cnt %u t_curr_res %u "
2010-12-21 04:02:25 +03:00
" t_unit_res %u t_flags %s reserveq %s "
" writeq %s grant_reserve_cycle %d "
2009-12-21 17:03:03 +03:00
" grant_reserve_bytes %d grant_write_cycle %d "
" grant_write_bytes %d curr_cycle %d curr_block %d "
" tail_cycle %d tail_block %d " ,
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > ocnt ,
__entry - > cnt ,
__entry - > curr_res ,
__entry - > unit_res ,
__print_flags ( __entry - > flags , " | " , XLOG_TIC_FLAGS ) ,
2010-12-21 04:02:25 +03:00
__entry - > reserveq ? " empty " : " active " ,
__entry - > writeq ? " empty " : " active " ,
2009-12-21 17:03:03 +03:00
__entry - > grant_reserve_cycle ,
__entry - > grant_reserve_bytes ,
__entry - > grant_write_cycle ,
__entry - > grant_write_bytes ,
__entry - > curr_cycle ,
__entry - > curr_block ,
CYCLE_LSN ( __entry - > tail_lsn ) ,
BLOCK_LSN ( __entry - > tail_lsn )
)
)
2009-12-15 02:14:59 +03:00
2009-12-21 17:03:03 +03:00
# define DEFINE_LOGGRANT_EVENT(name) \
DEFINE_EVENT ( xfs_loggrant_class , name , \
2012-06-14 18:22:15 +04:00
TP_PROTO ( struct xlog * log , struct xlog_ticket * tic ) , \
2009-12-21 17:03:03 +03:00
TP_ARGS ( log , tic ) )
2009-12-15 02:14:59 +03:00
DEFINE_LOGGRANT_EVENT ( xfs_log_umount_write ) ;
2011-11-28 12:17:36 +04:00
DEFINE_LOGGRANT_EVENT ( xfs_log_grant_sleep ) ;
DEFINE_LOGGRANT_EVENT ( xfs_log_grant_wake ) ;
2010-12-21 04:29:01 +03:00
DEFINE_LOGGRANT_EVENT ( xfs_log_grant_wake_up ) ;
2012-02-20 06:31:31 +04:00
DEFINE_LOGGRANT_EVENT ( xfs_log_reserve ) ;
DEFINE_LOGGRANT_EVENT ( xfs_log_reserve_exit ) ;
DEFINE_LOGGRANT_EVENT ( xfs_log_regrant ) ;
DEFINE_LOGGRANT_EVENT ( xfs_log_regrant_exit ) ;
2020-03-26 04:18:23 +03:00
DEFINE_LOGGRANT_EVENT ( xfs_log_ticket_regrant ) ;
DEFINE_LOGGRANT_EVENT ( xfs_log_ticket_regrant_exit ) ;
DEFINE_LOGGRANT_EVENT ( xfs_log_ticket_regrant_sub ) ;
DEFINE_LOGGRANT_EVENT ( xfs_log_ticket_ungrant ) ;
DEFINE_LOGGRANT_EVENT ( xfs_log_ticket_ungrant_sub ) ;
DEFINE_LOGGRANT_EVENT ( xfs_log_ticket_ungrant_exit ) ;
2020-03-25 06:10:27 +03:00
DEFINE_LOGGRANT_EVENT ( xfs_log_cil_wait ) ;
2009-12-15 02:14:59 +03:00
2011-10-11 19:14:11 +04:00
/*
 * Event class for log item tracing.  Records the item's address, type
 * (printed symbolically via XFS_LI_TYPE_DESC), flags (decoded through
 * XFS_LI_FLAGS) and LSN, which is printed as a cycle/block pair.
 */
DECLARE_EVENT_CLASS(xfs_log_item_class,
	TP_PROTO(struct xfs_log_item *lip),
	TP_ARGS(lip),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(void *, lip)
		__field(uint, type)
		__field(unsigned long, flags)
		__field(xfs_lsn_t, lsn)
	),
	TP_fast_assign(
		__entry->dev = lip->li_mountp->m_super->s_dev;
		__entry->lip = lip;
		__entry->type = lip->li_type;
		__entry->flags = lip->li_flags;
		__entry->lsn = lip->li_lsn;
	),
	TP_printk(" dev %d:%d lip %p lsn %d/%d type %s flags %s ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->lip,
		  CYCLE_LSN(__entry->lsn), BLOCK_LSN(__entry->lsn),
		  __print_symbolic(__entry->type, XFS_LI_TYPE_DESC),
		  __print_flags(__entry->flags, " | ", XFS_LI_FLAGS))
)
2012-04-24 10:33:31 +04:00
TRACE_EVENT ( xfs_log_force ,
2016-04-06 02:46:30 +03:00
TP_PROTO ( struct xfs_mount * mp , xfs_lsn_t lsn , unsigned long caller_ip ) ,
TP_ARGS ( mp , lsn , caller_ip ) ,
2012-04-24 10:33:31 +04:00
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_lsn_t , lsn )
2016-04-06 02:46:30 +03:00
__field ( unsigned long , caller_ip )
2012-04-24 10:33:31 +04:00
) ,
TP_fast_assign (
__entry - > dev = mp - > m_super - > s_dev ;
__entry - > lsn = lsn ;
2016-04-06 02:46:30 +03:00
__entry - > caller_ip = caller_ip ;
2012-04-24 10:33:31 +04:00
) ,
2018-01-09 22:46:05 +03:00
TP_printk ( " dev %d:%d lsn 0x%llx caller %pS " ,
2012-04-24 10:33:31 +04:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
2016-04-06 02:46:30 +03:00
__entry - > lsn , ( void * ) __entry - > caller_ip )
2012-04-24 10:33:31 +04:00
)
2011-10-11 19:14:11 +04:00
/* Instantiate one tracepoint of xfs_log_item_class. */
#define DEFINE_LOG_ITEM_EVENT(name)					\
DEFINE_EVENT(xfs_log_item_class, name,					\
	TP_PROTO(struct xfs_log_item *lip),				\
	TP_ARGS(lip))

DEFINE_LOG_ITEM_EVENT(xfs_ail_push);
DEFINE_LOG_ITEM_EVENT(xfs_ail_pinned);
DEFINE_LOG_ITEM_EVENT(xfs_ail_locked);
xfs: on-stack delayed write buffer lists
Queue delwri buffers on a local on-stack list instead of a per-buftarg one,
and write back the buffers per-process instead of by waking up xfsbufd.
This is now easily doable given that we have very few places left that write
delwri buffers:
- log recovery:
Only done at mount time, and already forcing out the buffers
synchronously using xfs_flush_buftarg
- quotacheck:
Same story.
- dquot reclaim:
Writes out dirty dquots on the LRU under memory pressure. We might
want to look into doing more of this via xfsaild, but it's already
more optimal than the synchronous inode reclaim that writes each
buffer synchronously.
- xfsaild:
This is the main beneficiary of the change. By keeping a local list
of buffers to write we reduce latency of writing out buffers, and
more importantly we can remove all the delwri list promotions which
were hitting the buffer cache hard under sustained metadata loads.
The implementation is very straightforward - xfs_buf_delwri_queue now gets
a new list_head pointer that it adds the delwri buffers to, and all callers
need to eventually submit the list using xfs_buf_delwri_submit or
xfs_buf_delwri_submit_nowait. Buffers that already are on a delwri list are
skipped in xfs_buf_delwri_queue, assuming they already are on another delwri
list. The biggest change to pass down the buffer list was done to the AIL
pushing. Now that we operate on buffers the trylock, push and pushbuf log
item methods are merged into a single push routine, which tries to lock the
item, and if possible add the buffer that needs writeback to the buffer list.
This leads to much simpler code than the previous split but requires the
individual IOP_PUSH instances to unlock and reacquire the AIL around calls
to blocking routines.
Given that xfsailds now also handle writing out buffers, the conditions for
log forcing and the sleep times needed some small changes. The most
important one is that we consider an AIL busy as long as we still have buffers
to push, and the other one is that we do increment the pushed LSN for
buffers that are under flushing at this moment, but still count them towards
the stuck items for restart purposes. Without this we could hammer on stuck
items without ever forcing the log and not make progress under heavy random
delete workloads on fast flash storage devices.
[ Dave Chinner:
- rebase on previous patches.
- improved comments for XBF_DELWRI_Q handling
- fix XBF_ASYNC handling in queue submission (test 106 failure)
- rename delwri submit function buffer list parameters for clarity
- xfs_efd_item_push() should return XFS_ITEM_PINNED ]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
2012-04-23 09:58:39 +04:00
/* Another xfs_log_item_class instance; takes only the log item. */
DEFINE_LOG_ITEM_EVENT ( xfs_ail_flushing ) ;
2011-10-11 19:14:11 +04:00
2013-11-01 08:27:18 +04:00
/*
 * Event class for AIL updates.  Records the log item's address, type and
 * flags along with the caller-supplied old and new LSNs, each printed as a
 * cycle/block pair.
 */
DECLARE_EVENT_CLASS(xfs_ail_class,
	TP_PROTO(struct xfs_log_item *lip, xfs_lsn_t old_lsn,
		 xfs_lsn_t new_lsn),
	TP_ARGS(lip, old_lsn, new_lsn),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(void *, lip)
		__field(uint, type)
		__field(unsigned long, flags)
		__field(xfs_lsn_t, old_lsn)
		__field(xfs_lsn_t, new_lsn)
	),
	TP_fast_assign(
		__entry->dev = lip->li_mountp->m_super->s_dev;
		__entry->lip = lip;
		__entry->type = lip->li_type;
		__entry->flags = lip->li_flags;
		__entry->old_lsn = old_lsn;
		__entry->new_lsn = new_lsn;
	),
	TP_printk(" dev %d:%d lip %p old lsn %d/%d new lsn %d/%d type %s flags %s ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->lip,
		  CYCLE_LSN(__entry->old_lsn), BLOCK_LSN(__entry->old_lsn),
		  CYCLE_LSN(__entry->new_lsn), BLOCK_LSN(__entry->new_lsn),
		  __print_symbolic(__entry->type, XFS_LI_TYPE_DESC),
		  __print_flags(__entry->flags, " | ", XFS_LI_FLAGS))
)
/* Instantiate one tracepoint of xfs_ail_class. */
#define DEFINE_AIL_EVENT(name)						\
DEFINE_EVENT(xfs_ail_class, name,					\
	TP_PROTO(struct xfs_log_item *lip, xfs_lsn_t old_lsn,		\
		 xfs_lsn_t new_lsn),					\
	TP_ARGS(lip, old_lsn, new_lsn))

DEFINE_AIL_EVENT(xfs_ail_insert);
DEFINE_AIL_EVENT(xfs_ail_move);
DEFINE_AIL_EVENT(xfs_ail_delete);
/*
 * Tracepoint for a log tail LSN assignment.  Records the proposed new tail
 * LSN together with the log's current l_tail_lsn (reported as "old lsn")
 * and l_last_sync_lsn, both read atomically when the event fires.
 */
TRACE_EVENT(xfs_log_assign_tail_lsn,
	TP_PROTO(struct xlog *log, xfs_lsn_t new_lsn),
	TP_ARGS(log, new_lsn),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_lsn_t, new_lsn)
		__field(xfs_lsn_t, old_lsn)
		__field(xfs_lsn_t, last_sync_lsn)
	),
	TP_fast_assign(
		__entry->dev = log->l_mp->m_super->s_dev;
		__entry->new_lsn = new_lsn;
		__entry->old_lsn = atomic64_read(&log->l_tail_lsn);
		__entry->last_sync_lsn = atomic64_read(&log->l_last_sync_lsn);
	),
	TP_printk(" dev %d:%d new tail lsn %d/%d, old lsn %d/%d, last sync %d/%d ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  CYCLE_LSN(__entry->new_lsn),
		  BLOCK_LSN(__entry->new_lsn),
		  CYCLE_LSN(__entry->old_lsn),
		  BLOCK_LSN(__entry->old_lsn),
		  CYCLE_LSN(__entry->last_sync_lsn),
		  BLOCK_LSN(__entry->last_sync_lsn))
)
2011-10-11 19:14:11 +04:00
2010-05-24 12:25:57 +04:00
DECLARE_EVENT_CLASS ( xfs_file_class ,
2021-01-23 21:06:29 +03:00
TP_PROTO ( struct kiocb * iocb , struct iov_iter * iter ) ,
TP_ARGS ( iocb , iter ) ,
2010-05-24 12:25:57 +04:00
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_ino_t , ino )
__field ( xfs_fsize_t , size )
__field ( loff_t , offset )
__field ( size_t , count )
) ,
TP_fast_assign (
2021-01-23 21:06:29 +03:00
__entry - > dev = file_inode ( iocb - > ki_filp ) - > i_sb - > s_dev ;
__entry - > ino = XFS_I ( file_inode ( iocb - > ki_filp ) ) - > i_ino ;
2021-03-29 21:11:40 +03:00
__entry - > size = XFS_I ( file_inode ( iocb - > ki_filp ) ) - > i_disk_size ;
2021-01-23 21:06:29 +03:00
__entry - > offset = iocb - > ki_pos ;
__entry - > count = iov_iter_count ( iter ) ;
2010-05-24 12:25:57 +04:00
) ,
2016-07-20 04:31:42 +03:00
TP_printk ( " dev %d:%d ino 0x%llx size 0x%llx offset 0x%llx count 0x%zx " ,
2010-05-24 12:25:57 +04:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > ino ,
__entry - > size ,
__entry - > offset ,
2016-07-20 04:31:42 +03:00
__entry - > count )
2009-12-15 02:14:59 +03:00
)
2010-05-24 12:25:57 +04:00
# define DEFINE_RW_EVENT(name) \
DEFINE_EVENT ( xfs_file_class , name , \
2021-01-23 21:06:29 +03:00
TP_PROTO ( struct kiocb * iocb , struct iov_iter * iter ) , \
TP_ARGS ( iocb , iter ) )
2016-07-20 04:31:42 +03:00
DEFINE_RW_EVENT ( xfs_file_buffered_read ) ;
DEFINE_RW_EVENT ( xfs_file_direct_read ) ;
2016-07-20 04:38:55 +03:00
DEFINE_RW_EVENT ( xfs_file_dax_read ) ;
2009-12-15 02:14:59 +03:00
DEFINE_RW_EVENT ( xfs_file_buffered_write ) ;
DEFINE_RW_EVENT ( xfs_file_direct_write ) ;
2016-07-20 04:38:55 +03:00
DEFINE_RW_EVENT ( xfs_file_dax_write ) ;
2021-01-23 21:06:30 +03:00
DEFINE_RW_EVENT ( xfs_reflink_bounce_dio_write ) ;
2009-12-15 02:14:59 +03:00
2010-12-10 11:42:20 +03:00
/*
 * xfs_imap_class - event class for tracepoints that report an extent
 * record (@irec) together with the offset/count range it was looked up for.
 *
 * Recorded fields:
 *   dev, ino   - device and inode number of @ip
 *   size       - ip->i_disk_size when the event fires
 *   offset     - @offset as passed by the caller
 *   count      - @count as passed by the caller
 *   whichfork  - @whichfork as passed by the caller
 *   startoff, startblock, blockcount
 *              - br_startoff, br_startblock and br_blockcount of @irec
 *
 * @irec may be NULL; the three extent fields are then recorded as 0.
 *
 * Output notes:
 *   - the fork is printed as "cow" when whichfork == XFS_COW_FORK and as
 *     "data" for every other value
 *   - startblock is cast to int64_t and printed as a signed decimal
 *   - NOTE(review): count is passed in as ssize_t, stored in a size_t field
 *     and printed with %zd; confirm the signed specifier is intentional
 */
DECLARE_EVENT_CLASS ( xfs_imap_class ,
2010-05-24 12:25:57 +04:00
TP_PROTO ( struct xfs_inode * ip , xfs_off_t offset , ssize_t count ,
2019-02-15 19:02:46 +03:00
int whichfork , struct xfs_bmbt_irec * irec ) ,
TP_ARGS ( ip , offset , count , whichfork , irec ) ,
2010-05-24 12:25:57 +04:00
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_ino_t , ino )
__field ( loff_t , size )
__field ( loff_t , offset )
__field ( size_t , count )
2019-02-15 19:02:46 +03:00
__field ( int , whichfork )
2010-05-24 12:25:57 +04:00
__field ( xfs_fileoff_t , startoff )
__field ( xfs_fsblock_t , startblock )
__field ( xfs_filblks_t , blockcount )
) ,
TP_fast_assign (
__entry - > dev = VFS_I ( ip ) - > i_sb - > s_dev ;
__entry - > ino = ip - > i_ino ;
2021-03-29 21:11:40 +03:00
__entry - > size = ip - > i_disk_size ;
2010-05-24 12:25:57 +04:00
__entry - > offset = offset ;
__entry - > count = count ;
2019-02-15 19:02:46 +03:00
__entry - > whichfork = whichfork ;
2010-05-24 12:25:57 +04:00
/* irec is optional: fall back to 0 for each extent field when it is NULL. */
__entry - > startoff = irec ? irec - > br_startoff : 0 ;
__entry - > startblock = irec ? irec - > br_startblock : 0 ;
__entry - > blockcount = irec ? irec - > br_blockcount : 0 ;
) ,
2011-12-19 00:00:12 +04:00
TP_printk ( " dev %d:%d ino 0x%llx size 0x%llx offset 0x%llx count %zd "
2019-02-15 19:02:46 +03:00
" fork %s startoff 0x%llx startblock %lld blockcount 0x%llx " ,
2010-05-24 12:25:57 +04:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > ino ,
__entry - > size ,
__entry - > offset ,
__entry - > count ,
2019-02-15 19:02:46 +03:00
__entry - > whichfork = = XFS_COW_FORK ? " cow " : " data " ,
2010-05-24 12:25:57 +04:00
__entry - > startoff ,
2017-06-16 21:00:05 +03:00
( int64_t ) __entry - > startblock ,
2010-05-24 12:25:57 +04:00
__entry - > blockcount )
2009-12-15 02:14:59 +03:00
)
2010-05-24 12:25:57 +04:00
2019-02-15 19:02:46 +03:00
/*
 * DEFINE_IMAP_EVENT(name) - instantiate tracepoint @name from
 * xfs_imap_class, using the same (ip, offset, count, whichfork, irec)
 * prototype as the class.
 */
# define DEFINE_IMAP_EVENT(name) \
2010-12-10 11:42:20 +03:00
DEFINE_EVENT ( xfs_imap_class , name , \
2010-05-24 12:25:57 +04:00
TP_PROTO ( struct xfs_inode * ip , xfs_off_t offset , ssize_t count , \
2019-02-15 19:02:46 +03:00
int whichfork , struct xfs_bmbt_irec * irec ) , \
TP_ARGS ( ip , offset , count , whichfork , irec ) )
/* Tracepoints built on xfs_imap_class. */
DEFINE_IMAP_EVENT ( xfs_map_blocks_found ) ;
DEFINE_IMAP_EVENT ( xfs_map_blocks_alloc ) ;
DEFINE_IMAP_EVENT ( xfs_iomap_alloc ) ;
DEFINE_IMAP_EVENT ( xfs_iomap_found ) ;
2009-12-15 02:14:59 +03:00
2010-05-24 12:25:57 +04:00
/*
 * xfs_simple_io_class - event class for tracepoints that take an inode and
 * an offset/count pair.
 *
 * Recorded fields:
 *   dev, ino - device and inode number of @ip
 *   isize    - VFS_I(ip)->i_size
 *   disize   - ip->i_disk_size
 *   offset   - @offset as passed by the caller
 *   count    - @count as passed by the caller (ssize_t stored in a size_t
 *              field, printed with %zd)
 *
 * Both size values are captured so the two can be compared in the trace.
 */
DECLARE_EVENT_CLASS ( xfs_simple_io_class ,
TP_PROTO ( struct xfs_inode * ip , xfs_off_t offset , ssize_t count ) ,
TP_ARGS ( ip , offset , count ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_ino_t , ino )
2011-07-18 07:40:19 +04:00
__field ( loff_t , isize )
__field ( loff_t , disize )
2010-05-24 12:25:57 +04:00
__field ( loff_t , offset )
__field ( size_t , count )
) ,
TP_fast_assign (
__entry - > dev = VFS_I ( ip ) - > i_sb - > s_dev ;
__entry - > ino = ip - > i_ino ;
2011-12-19 00:00:11 +04:00
__entry - > isize = VFS_I ( ip ) - > i_size ;
2021-03-29 21:11:40 +03:00
__entry - > disize = ip - > i_disk_size ;
2010-05-24 12:25:57 +04:00
__entry - > offset = offset ;
__entry - > count = count ;
) ,
2011-12-19 00:00:12 +04:00
TP_printk ( " dev %d:%d ino 0x%llx isize 0x%llx disize 0x%llx "
2010-05-24 12:25:57 +04:00
" offset 0x%llx count %zd " ,
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > ino ,
2011-07-18 07:40:19 +04:00
__entry - > isize ,
__entry - > disize ,
2010-05-24 12:25:57 +04:00
__entry - > offset ,
__entry - > count )
2009-12-15 02:14:59 +03:00
) ;
2010-05-24 12:25:57 +04:00
/*
 * DEFINE_SIMPLE_IO_EVENT(name) - instantiate tracepoint @name from
 * xfs_simple_io_class, using the same (ip, offset, count) prototype as the
 * class.
 */
# define DEFINE_SIMPLE_IO_EVENT(name) \
DEFINE_EVENT ( xfs_simple_io_class , name , \
TP_PROTO ( struct xfs_inode * ip , xfs_off_t offset , ssize_t count ) , \
TP_ARGS ( ip , offset , count ) )
/* Tracepoints built on xfs_simple_io_class. */
2009-12-15 02:14:59 +03:00
DEFINE_SIMPLE_IO_EVENT ( xfs_delalloc_enospc ) ;
DEFINE_SIMPLE_IO_EVENT ( xfs_unwritten_convert ) ;
2011-07-18 07:40:19 +04:00
DEFINE_SIMPLE_IO_EVENT ( xfs_setfilesize ) ;
2015-10-12 08:02:08 +03:00
DEFINE_SIMPLE_IO_EVENT ( xfs_zero_eof ) ;
2016-02-08 06:40:51 +03:00
DEFINE_SIMPLE_IO_EVENT ( xfs_end_io_direct_write ) ;
DEFINE_SIMPLE_IO_EVENT ( xfs_end_io_direct_write_unwritten ) ;
DEFINE_SIMPLE_IO_EVENT ( xfs_end_io_direct_write_append ) ;
2009-12-15 02:14:59 +03:00
2009-12-21 17:03:03 +03:00
/*
 * xfs_itrunc_class - event class for tracepoints that take an inode and a
 * new size.
 *
 * Recorded fields:
 *   dev, ino - device and inode number of @ip
 *   size     - ip->i_disk_size when the event fires
 *   new_size - @new_size as passed by the caller
 *
 * Both sizes are printed in hex.
 */
DECLARE_EVENT_CLASS ( xfs_itrunc_class ,
TP_PROTO ( struct xfs_inode * ip , xfs_fsize_t new_size ) ,
TP_ARGS ( ip , new_size ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_ino_t , ino )
__field ( xfs_fsize_t , size )
__field ( xfs_fsize_t , new_size )
) ,
TP_fast_assign (
__entry - > dev = VFS_I ( ip ) - > i_sb - > s_dev ;
__entry - > ino = ip - > i_ino ;
2021-03-29 21:11:40 +03:00
__entry - > size = ip - > i_disk_size ;
2009-12-21 17:03:03 +03:00
__entry - > new_size = new_size ;
) ,
TP_printk ( " dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " ,
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > ino ,
__entry - > size ,
__entry - > new_size )
)
2009-12-15 02:14:59 +03:00
/*
 * DEFINE_ITRUNC_EVENT(name) - instantiate tracepoint @name from
 * xfs_itrunc_class, using the same (ip, new_size) prototype as the class.
 */
# define DEFINE_ITRUNC_EVENT(name) \
2009-12-21 17:03:03 +03:00
DEFINE_EVENT ( xfs_itrunc_class , name , \
2009-12-15 02:14:59 +03:00
TP_PROTO ( struct xfs_inode * ip , xfs_fsize_t new_size ) , \
2009-12-21 17:03:03 +03:00
TP_ARGS ( ip , new_size ) )
/* Tracepoints built on xfs_itrunc_class. */
2011-12-19 00:00:04 +04:00
DEFINE_ITRUNC_EVENT ( xfs_itruncate_extents_start ) ;
DEFINE_ITRUNC_EVENT ( xfs_itruncate_extents_end ) ;
2009-12-15 02:14:59 +03:00
/*
 * xfs_pagecache_inval - standalone tracepoint taking an inode and a
 * start/finish offset pair.
 *
 * Recorded fields:
 *   dev, ino - device and inode number of @ip
 *   size     - ip->i_disk_size when the event fires
 *   start    - @start as passed by the caller
 *   finish   - @finish as passed by the caller
 *
 * size, start and finish are printed in hex.
 */
TRACE_EVENT ( xfs_pagecache_inval ,
TP_PROTO ( struct xfs_inode * ip , xfs_off_t start , xfs_off_t finish ) ,
TP_ARGS ( ip , start , finish ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_ino_t , ino )
__field ( xfs_fsize_t , size )
__field ( xfs_off_t , start )
__field ( xfs_off_t , finish )
) ,
TP_fast_assign (
__entry - > dev = VFS_I ( ip ) - > i_sb - > s_dev ;
__entry - > ino = ip - > i_ino ;
2021-03-29 21:11:40 +03:00
__entry - > size = ip - > i_disk_size ;
2009-12-15 02:14:59 +03:00
__entry - > start = start ;
__entry - > finish = finish ;
) ,
TP_printk ( " dev %d:%d ino 0x%llx size 0x%llx start 0x%llx finish 0x%llx " ,
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > ino ,
__entry - > size ,
__entry - > start ,
__entry - > finish )
) ;
/*
 * xfs_bunmap - standalone tracepoint taking an inode, a file block
 * offset/length pair, a flags word and the caller's address.
 *
 * Recorded fields:
 *   dev, ino  - device and inode number of @ip
 *   size      - ip->i_disk_size when the event fires
 *   bno, len  - @bno and @len as passed by the caller
 *   flags     - @flags as passed by the caller
 *   caller_ip - @caller_ip as passed by the caller
 *
 * flags is decoded with __print_flags() against the XFS_BMAPI_FLAGS table,
 * using "|" as the separator; caller_ip is printed with %pS.
 */
TRACE_EVENT ( xfs_bunmap ,
TP_PROTO ( struct xfs_inode * ip , xfs_fileoff_t bno , xfs_filblks_t len ,
int flags , unsigned long caller_ip ) ,
TP_ARGS ( ip , bno , len , flags , caller_ip ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_ino_t , ino )
__field ( xfs_fsize_t , size )
__field ( xfs_fileoff_t , bno )
__field ( xfs_filblks_t , len )
__field ( unsigned long , caller_ip )
__field ( int , flags )
) ,
TP_fast_assign (
__entry - > dev = VFS_I ( ip ) - > i_sb - > s_dev ;
__entry - > ino = ip - > i_ino ;
2021-03-29 21:11:40 +03:00
__entry - > size = ip - > i_disk_size ;
2009-12-15 02:14:59 +03:00
__entry - > bno = bno ;
__entry - > len = len ;
__entry - > caller_ip = caller_ip ;
__entry - > flags = flags ;
) ,
TP_printk ( " dev %d:%d ino 0x%llx size 0x%llx bno 0x%llx len 0x%llx "
2018-01-09 22:46:05 +03:00
" flags %s caller %pS " ,
2009-12-15 02:14:59 +03:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > ino ,
__entry - > size ,
__entry - > bno ,
__entry - > len ,
__print_flags ( __entry - > flags , " | " , XFS_BMAPI_FLAGS ) ,
( void * ) __entry - > caller_ip )
) ;
2012-04-29 14:41:10 +04:00
DECLARE_EVENT_CLASS ( xfs_extent_busy_class ,
2009-12-15 02:14:59 +03:00
TP_PROTO ( struct xfs_mount * mp , xfs_agnumber_t agno ,
xfs: Improve scalability of busy extent tracking
When we free a metadata extent, we record it in the per-AG busy
extent array so that it is not re-used before the freeing
transaction hits the disk. This array is fixed size, so when it
overflows we make further allocation transactions synchronous
because we cannot track more freed extents until those transactions
hit the disk and are completed. Under heavy mixed allocation and
freeing workloads with large log buffers, we can overflow this array
quite easily.
Further, the array is sparsely populated, which means that inserts
need to search for a free slot, and array searches often have to
search many more slots than are actually used to check all the
busy extents. Quite inefficient, really.
To enable this aspect of extent freeing to scale better, we need
a structure that can grow dynamically. While in other areas of
XFS we have used radix trees, the extents being freed are at random
locations on disk so are better suited to being indexed by an rbtree.
So, use a per-AG rbtree indexed by block number to track busy
extents. This incurs a memory allocation when marking an extent
busy, but should not occur too often in low memory situations. This
should scale to an arbitrary number of extents so should not be a
limitation for features such as in-memory aggregation of
transactions.
However, there are still situations where we can't avoid allocating
busy extents (such as allocation from the AGFL). To minimise the
overhead of such occurrences, we need to avoid doing a synchronous
log force while holding the AGF locked to ensure that the previous
transactions are safely on disk before we use the extent. We can do
this by marking the transaction doing the allocation as synchronous
rather than issuing a log force.
Because of the locking involved and the ordering of transactions,
the synchronous transaction provides the same guarantees as a
synchronous log force because it ensures that all the prior
transactions are already on disk when the synchronous transaction
hits the disk. i.e. it preserves the free->allocate order of the
extent correctly in recovery.
By doing this, we avoid holding the AGF locked while log writes are
in progress, hence reducing the length of time the lock is held and
therefore we increase the rate at which we can allocate and free
from the allocation group, thereby increasing overall throughput.
The only problem with this approach is that when a metadata buffer is
marked stale (e.g. a directory block is removed), then the buffer remains
pinned and locked until the log goes to disk. The issue here is that
if that stale buffer is reallocated in a subsequent transaction, the
attempt to lock that buffer in the transaction will hang waiting for
the log to go to disk to unlock and unpin the buffer. Hence if
someone tries to lock a pinned, stale, locked buffer we need to
push on the log to get it unlocked ASAP. Effectively we are trading
off a guaranteed log force for a much less common trigger for log
force to occur.
Ideally we should not reallocate busy extents. That is a much more
complex fix to the problem as it involves direct intervention in the
allocation btree searches in many places. This is left to a future
set of modifications.
Finally, now that we track busy extents in allocated memory, we
don't need the descriptors in the transaction structure to point to
them. We can replace the complex busy chunk infrastructure with a
simple linked list of busy extents. This allows us to remove a large
chunk of code, making the overall change a net reduction in code
size.
Signed-off-by: Dave Chinner <david@fromorbit.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Alex Elder <aelder@sgi.com>
2010-05-21 06:07:08 +04:00
xfs_agblock_t agbno , xfs_extlen_t len ) ,
TP_ARGS ( mp , agno , agbno , len ) ,
2009-12-15 02:14:59 +03:00
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_agnumber_t , agno )
xfs: Improve scalability of busy extent tracking
When we free a metadata extent, we record it in the per-AG busy
extent array so that it is not re-used before the freeing
transaction hits the disk. This array is fixed size, so when it
overflows we make further allocation transactions synchronous
because we cannot track more freed extents until those transactions
hit the disk and are completed. Under heavy mixed allocation and
freeing workloads with large log buffers, we can overflow this array
quite easily.
Further, the array is sparsely populated, which means that inserts
need to search for a free slot, and array searches often have to
search many more slots than are actually used to check all the
busy extents. Quite inefficient, really.
To enable this aspect of extent freeing to scale better, we need
a structure that can grow dynamically. While in other areas of
XFS we have used radix trees, the extents being freed are at random
locations on disk so are better suited to being indexed by an rbtree.
So, use a per-AG rbtree indexed by block number to track busy
extents. This incurs a memory allocation when marking an extent
busy, but should not occur too often in low memory situations. This
should scale to an arbitrary number of extents so should not be a
limitation for features such as in-memory aggregation of
transactions.
However, there are still situations where we can't avoid allocating
busy extents (such as allocation from the AGFL). To minimise the
overhead of such occurrences, we need to avoid doing a synchronous
log force while holding the AGF locked to ensure that the previous
transactions are safely on disk before we use the extent. We can do
this by marking the transaction doing the allocation as synchronous
rather than issuing a log force.
Because of the locking involved and the ordering of transactions,
the synchronous transaction provides the same guarantees as a
synchronous log force because it ensures that all the prior
transactions are already on disk when the synchronous transaction
hits the disk. i.e. it preserves the free->allocate order of the
extent correctly in recovery.
By doing this, we avoid holding the AGF locked while log writes are
in progress, hence reducing the length of time the lock is held and
therefore we increase the rate at which we can allocate and free
from the allocation group, thereby increasing overall throughput.
The only problem with this approach is that when a metadata buffer is
marked stale (e.g. a directory block is removed), then the buffer remains
pinned and locked until the log goes to disk. The issue here is that
if that stale buffer is reallocated in a subsequent transaction, the
attempt to lock that buffer in the transaction will hang waiting for
the log to go to disk to unlock and unpin the buffer. Hence if
someone tries to lock a pinned, stale, locked buffer we need to
push on the log to get it unlocked ASAP. Effectively we are trading
off a guaranteed log force for a much less common trigger for log
force to occur.
Ideally we should not reallocate busy extents. That is a much more
complex fix to the problem as it involves direct intervention in the
allocation btree searches in many places. This is left to a future
set of modifications.
Finally, now that we track busy extents in allocated memory, we
don't need the descriptors in the transaction structure to point to
them. We can replace the complex busy chunk infrastructure with a
simple linked list of busy extents. This allows us to remove a large
chunk of code, making the overall change a net reduction in code
size.
Signed-off-by: Dave Chinner <david@fromorbit.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Alex Elder <aelder@sgi.com>
2010-05-21 06:07:08 +04:00
__field ( xfs_agblock_t , agbno )
__field ( xfs_extlen_t , len )
2009-12-15 02:14:59 +03:00
) ,
TP_fast_assign (
__entry - > dev = mp - > m_super - > s_dev ;
__entry - > agno = agno ;
xfs: Improve scalability of busy extent tracking
When we free a metadata extent, we record it in the per-AG busy
extent array so that it is not re-used before the freeing
transaction hits the disk. This array is fixed size, so when it
overflows we make further allocation transactions synchronous
because we cannot track more freed extents until those transactions
hit the disk and are completed. Under heavy mixed allocation and
freeing workloads with large log buffers, we can overflow this array
quite easily.
Further, the array is sparsely populated, which means that inserts
need to search for a free slot, and array searches often have to
search many more slots than are actually used to check all the
busy extents. Quite inefficient, really.
To enable this aspect of extent freeing to scale better, we need
a structure that can grow dynamically. While in other areas of
XFS we have used radix trees, the extents being freed are at random
locations on disk so are better suited to being indexed by an rbtree.
So, use a per-AG rbtree indexed by block number to track busy
extents. This incurs a memory allocation when marking an extent
busy, but should not occur too often in low memory situations. This
should scale to an arbitrary number of extents so should not be a
limitation for features such as in-memory aggregation of
transactions.
However, there are still situations where we can't avoid allocating
busy extents (such as allocation from the AGFL). To minimise the
overhead of such occurrences, we need to avoid doing a synchronous
log force while holding the AGF locked to ensure that the previous
transactions are safely on disk before we use the extent. We can do
this by marking the transaction doing the allocation as synchronous
rather than issuing a log force.
Because of the locking involved and the ordering of transactions,
the synchronous transaction provides the same guarantees as a
synchronous log force because it ensures that all the prior
transactions are already on disk when the synchronous transaction
hits the disk. i.e. it preserves the free->allocate order of the
extent correctly in recovery.
By doing this, we avoid holding the AGF locked while log writes are
in progress, hence reducing the length of time the lock is held and
therefore we increase the rate at which we can allocate and free
from the allocation group, thereby increasing overall throughput.
The only problem with this approach is that when a metadata buffer is
marked stale (e.g. a directory block is removed), then the buffer remains
pinned and locked until the log goes to disk. The issue here is that
if that stale buffer is reallocated in a subsequent transaction, the
attempt to lock that buffer in the transaction will hang waiting for
the log to go to disk to unlock and unpin the buffer. Hence if
someone tries to lock a pinned, stale, locked buffer we need to
push on the log to get it unlocked ASAP. Effectively we are trading
off a guaranteed log force for a much less common trigger for log
force to occur.
Ideally we should not reallocate busy extents. That is a much more
complex fix to the problem as it involves direct intervention in the
allocation btree searches in many places. This is left to a future
set of modifications.
Finally, now that we track busy extents in allocated memory, we
don't need the descriptors in the transaction structure to point to
them. We can replace the complex busy chunk infrastructure with a
simple linked list of busy extents. This allows us to remove a large
chunk of code, making the overall change a net reduction in code
size.
Signed-off-by: Dave Chinner <david@fromorbit.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Alex Elder <aelder@sgi.com>
2010-05-21 06:07:08 +04:00
__entry - > agbno = agbno ;
__entry - > len = len ;
2009-12-15 02:14:59 +03:00
) ,
xfs: Improve scalability of busy extent tracking
When we free a metadata extent, we record it in the per-AG busy
extent array so that it is not re-used before the freeing
transaction hits the disk. This array is fixed size, so when it
overflows we make further allocation transactions synchronous
because we cannot track more freed extents until those transactions
hit the disk and are completed. Under heavy mixed allocation and
freeing workloads with large log buffers, we can overflow this array
quite easily.
Further, the array is sparsely populated, which means that inserts
need to search for a free slot, and array searches often have to
search many more slots than are actually used to check all the
busy extents. Quite inefficient, really.
To enable this aspect of extent freeing to scale better, we need
a structure that can grow dynamically. While in other areas of
XFS we have used radix trees, the extents being freed are at random
locations on disk so are better suited to being indexed by an rbtree.
So, use a per-AG rbtree indexed by block number to track busy
extents. This incurs a memory allocation when marking an extent
busy, but should not occur too often in low memory situations. This
should scale to an arbitrary number of extents so should not be a
limitation for features such as in-memory aggregation of
transactions.
However, there are still situations where we can't avoid allocating
busy extents (such as allocation from the AGFL). To minimise the
overhead of such occurrences, we need to avoid doing a synchronous
log force while holding the AGF locked to ensure that the previous
transactions are safely on disk before we use the extent. We can do
this by marking the transaction doing the allocation as synchronous
rather than issuing a log force.
Because of the locking involved and the ordering of transactions,
the synchronous transaction provides the same guarantees as a
synchronous log force because it ensures that all the prior
transactions are already on disk when the synchronous transaction
hits the disk. i.e. it preserves the free->allocate order of the
extent correctly in recovery.
By doing this, we avoid holding the AGF locked while log writes are
in progress, hence reducing the length of time the lock is held and
therefore we increase the rate at which we can allocate and free
from the allocation group, thereby increasing overall throughput.
The only problem with this approach is that when a metadata buffer is
marked stale (e.g. a directory block is removed), then the buffer remains
pinned and locked until the log goes to disk. The issue here is that
if that stale buffer is reallocated in a subsequent transaction, the
attempt to lock that buffer in the transaction will hang waiting for
the log to go to disk to unlock and unpin the buffer. Hence if
someone tries to lock a pinned, stale, locked buffer we need to
push on the log to get it unlocked ASAP. Effectively we are trading
off a guaranteed log force for a much less common trigger for log
force to occur.
Ideally we should not reallocate busy extents. That is a much more
complex fix to the problem as it involves direct intervention in the
allocation btree searches in many places. This is left to a future
set of modifications.
Finally, now that we track busy extents in allocated memory, we
don't need the descriptors in the transaction structure to point to
them. We can replace the complex busy chunk infrastructure with a
simple linked list of busy extents. This allows us to remove a large
chunk of code, making the overall change a net reduction in code
size.
Signed-off-by: Dave Chinner <david@fromorbit.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Alex Elder <aelder@sgi.com>
2010-05-21 06:07:08 +04:00
TP_printk ( " dev %d:%d agno %u agbno %u len %u " ,
2009-12-15 02:14:59 +03:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > agno ,
xfs: Improve scalability of busy extent tracking
When we free a metadata extent, we record it in the per-AG busy
extent array so that it is not re-used before the freeing
transaction hits the disk. This array is fixed size, so when it
overflows we make further allocation transactions synchronous
because we cannot track more freed extents until those transactions
hit the disk and are completed. Under heavy mixed allocation and
freeing workloads with large log buffers, we can overflow this array
quite easily.
Further, the array is sparsely populated, which means that inserts
need to search for a free slot, and array searches often have to
search many more slots than are actually used to check all the
busy extents. Quite inefficient, really.
To enable this aspect of extent freeing to scale better, we need
a structure that can grow dynamically. While in other areas of
XFS we have used radix trees, the extents being freed are at random
locations on disk so are better suited to being indexed by an rbtree.
So, use a per-AG rbtree indexed by block number to track busy
extents. This incurs a memory allocation when marking an extent
busy, but should not occur too often in low memory situations. This
should scale to an arbitrary number of extents so should not be a
limitation for features such as in-memory aggregation of
transactions.
However, there are still situations where we can't avoid allocating
busy extents (such as allocation from the AGFL). To minimise the
overhead of such occurrences, we need to avoid doing a synchronous
log force while holding the AGF locked to ensure that the previous
transactions are safely on disk before we use the extent. We can do
this by marking the transaction doing the allocation as synchronous
rather than issuing a log force.
Because of the locking involved and the ordering of transactions,
the synchronous transaction provides the same guarantees as a
synchronous log force because it ensures that all the prior
transactions are already on disk when the synchronous transaction
hits the disk. i.e. it preserves the free->allocate order of the
extent correctly in recovery.
By doing this, we avoid holding the AGF locked while log writes are
in progress, hence reducing the length of time the lock is held and
therefore we increase the rate at which we can allocate and free
from the allocation group, thereby increasing overall throughput.
The only problem with this approach is that when a metadata buffer is
marked stale (e.g. a directory block is removed), then the buffer remains
pinned and locked until the log goes to disk. The issue here is that
if that stale buffer is reallocated in a subsequent transaction, the
attempt to lock that buffer in the transaction will hang waiting for
the log to go to disk to unlock and unpin the buffer. Hence if
someone tries to lock a pinned, stale, locked buffer we need to
push on the log to get it unlocked ASAP. Effectively we are trading
off a guaranteed log force for a much less common trigger for log
force to occur.
Ideally we should not reallocate busy extents. That is a much more
complex fix to the problem as it involves direct intervention in the
allocation btree searches in many places. This is left to a future
set of modifications.
Finally, now that we track busy extents in allocated memory, we
don't need the descriptors in the transaction structure to point to
them. We can replace the complex busy chunk infrastructure with a
simple linked list of busy extents. This allows us to remove a large
chunk of code, making the overall change a net reduction in code
size.
Signed-off-by: Dave Chinner <david@fromorbit.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Alex Elder <aelder@sgi.com>
2010-05-21 06:07:08 +04:00
__entry - > agbno ,
__entry - > len )
2009-12-15 02:14:59 +03:00
) ;
2011-04-24 23:06:16 +04:00
/*
 * DEFINE_BUSY_EVENT(name) - instantiate tracepoint @name from
 * xfs_extent_busy_class, using the (mp, agno, agbno, len) prototype.
 */
# define DEFINE_BUSY_EVENT(name) \
2012-04-29 14:41:10 +04:00
DEFINE_EVENT ( xfs_extent_busy_class , name , \
2011-04-24 23:06:16 +04:00
TP_PROTO ( struct xfs_mount * mp , xfs_agnumber_t agno , \
xfs_agblock_t agbno , xfs_extlen_t len ) , \
TP_ARGS ( mp , agno , agbno , len ) )
/* Tracepoints built on xfs_extent_busy_class. */
2012-04-29 14:41:10 +04:00
DEFINE_BUSY_EVENT ( xfs_extent_busy ) ;
DEFINE_BUSY_EVENT ( xfs_extent_busy_enomem ) ;
DEFINE_BUSY_EVENT ( xfs_extent_busy_force ) ;
DEFINE_BUSY_EVENT ( xfs_extent_busy_reuse ) ;
DEFINE_BUSY_EVENT ( xfs_extent_busy_clear ) ;
xfs: Improve scalability of busy extent tracking
When we free a metadata extent, we record it in the per-AG busy
extent array so that it is not re-used before the freeing
transaction hits the disk. This array is fixed size, so when it
overflows we make further allocation transactions synchronous
because we cannot track more freed extents until those transactions
hit the disk and are completed. Under heavy mixed allocation and
freeing workloads with large log buffers, we can overflow this array
quite easily.
Further, the array is sparsely populated, which means that inserts
need to search for a free slot, and array searches often have to
search many more slots than are actually used to check all the
busy extents. Quite inefficient, really.
To enable this aspect of extent freeing to scale better, we need
a structure that can grow dynamically. While in other areas of
XFS we have used radix trees, the extents being freed are at random
locations on disk so are better suited to being indexed by an rbtree.
So, use a per-AG rbtree indexed by block number to track busy
extents. This incurs a memory allocation when marking an extent
busy, but should not occur too often in low memory situations. This
should scale to an arbitrary number of extents so should not be a
limitation for features such as in-memory aggregation of
transactions.
However, there are still situations where we can't avoid allocating
busy extents (such as allocation from the AGFL). To minimise the
overhead of such occurrences, we need to avoid doing a synchronous
log force while holding the AGF locked to ensure that the previous
transactions are safely on disk before we use the extent. We can do
this by marking the transaction doing the allocation as synchronous
rather than issuing a log force.
Because of the locking involved and the ordering of transactions,
the synchronous transaction provides the same guarantees as a
synchronous log force because it ensures that all the prior
transactions are already on disk when the synchronous transaction
hits the disk. i.e. it preserves the free->allocate order of the
extent correctly in recovery.
By doing this, we avoid holding the AGF locked while log writes are
in progress, hence reducing the length of time the lock is held and
therefore we increase the rate at which we can allocate and free
from the allocation group, thereby increasing overall throughput.
The only problem with this approach is that when a metadata buffer is
marked stale (e.g. a directory block is removed), then the buffer remains
pinned and locked until the log goes to disk. The issue here is that
if that stale buffer is reallocated in a subsequent transaction, the
attempt to lock that buffer in the transaction will hang waiting for
the log to go to disk to unlock and unpin the buffer. Hence if
someone tries to lock a pinned, stale, locked buffer we need to
push on the log to get it unlocked ASAP. Effectively we are trading
off a guaranteed log force for a much less common trigger for log
force to occur.
Ideally we should not reallocate busy extents. That is a much more
complex fix to the problem as it involves direct intervention in the
allocation btree searches in many places. This is left to a future
set of modifications.
Finally, now that we track busy extents in allocated memory, we
don't need the descriptors in the transaction structure to point to
them. We can replace the complex busy chunk infrastructure with a
simple linked list of busy extents. This allows us to remove a large
chunk of code, making the overall change a net reduction in code
size.
Signed-off-by: Dave Chinner <david@fromorbit.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Alex Elder <aelder@sgi.com>
2010-05-21 06:07:08 +04:00
2012-04-29 14:41:10 +04:00
/*
 * xfs_extent_busy_trim - standalone tracepoint recording two extents
 * within one allocation group.
 *
 * Recorded fields:
 *   dev        - mp->m_super->s_dev
 *   agno       - @agno as passed by the caller
 *   agbno, len - first start block / length pair
 *   tbno, tlen - second start block / length pair
 *                (presumably the extent after trimming; confirm against the
 *                caller)
 *
 * All values except dev are printed as unsigned decimals.
 */
TRACE_EVENT ( xfs_extent_busy_trim ,
2011-04-24 23:06:15 +04:00
TP_PROTO ( struct xfs_mount * mp , xfs_agnumber_t agno ,
xfs_agblock_t agbno , xfs_extlen_t len ,
xfs_agblock_t tbno , xfs_extlen_t tlen ) ,
TP_ARGS ( mp , agno , agbno , len , tbno , tlen ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_agnumber_t , agno )
__field ( xfs_agblock_t , agbno )
__field ( xfs_extlen_t , len )
__field ( xfs_agblock_t , tbno )
__field ( xfs_extlen_t , tlen )
) ,
TP_fast_assign (
__entry - > dev = mp - > m_super - > s_dev ;
__entry - > agno = agno ;
__entry - > agbno = agbno ;
__entry - > len = len ;
__entry - > tbno = tbno ;
__entry - > tlen = tlen ;
) ,
TP_printk ( " dev %d:%d agno %u agbno %u len %u tbno %u tlen %u " ,
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > agno ,
__entry - > agbno ,
__entry - > len ,
__entry - > tbno ,
__entry - > tlen )
) ;
2018-03-15 20:51:58 +03:00
/*
 * xfs_agf_class - event class for tracepoints that dump the contents of an
 * AGF header.
 *
 * Every value read from @agf is converted with be32_to_cpu() before it is
 * stored. Recorded fields:
 *   dev                  - mp->m_super->s_dev
 *   agno                 - agf->agf_seqno
 *   flags                - @flags as passed by the caller
 *   length               - agf->agf_length
 *   bno_root, cnt_root   - agf->agf_roots[XFS_BTNUM_BNO / XFS_BTNUM_CNT]
 *   bno_level, cnt_level - agf->agf_levels[XFS_BTNUM_BNO / XFS_BTNUM_CNT]
 *   flfirst, fllast, flcount
 *                        - agf->agf_flfirst, agf_fllast, agf_flcount
 *   freeblks, longest    - agf->agf_freeblks, agf_longest
 *   caller_ip            - @caller_ip as passed by the caller
 *
 * flags is decoded with __print_flags() against the XFS_AGF_FLAGS table,
 * using "|" as the separator; caller_ip is printed with %pS.
 */
DECLARE_EVENT_CLASS ( xfs_agf_class ,
2009-12-15 02:14:59 +03:00
TP_PROTO ( struct xfs_mount * mp , struct xfs_agf * agf , int flags ,
unsigned long caller_ip ) ,
TP_ARGS ( mp , agf , flags , caller_ip ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_agnumber_t , agno )
__field ( int , flags )
__field ( __u32 , length )
__field ( __u32 , bno_root )
__field ( __u32 , cnt_root )
__field ( __u32 , bno_level )
__field ( __u32 , cnt_level )
__field ( __u32 , flfirst )
__field ( __u32 , fllast )
__field ( __u32 , flcount )
__field ( __u32 , freeblks )
__field ( __u32 , longest )
__field ( unsigned long , caller_ip )
) ,
TP_fast_assign (
/* Each assignment is its own statement, terminated by a semicolon. */
__entry - > dev = mp - > m_super - > s_dev ;
__entry - > agno = be32_to_cpu ( agf - > agf_seqno ) ;
__entry - > flags = flags ;
__entry - > length = be32_to_cpu ( agf - > agf_length ) ;
__entry - > bno_root = be32_to_cpu ( agf - > agf_roots [ XFS_BTNUM_BNO ] ) ;
__entry - > cnt_root = be32_to_cpu ( agf - > agf_roots [ XFS_BTNUM_CNT ] ) ;
__entry - > bno_level =
be32_to_cpu ( agf - > agf_levels [ XFS_BTNUM_BNO ] ) ;
__entry - > cnt_level =
be32_to_cpu ( agf - > agf_levels [ XFS_BTNUM_CNT ] ) ;
__entry - > flfirst = be32_to_cpu ( agf - > agf_flfirst ) ;
__entry - > fllast = be32_to_cpu ( agf - > agf_fllast ) ;
__entry - > flcount = be32_to_cpu ( agf - > agf_flcount ) ;
__entry - > freeblks = be32_to_cpu ( agf - > agf_freeblks ) ;
__entry - > longest = be32_to_cpu ( agf - > agf_longest ) ;
__entry - > caller_ip = caller_ip ;
) ,
TP_printk ( " dev %d:%d agno %u flags %s length %u roots b %u c %u "
" levels b %u c %u flfirst %u fllast %u flcount %u "
2018-01-09 22:46:05 +03:00
" freeblks %u longest %u caller %pS " ,
2009-12-15 02:14:59 +03:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > agno ,
__print_flags ( __entry - > flags , " | " , XFS_AGF_FLAGS ) ,
__entry - > length ,
__entry - > bno_root ,
__entry - > cnt_root ,
__entry - > bno_level ,
__entry - > cnt_level ,
__entry - > flfirst ,
__entry - > fllast ,
__entry - > flcount ,
__entry - > freeblks ,
__entry - > longest ,
( void * ) __entry - > caller_ip )
) ;
2018-03-15 20:51:58 +03:00
/*
 * DEFINE_AGF_EVENT(name): instantiate a tracepoint called `name` from
 * xfs_agf_class, taking (mp, agf, flags, caller_ip). The two events
 * defined with it here are xfs_agf and xfs_agfl_reset.
 */
# define DEFINE_AGF_EVENT(name) \
DEFINE_EVENT ( xfs_agf_class , name , \
TP_PROTO ( struct xfs_mount * mp , struct xfs_agf * agf , int flags , \
unsigned long caller_ip ) , \
TP_ARGS ( mp , agf , flags , caller_ip ) )
DEFINE_AGF_EVENT ( xfs_agf ) ;
DEFINE_AGF_EVENT ( xfs_agfl_reset ) ;
2009-12-15 02:14:59 +03:00
/*
 * Tracepoint xfs_free_extent: records the device, AG number, the extent
 * (agbno/len), the reservation type (stored as a plain int) and the
 * haveleft/haveright flags. When printed, the two flags are collapsed into
 * a single word: "both", "left", "right" or "none".
 * NOTE(review): haveleft/haveright presumably say whether a neighbouring
 * free extent exists on that side -- confirm against the caller.
 */
TRACE_EVENT ( xfs_free_extent ,
TP_PROTO ( struct xfs_mount * mp , xfs_agnumber_t agno , xfs_agblock_t agbno ,
xfs: set up per-AG free space reservations
One unfortunate quirk of the reference count and reverse mapping
btrees -- they can expand in size when blocks are written to *other*
allocation groups if, say, one large extent becomes a lot of tiny
extents. Since we don't want to start throwing errors in the middle
of CoWing, we need to reserve some blocks to handle future expansion.
The transaction block reservation counters aren't sufficient here
because we have to have a reserve of blocks in every AG, not just
somewhere in the filesystem.
Therefore, create two per-AG block reservation pools. One feeds the
AGFL so that rmapbt expansion always succeeds, and the other feeds all
other metadata so that refcountbt expansion never fails.
Use the count of how many reserved blocks we need to have on hand to
create a virtual reservation in the AG. Through selective clamping of
the maximum length of allocation requests and of the length of the
longest free extent, we can make it look like there's less free space
in the AG unless the reservation owner is asking for blocks.
In other words, play some accounting tricks in-core to make sure that
we always have blocks available. On the plus side, there's nothing to
clean up if we crash, which is contrast to the strategy that the rough
draft used (actually removing extents from the freespace btrees).
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-09-19 03:30:52 +03:00
xfs_extlen_t len , enum xfs_ag_resv_type resv , int haveleft ,
int haveright ) ,
TP_ARGS ( mp , agno , agbno , len , resv , haveleft , haveright ) ,
2009-12-15 02:14:59 +03:00
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_agnumber_t , agno )
__field ( xfs_agblock_t , agbno )
__field ( xfs_extlen_t , len )
xfs: set up per-AG free space reservations
One unfortunate quirk of the reference count and reverse mapping
btrees -- they can expand in size when blocks are written to *other*
allocation groups if, say, one large extent becomes a lot of tiny
extents. Since we don't want to start throwing errors in the middle
of CoWing, we need to reserve some blocks to handle future expansion.
The transaction block reservation counters aren't sufficient here
because we have to have a reserve of blocks in every AG, not just
somewhere in the filesystem.
Therefore, create two per-AG block reservation pools. One feeds the
AGFL so that rmapbt expansion always succeeds, and the other feeds all
other metadata so that refcountbt expansion never fails.
Use the count of how many reserved blocks we need to have on hand to
create a virtual reservation in the AG. Through selective clamping of
the maximum length of allocation requests and of the length of the
longest free extent, we can make it look like there's less free space
in the AG unless the reservation owner is asking for blocks.
In other words, play some accounting tricks in-core to make sure that
we always have blocks available. On the plus side, there's nothing to
clean up if we crash, which is contrast to the strategy that the rough
draft used (actually removing extents from the freespace btrees).
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-09-19 03:30:52 +03:00
__field ( int , resv )
2009-12-15 02:14:59 +03:00
__field ( int , haveleft )
__field ( int , haveright )
) ,
TP_fast_assign (
__entry - > dev = mp - > m_super - > s_dev ;
__entry - > agno = agno ;
__entry - > agbno = agbno ;
__entry - > len = len ;
xfs: set up per-AG free space reservations
One unfortunate quirk of the reference count and reverse mapping
btrees -- they can expand in size when blocks are written to *other*
allocation groups if, say, one large extent becomes a lot of tiny
extents. Since we don't want to start throwing errors in the middle
of CoWing, we need to reserve some blocks to handle future expansion.
The transaction block reservation counters aren't sufficient here
because we have to have a reserve of blocks in every AG, not just
somewhere in the filesystem.
Therefore, create two per-AG block reservation pools. One feeds the
AGFL so that rmapbt expansion always succeeds, and the other feeds all
other metadata so that refcountbt expansion never fails.
Use the count of how many reserved blocks we need to have on hand to
create a virtual reservation in the AG. Through selective clamping of
the maximum length of allocation requests and of the length of the
longest free extent, we can make it look like there's less free space
in the AG unless the reservation owner is asking for blocks.
In other words, play some accounting tricks in-core to make sure that
we always have blocks available. On the plus side, there's nothing to
clean up if we crash, which is contrast to the strategy that the rough
draft used (actually removing extents from the freespace btrees).
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-09-19 03:30:52 +03:00
__entry - > resv = resv ;
2009-12-15 02:14:59 +03:00
__entry - > haveleft = haveleft ;
__entry - > haveright = haveright ;
) ,
xfs: set up per-AG free space reservations
One unfortunate quirk of the reference count and reverse mapping
btrees -- they can expand in size when blocks are written to *other*
allocation groups if, say, one large extent becomes a lot of tiny
extents. Since we don't want to start throwing errors in the middle
of CoWing, we need to reserve some blocks to handle future expansion.
The transaction block reservation counters aren't sufficient here
because we have to have a reserve of blocks in every AG, not just
somewhere in the filesystem.
Therefore, create two per-AG block reservation pools. One feeds the
AGFL so that rmapbt expansion always succeeds, and the other feeds all
other metadata so that refcountbt expansion never fails.
Use the count of how many reserved blocks we need to have on hand to
create a virtual reservation in the AG. Through selective clamping of
the maximum length of allocation requests and of the length of the
longest free extent, we can make it look like there's less free space
in the AG unless the reservation owner is asking for blocks.
In other words, play some accounting tricks in-core to make sure that
we always have blocks available. On the plus side, there's nothing to
clean up if we crash, which is contrast to the strategy that the rough
draft used (actually removing extents from the freespace btrees).
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-09-19 03:30:52 +03:00
TP_printk ( " dev %d:%d agno %u agbno %u len %u resv %d %s " ,
2009-12-15 02:14:59 +03:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > agno ,
__entry - > agbno ,
__entry - > len ,
xfs: set up per-AG free space reservations
One unfortunate quirk of the reference count and reverse mapping
btrees -- they can expand in size when blocks are written to *other*
allocation groups if, say, one large extent becomes a lot of tiny
extents. Since we don't want to start throwing errors in the middle
of CoWing, we need to reserve some blocks to handle future expansion.
The transaction block reservation counters aren't sufficient here
because we have to have a reserve of blocks in every AG, not just
somewhere in the filesystem.
Therefore, create two per-AG block reservation pools. One feeds the
AGFL so that rmapbt expansion always succeeds, and the other feeds all
other metadata so that refcountbt expansion never fails.
Use the count of how many reserved blocks we need to have on hand to
create a virtual reservation in the AG. Through selective clamping of
the maximum length of allocation requests and of the length of the
longest free extent, we can make it look like there's less free space
in the AG unless the reservation owner is asking for blocks.
In other words, play some accounting tricks in-core to make sure that
we always have blocks available. On the plus side, there's nothing to
clean up if we crash, which is contrast to the strategy that the rough
draft used (actually removing extents from the freespace btrees).
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-09-19 03:30:52 +03:00
__entry - > resv ,
2009-12-15 02:14:59 +03:00
/* Collapse the two flags into one word: both / left / right / none. */
__entry - > haveleft ?
( __entry - > haveright ? " both " : " left " ) :
( __entry - > haveright ? " right " : " none " ) )
) ;
2009-12-21 17:03:03 +03:00
/*
 * Event class that snapshots a struct xfs_alloc_arg.
 *
 * Every recorded field is copied straight from the matching member of
 * *args, except dev (taken from args->mp->m_super->s_dev) and firstblock
 * (taken from args->tp->t_firstblock). When printed, type and otype are
 * decoded symbolically through XFS_ALLOC_TYPES, datatype is shown in hex
 * and firstblock as a 64-bit hex value.
 */
DECLARE_EVENT_CLASS ( xfs_alloc_class ,
TP_PROTO ( struct xfs_alloc_arg * args ) ,
TP_ARGS ( args ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_agnumber_t , agno )
__field ( xfs_agblock_t , agbno )
__field ( xfs_extlen_t , minlen )
__field ( xfs_extlen_t , maxlen )
__field ( xfs_extlen_t , mod )
__field ( xfs_extlen_t , prod )
__field ( xfs_extlen_t , minleft )
__field ( xfs_extlen_t , total )
__field ( xfs_extlen_t , alignment )
__field ( xfs_extlen_t , minalignslop )
__field ( xfs_extlen_t , len )
__field ( short , type )
__field ( short , otype )
__field ( char , wasdel )
__field ( char , wasfromfl )
xfs: set up per-AG free space reservations
One unfortunate quirk of the reference count and reverse mapping
btrees -- they can expand in size when blocks are written to *other*
allocation groups if, say, one large extent becomes a lot of tiny
extents. Since we don't want to start throwing errors in the middle
of CoWing, we need to reserve some blocks to handle future expansion.
The transaction block reservation counters aren't sufficient here
because we have to have a reserve of blocks in every AG, not just
somewhere in the filesystem.
Therefore, create two per-AG block reservation pools. One feeds the
AGFL so that rmapbt expansion always succeeds, and the other feeds all
other metadata so that refcountbt expansion never fails.
Use the count of how many reserved blocks we need to have on hand to
create a virtual reservation in the AG. Through selective clamping of
the maximum length of allocation requests and of the length of the
longest free extent, we can make it look like there's less free space
in the AG unless the reservation owner is asking for blocks.
In other words, play some accounting tricks in-core to make sure that
we always have blocks available. On the plus side, there's nothing to
clean up if we crash, which is contrast to the strategy that the rough
draft used (actually removing extents from the freespace btrees).
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-09-19 03:30:52 +03:00
__field ( int , resv )
xfs: remote attribute blocks aren't really userdata
When adding a new remote attribute, we write the attribute to the
new extent before the allocation transaction is committed. This
means we cannot reuse busy extents as that violates crash
consistency semantics. Hence we currently treat remote attribute
extent allocation like userdata because it has the same overwrite
ordering constraints as userdata.
Unfortunately, this also allows the allocator to incorrectly apply
extent size hints to the remote attribute extent allocation. This
results in interesting failures, such as transaction block
reservation overruns and in-memory inode attribute fork corruption.
To fix this, we need to separate the busy extent reuse configuration
from the userdata configuration. This changes the definition of
XFS_BMAPI_METADATA slightly - it now means that allocation is
metadata and reuse of busy extents is acceptible due to the metadata
ordering semantics of the journal. If this flag is not set, it
means the allocation is that has unordered data writeback, and hence
busy extent reuse is not allowed. It no longer implies the
allocation is for user data, just that the data write will not be
strictly ordered. This matches the semantics for both user data
and remote attribute block allocation.
As such, This patch changes the "userdata" field to a "datatype"
field, and adds a "no busy reuse" flag to the field.
When we detect an unordered data extent allocation, we immediately set
the no reuse flag. We then set the "user data" flags based on the
inode fork we are allocating the extent to. Hence we only set
userdata flags on data fork allocations now and consider attribute
fork remote extents to be an unordered metadata extent.
The result is that remote attribute extents now have the expected
allocation semantics, and the data fork allocation behaviour is
completely unchanged.
It should be noted that there may be other ways to fix this (e.g.
use ordered metadata buffers for the remote attribute extent data
write) but they are more invasive and difficult to validate both
from a design and implementation POV. Hence this patch takes the
simple, obvious route to fixing the problem...
Reported-and-tested-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-09-26 01:21:28 +03:00
__field ( int , datatype )
2009-12-21 17:03:03 +03:00
__field ( xfs_fsblock_t , firstblock )
) ,
TP_fast_assign (
__entry - > dev = args - > mp - > m_super - > s_dev ;
__entry - > agno = args - > agno ;
__entry - > agbno = args - > agbno ;
__entry - > minlen = args - > minlen ;
__entry - > maxlen = args - > maxlen ;
__entry - > mod = args - > mod ;
__entry - > prod = args - > prod ;
__entry - > minleft = args - > minleft ;
__entry - > total = args - > total ;
__entry - > alignment = args - > alignment ;
__entry - > minalignslop = args - > minalignslop ;
__entry - > len = args - > len ;
__entry - > type = args - > type ;
__entry - > otype = args - > otype ;
__entry - > wasdel = args - > wasdel ;
__entry - > wasfromfl = args - > wasfromfl ;
xfs: set up per-AG free space reservations
One unfortunate quirk of the reference count and reverse mapping
btrees -- they can expand in size when blocks are written to *other*
allocation groups if, say, one large extent becomes a lot of tiny
extents. Since we don't want to start throwing errors in the middle
of CoWing, we need to reserve some blocks to handle future expansion.
The transaction block reservation counters aren't sufficient here
because we have to have a reserve of blocks in every AG, not just
somewhere in the filesystem.
Therefore, create two per-AG block reservation pools. One feeds the
AGFL so that rmapbt expansion always succeeds, and the other feeds all
other metadata so that refcountbt expansion never fails.
Use the count of how many reserved blocks we need to have on hand to
create a virtual reservation in the AG. Through selective clamping of
the maximum length of allocation requests and of the length of the
longest free extent, we can make it look like there's less free space
in the AG unless the reservation owner is asking for blocks.
In other words, play some accounting tricks in-core to make sure that
we always have blocks available. On the plus side, there's nothing to
clean up if we crash, which is contrast to the strategy that the rough
draft used (actually removing extents from the freespace btrees).
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-09-19 03:30:52 +03:00
__entry - > resv = args - > resv ;
xfs: remote attribute blocks aren't really userdata
When adding a new remote attribute, we write the attribute to the
new extent before the allocation transaction is committed. This
means we cannot reuse busy extents as that violates crash
consistency semantics. Hence we currently treat remote attribute
extent allocation like userdata because it has the same overwrite
ordering constraints as userdata.
Unfortunately, this also allows the allocator to incorrectly apply
extent size hints to the remote attribute extent allocation. This
results in interesting failures, such as transaction block
reservation overruns and in-memory inode attribute fork corruption.
To fix this, we need to separate the busy extent reuse configuration
from the userdata configuration. This changes the definition of
XFS_BMAPI_METADATA slightly - it now means that allocation is
metadata and reuse of busy extents is acceptible due to the metadata
ordering semantics of the journal. If this flag is not set, it
means the allocation is that has unordered data writeback, and hence
busy extent reuse is not allowed. It no longer implies the
allocation is for user data, just that the data write will not be
strictly ordered. This matches the semantics for both user data
and remote attribute block allocation.
As such, This patch changes the "userdata" field to a "datatype"
field, and adds a "no busy reuse" flag to the field.
When we detect an unordered data extent allocation, we immediately set
the no reuse flag. We then set the "user data" flags based on the
inode fork we are allocating the extent to. Hence we only set
userdata flags on data fork allocations now and consider attribute
fork remote extents to be an unordered metadata extent.
The result is that remote attribute extents now have the expected
allocation semantics, and the data fork allocation behaviour is
completely unchanged.
It should be noted that there may be other ways to fix this (e.g.
use ordered metadata buffers for the remote attribute extent data
write) but they are more invasive and difficult to validate both
from a design and implementation POV. Hence this patch takes the
simple, obvious route to fixing the problem...
Reported-and-tested-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-09-26 01:21:28 +03:00
__entry - > datatype = args - > datatype ;
2018-07-12 08:26:30 +03:00
/* args->tp is dereferenced unconditionally here. */
__entry - > firstblock = args - > tp - > t_firstblock ;
2009-12-21 17:03:03 +03:00
) ,
TP_printk ( " dev %d:%d agno %u agbno %u minlen %u maxlen %u mod %u "
" prod %u minleft %u total %u alignment %u minalignslop %u "
xfs: set up per-AG free space reservations
One unfortunate quirk of the reference count and reverse mapping
btrees -- they can expand in size when blocks are written to *other*
allocation groups if, say, one large extent becomes a lot of tiny
extents. Since we don't want to start throwing errors in the middle
of CoWing, we need to reserve some blocks to handle future expansion.
The transaction block reservation counters aren't sufficient here
because we have to have a reserve of blocks in every AG, not just
somewhere in the filesystem.
Therefore, create two per-AG block reservation pools. One feeds the
AGFL so that rmapbt expansion always succeeds, and the other feeds all
other metadata so that refcountbt expansion never fails.
Use the count of how many reserved blocks we need to have on hand to
create a virtual reservation in the AG. Through selective clamping of
the maximum length of allocation requests and of the length of the
longest free extent, we can make it look like there's less free space
in the AG unless the reservation owner is asking for blocks.
In other words, play some accounting tricks in-core to make sure that
we always have blocks available. On the plus side, there's nothing to
clean up if we crash, which is contrast to the strategy that the rough
draft used (actually removing extents from the freespace btrees).
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-09-19 03:30:52 +03:00
" len %u type %s otype %s wasdel %d wasfromfl %d resv %d "
xfs: remote attribute blocks aren't really userdata
When adding a new remote attribute, we write the attribute to the
new extent before the allocation transaction is committed. This
means we cannot reuse busy extents as that violates crash
consistency semantics. Hence we currently treat remote attribute
extent allocation like userdata because it has the same overwrite
ordering constraints as userdata.
Unfortunately, this also allows the allocator to incorrectly apply
extent size hints to the remote attribute extent allocation. This
results in interesting failures, such as transaction block
reservation overruns and in-memory inode attribute fork corruption.
To fix this, we need to separate the busy extent reuse configuration
from the userdata configuration. This changes the definition of
XFS_BMAPI_METADATA slightly - it now means that allocation is
metadata and reuse of busy extents is acceptible due to the metadata
ordering semantics of the journal. If this flag is not set, it
means the allocation is that has unordered data writeback, and hence
busy extent reuse is not allowed. It no longer implies the
allocation is for user data, just that the data write will not be
strictly ordered. This matches the semantics for both user data
and remote attribute block allocation.
As such, This patch changes the "userdata" field to a "datatype"
field, and adds a "no busy reuse" flag to the field.
When we detect an unordered data extent allocation, we immediately set
the no reuse flag. We then set the "user data" flags based on the
inode fork we are allocating the extent to. Hence we only set
userdata flags on data fork allocations now and consider attribute
fork remote extents to be an unordered metadata extent.
The result is that remote attribute extents now have the expected
allocation semantics, and the data fork allocation behaviour is
completely unchanged.
It should be noted that there may be other ways to fix this (e.g.
use ordered metadata buffers for the remote attribute extent data
write) but they are more invasive and difficult to validate both
from a design and implementation POV. Hence this patch takes the
simple, obvious route to fixing the problem...
Reported-and-tested-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-09-26 01:21:28 +03:00
" datatype 0x%x firstblock 0x%llx " ,
2009-12-21 17:03:03 +03:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > agno ,
__entry - > agbno ,
__entry - > minlen ,
__entry - > maxlen ,
__entry - > mod ,
__entry - > prod ,
__entry - > minleft ,
__entry - > total ,
__entry - > alignment ,
__entry - > minalignslop ,
__entry - > len ,
__print_symbolic ( __entry - > type , XFS_ALLOC_TYPES ) ,
__print_symbolic ( __entry - > otype , XFS_ALLOC_TYPES ) ,
__entry - > wasdel ,
__entry - > wasfromfl ,
xfs: set up per-AG free space reservations
One unfortunate quirk of the reference count and reverse mapping
btrees -- they can expand in size when blocks are written to *other*
allocation groups if, say, one large extent becomes a lot of tiny
extents. Since we don't want to start throwing errors in the middle
of CoWing, we need to reserve some blocks to handle future expansion.
The transaction block reservation counters aren't sufficient here
because we have to have a reserve of blocks in every AG, not just
somewhere in the filesystem.
Therefore, create two per-AG block reservation pools. One feeds the
AGFL so that rmapbt expansion always succeeds, and the other feeds all
other metadata so that refcountbt expansion never fails.
Use the count of how many reserved blocks we need to have on hand to
create a virtual reservation in the AG. Through selective clamping of
the maximum length of allocation requests and of the length of the
longest free extent, we can make it look like there's less free space
in the AG unless the reservation owner is asking for blocks.
In other words, play some accounting tricks in-core to make sure that
we always have blocks available. On the plus side, there's nothing to
clean up if we crash, which is contrast to the strategy that the rough
draft used (actually removing extents from the freespace btrees).
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-09-19 03:30:52 +03:00
__entry - > resv ,
xfs: remote attribute blocks aren't really userdata
When adding a new remote attribute, we write the attribute to the
new extent before the allocation transaction is committed. This
means we cannot reuse busy extents as that violates crash
consistency semantics. Hence we currently treat remote attribute
extent allocation like userdata because it has the same overwrite
ordering constraints as userdata.
Unfortunately, this also allows the allocator to incorrectly apply
extent size hints to the remote attribute extent allocation. This
results in interesting failures, such as transaction block
reservation overruns and in-memory inode attribute fork corruption.
To fix this, we need to separate the busy extent reuse configuration
from the userdata configuration. This changes the definition of
XFS_BMAPI_METADATA slightly - it now means that allocation is
metadata and reuse of busy extents is acceptible due to the metadata
ordering semantics of the journal. If this flag is not set, it
means the allocation is that has unordered data writeback, and hence
busy extent reuse is not allowed. It no longer implies the
allocation is for user data, just that the data write will not be
strictly ordered. This matches the semantics for both user data
and remote attribute block allocation.
As such, This patch changes the "userdata" field to a "datatype"
field, and adds a "no busy reuse" flag to the field.
When we detect an unordered data extent allocation, we immediately set
the no reuse flag. We then set the "user data" flags based on the
inode fork we are allocating the extent to. Hence we only set
userdata flags on data fork allocations now and consider attribute
fork remote extents to be an unordered metadata extent.
The result is that remote attribute extents now have the expected
allocation semantics, and the data fork allocation behaviour is
completely unchanged.
It should be noted that there may be other ways to fix this (e.g.
use ordered metadata buffers for the remote attribute extent data
write) but they are more invasive and difficult to validate both
from a design and implementation POV. Hence this patch takes the
simple, obvious route to fixing the problem...
Reported-and-tested-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-09-26 01:21:28 +03:00
__entry - > datatype ,
2011-04-24 23:02:58 +04:00
( unsigned long long ) __entry - > firstblock )
2009-12-15 02:14:59 +03:00
)
2009-12-21 17:03:03 +03:00
/*
 * DEFINE_ALLOC_EVENT(name) - instantiate tracepoint "name" from the
 * xfs_alloc_class event class.  Every event defined this way takes a
 * single struct xfs_alloc_arg pointer and shares that class's record
 * layout and output format.
 */
# define DEFINE_ALLOC_EVENT(name) \
DEFINE_EVENT ( xfs_alloc_class , name , \
TP_PROTO ( struct xfs_alloc_arg * args ) , \
TP_ARGS ( args ) )
2009-12-15 02:14:59 +03:00
DEFINE_ALLOC_EVENT ( xfs_alloc_exact_done ) ;
2010-12-10 18:03:57 +03:00
DEFINE_ALLOC_EVENT ( xfs_alloc_exact_notfound ) ;
2009-12-15 02:14:59 +03:00
DEFINE_ALLOC_EVENT ( xfs_alloc_exact_error ) ;
DEFINE_ALLOC_EVENT ( xfs_alloc_near_nominleft ) ;
DEFINE_ALLOC_EVENT ( xfs_alloc_near_first ) ;
2019-10-14 03:10:35 +03:00
DEFINE_ALLOC_EVENT ( xfs_alloc_cur ) ;
2019-10-14 03:10:33 +03:00
DEFINE_ALLOC_EVENT ( xfs_alloc_cur_right ) ;
DEFINE_ALLOC_EVENT ( xfs_alloc_cur_left ) ;
2019-10-14 03:10:36 +03:00
DEFINE_ALLOC_EVENT ( xfs_alloc_cur_lookup ) ;
DEFINE_ALLOC_EVENT ( xfs_alloc_cur_lookup_done ) ;
2009-12-15 02:14:59 +03:00
DEFINE_ALLOC_EVENT ( xfs_alloc_near_error ) ;
2011-04-24 23:06:15 +04:00
DEFINE_ALLOC_EVENT ( xfs_alloc_near_noentry ) ;
DEFINE_ALLOC_EVENT ( xfs_alloc_near_busy ) ;
2009-12-15 02:14:59 +03:00
DEFINE_ALLOC_EVENT ( xfs_alloc_size_neither ) ;
DEFINE_ALLOC_EVENT ( xfs_alloc_size_noentry ) ;
DEFINE_ALLOC_EVENT ( xfs_alloc_size_nominleft ) ;
DEFINE_ALLOC_EVENT ( xfs_alloc_size_done ) ;
DEFINE_ALLOC_EVENT ( xfs_alloc_size_error ) ;
2011-04-24 23:06:15 +04:00
DEFINE_ALLOC_EVENT ( xfs_alloc_size_busy ) ;
2009-12-15 02:14:59 +03:00
DEFINE_ALLOC_EVENT ( xfs_alloc_small_freelist ) ;
DEFINE_ALLOC_EVENT ( xfs_alloc_small_notenough ) ;
DEFINE_ALLOC_EVENT ( xfs_alloc_small_done ) ;
DEFINE_ALLOC_EVENT ( xfs_alloc_small_error ) ;
DEFINE_ALLOC_EVENT ( xfs_alloc_vextent_badargs ) ;
DEFINE_ALLOC_EVENT ( xfs_alloc_vextent_nofix ) ;
DEFINE_ALLOC_EVENT ( xfs_alloc_vextent_noagbp ) ;
DEFINE_ALLOC_EVENT ( xfs_alloc_vextent_loopfailed ) ;
DEFINE_ALLOC_EVENT ( xfs_alloc_vextent_allfailed ) ;
2019-10-14 03:10:33 +03:00
/*
 * xfs_alloc_cur_check - standalone tracepoint (not part of an event class).
 *
 * Records the device of @mp together with the @btnum, @bno, @len, @diff
 * and @new arguments exactly as passed in.  On output the btree number is
 * rendered symbolically through XFS_BTNUM_STRINGS, bno/len/diff are shown
 * in hex, and @new is shown as an integer.
 */
TRACE_EVENT ( xfs_alloc_cur_check ,
TP_PROTO ( struct xfs_mount * mp , xfs_btnum_t btnum , xfs_agblock_t bno ,
xfs_extlen_t len , xfs_extlen_t diff , bool new ) ,
TP_ARGS ( mp , btnum , bno , len , diff , new ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_btnum_t , btnum )
__field ( xfs_agblock_t , bno )
__field ( xfs_extlen_t , len )
__field ( xfs_extlen_t , diff )
__field ( bool , new )
) ,
TP_fast_assign (
__entry - > dev = mp - > m_super - > s_dev ;
__entry - > btnum = btnum ;
__entry - > bno = bno ;
__entry - > len = len ;
__entry - > diff = diff ;
__entry - > new = new ;
) ,
TP_printk ( " dev %d:%d btree %s bno 0x%x len 0x%x diff 0x%x new %d " ,
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__print_symbolic ( __entry - > btnum , XFS_BTNUM_STRINGS ) ,
__entry - > bno , __entry - > len , __entry - > diff , __entry - > new )
)
2012-03-22 09:15:13 +04:00
/*
 * xfs_da_class - event class for tracepoints that take a struct xfs_da_args.
 *
 * Captures the device and inode number of args->dp, a copy of the name,
 * its length, the hash value, args->inumber and the op_flags.
 *
 * The name is copied with memcpy() into a dynamic array sized to exactly
 * args->namelen bytes, so no NUL terminator is stored; it is therefore
 * printed with "%.*s" bounded by namelen.  When namelen is zero nothing is
 * copied and NULL is passed to the format instead.
 */
DECLARE_EVENT_CLASS ( xfs_da_class ,
2009-12-21 17:03:03 +03:00
TP_PROTO ( struct xfs_da_args * args ) ,
TP_ARGS ( args ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_ino_t , ino )
__dynamic_array ( char , name , args - > namelen )
__field ( int , namelen )
__field ( xfs_dahash_t , hashval )
__field ( xfs_ino_t , inumber )
__field ( int , op_flags )
) ,
TP_fast_assign (
__entry - > dev = VFS_I ( args - > dp ) - > i_sb - > s_dev ;
__entry - > ino = args - > dp - > i_ino ;
if ( args - > namelen )
memcpy ( __get_str ( name ) , args - > name , args - > namelen ) ;
__entry - > namelen = args - > namelen ;
__entry - > hashval = args - > hashval ;
__entry - > inumber = args - > inumber ;
__entry - > op_flags = args - > op_flags ;
) ,
TP_printk ( " dev %d:%d ino 0x%llx name %.*s namelen %d hashval 0x%x "
" inumber 0x%llx op_flags %s " ,
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > ino ,
__entry - > namelen ,
__entry - > namelen ? __get_str ( name ) : NULL ,
__entry - > namelen ,
__entry - > hashval ,
__entry - > inumber ,
__print_flags ( __entry - > op_flags , " | " , XFS_DA_OP_FLAGS ) )
)
/*
 * DEFINE_DIR2_EVENT(name) - instantiate tracepoint "name" from
 * xfs_da_class.  Each event takes a single struct xfs_da_args pointer.
 */
# define DEFINE_DIR2_EVENT(name) \
2012-03-22 09:15:13 +04:00
DEFINE_EVENT ( xfs_da_class , name , \
2009-12-15 02:14:59 +03:00
TP_PROTO ( struct xfs_da_args * args ) , \
2009-12-21 17:03:03 +03:00
TP_ARGS ( args ) )
DEFINE_DIR2_EVENT ( xfs_dir2_sf_addname ) ;
DEFINE_DIR2_EVENT ( xfs_dir2_sf_create ) ;
DEFINE_DIR2_EVENT ( xfs_dir2_sf_lookup ) ;
DEFINE_DIR2_EVENT ( xfs_dir2_sf_replace ) ;
DEFINE_DIR2_EVENT ( xfs_dir2_sf_removename ) ;
DEFINE_DIR2_EVENT ( xfs_dir2_sf_toino4 ) ;
DEFINE_DIR2_EVENT ( xfs_dir2_sf_toino8 ) ;
DEFINE_DIR2_EVENT ( xfs_dir2_sf_to_block ) ;
DEFINE_DIR2_EVENT ( xfs_dir2_block_addname ) ;
DEFINE_DIR2_EVENT ( xfs_dir2_block_lookup ) ;
DEFINE_DIR2_EVENT ( xfs_dir2_block_replace ) ;
DEFINE_DIR2_EVENT ( xfs_dir2_block_removename ) ;
DEFINE_DIR2_EVENT ( xfs_dir2_block_to_sf ) ;
DEFINE_DIR2_EVENT ( xfs_dir2_block_to_leaf ) ;
DEFINE_DIR2_EVENT ( xfs_dir2_leaf_addname ) ;
DEFINE_DIR2_EVENT ( xfs_dir2_leaf_lookup ) ;
DEFINE_DIR2_EVENT ( xfs_dir2_leaf_replace ) ;
DEFINE_DIR2_EVENT ( xfs_dir2_leaf_removename ) ;
DEFINE_DIR2_EVENT ( xfs_dir2_leaf_to_block ) ;
DEFINE_DIR2_EVENT ( xfs_dir2_leaf_to_node ) ;
DEFINE_DIR2_EVENT ( xfs_dir2_node_addname ) ;
DEFINE_DIR2_EVENT ( xfs_dir2_node_lookup ) ;
DEFINE_DIR2_EVENT ( xfs_dir2_node_replace ) ;
DEFINE_DIR2_EVENT ( xfs_dir2_node_removename ) ;
DEFINE_DIR2_EVENT ( xfs_dir2_node_to_leaf ) ;
2012-11-12 15:53:53 +04:00
/*
 * xfs_attr_class - event class for attribute tracepoints that take a
 * struct xfs_da_args.
 *
 * Captures the device and inode number of args->dp, a copy of the name,
 * namelen, valuelen, hashval, attr_filter, attr_flags and op_flags.
 *
 * The name is copied with memcpy() into a dynamic array of exactly
 * args->namelen bytes (no NUL terminator is stored) and is printed with
 * "%.*s" bounded by namelen; NULL is passed when namelen is zero.
 *
 * attr_filter is decoded with XFS_ATTR_FILTER_FLAGS, attr_flags against
 * XATTR_CREATE / XATTR_REPLACE, and op_flags with XFS_DA_OP_FLAGS.
 */
DECLARE_EVENT_CLASS ( xfs_attr_class ,
TP_PROTO ( struct xfs_da_args * args ) ,
TP_ARGS ( args ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_ino_t , ino )
__dynamic_array ( char , name , args - > namelen )
__field ( int , namelen )
__field ( int , valuelen )
__field ( xfs_dahash_t , hashval )
2020-02-27 04:30:42 +03:00
__field ( unsigned int , attr_filter )
__field ( unsigned int , attr_flags )
2012-11-12 15:53:53 +04:00
__field ( int , op_flags )
) ,
TP_fast_assign (
__entry - > dev = VFS_I ( args - > dp ) - > i_sb - > s_dev ;
__entry - > ino = args - > dp - > i_ino ;
if ( args - > namelen )
memcpy ( __get_str ( name ) , args - > name , args - > namelen ) ;
__entry - > namelen = args - > namelen ;
__entry - > valuelen = args - > valuelen ;
__entry - > hashval = args - > hashval ;
2020-02-27 04:30:42 +03:00
__entry - > attr_filter = args - > attr_filter ;
__entry - > attr_flags = args - > attr_flags ;
2012-11-12 15:53:53 +04:00
__entry - > op_flags = args - > op_flags ;
) ,
TP_printk ( " dev %d:%d ino 0x%llx name %.*s namelen %d valuelen %d "
2020-02-27 04:30:42 +03:00
" hashval 0x%x filter %s flags %s op_flags %s " ,
2012-11-12 15:53:53 +04:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > ino ,
__entry - > namelen ,
__entry - > namelen ? __get_str ( name ) : NULL ,
__entry - > namelen ,
__entry - > valuelen ,
__entry - > hashval ,
2020-02-27 04:30:42 +03:00
__print_flags ( __entry - > attr_filter , " | " ,
XFS_ATTR_FILTER_FLAGS ) ,
__print_flags ( __entry - > attr_flags , " | " ,
{ XATTR_CREATE , " CREATE " } ,
{ XATTR_REPLACE , " REPLACE " } ) ,
2012-11-12 15:53:53 +04:00
__print_flags ( __entry - > op_flags , " | " , XFS_DA_OP_FLAGS ) )
)
2012-03-22 09:15:13 +04:00
/*
 * DEFINE_ATTR_EVENT(name) - instantiate tracepoint "name" from
 * xfs_attr_class.  Each event takes a single struct xfs_da_args pointer.
 */
# define DEFINE_ATTR_EVENT(name) \
2012-11-12 15:53:53 +04:00
DEFINE_EVENT ( xfs_attr_class , name , \
2012-03-22 09:15:13 +04:00
TP_PROTO ( struct xfs_da_args * args ) , \
TP_ARGS ( args ) )
DEFINE_ATTR_EVENT ( xfs_attr_sf_add ) ;
DEFINE_ATTR_EVENT ( xfs_attr_sf_addname ) ;
DEFINE_ATTR_EVENT ( xfs_attr_sf_create ) ;
DEFINE_ATTR_EVENT ( xfs_attr_sf_lookup ) ;
DEFINE_ATTR_EVENT ( xfs_attr_sf_remove ) ;
DEFINE_ATTR_EVENT ( xfs_attr_sf_to_leaf ) ;
DEFINE_ATTR_EVENT ( xfs_attr_leaf_add ) ;
DEFINE_ATTR_EVENT ( xfs_attr_leaf_add_old ) ;
DEFINE_ATTR_EVENT ( xfs_attr_leaf_add_new ) ;
2012-11-12 15:53:53 +04:00
DEFINE_ATTR_EVENT ( xfs_attr_leaf_add_work ) ;
2012-03-22 09:15:13 +04:00
DEFINE_ATTR_EVENT ( xfs_attr_leaf_create ) ;
2012-11-12 15:53:53 +04:00
DEFINE_ATTR_EVENT ( xfs_attr_leaf_compact ) ;
DEFINE_ATTR_EVENT ( xfs_attr_leaf_get ) ;
2012-03-22 09:15:13 +04:00
DEFINE_ATTR_EVENT ( xfs_attr_leaf_lookup ) ;
DEFINE_ATTR_EVENT ( xfs_attr_leaf_replace ) ;
2012-11-12 15:53:53 +04:00
DEFINE_ATTR_EVENT ( xfs_attr_leaf_remove ) ;
2012-03-22 09:15:13 +04:00
DEFINE_ATTR_EVENT ( xfs_attr_leaf_removename ) ;
DEFINE_ATTR_EVENT ( xfs_attr_leaf_split ) ;
DEFINE_ATTR_EVENT ( xfs_attr_leaf_split_before ) ;
DEFINE_ATTR_EVENT ( xfs_attr_leaf_split_after ) ;
DEFINE_ATTR_EVENT ( xfs_attr_leaf_clearflag ) ;
DEFINE_ATTR_EVENT ( xfs_attr_leaf_setflag ) ;
DEFINE_ATTR_EVENT ( xfs_attr_leaf_flipflags ) ;
DEFINE_ATTR_EVENT ( xfs_attr_leaf_to_sf ) ;
DEFINE_ATTR_EVENT ( xfs_attr_leaf_to_node ) ;
DEFINE_ATTR_EVENT ( xfs_attr_leaf_rebalance ) ;
DEFINE_ATTR_EVENT ( xfs_attr_leaf_unbalance ) ;
2012-11-12 15:53:53 +04:00
DEFINE_ATTR_EVENT ( xfs_attr_leaf_toosmall ) ;
2012-03-22 09:15:13 +04:00
DEFINE_ATTR_EVENT ( xfs_attr_node_addname ) ;
2012-11-12 15:53:53 +04:00
DEFINE_ATTR_EVENT ( xfs_attr_node_get ) ;
2012-03-22 09:15:13 +04:00
DEFINE_ATTR_EVENT ( xfs_attr_node_replace ) ;
DEFINE_ATTR_EVENT ( xfs_attr_node_removename ) ;
2012-11-12 15:53:53 +04:00
DEFINE_ATTR_EVENT ( xfs_attr_fillstate ) ;
DEFINE_ATTR_EVENT ( xfs_attr_refillstate ) ;
DEFINE_ATTR_EVENT ( xfs_attr_rmtval_get ) ;
DEFINE_ATTR_EVENT ( xfs_attr_rmtval_set ) ;
2012-03-22 09:15:13 +04:00
/*
 * DEFINE_DA_EVENT(name) - instantiate tracepoint "name" from xfs_da_class.
 * Expands identically to DEFINE_DIR2_EVENT; each event takes a single
 * struct xfs_da_args pointer.
 */
# define DEFINE_DA_EVENT(name) \
DEFINE_EVENT ( xfs_da_class , name , \
TP_PROTO ( struct xfs_da_args * args ) , \
TP_ARGS ( args ) )
DEFINE_DA_EVENT ( xfs_da_split ) ;
DEFINE_DA_EVENT ( xfs_da_join ) ;
DEFINE_DA_EVENT ( xfs_da_link_before ) ;
DEFINE_DA_EVENT ( xfs_da_link_after ) ;
DEFINE_DA_EVENT ( xfs_da_unlink_back ) ;
DEFINE_DA_EVENT ( xfs_da_unlink_forward ) ;
DEFINE_DA_EVENT ( xfs_da_root_split ) ;
DEFINE_DA_EVENT ( xfs_da_root_join ) ;
DEFINE_DA_EVENT ( xfs_da_node_add ) ;
DEFINE_DA_EVENT ( xfs_da_node_create ) ;
DEFINE_DA_EVENT ( xfs_da_node_split ) ;
DEFINE_DA_EVENT ( xfs_da_node_remove ) ;
DEFINE_DA_EVENT ( xfs_da_node_rebalance ) ;
DEFINE_DA_EVENT ( xfs_da_node_unbalance ) ;
2012-11-12 15:53:53 +04:00
DEFINE_DA_EVENT ( xfs_da_node_toosmall ) ;
2012-03-22 09:15:13 +04:00
DEFINE_DA_EVENT ( xfs_da_swap_lastblock ) ;
DEFINE_DA_EVENT ( xfs_da_grow_inode ) ;
DEFINE_DA_EVENT ( xfs_da_shrink_inode ) ;
2012-11-12 15:53:53 +04:00
DEFINE_DA_EVENT ( xfs_da_fixhashpath ) ;
DEFINE_DA_EVENT ( xfs_da_path_shift ) ;
2012-03-22 09:15:13 +04:00
2009-12-21 17:03:03 +03:00
/*
 * xfs_dir2_space_class - event class for tracepoints that take a
 * struct xfs_da_args plus an integer index.
 *
 * Captures the device and inode number of args->dp, args->op_flags
 * (decoded with XFS_DA_OP_FLAGS on output) and @idx as passed in.
 * Unlike xfs_da_class, the name is not recorded.
 */
DECLARE_EVENT_CLASS ( xfs_dir2_space_class ,
TP_PROTO ( struct xfs_da_args * args , int idx ) ,
TP_ARGS ( args , idx ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_ino_t , ino )
__field ( int , op_flags )
__field ( int , idx )
) ,
TP_fast_assign (
__entry - > dev = VFS_I ( args - > dp ) - > i_sb - > s_dev ;
__entry - > ino = args - > dp - > i_ino ;
__entry - > op_flags = args - > op_flags ;
__entry - > idx = idx ;
) ,
TP_printk ( " dev %d:%d ino 0x%llx op_flags %s index %d " ,
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > ino ,
__print_flags ( __entry - > op_flags , " | " , XFS_DA_OP_FLAGS ) ,
__entry - > idx )
2009-12-15 02:14:59 +03:00
)
2009-12-21 17:03:03 +03:00
/*
 * DEFINE_DIR2_SPACE_EVENT(name) - instantiate tracepoint "name" from
 * xfs_dir2_space_class.  Each event takes (struct xfs_da_args *, int idx).
 */
# define DEFINE_DIR2_SPACE_EVENT(name) \
DEFINE_EVENT ( xfs_dir2_space_class , name , \
2009-12-15 02:14:59 +03:00
TP_PROTO ( struct xfs_da_args * args , int idx ) , \
2009-12-21 17:03:03 +03:00
TP_ARGS ( args , idx ) )
DEFINE_DIR2_SPACE_EVENT ( xfs_dir2_leafn_add ) ;
DEFINE_DIR2_SPACE_EVENT ( xfs_dir2_leafn_remove ) ;
DEFINE_DIR2_SPACE_EVENT ( xfs_dir2_grow_inode ) ;
DEFINE_DIR2_SPACE_EVENT ( xfs_dir2_shrink_inode ) ;
2009-12-15 02:14:59 +03:00
/*
 * xfs_dir2_leafn_moveents - standalone tracepoint taking a
 * struct xfs_da_args plus source index, destination index and count.
 *
 * Captures the device and inode number of args->dp, args->op_flags
 * (decoded with XFS_DA_OP_FLAGS on output) and the three integers exactly
 * as passed in.
 */
TRACE_EVENT ( xfs_dir2_leafn_moveents ,
TP_PROTO ( struct xfs_da_args * args , int src_idx , int dst_idx , int count ) ,
TP_ARGS ( args , src_idx , dst_idx , count ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_ino_t , ino )
__field ( int , op_flags )
__field ( int , src_idx )
__field ( int , dst_idx )
__field ( int , count )
) ,
TP_fast_assign (
__entry - > dev = VFS_I ( args - > dp ) - > i_sb - > s_dev ;
__entry - > ino = args - > dp - > i_ino ;
__entry - > op_flags = args - > op_flags ;
__entry - > src_idx = src_idx ;
__entry - > dst_idx = dst_idx ;
__entry - > count = count ;
) ,
TP_printk ( " dev %d:%d ino 0x%llx op_flags %s "
" src_idx %d dst_idx %d count %d " ,
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > ino ,
__print_flags ( __entry - > op_flags , " | " , XFS_DA_OP_FLAGS ) ,
__entry - > src_idx ,
__entry - > dst_idx ,
__entry - > count )
) ;
2010-01-14 04:33:55 +03:00
/*
 * Symbol table for __print_symbolic(): labels the "which" argument of the
 * swap-extent tracepoints (0 is printed as "target", 1 as "temp").
 */
# define XFS_SWAPEXT_INODES \
{ 0 , " target " } , \
{ 1 , " temp " }
2018-12-19 01:32:30 +03:00
/*
 * Register the XFS_DINODE_FMT_* enumerators with the tracing core.
 * NOTE(review): presumably these are the values referenced by
 * XFS_INODE_FORMAT_STR, which is defined outside this section - confirm.
 */
TRACE_DEFINE_ENUM ( XFS_DINODE_FMT_DEV ) ;
TRACE_DEFINE_ENUM ( XFS_DINODE_FMT_LOCAL ) ;
TRACE_DEFINE_ENUM ( XFS_DINODE_FMT_EXTENTS ) ;
TRACE_DEFINE_ENUM ( XFS_DINODE_FMT_BTREE ) ;
TRACE_DEFINE_ENUM ( XFS_DINODE_FMT_UUID ) ;
2010-01-14 04:33:55 +03:00
/*
 * xfs_swap_extent_class - event class for tracepoints that take an inode
 * and a "which" selector.
 *
 * Captures the device and inode number of @ip, @which, and a snapshot of
 * ip->i_df: if_format, if_nextents and if_broot_bytes, plus the value of
 * XFS_IFORK_BOFF(ip).
 *
 * On output @which is labelled through XFS_SWAPEXT_INODES and the format
 * through XFS_INODE_FORMAT_STR.
 */
DECLARE_EVENT_CLASS ( xfs_swap_extent_class ,
TP_PROTO ( struct xfs_inode * ip , int which ) ,
TP_ARGS ( ip , which ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( int , which )
__field ( xfs_ino_t , ino )
__field ( int , format )
__field ( int , nex )
__field ( int , broot_size )
__field ( int , fork_off )
) ,
TP_fast_assign (
__entry - > dev = VFS_I ( ip ) - > i_sb - > s_dev ;
__entry - > which = which ;
__entry - > ino = ip - > i_ino ;
2020-05-18 20:28:05 +03:00
__entry - > format = ip - > i_df . if_format ;
2020-05-18 20:27:22 +03:00
__entry - > nex = ip - > i_df . if_nextents ;
2010-01-14 04:33:55 +03:00
__entry - > broot_size = ip - > i_df . if_broot_bytes ;
__entry - > fork_off = XFS_IFORK_BOFF ( ip ) ;
) ,
TP_printk ( " dev %d:%d ino 0x%llx (%s), %s format, num_extents %d, "
2011-12-19 00:00:07 +04:00
" broot size %d, fork offset %d " ,
2010-01-14 04:33:55 +03:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > ino ,
__print_symbolic ( __entry - > which , XFS_SWAPEXT_INODES ) ,
__print_symbolic ( __entry - > format , XFS_INODE_FORMAT_STR ) ,
__entry - > nex ,
__entry - > broot_size ,
__entry - > fork_off )
)
/*
 * DEFINE_SWAPEXT_EVENT(name) - instantiate tracepoint "name" from
 * xfs_swap_extent_class.  Each event takes (struct xfs_inode *, int which).
 */
# define DEFINE_SWAPEXT_EVENT(name) \
DEFINE_EVENT ( xfs_swap_extent_class , name , \
TP_PROTO ( struct xfs_inode * ip , int which ) , \
TP_ARGS ( ip , which ) )
DEFINE_SWAPEXT_EVENT ( xfs_swap_extent_before ) ;
DEFINE_SWAPEXT_EVENT ( xfs_swap_extent_after ) ;
2017-08-09 04:21:53 +03:00
/*
 * xfs_log_recover - standalone tracepoint taking the log and a head/tail
 * block pair.
 *
 * Captures the device reached through log->l_mp->m_super and the
 * @headblk / @tailblk values exactly as passed in; both are printed in hex.
 */
TRACE_EVENT ( xfs_log_recover ,
TP_PROTO ( struct xlog * log , xfs_daddr_t headblk , xfs_daddr_t tailblk ) ,
TP_ARGS ( log , headblk , tailblk ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_daddr_t , headblk )
__field ( xfs_daddr_t , tailblk )
) ,
TP_fast_assign (
__entry - > dev = log - > l_mp - > m_super - > s_dev ;
__entry - > headblk = headblk ;
__entry - > tailblk = tailblk ;
) ,
TP_printk ( " dev %d:%d headblk 0x%llx tailblk 0x%llx " ,
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) , __entry - > headblk ,
__entry - > tailblk )
)
2016-09-26 01:34:52 +03:00
/*
 * xfs_log_recover_record - standalone tracepoint taking the log, a log
 * record header and the recovery pass number.
 *
 * The h_lsn, h_len and h_num_logops header fields are converted from
 * big-endian to CPU byte order (be64_to_cpu / be32_to_cpu) before being
 * stored, so the trace output shows native values.  @pass is recorded as
 * passed in.
 */
TRACE_EVENT ( xfs_log_recover_record ,
TP_PROTO ( struct xlog * log , struct xlog_rec_header * rhead , int pass ) ,
TP_ARGS ( log , rhead , pass ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_lsn_t , lsn )
__field ( int , len )
__field ( int , num_logops )
__field ( int , pass )
) ,
TP_fast_assign (
__entry - > dev = log - > l_mp - > m_super - > s_dev ;
__entry - > lsn = be64_to_cpu ( rhead - > h_lsn ) ;
__entry - > len = be32_to_cpu ( rhead - > h_len ) ;
__entry - > num_logops = be32_to_cpu ( rhead - > h_num_logops ) ;
__entry - > pass = pass ;
) ,
TP_printk ( " dev %d:%d lsn 0x%llx len 0x%x num_logops 0x%x pass %d " ,
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > lsn , __entry - > len , __entry - > num_logops ,
__entry - > pass )
)
2010-04-13 09:06:46 +04:00
/*
 * xfs_log_recover_item_class - event class for tracepoints that take the
 * log, a recovery transaction, a recovery item and the pass number.
 *
 * Captures the device, the transaction's r_log_tid and r_lsn, the item's
 * type (via ITEM_TYPE()), ri_cnt and ri_total, and @pass.
 *
 * The item pointer itself is stored as an unsigned long and cast back to
 * void * for the "%p" conversion; it is never dereferenced at print time.
 * The item type is labelled through XFS_LI_TYPE_DESC.
 */
DECLARE_EVENT_CLASS ( xfs_log_recover_item_class ,
2012-06-14 18:22:15 +04:00
TP_PROTO ( struct xlog * log , struct xlog_recover * trans ,
2010-04-13 09:06:46 +04:00
struct xlog_recover_item * item , int pass ) ,
TP_ARGS ( log , trans , item , pass ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( unsigned long , item )
__field ( xlog_tid_t , tid )
2016-09-26 01:34:52 +03:00
__field ( xfs_lsn_t , lsn )
2010-04-13 09:06:46 +04:00
__field ( int , type )
__field ( int , pass )
__field ( int , count )
__field ( int , total )
) ,
TP_fast_assign (
__entry - > dev = log - > l_mp - > m_super - > s_dev ;
__entry - > item = ( unsigned long ) item ;
__entry - > tid = trans - > r_log_tid ;
2016-09-26 01:34:52 +03:00
__entry - > lsn = trans - > r_lsn ;
2010-04-13 09:06:46 +04:00
__entry - > type = ITEM_TYPE ( item ) ;
__entry - > pass = pass ;
__entry - > count = item - > ri_cnt ;
__entry - > total = item - > ri_total ;
) ,
2018-01-09 22:43:36 +03:00
TP_printk ( " dev %d:%d tid 0x%x lsn 0x%llx, pass %d, item %p, "
2016-09-26 01:34:52 +03:00
" item type %s item region count/total %d/%d " ,
2010-04-13 09:06:46 +04:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > tid ,
2016-09-26 01:34:52 +03:00
__entry - > lsn ,
2010-04-13 09:06:46 +04:00
__entry - > pass ,
( void * ) __entry - > item ,
__print_symbolic ( __entry - > type , XFS_LI_TYPE_DESC ) ,
__entry - > count ,
__entry - > total )
)
/*
 * DEFINE_LOG_RECOVER_ITEM(name) - instantiate tracepoint "name" from
 * xfs_log_recover_item_class.  Each event takes
 * (struct xlog *, struct xlog_recover *, struct xlog_recover_item *, int pass).
 */
# define DEFINE_LOG_RECOVER_ITEM(name) \
DEFINE_EVENT ( xfs_log_recover_item_class , name , \
2012-06-14 18:22:15 +04:00
TP_PROTO ( struct xlog * log , struct xlog_recover * trans , \
2010-04-13 09:06:46 +04:00
struct xlog_recover_item * item , int pass ) , \
TP_ARGS ( log , trans , item , pass ) )
DEFINE_LOG_RECOVER_ITEM ( xfs_log_recover_item_add ) ;
DEFINE_LOG_RECOVER_ITEM ( xfs_log_recover_item_add_cont ) ;
DEFINE_LOG_RECOVER_ITEM ( xfs_log_recover_item_reorder_head ) ;
DEFINE_LOG_RECOVER_ITEM ( xfs_log_recover_item_reorder_tail ) ;
DEFINE_LOG_RECOVER_ITEM ( xfs_log_recover_item_recover ) ;
/*
 * xfs_log_recover_buf_item_class - event class for tracepoints that take
 * the log and a buffer log format structure.
 *
 * Captures the device plus the blf_blkno, blf_len, blf_flags, blf_size and
 * blf_map_size fields of @buf_f, copied without any byte-order conversion.
 * blkno is stored as int64_t and printed in hex.
 */
DECLARE_EVENT_CLASS ( xfs_log_recover_buf_item_class ,
2012-06-14 18:22:15 +04:00
TP_PROTO ( struct xlog * log , struct xfs_buf_log_format * buf_f ) ,
2010-04-13 09:06:46 +04:00
TP_ARGS ( log , buf_f ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
2017-06-16 21:00:05 +03:00
__field ( int64_t , blkno )
2010-04-13 09:06:46 +04:00
__field ( unsigned short , len )
__field ( unsigned short , flags )
__field ( unsigned short , size )
__field ( unsigned int , map_size )
) ,
TP_fast_assign (
__entry - > dev = log - > l_mp - > m_super - > s_dev ;
__entry - > blkno = buf_f - > blf_blkno ;
__entry - > len = buf_f - > blf_len ;
__entry - > flags = buf_f - > blf_flags ;
__entry - > size = buf_f - > blf_size ;
__entry - > map_size = buf_f - > blf_map_size ;
) ,
TP_printk ( " dev %d:%d blkno 0x%llx, len %u, flags 0x%x, size %d, "
" map_size %d " ,
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > blkno ,
__entry - > len ,
__entry - > flags ,
__entry - > size ,
__entry - > map_size )
)
/*
 * DEFINE_LOG_RECOVER_BUF_ITEM(name) - instantiate tracepoint "name" from
 * xfs_log_recover_buf_item_class.  Each event takes
 * (struct xlog *, struct xfs_buf_log_format *).
 */
# define DEFINE_LOG_RECOVER_BUF_ITEM(name) \
DEFINE_EVENT ( xfs_log_recover_buf_item_class , name , \
2012-06-14 18:22:15 +04:00
TP_PROTO ( struct xlog * log , struct xfs_buf_log_format * buf_f ) , \
2010-04-13 09:06:46 +04:00
TP_ARGS ( log , buf_f ) )
DEFINE_LOG_RECOVER_BUF_ITEM ( xfs_log_recover_buf_not_cancel ) ;
DEFINE_LOG_RECOVER_BUF_ITEM ( xfs_log_recover_buf_cancel ) ;
DEFINE_LOG_RECOVER_BUF_ITEM ( xfs_log_recover_buf_cancel_add ) ;
DEFINE_LOG_RECOVER_BUF_ITEM ( xfs_log_recover_buf_cancel_ref_inc ) ;
DEFINE_LOG_RECOVER_BUF_ITEM ( xfs_log_recover_buf_recover ) ;
2016-09-26 01:34:52 +03:00
DEFINE_LOG_RECOVER_BUF_ITEM ( xfs_log_recover_buf_skip ) ;
2010-04-13 09:06:46 +04:00
DEFINE_LOG_RECOVER_BUF_ITEM ( xfs_log_recover_buf_inode_buf ) ;
DEFINE_LOG_RECOVER_BUF_ITEM ( xfs_log_recover_buf_reg_buf ) ;
DEFINE_LOG_RECOVER_BUF_ITEM ( xfs_log_recover_buf_dquot_buf ) ;
/*
 * xfs_log_recover_ino_item_class - event class for tracepoints that take
 * the log and an inode log format structure.
 *
 * Captures the device plus the ilf_ino, ilf_size, ilf_fields, ilf_asize,
 * ilf_dsize, ilf_blkno, ilf_len and ilf_boffset fields of @in_f, copied
 * without any byte-order conversion.  blkno is stored as int64_t and
 * printed in hex.
 */
DECLARE_EVENT_CLASS ( xfs_log_recover_ino_item_class ,
2012-06-14 18:22:15 +04:00
TP_PROTO ( struct xlog * log , struct xfs_inode_log_format * in_f ) ,
2010-04-13 09:06:46 +04:00
TP_ARGS ( log , in_f ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_ino_t , ino )
__field ( unsigned short , size )
__field ( int , fields )
__field ( unsigned short , asize )
__field ( unsigned short , dsize )
2017-06-16 21:00:05 +03:00
__field ( int64_t , blkno )
2010-04-13 09:06:46 +04:00
__field ( int , len )
__field ( int , boffset )
) ,
TP_fast_assign (
__entry - > dev = log - > l_mp - > m_super - > s_dev ;
__entry - > ino = in_f - > ilf_ino ;
__entry - > size = in_f - > ilf_size ;
__entry - > fields = in_f - > ilf_fields ;
__entry - > asize = in_f - > ilf_asize ;
__entry - > dsize = in_f - > ilf_dsize ;
__entry - > blkno = in_f - > ilf_blkno ;
__entry - > len = in_f - > ilf_len ;
__entry - > boffset = in_f - > ilf_boffset ;
) ,
TP_printk ( " dev %d:%d ino 0x%llx, size %u, fields 0x%x, asize %d, "
" dsize %d, blkno 0x%llx, len %d, boffset %d " ,
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > ino ,
__entry - > size ,
__entry - > fields ,
__entry - > asize ,
__entry - > dsize ,
__entry - > blkno ,
__entry - > len ,
__entry - > boffset )
)
/*
 * DEFINE_LOG_RECOVER_INO_ITEM(name) - instantiate tracepoint "name" from
 * xfs_log_recover_ino_item_class.  Each event takes
 * (struct xlog *, struct xfs_inode_log_format *).
 */
# define DEFINE_LOG_RECOVER_INO_ITEM(name) \
DEFINE_EVENT ( xfs_log_recover_ino_item_class , name , \
2012-06-14 18:22:15 +04:00
TP_PROTO ( struct xlog * log , struct xfs_inode_log_format * in_f ) , \
2010-04-13 09:06:46 +04:00
TP_ARGS ( log , in_f ) )
DEFINE_LOG_RECOVER_INO_ITEM ( xfs_log_recover_inode_recover ) ;
DEFINE_LOG_RECOVER_INO_ITEM ( xfs_log_recover_inode_cancel ) ;
DEFINE_LOG_RECOVER_INO_ITEM ( xfs_log_recover_inode_skip ) ;
2015-08-19 02:58:48 +03:00
/*
 * xfs_log_recover_icreate_item_class - event class for tracepoints that
 * take the log and an inode-create log structure.
 *
 * Every icl_* field read here (icl_ag, icl_agbno, icl_count, icl_isize,
 * icl_length, icl_gen) is converted with be32_to_cpu() before being
 * stored, so the trace output shows CPU-order values.  All fields are
 * printed as unsigned decimals.
 */
DECLARE_EVENT_CLASS ( xfs_log_recover_icreate_item_class ,
TP_PROTO ( struct xlog * log , struct xfs_icreate_log * in_f ) ,
TP_ARGS ( log , in_f ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_agnumber_t , agno )
__field ( xfs_agblock_t , agbno )
__field ( unsigned int , count )
__field ( unsigned int , isize )
__field ( xfs_agblock_t , length )
__field ( unsigned int , gen )
) ,
TP_fast_assign (
__entry - > dev = log - > l_mp - > m_super - > s_dev ;
__entry - > agno = be32_to_cpu ( in_f - > icl_ag ) ;
__entry - > agbno = be32_to_cpu ( in_f - > icl_agbno ) ;
__entry - > count = be32_to_cpu ( in_f - > icl_count ) ;
__entry - > isize = be32_to_cpu ( in_f - > icl_isize ) ;
__entry - > length = be32_to_cpu ( in_f - > icl_length ) ;
__entry - > gen = be32_to_cpu ( in_f - > icl_gen ) ;
) ,
TP_printk ( " dev %d:%d agno %u agbno %u count %u isize %u length %u "
" gen %u " , MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > agno , __entry - > agbno , __entry - > count , __entry - > isize ,
__entry - > length , __entry - > gen )
)
/*
 * DEFINE_LOG_RECOVER_ICREATE_ITEM(name) - instantiate tracepoint "name"
 * from xfs_log_recover_icreate_item_class.  Each event takes
 * (struct xlog *, struct xfs_icreate_log *).
 */
# define DEFINE_LOG_RECOVER_ICREATE_ITEM(name) \
DEFINE_EVENT ( xfs_log_recover_icreate_item_class , name , \
TP_PROTO ( struct xlog * log , struct xfs_icreate_log * in_f ) , \
TP_ARGS ( log , in_f ) )
DEFINE_LOG_RECOVER_ICREATE_ITEM ( xfs_log_recover_icreate_cancel ) ;
DEFINE_LOG_RECOVER_ICREATE_ITEM ( xfs_log_recover_icreate_recover ) ;
2011-01-07 16:02:04 +03:00
/*
 * xfs_discard_class - event class for tracepoints that take a mount plus
 * an (agno, agbno, len) triple.
 *
 * Captures the device of @mp and the three values exactly as passed in;
 * all three are printed as unsigned decimals.
 */
DECLARE_EVENT_CLASS ( xfs_discard_class ,
TP_PROTO ( struct xfs_mount * mp , xfs_agnumber_t agno ,
xfs_agblock_t agbno , xfs_extlen_t len ) ,
TP_ARGS ( mp , agno , agbno , len ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_agnumber_t , agno )
__field ( xfs_agblock_t , agbno )
__field ( xfs_extlen_t , len )
) ,
TP_fast_assign (
__entry - > dev = mp - > m_super - > s_dev ;
__entry - > agno = agno ;
__entry - > agbno = agbno ;
__entry - > len = len ;
) ,
2017-04-14 21:43:27 +03:00
TP_printk ( " dev %d:%d agno %u agbno %u len %u " ,
2011-01-07 16:02:04 +03:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > agno ,
__entry - > agbno ,
__entry - > len )
)
/*
 * DEFINE_DISCARD_EVENT(name) - instantiate tracepoint "name" from
 * xfs_discard_class.  Each event takes
 * (struct xfs_mount *, xfs_agnumber_t, xfs_agblock_t, xfs_extlen_t).
 */
# define DEFINE_DISCARD_EVENT(name) \
DEFINE_EVENT ( xfs_discard_class , name , \
TP_PROTO ( struct xfs_mount * mp , xfs_agnumber_t agno , \
xfs_agblock_t agbno , xfs_extlen_t len ) , \
TP_ARGS ( mp , agno , agbno , len ) )
DEFINE_DISCARD_EVENT ( xfs_discard_extent ) ;
DEFINE_DISCARD_EVENT ( xfs_discard_toosmall ) ;
DEFINE_DISCARD_EVENT ( xfs_discard_exclude ) ;
DEFINE_DISCARD_EVENT ( xfs_discard_busy ) ;
xfs: support btrees with overlapping intervals for keys
On a filesystem with both reflink and reverse mapping enabled, it's
possible to have multiple rmap records referring to the same blocks on
disk. When overlapping intervals are possible, querying a classic
btree to find all records intersecting a given interval is inefficient
because we cannot use the left side of the search interval to filter
out non-matching records the same way that we can use the existing
btree key to filter out records coming after the right side of the
search interval. This will become important once we want to use the
rmap btree to rebuild BMBTs, or implement the (future) fsmap ioctl.
(For the non-overlapping case, we can perform such queries trivially
by starting at the left side of the interval and walking the tree
until we pass the right side.)
Therefore, extend the btree code to come closer to supporting
intervals as a first-class record attribute. This involves widening
the btree node's key space to store both the lowest key reachable via
the node pointer (as the btree does now) and the highest key reachable
via the same pointer and teaching the btree modifying functions to
keep the highest-key records up to date.
This behavior can be turned on via a new btree ops flag so that btrees
that cannot store overlapping intervals don't pay the overhead costs
in terms of extra code and disk format changes.
When we're deleting a record in a btree that supports overlapped
interval records and the deletion results in two btree blocks being
joined, we defer updating the high/low keys until after all possible
joining (at higher levels in the tree) have finished. At this point,
the btree pointers at all levels have been updated to remove the empty
blocks and we can update the low and high keys.
When we're doing this, we must be careful to update the keys of all
node pointers up to the root instead of stopping at the first set of
keys that don't need updating. This is because it's possible for a
single deletion to cause joining of multiple levels of tree, and so
we need to update everything going back to the root.
The diff_two_keys functions return < 0, 0, or > 0 if key1 is less than,
equal to, or greater than key2, respectively. This is consistent
with the rest of the kernel and the C library.
In btree_updkeys(), we need to evaluate the force_all parameter before
running the key diff to avoid reading uninitialized memory when we're
forcing a key update. This happens when we've allocated an empty slot
at level N + 1 to point to a new block at level N and we're in the
process of filling out the new keys.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-08-03 04:08:36 +03:00
/* btree cursor events */
2018-12-19 01:32:31 +03:00
/*
 * Register the XFS_BTNUM_* enumerators with the tracing core.
 * NOTE(review): presumably these are the values referenced by
 * XFS_BTNUM_STRINGS, which the btree tracepoints pass to
 * __print_symbolic() but which is defined outside this section - confirm.
 */
TRACE_DEFINE_ENUM ( XFS_BTNUM_BNOi ) ;
TRACE_DEFINE_ENUM ( XFS_BTNUM_CNTi ) ;
TRACE_DEFINE_ENUM ( XFS_BTNUM_BMAPi ) ;
TRACE_DEFINE_ENUM ( XFS_BTNUM_INOi ) ;
TRACE_DEFINE_ENUM ( XFS_BTNUM_FINOi ) ;
TRACE_DEFINE_ENUM ( XFS_BTNUM_RMAPi ) ;
TRACE_DEFINE_ENUM ( XFS_BTNUM_REFCi ) ;
xfs: support btrees with overlapping intervals for keys
On a filesystem with both reflink and reverse mapping enabled, it's
possible to have multiple rmap records referring to the same blocks on
disk. When overlapping intervals are possible, querying a classic
btree to find all records intersecting a given interval is inefficient
because we cannot use the left side of the search interval to filter
out non-matching records the same way that we can use the existing
btree key to filter out records coming after the right side of the
search interval. This will become important once we want to use the
rmap btree to rebuild BMBTs, or implement the (future) fsmap ioctl.
(For the non-overlapping case, we can perform such queries trivially
by starting at the left side of the interval and walking the tree
until we pass the right side.)
Therefore, extend the btree code to come closer to supporting
intervals as a first-class record attribute. This involves widening
the btree node's key space to store both the lowest key reachable via
the node pointer (as the btree does now) and the highest key reachable
via the same pointer and teaching the btree modifying functions to
keep the highest-key records up to date.
This behavior can be turned on via a new btree ops flag so that btrees
that cannot store overlapping intervals don't pay the overhead costs
in terms of extra code and disk format changes.
When we're deleting a record in a btree that supports overlapped
interval records and the deletion results in two btree blocks being
joined, we defer updating the high/low keys until after all possible
joining (at higher levels in the tree) have finished. At this point,
the btree pointers at all levels have been updated to remove the empty
blocks and we can update the low and high keys.
When we're doing this, we must be careful to update the keys of all
node pointers up to the root instead of stopping at the first set of
keys that don't need updating. This is because it's possible for a
single deletion to cause joining of multiple levels of tree, and so
we need to update everything going back to the root.
The diff_two_keys functions return < 0, 0, or > 0 if key1 is less than,
equal to, or greater than key2, respectively. This is consistent
with the rest of the kernel and the C library.
In btree_updkeys(), we need to evaluate the force_all parameter before
running the key diff to avoid reading uninitialized memory when we're
forcing a key update. This happens when we've allocated an empty slot
at level N + 1 to point to a new block at level N and we're in the
process of filling out the new keys.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-08-03 04:08:36 +03:00
/*
 * Event class for tracepoints that take a btree cursor, a level and a buffer.
 *
 * Each record stores the device of the cursor's mount, cur->bc_btnum,
 * the level passed in, cur->bc_nlevels, cur->bc_ptrs[level] and the
 * buffer's b_bn.  The btree number is printed symbolically through
 * XFS_BTNUM_STRINGS and the level is printed as "level/nlevels".
 */
DECLARE_EVENT_CLASS ( xfs_btree_cur_class ,
TP_PROTO ( struct xfs_btree_cur * cur , int level , struct xfs_buf * bp ) ,
TP_ARGS ( cur , level , bp ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_btnum_t , btnum )
__field ( int , level )
__field ( int , nlevels )
__field ( int , ptr )
__field ( xfs_daddr_t , daddr )
) ,
TP_fast_assign (
__entry - > dev = cur - > bc_mp - > m_super - > s_dev ;
__entry - > btnum = cur - > bc_btnum ;
__entry - > level = level ;
__entry - > nlevels = cur - > bc_nlevels ;
__entry - > ptr = cur - > bc_ptrs [ level ] ;
/* bp may be NULL; daddr is then recorded as -1 */
__entry - > daddr = bp ? bp - > b_bn : - 1 ;
) ,
2018-12-19 01:32:31 +03:00
TP_printk ( " dev %d:%d btree %s level %d/%d ptr %d daddr 0x%llx " ,
xfs: support btrees with overlapping intervals for keys
On a filesystem with both reflink and reverse mapping enabled, it's
possible to have multiple rmap records referring to the same blocks on
disk. When overlapping intervals are possible, querying a classic
btree to find all records intersecting a given interval is inefficient
because we cannot use the left side of the search interval to filter
out non-matching records the same way that we can use the existing
btree key to filter out records coming after the right side of the
search interval. This will become important once we want to use the
rmap btree to rebuild BMBTs, or implement the (future) fsmap ioctl.
(For the non-overlapping case, we can perform such queries trivially
by starting at the left side of the interval and walking the tree
until we pass the right side.)
Therefore, extend the btree code to come closer to supporting
intervals as a first-class record attribute. This involves widening
the btree node's key space to store both the lowest key reachable via
the node pointer (as the btree does now) and the highest key reachable
via the same pointer and teaching the btree modifying functions to
keep the highest-key records up to date.
This behavior can be turned on via a new btree ops flag so that btrees
that cannot store overlapping intervals don't pay the overhead costs
in terms of extra code and disk format changes.
When we're deleting a record in a btree that supports overlapped
interval records and the deletion results in two btree blocks being
joined, we defer updating the high/low keys until after all possible
joining (at higher levels in the tree) have finished. At this point,
the btree pointers at all levels have been updated to remove the empty
blocks and we can update the low and high keys.
When we're doing this, we must be careful to update the keys of all
node pointers up to the root instead of stopping at the first set of
keys that don't need updating. This is because it's possible for a
single deletion to cause joining of multiple levels of tree, and so
we need to update everything going back to the root.
The diff_two_keys functions return < 0, 0, or > 0 if key1 is less than,
equal to, or greater than key2, respectively. This is consistent
with the rest of the kernel and the C library.
In btree_updkeys(), we need to evaluate the force_all parameter before
running the key diff to avoid reading uninitialized memory when we're
forcing a key update. This happens when we've allocated an empty slot
at level N + 1 to point to a new block at level N and we're in the
process of filling out the new keys.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-08-03 04:08:36 +03:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
2018-12-19 01:32:31 +03:00
__print_symbolic ( __entry - > btnum , XFS_BTNUM_STRINGS ) ,
xfs: support btrees with overlapping intervals for keys
On a filesystem with both reflink and reverse mapping enabled, it's
possible to have multiple rmap records referring to the same blocks on
disk. When overlapping intervals are possible, querying a classic
btree to find all records intersecting a given interval is inefficient
because we cannot use the left side of the search interval to filter
out non-matching records the same way that we can use the existing
btree key to filter out records coming after the right side of the
search interval. This will become important once we want to use the
rmap btree to rebuild BMBTs, or implement the (future) fsmap ioctl.
(For the non-overlapping case, we can perform such queries trivially
by starting at the left side of the interval and walking the tree
until we pass the right side.)
Therefore, extend the btree code to come closer to supporting
intervals as a first-class record attribute. This involves widening
the btree node's key space to store both the lowest key reachable via
the node pointer (as the btree does now) and the highest key reachable
via the same pointer and teaching the btree modifying functions to
keep the highest-key records up to date.
This behavior can be turned on via a new btree ops flag so that btrees
that cannot store overlapping intervals don't pay the overhead costs
in terms of extra code and disk format changes.
When we're deleting a record in a btree that supports overlapped
interval records and the deletion results in two btree blocks being
joined, we defer updating the high/low keys until after all possible
joining (at higher levels in the tree) have finished. At this point,
the btree pointers at all levels have been updated to remove the empty
blocks and we can update the low and high keys.
When we're doing this, we must be careful to update the keys of all
node pointers up to the root instead of stopping at the first set of
keys that don't need updating. This is because it's possible for a
single deletion to cause joining of multiple levels of tree, and so
we need to update everything going back to the root.
The diff_two_keys functions return < 0, 0, or > 0 if key1 is less than,
equal to, or greater than key2, respectively. This is consistent
with the rest of the kernel and the C library.
In btree_updkeys(), we need to evaluate the force_all parameter before
running the key diff to avoid reading uninitialized memory when we're
forcing a key update. This happens when we've allocated an empty slot
at level N + 1 to point to a new block at level N and we're in the
process of filling out the new keys.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-08-03 04:08:36 +03:00
__entry - > level ,
__entry - > nlevels ,
__entry - > ptr ,
( unsigned long long ) __entry - > daddr )
)
/* Define tracepoint @name as an instance of xfs_btree_cur_class. */
# define DEFINE_BTREE_CUR_EVENT(name) \
DEFINE_EVENT ( xfs_btree_cur_class , name , \
TP_PROTO ( struct xfs_btree_cur * cur , int level , struct xfs_buf * bp ) , \
TP_ARGS ( cur , level , bp ) )
/* Tracepoints that use the btree cursor event class. */
DEFINE_BTREE_CUR_EVENT ( xfs_btree_updkeys ) ;
2016-08-03 04:10:21 +03:00
DEFINE_BTREE_CUR_EVENT ( xfs_btree_overlapped_query_range ) ;
xfs: support btrees with overlapping intervals for keys
On a filesystem with both reflink and reverse mapping enabled, it's
possible to have multiple rmap records referring to the same blocks on
disk. When overlapping intervals are possible, querying a classic
btree to find all records intersecting a given interval is inefficient
because we cannot use the left side of the search interval to filter
out non-matching records the same way that we can use the existing
btree key to filter out records coming after the right side of the
search interval. This will become important once we want to use the
rmap btree to rebuild BMBTs, or implement the (future) fsmap ioctl.
(For the non-overlapping case, we can perform such queries trivially
by starting at the left side of the interval and walking the tree
until we pass the right side.)
Therefore, extend the btree code to come closer to supporting
intervals as a first-class record attribute. This involves widening
the btree node's key space to store both the lowest key reachable via
the node pointer (as the btree does now) and the highest key reachable
via the same pointer and teaching the btree modifying functions to
keep the highest-key records up to date.
This behavior can be turned on via a new btree ops flag so that btrees
that cannot store overlapping intervals don't pay the overhead costs
in terms of extra code and disk format changes.
When we're deleting a record in a btree that supports overlapped
interval records and the deletion results in two btree blocks being
joined, we defer updating the high/low keys until after all possible
joining (at higher levels in the tree) have finished. At this point,
the btree pointers at all levels have been updated to remove the empty
blocks and we can update the low and high keys.
When we're doing this, we must be careful to update the keys of all
node pointers up to the root instead of stopping at the first set of
keys that don't need updating. This is because it's possible for a
single deletion to cause joining of multiple levels of tree, and so
we need to update everything going back to the root.
The diff_two_keys functions return < 0, 0, or > 0 if key1 is less than,
equal to, or greater than key2, respectively. This is consistent
with the rest of the kernel and the C library.
In btree_updkeys(), we need to evaluate the force_all parameter before
running the key diff to avoid reading uninitialized memory when we're
forcing a key update. This happens when we've allocated an empty slot
at level N + 1 to point to a new block at level N and we're in the
process of filling out the new keys.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-08-03 04:08:36 +03:00
2016-08-03 04:13:02 +03:00
/* deferred ops */
struct xfs_defer_pending ;
/*
 * Event class for deferred-operation tracepoints keyed on a transaction.
 *
 * Each record stores the device of the transaction's mount, the
 * transaction pointer and the caller's instruction pointer, which is
 * printed through %pS.
 *
 * Every field declared in TP_STRUCT__entry is filled in by TP_fast_assign,
 * so the record carries no unassigned bytes.
 */
DECLARE_EVENT_CLASS ( xfs_defer_class ,
2018-08-01 17:20:35 +03:00
TP_PROTO ( struct xfs_trans * tp , unsigned long caller_ip ) ,
TP_ARGS ( tp , caller_ip ) ,
2016-08-03 04:13:02 +03:00
TP_STRUCT__entry (
__field ( dev_t , dev )
2018-08-01 17:20:35 +03:00
__field ( struct xfs_trans * , tp )
2018-05-09 17:48:52 +03:00
__field ( unsigned long , caller_ip )
2016-08-03 04:13:02 +03:00
) ,
TP_fast_assign (
2018-08-01 17:20:35 +03:00
__entry - > dev = tp - > t_mountp - > m_super - > s_dev ;
__entry - > tp = tp ;
2018-05-09 17:48:52 +03:00
__entry - > caller_ip = caller_ip ;
2016-08-03 04:13:02 +03:00
) ,
2018-08-01 17:20:35 +03:00
TP_printk ( " dev %d:%d tp %p caller %pS " ,
2016-08-03 04:13:02 +03:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
2018-08-01 17:20:35 +03:00
__entry - > tp ,
2018-05-09 17:48:52 +03:00
( char * ) __entry - > caller_ip )
2016-08-03 04:13:02 +03:00
)
/* Define tracepoint @name as an instance of xfs_defer_class. */
# define DEFINE_DEFER_EVENT(name) \
DEFINE_EVENT ( xfs_defer_class , name , \
2018-08-01 17:20:35 +03:00
TP_PROTO ( struct xfs_trans * tp , unsigned long caller_ip ) , \
TP_ARGS ( tp , caller_ip ) )
2016-08-03 04:13:02 +03:00
/*
 * Event class for deferred-operation error tracepoints.
 *
 * Each record stores the device of the transaction's mount, the
 * transaction pointer and the error code passed in.
 *
 * Every field declared in TP_STRUCT__entry is filled in by TP_fast_assign,
 * so the record carries no unassigned bytes.
 */
DECLARE_EVENT_CLASS ( xfs_defer_error_class ,
2018-08-01 17:20:35 +03:00
TP_PROTO ( struct xfs_trans * tp , int error ) ,
TP_ARGS ( tp , error ) ,
2016-08-03 04:13:02 +03:00
TP_STRUCT__entry (
__field ( dev_t , dev )
2018-08-01 17:20:35 +03:00
__field ( struct xfs_trans * , tp )
2016-08-03 04:13:02 +03:00
__field ( int , error )
) ,
TP_fast_assign (
2018-08-01 17:20:35 +03:00
__entry - > dev = tp - > t_mountp - > m_super - > s_dev ;
__entry - > tp = tp ;
2016-08-03 04:13:02 +03:00
__entry - > error = error ;
) ,
2018-08-01 17:20:35 +03:00
TP_printk ( " dev %d:%d tp %p err %d " ,
2016-08-03 04:13:02 +03:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
2018-08-01 17:20:35 +03:00
__entry - > tp ,
2016-08-03 04:13:02 +03:00
__entry - > error )
)
/* Define tracepoint @name as an instance of xfs_defer_error_class. */
# define DEFINE_DEFER_ERROR_EVENT(name) \
DEFINE_EVENT ( xfs_defer_error_class , name , \
2018-08-01 17:20:35 +03:00
TP_PROTO ( struct xfs_trans * tp , int error ) , \
TP_ARGS ( tp , error ) )
2016-08-03 04:13:02 +03:00
/*
 * Event class for tracepoints that describe one pending deferred work item.
 *
 * Each record stores the device, dfp->dfp_type (printed as "optype"),
 * dfp->dfp_intent, a "committed" flag and dfp->dfp_count (printed as "nr").
 */
DECLARE_EVENT_CLASS ( xfs_defer_pending_class ,
TP_PROTO ( struct xfs_mount * mp , struct xfs_defer_pending * dfp ) ,
TP_ARGS ( mp , dfp ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( int , type )
__field ( void * , intent )
2017-04-21 21:24:42 +03:00
__field ( char , committed )
2016-08-03 04:13:02 +03:00
__field ( int , nr )
) ,
TP_fast_assign (
/* mp may be NULL; the device is then recorded as 0 */
__entry - > dev = mp ? mp - > m_super - > s_dev : 0 ;
2018-12-12 19:46:22 +03:00
__entry - > type = dfp - > dfp_type ;
2016-08-03 04:13:02 +03:00
__entry - > intent = dfp - > dfp_intent ;
2016-08-30 06:51:39 +03:00
/* "committed" is 1 when dfp_done is set, 0 otherwise */
__entry - > committed = dfp - > dfp_done ! = NULL ;
2016-08-03 04:13:02 +03:00
__entry - > nr = dfp - > dfp_count ;
) ,
2017-04-14 21:43:27 +03:00
TP_printk ( " dev %d:%d optype %d intent %p committed %d nr %d " ,
2016-08-03 04:13:02 +03:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > type ,
__entry - > intent ,
__entry - > committed ,
__entry - > nr )
)
/* Define tracepoint @name as an instance of xfs_defer_pending_class. */
# define DEFINE_DEFER_PENDING_EVENT(name) \
DEFINE_EVENT ( xfs_defer_pending_class , name , \
TP_PROTO ( struct xfs_mount * mp , struct xfs_defer_pending * dfp ) , \
TP_ARGS ( mp , dfp ) )
/*
 * Event class for deferred operations that act on a physical extent.
 *
 * Each record stores the device, the AG number, the operation type, the
 * AG block number and the extent length.  The printed line puts the
 * operation type ("op") before the AG number.
 */
DECLARE_EVENT_CLASS ( xfs_phys_extent_deferred_class ,
TP_PROTO ( struct xfs_mount * mp , xfs_agnumber_t agno ,
int type , xfs_agblock_t agbno , xfs_extlen_t len ) ,
TP_ARGS ( mp , agno , type , agbno , len ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_agnumber_t , agno )
__field ( int , type )
__field ( xfs_agblock_t , agbno )
__field ( xfs_extlen_t , len )
) ,
TP_fast_assign (
__entry - > dev = mp - > m_super - > s_dev ;
__entry - > agno = agno ;
__entry - > type = type ;
__entry - > agbno = agbno ;
__entry - > len = len ;
) ,
TP_printk ( " dev %d:%d op %d agno %u agbno %u len %u " ,
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > type ,
__entry - > agno ,
__entry - > agbno ,
__entry - > len )
) ;
/* Define tracepoint @name as an instance of xfs_phys_extent_deferred_class. */
# define DEFINE_PHYS_EXTENT_DEFERRED_EVENT(name) \
DEFINE_EVENT ( xfs_phys_extent_deferred_class , name , \
TP_PROTO ( struct xfs_mount * mp , xfs_agnumber_t agno , \
int type , \
xfs_agblock_t bno , \
xfs_extlen_t len ) , \
TP_ARGS ( mp , agno , type , bno , len ) )
/*
 * Event class for deferred operations that act on a file mapping.
 *
 * Each record stores the device, AG number, operation code, AG block
 * number, inode number (printed as "owner" with a signed %lld), fork
 * selector, file offset, length and extent state.
 *
 * The fork is printed as "attr" when whichfork equals XFS_ATTR_FORK and
 * as "data" for every other value.
 */
DECLARE_EVENT_CLASS ( xfs_map_extent_deferred_class ,
TP_PROTO ( struct xfs_mount * mp , xfs_agnumber_t agno ,
int op ,
xfs_agblock_t agbno ,
xfs_ino_t ino ,
int whichfork ,
xfs_fileoff_t offset ,
xfs_filblks_t len ,
xfs_exntst_t state ) ,
TP_ARGS ( mp , agno , op , agbno , ino , whichfork , offset , len , state ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_agnumber_t , agno )
__field ( xfs_ino_t , ino )
__field ( xfs_agblock_t , agbno )
__field ( int , whichfork )
__field ( xfs_fileoff_t , l_loff )
__field ( xfs_filblks_t , l_len )
__field ( xfs_exntst_t , l_state )
__field ( int , op )
) ,
TP_fast_assign (
__entry - > dev = mp - > m_super - > s_dev ;
__entry - > agno = agno ;
__entry - > ino = ino ;
__entry - > agbno = agbno ;
__entry - > whichfork = whichfork ;
__entry - > l_loff = offset ;
__entry - > l_len = len ;
__entry - > l_state = state ;
__entry - > op = op ;
) ,
TP_printk ( " dev %d:%d op %d agno %u agbno %u owner %lld %s offset %llu len %llu state %d " ,
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > op ,
__entry - > agno ,
__entry - > agbno ,
__entry - > ino ,
__entry - > whichfork = = XFS_ATTR_FORK ? " attr " : " data " ,
__entry - > l_loff ,
__entry - > l_len ,
__entry - > l_state )
) ;
/* Define tracepoint @name as an instance of xfs_map_extent_deferred_class. */
# define DEFINE_MAP_EXTENT_DEFERRED_EVENT(name) \
DEFINE_EVENT ( xfs_map_extent_deferred_class , name , \
TP_PROTO ( struct xfs_mount * mp , xfs_agnumber_t agno , \
int op , \
xfs_agblock_t agbno , \
xfs_ino_t ino , \
int whichfork , \
xfs_fileoff_t offset , \
xfs_filblks_t len , \
xfs_exntst_t state ) , \
TP_ARGS ( mp , agno , op , agbno , ino , whichfork , offset , len , state ) )
/* Transaction-keyed deferred-operation tracepoints (tp, caller_ip). */
DEFINE_DEFER_EVENT ( xfs_defer_cancel ) ;
DEFINE_DEFER_EVENT ( xfs_defer_trans_roll ) ;
DEFINE_DEFER_EVENT ( xfs_defer_trans_abort ) ;
DEFINE_DEFER_EVENT ( xfs_defer_finish ) ;
DEFINE_DEFER_EVENT ( xfs_defer_finish_done ) ;
/* Error variants (tp, error). */
DEFINE_DEFER_ERROR_EVENT ( xfs_defer_trans_roll_error ) ;
DEFINE_DEFER_ERROR_EVENT ( xfs_defer_finish_error ) ;
2018-08-01 17:20:34 +03:00
/* Per-work-item tracepoints (mp, dfp). */
DEFINE_DEFER_PENDING_EVENT ( xfs_defer_create_intent ) ;
DEFINE_DEFER_PENDING_EVENT ( xfs_defer_cancel_list ) ;
2016-08-03 04:13:02 +03:00
DEFINE_DEFER_PENDING_EVENT ( xfs_defer_pending_finish ) ;
DEFINE_DEFER_PENDING_EVENT ( xfs_defer_pending_abort ) ;
xfs: periodically relog deferred intent items
There's a subtle design flaw in the deferred log item code that can lead
to pinning the log tail. Taking up the defer ops chain examples from
the previous commit, we can get trapped in sequences like this:
Caller hands us a transaction t0 with D0-D3 attached. The defer ops
chain will look like the following if the transaction rolls succeed:
t1: D0(t0), D1(t0), D2(t0), D3(t0)
t2: d4(t1), d5(t1), D1(t0), D2(t0), D3(t0)
t3: d5(t1), D1(t0), D2(t0), D3(t0)
...
t9: d9(t7), D3(t0)
t10: D3(t0)
t11: d10(t10), d11(t10)
t12: d11(t10)
In transaction 9, we finish d9 and try to roll to t10 while holding onto
an intent item for D3 that we logged in t0.
The previous commit changed the order in which we place new defer ops in
the defer ops processing chain to reduce the maximum chain length. Now
make xfs_defer_finish_noroll capable of relogging the entire chain
periodically so that we can always move the log tail forward. Most
chains will never get relogged, except for operations that generate very
long chains (large extents containing many blocks with different sharing
levels) or are on filesystems with small logs and a lot of ongoing
metadata updates.
Callers are now required to ensure that the transaction reservation is
large enough to handle logging done items and new intent items for the
maximum possible chain length. Most callers are careful to keep the
chain lengths low, so the overhead should be minimal.
The decision to relog an intent item is made based on whether the intent
was logged in a previous checkpoint, since there's no point in relogging
an intent into the same checkpoint.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
2020-09-28 02:18:13 +03:00
/* Per-work-item tracepoint (mp, dfp) for relogging an intent. */
DEFINE_DEFER_PENDING_EVENT ( xfs_defer_relog_intent ) ;
2016-08-03 04:13:02 +03:00
2016-08-03 04:26:33 +03:00
/*
 * The extent-free deferred tracepoints reuse the physical-extent deferred
 * class; DEFINE_BMAP_FREE_DEFERRED_EVENT is only an alias for its
 * DEFINE macro.
 */
# define DEFINE_BMAP_FREE_DEFERRED_EVENT DEFINE_PHYS_EXTENT_DEFERRED_EVENT
DEFINE_BMAP_FREE_DEFERRED_EVENT ( xfs_bmap_free_defer ) ;
DEFINE_BMAP_FREE_DEFERRED_EVENT ( xfs_bmap_free_deferred ) ;
2018-05-08 03:38:47 +03:00
DEFINE_BMAP_FREE_DEFERRED_EVENT ( xfs_agfl_free_defer ) ;
DEFINE_BMAP_FREE_DEFERRED_EVENT ( xfs_agfl_free_deferred ) ;
2016-08-03 04:26:33 +03:00
2016-08-03 04:33:43 +03:00
/* rmap tracepoints */
/*
 * Event class for rmap operations described by an extent plus owner info.
 *
 * Each record stores the device, AG number, AG block number and length,
 * together with the owner, offset and flags copied from @oinfo.
 * When @unwritten is true, XFS_RMAP_UNWRITTEN is or'ed into the recorded
 * flags.  The owner is printed with a signed %lld.
 */
DECLARE_EVENT_CLASS ( xfs_rmap_class ,
TP_PROTO ( struct xfs_mount * mp , xfs_agnumber_t agno ,
xfs_agblock_t agbno , xfs_extlen_t len , bool unwritten ,
2018-12-12 19:46:23 +03:00
const struct xfs_owner_info * oinfo ) ,
2016-08-03 04:33:43 +03:00
TP_ARGS ( mp , agno , agbno , len , unwritten , oinfo ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_agnumber_t , agno )
__field ( xfs_agblock_t , agbno )
__field ( xfs_extlen_t , len )
__field ( uint64_t , owner )
__field ( uint64_t , offset )
__field ( unsigned long , flags )
) ,
TP_fast_assign (
__entry - > dev = mp - > m_super - > s_dev ;
__entry - > agno = agno ;
__entry - > agbno = agbno ;
__entry - > len = len ;
__entry - > owner = oinfo - > oi_owner ;
__entry - > offset = oinfo - > oi_offset ;
__entry - > flags = oinfo - > oi_flags ;
2016-08-03 04:36:07 +03:00
if ( unwritten )
__entry - > flags | = XFS_RMAP_UNWRITTEN ;
2016-08-03 04:33:43 +03:00
) ,
TP_printk ( " dev %d:%d agno %u agbno %u len %u owner %lld offset %llu flags 0x%lx " ,
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > agno ,
__entry - > agbno ,
__entry - > len ,
__entry - > owner ,
__entry - > offset ,
__entry - > flags )
) ;
/* Define tracepoint @name as an instance of xfs_rmap_class. */
# define DEFINE_RMAP_EVENT(name) \
DEFINE_EVENT ( xfs_rmap_class , name , \
TP_PROTO ( struct xfs_mount * mp , xfs_agnumber_t agno , \
xfs_agblock_t agbno , xfs_extlen_t len , bool unwritten , \
2018-12-12 19:46:23 +03:00
const struct xfs_owner_info * oinfo ) , \
2016-08-03 04:33:43 +03:00
TP_ARGS ( mp , agno , agbno , len , unwritten , oinfo ) )
/*
 * simple AG-based error/%ip tracepoint class
 *
 * Each record stores the device, the AG number, the error code and the
 * caller's instruction pointer, which is printed through %pS.
 */
DECLARE_EVENT_CLASS ( xfs_ag_error_class ,
TP_PROTO ( struct xfs_mount * mp , xfs_agnumber_t agno , int error ,
unsigned long caller_ip ) ,
TP_ARGS ( mp , agno , error , caller_ip ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_agnumber_t , agno )
__field ( int , error )
__field ( unsigned long , caller_ip )
) ,
TP_fast_assign (
__entry - > dev = mp - > m_super - > s_dev ;
__entry - > agno = agno ;
__entry - > error = error ;
__entry - > caller_ip = caller_ip ;
) ,
2018-01-09 22:46:05 +03:00
TP_printk ( " dev %d:%d agno %u error %d caller %pS " ,
2016-08-03 04:33:43 +03:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > agno ,
__entry - > error ,
( char * ) __entry - > caller_ip )
) ;
/* Define tracepoint @name as an instance of xfs_ag_error_class. */
# define DEFINE_AG_ERROR_EVENT(name) \
DEFINE_EVENT ( xfs_ag_error_class , name , \
TP_PROTO ( struct xfs_mount * mp , xfs_agnumber_t agno , int error , \
unsigned long caller_ip ) , \
TP_ARGS ( mp , agno , error , caller_ip ) )
/*
 * rmap unmap/map/convert tracepoints.  Each operation has a start event,
 * a _done event and an _error event; the _error events use the AG error
 * class.  xfs_rmap_convert_state is also declared with the AG error
 * class, so its record carries agno, error and caller.
 */
DEFINE_RMAP_EVENT ( xfs_rmap_unmap ) ;
DEFINE_RMAP_EVENT ( xfs_rmap_unmap_done ) ;
DEFINE_AG_ERROR_EVENT ( xfs_rmap_unmap_error ) ;
DEFINE_RMAP_EVENT ( xfs_rmap_map ) ;
DEFINE_RMAP_EVENT ( xfs_rmap_map_done ) ;
DEFINE_AG_ERROR_EVENT ( xfs_rmap_map_error ) ;
2016-08-03 05:03:19 +03:00
DEFINE_RMAP_EVENT ( xfs_rmap_convert ) ;
DEFINE_RMAP_EVENT ( xfs_rmap_convert_done ) ;
DEFINE_AG_ERROR_EVENT ( xfs_rmap_convert_error ) ;
DEFINE_AG_ERROR_EVENT ( xfs_rmap_convert_state ) ;
2016-08-03 04:33:43 +03:00
2016-08-03 04:43:24 +03:00
/*
 * Event class for rmap btree record tracepoints.
 *
 * Unlike xfs_rmap_class, the owner, offset and flags are passed in as
 * separate arguments and not through a struct xfs_owner_info, and flags
 * is an unsigned int.  The owner is printed with a signed %lld.
 */
DECLARE_EVENT_CLASS ( xfs_rmapbt_class ,
TP_PROTO ( struct xfs_mount * mp , xfs_agnumber_t agno ,
xfs_agblock_t agbno , xfs_extlen_t len ,
uint64_t owner , uint64_t offset , unsigned int flags ) ,
TP_ARGS ( mp , agno , agbno , len , owner , offset , flags ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_agnumber_t , agno )
__field ( xfs_agblock_t , agbno )
__field ( xfs_extlen_t , len )
__field ( uint64_t , owner )
__field ( uint64_t , offset )
__field ( unsigned int , flags )
) ,
TP_fast_assign (
__entry - > dev = mp - > m_super - > s_dev ;
__entry - > agno = agno ;
__entry - > agbno = agbno ;
__entry - > len = len ;
__entry - > owner = owner ;
__entry - > offset = offset ;
__entry - > flags = flags ;
) ,
TP_printk ( " dev %d:%d agno %u agbno %u len %u owner %lld offset %llu flags 0x%x " ,
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > agno ,
__entry - > agbno ,
__entry - > len ,
__entry - > owner ,
__entry - > offset ,
__entry - > flags )
) ;
/* Define tracepoint @name as an instance of xfs_rmapbt_class. */
# define DEFINE_RMAPBT_EVENT(name) \
DEFINE_EVENT ( xfs_rmapbt_class , name , \
TP_PROTO ( struct xfs_mount * mp , xfs_agnumber_t agno , \
xfs_agblock_t agbno , xfs_extlen_t len , \
uint64_t owner , uint64_t offset , unsigned int flags ) , \
TP_ARGS ( mp , agno , agbno , len , owner , offset , flags ) )
/*
 * Deferred rmap tracepoints reuse the map-extent deferred class;
 * DEFINE_RMAP_DEFERRED_EVENT is only an alias for its DEFINE macro.
 */
# define DEFINE_RMAP_DEFERRED_EVENT DEFINE_MAP_EXTENT_DEFERRED_EVENT
DEFINE_RMAP_DEFERRED_EVENT ( xfs_rmap_defer ) ;
DEFINE_RMAP_DEFERRED_EVENT ( xfs_rmap_deferred ) ;
2016-08-03 04:39:05 +03:00
/* rmapbt block alloc/free events; DEFINE_BUSY_EVENT is defined elsewhere */
DEFINE_BUSY_EVENT ( xfs_rmapbt_alloc_block ) ;
DEFINE_BUSY_EVENT ( xfs_rmapbt_free_block ) ;
2016-08-03 04:43:24 +03:00
/* rmap btree record events and their AG-error counterparts */
DEFINE_RMAPBT_EVENT ( xfs_rmap_update ) ;
DEFINE_RMAPBT_EVENT ( xfs_rmap_insert ) ;
DEFINE_RMAPBT_EVENT ( xfs_rmap_delete ) ;
DEFINE_AG_ERROR_EVENT ( xfs_rmap_insert_error ) ;
DEFINE_AG_ERROR_EVENT ( xfs_rmap_delete_error ) ;
DEFINE_AG_ERROR_EVENT ( xfs_rmap_update_error ) ;
2016-10-03 19:11:48 +03:00
/* rmap btree lookup and neighbor-search events */
DEFINE_RMAPBT_EVENT ( xfs_rmap_find_left_neighbor_candidate ) ;
DEFINE_RMAPBT_EVENT ( xfs_rmap_find_left_neighbor_query ) ;
DEFINE_RMAPBT_EVENT ( xfs_rmap_lookup_le_range_candidate ) ;
DEFINE_RMAPBT_EVENT ( xfs_rmap_lookup_le_range ) ;
2016-08-03 04:44:21 +03:00
DEFINE_RMAPBT_EVENT ( xfs_rmap_lookup_le_range_result ) ;
DEFINE_RMAPBT_EVENT ( xfs_rmap_find_right_neighbor_result ) ;
2016-08-03 05:03:19 +03:00
DEFINE_RMAPBT_EVENT ( xfs_rmap_find_left_neighbor_result ) ;
2016-08-03 04:39:05 +03:00
2016-10-03 19:11:28 +03:00
/*
 * deferred bmbt updates
 *
 * DEFINE_BMAP_DEFERRED_EVENT aliases DEFINE_RMAP_DEFERRED_EVENT, so these
 * events share the map-extent deferred record layout.
 */
# define DEFINE_BMAP_DEFERRED_EVENT DEFINE_RMAP_DEFERRED_EVENT
DEFINE_BMAP_DEFERRED_EVENT ( xfs_bmap_defer ) ;
DEFINE_BMAP_DEFERRED_EVENT ( xfs_bmap_deferred ) ;
xfs: set up per-AG free space reservations
One unfortunate quirk of the reference count and reverse mapping
btrees -- they can expand in size when blocks are written to *other*
allocation groups if, say, one large extent becomes a lot of tiny
extents. Since we don't want to start throwing errors in the middle
of CoWing, we need to reserve some blocks to handle future expansion.
The transaction block reservation counters aren't sufficient here
because we have to have a reserve of blocks in every AG, not just
somewhere in the filesystem.
Therefore, create two per-AG block reservation pools. One feeds the
AGFL so that rmapbt expansion always succeeds, and the other feeds all
other metadata so that refcountbt expansion never fails.
Use the count of how many reserved blocks we need to have on hand to
create a virtual reservation in the AG. Through selective clamping of
the maximum length of allocation requests and of the length of the
longest free extent, we can make it look like there's less free space
in the AG unless the reservation owner is asking for blocks.
In other words, play some accounting tricks in-core to make sure that
we always have blocks available. On the plus side, there's nothing to
clean up if we crash, which is in contrast to the strategy that the rough
draft used (actually removing extents from the freespace btrees).
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-09-19 03:30:52 +03:00
/*
 * per-AG reservation
 *
 * Event class that records a per-AG reservation's state.  Each record
 * stores the device, AG number, reservation type, pag->pagf_freeblks,
 * pag->pagf_flcount, the reservation's ar_reserved and ar_asked counters,
 * and the length passed in.
 *
 * The printed line uses the label "resv" twice: first for the reservation
 * type and then for the reserved block count.
 */
DECLARE_EVENT_CLASS ( xfs_ag_resv_class ,
TP_PROTO ( struct xfs_perag * pag , enum xfs_ag_resv_type resv ,
xfs_extlen_t len ) ,
TP_ARGS ( pag , resv , len ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_agnumber_t , agno )
__field ( int , resv )
__field ( xfs_extlen_t , freeblks )
__field ( xfs_extlen_t , flcount )
__field ( xfs_extlen_t , reserved )
__field ( xfs_extlen_t , asked )
__field ( xfs_extlen_t , len )
) ,
TP_fast_assign (
struct xfs_ag_resv * r = xfs_perag_resv ( pag , resv ) ;
__entry - > dev = pag - > pag_mount - > m_super - > s_dev ;
__entry - > agno = pag - > pag_agno ;
__entry - > resv = resv ;
__entry - > freeblks = pag - > pagf_freeblks ;
__entry - > flcount = pag - > pagf_flcount ;
/* r may be NULL; reserved and asked are then recorded as 0 */
__entry - > reserved = r ? r - > ar_reserved : 0 ;
__entry - > asked = r ? r - > ar_asked : 0 ;
__entry - > len = len ;
) ,
2017-04-14 21:43:27 +03:00
TP_printk ( " dev %d:%d agno %u resv %d freeblks %u flcount %u "
" resv %u ask %u len %u " ,
xfs: set up per-AG free space reservations
One unfortunate quirk of the reference count and reverse mapping
btrees -- they can expand in size when blocks are written to *other*
allocation groups if, say, one large extent becomes a lot of tiny
extents. Since we don't want to start throwing errors in the middle
of CoWing, we need to reserve some blocks to handle future expansion.
The transaction block reservation counters aren't sufficient here
because we have to have a reserve of blocks in every AG, not just
somewhere in the filesystem.
Therefore, create two per-AG block reservation pools. One feeds the
AGFL so that rmapbt expansion always succeeds, and the other feeds all
other metadata so that refcountbt expansion never fails.
Use the count of how many reserved blocks we need to have on hand to
create a virtual reservation in the AG. Through selective clamping of
the maximum length of allocation requests and of the length of the
longest free extent, we can make it look like there's less free space
in the AG unless the reservation owner is asking for blocks.
In other words, play some accounting tricks in-core to make sure that
we always have blocks available. On the plus side, there's nothing to
clean up if we crash, which is contrast to the strategy that the rough
draft used (actually removing extents from the freespace btrees).
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-09-19 03:30:52 +03:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > agno ,
__entry - > resv ,
__entry - > freeblks ,
__entry - > flcount ,
__entry - > reserved ,
__entry - > asked ,
__entry - > len )
)
/* Define tracepoint @name as an instance of xfs_ag_resv_class. */
# define DEFINE_AG_RESV_EVENT(name) \
DEFINE_EVENT ( xfs_ag_resv_class , name , \
TP_PROTO ( struct xfs_perag * pag , enum xfs_ag_resv_type type , \
xfs_extlen_t len ) , \
TP_ARGS ( pag , type , len ) )
/*
 * per-AG reservation tracepoints
 *
 * The two _error events use the AG error class (agno, error, caller) and
 * not the reservation class.
 */
DEFINE_AG_RESV_EVENT ( xfs_ag_resv_init ) ;
DEFINE_AG_RESV_EVENT ( xfs_ag_resv_free ) ;
DEFINE_AG_RESV_EVENT ( xfs_ag_resv_alloc_extent ) ;
DEFINE_AG_RESV_EVENT ( xfs_ag_resv_free_extent ) ;
DEFINE_AG_RESV_EVENT ( xfs_ag_resv_critical ) ;
DEFINE_AG_RESV_EVENT ( xfs_ag_resv_needed ) ;
DEFINE_AG_ERROR_EVENT ( xfs_ag_resv_free_error ) ;
DEFINE_AG_ERROR_EVENT ( xfs_ag_resv_init_error ) ;
2016-10-03 19:11:15 +03:00
/* refcount tracepoint classes */
/*
 * reuse the discard trace class for agbno/aglen-based traces:
 * DEFINE_AG_EXTENT_EVENT(name) expands directly to DEFINE_DISCARD_EVENT(name).
 */
# define DEFINE_AG_EXTENT_EVENT(name) DEFINE_DISCARD_EVENT(name)
/*
 * ag btree lookup tracepoint class
 *
 * Each record stores the device, the AG number, the AG block number and
 * the lookup direction.  The direction is printed twice: symbolically
 * through XFS_AG_BTREE_CMP_FORMAT_STR and then as a raw number in
 * parentheses.
 */
2018-12-19 01:32:29 +03:00
TRACE_DEFINE_ENUM ( XFS_LOOKUP_EQi ) ;
TRACE_DEFINE_ENUM ( XFS_LOOKUP_LEi ) ;
TRACE_DEFINE_ENUM ( XFS_LOOKUP_GEi ) ;
2016-10-03 19:11:15 +03:00
DECLARE_EVENT_CLASS ( xfs_ag_btree_lookup_class ,
TP_PROTO ( struct xfs_mount * mp , xfs_agnumber_t agno ,
xfs_agblock_t agbno , xfs_lookup_t dir ) ,
TP_ARGS ( mp , agno , agbno , dir ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_agnumber_t , agno )
__field ( xfs_agblock_t , agbno )
__field ( xfs_lookup_t , dir )
) ,
TP_fast_assign (
__entry - > dev = mp - > m_super - > s_dev ;
__entry - > agno = agno ;
__entry - > agbno = agbno ;
__entry - > dir = dir ;
) ,
2017-04-14 21:43:27 +03:00
TP_printk ( " dev %d:%d agno %u agbno %u cmp %s(%d) " ,
2016-10-03 19:11:15 +03:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > agno ,
__entry - > agbno ,
__print_symbolic ( __entry - > dir , XFS_AG_BTREE_CMP_FORMAT_STR ) ,
__entry - > dir )
)
/* Define tracepoint @name as an instance of xfs_ag_btree_lookup_class. */
# define DEFINE_AG_BTREE_LOOKUP_EVENT(name) \
DEFINE_EVENT ( xfs_ag_btree_lookup_class , name , \
TP_PROTO ( struct xfs_mount * mp , xfs_agnumber_t agno , \
xfs_agblock_t agbno , xfs_lookup_t dir ) , \
TP_ARGS ( mp , agno , agbno , dir ) )
/*
 * single-rcext tracepoint class
 *
 * Each record stores the device, the AG number and the rc_startblock,
 * rc_blockcount and rc_refcount of @irec, printed as agbno, len and
 * refcount.
 */
DECLARE_EVENT_CLASS ( xfs_refcount_extent_class ,
TP_PROTO ( struct xfs_mount * mp , xfs_agnumber_t agno ,
struct xfs_refcount_irec * irec ) ,
TP_ARGS ( mp , agno , irec ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_agnumber_t , agno )
__field ( xfs_agblock_t , startblock )
__field ( xfs_extlen_t , blockcount )
__field ( xfs_nlink_t , refcount )
) ,
TP_fast_assign (
__entry - > dev = mp - > m_super - > s_dev ;
__entry - > agno = agno ;
__entry - > startblock = irec - > rc_startblock ;
__entry - > blockcount = irec - > rc_blockcount ;
__entry - > refcount = irec - > rc_refcount ;
) ,
2017-04-14 21:43:27 +03:00
TP_printk ( " dev %d:%d agno %u agbno %u len %u refcount %u " ,
2016-10-03 19:11:15 +03:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > agno ,
__entry - > startblock ,
__entry - > blockcount ,
__entry - > refcount )
)
/* Define tracepoint @name as an instance of xfs_refcount_extent_class. */
# define DEFINE_REFCOUNT_EXTENT_EVENT(name) \
DEFINE_EVENT ( xfs_refcount_extent_class , name , \
TP_PROTO ( struct xfs_mount * mp , xfs_agnumber_t agno , \
struct xfs_refcount_irec * irec ) , \
TP_ARGS ( mp , agno , irec ) )
/*
 * single-rcext and an agbno tracepoint class
 *
 * Same record as the single-rcext class, plus the extra @agbno argument,
 * which is printed after the extent as "@ agbno".
 */
DECLARE_EVENT_CLASS ( xfs_refcount_extent_at_class ,
TP_PROTO ( struct xfs_mount * mp , xfs_agnumber_t agno ,
struct xfs_refcount_irec * irec , xfs_agblock_t agbno ) ,
TP_ARGS ( mp , agno , irec , agbno ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_agnumber_t , agno )
__field ( xfs_agblock_t , startblock )
__field ( xfs_extlen_t , blockcount )
__field ( xfs_nlink_t , refcount )
__field ( xfs_agblock_t , agbno )
) ,
TP_fast_assign (
__entry - > dev = mp - > m_super - > s_dev ;
__entry - > agno = agno ;
__entry - > startblock = irec - > rc_startblock ;
__entry - > blockcount = irec - > rc_blockcount ;
__entry - > refcount = irec - > rc_refcount ;
__entry - > agbno = agbno ;
) ,
2017-04-14 21:43:27 +03:00
TP_printk ( " dev %d:%d agno %u agbno %u len %u refcount %u @ agbno %u " ,
2016-10-03 19:11:15 +03:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > agno ,
__entry - > startblock ,
__entry - > blockcount ,
__entry - > refcount ,
__entry - > agbno )
)
/* Define tracepoint @name as an instance of xfs_refcount_extent_at_class. */
# define DEFINE_REFCOUNT_EXTENT_AT_EVENT(name) \
DEFINE_EVENT ( xfs_refcount_extent_at_class , name , \
TP_PROTO ( struct xfs_mount * mp , xfs_agnumber_t agno , \
struct xfs_refcount_irec * irec , xfs_agblock_t agbno ) , \
TP_ARGS ( mp , agno , irec , agbno ) )
/*
 * double-rcext tracepoint class
 *
 * Each record stores the device, the AG number and the rc_startblock,
 * rc_blockcount and rc_refcount of both @i1 and @i2.  The two extents are
 * printed in that order, separated by "--".
 */
DECLARE_EVENT_CLASS ( xfs_refcount_double_extent_class ,
TP_PROTO ( struct xfs_mount * mp , xfs_agnumber_t agno ,
struct xfs_refcount_irec * i1 , struct xfs_refcount_irec * i2 ) ,
TP_ARGS ( mp , agno , i1 , i2 ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_agnumber_t , agno )
__field ( xfs_agblock_t , i1_startblock )
__field ( xfs_extlen_t , i1_blockcount )
__field ( xfs_nlink_t , i1_refcount )
__field ( xfs_agblock_t , i2_startblock )
__field ( xfs_extlen_t , i2_blockcount )
__field ( xfs_nlink_t , i2_refcount )
) ,
TP_fast_assign (
__entry - > dev = mp - > m_super - > s_dev ;
__entry - > agno = agno ;
__entry - > i1_startblock = i1 - > rc_startblock ;
__entry - > i1_blockcount = i1 - > rc_blockcount ;
__entry - > i1_refcount = i1 - > rc_refcount ;
__entry - > i2_startblock = i2 - > rc_startblock ;
__entry - > i2_blockcount = i2 - > rc_blockcount ;
__entry - > i2_refcount = i2 - > rc_refcount ;
) ,
TP_printk ( " dev %d:%d agno %u agbno %u len %u refcount %u -- "
2017-04-14 21:43:27 +03:00
" agbno %u len %u refcount %u " ,
2016-10-03 19:11:15 +03:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > agno ,
__entry - > i1_startblock ,
__entry - > i1_blockcount ,
__entry - > i1_refcount ,
__entry - > i2_startblock ,
__entry - > i2_blockcount ,
__entry - > i2_refcount )
)
/* Instantiate tracepoint @name from xfs_refcount_double_extent_class. */
#define DEFINE_REFCOUNT_DOUBLE_EXTENT_EVENT(name) \
DEFINE_EVENT(xfs_refcount_double_extent_class, name, \
	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
		 struct xfs_refcount_irec *i1, struct xfs_refcount_irec *i2), \
	TP_ARGS(mp, agno, i1, i2))
/*
 * Event class: a pair of refcount records plus a separate AG block number.
 *
 * Captures the same per-record fields as the double-extent class and adds
 * @agbno, which is printed after the '@'.
 */
DECLARE_EVENT_CLASS(xfs_refcount_double_extent_at_class,
	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
		 struct xfs_refcount_irec *i1, struct xfs_refcount_irec *i2,
		 xfs_agblock_t agbno),
	TP_ARGS(mp, agno, i1, i2, agbno),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_agnumber_t, agno)
		__field(xfs_agblock_t, i1_startblock)
		__field(xfs_extlen_t, i1_blockcount)
		__field(xfs_nlink_t, i1_refcount)
		__field(xfs_agblock_t, i2_startblock)
		__field(xfs_extlen_t, i2_blockcount)
		__field(xfs_nlink_t, i2_refcount)
		__field(xfs_agblock_t, agbno)
	),
	TP_fast_assign(
		__entry->dev = mp->m_super->s_dev;
		__entry->agno = agno;
		/* first record */
		__entry->i1_startblock = i1->rc_startblock;
		__entry->i1_blockcount = i1->rc_blockcount;
		__entry->i1_refcount = i1->rc_refcount;
		/* second record */
		__entry->i2_startblock = i2->rc_startblock;
		__entry->i2_blockcount = i2->rc_blockcount;
		__entry->i2_refcount = i2->rc_refcount;
		__entry->agbno = agbno;
	),
	TP_printk(" dev %d:%d agno %u agbno %u len %u refcount %u -- "
		  " agbno %u len %u refcount %u @ agbno %u ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->agno,
		  __entry->i1_startblock, __entry->i1_blockcount,
		  __entry->i1_refcount,
		  __entry->i2_startblock, __entry->i2_blockcount,
		  __entry->i2_refcount,
		  __entry->agbno)
)
/* Instantiate tracepoint @name from xfs_refcount_double_extent_at_class. */
#define DEFINE_REFCOUNT_DOUBLE_EXTENT_AT_EVENT(name) \
DEFINE_EVENT(xfs_refcount_double_extent_at_class, name, \
	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
		 struct xfs_refcount_irec *i1, struct xfs_refcount_irec *i2, \
		 xfs_agblock_t agbno), \
	TP_ARGS(mp, agno, i1, i2, agbno))
/*
 * Event class: three refcount records.
 *
 * Start block, block count and reference count are captured from @i1, @i2
 * and @i3 and printed in that order, separated by "--".
 */
DECLARE_EVENT_CLASS(xfs_refcount_triple_extent_class,
	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
		 struct xfs_refcount_irec *i1, struct xfs_refcount_irec *i2,
		 struct xfs_refcount_irec *i3),
	TP_ARGS(mp, agno, i1, i2, i3),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_agnumber_t, agno)
		__field(xfs_agblock_t, i1_startblock)
		__field(xfs_extlen_t, i1_blockcount)
		__field(xfs_nlink_t, i1_refcount)
		__field(xfs_agblock_t, i2_startblock)
		__field(xfs_extlen_t, i2_blockcount)
		__field(xfs_nlink_t, i2_refcount)
		__field(xfs_agblock_t, i3_startblock)
		__field(xfs_extlen_t, i3_blockcount)
		__field(xfs_nlink_t, i3_refcount)
	),
	TP_fast_assign(
		__entry->dev = mp->m_super->s_dev;
		__entry->agno = agno;
		/* first record */
		__entry->i1_startblock = i1->rc_startblock;
		__entry->i1_blockcount = i1->rc_blockcount;
		__entry->i1_refcount = i1->rc_refcount;
		/* second record */
		__entry->i2_startblock = i2->rc_startblock;
		__entry->i2_blockcount = i2->rc_blockcount;
		__entry->i2_refcount = i2->rc_refcount;
		/* third record */
		__entry->i3_startblock = i3->rc_startblock;
		__entry->i3_blockcount = i3->rc_blockcount;
		__entry->i3_refcount = i3->rc_refcount;
	),
	TP_printk(" dev %d:%d agno %u agbno %u len %u refcount %u -- "
		  " agbno %u len %u refcount %u -- "
		  " agbno %u len %u refcount %u ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->agno,
		  __entry->i1_startblock, __entry->i1_blockcount,
		  __entry->i1_refcount,
		  __entry->i2_startblock, __entry->i2_blockcount,
		  __entry->i2_refcount,
		  __entry->i3_startblock, __entry->i3_blockcount,
		  __entry->i3_refcount)
);
/* Instantiate tracepoint @name from xfs_refcount_triple_extent_class. */
#define DEFINE_REFCOUNT_TRIPLE_EXTENT_EVENT(name) \
DEFINE_EVENT(xfs_refcount_triple_extent_class, name, \
	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
		 struct xfs_refcount_irec *i1, struct xfs_refcount_irec *i2, \
		 struct xfs_refcount_irec *i3), \
	TP_ARGS(mp, agno, i1, i2, i3))
/* Tracepoints for the refcount btree itself. */
DEFINE_BUSY_EVENT(xfs_refcountbt_alloc_block);
DEFINE_BUSY_EVENT(xfs_refcountbt_free_block);
DEFINE_AG_BTREE_LOOKUP_EVENT(xfs_refcount_lookup);
DEFINE_REFCOUNT_EXTENT_EVENT(xfs_refcount_get);
DEFINE_REFCOUNT_EXTENT_EVENT(xfs_refcount_update);
DEFINE_REFCOUNT_EXTENT_EVENT(xfs_refcount_insert);
DEFINE_REFCOUNT_EXTENT_EVENT(xfs_refcount_delete);
DEFINE_AG_ERROR_EVENT(xfs_refcount_insert_error);
DEFINE_AG_ERROR_EVENT(xfs_refcount_delete_error);
DEFINE_AG_ERROR_EVENT(xfs_refcount_update_error);

/* Tracepoints for refcount adjustment. */
DEFINE_AG_EXTENT_EVENT(xfs_refcount_increase);
DEFINE_AG_EXTENT_EVENT(xfs_refcount_decrease);
DEFINE_AG_EXTENT_EVENT(xfs_refcount_cow_increase);
DEFINE_AG_EXTENT_EVENT(xfs_refcount_cow_decrease);
DEFINE_REFCOUNT_TRIPLE_EXTENT_EVENT(xfs_refcount_merge_center_extents);
DEFINE_REFCOUNT_EXTENT_EVENT(xfs_refcount_modify_extent);
DEFINE_REFCOUNT_EXTENT_EVENT(xfs_refcount_recover_extent);
DEFINE_REFCOUNT_EXTENT_AT_EVENT(xfs_refcount_split_extent);
DEFINE_REFCOUNT_DOUBLE_EXTENT_EVENT(xfs_refcount_merge_left_extent);
DEFINE_REFCOUNT_DOUBLE_EXTENT_EVENT(xfs_refcount_merge_right_extent);
DEFINE_REFCOUNT_DOUBLE_EXTENT_AT_EVENT(xfs_refcount_find_left_extent);
DEFINE_REFCOUNT_DOUBLE_EXTENT_AT_EVENT(xfs_refcount_find_right_extent);
DEFINE_AG_ERROR_EVENT(xfs_refcount_adjust_error);
DEFINE_AG_ERROR_EVENT(xfs_refcount_adjust_cow_error);
DEFINE_AG_ERROR_EVENT(xfs_refcount_merge_center_extents_error);
DEFINE_AG_ERROR_EVENT(xfs_refcount_modify_extent_error);
DEFINE_AG_ERROR_EVENT(xfs_refcount_split_extent_error);
DEFINE_AG_ERROR_EVENT(xfs_refcount_merge_left_extent_error);
DEFINE_AG_ERROR_EVENT(xfs_refcount_merge_right_extent_error);
DEFINE_AG_ERROR_EVENT(xfs_refcount_find_left_extent_error);
DEFINE_AG_ERROR_EVENT(xfs_refcount_find_right_extent_error);

/* Tracepoints for the reflink helpers. */
DEFINE_AG_EXTENT_EVENT(xfs_refcount_find_shared);
DEFINE_AG_EXTENT_EVENT(xfs_refcount_find_shared_result);
DEFINE_AG_ERROR_EVENT(xfs_refcount_find_shared_error);

/* Deferred refcount events are an alias of the physical-extent deferred events. */
#define DEFINE_REFCOUNT_DEFERRED_EVENT DEFINE_PHYS_EXTENT_DEFERRED_EVENT
DEFINE_REFCOUNT_DEFERRED_EVENT(xfs_refcount_defer);
DEFINE_REFCOUNT_DEFERRED_EVENT(xfs_refcount_deferred);
2016-10-03 19:11:15 +03:00
2016-10-03 19:11:21 +03:00
/*
 * Records an operation type together with two (agbno, len) pairs: the
 * extent passed in and the "new" extent.  All values are copied straight
 * from the arguments.
 */
TRACE_EVENT(xfs_refcount_finish_one_leftover,
	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
		 int type, xfs_agblock_t agbno, xfs_extlen_t len,
		 xfs_agblock_t new_agbno, xfs_extlen_t new_len),
	TP_ARGS(mp, agno, type, agbno, len, new_agbno, new_len),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_agnumber_t, agno)
		__field(int, type)
		__field(xfs_agblock_t, agbno)
		__field(xfs_extlen_t, len)
		__field(xfs_agblock_t, new_agbno)
		__field(xfs_extlen_t, new_len)
	),
	TP_fast_assign(
		__entry->dev = mp->m_super->s_dev;
		__entry->agno = agno;
		__entry->type = type;
		__entry->agbno = agbno;
		__entry->len = len;
		__entry->new_agbno = new_agbno;
		__entry->new_len = new_len;
	),
	/* Note: type is printed before agno, unlike the field order. */
	TP_printk(" dev %d:%d type %d agno %u agbno %u len %u new_agbno %u new_len %u ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->type, __entry->agno,
		  __entry->agbno, __entry->len,
		  __entry->new_agbno, __entry->new_len)
);
2016-10-03 19:11:27 +03:00
/* simple inode-based error/%ip tracepoint class */
DECLARE_EVENT_CLASS ( xfs_inode_error_class ,
TP_PROTO ( struct xfs_inode * ip , int error , unsigned long caller_ip ) ,
TP_ARGS ( ip , error , caller_ip ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_ino_t , ino )
__field ( int , error )
__field ( unsigned long , caller_ip )
) ,
TP_fast_assign (
__entry - > dev = VFS_I ( ip ) - > i_sb - > s_dev ;
__entry - > ino = ip - > i_ino ;
__entry - > error = error ;
__entry - > caller_ip = caller_ip ;
) ,
2018-01-09 22:46:05 +03:00
TP_printk ( " dev %d:%d ino %llx error %d caller %pS " ,
2016-10-03 19:11:27 +03:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > ino ,
__entry - > error ,
( char * ) __entry - > caller_ip )
) ;
/* Instantiate tracepoint @name from xfs_inode_error_class. */
#define DEFINE_INODE_ERROR_EVENT(name) \
DEFINE_EVENT(xfs_inode_error_class, name, \
	TP_PROTO(struct xfs_inode *ip, int error, \
		 unsigned long caller_ip), \
	TP_ARGS(ip, error, caller_ip))
2016-10-03 19:11:30 +03:00
/* reflink tracepoint classes */

/*
 * Event class: an I/O that involves two files.
 *
 * For both @src and @dest the inode number, the VFS i_size and the XFS
 * i_disk_size are captured, along with the respective offsets and a shared
 * length.  The device is taken from the source inode only.
 *
 * NOTE(review): @len arrives as xfs_off_t but is stored in a size_t field
 * and printed with %zd -- confirm this cannot truncate or misprint.
 */
DECLARE_EVENT_CLASS(xfs_double_io_class,
	TP_PROTO(struct xfs_inode *src, xfs_off_t soffset, xfs_off_t len,
		 struct xfs_inode *dest, xfs_off_t doffset),
	TP_ARGS(src, soffset, len, dest, doffset),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_ino_t, src_ino)
		__field(loff_t, src_isize)
		__field(loff_t, src_disize)
		__field(loff_t, src_offset)
		__field(size_t, len)
		__field(xfs_ino_t, dest_ino)
		__field(loff_t, dest_isize)
		__field(loff_t, dest_disize)
		__field(loff_t, dest_offset)
	),
	TP_fast_assign(
		__entry->dev = VFS_I(src)->i_sb->s_dev;
		/* source file */
		__entry->src_ino = src->i_ino;
		__entry->src_isize = VFS_I(src)->i_size;
		__entry->src_disize = src->i_disk_size;
		__entry->src_offset = soffset;
		__entry->len = len;
		/* destination file */
		__entry->dest_ino = dest->i_ino;
		__entry->dest_isize = VFS_I(dest)->i_size;
		__entry->dest_disize = dest->i_disk_size;
		__entry->dest_offset = doffset;
	),
	TP_printk(" dev %d:%d count %zd "
		  " ino 0x%llx isize 0x%llx disize 0x%llx offset 0x%llx -> "
		  " ino 0x%llx isize 0x%llx disize 0x%llx offset 0x%llx ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->len,
		  __entry->src_ino, __entry->src_isize,
		  __entry->src_disize, __entry->src_offset,
		  __entry->dest_ino, __entry->dest_isize,
		  __entry->dest_disize, __entry->dest_offset)
)
/* Instantiate tracepoint @name from xfs_double_io_class. */
#define DEFINE_DOUBLE_IO_EVENT(name) \
DEFINE_EVENT(xfs_double_io_class, name, \
	TP_PROTO(struct xfs_inode *src, xfs_off_t soffset, xfs_off_t len, \
		 struct xfs_inode *dest, xfs_off_t doffset), \
	TP_ARGS(src, soffset, len, dest, doffset))
/*
 * Event class: an inode together with one bmbt extent record.
 *
 * From @irec the file offset (br_startoff), block count (br_blockcount),
 * start block (br_startblock) and extent state (br_state) are recorded.
 */
DECLARE_EVENT_CLASS(xfs_inode_irec_class,
	TP_PROTO(struct xfs_inode *ip, struct xfs_bmbt_irec *irec),
	TP_ARGS(ip, irec),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_ino_t, ino)
		__field(xfs_fileoff_t, lblk)
		__field(xfs_extlen_t, len)
		__field(xfs_fsblock_t, pblk)
		__field(int, state)
	),
	TP_fast_assign(
		__entry->dev = VFS_I(ip)->i_sb->s_dev;
		__entry->ino = ip->i_ino;
		__entry->lblk = irec->br_startoff;
		__entry->len = irec->br_blockcount;
		__entry->pblk = irec->br_startblock;
		__entry->state = irec->br_state;
	),
	TP_printk(" dev %d:%d ino 0x%llx lblk 0x%llx len 0x%x pblk %llu st %d ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->ino,
		  __entry->lblk, __entry->len, __entry->pblk,
		  __entry->state)
);
/* Instantiate tracepoint @name from xfs_inode_irec_class. */
#define DEFINE_INODE_IREC_EVENT(name) \
DEFINE_EVENT(xfs_inode_irec_class, name, \
	TP_PROTO(struct xfs_inode *ip, struct xfs_bmbt_irec *irec), \
	TP_ARGS(ip, irec))
/* refcount/reflink tracepoint definitions */

/* reflink: inode flag and inode size tracepoints */
DEFINE_INODE_EVENT(xfs_reflink_set_inode_flag);
DEFINE_INODE_EVENT(xfs_reflink_unset_inode_flag);
DEFINE_ITRUNC_EVENT(xfs_reflink_update_inode_size);
2020-06-30 00:47:18 +03:00
TRACE_EVENT ( xfs_reflink_remap_blocks ,
2016-10-03 19:11:30 +03:00
TP_PROTO ( struct xfs_inode * src , xfs_fileoff_t soffset ,
xfs_filblks_t len , struct xfs_inode * dest ,
xfs_fileoff_t doffset ) ,
TP_ARGS ( src , soffset , len , dest , doffset ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_ino_t , src_ino )
__field ( xfs_fileoff_t , src_lblk )
__field ( xfs_filblks_t , len )
__field ( xfs_ino_t , dest_ino )
__field ( xfs_fileoff_t , dest_lblk )
) ,
TP_fast_assign (
__entry - > dev = VFS_I ( src ) - > i_sb - > s_dev ;
__entry - > src_ino = src - > i_ino ;
__entry - > src_lblk = soffset ;
__entry - > len = len ;
__entry - > dest_ino = dest - > i_ino ;
__entry - > dest_lblk = doffset ;
) ,
TP_printk ( " dev %d:%d len 0x%llx "
" ino 0x%llx offset 0x%llx blocks -> "
" ino 0x%llx offset 0x%llx blocks " ,
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > len ,
__entry - > src_ino ,
__entry - > src_lblk ,
__entry - > dest_ino ,
__entry - > dest_lblk )
) ;
/* reflink: remap tracepoints and their error counterparts */
DEFINE_DOUBLE_IO_EVENT(xfs_reflink_remap_range);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_remap_range_error);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_set_inode_flag_error);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_update_inode_size_error);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_remap_blocks_error);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_remap_extent_error);
DEFINE_INODE_IREC_EVENT(xfs_reflink_remap_extent_src);
DEFINE_INODE_IREC_EVENT(xfs_reflink_remap_extent_dest);

/* dedupe tracepoints */
DEFINE_DOUBLE_IO_EVENT(xfs_reflink_compare_extents);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_compare_extents_error);
/*
 * ioctl tracepoint: records inode number and i_size_read() of a source and
 * a destination VFS inode.  Takes struct inode rather than struct xfs_inode,
 * so the inode numbers are stored as unsigned long.
 */
TRACE_EVENT(xfs_ioctl_clone,
	TP_PROTO(struct inode *src, struct inode *dest),
	TP_ARGS(src, dest),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(unsigned long, src_ino)
		__field(loff_t, src_isize)
		__field(unsigned long, dest_ino)
		__field(loff_t, dest_isize)
	),
	TP_fast_assign(
		__entry->dev = src->i_sb->s_dev;
		__entry->src_ino = src->i_ino;
		__entry->src_isize = i_size_read(src);
		__entry->dest_ino = dest->i_ino;
		__entry->dest_isize = i_size_read(dest);
	),
	TP_printk(" dev %d:%d "
		  " ino 0x%lx isize 0x%llx -> "
		  " ino 0x%lx isize 0x%llx ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->src_ino, __entry->src_isize,
		  __entry->dest_ino, __entry->dest_isize)
);
/* unshare tracepoints */
DEFINE_SIMPLE_IO_EVENT(xfs_reflink_unshare);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_unshare_error);

/* copy-on-write tracepoints */
DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_around_shared);
DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_found);
DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_enospc);
DEFINE_INODE_IREC_EVENT(xfs_reflink_convert_cow);
DEFINE_SIMPLE_IO_EVENT(xfs_reflink_cancel_cow_range);
DEFINE_SIMPLE_IO_EVENT(xfs_reflink_end_cow);
DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_cancel_cow_range_error);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_end_cow_error);
DEFINE_INODE_IREC_EVENT(xfs_reflink_cancel_cow);

/* rmap swapext tracepoints */
DEFINE_INODE_IREC_EVENT(xfs_swap_extent_rmap_remap);
DEFINE_INODE_IREC_EVENT(xfs_swap_extent_rmap_remap_piece);
DEFINE_INODE_ERROR_EVENT(xfs_swap_extent_rmap_error);
2017-03-29 00:56:37 +03:00
/*
 * fsmap event class: an rmap record seen in the context of a key device.
 *
 * @keydev is passed through new_decode_dev() before being stored.  Start
 * block, block count, owner, offset and flags are copied from @rmap.  The
 * owner is stored as uint64_t but printed with %lld.
 */
DECLARE_EVENT_CLASS(xfs_fsmap_class,
	TP_PROTO(struct xfs_mount *mp, u32 keydev, xfs_agnumber_t agno,
		 struct xfs_rmap_irec *rmap),
	TP_ARGS(mp, keydev, agno, rmap),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(dev_t, keydev)
		__field(xfs_agnumber_t, agno)
		__field(xfs_fsblock_t, bno)
		__field(xfs_filblks_t, len)
		__field(uint64_t, owner)
		__field(uint64_t, offset)
		__field(unsigned int, flags)
	),
	TP_fast_assign(
		__entry->dev = mp->m_super->s_dev;
		__entry->keydev = new_decode_dev(keydev);
		__entry->agno = agno;
		__entry->bno = rmap->rm_startblock;
		__entry->len = rmap->rm_blockcount;
		__entry->owner = rmap->rm_owner;
		__entry->offset = rmap->rm_offset;
		__entry->flags = rmap->rm_flags;
	),
	TP_printk(" dev %d:%d keydev %d:%d agno %u bno %llu len %llu owner %lld offset %llu flags 0x%x ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  MAJOR(__entry->keydev), MINOR(__entry->keydev),
		  __entry->agno,
		  __entry->bno, __entry->len,
		  __entry->owner, __entry->offset,
		  __entry->flags)
)
/* Instantiate tracepoint @name from xfs_fsmap_class. */
#define DEFINE_FSMAP_EVENT(name) \
DEFINE_EVENT(xfs_fsmap_class, name, \
	TP_PROTO(struct xfs_mount *mp, u32 keydev, xfs_agnumber_t agno, \
		 struct xfs_rmap_irec *rmap), \
	TP_ARGS(mp, keydev, agno, rmap))

DEFINE_FSMAP_EVENT(xfs_fsmap_low_key);
DEFINE_FSMAP_EVENT(xfs_fsmap_high_key);
DEFINE_FSMAP_EVENT(xfs_fsmap_mapping);
/*
 * getfsmap event class: one struct xfs_fsmap record.
 *
 * fmr_device is passed through new_decode_dev() before being stored; the
 * physical address, length, owner, offset and flags are copied as-is.  The
 * owner is stored as uint64_t but printed with %lld.
 */
DECLARE_EVENT_CLASS(xfs_getfsmap_class,
	TP_PROTO(struct xfs_mount *mp, struct xfs_fsmap *fsmap),
	TP_ARGS(mp, fsmap),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(dev_t, keydev)
		__field(xfs_daddr_t, block)
		__field(xfs_daddr_t, len)
		__field(uint64_t, owner)
		__field(uint64_t, offset)
		__field(uint64_t, flags)
	),
	TP_fast_assign(
		__entry->dev = mp->m_super->s_dev;
		__entry->keydev = new_decode_dev(fsmap->fmr_device);
		__entry->block = fsmap->fmr_physical;
		__entry->len = fsmap->fmr_length;
		__entry->owner = fsmap->fmr_owner;
		__entry->offset = fsmap->fmr_offset;
		__entry->flags = fsmap->fmr_flags;
	),
	TP_printk(" dev %d:%d keydev %d:%d block %llu len %llu owner %lld offset %llu flags 0x%llx ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  MAJOR(__entry->keydev), MINOR(__entry->keydev),
		  __entry->block, __entry->len,
		  __entry->owner, __entry->offset,
		  __entry->flags)
)
/* Instantiate tracepoint @name from xfs_getfsmap_class. */
#define DEFINE_GETFSMAP_EVENT(name) \
DEFINE_EVENT(xfs_getfsmap_class, name, \
	TP_PROTO(struct xfs_mount *mp, struct xfs_fsmap *fsmap), \
	TP_ARGS(mp, fsmap))

DEFINE_GETFSMAP_EVENT(xfs_getfsmap_low_key);
DEFINE_GETFSMAP_EVENT(xfs_getfsmap_high_key);
DEFINE_GETFSMAP_EVENT(xfs_getfsmap_mapping);
2018-01-08 21:51:26 +03:00
/*
 * Records one transaction reservation: the caller-supplied @type plus the
 * tr_logres, tr_logcount and tr_logflags members of @res.
 */
TRACE_EVENT(xfs_trans_resv_calc,
	TP_PROTO(struct xfs_mount *mp, unsigned int type,
		 struct xfs_trans_res *res),
	TP_ARGS(mp, type, res),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(int, type)
		__field(uint, logres)
		__field(int, logcount)
		__field(int, logflags)
	),
	TP_fast_assign(
		__entry->dev = mp->m_super->s_dev;
		__entry->type = type;
		__entry->logres = res->tr_logres;
		__entry->logcount = res->tr_logcount;
		__entry->logflags = res->tr_logflags;
	),
	TP_printk(" dev %d:%d type %d logres %u logcount %d flags 0x%x ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->type,
		  __entry->logres, __entry->logcount, __entry->logflags)
);
2018-05-09 17:47:57 +03:00
/*
 * Transaction event class: transaction id, transaction flags and the
 * caller's instruction pointer.
 *
 * The id is read from the transaction's log ticket; when the transaction
 * has no ticket the id is recorded as 0.
 */
DECLARE_EVENT_CLASS(xfs_trans_class,
	TP_PROTO(struct xfs_trans *tp, unsigned long caller_ip),
	TP_ARGS(tp, caller_ip),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(uint32_t, tid)
		__field(uint32_t, flags)
		__field(unsigned long, caller_ip)
	),
	TP_fast_assign(
		__entry->dev = tp->t_mountp->m_super->s_dev;
		/* t_ticket may be NULL; fall back to tid 0 in that case */
		__entry->tid = tp->t_ticket ? tp->t_ticket->t_tid : 0;
		__entry->flags = tp->t_flags;
		__entry->caller_ip = caller_ip;
	),
	TP_printk(" dev %d:%d trans %x flags 0x%x caller %pS ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->tid, __entry->flags,
		  (char *)__entry->caller_ip)
)
/* Instantiate tracepoint @name from xfs_trans_class. */
#define DEFINE_TRANS_EVENT(name) \
DEFINE_EVENT(xfs_trans_class, name, \
	TP_PROTO(struct xfs_trans *tp, unsigned long caller_ip), \
	TP_ARGS(tp, caller_ip))

DEFINE_TRANS_EVENT(xfs_trans_alloc);
DEFINE_TRANS_EVENT(xfs_trans_cancel);
DEFINE_TRANS_EVENT(xfs_trans_commit);
DEFINE_TRANS_EVENT(xfs_trans_dup);
DEFINE_TRANS_EVENT(xfs_trans_free);
DEFINE_TRANS_EVENT(xfs_trans_roll);
DEFINE_TRANS_EVENT(xfs_trans_add_item);
DEFINE_TRANS_EVENT(xfs_trans_commit_items);
DEFINE_TRANS_EVENT(xfs_trans_free_items);
2019-02-07 21:37:14 +03:00
/*
 * Records a change to an unlinked-list bucket: the AG, the bucket index and
 * the old and new pointer values, all copied from the arguments.
 */
TRACE_EVENT(xfs_iunlink_update_bucket,
	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, unsigned int bucket,
		 xfs_agino_t old_ptr, xfs_agino_t new_ptr),
	TP_ARGS(mp, agno, bucket, old_ptr, new_ptr),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_agnumber_t, agno)
		__field(unsigned int, bucket)
		__field(xfs_agino_t, old_ptr)
		__field(xfs_agino_t, new_ptr)
	),
	TP_fast_assign(
		__entry->dev = mp->m_super->s_dev;
		__entry->agno = agno;
		__entry->bucket = bucket;
		__entry->old_ptr = old_ptr;
		__entry->new_ptr = new_ptr;
	),
	TP_printk(" dev %d:%d agno %u bucket %u old 0x%x new 0x%x ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->agno, __entry->bucket,
		  __entry->old_ptr, __entry->new_ptr)
);
2019-02-07 21:37:15 +03:00
/*
 * Records a change to an inode's unlinked pointer: the AG, the AG-relative
 * inode number and the old and new pointer values, all copied from the
 * arguments.
 */
TRACE_EVENT(xfs_iunlink_update_dinode,
	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agino_t agino,
		 xfs_agino_t old_ptr, xfs_agino_t new_ptr),
	TP_ARGS(mp, agno, agino, old_ptr, new_ptr),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_agnumber_t, agno)
		__field(xfs_agino_t, agino)
		__field(xfs_agino_t, old_ptr)
		__field(xfs_agino_t, new_ptr)
	),
	TP_fast_assign(
		__entry->dev = mp->m_super->s_dev;
		__entry->agno = agno;
		__entry->agino = agino;
		__entry->old_ptr = old_ptr;
		__entry->new_ptr = new_ptr;
	),
	TP_printk(" dev %d:%d agno %u agino 0x%x old 0x%x new 0x%x ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->agno, __entry->agino,
		  __entry->old_ptr, __entry->new_ptr)
);
2019-02-07 21:37:16 +03:00
/*
 * Event class: an inode identified by AG number and AG-relative inode
 * number.  Both are derived from ip->i_ino with XFS_INO_TO_AGNO() and
 * XFS_INO_TO_AGINO() against the inode's mount.
 */
DECLARE_EVENT_CLASS(xfs_ag_inode_class,
	TP_PROTO(struct xfs_inode *ip),
	TP_ARGS(ip),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_agnumber_t, agno)
		__field(xfs_agino_t, agino)
	),
	TP_fast_assign(
		__entry->dev = VFS_I(ip)->i_sb->s_dev;
		__entry->agno = XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino);
		__entry->agino = XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino);
	),
	TP_printk(" dev %d:%d agno %u agino %u ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->agno,
		  __entry->agino)
)
/* Instantiate tracepoint @name from xfs_ag_inode_class. */
#define DEFINE_AGINODE_EVENT(name) \
DEFINE_EVENT(xfs_ag_inode_class, name, \
	TP_PROTO(struct xfs_inode *ip), \
	TP_ARGS(ip))

DEFINE_AGINODE_EVENT(xfs_iunlink);
DEFINE_AGINODE_EVENT(xfs_iunlink_remove);
DEFINE_AG_EVENT(xfs_iunlink_map_prev_fallback);
2019-02-07 21:37:16 +03:00
2019-04-12 17:40:25 +03:00
/* Event class: a mount-wide flags word, printed in hex. */
DECLARE_EVENT_CLASS(xfs_fs_corrupt_class,
	TP_PROTO(struct xfs_mount *mp, unsigned int flags),
	TP_ARGS(mp, flags),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(unsigned int, flags)
	),
	TP_fast_assign(
		__entry->dev = mp->m_super->s_dev;
		__entry->flags = flags;
	),
	TP_printk(" dev %d:%d flags 0x%x ",
		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->flags)
);
/* Instantiate tracepoint @name from xfs_fs_corrupt_class. */
#define DEFINE_FS_CORRUPT_EVENT(name) \
DEFINE_EVENT(xfs_fs_corrupt_class, name, \
	TP_PROTO(struct xfs_mount *mp, unsigned int flags), \
	TP_ARGS(mp, flags))

DEFINE_FS_CORRUPT_EVENT(xfs_fs_mark_sick);
DEFINE_FS_CORRUPT_EVENT(xfs_fs_mark_healthy);
DEFINE_FS_CORRUPT_EVENT(xfs_fs_unfixed_corruption);
DEFINE_FS_CORRUPT_EVENT(xfs_rt_mark_sick);
DEFINE_FS_CORRUPT_EVENT(xfs_rt_mark_healthy);
DEFINE_FS_CORRUPT_EVENT(xfs_rt_unfixed_corruption);
2019-04-12 17:40:25 +03:00
/* Event class: an AG number plus a flags word, printed in hex. */
DECLARE_EVENT_CLASS(xfs_ag_corrupt_class,
	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, unsigned int flags),
	TP_ARGS(mp, agno, flags),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_agnumber_t, agno)
		__field(unsigned int, flags)
	),
	TP_fast_assign(
		__entry->dev = mp->m_super->s_dev;
		__entry->agno = agno;
		__entry->flags = flags;
	),
	TP_printk(" dev %d:%d agno %u flags 0x%x ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->agno,
		  __entry->flags)
);
/* Instantiate tracepoint @name from xfs_ag_corrupt_class. */
#define DEFINE_AG_CORRUPT_EVENT(name) \
DEFINE_EVENT(xfs_ag_corrupt_class, name, \
	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
		 unsigned int flags), \
	TP_ARGS(mp, agno, flags))

DEFINE_AG_CORRUPT_EVENT(xfs_ag_mark_sick);
DEFINE_AG_CORRUPT_EVENT(xfs_ag_mark_healthy);
DEFINE_AG_CORRUPT_EVENT(xfs_ag_unfixed_corruption);
2019-04-12 17:40:25 +03:00
/*
 * Event class: an inode number plus a flags word, printed in hex.  The
 * device is reached through the inode's mount (ip->i_mount->m_super).
 */
DECLARE_EVENT_CLASS(xfs_inode_corrupt_class,
	TP_PROTO(struct xfs_inode *ip, unsigned int flags),
	TP_ARGS(ip, flags),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_ino_t, ino)
		__field(unsigned int, flags)
	),
	TP_fast_assign(
		__entry->dev = ip->i_mount->m_super->s_dev;
		__entry->ino = ip->i_ino;
		__entry->flags = flags;
	),
	TP_printk(" dev %d:%d ino 0x%llx flags 0x%x ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->ino,
		  __entry->flags)
);
/* Instantiate tracepoint @name from xfs_inode_corrupt_class. */
#define DEFINE_INODE_CORRUPT_EVENT(name) \
DEFINE_EVENT(xfs_inode_corrupt_class, name, \
	TP_PROTO(struct xfs_inode *ip, unsigned int flags), \
	TP_ARGS(ip, flags))

DEFINE_INODE_CORRUPT_EVENT(xfs_inode_mark_sick);
DEFINE_INODE_CORRUPT_EVENT(xfs_inode_mark_healthy);
2019-07-02 19:39:38 +03:00
/*
 * Records the AG number and the AG-relative starting inode number passed
 * in by the caller.
 */
TRACE_EVENT(xfs_iwalk_ag,
	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
		 xfs_agino_t startino),
	TP_ARGS(mp, agno, startino),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_agnumber_t, agno)
		__field(xfs_agino_t, startino)
	),
	TP_fast_assign(
		__entry->dev = mp->m_super->s_dev;
		__entry->agno = agno;
		__entry->startino = startino;
	),
	/*
	 * agno is printed with %u, matching every other AG-number tracepoint
	 * in this file; %d would show large values as negative numbers.
	 */
	TP_printk(" dev %d:%d agno %u startino %u ",
		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno,
		  __entry->startino)
)
/*
 * Records one in-core inobt record: its starting inode number
 * (ir_startino) and its free mask (ir_free), together with the AG number.
 */
TRACE_EVENT(xfs_iwalk_ag_rec,
	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
		 struct xfs_inobt_rec_incore *irec),
	TP_ARGS(mp, agno, irec),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_agnumber_t, agno)
		__field(xfs_agino_t, startino)
		__field(uint64_t, freemask)
	),
	TP_fast_assign(
		__entry->dev = mp->m_super->s_dev;
		__entry->agno = agno;
		__entry->startino = irec->ir_startino;
		__entry->freemask = irec->ir_free;
	),
	/*
	 * agno is printed with %u, matching every other AG-number tracepoint
	 * in this file; %d would show large values as negative numbers.
	 */
	TP_printk(" dev %d:%d agno %u startino %u freemask 0x%llx ",
		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno,
		  __entry->startino, __entry->freemask)
)
2019-07-03 17:33:26 +03:00
/*
 * Records the thread count and pid passed in by the caller.
 *
 * NOTE(review): pid is a pid_t but is printed with %u -- confirm intended.
 */
TRACE_EVENT(xfs_pwork_init,
	TP_PROTO(struct xfs_mount *mp, unsigned int nr_threads, pid_t pid),
	TP_ARGS(mp, nr_threads, pid),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(unsigned int, nr_threads)
		__field(pid_t, pid)
	),
	TP_fast_assign(
		__entry->dev = mp->m_super->s_dev;
		__entry->nr_threads = nr_threads;
		__entry->pid = pid;
	),
	TP_printk(" dev %d:%d nr_threads %u pid %u ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->nr_threads,
		  __entry->pid)
)
2019-08-26 22:08:10 +03:00
/*
 * Event class: a size, a flags word and the caller's instruction pointer.
 * This class has no mount argument, so no device is recorded.
 */
DECLARE_EVENT_CLASS(xfs_kmem_class,
	TP_PROTO(ssize_t size, int flags, unsigned long caller_ip),
	TP_ARGS(size, flags, caller_ip),
	TP_STRUCT__entry(
		__field(ssize_t, size)
		__field(int, flags)
		__field(unsigned long, caller_ip)
	),
	TP_fast_assign(
		__entry->size = size;
		__entry->flags = flags;
		__entry->caller_ip = caller_ip;
	),
	TP_printk(" size %zd flags 0x%x caller %pS ",
		  __entry->size, __entry->flags,
		  (char *)__entry->caller_ip)
)
/* Instantiate tracepoint @name from xfs_kmem_class. */
#define DEFINE_KMEM_EVENT(name) \
DEFINE_EVENT(xfs_kmem_class, name, \
	TP_PROTO(ssize_t size, int flags, unsigned long caller_ip), \
	TP_ARGS(size, flags, caller_ip))

DEFINE_KMEM_EVENT(kmem_alloc);
2019-12-12 00:19:06 +03:00
/*
 * Records @new_dalign alongside two root inode numbers: the one stored in
 * the superblock (mp->m_sb.sb_rootino) and the caller-supplied
 * @calc_rootino.
 */
TRACE_EVENT(xfs_check_new_dalign,
	TP_PROTO(struct xfs_mount *mp, int new_dalign, xfs_ino_t calc_rootino),
	TP_ARGS(mp, new_dalign, calc_rootino),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(int, new_dalign)
		__field(xfs_ino_t, sb_rootino)
		__field(xfs_ino_t, calc_rootino)
	),
	TP_fast_assign(
		__entry->dev = mp->m_super->s_dev;
		__entry->new_dalign = new_dalign;
		__entry->sb_rootino = mp->m_sb.sb_rootino;
		__entry->calc_rootino = calc_rootino;
	),
	TP_printk(" dev %d:%d new_dalign %d sb_rootino %llu calc_rootino %llu ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->new_dalign,
		  __entry->sb_rootino,
		  __entry->calc_rootino)
)
2020-03-11 20:40:26 +03:00
/*
 * Records the state of an AG btree cursor's fake root: btree type, AG
 * number (from the cursor's perag) and the af_root, af_levels and af_blocks
 * members of cur->bc_ag.afake.
 */
TRACE_EVENT(xfs_btree_commit_afakeroot,
	TP_PROTO(struct xfs_btree_cur *cur),
	TP_ARGS(cur),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_btnum_t, btnum)
		__field(xfs_agnumber_t, agno)
		__field(xfs_agblock_t, agbno)
		__field(unsigned int, levels)
		__field(unsigned int, blocks)
	),
	TP_fast_assign(
		__entry->dev = cur->bc_mp->m_super->s_dev;
		__entry->btnum = cur->bc_btnum;
		__entry->agno = cur->bc_ag.pag->pag_agno;
		__entry->agbno = cur->bc_ag.afake->af_root;
		__entry->levels = cur->bc_ag.afake->af_levels;
		__entry->blocks = cur->bc_ag.afake->af_blocks;
	),
	/* The root block (agbno) is printed last, after levels and blocks. */
	TP_printk(" dev %d:%d btree %s ag %u levels %u blocks %u root %u ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __print_symbolic(__entry->btnum, XFS_BTNUM_STRINGS),
		  __entry->agno,
		  __entry->levels, __entry->blocks,
		  __entry->agbno)
)
2020-03-11 20:42:34 +03:00
/*
 * Records the state of an inode-rooted btree cursor's fake root: btree
 * type, the owning inode split into AG number and AG inode number, the
 * fork, and the if_levels and if_blocks members of cur->bc_ino.ifake.
 */
TRACE_EVENT(xfs_btree_commit_ifakeroot,
	TP_PROTO(struct xfs_btree_cur *cur),
	TP_ARGS(cur),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_btnum_t, btnum)
		__field(xfs_agnumber_t, agno)
		__field(xfs_agino_t, agino)
		__field(unsigned int, levels)
		__field(unsigned int, blocks)
		__field(int, whichfork)
	),
	TP_fast_assign(
		__entry->dev = cur->bc_mp->m_super->s_dev;
		__entry->btnum = cur->bc_btnum;
		__entry->agno = XFS_INO_TO_AGNO(cur->bc_mp,
						cur->bc_ino.ip->i_ino);
		__entry->agino = XFS_INO_TO_AGINO(cur->bc_mp,
						  cur->bc_ino.ip->i_ino);
		__entry->levels = cur->bc_ino.ifake->if_levels;
		__entry->blocks = cur->bc_ino.ifake->if_blocks;
		__entry->whichfork = cur->bc_ino.whichfork;
	),
	/* Any fork other than XFS_ATTR_FORK is reported as the data fork. */
	TP_printk(" dev %d:%d btree %s ag %u agino %u whichfork %s levels %u blocks %u ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __print_symbolic(__entry->btnum, XFS_BTNUM_STRINGS),
		  __entry->agno, __entry->agino,
		  __entry->whichfork == XFS_ATTR_FORK ? " attr " : " data ",
		  __entry->levels, __entry->blocks)
)
2020-03-11 20:51:50 +03:00
/*
 * Records the geometry values supplied by the caller for one btree level.
 * The level is printed as "level/nlevels", where nlevels comes from
 * cur->bc_nlevels; every other value is copied from the arguments.
 */
TRACE_EVENT(xfs_btree_bload_level_geometry,
	TP_PROTO(struct xfs_btree_cur *cur, unsigned int level,
		 uint64_t nr_this_level, unsigned int nr_per_block,
		 unsigned int desired_npb, uint64_t blocks,
		 uint64_t blocks_with_extra),
	TP_ARGS(cur, level, nr_this_level, nr_per_block, desired_npb, blocks,
		blocks_with_extra),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_btnum_t, btnum)
		__field(unsigned int, level)
		__field(unsigned int, nlevels)
		__field(uint64_t, nr_this_level)
		__field(unsigned int, nr_per_block)
		__field(unsigned int, desired_npb)
		__field(unsigned long long, blocks)
		__field(unsigned long long, blocks_with_extra)
	),
	TP_fast_assign(
		__entry->dev = cur->bc_mp->m_super->s_dev;
		__entry->btnum = cur->bc_btnum;
		__entry->level = level;
		__entry->nlevels = cur->bc_nlevels;
		__entry->nr_this_level = nr_this_level;
		__entry->nr_per_block = nr_per_block;
		__entry->desired_npb = desired_npb;
		__entry->blocks = blocks;
		__entry->blocks_with_extra = blocks_with_extra;
	),
	TP_printk(" dev %d:%d btree %s level %u/%u nr_this_level %llu nr_per_block %u desired_npb %u blocks %llu blocks_with_extra %llu ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __print_symbolic(__entry->btnum, XFS_BTNUM_STRINGS),
		  __entry->level, __entry->nlevels,
		  __entry->nr_this_level,
		  __entry->nr_per_block, __entry->desired_npb,
		  __entry->blocks, __entry->blocks_with_extra)
)
/*
 * xfs_btree_bload_block - tracepoint recording one block, identified by
 * @block_idx out of @nr_blocks at @level, together with the block pointer
 * @ptr and the record count @nr_records.
 *
 * The on-disk pointer is decoded into an (agno, agbno) pair.  Which union
 * member of @ptr is read depends on XFS_BTREE_LONG_PTRS in cur->bc_flags.
 */
TRACE_EVENT(xfs_btree_bload_block,
	TP_PROTO(struct xfs_btree_cur *cur, unsigned int level,
		 uint64_t block_idx, uint64_t nr_blocks,
		 union xfs_btree_ptr *ptr, unsigned int nr_records),
	TP_ARGS(cur, level, block_idx, nr_blocks, ptr, nr_records),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_btnum_t, btnum)
		__field(unsigned int, level)
		__field(unsigned long long, block_idx)
		__field(unsigned long long, nr_blocks)
		__field(xfs_agnumber_t, agno)
		__field(xfs_agblock_t, agbno)
		__field(unsigned int, nr_records)
	),
	TP_fast_assign(
		__entry->dev = cur->bc_mp->m_super->s_dev;
		__entry->btnum = cur->bc_btnum;
		__entry->level = level;
		__entry->block_idx = block_idx;
		__entry->nr_blocks = nr_blocks;
		__entry->nr_records = nr_records;

		if (!(cur->bc_flags & XFS_BTREE_LONG_PTRS)) {
			/*
			 * Short pointer: the 32-bit big-endian value is used
			 * as the AG block, and the AG number is taken from
			 * the cursor's perag.
			 */
			__entry->agno = cur->bc_ag.pag->pag_agno;
			__entry->agbno = be32_to_cpu(ptr->s);
		} else {
			/*
			 * Long pointer: convert the 64-bit big-endian value
			 * to CPU order and split it into AG number and AG
			 * block.
			 */
			xfs_fsblock_t	fsb = be64_to_cpu(ptr->l);

			__entry->agno = XFS_FSB_TO_AGNO(cur->bc_mp, fsb);
			__entry->agbno = XFS_FSB_TO_AGBNO(cur->bc_mp, fsb);
		}
	),
	TP_printk(" dev %d:%d btree %s level %u block %llu/%llu fsb (%u/%u) recs %u ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __print_symbolic(__entry->btnum, XFS_BTNUM_STRINGS),
		  __entry->level,
		  __entry->block_idx,
		  __entry->nr_blocks,
		  __entry->agno,
		  __entry->agbno,
		  __entry->nr_records)
)
2020-08-24 21:58:01 +03:00
/*
 * xfs_timestamp_range_class - event class for tracepoints that report a
 * [min, max] pair of time64_t values for a mount.
 *
 * Both bounds are stored as long long and printed as signed decimals next
 * to the device major:minor of @mp.
 */
DECLARE_EVENT_CLASS(xfs_timestamp_range_class,
	TP_PROTO(struct xfs_mount *mp, time64_t min, time64_t max),
	TP_ARGS(mp, min, max),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(long long, min)
		__field(long long, max)
	),
	TP_fast_assign(
		__entry->min = min;
		__entry->max = max;
		__entry->dev = mp->m_super->s_dev;
	),
	TP_printk(" dev %d:%d min %lld max %lld ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->min,
		  __entry->max)
)
/*
 * DEFINE_TIMESTAMP_RANGE_EVENT - instantiate a tracepoint @name from
 * xfs_timestamp_range_class.
 *
 * The prototype spells the bounds as time64_t so that it reads the same as
 * the TP_PROTO of the class it instantiates; the class stores them as
 * long long.
 */
#define DEFINE_TIMESTAMP_RANGE_EVENT(name) \
DEFINE_EVENT(xfs_timestamp_range_class, name, \
	TP_PROTO(struct xfs_mount *mp, time64_t min, time64_t max), \
	TP_ARGS(mp, min, max))
DEFINE_TIMESTAMP_RANGE_EVENT(xfs_inode_timestamp_range);
DEFINE_TIMESTAMP_RANGE_EVENT(xfs_quota_expiry_range);
2021-06-07 19:34:51 +03:00
/*
 * xfs_icwalk_class - event class for tracepoints that report the contents
 * of a struct xfs_icwalk together with the caller's address.
 *
 * @mp:        mount; supplies the device and the user namespace used to
 *             translate the kuid/kgid into plain numbers
 * @icw:       walk parameters; may be NULL, in which case every field
 *             taken from it is recorded as zero
 * @caller_ip: address printed with %pS
 */
DECLARE_EVENT_CLASS(xfs_icwalk_class,
	TP_PROTO(struct xfs_mount *mp, struct xfs_icwalk *icw,
		 unsigned long caller_ip),
	TP_ARGS(mp, icw, caller_ip),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(__u32, flags)
		__field(uint32_t, uid)
		__field(uint32_t, gid)
		__field(prid_t, prid)
		__field(__u64, min_file_size)
		__field(long, scan_limit)
		__field(unsigned long, caller_ip)
	),
	TP_fast_assign(
		__entry->dev = mp->m_super->s_dev;
		__entry->caller_ip = caller_ip;

		if (icw) {
			__entry->flags = icw->icw_flags;
			__entry->uid = from_kuid(mp->m_super->s_user_ns,
						 icw->icw_uid);
			__entry->gid = from_kgid(mp->m_super->s_user_ns,
						 icw->icw_gid);
			__entry->prid = icw->icw_prid;
			__entry->min_file_size = icw->icw_min_file_size;
			__entry->scan_limit = icw->icw_scan_limit;
		} else {
			/* No walk parameters supplied: log all of them as 0. */
			__entry->flags = 0;
			__entry->uid = 0;
			__entry->gid = 0;
			__entry->prid = 0;
			__entry->min_file_size = 0;
			__entry->scan_limit = 0;
		}
	),
	TP_printk(" dev %d:%d flags 0x%x uid %u gid %u prid %u minsize %llu scan_limit %ld caller %pS ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->flags,
		  __entry->uid,
		  __entry->gid,
		  __entry->prid,
		  __entry->min_file_size,
		  __entry->scan_limit,
		  (char *)__entry->caller_ip)
);
2021-06-07 19:34:51 +03:00
/*
 * DEFINE_ICWALK_EVENT - instantiate a tracepoint @name from
 * xfs_icwalk_class, using the same (mp, icw, caller_ip) prototype.
 */
#define DEFINE_ICWALK_EVENT(name) \
DEFINE_EVENT(xfs_icwalk_class, name, \
	TP_PROTO(struct xfs_mount *mp, struct xfs_icwalk *icw, \
		 unsigned long caller_ip), \
	TP_ARGS(mp, icw, caller_ip))
DEFINE_ICWALK_EVENT(xfs_ioc_free_eofblocks);
DEFINE_ICWALK_EVENT(xfs_blockgc_free_space);
2021-01-23 03:48:38 +03:00
2021-06-18 21:57:05 +03:00
/*
 * Register each XLOG_STATE_* enumerator with the tracing core.  The iclog
 * event class prints its state field symbolically via XLOG_STATE_STRINGS,
 * which presumably names these same values (its definition is not in this
 * part of the file).
 */
TRACE_DEFINE_ENUM(XLOG_STATE_ACTIVE);
TRACE_DEFINE_ENUM(XLOG_STATE_WANT_SYNC);
TRACE_DEFINE_ENUM(XLOG_STATE_SYNCING);
TRACE_DEFINE_ENUM(XLOG_STATE_DONE_SYNC);
TRACE_DEFINE_ENUM(XLOG_STATE_CALLBACK);
TRACE_DEFINE_ENUM(XLOG_STATE_DIRTY);
TRACE_DEFINE_ENUM(XLOG_STATE_IOERROR);
/*
 * xlog_iclog_class - event class for tracepoints that snapshot one
 * in-core log buffer (struct xlog_in_core).
 *
 * @iclog:     buffer being traced; the device is reached through
 *             iclog->ic_log->l_mp
 * @caller_ip: address printed with %pS
 *
 * The record holds the iclog state (printed symbolically), the current
 * value of its reference count, its offset, its flags (printed as a
 * flag list) and the LSN from the iclog header converted from big-endian
 * to CPU byte order.
 */
DECLARE_EVENT_CLASS(xlog_iclog_class,
	TP_PROTO(struct xlog_in_core *iclog, unsigned long caller_ip),
	TP_ARGS(iclog, caller_ip),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(uint32_t, state)
		__field(int32_t, refcount)
		__field(uint32_t, offset)
		__field(uint32_t, flags)
		__field(unsigned long long, lsn)
		__field(unsigned long, caller_ip)
	),
	TP_fast_assign(
		__entry->dev = iclog->ic_log->l_mp->m_super->s_dev;
		__entry->caller_ip = caller_ip;

		/* Plain copies of the iclog's own fields. */
		__entry->state = iclog->ic_state;
		__entry->offset = iclog->ic_offset;
		__entry->flags = iclog->ic_flags;

		/* Fields that need a read helper or a byte-order conversion. */
		__entry->refcount = atomic_read(&iclog->ic_refcnt);
		__entry->lsn = be64_to_cpu(iclog->ic_header.h_lsn);
	),
	TP_printk(" dev %d:%d state %s refcnt %d offset %u lsn 0x%llx flags %s caller %pS ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __print_symbolic(__entry->state, XLOG_STATE_STRINGS),
		  __entry->refcount,
		  __entry->offset,
		  __entry->lsn,
		  __print_flags(__entry->flags, " | ", XLOG_ICL_STRINGS),
		  (char *)__entry->caller_ip)
);
/*
 * DEFINE_ICLOG_EVENT - instantiate a tracepoint @name from
 * xlog_iclog_class, using the same (iclog, caller_ip) prototype.
 */
#define DEFINE_ICLOG_EVENT(name) \
DEFINE_EVENT(xlog_iclog_class, name, \
	TP_PROTO(struct xlog_in_core *iclog, unsigned long caller_ip), \
	TP_ARGS(iclog, caller_ip))

/* One tracepoint per iclog operation; all share the class's record layout. */
DEFINE_ICLOG_EVENT(xlog_iclog_activate);
DEFINE_ICLOG_EVENT(xlog_iclog_clean);
DEFINE_ICLOG_EVENT(xlog_iclog_callback);
DEFINE_ICLOG_EVENT(xlog_iclog_callbacks_start);
DEFINE_ICLOG_EVENT(xlog_iclog_callbacks_done);
DEFINE_ICLOG_EVENT(xlog_iclog_force);
DEFINE_ICLOG_EVENT(xlog_iclog_force_lsn);
DEFINE_ICLOG_EVENT(xlog_iclog_get_space);
DEFINE_ICLOG_EVENT(xlog_iclog_release);
DEFINE_ICLOG_EVENT(xlog_iclog_switch);
DEFINE_ICLOG_EVENT(xlog_iclog_sync);
DEFINE_ICLOG_EVENT(xlog_iclog_syncing);
DEFINE_ICLOG_EVENT(xlog_iclog_sync_done);
DEFINE_ICLOG_EVENT(xlog_iclog_want_sync);
DEFINE_ICLOG_EVENT(xlog_iclog_wait_on);
DEFINE_ICLOG_EVENT(xlog_iclog_write);
2009-12-15 02:14:59 +03:00
# endif /* _TRACE_XFS_H */
/*
 * Everything below sits outside the _TRACE_XFS_H guard on purpose: the
 * guard at the top of this file is also bypassed when
 * TRACE_HEADER_MULTI_READ is defined, so this header can be read again.
 * TRACE_INCLUDE_PATH and TRACE_INCLUDE_FILE name this header (directory "."
 * and file "xfs_trace") and must be set before <trace/define_trace.h> is
 * included.
 */
# undef TRACE_INCLUDE_PATH
# define TRACE_INCLUDE_PATH .
# define TRACE_INCLUDE_FILE xfs_trace
# include <trace/define_trace.h>