1516 lines
45 KiB
C
Raw Normal View History

/* SPDX-License-Identifier: GPL-2.0-or-later */
/* AFS tracepoints
*
* Copyright (C) 2016 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*/
#undef TRACE_SYSTEM
#define TRACE_SYSTEM afs
#if !defined(_TRACE_AFS_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_AFS_H
#include <linux/tracepoint.h>
afs: Refcount the afs_call struct A static checker warning occurs in the AFS filesystem: fs/afs/cmservice.c:155 SRXAFSCB_CallBack() error: dereferencing freed memory 'call' due to the reply being sent before we access the server it points to. The act of sending the reply causes the call to be freed if an error occurs (but not if it doesn't). On top of this, the lifetime handling of afs_call structs is fragile because they get passed around through workqueues without any sort of refcounting. Deal with the issues by: (1) Fix the maybe/maybe not nature of the reply sending functions with regards to whether they release the call struct. (2) Refcount the afs_call struct and sort out places that need to get/put references. (3) Pass a ref through the work queue and release (or pass on) that ref in the work function. Care has to be taken because a work queue may already own a ref to the call. (4) Do the cleaning up in the put function only. (5) Simplify module cleanup by always incrementing afs_outstanding_calls whenever a call is allocated. (6) Set the backlog to 0 with kernel_listen() at the beginning of the process of closing the socket to prevent new incoming calls from occurring and to remove the contribution of preallocated calls from afs_outstanding_calls before we wait on it. A tracepoint is also added to monitor the afs_call refcount and lifetime. Reported-by: Dan Carpenter <dan.carpenter@oracle.com> Signed-off-by: David Howells <dhowells@redhat.com> Fixes: 08e0e7c82eea: "[AF_RXRPC]: Make the in-kernel AFS filesystem use AF_RXRPC."
2017-01-05 10:38:36 +00:00
/*
* Define enums for tracing information.
*/
#ifndef __AFS_DECLARE_TRACE_ENUMS_ONCE_ONLY
#define __AFS_DECLARE_TRACE_ENUMS_ONCE_ONLY
enum afs_call_trace {
afs_call_trace_alloc,
afs_call_trace_free,
afs_call_trace_get,
afs: Refcount the afs_call struct A static checker warning occurs in the AFS filesystem: fs/afs/cmservice.c:155 SRXAFSCB_CallBack() error: dereferencing freed memory 'call' due to the reply being sent before we access the server it points to. The act of sending the reply causes the call to be freed if an error occurs (but not if it doesn't). On top of this, the lifetime handling of afs_call structs is fragile because they get passed around through workqueues without any sort of refcounting. Deal with the issues by: (1) Fix the maybe/maybe not nature of the reply sending functions with regards to whether they release the call struct. (2) Refcount the afs_call struct and sort out places that need to get/put references. (3) Pass a ref through the work queue and release (or pass on) that ref in the work function. Care has to be taken because a work queue may already own a ref to the call. (4) Do the cleaning up in the put function only. (5) Simplify module cleanup by always incrementing afs_outstanding_calls whenever a call is allocated. (6) Set the backlog to 0 with kernel_listen() at the beginning of the process of closing the socket to prevent new incoming calls from occurring and to remove the contribution of preallocated calls from afs_outstanding_calls before we wait on it. A tracepoint is also added to monitor the afs_call refcount and lifetime. Reported-by: Dan Carpenter <dan.carpenter@oracle.com> Signed-off-by: David Howells <dhowells@redhat.com> Fixes: 08e0e7c82eea: "[AF_RXRPC]: Make the in-kernel AFS filesystem use AF_RXRPC."
2017-01-05 10:38:36 +00:00
afs_call_trace_put,
afs_call_trace_wake,
afs_call_trace_work,
};
enum afs_server_trace {
afs_server_trace_alloc,
afs_server_trace_callback,
afs_server_trace_destroy,
afs_server_trace_free,
afs_server_trace_gc,
afs_server_trace_get_by_addr,
afs_server_trace_get_by_uuid,
afs_server_trace_get_caps,
afs_server_trace_get_install,
afs_server_trace_get_new_cbi,
afs: Actively poll fileservers to maintain NAT or firewall openings When an AFS client accesses a file, it receives a limited-duration callback promise that the server will notify it if another client changes a file. This callback duration can be a few hours in length. If a client mounts a volume and then an application prevents it from being unmounted, say by chdir'ing into it, but then does nothing for some time, the rxrpc_peer record will expire and rxrpc-level keepalive will cease. If there is NAT or a firewall between the client and the server, the route back for the server may close after a comparatively short duration, meaning that attempts by the server to notify the client may then bounce. The client, however, may (so far as it knows) still have a valid unexpired promise and will then rely on its cached data and will not see changes made on the server by a third party until it incidentally rechecks the status or the promise needs renewal. To deal with this, the client needs to regularly probe the server. This has two effects: firstly, it keeps a route open back for the server, and secondly, it causes the server to disgorge any notifications that got queued up because they couldn't be sent. Fix this by adding a mechanism to emit regular probes. Two levels of probing are made available: Under normal circumstances the 'slow' queue will be used for a fileserver - this just probes the preferred address once every 5 mins or so; however, if server fails to respond to any probes, the server will shift to the 'fast' queue from which all its interfaces will be probed every 30s. When it finally responds, the record will switch back to the slow queue. Further notes: (1) Probing is now no longer driven from the fileserver rotation algorithm. (2) Probes are dispatched to all interfaces on a fileserver when that an afs_server object is set up to record it. (3) The afs_server object is removed from the probe queues when we start to probe it. afs_is_probing_server() returns true if it's not listed - ie. it's undergoing probing. (4) The afs_server object is added back on to the probe queue when the final outstanding probe completes, but the probed_at time is set when we're about to launch a probe so that it's not dependent on the probe duration. (5) The timer and the work item added for this must be handed a count on net->servers_outstanding, which they hand on or release. This makes sure that network namespace cleanup waits for them. Fixes: d2ddc776a458 ("afs: Overhaul volume and server record caching and fileserver rotation") Reported-by: Dave Botsch <botsch@cnf.cornell.edu> Signed-off-by: David Howells <dhowells@redhat.com>
2020-04-24 15:10:00 +01:00
afs_server_trace_get_probe,
afs_server_trace_give_up_cb,
afs_server_trace_purging,
afs_server_trace_put_call,
afs_server_trace_put_cbi,
afs_server_trace_put_find_rsq,
afs: Actively poll fileservers to maintain NAT or firewall openings When an AFS client accesses a file, it receives a limited-duration callback promise that the server will notify it if another client changes a file. This callback duration can be a few hours in length. If a client mounts a volume and then an application prevents it from being unmounted, say by chdir'ing into it, but then does nothing for some time, the rxrpc_peer record will expire and rxrpc-level keepalive will cease. If there is NAT or a firewall between the client and the server, the route back for the server may close after a comparatively short duration, meaning that attempts by the server to notify the client may then bounce. The client, however, may (so far as it knows) still have a valid unexpired promise and will then rely on its cached data and will not see changes made on the server by a third party until it incidentally rechecks the status or the promise needs renewal. To deal with this, the client needs to regularly probe the server. This has two effects: firstly, it keeps a route open back for the server, and secondly, it causes the server to disgorge any notifications that got queued up because they couldn't be sent. Fix this by adding a mechanism to emit regular probes. Two levels of probing are made available: Under normal circumstances the 'slow' queue will be used for a fileserver - this just probes the preferred address once every 5 mins or so; however, if server fails to respond to any probes, the server will shift to the 'fast' queue from which all its interfaces will be probed every 30s. When it finally responds, the record will switch back to the slow queue. Further notes: (1) Probing is now no longer driven from the fileserver rotation algorithm. (2) Probes are dispatched to all interfaces on a fileserver when that an afs_server object is set up to record it. (3) The afs_server object is removed from the probe queues when we start to probe it. afs_is_probing_server() returns true if it's not listed - ie. it's undergoing probing. (4) The afs_server object is added back on to the probe queue when the final outstanding probe completes, but the probed_at time is set when we're about to launch a probe so that it's not dependent on the probe duration. (5) The timer and the work item added for this must be handed a count on net->servers_outstanding, which they hand on or release. This makes sure that network namespace cleanup waits for them. Fixes: d2ddc776a458 ("afs: Overhaul volume and server record caching and fileserver rotation") Reported-by: Dave Botsch <botsch@cnf.cornell.edu> Signed-off-by: David Howells <dhowells@redhat.com>
2020-04-24 15:10:00 +01:00
afs_server_trace_put_probe,
afs_server_trace_put_slist,
afs_server_trace_put_slist_isort,
afs_server_trace_put_uuid_rsq,
afs_server_trace_update,
};
enum afs_volume_trace {
afs_volume_trace_alloc,
afs_volume_trace_free,
afs_volume_trace_get_alloc_sbi,
afs_volume_trace_get_cell_insert,
afs_volume_trace_get_new_op,
afs_volume_trace_get_query_alias,
afs_volume_trace_put_cell_dup,
afs_volume_trace_put_cell_root,
afs_volume_trace_put_destroy_sbi,
afs_volume_trace_put_free_fc,
afs_volume_trace_put_put_op,
afs_volume_trace_put_query_alias,
afs_volume_trace_put_validate_fc,
afs_volume_trace_remove,
};
enum afs_cell_trace {
afs_cell_trace_alloc,
afs_cell_trace_free,
afs_cell_trace_get_queue_dns,
afs_cell_trace_get_queue_manage,
afs_cell_trace_get_queue_new,
afs_cell_trace_get_vol,
afs_cell_trace_insert,
afs_cell_trace_manage,
afs_cell_trace_put_candidate,
afs_cell_trace_put_destroy,
afs_cell_trace_put_queue_fail,
afs_cell_trace_put_queue_work,
afs_cell_trace_put_vol,
afs_cell_trace_see_source,
afs_cell_trace_see_ws,
afs_cell_trace_unuse_alias,
afs_cell_trace_unuse_check_alias,
afs_cell_trace_unuse_delete,
afs_cell_trace_unuse_fc,
afs_cell_trace_unuse_lookup,
afs_cell_trace_unuse_mntpt,
afs_cell_trace_unuse_no_pin,
afs_cell_trace_unuse_parse,
afs_cell_trace_unuse_pin,
afs_cell_trace_unuse_probe,
afs_cell_trace_unuse_sbi,
afs_cell_trace_unuse_ws,
afs_cell_trace_use_alias,
afs_cell_trace_use_check_alias,
afs_cell_trace_use_fc,
afs_cell_trace_use_fc_alias,
afs_cell_trace_use_lookup,
afs_cell_trace_use_mntpt,
afs_cell_trace_use_pin,
afs_cell_trace_use_probe,
afs_cell_trace_use_sbi,
afs_cell_trace_wait,
};
enum afs_fs_operation {
afs_FS_FetchData = 130, /* AFS Fetch file data */
afs_FS_FetchACL = 131, /* AFS Fetch file ACL */
afs_FS_FetchStatus = 132, /* AFS Fetch file status */
afs_FS_StoreData = 133, /* AFS Store file data */
afs_FS_StoreACL = 134, /* AFS Store file ACL */
afs_FS_StoreStatus = 135, /* AFS Store file status */
afs_FS_RemoveFile = 136, /* AFS Remove a file */
afs_FS_CreateFile = 137, /* AFS Create a file */
afs_FS_Rename = 138, /* AFS Rename or move a file or directory */
afs_FS_Symlink = 139, /* AFS Create a symbolic link */
afs_FS_Link = 140, /* AFS Create a hard link */
afs_FS_MakeDir = 141, /* AFS Create a directory */
afs_FS_RemoveDir = 142, /* AFS Remove a directory */
afs_FS_GetVolumeInfo = 148, /* AFS Get information about a volume */
afs_FS_GetVolumeStatus = 149, /* AFS Get volume status information */
afs_FS_GetRootVolume = 151, /* AFS Get root volume name */
afs_FS_SetLock = 156, /* AFS Request a file lock */
afs_FS_ExtendLock = 157, /* AFS Extend a file lock */
afs_FS_ReleaseLock = 158, /* AFS Release a file lock */
afs_FS_Lookup = 161, /* AFS lookup file in directory */
afs_FS_InlineBulkStatus = 65536, /* AFS Fetch multiple file statuses with errors */
afs_FS_FetchData64 = 65537, /* AFS Fetch file data */
afs_FS_StoreData64 = 65538, /* AFS Store file data */
afs_FS_GiveUpAllCallBacks = 65539, /* AFS Give up all our callbacks on a server */
afs_FS_GetCapabilities = 65540, /* AFS Get FS server capabilities */
yfs_FS_FetchData = 130, /* YFS Fetch file data */
yfs_FS_FetchACL = 64131, /* YFS Fetch file ACL */
yfs_FS_FetchStatus = 64132, /* YFS Fetch file status */
yfs_FS_StoreACL = 64134, /* YFS Store file ACL */
yfs_FS_StoreStatus = 64135, /* YFS Store file status */
yfs_FS_RemoveFile = 64136, /* YFS Remove a file */
yfs_FS_CreateFile = 64137, /* YFS Create a file */
yfs_FS_Rename = 64138, /* YFS Rename or move a file or directory */
yfs_FS_Symlink = 64139, /* YFS Create a symbolic link */
yfs_FS_Link = 64140, /* YFS Create a hard link */
yfs_FS_MakeDir = 64141, /* YFS Create a directory */
yfs_FS_RemoveDir = 64142, /* YFS Remove a directory */
yfs_FS_GetVolumeStatus = 64149, /* YFS Get volume status information */
yfs_FS_SetVolumeStatus = 64150, /* YFS Set volume status information */
yfs_FS_SetLock = 64156, /* YFS Request a file lock */
yfs_FS_ExtendLock = 64157, /* YFS Extend a file lock */
yfs_FS_ReleaseLock = 64158, /* YFS Release a file lock */
yfs_FS_Lookup = 64161, /* YFS lookup file in directory */
yfs_FS_FlushCPS = 64165,
yfs_FS_FetchOpaqueACL = 64168,
yfs_FS_WhoAmI = 64170,
yfs_FS_RemoveACL = 64171,
yfs_FS_RemoveFile2 = 64173,
yfs_FS_StoreOpaqueACL2 = 64174,
yfs_FS_InlineBulkStatus = 64536, /* YFS Fetch multiple file statuses with errors */
yfs_FS_FetchData64 = 64537, /* YFS Fetch file data */
yfs_FS_StoreData64 = 64538, /* YFS Store file data */
yfs_FS_UpdateSymlink = 64540,
};
enum afs_vl_operation {
afs_VL_GetEntryByNameU = 527, /* AFS Get Vol Entry By Name operation ID */
afs_VL_GetAddrsU = 533, /* AFS Get FS server addresses */
afs_YFSVL_GetEndpoints = 64002, /* YFS Get FS & Vol server addresses */
afs_YFSVL_GetCellName = 64014, /* YFS Get actual cell name */
afs_VL_GetCapabilities = 65537, /* AFS Get VL server capabilities */
};
afs: Fix tracepoint string placement with built-in AFS To quote Alexey[1]: I was adding custom tracepoint to the kernel, grabbed full F34 kernel .config, disabled modules and booted whole shebang as VM kernel. Then did perf record -a -e ... It crashed: general protection fault, probably for non-canonical address 0x435f5346592e4243: 0000 [#1] SMP PTI CPU: 1 PID: 842 Comm: cat Not tainted 5.12.6+ #26 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-1.fc33 04/01/2014 RIP: 0010:t_show+0x22/0xd0 Then reproducer was narrowed to # cat /sys/kernel/tracing/printk_formats Original F34 kernel with modules didn't crash. So I started to disable options and after disabling AFS everything started working again. The root cause is that AFS was placing char arrays content into a section full of _pointers_ to strings with predictable consequences. Non canonical address 435f5346592e4243 is "CB.YFS_" which came from CM_NAME macro. Steps to reproduce: CONFIG_AFS=y CONFIG_TRACING=y # cat /sys/kernel/tracing/printk_formats Fix this by the following means: (1) Add enum->string translation tables in the event header with the AFS and YFS cache/callback manager operations listed by RPC operation ID. (2) Modify the afs_cb_call tracepoint to print the string from the translation table rather than using the string at the afs_call name pointer. (3) Switch translation table depending on the service we're being accessed as (AFS or YFS) in the tracepoint print clause. Will this cause problems to userspace utilities? Note that the symbolic representation of the YFS service ID isn't available to this header, so I've put it in as a number. I'm not sure if this is the best way to do this. (4) Remove the name wrangling (CM_NAME) macro and put the names directly into the afs_call_type structs in cmservice.c. Fixes: 8e8d7f13b6d5a9 ("afs: Add some tracepoints") Reported-by: Alexey Dobriyan (SK hynix) <adobriyan@gmail.com> Signed-off-by: David Howells <dhowells@redhat.com> Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org> Reviewed-by: Marc Dionne <marc.dionne@auristor.com> cc: Andrew Morton <akpm@linux-foundation.org> cc: linux-afs@lists.infradead.org Link: https://lore.kernel.org/r/YLAXfvZ+rObEOdc%2F@localhost.localdomain/ [1] Link: https://lore.kernel.org/r/643721.1623754699@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/162430903582.2896199.6098150063997983353.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/162609463957.3133237.15916579353149746363.stgit@warthog.procyon.org.uk/ # v1 (repost) Link: https://lore.kernel.org/r/162610726860.3408253.445207609466288531.stgit@warthog.procyon.org.uk/ # v2
2021-06-15 11:57:26 +01:00
enum afs_cm_operation {
afs_CB_CallBack = 204, /* AFS break callback promises */
afs_CB_InitCallBackState = 205, /* AFS initialise callback state */
afs_CB_Probe = 206, /* AFS probe client */
afs_CB_GetLock = 207, /* AFS get contents of CM lock table */
afs_CB_GetCE = 208, /* AFS get cache file description */
afs_CB_GetXStatsVersion = 209, /* AFS get version of extended statistics */
afs_CB_GetXStats = 210, /* AFS get contents of extended statistics data */
afs_CB_InitCallBackState3 = 213, /* AFS initialise callback state, version 3 */
afs_CB_ProbeUuid = 214, /* AFS check the client hasn't rebooted */
};
enum yfs_cm_operation {
yfs_CB_Probe = 206, /* YFS probe client */
yfs_CB_GetLock = 207, /* YFS get contents of CM lock table */
yfs_CB_XStatsVersion = 209, /* YFS get version of extended statistics */
yfs_CB_GetXStats = 210, /* YFS get contents of extended statistics data */
yfs_CB_InitCallBackState3 = 213, /* YFS initialise callback state, version 3 */
yfs_CB_ProbeUuid = 214, /* YFS check the client hasn't rebooted */
yfs_CB_GetServerPrefs = 215,
yfs_CB_GetCellServDV = 216,
yfs_CB_GetLocalCell = 217,
yfs_CB_GetCacheConfig = 218,
yfs_CB_GetCellByNum = 65537,
yfs_CB_TellMeAboutYourself = 65538, /* get client capabilities */
yfs_CB_CallBack = 64204,
};
enum afs_edit_dir_op {
afs_edit_dir_create,
afs_edit_dir_create_error,
afs_edit_dir_create_inval,
afs_edit_dir_create_nospc,
afs_edit_dir_delete,
afs_edit_dir_delete_error,
afs_edit_dir_delete_inval,
afs_edit_dir_delete_noent,
};
enum afs_edit_dir_reason {
afs_edit_dir_for_create,
afs_edit_dir_for_link,
afs_edit_dir_for_mkdir,
afs_edit_dir_for_rename_0,
afs_edit_dir_for_rename_1,
afs_edit_dir_for_rename_2,
afs_edit_dir_for_rmdir,
afs_edit_dir_for_silly_0,
afs_edit_dir_for_silly_1,
afs_edit_dir_for_symlink,
afs_edit_dir_for_unlink,
};
enum afs_eproto_cause {
afs_eproto_bad_status,
afs_eproto_cb_count,
afs_eproto_cb_fid_count,
afs_eproto_cellname_len,
afs_eproto_file_type,
afs_eproto_ibulkst_cb_count,
afs_eproto_ibulkst_count,
afs_eproto_motd_len,
afs_eproto_offline_msg_len,
afs_eproto_volname_len,
afs_eproto_yvl_fsendpt4_len,
afs_eproto_yvl_fsendpt6_len,
afs_eproto_yvl_fsendpt_num,
afs_eproto_yvl_fsendpt_type,
afs_eproto_yvl_vlendpt4_len,
afs_eproto_yvl_vlendpt6_len,
afs_eproto_yvl_vlendpt_type,
};
enum afs_io_error {
afs_io_error_cm_reply,
afs_io_error_extract,
afs_io_error_fs_probe_fail,
afs_io_error_vl_lookup_fail,
afs_io_error_vl_probe_fail,
};
enum afs_file_error {
afs_file_error_dir_bad_magic,
afs_file_error_dir_big,
afs_file_error_dir_missing_page,
afs_file_error_dir_name_too_long,
afs_file_error_dir_over_end,
afs_file_error_dir_small,
afs_file_error_dir_unmarked_ext,
afs_file_error_mntpt,
afs_file_error_writeback_fail,
};
enum afs_flock_event {
afs_flock_acquired,
afs_flock_callback_break,
afs_flock_defer_unlock,
afs_flock_extend_fail,
afs_flock_fail_other,
afs_flock_fail_perm,
afs_flock_no_lockers,
afs_flock_release_fail,
afs_flock_silly_delete,
afs_flock_timestamp,
afs_flock_try_to_lock,
afs_flock_vfs_lock,
afs_flock_vfs_locking,
afs_flock_waited,
afs_flock_waiting,
afs_flock_work_extending,
afs_flock_work_retry,
afs_flock_work_unlocking,
afs_flock_would_block,
};
enum afs_flock_operation {
afs_flock_op_copy_lock,
afs_flock_op_flock,
afs_flock_op_grant,
afs_flock_op_lock,
afs_flock_op_release_lock,
afs_flock_op_return_ok,
afs_flock_op_return_eagain,
afs_flock_op_return_edeadlk,
afs_flock_op_return_error,
afs_flock_op_set_lock,
afs_flock_op_unlock,
afs_flock_op_wake,
};
enum afs_cb_break_reason {
afs_cb_break_no_break,
afs: Try to avoid taking RCU read lock when checking vnode validity Try to avoid taking the RCU read lock when checking the validity of a vnode's callback state. The only thing it's needed for is to pin the parent volume's server list whilst we search it to find the record of the server we're currently using to see if it has been reinitialised (ie. it sent us a CB.InitCallBackState* RPC). Do this by the following means: (1) Keep an additional per-cell counter (fs_s_break) that's incremented each time any of the fileservers in the cell reinitialises. Since the new counter can be accessed without RCU from the vnode, we can check that first - and only if it differs, get the RCU read lock and check the volume's server list. (2) Replace afs_get_s_break_rcu() with afs_check_server_good() which now indicates whether the callback promise is still expected to be present on the server. This does the checks as described in (1). (3) Restructure afs_check_validity() to take account of the change in (2). We can also get rid of the valid variable and just use the need_clear variable with the addition of the afs_cb_break_no_promise reason. (4) afs_check_validity() probably shouldn't be altering vnode->cb_v_break and vnode->cb_s_break when it doesn't have cb_lock exclusively locked. Move the change to vnode->cb_v_break to __afs_break_callback(). Delegate the change to vnode->cb_s_break to afs_select_fileserver() and set vnode->cb_fs_s_break there also. (5) afs_validate() no longer needs to get the RCU read lock around its call to afs_check_validity() - and can skip the call entirely if we don't have a promise. Signed-off-by: David Howells <dhowells@redhat.com> Tested-by: Markus Suvanto <markus.suvanto@gmail.com> cc: linux-afs@lists.infradead.org Link: https://lore.kernel.org/r/163111669583.283156.1397603105683094563.stgit@warthog.procyon.org.uk/
2021-09-02 21:51:01 +01:00
afs_cb_break_no_promise,
afs_cb_break_for_callback,
afs_cb_break_for_deleted,
afs_cb_break_for_lapsed,
afs: Try to avoid taking RCU read lock when checking vnode validity Try to avoid taking the RCU read lock when checking the validity of a vnode's callback state. The only thing it's needed for is to pin the parent volume's server list whilst we search it to find the record of the server we're currently using to see if it has been reinitialised (ie. it sent us a CB.InitCallBackState* RPC). Do this by the following means: (1) Keep an additional per-cell counter (fs_s_break) that's incremented each time any of the fileservers in the cell reinitialises. Since the new counter can be accessed without RCU from the vnode, we can check that first - and only if it differs, get the RCU read lock and check the volume's server list. (2) Replace afs_get_s_break_rcu() with afs_check_server_good() which now indicates whether the callback promise is still expected to be present on the server. This does the checks as described in (1). (3) Restructure afs_check_validity() to take account of the change in (2). We can also get rid of the valid variable and just use the need_clear variable with the addition of the afs_cb_break_no_promise reason. (4) afs_check_validity() probably shouldn't be altering vnode->cb_v_break and vnode->cb_s_break when it doesn't have cb_lock exclusively locked. Move the change to vnode->cb_v_break to __afs_break_callback(). Delegate the change to vnode->cb_s_break to afs_select_fileserver() and set vnode->cb_fs_s_break there also. (5) afs_validate() no longer needs to get the RCU read lock around its call to afs_check_validity() - and can skip the call entirely if we don't have a promise. Signed-off-by: David Howells <dhowells@redhat.com> Tested-by: Markus Suvanto <markus.suvanto@gmail.com> cc: linux-afs@lists.infradead.org Link: https://lore.kernel.org/r/163111669583.283156.1397603105683094563.stgit@warthog.procyon.org.uk/
2021-09-02 21:51:01 +01:00
afs_cb_break_for_s_reinit,
afs_cb_break_for_unlink,
afs: Try to avoid taking RCU read lock when checking vnode validity Try to avoid taking the RCU read lock when checking the validity of a vnode's callback state. The only thing it's needed for is to pin the parent volume's server list whilst we search it to find the record of the server we're currently using to see if it has been reinitialised (ie. it sent us a CB.InitCallBackState* RPC). Do this by the following means: (1) Keep an additional per-cell counter (fs_s_break) that's incremented each time any of the fileservers in the cell reinitialises. Since the new counter can be accessed without RCU from the vnode, we can check that first - and only if it differs, get the RCU read lock and check the volume's server list. (2) Replace afs_get_s_break_rcu() with afs_check_server_good() which now indicates whether the callback promise is still expected to be present on the server. This does the checks as described in (1). (3) Restructure afs_check_validity() to take account of the change in (2). We can also get rid of the valid variable and just use the need_clear variable with the addition of the afs_cb_break_no_promise reason. (4) afs_check_validity() probably shouldn't be altering vnode->cb_v_break and vnode->cb_s_break when it doesn't have cb_lock exclusively locked. Move the change to vnode->cb_v_break to __afs_break_callback(). Delegate the change to vnode->cb_s_break to afs_select_fileserver() and set vnode->cb_fs_s_break there also. (5) afs_validate() no longer needs to get the RCU read lock around its call to afs_check_validity() - and can skip the call entirely if we don't have a promise. Signed-off-by: David Howells <dhowells@redhat.com> Tested-by: Markus Suvanto <markus.suvanto@gmail.com> cc: linux-afs@lists.infradead.org Link: https://lore.kernel.org/r/163111669583.283156.1397603105683094563.stgit@warthog.procyon.org.uk/
2021-09-02 21:51:01 +01:00
afs_cb_break_for_v_break,
afs_cb_break_for_volume_callback,
afs_cb_break_for_zap,
};
afs: Refcount the afs_call struct A static checker warning occurs in the AFS filesystem: fs/afs/cmservice.c:155 SRXAFSCB_CallBack() error: dereferencing freed memory 'call' due to the reply being sent before we access the server it points to. The act of sending the reply causes the call to be freed if an error occurs (but not if it doesn't). On top of this, the lifetime handling of afs_call structs is fragile because they get passed around through workqueues without any sort of refcounting. Deal with the issues by: (1) Fix the maybe/maybe not nature of the reply sending functions with regards to whether they release the call struct. (2) Refcount the afs_call struct and sort out places that need to get/put references. (3) Pass a ref through the work queue and release (or pass on) that ref in the work function. Care has to be taken because a work queue may already own a ref to the call. (4) Do the cleaning up in the put function only. (5) Simplify module cleanup by always incrementing afs_outstanding_calls whenever a call is allocated. (6) Set the backlog to 0 with kernel_listen() at the beginning of the process of closing the socket to prevent new incoming calls from occurring and to remove the contribution of preallocated calls from afs_outstanding_calls before we wait on it. A tracepoint is also added to monitor the afs_call refcount and lifetime. Reported-by: Dan Carpenter <dan.carpenter@oracle.com> Signed-off-by: David Howells <dhowells@redhat.com> Fixes: 08e0e7c82eea: "[AF_RXRPC]: Make the in-kernel AFS filesystem use AF_RXRPC."
2017-01-05 10:38:36 +00:00
#endif /* end __AFS_DECLARE_TRACE_ENUMS_ONCE_ONLY */
/*
* Declare tracing information enums and their string mappings for display.
*/
#define afs_call_traces \
EM(afs_call_trace_alloc, "ALLOC") \
EM(afs_call_trace_free, "FREE ") \
EM(afs_call_trace_get, "GET ") \
afs: Refcount the afs_call struct A static checker warning occurs in the AFS filesystem: fs/afs/cmservice.c:155 SRXAFSCB_CallBack() error: dereferencing freed memory 'call' due to the reply being sent before we access the server it points to. The act of sending the reply causes the call to be freed if an error occurs (but not if it doesn't). On top of this, the lifetime handling of afs_call structs is fragile because they get passed around through workqueues without any sort of refcounting. Deal with the issues by: (1) Fix the maybe/maybe not nature of the reply sending functions with regards to whether they release the call struct. (2) Refcount the afs_call struct and sort out places that need to get/put references. (3) Pass a ref through the work queue and release (or pass on) that ref in the work function. Care has to be taken because a work queue may already own a ref to the call. (4) Do the cleaning up in the put function only. (5) Simplify module cleanup by always incrementing afs_outstanding_calls whenever a call is allocated. (6) Set the backlog to 0 with kernel_listen() at the beginning of the process of closing the socket to prevent new incoming calls from occurring and to remove the contribution of preallocated calls from afs_outstanding_calls before we wait on it. A tracepoint is also added to monitor the afs_call refcount and lifetime. Reported-by: Dan Carpenter <dan.carpenter@oracle.com> Signed-off-by: David Howells <dhowells@redhat.com> Fixes: 08e0e7c82eea: "[AF_RXRPC]: Make the in-kernel AFS filesystem use AF_RXRPC."
2017-01-05 10:38:36 +00:00
EM(afs_call_trace_put, "PUT ") \
EM(afs_call_trace_wake, "WAKE ") \
E_(afs_call_trace_work, "QUEUE")
afs: Refcount the afs_call struct A static checker warning occurs in the AFS filesystem: fs/afs/cmservice.c:155 SRXAFSCB_CallBack() error: dereferencing freed memory 'call' due to the reply being sent before we access the server it points to. The act of sending the reply causes the call to be freed if an error occurs (but not if it doesn't). On top of this, the lifetime handling of afs_call structs is fragile because they get passed around through workqueues without any sort of refcounting. Deal with the issues by: (1) Fix the maybe/maybe not nature of the reply sending functions with regards to whether they release the call struct. (2) Refcount the afs_call struct and sort out places that need to get/put references. (3) Pass a ref through the work queue and release (or pass on) that ref in the work function. Care has to be taken because a work queue may already own a ref to the call. (4) Do the cleaning up in the put function only. (5) Simplify module cleanup by always incrementing afs_outstanding_calls whenever a call is allocated. (6) Set the backlog to 0 with kernel_listen() at the beginning of the process of closing the socket to prevent new incoming calls from occurring and to remove the contribution of preallocated calls from afs_outstanding_calls before we wait on it. A tracepoint is also added to monitor the afs_call refcount and lifetime. Reported-by: Dan Carpenter <dan.carpenter@oracle.com> Signed-off-by: David Howells <dhowells@redhat.com> Fixes: 08e0e7c82eea: "[AF_RXRPC]: Make the in-kernel AFS filesystem use AF_RXRPC."
2017-01-05 10:38:36 +00:00
#define afs_server_traces \
EM(afs_server_trace_alloc, "ALLOC ") \
EM(afs_server_trace_callback, "CALLBACK ") \
EM(afs_server_trace_destroy, "DESTROY ") \
EM(afs_server_trace_free, "FREE ") \
EM(afs_server_trace_gc, "GC ") \
EM(afs_server_trace_get_by_addr, "GET addr ") \
EM(afs_server_trace_get_by_uuid, "GET uuid ") \
EM(afs_server_trace_get_caps, "GET caps ") \
EM(afs_server_trace_get_install, "GET inst ") \
EM(afs_server_trace_get_new_cbi, "GET cbi ") \
afs: Actively poll fileservers to maintain NAT or firewall openings When an AFS client accesses a file, it receives a limited-duration callback promise that the server will notify it if another client changes a file. This callback duration can be a few hours in length. If a client mounts a volume and then an application prevents it from being unmounted, say by chdir'ing into it, but then does nothing for some time, the rxrpc_peer record will expire and rxrpc-level keepalive will cease. If there is NAT or a firewall between the client and the server, the route back for the server may close after a comparatively short duration, meaning that attempts by the server to notify the client may then bounce. The client, however, may (so far as it knows) still have a valid unexpired promise and will then rely on its cached data and will not see changes made on the server by a third party until it incidentally rechecks the status or the promise needs renewal. To deal with this, the client needs to regularly probe the server. This has two effects: firstly, it keeps a route open back for the server, and secondly, it causes the server to disgorge any notifications that got queued up because they couldn't be sent. Fix this by adding a mechanism to emit regular probes. Two levels of probing are made available: Under normal circumstances the 'slow' queue will be used for a fileserver - this just probes the preferred address once every 5 mins or so; however, if server fails to respond to any probes, the server will shift to the 'fast' queue from which all its interfaces will be probed every 30s. When it finally responds, the record will switch back to the slow queue. Further notes: (1) Probing is now no longer driven from the fileserver rotation algorithm. (2) Probes are dispatched to all interfaces on a fileserver when that an afs_server object is set up to record it. (3) The afs_server object is removed from the probe queues when we start to probe it. afs_is_probing_server() returns true if it's not listed - ie. it's undergoing probing. (4) The afs_server object is added back on to the probe queue when the final outstanding probe completes, but the probed_at time is set when we're about to launch a probe so that it's not dependent on the probe duration. (5) The timer and the work item added for this must be handed a count on net->servers_outstanding, which they hand on or release. This makes sure that network namespace cleanup waits for them. Fixes: d2ddc776a458 ("afs: Overhaul volume and server record caching and fileserver rotation") Reported-by: Dave Botsch <botsch@cnf.cornell.edu> Signed-off-by: David Howells <dhowells@redhat.com>
2020-04-24 15:10:00 +01:00
EM(afs_server_trace_get_probe, "GET probe") \
EM(afs_server_trace_give_up_cb, "giveup-cb") \
EM(afs_server_trace_purging, "PURGE ") \
EM(afs_server_trace_put_call, "PUT call ") \
EM(afs_server_trace_put_cbi, "PUT cbi ") \
EM(afs_server_trace_put_find_rsq, "PUT f-rsq") \
afs: Actively poll fileservers to maintain NAT or firewall openings When an AFS client accesses a file, it receives a limited-duration callback promise that the server will notify it if another client changes a file. This callback duration can be a few hours in length. If a client mounts a volume and then an application prevents it from being unmounted, say by chdir'ing into it, but then does nothing for some time, the rxrpc_peer record will expire and rxrpc-level keepalive will cease. If there is NAT or a firewall between the client and the server, the route back for the server may close after a comparatively short duration, meaning that attempts by the server to notify the client may then bounce. The client, however, may (so far as it knows) still have a valid unexpired promise and will then rely on its cached data and will not see changes made on the server by a third party until it incidentally rechecks the status or the promise needs renewal. To deal with this, the client needs to regularly probe the server. This has two effects: firstly, it keeps a route open back for the server, and secondly, it causes the server to disgorge any notifications that got queued up because they couldn't be sent. Fix this by adding a mechanism to emit regular probes. Two levels of probing are made available: Under normal circumstances the 'slow' queue will be used for a fileserver - this just probes the preferred address once every 5 mins or so; however, if server fails to respond to any probes, the server will shift to the 'fast' queue from which all its interfaces will be probed every 30s. When it finally responds, the record will switch back to the slow queue. Further notes: (1) Probing is now no longer driven from the fileserver rotation algorithm. (2) Probes are dispatched to all interfaces on a fileserver when that an afs_server object is set up to record it. (3) The afs_server object is removed from the probe queues when we start to probe it. afs_is_probing_server() returns true if it's not listed - ie. it's undergoing probing. (4) The afs_server object is added back on to the probe queue when the final outstanding probe completes, but the probed_at time is set when we're about to launch a probe so that it's not dependent on the probe duration. (5) The timer and the work item added for this must be handed a count on net->servers_outstanding, which they hand on or release. This makes sure that network namespace cleanup waits for them. Fixes: d2ddc776a458 ("afs: Overhaul volume and server record caching and fileserver rotation") Reported-by: Dave Botsch <botsch@cnf.cornell.edu> Signed-off-by: David Howells <dhowells@redhat.com>
2020-04-24 15:10:00 +01:00
EM(afs_server_trace_put_probe, "PUT probe") \
EM(afs_server_trace_put_slist, "PUT slist") \
EM(afs_server_trace_put_slist_isort, "PUT isort") \
EM(afs_server_trace_put_uuid_rsq, "PUT u-req") \
E_(afs_server_trace_update, "UPDATE")
#define afs_volume_traces \
EM(afs_volume_trace_alloc, "ALLOC ") \
EM(afs_volume_trace_free, "FREE ") \
EM(afs_volume_trace_get_alloc_sbi, "GET sbi-alloc ") \
EM(afs_volume_trace_get_cell_insert, "GET cell-insrt") \
EM(afs_volume_trace_get_new_op, "GET op-new ") \
EM(afs_volume_trace_get_query_alias, "GET cell-alias") \
EM(afs_volume_trace_put_cell_dup, "PUT cell-dup ") \
EM(afs_volume_trace_put_cell_root, "PUT cell-root ") \
EM(afs_volume_trace_put_destroy_sbi, "PUT sbi-destry") \
EM(afs_volume_trace_put_free_fc, "PUT fc-free ") \
EM(afs_volume_trace_put_put_op, "PUT op-put ") \
EM(afs_volume_trace_put_query_alias, "PUT cell-alias") \
EM(afs_volume_trace_put_validate_fc, "PUT fc-validat") \
E_(afs_volume_trace_remove, "REMOVE ")
#define afs_cell_traces \
EM(afs_cell_trace_alloc, "ALLOC ") \
EM(afs_cell_trace_free, "FREE ") \
EM(afs_cell_trace_get_queue_dns, "GET q-dns ") \
EM(afs_cell_trace_get_queue_manage, "GET q-mng ") \
EM(afs_cell_trace_get_queue_new, "GET q-new ") \
EM(afs_cell_trace_get_vol, "GET vol ") \
EM(afs_cell_trace_insert, "INSERT ") \
EM(afs_cell_trace_manage, "MANAGE ") \
EM(afs_cell_trace_put_candidate, "PUT candid") \
EM(afs_cell_trace_put_destroy, "PUT destry") \
EM(afs_cell_trace_put_queue_work, "PUT q-work") \
EM(afs_cell_trace_put_queue_fail, "PUT q-fail") \
EM(afs_cell_trace_put_vol, "PUT vol ") \
EM(afs_cell_trace_see_source, "SEE source") \
EM(afs_cell_trace_see_ws, "SEE ws ") \
EM(afs_cell_trace_unuse_alias, "UNU alias ") \
EM(afs_cell_trace_unuse_check_alias, "UNU chk-al") \
EM(afs_cell_trace_unuse_delete, "UNU delete") \
EM(afs_cell_trace_unuse_fc, "UNU fc ") \
EM(afs_cell_trace_unuse_lookup, "UNU lookup") \
EM(afs_cell_trace_unuse_mntpt, "UNU mntpt ") \
EM(afs_cell_trace_unuse_parse, "UNU parse ") \
EM(afs_cell_trace_unuse_pin, "UNU pin ") \
EM(afs_cell_trace_unuse_probe, "UNU probe ") \
EM(afs_cell_trace_unuse_sbi, "UNU sbi ") \
EM(afs_cell_trace_unuse_ws, "UNU ws ") \
EM(afs_cell_trace_use_alias, "USE alias ") \
EM(afs_cell_trace_use_check_alias, "USE chk-al") \
EM(afs_cell_trace_use_fc, "USE fc ") \
EM(afs_cell_trace_use_fc_alias, "USE fc-al ") \
EM(afs_cell_trace_use_lookup, "USE lookup") \
EM(afs_cell_trace_use_mntpt, "USE mntpt ") \
EM(afs_cell_trace_use_pin, "USE pin ") \
EM(afs_cell_trace_use_probe, "USE probe ") \
EM(afs_cell_trace_use_sbi, "USE sbi ") \
E_(afs_cell_trace_wait, "WAIT ")
#define afs_fs_operations \
EM(afs_FS_FetchData, "FS.FetchData") \
EM(afs_FS_FetchStatus, "FS.FetchStatus") \
EM(afs_FS_StoreData, "FS.StoreData") \
EM(afs_FS_StoreStatus, "FS.StoreStatus") \
EM(afs_FS_RemoveFile, "FS.RemoveFile") \
EM(afs_FS_CreateFile, "FS.CreateFile") \
EM(afs_FS_Rename, "FS.Rename") \
EM(afs_FS_Symlink, "FS.Symlink") \
EM(afs_FS_Link, "FS.Link") \
EM(afs_FS_MakeDir, "FS.MakeDir") \
EM(afs_FS_RemoveDir, "FS.RemoveDir") \
EM(afs_FS_GetVolumeInfo, "FS.GetVolumeInfo") \
EM(afs_FS_GetVolumeStatus, "FS.GetVolumeStatus") \
EM(afs_FS_GetRootVolume, "FS.GetRootVolume") \
EM(afs_FS_SetLock, "FS.SetLock") \
EM(afs_FS_ExtendLock, "FS.ExtendLock") \
EM(afs_FS_ReleaseLock, "FS.ReleaseLock") \
EM(afs_FS_Lookup, "FS.Lookup") \
EM(afs_FS_InlineBulkStatus, "FS.InlineBulkStatus") \
EM(afs_FS_FetchData64, "FS.FetchData64") \
EM(afs_FS_StoreData64, "FS.StoreData64") \
EM(afs_FS_GiveUpAllCallBacks, "FS.GiveUpAllCallBacks") \
EM(afs_FS_GetCapabilities, "FS.GetCapabilities") \
EM(yfs_FS_FetchACL, "YFS.FetchACL") \
EM(yfs_FS_FetchStatus, "YFS.FetchStatus") \
EM(yfs_FS_StoreACL, "YFS.StoreACL") \
EM(yfs_FS_StoreStatus, "YFS.StoreStatus") \
EM(yfs_FS_RemoveFile, "YFS.RemoveFile") \
EM(yfs_FS_CreateFile, "YFS.CreateFile") \
EM(yfs_FS_Rename, "YFS.Rename") \
EM(yfs_FS_Symlink, "YFS.Symlink") \
EM(yfs_FS_Link, "YFS.Link") \
EM(yfs_FS_MakeDir, "YFS.MakeDir") \
EM(yfs_FS_RemoveDir, "YFS.RemoveDir") \
EM(yfs_FS_GetVolumeStatus, "YFS.GetVolumeStatus") \
EM(yfs_FS_SetVolumeStatus, "YFS.SetVolumeStatus") \
EM(yfs_FS_SetLock, "YFS.SetLock") \
EM(yfs_FS_ExtendLock, "YFS.ExtendLock") \
EM(yfs_FS_ReleaseLock, "YFS.ReleaseLock") \
EM(yfs_FS_Lookup, "YFS.Lookup") \
EM(yfs_FS_FlushCPS, "YFS.FlushCPS") \
EM(yfs_FS_FetchOpaqueACL, "YFS.FetchOpaqueACL") \
EM(yfs_FS_WhoAmI, "YFS.WhoAmI") \
EM(yfs_FS_RemoveACL, "YFS.RemoveACL") \
EM(yfs_FS_RemoveFile2, "YFS.RemoveFile2") \
EM(yfs_FS_StoreOpaqueACL2, "YFS.StoreOpaqueACL2") \
EM(yfs_FS_InlineBulkStatus, "YFS.InlineBulkStatus") \
EM(yfs_FS_FetchData64, "YFS.FetchData64") \
EM(yfs_FS_StoreData64, "YFS.StoreData64") \
E_(yfs_FS_UpdateSymlink, "YFS.UpdateSymlink")
#define afs_vl_operations \
EM(afs_VL_GetEntryByNameU, "VL.GetEntryByNameU") \
EM(afs_VL_GetAddrsU, "VL.GetAddrsU") \
EM(afs_YFSVL_GetEndpoints, "YFSVL.GetEndpoints") \
EM(afs_YFSVL_GetCellName, "YFSVL.GetCellName") \
E_(afs_VL_GetCapabilities, "VL.GetCapabilities")
afs: Fix tracepoint string placement with built-in AFS To quote Alexey[1]: I was adding custom tracepoint to the kernel, grabbed full F34 kernel .config, disabled modules and booted whole shebang as VM kernel. Then did perf record -a -e ... It crashed: general protection fault, probably for non-canonical address 0x435f5346592e4243: 0000 [#1] SMP PTI CPU: 1 PID: 842 Comm: cat Not tainted 5.12.6+ #26 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-1.fc33 04/01/2014 RIP: 0010:t_show+0x22/0xd0 Then reproducer was narrowed to # cat /sys/kernel/tracing/printk_formats Original F34 kernel with modules didn't crash. So I started to disable options and after disabling AFS everything started working again. The root cause is that AFS was placing char arrays content into a section full of _pointers_ to strings with predictable consequences. Non canonical address 435f5346592e4243 is "CB.YFS_" which came from CM_NAME macro. Steps to reproduce: CONFIG_AFS=y CONFIG_TRACING=y # cat /sys/kernel/tracing/printk_formats Fix this by the following means: (1) Add enum->string translation tables in the event header with the AFS and YFS cache/callback manager operations listed by RPC operation ID. (2) Modify the afs_cb_call tracepoint to print the string from the translation table rather than using the string at the afs_call name pointer. (3) Switch translation table depending on the service we're being accessed as (AFS or YFS) in the tracepoint print clause. Will this cause problems to userspace utilities? Note that the symbolic representation of the YFS service ID isn't available to this header, so I've put it in as a number. I'm not sure if this is the best way to do this. (4) Remove the name wrangling (CM_NAME) macro and put the names directly into the afs_call_type structs in cmservice.c. Fixes: 8e8d7f13b6d5a9 ("afs: Add some tracepoints") Reported-by: Alexey Dobriyan (SK hynix) <adobriyan@gmail.com> Signed-off-by: David Howells <dhowells@redhat.com> Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org> Reviewed-by: Marc Dionne <marc.dionne@auristor.com> cc: Andrew Morton <akpm@linux-foundation.org> cc: linux-afs@lists.infradead.org Link: https://lore.kernel.org/r/YLAXfvZ+rObEOdc%2F@localhost.localdomain/ [1] Link: https://lore.kernel.org/r/643721.1623754699@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/162430903582.2896199.6098150063997983353.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/162609463957.3133237.15916579353149746363.stgit@warthog.procyon.org.uk/ # v1 (repost) Link: https://lore.kernel.org/r/162610726860.3408253.445207609466288531.stgit@warthog.procyon.org.uk/ # v2
2021-06-15 11:57:26 +01:00
#define afs_cm_operations \
EM(afs_CB_CallBack, "CB.CallBack") \
EM(afs_CB_InitCallBackState, "CB.InitCallBackState") \
EM(afs_CB_Probe, "CB.Probe") \
EM(afs_CB_GetLock, "CB.GetLock") \
EM(afs_CB_GetCE, "CB.GetCE") \
EM(afs_CB_GetXStatsVersion, "CB.GetXStatsVersion") \
EM(afs_CB_GetXStats, "CB.GetXStats") \
EM(afs_CB_InitCallBackState3, "CB.InitCallBackState3") \
E_(afs_CB_ProbeUuid, "CB.ProbeUuid")
#define yfs_cm_operations \
EM(yfs_CB_Probe, "YFSCB.Probe") \
EM(yfs_CB_GetLock, "YFSCB.GetLock") \
EM(yfs_CB_XStatsVersion, "YFSCB.XStatsVersion") \
EM(yfs_CB_GetXStats, "YFSCB.GetXStats") \
EM(yfs_CB_InitCallBackState3, "YFSCB.InitCallBackState3") \
EM(yfs_CB_ProbeUuid, "YFSCB.ProbeUuid") \
EM(yfs_CB_GetServerPrefs, "YFSCB.GetServerPrefs") \
EM(yfs_CB_GetCellServDV, "YFSCB.GetCellServDV") \
EM(yfs_CB_GetLocalCell, "YFSCB.GetLocalCell") \
EM(yfs_CB_GetCacheConfig, "YFSCB.GetCacheConfig") \
EM(yfs_CB_GetCellByNum, "YFSCB.GetCellByNum") \
EM(yfs_CB_TellMeAboutYourself, "YFSCB.TellMeAboutYourself") \
E_(yfs_CB_CallBack, "YFSCB.CallBack")
#define afs_edit_dir_ops \
EM(afs_edit_dir_create, "create") \
EM(afs_edit_dir_create_error, "c_fail") \
EM(afs_edit_dir_create_inval, "c_invl") \
EM(afs_edit_dir_create_nospc, "c_nspc") \
EM(afs_edit_dir_delete, "delete") \
EM(afs_edit_dir_delete_error, "d_err ") \
EM(afs_edit_dir_delete_inval, "d_invl") \
E_(afs_edit_dir_delete_noent, "d_nent")
#define afs_edit_dir_reasons \
EM(afs_edit_dir_for_create, "Create") \
EM(afs_edit_dir_for_link, "Link ") \
EM(afs_edit_dir_for_mkdir, "MkDir ") \
EM(afs_edit_dir_for_rename_0, "Renam0") \
EM(afs_edit_dir_for_rename_1, "Renam1") \
EM(afs_edit_dir_for_rename_2, "Renam2") \
EM(afs_edit_dir_for_rmdir, "RmDir ") \
EM(afs_edit_dir_for_silly_0, "S_Ren0") \
EM(afs_edit_dir_for_silly_1, "S_Ren1") \
EM(afs_edit_dir_for_symlink, "Symlnk") \
E_(afs_edit_dir_for_unlink, "Unlink")
#define afs_eproto_causes \
EM(afs_eproto_bad_status, "BadStatus") \
EM(afs_eproto_cb_count, "CbCount") \
EM(afs_eproto_cb_fid_count, "CbFidCount") \
EM(afs_eproto_cellname_len, "CellNameLen") \
EM(afs_eproto_file_type, "FileTYpe") \
EM(afs_eproto_ibulkst_cb_count, "IBS.CbCount") \
EM(afs_eproto_ibulkst_count, "IBS.FidCount") \
EM(afs_eproto_motd_len, "MotdLen") \
EM(afs_eproto_offline_msg_len, "OfflineMsgLen") \
EM(afs_eproto_volname_len, "VolNameLen") \
EM(afs_eproto_yvl_fsendpt4_len, "YVL.FsEnd4Len") \
EM(afs_eproto_yvl_fsendpt6_len, "YVL.FsEnd6Len") \
EM(afs_eproto_yvl_fsendpt_num, "YVL.FsEndCount") \
EM(afs_eproto_yvl_fsendpt_type, "YVL.FsEndType") \
EM(afs_eproto_yvl_vlendpt4_len, "YVL.VlEnd4Len") \
EM(afs_eproto_yvl_vlendpt6_len, "YVL.VlEnd6Len") \
E_(afs_eproto_yvl_vlendpt_type, "YVL.VlEndType")
#define afs_io_errors \
EM(afs_io_error_cm_reply, "CM_REPLY") \
EM(afs_io_error_extract, "EXTRACT") \
EM(afs_io_error_fs_probe_fail, "FS_PROBE_FAIL") \
EM(afs_io_error_vl_lookup_fail, "VL_LOOKUP_FAIL") \
E_(afs_io_error_vl_probe_fail, "VL_PROBE_FAIL")
#define afs_file_errors \
EM(afs_file_error_dir_bad_magic, "DIR_BAD_MAGIC") \
EM(afs_file_error_dir_big, "DIR_BIG") \
EM(afs_file_error_dir_missing_page, "DIR_MISSING_PAGE") \
EM(afs_file_error_dir_name_too_long, "DIR_NAME_TOO_LONG") \
EM(afs_file_error_dir_over_end, "DIR_ENT_OVER_END") \
EM(afs_file_error_dir_small, "DIR_SMALL") \
EM(afs_file_error_dir_unmarked_ext, "DIR_UNMARKED_EXT") \
EM(afs_file_error_mntpt, "MNTPT_READ_FAILED") \
E_(afs_file_error_writeback_fail, "WRITEBACK_FAILED")
#define afs_flock_types \
EM(F_RDLCK, "RDLCK") \
EM(F_WRLCK, "WRLCK") \
E_(F_UNLCK, "UNLCK")
#define afs_flock_states \
EM(AFS_VNODE_LOCK_NONE, "NONE") \
EM(AFS_VNODE_LOCK_WAITING_FOR_CB, "WAIT_FOR_CB") \
EM(AFS_VNODE_LOCK_SETTING, "SETTING") \
EM(AFS_VNODE_LOCK_GRANTED, "GRANTED") \
EM(AFS_VNODE_LOCK_EXTENDING, "EXTENDING") \
EM(AFS_VNODE_LOCK_NEED_UNLOCK, "NEED_UNLOCK") \
EM(AFS_VNODE_LOCK_UNLOCKING, "UNLOCKING") \
E_(AFS_VNODE_LOCK_DELETED, "DELETED")
#define afs_flock_events \
EM(afs_flock_acquired, "Acquired") \
EM(afs_flock_callback_break, "Callback") \
EM(afs_flock_defer_unlock, "D-Unlock") \
EM(afs_flock_extend_fail, "Ext_Fail") \
EM(afs_flock_fail_other, "ErrOther") \
EM(afs_flock_fail_perm, "ErrPerm ") \
EM(afs_flock_no_lockers, "NoLocker") \
EM(afs_flock_release_fail, "Rel_Fail") \
EM(afs_flock_silly_delete, "SillyDel") \
EM(afs_flock_timestamp, "Timestmp") \
EM(afs_flock_try_to_lock, "TryToLck") \
EM(afs_flock_vfs_lock, "VFSLock ") \
EM(afs_flock_vfs_locking, "VFSLking") \
EM(afs_flock_waited, "Waited ") \
EM(afs_flock_waiting, "Waiting ") \
EM(afs_flock_work_extending, "Extendng") \
EM(afs_flock_work_retry, "Retry ") \
EM(afs_flock_work_unlocking, "Unlcking") \
E_(afs_flock_would_block, "EWOULDBL")
#define afs_flock_operations \
EM(afs_flock_op_copy_lock, "COPY ") \
EM(afs_flock_op_flock, "->flock ") \
EM(afs_flock_op_grant, "GRANT ") \
EM(afs_flock_op_lock, "->lock ") \
EM(afs_flock_op_release_lock, "RELEASE ") \
EM(afs_flock_op_return_ok, "<-OK ") \
EM(afs_flock_op_return_edeadlk, "<-EDEADL") \
EM(afs_flock_op_return_eagain, "<-EAGAIN") \
EM(afs_flock_op_return_error, "<-ERROR ") \
EM(afs_flock_op_set_lock, "SET ") \
EM(afs_flock_op_unlock, "UNLOCK ") \
E_(afs_flock_op_wake, "WAKE ")
#define afs_cb_break_reasons \
EM(afs_cb_break_no_break, "no-break") \
afs: Try to avoid taking RCU read lock when checking vnode validity Try to avoid taking the RCU read lock when checking the validity of a vnode's callback state. The only thing it's needed for is to pin the parent volume's server list whilst we search it to find the record of the server we're currently using to see if it has been reinitialised (ie. it sent us a CB.InitCallBackState* RPC). Do this by the following means: (1) Keep an additional per-cell counter (fs_s_break) that's incremented each time any of the fileservers in the cell reinitialises. Since the new counter can be accessed without RCU from the vnode, we can check that first - and only if it differs, get the RCU read lock and check the volume's server list. (2) Replace afs_get_s_break_rcu() with afs_check_server_good() which now indicates whether the callback promise is still expected to be present on the server. This does the checks as described in (1). (3) Restructure afs_check_validity() to take account of the change in (2). We can also get rid of the valid variable and just use the need_clear variable with the addition of the afs_cb_break_no_promise reason. (4) afs_check_validity() probably shouldn't be altering vnode->cb_v_break and vnode->cb_s_break when it doesn't have cb_lock exclusively locked. Move the change to vnode->cb_v_break to __afs_break_callback(). Delegate the change to vnode->cb_s_break to afs_select_fileserver() and set vnode->cb_fs_s_break there also. (5) afs_validate() no longer needs to get the RCU read lock around its call to afs_check_validity() - and can skip the call entirely if we don't have a promise. Signed-off-by: David Howells <dhowells@redhat.com> Tested-by: Markus Suvanto <markus.suvanto@gmail.com> cc: linux-afs@lists.infradead.org Link: https://lore.kernel.org/r/163111669583.283156.1397603105683094563.stgit@warthog.procyon.org.uk/
2021-09-02 21:51:01 +01:00
EM(afs_cb_break_no_promise, "no-promise") \
EM(afs_cb_break_for_callback, "break-cb") \
EM(afs_cb_break_for_deleted, "break-del") \
EM(afs_cb_break_for_lapsed, "break-lapsed") \
afs: Try to avoid taking RCU read lock when checking vnode validity Try to avoid taking the RCU read lock when checking the validity of a vnode's callback state. The only thing it's needed for is to pin the parent volume's server list whilst we search it to find the record of the server we're currently using to see if it has been reinitialised (ie. it sent us a CB.InitCallBackState* RPC). Do this by the following means: (1) Keep an additional per-cell counter (fs_s_break) that's incremented each time any of the fileservers in the cell reinitialises. Since the new counter can be accessed without RCU from the vnode, we can check that first - and only if it differs, get the RCU read lock and check the volume's server list. (2) Replace afs_get_s_break_rcu() with afs_check_server_good() which now indicates whether the callback promise is still expected to be present on the server. This does the checks as described in (1). (3) Restructure afs_check_validity() to take account of the change in (2). We can also get rid of the valid variable and just use the need_clear variable with the addition of the afs_cb_break_no_promise reason. (4) afs_check_validity() probably shouldn't be altering vnode->cb_v_break and vnode->cb_s_break when it doesn't have cb_lock exclusively locked. Move the change to vnode->cb_v_break to __afs_break_callback(). Delegate the change to vnode->cb_s_break to afs_select_fileserver() and set vnode->cb_fs_s_break there also. (5) afs_validate() no longer needs to get the RCU read lock around its call to afs_check_validity() - and can skip the call entirely if we don't have a promise. Signed-off-by: David Howells <dhowells@redhat.com> Tested-by: Markus Suvanto <markus.suvanto@gmail.com> cc: linux-afs@lists.infradead.org Link: https://lore.kernel.org/r/163111669583.283156.1397603105683094563.stgit@warthog.procyon.org.uk/
2021-09-02 21:51:01 +01:00
EM(afs_cb_break_for_s_reinit, "s-reinit") \
EM(afs_cb_break_for_unlink, "break-unlink") \
afs: Try to avoid taking RCU read lock when checking vnode validity Try to avoid taking the RCU read lock when checking the validity of a vnode's callback state. The only thing it's needed for is to pin the parent volume's server list whilst we search it to find the record of the server we're currently using to see if it has been reinitialised (ie. it sent us a CB.InitCallBackState* RPC). Do this by the following means: (1) Keep an additional per-cell counter (fs_s_break) that's incremented each time any of the fileservers in the cell reinitialises. Since the new counter can be accessed without RCU from the vnode, we can check that first - and only if it differs, get the RCU read lock and check the volume's server list. (2) Replace afs_get_s_break_rcu() with afs_check_server_good() which now indicates whether the callback promise is still expected to be present on the server. This does the checks as described in (1). (3) Restructure afs_check_validity() to take account of the change in (2). We can also get rid of the valid variable and just use the need_clear variable with the addition of the afs_cb_break_no_promise reason. (4) afs_check_validity() probably shouldn't be altering vnode->cb_v_break and vnode->cb_s_break when it doesn't have cb_lock exclusively locked. Move the change to vnode->cb_v_break to __afs_break_callback(). Delegate the change to vnode->cb_s_break to afs_select_fileserver() and set vnode->cb_fs_s_break there also. (5) afs_validate() no longer needs to get the RCU read lock around its call to afs_check_validity() - and can skip the call entirely if we don't have a promise. Signed-off-by: David Howells <dhowells@redhat.com> Tested-by: Markus Suvanto <markus.suvanto@gmail.com> cc: linux-afs@lists.infradead.org Link: https://lore.kernel.org/r/163111669583.283156.1397603105683094563.stgit@warthog.procyon.org.uk/
2021-09-02 21:51:01 +01:00
EM(afs_cb_break_for_v_break, "break-v") \
EM(afs_cb_break_for_volume_callback, "break-v-cb") \
E_(afs_cb_break_for_zap, "break-zap")
afs: Refcount the afs_call struct A static checker warning occurs in the AFS filesystem: fs/afs/cmservice.c:155 SRXAFSCB_CallBack() error: dereferencing freed memory 'call' due to the reply being sent before we access the server it points to. The act of sending the reply causes the call to be freed if an error occurs (but not if it doesn't). On top of this, the lifetime handling of afs_call structs is fragile because they get passed around through workqueues without any sort of refcounting. Deal with the issues by: (1) Fix the maybe/maybe not nature of the reply sending functions with regards to whether they release the call struct. (2) Refcount the afs_call struct and sort out places that need to get/put references. (3) Pass a ref through the work queue and release (or pass on) that ref in the work function. Care has to be taken because a work queue may already own a ref to the call. (4) Do the cleaning up in the put function only. (5) Simplify module cleanup by always incrementing afs_outstanding_calls whenever a call is allocated. (6) Set the backlog to 0 with kernel_listen() at the beginning of the process of closing the socket to prevent new incoming calls from occurring and to remove the contribution of preallocated calls from afs_outstanding_calls before we wait on it. A tracepoint is also added to monitor the afs_call refcount and lifetime. Reported-by: Dan Carpenter <dan.carpenter@oracle.com> Signed-off-by: David Howells <dhowells@redhat.com> Fixes: 08e0e7c82eea: "[AF_RXRPC]: Make the in-kernel AFS filesystem use AF_RXRPC."
2017-01-05 10:38:36 +00:00
/*
* Export enum symbols via userspace.
*/
#undef EM
#undef E_
#define EM(a, b) TRACE_DEFINE_ENUM(a);
#define E_(a, b) TRACE_DEFINE_ENUM(a);
afs_call_traces;
afs_server_traces;
afs_cell_traces;
afs_fs_operations;
afs_vl_operations;
afs: Fix tracepoint string placement with built-in AFS To quote Alexey[1]: I was adding custom tracepoint to the kernel, grabbed full F34 kernel .config, disabled modules and booted whole shebang as VM kernel. Then did perf record -a -e ... It crashed: general protection fault, probably for non-canonical address 0x435f5346592e4243: 0000 [#1] SMP PTI CPU: 1 PID: 842 Comm: cat Not tainted 5.12.6+ #26 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-1.fc33 04/01/2014 RIP: 0010:t_show+0x22/0xd0 Then reproducer was narrowed to # cat /sys/kernel/tracing/printk_formats Original F34 kernel with modules didn't crash. So I started to disable options and after disabling AFS everything started working again. The root cause is that AFS was placing char arrays content into a section full of _pointers_ to strings with predictable consequences. Non canonical address 435f5346592e4243 is "CB.YFS_" which came from CM_NAME macro. Steps to reproduce: CONFIG_AFS=y CONFIG_TRACING=y # cat /sys/kernel/tracing/printk_formats Fix this by the following means: (1) Add enum->string translation tables in the event header with the AFS and YFS cache/callback manager operations listed by RPC operation ID. (2) Modify the afs_cb_call tracepoint to print the string from the translation table rather than using the string at the afs_call name pointer. (3) Switch translation table depending on the service we're being accessed as (AFS or YFS) in the tracepoint print clause. Will this cause problems to userspace utilities? Note that the symbolic representation of the YFS service ID isn't available to this header, so I've put it in as a number. I'm not sure if this is the best way to do this. (4) Remove the name wrangling (CM_NAME) macro and put the names directly into the afs_call_type structs in cmservice.c. Fixes: 8e8d7f13b6d5a9 ("afs: Add some tracepoints") Reported-by: Alexey Dobriyan (SK hynix) <adobriyan@gmail.com> Signed-off-by: David Howells <dhowells@redhat.com> Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org> Reviewed-by: Marc Dionne <marc.dionne@auristor.com> cc: Andrew Morton <akpm@linux-foundation.org> cc: linux-afs@lists.infradead.org Link: https://lore.kernel.org/r/YLAXfvZ+rObEOdc%2F@localhost.localdomain/ [1] Link: https://lore.kernel.org/r/643721.1623754699@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/162430903582.2896199.6098150063997983353.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/162609463957.3133237.15916579353149746363.stgit@warthog.procyon.org.uk/ # v1 (repost) Link: https://lore.kernel.org/r/162610726860.3408253.445207609466288531.stgit@warthog.procyon.org.uk/ # v2
2021-06-15 11:57:26 +01:00
afs_cm_operations;
yfs_cm_operations;
afs_edit_dir_ops;
afs_edit_dir_reasons;
afs_eproto_causes;
afs_io_errors;
afs_file_errors;
afs_flock_types;
afs_flock_operations;
afs_cb_break_reasons;
afs: Refcount the afs_call struct A static checker warning occurs in the AFS filesystem: fs/afs/cmservice.c:155 SRXAFSCB_CallBack() error: dereferencing freed memory 'call' due to the reply being sent before we access the server it points to. The act of sending the reply causes the call to be freed if an error occurs (but not if it doesn't). On top of this, the lifetime handling of afs_call structs is fragile because they get passed around through workqueues without any sort of refcounting. Deal with the issues by: (1) Fix the maybe/maybe not nature of the reply sending functions with regards to whether they release the call struct. (2) Refcount the afs_call struct and sort out places that need to get/put references. (3) Pass a ref through the work queue and release (or pass on) that ref in the work function. Care has to be taken because a work queue may already own a ref to the call. (4) Do the cleaning up in the put function only. (5) Simplify module cleanup by always incrementing afs_outstanding_calls whenever a call is allocated. (6) Set the backlog to 0 with kernel_listen() at the beginning of the process of closing the socket to prevent new incoming calls from occurring and to remove the contribution of preallocated calls from afs_outstanding_calls before we wait on it. A tracepoint is also added to monitor the afs_call refcount and lifetime. Reported-by: Dan Carpenter <dan.carpenter@oracle.com> Signed-off-by: David Howells <dhowells@redhat.com> Fixes: 08e0e7c82eea: "[AF_RXRPC]: Make the in-kernel AFS filesystem use AF_RXRPC."
2017-01-05 10:38:36 +00:00
/*
* Now redefine the EM() and E_() macros to map the enums to the strings that
* will be printed in the output.
*/
#undef EM
#undef E_
#define EM(a, b) { a, b },
#define E_(a, b) { a, b }
TRACE_EVENT(afs_receive_data,
TP_PROTO(struct afs_call *call, struct iov_iter *iter,
bool want_more, int ret),
TP_ARGS(call, iter, want_more, ret),
TP_STRUCT__entry(
__field(loff_t, remain )
__field(unsigned int, call )
__field(enum afs_call_state, state )
__field(unsigned short, unmarshall )
__field(bool, want_more )
__field(int, ret )
),
TP_fast_assign(
__entry->call = call->debug_id;
__entry->state = call->state;
__entry->unmarshall = call->unmarshall;
__entry->remain = iov_iter_count(iter);
__entry->want_more = want_more;
__entry->ret = ret;
),
TP_printk("c=%08x r=%llu u=%u w=%u s=%u ret=%d",
__entry->call,
__entry->remain,
__entry->unmarshall,
__entry->want_more,
__entry->state,
__entry->ret)
);
TRACE_EVENT(afs_notify_call,
TP_PROTO(struct rxrpc_call *rxcall, struct afs_call *call),
TP_ARGS(rxcall, call),
TP_STRUCT__entry(
__field(unsigned int, call )
__field(enum afs_call_state, state )
__field(unsigned short, unmarshall )
),
TP_fast_assign(
__entry->call = call->debug_id;
__entry->state = call->state;
__entry->unmarshall = call->unmarshall;
),
TP_printk("c=%08x s=%u u=%u",
__entry->call,
__entry->state, __entry->unmarshall)
);
TRACE_EVENT(afs_cb_call,
TP_PROTO(struct afs_call *call),
TP_ARGS(call),
TP_STRUCT__entry(
__field(unsigned int, call )
__field(u32, op )
afs: Fix tracepoint string placement with built-in AFS To quote Alexey[1]: I was adding custom tracepoint to the kernel, grabbed full F34 kernel .config, disabled modules and booted whole shebang as VM kernel. Then did perf record -a -e ... It crashed: general protection fault, probably for non-canonical address 0x435f5346592e4243: 0000 [#1] SMP PTI CPU: 1 PID: 842 Comm: cat Not tainted 5.12.6+ #26 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-1.fc33 04/01/2014 RIP: 0010:t_show+0x22/0xd0 Then reproducer was narrowed to # cat /sys/kernel/tracing/printk_formats Original F34 kernel with modules didn't crash. So I started to disable options and after disabling AFS everything started working again. The root cause is that AFS was placing char arrays content into a section full of _pointers_ to strings with predictable consequences. Non canonical address 435f5346592e4243 is "CB.YFS_" which came from CM_NAME macro. Steps to reproduce: CONFIG_AFS=y CONFIG_TRACING=y # cat /sys/kernel/tracing/printk_formats Fix this by the following means: (1) Add enum->string translation tables in the event header with the AFS and YFS cache/callback manager operations listed by RPC operation ID. (2) Modify the afs_cb_call tracepoint to print the string from the translation table rather than using the string at the afs_call name pointer. (3) Switch translation table depending on the service we're being accessed as (AFS or YFS) in the tracepoint print clause. Will this cause problems to userspace utilities? Note that the symbolic representation of the YFS service ID isn't available to this header, so I've put it in as a number. I'm not sure if this is the best way to do this. (4) Remove the name wrangling (CM_NAME) macro and put the names directly into the afs_call_type structs in cmservice.c. Fixes: 8e8d7f13b6d5a9 ("afs: Add some tracepoints") Reported-by: Alexey Dobriyan (SK hynix) <adobriyan@gmail.com> Signed-off-by: David Howells <dhowells@redhat.com> Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org> Reviewed-by: Marc Dionne <marc.dionne@auristor.com> cc: Andrew Morton <akpm@linux-foundation.org> cc: linux-afs@lists.infradead.org Link: https://lore.kernel.org/r/YLAXfvZ+rObEOdc%2F@localhost.localdomain/ [1] Link: https://lore.kernel.org/r/643721.1623754699@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/162430903582.2896199.6098150063997983353.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/162609463957.3133237.15916579353149746363.stgit@warthog.procyon.org.uk/ # v1 (repost) Link: https://lore.kernel.org/r/162610726860.3408253.445207609466288531.stgit@warthog.procyon.org.uk/ # v2
2021-06-15 11:57:26 +01:00
__field(u16, service_id )
),
TP_fast_assign(
__entry->call = call->debug_id;
__entry->op = call->operation_ID;
afs: Fix tracepoint string placement with built-in AFS To quote Alexey[1]: I was adding custom tracepoint to the kernel, grabbed full F34 kernel .config, disabled modules and booted whole shebang as VM kernel. Then did perf record -a -e ... It crashed: general protection fault, probably for non-canonical address 0x435f5346592e4243: 0000 [#1] SMP PTI CPU: 1 PID: 842 Comm: cat Not tainted 5.12.6+ #26 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-1.fc33 04/01/2014 RIP: 0010:t_show+0x22/0xd0 Then reproducer was narrowed to # cat /sys/kernel/tracing/printk_formats Original F34 kernel with modules didn't crash. So I started to disable options and after disabling AFS everything started working again. The root cause is that AFS was placing char arrays content into a section full of _pointers_ to strings with predictable consequences. Non canonical address 435f5346592e4243 is "CB.YFS_" which came from CM_NAME macro. Steps to reproduce: CONFIG_AFS=y CONFIG_TRACING=y # cat /sys/kernel/tracing/printk_formats Fix this by the following means: (1) Add enum->string translation tables in the event header with the AFS and YFS cache/callback manager operations listed by RPC operation ID. (2) Modify the afs_cb_call tracepoint to print the string from the translation table rather than using the string at the afs_call name pointer. (3) Switch translation table depending on the service we're being accessed as (AFS or YFS) in the tracepoint print clause. Will this cause problems to userspace utilities? Note that the symbolic representation of the YFS service ID isn't available to this header, so I've put it in as a number. I'm not sure if this is the best way to do this. (4) Remove the name wrangling (CM_NAME) macro and put the names directly into the afs_call_type structs in cmservice.c. Fixes: 8e8d7f13b6d5a9 ("afs: Add some tracepoints") Reported-by: Alexey Dobriyan (SK hynix) <adobriyan@gmail.com> Signed-off-by: David Howells <dhowells@redhat.com> Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org> Reviewed-by: Marc Dionne <marc.dionne@auristor.com> cc: Andrew Morton <akpm@linux-foundation.org> cc: linux-afs@lists.infradead.org Link: https://lore.kernel.org/r/YLAXfvZ+rObEOdc%2F@localhost.localdomain/ [1] Link: https://lore.kernel.org/r/643721.1623754699@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/162430903582.2896199.6098150063997983353.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/162609463957.3133237.15916579353149746363.stgit@warthog.procyon.org.uk/ # v1 (repost) Link: https://lore.kernel.org/r/162610726860.3408253.445207609466288531.stgit@warthog.procyon.org.uk/ # v2
2021-06-15 11:57:26 +01:00
__entry->service_id = call->service_id;
),
afs: Fix tracepoint string placement with built-in AFS To quote Alexey[1]: I was adding custom tracepoint to the kernel, grabbed full F34 kernel .config, disabled modules and booted whole shebang as VM kernel. Then did perf record -a -e ... It crashed: general protection fault, probably for non-canonical address 0x435f5346592e4243: 0000 [#1] SMP PTI CPU: 1 PID: 842 Comm: cat Not tainted 5.12.6+ #26 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-1.fc33 04/01/2014 RIP: 0010:t_show+0x22/0xd0 Then reproducer was narrowed to # cat /sys/kernel/tracing/printk_formats Original F34 kernel with modules didn't crash. So I started to disable options and after disabling AFS everything started working again. The root cause is that AFS was placing char arrays content into a section full of _pointers_ to strings with predictable consequences. Non canonical address 435f5346592e4243 is "CB.YFS_" which came from CM_NAME macro. Steps to reproduce: CONFIG_AFS=y CONFIG_TRACING=y # cat /sys/kernel/tracing/printk_formats Fix this by the following means: (1) Add enum->string translation tables in the event header with the AFS and YFS cache/callback manager operations listed by RPC operation ID. (2) Modify the afs_cb_call tracepoint to print the string from the translation table rather than using the string at the afs_call name pointer. (3) Switch translation table depending on the service we're being accessed as (AFS or YFS) in the tracepoint print clause. Will this cause problems to userspace utilities? Note that the symbolic representation of the YFS service ID isn't available to this header, so I've put it in as a number. I'm not sure if this is the best way to do this. (4) Remove the name wrangling (CM_NAME) macro and put the names directly into the afs_call_type structs in cmservice.c. Fixes: 8e8d7f13b6d5a9 ("afs: Add some tracepoints") Reported-by: Alexey Dobriyan (SK hynix) <adobriyan@gmail.com> Signed-off-by: David Howells <dhowells@redhat.com> Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org> Reviewed-by: Marc Dionne <marc.dionne@auristor.com> cc: Andrew Morton <akpm@linux-foundation.org> cc: linux-afs@lists.infradead.org Link: https://lore.kernel.org/r/YLAXfvZ+rObEOdc%2F@localhost.localdomain/ [1] Link: https://lore.kernel.org/r/643721.1623754699@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/162430903582.2896199.6098150063997983353.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/162609463957.3133237.15916579353149746363.stgit@warthog.procyon.org.uk/ # v1 (repost) Link: https://lore.kernel.org/r/162610726860.3408253.445207609466288531.stgit@warthog.procyon.org.uk/ # v2
2021-06-15 11:57:26 +01:00
TP_printk("c=%08x %s",
__entry->call,
afs: Fix tracepoint string placement with built-in AFS To quote Alexey[1]: I was adding custom tracepoint to the kernel, grabbed full F34 kernel .config, disabled modules and booted whole shebang as VM kernel. Then did perf record -a -e ... It crashed: general protection fault, probably for non-canonical address 0x435f5346592e4243: 0000 [#1] SMP PTI CPU: 1 PID: 842 Comm: cat Not tainted 5.12.6+ #26 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-1.fc33 04/01/2014 RIP: 0010:t_show+0x22/0xd0 Then reproducer was narrowed to # cat /sys/kernel/tracing/printk_formats Original F34 kernel with modules didn't crash. So I started to disable options and after disabling AFS everything started working again. The root cause is that AFS was placing char arrays content into a section full of _pointers_ to strings with predictable consequences. Non canonical address 435f5346592e4243 is "CB.YFS_" which came from CM_NAME macro. Steps to reproduce: CONFIG_AFS=y CONFIG_TRACING=y # cat /sys/kernel/tracing/printk_formats Fix this by the following means: (1) Add enum->string translation tables in the event header with the AFS and YFS cache/callback manager operations listed by RPC operation ID. (2) Modify the afs_cb_call tracepoint to print the string from the translation table rather than using the string at the afs_call name pointer. (3) Switch translation table depending on the service we're being accessed as (AFS or YFS) in the tracepoint print clause. Will this cause problems to userspace utilities? Note that the symbolic representation of the YFS service ID isn't available to this header, so I've put it in as a number. I'm not sure if this is the best way to do this. (4) Remove the name wrangling (CM_NAME) macro and put the names directly into the afs_call_type structs in cmservice.c. Fixes: 8e8d7f13b6d5a9 ("afs: Add some tracepoints") Reported-by: Alexey Dobriyan (SK hynix) <adobriyan@gmail.com> Signed-off-by: David Howells <dhowells@redhat.com> Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org> Reviewed-by: Marc Dionne <marc.dionne@auristor.com> cc: Andrew Morton <akpm@linux-foundation.org> cc: linux-afs@lists.infradead.org Link: https://lore.kernel.org/r/YLAXfvZ+rObEOdc%2F@localhost.localdomain/ [1] Link: https://lore.kernel.org/r/643721.1623754699@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/162430903582.2896199.6098150063997983353.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/162609463957.3133237.15916579353149746363.stgit@warthog.procyon.org.uk/ # v1 (repost) Link: https://lore.kernel.org/r/162610726860.3408253.445207609466288531.stgit@warthog.procyon.org.uk/ # v2
2021-06-15 11:57:26 +01:00
__entry->service_id == 2501 ?
__print_symbolic(__entry->op, yfs_cm_operations) :
__print_symbolic(__entry->op, afs_cm_operations))
);
afs: Refcount the afs_call struct A static checker warning occurs in the AFS filesystem: fs/afs/cmservice.c:155 SRXAFSCB_CallBack() error: dereferencing freed memory 'call' due to the reply being sent before we access the server it points to. The act of sending the reply causes the call to be freed if an error occurs (but not if it doesn't). On top of this, the lifetime handling of afs_call structs is fragile because they get passed around through workqueues without any sort of refcounting. Deal with the issues by: (1) Fix the maybe/maybe not nature of the reply sending functions with regards to whether they release the call struct. (2) Refcount the afs_call struct and sort out places that need to get/put references. (3) Pass a ref through the work queue and release (or pass on) that ref in the work function. Care has to be taken because a work queue may already own a ref to the call. (4) Do the cleaning up in the put function only. (5) Simplify module cleanup by always incrementing afs_outstanding_calls whenever a call is allocated. (6) Set the backlog to 0 with kernel_listen() at the beginning of the process of closing the socket to prevent new incoming calls from occurring and to remove the contribution of preallocated calls from afs_outstanding_calls before we wait on it. A tracepoint is also added to monitor the afs_call refcount and lifetime. Reported-by: Dan Carpenter <dan.carpenter@oracle.com> Signed-off-by: David Howells <dhowells@redhat.com> Fixes: 08e0e7c82eea: "[AF_RXRPC]: Make the in-kernel AFS filesystem use AF_RXRPC."
2017-01-05 10:38:36 +00:00
TRACE_EVENT(afs_call,
TP_PROTO(unsigned int call_debug_id, enum afs_call_trace op,
int ref, int outstanding, const void *where),
afs: Refcount the afs_call struct A static checker warning occurs in the AFS filesystem: fs/afs/cmservice.c:155 SRXAFSCB_CallBack() error: dereferencing freed memory 'call' due to the reply being sent before we access the server it points to. The act of sending the reply causes the call to be freed if an error occurs (but not if it doesn't). On top of this, the lifetime handling of afs_call structs is fragile because they get passed around through workqueues without any sort of refcounting. Deal with the issues by: (1) Fix the maybe/maybe not nature of the reply sending functions with regards to whether they release the call struct. (2) Refcount the afs_call struct and sort out places that need to get/put references. (3) Pass a ref through the work queue and release (or pass on) that ref in the work function. Care has to be taken because a work queue may already own a ref to the call. (4) Do the cleaning up in the put function only. (5) Simplify module cleanup by always incrementing afs_outstanding_calls whenever a call is allocated. (6) Set the backlog to 0 with kernel_listen() at the beginning of the process of closing the socket to prevent new incoming calls from occurring and to remove the contribution of preallocated calls from afs_outstanding_calls before we wait on it. A tracepoint is also added to monitor the afs_call refcount and lifetime. Reported-by: Dan Carpenter <dan.carpenter@oracle.com> Signed-off-by: David Howells <dhowells@redhat.com> Fixes: 08e0e7c82eea: "[AF_RXRPC]: Make the in-kernel AFS filesystem use AF_RXRPC."
2017-01-05 10:38:36 +00:00
TP_ARGS(call_debug_id, op, ref, outstanding, where),
afs: Refcount the afs_call struct A static checker warning occurs in the AFS filesystem: fs/afs/cmservice.c:155 SRXAFSCB_CallBack() error: dereferencing freed memory 'call' due to the reply being sent before we access the server it points to. The act of sending the reply causes the call to be freed if an error occurs (but not if it doesn't). On top of this, the lifetime handling of afs_call structs is fragile because they get passed around through workqueues without any sort of refcounting. Deal with the issues by: (1) Fix the maybe/maybe not nature of the reply sending functions with regards to whether they release the call struct. (2) Refcount the afs_call struct and sort out places that need to get/put references. (3) Pass a ref through the work queue and release (or pass on) that ref in the work function. Care has to be taken because a work queue may already own a ref to the call. (4) Do the cleaning up in the put function only. (5) Simplify module cleanup by always incrementing afs_outstanding_calls whenever a call is allocated. (6) Set the backlog to 0 with kernel_listen() at the beginning of the process of closing the socket to prevent new incoming calls from occurring and to remove the contribution of preallocated calls from afs_outstanding_calls before we wait on it. A tracepoint is also added to monitor the afs_call refcount and lifetime. Reported-by: Dan Carpenter <dan.carpenter@oracle.com> Signed-off-by: David Howells <dhowells@redhat.com> Fixes: 08e0e7c82eea: "[AF_RXRPC]: Make the in-kernel AFS filesystem use AF_RXRPC."
2017-01-05 10:38:36 +00:00
TP_STRUCT__entry(
__field(unsigned int, call )
afs: Refcount the afs_call struct A static checker warning occurs in the AFS filesystem: fs/afs/cmservice.c:155 SRXAFSCB_CallBack() error: dereferencing freed memory 'call' due to the reply being sent before we access the server it points to. The act of sending the reply causes the call to be freed if an error occurs (but not if it doesn't). On top of this, the lifetime handling of afs_call structs is fragile because they get passed around through workqueues without any sort of refcounting. Deal with the issues by: (1) Fix the maybe/maybe not nature of the reply sending functions with regards to whether they release the call struct. (2) Refcount the afs_call struct and sort out places that need to get/put references. (3) Pass a ref through the work queue and release (or pass on) that ref in the work function. Care has to be taken because a work queue may already own a ref to the call. (4) Do the cleaning up in the put function only. (5) Simplify module cleanup by always incrementing afs_outstanding_calls whenever a call is allocated. (6) Set the backlog to 0 with kernel_listen() at the beginning of the process of closing the socket to prevent new incoming calls from occurring and to remove the contribution of preallocated calls from afs_outstanding_calls before we wait on it. A tracepoint is also added to monitor the afs_call refcount and lifetime. Reported-by: Dan Carpenter <dan.carpenter@oracle.com> Signed-off-by: David Howells <dhowells@redhat.com> Fixes: 08e0e7c82eea: "[AF_RXRPC]: Make the in-kernel AFS filesystem use AF_RXRPC."
2017-01-05 10:38:36 +00:00
__field(int, op )
__field(int, ref )
afs: Refcount the afs_call struct A static checker warning occurs in the AFS filesystem: fs/afs/cmservice.c:155 SRXAFSCB_CallBack() error: dereferencing freed memory 'call' due to the reply being sent before we access the server it points to. The act of sending the reply causes the call to be freed if an error occurs (but not if it doesn't). On top of this, the lifetime handling of afs_call structs is fragile because they get passed around through workqueues without any sort of refcounting. Deal with the issues by: (1) Fix the maybe/maybe not nature of the reply sending functions with regards to whether they release the call struct. (2) Refcount the afs_call struct and sort out places that need to get/put references. (3) Pass a ref through the work queue and release (or pass on) that ref in the work function. Care has to be taken because a work queue may already own a ref to the call. (4) Do the cleaning up in the put function only. (5) Simplify module cleanup by always incrementing afs_outstanding_calls whenever a call is allocated. (6) Set the backlog to 0 with kernel_listen() at the beginning of the process of closing the socket to prevent new incoming calls from occurring and to remove the contribution of preallocated calls from afs_outstanding_calls before we wait on it. A tracepoint is also added to monitor the afs_call refcount and lifetime. Reported-by: Dan Carpenter <dan.carpenter@oracle.com> Signed-off-by: David Howells <dhowells@redhat.com> Fixes: 08e0e7c82eea: "[AF_RXRPC]: Make the in-kernel AFS filesystem use AF_RXRPC."
2017-01-05 10:38:36 +00:00
__field(int, outstanding )
__field(const void *, where )
),
TP_fast_assign(
__entry->call = call_debug_id;
afs: Refcount the afs_call struct A static checker warning occurs in the AFS filesystem: fs/afs/cmservice.c:155 SRXAFSCB_CallBack() error: dereferencing freed memory 'call' due to the reply being sent before we access the server it points to. The act of sending the reply causes the call to be freed if an error occurs (but not if it doesn't). On top of this, the lifetime handling of afs_call structs is fragile because they get passed around through workqueues without any sort of refcounting. Deal with the issues by: (1) Fix the maybe/maybe not nature of the reply sending functions with regards to whether they release the call struct. (2) Refcount the afs_call struct and sort out places that need to get/put references. (3) Pass a ref through the work queue and release (or pass on) that ref in the work function. Care has to be taken because a work queue may already own a ref to the call. (4) Do the cleaning up in the put function only. (5) Simplify module cleanup by always incrementing afs_outstanding_calls whenever a call is allocated. (6) Set the backlog to 0 with kernel_listen() at the beginning of the process of closing the socket to prevent new incoming calls from occurring and to remove the contribution of preallocated calls from afs_outstanding_calls before we wait on it. A tracepoint is also added to monitor the afs_call refcount and lifetime. Reported-by: Dan Carpenter <dan.carpenter@oracle.com> Signed-off-by: David Howells <dhowells@redhat.com> Fixes: 08e0e7c82eea: "[AF_RXRPC]: Make the in-kernel AFS filesystem use AF_RXRPC."
2017-01-05 10:38:36 +00:00
__entry->op = op;
__entry->ref = ref;
afs: Refcount the afs_call struct A static checker warning occurs in the AFS filesystem: fs/afs/cmservice.c:155 SRXAFSCB_CallBack() error: dereferencing freed memory 'call' due to the reply being sent before we access the server it points to. The act of sending the reply causes the call to be freed if an error occurs (but not if it doesn't). On top of this, the lifetime handling of afs_call structs is fragile because they get passed around through workqueues without any sort of refcounting. Deal with the issues by: (1) Fix the maybe/maybe not nature of the reply sending functions with regards to whether they release the call struct. (2) Refcount the afs_call struct and sort out places that need to get/put references. (3) Pass a ref through the work queue and release (or pass on) that ref in the work function. Care has to be taken because a work queue may already own a ref to the call. (4) Do the cleaning up in the put function only. (5) Simplify module cleanup by always incrementing afs_outstanding_calls whenever a call is allocated. (6) Set the backlog to 0 with kernel_listen() at the beginning of the process of closing the socket to prevent new incoming calls from occurring and to remove the contribution of preallocated calls from afs_outstanding_calls before we wait on it. A tracepoint is also added to monitor the afs_call refcount and lifetime. Reported-by: Dan Carpenter <dan.carpenter@oracle.com> Signed-off-by: David Howells <dhowells@redhat.com> Fixes: 08e0e7c82eea: "[AF_RXRPC]: Make the in-kernel AFS filesystem use AF_RXRPC."
2017-01-05 10:38:36 +00:00
__entry->outstanding = outstanding;
__entry->where = where;
),
TP_printk("c=%08x %s r=%d o=%d sp=%pSR",
afs: Refcount the afs_call struct A static checker warning occurs in the AFS filesystem: fs/afs/cmservice.c:155 SRXAFSCB_CallBack() error: dereferencing freed memory 'call' due to the reply being sent before we access the server it points to. The act of sending the reply causes the call to be freed if an error occurs (but not if it doesn't). On top of this, the lifetime handling of afs_call structs is fragile because they get passed around through workqueues without any sort of refcounting. Deal with the issues by: (1) Fix the maybe/maybe not nature of the reply sending functions with regards to whether they release the call struct. (2) Refcount the afs_call struct and sort out places that need to get/put references. (3) Pass a ref through the work queue and release (or pass on) that ref in the work function. Care has to be taken because a work queue may already own a ref to the call. (4) Do the cleaning up in the put function only. (5) Simplify module cleanup by always incrementing afs_outstanding_calls whenever a call is allocated. (6) Set the backlog to 0 with kernel_listen() at the beginning of the process of closing the socket to prevent new incoming calls from occurring and to remove the contribution of preallocated calls from afs_outstanding_calls before we wait on it. A tracepoint is also added to monitor the afs_call refcount and lifetime. Reported-by: Dan Carpenter <dan.carpenter@oracle.com> Signed-off-by: David Howells <dhowells@redhat.com> Fixes: 08e0e7c82eea: "[AF_RXRPC]: Make the in-kernel AFS filesystem use AF_RXRPC."
2017-01-05 10:38:36 +00:00
__entry->call,
__print_symbolic(__entry->op, afs_call_traces),
__entry->ref,
afs: Refcount the afs_call struct A static checker warning occurs in the AFS filesystem: fs/afs/cmservice.c:155 SRXAFSCB_CallBack() error: dereferencing freed memory 'call' due to the reply being sent before we access the server it points to. The act of sending the reply causes the call to be freed if an error occurs (but not if it doesn't). On top of this, the lifetime handling of afs_call structs is fragile because they get passed around through workqueues without any sort of refcounting. Deal with the issues by: (1) Fix the maybe/maybe not nature of the reply sending functions with regards to whether they release the call struct. (2) Refcount the afs_call struct and sort out places that need to get/put references. (3) Pass a ref through the work queue and release (or pass on) that ref in the work function. Care has to be taken because a work queue may already own a ref to the call. (4) Do the cleaning up in the put function only. (5) Simplify module cleanup by always incrementing afs_outstanding_calls whenever a call is allocated. (6) Set the backlog to 0 with kernel_listen() at the beginning of the process of closing the socket to prevent new incoming calls from occurring and to remove the contribution of preallocated calls from afs_outstanding_calls before we wait on it. A tracepoint is also added to monitor the afs_call refcount and lifetime. Reported-by: Dan Carpenter <dan.carpenter@oracle.com> Signed-off-by: David Howells <dhowells@redhat.com> Fixes: 08e0e7c82eea: "[AF_RXRPC]: Make the in-kernel AFS filesystem use AF_RXRPC."
2017-01-05 10:38:36 +00:00
__entry->outstanding,
__entry->where)
);
TRACE_EVENT(afs_make_fs_call,
TP_PROTO(struct afs_call *call, const struct afs_fid *fid),
TP_ARGS(call, fid),
TP_STRUCT__entry(
__field(unsigned int, call )
__field(enum afs_fs_operation, op )
__field_struct(struct afs_fid, fid )
),
TP_fast_assign(
__entry->call = call->debug_id;
__entry->op = call->operation_ID;
if (fid) {
__entry->fid = *fid;
} else {
__entry->fid.vid = 0;
__entry->fid.vnode = 0;
__entry->fid.unique = 0;
}
),
TP_printk("c=%08x %06llx:%06llx:%06x %s",
__entry->call,
__entry->fid.vid,
__entry->fid.vnode,
__entry->fid.unique,
__print_symbolic(__entry->op, afs_fs_operations))
);
TRACE_EVENT(afs_make_fs_calli,
TP_PROTO(struct afs_call *call, const struct afs_fid *fid,
unsigned int i),
TP_ARGS(call, fid, i),
TP_STRUCT__entry(
__field(unsigned int, call )
__field(unsigned int, i )
__field(enum afs_fs_operation, op )
__field_struct(struct afs_fid, fid )
),
TP_fast_assign(
__entry->call = call->debug_id;
__entry->i = i;
__entry->op = call->operation_ID;
if (fid) {
__entry->fid = *fid;
} else {
__entry->fid.vid = 0;
__entry->fid.vnode = 0;
__entry->fid.unique = 0;
}
),
TP_printk("c=%08x %06llx:%06llx:%06x %s i=%u",
__entry->call,
__entry->fid.vid,
__entry->fid.vnode,
__entry->fid.unique,
__print_symbolic(__entry->op, afs_fs_operations),
__entry->i)
);
TRACE_EVENT(afs_make_fs_call1,
TP_PROTO(struct afs_call *call, const struct afs_fid *fid,
afs: Build an abstraction around an "operation" concept Turn the afs_operation struct into the main way that most fileserver operations are managed. Various things are added to the struct, including the following: (1) All the parameters and results of the relevant operations are moved into it, removing corresponding fields from the afs_call struct. afs_call gets a pointer to the op. (2) The target volume is made the main focus of the operation, rather than the target vnode(s), and a bunch of op->vnode->volume are made op->volume instead. (3) Two vnode records are defined (op->file[]) for the vnode(s) involved in most operations. The vnode record (struct afs_vnode_param) contains: - The vnode pointer. - The fid of the vnode to be included in the parameters or that was returned in the reply (eg. FS.MakeDir). - The status and callback information that may be returned in the reply about the vnode. - Callback break and data version tracking for detecting simultaneous third-parth changes. (4) Pointers to dentries to be updated with new inodes. (5) An operations table pointer. The table includes pointers to functions for issuing AFS and YFS-variant RPCs, handling the success and abort of an operation and handling post-I/O-lock local editing of a directory. To make this work, the following function restructuring is made: (A) The rotation loop that issues calls to fileservers that can be found in each function that wants to issue an RPC (such as afs_mkdir()) is extracted out into common code, in a new file called fs_operation.c. (B) The rotation loops, such as the one in afs_mkdir(), are replaced with a much smaller piece of code that allocates an operation, sets the parameters and then calls out to the common code to do the actual work. (C) The code for handling the success and failure of an operation are moved into operation functions (as (5) above) and these are called from the core code at appropriate times. (D) The pseudo inode getting stuff used by the dynamic root code is moved over into dynroot.c. (E) struct afs_iget_data is absorbed into the operation struct and afs_iget() expects to be given an op pointer and a vnode record. (F) Point (E) doesn't work for the root dir of a volume, but we know the FID in advance (it's always vnode 1, unique 1), so a separate inode getter, afs_root_iget(), is provided to special-case that. (G) The inode status init/update functions now also take an op and a vnode record. (H) The RPC marshalling functions now, for the most part, just take an afs_operation struct as their only argument. All the data they need is held there. The result delivery functions write their answers there as well. (I) The call is attached to the operation and then the operation core does the waiting. And then the new operation code is, for the moment, made to just initialise the operation, get the appropriate vnode I/O locks and do the same rotation loop as before. This lays the foundation for the following changes in the future: (*) Overhauling the rotation (again). (*) Support for asynchronous I/O, where the fileserver rotation must be done asynchronously also. Signed-off-by: David Howells <dhowells@redhat.com>
2020-04-10 20:51:51 +01:00
const struct qstr *name),
TP_ARGS(call, fid, name),
TP_STRUCT__entry(
__field(unsigned int, call )
__field(enum afs_fs_operation, op )
__field_struct(struct afs_fid, fid )
__array(char, name, 24 )
),
TP_fast_assign(
afs: Build an abstraction around an "operation" concept Turn the afs_operation struct into the main way that most fileserver operations are managed. Various things are added to the struct, including the following: (1) All the parameters and results of the relevant operations are moved into it, removing corresponding fields from the afs_call struct. afs_call gets a pointer to the op. (2) The target volume is made the main focus of the operation, rather than the target vnode(s), and a bunch of op->vnode->volume are made op->volume instead. (3) Two vnode records are defined (op->file[]) for the vnode(s) involved in most operations. The vnode record (struct afs_vnode_param) contains: - The vnode pointer. - The fid of the vnode to be included in the parameters or that was returned in the reply (eg. FS.MakeDir). - The status and callback information that may be returned in the reply about the vnode. - Callback break and data version tracking for detecting simultaneous third-parth changes. (4) Pointers to dentries to be updated with new inodes. (5) An operations table pointer. The table includes pointers to functions for issuing AFS and YFS-variant RPCs, handling the success and abort of an operation and handling post-I/O-lock local editing of a directory. To make this work, the following function restructuring is made: (A) The rotation loop that issues calls to fileservers that can be found in each function that wants to issue an RPC (such as afs_mkdir()) is extracted out into common code, in a new file called fs_operation.c. (B) The rotation loops, such as the one in afs_mkdir(), are replaced with a much smaller piece of code that allocates an operation, sets the parameters and then calls out to the common code to do the actual work. (C) The code for handling the success and failure of an operation are moved into operation functions (as (5) above) and these are called from the core code at appropriate times. (D) The pseudo inode getting stuff used by the dynamic root code is moved over into dynroot.c. (E) struct afs_iget_data is absorbed into the operation struct and afs_iget() expects to be given an op pointer and a vnode record. (F) Point (E) doesn't work for the root dir of a volume, but we know the FID in advance (it's always vnode 1, unique 1), so a separate inode getter, afs_root_iget(), is provided to special-case that. (G) The inode status init/update functions now also take an op and a vnode record. (H) The RPC marshalling functions now, for the most part, just take an afs_operation struct as their only argument. All the data they need is held there. The result delivery functions write their answers there as well. (I) The call is attached to the operation and then the operation core does the waiting. And then the new operation code is, for the moment, made to just initialise the operation, get the appropriate vnode I/O locks and do the same rotation loop as before. This lays the foundation for the following changes in the future: (*) Overhauling the rotation (again). (*) Support for asynchronous I/O, where the fileserver rotation must be done asynchronously also. Signed-off-by: David Howells <dhowells@redhat.com>
2020-04-10 20:51:51 +01:00
unsigned int __len = min_t(unsigned int, name->len, 23);
__entry->call = call->debug_id;
__entry->op = call->operation_ID;
if (fid) {
__entry->fid = *fid;
} else {
__entry->fid.vid = 0;
__entry->fid.vnode = 0;
__entry->fid.unique = 0;
}
afs: Build an abstraction around an "operation" concept Turn the afs_operation struct into the main way that most fileserver operations are managed. Various things are added to the struct, including the following: (1) All the parameters and results of the relevant operations are moved into it, removing corresponding fields from the afs_call struct. afs_call gets a pointer to the op. (2) The target volume is made the main focus of the operation, rather than the target vnode(s), and a bunch of op->vnode->volume are made op->volume instead. (3) Two vnode records are defined (op->file[]) for the vnode(s) involved in most operations. The vnode record (struct afs_vnode_param) contains: - The vnode pointer. - The fid of the vnode to be included in the parameters or that was returned in the reply (eg. FS.MakeDir). - The status and callback information that may be returned in the reply about the vnode. - Callback break and data version tracking for detecting simultaneous third-parth changes. (4) Pointers to dentries to be updated with new inodes. (5) An operations table pointer. The table includes pointers to functions for issuing AFS and YFS-variant RPCs, handling the success and abort of an operation and handling post-I/O-lock local editing of a directory. To make this work, the following function restructuring is made: (A) The rotation loop that issues calls to fileservers that can be found in each function that wants to issue an RPC (such as afs_mkdir()) is extracted out into common code, in a new file called fs_operation.c. (B) The rotation loops, such as the one in afs_mkdir(), are replaced with a much smaller piece of code that allocates an operation, sets the parameters and then calls out to the common code to do the actual work. (C) The code for handling the success and failure of an operation are moved into operation functions (as (5) above) and these are called from the core code at appropriate times. (D) The pseudo inode getting stuff used by the dynamic root code is moved over into dynroot.c. (E) struct afs_iget_data is absorbed into the operation struct and afs_iget() expects to be given an op pointer and a vnode record. (F) Point (E) doesn't work for the root dir of a volume, but we know the FID in advance (it's always vnode 1, unique 1), so a separate inode getter, afs_root_iget(), is provided to special-case that. (G) The inode status init/update functions now also take an op and a vnode record. (H) The RPC marshalling functions now, for the most part, just take an afs_operation struct as their only argument. All the data they need is held there. The result delivery functions write their answers there as well. (I) The call is attached to the operation and then the operation core does the waiting. And then the new operation code is, for the moment, made to just initialise the operation, get the appropriate vnode I/O locks and do the same rotation loop as before. This lays the foundation for the following changes in the future: (*) Overhauling the rotation (again). (*) Support for asynchronous I/O, where the fileserver rotation must be done asynchronously also. Signed-off-by: David Howells <dhowells@redhat.com>
2020-04-10 20:51:51 +01:00
memcpy(__entry->name, name->name, __len);
__entry->name[__len] = 0;
),
TP_printk("c=%08x %06llx:%06llx:%06x %s \"%s\"",
__entry->call,
__entry->fid.vid,
__entry->fid.vnode,
__entry->fid.unique,
__print_symbolic(__entry->op, afs_fs_operations),
__entry->name)
);
TRACE_EVENT(afs_make_fs_call2,
TP_PROTO(struct afs_call *call, const struct afs_fid *fid,
afs: Build an abstraction around an "operation" concept Turn the afs_operation struct into the main way that most fileserver operations are managed. Various things are added to the struct, including the following: (1) All the parameters and results of the relevant operations are moved into it, removing corresponding fields from the afs_call struct. afs_call gets a pointer to the op. (2) The target volume is made the main focus of the operation, rather than the target vnode(s), and a bunch of op->vnode->volume are made op->volume instead. (3) Two vnode records are defined (op->file[]) for the vnode(s) involved in most operations. The vnode record (struct afs_vnode_param) contains: - The vnode pointer. - The fid of the vnode to be included in the parameters or that was returned in the reply (eg. FS.MakeDir). - The status and callback information that may be returned in the reply about the vnode. - Callback break and data version tracking for detecting simultaneous third-parth changes. (4) Pointers to dentries to be updated with new inodes. (5) An operations table pointer. The table includes pointers to functions for issuing AFS and YFS-variant RPCs, handling the success and abort of an operation and handling post-I/O-lock local editing of a directory. To make this work, the following function restructuring is made: (A) The rotation loop that issues calls to fileservers that can be found in each function that wants to issue an RPC (such as afs_mkdir()) is extracted out into common code, in a new file called fs_operation.c. (B) The rotation loops, such as the one in afs_mkdir(), are replaced with a much smaller piece of code that allocates an operation, sets the parameters and then calls out to the common code to do the actual work. (C) The code for handling the success and failure of an operation are moved into operation functions (as (5) above) and these are called from the core code at appropriate times. (D) The pseudo inode getting stuff used by the dynamic root code is moved over into dynroot.c. (E) struct afs_iget_data is absorbed into the operation struct and afs_iget() expects to be given an op pointer and a vnode record. (F) Point (E) doesn't work for the root dir of a volume, but we know the FID in advance (it's always vnode 1, unique 1), so a separate inode getter, afs_root_iget(), is provided to special-case that. (G) The inode status init/update functions now also take an op and a vnode record. (H) The RPC marshalling functions now, for the most part, just take an afs_operation struct as their only argument. All the data they need is held there. The result delivery functions write their answers there as well. (I) The call is attached to the operation and then the operation core does the waiting. And then the new operation code is, for the moment, made to just initialise the operation, get the appropriate vnode I/O locks and do the same rotation loop as before. This lays the foundation for the following changes in the future: (*) Overhauling the rotation (again). (*) Support for asynchronous I/O, where the fileserver rotation must be done asynchronously also. Signed-off-by: David Howells <dhowells@redhat.com>
2020-04-10 20:51:51 +01:00
const struct qstr *name, const struct qstr *name2),
TP_ARGS(call, fid, name, name2),
TP_STRUCT__entry(
__field(unsigned int, call )
__field(enum afs_fs_operation, op )
__field_struct(struct afs_fid, fid )
__array(char, name, 24 )
__array(char, name2, 24 )
),
TP_fast_assign(
afs: Build an abstraction around an "operation" concept Turn the afs_operation struct into the main way that most fileserver operations are managed. Various things are added to the struct, including the following: (1) All the parameters and results of the relevant operations are moved into it, removing corresponding fields from the afs_call struct. afs_call gets a pointer to the op. (2) The target volume is made the main focus of the operation, rather than the target vnode(s), and a bunch of op->vnode->volume are made op->volume instead. (3) Two vnode records are defined (op->file[]) for the vnode(s) involved in most operations. The vnode record (struct afs_vnode_param) contains: - The vnode pointer. - The fid of the vnode to be included in the parameters or that was returned in the reply (eg. FS.MakeDir). - The status and callback information that may be returned in the reply about the vnode. - Callback break and data version tracking for detecting simultaneous third-parth changes. (4) Pointers to dentries to be updated with new inodes. (5) An operations table pointer. The table includes pointers to functions for issuing AFS and YFS-variant RPCs, handling the success and abort of an operation and handling post-I/O-lock local editing of a directory. To make this work, the following function restructuring is made: (A) The rotation loop that issues calls to fileservers that can be found in each function that wants to issue an RPC (such as afs_mkdir()) is extracted out into common code, in a new file called fs_operation.c. (B) The rotation loops, such as the one in afs_mkdir(), are replaced with a much smaller piece of code that allocates an operation, sets the parameters and then calls out to the common code to do the actual work. (C) The code for handling the success and failure of an operation are moved into operation functions (as (5) above) and these are called from the core code at appropriate times. (D) The pseudo inode getting stuff used by the dynamic root code is moved over into dynroot.c. (E) struct afs_iget_data is absorbed into the operation struct and afs_iget() expects to be given an op pointer and a vnode record. (F) Point (E) doesn't work for the root dir of a volume, but we know the FID in advance (it's always vnode 1, unique 1), so a separate inode getter, afs_root_iget(), is provided to special-case that. (G) The inode status init/update functions now also take an op and a vnode record. (H) The RPC marshalling functions now, for the most part, just take an afs_operation struct as their only argument. All the data they need is held there. The result delivery functions write their answers there as well. (I) The call is attached to the operation and then the operation core does the waiting. And then the new operation code is, for the moment, made to just initialise the operation, get the appropriate vnode I/O locks and do the same rotation loop as before. This lays the foundation for the following changes in the future: (*) Overhauling the rotation (again). (*) Support for asynchronous I/O, where the fileserver rotation must be done asynchronously also. Signed-off-by: David Howells <dhowells@redhat.com>
2020-04-10 20:51:51 +01:00
unsigned int __len = min_t(unsigned int, name->len, 23);
unsigned int __len2 = min_t(unsigned int, name2->len, 23);
__entry->call = call->debug_id;
__entry->op = call->operation_ID;
if (fid) {
__entry->fid = *fid;
} else {
__entry->fid.vid = 0;
__entry->fid.vnode = 0;
__entry->fid.unique = 0;
}
afs: Build an abstraction around an "operation" concept Turn the afs_operation struct into the main way that most fileserver operations are managed. Various things are added to the struct, including the following: (1) All the parameters and results of the relevant operations are moved into it, removing corresponding fields from the afs_call struct. afs_call gets a pointer to the op. (2) The target volume is made the main focus of the operation, rather than the target vnode(s), and a bunch of op->vnode->volume are made op->volume instead. (3) Two vnode records are defined (op->file[]) for the vnode(s) involved in most operations. The vnode record (struct afs_vnode_param) contains: - The vnode pointer. - The fid of the vnode to be included in the parameters or that was returned in the reply (eg. FS.MakeDir). - The status and callback information that may be returned in the reply about the vnode. - Callback break and data version tracking for detecting simultaneous third-parth changes. (4) Pointers to dentries to be updated with new inodes. (5) An operations table pointer. The table includes pointers to functions for issuing AFS and YFS-variant RPCs, handling the success and abort of an operation and handling post-I/O-lock local editing of a directory. To make this work, the following function restructuring is made: (A) The rotation loop that issues calls to fileservers that can be found in each function that wants to issue an RPC (such as afs_mkdir()) is extracted out into common code, in a new file called fs_operation.c. (B) The rotation loops, such as the one in afs_mkdir(), are replaced with a much smaller piece of code that allocates an operation, sets the parameters and then calls out to the common code to do the actual work. (C) The code for handling the success and failure of an operation are moved into operation functions (as (5) above) and these are called from the core code at appropriate times. (D) The pseudo inode getting stuff used by the dynamic root code is moved over into dynroot.c. (E) struct afs_iget_data is absorbed into the operation struct and afs_iget() expects to be given an op pointer and a vnode record. (F) Point (E) doesn't work for the root dir of a volume, but we know the FID in advance (it's always vnode 1, unique 1), so a separate inode getter, afs_root_iget(), is provided to special-case that. (G) The inode status init/update functions now also take an op and a vnode record. (H) The RPC marshalling functions now, for the most part, just take an afs_operation struct as their only argument. All the data they need is held there. The result delivery functions write their answers there as well. (I) The call is attached to the operation and then the operation core does the waiting. And then the new operation code is, for the moment, made to just initialise the operation, get the appropriate vnode I/O locks and do the same rotation loop as before. This lays the foundation for the following changes in the future: (*) Overhauling the rotation (again). (*) Support for asynchronous I/O, where the fileserver rotation must be done asynchronously also. Signed-off-by: David Howells <dhowells@redhat.com>
2020-04-10 20:51:51 +01:00
memcpy(__entry->name, name->name, __len);
__entry->name[__len] = 0;
afs: Build an abstraction around an "operation" concept Turn the afs_operation struct into the main way that most fileserver operations are managed. Various things are added to the struct, including the following: (1) All the parameters and results of the relevant operations are moved into it, removing corresponding fields from the afs_call struct. afs_call gets a pointer to the op. (2) The target volume is made the main focus of the operation, rather than the target vnode(s), and a bunch of op->vnode->volume are made op->volume instead. (3) Two vnode records are defined (op->file[]) for the vnode(s) involved in most operations. The vnode record (struct afs_vnode_param) contains: - The vnode pointer. - The fid of the vnode to be included in the parameters or that was returned in the reply (eg. FS.MakeDir). - The status and callback information that may be returned in the reply about the vnode. - Callback break and data version tracking for detecting simultaneous third-parth changes. (4) Pointers to dentries to be updated with new inodes. (5) An operations table pointer. The table includes pointers to functions for issuing AFS and YFS-variant RPCs, handling the success and abort of an operation and handling post-I/O-lock local editing of a directory. To make this work, the following function restructuring is made: (A) The rotation loop that issues calls to fileservers that can be found in each function that wants to issue an RPC (such as afs_mkdir()) is extracted out into common code, in a new file called fs_operation.c. (B) The rotation loops, such as the one in afs_mkdir(), are replaced with a much smaller piece of code that allocates an operation, sets the parameters and then calls out to the common code to do the actual work. (C) The code for handling the success and failure of an operation are moved into operation functions (as (5) above) and these are called from the core code at appropriate times. (D) The pseudo inode getting stuff used by the dynamic root code is moved over into dynroot.c. (E) struct afs_iget_data is absorbed into the operation struct and afs_iget() expects to be given an op pointer and a vnode record. (F) Point (E) doesn't work for the root dir of a volume, but we know the FID in advance (it's always vnode 1, unique 1), so a separate inode getter, afs_root_iget(), is provided to special-case that. (G) The inode status init/update functions now also take an op and a vnode record. (H) The RPC marshalling functions now, for the most part, just take an afs_operation struct as their only argument. All the data they need is held there. The result delivery functions write their answers there as well. (I) The call is attached to the operation and then the operation core does the waiting. And then the new operation code is, for the moment, made to just initialise the operation, get the appropriate vnode I/O locks and do the same rotation loop as before. This lays the foundation for the following changes in the future: (*) Overhauling the rotation (again). (*) Support for asynchronous I/O, where the fileserver rotation must be done asynchronously also. Signed-off-by: David Howells <dhowells@redhat.com>
2020-04-10 20:51:51 +01:00
memcpy(__entry->name2, name2->name, __len2);
__entry->name2[__len2] = 0;
),
TP_printk("c=%08x %06llx:%06llx:%06x %s \"%s\" \"%s\"",
__entry->call,
__entry->fid.vid,
__entry->fid.vnode,
__entry->fid.unique,
__print_symbolic(__entry->op, afs_fs_operations),
__entry->name,
__entry->name2)
);
TRACE_EVENT(afs_make_vl_call,
TP_PROTO(struct afs_call *call),
TP_ARGS(call),
TP_STRUCT__entry(
__field(unsigned int, call )
__field(enum afs_vl_operation, op )
),
TP_fast_assign(
__entry->call = call->debug_id;
__entry->op = call->operation_ID;
),
TP_printk("c=%08x %s",
__entry->call,
__print_symbolic(__entry->op, afs_vl_operations))
);
TRACE_EVENT(afs_call_done,
TP_PROTO(struct afs_call *call),
TP_ARGS(call),
TP_STRUCT__entry(
__field(unsigned int, call )
__field(struct rxrpc_call *, rx_call )
__field(int, ret )
__field(u32, abort_code )
),
TP_fast_assign(
__entry->call = call->debug_id;
__entry->rx_call = call->rxcall;
__entry->ret = call->error;
__entry->abort_code = call->abort_code;
),
TP_printk(" c=%08x ret=%d ab=%d [%p]",
__entry->call,
__entry->ret,
__entry->abort_code,
__entry->rx_call)
);
afs: Use ITER_XARRAY for writing Use a single ITER_XARRAY iterator to describe the portion of a file to be transmitted to the server rather than generating a series of small ITER_BVEC iterators on the fly. This will make it easier to implement AIO in afs. In theory we could maybe use one giant ITER_BVEC, but that means potentially allocating a huge array of bio_vec structs (max 256 per page) when in fact the pagecache already has a structure listing all the relevant pages (radix_tree/xarray) that can be walked over. Signed-off-by: David Howells <dhowells@redhat.com> Tested-By: Marc Dionne <marc.dionne@auristor.com> cc: linux-afs@lists.infradead.org cc: linux-cachefs@redhat.com cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/153685395197.14766.16289516750731233933.stgit@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/158861251312.340223.17924900795425422532.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/159465828607.1377938.6903132788463419368.stgit@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/160588535018.3465195.14509994354240338307.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/161118152415.1232039.6452879415814850025.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/161161048194.2537118.13763612220937637316.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/161340411602.1303470.4661108879482218408.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/161539555629.286939.5241869986617154517.stgit@warthog.procyon.org.uk/ # v4 Link: https://lore.kernel.org/r/161653811456.2770958.7017388543246759245.stgit@warthog.procyon.org.uk/ # v5 Link: https://lore.kernel.org/r/161789095005.6155.6789055030327407928.stgit@warthog.procyon.org.uk/ # v6
2020-02-06 14:22:28 +00:00
TRACE_EVENT(afs_send_data,
TP_PROTO(struct afs_call *call, struct msghdr *msg),
afs: Use ITER_XARRAY for writing Use a single ITER_XARRAY iterator to describe the portion of a file to be transmitted to the server rather than generating a series of small ITER_BVEC iterators on the fly. This will make it easier to implement AIO in afs. In theory we could maybe use one giant ITER_BVEC, but that means potentially allocating a huge array of bio_vec structs (max 256 per page) when in fact the pagecache already has a structure listing all the relevant pages (radix_tree/xarray) that can be walked over. Signed-off-by: David Howells <dhowells@redhat.com> Tested-By: Marc Dionne <marc.dionne@auristor.com> cc: linux-afs@lists.infradead.org cc: linux-cachefs@redhat.com cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/153685395197.14766.16289516750731233933.stgit@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/158861251312.340223.17924900795425422532.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/159465828607.1377938.6903132788463419368.stgit@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/160588535018.3465195.14509994354240338307.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/161118152415.1232039.6452879415814850025.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/161161048194.2537118.13763612220937637316.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/161340411602.1303470.4661108879482218408.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/161539555629.286939.5241869986617154517.stgit@warthog.procyon.org.uk/ # v4 Link: https://lore.kernel.org/r/161653811456.2770958.7017388543246759245.stgit@warthog.procyon.org.uk/ # v5 Link: https://lore.kernel.org/r/161789095005.6155.6789055030327407928.stgit@warthog.procyon.org.uk/ # v6
2020-02-06 14:22:28 +00:00
TP_ARGS(call, msg),
TP_STRUCT__entry(
__field(unsigned int, call )
__field(unsigned int, flags )
afs: Use ITER_XARRAY for writing Use a single ITER_XARRAY iterator to describe the portion of a file to be transmitted to the server rather than generating a series of small ITER_BVEC iterators on the fly. This will make it easier to implement AIO in afs. In theory we could maybe use one giant ITER_BVEC, but that means potentially allocating a huge array of bio_vec structs (max 256 per page) when in fact the pagecache already has a structure listing all the relevant pages (radix_tree/xarray) that can be walked over. Signed-off-by: David Howells <dhowells@redhat.com> Tested-By: Marc Dionne <marc.dionne@auristor.com> cc: linux-afs@lists.infradead.org cc: linux-cachefs@redhat.com cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/153685395197.14766.16289516750731233933.stgit@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/158861251312.340223.17924900795425422532.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/159465828607.1377938.6903132788463419368.stgit@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/160588535018.3465195.14509994354240338307.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/161118152415.1232039.6452879415814850025.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/161161048194.2537118.13763612220937637316.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/161340411602.1303470.4661108879482218408.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/161539555629.286939.5241869986617154517.stgit@warthog.procyon.org.uk/ # v4 Link: https://lore.kernel.org/r/161653811456.2770958.7017388543246759245.stgit@warthog.procyon.org.uk/ # v5 Link: https://lore.kernel.org/r/161789095005.6155.6789055030327407928.stgit@warthog.procyon.org.uk/ # v6
2020-02-06 14:22:28 +00:00
__field(loff_t, offset )
__field(loff_t, count )
),
TP_fast_assign(
__entry->call = call->debug_id;
__entry->flags = msg->msg_flags;
afs: Use ITER_XARRAY for writing Use a single ITER_XARRAY iterator to describe the portion of a file to be transmitted to the server rather than generating a series of small ITER_BVEC iterators on the fly. This will make it easier to implement AIO in afs. In theory we could maybe use one giant ITER_BVEC, but that means potentially allocating a huge array of bio_vec structs (max 256 per page) when in fact the pagecache already has a structure listing all the relevant pages (radix_tree/xarray) that can be walked over. Signed-off-by: David Howells <dhowells@redhat.com> Tested-By: Marc Dionne <marc.dionne@auristor.com> cc: linux-afs@lists.infradead.org cc: linux-cachefs@redhat.com cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/153685395197.14766.16289516750731233933.stgit@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/158861251312.340223.17924900795425422532.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/159465828607.1377938.6903132788463419368.stgit@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/160588535018.3465195.14509994354240338307.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/161118152415.1232039.6452879415814850025.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/161161048194.2537118.13763612220937637316.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/161340411602.1303470.4661108879482218408.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/161539555629.286939.5241869986617154517.stgit@warthog.procyon.org.uk/ # v4 Link: https://lore.kernel.org/r/161653811456.2770958.7017388543246759245.stgit@warthog.procyon.org.uk/ # v5 Link: https://lore.kernel.org/r/161789095005.6155.6789055030327407928.stgit@warthog.procyon.org.uk/ # v6
2020-02-06 14:22:28 +00:00
__entry->offset = msg->msg_iter.xarray_start + msg->msg_iter.iov_offset;
__entry->count = iov_iter_count(&msg->msg_iter);
),
afs: Use ITER_XARRAY for writing Use a single ITER_XARRAY iterator to describe the portion of a file to be transmitted to the server rather than generating a series of small ITER_BVEC iterators on the fly. This will make it easier to implement AIO in afs. In theory we could maybe use one giant ITER_BVEC, but that means potentially allocating a huge array of bio_vec structs (max 256 per page) when in fact the pagecache already has a structure listing all the relevant pages (radix_tree/xarray) that can be walked over. Signed-off-by: David Howells <dhowells@redhat.com> Tested-By: Marc Dionne <marc.dionne@auristor.com> cc: linux-afs@lists.infradead.org cc: linux-cachefs@redhat.com cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/153685395197.14766.16289516750731233933.stgit@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/158861251312.340223.17924900795425422532.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/159465828607.1377938.6903132788463419368.stgit@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/160588535018.3465195.14509994354240338307.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/161118152415.1232039.6452879415814850025.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/161161048194.2537118.13763612220937637316.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/161340411602.1303470.4661108879482218408.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/161539555629.286939.5241869986617154517.stgit@warthog.procyon.org.uk/ # v4 Link: https://lore.kernel.org/r/161653811456.2770958.7017388543246759245.stgit@warthog.procyon.org.uk/ # v5 Link: https://lore.kernel.org/r/161789095005.6155.6789055030327407928.stgit@warthog.procyon.org.uk/ # v6
2020-02-06 14:22:28 +00:00
TP_printk(" c=%08x o=%llx n=%llx f=%x",
__entry->call, __entry->offset, __entry->count,
__entry->flags)
);
afs: Use ITER_XARRAY for writing Use a single ITER_XARRAY iterator to describe the portion of a file to be transmitted to the server rather than generating a series of small ITER_BVEC iterators on the fly. This will make it easier to implement AIO in afs. In theory we could maybe use one giant ITER_BVEC, but that means potentially allocating a huge array of bio_vec structs (max 256 per page) when in fact the pagecache already has a structure listing all the relevant pages (radix_tree/xarray) that can be walked over. Signed-off-by: David Howells <dhowells@redhat.com> Tested-By: Marc Dionne <marc.dionne@auristor.com> cc: linux-afs@lists.infradead.org cc: linux-cachefs@redhat.com cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/153685395197.14766.16289516750731233933.stgit@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/158861251312.340223.17924900795425422532.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/159465828607.1377938.6903132788463419368.stgit@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/160588535018.3465195.14509994354240338307.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/161118152415.1232039.6452879415814850025.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/161161048194.2537118.13763612220937637316.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/161340411602.1303470.4661108879482218408.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/161539555629.286939.5241869986617154517.stgit@warthog.procyon.org.uk/ # v4 Link: https://lore.kernel.org/r/161653811456.2770958.7017388543246759245.stgit@warthog.procyon.org.uk/ # v5 Link: https://lore.kernel.org/r/161789095005.6155.6789055030327407928.stgit@warthog.procyon.org.uk/ # v6
2020-02-06 14:22:28 +00:00
TRACE_EVENT(afs_sent_data,
TP_PROTO(struct afs_call *call, struct msghdr *msg, int ret),
afs: Use ITER_XARRAY for writing Use a single ITER_XARRAY iterator to describe the portion of a file to be transmitted to the server rather than generating a series of small ITER_BVEC iterators on the fly. This will make it easier to implement AIO in afs. In theory we could maybe use one giant ITER_BVEC, but that means potentially allocating a huge array of bio_vec structs (max 256 per page) when in fact the pagecache already has a structure listing all the relevant pages (radix_tree/xarray) that can be walked over. Signed-off-by: David Howells <dhowells@redhat.com> Tested-By: Marc Dionne <marc.dionne@auristor.com> cc: linux-afs@lists.infradead.org cc: linux-cachefs@redhat.com cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/153685395197.14766.16289516750731233933.stgit@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/158861251312.340223.17924900795425422532.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/159465828607.1377938.6903132788463419368.stgit@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/160588535018.3465195.14509994354240338307.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/161118152415.1232039.6452879415814850025.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/161161048194.2537118.13763612220937637316.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/161340411602.1303470.4661108879482218408.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/161539555629.286939.5241869986617154517.stgit@warthog.procyon.org.uk/ # v4 Link: https://lore.kernel.org/r/161653811456.2770958.7017388543246759245.stgit@warthog.procyon.org.uk/ # v5 Link: https://lore.kernel.org/r/161789095005.6155.6789055030327407928.stgit@warthog.procyon.org.uk/ # v6
2020-02-06 14:22:28 +00:00
TP_ARGS(call, msg, ret),
TP_STRUCT__entry(
__field(unsigned int, call )
__field(int, ret )
afs: Use ITER_XARRAY for writing Use a single ITER_XARRAY iterator to describe the portion of a file to be transmitted to the server rather than generating a series of small ITER_BVEC iterators on the fly. This will make it easier to implement AIO in afs. In theory we could maybe use one giant ITER_BVEC, but that means potentially allocating a huge array of bio_vec structs (max 256 per page) when in fact the pagecache already has a structure listing all the relevant pages (radix_tree/xarray) that can be walked over. Signed-off-by: David Howells <dhowells@redhat.com> Tested-By: Marc Dionne <marc.dionne@auristor.com> cc: linux-afs@lists.infradead.org cc: linux-cachefs@redhat.com cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/153685395197.14766.16289516750731233933.stgit@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/158861251312.340223.17924900795425422532.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/159465828607.1377938.6903132788463419368.stgit@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/160588535018.3465195.14509994354240338307.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/161118152415.1232039.6452879415814850025.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/161161048194.2537118.13763612220937637316.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/161340411602.1303470.4661108879482218408.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/161539555629.286939.5241869986617154517.stgit@warthog.procyon.org.uk/ # v4 Link: https://lore.kernel.org/r/161653811456.2770958.7017388543246759245.stgit@warthog.procyon.org.uk/ # v5 Link: https://lore.kernel.org/r/161789095005.6155.6789055030327407928.stgit@warthog.procyon.org.uk/ # v6
2020-02-06 14:22:28 +00:00
__field(loff_t, offset )
__field(loff_t, count )
),
TP_fast_assign(
__entry->call = call->debug_id;
__entry->ret = ret;
afs: Use ITER_XARRAY for writing Use a single ITER_XARRAY iterator to describe the portion of a file to be transmitted to the server rather than generating a series of small ITER_BVEC iterators on the fly. This will make it easier to implement AIO in afs. In theory we could maybe use one giant ITER_BVEC, but that means potentially allocating a huge array of bio_vec structs (max 256 per page) when in fact the pagecache already has a structure listing all the relevant pages (radix_tree/xarray) that can be walked over. Signed-off-by: David Howells <dhowells@redhat.com> Tested-By: Marc Dionne <marc.dionne@auristor.com> cc: linux-afs@lists.infradead.org cc: linux-cachefs@redhat.com cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/153685395197.14766.16289516750731233933.stgit@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/158861251312.340223.17924900795425422532.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/159465828607.1377938.6903132788463419368.stgit@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/160588535018.3465195.14509994354240338307.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/161118152415.1232039.6452879415814850025.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/161161048194.2537118.13763612220937637316.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/161340411602.1303470.4661108879482218408.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/161539555629.286939.5241869986617154517.stgit@warthog.procyon.org.uk/ # v4 Link: https://lore.kernel.org/r/161653811456.2770958.7017388543246759245.stgit@warthog.procyon.org.uk/ # v5 Link: https://lore.kernel.org/r/161789095005.6155.6789055030327407928.stgit@warthog.procyon.org.uk/ # v6
2020-02-06 14:22:28 +00:00
__entry->offset = msg->msg_iter.xarray_start + msg->msg_iter.iov_offset;
__entry->count = iov_iter_count(&msg->msg_iter);
),
afs: Use ITER_XARRAY for writing Use a single ITER_XARRAY iterator to describe the portion of a file to be transmitted to the server rather than generating a series of small ITER_BVEC iterators on the fly. This will make it easier to implement AIO in afs. In theory we could maybe use one giant ITER_BVEC, but that means potentially allocating a huge array of bio_vec structs (max 256 per page) when in fact the pagecache already has a structure listing all the relevant pages (radix_tree/xarray) that can be walked over. Signed-off-by: David Howells <dhowells@redhat.com> Tested-By: Marc Dionne <marc.dionne@auristor.com> cc: linux-afs@lists.infradead.org cc: linux-cachefs@redhat.com cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/153685395197.14766.16289516750731233933.stgit@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/158861251312.340223.17924900795425422532.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/159465828607.1377938.6903132788463419368.stgit@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/160588535018.3465195.14509994354240338307.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/161118152415.1232039.6452879415814850025.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/161161048194.2537118.13763612220937637316.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/161340411602.1303470.4661108879482218408.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/161539555629.286939.5241869986617154517.stgit@warthog.procyon.org.uk/ # v4 Link: https://lore.kernel.org/r/161653811456.2770958.7017388543246759245.stgit@warthog.procyon.org.uk/ # v5 Link: https://lore.kernel.org/r/161789095005.6155.6789055030327407928.stgit@warthog.procyon.org.uk/ # v6
2020-02-06 14:22:28 +00:00
TP_printk(" c=%08x o=%llx n=%llx r=%x",
__entry->call, __entry->offset, __entry->count,
__entry->ret)
);
TRACE_EVENT(afs_dir_check_failed,
TP_PROTO(struct afs_vnode *vnode, loff_t off, loff_t i_size),
TP_ARGS(vnode, off, i_size),
TP_STRUCT__entry(
__field(struct afs_vnode *, vnode )
__field(loff_t, off )
__field(loff_t, i_size )
),
TP_fast_assign(
__entry->vnode = vnode;
__entry->off = off;
__entry->i_size = i_size;
),
TP_printk("vn=%p %llx/%llx",
__entry->vnode, __entry->off, __entry->i_size)
);
netfs, 9p, afs, ceph: Use folios Convert the netfs helper library to use folios throughout, convert the 9p and afs filesystems to use folios in their file I/O paths and convert the ceph filesystem to use just enough folios to compile. With these changes, afs passes -g quick xfstests. Changes ======= ver #5: - Got rid of folio_end{io,_read,_write}() and inlined the stuff it does instead (Willy decided he didn't want this after all). ver #4: - Fixed a bug in afs_redirty_page() whereby it didn't set the next page index in the loop and returned too early. - Simplified a check in v9fs_vfs_write_folio_locked()[1]. - Undid a change to afs_symlink_readpage()[1]. - Used offset_in_folio() in afs_write_end()[1]. - Changed from using page_endio() to folio_end{io,_read,_write}()[1]. ver #2: - Add 9p foliation. Signed-off-by: David Howells <dhowells@redhat.com> Reviewed-by: Jeff Layton <jlayton@kernel.org> Tested-by: Jeff Layton <jlayton@kernel.org> Tested-by: Dominique Martinet <asmadeus@codewreck.org> Tested-by: kafs-testing@auristor.com cc: Matthew Wilcox (Oracle) <willy@infradead.org> cc: Marc Dionne <marc.dionne@auristor.com> cc: Ilya Dryomov <idryomov@gmail.com> cc: Dominique Martinet <asmadeus@codewreck.org> cc: v9fs-developer@lists.sourceforge.net cc: linux-afs@lists.infradead.org cc: ceph-devel@vger.kernel.org cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/YYKa3bfQZxK5/wDN@casper.infradead.org/ [1] Link: https://lore.kernel.org/r/2408234.1628687271@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/162877311459.3085614.10601478228012245108.stgit@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/162981153551.1901565.3124454657133703341.stgit@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/163005745264.2472992.9852048135392188995.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/163584187452.4023316.500389675405550116.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/163649328026.309189.1124218109373941936.stgit@warthog.procyon.org.uk/ # v4 Link: https://lore.kernel.org/r/163657852454.834781.9265101983152100556.stgit@warthog.procyon.org.uk/ # v5
2021-08-11 09:49:13 +01:00
TRACE_EVENT(afs_folio_dirty,
TP_PROTO(struct afs_vnode *vnode, const char *where, struct folio *folio),
netfs, 9p, afs, ceph: Use folios Convert the netfs helper library to use folios throughout, convert the 9p and afs filesystems to use folios in their file I/O paths and convert the ceph filesystem to use just enough folios to compile. With these changes, afs passes -g quick xfstests. Changes ======= ver #5: - Got rid of folio_end{io,_read,_write}() and inlined the stuff it does instead (Willy decided he didn't want this after all). ver #4: - Fixed a bug in afs_redirty_page() whereby it didn't set the next page index in the loop and returned too early. - Simplified a check in v9fs_vfs_write_folio_locked()[1]. - Undid a change to afs_symlink_readpage()[1]. - Used offset_in_folio() in afs_write_end()[1]. - Changed from using page_endio() to folio_end{io,_read,_write}()[1]. ver #2: - Add 9p foliation. Signed-off-by: David Howells <dhowells@redhat.com> Reviewed-by: Jeff Layton <jlayton@kernel.org> Tested-by: Jeff Layton <jlayton@kernel.org> Tested-by: Dominique Martinet <asmadeus@codewreck.org> Tested-by: kafs-testing@auristor.com cc: Matthew Wilcox (Oracle) <willy@infradead.org> cc: Marc Dionne <marc.dionne@auristor.com> cc: Ilya Dryomov <idryomov@gmail.com> cc: Dominique Martinet <asmadeus@codewreck.org> cc: v9fs-developer@lists.sourceforge.net cc: linux-afs@lists.infradead.org cc: ceph-devel@vger.kernel.org cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/YYKa3bfQZxK5/wDN@casper.infradead.org/ [1] Link: https://lore.kernel.org/r/2408234.1628687271@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/162877311459.3085614.10601478228012245108.stgit@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/162981153551.1901565.3124454657133703341.stgit@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/163005745264.2472992.9852048135392188995.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/163584187452.4023316.500389675405550116.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/163649328026.309189.1124218109373941936.stgit@warthog.procyon.org.uk/ # v4 Link: https://lore.kernel.org/r/163657852454.834781.9265101983152100556.stgit@warthog.procyon.org.uk/ # v5
2021-08-11 09:49:13 +01:00
TP_ARGS(vnode, where, folio),
TP_STRUCT__entry(
__field(struct afs_vnode *, vnode )
__field(const char *, where )
netfs, 9p, afs, ceph: Use folios Convert the netfs helper library to use folios throughout, convert the 9p and afs filesystems to use folios in their file I/O paths and convert the ceph filesystem to use just enough folios to compile. With these changes, afs passes -g quick xfstests. Changes ======= ver #5: - Got rid of folio_end{io,_read,_write}() and inlined the stuff it does instead (Willy decided he didn't want this after all). ver #4: - Fixed a bug in afs_redirty_page() whereby it didn't set the next page index in the loop and returned too early. - Simplified a check in v9fs_vfs_write_folio_locked()[1]. - Undid a change to afs_symlink_readpage()[1]. - Used offset_in_folio() in afs_write_end()[1]. - Changed from using page_endio() to folio_end{io,_read,_write}()[1]. ver #2: - Add 9p foliation. Signed-off-by: David Howells <dhowells@redhat.com> Reviewed-by: Jeff Layton <jlayton@kernel.org> Tested-by: Jeff Layton <jlayton@kernel.org> Tested-by: Dominique Martinet <asmadeus@codewreck.org> Tested-by: kafs-testing@auristor.com cc: Matthew Wilcox (Oracle) <willy@infradead.org> cc: Marc Dionne <marc.dionne@auristor.com> cc: Ilya Dryomov <idryomov@gmail.com> cc: Dominique Martinet <asmadeus@codewreck.org> cc: v9fs-developer@lists.sourceforge.net cc: linux-afs@lists.infradead.org cc: ceph-devel@vger.kernel.org cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/YYKa3bfQZxK5/wDN@casper.infradead.org/ [1] Link: https://lore.kernel.org/r/2408234.1628687271@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/162877311459.3085614.10601478228012245108.stgit@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/162981153551.1901565.3124454657133703341.stgit@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/163005745264.2472992.9852048135392188995.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/163584187452.4023316.500389675405550116.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/163649328026.309189.1124218109373941936.stgit@warthog.procyon.org.uk/ # v4 Link: https://lore.kernel.org/r/163657852454.834781.9265101983152100556.stgit@warthog.procyon.org.uk/ # v5
2021-08-11 09:49:13 +01:00
__field(pgoff_t, index )
__field(unsigned long, from )
__field(unsigned long, to )
),
TP_fast_assign(
netfs, 9p, afs, ceph: Use folios Convert the netfs helper library to use folios throughout, convert the 9p and afs filesystems to use folios in their file I/O paths and convert the ceph filesystem to use just enough folios to compile. With these changes, afs passes -g quick xfstests. Changes ======= ver #5: - Got rid of folio_end{io,_read,_write}() and inlined the stuff it does instead (Willy decided he didn't want this after all). ver #4: - Fixed a bug in afs_redirty_page() whereby it didn't set the next page index in the loop and returned too early. - Simplified a check in v9fs_vfs_write_folio_locked()[1]. - Undid a change to afs_symlink_readpage()[1]. - Used offset_in_folio() in afs_write_end()[1]. - Changed from using page_endio() to folio_end{io,_read,_write}()[1]. ver #2: - Add 9p foliation. Signed-off-by: David Howells <dhowells@redhat.com> Reviewed-by: Jeff Layton <jlayton@kernel.org> Tested-by: Jeff Layton <jlayton@kernel.org> Tested-by: Dominique Martinet <asmadeus@codewreck.org> Tested-by: kafs-testing@auristor.com cc: Matthew Wilcox (Oracle) <willy@infradead.org> cc: Marc Dionne <marc.dionne@auristor.com> cc: Ilya Dryomov <idryomov@gmail.com> cc: Dominique Martinet <asmadeus@codewreck.org> cc: v9fs-developer@lists.sourceforge.net cc: linux-afs@lists.infradead.org cc: ceph-devel@vger.kernel.org cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/YYKa3bfQZxK5/wDN@casper.infradead.org/ [1] Link: https://lore.kernel.org/r/2408234.1628687271@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/162877311459.3085614.10601478228012245108.stgit@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/162981153551.1901565.3124454657133703341.stgit@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/163005745264.2472992.9852048135392188995.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/163584187452.4023316.500389675405550116.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/163649328026.309189.1124218109373941936.stgit@warthog.procyon.org.uk/ # v4 Link: https://lore.kernel.org/r/163657852454.834781.9265101983152100556.stgit@warthog.procyon.org.uk/ # v5
2021-08-11 09:49:13 +01:00
unsigned long priv = (unsigned long)folio_get_private(folio);
__entry->vnode = vnode;
__entry->where = where;
netfs, 9p, afs, ceph: Use folios Convert the netfs helper library to use folios throughout, convert the 9p and afs filesystems to use folios in their file I/O paths and convert the ceph filesystem to use just enough folios to compile. With these changes, afs passes -g quick xfstests. Changes ======= ver #5: - Got rid of folio_end{io,_read,_write}() and inlined the stuff it does instead (Willy decided he didn't want this after all). ver #4: - Fixed a bug in afs_redirty_page() whereby it didn't set the next page index in the loop and returned too early. - Simplified a check in v9fs_vfs_write_folio_locked()[1]. - Undid a change to afs_symlink_readpage()[1]. - Used offset_in_folio() in afs_write_end()[1]. - Changed from using page_endio() to folio_end{io,_read,_write}()[1]. ver #2: - Add 9p foliation. Signed-off-by: David Howells <dhowells@redhat.com> Reviewed-by: Jeff Layton <jlayton@kernel.org> Tested-by: Jeff Layton <jlayton@kernel.org> Tested-by: Dominique Martinet <asmadeus@codewreck.org> Tested-by: kafs-testing@auristor.com cc: Matthew Wilcox (Oracle) <willy@infradead.org> cc: Marc Dionne <marc.dionne@auristor.com> cc: Ilya Dryomov <idryomov@gmail.com> cc: Dominique Martinet <asmadeus@codewreck.org> cc: v9fs-developer@lists.sourceforge.net cc: linux-afs@lists.infradead.org cc: ceph-devel@vger.kernel.org cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/YYKa3bfQZxK5/wDN@casper.infradead.org/ [1] Link: https://lore.kernel.org/r/2408234.1628687271@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/162877311459.3085614.10601478228012245108.stgit@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/162981153551.1901565.3124454657133703341.stgit@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/163005745264.2472992.9852048135392188995.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/163584187452.4023316.500389675405550116.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/163649328026.309189.1124218109373941936.stgit@warthog.procyon.org.uk/ # v4 Link: https://lore.kernel.org/r/163657852454.834781.9265101983152100556.stgit@warthog.procyon.org.uk/ # v5
2021-08-11 09:49:13 +01:00
__entry->index = folio_index(folio);
__entry->from = afs_folio_dirty_from(folio, priv);
__entry->to = afs_folio_dirty_to(folio, priv);
__entry->to |= (afs_is_folio_dirty_mmapped(priv) ?
(1UL << (BITS_PER_LONG - 1)) : 0);
),
TP_printk("vn=%p %lx %s %lx-%lx%s",
netfs, 9p, afs, ceph: Use folios Convert the netfs helper library to use folios throughout, convert the 9p and afs filesystems to use folios in their file I/O paths and convert the ceph filesystem to use just enough folios to compile. With these changes, afs passes -g quick xfstests. Changes ======= ver #5: - Got rid of folio_end{io,_read,_write}() and inlined the stuff it does instead (Willy decided he didn't want this after all). ver #4: - Fixed a bug in afs_redirty_page() whereby it didn't set the next page index in the loop and returned too early. - Simplified a check in v9fs_vfs_write_folio_locked()[1]. - Undid a change to afs_symlink_readpage()[1]. - Used offset_in_folio() in afs_write_end()[1]. - Changed from using page_endio() to folio_end{io,_read,_write}()[1]. ver #2: - Add 9p foliation. Signed-off-by: David Howells <dhowells@redhat.com> Reviewed-by: Jeff Layton <jlayton@kernel.org> Tested-by: Jeff Layton <jlayton@kernel.org> Tested-by: Dominique Martinet <asmadeus@codewreck.org> Tested-by: kafs-testing@auristor.com cc: Matthew Wilcox (Oracle) <willy@infradead.org> cc: Marc Dionne <marc.dionne@auristor.com> cc: Ilya Dryomov <idryomov@gmail.com> cc: Dominique Martinet <asmadeus@codewreck.org> cc: v9fs-developer@lists.sourceforge.net cc: linux-afs@lists.infradead.org cc: ceph-devel@vger.kernel.org cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/YYKa3bfQZxK5/wDN@casper.infradead.org/ [1] Link: https://lore.kernel.org/r/2408234.1628687271@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/162877311459.3085614.10601478228012245108.stgit@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/162981153551.1901565.3124454657133703341.stgit@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/163005745264.2472992.9852048135392188995.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/163584187452.4023316.500389675405550116.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/163649328026.309189.1124218109373941936.stgit@warthog.procyon.org.uk/ # v4 Link: https://lore.kernel.org/r/163657852454.834781.9265101983152100556.stgit@warthog.procyon.org.uk/ # v5
2021-08-11 09:49:13 +01:00
__entry->vnode, __entry->index, __entry->where,
__entry->from,
__entry->to & ~(1UL << (BITS_PER_LONG - 1)),
__entry->to & (1UL << (BITS_PER_LONG - 1)) ? " M" : "")
);
TRACE_EVENT(afs_call_state,
TP_PROTO(struct afs_call *call,
enum afs_call_state from,
enum afs_call_state to,
int ret, u32 remote_abort),
TP_ARGS(call, from, to, ret, remote_abort),
TP_STRUCT__entry(
__field(unsigned int, call )
__field(enum afs_call_state, from )
__field(enum afs_call_state, to )
__field(int, ret )
__field(u32, abort )
),
TP_fast_assign(
__entry->call = call->debug_id;
__entry->from = from;
__entry->to = to;
__entry->ret = ret;
__entry->abort = remote_abort;
),
TP_printk("c=%08x %u->%u r=%d ab=%d",
__entry->call,
__entry->from, __entry->to,
__entry->ret, __entry->abort)
);
TRACE_EVENT(afs_lookup,
TP_PROTO(struct afs_vnode *dvnode, const struct qstr *name,
struct afs_fid *fid),
TP_ARGS(dvnode, name, fid),
TP_STRUCT__entry(
__field_struct(struct afs_fid, dfid )
__field_struct(struct afs_fid, fid )
__array(char, name, 24 )
),
TP_fast_assign(
int __len = min_t(int, name->len, 23);
__entry->dfid = dvnode->fid;
__entry->fid = *fid;
memcpy(__entry->name, name->name, __len);
__entry->name[__len] = 0;
),
TP_printk("d=%llx:%llx:%x \"%s\" f=%llx:%x",
__entry->dfid.vid, __entry->dfid.vnode, __entry->dfid.unique,
__entry->name,
__entry->fid.vnode, __entry->fid.unique)
);
TRACE_EVENT(afs_edit_dir,
TP_PROTO(struct afs_vnode *dvnode,
enum afs_edit_dir_reason why,
enum afs_edit_dir_op op,
unsigned int block,
unsigned int slot,
unsigned int f_vnode,
unsigned int f_unique,
const char *name),
TP_ARGS(dvnode, why, op, block, slot, f_vnode, f_unique, name),
TP_STRUCT__entry(
__field(unsigned int, vnode )
__field(unsigned int, unique )
__field(enum afs_edit_dir_reason, why )
__field(enum afs_edit_dir_op, op )
__field(unsigned int, block )
__field(unsigned short, slot )
__field(unsigned int, f_vnode )
__field(unsigned int, f_unique )
__array(char, name, 24 )
),
TP_fast_assign(
int __len = strlen(name);
__len = min(__len, 23);
__entry->vnode = dvnode->fid.vnode;
__entry->unique = dvnode->fid.unique;
__entry->why = why;
__entry->op = op;
__entry->block = block;
__entry->slot = slot;
__entry->f_vnode = f_vnode;
__entry->f_unique = f_unique;
memcpy(__entry->name, name, __len);
__entry->name[__len] = 0;
),
TP_printk("d=%x:%x %s %s %u[%u] f=%x:%x \"%s\"",
__entry->vnode, __entry->unique,
__print_symbolic(__entry->why, afs_edit_dir_reasons),
__print_symbolic(__entry->op, afs_edit_dir_ops),
__entry->block, __entry->slot,
__entry->f_vnode, __entry->f_unique,
__entry->name)
);
TRACE_EVENT(afs_protocol_error,
TP_PROTO(struct afs_call *call, enum afs_eproto_cause cause),
TP_ARGS(call, cause),
TP_STRUCT__entry(
__field(unsigned int, call )
__field(enum afs_eproto_cause, cause )
),
TP_fast_assign(
__entry->call = call ? call->debug_id : 0;
__entry->cause = cause;
),
TP_printk("c=%08x %s",
__entry->call,
__print_symbolic(__entry->cause, afs_eproto_causes))
);
TRACE_EVENT(afs_io_error,
TP_PROTO(unsigned int call, int error, enum afs_io_error where),
TP_ARGS(call, error, where),
TP_STRUCT__entry(
__field(unsigned int, call )
__field(int, error )
__field(enum afs_io_error, where )
),
TP_fast_assign(
__entry->call = call;
__entry->error = error;
__entry->where = where;
),
TP_printk("c=%08x r=%d %s",
__entry->call, __entry->error,
__print_symbolic(__entry->where, afs_io_errors))
);
TRACE_EVENT(afs_file_error,
TP_PROTO(struct afs_vnode *vnode, int error, enum afs_file_error where),
TP_ARGS(vnode, error, where),
TP_STRUCT__entry(
__field_struct(struct afs_fid, fid )
__field(int, error )
__field(enum afs_file_error, where )
),
TP_fast_assign(
__entry->fid = vnode->fid;
__entry->error = error;
__entry->where = where;
),
TP_printk("%llx:%llx:%x r=%d %s",
__entry->fid.vid, __entry->fid.vnode, __entry->fid.unique,
__entry->error,
__print_symbolic(__entry->where, afs_file_errors))
);
TRACE_EVENT(afs_cm_no_server,
TP_PROTO(struct afs_call *call, struct sockaddr_rxrpc *srx),
TP_ARGS(call, srx),
TP_STRUCT__entry(
__field(unsigned int, call )
__field(unsigned int, op_id )
__field_struct(struct sockaddr_rxrpc, srx )
),
TP_fast_assign(
__entry->call = call->debug_id;
__entry->op_id = call->operation_ID;
memcpy(&__entry->srx, srx, sizeof(__entry->srx));
),
TP_printk("c=%08x op=%u %pISpc",
__entry->call, __entry->op_id, &__entry->srx.transport)
);
TRACE_EVENT(afs_cm_no_server_u,
TP_PROTO(struct afs_call *call, const uuid_t *uuid),
TP_ARGS(call, uuid),
TP_STRUCT__entry(
__field(unsigned int, call )
__field(unsigned int, op_id )
__field_struct(uuid_t, uuid )
),
TP_fast_assign(
__entry->call = call->debug_id;
__entry->op_id = call->operation_ID;
memcpy(&__entry->uuid, uuid, sizeof(__entry->uuid));
),
TP_printk("c=%08x op=%u %pU",
__entry->call, __entry->op_id, &__entry->uuid)
);
TRACE_EVENT(afs_flock_ev,
TP_PROTO(struct afs_vnode *vnode, struct file_lock *fl,
enum afs_flock_event event, int error),
TP_ARGS(vnode, fl, event, error),
TP_STRUCT__entry(
__field_struct(struct afs_fid, fid )
__field(enum afs_flock_event, event )
__field(enum afs_lock_state, state )
__field(int, error )
__field(unsigned int, debug_id )
),
TP_fast_assign(
__entry->fid = vnode->fid;
__entry->event = event;
__entry->state = vnode->lock_state;
__entry->error = error;
__entry->debug_id = fl ? fl->fl_u.afs.debug_id : 0;
),
TP_printk("%llx:%llx:%x %04x %s s=%s e=%d",
__entry->fid.vid, __entry->fid.vnode, __entry->fid.unique,
__entry->debug_id,
__print_symbolic(__entry->event, afs_flock_events),
__print_symbolic(__entry->state, afs_flock_states),
__entry->error)
);
TRACE_EVENT(afs_flock_op,
TP_PROTO(struct afs_vnode *vnode, struct file_lock *fl,
enum afs_flock_operation op),
TP_ARGS(vnode, fl, op),
TP_STRUCT__entry(
__field_struct(struct afs_fid, fid )
__field(loff_t, from )
__field(loff_t, len )
__field(enum afs_flock_operation, op )
__field(unsigned char, type )
__field(unsigned int, flags )
__field(unsigned int, debug_id )
),
TP_fast_assign(
__entry->fid = vnode->fid;
__entry->from = fl->fl_start;
__entry->len = fl->fl_end - fl->fl_start + 1;
__entry->op = op;
__entry->type = fl->fl_type;
__entry->flags = fl->fl_flags;
__entry->debug_id = fl->fl_u.afs.debug_id;
),
TP_printk("%llx:%llx:%x %04x %s t=%s R=%llx/%llx f=%x",
__entry->fid.vid, __entry->fid.vnode, __entry->fid.unique,
__entry->debug_id,
__print_symbolic(__entry->op, afs_flock_operations),
__print_symbolic(__entry->type, afs_flock_types),
__entry->from, __entry->len, __entry->flags)
);
TRACE_EVENT(afs_reload_dir,
TP_PROTO(struct afs_vnode *vnode),
TP_ARGS(vnode),
TP_STRUCT__entry(
__field_struct(struct afs_fid, fid )
),
TP_fast_assign(
__entry->fid = vnode->fid;
),
TP_printk("%llx:%llx:%x",
__entry->fid.vid, __entry->fid.vnode, __entry->fid.unique)
);
TRACE_EVENT(afs_silly_rename,
TP_PROTO(struct afs_vnode *vnode, bool done),
TP_ARGS(vnode, done),
TP_STRUCT__entry(
__field_struct(struct afs_fid, fid )
__field(bool, done )
),
TP_fast_assign(
__entry->fid = vnode->fid;
__entry->done = done;
),
TP_printk("%llx:%llx:%x done=%u",
__entry->fid.vid, __entry->fid.vnode, __entry->fid.unique,
__entry->done)
);
TRACE_EVENT(afs_get_tree,
TP_PROTO(struct afs_cell *cell, struct afs_volume *volume),
TP_ARGS(cell, volume),
TP_STRUCT__entry(
__field(u64, vid )
__array(char, cell, 24 )
__array(char, volume, 24 )
),
TP_fast_assign(
int __len;
__entry->vid = volume->vid;
__len = min_t(int, cell->name_len, 23);
memcpy(__entry->cell, cell->name, __len);
__entry->cell[__len] = 0;
__len = min_t(int, volume->name_len, 23);
memcpy(__entry->volume, volume->name, __len);
__entry->volume[__len] = 0;
),
TP_printk("--- MOUNT %s:%s %llx",
__entry->cell, __entry->volume, __entry->vid)
);
TRACE_EVENT(afs_cb_break,
TP_PROTO(struct afs_fid *fid, unsigned int cb_break,
enum afs_cb_break_reason reason, bool skipped),
TP_ARGS(fid, cb_break, reason, skipped),
TP_STRUCT__entry(
__field_struct(struct afs_fid, fid )
__field(unsigned int, cb_break )
__field(enum afs_cb_break_reason, reason )
__field(bool, skipped )
),
TP_fast_assign(
__entry->fid = *fid;
__entry->cb_break = cb_break;
__entry->reason = reason;
__entry->skipped = skipped;
),
TP_printk("%llx:%llx:%x b=%x s=%u %s",
__entry->fid.vid, __entry->fid.vnode, __entry->fid.unique,
__entry->cb_break,
__entry->skipped,
__print_symbolic(__entry->reason, afs_cb_break_reasons))
);
TRACE_EVENT(afs_cb_miss,
TP_PROTO(struct afs_fid *fid, enum afs_cb_break_reason reason),
TP_ARGS(fid, reason),
TP_STRUCT__entry(
__field_struct(struct afs_fid, fid )
__field(enum afs_cb_break_reason, reason )
),
TP_fast_assign(
__entry->fid = *fid;
__entry->reason = reason;
),
TP_printk(" %llx:%llx:%x %s",
__entry->fid.vid, __entry->fid.vnode, __entry->fid.unique,
__print_symbolic(__entry->reason, afs_cb_break_reasons))
);
TRACE_EVENT(afs_server,
TP_PROTO(unsigned int server_debug_id, int ref, int active,
enum afs_server_trace reason),
TP_ARGS(server_debug_id, ref, active, reason),
TP_STRUCT__entry(
__field(unsigned int, server )
__field(int, ref )
__field(int, active )
__field(int, reason )
),
TP_fast_assign(
__entry->server = server_debug_id;
__entry->ref = ref;
__entry->active = active;
__entry->reason = reason;
),
TP_printk("s=%08x %s u=%d a=%d",
__entry->server,
__print_symbolic(__entry->reason, afs_server_traces),
__entry->ref,
__entry->active)
);
TRACE_EVENT(afs_volume,
TP_PROTO(afs_volid_t vid, int ref, enum afs_volume_trace reason),
TP_ARGS(vid, ref, reason),
TP_STRUCT__entry(
__field(afs_volid_t, vid )
__field(int, ref )
__field(enum afs_volume_trace, reason )
),
TP_fast_assign(
__entry->vid = vid;
__entry->ref = ref;
__entry->reason = reason;
),
TP_printk("V=%llx %s ur=%d",
__entry->vid,
__print_symbolic(__entry->reason, afs_volume_traces),
__entry->ref)
);
TRACE_EVENT(afs_cell,
TP_PROTO(unsigned int cell_debug_id, int ref, int active,
enum afs_cell_trace reason),
TP_ARGS(cell_debug_id, ref, active, reason),
TP_STRUCT__entry(
__field(unsigned int, cell )
__field(int, ref )
__field(int, active )
__field(int, reason )
),
TP_fast_assign(
__entry->cell = cell_debug_id;
__entry->ref = ref;
__entry->active = active;
__entry->reason = reason;
),
TP_printk("L=%08x %s r=%d a=%d",
__entry->cell,
__print_symbolic(__entry->reason, afs_cell_traces),
__entry->ref,
__entry->active)
);
#endif /* _TRACE_AFS_H */
/* This part must be outside protection */
#include <trace/define_trace.h>