From 2c8dfff7d9e33dadf14df76bccb978cb955d4952 Mon Sep 17 00:00:00 2001 From: Ralph Boehme Date: Thu, 4 Apr 2024 19:18:19 +0200 Subject: [PATCH] smbd: add option "smbd lease break:debug hung procs" By enabling this a process sending a lease break message to another process holding a lease will start watching that process and if that process didn't process the lease break within 10 seconds (cf server_id_watch_waited()), we log a kernel stack backtrace of that process. BUG: https://bugzilla.samba.org/show_bug.cgi?id=15624 Pair-Programmed-With: Stefan Metzmacher Signed-off-by: Ralph Boehme Signed-off-by: Stefan Metzmacher Reviewed-by: Guenther Deschner (cherry picked from commit d8613d7ee23c4e990285a387eb9ac2eeefff9749) --- source3/smbd/open.c | 110 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 102 insertions(+), 8 deletions(-) diff --git a/source3/smbd/open.c b/source3/smbd/open.c index 5a0fc4626bd..12735303c6b 100644 --- a/source3/smbd/open.c +++ b/source3/smbd/open.c @@ -38,6 +38,7 @@ #include "serverid.h" #include "messages.h" #include "source3/lib/dbwrap/dbwrap_watch.h" +#include "source3/lib/server_id_watch.h" #include "locking/leases_db.h" #include "librpc/gen_ndr/ndr_leases_db.h" #include "lib/util/time_basic.h" @@ -2479,6 +2480,10 @@ static int map_lease_type_to_oplock(uint32_t lease_type) return result; } +struct blocker_debug_state { + size_t num_blockers; +}; + struct delay_for_oplock_state { struct files_struct *fsp; const struct smb2_lease *lease; @@ -2490,8 +2495,22 @@ struct delay_for_oplock_state { bool have_other_lease; uint32_t total_lease_types; bool delay; + struct blocker_debug_state *blocker_debug_state; }; +static int blocker_debug_state_destructor(struct blocker_debug_state *state) +{ + if (state->num_blockers == 0) { + return 0; + } + + DBG_DEBUG("blocker_debug_state [%p] num_blockers [%zu]\n", + state, state->num_blockers); + return 0; +} + +static void delay_for_oplock_fn_watch_done(struct tevent_req *subreq); + static bool delay_for_oplock_fn( struct share_mode_entry *e, bool *modified, @@ -2504,6 +2523,8 @@ static bool delay_for_oplock_fn( uint32_t e_lease_type = SMB2_LEASE_NONE; uint32_t break_to; bool lease_is_breaking = false; + struct tevent_req *subreq = NULL; + struct server_id_buf idbuf = {}; if (e_is_lease) { NTSTATUS status; @@ -2643,9 +2664,56 @@ static bool delay_for_oplock_fn( state->delay = true; } + if (!state->delay) { + return false; + } + + if (state->blocker_debug_state == NULL) { + return false; + } + + subreq = server_id_watch_send(state->blocker_debug_state, + fsp->conn->sconn->ev_ctx, + e->pid); + if (subreq == NULL) { + DBG_ERR("server_id_watch_send(%s) returned NULL\n", + server_id_str_buf(e->pid, &idbuf)); + return false; + } + + tevent_req_set_callback(subreq, + delay_for_oplock_fn_watch_done, + state->blocker_debug_state); + + state->blocker_debug_state->num_blockers++; + + DBG_DEBUG("Starting to watch pid [%s] state [%p] num_blockers [%zu]\n", + server_id_str_buf(e->pid, &idbuf), + state->blocker_debug_state, + state->blocker_debug_state->num_blockers); + return false; }; +static void delay_for_oplock_fn_watch_done(struct tevent_req *subreq) +{ + struct blocker_debug_state *blocker_debug_state = tevent_req_callback_data( + subreq, struct blocker_debug_state); + struct server_id pid = {}; + struct server_id_buf idbuf = {}; + int ret; + + ret = server_id_watch_recv(subreq, &pid); + if (ret != 0) { + DBG_ERR("server_id_watch_recv failed %s\n", strerror(ret)); + return; + } + + DBG_DEBUG("state [%p] server_id_watch_recv() returned pid [%s] exited\n", + blocker_debug_state, + server_id_str_buf(pid, &idbuf)); +} + static NTSTATUS delay_for_oplock(files_struct *fsp, int oplock_request, const struct smb2_lease *lease, @@ -2654,7 +2722,8 @@ static NTSTATUS delay_for_oplock(files_struct *fsp, uint32_t create_disposition, bool first_open_attempt, int *poplock_type, - uint32_t *pgranted) + uint32_t *pgranted, + struct blocker_debug_state **blocker_debug_state) { struct delay_for_oplock_state state = { .fsp = fsp, @@ -2700,6 +2769,22 @@ static NTSTATUS delay_for_oplock(files_struct *fsp, goto grant; } + if (lp_parm_bool(GLOBAL_SECTION_SNUM, + "smbd lease break", + "debug hung procs", + false)) + { + state.blocker_debug_state = talloc_zero(fsp, + struct blocker_debug_state); + if (state.blocker_debug_state == NULL) { + return NT_STATUS_NO_MEMORY; + } + talloc_steal(talloc_tos(), state.blocker_debug_state); + + talloc_set_destructor(state.blocker_debug_state, + blocker_debug_state_destructor); + } + state.delay_mask = have_sharing_violation ? SMB2_LEASE_HANDLE : SMB2_LEASE_WRITE; @@ -2721,6 +2806,7 @@ static NTSTATUS delay_for_oplock(files_struct *fsp, } if (state.delay) { + *blocker_debug_state = state.blocker_debug_state; return NT_STATUS_RETRY; } @@ -2834,7 +2920,8 @@ static NTSTATUS handle_share_mode_lease( const struct smb2_lease *lease, bool first_open_attempt, int *poplock_type, - uint32_t *pgranted) + uint32_t *pgranted, + struct blocker_debug_state **blocker_debug_state) { bool sharing_violation = false; NTSTATUS status; @@ -2875,7 +2962,8 @@ static NTSTATUS handle_share_mode_lease( create_disposition, first_open_attempt, poplock_type, - pgranted); + pgranted, + blocker_debug_state); if (!NT_STATUS_IS_OK(status)) { return status; } @@ -2910,7 +2998,8 @@ static void defer_open_done(struct tevent_req *req); static void defer_open(struct share_mode_lock *lck, struct timeval timeout, struct smb_request *req, - struct file_id id) + struct file_id id, + struct blocker_debug_state **blocker_debug_state) { struct deferred_open_record *open_rec = NULL; struct timeval abs_timeout; @@ -2954,6 +3043,8 @@ static void defer_open(struct share_mode_lock *lck, } tevent_req_set_callback(watch_req, defer_open_done, watch_state); + talloc_move(watch_req, blocker_debug_state); + ok = tevent_req_set_endtime(watch_req, req->sconn->ev_ctx, abs_timeout); if (!ok) { exit_server("tevent_req_set_endtime failed"); @@ -3236,7 +3327,8 @@ static bool open_match_attributes(connection_struct *conn, static void schedule_defer_open(struct share_mode_lock *lck, struct file_id id, - struct smb_request *req) + struct smb_request *req, + struct blocker_debug_state **blocker_debug_state) { /* This is a relative time, added to the absolute request_time value to get the absolute timeout time. @@ -3260,7 +3352,7 @@ static void schedule_defer_open(struct share_mode_lock *lck, return; } - defer_open(lck, timeout, req, id); + defer_open(lck, timeout, req, id, blocker_debug_state); } /**************************************************************************** @@ -3322,6 +3414,7 @@ static NTSTATUS check_and_store_share_mode( int oplock_type = NO_OPLOCK; uint32_t granted_lease = 0; const struct smb2_lease_key *lease_key = NULL; + struct blocker_debug_state *blocker_debug_state = NULL; bool delete_on_close; bool ok; @@ -3344,9 +3437,10 @@ static NTSTATUS check_and_store_share_mode( lease, first_open_attempt, &oplock_type, - &granted_lease); + &granted_lease, + &blocker_debug_state); if (NT_STATUS_EQUAL(status, NT_STATUS_RETRY)) { - schedule_defer_open(lck, fsp->file_id, req); + schedule_defer_open(lck, fsp->file_id, req, &blocker_debug_state); return NT_STATUS_SHARING_VIOLATION; } if (!NT_STATUS_IS_OK(status)) {