nfs/nlm: handle reconnect for non-NLM4_LOCK requests
When a reply on an NLM-procedure gets stuck, the NFS-client will resend the request. This can happen through a re-connect in case the connection was terminated (long delay in the reply on the initial request). Once that happens, not all NLM-procedures are handled correctly. Testing this is difficult and time-consuming. There still may be problems with certain operations, but this definitely makes it behave much better than before. The problem occured due to a problem in EC, change-id I18a782903ba addressed the root cause. Change-Id: I23b385568e27232951fa3fbd7198a0e5d775a8c2 BUG: 1467313 Signed-off-by: Niels de Vos <ndevos@redhat.com> Reviewed-on: https://review.gluster.org/17698 Smoke: Gluster Build System <jenkins@build.gluster.org> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
This commit is contained in:
parent
26241777bf
commit
fafe1491ea
@ -919,38 +919,20 @@ nlm4svc_send_granted_cbk (struct rpc_req *req, struct iovec *iov, int count,
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
nlm4svc_send_granted (nfs3_call_state_t *cs);
|
||||
static int
|
||||
nlm_handle_connect (struct rpc_clnt *rpc_clnt, nfs3_call_state_t *cs);
|
||||
|
||||
int
|
||||
nlm_rpcclnt_notify (struct rpc_clnt *rpc_clnt, void *mydata,
|
||||
rpc_clnt_event_t fn, void *data)
|
||||
{
|
||||
int ret = 0;
|
||||
char *caller_name = NULL;
|
||||
nfs3_call_state_t *cs = NULL;
|
||||
|
||||
cs = mydata;
|
||||
|
||||
switch (fn) {
|
||||
case RPC_CLNT_CONNECT:
|
||||
if (!cs->req) {
|
||||
gf_msg (GF_NLM, GF_LOG_ERROR, EINVAL,
|
||||
NFS_MSG_RPC_CLNT_ERROR, "Spurious notify?!");
|
||||
goto err;
|
||||
}
|
||||
|
||||
caller_name = cs->args.nlm4_lockargs.alock.caller_name;
|
||||
ret = nlm_set_rpc_clnt (rpc_clnt, caller_name);
|
||||
if (ret == -1) {
|
||||
gf_msg (GF_NLM, GF_LOG_ERROR, 0,
|
||||
NFS_MSG_RPC_CLNT_ERROR, "Failed to set "
|
||||
"rpc clnt");
|
||||
goto err;
|
||||
}
|
||||
nlm4svc_send_granted (cs);
|
||||
rpc_clnt_unref (rpc_clnt);
|
||||
|
||||
nlm_handle_connect (rpc_clnt, cs);
|
||||
break;
|
||||
|
||||
case RPC_CLNT_MSG:
|
||||
@ -963,7 +945,6 @@ nlm_rpcclnt_notify (struct rpc_clnt *rpc_clnt, void *mydata,
|
||||
break;
|
||||
}
|
||||
|
||||
err:
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -2368,6 +2349,82 @@ nlm4svc_sm_notify (struct nlm_sm_status *status)
|
||||
nlm_cleanup_fds (status->mon_name);
|
||||
}
|
||||
|
||||
|
||||
/* RPC_CLNT_CONNECT gets called on (re)connects and should be able to handle
|
||||
* different NLM requests. */
|
||||
static int
|
||||
nlm_handle_connect (struct rpc_clnt *rpc_clnt, nfs3_call_state_t *cs)
|
||||
{
|
||||
int ret = -1;
|
||||
int nlm_proc = NLM4_NULL;
|
||||
struct nlm4_lock *alock = NULL;
|
||||
char *caller_name = NULL;
|
||||
|
||||
if (!cs || !cs->req) {
|
||||
gf_msg (GF_NLM, GF_LOG_ERROR, EINVAL, NFS_MSG_RPC_CLNT_ERROR,
|
||||
"Spurious notify?!");
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* NLM4_* actions from nlm4.h */
|
||||
if (cs->req->prognum == NLM_PROGRAM) {
|
||||
nlm_proc = cs->req->procnum;
|
||||
} else {
|
||||
/* hmm, cs->req has not been filled completely */
|
||||
if (cs->resume_fn == nlm4_lock_fd_resume)
|
||||
nlm_proc = NLM4_LOCK;
|
||||
else if (cs->resume_fn == nlm4_cancel_fd_resume)
|
||||
nlm_proc = NLM4_CANCEL;
|
||||
else if (cs->resume_fn == nlm4_unlock_fd_resume)
|
||||
nlm_proc = NLM4_UNLOCK;
|
||||
else {
|
||||
gf_msg (GF_NLM, GF_LOG_ERROR, 0,
|
||||
NFS_MSG_RPC_CLNT_ERROR, "(re)connect with an "
|
||||
"unexpected NLM4 procedure (%d)", nlm_proc);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
switch (nlm_proc) {
|
||||
case NLM4_LOCK:
|
||||
alock = &cs->args.nlm4_lockargs.alock;
|
||||
caller_name = alock->caller_name;
|
||||
|
||||
ret = nlm_set_rpc_clnt (rpc_clnt, caller_name);
|
||||
if (ret == -1) {
|
||||
gf_msg (GF_NLM, GF_LOG_ERROR, 0,
|
||||
NFS_MSG_RPC_CLNT_ERROR, "Failed to set "
|
||||
"rpc clnt");
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* extra ref taken with nlm_set_rpc_clnt() */
|
||||
rpc_clnt_unref (rpc_clnt);
|
||||
|
||||
nlm4svc_send_granted (cs);
|
||||
break;
|
||||
|
||||
case NLM4_CANCEL:
|
||||
/* alock = &cs->args.nlm4_cancargs.alock; */
|
||||
ret = nlm4svc_cancel (cs->req);
|
||||
break;
|
||||
|
||||
case NLM4_UNLOCK:
|
||||
/* alock = &cs->args.nlm4_unlockargs.alock; */
|
||||
ret = nlm4svc_unlock (cs->req);
|
||||
break;
|
||||
|
||||
default:
|
||||
gf_msg (GF_NLM, GF_LOG_ERROR, 0, NFS_MSG_RPC_CLNT_ERROR,
|
||||
"(re)connect with an unexpected NLM4 procedure "
|
||||
"(%d)", nlm_proc);
|
||||
}
|
||||
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
rpcsvc_actor_t nlm4svc_actors[NLM4_PROC_COUNT] = {
|
||||
/* 0 */
|
||||
{"NULL", NLM4_NULL, nlm4svc_null, NULL, 0, DRC_IDEMPOTENT},
|
||||
|
Loading…
x
Reference in New Issue
Block a user