2009-09-25 16:07:19 -07:00
/*
drbd_req . h
This file is part of DRBD by Philipp Reisner and Lars Ellenberg .
Copyright ( C ) 2006 - 2008 , LINBIT Information Technologies GmbH .
Copyright ( C ) 2006 - 2008 , Lars Ellenberg < lars . ellenberg @ linbit . com > .
Copyright ( C ) 2006 - 2008 , Philipp Reisner < philipp . reisner @ linbit . com > .
DRBD is free software ; you can redistribute it and / or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation ; either version 2 , or ( at your option )
any later version .
DRBD is distributed in the hope that it will be useful ,
but WITHOUT ANY WARRANTY ; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
GNU General Public License for more details .
You should have received a copy of the GNU General Public License
along with drbd ; see the file COPYING . If not , write to
the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*/
# ifndef _DRBD_REQ_H
# define _DRBD_REQ_H
# include <linux/module.h>
# include <linux/slab.h>
# include <linux/drbd.h>
# include "drbd_int.h"
/* The request callbacks will be called in irq context by the IDE drivers,
and in Softirqs / Tasklets / BH context by the SCSI drivers ,
and by the receiver and worker in kernel - thread context .
Try to get the locking right : ) */
/*
* Objects of type struct drbd_request do only exist on a R_PRIMARY node , and are
* associated with IO requests originating from the block layer above us .
*
* There are quite a few things that may happen to a drbd request
* during its lifetime .
*
* It will be created .
* It will be marked with the intention to be
* submitted to local disk and / or
* send via the network .
*
* It has to be placed on the transfer log and other housekeeping lists ,
* In case we have a network connection .
*
* It may be identified as a concurrent ( write ) request
* and be handled accordingly .
*
* It may be handed over to the local disk subsystem.
* It may be completed by the local disk subsystem,
* either successfully or with io-error.
* In case it is a READ request , and it failed locally ,
* it may be retried remotely .
*
* It may be queued for sending .
* It may be handed over to the network stack ,
* which may fail .
* It may be acknowledged by the " peer " according to the wire_protocol in use .
* this may be a negative ack .
* It may receive a faked ack when the network connection is lost and the
* transfer log is cleaned up .
* Sending may be canceled due to network connection loss .
* When it finally has outlived its time ,
* corresponding dirty bits in the resync - bitmap may be cleared or set ,
* it will be destroyed ,
* and completion will be signalled to the originator ,
* with or without " success " .
*/
/*
 * Events that can be applied to a struct drbd_request through
 * __req_mod() / _req_mod() / req_mod().
 *
 * No enumerator has an explicit value: they are numbered consecutively
 * starting at 0 (CREATED), so inserting an entry renumbers all later ones.
 */
enum drbd_req_event {
	CREATED,
	TO_BE_SENT,
	TO_BE_SUBMITTED,

	/* XXX yes, now I am inconsistent...
	 * these are not "events" but "actions"
	 * oh, well... */
	QUEUE_FOR_NET_WRITE,
	QUEUE_FOR_NET_READ,
	QUEUE_FOR_SEND_OOS,

	/* An empty flush is queued as P_BARRIER,
	 * which will cause it to complete "successfully",
	 * even if the local disk flush failed.
	 *
	 * Just like "real" requests, empty flushes (blkdev_issue_flush()) will
	 * only see an error if neither local nor remote data is reachable. */
	QUEUE_AS_DRBD_BARRIER,

	SEND_CANCELED,
	SEND_FAILED,
	HANDED_OVER_TO_NETWORK,
	OOS_HANDED_TO_NETWORK,
	CONNECTION_LOST_WHILE_PENDING,
	READ_RETRY_REMOTE_CANCELED,
	RECV_ACKED_BY_PEER,
	WRITE_ACKED_BY_PEER,
	WRITE_ACKED_BY_PEER_AND_SIS, /* and set_in_sync */
	CONFLICT_RESOLVED,
	POSTPONE_WRITE,
	NEG_ACKED,
	BARRIER_ACKED, /* in protocol A and B */
	DATA_RECEIVED, /* (remote read) */

	COMPLETED_OK,
	READ_COMPLETED_WITH_ERROR,
	READ_AHEAD_COMPLETED_WITH_ERROR,
	WRITE_COMPLETED_WITH_ERROR,
	DISCARD_COMPLETED_NOTSUPP,
	DISCARD_COMPLETED_WITH_ERROR,

	ABORT_DISK_IO,
	RESEND,
	FAIL_FROZEN_DISK_IO,
	RESTART_FROZEN_DISK_IO,
	NOTHING,
};
/* encoding of request states for now.  we don't actually need that many bits.
 * we don't need to do atomic bit operations either, since most of the time we
 * need to look at the connection state and/or manipulate some lists at the
 * same time, so we should hold the request lock anyway.
 *
 * The enumerators below are bit NUMBERS, counted consecutively from 0;
 * the order of the entries therefore defines the bit layout.
 */
enum drbd_req_state_bits {
	/* Local disk part, bits 3..0:
	 * 3210
	 * 0000: no local possible
	 * 0001: to be submitted
	 *    UNUSED, we could map: 011: submitted, completion still pending
	 * 0110: completed ok
	 * 0010: completed with error
	 * 1001: Aborted (before completion)
	 * 1x10: Aborted and completed -> free
	 */
	__RQ_LOCAL_PENDING,
	__RQ_LOCAL_COMPLETED,
	__RQ_LOCAL_OK,
	__RQ_LOCAL_ABORTED,

	/* Network part.  The table shows bits 8..4 only
	 * (OK, DONE, SENT, QUEUED, PENDING); bit 9, __RQ_NET_SIS, is not shown.
	 * 87654
	 * 00000: no network possible
	 * 00001: to be sent
	 * 00011: to be sent, on worker queue
	 * 00101: sent, expecting recv_ack (B) or write_ack (C)
	 * 11101: sent,
	 *        recv_ack (B) or implicit "ack" (A),
	 *        still waiting for the barrier ack.
	 *        master_bio may already be completed and invalidated.
	 * 11100: write acked (C),
	 *        data received (for remote read, any protocol)
	 *        or finally the barrier ack has arrived (B,A)...
	 *        request can be freed
	 * 01100: neg-acked (write, protocol C)
	 *        or neg-d-acked (read, any protocol)
	 *        or killed from the transfer log
	 *        during cleanup after connection loss
	 *        request can be freed
	 * 01000: canceled or send failed...
	 *        request can be freed
	 */

	/* if "SENT" is not set, yet, this can still fail or be canceled.
	 * if "SENT" is set already, we still wait for an Ack packet.
	 * when cleared, the master_bio may be completed.
	 * in (B,A) the request object may still linger on the transaction log
	 * until the corresponding barrier ack comes in */
	__RQ_NET_PENDING,

	/* If it is QUEUED, and it is a WRITE, it is also registered in the
	 * transfer log. Currently we need this flag to avoid conflicts between
	 * worker canceling the request and tl_clear_barrier killing it from
	 * transfer log.  We should restructure the code so this conflict does
	 * no longer occur. */
	__RQ_NET_QUEUED,

	/* well, actually only "handed over to the network stack".
	 *
	 * TODO can potentially be dropped because of the similar meaning
	 * of RQ_NET_SENT and ~RQ_NET_QUEUED.
	 * however it is not exactly the same. before we drop it
	 * we must ensure that we can tell a request with network part
	 * from a request without, regardless of what happens to it. */
	__RQ_NET_SENT,

	/* when set, the request may be freed (if RQ_NET_QUEUED is clear).
	 * basically this means the corresponding P_BARRIER_ACK was received */
	__RQ_NET_DONE,

	/* whether or not we know (C) or pretend (B,A) that the write
	 * was successfully written on the peer.
	 */
	__RQ_NET_OK,

	/* peer called drbd_set_in_sync() for this write */
	__RQ_NET_SIS,

	/* keep this last of the network bits, it is used to build RQ_NET_MASK */
	__RQ_NET_MAX,

	/* Set when this is a write, clear for a read */
	__RQ_WRITE,

	/* Should call drbd_al_complete_io() for this request... */
	__RQ_IN_ACT_LOG,

	/* The peer has sent a retry ACK */
	__RQ_POSTPONED,

	/* would have been completed,
	 * but was not, because of drbd_suspended() */
	__RQ_COMPLETION_SUSP,

	/* We expect a receive ACK (wire proto B) */
	__RQ_EXP_RECEIVE_ACK,

	/* We expect a write ACK (wire proto C) */
	__RQ_EXP_WRITE_ACK,

	/* waiting for a barrier ack, did an extra kref_get */
	__RQ_EXP_BARR_ACK,
};
/*
 * Bit masks for the request state word: each RQ_x is 1UL shifted by the
 * matching __RQ_x bit number.
 */

/* local disk part */
#define RQ_LOCAL_PENDING   (1UL << __RQ_LOCAL_PENDING)
#define RQ_LOCAL_COMPLETED (1UL << __RQ_LOCAL_COMPLETED)
#define RQ_LOCAL_OK        (1UL << __RQ_LOCAL_OK)
#define RQ_LOCAL_ABORTED   (1UL << __RQ_LOCAL_ABORTED)

/* every bit from bit 0 up to and including RQ_LOCAL_ABORTED */
#define RQ_LOCAL_MASK      ((RQ_LOCAL_ABORTED << 1)-1)

/* network part */
#define RQ_NET_PENDING     (1UL << __RQ_NET_PENDING)
#define RQ_NET_QUEUED      (1UL << __RQ_NET_QUEUED)
#define RQ_NET_SENT        (1UL << __RQ_NET_SENT)
#define RQ_NET_DONE        (1UL << __RQ_NET_DONE)
#define RQ_NET_OK          (1UL << __RQ_NET_OK)
#define RQ_NET_SIS         (1UL << __RQ_NET_SIS)

/* every bit below __RQ_NET_MAX that is not a local bit.
 * With four local bits and __RQ_NET_MAX == 10 this is 0x3f0. */
#define RQ_NET_MASK        (((1UL << __RQ_NET_MAX)-1) & ~RQ_LOCAL_MASK)

/* remaining flags */
#define RQ_WRITE           (1UL << __RQ_WRITE)
#define RQ_IN_ACT_LOG      (1UL << __RQ_IN_ACT_LOG)
#define RQ_POSTPONED       (1UL << __RQ_POSTPONED)
#define RQ_COMPLETION_SUSP (1UL << __RQ_COMPLETION_SUSP)
#define RQ_EXP_RECEIVE_ACK (1UL << __RQ_EXP_RECEIVE_ACK)
#define RQ_EXP_WRITE_ACK   (1UL << __RQ_EXP_WRITE_ACK)
#define RQ_EXP_BARR_ACK    (1UL << __RQ_EXP_BARR_ACK)
2010-05-27 15:07:43 +02:00
2010-05-12 17:08:26 +02:00
/* For waking up the frozen transfer log, mod_req() has to return whether the
 * request should be counted in the epoch object: MR_WRITE for a write,
 * MR_READ for a read. */
#define MR_WRITE       1
#define MR_READ        2
2010-05-12 17:08:26 +02:00
2010-06-10 13:30:36 +02:00
static inline void drbd_req_make_private_bio ( struct drbd_request * req , struct bio * bio_src )
{
struct bio * bio ;
bio = bio_clone ( bio_src , GFP_NOIO ) ; /* XXX cannot fail?? */
req - > private_bio = bio ;
bio - > bi_private = req ;
2011-02-17 16:46:59 +01:00
bio - > bi_end_io = drbd_request_endio ;
2010-06-10 13:30:36 +02:00
bio - > bi_next = NULL ;
}
2009-09-25 16:07:19 -07:00
/* Short lived temporary struct on the stack.
 * Pairs a bio with the error value to report for it.
 * We could squirrel the error to be returned into
 * bio->bi_iter.bi_size, or similar. But that would be too ugly. */
struct bio_and_error {
	struct bio *bio;	/* tested for non-NULL by _req_mod()/req_mod() */
	int error;
};
2011-05-30 16:32:41 +02:00
/* Request and transfer-log entry points declared here for the inline helpers
 * in this header and for other users; none of them is defined in this file. */
extern void start_new_tl_epoch(struct drbd_connection *connection);
extern void drbd_req_destroy(struct kref *kref);
extern void _req_may_be_done(struct drbd_request *req,
		struct bio_and_error *m);
extern int __req_mod(struct drbd_request *req, enum drbd_req_event what,
		struct bio_and_error *m);
extern void complete_master_bio(struct drbd_device *device,
		struct bio_and_error *m);
extern void request_timer_fn(unsigned long data);
extern void tl_restart(struct drbd_connection *connection, enum drbd_req_event what);
extern void _tl_restart(struct drbd_connection *connection, enum drbd_req_event what);
extern void tl_abort_disk_io(struct drbd_device *device);

/* this is in drbd_main.c */
extern void drbd_restart_request(struct drbd_request *req);
/* use this if you don't want to deal with calling complete_master_bio()
* outside the spinlock , e . g . when walking some list on cleanup . */
2010-06-09 14:07:43 +02:00
static inline int _req_mod ( struct drbd_request * req , enum drbd_req_event what )
2009-09-25 16:07:19 -07:00
{
2011-07-28 15:27:51 +02:00
struct drbd_device * device = req - > device ;
2009-09-25 16:07:19 -07:00
struct bio_and_error m ;
2010-06-09 14:07:43 +02:00
int rv ;
2009-09-25 16:07:19 -07:00
/* __req_mod possibly frees req, do not touch req after that! */
2010-06-09 14:07:43 +02:00
rv = __req_mod ( req , what , & m ) ;
2009-09-25 16:07:19 -07:00
if ( m . bio )
2011-07-03 13:26:43 +02:00
complete_master_bio ( device , & m ) ;
2010-06-09 14:07:43 +02:00
return rv ;
2009-09-25 16:07:19 -07:00
}
2010-10-26 16:02:27 +02:00
/* completion of master bio is outside of our spinlock.
* We still may or may not be inside some irqs disabled section
* of the lower level driver completion callback , so we need to
* spin_lock_irqsave here . */
2010-06-09 14:07:43 +02:00
static inline int req_mod ( struct drbd_request * req ,
2009-09-25 16:07:19 -07:00
enum drbd_req_event what )
{
2010-10-26 16:02:27 +02:00
unsigned long flags ;
2011-07-28 15:27:51 +02:00
struct drbd_device * device = req - > device ;
2009-09-25 16:07:19 -07:00
struct bio_and_error m ;
2010-06-09 14:07:43 +02:00
int rv ;
2011-07-07 14:19:42 +02:00
spin_lock_irqsave ( & device - > resource - > req_lock , flags ) ;
2010-06-09 14:07:43 +02:00
rv = __req_mod ( req , what , & m ) ;
2011-07-07 14:19:42 +02:00
spin_unlock_irqrestore ( & device - > resource - > req_lock , flags ) ;
2009-09-25 16:07:19 -07:00
if ( m . bio )
2011-07-03 13:26:43 +02:00
complete_master_bio ( device , & m ) ;
2010-06-09 14:07:43 +02:00
return rv ;
2009-09-25 16:07:19 -07:00
}
2011-01-17 20:27:30 +01:00
2014-08-08 17:48:00 +02:00
/* Defined outside this header; takes a device state by value.
 * NOTE(review): presumably tells whether, in state @s, requests should also
 * go to the peer -- confirm against the definition. */
extern bool drbd_should_do_remote(union drbd_dev_state s);
2011-01-17 20:27:30 +01:00
2009-09-25 16:07:19 -07:00
# endif