2009-09-25 16:07:19 -07:00
/*
drbd_req . h
This file is part of DRBD by Philipp Reisner and Lars Ellenberg .
Copyright ( C ) 2006 - 2008 , LINBIT Information Technologies GmbH .
Copyright ( C ) 2006 - 2008 , Lars Ellenberg < lars . ellenberg @ linbit . com > .
Copyright ( C ) 2006 - 2008 , Philipp Reisner < philipp . reisner @ linbit . com > .
DRBD is free software ; you can redistribute it and / or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation ; either version 2 , or ( at your option )
any later version .
DRBD is distributed in the hope that it will be useful ,
but WITHOUT ANY WARRANTY ; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
GNU General Public License for more details .
You should have received a copy of the GNU General Public License
along with drbd ; see the file COPYING . If not , write to
the Free Software Foundation , 675 Mass Ave , Cambridge , MA 0213 9 , USA .
*/
# ifndef _DRBD_REQ_H
# define _DRBD_REQ_H
# include <linux/module.h>
# include <linux/slab.h>
# include <linux/drbd.h>
# include "drbd_int.h"
# include "drbd_wrappers.h"
/* The request callbacks will be called in irq context by the IDE drivers,
and in Softirqs / Tasklets / BH context by the SCSI drivers ,
and by the receiver and worker in kernel - thread context .
Try to get the locking right : ) */
/*
* Objects of type struct drbd_request do only exist on a R_PRIMARY node , and are
* associated with IO requests originating from the block layer above us .
*
* There are quite a few things that may happen to a drbd request
* during its lifetime .
*
* It will be created .
* It will be marked with the intention to be
* submitted to local disk and/or
* sent via the network.
*
* It has to be placed on the transfer log and other housekeeping lists ,
* In case we have a network connection .
*
* It may be identified as a concurrent ( write ) request
* and be handled accordingly .
*
* It may be handed over to the local disk subsystem.
* It may be completed by the local disk subsystem,
* either successfully or with io-error.
* In case it is a READ request , and it failed locally ,
* it may be retried remotely .
*
* It may be queued for sending .
* It may be handed over to the network stack ,
* which may fail .
* It may be acknowledged by the " peer " according to the wire_protocol in use .
* this may be a negative ack .
* It may receive a faked ack when the network connection is lost and the
* transfer log is cleaned up .
* Sending may be canceled due to network connection loss .
* When it finally has outlived its time ,
* corresponding dirty bits in the resync - bitmap may be cleared or set ,
* it will be destroyed ,
* and completion will be signalled to the originator ,
* with or without " success " .
*/
/*
 * Values passed as the "what" argument of __req_mod(), _req_mod(),
 * req_mod(), tl_restart() and _tl_restart().
 *
 * No enumerator has an explicit value: they are numbered consecutively
 * in declaration order, starting with CREATED == 0.
 */
enum drbd_req_event {
	CREATED,
	TO_BE_SENT,
	TO_BE_SUBMITTED,

	/* XXX yes, now I am inconsistent...
	 * these are not "events" but "actions"
	 * oh, well... */
	QUEUE_FOR_NET_WRITE,
	QUEUE_FOR_NET_READ,
	QUEUE_FOR_SEND_OOS,

	SEND_CANCELED,
	SEND_FAILED,
	HANDED_OVER_TO_NETWORK,
	OOS_HANDED_TO_NETWORK,
	CONNECTION_LOST_WHILE_PENDING,
	READ_RETRY_REMOTE_CANCELED,
	RECV_ACKED_BY_PEER,
	WRITE_ACKED_BY_PEER,
	WRITE_ACKED_BY_PEER_AND_SIS,	/* and set_in_sync */
	DISCARD_WRITE,
	POSTPONE_WRITE,
	NEG_ACKED,
	BARRIER_ACKED,			/* in protocol A and B */
	DATA_RECEIVED,			/* (remote read) */

	READ_COMPLETED_WITH_ERROR,
	READ_AHEAD_COMPLETED_WITH_ERROR,
	WRITE_COMPLETED_WITH_ERROR,
	COMPLETED_OK,
	RESEND,
	FAIL_FROZEN_DISK_IO,
	RESTART_FROZEN_DISK_IO,
	NOTHING,
};
/* encoding of request states for now. we don't actually need that many bits.
* we don ' t need to do atomic bit operations either , since most of the time we
* need to look at the connection state and / or manipulate some lists at the
* same time , so we should hold the request lock anyways .
*/
/*
 * Bit positions (not masks) of the request state flags.
 *
 * Enumerators are numbered implicitly from 0 in declaration order, so
 * the order below defines the bit layout.  The three local bits come
 * first, then the network bits up to (excluding) __RQ_NET_MAX, then the
 * remaining flags.
 */
enum drbd_req_state_bits {
	/*         210
	 *         000: no local possible
	 *         001: to be submitted
	 *    UNUSED, we could map: 011: submitted, completion still pending
	 *         110: completed ok
	 *         010: completed with error
	 */
	__RQ_LOCAL_PENDING,
	__RQ_LOCAL_COMPLETED,
	__RQ_LOCAL_OK,

	/*       76543
	 *       00000: no network possible
	 *       00001: to be sent
	 *       00011: to be sent, on worker queue
	 *       00101: sent, expecting recv_ack (B) or write_ack (C)
	 *       11101: sent,
	 *              recv_ack (B) or implicit "ack" (A),
	 *              still waiting for the barrier ack.
	 *              master_bio may already be completed and invalidated.
	 *       11100: write acked (C),
	 *              data received (for remote read, any protocol)
	 *              or finally the barrier ack has arrived (B,A)...
	 *              request can be freed
	 *       01100: neg-acked (write, protocol C)
	 *              or neg-d-acked (read, any protocol)
	 *              or killed from the transfer log
	 *              during cleanup after connection loss
	 *              request can be freed
	 *       01000: canceled or send failed...
	 *              request can be freed
	 */

	/* if "SENT" is not set, yet, this can still fail or be canceled.
	 * if "SENT" is set already, we still wait for an Ack packet.
	 * when cleared, the master_bio may be completed.
	 * in (B,A) the request object may still linger on the transaction log
	 * until the corresponding barrier ack comes in */
	__RQ_NET_PENDING,

	/* If it is QUEUED, and it is a WRITE, it is also registered in the
	 * transfer log. Currently we need this flag to avoid conflicts between
	 * worker canceling the request and tl_clear_barrier killing it from
	 * transfer log.  We should restructure the code so this conflict does
	 * no longer occur. */
	__RQ_NET_QUEUED,

	/* well, actually only "handed over to the network stack".
	 *
	 * TODO can potentially be dropped because of the similar meaning
	 * of RQ_NET_SENT and ~RQ_NET_QUEUED.
	 * however it is not exactly the same. before we drop it
	 * we must ensure that we can tell a request with network part
	 * from a request without, regardless of what happens to it. */
	__RQ_NET_SENT,

	/* when set, the request may be freed (if RQ_NET_QUEUED is clear).
	 * basically this means the corresponding P_BARRIER_ACK was received */
	__RQ_NET_DONE,

	/* whether or not we know (C) or pretend (B,A) that the write
	 * was successfully written on the peer.
	 */
	__RQ_NET_OK,

	/* peer called drbd_set_in_sync() for this write */
	__RQ_NET_SIS,

	/* keep this last, it's for the RQ_NET_MASK */
	__RQ_NET_MAX,

	/* Set when this is a write, clear for a read */
	__RQ_WRITE,

	/* Should call drbd_al_complete_io() for this request... */
	__RQ_IN_ACT_LOG,

	/* The peer has sent a retry ACK */
	__RQ_POSTPONED,

	/* We expect a receive ACK (wire proto B) */
	__RQ_EXP_RECEIVE_ACK,

	/* We expect a write ACK (wire proto C) */
	__RQ_EXP_WRITE_ACK,
};
/*
 * Single-bit masks for the positions in enum drbd_req_state_bits, plus
 * two group masks: RQ_LOCAL_MASK covers every bit up to and including
 * RQ_LOCAL_OK, RQ_NET_MASK covers every bit below __RQ_NET_MAX that is
 * not part of RQ_LOCAL_MASK.
 */
#define RQ_LOCAL_PENDING	(1UL << __RQ_LOCAL_PENDING)
#define RQ_LOCAL_COMPLETED	(1UL << __RQ_LOCAL_COMPLETED)
#define RQ_LOCAL_OK		(1UL << __RQ_LOCAL_OK)

#define RQ_LOCAL_MASK		((RQ_LOCAL_OK << 1)-1) /* 0x07 */

#define RQ_NET_PENDING		(1UL << __RQ_NET_PENDING)
#define RQ_NET_QUEUED		(1UL << __RQ_NET_QUEUED)
#define RQ_NET_SENT		(1UL << __RQ_NET_SENT)
#define RQ_NET_DONE		(1UL << __RQ_NET_DONE)
#define RQ_NET_OK		(1UL << __RQ_NET_OK)
#define RQ_NET_SIS		(1UL << __RQ_NET_SIS)

/* 0x1f8 */
#define RQ_NET_MASK		(((1UL << __RQ_NET_MAX)-1) & ~RQ_LOCAL_MASK)

#define RQ_WRITE		(1UL << __RQ_WRITE)
#define RQ_IN_ACT_LOG		(1UL << __RQ_IN_ACT_LOG)
#define RQ_POSTPONED		(1UL << __RQ_POSTPONED)
#define RQ_EXP_RECEIVE_ACK	(1UL << __RQ_EXP_RECEIVE_ACK)
#define RQ_EXP_WRITE_ACK	(1UL << __RQ_EXP_WRITE_ACK)
2010-05-27 15:07:43 +02:00
2010-05-12 17:08:26 +02:00
/*
 * To wake up the frozen transfer log, mod_req() has to report whether
 * the request should be counted in the epoch object.  These are the
 * flag bits (and their bit positions) used for that report.
 */
#define MR_WRITE_SHIFT	0
#define MR_READ_SHIFT	1

#define MR_WRITE	(1 << MR_WRITE_SHIFT)
#define MR_READ		(1 << MR_READ_SHIFT)
2010-06-10 13:30:36 +02:00
static inline void drbd_req_make_private_bio ( struct drbd_request * req , struct bio * bio_src )
{
struct bio * bio ;
bio = bio_clone ( bio_src , GFP_NOIO ) ; /* XXX cannot fail?? */
req - > private_bio = bio ;
bio - > bi_private = req ;
2011-02-17 16:46:59 +01:00
bio - > bi_end_io = drbd_request_endio ;
2010-06-10 13:30:36 +02:00
bio - > bi_next = NULL ;
}
2009-09-25 16:07:19 -07:00
/*
 * Short lived temporary struct on the stack: pairs a bio with the error
 * code to be returned for it.
 *
 * We could squirrel the error to be returned into bio->bi_size, or
 * similar.  But that would be too ugly.
 */
struct bio_and_error {
	struct bio	*bio;	/* bio the error belongs to */
	int		error;	/* error code to be returned */
};
/*
 * Request handling entry points; only declared here, the definitions are
 * not part of this header.
 */
extern void _req_may_be_done(struct drbd_request *req,
		struct bio_and_error *m);
extern int __req_mod(struct drbd_request *req, enum drbd_req_event what,
		struct bio_and_error *m);
extern void complete_master_bio(struct drbd_conf *mdev,
		struct bio_and_error *m);
extern void request_timer_fn(unsigned long data);
extern void tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what);
extern void _tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what);
2009-09-25 16:07:19 -07:00
/* use this if you don't want to deal with calling complete_master_bio()
* outside the spinlock , e . g . when walking some list on cleanup . */
2010-06-09 14:07:43 +02:00
static inline int _req_mod ( struct drbd_request * req , enum drbd_req_event what )
2009-09-25 16:07:19 -07:00
{
2011-02-08 15:08:49 +01:00
struct drbd_conf * mdev = req - > w . mdev ;
2009-09-25 16:07:19 -07:00
struct bio_and_error m ;
2010-06-09 14:07:43 +02:00
int rv ;
2009-09-25 16:07:19 -07:00
/* __req_mod possibly frees req, do not touch req after that! */
2010-06-09 14:07:43 +02:00
rv = __req_mod ( req , what , & m ) ;
2009-09-25 16:07:19 -07:00
if ( m . bio )
complete_master_bio ( mdev , & m ) ;
2010-06-09 14:07:43 +02:00
return rv ;
2009-09-25 16:07:19 -07:00
}
2010-10-26 16:02:27 +02:00
/* completion of master bio is outside of our spinlock.
* We still may or may not be inside some irqs disabled section
* of the lower level driver completion callback , so we need to
* spin_lock_irqsave here . */
2010-06-09 14:07:43 +02:00
static inline int req_mod ( struct drbd_request * req ,
2009-09-25 16:07:19 -07:00
enum drbd_req_event what )
{
2010-10-26 16:02:27 +02:00
unsigned long flags ;
2011-02-08 15:08:49 +01:00
struct drbd_conf * mdev = req - > w . mdev ;
2009-09-25 16:07:19 -07:00
struct bio_and_error m ;
2010-06-09 14:07:43 +02:00
int rv ;
2011-01-19 14:16:30 +01:00
spin_lock_irqsave ( & mdev - > tconn - > req_lock , flags ) ;
2010-06-09 14:07:43 +02:00
rv = __req_mod ( req , what , & m ) ;
2011-01-19 14:16:30 +01:00
spin_unlock_irqrestore ( & mdev - > tconn - > req_lock , flags ) ;
2009-09-25 16:07:19 -07:00
if ( m . bio )
complete_master_bio ( mdev , & m ) ;
2010-06-09 14:07:43 +02:00
return rv ;
2009-09-25 16:07:19 -07:00
}
2011-01-17 20:27:30 +01:00
2011-03-29 10:52:01 +02:00
static inline bool drbd_should_do_remote ( union drbd_dev_state s )
2011-01-17 20:27:30 +01:00
{
return s . pdsk = = D_UP_TO_DATE | |
( s . pdsk > = D_INCONSISTENT & &
s . conn > = C_WF_BITMAP_T & &
s . conn < C_AHEAD ) ;
/* Before proto 96 that was >= CONNECTED instead of >= C_WF_BITMAP_T.
That is equivalent since before 96 IO was frozen in the C_WF_BITMAP *
states . */
}
2011-03-29 10:52:01 +02:00
static inline bool drbd_should_send_out_of_sync ( union drbd_dev_state s )
2011-01-17 20:27:30 +01:00
{
return s . conn = = C_AHEAD | | s . conn = = C_WF_BITMAP_S ;
/* pdsk = D_INCONSISTENT as a consequence. Protocol 96 check not necessary
since we enter state C_AHEAD only if proto > = 96 */
}
2009-09-25 16:07:19 -07:00
# endif