2009-09-26 03:07:19 +04:00
/*
drbd_int . h
This file is part of DRBD by Philipp Reisner and Lars Ellenberg .
Copyright ( C ) 2001 - 2008 , LINBIT Information Technologies GmbH .
Copyright ( C ) 1999 - 2008 , Philipp Reisner < philipp . reisner @ linbit . com > .
Copyright ( C ) 2002 - 2008 , Lars Ellenberg < lars . ellenberg @ linbit . com > .
drbd is free software ; you can redistribute it and / or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation ; either version 2 , or ( at your option )
any later version .
drbd is distributed in the hope that it will be useful ,
but WITHOUT ANY WARRANTY ; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
GNU General Public License for more details .
You should have received a copy of the GNU General Public License
along with drbd ; see the file COPYING . If not , write to
the Free Software Foundation , 675 Mass Ave , Cambridge , MA 02139 , USA .
*/
# ifndef _DRBD_INT_H
# define _DRBD_INT_H
# include <linux/compiler.h>
# include <linux/types.h>
# include <linux/version.h>
# include <linux/list.h>
# include <linux/sched.h>
# include <linux/bitops.h>
# include <linux/slab.h>
# include <linux/crypto.h>
2009-10-07 21:26:00 +04:00
# include <linux/ratelimit.h>
2009-09-26 03:07:19 +04:00
# include <linux/tcp.h>
# include <linux/mutex.h>
# include <linux/major.h>
# include <linux/blkdev.h>
# include <linux/genhd.h>
2011-02-08 13:09:18 +03:00
# include <linux/idr.h>
2009-09-26 03:07:19 +04:00
# include <net/tcp.h>
# include <linux/lru_cache.h>
2011-05-23 00:47:17 +04:00
# include <linux/prefetch.h>
2011-03-07 14:49:34 +03:00
# include <linux/drbd_genl_api.h>
2011-01-27 16:07:51 +03:00
# include <linux/drbd.h>
# include "drbd_state.h"
2009-09-26 03:07:19 +04:00
/*
 * Lock-context annotations.
 * They expand to checker attributes only when __CHECKER__ is defined;
 * in every other build they expand to nothing.
 */
#ifndef __CHECKER__
# define __protected_by(x)
# define __protected_read_by(x)
# define __protected_write_by(x)
# define __must_hold(x)
#else
# define __protected_by(x)       __attribute__((require_context(x,1,999,"rdwr")))
# define __protected_read_by(x)  __attribute__((require_context(x,1,999,"read")))
# define __protected_write_by(x) __attribute__((require_context(x,1,999,"write")))
# define __must_hold(x)          __attribute__((context(x,1,1), require_context(x,1,999,"call")))
#endif
/* Execute stmt bracketed by __acquire(lock) / __release(lock), as a single statement. */
#define __no_warn(lock, stmt)		\
	do {				\
		__acquire(lock);	\
		stmt;			\
		__release(lock);	\
	} while (0)
/* module parameter, defined in drbd_main.c */
extern unsigned int minor_count ;
extern int disable_sendpage ;
extern int allow_oos ;
# ifdef CONFIG_DRBD_FAULT_INJECTION
extern int enable_faults ;
extern int fault_rate ;
extern int fault_devs ;
# endif
extern char usermode_helper [ ] ;
/* DRBD_SIG: signal used to wake the asender, and to interrupt the
 * sending task on disconnect.
 * (Original author's note: "I don't remember why XCPU ...")
 */
# define DRBD_SIG SIGXCPU
/* DRBD_SIGKILL: signal used to stop/restart our threads.
 * SIGTERM and SIGKILL cannot be used, since init sends those out on
 * runlevel changes; SIGHUP was chosen for now.
 */
# define DRBD_SIGKILL SIGHUP
/* Reserved 64-bit id constants.
 * NOTE(review): presumably special block_id values -- confirm against users. */
#define ID_IN_SYNC		(4711ULL)
#define ID_OUT_OF_SYNC		(4712ULL)
#define ID_SYNCER		(~0ULL)		/* all bits set, same value as -1ULL */

/* Bit 48 of a u64. */
#define UUID_NEW_BM_OFFSET	((u64)1 << 48)
2009-09-26 03:07:19 +04:00
struct drbd_conf ;
2011-01-19 14:26:59 +03:00
struct drbd_tconn ;
2009-09-26 03:07:19 +04:00
/* to shorten dev_warn(DEV, "msg"); and relatives statements */
# define DEV (disk_to_dev(mdev->vdisk))
2011-02-07 16:01:51 +03:00
/*
 * conn_printk - printk() a message prefixed with the connection's name.
 * @LEVEL: KERN_* level string, pasted in front of the format
 * @TCONN: pointer expression; its ->name member is printed via %s
 * @FMT:   format string literal (it is concatenated onto the prefix)
 * @ARGS:  optional arguments consumed by @FMT
 *
 * @TCONN is parenthesized so that any pointer-valued expression (such as
 * &obj) can be passed, not only a plain identifier.
 *
 * The conn_<level>() wrappers below only fix @LEVEL.
 */
#define conn_printk(LEVEL, TCONN, FMT, ARGS...) \
	printk(LEVEL " d-con %s: " FMT, (TCONN)->name , ## ARGS)
#define conn_alert(TCONN, FMT, ARGS...)  conn_printk(KERN_ALERT, TCONN, FMT, ## ARGS)
#define conn_crit(TCONN, FMT, ARGS...)   conn_printk(KERN_CRIT, TCONN, FMT, ## ARGS)
#define conn_err(TCONN, FMT, ARGS...)    conn_printk(KERN_ERR, TCONN, FMT, ## ARGS)
#define conn_warn(TCONN, FMT, ARGS...)   conn_printk(KERN_WARNING, TCONN, FMT, ## ARGS)
#define conn_notice(TCONN, FMT, ARGS...) conn_printk(KERN_NOTICE, TCONN, FMT, ## ARGS)
#define conn_info(TCONN, FMT, ARGS...)   conn_printk(KERN_INFO, TCONN, FMT, ## ARGS)
#define conn_dbg(TCONN, FMT, ARGS...)    conn_printk(KERN_DEBUG, TCONN, FMT, ## ARGS)
2009-09-26 03:07:19 +04:00
/*
 * D_ASSERT - log an error through dev_err(DEV, ...) when @exp is false.
 *
 * The message carries the stringified expression plus __FILE__/__LINE__.
 * Execution continues either way; nothing is returned.
 * DEV expands to an expression using a local "mdev", so one must be in scope.
 *
 * Wrapped in do { } while (0) so the macro is exactly one statement: a bare
 * "if" would capture the "else" of an enclosing if/else written without
 * braces.
 */
#define D_ASSERT(exp)							\
	do {								\
		if (!(exp))						\
			dev_err(DEV, " ASSERT( " #exp " ) in %s:%d \n ",	\
				__FILE__, __LINE__);			\
	} while (0)
2010-12-15 21:31:20 +03:00
/**
 * expect  -  Make an assertion
 *
 * Evaluates @exp once.  When it is false, a message naming the expression
 * and the calling function is logged through dev_err(DEV, ...).
 * Unlike the assert macro, this macro returns a boolean result.
 */
#define expect(exp) ({							\
		bool _expect_ok = (exp);				\
		if (!_expect_ok)					\
			dev_err(DEV, " ASSERTION %s FAILED in %s \n ",	\
				#exp, __func__);			\
		_expect_ok;						\
		})
2009-09-26 03:07:19 +04:00
/*
 * Fault insertion types.
 * The numbering is positional (0..9); drbd_insert_fault() uses the value as
 * a bit index into enable_faults.
 */
enum {
	DRBD_FAULT_MD_WR,	/* 0: meta data write */
	DRBD_FAULT_MD_RD,	/* 1: meta data read */
	DRBD_FAULT_RS_WR,	/* 2: resync write */
	DRBD_FAULT_RS_RD,	/* 3: resync read */
	DRBD_FAULT_DT_WR,	/* 4: data write */
	DRBD_FAULT_DT_RD,	/* 5: data read */
	DRBD_FAULT_DT_RA,	/* 6: data read ahead */
	DRBD_FAULT_BM_ALLOC,	/* 7: bitmap allocation */
	DRBD_FAULT_AL_EE,	/* 8: alloc ee */
	DRBD_FAULT_RECEIVE,	/* 9: Changes some bytes upon receiving a [rs]data block */

	DRBD_FAULT_MAX,		/* 10: number of fault types */
};
extern unsigned int
_drbd_insert_fault(struct drbd_conf *mdev, unsigned int type);

/*
 * drbd_insert_fault - decide whether a fault of @type should be injected.
 * @mdev: device, passed through to _drbd_insert_fault()
 * @type: one of the DRBD_FAULT_* values
 *
 * Without CONFIG_DRBD_FAULT_INJECTION this always returns 0.
 * Otherwise it returns 1 only if fault_rate is non-zero, bit @type is set in
 * enable_faults, and _drbd_insert_fault() returns non-zero; else 0.
 */
static inline int
drbd_insert_fault(struct drbd_conf *mdev, unsigned int type)
{
#ifdef CONFIG_DRBD_FAULT_INJECTION
	if (!fault_rate)
		return 0;
	if (!(enable_faults & (1 << type)))
		return 0;
	return _drbd_insert_fault(mdev, type) != 0;
#else
	return 0;
#endif
}
2009-09-26 03:07:19 +04:00
/* Integer division rounding _UP_: the quotient, plus one if there is a remainder.
 * Both arguments are evaluated twice. */
#define div_ceil(A, B) ((A) / (B) + !!((A) % (B)))
/* Plain (truncating) integer division. */
#define div_floor(A, B) ((A) / (B))
/* drbd_meta-data.c (still in drbd_main.c) */
/* Meta-data magic: DRBD_MAGIC plus 4, marking the
 * 4th incarnation of the disk layout. */
# define DRBD_MD_MAGIC (DRBD_MAGIC+4)
extern struct ratelimit_state drbd_ratelimit_state ;
2011-02-23 03:53:16 +03:00
extern struct idr minors ;
2011-02-18 16:56:45 +03:00
extern struct list_head drbd_tconns ;
2011-04-14 01:21:29 +04:00
extern struct rw_semaphore drbd_cfg_rwsem ;
2011-04-14 01:46:05 +04:00
/* drbd_cfg_rwsem protects: drbd_tconns list, minors idr, tconn->volumes idr
   note: non-sleeping iterations over the idrs are protected by RCU */
2009-09-26 03:07:19 +04:00
/* on the wire */
2011-01-26 19:39:41 +03:00
/* Packet command codes as they appear on the wire; every value is fixed. */
enum drbd_packet {
	/* receiver (data socket) */
	P_DATA		      = 0x00,
	P_DATA_REPLY	      = 0x01, /* Response to P_DATA_REQUEST */
	P_RS_DATA_REPLY	      = 0x02, /* Response to P_RS_DATA_REQUEST */
	P_BARRIER	      = 0x03,
	P_BITMAP	      = 0x04,
	P_BECOME_SYNC_TARGET  = 0x05,
	P_BECOME_SYNC_SOURCE  = 0x06,
	P_UNPLUG_REMOTE	      = 0x07, /* Used at various times to hint the peer */
	P_DATA_REQUEST	      = 0x08, /* Used to ask for a data block */
	P_RS_DATA_REQUEST     = 0x09, /* Used to ask for a data block for resync */
	P_SYNC_PARAM	      = 0x0a,
	P_PROTOCOL	      = 0x0b,
	P_UUIDS		      = 0x0c,
	P_SIZES		      = 0x0d,
	P_STATE		      = 0x0e,
	P_SYNC_UUID	      = 0x0f,
	P_AUTH_CHALLENGE      = 0x10,
	P_AUTH_RESPONSE	      = 0x11,
	P_STATE_CHG_REQ	      = 0x12,

	/* asender (meta socket) */
	P_PING		      = 0x13,
	P_PING_ACK	      = 0x14,
	P_RECV_ACK	      = 0x15, /* Used in protocol B */
	P_WRITE_ACK	      = 0x16, /* Used in protocol C */
	P_RS_WRITE_ACK	      = 0x17, /* Is a P_WRITE_ACK, additionally call set_in_sync(). */
	P_DISCARD_WRITE	      = 0x18, /* Used in proto C, two-primaries conflict detection */
	P_NEG_ACK	      = 0x19, /* Sent if local disk is unusable */
	P_NEG_DREPLY	      = 0x1a, /* Local disk is broken... */
	P_NEG_RS_DREPLY	      = 0x1b, /* Local disk is broken... */
	P_BARRIER_ACK	      = 0x1c,
	P_STATE_CHG_REPLY     = 0x1d,

	/* "new" commands, no longer fitting into the ordering scheme above */
	P_OV_REQUEST	      = 0x1e, /* data socket */
	P_OV_REPLY	      = 0x1f,
	P_OV_RESULT	      = 0x20, /* meta socket */
	P_CSUM_RS_REQUEST     = 0x21, /* data socket */
	P_RS_IS_IN_SYNC	      = 0x22, /* meta socket */
	P_SYNC_PARAM89	      = 0x23, /* data socket, protocol version 89 replacement for P_SYNC_PARAM */
	P_COMPRESSED_BITMAP   = 0x24, /* compressed or otherwise encoded bitmap transfer */
	/* P_CKPT_FENCE_REQ      = 0x25, * currently reserved for protocol D */
	/* P_CKPT_DISABLE_REQ    = 0x26, * currently reserved for protocol D */
	P_DELAY_PROBE	      = 0x27, /* is used on BOTH sockets */
	P_OUT_OF_SYNC	      = 0x28, /* Mark as out of sync (Outrunning), data socket */
	P_RS_CANCEL	      = 0x29, /* meta: Used to cancel RS_DATA_REQUEST packet by SyncSource */
	P_CONN_ST_CHG_REQ     = 0x2a, /* data sock: Connection wide state request */
	P_CONN_ST_CHG_REPLY   = 0x2b, /* meta sock: Connection wide state req reply */
	P_RETRY_WRITE	      = 0x2c, /* Protocol C: retry conflicting write request */

	P_MAY_IGNORE	      = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */
	P_MAX_OPT_CMD	      = 0x101,

	/* special command ids for handshake */
	P_INITIAL_META	      = 0xfff1, /* First Packet on the MetaSock */
	P_INITIAL_DATA	      = 0xfff2, /* First Packet on the Socket */

	P_CONNECTION_FEATURES = 0xfffe	/* FIXED for the next century! */
};
2011-01-26 19:39:41 +03:00
extern const char * cmdname ( enum drbd_packet cmd ) ;
2009-09-26 03:07:19 +04:00
/* Context for sending/receiving the bitmap,
 * possibly in some encoding scheme. */
struct bm_xfer_ctx {
	/* "const": total number of bits and of long words in the bitmap,
	 * cached here so the accessor functions need not be called
	 * over and over again. */
	unsigned long bm_bits;
	unsigned long bm_words;

	/* current position within the bitmap during the transfer */
	unsigned long bit_offset;
	unsigned long word_offset;

	/* statistics; index: (h->command == P_BITMAP) */
	unsigned int packets[2];
	unsigned int bytes[2];
};
extern void INFO_bm_xfer_stats ( struct drbd_conf * mdev ,
const char * direction , struct bm_xfer_ctx * c ) ;
/*
 * Recompute c->word_offset from c->bit_offset.
 *
 * word_offset counts "native long words" (32 or 64 bit), aligned at 64 bit.
 * An encoded packet may end at an unaligned bit offset.  In case a fallback
 * clear text packet is transmitted in between, the offset is adjusted back
 * to the last 64bit aligned "native long word", which makes coding and
 * decoding the plain text bitmap much more convenient.
 */
static inline void bm_xfer_ctx_bit_to_word_offset(struct bm_xfer_ctx *c)
{
#if BITS_PER_LONG == 64
	c->word_offset = c->bit_offset / 64;
#elif BITS_PER_LONG == 32
	/* index in 32-bit words, rounded down to an even word */
	c->word_offset = (c->bit_offset >> 5) & ~(1UL);
#else
# error "unsupported BITS_PER_LONG"
#endif
}
/* Provide __packed unless something already defined it. */
#if !defined(__packed)
# define __packed __attribute__((packed))
#endif
/* This is the layout for a packet on the wire.
* The byteorder is the network byte order .
* ( except block_id and barrier fields .
* these are pointers to local structs
* and have no relevance for the partner ,
* which just echoes them as received . )
*
* NOTE that the payload starts at a long aligned offset ,
* regardless of 32 or 64 bit arch !
*/
2010-08-20 15:36:10 +04:00
struct p_header80 {
2009-09-26 03:07:19 +04:00
u32 magic ;
u16 command ;
u16 length ; /* bytes of data after this header */
} __packed ;
2010-08-20 15:36:10 +04:00
/* Header for big packets, Used for data packets exceeding 64kB */
struct p_header95 {
u16 magic ; /* use DRBD_MAGIC_BIG here */
u16 command ;
2011-03-22 15:17:47 +03:00
u32 length ;
2010-08-20 15:36:10 +04:00
} __packed ;
2011-03-30 18:00:17 +04:00
/* Packet header carrying a volume field and a 32-bit length (packed). */
struct p_header100 {
	u32	magic;
	u16	volume;
	u16	command;
	u32	length;
	u32	pad;
} __packed;
2011-03-30 13:38:49 +04:00
extern unsigned int drbd_header_size ( struct drbd_tconn * tconn ) ;
2009-09-26 03:07:19 +04:00
/* these defines must not be changed without changing the protocol version */
2010-08-25 13:58:05 +04:00
/* dp_flags bits; each flag is a distinct power of two (1 .. 64). */
#define DP_HARDBARRIER		(1 << 0)	/* deprecated */
#define DP_RW_SYNC		(1 << 1)	/* equals REQ_SYNC */
#define DP_MAY_SET_IN_SYNC	(1 << 2)
#define DP_UNPLUG		(1 << 3)	/* not used anymore */
#define DP_FUA			(1 << 4)	/* equals REQ_FUA */
#define DP_FLUSH		(1 << 5)	/* equals REQ_FLUSH */
#define DP_DISCARD		(1 << 6)	/* equals REQ_DISCARD */
2009-09-26 03:07:19 +04:00
/* Payload header of a data packet (packed). */
struct p_data {
	u64	sector;		/* 64 bits sector number */
	u64	block_id;	/* to identify the request in protocol B&C */
	u32	seq_num;
	u32	dp_flags;
} __packed;
/*
 * commands which share a struct :
 * p_block_ack :
 * P_RECV_ACK ( proto B ) , P_WRITE_ACK ( proto C ) ,
 * P_DISCARD_WRITE ( proto C , two - primaries conflict detection )
 * p_block_req :
 * P_DATA_REQUEST , P_RS_DATA_REQUEST
 */
/* Acknowledgement payload (packed). */
struct p_block_ack {
	u64	sector;
	u64	block_id;
	u32	blksize;
	u32	seq_num;
} __packed;
/* Block request payload (packed). */
struct p_block_req {
	u64	sector;
	u64	block_id;
	u32	blksize;
	u32	pad;		/* to multiple of 8 Byte */
} __packed;
/*
 * commands with their own struct for additional fields :
 * P_CONNECTION_FEATURES
 * P_BARRIER
 * P_BARRIER_ACK
 * P_SYNC_PARAM
 * ReportParams
 */
2011-03-28 19:05:50 +04:00
struct p_connection_features {
2009-09-26 03:07:19 +04:00
u32 protocol_min ;
u32 feature_flags ;
u32 protocol_max ;
/* should be more than enough for future enhancements
* for now , feature_flags and the reserverd array shall be zero .
*/
u32 _pad ;
u64 reserverd [ 7 ] ;
} __packed ;
/* Barrier payload (packed). */
struct p_barrier {
	u32	barrier;	/* barrier number _handle_ only */
	u32	pad;		/* to multiple of 8 Byte */
} __packed;
/* Barrier acknowledgement payload (packed). */
struct p_barrier_ack {
	u32	barrier;
	u32	set_size;
} __packed;
/* Resync parameters: rate, followed by a variable-length trailer (packed). */
struct p_rs_param {
	u32	rate;

	/* Since protocol version 88 and higher. */
	char	verify_alg[0];
} __packed;
/* Resync parameters with fixed-size algorithm names (packed). */
struct p_rs_param_89 {
	u32	rate;

	/* protocol version 89: */
	char	verify_alg[SHARED_SECRET_MAX];
	char	csums_alg[SHARED_SECRET_MAX];
} __packed;
2010-07-06 19:25:54 +04:00
/* Same leading fields as p_rs_param_89, plus four c_* values (packed). */
struct p_rs_param_95 {
	u32	rate;
	char	verify_alg[SHARED_SECRET_MAX];
	char	csums_alg[SHARED_SECRET_MAX];
	u32	c_plan_ahead;
	u32	c_delay_target;
	u32	c_fill_target;
	u32	c_max_rate;
} __packed;
2010-02-02 23:03:50 +03:00
/* Connection flag bits. */
enum drbd_conn_flags {
	CF_WANT_LOSE	= 1 << 0,
	CF_DRY_RUN	= 1 << 1,
};
2009-09-26 03:07:19 +04:00
/* Payload of the protocol packet (packed). */
struct p_protocol {
	u32	protocol;
	u32	after_sb_0p;
	u32	after_sb_1p;
	u32	after_sb_2p;
	u32	conn_flags;
	u32	two_primaries;

	/* Since protocol version 87 and higher. */
	char	integrity_alg[0];
} __packed;
/* Array of UI_EXTENDED_SIZE uuids (packed). */
struct p_uuids {
	u64	uuid[UI_EXTENDED_SIZE];
} __packed;
/* A single uuid (packed). */
struct p_rs_uuid {
	u64	uuid;
} __packed;
struct p_sizes {
u64 d_size ; /* size of disk */
u64 u_size ; /* user requested size */
u64 c_size ; /* current exported size */
2010-11-11 17:19:07 +03:00
u32 max_bio_size ; /* Maximal size of a BIO */
2010-03-24 19:11:33 +03:00
u16 queue_order_type ; /* not yet implemented in DRBD*/
u16 dds_flags ; /* use enum dds_flags here. */
2009-09-26 03:07:19 +04:00
} __packed ;
/* One 32-bit state word (packed). */
struct p_state {
	u32	state;
} __packed;
/* State request: a mask and a value (packed). */
struct p_req_state {
	u32	mask;
	u32	val;
} __packed;
/* Reply to a state request: one return code (packed). */
struct p_req_state_reply {
	u32	retcode;
} __packed;
/* NOTE(review): by its name, a parameter block of the drbd 0.6 format -- confirm. */
struct p_drbd06_param {
	u64	size;
	u32	state;
	u32	blksize;
	u32	protocol;
	u32	version;
	u32	gen_cnt[5];
	u32	bit_map_gen[5];
} __packed;
/* Discard payload (packed). */
struct p_discard {
	u64	block_id;
	u32	seq_num;
	u32	pad;
} __packed;
2010-10-27 16:33:00 +04:00
/* Describes a block by start sector and size (packed). */
struct p_block_desc {
	u64	sector;
	u32	blksize;
	u32	pad;		/* to multiple of 8 Byte */
} __packed;
2009-09-26 03:07:19 +04:00
/* Valid values for the encoding field.
 * Bump the protocol version when changing this. */
enum drbd_bitmap_code {
	/* RLE_VLI_Bytes = 0, and other bit variants,
	 * had been defined during algorithm evaluation. */
	RLE_VLI_Bits = 2,
};
/* Compressed bitmap payload: one encoding byte, then the code bytes (packed). */
struct p_compressed_bm {
	/* Layout of the encoding byte:
	 *   (encoding & 0x0f)       : actual encoding, see enum drbd_bitmap_code
	 *   (encoding & 0x80)       : polarity (set/unset) of first runlength
	 *   ((encoding >> 4) & 0x07): pad_bits, number of trailing zero bits
	 *                             used to pad up to head.length bytes
	 */
	u8	encoding;

	u8	code[0];
} __packed;
2010-08-20 15:36:10 +04:00
/* Delay probe payload (packed). */
struct p_delay_probe93 {
	u32	seq_num;	/* sequence number to match the two probe packets */
	u32	offset;		/* usecs the probe got sent after the reference time point */
} __packed;
2011-03-30 13:53:51 +04:00
/*
 * Bitmap packets need to fit within a single page on the sender and
 * receiver, so we are limited to 4 KiB (and not to PAGE_SIZE, which can
 * be bigger).
 */
#define DRBD_SOCKET_BUFFER_SIZE	4096
2009-09-26 03:07:19 +04:00
/**********************************************************************/
/* Values stored in drbd_thread.t_state. */
enum drbd_thread_state {
	NONE		= 0,
	RUNNING		= 1,
	EXITING		= 2,
	RESTARTING	= 3
};
/* State kept for one DRBD thread. */
struct drbd_thread {
	spinlock_t		t_lock;
	struct task_struct	*task;
	struct completion	stop;
	enum drbd_thread_state	t_state;	/* see get_t_state() */
	int			(*function)(struct drbd_thread *);
	struct drbd_tconn	*tconn;
	int			reset_cpu_mask;
	char			name[9];
};
static inline enum drbd_thread_state get_t_state ( struct drbd_thread * thi )
{
/* THINK testing the t_state seems to be uncritical in all cases
* ( but thread_ { start , stop } ) , so we can read it * without * the lock .
* - - lge */
smp_rmb ( ) ;
return thi - > t_state ;
}
/* A work item: list linkage, a callback, and the object it refers to. */
struct drbd_work {
	struct list_head list;
	int (*cb)(struct drbd_work *, int cancel);

	/* exactly one of these is meaningful for a given item */
	union {
		struct drbd_conf  *mdev;
		struct drbd_tconn *tconn;
	};
};
2011-01-03 19:09:58 +03:00
# include "drbd_interval.h"
2011-02-22 04:15:32 +03:00
extern int drbd_wait_misc ( struct drbd_conf * , struct drbd_interval * ) ;
2009-09-26 03:07:19 +04:00
struct drbd_request {
struct drbd_work w ;
/* if local IO is not allowed, will be NULL.
* if local IO _is_ allowed , holds the locally submitted bio clone ,
* or , after local IO completion , the ERR_PTR ( error ) .
2011-02-17 18:46:59 +03:00
* see drbd_request_endio ( ) . */
2009-09-26 03:07:19 +04:00
struct bio * private_bio ;
2011-01-03 19:09:58 +03:00
struct drbd_interval i ;
2009-09-26 03:07:19 +04:00
unsigned int epoch ; /* barrier_nr */
/* barrier_nr: used to check on "completion" whether this req was in
* the current epoch , and we therefore have to close it ,
* starting a new epoch . . .
*/
struct list_head tl_requests ; /* ring list in the transfer log */
struct bio * master_bio ; /* master bio pointer */
unsigned long rq_state ; /* see comments above _req_mod() */
int seq_num ;
unsigned long start_time ;
} ;
struct drbd_tl_epoch {
	struct drbd_work	w;
	struct list_head	requests;	/* requests before */
	struct drbd_tl_epoch	*next;		/* pointer to the next barrier */
	unsigned int		br_number;	/* the barriers identifier. */
	int			n_writes;	/* number of requests attached before this barrier */
};
struct drbd_epoch {
	struct list_head	list;
	unsigned int		barrier_nr;
	atomic_t		epoch_size;	/* increased on every request added. */
	atomic_t		active;		/* increased on every req. added, and dec on every finished. */
	unsigned long		flags;		/* drbd_epoch flag bits, DE_* */
};
/* drbd_epoch flag bits */
enum {
	DE_HAVE_BARRIER_NUMBER = 0,
};
/* Epoch events; EV_CLEANUP is a separate flag value rather than the next number. */
enum epoch_event {
	EV_PUT			= 0,
	EV_GOT_BARRIER_NR	= 1,
	EV_BECAME_LAST		= 2,
	EV_CLEANUP		= 32,	/* used as flag */
};
/* A work item paired with a completion. */
struct drbd_wq_barrier {
	struct drbd_work	w;
	struct completion	done;
};
/* A digest buffer together with its size. */
struct digest_info {
	int	digest_size;
	void	*digest;
};
2011-02-04 17:30:34 +03:00
struct drbd_peer_request {
2010-05-14 19:10:48 +04:00
struct drbd_work w ;
2010-07-21 12:20:17 +04:00
struct drbd_epoch * epoch ; /* for writes */
2010-05-14 19:10:48 +04:00
struct page * pages ;
atomic_t pending_bios ;
2011-01-14 22:59:35 +03:00
struct drbd_interval i ;
2010-05-14 19:10:48 +04:00
/* see comments on ee flag bits below */
unsigned long flags ;
2010-07-21 12:20:17 +04:00
union {
u64 block_id ;
struct digest_info * digest ;
} ;
2010-05-14 19:10:48 +04:00
} ;
/* ee flag bits.
 * While corresponding bios are in flight, the only modification will be
 * set_bit WAS_ERROR, which has to be atomic.
 * If no bios are in flight yet, or all have been completed,
 * non-atomic modification to ee->flags is ok.
 */
enum {
	__EE_CALL_AL_COMPLETE_IO = 0,
	__EE_MAY_SET_IN_SYNC = 1,

	/* In case a barrier failed,
	 * we need to resubmit without the barrier flag. */
	__EE_RESUBMITTED = 2,

	/* we may have several bios per peer request.
	 * if any of those fail, we set this flag atomically
	 * from the endio callback */
	__EE_WAS_ERROR = 3,

	/* This ee has a pointer to a digest instead of a block id */
	__EE_HAS_DIGEST = 4,

	/* Conflicting local requests need to be restarted after this request */
	__EE_RESTART_REQUESTS = 5,
};

/* Bit masks matching the bit numbers above. */
#define EE_CALL_AL_COMPLETE_IO	(1 << __EE_CALL_AL_COMPLETE_IO)
#define EE_MAY_SET_IN_SYNC	(1 << __EE_MAY_SET_IN_SYNC)
#define EE_RESUBMITTED		(1 << __EE_RESUBMITTED)
#define EE_WAS_ERROR		(1 << __EE_WAS_ERROR)
#define EE_HAS_DIGEST		(1 << __EE_HAS_DIGEST)
#define EE_RESTART_REQUESTS	(1 << __EE_RESTART_REQUESTS)
2009-09-26 03:07:19 +04:00
2011-02-07 16:30:33 +03:00
/* flag bits per mdev (bit numbers, assigned by position starting at 0) */
enum {
	CREATE_BARRIER,		/* next P_DATA is preceded by a P_BARRIER */
	UNPLUG_QUEUED,		/* only relevant with kernel 2.4 */
	UNPLUG_REMOTE,		/* sending a "UnplugRemote" could help */
	MD_DIRTY,		/* current uuids and flags not yet on disk */
	USE_DEGR_WFC_T,		/* degr-wfc-timeout instead of wfc-timeout. */
	CL_ST_CHG_SUCCESS,
	CL_ST_CHG_FAIL,
	CRASHED_PRIMARY,	/* This node was a crashed primary.
				 * Gets cleared when the state.conn
				 * goes into C_CONNECTED state. */
	NO_BARRIER_SUPP,	/* underlying block device doesn't implement barriers */
	CONSIDER_RESYNC,

	MD_NO_FUA,		/* Users wants us to not use FUA/FLUSH on meta data dev */
	SUSPEND_IO,		/* suspend application io */
	BITMAP_IO,		/* suspend application io;
				   once no more io in flight, start bitmap io */
	BITMAP_IO_QUEUED,	/* Started bitmap IO */
	GO_DISKLESS,		/* Disk is being detached, on io-error or admin request. */
	WAS_IO_ERROR,		/* Local disk failed returned IO error */
	RESYNC_AFTER_NEG,	/* Resync after online grow after the attach&negotiate finished. */
	RESIZE_PENDING,		/* Size change detected locally, waiting for the response from
				 * the peer, if it changed there as well. */
	NEW_CUR_UUID,		/* Create new current UUID when thawing IO */
	AL_SUSPENDED,		/* Activity logging is currently suspended. */
	AHEAD_TO_SYNC_SOURCE,	/* Ahead -> SyncSource queued */
	B_RS_H_DONE,		/* Before resync handler done (already executed) */
};
struct drbd_bitmap ; /* opaque for drbd_conf */
2011-01-21 12:56:44 +03:00
/* definition of bits in bm_flags to be used in drbd_bm_lock
 * and drbd_bitmap_io and friends. */
enum bm_flag {
	/* do we need to kfree, or vfree bm_pages? */
	BM_P_VMALLOCED = 0x10000, /* internal use only, will be masked out */

	/* the individual "locked for bulk operation" restrictions */
	BM_DONT_CLEAR = 0x1,
	BM_DONT_SET   = 0x2,
	BM_DONT_TEST  = 0x4,

	/* currently locked for bulk operation: all three bits, 0x7 */
	BM_LOCKED_MASK = BM_DONT_CLEAR | BM_DONT_SET | BM_DONT_TEST,

	/* (test bit, count bit) allowed (common case): 0x3 */
	BM_LOCKED_TEST_ALLOWED = BM_DONT_CLEAR | BM_DONT_SET,

	/* testing bits, as well as setting new bits allowed, but clearing bits
	 * would be unexpected.  Used during bitmap receive.  Setting new bits
	 * requires sending of "out-of-sync" information, though.  Value 0x1. */
	BM_LOCKED_SET_ALLOWED = BM_DONT_CLEAR,

	/* clear is not expected while bitmap is locked for bulk operation */
};
2009-09-26 03:07:19 +04:00
/* TODO sort members for performance
 * MAYBE group them further */

/* THINK maybe we actually want to use the default "event/%s" worker threads
 * or similar in linux 2.6, which uses per cpu data and threads.
 */
struct drbd_work_queue {
	struct list_head	q;
	struct semaphore	s;	/* producers up it, worker down()s it */
	spinlock_t		q_lock;	/* to protect the list. */
};
/* One socket together with its work queue, mutex and buffers. */
struct drbd_socket {
	struct drbd_work_queue	work;
	struct mutex		mutex;
	struct socket		*socket;

	/* this way we get our
	 * send/receive buffers off the stack */
	void			*sbuf;
	void			*rbuf;
};
struct drbd_md {
	u64	md_offset;	/* sector offset to 'super' block */

	u64	la_size_sect;	/* last agreed size, unit sectors */
	u64	uuid[UI_SIZE];
	u64	device_uuid;
	u32	flags;
	u32	md_size_sect;

	s32	al_offset;	/* signed relative sector offset to al area */
	s32	bm_offset;	/* signed relative sector offset to bitmap */

	/* u32 al_nr_extents;	   important for restoring the AL
	 * is stored into ldev->dc.al_extents, which in turn
	 * gets applied to act_log->nr_elements
	 */
};
struct drbd_backing_dev {
	struct block_device	*backing_bdev;
	struct block_device	*md_bdev;
	struct drbd_md		md;
	struct disk_conf	dc;		/* The user provided config... */
	sector_t		known_size;	/* last known size of that backing device */
};
/* A device pointer, a completion and an error code, grouped for one meta-data IO. */
struct drbd_md_io {
	struct drbd_conf	*mdev;
	struct completion	event;
	int			error;
};
struct bm_io_work {
struct drbd_work w ;
char * why ;
2011-01-21 12:56:44 +03:00
enum bm_flag flags ;
2009-09-26 03:07:19 +04:00
int ( * io_fn ) ( struct drbd_conf * mdev ) ;
void ( * done ) ( struct drbd_conf * mdev , int rv ) ;
} ;
/* Write ordering methods. */
enum write_ordering_e {
	WO_none		= 0,
	WO_drain_io	= 1,
	WO_bdev_flush	= 2,
};
2010-07-06 13:14:00 +04:00
/* An int array with a head index and a size. */
struct fifo_buffer {
	int		*values;
	unsigned int	head_index;
	unsigned int	size;
};
2011-02-07 16:30:33 +03:00
/* flag bits per tconn (bit numbers, assigned by position starting at 0) */
enum {
	NET_CONGESTED,		/* The data socket is congested */
	DISCARD_CONCURRENT,	/* Set on one node, cleared on the peer! */
	SEND_PING,		/* whether asender should send a ping asap */
	SIGNAL_ASENDER,		/* whether asender wants to be interrupted */
	GOT_PING_ACK,		/* set when we receive a ping_ack packet, ping_wait gets woken */
	CONN_WD_ST_CHG_OKAY,
	CONN_WD_ST_CHG_FAIL,
	CONFIG_PENDING,		/* serialization of (re)configuration requests.
				 * if set, also prevents the device from dying */
	OBJECT_DYING,		/* device became unconfigured,
				 * but worker thread is still handling the cleanup.
				 * reconfiguring (nl_disk_conf, nl_net_conf) is disallowed,
				 * while this is set. */
	CONN_DRY_RUN,		/* Expect disconnect after resync handshake. */
};
2011-01-19 14:26:59 +03:00
struct drbd_tconn { /* is a resource from the config file */
char * name ; /* Resource name */
2011-03-11 00:18:18 +03:00
struct list_head all_tconn ; /* linked on global drbd_tconns */
2011-02-11 22:11:10 +03:00
struct idr volumes ; /* <tconn, vnr> to mdev mapping */
enum drbd_conns cstate ; /* Only C_STANDALONE to C_WF_REPORT_PARAMS */
2011-03-28 18:18:39 +04:00
unsigned susp : 1 ; /* IO suspended by user */
unsigned susp_nod : 1 ; /* IO suspended because no data */
unsigned susp_fen : 1 ; /* IO suspended because fence peer handler runs */
2011-02-11 22:11:10 +03:00
struct mutex cstate_mutex ; /* Protects graceful disconnects */
2011-01-19 14:26:59 +03:00
2011-02-08 13:09:18 +03:00
unsigned long flags ;
2011-01-19 14:26:59 +03:00
struct net_conf * net_conf ; /* protected by get_net_conf() and put_net_conf() */
2011-01-19 15:48:44 +03:00
atomic_t net_cnt ; /* Users of net_conf */
wait_queue_head_t net_cnt_wait ;
2011-02-09 16:10:32 +03:00
wait_queue_head_t ping_wait ; /* Woken upon reception of a ping, and a state change */
2011-03-23 16:31:09 +03:00
struct res_opts res_opts ;
2011-01-19 15:55:45 +03:00
struct drbd_socket data ; /* data/barrier/cstate/parameter packets */
struct drbd_socket meta ; /* ping/ack (metadata) packets */
2011-01-19 16:12:51 +03:00
int agreed_pro_version ; /* actually used protocol version */
unsigned long last_received ; /* in jiffies, either socket */
unsigned int ko_count ;
2011-01-19 16:02:01 +03:00
2011-01-19 16:16:30 +03:00
spinlock_t req_lock ;
struct drbd_tl_epoch * unused_spare_tle ; /* for pre-allocation */
struct drbd_tl_epoch * newest_tle ;
struct drbd_tl_epoch * oldest_tle ;
struct list_head out_of_sequence_requests ;
2011-01-19 16:31:32 +03:00
struct crypto_hash * cram_hmac_tfm ;
struct crypto_hash * integrity_w_tfm ; /* to be used by the worker thread */
struct crypto_hash * integrity_r_tfm ; /* to be used by the receiver thread */
2011-03-23 16:31:09 +03:00
struct crypto_hash * csums_tfm ;
struct crypto_hash * verify_tfm ;
2011-01-19 16:31:32 +03:00
void * int_dig_in ;
void * int_dig_vv ;
2011-01-19 16:02:01 +03:00
struct drbd_thread receiver ;
struct drbd_thread worker ;
struct drbd_thread asender ;
2011-02-08 14:46:30 +03:00
cpumask_var_t cpu_mask ;
2011-01-19 14:26:59 +03:00
} ;
2009-09-26 03:07:19 +04:00
struct drbd_conf {
2011-01-19 14:26:59 +03:00
struct drbd_tconn * tconn ;
int vnr ; /* volume number within the connection */
2009-09-26 03:07:19 +04:00
/* things that are stored as / read from meta data on disk */
unsigned long flags ;
/* configured by drbdsetup */
struct drbd_backing_dev * ldev __protected_by ( local ) ;
sector_t p_size ; /* partner's disk size */
struct request_queue * rq_queue ;
struct block_device * this_bdev ;
struct gendisk * vdisk ;
struct drbd_work resync_work ,
unplug_work ,
2010-09-14 22:26:27 +04:00
go_diskless ,
2010-10-27 19:32:36 +04:00
md_sync_work ,
start_resync_work ;
2009-09-26 03:07:19 +04:00
struct timer_list resync_timer ;
struct timer_list md_sync_timer ;
2011-01-14 18:03:11 +03:00
struct timer_list start_resync_timer ;
2011-03-01 13:08:28 +03:00
struct timer_list request_timer ;
2010-09-03 12:00:09 +04:00
# ifdef DRBD_DEBUG_MD_SYNC
struct {
unsigned int line ;
const char * func ;
} last_md_mark_dirty ;
# endif
2009-09-26 03:07:19 +04:00
/* Used after attach while negotiating new disk state. */
union drbd_state new_state_tmp ;
2011-03-29 12:52:01 +04:00
union drbd_dev_state state ;
2009-09-26 03:07:19 +04:00
wait_queue_head_t misc_wait ;
wait_queue_head_t state_wait ; /* upon each state change. */
unsigned int send_cnt ;
unsigned int recv_cnt ;
unsigned int read_cnt ;
unsigned int writ_cnt ;
unsigned int al_writ_cnt ;
unsigned int bm_writ_cnt ;
atomic_t ap_bio_cnt ; /* Requests we need to complete */
atomic_t ap_pending_cnt ; /* AP data packets on the wire, ack expected */
atomic_t rs_pending_cnt ; /* RS request/data packets on the wire */
atomic_t unacked_cnt ; /* Need to send replys for */
atomic_t local_cnt ; /* Waiting for local completion */
2011-01-19 15:48:44 +03:00
2011-01-21 19:18:39 +03:00
/* Interval tree of pending local requests */
struct rb_root read_requests ;
2011-01-20 17:00:24 +03:00
struct rb_root write_requests ;
2010-12-14 17:13:04 +03:00
/* blocks to resync in this run [unit BM_BLOCK_SIZE] */
2009-09-26 03:07:19 +04:00
unsigned long rs_total ;
2010-12-14 17:13:04 +03:00
/* number of resync blocks that failed in this run */
2009-09-26 03:07:19 +04:00
unsigned long rs_failed ;
/* Syncer's start time [unit jiffies] */
unsigned long rs_start ;
/* cumulated time in PausedSyncX state [unit jiffies] */
unsigned long rs_paused ;
2010-08-11 23:21:50 +04:00
/* skipped because csum was equal [unit BM_BLOCK_SIZE] */
unsigned long rs_same_csum ;
# define DRBD_SYNC_MARKS 8
# define DRBD_SYNC_MARK_STEP (3*HZ)
2009-09-26 03:07:19 +04:00
/* block not up-to-date at mark [unit BM_BLOCK_SIZE] */
2010-08-11 23:21:50 +04:00
unsigned long rs_mark_left [ DRBD_SYNC_MARKS ] ;
2009-09-26 03:07:19 +04:00
/* marks's time [unit jiffies] */
2010-08-11 23:21:50 +04:00
unsigned long rs_mark_time [ DRBD_SYNC_MARKS ] ;
/* current index into rs_mark_{left,time} */
int rs_last_mark ;
2009-09-26 03:07:19 +04:00
/* where does the admin want us to start? (sector) */
sector_t ov_start_sector ;
/* where are we now? (sector) */
sector_t ov_position ;
/* Start sector of out of sync range (to merge printk reporting). */
sector_t ov_last_oos_start ;
/* size of out-of-sync range in sectors. */
sector_t ov_last_oos_size ;
unsigned long ov_left ; /* in bits */
struct drbd_bitmap * bitmap ;
unsigned long bm_resync_fo ; /* bit offset for drbd_bm_find_next */
/* Used to track operations of resync... */
struct lru_cache * resync ;
/* Number of locked elements in resync LRU */
unsigned int resync_locked ;
/* resync extent number waiting for application requests */
unsigned int resync_wenr ;
int open_cnt ;
u64 * p_uuid ;
struct drbd_epoch * current_epoch ;
spinlock_t epoch_lock ;
unsigned int epochs ;
enum write_ordering_e write_ordering ;
2010-07-21 12:20:17 +04:00
struct list_head active_ee ; /* IO in progress (P_DATA gets written to disk) */
struct list_head sync_ee ; /* IO in progress (P_RS_DATA_REPLY gets written to disk) */
2011-02-04 17:36:22 +03:00
struct list_head done_ee ; /* need to send P_WRITE_ACK */
struct list_head read_ee ; /* [RS]P_DATA_REQUEST being read */
2009-09-26 03:07:19 +04:00
struct list_head net_ee ; /* zero-copy network send in progress */
int next_barrier_nr ;
struct list_head resync_reads ;
2010-09-06 14:30:25 +04:00
atomic_t pp_in_use ; /* allocated from page pool */
atomic_t pp_in_use_by_net ; /* sendpage()d, still referenced by tcp */
2009-09-26 03:07:19 +04:00
wait_queue_head_t ee_wait ;
struct page * md_io_page ; /* one page buffer for md_io */
struct mutex md_io_mutex ; /* protects the md_io_buffer */
spinlock_t al_lock ;
wait_queue_head_t al_wait ;
struct lru_cache * act_log ; /* activity log */
unsigned int al_tr_number ;
int al_tr_cycle ;
int al_tr_pos ; /* position of the next transaction in the journal */
wait_queue_head_t seq_wait ;
atomic_t packet_seq ;
unsigned int peer_seq ;
spinlock_t peer_seq_lock ;
unsigned int minor ;
unsigned long comm_bm_set ; /* communicated number of set bits. */
struct bm_io_work bm_io_work ;
u64 ed_uuid ; /* UUID of the exposed data */
2011-02-11 22:11:10 +03:00
struct mutex own_state_mutex ;
struct mutex * state_mutex ; /* either own_state_mutex or mdev->tconn->cstate_mutex */
2009-09-26 03:07:19 +04:00
char congestion_reason ; /* Why we where congested... */
2010-08-11 23:21:50 +04:00
atomic_t rs_sect_in ; /* for incoming resync data rate, SyncTarget */
atomic_t rs_sect_ev ; /* for submitted resync data rate, both */
int rs_last_sect_ev ; /* counter to compare with */
int rs_last_events ; /* counter of read or write "events" (unit sectors)
* on the lower level device when we last looked . */
int c_sync_rate ; /* current resync rate after syncer throttle magic */
2010-07-06 13:14:00 +04:00
struct fifo_buffer rs_plan_s ; /* correction values of resync planer */
int rs_in_flight ; /* resync sectors in flight (to proxy, in proxy and from proxy) */
2011-03-31 05:57:33 +04:00
int rs_planed ; /* resync sectors already planned */
2010-10-26 18:02:27 +04:00
atomic_t ap_in_flight ; /* App sectors in flight (waiting for ack) */
2011-05-20 18:39:13 +04:00
int peer_max_bio_size ;
int local_max_bio_size ;
2009-09-26 03:07:19 +04:00
} ;
static inline struct drbd_conf * minor_to_mdev ( unsigned int minor )
{
2011-02-23 03:53:16 +03:00
return ( struct drbd_conf * ) idr_find ( & minors , minor ) ;
2009-09-26 03:07:19 +04:00
}
static inline unsigned int mdev_to_minor ( struct drbd_conf * mdev )
{
return mdev - > minor ;
}
2011-02-08 14:55:24 +03:00
static inline struct drbd_conf * vnr_to_mdev ( struct drbd_tconn * tconn , int vnr )
{
return ( struct drbd_conf * ) idr_find ( & tconn - > volumes , vnr ) ;
}
2009-09-26 03:07:19 +04:00
/*
* function declarations
* * * * * * * * * * * * * * * * * * * * * * * * */
/* drbd_main.c */
2010-03-24 19:11:33 +03:00
/*
 * Flags taken by drbd_send_sizes() and drbd_determine_dev_size().
 * The values occupy distinct bits.
 */
enum dds_flags {
	DDSF_FORCED    = 1 << 0,
	DDSF_NO_RESYNC = 1 << 1,	/* Do not run a resync for the new space */
};
2009-09-26 03:07:19 +04:00
extern void drbd_init_set_defaults ( struct drbd_conf * mdev ) ;
extern int drbd_thread_start ( struct drbd_thread * thi ) ;
extern void _drbd_thread_stop ( struct drbd_thread * thi , int restart , int wait ) ;
2011-02-09 12:33:31 +03:00
extern char * drbd_task_to_thread_name ( struct drbd_tconn * tconn , struct task_struct * task ) ;
2009-09-26 03:07:19 +04:00
# ifdef CONFIG_SMP
2011-02-08 14:46:30 +03:00
extern void drbd_thread_current_set_cpu ( struct drbd_thread * thi ) ;
extern void drbd_calc_cpu_mask ( struct drbd_tconn * tconn ) ;
2009-09-26 03:07:19 +04:00
# else
2011-02-08 14:46:30 +03:00
# define drbd_thread_current_set_cpu(A) ({})
2009-09-26 03:07:19 +04:00
# define drbd_calc_cpu_mask(A) ({})
# endif
extern void drbd_free_resources ( struct drbd_conf * mdev ) ;
2011-02-21 16:29:27 +03:00
extern void tl_release ( struct drbd_tconn * , unsigned int barrier_nr ,
2009-09-26 03:07:19 +04:00
unsigned int set_size ) ;
2011-02-21 16:29:27 +03:00
extern void tl_clear ( struct drbd_tconn * ) ;
extern void _tl_add_barrier ( struct drbd_tconn * , struct drbd_tl_epoch * ) ;
2011-02-08 16:29:53 +03:00
extern void drbd_free_sock ( struct drbd_tconn * tconn ) ;
2011-02-07 17:08:48 +03:00
extern int drbd_send ( struct drbd_tconn * tconn , struct socket * sock ,
void * buf , size_t size , unsigned msg_flags ) ;
2010-12-15 19:04:36 +03:00
extern int drbd_send_all ( struct drbd_tconn * , struct socket * , void * , size_t ,
unsigned ) ;
2011-02-08 12:13:15 +03:00
extern int drbd_send_protocol ( struct drbd_tconn * tconn ) ;
2009-09-26 03:07:19 +04:00
extern int drbd_send_uuids ( struct drbd_conf * mdev ) ;
extern int drbd_send_uuids_skip_initial_sync ( struct drbd_conf * mdev ) ;
2011-03-16 03:09:01 +03:00
extern void drbd_gen_and_send_sync_uuid ( struct drbd_conf * mdev ) ;
2010-03-24 19:11:33 +03:00
extern int drbd_send_sizes ( struct drbd_conf * mdev , int trigger_reply , enum dds_flags flags ) ;
2011-02-11 17:11:24 +03:00
extern int _conn_send_state_req ( struct drbd_tconn * , int vnr , enum drbd_packet cmd ,
union drbd_state , union drbd_state ) ;
2009-09-26 03:07:19 +04:00
extern int _drbd_send_state ( struct drbd_conf * mdev ) ;
extern int drbd_send_state ( struct drbd_conf * mdev ) ;
2011-03-23 16:31:09 +03:00
extern int drbd_send_sync_param ( struct drbd_conf * mdev ) ;
2011-03-16 03:25:28 +03:00
extern void drbd_send_b_ack ( struct drbd_conf * mdev , u32 barrier_nr ,
u32 set_size ) ;
2011-02-04 17:30:34 +03:00
extern int drbd_send_ack ( struct drbd_conf * , enum drbd_packet ,
struct drbd_peer_request * ) ;
2011-03-16 03:30:14 +03:00
extern void drbd_send_ack_rp ( struct drbd_conf * mdev , enum drbd_packet cmd ,
struct p_block_req * rp ) ;
extern void drbd_send_ack_dp ( struct drbd_conf * mdev , enum drbd_packet cmd ,
struct p_data * dp , int data_size ) ;
2011-01-26 19:39:41 +03:00
extern int drbd_send_ack_ex ( struct drbd_conf * mdev , enum drbd_packet cmd ,
2009-09-26 03:07:19 +04:00
sector_t sector , int blksize , u64 block_id ) ;
2010-12-20 01:53:14 +03:00
extern int drbd_send_out_of_sync ( struct drbd_conf * , struct drbd_request * ) ;
2011-02-04 17:30:34 +03:00
extern int drbd_send_block ( struct drbd_conf * , enum drbd_packet ,
struct drbd_peer_request * ) ;
2009-09-26 03:07:19 +04:00
extern int drbd_send_dblock ( struct drbd_conf * mdev , struct drbd_request * req ) ;
extern int drbd_send_drequest ( struct drbd_conf * mdev , int cmd ,
sector_t sector , int size , u64 block_id ) ;
2011-01-26 19:39:41 +03:00
extern int drbd_send_drequest_csum ( struct drbd_conf * mdev , sector_t sector ,
int size , void * digest , int digest_size ,
enum drbd_packet cmd ) ;
2009-09-26 03:07:19 +04:00
extern int drbd_send_ov_request ( struct drbd_conf * mdev , sector_t sector , int size ) ;
extern int drbd_send_bitmap ( struct drbd_conf * mdev ) ;
2011-03-16 03:20:38 +03:00
extern void drbd_send_sr_reply ( struct drbd_conf * mdev , enum drbd_state_rv retcode ) ;
2011-03-28 16:23:08 +04:00
extern void conn_send_sr_reply ( struct drbd_tconn * tconn , enum drbd_state_rv retcode ) ;
2009-09-26 03:07:19 +04:00
extern void drbd_free_bc ( struct drbd_backing_dev * ldev ) ;
extern void drbd_mdev_cleanup ( struct drbd_conf * mdev ) ;
2011-01-20 15:25:21 +03:00
void drbd_print_uuids ( struct drbd_conf * mdev , const char * text ) ;
2009-09-26 03:07:19 +04:00
extern void drbd_md_sync ( struct drbd_conf * mdev ) ;
extern int drbd_md_read ( struct drbd_conf * mdev , struct drbd_backing_dev * bdev ) ;
extern void drbd_uuid_set ( struct drbd_conf * mdev , int idx , u64 val ) __must_hold ( local ) ;
extern void _drbd_uuid_set ( struct drbd_conf * mdev , int idx , u64 val ) __must_hold ( local ) ;
extern void drbd_uuid_new_current ( struct drbd_conf * mdev ) __must_hold ( local ) ;
extern void _drbd_uuid_new_current ( struct drbd_conf * mdev ) __must_hold ( local ) ;
extern void drbd_uuid_set_bm ( struct drbd_conf * mdev , u64 val ) __must_hold ( local ) ;
extern void drbd_md_set_flag ( struct drbd_conf * mdev , int flags ) __must_hold ( local ) ;
extern void drbd_md_clear_flag ( struct drbd_conf * mdev , int flags ) __must_hold ( local ) ;
extern int drbd_md_test_flag ( struct drbd_backing_dev * , int ) ;
2010-09-03 12:00:09 +04:00
# ifndef DRBD_DEBUG_MD_SYNC
2009-09-26 03:07:19 +04:00
extern void drbd_md_mark_dirty ( struct drbd_conf * mdev ) ;
2010-09-03 12:00:09 +04:00
# else
# define drbd_md_mark_dirty(m) drbd_md_mark_dirty_(m, __LINE__ , __func__ )
extern void drbd_md_mark_dirty_ ( struct drbd_conf * mdev ,
unsigned int line , const char * func ) ;
# endif
2009-09-26 03:07:19 +04:00
extern void drbd_queue_bitmap_io ( struct drbd_conf * mdev ,
int ( * io_fn ) ( struct drbd_conf * ) ,
void ( * done ) ( struct drbd_conf * , int ) ,
2011-01-21 12:56:44 +03:00
char * why , enum bm_flag flags ) ;
extern int drbd_bitmap_io ( struct drbd_conf * mdev ,
int ( * io_fn ) ( struct drbd_conf * ) ,
char * why , enum bm_flag flags ) ;
2009-09-26 03:07:19 +04:00
extern int drbd_bmio_set_n_write ( struct drbd_conf * mdev ) ;
extern int drbd_bmio_clear_n_write ( struct drbd_conf * mdev ) ;
2010-09-14 22:26:27 +04:00
extern void drbd_go_diskless ( struct drbd_conf * mdev ) ;
2010-10-16 14:13:47 +04:00
extern void drbd_ldev_destroy ( struct drbd_conf * mdev ) ;
2009-09-26 03:07:19 +04:00
/* Meta data layout
We reserve a 128 MB Block ( 4 k aligned )
* either at the end of the backing device
tree-wide: Assorted spelling fixes
In particular, several occurances of funny versions of 'success',
'unknown', 'therefore', 'acknowledge', 'argument', 'achieve', 'address',
'beginning', 'desirable', 'separate' and 'necessary' are fixed.
Signed-off-by: Daniel Mack <daniel@caiaq.de>
Cc: Joe Perches <joe@perches.com>
Cc: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
2010-02-03 03:01:28 +03:00
* or on a separate meta data device . */
2009-09-26 03:07:19 +04:00
/* The following numbers are sectors */
2011-02-21 15:21:03 +03:00
/* Allows up to about 3.8TB, so if you want more,
* you need to use the " flexible " meta data format . */
# define MD_RESERVED_SECT (128LU << 11) /* 128 MB, unit sectors */
# define MD_AL_OFFSET 8 /* 8 Sectors after start of meta area */
# define MD_AL_SECTORS 64 /* = 32 kB on disk activity log ring buffer */
# define MD_BM_OFFSET (MD_AL_OFFSET + MD_AL_SECTORS)
/* we do all meta data IO in 4k blocks */
# define MD_BLOCK_SHIFT 12
# define MD_BLOCK_SIZE (1<<MD_BLOCK_SHIFT)
/* One activity log extent represents 4M of storage */
# define AL_EXTENT_SHIFT 22
2009-09-26 03:07:19 +04:00
# define AL_EXTENT_SIZE (1<<AL_EXTENT_SHIFT)
2011-02-21 15:21:03 +03:00
/* We could make these currently hardcoded constants configurable
* variables at create - md time ( or even re - configurable at runtime ? ) .
* Which will require some more changes to the DRBD " super block "
* and attach code .
*
* updates per transaction :
* This many changes to the active set can be logged with one transaction .
* This number is arbitrary .
* context per transaction :
* This many context extent numbers are logged with each transaction .
* This number is resulting from the transaction block size ( 4 k ) , the layout
* of the transaction header , and the number of updates per transaction .
* See drbd_actlog . c : struct al_transaction_on_disk
* */
# define AL_UPDATES_PER_TRANSACTION 64 // arbitrary
# define AL_CONTEXT_PER_TRANSACTION 919 // (4096 - 36 - 6*64)/4
2009-09-26 03:07:19 +04:00
/* Helpers that depend on the width of a long:
 *   LN2_BPL      log2 of BITS_PER_LONG (5 for 32 bit, 6 for 64 bit longs)
 *   cpu_to_lel() maps to the cpu_to_le32/cpu_to_le64 helper of that width
 *   lel_to_cpu() maps to the le32_to_cpu/le64_to_cpu helper of that width
 * Any other BITS_PER_LONG is rejected at compile time. */
# if BITS_PER_LONG == 32
# define LN2_BPL 5
# define cpu_to_lel(A) cpu_to_le32(A)
# define lel_to_cpu(A) le32_to_cpu(A)
# elif BITS_PER_LONG == 64
# define LN2_BPL 6
# define cpu_to_lel(A) cpu_to_le64(A)
# define lel_to_cpu(A) le64_to_cpu(A)
# else
# error "LN2 of BITS_PER_LONG unknown!"
# endif
/* resync bitmap */
/* 16MB sized 'bitmap extent' to track syncer usage */
/* Per-extent resync bookkeeping. One extent covers (1 << BM_EXT_SHIFT)
 * bytes of storage. */
struct bm_extent {
int rs_left ; /* number of bits set (out of sync) in this extent. */
int rs_failed ; /* number of failed resync requests in this extent. */
unsigned long flags ; /* holds the BME_* bit numbers (BME_NO_WRITES, BME_LOCKED, BME_PRIORITY) */
struct lc_element lce ; /* lru_cache element; NOTE(review): presumably links this extent into the mdev->resync LRU - confirm */
} ;
# define BME_NO_WRITES 0 /* bm_extent.flags: no more requests on this one! */
# define BME_LOCKED 1 /* bm_extent.flags: syncer active on this one. */
2010-11-07 17:56:29 +03:00
# define BME_PRIORITY 2 /* finish resync IO on this extent ASAP! App IO waiting! */
2009-09-26 03:07:19 +04:00
/* drbd_bitmap.c */
/*
* We need to store one bit for a block .
* Example : 1 GB disk @ 4096 byte blocks = = > we need 32 KB bitmap .
* Bit 0 = = > local node thinks this block is binary identical on both nodes
* Bit 1 = = > local node thinks this block needs to be synced .
*/
2010-07-06 19:25:54 +04:00
# define SLEEP_TIME (HZ / 10)
2011-02-21 15:21:00 +03:00
/* We do bitmap IO in units of 4k blocks.
* We also still have a hardcoded 4 k per bit relation . */
# define BM_BLOCK_SHIFT 12 /* 4k per bit */
2009-09-26 03:07:19 +04:00
# define BM_BLOCK_SIZE (1<<BM_BLOCK_SHIFT)
2011-02-21 15:21:00 +03:00
/* mostly arbitrarily set the represented size of one bitmap extent,
* aka resync extent , to 16 MiB ( which is also 512 Byte worth of bitmap
* at 4 k per bit resolution ) */
# define BM_EXT_SHIFT 24 /* 16 MiB per resync extent */
2009-09-26 03:07:19 +04:00
# define BM_EXT_SIZE (1<<BM_EXT_SHIFT)
# if (BM_EXT_SHIFT != 24) || (BM_BLOCK_SHIFT != 12)
# error "HAVE YOU FIXED drbdmeta AS WELL??"
# endif
/* thus many _storage_ sectors are described by one bit */
# define BM_SECT_TO_BIT(x) ((x)>>(BM_BLOCK_SHIFT-9))
# define BM_BIT_TO_SECT(x) ((sector_t)(x)<<(BM_BLOCK_SHIFT-9))
# define BM_SECT_PER_BIT BM_BIT_TO_SECT(1)
/* bit to represented kilo byte conversion */
# define Bit2KB(bits) ((bits)<<(BM_BLOCK_SHIFT-10))
/* in which _bitmap_ extent (resp. sector) the bit for a certain
* _storage_ sector is located in */
# define BM_SECT_TO_EXT(x) ((x)>>(BM_EXT_SHIFT-9))
/* how many _storage_ sectors we have per bitmap sector */
# define BM_EXT_TO_SECT(x) ((sector_t)(x) << (BM_EXT_SHIFT-9))
# define BM_SECT_PER_EXT BM_EXT_TO_SECT(1)
/* in one sector of the bitmap, we have this many activity_log extents. */
# define AL_EXT_PER_BM_SECT (1 << (BM_EXT_SHIFT - AL_EXTENT_SHIFT))
# define BM_WORDS_PER_AL_EXT (1 << (AL_EXTENT_SHIFT-BM_BLOCK_SHIFT-LN2_BPL))
# define BM_BLOCKS_PER_BM_EXT_B (BM_EXT_SHIFT - BM_BLOCK_SHIFT)
# define BM_BLOCKS_PER_BM_EXT_MASK ((1<<BM_BLOCKS_PER_BM_EXT_B) - 1)
/* the extent in "PER_EXTENT" below is an activity log extent
* we need that many ( long words / bytes ) to store the bitmap
* of one AL_EXTENT_SIZE chunk of storage .
* we can store the bitmap for that many AL_EXTENTS within
* one sector of the _on_disk_ bitmap :
* bit 0 bit 37 bit 38 bit ( 512 * 8 ) - 1
* . . . | . . . . . . . . | . . . . . . . . | . . // ..|........|
* sect . 0 ` 296 ` 304 ^ ( 512 * 8 * 8 ) - 1
*
# define BM_WORDS_PER_EXT ( (AL_EXT_SIZE / BM_BLOCK_SIZE) / BITS_PER_LONG )
# define BM_BYTES_PER_EXT ( (AL_EXT_SIZE / BM_BLOCK_SIZE) / 8 ) // 128
# define BM_EXT_PER_SECT ( 512 / BM_BYTES_PER_EXTENT ) // 4
*/
# define DRBD_MAX_SECTORS_32 (0xffffffffLU)
# define DRBD_MAX_SECTORS_BM \
( ( MD_RESERVED_SECT - MD_BM_OFFSET ) * ( 1LL < < ( BM_EXT_SHIFT - 9 ) ) )
# if DRBD_MAX_SECTORS_BM < DRBD_MAX_SECTORS_32
# define DRBD_MAX_SECTORS DRBD_MAX_SECTORS_BM
# define DRBD_MAX_SECTORS_FLEX DRBD_MAX_SECTORS_BM
2010-01-05 21:33:54 +03:00
# elif !defined(CONFIG_LBDAF) && BITS_PER_LONG == 32
2009-09-26 03:07:19 +04:00
# define DRBD_MAX_SECTORS DRBD_MAX_SECTORS_32
# define DRBD_MAX_SECTORS_FLEX DRBD_MAX_SECTORS_32
# else
# define DRBD_MAX_SECTORS DRBD_MAX_SECTORS_BM
/* 16 TB in units of sectors */
# if BITS_PER_LONG == 32
/* adjust by one page worth of bitmap,
* so we won ' t wrap around in drbd_bm_find_next_bit .
* you should use 64 bit OS for that much storage , anyways . */
# define DRBD_MAX_SECTORS_FLEX BM_BIT_TO_SECT(0xffff7fff)
# else
2010-12-14 17:13:04 +03:00
/* we allow up to 1 PiB now on 64bit architecture with "flexible" meta data */
# define DRBD_MAX_SECTORS_FLEX (1UL << 51)
/* corresponds to (1UL << 38) bits right now. */
2009-09-26 03:07:19 +04:00
# endif
# endif
2011-03-31 18:36:43 +04:00
/* BIO_MAX_SIZE is 256 * PAGE_CACHE_SIZE,
* so for typical PAGE_CACHE_SIZE of 4 k , that is ( 1 < < 20 ) Byte .
* Since we may live in a mixed - platform cluster ,
* we limit us to a platform agnostic constant here for now .
* A followup commit may allow even bigger BIO sizes ,
* once we thought that through . */
# define DRBD_MAX_BIO_SIZE (1 << 20)
# if DRBD_MAX_BIO_SIZE > BIO_MAX_SIZE
# error Architecture not supported: DRBD_MAX_BIO_SIZE > BIO_MAX_SIZE
# endif
2011-05-20 18:39:13 +04:00
# define DRBD_MAX_BIO_SIZE_SAFE (1 << 12) /* Works always = 4k */
2009-09-26 03:07:19 +04:00
2010-08-23 17:18:33 +04:00
# define DRBD_MAX_SIZE_H80_PACKET (1 << 15) /* The old header only allows packets up to 32Kib data */
2009-09-26 03:07:19 +04:00
extern int drbd_bm_init ( struct drbd_conf * mdev ) ;
2010-03-24 18:23:03 +03:00
extern int drbd_bm_resize ( struct drbd_conf * mdev , sector_t sectors , int set_new_bits ) ;
2009-09-26 03:07:19 +04:00
extern void drbd_bm_cleanup ( struct drbd_conf * mdev ) ;
extern void drbd_bm_set_all ( struct drbd_conf * mdev ) ;
extern void drbd_bm_clear_all ( struct drbd_conf * mdev ) ;
2010-12-14 17:13:04 +03:00
/* set/clear/test only a few bits at a time */
2009-09-26 03:07:19 +04:00
extern int drbd_bm_set_bits (
struct drbd_conf * mdev , unsigned long s , unsigned long e ) ;
extern int drbd_bm_clear_bits (
struct drbd_conf * mdev , unsigned long s , unsigned long e ) ;
2010-12-14 17:13:04 +03:00
extern int drbd_bm_count_bits (
struct drbd_conf * mdev , const unsigned long s , const unsigned long e ) ;
/* bm_set_bits variant for use while holding drbd_bm_lock,
* may process the whole bitmap in one go */
2009-09-26 03:07:19 +04:00
extern void _drbd_bm_set_bits ( struct drbd_conf * mdev ,
const unsigned long s , const unsigned long e ) ;
extern int drbd_bm_test_bit ( struct drbd_conf * mdev , unsigned long bitnr ) ;
extern int drbd_bm_e_weight ( struct drbd_conf * mdev , unsigned long enr ) ;
2010-12-15 10:59:11 +03:00
extern int drbd_bm_write_page ( struct drbd_conf * mdev , unsigned int idx ) __must_hold ( local ) ;
2009-09-26 03:07:19 +04:00
extern int drbd_bm_read ( struct drbd_conf * mdev ) __must_hold ( local ) ;
2011-02-21 15:21:00 +03:00
extern void drbd_bm_mark_for_writeout ( struct drbd_conf * mdev , int page_nr ) ;
2009-09-26 03:07:19 +04:00
extern int drbd_bm_write ( struct drbd_conf * mdev ) __must_hold ( local ) ;
2011-02-21 15:21:00 +03:00
extern int drbd_bm_write_hinted ( struct drbd_conf * mdev ) __must_hold ( local ) ;
2009-09-26 03:07:19 +04:00
extern unsigned long drbd_bm_ALe_set_all ( struct drbd_conf * mdev ,
unsigned long al_enr ) ;
extern size_t drbd_bm_words ( struct drbd_conf * mdev ) ;
extern unsigned long drbd_bm_bits ( struct drbd_conf * mdev ) ;
extern sector_t drbd_bm_capacity ( struct drbd_conf * mdev ) ;
2010-12-14 17:13:04 +03:00
# define DRBD_END_OF_BITMAP (~(unsigned long)0)
2009-09-26 03:07:19 +04:00
extern unsigned long drbd_bm_find_next ( struct drbd_conf * mdev , unsigned long bm_fo ) ;
/* bm_find_next variants for use while you hold drbd_bm_lock() */
extern unsigned long _drbd_bm_find_next ( struct drbd_conf * mdev , unsigned long bm_fo ) ;
extern unsigned long _drbd_bm_find_next_zero ( struct drbd_conf * mdev , unsigned long bm_fo ) ;
2010-08-31 14:00:50 +04:00
extern unsigned long _drbd_bm_total_weight ( struct drbd_conf * mdev ) ;
2009-09-26 03:07:19 +04:00
extern unsigned long drbd_bm_total_weight ( struct drbd_conf * mdev ) ;
extern int drbd_bm_rs_done ( struct drbd_conf * mdev ) ;
/* for receive_bitmap */
extern void drbd_bm_merge_lel ( struct drbd_conf * mdev , size_t offset ,
size_t number , unsigned long * buffer ) ;
2010-12-15 10:59:11 +03:00
/* for _drbd_send_bitmap */
2009-09-26 03:07:19 +04:00
extern void drbd_bm_get_lel ( struct drbd_conf * mdev , size_t offset ,
size_t number , unsigned long * buffer ) ;
2011-01-21 12:56:44 +03:00
extern void drbd_bm_lock ( struct drbd_conf * mdev , char * why , enum bm_flag flags ) ;
2009-09-26 03:07:19 +04:00
extern void drbd_bm_unlock ( struct drbd_conf * mdev ) ;
/* drbd_main.c */
extern struct kmem_cache * drbd_request_cache ;
2011-02-04 17:38:52 +03:00
extern struct kmem_cache * drbd_ee_cache ; /* peer requests */
2009-09-26 03:07:19 +04:00
extern struct kmem_cache * drbd_bm_ext_cache ; /* bitmap extents */
extern struct kmem_cache * drbd_al_ext_cache ; /* activity log extents */
extern mempool_t * drbd_request_mempool ;
extern mempool_t * drbd_ee_mempool ;
2011-02-23 14:39:46 +03:00
/* drbd's page pool, used to buffer data received from the peer,
* or data requested by the peer .
*
* This does not have an emergency reserve .
*
* When allocating from this pool , it first takes pages from the pool .
 * Only if the pool is depleted will it try to allocate from the system.
*
* The assumption is that pages taken from this pool will be processed ,
* and given back , " quickly " , and then can be recycled , so we can avoid
* frequent calls to alloc_page ( ) , and still will be able to make progress even
* under memory pressure .
*/
extern struct page * drbd_pp_pool ;
2009-09-26 03:07:19 +04:00
extern spinlock_t drbd_pp_lock ;
extern int drbd_pp_vacant ;
extern wait_queue_head_t drbd_pp_wait ;
2011-02-23 14:39:46 +03:00
/* We also need a standard (emergency-reserve backed) page pool
* for meta data IO ( activity log , bitmap ) .
* We can keep it global , as long as it is used as " N pages at a time " .
* 128 should be plenty , currently we probably can get away with as few as 1.
*/
# define DRBD_MIN_POOL_PAGES 128
extern mempool_t * drbd_md_io_page_pool ;
2011-02-23 19:02:01 +03:00
/* We also need to make sure we get a bio
* when we need it for housekeeping purposes */
extern struct bio_set * drbd_md_io_bio_set ;
/* to allocate from that set */
extern struct bio * bio_alloc_drbd ( gfp_t gfp_mask ) ;
2009-09-26 03:07:19 +04:00
extern rwlock_t global_state_lock ;
2011-02-18 16:56:45 +03:00
extern int conn_lowest_minor ( struct drbd_tconn * tconn ) ;
2011-02-22 10:07:03 +03:00
enum drbd_ret_code conn_new_minor ( struct drbd_tconn * tconn , unsigned int minor , int vnr ) ;
2011-04-12 08:10:11 +04:00
extern void drbd_delete_device ( struct drbd_conf * mdev ) ;
2009-09-26 03:07:19 +04:00
2011-03-07 14:49:34 +03:00
struct drbd_tconn * drbd_new_tconn ( const char * name ) ;
2011-01-19 14:26:59 +03:00
extern void drbd_free_tconn ( struct drbd_tconn * tconn ) ;
2011-02-21 17:38:08 +03:00
struct drbd_tconn * conn_by_name ( const char * name ) ;
2011-01-19 14:26:59 +03:00
2009-09-26 03:07:19 +04:00
extern int proc_details ;
/* drbd_req */
2011-02-22 04:15:32 +03:00
extern int __drbd_make_request ( struct drbd_conf * , struct bio * , unsigned long ) ;
2010-12-13 19:48:19 +03:00
extern int drbd_make_request ( struct request_queue * q , struct bio * bio ) ;
2009-09-26 03:07:19 +04:00
extern int drbd_read_remote ( struct drbd_conf * mdev , struct drbd_request * req ) ;
extern int drbd_merge_bvec ( struct request_queue * q , struct bvec_merge_data * bvm , struct bio_vec * bvec ) ;
extern int is_valid_ar_handle ( struct drbd_request * , sector_t ) ;
/* drbd_nl.c */
2011-03-08 18:11:16 +03:00
extern int drbd_msg_put_info ( const char * info ) ;
2009-09-26 03:07:19 +04:00
extern void drbd_suspend_io ( struct drbd_conf * mdev ) ;
extern void drbd_resume_io ( struct drbd_conf * mdev ) ;
extern char * ppsize ( char * buf , unsigned long long size ) ;
2009-12-22 15:35:52 +03:00
extern sector_t drbd_new_dev_size ( struct drbd_conf * , struct drbd_backing_dev * , int ) ;
2009-09-26 03:07:19 +04:00
/* Result codes returned by drbd_determine_dev_size(). */
enum determine_dev_size {
	dev_size_error = -1,
	unchanged      = 0,
	shrunk         = 1,
	grew           = 2,
};
2011-05-21 20:32:29 +04:00
extern enum determine_dev_size drbd_determine_dev_size ( struct drbd_conf * , enum dds_flags ) __must_hold ( local ) ;
2009-09-26 03:07:19 +04:00
extern void resync_after_online_grow ( struct drbd_conf * ) ;
2011-05-20 18:39:13 +04:00
extern void drbd_reconsider_max_bio_size ( struct drbd_conf * mdev ) ;
2010-12-08 02:39:32 +03:00
extern enum drbd_state_rv drbd_set_role ( struct drbd_conf * mdev ,
enum drbd_role new_role ,
int force ) ;
2011-03-24 13:03:07 +03:00
extern bool conn_try_outdate_peer ( struct drbd_tconn * tconn ) ;
extern void conn_try_outdate_peer_async ( struct drbd_tconn * tconn ) ;
2009-09-26 03:07:19 +04:00
extern int drbd_khelper ( struct drbd_conf * mdev , char * cmd ) ;
/* drbd_worker.c */
extern int drbd_worker ( struct drbd_thread * thi ) ;
extern int drbd_alter_sa ( struct drbd_conf * mdev , int na ) ;
extern void drbd_start_resync ( struct drbd_conf * mdev , enum drbd_conns side ) ;
extern void resume_next_sg ( struct drbd_conf * mdev ) ;
extern void suspend_other_sg ( struct drbd_conf * mdev ) ;
extern int drbd_resync_finished ( struct drbd_conf * mdev ) ;
/* maybe rather drbd_main.c ? */
extern int drbd_md_sync_page_io ( struct drbd_conf * mdev ,
struct drbd_backing_dev * bdev , sector_t sector , int rw ) ;
2010-12-20 01:53:14 +03:00
extern void drbd_ov_out_of_sync_found ( struct drbd_conf * , sector_t , int ) ;
2010-11-05 11:55:18 +03:00
extern void drbd_rs_controller_reset ( struct drbd_conf * mdev ) ;
2009-09-26 03:07:19 +04:00
2010-12-20 01:53:14 +03:00
static inline void ov_out_of_sync_print ( struct drbd_conf * mdev )
2009-09-26 03:07:19 +04:00
{
if ( mdev - > ov_last_oos_size ) {
dev_err ( DEV , " Out of sync: start=%llu, size=%lu (sectors) \n " ,
( unsigned long long ) mdev - > ov_last_oos_start ,
( unsigned long ) mdev - > ov_last_oos_size ) ;
}
mdev - > ov_last_oos_size = 0 ;
}
2010-05-14 19:10:48 +04:00
extern void drbd_csum_bio ( struct drbd_conf * , struct crypto_hash * , struct bio * , void * ) ;
2011-02-04 17:30:34 +03:00
extern void drbd_csum_ee ( struct drbd_conf * , struct crypto_hash * ,
struct drbd_peer_request * , void * ) ;
2009-09-26 03:07:19 +04:00
/* worker callbacks */
2011-03-16 17:31:39 +03:00
extern int w_read_retry_remote ( struct drbd_work * , int ) ;
extern int w_e_end_data_req ( struct drbd_work * , int ) ;
extern int w_e_end_rsdata_req ( struct drbd_work * , int ) ;
extern int w_e_end_csum_rs_req ( struct drbd_work * , int ) ;
extern int w_e_end_ov_reply ( struct drbd_work * , int ) ;
extern int w_e_end_ov_req ( struct drbd_work * , int ) ;
extern int w_ov_finished ( struct drbd_work * , int ) ;
extern int w_resync_timer ( struct drbd_work * , int ) ;
extern int w_send_write_hint ( struct drbd_work * , int ) ;
extern int w_make_resync_request ( struct drbd_work * , int ) ;
extern int w_send_dblock ( struct drbd_work * , int ) ;
extern int w_send_barrier ( struct drbd_work * , int ) ;
extern int w_send_read_req ( struct drbd_work * , int ) ;
extern int w_prev_work_done ( struct drbd_work * , int ) ;
extern int w_e_reissue ( struct drbd_work * , int ) ;
extern int w_restart_disk_io ( struct drbd_work * , int ) ;
2010-12-20 01:53:14 +03:00
extern int w_send_out_of_sync ( struct drbd_work * , int ) ;
2011-03-16 17:31:39 +03:00
extern int w_start_resync ( struct drbd_work * , int ) ;
2009-09-26 03:07:19 +04:00
extern void resync_timer_fn ( unsigned long data ) ;
2011-01-14 18:03:11 +03:00
extern void start_resync_timer_fn ( unsigned long data ) ;
2009-09-26 03:07:19 +04:00
/* drbd_receiver.c */
2010-11-07 17:56:29 +03:00
extern int drbd_rs_should_slow_down ( struct drbd_conf * mdev , sector_t sector ) ;
2011-02-17 18:38:35 +03:00
extern int drbd_submit_peer_request ( struct drbd_conf * ,
struct drbd_peer_request * , const unsigned ,
const int ) ;
2011-04-06 19:14:02 +04:00
extern int drbd_free_peer_reqs ( struct drbd_conf * , struct list_head * ) ;
2011-04-06 18:09:15 +04:00
extern struct drbd_peer_request * drbd_alloc_peer_req ( struct drbd_conf * , u64 ,
sector_t , unsigned int ,
gfp_t ) __must_hold ( local ) ;
2011-04-06 18:16:56 +04:00
extern void __drbd_free_peer_req ( struct drbd_conf * , struct drbd_peer_request * ,
int ) ;
# define drbd_free_peer_req(m,e) __drbd_free_peer_req(m, e, 0)
# define drbd_free_net_peer_req(m,e) __drbd_free_peer_req(m, e, 1)
2011-04-07 23:02:09 +04:00
extern struct page * drbd_alloc_pages ( struct drbd_conf * , unsigned int , bool ) ;
2009-09-26 03:07:19 +04:00
extern void drbd_set_recv_tcq ( struct drbd_conf * mdev , int tcq_enabled ) ;
extern void _drbd_clear_done_ee ( struct drbd_conf * mdev , struct list_head * to_be_freed ) ;
2011-02-18 16:23:11 +03:00
extern void conn_flush_workqueue ( struct drbd_tconn * tconn ) ;
2011-03-15 18:56:18 +03:00
extern int drbd_connected ( int vnr , void * p , void * data ) ;
2011-02-18 16:23:11 +03:00
static inline void drbd_flush_workqueue ( struct drbd_conf * mdev )
{
conn_flush_workqueue ( mdev - > tconn ) ;
}
2009-09-26 03:07:19 +04:00
/*
 * In-kernel setsockopt helper.
 *
 * kernel_setsockopt exists, but only since 2.6.18.  There is no need to
 * mess with get_fs/set_fs here, as we know we are always KERNEL_DS.
 *
 * SOL_SOCKET options go through sock_setsockopt(); every other level is
 * dispatched to the socket's own ops->setsockopt().  The callee's return
 * value is passed back unchanged.
 */
static inline int drbd_setsockopt(struct socket *sock, int level, int optname,
				  char __user *optval, int optlen)
{
	if (level != SOL_SOCKET)
		return sock->ops->setsockopt(sock, level, optname, optval,
					     optlen);

	return sock_setsockopt(sock, level, optname, optval, optlen);
}
static inline void drbd_tcp_cork ( struct socket * sock )
{
int __user val = 1 ;
( void ) drbd_setsockopt ( sock , SOL_TCP , TCP_CORK ,
( char __user * ) & val , sizeof ( val ) ) ;
}
static inline void drbd_tcp_uncork ( struct socket * sock )
{
int __user val = 0 ;
( void ) drbd_setsockopt ( sock , SOL_TCP , TCP_CORK ,
( char __user * ) & val , sizeof ( val ) ) ;
}
static inline void drbd_tcp_nodelay ( struct socket * sock )
{
int __user val = 1 ;
( void ) drbd_setsockopt ( sock , SOL_TCP , TCP_NODELAY ,
( char __user * ) & val , sizeof ( val ) ) ;
}
static inline void drbd_tcp_quickack ( struct socket * sock )
{
2010-05-25 16:23:57 +04:00
int __user val = 2 ;
2009-09-26 03:07:19 +04:00
( void ) drbd_setsockopt ( sock , SOL_TCP , TCP_QUICKACK ,
( char __user * ) & val , sizeof ( val ) ) ;
}
void drbd_bump_write_ordering ( struct drbd_conf * mdev , enum write_ordering_e wo ) ;
/* drbd_proc.c */
extern struct proc_dir_entry * drbd_proc ;
2009-12-14 02:59:30 +03:00
extern const struct file_operations drbd_proc_fops ;
2009-09-26 03:07:19 +04:00
extern const char * drbd_conn_str ( enum drbd_conns s ) ;
extern const char * drbd_role_str ( enum drbd_role s ) ;
/* drbd_actlog.c */
2011-03-31 17:18:56 +04:00
/* Begin/complete pair operating on a struct drbd_interval. */
extern void drbd_al_begin_io ( struct drbd_conf * mdev , struct drbd_interval * i ) ;
extern void drbd_al_complete_io ( struct drbd_conf * mdev , struct drbd_interval * i ) ;
2009-09-26 03:07:19 +04:00
/* The drbd_rs_* helpers below are addressed by sector; the int-returning
 * ones report a status whose meaning is defined in drbd_actlog.c. */
extern void drbd_rs_complete_io ( struct drbd_conf * mdev , sector_t sector ) ;
extern int drbd_rs_begin_io ( struct drbd_conf * mdev , sector_t sector ) ;
extern int drbd_try_rs_begin_io ( struct drbd_conf * mdev , sector_t sector ) ;
extern void drbd_rs_cancel_all ( struct drbd_conf * mdev ) ;
extern int drbd_rs_del_all ( struct drbd_conf * mdev ) ;
extern void drbd_rs_failed_io ( struct drbd_conf * mdev ,
sector_t sector , int size ) ;
extern int drbd_al_read_log ( struct drbd_conf * mdev , struct drbd_backing_dev * ) ;
2010-11-05 11:48:01 +03:00
extern void drbd_advance_rs_marks ( struct drbd_conf * mdev , unsigned long still_to_go ) ;
2009-09-26 03:07:19 +04:00
/* Use the drbd_set_in_sync() macro instead of calling this directly:
 * the macro appends the call site's __FILE__ and __LINE__. */
extern void __drbd_set_in_sync ( struct drbd_conf * mdev , sector_t sector ,
int size , const char * file , const unsigned int line ) ;
# define drbd_set_in_sync(mdev, sector, size) \
__drbd_set_in_sync ( mdev , sector , size , __FILE__ , __LINE__ )
2010-10-27 16:33:00 +04:00
/* Same call-site wrapping as above; unlike __drbd_set_in_sync() this one
 * returns an int. */
extern int __drbd_set_out_of_sync ( struct drbd_conf * mdev , sector_t sector ,
2009-09-26 03:07:19 +04:00
int size , const char * file , const unsigned int line ) ;
# define drbd_set_out_of_sync(mdev, sector, size) \
__drbd_set_out_of_sync ( mdev , sector , size , __FILE__ , __LINE__ )
extern void drbd_al_apply_to_bm ( struct drbd_conf * mdev ) ;
extern void drbd_al_shrink ( struct drbd_conf * mdev ) ;
/* drbd_nl.c */
2011-03-07 14:49:34 +03:00
/* state info broadcast */
/*
 * Argument block for drbd_bcast_event(): a reason code plus a payload.
 * The payload is an anonymous union of two anonymous structs, so only one
 * of the two member groups holds valid data at a time; presumably
 * sib_reason selects which one -- confirm against the users in drbd_nl.c.
 */
struct sib_info {
enum drbd_state_info_bcast_reason sib_reason ;
union {
/* helper name and its exit code */
struct {
char * helper_name ;
unsigned helper_exit_code ;
} ;
/* a pair of states; presumably old state (os) and new state (ns) */
struct {
union drbd_state os ;
union drbd_state ns ;
} ;
} ;
} ;
/* Takes the device and a read-only pointer to the info block. */
void drbd_bcast_event ( struct drbd_conf * mdev , const struct sib_info * sib ) ;
2009-09-26 03:07:19 +04:00
/*
* inline helper functions
* * * * * * * * * * * * * * * * * * * * * * * * */
2010-05-14 19:10:48 +04:00
/* see also page_chain_add and friends in drbd_receiver.c */
/* The link to the next page of a chain is the page's private value,
 * reinterpreted as a struct page pointer. */
static inline struct page *page_chain_next(struct page *page)
{
	return (struct page *)page_private(page);
}
/*
 * Walk a page chain starting at @page. The cursor is @page itself: it is
 * advanced with page_chain_next() after each iteration and is NULL once the
 * walk has run to completion. The next link is prefetched before each
 * iteration of the body.
 */
# define page_chain_for_each(page) \
for ( ; page & & ( { prefetch ( page_chain_next ( page ) ) ; 1 ; } ) ; \
page = page_chain_next ( page ) )
/*
 * Variant that stores the next link in @n before the body runs and steps
 * with "page = n", so the step does not read @page again after the body.
 */
# define page_chain_for_each_safe(page, n) \
for ( ; page & & ( { n = page_chain_next ( page ) ; 1 ; } ) ; page = n )
/* Returns 1 as soon as any segment page of @bio, scanning from segment 0,
 * has a page count above 1; returns 0 otherwise. */
static inline int drbd_bio_has_active_page(struct bio *bio)
{
	struct bio_vec *segment;
	int idx;

	__bio_for_each_segment(segment, bio, idx, 0) {
		if (page_count(segment->bv_page) > 1)
			return 1;
	}

	return 0;
}
2011-04-07 23:34:24 +04:00
static inline int drbd_peer_req_has_active_page ( struct drbd_peer_request * peer_req )
2010-05-14 19:10:48 +04:00
{
2011-02-04 17:57:48 +03:00
struct page * page = peer_req - > pages ;
2010-05-14 19:10:48 +04:00
page_chain_for_each ( page ) {
if ( page_count ( page ) > 1 )
return 1 ;
}
return 0 ;
}
2010-12-08 02:39:32 +03:00
static inline enum drbd_state_rv
_drbd_set_state ( struct drbd_conf * mdev , union drbd_state ns ,
enum chg_state_flags flags , struct completion * done )
2009-09-26 03:07:19 +04:00
{
2010-12-08 02:39:32 +03:00
enum drbd_state_rv rv ;
2009-09-26 03:07:19 +04:00
read_lock ( & global_state_lock ) ;
rv = __drbd_set_state ( mdev , ns , flags , done ) ;
read_unlock ( & global_state_lock ) ;
return rv ;
}
2011-03-28 17:40:12 +04:00
static inline union drbd_state drbd_read_state ( struct drbd_conf * mdev )
{
union drbd_state rv ;
2011-03-29 12:52:01 +04:00
rv . i = mdev - > state . i ;
2011-03-28 18:18:39 +04:00
rv . susp = mdev - > tconn - > susp ;
rv . susp_nod = mdev - > tconn - > susp_nod ;
rv . susp_fen = mdev - > tconn - > susp_fen ;
2011-03-28 17:40:12 +04:00
return rv ;
}
2009-09-26 03:07:19 +04:00
# define __drbd_chk_io_error(m,f) __drbd_chk_io_error_(m,f, __func__)
static inline void __drbd_chk_io_error_ ( struct drbd_conf * mdev , int forcedetach , const char * where )
{
switch ( mdev - > ldev - > dc . on_io_error ) {
case EP_PASS_ON :
if ( ! forcedetach ) {
2010-05-27 13:51:56 +04:00
if ( __ratelimit ( & drbd_ratelimit_state ) )
2010-10-16 14:13:47 +04:00
dev_err ( DEV , " Local IO failed in %s. \n " , where ) ;
2011-03-14 13:54:47 +03:00
if ( mdev - > state . disk > D_INCONSISTENT )
_drbd_set_state ( _NS ( mdev , disk , D_INCONSISTENT ) , CS_HARD , NULL ) ;
2009-09-26 03:07:19 +04:00
break ;
}
/* NOTE fall through to detach case if forcedetach set */
case EP_DETACH :
case EP_CALL_HELPER :
2010-10-16 14:13:47 +04:00
set_bit ( WAS_IO_ERROR , & mdev - > flags ) ;
2009-09-26 03:07:19 +04:00
if ( mdev - > state . disk > D_FAILED ) {
_drbd_set_state ( _NS ( mdev , disk , D_FAILED ) , CS_HARD , NULL ) ;
2010-10-16 14:13:47 +04:00
dev_err ( DEV ,
" Local IO failed in %s. Detaching... \n " , where ) ;
2009-09-26 03:07:19 +04:00
}
break ;
}
}
/**
* drbd_chk_io_error : Handle the on_io_error setting , should be called from all io completion handlers
* @ mdev : DRBD device .
* @ error : Error code passed to the IO completion callback
* @ forcedetach : Force detach . I . e . the error happened while accessing the meta data
*
* See also drbd_main . c : after_state_ch ( ) if ( os . disk > D_FAILED & & ns . disk = = D_FAILED )
*/
# define drbd_chk_io_error(m,e,f) drbd_chk_io_error_(m,e,f, __func__)
static inline void drbd_chk_io_error_ ( struct drbd_conf * mdev ,
int error , int forcedetach , const char * where )
{
if ( error ) {
unsigned long flags ;
2011-01-19 16:16:30 +03:00
spin_lock_irqsave ( & mdev - > tconn - > req_lock , flags ) ;
2009-09-26 03:07:19 +04:00
__drbd_chk_io_error_ ( mdev , forcedetach , where ) ;
2011-01-19 16:16:30 +03:00
spin_unlock_irqrestore ( & mdev - > tconn - > req_lock , flags ) ;
2009-09-26 03:07:19 +04:00
}
}
/**
* drbd_md_first_sector ( ) - Returns the first sector number of the meta data area
* @ bdev : Meta data block device .
*
* BTW , for internal meta data , this happens to be the maximum capacity
* we could agree upon with our peer node .
*/
static inline sector_t drbd_md_first_sector ( struct drbd_backing_dev * bdev )
{
switch ( bdev - > dc . meta_dev_idx ) {
case DRBD_MD_INDEX_INTERNAL :
case DRBD_MD_INDEX_FLEX_INT :
return bdev - > md . md_offset + bdev - > md . bm_offset ;
case DRBD_MD_INDEX_FLEX_EXT :
default :
return bdev - > md . md_offset ;
}
}
/**
* drbd_md_last_sector ( ) - Return the last sector number of the meta data area
* @ bdev : Meta data block device .
*/
static inline sector_t drbd_md_last_sector ( struct drbd_backing_dev * bdev )
{
switch ( bdev - > dc . meta_dev_idx ) {
case DRBD_MD_INDEX_INTERNAL :
case DRBD_MD_INDEX_FLEX_INT :
return bdev - > md . md_offset + MD_AL_OFFSET - 1 ;
case DRBD_MD_INDEX_FLEX_EXT :
default :
return bdev - > md . md_offset + bdev - > md . md_size_sect ;
}
}
/* Returns the number of 512 byte sectors of the device */
static inline sector_t drbd_get_capacity ( struct block_device * bdev )
{
/* return bdev ? get_capacity(bdev->bd_disk) : 0; */
2010-11-08 16:39:12 +03:00
return bdev ? i_size_read ( bdev - > bd_inode ) > > 9 : 0 ;
2009-09-26 03:07:19 +04:00
}
/**
* drbd_get_max_capacity ( ) - Returns the capacity we announce to out peer
* @ bdev : Meta data block device .
*
* returns the capacity we announce to out peer . we clip ourselves at the
* various MAX_SECTORS , because if we don ' t , current implementation will
* oops sooner or later
*/
static inline sector_t drbd_get_max_capacity ( struct drbd_backing_dev * bdev )
{
sector_t s ;
switch ( bdev - > dc . meta_dev_idx ) {
case DRBD_MD_INDEX_INTERNAL :
case DRBD_MD_INDEX_FLEX_INT :
s = drbd_get_capacity ( bdev - > backing_bdev )
? min_t ( sector_t , DRBD_MAX_SECTORS_FLEX ,
drbd_md_first_sector ( bdev ) )
: 0 ;
break ;
case DRBD_MD_INDEX_FLEX_EXT :
s = min_t ( sector_t , DRBD_MAX_SECTORS_FLEX ,
drbd_get_capacity ( bdev - > backing_bdev ) ) ;
/* clip at maximum size the meta device can support */
s = min_t ( sector_t , s ,
BM_EXT_TO_SECT ( bdev - > md . md_size_sect
- bdev - > md . bm_offset ) ) ;
break ;
default :
s = min_t ( sector_t , DRBD_MAX_SECTORS ,
drbd_get_capacity ( bdev - > backing_bdev ) ) ;
}
return s ;
}
/**
* drbd_md_ss__ ( ) - Return the sector number of our meta data super block
* @ mdev : DRBD device .
* @ bdev : Meta data block device .
*/
static inline sector_t drbd_md_ss__ ( struct drbd_conf * mdev ,
struct drbd_backing_dev * bdev )
{
switch ( bdev - > dc . meta_dev_idx ) {
default : /* external, some index */
return MD_RESERVED_SECT * bdev - > dc . meta_dev_idx ;
case DRBD_MD_INDEX_INTERNAL :
/* with drbd08, internal meta data is always "flexible" */
case DRBD_MD_INDEX_FLEX_INT :
/* sizeof(struct md_on_disk_07) == 4k
* position : last 4 k aligned block of 4 k size */
if ( ! bdev - > backing_bdev ) {
if ( __ratelimit ( & drbd_ratelimit_state ) ) {
dev_err ( DEV , " bdev->backing_bdev==NULL \n " ) ;
dump_stack ( ) ;
}
return 0 ;
}
return ( drbd_get_capacity ( bdev - > backing_bdev ) & ~ 7ULL )
- MD_AL_OFFSET ;
case DRBD_MD_INDEX_FLEX_EXT :
return 0 ;
}
}
static inline void
drbd_queue_work_front ( struct drbd_work_queue * q , struct drbd_work * w )
{
unsigned long flags ;
spin_lock_irqsave ( & q - > q_lock , flags ) ;
list_add ( & w - > list , & q - > q ) ;
up ( & q - > s ) ; /* within the spinlock,
see comment near end of drbd_worker ( ) */
spin_unlock_irqrestore ( & q - > q_lock , flags ) ;
}
static inline void
drbd_queue_work ( struct drbd_work_queue * q , struct drbd_work * w )
{
unsigned long flags ;
spin_lock_irqsave ( & q - > q_lock , flags ) ;
list_add_tail ( & w - > list , & q - > q ) ;
up ( & q - > s ) ; /* within the spinlock,
see comment near end of drbd_worker ( ) */
spin_unlock_irqrestore ( & q - > q_lock , flags ) ;
}
2011-02-07 16:49:19 +03:00
static inline void wake_asender ( struct drbd_tconn * tconn )
2009-09-26 03:07:19 +04:00
{
2011-02-07 16:49:19 +03:00
if ( test_bit ( SIGNAL_ASENDER , & tconn - > flags ) )
force_sig ( DRBD_SIG , tconn - > asender . task ) ;
2009-09-26 03:07:19 +04:00
}
2011-02-07 16:49:19 +03:00
static inline void request_ping ( struct drbd_tconn * tconn )
2009-09-26 03:07:19 +04:00
{
2011-02-07 16:49:19 +03:00
set_bit ( SEND_PING , & tconn - > flags ) ;
wake_asender ( tconn ) ;
2009-09-26 03:07:19 +04:00
}
2011-03-29 18:55:40 +04:00
/* Command helpers, each in a per-connection (conn_*, struct drbd_tconn)
 * and a per-device (drbd_*, struct drbd_conf) flavour. The prepare
 * functions return a void pointer; presumably a buffer to fill in before
 * the matching send call -- confirm at the definitions. */
extern void * conn_prepare_command ( struct drbd_tconn * , struct drbd_socket * ) ;
extern void * drbd_prepare_command ( struct drbd_conf * , struct drbd_socket * ) ;
extern int conn_send_command ( struct drbd_tconn * , struct drbd_socket * ,
enum drbd_packet , unsigned int , void * ,
unsigned int ) ;
extern int drbd_send_command ( struct drbd_conf * , struct drbd_socket * ,
enum drbd_packet , unsigned int , void * ,
unsigned int ) ;
2011-03-22 12:55:48 +03:00
/* Both ping helpers work on the connection, not on a single device. */
extern int drbd_send_ping ( struct drbd_tconn * tconn ) ;
extern int drbd_send_ping_ack ( struct drbd_tconn * tconn ) ;
2011-03-28 16:23:08 +04:00
/* State requests carry two union drbd_state values; their roles are not
 * named in these prototypes. */
extern int drbd_send_state_req ( struct drbd_conf * , union drbd_state , union drbd_state ) ;
extern int conn_send_state_req ( struct drbd_tconn * , union drbd_state , union drbd_state ) ;
2011-02-11 17:11:24 +03:00
2009-09-26 03:07:19 +04:00
static inline void drbd_thread_stop ( struct drbd_thread * thi )
{
2010-12-09 17:03:57 +03:00
_drbd_thread_stop ( thi , false , true ) ;
2009-09-26 03:07:19 +04:00
}
static inline void drbd_thread_stop_nowait ( struct drbd_thread * thi )
{
2010-12-09 17:03:57 +03:00
_drbd_thread_stop ( thi , false , false ) ;
2009-09-26 03:07:19 +04:00
}
static inline void drbd_thread_restart_nowait ( struct drbd_thread * thi )
{
2010-12-09 17:03:57 +03:00
_drbd_thread_stop ( thi , true , false ) ;
2009-09-26 03:07:19 +04:00
}
/* counts how many answer packets we expect from our peer,
* for either explicit application requests ,
* or implicit barrier packets as necessary .
* increased :
* w_send_barrier
2011-01-25 17:37:43 +03:00
* _req_mod ( req , QUEUE_FOR_NET_WRITE or QUEUE_FOR_NET_READ ) ;
2009-09-26 03:07:19 +04:00
* it is much easier and equally valid to count what we queue for the
* worker , even before it actually was queued or send .
* ( drbd_make_request_common ; recovery path on read io - error )
* decreased :
* got_BarrierAck ( respective tl_clear , tl_clear_barrier )
2011-01-25 17:37:43 +03:00
* _req_mod ( req , DATA_RECEIVED )
2009-09-26 03:07:19 +04:00
* [ from receive_DataReply ]
2011-01-25 17:37:43 +03:00
* _req_mod ( req , WRITE_ACKED_BY_PEER or RECV_ACKED_BY_PEER or NEG_ACKED )
2009-09-26 03:07:19 +04:00
* [ from got_BlockAck ( P_WRITE_ACK , P_RECV_ACK ) ]
* for some reason it is NOT decreased in got_NegAck ,
* but in the resulting cleanup code from report_params .
* we should try to remember the reason for that . . .
2011-01-25 17:37:43 +03:00
* _req_mod ( req , SEND_FAILED or SEND_CANCELED )
* _req_mod ( req , CONNECTION_LOST_WHILE_PENDING )
2009-09-26 03:07:19 +04:00
* [ from tl_clear_barrier ]
*/
static inline void inc_ap_pending ( struct drbd_conf * mdev )
{
atomic_inc ( & mdev - > ap_pending_cnt ) ;
}
2011-02-21 16:19:44 +03:00
/*
 * Log an error if the atomic counter mdev->@which is negative.
 * @which: name of an atomic_t member of the device
 * @func:  function name to put into the message
 * @line:  line number to put into the message
 *
 * Expects a variable named "mdev" in the scope of the expansion.
 * Wrapped in do { } while (0) so that the expansion is a single statement
 * and cannot capture an "else" that follows the macro invocation.
 */
#define ERR_IF_CNT_IS_NEGATIVE(which, func, line)			\
	do {								\
		if (atomic_read(&mdev->which) < 0)			\
			dev_err(DEV, " in %s:%d: " #which " = %d < 0 ! \n ", \
				func, line,				\
				atomic_read(&mdev->which));		\
	} while (0)
2009-09-26 03:07:19 +04:00
2011-02-21 16:19:44 +03:00
# define dec_ap_pending(mdev) _dec_ap_pending(mdev, __FUNCTION__, __LINE__)
static inline void _dec_ap_pending ( struct drbd_conf * mdev , const char * func , int line )
{
if ( atomic_dec_and_test ( & mdev - > ap_pending_cnt ) )
wake_up ( & mdev - > misc_wait ) ;
ERR_IF_CNT_IS_NEGATIVE ( ap_pending_cnt , func , line ) ;
}
2009-09-26 03:07:19 +04:00
/* counts how many resync-related answers we still expect from the peer
* increase decrease
* C_SYNC_TARGET sends P_RS_DATA_REQUEST ( and expects P_RS_DATA_REPLY )
2011-03-31 05:57:33 +04:00
* C_SYNC_SOURCE sends P_RS_DATA_REPLY ( and expects P_WRITE_ACK with ID_SYNCER )
2009-09-26 03:07:19 +04:00
* ( or P_NEG_ACK with ID_SYNCER )
*/
static inline void inc_rs_pending ( struct drbd_conf * mdev )
{
atomic_inc ( & mdev - > rs_pending_cnt ) ;
}
2011-02-21 16:19:44 +03:00
# define dec_rs_pending(mdev) _dec_rs_pending(mdev, __FUNCTION__, __LINE__)
static inline void _dec_rs_pending ( struct drbd_conf * mdev , const char * func , int line )
{
atomic_dec ( & mdev - > rs_pending_cnt ) ;
ERR_IF_CNT_IS_NEGATIVE ( rs_pending_cnt , func , line ) ;
}
2009-09-26 03:07:19 +04:00
/* counts how many answers we still need to send to the peer.
* increased on
* receive_Data unless protocol A ;
* we need to send a P_RECV_ACK ( proto B )
* or P_WRITE_ACK ( proto C )
* receive_RSDataReply ( recv_resync_read ) we need to send a P_WRITE_ACK
* receive_DataRequest ( receive_RSDataRequest ) we need to send back P_DATA
* receive_Barrier_ * we need to send a P_BARRIER_ACK
*/
static inline void inc_unacked ( struct drbd_conf * mdev )
{
atomic_inc ( & mdev - > unacked_cnt ) ;
}
2011-02-21 16:19:44 +03:00
# define dec_unacked(mdev) _dec_unacked(mdev, __FUNCTION__, __LINE__)
static inline void _dec_unacked ( struct drbd_conf * mdev , const char * func , int line )
{
atomic_dec ( & mdev - > unacked_cnt ) ;
ERR_IF_CNT_IS_NEGATIVE ( unacked_cnt , func , line ) ;
}
2009-09-26 03:07:19 +04:00
2011-02-21 16:19:44 +03:00
# define sub_unacked(mdev, n) _sub_unacked(mdev, n, __FUNCTION__, __LINE__)
static inline void _sub_unacked ( struct drbd_conf * mdev , int n , const char * func , int line )
{
atomic_sub ( n , & mdev - > unacked_cnt ) ;
ERR_IF_CNT_IS_NEGATIVE ( unacked_cnt , func , line ) ;
}
2009-09-26 03:07:19 +04:00
2011-01-19 15:48:44 +03:00
static inline void put_net_conf ( struct drbd_tconn * tconn )
2009-09-26 03:07:19 +04:00
{
2011-01-19 15:48:44 +03:00
if ( atomic_dec_and_test ( & tconn - > net_cnt ) )
wake_up ( & tconn - > net_cnt_wait ) ;
2009-09-26 03:07:19 +04:00
}
/**
2011-01-19 15:12:45 +03:00
* get_net_conf ( ) - Increase ref count on mdev - > tconn - > net_conf ; Returns 0 if nothing there
2009-09-26 03:07:19 +04:00
* @ mdev : DRBD device .
*
2011-01-19 15:12:45 +03:00
* You have to call put_net_conf ( ) when finished working with mdev - > tconn - > net_conf .
2009-09-26 03:07:19 +04:00
*/
2011-01-19 15:48:44 +03:00
static inline int get_net_conf ( struct drbd_tconn * tconn )
2009-09-26 03:07:19 +04:00
{
int have_net_conf ;
2011-01-19 15:48:44 +03:00
atomic_inc ( & tconn - > net_cnt ) ;
2011-02-10 15:45:46 +03:00
have_net_conf = tconn - > cstate > = C_UNCONNECTED ;
2009-09-26 03:07:19 +04:00
if ( ! have_net_conf )
2011-01-19 15:48:44 +03:00
put_net_conf ( tconn ) ;
2009-09-26 03:07:19 +04:00
return have_net_conf ;
}
/**
* get_ldev ( ) - Increase the ref count on mdev - > ldev . Returns 0 if there is no ldev
* @ M : DRBD device .
*
* You have to call put_ldev ( ) when finished working with mdev - > ldev .
*/
# define get_ldev(M) __cond_lock(local, _get_ldev_if_state(M,D_INCONSISTENT))
# define get_ldev_if_state(M,MINS) __cond_lock(local, _get_ldev_if_state(M,MINS))
static inline void put_ldev ( struct drbd_conf * mdev )
{
2010-08-11 23:21:50 +04:00
int i = atomic_dec_return ( & mdev - > local_cnt ) ;
2011-05-02 13:51:31 +04:00
/* This may be called from some endio handler,
* so we must not sleep here . */
2009-09-26 03:07:19 +04:00
__release ( local ) ;
2010-08-11 23:21:50 +04:00
D_ASSERT ( i > = 0 ) ;
2010-09-14 22:26:27 +04:00
if ( i = = 0 ) {
2010-10-16 14:13:47 +04:00
if ( mdev - > state . disk = = D_DISKLESS )
/* even internal references gone, safe to destroy */
drbd_ldev_destroy ( mdev ) ;
2010-09-14 22:26:27 +04:00
if ( mdev - > state . disk = = D_FAILED )
2010-10-16 14:13:47 +04:00
/* all application IO references gone. */
2010-09-14 22:26:27 +04:00
drbd_go_diskless ( mdev ) ;
2009-09-26 03:07:19 +04:00
wake_up ( & mdev - > misc_wait ) ;
2010-09-14 22:26:27 +04:00
}
2009-09-26 03:07:19 +04:00
}
# ifndef __CHECKER__
static inline int _get_ldev_if_state ( struct drbd_conf * mdev , enum drbd_disk_state mins )
{
int io_allowed ;
2010-10-16 14:13:47 +04:00
/* never get a reference while D_DISKLESS */
if ( mdev - > state . disk = = D_DISKLESS )
return 0 ;
2009-09-26 03:07:19 +04:00
atomic_inc ( & mdev - > local_cnt ) ;
io_allowed = ( mdev - > state . disk > = mins ) ;
if ( ! io_allowed )
put_ldev ( mdev ) ;
return io_allowed ;
}
# else
extern int _get_ldev_if_state ( struct drbd_conf * mdev , enum drbd_disk_state mins ) ;
# endif
/* you must have an "get_ldev" reference */
static inline void drbd_get_syncer_progress ( struct drbd_conf * mdev ,
unsigned long * bits_left , unsigned int * per_mil_done )
{
2010-12-14 17:13:04 +03:00
/* this is to break it at compile time when we change that, in case we
* want to support more than ( 1 < < 32 ) bits on a 32 bit arch . */
2009-09-26 03:07:19 +04:00
typecheck ( unsigned long , mdev - > rs_total ) ;
/* note: both rs_total and rs_left are in bits, i.e. in
* units of BM_BLOCK_SIZE .
* for the percentage , we don ' t care . */
2010-11-05 11:52:46 +03:00
if ( mdev - > state . conn = = C_VERIFY_S | | mdev - > state . conn = = C_VERIFY_T )
* bits_left = mdev - > ov_left ;
else
* bits_left = drbd_bm_total_weight ( mdev ) - mdev - > rs_failed ;
2009-09-26 03:07:19 +04:00
/* >> 10 to prevent overflow,
* + 1 to prevent division by zero */
if ( * bits_left > mdev - > rs_total ) {
/* doh. maybe a logic bug somewhere.
* may also be just a race condition
* between this and a disconnect during sync .
* for now , just prevent in - kernel buffer overflow .
*/
smp_rmb ( ) ;
dev_warn ( DEV , " cs:%s rs_left=%lu > rs_total=%lu (rs_failed %lu) \n " ,
drbd_conn_str ( mdev - > state . conn ) ,
* bits_left , mdev - > rs_total , mdev - > rs_failed ) ;
* per_mil_done = 0 ;
} else {
2010-12-14 17:13:04 +03:00
/* Make sure the division happens in long context.
* We allow up to one petabyte storage right now ,
* at a granularity of 4 k per bit that is 2 * * 38 bits .
* After shift right and multiplication by 1000 ,
* this should still fit easily into a 32 bit long ,
* so we don ' t need a 64 bit division on 32 bit arch .
* Note : currently we don ' t support such large bitmaps on 32 bit
* arch anyways , but no harm done to be prepared for it here .
*/
unsigned int shift = mdev - > rs_total > = ( 1ULL < < 32 ) ? 16 : 10 ;
unsigned long left = * bits_left > > shift ;
unsigned long total = 1UL + ( mdev - > rs_total > > shift ) ;
unsigned long tmp = 1000UL - left * 1000UL / total ;
2009-09-26 03:07:19 +04:00
* per_mil_done = tmp ;
}
}
/* this throttles on-the-fly application requests
* according to max_buffers settings ;
* maybe re - implement using semaphores ? */
static inline int drbd_get_max_buffers ( struct drbd_conf * mdev )
{
int mxb = 1000000 ; /* arbitrary limit on open requests */
2011-01-19 15:48:44 +03:00
if ( get_net_conf ( mdev - > tconn ) ) {
2011-01-19 15:12:45 +03:00
mxb = mdev - > tconn - > net_conf - > max_buffers ;
2011-01-19 15:48:44 +03:00
put_net_conf ( mdev - > tconn ) ;
2009-09-26 03:07:19 +04:00
}
return mxb ;
}
2010-11-10 14:08:37 +03:00
static inline int drbd_state_is_stable ( struct drbd_conf * mdev )
2009-09-26 03:07:19 +04:00
{
2011-03-29 12:52:01 +04:00
union drbd_dev_state s = mdev - > state ;
2009-09-26 03:07:19 +04:00
/* DO NOT add a default clause, we want the compiler to warn us
* for any newly introduced state we may have forgotten to add here */
switch ( ( enum drbd_conns ) s . conn ) {
/* new io only accepted when there is no connection, ... */
case C_STANDALONE :
case C_WF_CONNECTION :
/* ... or there is a well established connection. */
case C_CONNECTED :
case C_SYNC_SOURCE :
case C_SYNC_TARGET :
case C_VERIFY_S :
case C_VERIFY_T :
case C_PAUSED_SYNC_S :
case C_PAUSED_SYNC_T :
2010-10-27 14:21:30 +04:00
case C_AHEAD :
case C_BEHIND :
2010-11-10 14:08:37 +03:00
/* transitional states, IO allowed */
2009-09-26 03:07:19 +04:00
case C_DISCONNECTING :
case C_UNCONNECTED :
case C_TIMEOUT :
case C_BROKEN_PIPE :
case C_NETWORK_FAILURE :
case C_PROTOCOL_ERROR :
case C_TEAR_DOWN :
case C_WF_REPORT_PARAMS :
case C_STARTING_SYNC_S :
case C_STARTING_SYNC_T :
2010-11-10 14:08:37 +03:00
break ;
/* Allow IO in BM exchange states with new protocols */
2009-09-26 03:07:19 +04:00
case C_WF_BITMAP_S :
2011-01-19 16:12:51 +03:00
if ( mdev - > tconn - > agreed_pro_version < 96 )
2010-11-10 14:08:37 +03:00
return 0 ;
break ;
/* no new io accepted in these states */
2009-09-26 03:07:19 +04:00
case C_WF_BITMAP_T :
case C_WF_SYNC_UUID :
case C_MASK :
/* not "stable" */
return 0 ;
}
switch ( ( enum drbd_disk_state ) s . disk ) {
case D_DISKLESS :
case D_INCONSISTENT :
case D_OUTDATED :
case D_CONSISTENT :
case D_UP_TO_DATE :
/* disk state is stable as well. */
break ;
/* no new io accepted during tansitional states */
case D_ATTACHING :
case D_FAILED :
case D_NEGOTIATING :
case D_UNKNOWN :
case D_MASK :
/* not "stable" */
return 0 ;
}
return 1 ;
}
2011-03-28 18:48:11 +04:00
static inline int drbd_suspended ( struct drbd_conf * mdev )
2010-09-09 01:20:21 +04:00
{
2011-03-28 18:18:39 +04:00
struct drbd_tconn * tconn = mdev - > tconn ;
return tconn - > susp | | tconn - > susp_fen | | tconn - > susp_nod ;
2010-09-09 01:20:21 +04:00
}
2010-12-13 20:03:38 +03:00
static inline bool may_inc_ap_bio ( struct drbd_conf * mdev )
2009-09-26 03:07:19 +04:00
{
int mxb = drbd_get_max_buffers ( mdev ) ;
2011-03-28 18:48:11 +04:00
if ( drbd_suspended ( mdev ) )
2010-12-13 20:03:38 +03:00
return false ;
2009-09-26 03:07:19 +04:00
if ( test_bit ( SUSPEND_IO , & mdev - > flags ) )
2010-12-13 20:03:38 +03:00
return false ;
2009-09-26 03:07:19 +04:00
/* to avoid potential deadlock or bitmap corruption,
* in various places , we only allow new application io
* to start during " stable " states . */
/* no new io accepted when attaching or detaching the disk */
2010-11-10 14:08:37 +03:00
if ( ! drbd_state_is_stable ( mdev ) )
2010-12-13 20:03:38 +03:00
return false ;
2009-09-26 03:07:19 +04:00
/* since some older kernels don't have atomic_add_unless,
* and we are within the spinlock anyways , we have this workaround . */
if ( atomic_read ( & mdev - > ap_bio_cnt ) > mxb )
2010-12-13 20:03:38 +03:00
return false ;
2009-09-26 03:07:19 +04:00
if ( test_bit ( BITMAP_IO , & mdev - > flags ) )
2010-12-13 20:03:38 +03:00
return false ;
return true ;
2009-09-26 03:07:19 +04:00
}
2011-03-31 18:36:43 +04:00
static inline bool inc_ap_bio_cond ( struct drbd_conf * mdev )
2009-09-26 03:07:19 +04:00
{
2010-12-13 20:03:38 +03:00
bool rv = false ;
2010-11-17 20:24:19 +03:00
2011-01-19 16:16:30 +03:00
spin_lock_irq ( & mdev - > tconn - > req_lock ) ;
2010-12-13 20:03:38 +03:00
rv = may_inc_ap_bio ( mdev ) ;
2010-11-17 20:24:19 +03:00
if ( rv )
2011-03-31 18:36:43 +04:00
atomic_inc ( & mdev - > ap_bio_cnt ) ;
2011-01-19 16:16:30 +03:00
spin_unlock_irq ( & mdev - > tconn - > req_lock ) ;
2010-11-17 20:24:19 +03:00
return rv ;
}
2009-09-26 03:07:19 +04:00
2011-03-31 18:36:43 +04:00
static inline void inc_ap_bio ( struct drbd_conf * mdev )
2010-11-17 20:24:19 +03:00
{
2009-09-26 03:07:19 +04:00
/* we wait here
* as long as the device is suspended
* until the bitmap is no longer on the fly during connection
* handshake as long as we would exeed the max_buffer limit .
*
* to avoid races with the reconnect code ,
* we need to atomic_inc within the spinlock . */
2011-03-31 18:36:43 +04:00
wait_event ( mdev - > misc_wait , inc_ap_bio_cond ( mdev ) ) ;
2009-09-26 03:07:19 +04:00
}
static inline void dec_ap_bio ( struct drbd_conf * mdev )
{
int mxb = drbd_get_max_buffers ( mdev ) ;
int ap_bio = atomic_dec_return ( & mdev - > ap_bio_cnt ) ;
D_ASSERT ( ap_bio > = 0 ) ;
/* this currently does wake_up for every dec_ap_bio!
* maybe rather introduce some type of hysteresis ?
* e . g . ( ap_bio = = mxb / 2 | | ap_bio = = 0 ) ? */
if ( ap_bio < mxb )
wake_up ( & mdev - > misc_wait ) ;
if ( ap_bio = = 0 & & test_bit ( BITMAP_IO , & mdev - > flags ) ) {
if ( ! test_and_set_bit ( BITMAP_IO_QUEUED , & mdev - > flags ) )
2011-01-19 15:55:45 +03:00
drbd_queue_work ( & mdev - > tconn - > data . work , & mdev - > bm_io_work . w ) ;
2009-09-26 03:07:19 +04:00
}
}
2011-01-20 15:25:21 +03:00
/* Store @val in mdev->ed_uuid. Returns non-zero if the stored value was
 * different from @val before the call, 0 if it was already equal. */
static inline int drbd_set_ed_uuid(struct drbd_conf *mdev, u64 val)
{
	int differs = (mdev->ed_uuid != val);

	mdev->ed_uuid = val;

	return differs;
}
/* fallback for environments that do not provide the constant */
#ifndef QUEUE_ORDERED_NONE
#define QUEUE_ORDERED_NONE 0
#endif
/* Always reports QUEUE_ORDERED_NONE; @mdev is not looked at. */
static inline int drbd_queue_order_type(struct drbd_conf *mdev)
{
	/* sorry, we currently have no working implementation
	 * of distributed TCQ stuff */
	return QUEUE_ORDERED_NONE;
}
static inline void drbd_md_flush ( struct drbd_conf * mdev )
{
int r ;
2010-08-25 12:21:04 +04:00
if ( test_bit ( MD_NO_FUA , & mdev - > flags ) )
2009-09-26 03:07:19 +04:00
return ;
2010-09-16 22:51:46 +04:00
r = blkdev_issue_flush ( mdev - > ldev - > md_bdev , GFP_KERNEL , NULL ) ;
2009-09-26 03:07:19 +04:00
if ( r ) {
2010-08-25 12:21:04 +04:00
set_bit ( MD_NO_FUA , & mdev - > flags ) ;
2009-09-26 03:07:19 +04:00
dev_err ( DEV , " meta data flush failed with status %d, disabling md-flushes \n " , r ) ;
}
}
# endif