drbd: Implement handling of thinly provisioned storage on resync target nodes
If during resync we read only zeroes for a range of sectors, assume that these sectors can be discarded on the sync target node. Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com> Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com> Signed-off-by: Jens Axboe <axboe@fb.com>
This commit is contained in:
parent
c5c2385481
commit
700ca8c04a
@ -471,6 +471,9 @@ enum {
|
||||
/* this originates from application on peer
|
||||
* (not some resync or verify or other DRBD internal request) */
|
||||
__EE_APPLICATION,
|
||||
|
||||
/* If it contains only 0 bytes, send back P_RS_DEALLOCATED */
|
||||
__EE_RS_THIN_REQ,
|
||||
};
|
||||
#define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO)
|
||||
#define EE_MAY_SET_IN_SYNC (1<<__EE_MAY_SET_IN_SYNC)
|
||||
@ -485,6 +488,7 @@ enum {
|
||||
#define EE_SUBMITTED (1<<__EE_SUBMITTED)
|
||||
#define EE_WRITE (1<<__EE_WRITE)
|
||||
#define EE_APPLICATION (1<<__EE_APPLICATION)
|
||||
#define EE_RS_THIN_REQ (1<<__EE_RS_THIN_REQ)
|
||||
|
||||
/* flag bits per device */
|
||||
enum {
|
||||
@ -1123,6 +1127,7 @@ extern int drbd_send_ov_request(struct drbd_peer_device *, sector_t sector, int
|
||||
extern int drbd_send_bitmap(struct drbd_device *device);
|
||||
extern void drbd_send_sr_reply(struct drbd_peer_device *, enum drbd_state_rv retcode);
|
||||
extern void conn_send_sr_reply(struct drbd_connection *connection, enum drbd_state_rv retcode);
|
||||
extern int drbd_send_rs_deallocated(struct drbd_peer_device *, struct drbd_peer_request *);
|
||||
extern void drbd_backing_dev_free(struct drbd_device *device, struct drbd_backing_dev *ldev);
|
||||
extern void drbd_device_cleanup(struct drbd_device *device);
|
||||
void drbd_print_uuids(struct drbd_device *device, const char *text);
|
||||
|
@ -1377,6 +1377,22 @@ int drbd_send_ack_ex(struct drbd_peer_device *peer_device, enum drbd_packet cmd,
|
||||
cpu_to_be64(block_id));
|
||||
}
|
||||
|
||||
int drbd_send_rs_deallocated(struct drbd_peer_device *peer_device,
|
||||
struct drbd_peer_request *peer_req)
|
||||
{
|
||||
struct drbd_socket *sock;
|
||||
struct p_block_desc *p;
|
||||
|
||||
sock = &peer_device->connection->data;
|
||||
p = drbd_prepare_command(peer_device, sock);
|
||||
if (!p)
|
||||
return -EIO;
|
||||
p->sector = cpu_to_be64(peer_req->i.sector);
|
||||
p->blksize = cpu_to_be32(peer_req->i.size);
|
||||
p->pad = 0;
|
||||
return drbd_send_command(peer_device, sock, P_RS_DEALLOCATED, sizeof(*p), NULL, 0);
|
||||
}
|
||||
|
||||
int drbd_send_drequest(struct drbd_peer_device *peer_device, int cmd,
|
||||
sector_t sector, int size, u64 block_id)
|
||||
{
|
||||
@ -3683,6 +3699,8 @@ const char *cmdname(enum drbd_packet cmd)
|
||||
[P_CONN_ST_CHG_REPLY] = "conn_st_chg_reply",
|
||||
[P_RETRY_WRITE] = "retry_write",
|
||||
[P_PROTOCOL_UPDATE] = "protocol_update",
|
||||
[P_RS_THIN_REQ] = "rs_thin_req",
|
||||
[P_RS_DEALLOCATED] = "rs_deallocated",
|
||||
|
||||
/* enum drbd_packet, but not commands - obsoleted flags:
|
||||
* P_MAY_IGNORE
|
||||
|
@ -60,6 +60,10 @@ enum drbd_packet {
|
||||
* which is why I chose TRIM here, to disambiguate. */
|
||||
P_TRIM = 0x31,
|
||||
|
||||
/* Only use these two if both support FF_THIN_RESYNC */
|
||||
P_RS_THIN_REQ = 0x32, /* Request a block for resync or reply P_RS_DEALLOCATED */
|
||||
P_RS_DEALLOCATED = 0x33, /* Contains only zeros on sync source node */
|
||||
|
||||
P_MAY_IGNORE = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */
|
||||
P_MAX_OPT_CMD = 0x101,
|
||||
|
||||
|
@ -1418,9 +1418,15 @@ int drbd_submit_peer_request(struct drbd_device *device,
|
||||
* so we can find it to present it in debugfs */
|
||||
peer_req->submit_jif = jiffies;
|
||||
peer_req->flags |= EE_SUBMITTED;
|
||||
spin_lock_irq(&device->resource->req_lock);
|
||||
list_add_tail(&peer_req->w.list, &device->active_ee);
|
||||
spin_unlock_irq(&device->resource->req_lock);
|
||||
|
||||
/* If this was a resync request from receive_rs_deallocated(),
|
||||
* it is already on the sync_ee list */
|
||||
if (list_empty(&peer_req->w.list)) {
|
||||
spin_lock_irq(&device->resource->req_lock);
|
||||
list_add_tail(&peer_req->w.list, &device->active_ee);
|
||||
spin_unlock_irq(&device->resource->req_lock);
|
||||
}
|
||||
|
||||
if (blkdev_issue_zeroout(device->ldev->backing_bdev,
|
||||
sector, data_size >> 9, GFP_NOIO, false))
|
||||
peer_req->flags |= EE_WAS_ERROR;
|
||||
@ -2585,6 +2591,7 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet
|
||||
case P_DATA_REQUEST:
|
||||
drbd_send_ack_rp(peer_device, P_NEG_DREPLY, p);
|
||||
break;
|
||||
case P_RS_THIN_REQ:
|
||||
case P_RS_DATA_REQUEST:
|
||||
case P_CSUM_RS_REQUEST:
|
||||
case P_OV_REQUEST:
|
||||
@ -2624,6 +2631,12 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet
|
||||
peer_req->flags |= EE_APPLICATION;
|
||||
goto submit;
|
||||
|
||||
case P_RS_THIN_REQ:
|
||||
/* If at some point in the future we have a smart way to
|
||||
find out if this data block is completely deallocated,
|
||||
then we would do something smarter here than reading
|
||||
the block... */
|
||||
peer_req->flags |= EE_RS_THIN_REQ;
|
||||
case P_RS_DATA_REQUEST:
|
||||
peer_req->w.cb = w_e_end_rsdata_req;
|
||||
fault_type = DRBD_FAULT_RS_RD;
|
||||
@ -4599,6 +4612,72 @@ static int receive_out_of_sync(struct drbd_connection *connection, struct packet
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * receive_rs_deallocated() - handle an incoming P_RS_DEALLOCATED packet
 *
 * The sync source told us that this resync range contains only zeros.
 * Instead of writing zeros, submit a REQ_OP_DISCARD for the range on the
 * local backing device (if attached); otherwise negatively acknowledge.
 * Returns 0 on success, -EIO if the peer device cannot be resolved,
 * -ENOMEM if no peer request could be allocated.
 */
static int receive_rs_deallocated(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct p_block_desc *p = pi->data;
	struct drbd_device *device;
	sector_t sector;
	int size, err = 0;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	/* Convert the range description from network byte order. */
	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	/* This packet answers one of our resync requests. */
	dec_rs_pending(device);

	if (get_ldev(device)) {
		struct drbd_peer_request *peer_req;
		const int op = REQ_OP_DISCARD;

		peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER, sector,
					       size, false, GFP_NOIO);
		if (!peer_req) {
			put_ldev(device);
			return -ENOMEM;
		}

		peer_req->w.cb = e_end_resync_block;
		peer_req->submit_jif = jiffies;
		peer_req->flags |= EE_IS_TRIM;

		/* Track the in-flight request on the sync_ee list so the
		 * completion path finds it. */
		spin_lock_irq(&device->resource->req_lock);
		list_add_tail(&peer_req->w.list, &device->sync_ee);
		spin_unlock_irq(&device->resource->req_lock);

		/* Account resync traffic for the resync rate controller. */
		atomic_add(pi->size >> 9, &device->rs_sect_ev);
		err = drbd_submit_peer_request(device, peer_req, op, 0, DRBD_FAULT_RS_WR);

		if (err) {
			/* Submission failed: unhook, free, and fall through
			 * to the negative-ack path below.  err is reset to 0
			 * because the failure is reported via P_NEG_ACK, not
			 * by tearing down the connection. */
			spin_lock_irq(&device->resource->req_lock);
			list_del(&peer_req->w.list);
			spin_unlock_irq(&device->resource->req_lock);

			drbd_free_peer_req(device, peer_req);
			put_ldev(device);
			err = 0;
			goto fail;
		}

		inc_unacked(device);

		/* No put_ldev() here. Gets called in drbd_endio_write_sec_final(),
		   as well as drbd_rs_complete_io() */
	} else {
	fail:
		/* No local disk (or submit failed): complete the resync I/O
		 * bookkeeping and tell the peer we could not apply it. */
		drbd_rs_complete_io(device, sector);
		drbd_send_ack_ex(peer_device, P_NEG_ACK, sector, size, ID_SYNCER);
	}

	/* Sectors "received" during resync, for rate calculation. */
	atomic_add(size >> 9, &device->rs_sect_in);

	return err;
}
|
||||
|
||||
struct data_cmd {
|
||||
int expect_payload;
|
||||
size_t pkt_size;
|
||||
@ -4626,11 +4705,14 @@ static struct data_cmd drbd_cmd_handler[] = {
|
||||
[P_OV_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
|
||||
[P_OV_REPLY] = { 1, sizeof(struct p_block_req), receive_DataRequest },
|
||||
[P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
|
||||
[P_RS_THIN_REQ] = { 0, sizeof(struct p_block_req), receive_DataRequest },
|
||||
[P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip },
|
||||
[P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
|
||||
[P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state },
|
||||
[P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol },
|
||||
[P_TRIM] = { 0, sizeof(struct p_trim), receive_Data },
|
||||
[P_RS_DEALLOCATED] = { 0, sizeof(struct p_block_desc), receive_rs_deallocated },
|
||||
|
||||
};
|
||||
|
||||
static void drbdd(struct drbd_connection *connection)
|
||||
|
@ -1036,6 +1036,30 @@ int w_e_end_data_req(struct drbd_work *w, int cancel)
|
||||
return err;
|
||||
}
|
||||
|
||||
static bool all_zero(struct drbd_peer_request *peer_req)
|
||||
{
|
||||
struct page *page = peer_req->pages;
|
||||
unsigned int len = peer_req->i.size;
|
||||
|
||||
page_chain_for_each(page) {
|
||||
unsigned int l = min_t(unsigned int, len, PAGE_SIZE);
|
||||
unsigned int i, words = l / sizeof(long);
|
||||
unsigned long *d;
|
||||
|
||||
d = kmap_atomic(page);
|
||||
for (i = 0; i < words; i++) {
|
||||
if (d[i]) {
|
||||
kunmap_atomic(d);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
kunmap_atomic(d);
|
||||
len -= l;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUEST
|
||||
* @w: work object.
|
||||
@ -1064,7 +1088,10 @@ int w_e_end_rsdata_req(struct drbd_work *w, int cancel)
|
||||
} else if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
|
||||
if (likely(device->state.pdsk >= D_INCONSISTENT)) {
|
||||
inc_rs_pending(device);
|
||||
err = drbd_send_block(peer_device, P_RS_DATA_REPLY, peer_req);
|
||||
if (peer_req->flags & EE_RS_THIN_REQ && all_zero(peer_req))
|
||||
err = drbd_send_rs_deallocated(peer_device, peer_req);
|
||||
else
|
||||
err = drbd_send_block(peer_device, P_RS_DATA_REPLY, peer_req);
|
||||
} else {
|
||||
if (__ratelimit(&drbd_ratelimit_state))
|
||||
drbd_err(device, "Not sending RSDataReply, "
|
||||
|
Loading…
Reference in New Issue
Block a user