s390/dasd: Add discard support for FBA devices
The z/VM hypervisor provides virtual disks (VDISK) which are backed by main memory of the hypervisor. Those devices are seen as DASD FBA disks within the Linux guest. Whenever data is written to such a device, memory is allocated on-the-fly by z/VM accordingly. This memory, however, is not being freed if data on the device is deleted by the guest OS. In order to make memory usable after deletion again, add discard support to the FBA discipline. While at it, update comments regarding the DASD_FEATURE_* flags. Reviewed-by: Stefan Haberland <sth@linux.vnet.ibm.com> Signed-off-by: Jan Höppner <hoeppner@linux.vnet.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
This commit is contained in:
parent
8b94dd9e0d
commit
28b841b3a7
@ -72,7 +72,10 @@ typedef struct dasd_information2_t {
|
|||||||
* 0x02: use diag discipline (diag)
|
* 0x02: use diag discipline (diag)
|
||||||
* 0x04: set the device initially online (internal use only)
|
* 0x04: set the device initially online (internal use only)
|
||||||
* 0x08: enable ERP related logging
|
* 0x08: enable ERP related logging
|
||||||
* 0x20: give access to raw eckd data
|
* 0x10: allow I/O to fail on lost paths
|
||||||
|
* 0x20: allow I/O to fail when a lock was stolen
|
||||||
|
* 0x40: give access to raw eckd data
|
||||||
|
* 0x80: enable discard support
|
||||||
*/
|
*/
|
||||||
#define DASD_FEATURE_DEFAULT 0x00
|
#define DASD_FEATURE_DEFAULT 0x00
|
||||||
#define DASD_FEATURE_READONLY 0x01
|
#define DASD_FEATURE_READONLY 0x01
|
||||||
@ -82,6 +85,7 @@ typedef struct dasd_information2_t {
|
|||||||
#define DASD_FEATURE_FAILFAST 0x10
|
#define DASD_FEATURE_FAILFAST 0x10
|
||||||
#define DASD_FEATURE_FAILONSLCK 0x20
|
#define DASD_FEATURE_FAILONSLCK 0x20
|
||||||
#define DASD_FEATURE_USERAW 0x40
|
#define DASD_FEATURE_USERAW 0x40
|
||||||
|
#define DASD_FEATURE_DISCARD 0x80
|
||||||
|
|
||||||
#define DASD_PARTN_BITS 2
|
#define DASD_PARTN_BITS 2
|
||||||
|
|
||||||
|
@ -3178,7 +3178,9 @@ static int dasd_alloc_queue(struct dasd_block *block)
|
|||||||
*/
|
*/
|
||||||
static void dasd_setup_queue(struct dasd_block *block)
|
static void dasd_setup_queue(struct dasd_block *block)
|
||||||
{
|
{
|
||||||
|
unsigned int logical_block_size = block->bp_block;
|
||||||
struct request_queue *q = block->request_queue;
|
struct request_queue *q = block->request_queue;
|
||||||
|
unsigned int max_bytes, max_discard_sectors;
|
||||||
int max;
|
int max;
|
||||||
|
|
||||||
if (block->base->features & DASD_FEATURE_USERAW) {
|
if (block->base->features & DASD_FEATURE_USERAW) {
|
||||||
@ -3195,7 +3197,7 @@ static void dasd_setup_queue(struct dasd_block *block)
|
|||||||
}
|
}
|
||||||
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
|
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
|
||||||
q->limits.max_dev_sectors = max;
|
q->limits.max_dev_sectors = max;
|
||||||
blk_queue_logical_block_size(q, block->bp_block);
|
blk_queue_logical_block_size(q, logical_block_size);
|
||||||
blk_queue_max_hw_sectors(q, max);
|
blk_queue_max_hw_sectors(q, max);
|
||||||
blk_queue_max_segments(q, USHRT_MAX);
|
blk_queue_max_segments(q, USHRT_MAX);
|
||||||
/* with page sized segments we can translate each segement into
|
/* with page sized segments we can translate each segement into
|
||||||
@ -3203,6 +3205,21 @@ static void dasd_setup_queue(struct dasd_block *block)
|
|||||||
*/
|
*/
|
||||||
blk_queue_max_segment_size(q, PAGE_SIZE);
|
blk_queue_max_segment_size(q, PAGE_SIZE);
|
||||||
blk_queue_segment_boundary(q, PAGE_SIZE - 1);
|
blk_queue_segment_boundary(q, PAGE_SIZE - 1);
|
||||||
|
|
||||||
|
/* Only activate blocklayer discard support for devices that support it */
|
||||||
|
if (block->base->features & DASD_FEATURE_DISCARD) {
|
||||||
|
q->limits.discard_granularity = logical_block_size;
|
||||||
|
q->limits.discard_alignment = PAGE_SIZE;
|
||||||
|
|
||||||
|
/* Calculate max_discard_sectors and make it PAGE aligned */
|
||||||
|
max_bytes = USHRT_MAX * logical_block_size;
|
||||||
|
max_bytes = ALIGN(max_bytes, PAGE_SIZE) - PAGE_SIZE;
|
||||||
|
max_discard_sectors = max_bytes / logical_block_size;
|
||||||
|
|
||||||
|
blk_queue_max_discard_sectors(q, max_discard_sectors);
|
||||||
|
blk_queue_max_write_zeroes_sectors(q, max_discard_sectors);
|
||||||
|
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -1676,6 +1676,7 @@ dasd_set_feature(struct ccw_device *cdev, int feature, int flag)
|
|||||||
spin_unlock(&dasd_devmap_lock);
|
spin_unlock(&dasd_devmap_lock);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
EXPORT_SYMBOL(dasd_set_feature);
|
||||||
|
|
||||||
|
|
||||||
int
|
int
|
||||||
|
@ -174,6 +174,9 @@ dasd_fba_check_characteristics(struct dasd_device *device)
|
|||||||
if (readonly)
|
if (readonly)
|
||||||
set_bit(DASD_FLAG_DEVICE_RO, &device->flags);
|
set_bit(DASD_FLAG_DEVICE_RO, &device->flags);
|
||||||
|
|
||||||
|
/* FBA supports discard, set the according feature bit */
|
||||||
|
dasd_set_feature(cdev, DASD_FEATURE_DISCARD, 1);
|
||||||
|
|
||||||
dev_info(&device->cdev->dev,
|
dev_info(&device->cdev->dev,
|
||||||
"New FBA DASD %04X/%02X (CU %04X/%02X) with %d MB "
|
"New FBA DASD %04X/%02X (CU %04X/%02X) with %d MB "
|
||||||
"and %d B/blk%s\n",
|
"and %d B/blk%s\n",
|
||||||
@ -247,7 +250,190 @@ static void dasd_fba_check_for_device_change(struct dasd_device *device,
|
|||||||
dasd_generic_handle_state_change(device);
|
dasd_generic_handle_state_change(device);
|
||||||
};
|
};
|
||||||
|
|
||||||
static struct dasd_ccw_req *dasd_fba_build_cp(struct dasd_device * memdev,
|
|
||||||
|
/*
|
||||||
|
* Builds a CCW with no data payload
|
||||||
|
*/
|
||||||
|
static void ccw_write_no_data(struct ccw1 *ccw)
|
||||||
|
{
|
||||||
|
ccw->cmd_code = DASD_FBA_CCW_WRITE;
|
||||||
|
ccw->flags |= CCW_FLAG_SLI;
|
||||||
|
ccw->count = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Builds a CCW that writes only zeroes.
|
||||||
|
*/
|
||||||
|
static void ccw_write_zero(struct ccw1 *ccw, int count)
|
||||||
|
{
|
||||||
|
ccw->cmd_code = DASD_FBA_CCW_WRITE;
|
||||||
|
ccw->flags |= CCW_FLAG_SLI;
|
||||||
|
ccw->count = count;
|
||||||
|
ccw->cda = (__u32) (addr_t) page_to_phys(ZERO_PAGE(0));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Helper function to count the amount of necessary CCWs within a given range
|
||||||
|
* with 4k alignment and command chaining in mind.
|
||||||
|
*/
|
||||||
|
static int count_ccws(sector_t first_rec, sector_t last_rec,
|
||||||
|
unsigned int blocks_per_page)
|
||||||
|
{
|
||||||
|
sector_t wz_stop = 0, d_stop = 0;
|
||||||
|
int cur_pos = 0;
|
||||||
|
int count = 0;
|
||||||
|
|
||||||
|
if (first_rec % blocks_per_page != 0) {
|
||||||
|
wz_stop = first_rec + blocks_per_page -
|
||||||
|
(first_rec % blocks_per_page) - 1;
|
||||||
|
if (wz_stop > last_rec)
|
||||||
|
wz_stop = last_rec;
|
||||||
|
cur_pos = wz_stop - first_rec + 1;
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (last_rec - (first_rec + cur_pos) + 1 >= blocks_per_page) {
|
||||||
|
if ((last_rec - blocks_per_page + 1) % blocks_per_page != 0)
|
||||||
|
d_stop = last_rec - ((last_rec - blocks_per_page + 1) %
|
||||||
|
blocks_per_page);
|
||||||
|
else
|
||||||
|
d_stop = last_rec;
|
||||||
|
|
||||||
|
cur_pos += d_stop - (first_rec + cur_pos) + 1;
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cur_pos == 0 || first_rec + cur_pos - 1 < last_rec)
|
||||||
|
count++;
|
||||||
|
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This function builds a CCW request for block layer discard requests.
|
||||||
|
* Each page in the z/VM hypervisor that represents certain records of an FBA
|
||||||
|
* device will be padded with zeros. This is a special behaviour of the WRITE
|
||||||
|
* command which is triggered when no data payload is added to the CCW.
|
||||||
|
*
|
||||||
|
* Note: Due to issues in some z/VM versions, we can't fully utilise this
|
||||||
|
* special behaviour. We have to keep a 4k (or 8 block) alignment in mind to
|
||||||
|
* work around those issues and write actual zeroes to the unaligned parts in
|
||||||
|
* the request. This workaround might be removed in the future.
|
||||||
|
*/
|
||||||
|
static struct dasd_ccw_req *dasd_fba_build_cp_discard(
|
||||||
|
struct dasd_device *memdev,
|
||||||
|
struct dasd_block *block,
|
||||||
|
struct request *req)
|
||||||
|
{
|
||||||
|
struct LO_fba_data *LO_data;
|
||||||
|
struct dasd_ccw_req *cqr;
|
||||||
|
struct ccw1 *ccw;
|
||||||
|
|
||||||
|
sector_t wz_stop = 0, d_stop = 0;
|
||||||
|
sector_t first_rec, last_rec;
|
||||||
|
|
||||||
|
unsigned int blksize = block->bp_block;
|
||||||
|
unsigned int blocks_per_page;
|
||||||
|
int wz_count = 0;
|
||||||
|
int d_count = 0;
|
||||||
|
int cur_pos = 0; /* Current position within the extent */
|
||||||
|
int count = 0;
|
||||||
|
int cplength;
|
||||||
|
int datasize;
|
||||||
|
int nr_ccws;
|
||||||
|
|
||||||
|
first_rec = blk_rq_pos(req) >> block->s2b_shift;
|
||||||
|
last_rec =
|
||||||
|
(blk_rq_pos(req) + blk_rq_sectors(req) - 1) >> block->s2b_shift;
|
||||||
|
count = last_rec - first_rec + 1;
|
||||||
|
|
||||||
|
blocks_per_page = BLOCKS_PER_PAGE(blksize);
|
||||||
|
nr_ccws = count_ccws(first_rec, last_rec, blocks_per_page);
|
||||||
|
|
||||||
|
/* define extent + nr_ccws * locate record + nr_ccws * single CCW */
|
||||||
|
cplength = 1 + 2 * nr_ccws;
|
||||||
|
datasize = sizeof(struct DE_fba_data) +
|
||||||
|
nr_ccws * (sizeof(struct LO_fba_data) + sizeof(struct ccw1));
|
||||||
|
|
||||||
|
cqr = dasd_smalloc_request(DASD_FBA_MAGIC, cplength, datasize, memdev);
|
||||||
|
if (IS_ERR(cqr))
|
||||||
|
return cqr;
|
||||||
|
|
||||||
|
ccw = cqr->cpaddr;
|
||||||
|
|
||||||
|
define_extent(ccw++, cqr->data, WRITE, blksize, first_rec, count);
|
||||||
|
LO_data = cqr->data + sizeof(struct DE_fba_data);
|
||||||
|
|
||||||
|
/* First part is not aligned. Calculate range to write zeroes. */
|
||||||
|
if (first_rec % blocks_per_page != 0) {
|
||||||
|
wz_stop = first_rec + blocks_per_page -
|
||||||
|
(first_rec % blocks_per_page) - 1;
|
||||||
|
if (wz_stop > last_rec)
|
||||||
|
wz_stop = last_rec;
|
||||||
|
wz_count = wz_stop - first_rec + 1;
|
||||||
|
|
||||||
|
ccw[-1].flags |= CCW_FLAG_CC;
|
||||||
|
locate_record(ccw++, LO_data++, WRITE, cur_pos, wz_count);
|
||||||
|
|
||||||
|
ccw[-1].flags |= CCW_FLAG_CC;
|
||||||
|
ccw_write_zero(ccw++, wz_count * blksize);
|
||||||
|
|
||||||
|
cur_pos = wz_count;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* We can do proper discard when we've got at least blocks_per_page blocks. */
|
||||||
|
if (last_rec - (first_rec + cur_pos) + 1 >= blocks_per_page) {
|
||||||
|
/* is last record at page boundary? */
|
||||||
|
if ((last_rec - blocks_per_page + 1) % blocks_per_page != 0)
|
||||||
|
d_stop = last_rec - ((last_rec - blocks_per_page + 1) %
|
||||||
|
blocks_per_page);
|
||||||
|
else
|
||||||
|
d_stop = last_rec;
|
||||||
|
|
||||||
|
d_count = d_stop - (first_rec + cur_pos) + 1;
|
||||||
|
|
||||||
|
ccw[-1].flags |= CCW_FLAG_CC;
|
||||||
|
locate_record(ccw++, LO_data++, WRITE, cur_pos, d_count);
|
||||||
|
|
||||||
|
ccw[-1].flags |= CCW_FLAG_CC;
|
||||||
|
ccw_write_no_data(ccw++);
|
||||||
|
|
||||||
|
cur_pos += d_count;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* We might still have some bits left which need to be zeroed. */
|
||||||
|
if (cur_pos == 0 || first_rec + cur_pos - 1 < last_rec) {
|
||||||
|
if (d_stop != 0)
|
||||||
|
wz_count = last_rec - d_stop;
|
||||||
|
else if (wz_stop != 0)
|
||||||
|
wz_count = last_rec - wz_stop;
|
||||||
|
else
|
||||||
|
wz_count = count;
|
||||||
|
|
||||||
|
ccw[-1].flags |= CCW_FLAG_CC;
|
||||||
|
locate_record(ccw++, LO_data++, WRITE, cur_pos, wz_count);
|
||||||
|
|
||||||
|
ccw[-1].flags |= CCW_FLAG_CC;
|
||||||
|
ccw_write_zero(ccw++, wz_count * blksize);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (blk_noretry_request(req) ||
|
||||||
|
block->base->features & DASD_FEATURE_FAILFAST)
|
||||||
|
set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags);
|
||||||
|
|
||||||
|
cqr->startdev = memdev;
|
||||||
|
cqr->memdev = memdev;
|
||||||
|
cqr->block = block;
|
||||||
|
cqr->expires = memdev->default_expires * HZ; /* default 5 minutes */
|
||||||
|
cqr->retries = memdev->default_retries;
|
||||||
|
cqr->buildclk = get_tod_clock();
|
||||||
|
cqr->status = DASD_CQR_FILLED;
|
||||||
|
|
||||||
|
return cqr;
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct dasd_ccw_req *dasd_fba_build_cp_regular(
|
||||||
|
struct dasd_device *memdev,
|
||||||
struct dasd_block *block,
|
struct dasd_block *block,
|
||||||
struct request *req)
|
struct request *req)
|
||||||
{
|
{
|
||||||
@ -372,6 +558,16 @@ static struct dasd_ccw_req *dasd_fba_build_cp(struct dasd_device * memdev,
|
|||||||
return cqr;
|
return cqr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static struct dasd_ccw_req *dasd_fba_build_cp(struct dasd_device *memdev,
|
||||||
|
struct dasd_block *block,
|
||||||
|
struct request *req)
|
||||||
|
{
|
||||||
|
if (req_op(req) == REQ_OP_DISCARD || req_op(req) == REQ_OP_WRITE_ZEROES)
|
||||||
|
return dasd_fba_build_cp_discard(memdev, block, req);
|
||||||
|
else
|
||||||
|
return dasd_fba_build_cp_regular(memdev, block, req);
|
||||||
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
dasd_fba_free_cp(struct dasd_ccw_req *cqr, struct request *req)
|
dasd_fba_free_cp(struct dasd_ccw_req *cqr, struct request *req)
|
||||||
{
|
{
|
||||||
|
@ -167,6 +167,9 @@ do { \
|
|||||||
printk(d_loglevel PRINTK_HEADER " " d_string "\n", d_args); \
|
printk(d_loglevel PRINTK_HEADER " " d_string "\n", d_args); \
|
||||||
} while(0)
|
} while(0)
|
||||||
|
|
||||||
|
/*
 * Number of blocks of size blksize that fit into one page.
 * The argument is parenthesized so that an expression argument
 * (e.g. a + b) is used as the divisor as a whole.
 */
#define BLOCKS_PER_PAGE(blksize) (PAGE_SIZE / (blksize))
|
||||||
|
|
||||||
struct dasd_ccw_req {
|
struct dasd_ccw_req {
|
||||||
unsigned int magic; /* Eye catcher */
|
unsigned int magic; /* Eye catcher */
|
||||||
struct list_head devlist; /* for dasd_device request queue */
|
struct list_head devlist; /* for dasd_device request queue */
|
||||||
|
Loading…
Reference in New Issue
Block a user