GFS2: Implement iomap for block_map

This patch implements iomap for block mapping, and switches the
block_map function to use it under the covers.

The additional IOMAP_F_BOUNDARY iomap flag indicates when iomap has
reached a "metadata boundary" and fetching the next mapping is likely to
incur an additional I/O.  This flag is used for setting the bh buffer
boundary flag.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
This commit is contained in:
Bob Peterson 2017-02-16 10:27:16 -05:00 committed by Andreas Gruenbacher
parent 5f8bd4440d
commit 3974320ca6
4 changed files with 307 additions and 100 deletions

View File

@ -13,6 +13,7 @@
#include <linux/blkdev.h>
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
#include <linux/iomap.h>
#include "gfs2.h"
#include "incore.h"
@ -505,10 +506,8 @@ static inline unsigned int hptrs(struct gfs2_sbd *sdp, const unsigned int hgt)
* Returns: errno on error
*/
static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
bool zero_new, struct metapath *mp,
const size_t maxlen, sector_t *dblock,
unsigned *dblks)
static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
unsigned flags, struct metapath *mp)
{
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
@ -516,35 +515,36 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
struct buffer_head *dibh = mp->mp_bh[0];
u64 bn;
unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0;
unsigned dblks = 0;
unsigned ptrs_per_blk;
const unsigned end_of_metadata = mp->mp_fheight - 1;
int ret;
int eob = 0;
enum alloc_state state;
__be64 *ptr;
__be64 zero_bn = 0;
size_t maxlen = iomap->length >> inode->i_blkbits;
BUG_ON(mp->mp_aheight < 1);
BUG_ON(dibh == NULL);
*dblock = 0;
*dblks = 0;
gfs2_trans_add_meta(ip->i_gl, dibh);
if (mp->mp_fheight == mp->mp_aheight) {
struct buffer_head *bh;
int eob;
/* Bottom indirect block exists, find unalloced extent size */
ptr = metapointer(end_of_metadata, mp);
bh = mp->mp_bh[end_of_metadata];
*dblks = gfs2_extent_length(bh->b_data, bh->b_size, ptr,
dblks = gfs2_extent_length(bh->b_data, bh->b_size, ptr,
maxlen, &eob);
BUG_ON(*dblks < 1);
BUG_ON(dblks < 1);
state = ALLOC_DATA;
} else {
/* Need to allocate indirect blocks */
ptrs_per_blk = mp->mp_fheight > 1 ? sdp->sd_inptrs :
sdp->sd_diptrs;
*dblks = min(maxlen, (size_t)(ptrs_per_blk -
dblks = min(maxlen, (size_t)(ptrs_per_blk -
mp->mp_list[end_of_metadata]));
if (mp->mp_fheight == ip->i_height) {
/* Writing into existing tree, extend tree down */
@ -561,7 +561,7 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
/* start of the second part of the function (state machine) */
blks = *dblks + iblks;
blks = dblks + iblks;
i = mp->mp_aheight;
do {
int error;
@ -618,32 +618,219 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
break;
/* Tree complete, adding data blocks */
case ALLOC_DATA:
BUG_ON(n > *dblks);
BUG_ON(n > dblks);
BUG_ON(mp->mp_bh[end_of_metadata] == NULL);
gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[end_of_metadata]);
*dblks = n;
dblks = n;
ptr = metapointer(end_of_metadata, mp);
*dblock = bn;
iomap->addr = bn << inode->i_blkbits;
iomap->flags |= IOMAP_F_NEW;
while (n-- > 0)
*ptr++ = cpu_to_be64(bn++);
if (zero_new) {
ret = sb_issue_zeroout(sb, *dblock, *dblks,
GFP_NOFS);
if (flags & IOMAP_ZERO) {
ret = sb_issue_zeroout(sb, iomap->addr >> inode->i_blkbits,
dblks, GFP_NOFS);
if (ret) {
fs_err(sdp,
"Failed to zero data buffers\n");
flags &= ~IOMAP_ZERO;
}
}
break;
}
} while ((state != ALLOC_DATA) || !(*dblock));
} while (iomap->addr == IOMAP_NULL_ADDR);
iomap->length = (u64)dblks << inode->i_blkbits;
ip->i_height = mp->mp_fheight;
gfs2_add_inode_blocks(&ip->i_inode, alloced);
gfs2_dinode_out(ip, mp->mp_bh[0]->b_data);
return 0;
}
/**
* hole_size - figure out the size of a hole
* @inode: The inode
* @lblock: The logical starting block number
* @mp: The metapath
*
* Returns: The hole size in bytes
*
*/
static u64 hole_size(struct inode *inode, sector_t lblock, struct metapath *mp)
{
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
struct metapath mp_eof;
u64 factor = 1;
int hgt;
u64 holesz = 0;
const __be64 *first, *end, *ptr;
const struct buffer_head *bh;
u64 lblock_stop = (i_size_read(inode) - 1) >> inode->i_blkbits;
int zeroptrs;
bool done = false;
/* Get another metapath, to the very last byte */
find_metapath(sdp, lblock_stop, &mp_eof, ip->i_height);
for (hgt = ip->i_height - 1; hgt >= 0 && !done; hgt--) {
bh = mp->mp_bh[hgt];
if (bh) {
zeroptrs = 0;
first = metapointer(hgt, mp);
end = (const __be64 *)(bh->b_data + bh->b_size);
for (ptr = first; ptr < end; ptr++) {
if (*ptr) {
done = true;
break;
} else {
zeroptrs++;
}
}
} else {
zeroptrs = sdp->sd_inptrs;
}
if (factor * zeroptrs >= lblock_stop - lblock + 1) {
holesz = lblock_stop - lblock + 1;
break;
}
holesz += factor * zeroptrs;
factor *= sdp->sd_inptrs;
if (hgt && (mp->mp_list[hgt - 1] < mp_eof.mp_list[hgt - 1]))
(mp->mp_list[hgt - 1])++;
}
return holesz << inode->i_blkbits;
}
static void gfs2_stuffed_iomap(struct inode *inode, struct iomap *iomap)
{
struct gfs2_inode *ip = GFS2_I(inode);
iomap->addr = (ip->i_no_addr << inode->i_blkbits) +
sizeof(struct gfs2_dinode);
iomap->offset = 0;
iomap->length = i_size_read(inode);
iomap->type = IOMAP_MAPPED;
iomap->flags = IOMAP_F_DATA_INLINE;
}
/**
* gfs2_iomap_begin - Map blocks from an inode to disk blocks
* @inode: The inode
* @pos: Starting position in bytes
* @length: Length to map, in bytes
* @flags: iomap flags
* @iomap: The iomap structure
*
* Returns: errno
*/
int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
unsigned flags, struct iomap *iomap)
{
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
struct metapath mp = { .mp_aheight = 1, };
unsigned int factor = sdp->sd_sb.sb_bsize;
const u64 *arr = sdp->sd_heightsize;
__be64 *ptr;
sector_t lblock;
sector_t lend;
int ret;
int eob;
unsigned int len;
struct buffer_head *bh;
u8 height;
trace_gfs2_iomap_start(ip, pos, length, flags);
if (!length) {
ret = -EINVAL;
goto out;
}
if ((flags & IOMAP_REPORT) && gfs2_is_stuffed(ip)) {
gfs2_stuffed_iomap(inode, iomap);
if (pos >= iomap->length)
return -ENOENT;
ret = 0;
goto out;
}
lblock = pos >> inode->i_blkbits;
lend = (pos + length + sdp->sd_sb.sb_bsize - 1) >> inode->i_blkbits;
iomap->offset = lblock << inode->i_blkbits;
iomap->addr = IOMAP_NULL_ADDR;
iomap->type = IOMAP_HOLE;
iomap->length = (u64)(lend - lblock) << inode->i_blkbits;
iomap->flags = IOMAP_F_MERGED;
bmap_lock(ip, 0);
/*
* Directory data blocks have a struct gfs2_meta_header header, so the
* remaining size is smaller than the filesystem block size. Logical
* block numbers for directories are in units of this remaining size!
*/
if (gfs2_is_dir(ip)) {
factor = sdp->sd_jbsize;
arr = sdp->sd_jheightsize;
}
ret = gfs2_meta_inode_buffer(ip, &mp.mp_bh[0]);
if (ret)
goto out_release;
height = ip->i_height;
while ((lblock + 1) * factor > arr[height])
height++;
find_metapath(sdp, lblock, &mp, height);
if (height > ip->i_height || gfs2_is_stuffed(ip))
goto do_alloc;
ret = lookup_metapath(ip, &mp);
if (ret < 0)
goto out_release;
if (mp.mp_aheight != ip->i_height)
goto do_alloc;
ptr = metapointer(ip->i_height - 1, &mp);
if (*ptr == 0)
goto do_alloc;
iomap->type = IOMAP_MAPPED;
iomap->addr = be64_to_cpu(*ptr) << inode->i_blkbits;
bh = mp.mp_bh[ip->i_height - 1];
len = gfs2_extent_length(bh->b_data, bh->b_size, ptr, lend - lblock, &eob);
if (eob)
iomap->flags |= IOMAP_F_BOUNDARY;
iomap->length = (u64)len << inode->i_blkbits;
ret = 0;
out_release:
release_metapath(&mp);
bmap_unlock(ip, 0);
out:
trace_gfs2_iomap_end(ip, iomap, ret);
return ret;
do_alloc:
if (!(flags & IOMAP_WRITE)) {
if (pos >= i_size_read(inode)) {
ret = -ENOENT;
goto out_release;
}
ret = 0;
iomap->length = hole_size(inode, lblock, &mp);
goto out_release;
}
ret = gfs2_iomap_alloc(inode, iomap, flags, &mp);
goto out_release;
}
/**
* gfs2_block_map - Map a block from an inode to a disk block
* @inode: The inode
@ -662,93 +849,43 @@ int gfs2_block_map(struct inode *inode, sector_t lblock,
struct buffer_head *bh_map, int create)
{
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
unsigned int factor = sdp->sd_sb.sb_bsize;
const size_t maxlen = bh_map->b_size >> inode->i_blkbits;
const u64 *arr = sdp->sd_heightsize;
__be64 *ptr;
u64 size;
struct metapath mp;
int ret;
int eob;
unsigned int len;
struct buffer_head *bh;
u8 height;
bool zero_new = false;
sector_t dblock = 0;
unsigned dblks;
struct iomap iomap;
int ret, flags = 0;
BUG_ON(maxlen == 0);
memset(&mp, 0, sizeof(mp));
bmap_lock(ip, create);
clear_buffer_mapped(bh_map);
clear_buffer_new(bh_map);
clear_buffer_boundary(bh_map);
trace_gfs2_bmap(ip, bh_map, lblock, create, 1);
/*
* Directory data blocks have a struct gfs2_meta_header header, so the
* remaining size is smaller than the filesystem block size. Logical
* block numbers for directories are in units of this remaining size!
*/
if (gfs2_is_dir(ip)) {
factor = sdp->sd_jbsize;
arr = sdp->sd_jheightsize;
}
ret = gfs2_meta_inode_buffer(ip, &mp.mp_bh[0]);
if (ret)
goto out;
height = ip->i_height;
size = (lblock + 1) * factor;
while (size > arr[height])
height++;
find_metapath(sdp, lblock, &mp, height);
mp.mp_aheight = 1;
if (height > ip->i_height || gfs2_is_stuffed(ip))
goto do_alloc;
ret = lookup_metapath(ip, &mp);
if (ret < 0)
goto out;
if (mp.mp_aheight != ip->i_height)
goto do_alloc;
ptr = metapointer(ip->i_height - 1, &mp);
if (*ptr == 0)
goto do_alloc;
map_bh(bh_map, inode->i_sb, be64_to_cpu(*ptr));
bh = mp.mp_bh[ip->i_height - 1];
len = gfs2_extent_length(bh->b_data, bh->b_size, ptr, maxlen, &eob);
bh_map->b_size = (len << inode->i_blkbits);
if (eob)
set_buffer_boundary(bh_map);
ret = 0;
out:
release_metapath(&mp);
trace_gfs2_bmap(ip, bh_map, lblock, create, ret);
bmap_unlock(ip, create);
return ret;
do_alloc:
/* All allocations are done here, firstly check create flag */
if (!create) {
BUG_ON(gfs2_is_stuffed(ip));
ret = 0;
goto out;
}
/* At this point ret is the tree depth of already allocated blocks */
if (create)
flags |= IOMAP_WRITE;
if (buffer_zeronew(bh_map))
zero_new = true;
ret = gfs2_bmap_alloc(inode, lblock, zero_new, &mp, maxlen, &dblock,
&dblks);
if (ret == 0) {
map_bh(bh_map, inode->i_sb, dblock);
bh_map->b_size = dblks << inode->i_blkbits;
set_buffer_new(bh_map);
flags |= IOMAP_ZERO;
ret = gfs2_iomap_begin(inode, (loff_t)lblock << inode->i_blkbits,
bh_map->b_size, flags, &iomap);
if (ret) {
if (!create && ret == -ENOENT) {
/* Return unmapped buffer beyond the end of file. */
ret = 0;
}
goto out;
}
if (iomap.length > bh_map->b_size) {
iomap.length = bh_map->b_size;
iomap.flags &= ~IOMAP_F_BOUNDARY;
}
if (iomap.addr != IOMAP_NULL_ADDR)
map_bh(bh_map, inode->i_sb, iomap.addr >> inode->i_blkbits);
bh_map->b_size = iomap.length;
if (iomap.flags & IOMAP_F_BOUNDARY)
set_buffer_boundary(bh_map);
if (iomap.flags & IOMAP_F_NEW)
set_buffer_new(bh_map);
out:
trace_gfs2_bmap(ip, bh_map, lblock, create, ret);
return ret;
}
/*

View File

@ -10,6 +10,8 @@
#ifndef __BMAP_DOT_H__
#define __BMAP_DOT_H__
#include <linux/iomap.h>
#include "inode.h"
struct inode;
@ -47,6 +49,8 @@ static inline void gfs2_write_calc_reserv(const struct gfs2_inode *ip,
extern int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page);
extern int gfs2_block_map(struct inode *inode, sector_t lblock,
struct buffer_head *bh, int create);
extern int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
unsigned flags, struct iomap *iomap);
extern int gfs2_extent_map(struct inode *inode, u64 lblock, int *new,
u64 *dblock, unsigned *extlen);
extern int gfs2_setattr_size(struct inode *inode, u64 size);

View File

@ -12,6 +12,7 @@
#include <linux/gfs2_ondisk.h>
#include <linux/writeback.h>
#include <linux/ktime.h>
#include <linux/iomap.h>
#include "incore.h"
#include "glock.h"
#include "rgrp.h"
@ -469,6 +470,70 @@ TRACE_EVENT(gfs2_bmap,
__entry->errno)
);
TRACE_EVENT(gfs2_iomap_start,
TP_PROTO(const struct gfs2_inode *ip, loff_t pos, ssize_t length,
u16 flags),
TP_ARGS(ip, pos, length, flags),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( u64, inum )
__field( loff_t, pos )
__field( ssize_t, length )
__field( u16, flags )
),
TP_fast_assign(
__entry->dev = ip->i_gl->gl_name.ln_sbd->sd_vfs->s_dev;
__entry->inum = ip->i_no_addr;
__entry->pos = pos;
__entry->length = length;
__entry->flags = flags;
),
TP_printk("%u,%u bmap %llu iomap start %llu/%lu flags:%08x",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->inum,
(unsigned long long)__entry->pos,
(unsigned long)__entry->length, (u16)__entry->flags)
);
TRACE_EVENT(gfs2_iomap_end,
TP_PROTO(const struct gfs2_inode *ip, struct iomap *iomap, int ret),
TP_ARGS(ip, iomap, ret),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( u64, inum )
__field( loff_t, offset )
__field( ssize_t, length )
__field( u16, flags )
__field( u16, type )
__field( int, ret )
),
TP_fast_assign(
__entry->dev = ip->i_gl->gl_name.ln_sbd->sd_vfs->s_dev;
__entry->inum = ip->i_no_addr;
__entry->offset = iomap->offset;
__entry->length = iomap->length;
__entry->flags = iomap->flags;
__entry->type = iomap->type;
__entry->ret = ret;
),
TP_printk("%u,%u bmap %llu iomap end %llu/%lu ty:%d flags:%08x rc:%d",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->inum,
(unsigned long long)__entry->offset,
(unsigned long)__entry->length, (u16)__entry->type,
(u16)__entry->flags, __entry->ret)
);
/* Keep track of blocks as they are allocated/freed */
TRACE_EVENT(gfs2_block_alloc,

View File

@ -22,6 +22,7 @@ struct vm_fault;
* Flags for all iomap mappings:
*/
#define IOMAP_F_NEW 0x01 /* blocks have been newly allocated */
#define IOMAP_F_BOUNDARY 0x02 /* mapping ends at metadata boundary */
/*
* Flags that only need to be reported for IOMAP_REPORT requests: