xfs: introduce interval queries on btrees

Create a function to enable querying of btree records mapping to a
range of keys.  This will be used in subsequent patches to allow
querying the reverse mapping btree to find the extents mapped to a
range of physical blocks, though the generic code can be used for
any range query.

The overlapped query range function needs to use the btree get_block
helper because the root block could be an inode, in which case
bc_bufs[nlevels-1] will be NULL.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <david@fromorbit.com>
This commit is contained in:
Darrick J. Wong 2016-08-03 11:10:21 +10:00 committed by Dave Chinner
parent 2c813ad66a
commit 105f7d83db
3 changed files with 282 additions and 5 deletions

View File

@ -4521,3 +4521,267 @@ xfs_btree_compute_maxlevels(
maxblocks = (maxblocks + limits[1] - 1) / limits[1];
return level;
}
/*
* Query a regular btree for all records overlapping a given interval.
* Start with a LE lookup of the key of low_rec and return all records
* until we find a record with a key greater than the key of high_rec.
*/
STATIC int
xfs_btree_simple_query_range(
struct xfs_btree_cur *cur,
union xfs_btree_key *low_key,
union xfs_btree_key *high_key,
xfs_btree_query_range_fn fn,
void *priv)
{
union xfs_btree_rec *recp;
union xfs_btree_key rec_key;
__int64_t diff;
int stat;
bool firstrec = true;
int error;
ASSERT(cur->bc_ops->init_high_key_from_rec);
ASSERT(cur->bc_ops->diff_two_keys);
/*
* Find the leftmost record. The btree cursor must be set
* to the low record used to generate low_key.
*/
stat = 0;
error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, &stat);
if (error)
goto out;
while (stat) {
/* Find the record. */
error = xfs_btree_get_rec(cur, &recp, &stat);
if (error || !stat)
break;
cur->bc_ops->init_high_key_from_rec(&rec_key, recp);
/* Skip if high_key(rec) < low_key. */
if (firstrec) {
firstrec = false;
diff = cur->bc_ops->diff_two_keys(cur, low_key,
&rec_key);
if (diff > 0)
goto advloop;
}
/* Stop if high_key < low_key(rec). */
diff = cur->bc_ops->diff_two_keys(cur, &rec_key, high_key);
if (diff > 0)
break;
/* Callback */
error = fn(cur, recp, priv);
if (error < 0 || error == XFS_BTREE_QUERY_RANGE_ABORT)
break;
advloop:
/* Move on to the next record. */
error = xfs_btree_increment(cur, 0, &stat);
if (error)
break;
}
out:
return error;
}
/*
* Query an overlapped interval btree for all records overlapping a given
* interval. This function roughly follows the algorithm given in
* "Interval Trees" of _Introduction to Algorithms_, which is section
* 14.3 in the 2nd and 3rd editions.
*
* First, generate keys for the low and high records passed in.
*
* For any leaf node, generate the high and low keys for the record.
* If the record keys overlap with the query low/high keys, pass the
* record to the function iterator.
*
* For any internal node, compare the low and high keys of each
* pointer against the query low/high keys. If there's an overlap,
* follow the pointer.
*
* As an optimization, we stop scanning a block when we find a low key
* that is greater than the query's high key.
*/
STATIC int
xfs_btree_overlapped_query_range(
struct xfs_btree_cur *cur,
union xfs_btree_key *low_key,
union xfs_btree_key *high_key,
xfs_btree_query_range_fn fn,
void *priv)
{
union xfs_btree_ptr ptr;
union xfs_btree_ptr *pp;
union xfs_btree_key rec_key;
union xfs_btree_key rec_hkey;
union xfs_btree_key *lkp;
union xfs_btree_key *hkp;
union xfs_btree_rec *recp;
struct xfs_btree_block *block;
__int64_t ldiff;
__int64_t hdiff;
int level;
struct xfs_buf *bp;
int i;
int error;
/* Load the root of the btree. */
level = cur->bc_nlevels - 1;
cur->bc_ops->init_ptr_from_cur(cur, &ptr);
error = xfs_btree_lookup_get_block(cur, level, &ptr, &block);
if (error)
return error;
xfs_btree_get_block(cur, level, &bp);
trace_xfs_btree_overlapped_query_range(cur, level, bp);
#ifdef DEBUG
error = xfs_btree_check_block(cur, block, level, bp);
if (error)
goto out;
#endif
cur->bc_ptrs[level] = 1;
while (level < cur->bc_nlevels) {
block = xfs_btree_get_block(cur, level, &bp);
/* End of node, pop back towards the root. */
if (cur->bc_ptrs[level] > be16_to_cpu(block->bb_numrecs)) {
pop_up:
if (level < cur->bc_nlevels - 1)
cur->bc_ptrs[level + 1]++;
level++;
continue;
}
if (level == 0) {
/* Handle a leaf node. */
recp = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block);
cur->bc_ops->init_high_key_from_rec(&rec_hkey, recp);
ldiff = cur->bc_ops->diff_two_keys(cur, &rec_hkey,
low_key);
cur->bc_ops->init_key_from_rec(&rec_key, recp);
hdiff = cur->bc_ops->diff_two_keys(cur, high_key,
&rec_key);
/*
* If (record's high key >= query's low key) and
* (query's high key >= record's low key), then
* this record overlaps the query range; callback.
*/
if (ldiff >= 0 && hdiff >= 0) {
error = fn(cur, recp, priv);
if (error < 0 ||
error == XFS_BTREE_QUERY_RANGE_ABORT)
break;
} else if (hdiff < 0) {
/* Record is larger than high key; pop. */
goto pop_up;
}
cur->bc_ptrs[level]++;
continue;
}
/* Handle an internal node. */
lkp = xfs_btree_key_addr(cur, cur->bc_ptrs[level], block);
hkp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level], block);
pp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[level], block);
ldiff = cur->bc_ops->diff_two_keys(cur, hkp, low_key);
hdiff = cur->bc_ops->diff_two_keys(cur, high_key, lkp);
/*
* If (pointer's high key >= query's low key) and
* (query's high key >= pointer's low key), then
* this record overlaps the query range; follow pointer.
*/
if (ldiff >= 0 && hdiff >= 0) {
level--;
error = xfs_btree_lookup_get_block(cur, level, pp,
&block);
if (error)
goto out;
xfs_btree_get_block(cur, level, &bp);
trace_xfs_btree_overlapped_query_range(cur, level, bp);
#ifdef DEBUG
error = xfs_btree_check_block(cur, block, level, bp);
if (error)
goto out;
#endif
cur->bc_ptrs[level] = 1;
continue;
} else if (hdiff < 0) {
/* The low key is larger than the upper range; pop. */
goto pop_up;
}
cur->bc_ptrs[level]++;
}
out:
/*
* If we don't end this function with the cursor pointing at a record
* block, a subsequent non-error cursor deletion will not release
* node-level buffers, causing a buffer leak. This is quite possible
* with a zero-results range query, so release the buffers if we
* failed to return any results.
*/
if (cur->bc_bufs[0] == NULL) {
for (i = 0; i < cur->bc_nlevels; i++) {
if (cur->bc_bufs[i]) {
xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]);
cur->bc_bufs[i] = NULL;
cur->bc_ptrs[i] = 0;
cur->bc_ra[i] = 0;
}
}
}
return error;
}
/*
* Query a btree for all records overlapping a given interval of keys. The
* supplied function will be called with each record found; return one of the
* XFS_BTREE_QUERY_RANGE_{CONTINUE,ABORT} values or the usual negative error
* code. This function returns XFS_BTREE_QUERY_RANGE_ABORT, zero, or a
* negative error code.
*/
int
xfs_btree_query_range(
struct xfs_btree_cur *cur,
union xfs_btree_irec *low_rec,
union xfs_btree_irec *high_rec,
xfs_btree_query_range_fn fn,
void *priv)
{
union xfs_btree_rec rec;
union xfs_btree_key low_key;
union xfs_btree_key high_key;
/* Find the keys of both ends of the interval. */
cur->bc_rec = *high_rec;
cur->bc_ops->init_rec_from_cur(cur, &rec);
cur->bc_ops->init_key_from_rec(&high_key, &rec);
cur->bc_rec = *low_rec;
cur->bc_ops->init_rec_from_cur(cur, &rec);
cur->bc_ops->init_key_from_rec(&low_key, &rec);
/* Enforce low key < high key. */
if (cur->bc_ops->diff_two_keys(cur, &low_key, &high_key) > 0)
return -EINVAL;
if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
return xfs_btree_simple_query_range(cur, &low_key,
&high_key, fn, priv);
return xfs_btree_overlapped_query_range(cur, &low_key, &high_key,
fn, priv);
}

View File

@ -227,6 +227,12 @@ struct xfs_btree_ops {
#define LASTREC_DELREC 2
union xfs_btree_irec {
struct xfs_alloc_rec_incore a;
struct xfs_bmbt_irec b;
struct xfs_inobt_rec_incore i;
};
/*
* Btree cursor structure.
* This collects all information needed by the btree code in one place.
@ -237,11 +243,7 @@ typedef struct xfs_btree_cur
struct xfs_mount *bc_mp; /* file system mount struct */
const struct xfs_btree_ops *bc_ops;
uint bc_flags; /* btree features - below */
union {
xfs_alloc_rec_incore_t a;
xfs_bmbt_irec_t b;
xfs_inobt_rec_incore_t i;
} bc_rec; /* current insert/search record value */
union xfs_btree_irec bc_rec; /* current insert/search record value */
struct xfs_buf *bc_bufs[XFS_BTREE_MAXLEVELS]; /* buf ptr per level */
int bc_ptrs[XFS_BTREE_MAXLEVELS]; /* key/record # */
__uint8_t bc_ra[XFS_BTREE_MAXLEVELS]; /* readahead bits */
@ -524,4 +526,14 @@ void xfs_btree_get_node_keys_overlapped(struct xfs_btree_cur *cur,
struct xfs_btree_block *block, union xfs_btree_key *key);
int xfs_btree_update_keys_overlapped(struct xfs_btree_cur *cur, int level);
/* return codes */
#define XFS_BTREE_QUERY_RANGE_CONTINUE 0 /* keep iterating */
#define XFS_BTREE_QUERY_RANGE_ABORT 1 /* stop iterating */
typedef int (*xfs_btree_query_range_fn)(struct xfs_btree_cur *cur,
union xfs_btree_rec *rec, void *priv);
int xfs_btree_query_range(struct xfs_btree_cur *cur,
union xfs_btree_irec *low_rec, union xfs_btree_irec *high_rec,
xfs_btree_query_range_fn fn, void *priv);
#endif /* __XFS_BTREE_H__ */

View File

@ -2220,6 +2220,7 @@ DEFINE_EVENT(xfs_btree_cur_class, name, \
TP_PROTO(struct xfs_btree_cur *cur, int level, struct xfs_buf *bp), \
TP_ARGS(cur, level, bp))
DEFINE_BTREE_CUR_EVENT(xfs_btree_updkeys);
DEFINE_BTREE_CUR_EVENT(xfs_btree_overlapped_query_range);
#endif /* _TRACE_XFS_H */