xfs: buffer cache bulk page allocation

This patchset makes use of the new bulk page allocation interface to
 reduce the overhead of allocating large numbers of pages in a
 loop.
 
 The first two patches are refactoring buffer memory allocation and
 converting the uncached buffer path to use the same page allocation
 path, followed by converting the page allocation path to use bulk
 allocation.
 
 The rest of the patches are then consolidation of the page
 allocation and freeing code to simplify the code and remove a chunk
 of unnecessary abstraction. This is largely based on a series of
 changes made by Christoph Hellwig.
 -----BEGIN PGP SIGNATURE-----
 
 iQJIBAABCgAyFiEEmJOoJ8GffZYWSjj/regpR/R1+h0FAmC+6OwUHGRhdmlkQGZy
 b21vcmJpdC5jb20ACgkQregpR/R1+h21QQ/8C0f7wq1OKwNI2oRubf6J8jtttiRS
 SD2TA03AP2OIOKx4y2G0h0dJeX9tgnbLerIlpfT80nHDoBgKHbZCYSEHQT0DscYo
 fuwQTVR8RCklKspjUlCGR+Gbm6vI8HakK1lAppw168e4c6t8wX1KiSibwaVTQdaZ
 NaXUqTUzGiNq+iiLS6fW3mJ3PKWFJYyrDOSR2jIPbUGIJdejRCGe0xVnu+hIsz+y
 c2gSGCB+j3cYaazhlJTDYPGja3Wq3eR+Ya9i1GcA1tJiJLsu0ZjaVQ69Bl4dud2F
 c3OyhFK0El1VMSEVb3hY8gTpAO02jNWSnB2Zlidt0h4ZJVAxKus0xe2w3eS4uST2
 hcMI3lwjdzRQuoBwOgXQ+CpYVv2wI8HPNLTSR+NYcC2IZaCNieFRWdTYwXrAJBB3
 H09m04GT/7TkkrYHFD1zRtIedP4DZ6MZn/33bufNxEt1NRCFw5AFAEUFfjDA317A
 4nByCmU6XjmmpI/XLixwu0BYCfKVB4UsrgOyzXBy7ZU0+pIser+ynP1V4d9Bb43Y
 xVQ8S0QirT7gqXjx75mD4B4qkXZ5nrz5Z7fSn6YU4TwqsYtZYlsBauLlWmmHp9MT
 CP4PA4j+CQORhfZzWXw2ViXYGoIssc1cw5i4JB6a4u/OaDi19dYkE6SO8P3b9GSm
 khHqWgcTC4VGpmc=
 =JsrV
 -----END PGP SIGNATURE-----

Merge tag 'xfs-buf-bulk-alloc-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs into xfs-5.14-merge2

xfs: buffer cache bulk page allocation

This patchset makes use of the new bulk page allocation interface to
reduce the overhead of allocating large numbers of pages in a
loop.

The first two patches are refactoring buffer memory allocation and
converting the uncached buffer path to use the same page allocation
path, followed by converting the page allocation path to use bulk
allocation.

The rest of the patches are then consolidation of the page
allocation and freeing code to simplify the code and remove a chunk
of unnecessary abstraction. This is largely based on a series of
changes made by Christoph Hellwig.

* tag 'xfs-buf-bulk-alloc-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs:
  xfs: merge xfs_buf_allocate_memory
  xfs: cleanup error handling in xfs_buf_get_map
  xfs: get rid of xb_to_gfp()
  xfs: simplify the b_page_count calculation
  xfs: remove ->b_offset handling for page backed buffers
  xfs: move page freeing into _xfs_buf_free_pages()
  xfs: merge _xfs_buf_get_pages()
  xfs: use alloc_pages_bulk_array() for buffers
  xfs: use xfs_buf_alloc_pages for uncached buffers
  xfs: split up xfs_buf_allocate_memory
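
For readers who have not used the bulk allocator before, the pattern the
series is built around looks roughly like the sketch below. This is a
hand-written illustration rather than code from the series itself:
fill_page_array() is a hypothetical helper, and the backoff is simplified to
a plain msleep() instead of the congestion_wait() and statistics handling the
real xfs_buf_alloc_pages() does. The key property is that
alloc_pages_bulk_array() only fills empty slots in the array and returns the
total number of populated slots, so a partial fill is progress rather than
failure.

#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/delay.h>

/*
 * Fill @pages with @nr_pages order-0 pages in as few calls as possible.
 * alloc_pages_bulk_array() skips slots that are already populated, so we
 * only back off when an entire call makes no progress at all. On failure
 * the caller is expected to free any pages already placed in @pages.
 */
static int fill_page_array(struct page **pages, unsigned long nr_pages,
			   gfp_t gfp_mask)
{
	unsigned long filled = 0;

	for (;;) {
		unsigned long last = filled;

		filled = alloc_pages_bulk_array(gfp_mask, nr_pages, pages);
		if (filled == nr_pages)
			return 0;		/* array fully populated */

		if (filled != last)
			continue;		/* made progress, try again */

		if (gfp_mask & __GFP_NORETRY)
			return -ENOMEM;		/* e.g. readahead: give up */

		msleep(20);			/* no progress, back off briefly */
	}
}

The real xfs_buf_alloc_pages() in the diff below layers GFP policy on top of
this loop (GFP_NOFS normally, __GFP_NORETRY for readahead, __GFP_ZERO for
non-read buffers) and frees the partially filled page array itself when a
readahead allocation fails.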
Darrick J. Wong 2021-06-08 09:10:01 -07:00
commit ebf2e33723
3 changed files with 120 additions and 189 deletions

fs/xfs/libxfs/xfs_ag.c

@@ -43,7 +43,6 @@ xfs_get_aghdr_buf(
 	if (error)
 		return error;
 
-	xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
 	bp->b_bn = blkno;
 	bp->b_maps[0].bm_bn = blkno;
 	bp->b_ops = ops;

fs/xfs/xfs_buf.c

@@ -22,9 +22,6 @@
 
 static kmem_zone_t	*xfs_buf_zone;
 
-#define xb_to_gfp(flags) \
-	((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : GFP_NOFS) | __GFP_NOWARN)
-
 /*
  * Locking orders
  *
@@ -79,7 +76,7 @@ static inline int
 xfs_buf_vmap_len(
 	struct xfs_buf	*bp)
 {
-	return (bp->b_page_count * PAGE_SIZE) - bp->b_offset;
+	return (bp->b_page_count * PAGE_SIZE);
 }
 
 /*
@@ -272,51 +269,30 @@ _xfs_buf_alloc(
 	return 0;
 }
 
-/*
- * Allocate a page array capable of holding a specified number
- * of pages, and point the page buf at it.
- */
-STATIC int
-_xfs_buf_get_pages(
-	struct xfs_buf	*bp,
-	int		page_count)
-{
-	/* Make sure that we have a page list */
-	if (bp->b_pages == NULL) {
-		bp->b_page_count = page_count;
-		if (page_count <= XB_PAGES) {
-			bp->b_pages = bp->b_page_array;
-		} else {
-			bp->b_pages = kmem_alloc(sizeof(struct page *) *
-						 page_count, KM_NOFS);
-			if (bp->b_pages == NULL)
-				return -ENOMEM;
-		}
-		memset(bp->b_pages, 0, sizeof(struct page *) * page_count);
-	}
-	return 0;
-}
-
-/*
- * Frees b_pages if it was allocated.
- */
-STATIC void
-_xfs_buf_free_pages(
+static void
+xfs_buf_free_pages(
 	struct xfs_buf	*bp)
 {
-	if (bp->b_pages != bp->b_page_array) {
-		kmem_free(bp->b_pages);
-		bp->b_pages = NULL;
+	uint		i;
+
+	ASSERT(bp->b_flags & _XBF_PAGES);
+
+	if (xfs_buf_is_vmapped(bp))
+		vm_unmap_ram(bp->b_addr, bp->b_page_count);
+
+	for (i = 0; i < bp->b_page_count; i++) {
+		if (bp->b_pages[i])
+			__free_page(bp->b_pages[i]);
 	}
+	if (current->reclaim_state)
+		current->reclaim_state->reclaimed_slab += bp->b_page_count;
+
+	if (bp->b_pages != bp->b_page_array)
+		kmem_free(bp->b_pages);
+	bp->b_pages = NULL;
+	bp->b_flags &= ~_XBF_PAGES;
 }
 
-/*
- * Releases the specified buffer.
- *
- * The modification state of any associated pages is left unchanged.
- * The buffer must not be on any hash - use xfs_buf_rele instead for
- * hashed and refcounted buffers
- */
 static void
 xfs_buf_free(
 	struct xfs_buf	*bp)
@@ -325,137 +301,103 @@ xfs_buf_free(
 	ASSERT(list_empty(&bp->b_lru));
 
-	if (bp->b_flags & _XBF_PAGES) {
-		uint		i;
-
-		if (xfs_buf_is_vmapped(bp))
-			vm_unmap_ram(bp->b_addr - bp->b_offset,
-					bp->b_page_count);
-
-		for (i = 0; i < bp->b_page_count; i++) {
-			struct page	*page = bp->b_pages[i];
-
-			__free_page(page);
-		}
-		if (current->reclaim_state)
-			current->reclaim_state->reclaimed_slab +=
-							bp->b_page_count;
-	} else if (bp->b_flags & _XBF_KMEM)
+	if (bp->b_flags & _XBF_PAGES)
+		xfs_buf_free_pages(bp);
+	else if (bp->b_flags & _XBF_KMEM)
 		kmem_free(bp->b_addr);
-	_xfs_buf_free_pages(bp);
+
 	xfs_buf_free_maps(bp);
 	kmem_cache_free(xfs_buf_zone, bp);
 }
 
-/*
- * Allocates all the pages for buffer in question and builds it's page list.
- */
-STATIC int
-xfs_buf_allocate_memory(
-	struct xfs_buf		*bp,
-	uint			flags)
+static int
+xfs_buf_alloc_kmem(
+	struct xfs_buf	*bp,
+	xfs_buf_flags_t	flags)
 {
-	size_t			size;
-	size_t			nbytes, offset;
-	gfp_t			gfp_mask = xb_to_gfp(flags);
-	unsigned short		page_count, i;
-	xfs_off_t		start, end;
-	int			error;
-	xfs_km_flags_t		kmflag_mask = 0;
+	int		align_mask = xfs_buftarg_dma_alignment(bp->b_target);
+	xfs_km_flags_t	kmflag_mask = KM_NOFS;
+	size_t		size = BBTOB(bp->b_length);
 
-	/*
-	 * assure zeroed buffer for non-read cases.
-	 */
-	if (!(flags & XBF_READ)) {
+	/* Assure zeroed buffer for non-read cases. */
+	if (!(flags & XBF_READ))
 		kmflag_mask |= KM_ZERO;
-		gfp_mask |= __GFP_ZERO;
-	}
 
-	/*
-	 * for buffers that are contained within a single page, just allocate
-	 * the memory from the heap - there's no need for the complexity of
-	 * page arrays to keep allocation down to order 0.
-	 */
-	size = BBTOB(bp->b_length);
-	if (size < PAGE_SIZE) {
-		int align_mask = xfs_buftarg_dma_alignment(bp->b_target);
-		bp->b_addr = kmem_alloc_io(size, align_mask,
-					   KM_NOFS | kmflag_mask);
-		if (!bp->b_addr) {
-			/* low memory - use alloc_page loop instead */
-			goto use_alloc_page;
-		}
+	bp->b_addr = kmem_alloc_io(size, align_mask, kmflag_mask);
+	if (!bp->b_addr)
+		return -ENOMEM;
 
-		if (((unsigned long)(bp->b_addr + size - 1) & PAGE_MASK) !=
-		    ((unsigned long)bp->b_addr & PAGE_MASK)) {
-			/* b_addr spans two pages - use alloc_page instead */
-			kmem_free(bp->b_addr);
-			bp->b_addr = NULL;
-			goto use_alloc_page;
-		}
-		bp->b_offset = offset_in_page(bp->b_addr);
-		bp->b_pages = bp->b_page_array;
-		bp->b_pages[0] = kmem_to_page(bp->b_addr);
-		bp->b_page_count = 1;
-		bp->b_flags |= _XBF_KMEM;
-		return 0;
+	if (((unsigned long)(bp->b_addr + size - 1) & PAGE_MASK) !=
+	    ((unsigned long)bp->b_addr & PAGE_MASK)) {
+		/* b_addr spans two pages - use alloc_page instead */
+		kmem_free(bp->b_addr);
+		bp->b_addr = NULL;
+		return -ENOMEM;
 	}
+	bp->b_offset = offset_in_page(bp->b_addr);
+	bp->b_pages = bp->b_page_array;
+	bp->b_pages[0] = kmem_to_page(bp->b_addr);
+	bp->b_page_count = 1;
+	bp->b_flags |= _XBF_KMEM;
+	return 0;
+}
 
-use_alloc_page:
-	start = BBTOB(bp->b_maps[0].bm_bn) >> PAGE_SHIFT;
-	end = (BBTOB(bp->b_maps[0].bm_bn + bp->b_length) + PAGE_SIZE - 1)
-								>> PAGE_SHIFT;
-	page_count = end - start;
-	error = _xfs_buf_get_pages(bp, page_count);
-	if (unlikely(error))
-		return error;
+static int
+xfs_buf_alloc_pages(
+	struct xfs_buf	*bp,
+	xfs_buf_flags_t	flags)
+{
+	gfp_t		gfp_mask = __GFP_NOWARN;
+	long		filled = 0;
 
-	offset = bp->b_offset;
+	if (flags & XBF_READ_AHEAD)
+		gfp_mask |= __GFP_NORETRY;
+	else
+		gfp_mask |= GFP_NOFS;
+
+	/* Make sure that we have a page list */
+	bp->b_page_count = DIV_ROUND_UP(BBTOB(bp->b_length), PAGE_SIZE);
+	if (bp->b_page_count <= XB_PAGES) {
+		bp->b_pages = bp->b_page_array;
+	} else {
+		bp->b_pages = kzalloc(sizeof(struct page *) * bp->b_page_count,
+					gfp_mask);
+		if (!bp->b_pages)
+			return -ENOMEM;
+	}
 	bp->b_flags |= _XBF_PAGES;
 
-	for (i = 0; i < bp->b_page_count; i++) {
-		struct page	*page;
-		uint		retries = 0;
-retry:
-		page = alloc_page(gfp_mask);
-		if (unlikely(page == NULL)) {
-			if (flags & XBF_READ_AHEAD) {
-				bp->b_page_count = i;
-				error = -ENOMEM;
-				goto out_free_pages;
-			}
+	/* Assure zeroed buffer for non-read cases. */
+	if (!(flags & XBF_READ))
+		gfp_mask |= __GFP_ZERO;
 
-			/*
-			 * This could deadlock.
-			 *
-			 * But until all the XFS lowlevel code is revamped to
-			 * handle buffer allocation failures we can't do much.
-			 */
-			if (!(++retries % 100))
-				xfs_err(NULL,
-		"%s(%u) possible memory allocation deadlock in %s (mode:0x%x)",
-					current->comm, current->pid,
-					__func__, gfp_mask);
+	/*
+	 * Bulk filling of pages can take multiple calls. Not filling the entire
+	 * array is not an allocation failure, so don't back off if we get at
+	 * least one extra page.
+	 */
+	for (;;) {
+		long	last = filled;
 
-			XFS_STATS_INC(bp->b_mount, xb_page_retries);
-			congestion_wait(BLK_RW_ASYNC, HZ/50);
-			goto retry;
+		filled = alloc_pages_bulk_array(gfp_mask, bp->b_page_count,
+						bp->b_pages);
+		if (filled == bp->b_page_count) {
+			XFS_STATS_INC(bp->b_mount, xb_page_found);
+			break;
 		}
 
-		XFS_STATS_INC(bp->b_mount, xb_page_found);
+		if (filled != last)
+			continue;
 
-		nbytes = min_t(size_t, size, PAGE_SIZE - offset);
-		size -= nbytes;
-		bp->b_pages[i] = page;
-		offset = 0;
+		if (flags & XBF_READ_AHEAD) {
+			xfs_buf_free_pages(bp);
+			return -ENOMEM;
+		}
+
+		XFS_STATS_INC(bp->b_mount, xb_page_retries);
+		congestion_wait(BLK_RW_ASYNC, HZ / 50);
 	}
 	return 0;
-
-out_free_pages:
-	for (i = 0; i < bp->b_page_count; i++)
-		__free_page(bp->b_pages[i]);
-	bp->b_flags &= ~_XBF_PAGES;
-	return error;
 }
 
 /*
@@ -469,7 +411,7 @@ _xfs_buf_map_pages(
 	ASSERT(bp->b_flags & _XBF_PAGES);
 	if (bp->b_page_count == 1) {
 		/* A single page buffer is always mappable */
-		bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset;
+		bp->b_addr = page_address(bp->b_pages[0]);
 	} else if (flags & XBF_UNMAPPED) {
 		bp->b_addr = NULL;
 	} else {
@@ -496,7 +438,6 @@ _xfs_buf_map_pages(
 
 		if (!bp->b_addr)
 			return -ENOMEM;
-		bp->b_addr += bp->b_offset;
 	}
 
 	return 0;
@@ -720,17 +661,22 @@ xfs_buf_get_map(
 	if (error)
 		return error;
 
-	error = xfs_buf_allocate_memory(new_bp, flags);
-	if (error) {
-		xfs_buf_free(new_bp);
-		return error;
+	/*
+	 * For buffers that fit entirely within a single page, first attempt to
+	 * allocate the memory from the heap to minimise memory usage. If we
+	 * can't get heap memory for these small buffers, we fall back to using
+	 * the page allocator.
+	 */
+	if (BBTOB(new_bp->b_length) >= PAGE_SIZE ||
+	    xfs_buf_alloc_kmem(new_bp, flags) < 0) {
+		error = xfs_buf_alloc_pages(new_bp, flags);
+		if (error)
+			goto out_free_buf;
 	}
 
 	error = xfs_buf_find(target, map, nmaps, flags, new_bp, &bp);
-	if (error) {
-		xfs_buf_free(new_bp);
-		return error;
-	}
+	if (error)
+		goto out_free_buf;
 
 	if (bp != new_bp)
 		xfs_buf_free(new_bp);
@@ -758,6 +704,9 @@ found:
 	trace_xfs_buf_get(bp, flags, _RET_IP_);
 	*bpp = bp;
 	return 0;
+out_free_buf:
+	xfs_buf_free(new_bp);
+	return error;
 }
 
 int
@@ -950,8 +899,7 @@ xfs_buf_get_uncached(
 	int			flags,
 	struct xfs_buf		**bpp)
 {
-	unsigned long		page_count;
-	int			error, i;
+	int			error;
 	struct xfs_buf		*bp;
 	DEFINE_SINGLE_BUF_MAP(map, XFS_BUF_DADDR_NULL, numblks);
@@ -960,41 +908,25 @@ xfs_buf_get_uncached(
 	/* flags might contain irrelevant bits, pass only what we care about */
 	error = _xfs_buf_alloc(target, &map, 1, flags & XBF_NO_IOACCT, &bp);
 	if (error)
-		goto fail;
+		return error;
 
-	page_count = PAGE_ALIGN(numblks << BBSHIFT) >> PAGE_SHIFT;
-	error = _xfs_buf_get_pages(bp, page_count);
+	error = xfs_buf_alloc_pages(bp, flags);
 	if (error)
 		goto fail_free_buf;
 
-	for (i = 0; i < page_count; i++) {
-		bp->b_pages[i] = alloc_page(xb_to_gfp(flags));
-		if (!bp->b_pages[i]) {
-			error = -ENOMEM;
-			goto fail_free_mem;
-		}
-	}
-	bp->b_flags |= _XBF_PAGES;
-
 	error = _xfs_buf_map_pages(bp, 0);
 	if (unlikely(error)) {
 		xfs_warn(target->bt_mount,
 			"%s: failed to map pages", __func__);
-		goto fail_free_mem;
+		goto fail_free_buf;
 	}
 
 	trace_xfs_buf_get_uncached(bp, _RET_IP_);
 	*bpp = bp;
 	return 0;
 
- fail_free_mem:
-	while (--i >= 0)
-		__free_page(bp->b_pages[i]);
-	_xfs_buf_free_pages(bp);
- fail_free_buf:
-	xfs_buf_free_maps(bp);
-	kmem_cache_free(xfs_buf_zone, bp);
- fail:
+fail_free_buf:
+	xfs_buf_free(bp);
 	return error;
 }
@@ -1722,7 +1654,6 @@ xfs_buf_offset(
 	if (bp->b_addr)
 		return bp->b_addr + offset;
 
-	offset += bp->b_offset;
 	page = bp->b_pages[offset >> PAGE_SHIFT];
 	return page_address(page) + (offset & (PAGE_SIZE-1));
 }

fs/xfs/xfs_buf.h

@@ -167,7 +167,8 @@ struct xfs_buf {
 	atomic_t		b_pin_count;	/* pin count */
 	atomic_t		b_io_remaining;	/* #outstanding I/O requests */
 	unsigned int		b_page_count;	/* size of page array */
-	unsigned int		b_offset;	/* page offset in first page */
+	unsigned int		b_offset;	/* page offset of b_addr,
+						   only for _XBF_KMEM buffers */
 	int			b_error;	/* error code on I/O */
 
 	/*