2019-07-15 18:50:57 +03:00
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright ( C ) 2018 Oracle . All Rights Reserved .
* Author : Darrick J . Wong < darrick . wong @ oracle . com >
*/
# include <linux/module.h>
# include <linux/compiler.h>
# include <linux/fs.h>
# include <linux/iomap.h>
# include <linux/swap.h>
/* Swapfile activation */
struct iomap_swapfile_info {
struct iomap iomap ; /* accumulated iomap */
struct swap_info_struct * sis ;
uint64_t lowest_ppage ; /* lowest physical addr seen (pages) */
uint64_t highest_ppage ; /* highest physical addr seen (pages) */
unsigned long nr_pages ; /* number of pages collected */
int nr_extents ; /* extent count */
2021-03-26 20:55:40 +03:00
struct file * file ;
2019-07-15 18:50:57 +03:00
} ;
/*
* Collect physical extents for this swap file . Physical extents reported to
* the swap code must be trimmed to align to a page boundary . The logical
* offset within the file is irrelevant since the swapfile code maps logical
* page numbers of the swap device to the physical page - aligned extents .
*/
static int iomap_swapfile_add_extent ( struct iomap_swapfile_info * isi )
{
struct iomap * iomap = & isi - > iomap ;
unsigned long nr_pages ;
mm/swap: consider max pages in iomap_swapfile_add_extent
When the max pages (last_page in the swap header + 1) is smaller than
the total pages (inode size) of the swapfile, iomap_swapfile_activate
overwrites sis->max with total pages.
However, frontswap_map is a swap page state bitmap allocated using the
initial sis->max page count read from the swap header. If swapfile
activation increases sis->max, it's possible for the frontswap code to
walk off the end of the bitmap, thereby corrupting kernel memory.
[djwong: modify the description a bit; the original paragraph reads:
"However, frontswap_map is allocated using max pages. When test and clear
the sis offset, which is larger than max pages, of frontswap_map in
__frontswap_invalidate_page(), neighbors of frontswap_map may be
overwritten, i.e., slab is polluted."
Note also that this bug resulted in a behavioral change: activating a
swap file that was formatted and later extended results in all pages
being activated, not the number of pages recorded in the swap header.]
This fixes the issue by considering the limitation of max pages of swap
info in iomap_swapfile_add_extent().
To reproduce the case, compile kernel with slub RED ZONE, then run test:
$ sudo stress-ng -a 1 -x softlockup,resources -t 72h --metrics --times \
--verify -v -Y /root/tmpdir/stress-ng/stress-statistic-12.yaml \
--log-file /root/tmpdir/stress-ng/stress-logfile-12.txt \
--temp-path /root/tmpdir/stress-ng/
We'll get the error log as below:
[ 1151.015141] =============================================================================
[ 1151.016489] BUG kmalloc-16 (Not tainted): Right Redzone overwritten
[ 1151.017486] -----------------------------------------------------------------------------
[ 1151.017486]
[ 1151.018997] Disabling lock debugging due to kernel taint
[ 1151.019873] INFO: 0x0000000084e43932-0x0000000098d17cae @offset=7392. First byte 0x0 instead of 0xcc
[ 1151.021303] INFO: Allocated in __do_sys_swapon+0xcf6/0x1170 age=43417 cpu=9 pid=3816
[ 1151.022538] __slab_alloc+0xe/0x20
[ 1151.023069] __kmalloc_node+0xfd/0x4b0
[ 1151.023704] __do_sys_swapon+0xcf6/0x1170
[ 1151.024346] do_syscall_64+0x33/0x40
[ 1151.024925] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 1151.025749] INFO: Freed in put_cred_rcu+0xa1/0xc0 age=43424 cpu=3 pid=2041
[ 1151.026889] kfree+0x276/0x2b0
[ 1151.027405] put_cred_rcu+0xa1/0xc0
[ 1151.027949] rcu_do_batch+0x17d/0x410
[ 1151.028566] rcu_core+0x14e/0x2b0
[ 1151.029084] __do_softirq+0x101/0x29e
[ 1151.029645] asm_call_irq_on_stack+0x12/0x20
[ 1151.030381] do_softirq_own_stack+0x37/0x40
[ 1151.031037] do_softirq.part.15+0x2b/0x30
[ 1151.031710] __local_bh_enable_ip+0x4b/0x50
[ 1151.032412] copy_fpstate_to_sigframe+0x111/0x360
[ 1151.033197] __setup_rt_frame+0xce/0x480
[ 1151.033809] arch_do_signal+0x1a3/0x250
[ 1151.034463] exit_to_user_mode_prepare+0xcf/0x110
[ 1151.035242] syscall_exit_to_user_mode+0x27/0x190
[ 1151.035970] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 1151.036795] INFO: Slab 0x000000003b9de4dc objects=44 used=9 fp=0x00000000539e349e flags=0xfffffc0010201
[ 1151.038323] INFO: Object 0x000000004855ba01 @offset=7376 fp=0x0000000000000000
[ 1151.038323]
[ 1151.039683] Redzone 000000008d0afd3d: cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc ................
[ 1151.041180] Object 000000004855ba01: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
[ 1151.042714] Redzone 0000000084e43932: 00 00 00 c0 cc cc cc cc ........
[ 1151.044120] Padding 000000000864c042: 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZZZZZZZZZ
[ 1151.045615] CPU: 5 PID: 3816 Comm: stress-ng Tainted: G B 5.10.50+ #7
[ 1151.046846] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014
[ 1151.048633] Call Trace:
[ 1151.049072] dump_stack+0x57/0x6a
[ 1151.049585] check_bytes_and_report+0xed/0x110
[ 1151.050320] check_object+0x1eb/0x290
[ 1151.050924] ? __x64_sys_swapoff+0x39a/0x540
[ 1151.051646] free_debug_processing+0x151/0x350
[ 1151.052333] __slab_free+0x21a/0x3a0
[ 1151.052938] ? _cond_resched+0x2d/0x40
[ 1151.053529] ? __vunmap+0x1de/0x220
[ 1151.054139] ? __x64_sys_swapoff+0x39a/0x540
[ 1151.054796] ? kfree+0x276/0x2b0
[ 1151.055307] kfree+0x276/0x2b0
[ 1151.055832] __x64_sys_swapoff+0x39a/0x540
[ 1151.056466] do_syscall_64+0x33/0x40
[ 1151.057084] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 1151.057866] RIP: 0033:0x150340b0ffb7
[ 1151.058481] Code: Unable to access opcode bytes at RIP 0x150340b0ff8d.
[ 1151.059537] RSP: 002b:00007fff7f4ee238 EFLAGS: 00000246 ORIG_RAX: 00000000000000a8
[ 1151.060768] RAX: ffffffffffffffda RBX: 00007fff7f4ee66c RCX: 0000150340b0ffb7
[ 1151.061904] RDX: 000000000000000a RSI: 0000000000018094 RDI: 00007fff7f4ee860
[ 1151.063033] RBP: 00007fff7f4ef980 R08: 0000000000000000 R09: 0000150340a672bd
[ 1151.064135] R10: 00007fff7f4edca0 R11: 0000000000000246 R12: 0000000000018094
[ 1151.065253] R13: 0000000000000005 R14: 000000000160d930 R15: 00007fff7f4ee66c
[ 1151.066413] FIX kmalloc-16: Restoring 0x0000000084e43932-0x0000000098d17cae=0xcc
[ 1151.066413]
[ 1151.067890] FIX kmalloc-16: Object at 0x000000004855ba01 not freed
Fixes: 67482129cdab ("iomap: add a swapfile activation function")
Fixes: a45c0eccc564 ("iomap: move the swapfile code into a separate file")
Signed-off-by: Gang Deng <gavin.dg@linux.alibaba.com>
Signed-off-by: Xu Yu <xuyu@linux.alibaba.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
2021-08-18 22:47:52 +03:00
unsigned long max_pages ;
2019-07-15 18:50:57 +03:00
uint64_t first_ppage ;
uint64_t first_ppage_reported ;
uint64_t next_ppage ;
int error ;
mm/swap: consider max pages in iomap_swapfile_add_extent
When the max pages (last_page in the swap header + 1) is smaller than
the total pages (inode size) of the swapfile, iomap_swapfile_activate
overwrites sis->max with total pages.
However, frontswap_map is a swap page state bitmap allocated using the
initial sis->max page count read from the swap header. If swapfile
activation increases sis->max, it's possible for the frontswap code to
walk off the end of the bitmap, thereby corrupting kernel memory.
[djwong: modify the description a bit; the original paragraph reads:
"However, frontswap_map is allocated using max pages. When test and clear
the sis offset, which is larger than max pages, of frontswap_map in
__frontswap_invalidate_page(), neighbors of frontswap_map may be
overwritten, i.e., slab is polluted."
Note also that this bug resulted in a behavioral change: activating a
swap file that was formatted and later extended results in all pages
being activated, not the number of pages recorded in the swap header.]
This fixes the issue by considering the limitation of max pages of swap
info in iomap_swapfile_add_extent().
To reproduce the case, compile kernel with slub RED ZONE, then run test:
$ sudo stress-ng -a 1 -x softlockup,resources -t 72h --metrics --times \
--verify -v -Y /root/tmpdir/stress-ng/stress-statistic-12.yaml \
--log-file /root/tmpdir/stress-ng/stress-logfile-12.txt \
--temp-path /root/tmpdir/stress-ng/
We'll get the error log as below:
[ 1151.015141] =============================================================================
[ 1151.016489] BUG kmalloc-16 (Not tainted): Right Redzone overwritten
[ 1151.017486] -----------------------------------------------------------------------------
[ 1151.017486]
[ 1151.018997] Disabling lock debugging due to kernel taint
[ 1151.019873] INFO: 0x0000000084e43932-0x0000000098d17cae @offset=7392. First byte 0x0 instead of 0xcc
[ 1151.021303] INFO: Allocated in __do_sys_swapon+0xcf6/0x1170 age=43417 cpu=9 pid=3816
[ 1151.022538] __slab_alloc+0xe/0x20
[ 1151.023069] __kmalloc_node+0xfd/0x4b0
[ 1151.023704] __do_sys_swapon+0xcf6/0x1170
[ 1151.024346] do_syscall_64+0x33/0x40
[ 1151.024925] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 1151.025749] INFO: Freed in put_cred_rcu+0xa1/0xc0 age=43424 cpu=3 pid=2041
[ 1151.026889] kfree+0x276/0x2b0
[ 1151.027405] put_cred_rcu+0xa1/0xc0
[ 1151.027949] rcu_do_batch+0x17d/0x410
[ 1151.028566] rcu_core+0x14e/0x2b0
[ 1151.029084] __do_softirq+0x101/0x29e
[ 1151.029645] asm_call_irq_on_stack+0x12/0x20
[ 1151.030381] do_softirq_own_stack+0x37/0x40
[ 1151.031037] do_softirq.part.15+0x2b/0x30
[ 1151.031710] __local_bh_enable_ip+0x4b/0x50
[ 1151.032412] copy_fpstate_to_sigframe+0x111/0x360
[ 1151.033197] __setup_rt_frame+0xce/0x480
[ 1151.033809] arch_do_signal+0x1a3/0x250
[ 1151.034463] exit_to_user_mode_prepare+0xcf/0x110
[ 1151.035242] syscall_exit_to_user_mode+0x27/0x190
[ 1151.035970] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 1151.036795] INFO: Slab 0x000000003b9de4dc objects=44 used=9 fp=0x00000000539e349e flags=0xfffffc0010201
[ 1151.038323] INFO: Object 0x000000004855ba01 @offset=7376 fp=0x0000000000000000
[ 1151.038323]
[ 1151.039683] Redzone 000000008d0afd3d: cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc ................
[ 1151.041180] Object 000000004855ba01: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
[ 1151.042714] Redzone 0000000084e43932: 00 00 00 c0 cc cc cc cc ........
[ 1151.044120] Padding 000000000864c042: 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZZZZZZZZZ
[ 1151.045615] CPU: 5 PID: 3816 Comm: stress-ng Tainted: G B 5.10.50+ #7
[ 1151.046846] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014
[ 1151.048633] Call Trace:
[ 1151.049072] dump_stack+0x57/0x6a
[ 1151.049585] check_bytes_and_report+0xed/0x110
[ 1151.050320] check_object+0x1eb/0x290
[ 1151.050924] ? __x64_sys_swapoff+0x39a/0x540
[ 1151.051646] free_debug_processing+0x151/0x350
[ 1151.052333] __slab_free+0x21a/0x3a0
[ 1151.052938] ? _cond_resched+0x2d/0x40
[ 1151.053529] ? __vunmap+0x1de/0x220
[ 1151.054139] ? __x64_sys_swapoff+0x39a/0x540
[ 1151.054796] ? kfree+0x276/0x2b0
[ 1151.055307] kfree+0x276/0x2b0
[ 1151.055832] __x64_sys_swapoff+0x39a/0x540
[ 1151.056466] do_syscall_64+0x33/0x40
[ 1151.057084] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 1151.057866] RIP: 0033:0x150340b0ffb7
[ 1151.058481] Code: Unable to access opcode bytes at RIP 0x150340b0ff8d.
[ 1151.059537] RSP: 002b:00007fff7f4ee238 EFLAGS: 00000246 ORIG_RAX: 00000000000000a8
[ 1151.060768] RAX: ffffffffffffffda RBX: 00007fff7f4ee66c RCX: 0000150340b0ffb7
[ 1151.061904] RDX: 000000000000000a RSI: 0000000000018094 RDI: 00007fff7f4ee860
[ 1151.063033] RBP: 00007fff7f4ef980 R08: 0000000000000000 R09: 0000150340a672bd
[ 1151.064135] R10: 00007fff7f4edca0 R11: 0000000000000246 R12: 0000000000018094
[ 1151.065253] R13: 0000000000000005 R14: 000000000160d930 R15: 00007fff7f4ee66c
[ 1151.066413] FIX kmalloc-16: Restoring 0x0000000084e43932-0x0000000098d17cae=0xcc
[ 1151.066413]
[ 1151.067890] FIX kmalloc-16: Object at 0x000000004855ba01 not freed
Fixes: 67482129cdab ("iomap: add a swapfile activation function")
Fixes: a45c0eccc564 ("iomap: move the swapfile code into a separate file")
Signed-off-by: Gang Deng <gavin.dg@linux.alibaba.com>
Signed-off-by: Xu Yu <xuyu@linux.alibaba.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
2021-08-18 22:47:52 +03:00
if ( unlikely ( isi - > nr_pages > = isi - > sis - > max ) )
return 0 ;
max_pages = isi - > sis - > max - isi - > nr_pages ;
2019-07-15 18:50:57 +03:00
/*
* Round the start up and the end down so that the physical
* extent aligns to a page boundary .
*/
first_ppage = ALIGN ( iomap - > addr , PAGE_SIZE ) > > PAGE_SHIFT ;
next_ppage = ALIGN_DOWN ( iomap - > addr + iomap - > length , PAGE_SIZE ) > >
PAGE_SHIFT ;
/* Skip too-short physical extents. */
if ( first_ppage > = next_ppage )
return 0 ;
nr_pages = next_ppage - first_ppage ;
mm/swap: consider max pages in iomap_swapfile_add_extent
When the max pages (last_page in the swap header + 1) is smaller than
the total pages (inode size) of the swapfile, iomap_swapfile_activate
overwrites sis->max with total pages.
However, frontswap_map is a swap page state bitmap allocated using the
initial sis->max page count read from the swap header. If swapfile
activation increases sis->max, it's possible for the frontswap code to
walk off the end of the bitmap, thereby corrupting kernel memory.
[djwong: modify the description a bit; the original paragraph reads:
"However, frontswap_map is allocated using max pages. When test and clear
the sis offset, which is larger than max pages, of frontswap_map in
__frontswap_invalidate_page(), neighbors of frontswap_map may be
overwritten, i.e., slab is polluted."
Note also that this bug resulted in a behavioral change: activating a
swap file that was formatted and later extended results in all pages
being activated, not the number of pages recorded in the swap header.]
This fixes the issue by considering the limitation of max pages of swap
info in iomap_swapfile_add_extent().
To reproduce the case, compile kernel with slub RED ZONE, then run test:
$ sudo stress-ng -a 1 -x softlockup,resources -t 72h --metrics --times \
--verify -v -Y /root/tmpdir/stress-ng/stress-statistic-12.yaml \
--log-file /root/tmpdir/stress-ng/stress-logfile-12.txt \
--temp-path /root/tmpdir/stress-ng/
We'll get the error log as below:
[ 1151.015141] =============================================================================
[ 1151.016489] BUG kmalloc-16 (Not tainted): Right Redzone overwritten
[ 1151.017486] -----------------------------------------------------------------------------
[ 1151.017486]
[ 1151.018997] Disabling lock debugging due to kernel taint
[ 1151.019873] INFO: 0x0000000084e43932-0x0000000098d17cae @offset=7392. First byte 0x0 instead of 0xcc
[ 1151.021303] INFO: Allocated in __do_sys_swapon+0xcf6/0x1170 age=43417 cpu=9 pid=3816
[ 1151.022538] __slab_alloc+0xe/0x20
[ 1151.023069] __kmalloc_node+0xfd/0x4b0
[ 1151.023704] __do_sys_swapon+0xcf6/0x1170
[ 1151.024346] do_syscall_64+0x33/0x40
[ 1151.024925] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 1151.025749] INFO: Freed in put_cred_rcu+0xa1/0xc0 age=43424 cpu=3 pid=2041
[ 1151.026889] kfree+0x276/0x2b0
[ 1151.027405] put_cred_rcu+0xa1/0xc0
[ 1151.027949] rcu_do_batch+0x17d/0x410
[ 1151.028566] rcu_core+0x14e/0x2b0
[ 1151.029084] __do_softirq+0x101/0x29e
[ 1151.029645] asm_call_irq_on_stack+0x12/0x20
[ 1151.030381] do_softirq_own_stack+0x37/0x40
[ 1151.031037] do_softirq.part.15+0x2b/0x30
[ 1151.031710] __local_bh_enable_ip+0x4b/0x50
[ 1151.032412] copy_fpstate_to_sigframe+0x111/0x360
[ 1151.033197] __setup_rt_frame+0xce/0x480
[ 1151.033809] arch_do_signal+0x1a3/0x250
[ 1151.034463] exit_to_user_mode_prepare+0xcf/0x110
[ 1151.035242] syscall_exit_to_user_mode+0x27/0x190
[ 1151.035970] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 1151.036795] INFO: Slab 0x000000003b9de4dc objects=44 used=9 fp=0x00000000539e349e flags=0xfffffc0010201
[ 1151.038323] INFO: Object 0x000000004855ba01 @offset=7376 fp=0x0000000000000000
[ 1151.038323]
[ 1151.039683] Redzone 000000008d0afd3d: cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc ................
[ 1151.041180] Object 000000004855ba01: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
[ 1151.042714] Redzone 0000000084e43932: 00 00 00 c0 cc cc cc cc ........
[ 1151.044120] Padding 000000000864c042: 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZZZZZZZZZ
[ 1151.045615] CPU: 5 PID: 3816 Comm: stress-ng Tainted: G B 5.10.50+ #7
[ 1151.046846] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014
[ 1151.048633] Call Trace:
[ 1151.049072] dump_stack+0x57/0x6a
[ 1151.049585] check_bytes_and_report+0xed/0x110
[ 1151.050320] check_object+0x1eb/0x290
[ 1151.050924] ? __x64_sys_swapoff+0x39a/0x540
[ 1151.051646] free_debug_processing+0x151/0x350
[ 1151.052333] __slab_free+0x21a/0x3a0
[ 1151.052938] ? _cond_resched+0x2d/0x40
[ 1151.053529] ? __vunmap+0x1de/0x220
[ 1151.054139] ? __x64_sys_swapoff+0x39a/0x540
[ 1151.054796] ? kfree+0x276/0x2b0
[ 1151.055307] kfree+0x276/0x2b0
[ 1151.055832] __x64_sys_swapoff+0x39a/0x540
[ 1151.056466] do_syscall_64+0x33/0x40
[ 1151.057084] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 1151.057866] RIP: 0033:0x150340b0ffb7
[ 1151.058481] Code: Unable to access opcode bytes at RIP 0x150340b0ff8d.
[ 1151.059537] RSP: 002b:00007fff7f4ee238 EFLAGS: 00000246 ORIG_RAX: 00000000000000a8
[ 1151.060768] RAX: ffffffffffffffda RBX: 00007fff7f4ee66c RCX: 0000150340b0ffb7
[ 1151.061904] RDX: 000000000000000a RSI: 0000000000018094 RDI: 00007fff7f4ee860
[ 1151.063033] RBP: 00007fff7f4ef980 R08: 0000000000000000 R09: 0000150340a672bd
[ 1151.064135] R10: 00007fff7f4edca0 R11: 0000000000000246 R12: 0000000000018094
[ 1151.065253] R13: 0000000000000005 R14: 000000000160d930 R15: 00007fff7f4ee66c
[ 1151.066413] FIX kmalloc-16: Restoring 0x0000000084e43932-0x0000000098d17cae=0xcc
[ 1151.066413]
[ 1151.067890] FIX kmalloc-16: Object at 0x000000004855ba01 not freed
Fixes: 67482129cdab ("iomap: add a swapfile activation function")
Fixes: a45c0eccc564 ("iomap: move the swapfile code into a separate file")
Signed-off-by: Gang Deng <gavin.dg@linux.alibaba.com>
Signed-off-by: Xu Yu <xuyu@linux.alibaba.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
2021-08-18 22:47:52 +03:00
nr_pages = min ( nr_pages , max_pages ) ;
2019-07-15 18:50:57 +03:00
/*
* Calculate how much swap space we ' re adding ; the first page contains
* the swap header and doesn ' t count . The mm still wants that first
* page fed to add_swap_extent , however .
*/
first_ppage_reported = first_ppage ;
if ( iomap - > offset = = 0 )
first_ppage_reported + + ;
if ( isi - > lowest_ppage > first_ppage_reported )
isi - > lowest_ppage = first_ppage_reported ;
if ( isi - > highest_ppage < ( next_ppage - 1 ) )
isi - > highest_ppage = next_ppage - 1 ;
/* Add extent, set up for the next call. */
error = add_swap_extent ( isi - > sis , isi - > nr_pages , nr_pages , first_ppage ) ;
if ( error < 0 )
return error ;
isi - > nr_extents + = error ;
isi - > nr_pages + = nr_pages ;
return 0 ;
}
2021-03-26 20:55:40 +03:00
static int iomap_swapfile_fail ( struct iomap_swapfile_info * isi , const char * str )
{
char * buf , * p = ERR_PTR ( - ENOMEM ) ;
buf = kmalloc ( PATH_MAX , GFP_KERNEL ) ;
if ( buf )
p = file_path ( isi - > file , buf , PATH_MAX ) ;
pr_err ( " swapon: file %s %s \n " , IS_ERR ( p ) ? " <unknown> " : p , str ) ;
kfree ( buf ) ;
return - EINVAL ;
}
2019-07-15 18:50:57 +03:00
/*
* Accumulate iomaps for this swap file . We have to accumulate iomaps because
* swap only cares about contiguous page - aligned physical extents and makes no
* distinction between written and unwritten extents .
*/
2021-08-11 04:33:12 +03:00
static loff_t iomap_swapfile_iter ( const struct iomap_iter * iter ,
struct iomap * iomap , struct iomap_swapfile_info * isi )
2019-07-15 18:50:57 +03:00
{
switch ( iomap - > type ) {
case IOMAP_MAPPED :
case IOMAP_UNWRITTEN :
/* Only real or unwritten extents. */
break ;
case IOMAP_INLINE :
/* No inline data. */
2021-03-26 20:55:40 +03:00
return iomap_swapfile_fail ( isi , " is inline " ) ;
2019-07-15 18:50:57 +03:00
default :
2021-03-26 20:55:40 +03:00
return iomap_swapfile_fail ( isi , " has unallocated extents " ) ;
2019-07-15 18:50:57 +03:00
}
/* No uncommitted metadata or shared blocks. */
2021-03-26 20:55:40 +03:00
if ( iomap - > flags & IOMAP_F_DIRTY )
return iomap_swapfile_fail ( isi , " is not committed " ) ;
if ( iomap - > flags & IOMAP_F_SHARED )
return iomap_swapfile_fail ( isi , " has shared extents " ) ;
2019-07-15 18:50:57 +03:00
/* Only one bdev per swap file. */
2021-03-26 20:55:40 +03:00
if ( iomap - > bdev ! = isi - > sis - > bdev )
return iomap_swapfile_fail ( isi , " outside the main device " ) ;
2019-07-15 18:50:57 +03:00
if ( isi - > iomap . length = = 0 ) {
/* No accumulated extent, so just store it. */
memcpy ( & isi - > iomap , iomap , sizeof ( isi - > iomap ) ) ;
} else if ( isi - > iomap . addr + isi - > iomap . length = = iomap - > addr ) {
/* Append this to the accumulated extent. */
isi - > iomap . length + = iomap - > length ;
} else {
/* Otherwise, add the retained iomap and store this one. */
2021-08-11 04:33:12 +03:00
int error = iomap_swapfile_add_extent ( isi ) ;
2019-07-15 18:50:57 +03:00
if ( error )
return error ;
memcpy ( & isi - > iomap , iomap , sizeof ( isi - > iomap ) ) ;
}
2021-08-11 04:33:12 +03:00
return iomap_length ( iter ) ;
2019-07-15 18:50:57 +03:00
}
/*
* Iterate a swap file ' s iomaps to construct physical extents that can be
* passed to the swapfile subsystem .
*/
int iomap_swapfile_activate ( struct swap_info_struct * sis ,
struct file * swap_file , sector_t * pagespan ,
const struct iomap_ops * ops )
{
2021-08-11 04:33:12 +03:00
struct inode * inode = swap_file - > f_mapping - > host ;
struct iomap_iter iter = {
. inode = inode ,
. pos = 0 ,
. len = ALIGN_DOWN ( i_size_read ( inode ) , PAGE_SIZE ) ,
. flags = IOMAP_REPORT ,
} ;
2019-07-15 18:50:57 +03:00
struct iomap_swapfile_info isi = {
. sis = sis ,
. lowest_ppage = ( sector_t ) - 1ULL ,
2021-03-26 20:55:40 +03:00
. file = swap_file ,
2019-07-15 18:50:57 +03:00
} ;
2021-08-11 04:33:12 +03:00
int ret ;
2019-07-15 18:50:57 +03:00
/*
* Persist all file mapping metadata so that we won ' t have any
* IOMAP_F_DIRTY iomaps .
*/
ret = vfs_fsync ( swap_file , 1 ) ;
if ( ret )
return ret ;
2021-08-11 04:33:12 +03:00
while ( ( ret = iomap_iter ( & iter , ops ) ) > 0 )
iter . processed = iomap_swapfile_iter ( & iter , & iter . iomap , & isi ) ;
if ( ret < 0 )
return ret ;
2019-07-15 18:50:57 +03:00
if ( isi . iomap . length ) {
ret = iomap_swapfile_add_extent ( & isi ) ;
if ( ret )
return ret ;
}
2021-03-09 20:29:11 +03:00
/*
* If this swapfile doesn ' t contain even a single page - aligned
* contiguous range of blocks , reject this useless swapfile to
* prevent confusion later on .
*/
if ( isi . nr_pages = = 0 ) {
pr_warn ( " swapon: Cannot find a single usable page in file. \n " ) ;
return - EINVAL ;
}
2019-07-15 18:50:57 +03:00
* pagespan = 1 + isi . highest_ppage - isi . lowest_ppage ;
sis - > max = isi . nr_pages ;
sis - > pages = isi . nr_pages - 1 ;
sis - > highest_bit = isi . nr_pages - 1 ;
return isi . nr_extents ;
}
EXPORT_SYMBOL_GPL ( iomap_swapfile_activate ) ;