5808fecc57
If isi.nr_pages is 0, iomap_swapfile_activate() assigns -1 to sis->pages (which is an unsigned int), turning it into a huge value. This could cause a kernel crash in kernel v4.18 (with the signature below), or could lead to unknown issues on the latest kernel if the fake big swap gets used. Fix this issue by returning -EINVAL when nr_pages is 0, since it is an invalid swapfile anyway. It looks like this issue will be hit with a pagesize < blocksize type of configuration. I was able to hit the issue with a tiny swap file using the test script below. https://raw.githubusercontent.com/riteshharjani/LinuxStudy/master/scripts/swap-issue.sh kernel crash analysis on v4.18 ============================== On the v4.18 kernel, it causes a kernel panic, since sis->pages becomes a huge value and isi.nr_extents is 0. When 0 is returned it is treated as a swapfile over NFS and SWP_FILE is set (sis->flags |= SWP_FILE). Then, when swapoff is called, it calls a_ops->swap_deactivate() if (sis->flags & SWP_FILE) is true. Since a_ops->swap_deactivate() is NULL in the case of XFS, it causes the panic below. Panic signature on v4.18 kernel: ======================================= root@qemu:/home/qemu# [ 8291.723351] XFS (loop2): Unmounting Filesystem [ 8292.123104] XFS (loop2): Mounting V5 Filesystem [ 8292.132451] XFS (loop2): Ending clean mount [ 8292.263362] Adding 4294967232k swap on /mnt1/test/swapfile. 
Priority:-2 extents:1 across:274877906880k [ 8292.277834] Unable to handle kernel paging request for instruction fetch [ 8292.278677] Faulting instruction address: 0x00000000 cpu 0x19: Vector: 400 (Instruction Access) at [c0000009dd5b7ad0] pc: 0000000000000000 lr: c0000000003eb9dc: destroy_swap_extents+0xfc/0x120 sp: c0000009dd5b7d50 msr: 8000000040009033 current = 0xc0000009b6710080 paca = 0xc00000003ffcb280 irqmask: 0x03 irq_happened: 0x01 pid = 5604, comm = swapoff Linux version 4.18.0 (riteshh@xxxxxxx) (gcc version 8.4.0 (Ubuntu 8.4.0-1ubuntu1~18.04)) #57 SMP Wed Mar 3 01:33:04 CST 2021 enter ? for help [link register ] c0000000003eb9dc destroy_swap_extents+0xfc/0x120 [c0000009dd5b7d50] c0000000025a7058 proc_poll_event+0x0/0x4 (unreliable) [c0000009dd5b7da0] c0000000003f0498 sys_swapoff+0x3f8/0x910 [c0000009dd5b7e30] c00000000000bbe4 system_call+0x5c/0x70 Exception: c01 (System Call) at 00007ffff7d208d8 Signed-off-by: Ritesh Harjani <riteshh@linux.ibm.com> [djwong: rework the comment to provide more details] Reviewed-by: Darrick J. Wong <djwong@kernel.org> Signed-off-by: Darrick J. Wong <djwong@kernel.org> Reviewed-by: Christoph Hellwig <hch@lst.de>
190 lines
5.2 KiB
C
190 lines
5.2 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* Copyright (C) 2018 Oracle. All Rights Reserved.
|
|
* Author: Darrick J. Wong <darrick.wong@oracle.com>
|
|
*/
|
|
#include <linux/module.h>
|
|
#include <linux/compiler.h>
|
|
#include <linux/fs.h>
|
|
#include <linux/iomap.h>
|
|
#include <linux/swap.h>
|
|
|
|
/* Swapfile activation */
|
|
|
|
struct iomap_swapfile_info {
|
|
struct iomap iomap; /* accumulated iomap */
|
|
struct swap_info_struct *sis;
|
|
uint64_t lowest_ppage; /* lowest physical addr seen (pages) */
|
|
uint64_t highest_ppage; /* highest physical addr seen (pages) */
|
|
unsigned long nr_pages; /* number of pages collected */
|
|
int nr_extents; /* extent count */
|
|
};
|
|
|
|
/*
|
|
* Collect physical extents for this swap file. Physical extents reported to
|
|
* the swap code must be trimmed to align to a page boundary. The logical
|
|
* offset within the file is irrelevant since the swapfile code maps logical
|
|
* page numbers of the swap device to the physical page-aligned extents.
|
|
*/
|
|
static int iomap_swapfile_add_extent(struct iomap_swapfile_info *isi)
|
|
{
|
|
struct iomap *iomap = &isi->iomap;
|
|
unsigned long nr_pages;
|
|
uint64_t first_ppage;
|
|
uint64_t first_ppage_reported;
|
|
uint64_t next_ppage;
|
|
int error;
|
|
|
|
/*
|
|
* Round the start up and the end down so that the physical
|
|
* extent aligns to a page boundary.
|
|
*/
|
|
first_ppage = ALIGN(iomap->addr, PAGE_SIZE) >> PAGE_SHIFT;
|
|
next_ppage = ALIGN_DOWN(iomap->addr + iomap->length, PAGE_SIZE) >>
|
|
PAGE_SHIFT;
|
|
|
|
/* Skip too-short physical extents. */
|
|
if (first_ppage >= next_ppage)
|
|
return 0;
|
|
nr_pages = next_ppage - first_ppage;
|
|
|
|
/*
|
|
* Calculate how much swap space we're adding; the first page contains
|
|
* the swap header and doesn't count. The mm still wants that first
|
|
* page fed to add_swap_extent, however.
|
|
*/
|
|
first_ppage_reported = first_ppage;
|
|
if (iomap->offset == 0)
|
|
first_ppage_reported++;
|
|
if (isi->lowest_ppage > first_ppage_reported)
|
|
isi->lowest_ppage = first_ppage_reported;
|
|
if (isi->highest_ppage < (next_ppage - 1))
|
|
isi->highest_ppage = next_ppage - 1;
|
|
|
|
/* Add extent, set up for the next call. */
|
|
error = add_swap_extent(isi->sis, isi->nr_pages, nr_pages, first_ppage);
|
|
if (error < 0)
|
|
return error;
|
|
isi->nr_extents += error;
|
|
isi->nr_pages += nr_pages;
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Accumulate iomaps for this swap file. We have to accumulate iomaps because
|
|
* swap only cares about contiguous page-aligned physical extents and makes no
|
|
* distinction between written and unwritten extents.
|
|
*/
|
|
static loff_t iomap_swapfile_activate_actor(struct inode *inode, loff_t pos,
|
|
loff_t count, void *data, struct iomap *iomap,
|
|
struct iomap *srcmap)
|
|
{
|
|
struct iomap_swapfile_info *isi = data;
|
|
int error;
|
|
|
|
switch (iomap->type) {
|
|
case IOMAP_MAPPED:
|
|
case IOMAP_UNWRITTEN:
|
|
/* Only real or unwritten extents. */
|
|
break;
|
|
case IOMAP_INLINE:
|
|
/* No inline data. */
|
|
pr_err("swapon: file is inline\n");
|
|
return -EINVAL;
|
|
default:
|
|
pr_err("swapon: file has unallocated extents\n");
|
|
return -EINVAL;
|
|
}
|
|
|
|
/* No uncommitted metadata or shared blocks. */
|
|
if (iomap->flags & IOMAP_F_DIRTY) {
|
|
pr_err("swapon: file is not committed\n");
|
|
return -EINVAL;
|
|
}
|
|
if (iomap->flags & IOMAP_F_SHARED) {
|
|
pr_err("swapon: file has shared extents\n");
|
|
return -EINVAL;
|
|
}
|
|
|
|
/* Only one bdev per swap file. */
|
|
if (iomap->bdev != isi->sis->bdev) {
|
|
pr_err("swapon: file is on multiple devices\n");
|
|
return -EINVAL;
|
|
}
|
|
|
|
if (isi->iomap.length == 0) {
|
|
/* No accumulated extent, so just store it. */
|
|
memcpy(&isi->iomap, iomap, sizeof(isi->iomap));
|
|
} else if (isi->iomap.addr + isi->iomap.length == iomap->addr) {
|
|
/* Append this to the accumulated extent. */
|
|
isi->iomap.length += iomap->length;
|
|
} else {
|
|
/* Otherwise, add the retained iomap and store this one. */
|
|
error = iomap_swapfile_add_extent(isi);
|
|
if (error)
|
|
return error;
|
|
memcpy(&isi->iomap, iomap, sizeof(isi->iomap));
|
|
}
|
|
return count;
|
|
}
|
|
|
|
/*
|
|
* Iterate a swap file's iomaps to construct physical extents that can be
|
|
* passed to the swapfile subsystem.
|
|
*/
|
|
int iomap_swapfile_activate(struct swap_info_struct *sis,
|
|
struct file *swap_file, sector_t *pagespan,
|
|
const struct iomap_ops *ops)
|
|
{
|
|
struct iomap_swapfile_info isi = {
|
|
.sis = sis,
|
|
.lowest_ppage = (sector_t)-1ULL,
|
|
};
|
|
struct address_space *mapping = swap_file->f_mapping;
|
|
struct inode *inode = mapping->host;
|
|
loff_t pos = 0;
|
|
loff_t len = ALIGN_DOWN(i_size_read(inode), PAGE_SIZE);
|
|
loff_t ret;
|
|
|
|
/*
|
|
* Persist all file mapping metadata so that we won't have any
|
|
* IOMAP_F_DIRTY iomaps.
|
|
*/
|
|
ret = vfs_fsync(swap_file, 1);
|
|
if (ret)
|
|
return ret;
|
|
|
|
while (len > 0) {
|
|
ret = iomap_apply(inode, pos, len, IOMAP_REPORT,
|
|
ops, &isi, iomap_swapfile_activate_actor);
|
|
if (ret <= 0)
|
|
return ret;
|
|
|
|
pos += ret;
|
|
len -= ret;
|
|
}
|
|
|
|
if (isi.iomap.length) {
|
|
ret = iomap_swapfile_add_extent(&isi);
|
|
if (ret)
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* If this swapfile doesn't contain even a single page-aligned
|
|
* contiguous range of blocks, reject this useless swapfile to
|
|
* prevent confusion later on.
|
|
*/
|
|
if (isi.nr_pages == 0) {
|
|
pr_warn("swapon: Cannot find a single usable page in file.\n");
|
|
return -EINVAL;
|
|
}
|
|
|
|
*pagespan = 1 + isi.highest_ppage - isi.lowest_ppage;
|
|
sis->max = isi.nr_pages;
|
|
sis->pages = isi.nr_pages - 1;
|
|
sis->highest_bit = isi.nr_pages - 1;
|
|
return isi.nr_extents;
|
|
}
|
|
EXPORT_SYMBOL_GPL(iomap_swapfile_activate);
|