linux/kernel/power/user.c

466 lines
9.7 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-only
/*
* linux/kernel/power/user.c
*
* This file provides the user space interface for software suspend/resume.
*
* Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
*/
#include <linux/suspend.h>
#include <linux/reboot.h>
#include <linux/string.h>
#include <linux/device.h>
#include <linux/miscdevice.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/pm.h>
#include <linux/fs.h>
#include <linux/compat.h>
#include <linux/console.h>
#include <linux/cpu.h>
#include <linux/freezer.h>
#include <linux/uaccess.h>
#include "power.h"
static struct snapshot_data {
struct snapshot_handle handle;
int swap;
int mode;
bool frozen;
bool ready;
bool platform_support;
bool free_bitmaps;
PM: hibernate: Restrict writes to the resume device Hibernation via snapshot device requires write permission to the swap block device, the one that more often (but not necessarily) is used to store the hibernation image. With this patch, such permissions are granted iff: 1) snapshot device config option is enabled 2) swap partition is used as resume device In other circumstances the swap device is not writable from userspace. In order to achieve this, every write attempt to a swap device is checked against the device configured as part of the uswsusp API [0] using a pointer to the inode struct in memory. If the swap device being written was not configured for resuming, the write request is denied. NOTE: this implementation works only for swap block devices, where the inode configured by swapon (which sets S_SWAPFILE) is the same used by SNAPSHOT_SET_SWAP_AREA. In case of swap file, SNAPSHOT_SET_SWAP_AREA indeed receives the inode of the block device containing the filesystem where the swap file is located (+ offset in it) which is never passed to swapon and then has not set S_SWAPFILE. As result, the swap file itself (as a file) has never an option to be written from userspace. Instead it remains writable if accessed directly from the containing block device, which is always writeable from root. [0] Documentation/power/userland-swsusp.rst v2: - rename is_hibernate_snapshot_dev() to is_hibernate_resume_dev() - fix description so to correctly refer to the resume device Signed-off-by: Domenico Andreoli <domenico.andreoli@linux.com> Acked-by: Darrick J. Wong <darrick.wong@oracle.com> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2020-05-19 21:14:10 +03:00
struct inode *bd_inode;
} snapshot_state;
PM: hibernate: Restrict writes to the resume device Hibernation via snapshot device requires write permission to the swap block device, the one that more often (but not necessarily) is used to store the hibernation image. With this patch, such permissions are granted iff: 1) snapshot device config option is enabled 2) swap partition is used as resume device In other circumstances the swap device is not writable from userspace. In order to achieve this, every write attempt to a swap device is checked against the device configured as part of the uswsusp API [0] using a pointer to the inode struct in memory. If the swap device being written was not configured for resuming, the write request is denied. NOTE: this implementation works only for swap block devices, where the inode configured by swapon (which sets S_SWAPFILE) is the same used by SNAPSHOT_SET_SWAP_AREA. In case of swap file, SNAPSHOT_SET_SWAP_AREA indeed receives the inode of the block device containing the filesystem where the swap file is located (+ offset in it) which is never passed to swapon and then has not set S_SWAPFILE. As result, the swap file itself (as a file) has never an option to be written from userspace. Instead it remains writable if accessed directly from the containing block device, which is always writeable from root. [0] Documentation/power/userland-swsusp.rst v2: - rename is_hibernate_snapshot_dev() to is_hibernate_resume_dev() - fix description so to correctly refer to the resume device Signed-off-by: Domenico Andreoli <domenico.andreoli@linux.com> Acked-by: Darrick J. Wong <darrick.wong@oracle.com> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2020-05-19 21:14:10 +03:00
int is_hibernate_resume_dev(const struct inode *bd_inode)
{
return hibernation_available() && snapshot_state.bd_inode == bd_inode;
}
static int snapshot_open(struct inode *inode, struct file *filp)
{
struct snapshot_data *data;
int error, nr_calls = 0;
if (!hibernation_available())
return -EPERM;
lock_system_sleep();
if (!hibernate_acquire()) {
error = -EBUSY;
goto Unlock;
}
if ((filp->f_flags & O_ACCMODE) == O_RDWR) {
hibernate_release();
error = -ENOSYS;
goto Unlock;
}
nonseekable_open(inode, filp);
data = &snapshot_state;
filp->private_data = data;
memset(&data->handle, 0, sizeof(struct snapshot_handle));
if ((filp->f_flags & O_ACCMODE) == O_RDONLY) {
/* Hibernating. The image device should be accessible. */
2006-12-07 07:34:07 +03:00
data->swap = swsusp_resume_device ?
swap_type_of(swsusp_resume_device, 0, NULL) : -1;
data->mode = O_RDONLY;
data->free_bitmaps = false;
error = __pm_notifier_call_chain(PM_HIBERNATION_PREPARE, -1, &nr_calls);
if (error)
__pm_notifier_call_chain(PM_POST_HIBERNATION, --nr_calls, NULL);
} else {
/*
* Resuming. We may need to wait for the image device to
* appear.
*/
wait_for_device_probe();
data->swap = -1;
data->mode = O_WRONLY;
error = __pm_notifier_call_chain(PM_RESTORE_PREPARE, -1, &nr_calls);
if (!error) {
error = create_basic_memory_bitmaps();
data->free_bitmaps = !error;
} else
nr_calls--;
if (error)
__pm_notifier_call_chain(PM_POST_RESTORE, nr_calls, NULL);
}
if (error)
hibernate_release();
data->frozen = false;
data->ready = false;
data->platform_support = false;
PM: hibernate: Restrict writes to the resume device Hibernation via snapshot device requires write permission to the swap block device, the one that more often (but not necessarily) is used to store the hibernation image. With this patch, such permissions are granted iff: 1) snapshot device config option is enabled 2) swap partition is used as resume device In other circumstances the swap device is not writable from userspace. In order to achieve this, every write attempt to a swap device is checked against the device configured as part of the uswsusp API [0] using a pointer to the inode struct in memory. If the swap device being written was not configured for resuming, the write request is denied. NOTE: this implementation works only for swap block devices, where the inode configured by swapon (which sets S_SWAPFILE) is the same used by SNAPSHOT_SET_SWAP_AREA. In case of swap file, SNAPSHOT_SET_SWAP_AREA indeed receives the inode of the block device containing the filesystem where the swap file is located (+ offset in it) which is never passed to swapon and then has not set S_SWAPFILE. As result, the swap file itself (as a file) has never an option to be written from userspace. Instead it remains writable if accessed directly from the containing block device, which is always writeable from root. [0] Documentation/power/userland-swsusp.rst v2: - rename is_hibernate_snapshot_dev() to is_hibernate_resume_dev() - fix description so to correctly refer to the resume device Signed-off-by: Domenico Andreoli <domenico.andreoli@linux.com> Acked-by: Darrick J. Wong <darrick.wong@oracle.com> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2020-05-19 21:14:10 +03:00
data->bd_inode = NULL;
Unlock:
unlock_system_sleep();
return error;
}
static int snapshot_release(struct inode *inode, struct file *filp)
{
struct snapshot_data *data;
lock_system_sleep();
swsusp_free();
data = filp->private_data;
PM: hibernate: Restrict writes to the resume device Hibernation via snapshot device requires write permission to the swap block device, the one that more often (but not necessarily) is used to store the hibernation image. With this patch, such permissions are granted iff: 1) snapshot device config option is enabled 2) swap partition is used as resume device In other circumstances the swap device is not writable from userspace. In order to achieve this, every write attempt to a swap device is checked against the device configured as part of the uswsusp API [0] using a pointer to the inode struct in memory. If the swap device being written was not configured for resuming, the write request is denied. NOTE: this implementation works only for swap block devices, where the inode configured by swapon (which sets S_SWAPFILE) is the same used by SNAPSHOT_SET_SWAP_AREA. In case of swap file, SNAPSHOT_SET_SWAP_AREA indeed receives the inode of the block device containing the filesystem where the swap file is located (+ offset in it) which is never passed to swapon and then has not set S_SWAPFILE. As result, the swap file itself (as a file) has never an option to be written from userspace. Instead it remains writable if accessed directly from the containing block device, which is always writeable from root. [0] Documentation/power/userland-swsusp.rst v2: - rename is_hibernate_snapshot_dev() to is_hibernate_resume_dev() - fix description so to correctly refer to the resume device Signed-off-by: Domenico Andreoli <domenico.andreoli@linux.com> Acked-by: Darrick J. Wong <darrick.wong@oracle.com> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2020-05-19 21:14:10 +03:00
data->bd_inode = NULL;
free_all_swap_pages(data->swap);
if (data->frozen) {
pm_restore_gfp_mask();
free_basic_memory_bitmaps();
thaw_processes();
} else if (data->free_bitmaps) {
free_basic_memory_bitmaps();
}
pm_notifier_call_chain(data->mode == O_RDONLY ?
PM_POST_HIBERNATION : PM_POST_RESTORE);
hibernate_release();
unlock_system_sleep();
return 0;
}
static ssize_t snapshot_read(struct file *filp, char __user *buf,
size_t count, loff_t *offp)
{
struct snapshot_data *data;
ssize_t res;
loff_t pg_offp = *offp & ~PAGE_MASK;
lock_system_sleep();
data = filp->private_data;
if (!data->ready) {
res = -ENODATA;
goto Unlock;
}
if (!pg_offp) { /* on page boundary? */
res = snapshot_read_next(&data->handle);
if (res <= 0)
goto Unlock;
} else {
res = PAGE_SIZE - pg_offp;
}
res = simple_read_from_buffer(buf, count, &pg_offp,
data_of(data->handle), res);
if (res > 0)
*offp += res;
Unlock:
unlock_system_sleep();
return res;
}
static ssize_t snapshot_write(struct file *filp, const char __user *buf,
size_t count, loff_t *offp)
{
struct snapshot_data *data;
ssize_t res;
loff_t pg_offp = *offp & ~PAGE_MASK;
lock_system_sleep();
data = filp->private_data;
if (!pg_offp) {
res = snapshot_write_next(&data->handle);
if (res <= 0)
goto unlock;
} else {
res = PAGE_SIZE - pg_offp;
}
if (!data_of(data->handle)) {
res = -EINVAL;
goto unlock;
}
res = simple_write_to_buffer(data_of(data->handle), res, &pg_offp,
buf, count);
if (res > 0)
*offp += res;
unlock:
unlock_system_sleep();
return res;
}
struct compat_resume_swap_area {
compat_loff_t offset;
u32 dev;
} __packed;
static int snapshot_set_swap_area(struct snapshot_data *data,
void __user *argp)
{
PM: hibernate: Restrict writes to the resume device Hibernation via snapshot device requires write permission to the swap block device, the one that more often (but not necessarily) is used to store the hibernation image. With this patch, such permissions are granted iff: 1) snapshot device config option is enabled 2) swap partition is used as resume device In other circumstances the swap device is not writable from userspace. In order to achieve this, every write attempt to a swap device is checked against the device configured as part of the uswsusp API [0] using a pointer to the inode struct in memory. If the swap device being written was not configured for resuming, the write request is denied. NOTE: this implementation works only for swap block devices, where the inode configured by swapon (which sets S_SWAPFILE) is the same used by SNAPSHOT_SET_SWAP_AREA. In case of swap file, SNAPSHOT_SET_SWAP_AREA indeed receives the inode of the block device containing the filesystem where the swap file is located (+ offset in it) which is never passed to swapon and then has not set S_SWAPFILE. As result, the swap file itself (as a file) has never an option to be written from userspace. Instead it remains writable if accessed directly from the containing block device, which is always writeable from root. [0] Documentation/power/userland-swsusp.rst v2: - rename is_hibernate_snapshot_dev() to is_hibernate_resume_dev() - fix description so to correctly refer to the resume device Signed-off-by: Domenico Andreoli <domenico.andreoli@linux.com> Acked-by: Darrick J. Wong <darrick.wong@oracle.com> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2020-05-19 21:14:10 +03:00
struct block_device *bdev;
sector_t offset;
dev_t swdev;
if (swsusp_swap_in_use())
return -EPERM;
if (in_compat_syscall()) {
struct compat_resume_swap_area swap_area;
if (copy_from_user(&swap_area, argp, sizeof(swap_area)))
return -EFAULT;
swdev = new_decode_dev(swap_area.dev);
offset = swap_area.offset;
} else {
struct resume_swap_area swap_area;
if (copy_from_user(&swap_area, argp, sizeof(swap_area)))
return -EFAULT;
swdev = new_decode_dev(swap_area.dev);
offset = swap_area.offset;
}
/*
* User space encodes device types as two-byte values,
* so we need to recode them
*/
if (!swdev) {
data->swap = -1;
return -EINVAL;
}
PM: hibernate: Restrict writes to the resume device Hibernation via snapshot device requires write permission to the swap block device, the one that more often (but not necessarily) is used to store the hibernation image. With this patch, such permissions are granted iff: 1) snapshot device config option is enabled 2) swap partition is used as resume device In other circumstances the swap device is not writable from userspace. In order to achieve this, every write attempt to a swap device is checked against the device configured as part of the uswsusp API [0] using a pointer to the inode struct in memory. If the swap device being written was not configured for resuming, the write request is denied. NOTE: this implementation works only for swap block devices, where the inode configured by swapon (which sets S_SWAPFILE) is the same used by SNAPSHOT_SET_SWAP_AREA. In case of swap file, SNAPSHOT_SET_SWAP_AREA indeed receives the inode of the block device containing the filesystem where the swap file is located (+ offset in it) which is never passed to swapon and then has not set S_SWAPFILE. As result, the swap file itself (as a file) has never an option to be written from userspace. Instead it remains writable if accessed directly from the containing block device, which is always writeable from root. [0] Documentation/power/userland-swsusp.rst v2: - rename is_hibernate_snapshot_dev() to is_hibernate_resume_dev() - fix description so to correctly refer to the resume device Signed-off-by: Domenico Andreoli <domenico.andreoli@linux.com> Acked-by: Darrick J. Wong <darrick.wong@oracle.com> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2020-05-19 21:14:10 +03:00
data->swap = swap_type_of(swdev, offset, &bdev);
if (data->swap < 0)
return -ENODEV;
PM: hibernate: Restrict writes to the resume device Hibernation via snapshot device requires write permission to the swap block device, the one that more often (but not necessarily) is used to store the hibernation image. With this patch, such permissions are granted iff: 1) snapshot device config option is enabled 2) swap partition is used as resume device In other circumstances the swap device is not writable from userspace. In order to achieve this, every write attempt to a swap device is checked against the device configured as part of the uswsusp API [0] using a pointer to the inode struct in memory. If the swap device being written was not configured for resuming, the write request is denied. NOTE: this implementation works only for swap block devices, where the inode configured by swapon (which sets S_SWAPFILE) is the same used by SNAPSHOT_SET_SWAP_AREA. In case of swap file, SNAPSHOT_SET_SWAP_AREA indeed receives the inode of the block device containing the filesystem where the swap file is located (+ offset in it) which is never passed to swapon and then has not set S_SWAPFILE. As result, the swap file itself (as a file) has never an option to be written from userspace. Instead it remains writable if accessed directly from the containing block device, which is always writeable from root. [0] Documentation/power/userland-swsusp.rst v2: - rename is_hibernate_snapshot_dev() to is_hibernate_resume_dev() - fix description so to correctly refer to the resume device Signed-off-by: Domenico Andreoli <domenico.andreoli@linux.com> Acked-by: Darrick J. Wong <darrick.wong@oracle.com> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2020-05-19 21:14:10 +03:00
data->bd_inode = bdev->bd_inode;
bdput(bdev);
return 0;
}
static long snapshot_ioctl(struct file *filp, unsigned int cmd,
unsigned long arg)
{
int error = 0;
struct snapshot_data *data;
loff_t size;
sector_t offset;
if (_IOC_TYPE(cmd) != SNAPSHOT_IOC_MAGIC)
return -ENOTTY;
if (_IOC_NR(cmd) > SNAPSHOT_IOC_MAXNR)
return -ENOTTY;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (!mutex_trylock(&system_transition_mutex))
return -EBUSY;
lock_device_hotplug();
data = filp->private_data;
switch (cmd) {
case SNAPSHOT_FREEZE:
if (data->frozen)
break;
ksys_sync_helper();
error = freeze_processes();
if (error)
break;
error = create_basic_memory_bitmaps();
if (error)
thaw_processes();
else
data->frozen = true;
break;
case SNAPSHOT_UNFREEZE:
if (!data->frozen || data->ready)
break;
pm_restore_gfp_mask();
free_basic_memory_bitmaps();
data->free_bitmaps = false;
thaw_processes();
data->frozen = false;
break;
case SNAPSHOT_CREATE_IMAGE:
if (data->mode != O_RDONLY || !data->frozen || data->ready) {
error = -EPERM;
break;
}
pm_restore_gfp_mask();
error = hibernation_snapshot(data->platform_support);
if (!error) {
error = put_user(in_suspend, (int __user *)arg);
data->ready = !freezer_test_done && !error;
freezer_test_done = false;
}
break;
case SNAPSHOT_ATOMIC_RESTORE:
[PATCH] swsusp: Improve handling of highmem Currently swsusp saves the contents of highmem pages by copying them to the normal zone which is quite inefficient (eg. it requires two normal pages to be used for saving one highmem page). This may be improved by using highmem for saving the contents of saveable highmem pages. Namely, during the suspend phase of the suspend-resume cycle we try to allocate as many free highmem pages as there are saveable highmem pages. If there are not enough highmem image pages to store the contents of all of the saveable highmem pages, some of them will be stored in the "normal" memory. Next, we allocate as many free "normal" pages as needed to store the (remaining) image data. We use a memory bitmap to mark the allocated free pages (ie. highmem as well as "normal" image pages). Now, we use another memory bitmap to mark all of the saveable pages (highmem as well as "normal") and the contents of the saveable pages are copied into the image pages. Then, the second bitmap is used to save the pfns corresponding to the saveable pages and the first one is used to save their data. During the resume phase the pfns of the pages that were saveable during the suspend are loaded from the image and used to mark the "unsafe" page frames. Next, we try to allocate as many free highmem page frames as to load all of the image data that had been in the highmem before the suspend and we allocate so many free "normal" page frames that the total number of allocated free pages (highmem and "normal") is equal to the size of the image. While doing this we have to make sure that there will be some extra free "normal" and "safe" page frames for two lists of PBEs constructed later. Now, the image data are loaded, if possible, into their "original" page frames. The image data that cannot be written into their "original" page frames are loaded into "safe" page frames and their "original" kernel virtual addresses, as well as the addresses of the "safe" pages containing their copies, are stored in one of two lists of PBEs. One list of PBEs is for the copies of "normal" suspend pages (ie. "normal" pages that were saveable during the suspend) and it is used in the same way as previously (ie. by the architecture-dependent parts of swsusp). The other list of PBEs is for the copies of highmem suspend pages. The pages in this list are restored (in a reversible way) right before the arch-dependent code is called. Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl> Cc: Pavel Machek <pavel@ucw.cz> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-12-07 07:34:18 +03:00
snapshot_write_finalize(&data->handle);
if (data->mode != O_WRONLY || !data->frozen ||
!snapshot_image_loaded(&data->handle)) {
error = -EPERM;
break;
}
error = hibernation_restore(data->platform_support);
break;
case SNAPSHOT_FREE:
swsusp_free();
memset(&data->handle, 0, sizeof(struct snapshot_handle));
data->ready = false;
/*
* It is necessary to thaw kernel threads here, because
* SNAPSHOT_CREATE_IMAGE may be invoked directly after
* SNAPSHOT_FREE. In that case, if kernel threads were not
* thawed, the preallocation of memory carried out by
* hibernation_snapshot() might run into problems (i.e. it
* might fail or even deadlock).
*/
thaw_kernel_threads();
break;
case SNAPSHOT_PREF_IMAGE_SIZE:
image_size = arg;
break;
case SNAPSHOT_GET_IMAGE_SIZE:
if (!data->ready) {
error = -ENODATA;
break;
}
size = snapshot_get_image_size();
size <<= PAGE_SHIFT;
error = put_user(size, (loff_t __user *)arg);
break;
case SNAPSHOT_AVAIL_SWAP_SIZE:
size = count_swap_pages(data->swap, 1);
size <<= PAGE_SHIFT;
error = put_user(size, (loff_t __user *)arg);
break;
case SNAPSHOT_ALLOC_SWAP_PAGE:
if (data->swap < 0 || data->swap >= MAX_SWAPFILES) {
error = -ENODEV;
break;
}
offset = alloc_swapdev_block(data->swap);
if (offset) {
offset <<= PAGE_SHIFT;
error = put_user(offset, (loff_t __user *)arg);
} else {
error = -ENOSPC;
}
break;
case SNAPSHOT_FREE_SWAP_PAGES:
if (data->swap < 0 || data->swap >= MAX_SWAPFILES) {
error = -ENODEV;
break;
}
free_all_swap_pages(data->swap);
break;
case SNAPSHOT_S2RAM:
if (!data->frozen) {
error = -EPERM;
break;
}
/*
* Tasks are frozen and the notifiers have been called with
* PM_HIBERNATION_PREPARE
*/
error = suspend_devices_and_enter(PM_SUSPEND_MEM);
data->ready = false;
break;
case SNAPSHOT_PLATFORM_SUPPORT:
data->platform_support = !!arg;
break;
case SNAPSHOT_POWER_OFF:
if (data->platform_support)
error = hibernation_platform_enter();
break;
case SNAPSHOT_SET_SWAP_AREA:
error = snapshot_set_swap_area(data, (void __user *)arg);
break;
default:
error = -ENOTTY;
}
unlock_device_hotplug();
mutex_unlock(&system_transition_mutex);
return error;
}
#ifdef CONFIG_COMPAT
static long
snapshot_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
BUILD_BUG_ON(sizeof(loff_t) != sizeof(compat_loff_t));
switch (cmd) {
case SNAPSHOT_GET_IMAGE_SIZE:
case SNAPSHOT_AVAIL_SWAP_SIZE:
case SNAPSHOT_ALLOC_SWAP_PAGE:
case SNAPSHOT_CREATE_IMAGE:
case SNAPSHOT_SET_SWAP_AREA:
return snapshot_ioctl(file, cmd,
(unsigned long) compat_ptr(arg));
default:
return snapshot_ioctl(file, cmd, arg);
}
}
#endif /* CONFIG_COMPAT */
static const struct file_operations snapshot_fops = {
.open = snapshot_open,
.release = snapshot_release,
.read = snapshot_read,
.write = snapshot_write,
.llseek = no_llseek,
.unlocked_ioctl = snapshot_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = snapshot_compat_ioctl,
#endif
};
static struct miscdevice snapshot_device = {
.minor = SNAPSHOT_MINOR,
.name = "snapshot",
.fops = &snapshot_fops,
};
static int __init snapshot_device_init(void)
{
return misc_register(&snapshot_device);
};
device_initcall(snapshot_device_init);