9a436f8ff6
It is unsafe to allow saving of secretmem areas to the hibernation snapshot as they would be visible after the resume and this essentially will defeat the purpose of secret memory mappings. Prevent hibernation whenever there are active secret memory users. Link: https://lkml.kernel.org/r/20210518072034.31572-6-rppt@kernel.org Signed-off-by: Mike Rapoport <rppt@linux.ibm.com> Acked-by: David Hildenbrand <david@redhat.com> Acked-by: James Bottomley <James.Bottomley@HansenPartnership.com> Cc: Alexander Viro <viro@zeniv.linux.org.uk> Cc: Andy Lutomirski <luto@kernel.org> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Borislav Petkov <bp@alien8.de> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Christopher Lameter <cl@linux.com> Cc: Dan Williams <dan.j.williams@intel.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: David Hildenbrand <david@redhat.com> Cc: Elena Reshetova <elena.reshetova@intel.com> Cc: Hagen Paul Pfeifer <hagen@jauu.net> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: James Bottomley <jejb@linux.ibm.com> Cc: "Kirill A. Shutemov" <kirill@shutemov.name> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Matthew Wilcox <willy@infradead.org> Cc: Michael Kerrisk <mtk.manpages@gmail.com> Cc: Palmer Dabbelt <palmer@dabbelt.com> Cc: Palmer Dabbelt <palmerdabbelt@google.com> Cc: Paul Walmsley <paul.walmsley@sifive.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Rick Edgecombe <rick.p.edgecombe@intel.com> Cc: Roman Gushchin <guro@fb.com> Cc: Shakeel Butt <shakeelb@google.com> Cc: Shuah Khan <shuah@kernel.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Tycho Andersen <tycho@tycho.ws> Cc: Will Deacon <will@kernel.org> Cc: kernel test robot <lkp@intel.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
255 lines
5.4 KiB
C
255 lines
5.4 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* Copyright IBM Corporation, 2021
|
|
*
|
|
* Author: Mike Rapoport <rppt@linux.ibm.com>
|
|
*/
|
|
|
|
#include <linux/mm.h>
|
|
#include <linux/fs.h>
|
|
#include <linux/swap.h>
|
|
#include <linux/mount.h>
|
|
#include <linux/memfd.h>
|
|
#include <linux/bitops.h>
|
|
#include <linux/printk.h>
|
|
#include <linux/pagemap.h>
|
|
#include <linux/syscalls.h>
|
|
#include <linux/pseudo_fs.h>
|
|
#include <linux/secretmem.h>
|
|
#include <linux/set_memory.h>
|
|
#include <linux/sched/signal.h>
|
|
|
|
#include <uapi/linux/magic.h>
|
|
|
|
#include <asm/tlbflush.h>
|
|
|
|
#include "internal.h"
|
|
|
|
#undef pr_fmt
|
|
#define pr_fmt(fmt) "secretmem: " fmt
|
|
|
|
/*
|
|
* Define mode and flag masks to allow validation of the system call
|
|
* parameters.
|
|
*/
|
|
#define SECRETMEM_MODE_MASK (0x0)
|
|
#define SECRETMEM_FLAGS_MASK SECRETMEM_MODE_MASK
|
|
|
|
static bool secretmem_enable __ro_after_init;
|
|
module_param_named(enable, secretmem_enable, bool, 0400);
|
|
MODULE_PARM_DESC(secretmem_enable,
|
|
"Enable secretmem and memfd_secret(2) system call");
|
|
|
|
static atomic_t secretmem_users;
|
|
|
|
bool secretmem_active(void)
|
|
{
|
|
return !!atomic_read(&secretmem_users);
|
|
}
|
|
|
|
static vm_fault_t secretmem_fault(struct vm_fault *vmf)
|
|
{
|
|
struct address_space *mapping = vmf->vma->vm_file->f_mapping;
|
|
struct inode *inode = file_inode(vmf->vma->vm_file);
|
|
pgoff_t offset = vmf->pgoff;
|
|
gfp_t gfp = vmf->gfp_mask;
|
|
unsigned long addr;
|
|
struct page *page;
|
|
int err;
|
|
|
|
if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode))
|
|
return vmf_error(-EINVAL);
|
|
|
|
retry:
|
|
page = find_lock_page(mapping, offset);
|
|
if (!page) {
|
|
page = alloc_page(gfp | __GFP_ZERO);
|
|
if (!page)
|
|
return VM_FAULT_OOM;
|
|
|
|
err = set_direct_map_invalid_noflush(page);
|
|
if (err) {
|
|
put_page(page);
|
|
return vmf_error(err);
|
|
}
|
|
|
|
__SetPageUptodate(page);
|
|
err = add_to_page_cache_lru(page, mapping, offset, gfp);
|
|
if (unlikely(err)) {
|
|
put_page(page);
|
|
/*
|
|
* If a split of large page was required, it
|
|
* already happened when we marked the page invalid
|
|
* which guarantees that this call won't fail
|
|
*/
|
|
set_direct_map_default_noflush(page);
|
|
if (err == -EEXIST)
|
|
goto retry;
|
|
|
|
return vmf_error(err);
|
|
}
|
|
|
|
addr = (unsigned long)page_address(page);
|
|
flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
|
|
}
|
|
|
|
vmf->page = page;
|
|
return VM_FAULT_LOCKED;
|
|
}
|
|
|
|
static const struct vm_operations_struct secretmem_vm_ops = {
|
|
.fault = secretmem_fault,
|
|
};
|
|
|
|
static int secretmem_release(struct inode *inode, struct file *file)
|
|
{
|
|
atomic_dec(&secretmem_users);
|
|
return 0;
|
|
}
|
|
|
|
static int secretmem_mmap(struct file *file, struct vm_area_struct *vma)
|
|
{
|
|
unsigned long len = vma->vm_end - vma->vm_start;
|
|
|
|
if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) == 0)
|
|
return -EINVAL;
|
|
|
|
if (mlock_future_check(vma->vm_mm, vma->vm_flags | VM_LOCKED, len))
|
|
return -EAGAIN;
|
|
|
|
vma->vm_flags |= VM_LOCKED | VM_DONTDUMP;
|
|
vma->vm_ops = &secretmem_vm_ops;
|
|
|
|
return 0;
|
|
}
|
|
|
|
bool vma_is_secretmem(struct vm_area_struct *vma)
|
|
{
|
|
return vma->vm_ops == &secretmem_vm_ops;
|
|
}
|
|
|
|
static const struct file_operations secretmem_fops = {
|
|
.release = secretmem_release,
|
|
.mmap = secretmem_mmap,
|
|
};
|
|
|
|
static bool secretmem_isolate_page(struct page *page, isolate_mode_t mode)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
static int secretmem_migratepage(struct address_space *mapping,
|
|
struct page *newpage, struct page *page,
|
|
enum migrate_mode mode)
|
|
{
|
|
return -EBUSY;
|
|
}
|
|
|
|
static void secretmem_freepage(struct page *page)
|
|
{
|
|
set_direct_map_default_noflush(page);
|
|
clear_highpage(page);
|
|
}
|
|
|
|
const struct address_space_operations secretmem_aops = {
|
|
.freepage = secretmem_freepage,
|
|
.migratepage = secretmem_migratepage,
|
|
.isolate_page = secretmem_isolate_page,
|
|
};
|
|
|
|
static struct vfsmount *secretmem_mnt;
|
|
|
|
static struct file *secretmem_file_create(unsigned long flags)
|
|
{
|
|
struct file *file = ERR_PTR(-ENOMEM);
|
|
struct inode *inode;
|
|
|
|
inode = alloc_anon_inode(secretmem_mnt->mnt_sb);
|
|
if (IS_ERR(inode))
|
|
return ERR_CAST(inode);
|
|
|
|
file = alloc_file_pseudo(inode, secretmem_mnt, "secretmem",
|
|
O_RDWR, &secretmem_fops);
|
|
if (IS_ERR(file))
|
|
goto err_free_inode;
|
|
|
|
mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
|
|
mapping_set_unevictable(inode->i_mapping);
|
|
|
|
inode->i_mapping->a_ops = &secretmem_aops;
|
|
|
|
/* pretend we are a normal file with zero size */
|
|
inode->i_mode |= S_IFREG;
|
|
inode->i_size = 0;
|
|
|
|
return file;
|
|
|
|
err_free_inode:
|
|
iput(inode);
|
|
return file;
|
|
}
|
|
|
|
SYSCALL_DEFINE1(memfd_secret, unsigned int, flags)
|
|
{
|
|
struct file *file;
|
|
int fd, err;
|
|
|
|
/* make sure local flags do not confict with global fcntl.h */
|
|
BUILD_BUG_ON(SECRETMEM_FLAGS_MASK & O_CLOEXEC);
|
|
|
|
if (!secretmem_enable)
|
|
return -ENOSYS;
|
|
|
|
if (flags & ~(SECRETMEM_FLAGS_MASK | O_CLOEXEC))
|
|
return -EINVAL;
|
|
|
|
fd = get_unused_fd_flags(flags & O_CLOEXEC);
|
|
if (fd < 0)
|
|
return fd;
|
|
|
|
file = secretmem_file_create(flags);
|
|
if (IS_ERR(file)) {
|
|
err = PTR_ERR(file);
|
|
goto err_put_fd;
|
|
}
|
|
|
|
file->f_flags |= O_LARGEFILE;
|
|
|
|
fd_install(fd, file);
|
|
atomic_inc(&secretmem_users);
|
|
return fd;
|
|
|
|
err_put_fd:
|
|
put_unused_fd(fd);
|
|
return err;
|
|
}
|
|
|
|
static int secretmem_init_fs_context(struct fs_context *fc)
|
|
{
|
|
return init_pseudo(fc, SECRETMEM_MAGIC) ? 0 : -ENOMEM;
|
|
}
|
|
|
|
static struct file_system_type secretmem_fs = {
|
|
.name = "secretmem",
|
|
.init_fs_context = secretmem_init_fs_context,
|
|
.kill_sb = kill_anon_super,
|
|
};
|
|
|
|
static int secretmem_init(void)
|
|
{
|
|
int ret = 0;
|
|
|
|
if (!secretmem_enable)
|
|
return ret;
|
|
|
|
secretmem_mnt = kern_mount(&secretmem_fs);
|
|
if (IS_ERR(secretmem_mnt))
|
|
ret = PTR_ERR(secretmem_mnt);
|
|
|
|
/* prevent secretmem mappings from ever getting PROT_EXEC */
|
|
secretmem_mnt->mnt_flags |= MNT_NOEXEC;
|
|
|
|
return ret;
|
|
}
|
|
fs_initcall(secretmem_init);
|