888d3c9f7f
This pull request goes with only a few sysctl moves from the kernel/sysctl.c file, the rest of the work has been put towards deprecating two API calls which incur recursion and prevent us from simplifying the registration process / saving memory per move. Most of the changes have been soaking on linux-next since v6.3-rc3. I've slowed down the kernel/sysctl.c moves due to Matthew Wilcox's feedback that we should see if we could *save* memory with these moves instead of incurring more memory. We currently incur more memory since when we move a sysctl from kernel/sysctl.c out to its own file we end up having to add a new empty sysctl used to register it. To achieve saving memory we want to allow sysctls to be passed without requiring the end element being empty, and just have our registration process rely on ARRAY_SIZE(). Without this, supporting both styles of sysctls would make the sysctl registration pretty brittle, hard to read and maintain as can be seen from Meng Tang's efforts to do just this [0]. Fortunately, using ARRAY_SIZE() for all sysctl registrations also implies doing the work to deprecate two API calls which use recursion in order to support sysctl declarations with subdirectories. And so during this development cycle quite a bit of effort went into this deprecation effort. I've annotated the following two APIs as deprecated and in a few kernel releases we should be good to remove them: * register_sysctl_table() * register_sysctl_paths() During this merge window we should be able to deprecate and unexport register_sysctl_paths(), we can probably do that towards the end of this merge window. Deprecating register_sysctl_table() will take a bit more time but this pull request goes with a few examples of how to do this. As it turns out each of the conversions to move away from either of these two API calls *also* saves memory. And so long term, all these changes *will* prove to have saved a bit of memory on boot. 
The way I see it then is if remove a user of one deprecated call, it gives us enough savings to move one kernel/sysctl.c out from the generic arrays as we end up with about the same amount of bytes. Since deprecating register_sysctl_table() and register_sysctl_paths() does not require maintainer coordination except the final unexport you'll see quite a bit of these changes from other pull requests, I've just kept the stragglers after rc3. Most of these changes have been soaking on linux-next since around rc3. [0] https://lkml.kernel.org/r/ZAD+cpbrqlc5vmry@bombadil.infradead.org -----BEGIN PGP SIGNATURE----- iQJGBAABCgAwFiEENnNq2KuOejlQLZofziMdCjCSiKcFAmRHAjQSHG1jZ3JvZkBr ZXJuZWwub3JnAAoJEM4jHQowkoinTzgQAI/uKHKi0VlUR1l2Psl0XbseUVueuyj3 ZDxSJpbVUmsoDf2MlLjzB8mYE3ricnNTDbLr7qOyA6pXdM1N0mY5LQmRVRu8/ffd 2T1hQ5pl7YnJdWP5dPhcF9Y+jnu1tjX1MW5DS4fzllwK7FnD86HuIruGq52RAPS/ /FH+BD9eodLWWXk6A/o2GFqoWxPKQI0GLxEYWa7Hg7yt8E/3PQL9QsRzn8i6U+HW BrN/+G3YD1VCCzXu0UAeXnm+i1Z7CdvqNdZuSkvE3DObiZ5WpOS+/i7FrDB7zdiu zAbHaifHnDPtcK3w2ZodbLAAwEWD/mG4iwIjE2kgIMVYxBv7TFDBRREXAWYAevIT UUuZnWDQsGaWdjywrebaUycEfd6dytKyan0fTXgMFkcoWRjejhitfdM2iZDdQROg q453p4HqOw4vTrhy4ov4zOX7J3EFiBzpZdl+SmLqcXk+jbLVb/Q9snUWz1AFtHBl gHoP5bS82uVktGG3MsObjgTzYYMQjO9YGIrVuW1VP9uWs8WaoWx6M9FQJIIhtwE+ h6wG2s7CjuFWnS0/IxWmDOn91QyUn1w7ohiz9TuvYj/5GLSBpBDGCJHsNB5T2WS1 qbQRaZ2Kg3j9TeyWfXxdlxBx7bt3ni+J/IXDY0zom2sTpGHKl8D2g5AzmEXJDTpl kd7Z3gsmwhDh =0U0W -----END PGP SIGNATURE----- Merge tag 'sysctl-6.4-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/mcgrof/linux Pull sysctl updates from Luis Chamberlain: "This only does a few sysctl moves from the kernel/sysctl.c file, the rest of the work has been put towards deprecating two API calls which incur recursion and prevent us from simplifying the registration process / saving memory per move. Most of the changes have been soaking on linux-next since v6.3-rc3. 
I've slowed down the kernel/sysctl.c moves due to Matthew Wilcox's feedback that we should see if we could *save* memory with these moves instead of incurring more memory. We currently incur more memory since when we move a sysctl from kernel/sysctl.c out to its own file we end up having to add a new empty sysctl used to register it. To achieve saving memory we want to allow sysctls to be passed without requiring the end element being empty, and just have our registration process rely on ARRAY_SIZE(). Without this, supporting both styles of sysctls would make the sysctl registration pretty brittle, hard to read and maintain as can be seen from Meng Tang's efforts to do just this [0]. Fortunately, using ARRAY_SIZE() for all sysctl registrations also implies doing the work to deprecate two API calls which use recursion in order to support sysctl declarations with subdirectories. And so during this development cycle quite a bit of effort went into this deprecation effort. I've annotated the following two APIs as deprecated and in a few kernel releases we should be good to remove them: - register_sysctl_table() - register_sysctl_paths() During this merge window we should be able to deprecate and unexport register_sysctl_paths(), we can probably do that towards the end of this merge window. Deprecating register_sysctl_table() will take a bit more time but this pull request goes with a few examples of how to do this. As it turns out each of the conversions to move away from either of these two API calls *also* saves memory. And so long term, all these changes *will* prove to have saved a bit of memory on boot. The way I see it then is if we remove a user of one deprecated call, it gives us enough savings to move one kernel/sysctl.c out from the generic arrays as we end up with about the same amount of bytes. 
Since deprecating register_sysctl_table() and register_sysctl_paths() does not require maintainer coordination except the final unexport you'll see quite a bit of these changes from other pull requests, I've just kept the stragglers after rc3" Link: https://lkml.kernel.org/r/ZAD+cpbrqlc5vmry@bombadil.infradead.org [0] * tag 'sysctl-6.4-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/mcgrof/linux: (29 commits) fs: fix sysctls.c built mm: compaction: remove incorrect #ifdef checks mm: compaction: move compaction sysctl to its own file mm: memory-failure: Move memory failure sysctls to its own file arm: simplify two-level sysctl registration for ctl_isa_vars ia64: simplify one-level sysctl registration for kdump_ctl_table utsname: simplify one-level sysctl registration for uts_kern_table ntfs: simplfy one-level sysctl registration for ntfs_sysctls coda: simplify one-level sysctl registration for coda_table fs/cachefiles: simplify one-level sysctl registration for cachefiles_sysctls xfs: simplify two-level sysctl registration for xfs_table nfs: simplify two-level sysctl registration for nfs_cb_sysctls nfs: simplify two-level sysctl registration for nfs4_cb_sysctls lockd: simplify two-level sysctl registration for nlm_sysctls proc_sysctl: enhance documentation xen: simplify sysctl registration for balloon md: simplify sysctl registration hv: simplify sysctl registration scsi: simplify sysctl registration with register_sysctl() csky: simplify alignment sysctl registration ...
445 lines
11 KiB
C
445 lines
11 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/*
|
|
* Module and Firmware Pinning Security Module
|
|
*
|
|
* Copyright 2011-2016 Google Inc.
|
|
*
|
|
* Author: Kees Cook <keescook@chromium.org>
|
|
*/
|
|
|
|
#define pr_fmt(fmt) "LoadPin: " fmt
|
|
|
|
#include <linux/module.h>
|
|
#include <linux/fs.h>
|
|
#include <linux/kernel_read_file.h>
|
|
#include <linux/lsm_hooks.h>
|
|
#include <linux/mount.h>
|
|
#include <linux/blkdev.h>
|
|
#include <linux/path.h>
|
|
#include <linux/sched.h> /* current */
|
|
#include <linux/string_helpers.h>
|
|
#include <linux/dm-verity-loadpin.h>
|
|
#include <uapi/linux/loadpin.h>
|
|
|
|
#define VERITY_DIGEST_FILE_HEADER "# LOADPIN_TRUSTED_VERITY_ROOT_DIGESTS"
|
|
|
|
static void report_load(const char *origin, struct file *file, char *operation)
|
|
{
|
|
char *cmdline, *pathname;
|
|
|
|
pathname = kstrdup_quotable_file(file, GFP_KERNEL);
|
|
cmdline = kstrdup_quotable_cmdline(current, GFP_KERNEL);
|
|
|
|
pr_notice("%s %s obj=%s%s%s pid=%d cmdline=%s%s%s\n",
|
|
origin, operation,
|
|
(pathname && pathname[0] != '<') ? "\"" : "",
|
|
pathname,
|
|
(pathname && pathname[0] != '<') ? "\"" : "",
|
|
task_pid_nr(current),
|
|
cmdline ? "\"" : "", cmdline, cmdline ? "\"" : "");
|
|
|
|
kfree(cmdline);
|
|
kfree(pathname);
|
|
}
|
|
|
|
static int enforce = IS_ENABLED(CONFIG_SECURITY_LOADPIN_ENFORCE);
|
|
static char *exclude_read_files[READING_MAX_ID];
|
|
static int ignore_read_file_id[READING_MAX_ID] __ro_after_init;
|
|
static struct super_block *pinned_root;
|
|
static DEFINE_SPINLOCK(pinned_root_spinlock);
|
|
#ifdef CONFIG_SECURITY_LOADPIN_VERITY
|
|
static bool deny_reading_verity_digests;
|
|
#endif
|
|
|
|
#ifdef CONFIG_SYSCTL
|
|
static struct ctl_table loadpin_sysctl_table[] = {
|
|
{
|
|
.procname = "enforce",
|
|
.data = &enforce,
|
|
.maxlen = sizeof(int),
|
|
.mode = 0644,
|
|
.proc_handler = proc_dointvec_minmax,
|
|
.extra1 = SYSCTL_ONE,
|
|
.extra2 = SYSCTL_ONE,
|
|
},
|
|
{ }
|
|
};
|
|
|
|
static void set_sysctl(bool is_writable)
|
|
{
|
|
/*
|
|
* If load pinning is not enforced via a read-only block
|
|
* device, allow sysctl to change modes for testing.
|
|
*/
|
|
if (is_writable)
|
|
loadpin_sysctl_table[0].extra1 = SYSCTL_ZERO;
|
|
else
|
|
loadpin_sysctl_table[0].extra1 = SYSCTL_ONE;
|
|
}
|
|
#else
|
|
static inline void set_sysctl(bool is_writable) { }
|
|
#endif
|
|
|
|
/*
 * Log the backing device of @mnt_sb together with its @writable state,
 * and announce that pinning is engaged when the root is not writable.
 */
static void report_writable(struct super_block *mnt_sb, bool writable)
{
	if (!mnt_sb->s_bdev) {
		pr_info("mnt_sb lacks block device, treating as: writable\n");
	} else {
		pr_info("%pg (%u:%u): %s\n", mnt_sb->s_bdev,
			MAJOR(mnt_sb->s_bdev->bd_dev),
			MINOR(mnt_sb->s_bdev->bd_dev),
			writable ? "writable" : "read-only");
	}

	if (!writable)
		pr_info("load pinning engaged.\n");
}
|
|
|
|
/*
|
|
* This must be called after early kernel init, since then the rootdev
|
|
* is available.
|
|
*/
|
|
static bool sb_is_writable(struct super_block *mnt_sb)
|
|
{
|
|
bool writable = true;
|
|
|
|
if (mnt_sb->s_bdev)
|
|
writable = !bdev_read_only(mnt_sb->s_bdev);
|
|
|
|
return writable;
|
|
}
|
|
|
|
static void loadpin_sb_free_security(struct super_block *mnt_sb)
|
|
{
|
|
/*
|
|
* When unmounting the filesystem we were using for load
|
|
* pinning, we acknowledge the superblock release, but make sure
|
|
* no other modules or firmware can be loaded when we are in
|
|
* enforcing mode. Otherwise, allow the root to be reestablished.
|
|
*/
|
|
if (!IS_ERR_OR_NULL(pinned_root) && mnt_sb == pinned_root) {
|
|
if (enforce) {
|
|
pinned_root = ERR_PTR(-EIO);
|
|
pr_info("umount pinned fs: refusing further loads\n");
|
|
} else {
|
|
pinned_root = NULL;
|
|
}
|
|
}
|
|
}
|
|
|
|
static int loadpin_check(struct file *file, enum kernel_read_file_id id)
|
|
{
|
|
struct super_block *load_root;
|
|
const char *origin = kernel_read_file_id_str(id);
|
|
bool first_root_pin = false;
|
|
bool load_root_writable;
|
|
|
|
/* If the file id is excluded, ignore the pinning. */
|
|
if ((unsigned int)id < ARRAY_SIZE(ignore_read_file_id) &&
|
|
ignore_read_file_id[id]) {
|
|
report_load(origin, file, "pinning-excluded");
|
|
return 0;
|
|
}
|
|
|
|
/* This handles the older init_module API that has a NULL file. */
|
|
if (!file) {
|
|
if (!enforce) {
|
|
report_load(origin, NULL, "old-api-pinning-ignored");
|
|
return 0;
|
|
}
|
|
|
|
report_load(origin, NULL, "old-api-denied");
|
|
return -EPERM;
|
|
}
|
|
|
|
load_root = file->f_path.mnt->mnt_sb;
|
|
load_root_writable = sb_is_writable(load_root);
|
|
|
|
/* First loaded module/firmware defines the root for all others. */
|
|
spin_lock(&pinned_root_spinlock);
|
|
/*
|
|
* pinned_root is only NULL at startup or when the pinned root has
|
|
* been unmounted while we are not in enforcing mode. Otherwise, it
|
|
* is either a valid reference, or an ERR_PTR.
|
|
*/
|
|
if (!pinned_root) {
|
|
pinned_root = load_root;
|
|
first_root_pin = true;
|
|
}
|
|
spin_unlock(&pinned_root_spinlock);
|
|
|
|
if (first_root_pin) {
|
|
report_writable(pinned_root, load_root_writable);
|
|
set_sysctl(load_root_writable);
|
|
report_load(origin, file, "pinned");
|
|
}
|
|
|
|
if (IS_ERR_OR_NULL(pinned_root) ||
|
|
((load_root != pinned_root) && !dm_verity_loadpin_is_bdev_trusted(load_root->s_bdev))) {
|
|
if (unlikely(!enforce)) {
|
|
report_load(origin, file, "pinning-ignored");
|
|
return 0;
|
|
}
|
|
|
|
report_load(origin, file, "denied");
|
|
return -EPERM;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int loadpin_read_file(struct file *file, enum kernel_read_file_id id,
|
|
bool contents)
|
|
{
|
|
/*
|
|
* LoadPin only cares about the _origin_ of a file, not its
|
|
* contents, so we can ignore the "are full contents available"
|
|
* argument here.
|
|
*/
|
|
return loadpin_check(file, id);
|
|
}
|
|
|
|
/*
 * kernel_load_data hook.
 *
 * No file is involved here, so NULL is handed to loadpin_check() and
 * the state of @contents is ignored: LoadPin only cares about the
 * _origin_ of a load.
 */
static int loadpin_load_data(enum kernel_load_data_id id, bool contents)
{
	enum kernel_read_file_id read_id = (enum kernel_read_file_id) id;

	return loadpin_check(NULL, read_id);
}
|
|
|
|
/* LSM hooks implemented by LoadPin; installed by loadpin_init(). */
static struct security_hook_list loadpin_hooks[] __ro_after_init = {
	LSM_HOOK_INIT(sb_free_security, loadpin_sb_free_security),
	LSM_HOOK_INIT(kernel_read_file, loadpin_read_file),
	LSM_HOOK_INIT(kernel_load_data, loadpin_load_data),
};
|
|
|
|
/*
 * Translate the strings given via the "exclude" parameter into flags in
 * ignore_read_file_id[]. Parsing stops at the first unset slot; empty
 * strings are skipped.
 */
static void __init parse_exclude(void)
{
	int slot, id;

	/*
	 * Make sure all the arrays stay within expected sizes. This
	 * is slightly weird because kernel_read_file_str[] includes
	 * READING_MAX_ID, which isn't actually meaningful here.
	 */
	BUILD_BUG_ON(ARRAY_SIZE(exclude_read_files) !=
		     ARRAY_SIZE(ignore_read_file_id));
	BUILD_BUG_ON(ARRAY_SIZE(kernel_read_file_str) <
		     ARRAY_SIZE(ignore_read_file_id));

	for (slot = 0; slot < ARRAY_SIZE(exclude_read_files); slot++) {
		char *name = exclude_read_files[slot];

		if (!name)
			break;
		if (name[0] == '\0')
			continue;

		/*
		 * Scan every id without stopping at the first match,
		 * because one read_file_str may map to more than one
		 * read_file_id.
		 */
		for (id = 0; id < ARRAY_SIZE(ignore_read_file_id); id++) {
			if (strcmp(name, kernel_read_file_str[id]) != 0)
				continue;

			pr_info("excluding: %s\n",
				kernel_read_file_str[id]);
			ignore_read_file_id[id] = 1;
		}
	}
}
|
|
|
|
/*
 * LSM init entry point: announce the current mode, process the
 * "exclude" parameter, register the sysctl (when available) and install
 * the hooks. A failed sysctl registration is only logged. Always
 * returns 0.
 */
static int __init loadpin_init(void)
{
	const char *negation = enforce ? "" : "not ";

	pr_info("ready to pin (currently %senforcing)\n", negation);
	parse_exclude();
#ifdef CONFIG_SYSCTL
	if (!register_sysctl("kernel/loadpin", loadpin_sysctl_table))
		pr_notice("sysctl registration failed!\n");
#endif
	security_add_hooks(loadpin_hooks, ARRAY_SIZE(loadpin_hooks), "loadpin");

	return 0;
}
|
|
|
|
DEFINE_LSM(loadpin) = {
|
|
.name = "loadpin",
|
|
.init = loadpin_init,
|
|
};
|
|
|
|
#ifdef CONFIG_SECURITY_LOADPIN_VERITY
|
|
|
|
/* Identifiers passed as private data when creating securityfs entries. */
enum loadpin_securityfs_interface_index {
	LOADPIN_DM_VERITY,
};
|
|
|
|
static int read_trusted_verity_root_digests(unsigned int fd)
|
|
{
|
|
struct fd f;
|
|
void *data;
|
|
int rc;
|
|
char *p, *d;
|
|
|
|
if (deny_reading_verity_digests)
|
|
return -EPERM;
|
|
|
|
/* The list of trusted root digests can only be set up once */
|
|
if (!list_empty(&dm_verity_loadpin_trusted_root_digests))
|
|
return -EPERM;
|
|
|
|
f = fdget(fd);
|
|
if (!f.file)
|
|
return -EINVAL;
|
|
|
|
data = kzalloc(SZ_4K, GFP_KERNEL);
|
|
if (!data) {
|
|
rc = -ENOMEM;
|
|
goto err;
|
|
}
|
|
|
|
rc = kernel_read_file(f.file, 0, (void **)&data, SZ_4K - 1, NULL, READING_POLICY);
|
|
if (rc < 0)
|
|
goto err;
|
|
|
|
p = data;
|
|
p[rc] = '\0';
|
|
p = strim(p);
|
|
|
|
p = strim(data);
|
|
while ((d = strsep(&p, "\n")) != NULL) {
|
|
int len;
|
|
struct dm_verity_loadpin_trusted_root_digest *trd;
|
|
|
|
if (d == data) {
|
|
/* first line, validate header */
|
|
if (strcmp(d, VERITY_DIGEST_FILE_HEADER)) {
|
|
rc = -EPROTO;
|
|
goto err;
|
|
}
|
|
|
|
continue;
|
|
}
|
|
|
|
len = strlen(d);
|
|
|
|
if (len % 2) {
|
|
rc = -EPROTO;
|
|
goto err;
|
|
}
|
|
|
|
len /= 2;
|
|
|
|
trd = kzalloc(struct_size(trd, data, len), GFP_KERNEL);
|
|
if (!trd) {
|
|
rc = -ENOMEM;
|
|
goto err;
|
|
}
|
|
|
|
if (hex2bin(trd->data, d, len)) {
|
|
kfree(trd);
|
|
rc = -EPROTO;
|
|
goto err;
|
|
}
|
|
|
|
trd->len = len;
|
|
|
|
list_add_tail(&trd->node, &dm_verity_loadpin_trusted_root_digests);
|
|
}
|
|
|
|
if (list_empty(&dm_verity_loadpin_trusted_root_digests)) {
|
|
rc = -EPROTO;
|
|
goto err;
|
|
}
|
|
|
|
kfree(data);
|
|
fdput(f);
|
|
|
|
return 0;
|
|
|
|
err:
|
|
kfree(data);
|
|
|
|
/* any failure in loading/parsing invalidates the entire list */
|
|
{
|
|
struct dm_verity_loadpin_trusted_root_digest *trd, *tmp;
|
|
|
|
list_for_each_entry_safe(trd, tmp, &dm_verity_loadpin_trusted_root_digests, node) {
|
|
list_del(&trd->node);
|
|
kfree(trd);
|
|
}
|
|
}
|
|
|
|
/* disallow further attempts after reading a corrupt/invalid file */
|
|
deny_reading_verity_digests = true;
|
|
|
|
fdput(f);
|
|
|
|
return rc;
|
|
}
|
|
|
|
/******************************** securityfs ********************************/
|
|
|
|
static long dm_verity_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
|
|
{
|
|
void __user *uarg = (void __user *)arg;
|
|
unsigned int fd;
|
|
|
|
switch (cmd) {
|
|
case LOADPIN_IOC_SET_TRUSTED_VERITY_DIGESTS:
|
|
if (copy_from_user(&fd, uarg, sizeof(fd)))
|
|
return -EFAULT;
|
|
|
|
return read_trusted_verity_root_digests(fd);
|
|
|
|
default:
|
|
return -EINVAL;
|
|
}
|
|
}
|
|
|
|
static const struct file_operations loadpin_dm_verity_ops = {
|
|
.unlocked_ioctl = dm_verity_ioctl,
|
|
.compat_ioctl = compat_ptr_ioctl,
|
|
};
|
|
|
|
/**
|
|
* init_loadpin_securityfs - create the securityfs directory for LoadPin
|
|
*
|
|
* We can not put this method normally under the loadpin_init() code path since
|
|
* the security subsystem gets initialized before the vfs caches.
|
|
*
|
|
* Returns 0 if the securityfs directory creation was successful.
|
|
*/
|
|
static int __init init_loadpin_securityfs(void)
|
|
{
|
|
struct dentry *loadpin_dir, *dentry;
|
|
|
|
loadpin_dir = securityfs_create_dir("loadpin", NULL);
|
|
if (IS_ERR(loadpin_dir)) {
|
|
pr_err("LoadPin: could not create securityfs dir: %ld\n",
|
|
PTR_ERR(loadpin_dir));
|
|
return PTR_ERR(loadpin_dir);
|
|
}
|
|
|
|
dentry = securityfs_create_file("dm-verity", 0600, loadpin_dir,
|
|
(void *)LOADPIN_DM_VERITY, &loadpin_dm_verity_ops);
|
|
if (IS_ERR(dentry)) {
|
|
pr_err("LoadPin: could not create securityfs entry 'dm-verity': %ld\n",
|
|
PTR_ERR(dentry));
|
|
return PTR_ERR(dentry);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
fs_initcall(init_loadpin_securityfs);
|
|
|
|
#endif /* CONFIG_SECURITY_LOADPIN_VERITY */
|
|
|
|
/* Should not be mutable after boot, so not listed in sysfs (perm == 0). */
|
|
module_param(enforce, int, 0);
|
|
MODULE_PARM_DESC(enforce, "Enforce module/firmware pinning");
|
|
module_param_array_named(exclude, exclude_read_files, charp, NULL, 0);
|
|
MODULE_PARM_DESC(exclude, "Exclude pinning specific read file types");
|