From 0188d08a46ffe4a39c6b463451a41d8b503d04d6 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Fri, 12 Jun 2020 13:06:07 +0200 Subject: [PATCH 01/48] s390: convert to msecs_to_jiffies() Instead of using the old 'jiffies + HZ {/,*} something' calculation use msecs_to_jiffies() as that makes the code more readable. Signed-off-by: Sven Schnelle Reviewed-by: Heiko Carstens Signed-off-by: Heiko Carstens --- arch/s390/kernel/lgr.c | 2 +- arch/s390/kernel/time.c | 2 +- arch/s390/kernel/topology.c | 4 ++-- arch/s390/mm/cmm.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/s390/kernel/lgr.c b/arch/s390/kernel/lgr.c index 452502f9a0d9..3b895971c3d0 100644 --- a/arch/s390/kernel/lgr.c +++ b/arch/s390/kernel/lgr.c @@ -167,7 +167,7 @@ static struct timer_list lgr_timer; */ static void lgr_timer_set(void) { - mod_timer(&lgr_timer, jiffies + LGR_TIMER_INTERVAL_SECS * HZ); + mod_timer(&lgr_timer, jiffies + msecs_to_jiffies(LGR_TIMER_INTERVAL_SECS * MSEC_PER_SEC)); } /* diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index b1113b519432..6bc20861fff9 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -669,7 +669,7 @@ static void stp_work_fn(struct work_struct *work) * There is a usable clock but the synchonization failed. * Retry after a second. */ - mod_timer(&stp_timer, jiffies + HZ); + mod_timer(&stp_timer, jiffies + msecs_to_jiffies(MSEC_PER_SEC)); out_unlock: mutex_unlock(&stp_work_mutex); diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 332b542548cd..ca47141a5be9 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -356,9 +356,9 @@ static atomic_t topology_poll = ATOMIC_INIT(0); static void set_topology_timer(void) { if (atomic_add_unless(&topology_poll, -1, 0)) - mod_timer(&topology_timer, jiffies + HZ / 10); + mod_timer(&topology_timer, jiffies + msecs_to_jiffies(100)); else - mod_timer(&topology_timer, jiffies + HZ * 60); + mod_timer(&topology_timer, jiffies + msecs_to_jiffies(60 * MSEC_PER_SEC)); } void topology_expect_change(void) diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index 36bce727897b..5c15ae3daf71 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -189,7 +189,7 @@ static void cmm_set_timer(void) del_timer(&cmm_timer); return; } - mod_timer(&cmm_timer, jiffies + cmm_timeout_seconds * HZ); + mod_timer(&cmm_timer, jiffies + msecs_to_jiffies(cmm_timeout_seconds * MSEC_PER_SEC)); } static void cmm_timer_fn(struct timer_list *unused) From b39e7724b0c28d569e9bd7e95f1b839f64e154bd Mon Sep 17 00:00:00 2001 From: Alexander Egorenkov Date: Fri, 19 Jun 2020 10:38:46 +0200 Subject: [PATCH 02/48] s390/zcore: remove memmap device Remove unused /sys/kernel/debug/zcore/memmap device. Since at least version 1.24.0 of s390-tools zfcpdump no longer needs it and reads /proc/vmcore instead. Signed-off-by: Alexander Egorenkov Reviewed-by: Philipp Rudo Signed-off-by: Heiko Carstens --- drivers/s390/char/zcore.c | 57 ++------------------------------------- 1 file changed, 2 insertions(+), 55 deletions(-) diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c index 08f812475f5e..d29f1b71618e 100644 --- a/drivers/s390/char/zcore.c +++ b/drivers/s390/char/zcore.c @@ -1,8 +1,7 @@ // SPDX-License-Identifier: GPL-1.0+ /* * zcore module to export memory content and register sets for creating system - * dumps on SCSI disks (zfcpdump). The "zcore/mem" debugfs file shows the same - * dump format as s390 standalone dumps. + * dumps on SCSI disks (zfcpdump). 
* * For more information please refer to Documentation/s390/zfcpdump.rst * @@ -16,7 +15,6 @@ #include #include #include -#include #include #include @@ -33,8 +31,6 @@ #define TRACE(x...) debug_sprintf_event(zcore_dbf, 1, x) -#define CHUNK_INFO_SIZE 34 /* 2 16-byte char, each followed by blank */ - enum arch_id { ARCH_S390 = 0, ARCH_S390X = 1, @@ -48,7 +44,6 @@ struct ipib_info { static struct debug_info *zcore_dbf; static int hsa_available; static struct dentry *zcore_dir; -static struct dentry *zcore_memmap_file; static struct dentry *zcore_reipl_file; static struct dentry *zcore_hsa_file; static struct ipl_parameter_block *zcore_ipl_block; @@ -139,46 +134,6 @@ static void release_hsa(void) hsa_available = 0; } -static ssize_t zcore_memmap_read(struct file *filp, char __user *buf, - size_t count, loff_t *ppos) -{ - return simple_read_from_buffer(buf, count, ppos, filp->private_data, - memblock.memory.cnt * CHUNK_INFO_SIZE); -} - -static int zcore_memmap_open(struct inode *inode, struct file *filp) -{ - struct memblock_region *reg; - char *buf; - int i = 0; - - buf = kcalloc(memblock.memory.cnt, CHUNK_INFO_SIZE, GFP_KERNEL); - if (!buf) { - return -ENOMEM; - } - for_each_memblock(memory, reg) { - sprintf(buf + (i++ * CHUNK_INFO_SIZE), "%016llx %016llx ", - (unsigned long long) reg->base, - (unsigned long long) reg->size); - } - filp->private_data = buf; - return nonseekable_open(inode, filp); -} - -static int zcore_memmap_release(struct inode *inode, struct file *filp) -{ - kfree(filp->private_data); - return 0; -} - -static const struct file_operations zcore_memmap_fops = { - .owner = THIS_MODULE, - .read = zcore_memmap_read, - .open = zcore_memmap_open, - .release = zcore_memmap_release, - .llseek = no_llseek, -}; - static ssize_t zcore_reipl_write(struct file *filp, const char __user *buf, size_t count, loff_t *ppos) { @@ -335,17 +290,11 @@ static int __init zcore_init(void) rc = -ENOMEM; goto fail; } - zcore_memmap_file = debugfs_create_file("memmap", S_IRUSR, zcore_dir, - NULL, &zcore_memmap_fops); - if (!zcore_memmap_file) { - rc = -ENOMEM; - goto fail_dir; - } zcore_reipl_file = debugfs_create_file("reipl", S_IRUSR, zcore_dir, NULL, &zcore_reipl_fops); if (!zcore_reipl_file) { rc = -ENOMEM; - goto fail_memmap_file; + goto fail_dir; } zcore_hsa_file = debugfs_create_file("hsa", S_IRUSR|S_IWUSR, zcore_dir, NULL, &zcore_hsa_fops); @@ -357,8 +306,6 @@ static int __init zcore_init(void) fail_reipl_file: debugfs_remove(zcore_reipl_file); -fail_memmap_file: - debugfs_remove(zcore_memmap_file); fail_dir: debugfs_remove(zcore_dir); fail: From 90ce70f06546e646713d036cfdec39427df296f7 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Tue, 12 May 2020 09:54:58 +0200 Subject: [PATCH 03/48] s390/pci: remove unused functions Signed-off-by: Sven Schnelle Acked-by: Niklas Schnelle Signed-off-by: Heiko Carstens --- arch/s390/include/asm/pci_dma.h | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h index 419fac7a62c0..f62cd3ed2d44 100644 --- a/arch/s390/include/asm/pci_dma.h +++ b/arch/s390/include/asm/pci_dma.h @@ -131,12 +131,6 @@ static inline void validate_st_entry(unsigned long *entry) *entry |= ZPCI_TABLE_VALID; } -static inline void invalidate_table_entry(unsigned long *entry) -{ - *entry &= ~ZPCI_TABLE_VALID_MASK; - *entry |= ZPCI_TABLE_INVALID; -} - static inline void invalidate_pt_entry(unsigned long *entry) { WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_INVALID); @@ -173,11 +167,6 @@ static inline int 
pt_entry_isvalid(unsigned long entry) return (entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID; } -static inline int entry_isprotected(unsigned long entry) -{ - return (entry & ZPCI_TABLE_PROT_MASK) == ZPCI_TABLE_PROTECTED; -} - static inline unsigned long *get_rt_sto(unsigned long entry) { return ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_RTX) From 7fa0d6ff35cfaae9cc7012d9220cd24400c650f1 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Tue, 12 May 2020 09:55:18 +0200 Subject: [PATCH 04/48] s390/time: remove unused function Signed-off-by: Sven Schnelle Acked-by: Heiko Carstens Signed-off-by: Heiko Carstens --- arch/s390/include/asm/timex.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 6bf3a45ccfec..289aaff4d365 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -49,11 +49,6 @@ static inline void set_clock_comparator(__u64 time) asm volatile("sckc %0" : : "Q" (time)); } -static inline void store_clock_comparator(__u64 *time) -{ - asm volatile("stckc %0" : "=Q" (*time)); -} - void clock_comparator_work(void); void __init time_early_init(void); From ecb1ff6833c461ea3bcf16396cd4f1eb50b119c2 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 18 Jun 2020 07:09:57 +0200 Subject: [PATCH 05/48] s390/debug: remove raw view There is not a single user of the debug raw view. Therefore remove it before anybody uses it. If anybody would make use of the view it would expose the struct __debug_entry definition to userspace and really would make it uapi. This wouldn't be good, since the definition is suboptimal and needs to be changed. Right now the structure definition is only defined to be uapi, however there is no user. Signed-off-by: Heiko Carstens --- Documentation/s390/s390dbf.rst | 17 ++++---------- arch/s390/include/asm/debug.h | 1 - arch/s390/kernel/debug.c | 42 ---------------------------------- 3 files changed, 4 insertions(+), 56 deletions(-) diff --git a/Documentation/s390/s390dbf.rst b/Documentation/s390/s390dbf.rst index cdb36842b898..af8bdc3629e7 100644 --- a/Documentation/s390/s390dbf.rst +++ b/Documentation/s390/s390dbf.rst @@ -67,7 +67,7 @@ corresponding component. The debugfs normally should be mounted to The content of the directories are files which represent different views to the debug log. Each component can decide which views should be used through registering them with the function :c:func:`debug_register_view()`. -Predefined views for hex/ascii, sprintf and raw binary data are provided. +Predefined views for hex/ascii and sprintf data are provided. It is also possible to define other views. The content of a view can be inspected simply by reading the corresponding debugfs file. @@ -119,8 +119,6 @@ Predefined views: extern struct debug_view debug_hex_ascii_view; - extern struct debug_view debug_raw_view; - extern struct debug_view debug_sprintf_view; Examples @@ -129,7 +127,7 @@ Examples .. 
code-block:: c /* - * hex_ascii- + raw-view Example + * hex_ascii-view Example */ #include @@ -143,7 +141,6 @@ Examples debug_info = debug_register("test", 1, 4, 4 ); debug_register_view(debug_info, &debug_hex_ascii_view); - debug_register_view(debug_info, &debug_raw_view); debug_text_event(debug_info, 4 , "one "); debug_int_exception(debug_info, 4, 4711); @@ -201,7 +198,7 @@ debugfs-files: Example:: > ls /sys/kernel/debug/s390dbf/dasd - flush hex_ascii level pages raw + flush hex_ascii level pages > cat /sys/kernel/debug/s390dbf/dasd/hex_ascii | sort -k2,2 -s 00 00974733272:680099 2 - 02 0006ad7e 07 ea 4a 90 | .... 00 00974733272:682210 2 - 02 0006ade6 46 52 45 45 | FREE @@ -298,10 +295,9 @@ order to see the debug entries well formatted. Predefined Views ---------------- -There are three predefined views: hex_ascii, raw and sprintf. +There are two predefined views: hex_ascii and sprintf. The hex_ascii view shows the data field in hex and ascii representation (e.g. ``45 43 4b 44 | ECKD``). -The raw view returns a bytestream as the debug areas are stored in memory. The sprintf view formats the debug entries in the same way as the sprintf function would do. The sprintf event/exception functions write to the @@ -334,11 +330,6 @@ The format of the hex_ascii and sprintf view is as follows: - Return Address to caller - data field -The format of the raw view is: - -- Header as described in debug.h -- datafield - A typical line of the hex_ascii view will look like the following (first line is only for explanation and will not be displayed when 'cating' the view):: diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h index 310134015541..d39da8f3130e 100644 --- a/arch/s390/include/asm/debug.h +++ b/arch/s390/include/asm/debug.h @@ -82,7 +82,6 @@ struct debug_view { }; extern struct debug_view debug_hex_ascii_view; -extern struct debug_view debug_raw_view; extern struct debug_view debug_sprintf_view; /* do NOT use the _common functions */ diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 263075a1af36..beb4b44a11d1 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -90,27 +90,11 @@ static int debug_input_flush_fn(debug_info_t *id, struct debug_view *view, size_t user_buf_size, loff_t *offset); static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view, char *out_buf, const char *in_buf); -static int debug_raw_format_fn(debug_info_t *id, - struct debug_view *view, char *out_buf, - const char *in_buf); -static int debug_raw_header_fn(debug_info_t *id, struct debug_view *view, - int area, debug_entry_t *entry, char *out_buf); - static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, char *out_buf, debug_sprintf_entry_t *curr_event); /* globals */ -struct debug_view debug_raw_view = { - "raw", - NULL, - &debug_raw_header_fn, - &debug_raw_format_fn, - NULL, - NULL -}; -EXPORT_SYMBOL(debug_raw_view); - struct debug_view debug_hex_ascii_view = { "hex_ascii", NULL, @@ -1385,32 +1369,6 @@ out: return rc; /* number of input characters */ } -/* - * prints debug header in raw format - */ -static int debug_raw_header_fn(debug_info_t *id, struct debug_view *view, - int area, debug_entry_t *entry, char *out_buf) -{ - int rc; - - rc = sizeof(debug_entry_t); - memcpy(out_buf, entry, sizeof(debug_entry_t)); - return rc; -} - -/* - * prints debug data in raw format - */ -static int debug_raw_format_fn(debug_info_t *id, struct debug_view *view, - char *out_buf, const char *in_buf) -{ - int rc; - - rc = id->buf_size; 
- memcpy(out_buf, in_buf, id->buf_size); - return rc; -} - /* * prints debug data in hex/ascii format */ From 6ffb3f6b46d0d02c318946047dc5ce6553495848 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 18 Jun 2020 07:41:18 +0200 Subject: [PATCH 06/48] s390/debug: remove struct __debug_entry from uapi There is no interface to userspace which exposes anything that would require the struct __debug_entry definition. Therefore remove it from uapi. This allows to change the definition, since it is only kernel internally used. The only exception is the crash utility, however that tool must handle changes all the time anyway. Signed-off-by: Heiko Carstens --- arch/s390/include/asm/debug.h | 17 ++++++++++++++- arch/s390/include/uapi/asm/debug.h | 35 ------------------------------ 2 files changed, 16 insertions(+), 36 deletions(-) delete mode 100644 arch/s390/include/uapi/asm/debug.h diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h index d39da8f3130e..17a26261f288 100644 --- a/arch/s390/include/asm/debug.h +++ b/arch/s390/include/asm/debug.h @@ -12,7 +12,7 @@ #include #include #include -#include +#include #define DEBUG_MAX_LEVEL 6 /* debug levels range from 0 to 6 */ #define DEBUG_OFF_LEVEL -1 /* level where debug is switched off */ @@ -26,6 +26,21 @@ #define DEBUG_DATA(entry) (char *)(entry + 1) /* data is stored behind */ /* the entry information */ +#define __DEBUG_FEATURE_VERSION 2 /* version of debug feature */ + +struct __debug_entry { + union { + struct { + unsigned long clock : 52; + unsigned long exception : 1; + unsigned long level : 3; + unsigned long cpuid : 8; + } fields; + unsigned long stck; + } id; + void *caller; +} __packed; + typedef struct __debug_entry debug_entry_t; struct debug_view; diff --git a/arch/s390/include/uapi/asm/debug.h b/arch/s390/include/uapi/asm/debug.h deleted file mode 100644 index c7c564d9aea4..000000000000 --- a/arch/s390/include/uapi/asm/debug.h +++ /dev/null @@ -1,35 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * S/390 debug facility - * - * Copyright IBM Corp. 1999, 2000 - */ - -#ifndef _UAPIDEBUG_H -#define _UAPIDEBUG_H - -#include - -/* Note: - * struct __debug_entry must be defined outside of #ifdef __KERNEL__ - * in order to allow a user program to analyze the 'raw'-view. - */ - -struct __debug_entry{ - union { - struct { - unsigned long long clock:52; - unsigned long long exception:1; - unsigned long long level:3; - unsigned long long cpuid:8; - } fields; - - unsigned long long stck; - } id; - void* caller; -} __attribute__((packed)); - - -#define __DEBUG_FEATURE_VERSION 2 /* version of debug feature */ - -#endif /* _UAPIDEBUG_H */ From 28ccce5f50af2e9484d6b74b22ff9eb54bb775a2 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 17 Jun 2020 16:29:30 -0500 Subject: [PATCH 07/48] s390/appldata: use struct_size() helper Make use of the struct_size() helper instead of an open-coded version in order to avoid any potential type mistakes. This code was detected with the help of Coccinelle and, audited and fixed manually. Signed-off-by: Gustavo A. R. 
Silva Message-Id: <20200617212930.GA11728@embeddedor> Signed-off-by: Heiko Carstens --- arch/s390/appldata/appldata_os.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/s390/appldata/appldata_os.c b/arch/s390/appldata/appldata_os.c index 5503217366ec..a363d30ce739 100644 --- a/arch/s390/appldata/appldata_os.c +++ b/arch/s390/appldata/appldata_os.c @@ -129,8 +129,7 @@ static void appldata_get_os_data(void *data) os_data->nr_cpus = j; - new_size = sizeof(struct appldata_os_data) + - (os_data->nr_cpus * sizeof(struct appldata_os_per_cpu)); + new_size = struct_size(os_data, os_cpu, os_data->nr_cpus); if (ops.size != new_size) { if (ops.active) { rc = appldata_diag(APPLDATA_RECORD_OS_ID, @@ -165,8 +164,7 @@ static int __init appldata_os_init(void) { int rc, max_size; - max_size = sizeof(struct appldata_os_data) + - (num_possible_cpus() * sizeof(struct appldata_os_per_cpu)); + max_size = struct_size(appldata_os_data, os_cpu, num_possible_cpus()); if (max_size > APPLDATA_MAX_REC_SIZE) { pr_err("Maximum OS record size %i exceeds the maximum " "record size %i\n", max_size, APPLDATA_MAX_REC_SIZE); From 66a049b764a71dc32031b7b533f98fc0299e6e11 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Thu, 25 Jun 2020 21:53:17 +0200 Subject: [PATCH 08/48] s390/stp: allow group and users to read stp sysfs files There are no secrets in these files, so allow all users to read it. Signed-off-by: Sven Schnelle Acked-by: Heiko Carstens Signed-off-by: Heiko Carstens --- arch/s390/kernel/time.c | 49 ++++++++++++++++++----------------------- 1 file changed, 22 insertions(+), 27 deletions(-) diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 6bc20861fff9..700127ba689d 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -683,7 +683,7 @@ static struct bus_type stp_subsys = { .dev_name = "stp", }; -static ssize_t stp_ctn_id_show(struct device *dev, +static ssize_t ctn_id_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -693,9 +693,9 @@ static ssize_t stp_ctn_id_show(struct device *dev, *(unsigned long long *) stp_info.ctnid); } -static DEVICE_ATTR(ctn_id, 0400, stp_ctn_id_show, NULL); +static DEVICE_ATTR_RO(ctn_id); -static ssize_t stp_ctn_type_show(struct device *dev, +static ssize_t ctn_type_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -704,9 +704,9 @@ static ssize_t stp_ctn_type_show(struct device *dev, return sprintf(buf, "%i\n", stp_info.ctn); } -static DEVICE_ATTR(ctn_type, 0400, stp_ctn_type_show, NULL); +static DEVICE_ATTR_RO(ctn_type); -static ssize_t stp_dst_offset_show(struct device *dev, +static ssize_t dst_offset_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -715,9 +715,9 @@ static ssize_t stp_dst_offset_show(struct device *dev, return sprintf(buf, "%i\n", (int)(s16) stp_info.dsto); } -static DEVICE_ATTR(dst_offset, 0400, stp_dst_offset_show, NULL); +static DEVICE_ATTR_RO(dst_offset); -static ssize_t stp_leap_seconds_show(struct device *dev, +static ssize_t leap_seconds_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -726,9 +726,9 @@ static ssize_t stp_leap_seconds_show(struct device *dev, return sprintf(buf, "%i\n", (int)(s16) stp_info.leaps); } -static DEVICE_ATTR(leap_seconds, 0400, stp_leap_seconds_show, NULL); +static DEVICE_ATTR_RO(leap_seconds); -static ssize_t stp_stratum_show(struct device *dev, +static ssize_t stratum_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -737,9 +737,9 @@ static ssize_t 
stp_stratum_show(struct device *dev, return sprintf(buf, "%i\n", (int)(s16) stp_info.stratum); } -static DEVICE_ATTR(stratum, 0400, stp_stratum_show, NULL); +static DEVICE_ATTR_RO(stratum); -static ssize_t stp_time_offset_show(struct device *dev, +static ssize_t time_offset_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -748,9 +748,9 @@ static ssize_t stp_time_offset_show(struct device *dev, return sprintf(buf, "%i\n", (int) stp_info.tto); } -static DEVICE_ATTR(time_offset, 0400, stp_time_offset_show, NULL); +static DEVICE_ATTR_RO(time_offset); -static ssize_t stp_time_zone_offset_show(struct device *dev, +static ssize_t time_zone_offset_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -759,10 +759,9 @@ static ssize_t stp_time_zone_offset_show(struct device *dev, return sprintf(buf, "%i\n", (int)(s16) stp_info.tzo); } -static DEVICE_ATTR(time_zone_offset, 0400, - stp_time_zone_offset_show, NULL); +static DEVICE_ATTR_RO(time_zone_offset); -static ssize_t stp_timing_mode_show(struct device *dev, +static ssize_t timing_mode_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -771,9 +770,9 @@ static ssize_t stp_timing_mode_show(struct device *dev, return sprintf(buf, "%i\n", stp_info.tmd); } -static DEVICE_ATTR(timing_mode, 0400, stp_timing_mode_show, NULL); +static DEVICE_ATTR_RO(timing_mode); -static ssize_t stp_timing_state_show(struct device *dev, +static ssize_t timing_state_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -782,16 +781,16 @@ static ssize_t stp_timing_state_show(struct device *dev, return sprintf(buf, "%i\n", stp_info.tst); } -static DEVICE_ATTR(timing_state, 0400, stp_timing_state_show, NULL); +static DEVICE_ATTR_RO(timing_state); -static ssize_t stp_online_show(struct device *dev, +static ssize_t online_show(struct device *dev, struct device_attribute *attr, char *buf) { return sprintf(buf, "%i\n", stp_online); } -static ssize_t stp_online_store(struct device *dev, +static ssize_t online_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { @@ -817,18 +816,14 @@ static ssize_t stp_online_store(struct device *dev, * Can't use DEVICE_ATTR because the attribute should be named * stp/online but dev_attr_online already exists in this file .. */ -static struct device_attribute dev_attr_stp_online = { - .attr = { .name = "online", .mode = 0600 }, - .show = stp_online_show, - .store = stp_online_store, -}; +static DEVICE_ATTR_RW(online); static struct device_attribute *stp_attributes[] = { &dev_attr_ctn_id, &dev_attr_ctn_type, &dev_attr_dst_offset, &dev_attr_leap_seconds, - &dev_attr_stp_online, + &dev_attr_online, &dev_attr_stratum, &dev_attr_time_offset, &dev_attr_time_zone_offset, From f05f62d04271faa265c7a4f75638ebc380d182fa Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 25 Jun 2020 17:00:29 +0200 Subject: [PATCH 09/48] s390/vmem: get rid of memory segment list I can't come up with a satisfying reason why we still need the memory segment list. We used to represent in the list: - boot memory - standby memory added via add_memory() - loaded dcss segments When loading/unloading dcss segments, we already track them in a separate list and check for overlaps (arch/s390/mm/extmem.c:segment_overlaps_others()) when loading segments. 
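Overlap detection is thus already fully covered by the iomem resource tree: request_resource() rejects any range that intersects an already claimed resource. A minimal sketch of the pattern this patch switches to (illustrative only, not part of the diff below; claim_range() is a made-up helper):

#include <linux/ioport.h>
#include <linux/slab.h>

static int claim_range(unsigned long start, unsigned long size)
{
        struct resource *res;

        res = kzalloc(sizeof(*res), GFP_KERNEL);
        if (!res)
                return -ENOMEM;
        res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
        res->start = start;
        res->end = start + size - 1;
        /* fails if the range overlaps boot memory, standby memory or a dcss */
        if (request_resource(&iomem_resource, res)) {
                kfree(res);
                return -EBUSY;
        }
        return 0;
}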
The overlap check was introduced for some segments in commit b2300b9efe1b ("[S390] dcssblk: add >2G DCSSs support and stacked contiguous DCSSs support.") and was extended to cover all dcss segments in commit ca57114609d1 ("s390/extmem: remove code for 31 bit addressing mode"). Although I doubt that overlaps with boot memory and standby memory are relevant, let's reshuffle the checks in load_segment() to request the resource first. This will bail out in case we have overlaps with other resources (esp. boot memory and standby memory). The order is now different compared to segment_unload() and segment_unload(), but that should not matter. This smells like a leftover from ancient times, let's get rid of it. We can now convert vmem_remove_mapping() into a void function - everybody ignored the return value already. Cc: Vasily Gorbik Cc: Christian Borntraeger Cc: Andrew Morton Signed-off-by: David Hildenbrand Message-Id: <20200625150029.45019-1-david@redhat.com> Reviewed-by: Gerald Schaefer Tested-by: Gerald Schaefer [DCSS] Signed-off-by: Heiko Carstens --- arch/s390/include/asm/pgtable.h | 2 +- arch/s390/mm/extmem.c | 25 +++---- arch/s390/mm/vmem.c | 115 ++------------------------------ 3 files changed, 21 insertions(+), 121 deletions(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 19d603bd1f36..7eb01a5459cd 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1669,7 +1669,7 @@ static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset) #define kern_addr_valid(addr) (1) extern int vmem_add_mapping(unsigned long start, unsigned long size); -extern int vmem_remove_mapping(unsigned long start, unsigned long size); +extern void vmem_remove_mapping(unsigned long start, unsigned long size); extern int s390_enable_sie(void); extern int s390_enable_skey(void); extern void s390_reset_cmma(struct mm_struct *mm); diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c index 9e0aa7aa03ba..105c09282f8c 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c @@ -313,15 +313,10 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long goto out_free; } - rc = vmem_add_mapping(seg->start_addr, seg->end - seg->start_addr + 1); - - if (rc) - goto out_free; - seg->res = kzalloc(sizeof(struct resource), GFP_KERNEL); if (seg->res == NULL) { rc = -ENOMEM; - goto out_shared; + goto out_free; } seg->res->flags = IORESOURCE_BUSY | IORESOURCE_MEM; seg->res->start = seg->start_addr; @@ -335,12 +330,17 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long if (rc == SEG_TYPE_SC || ((rc == SEG_TYPE_SR || rc == SEG_TYPE_ER) && !do_nonshared)) seg->res->flags |= IORESOURCE_READONLY; + + /* Check for overlapping resources before adding the mapping. 
*/ if (request_resource(&iomem_resource, seg->res)) { rc = -EBUSY; - kfree(seg->res); - goto out_shared; + goto out_free_resource; } + rc = vmem_add_mapping(seg->start_addr, seg->end - seg->start_addr + 1); + if (rc) + goto out_resource; + if (do_nonshared) diag_cc = dcss_diag(&loadnsr_scode, seg->dcss_name, &start_addr, &end_addr); @@ -351,14 +351,14 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy); rc = diag_cc; - goto out_resource; + goto out_mapping; } if (diag_cc > 1) { pr_warn("Loading DCSS %s failed with rc=%ld\n", name, end_addr); rc = dcss_diag_translate_rc(end_addr); dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy); - goto out_resource; + goto out_mapping; } seg->start_addr = start_addr; seg->end = end_addr; @@ -377,11 +377,12 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long (void*) seg->end, segtype_string[seg->vm_segtype]); } goto out; + out_mapping: + vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1); out_resource: release_resource(seg->res); + out_free_resource: kfree(seg->res); - out_shared: - vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1); out_free: kfree(seg); out: diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 8b6282cf7d13..3b9e71654c37 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -20,14 +20,6 @@ static DEFINE_MUTEX(vmem_mutex); -struct memory_segment { - struct list_head list; - unsigned long start; - unsigned long size; -}; - -static LIST_HEAD(mem_segs); - static void __ref *vmem_alloc_pages(unsigned int order) { unsigned long size = PAGE_SIZE << order; @@ -300,94 +292,25 @@ void vmemmap_free(unsigned long start, unsigned long end, { } -/* - * Add memory segment to the segment list if it doesn't overlap with - * an already present segment. - */ -static int insert_memory_segment(struct memory_segment *seg) +void vmem_remove_mapping(unsigned long start, unsigned long size) { - struct memory_segment *tmp; - - if (seg->start + seg->size > VMEM_MAX_PHYS || - seg->start + seg->size < seg->start) - return -ERANGE; - - list_for_each_entry(tmp, &mem_segs, list) { - if (seg->start >= tmp->start + tmp->size) - continue; - if (seg->start + seg->size <= tmp->start) - continue; - return -ENOSPC; - } - list_add(&seg->list, &mem_segs); - return 0; -} - -/* - * Remove memory segment from the segment list. 
- */ -static void remove_memory_segment(struct memory_segment *seg) -{ - list_del(&seg->list); -} - -static void __remove_shared_memory(struct memory_segment *seg) -{ - remove_memory_segment(seg); - vmem_remove_range(seg->start, seg->size); -} - -int vmem_remove_mapping(unsigned long start, unsigned long size) -{ - struct memory_segment *seg; - int ret; - mutex_lock(&vmem_mutex); - - ret = -ENOENT; - list_for_each_entry(seg, &mem_segs, list) { - if (seg->start == start && seg->size == size) - break; - } - - if (seg->start != start || seg->size != size) - goto out; - - ret = 0; - __remove_shared_memory(seg); - kfree(seg); -out: + vmem_remove_range(start, size); mutex_unlock(&vmem_mutex); - return ret; } int vmem_add_mapping(unsigned long start, unsigned long size) { - struct memory_segment *seg; int ret; + if (start + size > VMEM_MAX_PHYS || + start + size < start) + return -ERANGE; + mutex_lock(&vmem_mutex); - ret = -ENOMEM; - seg = kzalloc(sizeof(*seg), GFP_KERNEL); - if (!seg) - goto out; - seg->start = start; - seg->size = size; - - ret = insert_memory_segment(seg); - if (ret) - goto out_free; - ret = vmem_add_mem(start, size); if (ret) - goto out_remove; - goto out; - -out_remove: - __remove_shared_memory(seg); -out_free: - kfree(seg); -out: + vmem_remove_range(start, size); mutex_unlock(&vmem_mutex); return ret; } @@ -421,27 +344,3 @@ void __init vmem_map_init(void) pr_info("Write protected kernel read-only data: %luk\n", (unsigned long)(__end_rodata - _stext) >> 10); } - -/* - * Convert memblock.memory to a memory segment list so there is a single - * list that contains all memory segments. - */ -static int __init vmem_convert_memory_chunk(void) -{ - struct memblock_region *reg; - struct memory_segment *seg; - - mutex_lock(&vmem_mutex); - for_each_memblock(memory, reg) { - seg = kzalloc(sizeof(*seg), GFP_KERNEL); - if (!seg) - panic("Out of memory...\n"); - seg->start = reg->base; - seg->size = reg->size; - insert_memory_segment(seg); - } - mutex_unlock(&vmem_mutex); - return 0; -} - -core_initcall(vmem_convert_memory_chunk); From 5cdfbdce5de6b5b56e104676409762fc1289a9c2 Mon Sep 17 00:00:00 2001 From: Oscar Carter Date: Sat, 27 Jun 2020 14:54:17 +0200 Subject: [PATCH 10/48] s390/tty3270: remove function callback casts In an effort to enable -Wcast-function-type in the top-level Makefile to support Control Flow Integrity builds, remove all the function callback casts. To do this modify the function prototypes accordingly. Signed-off-by: Oscar Carter Message-Id: <20200627125417.18887-1-oscar.carter@gmx.com> Reviewed-by: Kees Cook [heiko.carstens@de.ibm.com: coding style changes] Signed-off-by: Heiko Carstens --- drivers/s390/char/tty3270.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/s390/char/tty3270.c b/drivers/s390/char/tty3270.c index 98d7fc152e32..aec996de44d9 100644 --- a/drivers/s390/char/tty3270.c +++ b/drivers/s390/char/tty3270.c @@ -556,8 +556,9 @@ tty3270_scroll_backward(struct kbd_data *kbd) * Pass input line to tty. 
*/ static void -tty3270_read_tasklet(struct raw3270_request *rrq) +tty3270_read_tasklet(unsigned long data) { + struct raw3270_request *rrq = (struct raw3270_request *)data; static char kreset_data = TW_KR; struct tty3270 *tp = container_of(rrq->view, struct tty3270, view); char *input; @@ -652,8 +653,9 @@ tty3270_issue_read(struct tty3270 *tp, int lock) * Hang up the tty */ static void -tty3270_hangup_tasklet(struct tty3270 *tp) +tty3270_hangup_tasklet(unsigned long data) { + struct tty3270 *tp = (struct tty3270 *)data; tty_port_tty_hangup(&tp->port, true); raw3270_put_view(&tp->view); } @@ -752,11 +754,9 @@ tty3270_alloc_view(void) tty_port_init(&tp->port); timer_setup(&tp->timer, tty3270_update, 0); - tasklet_init(&tp->readlet, - (void (*)(unsigned long)) tty3270_read_tasklet, + tasklet_init(&tp->readlet, tty3270_read_tasklet, (unsigned long) tp->read); - tasklet_init(&tp->hanglet, - (void (*)(unsigned long)) tty3270_hangup_tasklet, + tasklet_init(&tp->hanglet, tty3270_hangup_tasklet, (unsigned long) tp); INIT_WORK(&tp->resize_work, tty3270_resize_work); From 24840e76bf8a679d26d373a0edc44284bfd9dc18 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 1 Jul 2020 11:16:16 +0200 Subject: [PATCH 11/48] s390/smp: move smp_cpus_done() to header file Saves us a couple of bytes. Signed-off-by: Heiko Carstens --- arch/s390/include/asm/smp.h | 4 ++++ arch/s390/kernel/smp.c | 4 ---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index 7326f110d48c..20b37b059e2b 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -54,6 +54,10 @@ static inline int smp_get_base_cpu(int cpu) return cpu - (cpu % (smp_cpu_mtid + 1)); } +static inline void smp_cpus_done(unsigned int max_cpus) +{ +} + extern int smp_rescan_cpus(void); extern void __noreturn cpu_die(void); extern void __cpu_die(unsigned int cpu); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index e6be63ff162a..b4f2795a123d 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -1012,10 +1012,6 @@ void __init smp_prepare_boot_cpu(void) smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN); } -void __init smp_cpus_done(unsigned int max_cpus) -{ -} - void __init smp_setup_processor_id(void) { pcpu_devices[0].address = stap(); From 8e1398f8987851bb266c1d8d911752a18e1d05b4 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 1 Jul 2020 11:17:52 +0200 Subject: [PATCH 12/48] s390/smp: add missing linebreak Signed-off-by: Heiko Carstens --- arch/s390/kernel/smp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index b4f2795a123d..f685a38f166d 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -1141,6 +1141,7 @@ static int smp_cpu_online(unsigned int cpu) return sysfs_create_group(&s->kobj, &cpu_online_attr_group); } + static int smp_cpu_pre_down(unsigned int cpu) { struct device *s = &per_cpu(cpu_device, cpu)->dev; From 0ef5d691aae0322cbab0807c184ba534536a4698 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 30 Jun 2020 10:42:40 +0200 Subject: [PATCH 13/48] s390/extmem: remove stale -ENOSPC comment and handling segment_load() will no longer return -ENOSPC. If a segment overlaps with storage, we now also return -EBUSY. Remove the stale comment from __segment_load() and the stale handling from segment_warning(). 
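For DCSS API users the net effect is that a single error code now covers both kinds of overlap. A hypothetical caller (load_my_dcss() and the segment name are made up) would handle it like this:

#include <asm/extmem.h>

static int load_my_dcss(void)
{
        char name[] = "MYDCSS";         /* made-up segment name */
        unsigned long start, end;
        int rc;

        rc = segment_load(name, SEGMENT_SHARED, &start, &end);
        if (rc < 0) {
                /* -EBUSY now means: overlaps with dcss or storage */
                segment_warning(rc, name);
                return rc;
        }
        /* rc >= 0 is the segment type, e.g. SEG_TYPE_SR */
        return 0;
}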
Cc: Vasily Gorbik Cc: Christian Borntraeger Cc: Andrew Morton Suggested-by: Gerald Schaefer Signed-off-by: David Hildenbrand Message-Id: <20200630084240.8283-1-david@redhat.com> Signed-off-by: Heiko Carstens --- arch/s390/mm/extmem.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c index 105c09282f8c..5060956b8e7d 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c @@ -401,8 +401,7 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long * -EIO : could not perform query or load diagnose * -ENOENT : no such segment * -EOPNOTSUPP: multi-part segment cannot be used with linux - * -ENOSPC : segment cannot be used (overlaps with storage) - * -EBUSY : segment can temporarily not be used (overlaps with dcss) + * -EBUSY : segment cannot be used (overlaps with dcss or storage) * -ERANGE : segment cannot be used (exceeds kernel mapping range) * -EPERM : segment is currently loaded with incompatible permissions * -ENOMEM : out of memory @@ -627,10 +626,6 @@ void segment_warning(int rc, char *seg_name) pr_err("DCSS %s has multiple page ranges and cannot be " "loaded or queried\n", seg_name); break; - case -ENOSPC: - pr_err("DCSS %s overlaps with used storage and cannot " - "be loaded\n", seg_name); - break; case -EBUSY: pr_err("%s needs used memory resources and cannot be " "loaded or queried\n", seg_name); From c6337c6e89a695819d94949a7170e1bd0d131e31 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Tue, 30 Jun 2020 09:42:23 +0200 Subject: [PATCH 14/48] s390/pkey: fix smatch warning inconsistent indenting Fix smatch warnings: pkey_api.c:1606 pkey_ccacipher_aes_attr_read() warn: inconsistent indenting Reported-by: kernel test robot Signed-off-by: Harald Freudenberger Signed-off-by: Heiko Carstens --- drivers/s390/crypto/pkey_api.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c index 74e63ec49068..d5880f52dc2b 100644 --- a/drivers/s390/crypto/pkey_api.c +++ b/drivers/s390/crypto/pkey_api.c @@ -1603,8 +1603,8 @@ static ssize_t pkey_ccacipher_aes_attr_read(enum pkey_key_size keybits, if (rc == 0) break; } - if (rc) - return rc; + if (rc) + return rc; if (is_xts) { keysize = CCACIPHERTOKENSIZE; From 47c07bffeb32aa2a8e798d8ce25fa693e1364e11 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Tue, 30 Jun 2020 09:54:50 +0200 Subject: [PATCH 15/48] s390/zcrypt: fix smatch warnings Fix these smatch warnings: zcrypt_api.c:986 _zcrypt_send_ep11_cprb() error: uninitialized symbol 'pref_weight'. zcrypt_api.c:1008 _zcrypt_send_ep11_cprb() error: uninitialized symbol 'weight'. zcrypt_api.c:676 zcrypt_rsa_modexpo() error: uninitialized symbol 'pref_weight'. zcrypt_api.c:694 zcrypt_rsa_modexpo() error: uninitialized symbol 'weight'. zcrypt_api.c:760 zcrypt_rsa_crt() error: uninitialized symbol 'pref_weight'. zcrypt_api.c:778 zcrypt_rsa_crt() error: uninitialized symbol 'weight'. zcrypt_api.c:824 _zcrypt_send_cprb() warn: always true condition '(tdom >= 0) => (0-u16max >= 0)' zcrypt_api.c:846 _zcrypt_send_cprb() error: uninitialized symbol 'pref_weight'. zcrypt_api.c:867 _zcrypt_send_cprb() error: uninitialized symbol 'weight'. zcrypt_api.c:1065 zcrypt_rng() error: uninitialized symbol 'pref_weight'. zcrypt_api.c:1079 zcrypt_rng() error: uninitialized symbol 'weight'. zcrypt_cex4.c:251 ep11_card_op_modes_show() warn: should '(1 << ep11_op_modes[i]->mode_bit)' be a 64 bit type? 
zcrypt_cex4.c:346 ep11_queue_op_modes_show() warn: should '(1 << ep11_op_modes[i]->mode_bit)' be a 64 bit type? Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Harald Freudenberger Signed-off-by: Heiko Carstens --- drivers/s390/crypto/zcrypt_api.c | 12 ++++++------ drivers/s390/crypto/zcrypt_cex4.c | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index 56a405dce8bc..7775ff84f223 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -634,7 +634,7 @@ static long zcrypt_rsa_modexpo(struct ap_perms *perms, { struct zcrypt_card *zc, *pref_zc; struct zcrypt_queue *zq, *pref_zq; - unsigned int weight, pref_weight; + unsigned int weight = 0, pref_weight = 0; unsigned int func_code; int qid = 0, rc = -ENODEV; struct module *mod; @@ -718,7 +718,7 @@ static long zcrypt_rsa_crt(struct ap_perms *perms, { struct zcrypt_card *zc, *pref_zc; struct zcrypt_queue *zq, *pref_zq; - unsigned int weight, pref_weight; + unsigned int weight = 0, pref_weight = 0; unsigned int func_code; int qid = 0, rc = -ENODEV; struct module *mod; @@ -803,7 +803,7 @@ static long _zcrypt_send_cprb(struct ap_perms *perms, struct zcrypt_card *zc, *pref_zc; struct zcrypt_queue *zq, *pref_zq; struct ap_message ap_msg; - unsigned int weight, pref_weight; + unsigned int weight = 0, pref_weight = 0; unsigned int func_code; unsigned short *domain, tdom; int qid = 0, rc = -ENODEV; @@ -822,7 +822,7 @@ static long _zcrypt_send_cprb(struct ap_perms *perms, * domain but a control only domain, use the default domain as target. */ tdom = *domain; - if (tdom >= 0 && tdom < AP_DOMAINS && + if (tdom < AP_DOMAINS && !ap_test_config_usage_domain(tdom) && ap_test_config_ctrl_domain(tdom) && ap_domain_index >= 0) @@ -931,7 +931,7 @@ static long _zcrypt_send_ep11_cprb(struct ap_perms *perms, struct zcrypt_queue *zq, *pref_zq; struct ep11_target_dev *targets; unsigned short target_num; - unsigned int weight, pref_weight; + unsigned int weight = 0, pref_weight = 0; unsigned int func_code; struct ap_message ap_msg; int qid = 0, rc = -ENODEV; @@ -1040,7 +1040,7 @@ static long zcrypt_rng(char *buffer) { struct zcrypt_card *zc, *pref_zc; struct zcrypt_queue *zq, *pref_zq; - unsigned int weight, pref_weight; + unsigned int weight = 0, pref_weight = 0; unsigned int func_code; struct ap_message ap_msg; unsigned int domain; diff --git a/drivers/s390/crypto/zcrypt_cex4.c b/drivers/s390/crypto/zcrypt_cex4.c index cdaa8348ad04..337ec71ddb58 100644 --- a/drivers/s390/crypto/zcrypt_cex4.c +++ b/drivers/s390/crypto/zcrypt_cex4.c @@ -250,7 +250,7 @@ static ssize_t ep11_card_op_modes_show(struct device *dev, ep11_get_card_info(ac->id, &ci, zc->online); for (i = 0; ep11_op_modes[i].mode_txt; i++) { - if (ci.op_mode & (1 << ep11_op_modes[i].mode_bit)) { + if (ci.op_mode & (1ULL << ep11_op_modes[i].mode_bit)) { if (n > 0) buf[n++] = ' '; n += scnprintf(buf + n, PAGE_SIZE - n, @@ -345,7 +345,7 @@ static ssize_t ep11_queue_op_modes_show(struct device *dev, &di); for (i = 0; ep11_op_modes[i].mode_txt; i++) { - if (di.op_mode & (1 << ep11_op_modes[i].mode_bit)) { + if (di.op_mode & (1ULL << ep11_op_modes[i].mode_bit)) { if (n > 0) buf[n++] = ' '; n += scnprintf(buf + n, PAGE_SIZE - n, From 74ecbef7b90800e368809642ecc671ba4a57ab09 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Thu, 30 Apr 2020 12:23:29 +0200 Subject: [PATCH 16/48] s390/zcrypt: code beautification and struct field renames Some beautifications related to 
the internal only used struct ap_message and related code. Instead of one int carrying only the special flag now a u32 flags field is used. At struct CPRBX the pointers to additional data are now marked with __user. This caused some changes needed on code, where these structs are also used within the zcrypt misc functions. The ica_rsa_* structs now use the generic types __u8, __u32, ... instead of char, unsigned int. zcrypt_msg6 and zcrypt_msg50 use min_t() instead of min(). Signed-off-by: Harald Freudenberger Signed-off-by: Heiko Carstens --- arch/s390/include/uapi/asm/zcrypt.h | 140 ++++++++++++------------- drivers/s390/crypto/ap_bus.h | 11 +- drivers/s390/crypto/ap_queue.c | 9 +- drivers/s390/crypto/zcrypt_ccamisc.c | 69 ++++++------ drivers/s390/crypto/zcrypt_cex2c.c | 15 ++- drivers/s390/crypto/zcrypt_error.h | 4 +- drivers/s390/crypto/zcrypt_msgtype50.c | 64 +++++------ drivers/s390/crypto/zcrypt_msgtype6.c | 112 ++++++++++---------- drivers/s390/crypto/zcrypt_msgtype6.h | 4 +- drivers/s390/crypto/zcrypt_queue.c | 8 +- 10 files changed, 217 insertions(+), 219 deletions(-) diff --git a/arch/s390/include/uapi/asm/zcrypt.h b/arch/s390/include/uapi/asm/zcrypt.h index 5a2177e96e88..22fd202856bc 100644 --- a/arch/s390/include/uapi/asm/zcrypt.h +++ b/arch/s390/include/uapi/asm/zcrypt.h @@ -36,12 +36,12 @@ * - length(n_modulus) = inputdatalength */ struct ica_rsa_modexpo { - char __user *inputdata; - unsigned int inputdatalength; - char __user *outputdata; - unsigned int outputdatalength; - char __user *b_key; - char __user *n_modulus; + __u8 __user *inputdata; + __u32 inputdatalength; + __u8 __user *outputdata; + __u32 outputdatalength; + __u8 __user *b_key; + __u8 __user *n_modulus; }; /** @@ -59,15 +59,15 @@ struct ica_rsa_modexpo { * - length(u_mult_inv) = inputdatalength/2 + 8 */ struct ica_rsa_modexpo_crt { - char __user *inputdata; - unsigned int inputdatalength; - char __user *outputdata; - unsigned int outputdatalength; - char __user *bp_key; - char __user *bq_key; - char __user *np_prime; - char __user *nq_prime; - char __user *u_mult_inv; + __u8 __user *inputdata; + __u32 inputdatalength; + __u8 __user *outputdata; + __u32 outputdatalength; + __u8 __user *bp_key; + __u8 __user *bq_key; + __u8 __user *np_prime; + __u8 __user *nq_prime; + __u8 __user *u_mult_inv; }; /** @@ -83,67 +83,67 @@ struct ica_rsa_modexpo_crt { * key block */ struct CPRBX { - unsigned short cprb_len; /* CPRB length 220 */ - unsigned char cprb_ver_id; /* CPRB version id. 
0x02 */ - unsigned char pad_000[3]; /* Alignment pad bytes */ - unsigned char func_id[2]; /* function id 0x5432 */ - unsigned char cprb_flags[4]; /* Flags */ - unsigned int req_parml; /* request parameter buffer len */ - unsigned int req_datal; /* request data buffer */ - unsigned int rpl_msgbl; /* reply message block length */ - unsigned int rpld_parml; /* replied parameter block len */ - unsigned int rpl_datal; /* reply data block len */ - unsigned int rpld_datal; /* replied data block len */ - unsigned int req_extbl; /* request extension block len */ - unsigned char pad_001[4]; /* reserved */ - unsigned int rpld_extbl; /* replied extension block len */ - unsigned char padx000[16 - sizeof(char *)]; - unsigned char *req_parmb; /* request parm block 'address' */ - unsigned char padx001[16 - sizeof(char *)]; - unsigned char *req_datab; /* request data block 'address' */ - unsigned char padx002[16 - sizeof(char *)]; - unsigned char *rpl_parmb; /* reply parm block 'address' */ - unsigned char padx003[16 - sizeof(char *)]; - unsigned char *rpl_datab; /* reply data block 'address' */ - unsigned char padx004[16 - sizeof(char *)]; - unsigned char *req_extb; /* request extension block 'addr'*/ - unsigned char padx005[16 - sizeof(char *)]; - unsigned char *rpl_extb; /* reply extension block 'address'*/ - unsigned short ccp_rtcode; /* server return code */ - unsigned short ccp_rscode; /* server reason code */ - unsigned int mac_data_len; /* Mac Data Length */ - unsigned char logon_id[8]; /* Logon Identifier */ - unsigned char mac_value[8]; /* Mac Value */ - unsigned char mac_content_flgs;/* Mac content flag byte */ - unsigned char pad_002; /* Alignment */ - unsigned short domain; /* Domain */ - unsigned char usage_domain[4];/* Usage domain */ - unsigned char cntrl_domain[4];/* Control domain */ - unsigned char S390enf_mask[4];/* S/390 enforcement mask */ - unsigned char pad_004[36]; /* reserved */ + __u16 cprb_len; /* CPRB length 220 */ + __u8 cprb_ver_id; /* CPRB version id. 
0x02 */ + __u8 pad_000[3]; /* Alignment pad bytes */ + __u8 func_id[2]; /* function id 0x5432 */ + __u8 cprb_flags[4]; /* Flags */ + __u32 req_parml; /* request parameter buffer len */ + __u32 req_datal; /* request data buffer */ + __u32 rpl_msgbl; /* reply message block length */ + __u32 rpld_parml; /* replied parameter block len */ + __u32 rpl_datal; /* reply data block len */ + __u32 rpld_datal; /* replied data block len */ + __u32 req_extbl; /* request extension block len */ + __u8 pad_001[4]; /* reserved */ + __u32 rpld_extbl; /* replied extension block len */ + __u8 padx000[16 - sizeof(__u8 *)]; + __u8 __user *req_parmb; /* request parm block 'address' */ + __u8 padx001[16 - sizeof(__u8 *)]; + __u8 __user *req_datab; /* request data block 'address' */ + __u8 padx002[16 - sizeof(__u8 *)]; + __u8 __user *rpl_parmb; /* reply parm block 'address' */ + __u8 padx003[16 - sizeof(__u8 *)]; + __u8 __user *rpl_datab; /* reply data block 'address' */ + __u8 padx004[16 - sizeof(__u8 *)]; + __u8 __user *req_extb; /* request extension block 'addr'*/ + __u8 padx005[16 - sizeof(__u8 *)]; + __u8 __user *rpl_extb; /* reply extension block 'address'*/ + __u16 ccp_rtcode; /* server return code */ + __u16 ccp_rscode; /* server reason code */ + __u32 mac_data_len; /* Mac Data Length */ + __u8 logon_id[8]; /* Logon Identifier */ + __u8 mac_value[8]; /* Mac Value */ + __u8 mac_content_flgs; /* Mac content flag byte */ + __u8 pad_002; /* Alignment */ + __u16 domain; /* Domain */ + __u8 usage_domain[4]; /* Usage domain */ + __u8 cntrl_domain[4]; /* Control domain */ + __u8 S390enf_mask[4]; /* S/390 enforcement mask */ + __u8 pad_004[36]; /* reserved */ } __attribute__((packed)); /** * xcRB */ struct ica_xcRB { - unsigned short agent_ID; - unsigned int user_defined; - unsigned short request_ID; - unsigned int request_control_blk_length; - unsigned char padding1[16 - sizeof(char *)]; - char __user *request_control_blk_addr; - unsigned int request_data_length; - char padding2[16 - sizeof(char *)]; - char __user *request_data_address; - unsigned int reply_control_blk_length; - char padding3[16 - sizeof(char *)]; - char __user *reply_control_blk_addr; - unsigned int reply_data_length; - char padding4[16 - sizeof(char *)]; - char __user *reply_data_addr; - unsigned short priority_window; - unsigned int status; + __u16 agent_ID; + __u32 user_defined; + __u16 request_ID; + __u32 request_control_blk_length; + __u8 _padding1[16 - sizeof(__u8 *)]; + __u8 __user *request_control_blk_addr; + __u32 request_data_length; + __u8 _padding2[16 - sizeof(__u8 *)]; + __u8 __user *request_data_address; + __u32 reply_control_blk_length; + __u8 _padding3[16 - sizeof(__u8 *)]; + __u8 __user *reply_control_blk_addr; + __u32 reply_data_length; + __u8 __padding4[16 - sizeof(__u8 *)]; + __u8 __user *reply_data_addr; + __u16 priority_window; + __u32 status; } __attribute__((packed)); /** diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index 053cc34d2ca2..69432e93643a 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -190,17 +190,18 @@ typedef enum ap_wait (ap_func_t)(struct ap_queue *queue); struct ap_message { struct list_head list; /* Request queueing. */ unsigned long long psmid; /* Message id. */ - void *message; /* Pointer to message buffer. */ - size_t length; /* Message length. */ + void *msg; /* Pointer to message buffer. */ + unsigned int len; /* Message length. 
*/ + u32 flags; /* Flags, see AP_MSG_FLAG_xxx */ int rc; /* Return code for this message */ - void *private; /* ap driver private pointer. */ - unsigned int special:1; /* Used for special commands. */ /* receive is called from tasklet context */ void (*receive)(struct ap_queue *, struct ap_message *, struct ap_message *); }; +#define AP_MSG_FLAG_SPECIAL (1 << 16) /* flag msg as 'special' with NQAP */ + /** * ap_init_message() - Initialize ap_message. * Initialize a message before using. Otherwise this might result in @@ -218,7 +219,7 @@ static inline void ap_init_message(struct ap_message *ap_msg) */ static inline void ap_release_message(struct ap_message *ap_msg) { - kzfree(ap_msg->message); + kzfree(ap_msg->msg); kzfree(ap_msg->private); } diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c index 73b077dca3e6..d6cc384f294b 100644 --- a/drivers/s390/crypto/ap_queue.c +++ b/drivers/s390/crypto/ap_queue.c @@ -69,9 +69,9 @@ static int ap_queue_enable_interruption(struct ap_queue *aq, void *ind) */ static inline struct ap_queue_status __ap_send(ap_qid_t qid, unsigned long long psmid, void *msg, size_t length, - unsigned int special) + int special) { - if (special == 1) + if (special) qid |= 0x400000UL; return ap_nqap(qid, psmid, msg, length); } @@ -137,7 +137,7 @@ static struct ap_queue_status ap_sm_recv(struct ap_queue *aq) struct ap_message *ap_msg; status = ap_dqap(aq->qid, &aq->reply->psmid, - aq->reply->message, aq->reply->length); + aq->reply->msg, aq->reply->len); switch (status.response_code) { case AP_RESPONSE_NORMAL: aq->queue_count--; @@ -216,7 +216,8 @@ static enum ap_wait ap_sm_write(struct ap_queue *aq) /* Start the next request on the queue. */ ap_msg = list_entry(aq->requestq.next, struct ap_message, list); status = __ap_send(aq->qid, ap_msg->psmid, - ap_msg->message, ap_msg->length, ap_msg->special); + ap_msg->msg, ap_msg->len, + ap_msg->flags & AP_MSG_FLAG_SPECIAL); switch (status.response_code) { case AP_RESPONSE_NORMAL: aq->queue_count++; diff --git a/drivers/s390/crypto/zcrypt_ccamisc.c b/drivers/s390/crypto/zcrypt_ccamisc.c index 1b835398feec..3f5b61351cde 100644 --- a/drivers/s390/crypto/zcrypt_ccamisc.c +++ b/drivers/s390/crypto/zcrypt_ccamisc.c @@ -205,9 +205,9 @@ static int alloc_and_prep_cprbmem(size_t paramblen, preqcblk->rpl_msgbl = cprbplusparamblen; if (paramblen) { preqcblk->req_parmb = - ((u8 *) preqcblk) + sizeof(struct CPRBX); + ((u8 __user *) preqcblk) + sizeof(struct CPRBX); preqcblk->rpl_parmb = - ((u8 *) prepcblk) + sizeof(struct CPRBX); + ((u8 __user *) prepcblk) + sizeof(struct CPRBX); } *pcprbmem = cprbmem; @@ -274,7 +274,7 @@ int cca_genseckey(u16 cardnr, u16 domain, { int i, rc, keysize; int seckeysize; - u8 *mem; + u8 *mem, *ptr; struct CPRBX *preqcblk, *prepcblk; struct ica_xcRB xcrb; struct kgreqparm { @@ -320,7 +320,7 @@ int cca_genseckey(u16 cardnr, u16 domain, preqcblk->domain = domain; /* fill request cprb param block with KG request */ - preqparm = (struct kgreqparm *) preqcblk->req_parmb; + preqparm = (struct kgreqparm __force *) preqcblk->req_parmb; memcpy(preqparm->subfunc_code, "KG", 2); preqparm->rule_array_len = sizeof(preqparm->rule_array_len); preqparm->lv1.len = sizeof(struct lv1); @@ -377,8 +377,9 @@ int cca_genseckey(u16 cardnr, u16 domain, } /* process response cprb param block */ - prepcblk->rpl_parmb = ((u8 *) prepcblk) + sizeof(struct CPRBX); - prepparm = (struct kgrepparm *) prepcblk->rpl_parmb; + ptr = ((u8 *) prepcblk) + sizeof(struct CPRBX); + prepcblk->rpl_parmb = (u8 __user *) ptr; + prepparm = 
(struct kgrepparm *) ptr; /* check length of the returned secure key token */ seckeysize = prepparm->lv3.keyblock.toklen @@ -415,7 +416,7 @@ int cca_clr2seckey(u16 cardnr, u16 domain, u32 keybitsize, const u8 *clrkey, u8 seckey[SECKEYBLOBSIZE]) { int rc, keysize, seckeysize; - u8 *mem; + u8 *mem, *ptr; struct CPRBX *preqcblk, *prepcblk; struct ica_xcRB xcrb; struct cmreqparm { @@ -460,7 +461,7 @@ int cca_clr2seckey(u16 cardnr, u16 domain, u32 keybitsize, preqcblk->domain = domain; /* fill request cprb param block with CM request */ - preqparm = (struct cmreqparm *) preqcblk->req_parmb; + preqparm = (struct cmreqparm __force *) preqcblk->req_parmb; memcpy(preqparm->subfunc_code, "CM", 2); memcpy(preqparm->rule_array, "AES ", 8); preqparm->rule_array_len = @@ -514,8 +515,9 @@ int cca_clr2seckey(u16 cardnr, u16 domain, u32 keybitsize, } /* process response cprb param block */ - prepcblk->rpl_parmb = ((u8 *) prepcblk) + sizeof(struct CPRBX); - prepparm = (struct cmrepparm *) prepcblk->rpl_parmb; + ptr = ((u8 *) prepcblk) + sizeof(struct CPRBX); + prepcblk->rpl_parmb = (u8 __user *) ptr; + prepparm = (struct cmrepparm *) ptr; /* check length of the returned secure key token */ seckeysize = prepparm->lv3.keyblock.toklen @@ -554,7 +556,7 @@ int cca_sec2protkey(u16 cardnr, u16 domain, u8 *protkey, u32 *protkeylen, u32 *protkeytype) { int rc; - u8 *mem; + u8 *mem, *ptr; struct CPRBX *preqcblk, *prepcblk; struct ica_xcRB xcrb; struct uskreqparm { @@ -605,7 +607,7 @@ int cca_sec2protkey(u16 cardnr, u16 domain, preqcblk->domain = domain; /* fill request cprb param block with USK request */ - preqparm = (struct uskreqparm *) preqcblk->req_parmb; + preqparm = (struct uskreqparm __force *) preqcblk->req_parmb; memcpy(preqparm->subfunc_code, "US", 2); preqparm->rule_array_len = sizeof(preqparm->rule_array_len); preqparm->lv1.len = sizeof(struct lv1); @@ -646,8 +648,9 @@ int cca_sec2protkey(u16 cardnr, u16 domain, } /* process response cprb param block */ - prepcblk->rpl_parmb = ((u8 *) prepcblk) + sizeof(struct CPRBX); - prepparm = (struct uskrepparm *) prepcblk->rpl_parmb; + ptr = ((u8 *) prepcblk) + sizeof(struct CPRBX); + prepcblk->rpl_parmb = (u8 __user *) ptr; + prepparm = (struct uskrepparm *) ptr; /* check the returned keyblock */ if (prepparm->lv3.ckb.version != 0x01 && @@ -714,7 +717,7 @@ int cca_gencipherkey(u16 cardnr, u16 domain, u32 keybitsize, u32 keygenflags, u8 *keybuf, size_t *keybufsize) { int rc; - u8 *mem; + u8 *mem, *ptr; struct CPRBX *preqcblk, *prepcblk; struct ica_xcRB xcrb; struct gkreqparm { @@ -796,7 +799,7 @@ int cca_gencipherkey(u16 cardnr, u16 domain, u32 keybitsize, u32 keygenflags, preqcblk->req_parml = sizeof(struct gkreqparm); /* prepare request param block with GK request */ - preqparm = (struct gkreqparm *) preqcblk->req_parmb; + preqparm = (struct gkreqparm __force *) preqcblk->req_parmb; memcpy(preqparm->subfunc_code, "GK", 2); preqparm->rule_array_len = sizeof(uint16_t) + 2 * 8; memcpy(preqparm->rule_array, "AES OP ", 2*8); @@ -867,8 +870,9 @@ int cca_gencipherkey(u16 cardnr, u16 domain, u32 keybitsize, u32 keygenflags, } /* process response cprb param block */ - prepcblk->rpl_parmb = ((u8 *) prepcblk) + sizeof(struct CPRBX); - prepparm = (struct gkrepparm *) prepcblk->rpl_parmb; + ptr = ((u8 *) prepcblk) + sizeof(struct CPRBX); + prepcblk->rpl_parmb = (u8 __user *) ptr; + prepparm = (struct gkrepparm *) ptr; /* do some plausibility checks on the key block */ if (prepparm->kb.len < 120 + 5 * sizeof(uint16_t) || @@ -917,7 +921,7 @@ static int _ip_cprb_helper(u16 
cardnr, u16 domain, int *key_token_size) { int rc, n; - u8 *mem; + u8 *mem, *ptr; struct CPRBX *preqcblk, *prepcblk; struct ica_xcRB xcrb; struct rule_array_block { @@ -974,7 +978,7 @@ static int _ip_cprb_helper(u16 cardnr, u16 domain, preqcblk->req_parml = 0; /* prepare request param block with IP request */ - preq_ra_block = (struct rule_array_block *) preqcblk->req_parmb; + preq_ra_block = (struct rule_array_block __force *) preqcblk->req_parmb; memcpy(preq_ra_block->subfunc_code, "IP", 2); preq_ra_block->rule_array_len = sizeof(uint16_t) + 2 * 8; memcpy(preq_ra_block->rule_array, rule_array_1, 8); @@ -987,7 +991,7 @@ static int _ip_cprb_helper(u16 cardnr, u16 domain, } /* prepare vud block */ - preq_vud_block = (struct vud_block *) + preq_vud_block = (struct vud_block __force *) (preqcblk->req_parmb + preqcblk->req_parml); n = complete ? 0 : (clr_key_bit_size + 7) / 8; preq_vud_block->len = sizeof(struct vud_block) + n; @@ -1001,7 +1005,7 @@ static int _ip_cprb_helper(u16 cardnr, u16 domain, preqcblk->req_parml += preq_vud_block->len; /* prepare key block */ - preq_key_block = (struct key_block *) + preq_key_block = (struct key_block __force *) (preqcblk->req_parmb + preqcblk->req_parml); n = *key_token_size; preq_key_block->len = sizeof(struct key_block) + n; @@ -1034,8 +1038,9 @@ static int _ip_cprb_helper(u16 cardnr, u16 domain, } /* process response cprb param block */ - prepcblk->rpl_parmb = ((u8 *) prepcblk) + sizeof(struct CPRBX); - prepparm = (struct iprepparm *) prepcblk->rpl_parmb; + ptr = ((u8 *) prepcblk) + sizeof(struct CPRBX); + prepcblk->rpl_parmb = (u8 __user *) ptr; + prepparm = (struct iprepparm *) ptr; /* do some plausibility checks on the key block */ if (prepparm->kb.len < 120 + 3 * sizeof(uint16_t) || @@ -1151,7 +1156,7 @@ int cca_cipher2protkey(u16 cardnr, u16 domain, const u8 *ckey, u8 *protkey, u32 *protkeylen, u32 *protkeytype) { int rc; - u8 *mem; + u8 *mem, *ptr; struct CPRBX *preqcblk, *prepcblk; struct ica_xcRB xcrb; struct aureqparm { @@ -1208,7 +1213,7 @@ int cca_cipher2protkey(u16 cardnr, u16 domain, const u8 *ckey, preqcblk->domain = domain; /* fill request cprb param block with AU request */ - preqparm = (struct aureqparm *) preqcblk->req_parmb; + preqparm = (struct aureqparm __force *) preqcblk->req_parmb; memcpy(preqparm->subfunc_code, "AU", 2); preqparm->rule_array_len = sizeof(preqparm->rule_array_len) @@ -1257,8 +1262,9 @@ int cca_cipher2protkey(u16 cardnr, u16 domain, const u8 *ckey, } /* process response cprb param block */ - prepcblk->rpl_parmb = ((u8 *) prepcblk) + sizeof(struct CPRBX); - prepparm = (struct aurepparm *) prepcblk->rpl_parmb; + ptr = ((u8 *) prepcblk) + sizeof(struct CPRBX); + prepcblk->rpl_parmb = (u8 __user *) ptr; + prepparm = (struct aurepparm *) ptr; /* check the returned keyblock */ if (prepparm->vud.ckb.version != 0x01 && @@ -1347,7 +1353,7 @@ int cca_query_crypto_facility(u16 cardnr, u16 domain, preqcblk->domain = domain; /* fill request cprb param block with FQ request */ - preqparm = (struct fqreqparm *) preqcblk->req_parmb; + preqparm = (struct fqreqparm __force *) preqcblk->req_parmb; memcpy(preqparm->subfunc_code, "FQ", 2); memcpy(preqparm->rule_array, keyword, sizeof(preqparm->rule_array)); preqparm->rule_array_len = @@ -1378,8 +1384,9 @@ int cca_query_crypto_facility(u16 cardnr, u16 domain, } /* process response cprb param block */ - prepcblk->rpl_parmb = ((u8 *) prepcblk) + sizeof(struct CPRBX); - prepparm = (struct fqrepparm *) prepcblk->rpl_parmb; + ptr = ((u8 *) prepcblk) + sizeof(struct CPRBX); + 
prepcblk->rpl_parmb = (u8 __user *) ptr; + prepparm = (struct fqrepparm *) ptr; ptr = prepparm->lvdata; /* check and possibly copy reply rule array */ diff --git a/drivers/s390/crypto/zcrypt_cex2c.c b/drivers/s390/crypto/zcrypt_cex2c.c index 266440168bb7..993addb726e0 100644 --- a/drivers/s390/crypto/zcrypt_cex2c.c +++ b/drivers/s390/crypto/zcrypt_cex2c.c @@ -87,24 +87,23 @@ static int zcrypt_cex2c_rng_supported(struct ap_queue *aq) int rc, i; ap_init_message(&ap_msg); - ap_msg.message = (void *) get_zeroed_page(GFP_KERNEL); - if (!ap_msg.message) + ap_msg.msg = (void *) get_zeroed_page(GFP_KERNEL); + if (!ap_msg.msg) return -ENOMEM; rng_type6CPRB_msgX(&ap_msg, 4, &domain); - msg = ap_msg.message; + msg = ap_msg.msg; msg->cprbx.domain = AP_QID_QUEUE(aq->qid); - rc = ap_send(aq->qid, 0x0102030405060708ULL, ap_msg.message, - ap_msg.length); + rc = ap_send(aq->qid, 0x0102030405060708ULL, ap_msg.msg, ap_msg.len); if (rc) goto out_free; /* Wait for the test message to complete. */ for (i = 0; i < 2 * HZ; i++) { msleep(1000 / HZ); - rc = ap_recv(aq->qid, &psmid, ap_msg.message, 4096); + rc = ap_recv(aq->qid, &psmid, ap_msg.msg, 4096); if (rc == 0 && psmid == 0x0102030405060708ULL) break; } @@ -115,13 +114,13 @@ static int zcrypt_cex2c_rng_supported(struct ap_queue *aq) goto out_free; } - reply = ap_msg.message; + reply = ap_msg.msg; if (reply->cprbx.ccp_rtcode == 0 && reply->cprbx.ccp_rscode == 0) rc = 1; else rc = 0; out_free: - free_page((unsigned long) ap_msg.message); + free_page((unsigned long) ap_msg.msg); return rc; } diff --git a/drivers/s390/crypto/zcrypt_error.h b/drivers/s390/crypto/zcrypt_error.h index 4f4dd9d727c9..54a04f8c38ef 100644 --- a/drivers/s390/crypto/zcrypt_error.h +++ b/drivers/s390/crypto/zcrypt_error.h @@ -80,7 +80,7 @@ struct error_hdr { static inline int convert_error(struct zcrypt_queue *zq, struct ap_message *reply) { - struct error_hdr *ehdr = reply->message; + struct error_hdr *ehdr = reply->msg; int card = AP_QID_CARD(zq->queue->qid); int queue = AP_QID_QUEUE(zq->queue->qid); @@ -127,7 +127,7 @@ static inline int convert_error(struct zcrypt_queue *zq, struct { struct type86_hdr hdr; struct type86_fmt2_ext fmt2; - } __packed * head = reply->message; + } __packed * head = reply->msg; unsigned int apfs = *((u32 *)head->fmt2.apfs); ZCRYPT_DBF(DBF_ERR, diff --git a/drivers/s390/crypto/zcrypt_msgtype50.c b/drivers/s390/crypto/zcrypt_msgtype50.c index fc4295b3d801..7aedc338b445 100644 --- a/drivers/s390/crypto/zcrypt_msgtype50.c +++ b/drivers/s390/crypto/zcrypt_msgtype50.c @@ -207,10 +207,10 @@ static int ICAMEX_msg_to_type50MEX_msg(struct zcrypt_queue *zq, mod_len = mex->inputdatalength; if (mod_len <= 128) { - struct type50_meb1_msg *meb1 = ap_msg->message; + struct type50_meb1_msg *meb1 = ap_msg->msg; memset(meb1, 0, sizeof(*meb1)); - ap_msg->length = sizeof(*meb1); + ap_msg->len = sizeof(*meb1); meb1->header.msg_type_code = TYPE50_TYPE_CODE; meb1->header.msg_len = sizeof(*meb1); meb1->keyblock_type = TYPE50_MEB1_FMT; @@ -218,10 +218,10 @@ static int ICAMEX_msg_to_type50MEX_msg(struct zcrypt_queue *zq, exp = meb1->exponent + sizeof(meb1->exponent) - mod_len; inp = meb1->message + sizeof(meb1->message) - mod_len; } else if (mod_len <= 256) { - struct type50_meb2_msg *meb2 = ap_msg->message; + struct type50_meb2_msg *meb2 = ap_msg->msg; memset(meb2, 0, sizeof(*meb2)); - ap_msg->length = sizeof(*meb2); + ap_msg->len = sizeof(*meb2); meb2->header.msg_type_code = TYPE50_TYPE_CODE; meb2->header.msg_len = sizeof(*meb2); meb2->keyblock_type = TYPE50_MEB2_FMT; @@ -229,10 
+229,10 @@ static int ICAMEX_msg_to_type50MEX_msg(struct zcrypt_queue *zq, exp = meb2->exponent + sizeof(meb2->exponent) - mod_len; inp = meb2->message + sizeof(meb2->message) - mod_len; } else if (mod_len <= 512) { - struct type50_meb3_msg *meb3 = ap_msg->message; + struct type50_meb3_msg *meb3 = ap_msg->msg; memset(meb3, 0, sizeof(*meb3)); - ap_msg->length = sizeof(*meb3); + ap_msg->len = sizeof(*meb3); meb3->header.msg_type_code = TYPE50_TYPE_CODE; meb3->header.msg_len = sizeof(*meb3); meb3->keyblock_type = TYPE50_MEB3_FMT; @@ -275,10 +275,10 @@ static int ICACRT_msg_to_type50CRT_msg(struct zcrypt_queue *zq, * 512 byte modulus (4k keys). */ if (mod_len <= 128) { /* up to 1024 bit key size */ - struct type50_crb1_msg *crb1 = ap_msg->message; + struct type50_crb1_msg *crb1 = ap_msg->msg; memset(crb1, 0, sizeof(*crb1)); - ap_msg->length = sizeof(*crb1); + ap_msg->len = sizeof(*crb1); crb1->header.msg_type_code = TYPE50_TYPE_CODE; crb1->header.msg_len = sizeof(*crb1); crb1->keyblock_type = TYPE50_CRB1_FMT; @@ -289,10 +289,10 @@ static int ICACRT_msg_to_type50CRT_msg(struct zcrypt_queue *zq, u = crb1->u + sizeof(crb1->u) - short_len; inp = crb1->message + sizeof(crb1->message) - mod_len; } else if (mod_len <= 256) { /* up to 2048 bit key size */ - struct type50_crb2_msg *crb2 = ap_msg->message; + struct type50_crb2_msg *crb2 = ap_msg->msg; memset(crb2, 0, sizeof(*crb2)); - ap_msg->length = sizeof(*crb2); + ap_msg->len = sizeof(*crb2); crb2->header.msg_type_code = TYPE50_TYPE_CODE; crb2->header.msg_len = sizeof(*crb2); crb2->keyblock_type = TYPE50_CRB2_FMT; @@ -304,10 +304,10 @@ static int ICACRT_msg_to_type50CRT_msg(struct zcrypt_queue *zq, inp = crb2->message + sizeof(crb2->message) - mod_len; } else if ((mod_len <= 512) && /* up to 4096 bit key size */ (zq->zcard->max_mod_size == CEX3A_MAX_MOD_SIZE)) { - struct type50_crb3_msg *crb3 = ap_msg->message; + struct type50_crb3_msg *crb3 = ap_msg->msg; memset(crb3, 0, sizeof(*crb3)); - ap_msg->length = sizeof(*crb3); + ap_msg->len = sizeof(*crb3); crb3->header.msg_type_code = TYPE50_TYPE_CODE; crb3->header.msg_len = sizeof(*crb3); crb3->keyblock_type = TYPE50_CRB3_FMT; @@ -350,7 +350,7 @@ static int convert_type80(struct zcrypt_queue *zq, char __user *outputdata, unsigned int outputdatalength) { - struct type80_hdr *t80h = reply->message; + struct type80_hdr *t80h = reply->msg; unsigned char *data; if (t80h->len < sizeof(*t80h) + outputdatalength) { @@ -370,7 +370,7 @@ static int convert_type80(struct zcrypt_queue *zq, BUG_ON(t80h->len > CEX2A_MAX_RESPONSE_SIZE); else BUG_ON(t80h->len > CEX3A_MAX_RESPONSE_SIZE); - data = reply->message + t80h->len - outputdatalength; + data = reply->msg + t80h->len - outputdatalength; if (copy_to_user(outputdata, data, outputdatalength)) return -EFAULT; return 0; @@ -382,7 +382,7 @@ static int convert_response(struct zcrypt_queue *zq, unsigned int outputdatalength) { /* Response type byte is the second byte in the response. */ - unsigned char rtype = ((unsigned char *) reply->message)[1]; + unsigned char rtype = ((unsigned char *) reply->msg)[1]; switch (rtype) { case TYPE82_RSP_CODE: @@ -422,22 +422,20 @@ static void zcrypt_cex2a_receive(struct ap_queue *aq, .reply_code = REP82_ERROR_MACHINE_FAILURE, }; struct type80_hdr *t80h; - int length; + int len; /* Copy the reply message to the request message buffer. 
*/ if (!reply) goto out; /* ap_msg->rc indicates the error */ - t80h = reply->message; + t80h = reply->msg; if (t80h->type == TYPE80_RSP_CODE) { if (aq->ap_dev.device_type == AP_DEVICE_TYPE_CEX2A) - length = min_t(int, - CEX2A_MAX_RESPONSE_SIZE, t80h->len); + len = min_t(int, CEX2A_MAX_RESPONSE_SIZE, t80h->len); else - length = min_t(int, - CEX3A_MAX_RESPONSE_SIZE, t80h->len); - memcpy(msg->message, reply->message, length); + len = min_t(int, CEX3A_MAX_RESPONSE_SIZE, t80h->len); + memcpy(msg->msg, reply->msg, len); } else - memcpy(msg->message, reply->message, sizeof(error_reply)); + memcpy(msg->msg, reply->msg, sizeof(error_reply)); out: complete((struct completion *) msg->private); } @@ -460,12 +458,10 @@ static long zcrypt_cex2a_modexpo(struct zcrypt_queue *zq, ap_init_message(&ap_msg); if (zq->zcard->user_space_type == ZCRYPT_CEX2A) - ap_msg.message = kmalloc(MSGTYPE50_CRB2_MAX_MSG_SIZE, - GFP_KERNEL); + ap_msg.msg = kmalloc(MSGTYPE50_CRB2_MAX_MSG_SIZE, GFP_KERNEL); else - ap_msg.message = kmalloc(MSGTYPE50_CRB3_MAX_MSG_SIZE, - GFP_KERNEL); - if (!ap_msg.message) + ap_msg.msg = kmalloc(MSGTYPE50_CRB3_MAX_MSG_SIZE, GFP_KERNEL); + if (!ap_msg.msg) return -ENOMEM; ap_msg.receive = zcrypt_cex2a_receive; ap_msg.psmid = (((unsigned long long) current->pid) << 32) + @@ -486,7 +482,7 @@ static long zcrypt_cex2a_modexpo(struct zcrypt_queue *zq, /* Signal pending. */ ap_cancel_message(zq->queue, &ap_msg); out_free: - kfree(ap_msg.message); + kfree(ap_msg.msg); return rc; } @@ -506,12 +502,10 @@ static long zcrypt_cex2a_modexpo_crt(struct zcrypt_queue *zq, ap_init_message(&ap_msg); if (zq->zcard->user_space_type == ZCRYPT_CEX2A) - ap_msg.message = kmalloc(MSGTYPE50_CRB2_MAX_MSG_SIZE, - GFP_KERNEL); + ap_msg.msg = kmalloc(MSGTYPE50_CRB2_MAX_MSG_SIZE, GFP_KERNEL); else - ap_msg.message = kmalloc(MSGTYPE50_CRB3_MAX_MSG_SIZE, - GFP_KERNEL); - if (!ap_msg.message) + ap_msg.msg = kmalloc(MSGTYPE50_CRB3_MAX_MSG_SIZE, GFP_KERNEL); + if (!ap_msg.msg) return -ENOMEM; ap_msg.receive = zcrypt_cex2a_receive; ap_msg.psmid = (((unsigned long long) current->pid) << 32) + @@ -532,7 +526,7 @@ static long zcrypt_cex2a_modexpo_crt(struct zcrypt_queue *zq, /* Signal pending. 
*/ ap_cancel_message(zq->queue, &ap_msg); out_free: - kfree(ap_msg.message); + kfree(ap_msg.msg); return rc; } diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c index fd1cbb2d6b3f..d77991c74c25 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.c +++ b/drivers/s390/crypto/zcrypt_msgtype6.c @@ -266,7 +266,7 @@ static int ICAMEX_msg_to_type6MEX_msgX(struct zcrypt_queue *zq, struct function_and_rules_block fr; unsigned short length; char text[0]; - } __packed * msg = ap_msg->message; + } __packed * msg = ap_msg->msg; int size; /* @@ -301,7 +301,7 @@ static int ICAMEX_msg_to_type6MEX_msgX(struct zcrypt_queue *zq, msg->cprbx.req_parml = size - sizeof(msg->hdr) - sizeof(msg->cprbx); - ap_msg->length = size; + ap_msg->len = size; return 0; } @@ -336,7 +336,7 @@ static int ICACRT_msg_to_type6CRT_msgX(struct zcrypt_queue *zq, struct function_and_rules_block fr; unsigned short length; char text[0]; - } __packed * msg = ap_msg->message; + } __packed * msg = ap_msg->msg; int size; /* @@ -370,7 +370,7 @@ static int ICACRT_msg_to_type6CRT_msgX(struct zcrypt_queue *zq, msg->fr = static_pkd_fnr; - ap_msg->length = size; + ap_msg->len = size; return 0; } @@ -400,11 +400,11 @@ static int XCRB_msg_to_type6CPRB_msgX(struct ap_message *ap_msg, struct { struct type6_hdr hdr; struct CPRBX cprbx; - } __packed * msg = ap_msg->message; + } __packed * msg = ap_msg->msg; int rcblen = CEIL4(xcRB->request_control_blk_length); int replylen, req_sumlen, resp_sumlen; - char *req_data = ap_msg->message + sizeof(struct type6_hdr) + rcblen; + char *req_data = ap_msg->msg + sizeof(struct type6_hdr) + rcblen; char *function_code; if (CEIL4(xcRB->request_control_blk_length) < @@ -412,10 +412,10 @@ static int XCRB_msg_to_type6CPRB_msgX(struct ap_message *ap_msg, return -EINVAL; /* overflow after alignment*/ /* length checks */ - ap_msg->length = sizeof(struct type6_hdr) + + ap_msg->len = sizeof(struct type6_hdr) + CEIL4(xcRB->request_control_blk_length) + xcRB->request_data_length; - if (ap_msg->length > MSGTYPE06_MAX_MSG_SIZE) + if (ap_msg->len > MSGTYPE06_MAX_MSG_SIZE) return -EINVAL; /* @@ -480,9 +480,7 @@ static int XCRB_msg_to_type6CPRB_msgX(struct ap_message *ap_msg, if (memcmp(function_code, "US", 2) == 0 || memcmp(function_code, "AU", 2) == 0) - ap_msg->special = 1; - else - ap_msg->special = 0; + ap_msg->flags |= AP_MSG_FLAG_SPECIAL; /* copy data block */ if (xcRB->request_data_length && @@ -512,7 +510,7 @@ static int xcrb_msg_to_type6_ep11cprb_msgx(struct ap_message *ap_msg, struct ep11_cprb cprbx; unsigned char pld_tag; /* fixed value 0x30 */ unsigned char pld_lenfmt; /* payload length format */ - } __packed * msg = ap_msg->message; + } __packed * msg = ap_msg->msg; struct pld_hdr { unsigned char func_tag; /* fixed value 0x4 */ @@ -527,7 +525,7 @@ static int xcrb_msg_to_type6_ep11cprb_msgx(struct ap_message *ap_msg, return -EINVAL; /* overflow after alignment*/ /* length checks */ - ap_msg->length = sizeof(struct type6_hdr) + xcRB->req_len; + ap_msg->len = sizeof(struct type6_hdr) + xcRB->req_len; if (CEIL4(xcRB->req_len) > MSGTYPE06_MAX_MSG_SIZE - (sizeof(struct type6_hdr))) return -EINVAL; @@ -569,7 +567,7 @@ static int xcrb_msg_to_type6_ep11cprb_msgx(struct ap_message *ap_msg, /* enable special processing based on the cprbs flags special bit */ if (msg->cprbx.flags & 0x20) - ap_msg->special = 1; + ap_msg->flags |= AP_MSG_FLAG_SPECIAL; return 0; } @@ -639,7 +637,7 @@ static int convert_type86_ica(struct zcrypt_queue *zq, 0x35, 0x9D, 0xD3, 0xD3, 0xA7, 0x9D, 0x5D, 0x41, 0x6F, 
0x65, 0x1B, 0xCF, 0xA9, 0x87, 0x91, 0x09 }; - struct type86x_reply *msg = reply->message; + struct type86x_reply *msg = reply->msg; unsigned short service_rc, service_rs; unsigned int reply_len, pad_len; char *data; @@ -713,8 +711,8 @@ static int convert_type86_xcrb(struct zcrypt_queue *zq, struct ap_message *reply, struct ica_xcRB *xcRB) { - struct type86_fmt2_msg *msg = reply->message; - char *data = reply->message; + struct type86_fmt2_msg *msg = reply->msg; + char *data = reply->msg; /* Copy CPRB to user */ if (copy_to_user(xcRB->reply_control_blk_addr, @@ -744,8 +742,8 @@ static int convert_type86_ep11_xcrb(struct zcrypt_queue *zq, struct ap_message *reply, struct ep11_urb *xcRB) { - struct type86_fmt2_msg *msg = reply->message; - char *data = reply->message; + struct type86_fmt2_msg *msg = reply->msg; + char *data = reply->msg; if (xcRB->resp_len < msg->fmt2.count1) return -EINVAL; @@ -766,8 +764,8 @@ static int convert_type86_rng(struct zcrypt_queue *zq, struct type86_hdr hdr; struct type86_fmt2_ext fmt2; struct CPRBX cprbx; - } __packed * msg = reply->message; - char *data = reply->message; + } __packed * msg = reply->msg; + char *data = reply->msg; if (msg->cprbx.ccp_rtcode != 0 || msg->cprbx.ccp_rscode != 0) return -EINVAL; @@ -780,7 +778,7 @@ static int convert_response_ica(struct zcrypt_queue *zq, char __user *outputdata, unsigned int outputdatalength) { - struct type86x_reply *msg = reply->message; + struct type86x_reply *msg = reply->msg; switch (msg->hdr.type) { case TYPE82_RSP_CODE: @@ -820,7 +818,7 @@ static int convert_response_xcrb(struct zcrypt_queue *zq, struct ap_message *reply, struct ica_xcRB *xcRB) { - struct type86x_reply *msg = reply->message; + struct type86x_reply *msg = reply->msg; switch (msg->hdr.type) { case TYPE82_RSP_CODE: @@ -853,7 +851,7 @@ static int convert_response_xcrb(struct zcrypt_queue *zq, static int convert_response_ep11_xcrb(struct zcrypt_queue *zq, struct ap_message *reply, struct ep11_urb *xcRB) { - struct type86_ep11_reply *msg = reply->message; + struct type86_ep11_reply *msg = reply->msg; switch (msg->hdr.type) { case TYPE82_RSP_CODE: @@ -883,7 +881,7 @@ static int convert_response_rng(struct zcrypt_queue *zq, struct ap_message *reply, char *data) { - struct type86x_reply *msg = reply->message; + struct type86x_reply *msg = reply->msg; switch (msg->hdr.type) { case TYPE82_RSP_CODE: @@ -928,32 +926,30 @@ static void zcrypt_msgtype6_receive(struct ap_queue *aq, struct response_type *resp_type = (struct response_type *) msg->private; struct type86x_reply *t86r; - int length; + int len; /* Copy the reply message to the request message buffer. 
*/ if (!reply) goto out; /* ap_msg->rc indicates the error */ - t86r = reply->message; + t86r = reply->msg; if (t86r->hdr.type == TYPE86_RSP_CODE && t86r->cprbx.cprb_ver_id == 0x02) { switch (resp_type->type) { case CEXXC_RESPONSE_TYPE_ICA: - length = sizeof(struct type86x_reply) - + t86r->length - 2; - length = min(CEXXC_MAX_ICA_RESPONSE_SIZE, length); - memcpy(msg->message, reply->message, length); + len = sizeof(struct type86x_reply) + t86r->length - 2; + len = min_t(int, CEXXC_MAX_ICA_RESPONSE_SIZE, len); + memcpy(msg->msg, reply->msg, len); break; case CEXXC_RESPONSE_TYPE_XCRB: - length = t86r->fmt2.offset2 + t86r->fmt2.count2; - length = min(MSGTYPE06_MAX_MSG_SIZE, length); - memcpy(msg->message, reply->message, length); + len = t86r->fmt2.offset2 + t86r->fmt2.count2; + len = min_t(int, MSGTYPE06_MAX_MSG_SIZE, len); + memcpy(msg->msg, reply->msg, len); break; default: - memcpy(msg->message, &error_reply, - sizeof(error_reply)); + memcpy(msg->msg, &error_reply, sizeof(error_reply)); } } else - memcpy(msg->message, reply->message, sizeof(error_reply)); + memcpy(msg->msg, reply->msg, sizeof(error_reply)); out: complete(&(resp_type->work)); } @@ -977,25 +973,25 @@ static void zcrypt_msgtype6_receive_ep11(struct ap_queue *aq, struct response_type *resp_type = (struct response_type *)msg->private; struct type86_ep11_reply *t86r; - int length; + int len; /* Copy the reply message to the request message buffer. */ if (!reply) goto out; /* ap_msg->rc indicates the error */ - t86r = reply->message; + t86r = reply->msg; if (t86r->hdr.type == TYPE86_RSP_CODE && t86r->cprbx.cprb_ver_id == 0x04) { switch (resp_type->type) { case CEXXC_RESPONSE_TYPE_EP11: - length = t86r->fmt2.offset1 + t86r->fmt2.count1; - length = min(MSGTYPE06_MAX_MSG_SIZE, length); - memcpy(msg->message, reply->message, length); + len = t86r->fmt2.offset1 + t86r->fmt2.count1; + len = min_t(int, MSGTYPE06_MAX_MSG_SIZE, len); + memcpy(msg->msg, reply->msg, len); break; default: - memcpy(msg->message, &error_reply, sizeof(error_reply)); + memcpy(msg->msg, &error_reply, sizeof(error_reply)); } } else { - memcpy(msg->message, reply->message, sizeof(error_reply)); + memcpy(msg->msg, reply->msg, sizeof(error_reply)); } out: complete(&(resp_type->work)); @@ -1020,8 +1016,8 @@ static long zcrypt_msgtype6_modexpo(struct zcrypt_queue *zq, int rc; ap_init_message(&ap_msg); - ap_msg.message = (void *) get_zeroed_page(GFP_KERNEL); - if (!ap_msg.message) + ap_msg.msg = (void *) get_zeroed_page(GFP_KERNEL); + if (!ap_msg.msg) return -ENOMEM; ap_msg.receive = zcrypt_msgtype6_receive; ap_msg.psmid = (((unsigned long long) current->pid) << 32) + @@ -1043,7 +1039,7 @@ static long zcrypt_msgtype6_modexpo(struct zcrypt_queue *zq, /* Signal pending. 
*/ ap_cancel_message(zq->queue, &ap_msg); out_free: - free_page((unsigned long) ap_msg.message); + free_page((unsigned long) ap_msg.msg); return rc; } @@ -1064,8 +1060,8 @@ static long zcrypt_msgtype6_modexpo_crt(struct zcrypt_queue *zq, int rc; ap_init_message(&ap_msg); - ap_msg.message = (void *) get_zeroed_page(GFP_KERNEL); - if (!ap_msg.message) + ap_msg.msg = (void *) get_zeroed_page(GFP_KERNEL); + if (!ap_msg.msg) return -ENOMEM; ap_msg.receive = zcrypt_msgtype6_receive; ap_msg.psmid = (((unsigned long long) current->pid) << 32) + @@ -1088,7 +1084,7 @@ static long zcrypt_msgtype6_modexpo_crt(struct zcrypt_queue *zq, ap_cancel_message(zq->queue, &ap_msg); } out_free: - free_page((unsigned long) ap_msg.message); + free_page((unsigned long) ap_msg.msg); return rc; } @@ -1107,8 +1103,8 @@ unsigned int get_cprb_fc(struct ica_xcRB *xcRB, .type = CEXXC_RESPONSE_TYPE_XCRB, }; - ap_msg->message = kmalloc(MSGTYPE06_MAX_MSG_SIZE, GFP_KERNEL); - if (!ap_msg->message) + ap_msg->msg = kmalloc(MSGTYPE06_MAX_MSG_SIZE, GFP_KERNEL); + if (!ap_msg->msg) return -ENOMEM; ap_msg->receive = zcrypt_msgtype6_receive; ap_msg->psmid = (((unsigned long long) current->pid) << 32) + @@ -1162,8 +1158,8 @@ unsigned int get_ep11cprb_fc(struct ep11_urb *xcrb, .type = CEXXC_RESPONSE_TYPE_EP11, }; - ap_msg->message = kmalloc(MSGTYPE06_MAX_MSG_SIZE, GFP_KERNEL); - if (!ap_msg->message) + ap_msg->msg = kmalloc(MSGTYPE06_MAX_MSG_SIZE, GFP_KERNEL); + if (!ap_msg->msg) return -ENOMEM; ap_msg->receive = zcrypt_msgtype6_receive_ep11; ap_msg->psmid = (((unsigned long long) current->pid) << 32) + @@ -1193,7 +1189,7 @@ static long zcrypt_msgtype6_send_ep11_cprb(struct zcrypt_queue *zq, struct ep11_cprb cprbx; unsigned char pld_tag; /* fixed value 0x30 */ unsigned char pld_lenfmt; /* payload length format */ - } __packed * msg = ap_msg->message; + } __packed * msg = ap_msg->msg; struct pld_hdr { unsigned char func_tag; /* fixed value 0x4 */ unsigned char func_len; /* fixed value 0x4 */ @@ -1256,8 +1252,8 @@ unsigned int get_rng_fc(struct ap_message *ap_msg, int *func_code, .type = CEXXC_RESPONSE_TYPE_XCRB, }; - ap_msg->message = kmalloc(MSGTYPE06_MAX_MSG_SIZE, GFP_KERNEL); - if (!ap_msg->message) + ap_msg->msg = kmalloc(MSGTYPE06_MAX_MSG_SIZE, GFP_KERNEL); + if (!ap_msg->msg) return -ENOMEM; ap_msg->receive = zcrypt_msgtype6_receive; ap_msg->psmid = (((unsigned long long) current->pid) << 32) + @@ -1290,7 +1286,7 @@ static long zcrypt_msgtype6_rng(struct zcrypt_queue *zq, char rule[8]; short int verb_length; short int key_length; - } __packed * msg = ap_msg->message; + } __packed * msg = ap_msg->msg; struct response_type *rtype = (struct response_type *)(ap_msg->private); int rc; diff --git a/drivers/s390/crypto/zcrypt_msgtype6.h b/drivers/s390/crypto/zcrypt_msgtype6.h index 41a0df5f070f..0de280a81dd4 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.h +++ b/drivers/s390/crypto/zcrypt_msgtype6.h @@ -127,7 +127,7 @@ static inline void rng_type6CPRB_msgX(struct ap_message *ap_msg, char rule[8]; short int verb_length; short int key_length; - } __packed * msg = ap_msg->message; + } __packed * msg = ap_msg->msg; static struct type6_hdr static_type6_hdrX = { .type = 0x06, .offset1 = 0x00000058, @@ -154,7 +154,7 @@ static inline void rng_type6CPRB_msgX(struct ap_message *ap_msg, memcpy(msg->rule, "RANDOM ", 8); msg->verb_length = 0x02; msg->key_length = 0x02; - ap_msg->length = sizeof(*msg); + ap_msg->len = sizeof(*msg); *domain = (unsigned short)msg->cprbx.domain; } diff --git a/drivers/s390/crypto/zcrypt_queue.c 
b/drivers/s390/crypto/zcrypt_queue.c
index b7d9fa567880..8bae6ad159a7 100644
--- a/drivers/s390/crypto/zcrypt_queue.c
+++ b/drivers/s390/crypto/zcrypt_queue.c
@@ -107,10 +107,10 @@ struct zcrypt_queue *zcrypt_queue_alloc(size_t max_response_size)
 	zq = kzalloc(sizeof(struct zcrypt_queue), GFP_KERNEL);
 	if (!zq)
 		return NULL;
-	zq->reply.message = kmalloc(max_response_size, GFP_KERNEL);
-	if (!zq->reply.message)
+	zq->reply.msg = kmalloc(max_response_size, GFP_KERNEL);
+	if (!zq->reply.msg)
 		goto out_free;
-	zq->reply.length = max_response_size;
+	zq->reply.len = max_response_size;
 	INIT_LIST_HEAD(&zq->list);
 	kref_init(&zq->refcount);
 	return zq;
@@ -123,7 +123,7 @@ EXPORT_SYMBOL(zcrypt_queue_alloc);
 
 void zcrypt_queue_free(struct zcrypt_queue *zq)
 {
-	kfree(zq->reply.message);
+	kfree(zq->reply.msg);
 	kfree(zq);
 }
 EXPORT_SYMBOL(zcrypt_queue_free);

From 7e202acb5c4397b17e275c017f84e4df34314578 Mon Sep 17 00:00:00 2001
From: Harald Freudenberger
Date: Wed, 20 May 2020 16:07:19 +0200
Subject: [PATCH 17/48] s390/zcrypt: split ioctl function into smaller code units

The zcrypt_unlocked_ioctl() function has become large. So split the
four ioctls ICARSAMODEXPO, ICARSACRT, ZSECSENDCPRB and ZSENDEP11CPRB
away into new static functions. This makes the code more readable and
is a preparation step for further improvements needed on these
ioctls.

Signed-off-by: Harald Freudenberger
Signed-off-by: Heiko Carstens
---
 drivers/s390/crypto/zcrypt_api.c | 182 +++++++++++++++++--------------
 1 file changed, 101 insertions(+), 81 deletions(-)

diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c
index 7775ff84f223..4dbbfd88262c 100644
--- a/drivers/s390/crypto/zcrypt_api.c
+++ b/drivers/s390/crypto/zcrypt_api.c
@@ -1298,6 +1298,99 @@ static int zcrypt_requestq_count(void)
 	return requestq_count;
 }
 
+static int icarsamodexpo_ioctl(struct ap_perms *perms, unsigned long arg)
+{
+	int rc;
+	struct ica_rsa_modexpo mex;
+	struct ica_rsa_modexpo __user *umex = (void __user *) arg;
+
+	if (copy_from_user(&mex, umex, sizeof(mex)))
+		return -EFAULT;
+	do {
+		rc = zcrypt_rsa_modexpo(perms, &mex);
+	} while (rc == -EAGAIN);
+	/* on failure: retry once again after a requested rescan */
+	if ((rc == -ENODEV) && (zcrypt_process_rescan()))
+		do {
+			rc = zcrypt_rsa_modexpo(perms, &mex);
+		} while (rc == -EAGAIN);
+	if (rc) {
+		ZCRYPT_DBF(DBF_DEBUG, "ioctl ICARSAMODEXPO rc=%d\n", rc);
+		return rc;
+	}
+	return put_user(mex.outputdatalength, &umex->outputdatalength);
+}
+
+static int icarsacrt_ioctl(struct ap_perms *perms, unsigned long arg)
+{
+	int rc;
+	struct ica_rsa_modexpo_crt crt;
+	struct ica_rsa_modexpo_crt __user *ucrt = (void __user *) arg;
+
+	if (copy_from_user(&crt, ucrt, sizeof(crt)))
+		return -EFAULT;
+	do {
+		rc = zcrypt_rsa_crt(perms, &crt);
+	} while (rc == -EAGAIN);
+	/* on failure: retry once again after a requested rescan */
+	if ((rc == -ENODEV) && (zcrypt_process_rescan()))
+		do {
+			rc = zcrypt_rsa_crt(perms, &crt);
+		} while (rc == -EAGAIN);
+	if (rc) {
+		ZCRYPT_DBF(DBF_DEBUG, "ioctl ICARSACRT rc=%d\n", rc);
+		return rc;
+	}
+	return put_user(crt.outputdatalength, &ucrt->outputdatalength);
+}
+
+static int zsecsendcprb_ioctl(struct ap_perms *perms, unsigned long arg)
+{
+	int rc;
+	struct ica_xcRB xcRB;
+	struct ica_xcRB __user *uxcRB = (void __user *) arg;
+
+	if (copy_from_user(&xcRB, uxcRB, sizeof(xcRB)))
+		return -EFAULT;
+	do {
+		rc = _zcrypt_send_cprb(perms, &xcRB);
+	} while (rc == -EAGAIN);
+	/* on failure: retry once again after a requested rescan */
+	if ((rc == -ENODEV) && 
(zcrypt_process_rescan())) + do { + rc = _zcrypt_send_cprb(perms, &xcRB); + } while (rc == -EAGAIN); + if (rc) + ZCRYPT_DBF(DBF_DEBUG, "ioctl ZSENDCPRB rc=%d status=0x%x\n", + rc, xcRB.status); + if (copy_to_user(uxcRB, &xcRB, sizeof(xcRB))) + return -EFAULT; + return rc; +} + +static int zsendep11cprb_ioctl(struct ap_perms *perms, unsigned long arg) +{ + int rc; + struct ep11_urb xcrb; + struct ep11_urb __user *uxcrb = (void __user *)arg; + + if (copy_from_user(&xcrb, uxcrb, sizeof(xcrb))) + return -EFAULT; + do { + rc = _zcrypt_send_ep11_cprb(perms, &xcrb); + } while (rc == -EAGAIN); + /* on failure: retry once again after a requested rescan */ + if ((rc == -ENODEV) && (zcrypt_process_rescan())) + do { + rc = _zcrypt_send_ep11_cprb(perms, &xcrb); + } while (rc == -EAGAIN); + if (rc) + ZCRYPT_DBF(DBF_DEBUG, "ioctl ZSENDEP11CPRB rc=%d\n", rc); + if (copy_to_user(uxcrb, &xcrb, sizeof(xcrb))) + return -EFAULT; + return rc; +} + static long zcrypt_unlocked_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { @@ -1310,87 +1403,14 @@ static long zcrypt_unlocked_ioctl(struct file *filp, unsigned int cmd, return rc; switch (cmd) { - case ICARSAMODEXPO: { - struct ica_rsa_modexpo __user *umex = (void __user *) arg; - struct ica_rsa_modexpo mex; - - if (copy_from_user(&mex, umex, sizeof(mex))) - return -EFAULT; - do { - rc = zcrypt_rsa_modexpo(perms, &mex); - } while (rc == -EAGAIN); - /* on failure: retry once again after a requested rescan */ - if ((rc == -ENODEV) && (zcrypt_process_rescan())) - do { - rc = zcrypt_rsa_modexpo(perms, &mex); - } while (rc == -EAGAIN); - if (rc) { - ZCRYPT_DBF(DBF_DEBUG, "ioctl ICARSAMODEXPO rc=%d\n", rc); - return rc; - } - return put_user(mex.outputdatalength, &umex->outputdatalength); - } - case ICARSACRT: { - struct ica_rsa_modexpo_crt __user *ucrt = (void __user *) arg; - struct ica_rsa_modexpo_crt crt; - - if (copy_from_user(&crt, ucrt, sizeof(crt))) - return -EFAULT; - do { - rc = zcrypt_rsa_crt(perms, &crt); - } while (rc == -EAGAIN); - /* on failure: retry once again after a requested rescan */ - if ((rc == -ENODEV) && (zcrypt_process_rescan())) - do { - rc = zcrypt_rsa_crt(perms, &crt); - } while (rc == -EAGAIN); - if (rc) { - ZCRYPT_DBF(DBF_DEBUG, "ioctl ICARSACRT rc=%d\n", rc); - return rc; - } - return put_user(crt.outputdatalength, &ucrt->outputdatalength); - } - case ZSECSENDCPRB: { - struct ica_xcRB __user *uxcRB = (void __user *) arg; - struct ica_xcRB xcRB; - - if (copy_from_user(&xcRB, uxcRB, sizeof(xcRB))) - return -EFAULT; - do { - rc = _zcrypt_send_cprb(perms, &xcRB); - } while (rc == -EAGAIN); - /* on failure: retry once again after a requested rescan */ - if ((rc == -ENODEV) && (zcrypt_process_rescan())) - do { - rc = _zcrypt_send_cprb(perms, &xcRB); - } while (rc == -EAGAIN); - if (rc) - ZCRYPT_DBF(DBF_DEBUG, "ioctl ZSENDCPRB rc=%d status=0x%x\n", - rc, xcRB.status); - if (copy_to_user(uxcRB, &xcRB, sizeof(xcRB))) - return -EFAULT; - return rc; - } - case ZSENDEP11CPRB: { - struct ep11_urb __user *uxcrb = (void __user *)arg; - struct ep11_urb xcrb; - - if (copy_from_user(&xcrb, uxcrb, sizeof(xcrb))) - return -EFAULT; - do { - rc = _zcrypt_send_ep11_cprb(perms, &xcrb); - } while (rc == -EAGAIN); - /* on failure: retry once again after a requested rescan */ - if ((rc == -ENODEV) && (zcrypt_process_rescan())) - do { - rc = _zcrypt_send_ep11_cprb(perms, &xcrb); - } while (rc == -EAGAIN); - if (rc) - ZCRYPT_DBF(DBF_DEBUG, "ioctl ZSENDEP11CPRB rc=%d\n", rc); - if (copy_to_user(uxcrb, &xcrb, sizeof(xcrb))) - return -EFAULT; - return rc; 
-	}
+	case ICARSAMODEXPO:
+		return icarsamodexpo_ioctl(perms, arg);
+	case ICARSACRT:
+		return icarsacrt_ioctl(perms, arg);
+	case ZSECSENDCPRB:
+		return zsecsendcprb_ioctl(perms, arg);
+	case ZSENDEP11CPRB:
+		return zsendep11cprb_ioctl(perms, arg);
 	case ZCRYPT_DEVICE_STATUS: {
 		struct zcrypt_device_status_ext *device_status;
 		size_t total_size = MAX_ZDEV_ENTRIES_EXT

From dc4b6ded3c17ebe1d7532943192b2308c031c43b Mon Sep 17 00:00:00 2001
From: Harald Freudenberger
Date: Tue, 26 May 2020 10:49:33 +0200
Subject: [PATCH 18/48] s390/ap: rename and clarify ap state machine related stuff

There is a state machine held for each ap queue device. The states and
functions related to this were sometimes noted with _sm_, sometimes
without. This patch clarifies and renames all the ap queue state
machine related functions, enums and defines to have a _sm_ in the
name. There is no functional change coming with this patch - it's only
beautifying code.

Signed-off-by: Harald Freudenberger
Signed-off-by: Heiko Carstens
---
 drivers/s390/crypto/ap_bus.c   |  18 +--
 drivers/s390/crypto/ap_bus.h   |  58 +++++-----
 drivers/s390/crypto/ap_queue.c | 200 ++++++++++++++++-----------------
 3 files changed, 138 insertions(+), 138 deletions(-)

diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c
index e71ca4a719a5..64fa66788194 100644
--- a/drivers/s390/crypto/ap_bus.c
+++ b/drivers/s390/crypto/ap_bus.c
@@ -342,13 +342,13 @@ static int ap_query_queue(ap_qid_t qid, int *queue_depth, int *device_type,
 	}
 }
 
-void ap_wait(enum ap_wait wait)
+void ap_wait(enum ap_sm_wait wait)
 {
 	ktime_t hr_time;
 
 	switch (wait) {
-	case AP_WAIT_AGAIN:
-	case AP_WAIT_INTERRUPT:
+	case AP_SM_WAIT_AGAIN:
+	case AP_SM_WAIT_INTERRUPT:
 		if (ap_using_interrupts())
 			break;
 		if (ap_poll_kthread) {
@@ -356,7 +356,7 @@ void ap_wait(enum ap_wait wait)
 			break;
 		}
 		fallthrough;
-	case AP_WAIT_TIMEOUT:
+	case AP_SM_WAIT_TIMEOUT:
 		spin_lock_bh(&ap_poll_timer_lock);
 		if (!hrtimer_is_queued(&ap_poll_timer)) {
 			hr_time = poll_timeout;
@@ -365,7 +365,7 @@ void ap_wait(enum ap_wait wait)
 		}
 		spin_unlock_bh(&ap_poll_timer_lock);
 		break;
-	case AP_WAIT_NONE:
+	case AP_SM_WAIT_NONE:
 	default:
 		break;
 	}
@@ -382,7 +382,7 @@ void ap_request_timeout(struct timer_list *t)
 	struct ap_queue *aq = from_timer(aq, t, timeout);
 
 	spin_lock_bh(&aq->lock);
-	ap_wait(ap_sm_event(aq, AP_EVENT_TIMEOUT));
+	ap_wait(ap_sm_event(aq, AP_SM_EVENT_TIMEOUT));
 	spin_unlock_bh(&aq->lock);
 }
 
@@ -418,7 +418,7 @@ static void ap_tasklet_fn(unsigned long dummy)
 {
 	int bkt;
 	struct ap_queue *aq;
-	enum ap_wait wait = AP_WAIT_NONE;
+	enum ap_sm_wait wait = AP_SM_WAIT_NONE;
 
 	/* Reset the indicator if interrupts are used. Thus new interrupts can
 	 * be received. 
Doing it in the beginning of the tasklet is therefor @@ -430,7 +430,7 @@ static void ap_tasklet_fn(unsigned long dummy) spin_lock_bh(&ap_queues_lock); hash_for_each(ap_queues, bkt, aq, hnode) { spin_lock_bh(&aq->lock); - wait = min(wait, ap_sm_event_loop(aq, AP_EVENT_POLL)); + wait = min(wait, ap_sm_event_loop(aq, AP_SM_EVENT_POLL)); spin_unlock_bh(&aq->lock); } spin_unlock_bh(&ap_queues_lock); @@ -1370,7 +1370,7 @@ static void _ap_scan_bus_adapter(int id) borked = 1; else { spin_lock_bh(&aq->lock); - borked = aq->state == AP_STATE_BORKED; + borked = aq->sm_state == AP_SM_STATE_BORKED; spin_unlock_bh(&aq->lock); } if (borked) { diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index 69432e93643a..1a1d5e3c8d45 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -83,39 +83,39 @@ static inline int ap_test_bit(unsigned int *ptr, unsigned int nr) #define AP_INTR_ENABLED 1 /* AP interrupt enabled */ /* - * AP device states + * AP queue state machine states */ -enum ap_state { - AP_STATE_RESET_START, - AP_STATE_RESET_WAIT, - AP_STATE_SETIRQ_WAIT, - AP_STATE_IDLE, - AP_STATE_WORKING, - AP_STATE_QUEUE_FULL, - AP_STATE_REMOVE, /* about to be removed from driver */ - AP_STATE_UNBOUND, /* momentary not bound to a driver */ - AP_STATE_BORKED, /* broken */ - NR_AP_STATES +enum ap_sm_state { + AP_SM_STATE_RESET_START, + AP_SM_STATE_RESET_WAIT, + AP_SM_STATE_SETIRQ_WAIT, + AP_SM_STATE_IDLE, + AP_SM_STATE_WORKING, + AP_SM_STATE_QUEUE_FULL, + AP_SM_STATE_REMOVE, /* about to be removed from driver */ + AP_SM_STATE_UNBOUND, /* momentary not bound to a driver */ + AP_SM_STATE_BORKED, /* broken */ + NR_AP_SM_STATES }; /* - * AP device events + * AP queue state machine events */ -enum ap_event { - AP_EVENT_POLL, - AP_EVENT_TIMEOUT, - NR_AP_EVENTS +enum ap_sm_event { + AP_SM_EVENT_POLL, + AP_SM_EVENT_TIMEOUT, + NR_AP_SM_EVENTS }; /* - * AP wait behaviour + * AP queue state wait behaviour */ -enum ap_wait { - AP_WAIT_AGAIN, /* retry immediately */ - AP_WAIT_TIMEOUT, /* wait for timeout */ - AP_WAIT_INTERRUPT, /* wait for thin interrupt (if available) */ - AP_WAIT_NONE, /* no wait */ - NR_AP_WAIT +enum ap_sm_wait { + AP_SM_WAIT_AGAIN, /* retry immediately */ + AP_SM_WAIT_TIMEOUT, /* wait for timeout */ + AP_SM_WAIT_INTERRUPT, /* wait for thin interrupt (if available) */ + AP_SM_WAIT_NONE, /* no wait */ + NR_AP_SM_WAIT }; struct ap_device; @@ -172,7 +172,7 @@ struct ap_queue { ap_qid_t qid; /* AP queue id. */ int interrupt; /* indicate if interrupts are enabled */ int queue_count; /* # messages currently on AP queue. */ - enum ap_state state; /* State of the AP device. */ + enum ap_sm_state sm_state; /* ap queue state machine state */ int pendingq_count; /* # requests on pendingq list. */ int requestq_count; /* # requests on requestq list. */ u64 total_request_count; /* # requests ever for this AP device.*/ @@ -185,7 +185,7 @@ struct ap_queue { #define to_ap_queue(x) container_of((x), struct ap_queue, ap_dev.device) -typedef enum ap_wait (ap_func_t)(struct ap_queue *queue); +typedef enum ap_sm_wait (ap_func_t)(struct ap_queue *queue); struct ap_message { struct list_head list; /* Request queueing. 
*/ @@ -231,15 +231,15 @@ static inline void ap_release_message(struct ap_message *ap_msg) int ap_send(ap_qid_t, unsigned long long, void *, size_t); int ap_recv(ap_qid_t, unsigned long long *, void *, size_t); -enum ap_wait ap_sm_event(struct ap_queue *aq, enum ap_event event); -enum ap_wait ap_sm_event_loop(struct ap_queue *aq, enum ap_event event); +enum ap_sm_wait ap_sm_event(struct ap_queue *aq, enum ap_sm_event event); +enum ap_sm_wait ap_sm_event_loop(struct ap_queue *aq, enum ap_sm_event event); void ap_queue_message(struct ap_queue *aq, struct ap_message *ap_msg); void ap_cancel_message(struct ap_queue *aq, struct ap_message *ap_msg); void ap_flush_queue(struct ap_queue *aq); void *ap_airq_ptr(void); -void ap_wait(enum ap_wait wait); +void ap_wait(enum ap_sm_wait wait); void ap_request_timeout(struct timer_list *t); void ap_bus_force_rescan(void); diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c index d6cc384f294b..688ebebbf98c 100644 --- a/drivers/s390/crypto/ap_queue.c +++ b/drivers/s390/crypto/ap_queue.c @@ -119,9 +119,9 @@ EXPORT_SYMBOL(ap_recv); /* State machine definitions and helpers */ -static enum ap_wait ap_sm_nop(struct ap_queue *aq) +static enum ap_sm_wait ap_sm_nop(struct ap_queue *aq) { - return AP_WAIT_NONE; + return AP_SM_WAIT_NONE; } /** @@ -129,7 +129,7 @@ static enum ap_wait ap_sm_nop(struct ap_queue *aq) * not change the state of the device. * @aq: pointer to the AP queue * - * Returns AP_WAIT_NONE, AP_WAIT_AGAIN, or AP_WAIT_INTERRUPT + * Returns AP_SM_WAIT_NONE, AP_SM_WAIT_AGAIN, or AP_SM_WAIT_INTERRUPT */ static struct ap_queue_status ap_sm_recv(struct ap_queue *aq) { @@ -172,31 +172,31 @@ static struct ap_queue_status ap_sm_recv(struct ap_queue *aq) * ap_sm_read(): Receive pending reply messages from an AP queue. * @aq: pointer to the AP queue * - * Returns AP_WAIT_NONE, AP_WAIT_AGAIN, or AP_WAIT_INTERRUPT + * Returns AP_SM_WAIT_NONE, AP_SM_WAIT_AGAIN, or AP_SM_WAIT_INTERRUPT */ -static enum ap_wait ap_sm_read(struct ap_queue *aq) +static enum ap_sm_wait ap_sm_read(struct ap_queue *aq) { struct ap_queue_status status; if (!aq->reply) - return AP_WAIT_NONE; + return AP_SM_WAIT_NONE; status = ap_sm_recv(aq); switch (status.response_code) { case AP_RESPONSE_NORMAL: if (aq->queue_count > 0) { - aq->state = AP_STATE_WORKING; - return AP_WAIT_AGAIN; + aq->sm_state = AP_SM_STATE_WORKING; + return AP_SM_WAIT_AGAIN; } - aq->state = AP_STATE_IDLE; - return AP_WAIT_NONE; + aq->sm_state = AP_SM_STATE_IDLE; + return AP_SM_WAIT_NONE; case AP_RESPONSE_NO_PENDING_REPLY: if (aq->queue_count > 0) - return AP_WAIT_INTERRUPT; - aq->state = AP_STATE_IDLE; - return AP_WAIT_NONE; + return AP_SM_WAIT_INTERRUPT; + aq->sm_state = AP_SM_STATE_IDLE; + return AP_SM_WAIT_NONE; default: - aq->state = AP_STATE_BORKED; - return AP_WAIT_NONE; + aq->sm_state = AP_SM_STATE_BORKED; + return AP_SM_WAIT_NONE; } } @@ -204,15 +204,15 @@ static enum ap_wait ap_sm_read(struct ap_queue *aq) * ap_sm_write(): Send messages from the request queue to an AP queue. * @aq: pointer to the AP queue * - * Returns AP_WAIT_NONE, AP_WAIT_AGAIN, or AP_WAIT_INTERRUPT + * Returns AP_SM_WAIT_NONE, AP_SM_WAIT_AGAIN, or AP_SM_WAIT_INTERRUPT */ -static enum ap_wait ap_sm_write(struct ap_queue *aq) +static enum ap_sm_wait ap_sm_write(struct ap_queue *aq) { struct ap_queue_status status; struct ap_message *ap_msg; if (aq->requestq_count <= 0) - return AP_WAIT_NONE; + return AP_SM_WAIT_NONE; /* Start the next request on the queue. 
*/ ap_msg = list_entry(aq->requestq.next, struct ap_message, list); status = __ap_send(aq->qid, ap_msg->psmid, @@ -227,26 +227,26 @@ static enum ap_wait ap_sm_write(struct ap_queue *aq) aq->requestq_count--; aq->pendingq_count++; if (aq->queue_count < aq->card->queue_depth) { - aq->state = AP_STATE_WORKING; - return AP_WAIT_AGAIN; + aq->sm_state = AP_SM_STATE_WORKING; + return AP_SM_WAIT_AGAIN; } fallthrough; case AP_RESPONSE_Q_FULL: - aq->state = AP_STATE_QUEUE_FULL; - return AP_WAIT_INTERRUPT; + aq->sm_state = AP_SM_STATE_QUEUE_FULL; + return AP_SM_WAIT_INTERRUPT; case AP_RESPONSE_RESET_IN_PROGRESS: - aq->state = AP_STATE_RESET_WAIT; - return AP_WAIT_TIMEOUT; + aq->sm_state = AP_SM_STATE_RESET_WAIT; + return AP_SM_WAIT_TIMEOUT; case AP_RESPONSE_MESSAGE_TOO_BIG: case AP_RESPONSE_REQ_FAC_NOT_INST: list_del_init(&ap_msg->list); aq->requestq_count--; ap_msg->rc = -EINVAL; ap_msg->receive(aq, ap_msg, NULL); - return AP_WAIT_AGAIN; + return AP_SM_WAIT_AGAIN; default: - aq->state = AP_STATE_BORKED; - return AP_WAIT_NONE; + aq->sm_state = AP_SM_STATE_BORKED; + return AP_SM_WAIT_NONE; } } @@ -254,9 +254,9 @@ static enum ap_wait ap_sm_write(struct ap_queue *aq) * ap_sm_read_write(): Send and receive messages to/from an AP queue. * @aq: pointer to the AP queue * - * Returns AP_WAIT_NONE, AP_WAIT_AGAIN, or AP_WAIT_INTERRUPT + * Returns AP_SM_WAIT_NONE, AP_SM_WAIT_AGAIN, or AP_SM_WAIT_INTERRUPT */ -static enum ap_wait ap_sm_read_write(struct ap_queue *aq) +static enum ap_sm_wait ap_sm_read_write(struct ap_queue *aq) { return min(ap_sm_read(aq), ap_sm_write(aq)); } @@ -267,7 +267,7 @@ static enum ap_wait ap_sm_read_write(struct ap_queue *aq) * * Submit the Reset command to an AP queue. */ -static enum ap_wait ap_sm_reset(struct ap_queue *aq) +static enum ap_sm_wait ap_sm_reset(struct ap_queue *aq) { struct ap_queue_status status; @@ -275,17 +275,17 @@ static enum ap_wait ap_sm_reset(struct ap_queue *aq) switch (status.response_code) { case AP_RESPONSE_NORMAL: case AP_RESPONSE_RESET_IN_PROGRESS: - aq->state = AP_STATE_RESET_WAIT; + aq->sm_state = AP_SM_STATE_RESET_WAIT; aq->interrupt = AP_INTR_DISABLED; - return AP_WAIT_TIMEOUT; + return AP_SM_WAIT_TIMEOUT; case AP_RESPONSE_BUSY: - return AP_WAIT_TIMEOUT; + return AP_SM_WAIT_TIMEOUT; case AP_RESPONSE_Q_NOT_AVAIL: case AP_RESPONSE_DECONFIGURED: case AP_RESPONSE_CHECKSTOPPED: default: - aq->state = AP_STATE_BORKED; - return AP_WAIT_NONE; + aq->sm_state = AP_SM_STATE_BORKED; + return AP_SM_WAIT_NONE; } } @@ -295,7 +295,7 @@ static enum ap_wait ap_sm_reset(struct ap_queue *aq) * * Returns AP_POLL_IMMEDIATELY, AP_POLL_AFTER_TIMEROUT or 0. */ -static enum ap_wait ap_sm_reset_wait(struct ap_queue *aq) +static enum ap_sm_wait ap_sm_reset_wait(struct ap_queue *aq) { struct ap_queue_status status; void *lsi_ptr; @@ -311,20 +311,20 @@ static enum ap_wait ap_sm_reset_wait(struct ap_queue *aq) case AP_RESPONSE_NORMAL: lsi_ptr = ap_airq_ptr(); if (lsi_ptr && ap_queue_enable_interruption(aq, lsi_ptr) == 0) - aq->state = AP_STATE_SETIRQ_WAIT; + aq->sm_state = AP_SM_STATE_SETIRQ_WAIT; else - aq->state = (aq->queue_count > 0) ? - AP_STATE_WORKING : AP_STATE_IDLE; - return AP_WAIT_AGAIN; + aq->sm_state = (aq->queue_count > 0) ? 
+ AP_SM_STATE_WORKING : AP_SM_STATE_IDLE; + return AP_SM_WAIT_AGAIN; case AP_RESPONSE_BUSY: case AP_RESPONSE_RESET_IN_PROGRESS: - return AP_WAIT_TIMEOUT; + return AP_SM_WAIT_TIMEOUT; case AP_RESPONSE_Q_NOT_AVAIL: case AP_RESPONSE_DECONFIGURED: case AP_RESPONSE_CHECKSTOPPED: default: - aq->state = AP_STATE_BORKED; - return AP_WAIT_NONE; + aq->sm_state = AP_SM_STATE_BORKED; + return AP_SM_WAIT_NONE; } } @@ -334,7 +334,7 @@ static enum ap_wait ap_sm_reset_wait(struct ap_queue *aq) * * Returns AP_POLL_IMMEDIATELY, AP_POLL_AFTER_TIMEROUT or 0. */ -static enum ap_wait ap_sm_setirq_wait(struct ap_queue *aq) +static enum ap_sm_wait ap_sm_setirq_wait(struct ap_queue *aq) { struct ap_queue_status status; @@ -348,75 +348,75 @@ static enum ap_wait ap_sm_setirq_wait(struct ap_queue *aq) if (status.irq_enabled == 1) { /* Irqs are now enabled */ aq->interrupt = AP_INTR_ENABLED; - aq->state = (aq->queue_count > 0) ? - AP_STATE_WORKING : AP_STATE_IDLE; + aq->sm_state = (aq->queue_count > 0) ? + AP_SM_STATE_WORKING : AP_SM_STATE_IDLE; } switch (status.response_code) { case AP_RESPONSE_NORMAL: if (aq->queue_count > 0) - return AP_WAIT_AGAIN; + return AP_SM_WAIT_AGAIN; fallthrough; case AP_RESPONSE_NO_PENDING_REPLY: - return AP_WAIT_TIMEOUT; + return AP_SM_WAIT_TIMEOUT; default: - aq->state = AP_STATE_BORKED; - return AP_WAIT_NONE; + aq->sm_state = AP_SM_STATE_BORKED; + return AP_SM_WAIT_NONE; } } /* * AP state machine jump table */ -static ap_func_t *ap_jumptable[NR_AP_STATES][NR_AP_EVENTS] = { - [AP_STATE_RESET_START] = { - [AP_EVENT_POLL] = ap_sm_reset, - [AP_EVENT_TIMEOUT] = ap_sm_nop, +static ap_func_t *ap_jumptable[NR_AP_SM_STATES][NR_AP_SM_EVENTS] = { + [AP_SM_STATE_RESET_START] = { + [AP_SM_EVENT_POLL] = ap_sm_reset, + [AP_SM_EVENT_TIMEOUT] = ap_sm_nop, }, - [AP_STATE_RESET_WAIT] = { - [AP_EVENT_POLL] = ap_sm_reset_wait, - [AP_EVENT_TIMEOUT] = ap_sm_nop, + [AP_SM_STATE_RESET_WAIT] = { + [AP_SM_EVENT_POLL] = ap_sm_reset_wait, + [AP_SM_EVENT_TIMEOUT] = ap_sm_nop, }, - [AP_STATE_SETIRQ_WAIT] = { - [AP_EVENT_POLL] = ap_sm_setirq_wait, - [AP_EVENT_TIMEOUT] = ap_sm_nop, + [AP_SM_STATE_SETIRQ_WAIT] = { + [AP_SM_EVENT_POLL] = ap_sm_setirq_wait, + [AP_SM_EVENT_TIMEOUT] = ap_sm_nop, }, - [AP_STATE_IDLE] = { - [AP_EVENT_POLL] = ap_sm_write, - [AP_EVENT_TIMEOUT] = ap_sm_nop, + [AP_SM_STATE_IDLE] = { + [AP_SM_EVENT_POLL] = ap_sm_write, + [AP_SM_EVENT_TIMEOUT] = ap_sm_nop, }, - [AP_STATE_WORKING] = { - [AP_EVENT_POLL] = ap_sm_read_write, - [AP_EVENT_TIMEOUT] = ap_sm_reset, + [AP_SM_STATE_WORKING] = { + [AP_SM_EVENT_POLL] = ap_sm_read_write, + [AP_SM_EVENT_TIMEOUT] = ap_sm_reset, }, - [AP_STATE_QUEUE_FULL] = { - [AP_EVENT_POLL] = ap_sm_read, - [AP_EVENT_TIMEOUT] = ap_sm_reset, + [AP_SM_STATE_QUEUE_FULL] = { + [AP_SM_EVENT_POLL] = ap_sm_read, + [AP_SM_EVENT_TIMEOUT] = ap_sm_reset, }, - [AP_STATE_REMOVE] = { - [AP_EVENT_POLL] = ap_sm_nop, - [AP_EVENT_TIMEOUT] = ap_sm_nop, + [AP_SM_STATE_REMOVE] = { + [AP_SM_EVENT_POLL] = ap_sm_nop, + [AP_SM_EVENT_TIMEOUT] = ap_sm_nop, }, - [AP_STATE_UNBOUND] = { - [AP_EVENT_POLL] = ap_sm_nop, - [AP_EVENT_TIMEOUT] = ap_sm_nop, + [AP_SM_STATE_UNBOUND] = { + [AP_SM_EVENT_POLL] = ap_sm_nop, + [AP_SM_EVENT_TIMEOUT] = ap_sm_nop, }, - [AP_STATE_BORKED] = { - [AP_EVENT_POLL] = ap_sm_nop, - [AP_EVENT_TIMEOUT] = ap_sm_nop, + [AP_SM_STATE_BORKED] = { + [AP_SM_EVENT_POLL] = ap_sm_nop, + [AP_SM_EVENT_TIMEOUT] = ap_sm_nop, }, }; -enum ap_wait ap_sm_event(struct ap_queue *aq, enum ap_event event) +enum ap_sm_wait ap_sm_event(struct ap_queue *aq, enum ap_sm_event event) { - return 
ap_jumptable[aq->state][event](aq); + return ap_jumptable[aq->sm_state][event](aq); } -enum ap_wait ap_sm_event_loop(struct ap_queue *aq, enum ap_event event) +enum ap_sm_wait ap_sm_event_loop(struct ap_queue *aq, enum ap_sm_event event) { - enum ap_wait wait; + enum ap_sm_wait wait; - while ((wait = ap_sm_event(aq, event)) == AP_WAIT_AGAIN) + while ((wait = ap_sm_event(aq, event)) == AP_SM_WAIT_AGAIN) ; return wait; } @@ -487,13 +487,13 @@ static ssize_t reset_show(struct device *dev, int rc = 0; spin_lock_bh(&aq->lock); - switch (aq->state) { - case AP_STATE_RESET_START: - case AP_STATE_RESET_WAIT: + switch (aq->sm_state) { + case AP_SM_STATE_RESET_START: + case AP_SM_STATE_RESET_WAIT: rc = scnprintf(buf, PAGE_SIZE, "Reset in progress.\n"); break; - case AP_STATE_WORKING: - case AP_STATE_QUEUE_FULL: + case AP_SM_STATE_WORKING: + case AP_SM_STATE_QUEUE_FULL: rc = scnprintf(buf, PAGE_SIZE, "Reset Timer armed.\n"); break; default: @@ -511,8 +511,8 @@ static ssize_t reset_store(struct device *dev, spin_lock_bh(&aq->lock); __ap_flush_queue(aq); - aq->state = AP_STATE_RESET_START; - ap_wait(ap_sm_event(aq, AP_EVENT_POLL)); + aq->sm_state = AP_SM_STATE_RESET_START; + ap_wait(ap_sm_event(aq, AP_SM_EVENT_POLL)); spin_unlock_bh(&aq->lock); AP_DBF(DBF_INFO, "reset queue=%02x.%04x triggered by user\n", @@ -530,7 +530,7 @@ static ssize_t interrupt_show(struct device *dev, int rc = 0; spin_lock_bh(&aq->lock); - if (aq->state == AP_STATE_SETIRQ_WAIT) + if (aq->sm_state == AP_SM_STATE_SETIRQ_WAIT) rc = scnprintf(buf, PAGE_SIZE, "Enable Interrupt pending.\n"); else if (aq->interrupt == AP_INTR_ENABLED) rc = scnprintf(buf, PAGE_SIZE, "Interrupts enabled.\n"); @@ -587,7 +587,7 @@ struct ap_queue *ap_queue_create(ap_qid_t qid, int device_type) aq->ap_dev.device.type = &ap_queue_type; aq->ap_dev.device_type = device_type; aq->qid = qid; - aq->state = AP_STATE_UNBOUND; + aq->sm_state = AP_SM_STATE_UNBOUND; aq->interrupt = AP_INTR_DISABLED; spin_lock_init(&aq->lock); INIT_LIST_HEAD(&aq->pendingq); @@ -602,7 +602,7 @@ void ap_queue_init_reply(struct ap_queue *aq, struct ap_message *reply) aq->reply = reply; spin_lock_bh(&aq->lock); - ap_wait(ap_sm_event(aq, AP_EVENT_POLL)); + ap_wait(ap_sm_event(aq, AP_SM_EVENT_POLL)); spin_unlock_bh(&aq->lock); } EXPORT_SYMBOL(ap_queue_init_reply); @@ -626,7 +626,7 @@ void ap_queue_message(struct ap_queue *aq, struct ap_message *ap_msg) aq->total_request_count++; atomic64_inc(&aq->card->total_request_count); /* Send/receive as many request from the queue as possible. */ - ap_wait(ap_sm_event_loop(aq, AP_EVENT_POLL)); + ap_wait(ap_sm_event_loop(aq, AP_SM_EVENT_POLL)); spin_unlock_bh(&aq->lock); } EXPORT_SYMBOL(ap_queue_message); @@ -699,7 +699,7 @@ void ap_queue_prepare_remove(struct ap_queue *aq) /* flush queue */ __ap_flush_queue(aq); /* set REMOVE state to prevent new messages are queued in */ - aq->state = AP_STATE_REMOVE; + aq->sm_state = AP_SM_STATE_REMOVE; spin_unlock_bh(&aq->lock); del_timer_sync(&aq->timeout); } @@ -708,22 +708,22 @@ void ap_queue_remove(struct ap_queue *aq) { /* * all messages have been flushed and the state is - * AP_STATE_REMOVE. Now reset with zero which also + * AP_SM_STATE_REMOVE. Now reset with zero which also * clears the irq registration and move the state - * to AP_STATE_UNBOUND to signal that this queue + * to AP_SM_STATE_UNBOUND to signal that this queue * is not used by any driver currently. 
*/
 	spin_lock_bh(&aq->lock);
 	ap_zapq(aq->qid);
-	aq->state = AP_STATE_UNBOUND;
+	aq->sm_state = AP_SM_STATE_UNBOUND;
 	spin_unlock_bh(&aq->lock);
 }
 
 void ap_queue_init_state(struct ap_queue *aq)
 {
 	spin_lock_bh(&aq->lock);
-	aq->state = AP_STATE_RESET_START;
-	ap_wait(ap_sm_event(aq, AP_EVENT_POLL));
+	aq->sm_state = AP_SM_STATE_RESET_START;
+	ap_wait(ap_sm_event(aq, AP_SM_EVENT_POLL));
 	spin_unlock_bh(&aq->lock);
 }
 EXPORT_SYMBOL(ap_queue_init_state);

From a303e88743f6514995c31fe611011935ea7f040c Mon Sep 17 00:00:00 2001
From: Harald Freudenberger
Date: Fri, 12 Jun 2020 10:13:23 +0200
Subject: [PATCH 19/48] s390/zcrypt: provide cex4 cca sysfs attributes for cex3

This patch introduces the sysfs attributes serialnr and mkvps for
cex2c and cex3c cards. These sysfs attributes are available for cex4c
and higher since commit 7c4e91c0959b ("s390/zcrypt: new sysfs
attributes serialnr and mkvps") and this patch now provides the same
for the older cex2 and cex3 cards.

Signed-off-by: Harald Freudenberger
Signed-off-by: Heiko Carstens
---
 drivers/s390/crypto/zcrypt_cex2c.c | 114 +++++++++++++++++++++++++++++
 drivers/s390/crypto/zcrypt_cex4.c  |  26 +++++--
 2 files changed, 132 insertions(+), 8 deletions(-)

diff --git a/drivers/s390/crypto/zcrypt_cex2c.c b/drivers/s390/crypto/zcrypt_cex2c.c
index 993addb726e0..f00127a78bab 100644
--- a/drivers/s390/crypto/zcrypt_cex2c.c
+++ b/drivers/s390/crypto/zcrypt_cex2c.c
@@ -25,6 +25,7 @@
 #include "zcrypt_msgtype6.h"
 #include "zcrypt_cex2c.h"
 #include "zcrypt_cca_key.h"
+#include "zcrypt_ccamisc.h"
 
 #define CEX2C_MIN_MOD_SIZE	16	/* 128 bits	*/
 #define CEX2C_MAX_MOD_SIZE	256	/* 2048 bits	*/
@@ -58,6 +59,91 @@ static struct ap_device_id zcrypt_cex2c_queue_ids[] = {
 
 MODULE_DEVICE_TABLE(ap, zcrypt_cex2c_queue_ids);
 
+/*
+ * CCA card additional device attributes
+ */
+static ssize_t cca_serialnr_show(struct device *dev,
+				 struct device_attribute *attr,
+				 char *buf)
+{
+	struct cca_info ci;
+	struct ap_card *ac = to_ap_card(dev);
+	struct zcrypt_card *zc = ac->private;
+
+	memset(&ci, 0, sizeof(ci));
+
+	if (ap_domain_index >= 0)
+		cca_get_info(ac->id, ap_domain_index, &ci, zc->online);
+
+	return scnprintf(buf, PAGE_SIZE, "%s\n", ci.serial);
+}
+
+static struct device_attribute dev_attr_cca_serialnr =
+	__ATTR(serialnr, 0444, cca_serialnr_show, NULL);
+
+static struct attribute *cca_card_attrs[] = {
+	&dev_attr_cca_serialnr.attr,
+	NULL,
+};
+
+static const struct attribute_group cca_card_attr_grp = {
+	.attrs = cca_card_attrs,
+};
+
+/*
+ * CCA queue additional device attributes
+ */
+static ssize_t cca_mkvps_show(struct device *dev,
+			      struct device_attribute *attr,
+			      char *buf)
+{
+	int n = 0;
+	struct cca_info ci;
+	struct zcrypt_queue *zq = to_ap_queue(dev)->private;
+	static const char * const cao_state[] = { "invalid", "valid" };
+	static const char * const new_state[] = { "empty", "partial", "full" };
+
+	memset(&ci, 0, sizeof(ci));
+
+	cca_get_info(AP_QID_CARD(zq->queue->qid),
+		     AP_QID_QUEUE(zq->queue->qid),
+		     &ci, zq->online);
+
+	if (ci.new_mk_state >= '1' && ci.new_mk_state <= '3')
+		n = scnprintf(buf, PAGE_SIZE, "AES NEW: %s 0x%016llx\n",
+			      new_state[ci.new_mk_state - '1'], ci.new_mkvp);
+	else
+		n = scnprintf(buf, PAGE_SIZE, "AES NEW: - -\n");
+
+	if (ci.cur_mk_state >= '1' && ci.cur_mk_state <= '2')
+		n += scnprintf(buf + n, PAGE_SIZE - n,
+			       "AES CUR: %s 0x%016llx\n",
+			       cao_state[ci.cur_mk_state - '1'], ci.cur_mkvp);
+	else
+		n += scnprintf(buf + n, PAGE_SIZE - n, "AES CUR: - -\n");
+
+	if (ci.old_mk_state >= '1' && ci.old_mk_state <= '2')
+		n += scnprintf(buf + 
n, PAGE_SIZE - n, + "AES OLD: %s 0x%016llx\n", + cao_state[ci.old_mk_state - '1'], ci.old_mkvp); + else + n += scnprintf(buf + n, PAGE_SIZE - n, "AES OLD: - -\n"); + + return n; +} + +static struct device_attribute dev_attr_cca_mkvps = + __ATTR(mkvps, 0444, cca_mkvps_show, NULL); + +static struct attribute *cca_queue_attrs[] = { + &dev_attr_cca_mkvps.attr, + NULL, +}; + +static const struct attribute_group cca_queue_attr_grp = { + .attrs = cca_queue_attrs, +}; + /** * Large random number detection function. Its sends a message to a CEX2C/CEX3C * card to find out if large random numbers are supported. @@ -178,6 +264,17 @@ static int zcrypt_cex2c_card_probe(struct ap_device *ap_dev) if (rc) { ac->private = NULL; zcrypt_card_free(zc); + return rc; + } + + if (ap_test_bit(&ac->functions, AP_FUNC_COPRO)) { + rc = sysfs_create_group(&ap_dev->device.kobj, + &cca_card_attr_grp); + if (rc) { + zcrypt_card_unregister(zc); + ac->private = NULL; + zcrypt_card_free(zc); + } } return rc; @@ -189,8 +286,11 @@ static int zcrypt_cex2c_card_probe(struct ap_device *ap_dev) */ static void zcrypt_cex2c_card_remove(struct ap_device *ap_dev) { + struct ap_card *ac = to_ap_card(&ap_dev->device); struct zcrypt_card *zc = to_ap_card(&ap_dev->device)->private; + if (ap_test_bit(&ac->functions, AP_FUNC_COPRO)) + sysfs_remove_group(&ap_dev->device.kobj, &cca_card_attr_grp); if (zc) zcrypt_card_unregister(zc); } @@ -239,7 +339,19 @@ static int zcrypt_cex2c_queue_probe(struct ap_device *ap_dev) if (rc) { aq->private = NULL; zcrypt_queue_free(zq); + return rc; } + + if (ap_test_bit(&aq->card->functions, AP_FUNC_COPRO)) { + rc = sysfs_create_group(&ap_dev->device.kobj, + &cca_queue_attr_grp); + if (rc) { + zcrypt_queue_unregister(zq); + aq->private = NULL; + zcrypt_queue_free(zq); + } + } + return rc; } @@ -252,6 +364,8 @@ static void zcrypt_cex2c_queue_remove(struct ap_device *ap_dev) struct ap_queue *aq = to_ap_queue(&ap_dev->device); struct zcrypt_queue *zq = aq->private; + if (ap_test_bit(&aq->card->functions, AP_FUNC_COPRO)) + sysfs_remove_group(&ap_dev->device.kobj, &cca_queue_attr_grp); if (zq) zcrypt_queue_unregister(zq); } diff --git a/drivers/s390/crypto/zcrypt_cex4.c b/drivers/s390/crypto/zcrypt_cex4.c index 337ec71ddb58..dc20d983e468 100644 --- a/drivers/s390/crypto/zcrypt_cex4.c +++ b/drivers/s390/crypto/zcrypt_cex4.c @@ -529,22 +529,27 @@ static int zcrypt_cex4_card_probe(struct ap_device *ap_dev) if (rc) { ac->private = NULL; zcrypt_card_free(zc); - goto out; + return rc; } if (ap_test_bit(&ac->functions, AP_FUNC_COPRO)) { rc = sysfs_create_group(&ap_dev->device.kobj, &cca_card_attr_grp); - if (rc) + if (rc) { zcrypt_card_unregister(zc); + ac->private = NULL; + zcrypt_card_free(zc); + } } else if (ap_test_bit(&ac->functions, AP_FUNC_EP11)) { rc = sysfs_create_group(&ap_dev->device.kobj, &ep11_card_attr_grp); - if (rc) + if (rc) { zcrypt_card_unregister(zc); + ac->private = NULL; + zcrypt_card_free(zc); + } } -out: return rc; } @@ -617,22 +622,27 @@ static int zcrypt_cex4_queue_probe(struct ap_device *ap_dev) if (rc) { aq->private = NULL; zcrypt_queue_free(zq); - goto out; + return rc; } if (ap_test_bit(&aq->card->functions, AP_FUNC_COPRO)) { rc = sysfs_create_group(&ap_dev->device.kobj, &cca_queue_attr_grp); - if (rc) + if (rc) { zcrypt_queue_unregister(zq); + aq->private = NULL; + zcrypt_queue_free(zq); + } } else if (ap_test_bit(&aq->card->functions, AP_FUNC_EP11)) { rc = sysfs_create_group(&ap_dev->device.kobj, &ep11_queue_attr_grp); - if (rc) + if (rc) { zcrypt_queue_unregister(zq); + aq->private = 
NULL;
+			zcrypt_queue_free(zq);
+		}
 	}
-out:
 	return rc;
 }

From 776499058167d9f41c8eb468e21fe2d241c0b8e6 Mon Sep 17 00:00:00 2001
From: David Hildenbrand
Date: Wed, 1 Jul 2020 16:18:29 +0200
Subject: [PATCH 20/48] mm/memblock: expose only minimal interface to add/walk
 physmem

"physmem" in the memblock allocator is somewhat weird: it's not
actually used for allocation, it's simply information collected during
boot, which describes the unmodified physical memory map at boot time,
without any standby/hotplugged memory. It's only used on s390 and is
currently the only reason s390 keeps using CONFIG_ARCH_KEEP_MEMBLOCK.

Physmem isn't NUMA-aware and current users don't specify any flags.
Let's hide it from the user, exposing only for_each_physmem(), and
simplify. The interface for physmem is now really minimalistic:
- memblock_physmem_add() to add ranges
- for_each_physmem() / __next_physmem_range() to walk physmem ranges

Don't place it into an __init section and don't discard it without
CONFIG_ARCH_KEEP_MEMBLOCK. As we're reusing __next_mem_range(), remove
the __meminit notifier to avoid section mismatch warnings once
CONFIG_ARCH_KEEP_MEMBLOCK is no longer used with
CONFIG_HAVE_MEMBLOCK_PHYS_MAP.

While fixing up the documentation, sneak in some related cleanups. We
can stop setting CONFIG_ARCH_KEEP_MEMBLOCK for s390 next.

Cc: Vasily Gorbik
Cc: Christian Borntraeger
Cc: Mike Rapoport
Cc: Andrew Morton
Signed-off-by: David Hildenbrand
Reviewed-by: Mike Rapoport
Message-Id: <20200701141830.18749-2-david@redhat.com>
Signed-off-by: Heiko Carstens
---
 arch/s390/kernel/crash_dump.c |  6 ++--
 include/linux/memblock.h      | 28 ++++++++++++++---
 mm/memblock.c                 | 57 ++++++++++++++++++-----------------
 3 files changed, 55 insertions(+), 36 deletions(-)

diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index f96a5857bbfd..c42ce348103c 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -549,8 +549,7 @@ static int get_mem_chunk_cnt(void)
 	int cnt = 0;
 	u64 idx;

-	for_each_mem_range(idx, &memblock.physmem, &oldmem_type, NUMA_NO_NODE,
-			   MEMBLOCK_NONE, NULL, NULL, NULL)
+	for_each_physmem_range(idx, &oldmem_type, NULL, NULL)
 		cnt++;
 	return cnt;
 }
@@ -563,8 +562,7 @@ static void loads_init(Elf64_Phdr *phdr, u64 loads_offset)
 	phys_addr_t start, end;
 	u64 idx;

-	for_each_mem_range(idx, &memblock.physmem, &oldmem_type, NUMA_NO_NODE,
-			   MEMBLOCK_NONE, &start, &end, NULL) {
+	for_each_physmem_range(idx, &oldmem_type, &start, &end) {
 		phdr->p_filesz = end - start;
 		phdr->p_type = PT_LOAD;
 		phdr->p_offset = start;
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 017fae833d4a..9d925db0d355 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -77,16 +77,12 @@ struct memblock_type {
 * @current_limit: physical address of the current allocation limit
 * @memory: usable memory regions
 * @reserved: reserved memory regions
- * @physmem: all physical memory
 */
struct memblock {
	bool bottom_up;  /* is bottom up direction?
*/ phys_addr_t current_limit; struct memblock_type memory; struct memblock_type reserved; -#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP - struct memblock_type physmem; -#endif }; extern struct memblock memblock; @@ -145,6 +141,30 @@ void __next_reserved_mem_region(u64 *idx, phys_addr_t *out_start, void __memblock_free_late(phys_addr_t base, phys_addr_t size); +#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP +static inline void __next_physmem_range(u64 *idx, struct memblock_type *type, + phys_addr_t *out_start, + phys_addr_t *out_end) +{ + extern struct memblock_type physmem; + + __next_mem_range(idx, NUMA_NO_NODE, MEMBLOCK_NONE, &physmem, type, + out_start, out_end, NULL); +} + +/** + * for_each_physmem_range - iterate through physmem areas not included in type. + * @i: u64 used as loop variable + * @type: ptr to memblock_type which excludes from the iteration, can be %NULL + * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL + * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL + */ +#define for_each_physmem_range(i, type, p_start, p_end) \ + for (i = 0, __next_physmem_range(&i, type, p_start, p_end); \ + i != (u64)ULLONG_MAX; \ + __next_physmem_range(&i, type, p_start, p_end)) +#endif /* CONFIG_HAVE_MEMBLOCK_PHYS_MAP */ + /** * for_each_mem_range - iterate through memblock areas from type_a and not * included in type_b. Or just type_a if type_b is NULL. diff --git a/mm/memblock.c b/mm/memblock.c index 39aceafc57f6..45f198750be9 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -44,19 +44,20 @@ * in the system, for instance when the memory is restricted with * ``mem=`` command line parameter * * ``reserved`` - describes the regions that were allocated - * * ``physmap`` - describes the actual physical memory regardless of - * the possible restrictions; the ``physmap`` type is only available - * on some architectures. + * * ``physmem`` - describes the actual physical memory available during + * boot regardless of the possible restrictions and memory hot(un)plug; + * the ``physmem`` type is only available on some architectures. * * Each region is represented by :c:type:`struct memblock_region` that * defines the region extents, its attributes and NUMA node id on NUMA * systems. Every memory type is described by the :c:type:`struct * memblock_type` which contains an array of memory regions along with - * the allocator metadata. The memory types are nicely wrapped with - * :c:type:`struct memblock`. This structure is statically initialzed - * at build time. The region arrays for the "memory" and "reserved" - * types are initially sized to %INIT_MEMBLOCK_REGIONS and for the - * "physmap" type to %INIT_PHYSMEM_REGIONS. + * the allocator metadata. The "memory" and "reserved" types are nicely + * wrapped with :c:type:`struct memblock`. This structure is statically + * initialized at build time. The region arrays are initially sized to + * %INIT_MEMBLOCK_REGIONS for "memory" and %INIT_MEMBLOCK_RESERVED_REGIONS + * for "reserved". The region array for "physmem" is initially sized to + * %INIT_PHYSMEM_REGIONS. * The memblock_allow_resize() enables automatic resizing of the region * arrays during addition of new regions. This feature should be used * with care so that memory allocated for the region array will not @@ -87,8 +88,8 @@ * function frees all the memory to the buddy page allocator. * * Unless an architecture enables %CONFIG_ARCH_KEEP_MEMBLOCK, the - * memblock data structures will be discarded after the system - * initialization completes. 
+ * memblock data structures (except "physmem") will be discarded after the + * system initialization completes. */ #ifndef CONFIG_NEED_MULTIPLE_NODES @@ -104,7 +105,7 @@ unsigned long long max_possible_pfn; static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock; static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_RESERVED_REGIONS] __initdata_memblock; #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP -static struct memblock_region memblock_physmem_init_regions[INIT_PHYSMEM_REGIONS] __initdata_memblock; +static struct memblock_region memblock_physmem_init_regions[INIT_PHYSMEM_REGIONS]; #endif struct memblock memblock __initdata_memblock = { @@ -118,17 +119,19 @@ struct memblock memblock __initdata_memblock = { .reserved.max = INIT_MEMBLOCK_RESERVED_REGIONS, .reserved.name = "reserved", -#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP - .physmem.regions = memblock_physmem_init_regions, - .physmem.cnt = 1, /* empty dummy entry */ - .physmem.max = INIT_PHYSMEM_REGIONS, - .physmem.name = "physmem", -#endif - .bottom_up = false, .current_limit = MEMBLOCK_ALLOC_ANYWHERE, }; +#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP +struct memblock_type physmem = { + .regions = memblock_physmem_init_regions, + .cnt = 1, /* empty dummy entry */ + .max = INIT_PHYSMEM_REGIONS, + .name = "physmem", +}; +#endif + int memblock_debug __initdata_memblock; static bool system_has_some_mirror __initdata_memblock = false; static int memblock_can_resize __initdata_memblock; @@ -838,7 +841,7 @@ int __init_memblock memblock_physmem_add(phys_addr_t base, phys_addr_t size) memblock_dbg("%s: [%pa-%pa] %pS\n", __func__, &base, &end, (void *)_RET_IP_); - return memblock_add_range(&memblock.physmem, base, size, MAX_NUMNODES, 0); + return memblock_add_range(&physmem, base, size, MAX_NUMNODES, 0); } #endif @@ -1019,12 +1022,10 @@ static bool should_skip_region(struct memblock_region *m, int nid, int flags) * As both region arrays are sorted, the function advances the two indices * in lockstep and returns each intersection. */ -void __init_memblock __next_mem_range(u64 *idx, int nid, - enum memblock_flags flags, - struct memblock_type *type_a, - struct memblock_type *type_b, - phys_addr_t *out_start, - phys_addr_t *out_end, int *out_nid) +void __next_mem_range(u64 *idx, int nid, enum memblock_flags flags, + struct memblock_type *type_a, + struct memblock_type *type_b, phys_addr_t *out_start, + phys_addr_t *out_end, int *out_nid) { int idx_a = *idx & 0xffffffff; int idx_b = *idx >> 32; @@ -1924,7 +1925,7 @@ void __init_memblock __memblock_dump_all(void) memblock_dump(&memblock.memory); memblock_dump(&memblock.reserved); #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP - memblock_dump(&memblock.physmem); + memblock_dump(&physmem); #endif } @@ -2064,8 +2065,8 @@ static int __init memblock_init_debugfs(void) debugfs_create_file("reserved", 0444, root, &memblock.reserved, &memblock_debug_fops); #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP - debugfs_create_file("physmem", 0444, root, - &memblock.physmem, &memblock_debug_fops); + debugfs_create_file("physmem", 0444, root, &physmem, + &memblock_debug_fops); #endif return 0; From fa49066fc326b78e7141d68387179f8968e0e1f0 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 1 Jul 2020 16:18:30 +0200 Subject: [PATCH 21/48] s390/mm: don't set ARCH_KEEP_MEMBLOCK Commit 50be63450728 ("s390/mm: Convert bootmem to memblock") mentions "The original bootmem allocator is getting replaced by memblock. 
To cover the needs of the s390 kdump implementation the physical
memory list is used."

As we can now reference "physmem" managed in the memblock allocator
after init even without ARCH_KEEP_MEMBLOCK, and s390x no longer needs
other memblock metadata after boot (notably, the zcore memmap device
that used it got removed), we can stop setting ARCH_KEEP_MEMBLOCK.

With this change, we no longer create memblocks for standby/hotplugged
memory (added via add_memory()) and free up memblock metadata (except
physmem) after boot.

Cc: Vasily Gorbik
Cc: Christian Borntraeger
Cc: Philipp Rudo
Cc: Mike Rapoport
Cc: Andrew Morton
Signed-off-by: David Hildenbrand
Message-Id: <20200701141830.18749-3-david@redhat.com>
Signed-off-by: Heiko Carstens
---
 arch/s390/Kconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index c7d7ede6300c..7697a1f8e819 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -102,7 +102,6 @@ config S390
 	select ARCH_INLINE_WRITE_UNLOCK_BH
 	select ARCH_INLINE_WRITE_UNLOCK_IRQ
 	select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
-	select ARCH_KEEP_MEMBLOCK
 	select ARCH_STACKWALK
 	select ARCH_SUPPORTS_ATOMIC_RMW
 	select ARCH_SUPPORTS_NUMA_BALANCING

From c8337c47deb9338417c61e7a6ba7de690eb1d300 Mon Sep 17 00:00:00 2001
From: Harald Freudenberger
Date: Wed, 1 Jul 2020 12:40:39 +0200
Subject: [PATCH 22/48] s390/ap: rework crypto config info and default domain
 code

Rework of the QCI crypto info and how it is used. This is only an
internal rework and does not affect how the ap bus acts with ap card
and queue devices and domain handling.

Tested on z15, z14, z12 (QCI support) and z196 (no QCI support).

Signed-off-by: Harald Freudenberger
Signed-off-by: Heiko Carstens
---
 drivers/s390/crypto/ap_bus.c | 305 ++++++++++++++++++-----------------
 1 file changed, 155 insertions(+), 150 deletions(-)

diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c
index 64fa66788194..f218a0b67ed5 100644
--- a/drivers/s390/crypto/ap_bus.c
+++ b/drivers/s390/crypto/ap_bus.c
@@ -73,8 +73,7 @@ EXPORT_SYMBOL(ap_perms);
 DEFINE_MUTEX(ap_perms_mutex);
 EXPORT_SYMBOL(ap_perms_mutex);

-static struct ap_config_info *ap_configuration;
-static bool initialised;
+static struct ap_config_info *ap_qci_info;

 /*
 * AP bus related debug feature things.
@@ -105,8 +104,10 @@ static struct hrtimer ap_poll_timer;
 */
static unsigned long long poll_timeout = 250000;

-/* Maximum domain id */
-static int ap_max_domain_id;
+/* Maximum domain id, if not given via qci */
+static int ap_max_domain_id = 15;
+/* Maximum adapter id, if not given via qci */
+static int ap_max_adapter_id = 63;

static struct bus_type ap_bus_type;

@@ -154,12 +155,12 @@ static int ap_interrupts_available(void)
}

/**
- * ap_configuration_available(): Test if AP configuration
- * information is available.
+ * ap_qci_available(): Test if AP configuration
+ * information can be queried via QCI subfunction.
 *
- * Returns 1 if AP configuration information is available.
+ * Returns 1 if subfunction PQAP(QCI) is available.
*/ -static int ap_configuration_available(void) +static int ap_qci_available(void) { return test_facility(12); } @@ -182,22 +183,22 @@ static int ap_apft_available(void) */ static inline int ap_qact_available(void) { - if (ap_configuration) - return ap_configuration->qact; + if (ap_qci_info) + return ap_qci_info->qact; return 0; } /* - * ap_query_configuration(): Fetch cryptographic config info + * ap_fetch_qci_info(): Fetch cryptographic config info * * Returns the ap configuration info fetched via PQAP(QCI). * On success 0 is returned, on failure a negative errno * is returned, e.g. if the PQAP(QCI) instruction is not * available, the return value will be -EOPNOTSUPP. */ -static inline int ap_query_configuration(struct ap_config_info *info) +static inline int ap_fetch_qci_info(struct ap_config_info *info) { - if (!ap_configuration_available()) + if (!ap_qci_available()) return -EOPNOTSUPP; if (!info) return -EINVAL; @@ -205,20 +206,39 @@ static inline int ap_query_configuration(struct ap_config_info *info) } /** - * ap_init_configuration(): Allocate and query configuration array. - */ -static void ap_init_configuration(void) -{ - if (!ap_configuration_available()) - return; + * ap_init_qci_info(): Allocate and query qci config info. + * Does also update the static variables ap_max_domain_id + * and ap_max_adapter_id if this info is available. - ap_configuration = kzalloc(sizeof(*ap_configuration), GFP_KERNEL); - if (!ap_configuration) + */ +static void __init ap_init_qci_info(void) +{ + if (!ap_qci_available()) { + AP_DBF(DBF_INFO, "%s QCI not supported\n", __func__); return; - if (ap_query_configuration(ap_configuration) != 0) { - kfree(ap_configuration); - ap_configuration = NULL; + } + + ap_qci_info = kzalloc(sizeof(*ap_qci_info), GFP_KERNEL); + if (!ap_qci_info) return; + if (ap_fetch_qci_info(ap_qci_info) != 0) { + kfree(ap_qci_info); + ap_qci_info = NULL; + return; + } + AP_DBF(DBF_INFO, "%s successful fetched initial qci info\n", __func__); + + if (ap_qci_info->apxa) { + if (ap_qci_info->Na) { + ap_max_adapter_id = ap_qci_info->Na; + AP_DBF(DBF_INFO, "%s new ap_max_adapter_id is %d\n", + __func__, ap_max_adapter_id); + } + if (ap_qci_info->Nd) { + ap_max_domain_id = ap_qci_info->Nd; + AP_DBF(DBF_INFO, "%s new ap_max_domain_id is %d\n", + __func__, ap_max_domain_id); + } } } @@ -233,7 +253,6 @@ static inline int ap_test_config(unsigned int *field, unsigned int nr) /* * ap_test_config_card_id(): Test, whether an AP card ID is configured. - * @id AP card ID * * Returns 0 if the card is not configured * 1 if the card is configured or @@ -241,16 +260,16 @@ static inline int ap_test_config(unsigned int *field, unsigned int nr) */ static inline int ap_test_config_card_id(unsigned int id) { - if (!ap_configuration) /* QCI not supported */ - /* only ids 0...3F may be probed */ - return id < 0x40 ? 1 : 0; - return ap_test_config(ap_configuration->apm, id); + if (id > ap_max_adapter_id) + return 0; + if (ap_qci_info) + return ap_test_config(ap_qci_info->apm, id); + return 1; } /* * ap_test_config_usage_domain(): Test, whether an AP usage domain * is configured. 
- * @domain AP usage domain ID
 *
 * Returns 0 if the usage domain is not configured
 *	   1 if the usage domain is configured or
@@ -258,9 +277,11 @@ static inline int ap_test_config_card_id(unsigned int id)
 */
int ap_test_config_usage_domain(unsigned int domain)
{
-	if (!ap_configuration)	/* QCI not supported */
-		return domain < 16;
-	return ap_test_config(ap_configuration->aqm, domain);
+	if (domain > ap_max_domain_id)
+		return 0;
+	if (ap_qci_info)
+		return ap_test_config(ap_qci_info->aqm, domain);
+	return 1;
}
EXPORT_SYMBOL(ap_test_config_usage_domain);

@@ -274,43 +295,44 @@ EXPORT_SYMBOL(ap_test_config_usage_domain);
 */
int ap_test_config_ctrl_domain(unsigned int domain)
{
-	if (!ap_configuration)	/* QCI not supported */
+	if (!ap_qci_info || domain > ap_max_domain_id)
		return 0;
-	return ap_test_config(ap_configuration->adm, domain);
+	return ap_test_config(ap_qci_info->adm, domain);
}
EXPORT_SYMBOL(ap_test_config_ctrl_domain);

-/**
- * ap_query_queue(): Check if an AP queue is available.
- * @qid: The AP queue number
- * @queue_depth: Pointer to queue depth value
- * @device_type: Pointer to device type value
- * @facilities: Pointer to facility indicator
+/*
+ * ap_queue_info(): Check and get AP queue info.
+ * Returns true if TAPQ succeeded and the info is filled,
+ * false otherwise.
 */
-static int ap_query_queue(ap_qid_t qid, int *queue_depth, int *device_type,
-			  unsigned int *facilities)
+static bool ap_queue_info(ap_qid_t qid, int *q_type,
+			  unsigned int *q_fac, int *q_depth)
{
	struct ap_queue_status status;
-	unsigned long info;
-	int nd;
+	unsigned long info = 0;

-	if (!ap_test_config_card_id(AP_QID_CARD(qid)))
-		return -ENODEV;
+	/* make sure we don't run into a specification exception */
+	if (AP_QID_CARD(qid) > ap_max_adapter_id ||
+	    AP_QID_QUEUE(qid) > ap_max_domain_id)
+		return false;

+	/* call TAPQ on this APQN */
	status = ap_test_queue(qid, ap_apft_available(), &info);
	switch (status.response_code) {
	case AP_RESPONSE_NORMAL:
-		*queue_depth = (int)(info & 0xff);
-		*device_type = (int)((info >> 24) & 0xff);
-		*facilities = (unsigned int)(info >> 32);
-		/* Update maximum domain id */
-		nd = (info >> 16) & 0xff;
-		/* if N bit is available, z13 and newer */
-		if ((info & (1UL << 57)) && nd > 0)
-			ap_max_domain_id = nd;
-		else /* older machine types */
-			ap_max_domain_id = 15;
-		switch (*device_type) {
+	case AP_RESPONSE_RESET_IN_PROGRESS:
+		/*
+		 * According to the architecture the info is filled in all
+		 * these cases. An all-zero info value is not possible, as
+		 * at least one of the mode bits is set.
+		 */
+		if (WARN_ON_ONCE(!info))
+			return false;
+		*q_type = (int)((info >> 24) & 0xff);
+		*q_fac = (unsigned int)(info >> 32);
+		*q_depth = (int)(info & 0xff);
+		switch (*q_type) {
		/* For CEX2 and CEX3 the available functions
		 * are not reflected by the facilities bits.
		 * Instead it is coded into the type.
So here @@ -318,27 +340,21 @@ static int ap_query_queue(ap_qid_t qid, int *queue_depth, int *device_type, */ case AP_DEVICE_TYPE_CEX2A: case AP_DEVICE_TYPE_CEX3A: - *facilities |= 0x08000000; + *q_fac |= 0x08000000; break; case AP_DEVICE_TYPE_CEX2C: case AP_DEVICE_TYPE_CEX3C: - *facilities |= 0x10000000; + *q_fac |= 0x10000000; break; default: break; } - return 0; - case AP_RESPONSE_Q_NOT_AVAIL: - case AP_RESPONSE_DECONFIGURED: - case AP_RESPONSE_CHECKSTOPPED: - case AP_RESPONSE_INVALID_ADDRESS: - return -ENODEV; - case AP_RESPONSE_RESET_IN_PROGRESS: - case AP_RESPONSE_OTHERWISE_CHANGED: - case AP_RESPONSE_BUSY: - return -EBUSY; + return true; default: - BUG(); + /* + * A response code which indicates, there is no info available. + */ + return false; } } @@ -751,9 +767,6 @@ int ap_driver_register(struct ap_driver *ap_drv, struct module *owner, { struct device_driver *drv = &ap_drv->driver; - if (!initialised) - return -ENODEV; - drv->bus = &ap_bus_type; drv->probe = ap_device_probe; drv->remove = ap_device_remove; @@ -929,11 +942,12 @@ static ssize_t ap_domain_store(struct bus_type *bus, domain < 0 || domain > ap_max_domain_id || !test_bit_inv(domain, ap_perms.aqm)) return -EINVAL; + spin_lock_bh(&ap_domain_lock); ap_domain_index = domain; spin_unlock_bh(&ap_domain_lock); - AP_DBF(DBF_DEBUG, "stored new default domain=%d\n", domain); + AP_DBF(DBF_INFO, "stored new default domain=%d\n", domain); return count; } @@ -942,45 +956,45 @@ static BUS_ATTR_RW(ap_domain); static ssize_t ap_control_domain_mask_show(struct bus_type *bus, char *buf) { - if (!ap_configuration) /* QCI not supported */ + if (!ap_qci_info) /* QCI not supported */ return scnprintf(buf, PAGE_SIZE, "not supported\n"); return scnprintf(buf, PAGE_SIZE, "0x%08x%08x%08x%08x%08x%08x%08x%08x\n", - ap_configuration->adm[0], ap_configuration->adm[1], - ap_configuration->adm[2], ap_configuration->adm[3], - ap_configuration->adm[4], ap_configuration->adm[5], - ap_configuration->adm[6], ap_configuration->adm[7]); + ap_qci_info->adm[0], ap_qci_info->adm[1], + ap_qci_info->adm[2], ap_qci_info->adm[3], + ap_qci_info->adm[4], ap_qci_info->adm[5], + ap_qci_info->adm[6], ap_qci_info->adm[7]); } static BUS_ATTR_RO(ap_control_domain_mask); static ssize_t ap_usage_domain_mask_show(struct bus_type *bus, char *buf) { - if (!ap_configuration) /* QCI not supported */ + if (!ap_qci_info) /* QCI not supported */ return scnprintf(buf, PAGE_SIZE, "not supported\n"); return scnprintf(buf, PAGE_SIZE, "0x%08x%08x%08x%08x%08x%08x%08x%08x\n", - ap_configuration->aqm[0], ap_configuration->aqm[1], - ap_configuration->aqm[2], ap_configuration->aqm[3], - ap_configuration->aqm[4], ap_configuration->aqm[5], - ap_configuration->aqm[6], ap_configuration->aqm[7]); + ap_qci_info->aqm[0], ap_qci_info->aqm[1], + ap_qci_info->aqm[2], ap_qci_info->aqm[3], + ap_qci_info->aqm[4], ap_qci_info->aqm[5], + ap_qci_info->aqm[6], ap_qci_info->aqm[7]); } static BUS_ATTR_RO(ap_usage_domain_mask); static ssize_t ap_adapter_mask_show(struct bus_type *bus, char *buf) { - if (!ap_configuration) /* QCI not supported */ + if (!ap_qci_info) /* QCI not supported */ return scnprintf(buf, PAGE_SIZE, "not supported\n"); return scnprintf(buf, PAGE_SIZE, "0x%08x%08x%08x%08x%08x%08x%08x%08x\n", - ap_configuration->apm[0], ap_configuration->apm[1], - ap_configuration->apm[2], ap_configuration->apm[3], - ap_configuration->apm[4], ap_configuration->apm[5], - ap_configuration->apm[6], ap_configuration->apm[7]); + ap_qci_info->apm[0], ap_qci_info->apm[1], + ap_qci_info->apm[2], ap_qci_info->apm[3], + 
ap_qci_info->apm[4], ap_qci_info->apm[5], + ap_qci_info->apm[6], ap_qci_info->apm[7]); } static BUS_ATTR_RO(ap_adapter_mask); @@ -1066,17 +1080,18 @@ static BUS_ATTR_RW(poll_timeout); static ssize_t ap_max_domain_id_show(struct bus_type *bus, char *buf) { - int max_domain_id; - - if (ap_configuration) - max_domain_id = ap_max_domain_id ? : -1; - else - max_domain_id = 15; - return scnprintf(buf, PAGE_SIZE, "%d\n", max_domain_id); + return scnprintf(buf, PAGE_SIZE, "%d\n", ap_max_domain_id); } static BUS_ATTR_RO(ap_max_domain_id); +static ssize_t ap_max_adapter_id_show(struct bus_type *bus, char *buf) +{ + return scnprintf(buf, PAGE_SIZE, "%d\n", ap_max_adapter_id); +} + +static BUS_ATTR_RO(ap_max_adapter_id); + static ssize_t apmask_show(struct bus_type *bus, char *buf) { int rc; @@ -1149,6 +1164,7 @@ static struct bus_attribute *const ap_bus_attrs[] = { &bus_attr_ap_interrupts, &bus_attr_poll_timeout, &bus_attr_ap_max_domain_id, + &bus_attr_ap_max_adapter_id, &bus_attr_apmask, &bus_attr_aqmask, NULL, @@ -1160,47 +1176,42 @@ static struct bus_attribute *const ap_bus_attrs[] = { */ static void ap_select_domain(void) { - int count, max_count, best_domain; struct ap_queue_status status; - int i, j; + int card, dom; /* - * We want to use a single domain. Either the one specified with - * the "domain=" parameter or the domain with the maximum number - * of devices. + * Choose the default domain. Either the one specified with + * the "domain=" parameter or the first domain with at least + * one valid APQN. */ spin_lock_bh(&ap_domain_lock); if (ap_domain_index >= 0) { /* Domain has already been selected. */ - spin_unlock_bh(&ap_domain_lock); - return; + goto out; } - best_domain = -1; - max_count = 0; - for (i = 0; i < AP_DOMAINS; i++) { - if (!ap_test_config_usage_domain(i) || - !test_bit_inv(i, ap_perms.aqm)) + for (dom = 0; dom <= ap_max_domain_id; dom++) { + if (!ap_test_config_usage_domain(dom) || + !test_bit_inv(dom, ap_perms.aqm)) continue; - count = 0; - for (j = 0; j < AP_DEVICES; j++) { - if (!ap_test_config_card_id(j)) + for (card = 0; card <= ap_max_adapter_id; card++) { + if (!ap_test_config_card_id(card) || + !test_bit_inv(card, ap_perms.apm)) continue; - status = ap_test_queue(AP_MKQID(j, i), + status = ap_test_queue(AP_MKQID(card, dom), ap_apft_available(), NULL); - if (status.response_code != AP_RESPONSE_NORMAL) - continue; - count++; - } - if (count > max_count) { - max_count = count; - best_domain = i; + if (status.response_code == AP_RESPONSE_NORMAL) + break; } + if (card <= ap_max_adapter_id) + break; } - if (best_domain >= 0) { - ap_domain_index = best_domain; - AP_DBF(DBF_DEBUG, "new ap_domain_index=%d\n", ap_domain_index); + if (dom <= ap_max_domain_id) { + ap_domain_index = dom; + AP_DBF(DBF_DEBUG, "%s new default domain is %d\n", + __func__, ap_domain_index); } +out: spin_unlock_bh(&ap_domain_lock); } @@ -1279,12 +1290,13 @@ static int __match_queue_device_with_queue_id(struct device *dev, const void *da */ static void _ap_scan_bus_adapter(int id) { + bool broken; ap_qid_t qid; unsigned int func; struct ap_card *ac; struct device *dev; struct ap_queue *aq; - int rc, dom, depth, type, comp_type, borked; + int rc, dom, depth, type, comp_type; /* check if there is a card device registered with this id */ dev = bus_find_device(&ap_bus_type, NULL, @@ -1312,23 +1324,23 @@ static void _ap_scan_bus_adapter(int id) /* find the first valid queue */ for (dom = 0; dom < AP_DOMAINS; dom++) { qid = AP_MKQID(id, dom); - if (ap_query_queue(qid, &depth, &type, &func) == 0) + if 
(ap_queue_info(qid, &type, &func, &depth)) break; } - borked = 0; + broken = false; if (dom >= AP_DOMAINS) { /* no accessible queue on this card */ - borked = 1; + broken = true; } else if (ac->raw_hwtype != type) { /* card type has changed */ AP_DBF(DBF_INFO, "card=%02x type changed.\n", id); - borked = 1; + broken = true; } else if (ac->functions != func) { /* card functions have changed */ AP_DBF(DBF_INFO, "card=%02x functions changed.\n", id); - borked = 1; + broken = true; } - if (borked) { + if (broken) { /* unregister card device and associated queues */ bus_for_each_dev(&ap_bus_type, NULL, (void *)(long) id, @@ -1364,16 +1376,14 @@ static void _ap_scan_bus_adapter(int id) continue; } /* try to fetch infos about this queue */ - rc = ap_query_queue(qid, &depth, &type, &func); + broken = !ap_queue_info(qid, &type, &func, &depth); if (dev) { - if (rc == -ENODEV) - borked = 1; - else { + if (!broken) { spin_lock_bh(&aq->lock); - borked = aq->sm_state == AP_SM_STATE_BORKED; + broken = aq->sm_state == AP_SM_STATE_BORKED; spin_unlock_bh(&aq->lock); } - if (borked) { + if (broken) { /* Remove broken device */ AP_DBF(DBF_DEBUG, "removing broken queue=%02x.%04x\n", @@ -1383,7 +1393,7 @@ static void _ap_scan_bus_adapter(int id) put_device(dev); continue; } - if (rc) + if (broken) continue; /* a new queue device is needed, check out comp type */ comp_type = ap_get_compatible_type(qid, type, func); @@ -1435,11 +1445,11 @@ static void ap_scan_bus(struct work_struct *unused) { int id; - AP_DBF(DBF_DEBUG, "%s running\n", __func__); - - ap_query_configuration(ap_configuration); + ap_fetch_qci_info(ap_qci_info); ap_select_domain(); + AP_DBF(DBF_DEBUG, "%s running\n", __func__); + /* loop over all possible adapters */ for (id = 0; id < AP_DEVICES; id++) _ap_scan_bus_adapter(id); @@ -1505,7 +1515,6 @@ static void __init ap_perms_init(void) */ static int __init ap_module_init(void) { - int max_domain_id; int rc, i; rc = ap_debug_init(); @@ -1524,14 +1533,10 @@ static int __init ap_module_init(void) ap_perms_init(); /* Get AP configuration data if available */ - ap_init_configuration(); + ap_init_qci_info(); - if (ap_configuration) - max_domain_id = - ap_max_domain_id ? ap_max_domain_id : AP_DOMAINS - 1; - else - max_domain_id = 15; - if (ap_domain_index < -1 || ap_domain_index > max_domain_id || + /* check default domain setting */ + if (ap_domain_index < -1 || ap_domain_index > ap_max_domain_id || (ap_domain_index >= 0 && !test_bit_inv(ap_domain_index, ap_perms.aqm))) { pr_warn("%d is not a valid cryptographic domain\n", @@ -1539,6 +1544,7 @@ static int __init ap_module_init(void) ap_domain_index = -1; } + /* enable interrupts if available */ if (ap_interrupts_available()) { rc = register_adapter_interrupt(&ap_airq); ap_airq_flag = (rc == 0); @@ -1581,7 +1587,6 @@ static int __init ap_module_init(void) } queue_work(system_long_wq, &ap_scan_work); - initialised = true; return 0; @@ -1595,7 +1600,7 @@ out_bus: out: if (ap_using_interrupts()) unregister_adapter_interrupt(&ap_airq); - kfree(ap_configuration); + kfree(ap_qci_info); return rc; } device_initcall(ap_module_init); From 7b7735c5be473473d7a4b9e31460ed8e129dcb36 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 7 Jul 2020 14:07:53 +0200 Subject: [PATCH 23/48] s390: fix comment regarding interrupts in svc With the removal of the critical section cleanup, we now enter the svc interrupt handler with interrupts disabled. 
Fixes: 0b0ed657fe00 ("s390: remove critical section cleanup from entry.S") Signed-off-by: Christian Borntraeger Signed-off-by: Heiko Carstens --- arch/s390/kernel/entry.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 969b35b177dd..23edf196d3dc 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -370,7 +370,7 @@ EXPORT_SYMBOL(sie_exit) /* * SVC interrupt handler routine. System calls are synchronous events and - * are executed with interrupts enabled. + * are entered with interrupts disabled. */ ENTRY(system_call) From 6589c93f99894e007a1260f009018effc958ab69 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Wed, 8 Jul 2020 11:21:25 +0200 Subject: [PATCH 24/48] s390: add trace events for idle enter/exit Helpful for debugging. Signed-off-by: Sven Schnelle Reviewed-by: Heiko Carstens Signed-off-by: Heiko Carstens --- arch/s390/kernel/idle.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c index 0d7fbdfe995a..88bb42ca5008 100644 --- a/arch/s390/kernel/idle.c +++ b/arch/s390/kernel/idle.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include "entry.h" @@ -32,11 +33,12 @@ void enabled_wait(void) PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; clear_cpu_flag(CIF_NOHZ_DELAY); + trace_cpu_idle_rcuidle(1, smp_processor_id()); local_irq_save(flags); /* Call the assembler magic in entry.S */ psw_idle(idle, psw_mask); local_irq_restore(flags); - + trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); /* Account time spent with enabled wait psw loaded as idle time. */ write_seqcount_begin(&idle->seqcount); From 7904aaa8b22fa07fd5457ee4a885cf9f665cb9c4 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 14 Jul 2020 07:43:26 +0200 Subject: [PATCH 25/48] s390/mm: fix typo in comment Signed-off-by: Heiko Carstens --- arch/s390/mm/fault.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index d53c2e2ea1fd..598828517d9d 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -376,7 +376,7 @@ static noinline void do_fault_error(struct pt_regs *regs, int access, * routines. * * interruption code (int_code): - * 04 Protection -> Write-Protection (suprression) + * 04 Protection -> Write-Protection (suppression) * 10 Segment translation -> Not present (nullification) * 11 Page translation -> Not present (nullification) * 3b Region third trans. -> Not present (nullification) From 529683d4705b6b1fa1c2f902e859ad6a8d17e31e Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Mon, 15 Jun 2020 17:23:11 +0200 Subject: [PATCH 26/48] s390/qdio: fix statistics for 128 SBALs Old code would only scan up to 127 SBALs at once. So the last statistics bucket was set aside to count "discovered 127 SBALs with new work" events. But nowadays we allow to scan all 128 SBALs for Output Queues, and a subsequent patch will introduce the same for Input Queues. So fix up the accounting to use the last bucket only when all 128 SBALs have been discovered with new work. 
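For illustration, the accounting then reduces to a single ilog2()
bucket lookup; the sketch below is an editor's illustration of the
resulting logic (the helper name account_sbals_sketch is made up, the
patch itself adjusts account_sbals() accordingly):

	/* Illustrative sketch: order-2 bucket accounting, count is 1..128. */
	static inline void account_sbals_sketch(struct qdio_queue_perf_stat *stats,
						unsigned int count)
	{
		stats->nr_sbal_total += count;
		/*
		 * ilog2() maps 1 -> bucket 0, 2-3 -> 1, ..., 64-127 -> 6,
		 * 128 -> 7, so the last bucket is hit only when all
		 * 128 SBALs were found with new work.
		 */
		stats->nr_sbals[ilog2(count)]++;
	}
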
Signed-off-by: Julian Wiedmann Signed-off-by: Heiko Carstens --- drivers/s390/cio/qdio.h | 6 +----- drivers/s390/cio/qdio_debug.c | 2 +- drivers/s390/cio/qdio_main.c | 9 +-------- 3 files changed, 3 insertions(+), 14 deletions(-) diff --git a/drivers/s390/cio/qdio.h b/drivers/s390/cio/qdio.h index bb1c8402c67d..7f0aa95585a4 100644 --- a/drivers/s390/cio/qdio.h +++ b/drivers/s390/cio/qdio.h @@ -166,11 +166,7 @@ struct qdio_dev_perf_stat { } ____cacheline_aligned; struct qdio_queue_perf_stat { - /* - * Sorted into order-2 buckets: 1, 2-3, 4-7, ... 64-127, 128. - * Since max. 127 SBALs are scanned reuse entry for 128 as queue full - * aka 127 SBALs found. - */ + /* Sorted into order-2 buckets: 1, 2-3, 4-7, ... 64-127, 128. */ unsigned int nr_sbals[8]; unsigned int nr_sbal_error; unsigned int nr_sbal_nop; diff --git a/drivers/s390/cio/qdio_debug.c b/drivers/s390/cio/qdio_debug.c index da95c923d81a..863d17c802ca 100644 --- a/drivers/s390/cio/qdio_debug.c +++ b/drivers/s390/cio/qdio_debug.c @@ -165,7 +165,7 @@ static int qstat_show(struct seq_file *m, void *v) } seq_printf(m, "\n1 2.. 4.. 8.. " - "16.. 32.. 64.. 127\n"); + "16.. 32.. 64.. 128\n"); for (i = 0; i < ARRAY_SIZE(q->q_stats.nr_sbals); i++) seq_printf(m, "%-10u ", q->q_stats.nr_sbals[i]); seq_printf(m, "\nError NOP Total\n%-10u %-10u %-10u\n\n", diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index 0c919a11a46e..d4c699773070 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -413,15 +413,8 @@ static inline void qdio_stop_polling(struct qdio_q *q) static inline void account_sbals(struct qdio_q *q, unsigned int count) { - int pos; - q->q_stats.nr_sbal_total += count; - if (count == QDIO_MAX_BUFFERS_MASK) { - q->q_stats.nr_sbals[7]++; - return; - } - pos = ilog2(count); - q->q_stats.nr_sbals[pos]++; + q->q_stats.nr_sbals[ilog2(count)]++; } static void process_buffer_error(struct qdio_q *q, unsigned int start, From 2bbf282a5e8e7e6b36586718b484a36117b6b8a0 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 17 Jun 2020 15:30:14 +0200 Subject: [PATCH 27/48] s390/qdio: allow to scan all 128 Input SBALs The comment is inaccurate, qdio_inbound_q_moved() and/or its callers no longer get confused by a count of 128 completed SBALs. Scanning all 128 SBALs at once can improve IRQ reduction (as we now place the ACK at the right spot), and reduce the amount of processing needed to handle all completed SBALs. Signed-off-by: Julian Wiedmann Signed-off-by: Heiko Carstens --- drivers/s390/cio/qdio_main.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index d4c699773070..0c1f186c6291 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -457,11 +457,7 @@ static int get_inbound_buffer_frontier(struct qdio_q *q, unsigned int start) q->timestamp = get_tod_clock_fast(); - /* - * Don't check 128 buffers, as otherwise qdio_inbound_q_moved - * would return 0. - */ - count = min(atomic_read(&q->nr_buf_used), QDIO_MAX_BUFFERS_MASK); + count = atomic_read(&q->nr_buf_used); if (!count) return 0; From a709423f7a3a452e5fa7442425817c1bdccd7926 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Tue, 16 Jun 2020 14:13:00 +0200 Subject: [PATCH 28/48] s390/qdio: remove internal polling in non-thinint path For non-thinint devices in LPAR, qdio polls an idle Input Queue for a little while to catch more work. 
But platform support for thinints has been around practically
_forever_ by now, so this micro-optimization is seeing 0 actual use.
Remove it to reduce the overall complexity of the hot path.

In the meantime we also grew support for driver-level polling
(e.g. NAPI in qeth), so it's quite questionable how useful this would
actually be on current kernels.

Signed-off-by: Julian Wiedmann
Signed-off-by: Heiko Carstens
---
 drivers/s390/cio/qdio.h      |  3 ---
 drivers/s390/cio/qdio_main.c | 26 ++------------------------
 2 files changed, 2 insertions(+), 27 deletions(-)

diff --git a/drivers/s390/cio/qdio.h b/drivers/s390/cio/qdio.h
index 7f0aa95585a4..cd2df4ff8e0e 100644
--- a/drivers/s390/cio/qdio.h
+++ b/drivers/s390/cio/qdio.h
@@ -15,7 +15,6 @@
 #define QDIO_BUSY_BIT_PATIENCE		(100 << 12)	/* 100 microseconds */
 #define QDIO_BUSY_BIT_RETRY_DELAY	10		/* 10 milliseconds */
 #define QDIO_BUSY_BIT_RETRIES		1000		/* = 10s retry time */
-#define QDIO_INPUT_THRESHOLD		(500 << 12)	/* 500 microseconds */

 enum qdio_irq_states {
 	QDIO_IRQ_STATE_INACTIVE,
@@ -181,8 +180,6 @@ struct qdio_input_q {
 	/* Batch of SBALs that we processed while polling the queue: */
 	unsigned int batch_start;
 	unsigned int batch_count;
-	/* last time of noticing incoming data */
-	u64 timestamp;
 };

 struct qdio_output_q {
diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c
index 0c1f186c6291..4fab8bba2cdd 100644
--- a/drivers/s390/cio/qdio_main.c
+++ b/drivers/s390/cio/qdio_main.c
@@ -510,14 +510,7 @@ static int get_inbound_buffer_frontier(struct qdio_q *q, unsigned int start)

 static int qdio_inbound_q_moved(struct qdio_q *q, unsigned int start)
 {
-	int count;
-
-	count = get_inbound_buffer_frontier(q, start);
-
-	if (count && !is_thinint_irq(q->irq_ptr) && MACHINE_IS_LPAR)
-		q->u.in.timestamp = get_tod_clock();
-
-	return count;
+	return get_inbound_buffer_frontier(q, start);
 }

 static inline int qdio_inbound_q_done(struct qdio_q *q, unsigned int start)
@@ -535,22 +528,7 @@ static inline int qdio_inbound_q_done(struct qdio_q *q, unsigned int start)
 	/* more work coming */
 		return 0;

-	if (is_thinint_irq(q->irq_ptr))
-		return 1;
-
-	/* don't poll under z/VM */
-	if (MACHINE_IS_VM)
-		return 1;
-
-	/*
-	 * At this point we know, that inbound first_to_check
-	 * has (probably) not moved (see qdio_inbound_processing).
-	 */
-	if (get_tod_clock_fast() > q->u.in.timestamp + QDIO_INPUT_THRESHOLD) {
-		DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "in done:%02x", start);
-		return 1;
-	} else
-		return 0;
+	return 1;
 }

 static inline void qdio_handle_aobs(struct qdio_q *q, int start, int count)

From 3c5f2eb9695cd241c9898a01388b19a149d0b7d2 Mon Sep 17 00:00:00 2001
From: Heiko Carstens
Date: Tue, 14 Jul 2020 07:46:40 +0200
Subject: [PATCH 29/48] s390/mm: avoid trimming to MAX_ORDER

Trimming to MAX_ORDER was originally done in order to avoid setting
HOLES_IN_ZONE, which in turn would enable a quite expensive pfn_valid()
check. pfn_valid() however only checks if a struct page exists for a
given pfn. With sparsemem vmemmap there are always struct pages, since
memmaps are allocated for whole sections. Therefore remove the
HOLES_IN_ZONE comment and the trimming.
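For reference, the generic CONFIG_SPARSEMEM pfn_valid() that this
reasoning relies on boils down to a section lookup, roughly as in the
simplified sketch below (the name pfn_valid_sketch marks it as an
illustration of the common code, not part of this patch):

	static inline int pfn_valid_sketch(unsigned long pfn)
	{
		if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
			return 0;
		return valid_section(__nr_to_section(pfn_to_section_nr(pfn)));
	}

With sparsemem vmemmap, a present section always has a memmap, so no
per-page holes need to be considered.
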
Signed-off-by: Heiko Carstens
---
 arch/s390/kernel/setup.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 5853c9872dfe..295a02bab64d 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -1126,14 +1126,6 @@ void __init setup_arch(char **cmdline_p)
 	free_mem_detect_info();
 	remove_oldmem();

-	/*
-	 * Make sure all chunks are MAX_ORDER aligned so we don't need the
-	 * extra checks that HOLES_IN_ZONE would require.
-	 *
-	 * Is this still required?
-	 */
-	memblock_trim_memory(1UL << (MAX_ORDER - 1 + PAGE_SHIFT));
-
 	if (is_prot_virt_host())
 		setup_uv();
 	setup_memory_end();

From 771cf196cc92a6078656548bbc073aa932c053ab Mon Sep 17 00:00:00 2001
From: Heiko Carstens
Date: Tue, 14 Jul 2020 08:22:21 +0200
Subject: [PATCH 30/48] s390/mm: allow order 10 allocations

Get rid of FORCE_MAX_ZONEORDER, which limited allocations to order 8
(= 1MB), and use the default, which allows for order 10 (= 4MB)
allocations. Given that s390 allows less than the default, this caused
memory allocation problems more or less unique to s390 from time to
time.

Note: this was originally introduced with commit 684de39bd795 ("[S390]
Fix IPL from NSS.") in order to support Named Saved Segments, which
could start/end at an arbitrary 1 megabyte boundary, and before support
for sparsemem vmemmap was enabled. Since NSS support is gone, but
sparsemem vmemmap support is available, this limitation can go away.

Signed-off-by: Heiko Carstens
---
 arch/s390/Kconfig | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 7697a1f8e819..0df33cffec52 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -625,10 +625,6 @@ config ARCH_ENABLE_MEMORY_HOTREMOVE
 config ARCH_ENABLE_SPLIT_PMD_PTLOCK
 	def_bool y

-config FORCE_MAX_ZONEORDER
-	int
-	default "9"
-
 config MAX_PHYSMEM_BITS
 	int "Maximum size of supported physical memory in bits (42-53)"
 	range 42 53

From 88aa8939c96781089e5ace3492d818074c5c6fe9 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich
Date: Mon, 29 Jun 2020 20:48:09 +0200
Subject: [PATCH 31/48] s390/kernel: unify EX_TABLE* implementations

Replace three implementations with one, using the __stringify_in_c
macro conveniently "borrowed" from powerpc and microblaze.

Signed-off-by: Ilya Leoshkevich
Signed-off-by: Heiko Carstens
---
 arch/s390/include/asm/asm-const.h | 12 +++++++++++
 arch/s390/include/asm/linkage.h   | 34 ++++++++++---------------------
 2 files changed, 23 insertions(+), 23 deletions(-)
 create mode 100644 arch/s390/include/asm/asm-const.h

diff --git a/arch/s390/include/asm/asm-const.h b/arch/s390/include/asm/asm-const.h
new file mode 100644
index 000000000000..11f615eb0066
--- /dev/null
+++ b/arch/s390/include/asm/asm-const.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_ASM_CONST_H
+#define _ASM_S390_ASM_CONST_H
+
+#ifdef __ASSEMBLY__
+# define stringify_in_c(...)	__VA_ARGS__
+#else
+/* This version of stringify will deal with commas... */
+# define __stringify_in_c(...)	#__VA_ARGS__
+# define stringify_in_c(...)
__stringify_in_c(__VA_ARGS__) " " +#endif +#endif /* _ASM_S390_ASM_CONST_H */ diff --git a/arch/s390/include/asm/linkage.h b/arch/s390/include/asm/linkage.h index 7f22262b0e46..1b52c07b5642 100644 --- a/arch/s390/include/asm/linkage.h +++ b/arch/s390/include/asm/linkage.h @@ -2,38 +2,26 @@ #ifndef __ASM_LINKAGE_H #define __ASM_LINKAGE_H +#include #include #define __ALIGN .align 4, 0x07 #define __ALIGN_STR __stringify(__ALIGN) -#ifndef __ASSEMBLY__ - /* * Helper macro for exception table entries */ -#define EX_TABLE(_fault, _target) \ - ".section __ex_table,\"a\"\n" \ - ".align 4\n" \ - ".long (" #_fault ") - .\n" \ - ".long (" #_target ") - .\n" \ - ".previous\n" -#else /* __ASSEMBLY__ */ +#define __EX_TABLE(_section, _fault, _target) \ + stringify_in_c(.section _section,"a";) \ + stringify_in_c(.align 4;) \ + stringify_in_c(.long (_fault) - .;) \ + stringify_in_c(.long (_target) - .;) \ + stringify_in_c(.previous) -#define EX_TABLE(_fault, _target) \ - .section __ex_table,"a" ; \ - .align 4 ; \ - .long (_fault) - . ; \ - .long (_target) - . ; \ - .previous +#define EX_TABLE(_fault, _target) \ + __EX_TABLE(__ex_table, _fault, _target) +#define EX_TABLE_DMA(_fault, _target) \ + __EX_TABLE(.dma.ex_table, _fault, _target) -#define EX_TABLE_DMA(_fault, _target) \ - .section .dma.ex_table, "a" ; \ - .align 4 ; \ - .long (_fault) - . ; \ - .long (_target) - . ; \ - .previous - -#endif /* __ASSEMBLY__ */ #endif From 05a68e892e89c97df6650cd8cc55058002657cbc Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Tue, 30 Jun 2020 20:52:03 +0200 Subject: [PATCH 32/48] s390/kernel: expand exception table logic to allow new handling options This is a s390 port of commit 548acf19234d ("x86/mm: Expand the exception table logic to allow new handling options"), which is needed for implementing BPF_PROBE_MEM on s390. The new handler field is made 64-bit in order to allow pointing from dynamically allocated entries to handlers in kernel text. Unlike on x86, NULL is used instead of ex_handler_default. This is because exception tables are used by boot/text_dma.S, and it would be a pain to preserve ex_handler_default. The new infrastructure is ignored in early_pgm_check_handler, since there is no pt_regs. Signed-off-by: Ilya Leoshkevich Reviewed-by: Heiko Carstens Signed-off-by: Heiko Carstens --- arch/s390/include/asm/extable.h | 52 +++++++++++++++++++++++++++++---- arch/s390/include/asm/linkage.h | 3 +- arch/s390/kernel/kprobes.c | 4 +-- arch/s390/kernel/traps.c | 7 ++--- arch/s390/mm/fault.c | 4 +-- scripts/sorttable.c | 41 ++++++++++++++++++++++++++ 6 files changed, 94 insertions(+), 17 deletions(-) diff --git a/arch/s390/include/asm/extable.h b/arch/s390/include/asm/extable.h index ae27f756b409..3beb294fd553 100644 --- a/arch/s390/include/asm/extable.h +++ b/arch/s390/include/asm/extable.h @@ -1,12 +1,20 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __S390_EXTABLE_H #define __S390_EXTABLE_H + +#include +#include + /* - * The exception table consists of pairs of addresses: the first is the - * address of an instruction that is allowed to fault, and the second is - * the address at which the program should continue. No registers are - * modified, so it is entirely up to the continuation code to figure out - * what to do. + * The exception table consists of three addresses: + * + * - Address of an instruction that is allowed to fault. + * - Address at which the program should continue. + * - Optional address of handler that takes pt_regs * argument and runs in + * interrupt context. 
+ * + * No registers are modified, so it is entirely up to the continuation code + * to figure out what to do. * * All the routines below use bits of fixup code that are out of line * with the main instruction path. This means when everything is well, @@ -17,6 +25,7 @@ struct exception_table_entry { int insn, fixup; + long handler; }; extern struct exception_table_entry *__start_dma_ex_table; @@ -29,6 +38,39 @@ static inline unsigned long extable_fixup(const struct exception_table_entry *x) return (unsigned long)&x->fixup + x->fixup; } +typedef bool (*ex_handler_t)(const struct exception_table_entry *, + struct pt_regs *); + +static inline ex_handler_t +ex_fixup_handler(const struct exception_table_entry *x) +{ + if (likely(!x->handler)) + return NULL; + return (ex_handler_t)((unsigned long)&x->handler + x->handler); +} + +static inline bool ex_handle(const struct exception_table_entry *x, + struct pt_regs *regs) +{ + ex_handler_t handler = ex_fixup_handler(x); + + if (unlikely(handler)) + return handler(x, regs); + regs->psw.addr = extable_fixup(x); + return true; +} + #define ARCH_HAS_RELATIVE_EXTABLE +static inline void swap_ex_entry_fixup(struct exception_table_entry *a, + struct exception_table_entry *b, + struct exception_table_entry tmp, + int delta) +{ + a->fixup = b->fixup + delta; + b->fixup = tmp.fixup - delta; + a->handler = b->handler + delta; + b->handler = tmp.handler - delta; +} + #endif diff --git a/arch/s390/include/asm/linkage.h b/arch/s390/include/asm/linkage.h index 1b52c07b5642..a0a7a2c72bd4 100644 --- a/arch/s390/include/asm/linkage.h +++ b/arch/s390/include/asm/linkage.h @@ -14,9 +14,10 @@ #define __EX_TABLE(_section, _fault, _target) \ stringify_in_c(.section _section,"a";) \ - stringify_in_c(.align 4;) \ + stringify_in_c(.align 8;) \ stringify_in_c(.long (_fault) - .;) \ stringify_in_c(.long (_target) - .;) \ + stringify_in_c(.quad 0;) \ stringify_in_c(.previous) #define EX_TABLE(_fault, _target) \ diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 548d0ea9808d..d2a71d872638 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -523,10 +523,8 @@ static int kprobe_trap_handler(struct pt_regs *regs, int trapnr) * zero, try to fix up. */ entry = s390_search_extables(regs->psw.addr); - if (entry) { - regs->psw.addr = extable_fixup(entry); + if (entry && ex_handle(entry, regs)) return 1; - } /* * fixup_exception() could not handle it, diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index ff9cc4c3290e..8d1e8a1a97df 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -50,11 +50,8 @@ void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str) } else { const struct exception_table_entry *fixup; fixup = s390_search_extables(regs->psw.addr); - if (fixup) - regs->psw.addr = extable_fixup(fixup); - else { + if (!fixup || !ex_handle(fixup, regs)) die(regs, str); - } } } @@ -251,7 +248,7 @@ void monitor_event_exception(struct pt_regs *regs) case BUG_TRAP_TYPE_NONE: fixup = s390_search_extables(regs->psw.addr); if (fixup) - regs->psw.addr = extable_fixup(fixup); + ex_handle(fixup, regs); break; case BUG_TRAP_TYPE_WARN: break; diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 598828517d9d..aebf9183bedd 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -255,10 +255,8 @@ static noinline void do_no_context(struct pt_regs *regs) /* Are we prepared to handle this kernel fault? 
*/ fixup = s390_search_extables(regs->psw.addr); - if (fixup) { - regs->psw.addr = extable_fixup(fixup); + if (fixup && ex_handle(fixup, regs)) return; - } /* * Oops. The kernel tried to access some bad page. We'll have to diff --git a/scripts/sorttable.c b/scripts/sorttable.c index ec6b5e81eba1..0ef3abfc4a51 100644 --- a/scripts/sorttable.c +++ b/scripts/sorttable.c @@ -255,6 +255,45 @@ static void x86_sort_relative_table(char *extab_image, int image_size) } } +static void s390_sort_relative_table(char *extab_image, int image_size) +{ + int i; + + for (i = 0; i < image_size; i += 16) { + char *loc = extab_image + i; + uint64_t handler; + + w(r((uint32_t *)loc) + i, (uint32_t *)loc); + w(r((uint32_t *)(loc + 4)) + (i + 4), (uint32_t *)(loc + 4)); + /* + * 0 is a special self-relative handler value, which means that + * handler should be ignored. It is safe, because it means that + * handler field points to itself, which should never happen. + * When creating extable-relative values, keep it as 0, since + * this should never occur either: it would mean that handler + * field points to the first extable entry. + */ + handler = r8((uint64_t *)(loc + 8)); + if (handler) + handler += i + 8; + w8(handler, (uint64_t *)(loc + 8)); + } + + qsort(extab_image, image_size / 16, 16, compare_relative_table); + + for (i = 0; i < image_size; i += 16) { + char *loc = extab_image + i; + uint64_t handler; + + w(r((uint32_t *)loc) - i, (uint32_t *)loc); + w(r((uint32_t *)(loc + 4)) - (i + 4), (uint32_t *)(loc + 4)); + handler = r8((uint64_t *)(loc + 8)); + if (handler) + handler -= i + 8; + w8(handler, (uint64_t *)(loc + 8)); + } +} + static int do_file(char const *const fname, void *addr) { int rc = -1; @@ -297,6 +336,8 @@ static int do_file(char const *const fname, void *addr) custom_sort = x86_sort_relative_table; break; case EM_S390: + custom_sort = s390_sort_relative_table; + break; case EM_AARCH64: case EM_PARISC: case EM_PPC: From 3f161e0ae863a0456d00e5a6c9c81098c62ab7fe Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Wed, 24 Jun 2020 14:55:22 +0200 Subject: [PATCH 33/48] s390/bpf: implement BPF_PROBE_MEM This is a s390 port of x86 commit 3dec541b2e63 ("bpf: Add support for BTF pointers to x86 JIT"). Signed-off-by: Ilya Leoshkevich Reviewed-by: Heiko Carstens Signed-off-by: Heiko Carstens --- arch/s390/net/bpf_jit_comp.c | 139 ++++++++++++++++++++++++++++++++++- 1 file changed, 138 insertions(+), 1 deletion(-) diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index f4242b894cf2..8fe7bdfc8d15 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -49,6 +49,7 @@ struct bpf_jit { int r1_thunk_ip; /* Address of expoline thunk for 'br %r1' */ int r14_thunk_ip; /* Address of expoline thunk for 'br %r14' */ int tail_call_start; /* Tail call start offset */ + int excnt; /* Number of exception table entries */ int labels[1]; /* Labels for local jumps */ }; @@ -588,6 +589,84 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth) } } +static int get_probe_mem_regno(const u8 *insn) +{ + /* + * insn must point to llgc, llgh, llgf or lg, which have destination + * register at the same position. 
+ */ + if (insn[0] != 0xe3) /* common llgc, llgh, llgf and lg prefix */ + return -1; + if (insn[5] != 0x90 && /* llgc */ + insn[5] != 0x91 && /* llgh */ + insn[5] != 0x16 && /* llgf */ + insn[5] != 0x04) /* lg */ + return -1; + return insn[1] >> 4; +} + +static bool ex_handler_bpf(const struct exception_table_entry *x, + struct pt_regs *regs) +{ + int regno; + u8 *insn; + + regs->psw.addr = extable_fixup(x); + insn = (u8 *)__rewind_psw(regs->psw, regs->int_code >> 16); + regno = get_probe_mem_regno(insn); + if (WARN_ON_ONCE(regno < 0)) + /* JIT bug - unexpected instruction. */ + return false; + regs->gprs[regno] = 0; + return true; +} + +static int bpf_jit_probe_mem(struct bpf_jit *jit, struct bpf_prog *fp, + int probe_prg, int nop_prg) +{ + struct exception_table_entry *ex; + s64 delta; + u8 *insn; + int prg; + int i; + + if (!fp->aux->extable) + /* Do nothing during early JIT passes. */ + return 0; + insn = jit->prg_buf + probe_prg; + if (WARN_ON_ONCE(get_probe_mem_regno(insn) < 0)) + /* JIT bug - unexpected probe instruction. */ + return -1; + if (WARN_ON_ONCE(probe_prg + insn_length(*insn) != nop_prg)) + /* JIT bug - gap between probe and nop instructions. */ + return -1; + for (i = 0; i < 2; i++) { + if (WARN_ON_ONCE(jit->excnt >= fp->aux->num_exentries)) + /* Verifier bug - not enough entries. */ + return -1; + ex = &fp->aux->extable[jit->excnt]; + /* Add extable entries for probe and nop instructions. */ + prg = i == 0 ? probe_prg : nop_prg; + delta = jit->prg_buf + prg - (u8 *)&ex->insn; + if (WARN_ON_ONCE(delta < INT_MIN || delta > INT_MAX)) + /* JIT bug - code and extable must be close. */ + return -1; + ex->insn = delta; + /* + * Always land on the nop. Note that extable infrastructure + * ignores fixup field, it is handled by ex_handler_bpf(). + */ + delta = jit->prg_buf + nop_prg - (u8 *)&ex->fixup; + if (WARN_ON_ONCE(delta < INT_MIN || delta > INT_MAX)) + /* JIT bug - landing pad and extable must be close. 
*/ + return -1; + ex->fixup = delta; + ex->handler = (u8 *)ex_handler_bpf - (u8 *)&ex->handler; + jit->excnt++; + } + return 0; +} + /* * Compile one eBPF instruction into s390x code * @@ -604,7 +683,14 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, u32 *addrs = jit->addrs; s32 imm = insn->imm; s16 off = insn->off; + int probe_prg = -1; unsigned int mask; + int nop_prg; + int err; + + if (BPF_CLASS(insn->code) == BPF_LDX && + BPF_MODE(insn->code) == BPF_PROBE_MEM) + probe_prg = jit->prg; switch (insn->code) { /* @@ -1119,6 +1205,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, * BPF_LDX */ case BPF_LDX | BPF_MEM | BPF_B: /* dst = *(u8 *)(ul) (src + off) */ + case BPF_LDX | BPF_PROBE_MEM | BPF_B: /* llgc %dst,0(off,%src) */ EMIT6_DISP_LH(0xe3000000, 0x0090, dst_reg, src_reg, REG_0, off); jit->seen |= SEEN_MEM; @@ -1126,6 +1213,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, insn_count = 2; break; case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */ + case BPF_LDX | BPF_PROBE_MEM | BPF_H: /* llgh %dst,0(off,%src) */ EMIT6_DISP_LH(0xe3000000, 0x0091, dst_reg, src_reg, REG_0, off); jit->seen |= SEEN_MEM; @@ -1133,6 +1221,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, insn_count = 2; break; case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */ + case BPF_LDX | BPF_PROBE_MEM | BPF_W: /* llgf %dst,off(%src) */ jit->seen |= SEEN_MEM; EMIT6_DISP_LH(0xe3000000, 0x0016, dst_reg, src_reg, REG_0, off); @@ -1140,6 +1229,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, insn_count = 2; break; case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */ + case BPF_LDX | BPF_PROBE_MEM | BPF_DW: /* lg %dst,0(off,%src) */ jit->seen |= SEEN_MEM; EMIT6_DISP_LH(0xe3000000, 0x0004, dst_reg, src_reg, REG_0, off); @@ -1485,6 +1575,23 @@ branch_oc: pr_err("Unknown opcode %02x\n", insn->code); return -1; } + + if (probe_prg != -1) { + /* + * Handlers of certain exceptions leave psw.addr pointing to + * the instruction directly after the failing one. Therefore, + * create two exception table entries and also add a nop in + * case two probing instructions come directly after each + * other. + */ + nop_prg = jit->prg; + /* bcr 0,%0 */ + _EMIT2(0x0700); + err = bpf_jit_probe_mem(jit, fp, probe_prg, nop_prg); + if (err < 0) + return err; + } + return insn_count; } @@ -1527,6 +1634,7 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp, jit->lit32 = jit->lit32_start; jit->lit64 = jit->lit64_start; jit->prg = 0; + jit->excnt = 0; bpf_jit_prologue(jit, stack_depth); if (bpf_set_addr(jit, 0) < 0) @@ -1551,6 +1659,12 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp, jit->lit64_start = ALIGN(jit->lit64_start, 8); jit->size = jit->lit64_start + lit64_size; jit->size_prg = jit->prg; + + if (WARN_ON_ONCE(fp->aux->extable && + jit->excnt != fp->aux->num_exentries)) + /* Verifier bug - too many entries. */ + return -1; + return 0; } @@ -1565,6 +1679,29 @@ struct s390_jit_data { int pass; }; +static struct bpf_binary_header *bpf_jit_alloc(struct bpf_jit *jit, + struct bpf_prog *fp) +{ + struct bpf_binary_header *header; + u32 extable_size; + u32 code_size; + + /* We need two entries per insn. 
*/ + fp->aux->num_exentries *= 2; + + code_size = roundup(jit->size, + __alignof__(struct exception_table_entry)); + extable_size = fp->aux->num_exentries * + sizeof(struct exception_table_entry); + header = bpf_jit_binary_alloc(code_size + extable_size, &jit->prg_buf, + 8, jit_fill_hole); + if (!header) + return NULL; + fp->aux->extable = (struct exception_table_entry *) + (jit->prg_buf + code_size); + return header; +} + /* * Compile eBPF program "fp" */ @@ -1631,7 +1768,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) /* * Final pass: Allocate and generate program */ - header = bpf_jit_binary_alloc(jit.size, &jit.prg_buf, 8, jit_fill_hole); + header = bpf_jit_alloc(&jit, fp); if (!header) { fp = orig_fp; goto free_addrs; From 58e15716feb562cdba57e99d62c525a1faa37c08 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 20 Jul 2020 14:15:02 +0200 Subject: [PATCH 34/48] s390/time: use CLOCKSOURCE_MASK Make use of CLOCKSOURCE_MASK instead of open-coding it. Signed-off-by: Heiko Carstens --- arch/s390/kernel/time.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 700127ba689d..317059684847 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -253,7 +253,7 @@ static struct clocksource clocksource_tod = { .name = "tod", .rating = 400, .read = read_tod_clock, - .mask = -1ULL, + .mask = CLOCKSOURCE_MASK(64), .mult = 1000, .shift = 12, .flags = CLOCK_SOURCE_IS_CONTINUOUS, From 555701a714f77e01490f633c1080cf97f0ede1f0 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 20 Jul 2020 14:16:03 +0200 Subject: [PATCH 35/48] s390/time: select CLOCKSOURCE_VALIDATE_LAST_CYCLE The value returned by read_tod_clock() will overflow on September 17th 2042. To avoid that system time jumps back select CLOCKSOURCE_VALIDATE_LAST_CYCLE which enables a sanity check in order to prevent negative "delta" values. Signed-off-by: Heiko Carstens --- arch/s390/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 0df33cffec52..d95d323cf213 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -125,6 +125,7 @@ config S390 select HAVE_ARCH_JUMP_LABEL_RELATIVE select HAVE_ARCH_KASAN select HAVE_ARCH_KASAN_VMALLOC + select CLOCKSOURCE_VALIDATE_LAST_CYCLE select CPU_NO_EFFICIENT_FFS if !HAVE_MARCH_Z9_109_FEATURES select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_SOFT_DIRTY From 411155820bb348e71ecc5b1db147b36af98cbc96 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 20 Jul 2020 14:28:36 +0200 Subject: [PATCH 36/48] s390/time: improve comparison for tod steering It doesn't make sense to add zero shifted by 15. It's still zero. Signed-off-by: Heiko Carstens --- arch/s390/kernel/time.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 317059684847..513e59d08a55 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -237,7 +237,7 @@ static u64 read_tod_clock(struct clocksource *cs) preempt_disable(); /* protect from changes to steering parameters */ now = get_tod_clock(); adj = tod_steering_end - now; - if (unlikely((s64) adj >= 0)) + if (unlikely((s64) adj > 0)) /* * manually steer by 1 cycle every 2^16 cycles. This * corresponds to shifting the tod delta by 15. 
1s is From 4631f3ca493a7c8f9f31aef45fc0fc0e182155b7 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Tue, 7 Jul 2020 16:42:19 +0200 Subject: [PATCH 37/48] s390/pci: clarify comment in s390_mmio_read/write The existing comment was talking about reading in the write part and vice versa. While we are here make it more clear why restricting the syscalls to MIO capable devices is okay. Signed-off-by: Niklas Schnelle Signed-off-by: Heiko Carstens --- arch/s390/pci/pci_mmio.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c index 38efa3e852c4..401cf670a243 100644 --- a/arch/s390/pci/pci_mmio.c +++ b/arch/s390/pci/pci_mmio.c @@ -155,10 +155,12 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr, return -EINVAL; /* - * Only support read access to MIO capable devices on a MIO enabled - * system. Otherwise we would have to check for every address if it is - * a special ZPCI_ADDR and we would have to do a get_pfn() which we - * don't need for MIO capable devices. + * We only support write access to MIO capable devices if we are on + * a MIO enabled system. Otherwise we would have to check for every + * address if it is a special ZPCI_ADDR and would have to do + * a get_pfn() which we don't need for MIO capable devices. Currently + * ISM devices are the only devices without MIO support and there is no + * known need for accessing these from userspace. */ if (static_branch_likely(&have_mio)) { ret = __memcpy_toio_inuser((void __iomem *) mmio_addr, @@ -282,10 +284,12 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr, return -EINVAL; /* - * Only support write access to MIO capable devices on a MIO enabled - * system. Otherwise we would have to check for every address if it is - * a special ZPCI_ADDR and we would have to do a get_pfn() which we - * don't need for MIO capable devices. + * We only support read access to MIO capable devices if we are on + * a MIO enabled system. Otherwise we would have to check for every + * address if it is a special ZPCI_ADDR and would have to do + * a get_pfn() which we don't need for MIO capable devices. Currently + * ISM devices are the only devices without MIO support and there is no + * known need for accessing these from userspace. */ if (static_branch_likely(&have_mio)) { ret = __memcpy_fromio_inuser( From 73d6eb48d26930f0cbdc8bf1ccb0ad964e7d2b90 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Wed, 22 Jul 2020 23:58:54 +0200 Subject: [PATCH 38/48] s390: enable HAVE_FUNCTION_ERROR_INJECTION This kernel feature is required for enabling BPF_KPROBE_OVERRIDE. Define override_function_with_return() and regs_set_return_value() functions, and fix compile errors in syscall_wrapper.h. 
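As an illustration (this snippet is not part of the patch, and the function name is made up): a function can be opted in to error injection roughly like this, after which a BPF program of type BPF_KPROBE_OVERRIDE can force it to fail via the bpf_override_return() helper, which ends up in override_function_with_return():

	#include <linux/error-injection.h>

	static int example_claim_resource(void)
	{
		/* normal, non-injected path */
		return 0;
	}
	/* ERRNO: an injected return value is interpreted as a -errno code */
	ALLOW_ERROR_INJECTION(example_claim_resource, ERRNO);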
Signed-off-by: Ilya Leoshkevich Signed-off-by: Heiko Carstens --- arch/s390/Kconfig | 1 + arch/s390/include/asm/ptrace.h | 5 +++++ arch/s390/include/asm/syscall_wrapper.h | 6 +++--- arch/s390/lib/Makefile | 2 ++ arch/s390/lib/error-inject.c | 14 ++++++++++++++ 5 files changed, 25 insertions(+), 3 deletions(-) create mode 100644 arch/s390/lib/error-inject.c diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index d95d323cf213..9cfd8de907cb 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -145,6 +145,7 @@ config S390 select HAVE_EFFICIENT_UNALIGNED_ACCESS select HAVE_FENTRY select HAVE_FTRACE_MCOUNT_RECORD + select HAVE_FUNCTION_ERROR_INJECTION select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER select HAVE_FUTEX_CMPXCHG if FUTEX diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index f009a13afe71..16b3e4396312 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -184,5 +184,10 @@ static inline unsigned long kernel_stack_pointer(struct pt_regs *regs) return regs->gprs[15]; } +static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) +{ + regs->gprs[2] = rc; +} + #endif /* __ASSEMBLY__ */ #endif /* _S390_PTRACE_H */ diff --git a/arch/s390/include/asm/syscall_wrapper.h b/arch/s390/include/asm/syscall_wrapper.h index 3c3d6fe8e2f0..1320f4213d80 100644 --- a/arch/s390/include/asm/syscall_wrapper.h +++ b/arch/s390/include/asm/syscall_wrapper.h @@ -30,7 +30,7 @@ }) #define __S390_SYS_STUBx(x, name, ...) \ - asmlinkage long __s390_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__))\ + asmlinkage long __s390_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__));\ ALLOW_ERROR_INJECTION(__s390_sys##name, ERRNO); \ asmlinkage long __s390_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__))\ { \ @@ -46,7 +46,7 @@ #define COMPAT_SYSCALL_DEFINE0(sname) \ SYSCALL_METADATA(_##sname, 0); \ asmlinkage long __s390_compat_sys_##sname(void); \ - ALLOW_ERROR_INJECTION(__s390_compat__sys_##sname, ERRNO); \ + ALLOW_ERROR_INJECTION(__s390_compat_sys_##sname, ERRNO); \ asmlinkage long __s390_compat_sys_##sname(void) #define SYSCALL_DEFINE0(sname) \ @@ -72,7 +72,7 @@ asmlinkage long __s390_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ asmlinkage long __s390_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \ __attribute__((alias(__stringify(__se_compat_sys##name)))); \ - ALLOW_ERROR_INJECTION(compat_sys##name, ERRNO); \ + ALLOW_ERROR_INJECTION(__s390_compat_sys##name, ERRNO); \ static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ asmlinkage long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ asmlinkage long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile index 28fd66d558ff..678333936f78 100644 --- a/arch/s390/lib/Makefile +++ b/arch/s390/lib/Makefile @@ -14,3 +14,5 @@ KASAN_SANITIZE_uaccess.o := n obj-$(CONFIG_S390_UNWIND_SELFTEST) += test_unwind.o CFLAGS_test_unwind.o += -fno-optimize-sibling-calls + +lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o diff --git a/arch/s390/lib/error-inject.c b/arch/s390/lib/error-inject.c new file mode 100644 index 000000000000..8c9d4da87eef --- /dev/null +++ b/arch/s390/lib/error-inject.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0+ +#include +#include +#include + +void override_function_with_return(struct pt_regs *regs) +{ + /* + * Emulate 'br 14'. 'regs' is captured by kprobes on entry to some + * kernel function. 
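+ * Setting the PSW address to the return address saved in %r14 makes
+ * the probed function appear to return immediately once kprobes
+ * restores the registers.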
+ */ + regs->psw.addr = regs->gprs[14]; +} +NOKPROBE_SYMBOL(override_function_with_return); From 8398b226b8f01df902450658a139ee01d9f4c482 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 22 Jul 2020 11:45:50 +0200 Subject: [PATCH 39/48] s390/vmem: rename vmem_add_mem() to vmem_add_range() Let's match the name to vmem_remove_range(). Cc: Vasily Gorbik Cc: Christian Borntraeger Cc: Gerald Schaefer Signed-off-by: David Hildenbrand Message-Id: <20200722094558.9828-2-david@redhat.com> Signed-off-by: Heiko Carstens --- arch/s390/mm/vmem.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 3b9e71654c37..66c5333020ea 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -57,7 +57,7 @@ pte_t __ref *vmem_pte_alloc(void) /* * Add a physical memory range to the 1:1 mapping. */ -static int vmem_add_mem(unsigned long start, unsigned long size) +static int vmem_add_range(unsigned long start, unsigned long size) { unsigned long pgt_prot, sgt_prot, r3_prot; unsigned long pages4k, pages1m, pages2g; @@ -308,7 +308,7 @@ int vmem_add_mapping(unsigned long start, unsigned long size) return -ERANGE; mutex_lock(&vmem_mutex); - ret = vmem_add_mem(start, size); + ret = vmem_add_range(start, size); if (ret) vmem_remove_range(start, size); mutex_unlock(&vmem_mutex); @@ -325,7 +325,7 @@ void __init vmem_map_init(void) struct memblock_region *reg; for_each_memblock(memory, reg) - vmem_add_mem(reg->base, reg->size); + vmem_add_range(reg->base, reg->size); __set_memory((unsigned long)_stext, (unsigned long)(_etext - _stext) >> PAGE_SHIFT, SET_MEMORY_RO | SET_MEMORY_X); From 3e0d3e408e63839625b210e5eb7269c45b870a38 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 22 Jul 2020 11:45:51 +0200 Subject: [PATCH 40/48] s390/vmem: consolidate vmem_add_range() and vmem_remove_range() We want to have only a single pagetable walker and reuse the same functionality for vmemmap handling. Let's start by consolidating vmem_add_range() and vmem_remove_range(), converting them into a single recursive implementation. A recursive implementation makes it easier to expand individual cases without harming readability. In addition, we minimize traversing the whole hierarchy over and over again. One change is that we don't unmap large PMDs/PUDs when they are not completely covered by the request, something that should never happen with direct mappings, unless ranges were removed at a different granularity than they were added, which would already be broken.
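As a rough sketch of the resulting walker shape (simplified; error handling and large-page handling omitted, the real code is in the diff below):

	static int modify_pte_table(pmd_t *pmd, unsigned long addr,
				    unsigned long end, bool add)
	{
		/* on add: install absent PTEs; on remove: clear present ones */
		return 0;
	}

	/* analogous helpers for the pmd, pud and p4d levels allocate
	 * missing tables on add and recurse one level down */

	static int modify_pagetable(unsigned long start, unsigned long end,
				    bool add)
	{
		/* walk the pgd entries, recurse, flush the TLB once on remove */
		return 0;
	}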
Cc: Vasily Gorbik Cc: Christian Borntraeger Cc: Gerald Schaefer Signed-off-by: David Hildenbrand Message-Id: <20200722094558.9828-3-david@redhat.com> Signed-off-by: Heiko Carstens --- arch/s390/mm/vmem.c | 337 +++++++++++++++++++++++++++----------------- 1 file changed, 208 insertions(+), 129 deletions(-) diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 66c5333020ea..177daf389d39 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -54,88 +54,218 @@ pte_t __ref *vmem_pte_alloc(void) return pte; } +static void modify_pte_table(pmd_t *pmd, unsigned long addr, unsigned long end, + bool add) +{ + unsigned long prot, pages = 0; + pte_t *pte; + + prot = pgprot_val(PAGE_KERNEL); + if (!MACHINE_HAS_NX) + prot &= ~_PAGE_NOEXEC; + + pte = pte_offset_kernel(pmd, addr); + for (; addr < end; addr += PAGE_SIZE, pte++) { + if (!add) { + if (pte_none(*pte)) + continue; + pte_clear(&init_mm, addr, pte); + } else if (pte_none(*pte)) { + pte_val(*pte) = addr | prot; + } else + continue; + + pages++; + } + + update_page_count(PG_DIRECT_MAP_4K, add ? pages : -pages); +} + +static int modify_pmd_table(pud_t *pud, unsigned long addr, unsigned long end, + bool add) +{ + unsigned long next, prot, pages = 0; + int ret = -ENOMEM; + pmd_t *pmd; + pte_t *pte; + + prot = pgprot_val(SEGMENT_KERNEL); + if (!MACHINE_HAS_NX) + prot &= ~_SEGMENT_ENTRY_NOEXEC; + + pmd = pmd_offset(pud, addr); + for (; addr < end; addr = next, pmd++) { + next = pmd_addr_end(addr, end); + + if (!add) { + if (pmd_none(*pmd)) + continue; + if (pmd_large(*pmd) && !add) { + if (IS_ALIGNED(addr, PMD_SIZE) && + IS_ALIGNED(next, PMD_SIZE)) { + pmd_clear(pmd); + pages++; + } + continue; + } + } else if (pmd_none(*pmd)) { + if (IS_ALIGNED(addr, PMD_SIZE) && + IS_ALIGNED(next, PMD_SIZE) && + MACHINE_HAS_EDAT1 && addr && + !debug_pagealloc_enabled()) { + pmd_val(*pmd) = addr | prot; + pages++; + continue; + } + pte = vmem_pte_alloc(); + if (!pte) + goto out; + pmd_populate(&init_mm, pmd, pte); + } else if (pmd_large(*pmd)) + continue; + + modify_pte_table(pmd, addr, next, add); + } + ret = 0; +out: + update_page_count(PG_DIRECT_MAP_1M, add ? pages : -pages); + return ret; +} + +static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end, + bool add) +{ + unsigned long next, prot, pages = 0; + int ret = -ENOMEM; + pud_t *pud; + pmd_t *pmd; + + prot = pgprot_val(REGION3_KERNEL); + if (!MACHINE_HAS_NX) + prot &= ~_REGION_ENTRY_NOEXEC; + + pud = pud_offset(p4d, addr); + for (; addr < end; addr = next, pud++) { + next = pud_addr_end(addr, end); + + if (!add) { + if (pud_none(*pud)) + continue; + if (pud_large(*pud)) { + if (IS_ALIGNED(addr, PUD_SIZE) && + IS_ALIGNED(next, PUD_SIZE)) { + pud_clear(pud); + pages++; + } + continue; + } + } else if (pud_none(*pud)) { + if (IS_ALIGNED(addr, PUD_SIZE) && + IS_ALIGNED(next, PUD_SIZE) && + MACHINE_HAS_EDAT2 && addr && + !debug_pagealloc_enabled()) { + pud_val(*pud) = addr | prot; + pages++; + continue; + } + pmd = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY); + if (!pmd) + goto out; + pud_populate(&init_mm, pud, pmd); + } else if (pud_large(*pud)) + continue; + + ret = modify_pmd_table(pud, addr, next, add); + if (ret) + goto out; + } + ret = 0; +out: + update_page_count(PG_DIRECT_MAP_2G, add ? 
pages : -pages); + return ret; +} + +static int modify_p4d_table(pgd_t *pgd, unsigned long addr, unsigned long end, + bool add) +{ + unsigned long next; + int ret = -ENOMEM; + p4d_t *p4d; + pud_t *pud; + + p4d = p4d_offset(pgd, addr); + for (; addr < end; addr = next, p4d++) { + next = p4d_addr_end(addr, end); + + if (!add) { + if (p4d_none(*p4d)) + continue; + } else if (p4d_none(*p4d)) { + pud = vmem_crst_alloc(_REGION3_ENTRY_EMPTY); + if (!pud) + goto out; + } + + ret = modify_pud_table(p4d, addr, next, add); + if (ret) + goto out; + } + ret = 0; +out: + return ret; +} + +static int modify_pagetable(unsigned long start, unsigned long end, bool add) +{ + unsigned long addr, next; + int ret = -ENOMEM; + pgd_t *pgd; + p4d_t *p4d; + + if (WARN_ON_ONCE(!PAGE_ALIGNED(start | end))) + return -EINVAL; + + for (addr = start; addr < end; addr = next) { + next = pgd_addr_end(addr, end); + pgd = pgd_offset_k(addr); + + if (!add) { + if (pgd_none(*pgd)) + continue; + } else if (pgd_none(*pgd)) { + p4d = vmem_crst_alloc(_REGION2_ENTRY_EMPTY); + if (!p4d) + goto out; + pgd_populate(&init_mm, pgd, p4d); + } + + ret = modify_p4d_table(pgd, addr, next, add); + if (ret) + goto out; + } + ret = 0; +out: + if (!add) + flush_tlb_kernel_range(start, end); + return ret; +} + +static int add_pagetable(unsigned long start, unsigned long end) +{ + return modify_pagetable(start, end, true); +} + +static int remove_pagetable(unsigned long start, unsigned long end) +{ + return modify_pagetable(start, end, false); +} + /* * Add a physical memory range to the 1:1 mapping. */ static int vmem_add_range(unsigned long start, unsigned long size) { - unsigned long pgt_prot, sgt_prot, r3_prot; - unsigned long pages4k, pages1m, pages2g; - unsigned long end = start + size; - unsigned long address = start; - pgd_t *pg_dir; - p4d_t *p4_dir; - pud_t *pu_dir; - pmd_t *pm_dir; - pte_t *pt_dir; - int ret = -ENOMEM; - - pgt_prot = pgprot_val(PAGE_KERNEL); - sgt_prot = pgprot_val(SEGMENT_KERNEL); - r3_prot = pgprot_val(REGION3_KERNEL); - if (!MACHINE_HAS_NX) { - pgt_prot &= ~_PAGE_NOEXEC; - sgt_prot &= ~_SEGMENT_ENTRY_NOEXEC; - r3_prot &= ~_REGION_ENTRY_NOEXEC; - } - pages4k = pages1m = pages2g = 0; - while (address < end) { - pg_dir = pgd_offset_k(address); - if (pgd_none(*pg_dir)) { - p4_dir = vmem_crst_alloc(_REGION2_ENTRY_EMPTY); - if (!p4_dir) - goto out; - pgd_populate(&init_mm, pg_dir, p4_dir); - } - p4_dir = p4d_offset(pg_dir, address); - if (p4d_none(*p4_dir)) { - pu_dir = vmem_crst_alloc(_REGION3_ENTRY_EMPTY); - if (!pu_dir) - goto out; - p4d_populate(&init_mm, p4_dir, pu_dir); - } - pu_dir = pud_offset(p4_dir, address); - if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address && - !(address & ~PUD_MASK) && (address + PUD_SIZE <= end) && - !debug_pagealloc_enabled()) { - pud_val(*pu_dir) = address | r3_prot; - address += PUD_SIZE; - pages2g++; - continue; - } - if (pud_none(*pu_dir)) { - pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY); - if (!pm_dir) - goto out; - pud_populate(&init_mm, pu_dir, pm_dir); - } - pm_dir = pmd_offset(pu_dir, address); - if (MACHINE_HAS_EDAT1 && pmd_none(*pm_dir) && address && - !(address & ~PMD_MASK) && (address + PMD_SIZE <= end) && - !debug_pagealloc_enabled()) { - pmd_val(*pm_dir) = address | sgt_prot; - address += PMD_SIZE; - pages1m++; - continue; - } - if (pmd_none(*pm_dir)) { - pt_dir = vmem_pte_alloc(); - if (!pt_dir) - goto out; - pmd_populate(&init_mm, pm_dir, pt_dir); - } - - pt_dir = pte_offset_kernel(pm_dir, address); - pte_val(*pt_dir) = address | pgt_prot; - address += PAGE_SIZE; - 
pages4k++; - } - ret = 0; -out: - update_page_count(PG_DIRECT_MAP_4K, pages4k); - update_page_count(PG_DIRECT_MAP_1M, pages1m); - update_page_count(PG_DIRECT_MAP_2G, pages2g); - return ret; + return add_pagetable(start, start + size); } /* @@ -144,58 +274,7 @@ out: */ static void vmem_remove_range(unsigned long start, unsigned long size) { - unsigned long pages4k, pages1m, pages2g; - unsigned long end = start + size; - unsigned long address = start; - pgd_t *pg_dir; - p4d_t *p4_dir; - pud_t *pu_dir; - pmd_t *pm_dir; - pte_t *pt_dir; - - pages4k = pages1m = pages2g = 0; - while (address < end) { - pg_dir = pgd_offset_k(address); - if (pgd_none(*pg_dir)) { - address += PGDIR_SIZE; - continue; - } - p4_dir = p4d_offset(pg_dir, address); - if (p4d_none(*p4_dir)) { - address += P4D_SIZE; - continue; - } - pu_dir = pud_offset(p4_dir, address); - if (pud_none(*pu_dir)) { - address += PUD_SIZE; - continue; - } - if (pud_large(*pu_dir)) { - pud_clear(pu_dir); - address += PUD_SIZE; - pages2g++; - continue; - } - pm_dir = pmd_offset(pu_dir, address); - if (pmd_none(*pm_dir)) { - address += PMD_SIZE; - continue; - } - if (pmd_large(*pm_dir)) { - pmd_clear(pm_dir); - address += PMD_SIZE; - pages1m++; - continue; - } - pt_dir = pte_offset_kernel(pm_dir, address); - pte_clear(&init_mm, address, pt_dir); - address += PAGE_SIZE; - pages4k++; - } - flush_tlb_kernel_range(start, end); - update_page_count(PG_DIRECT_MAP_4K, -pages4k); - update_page_count(PG_DIRECT_MAP_1M, -pages1m); - update_page_count(PG_DIRECT_MAP_2G, -pages2g); + remove_pagetable(start, start + size); } /* From 9ec8fa8dc331be6b63726be696b2b21d0031a09b Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 22 Jul 2020 11:45:52 +0200 Subject: [PATCH 41/48] s390/vmemmap: extend modify_pagetable() to handle vmemmap Extend our shiny new modify_pagetable() to handle !direct (vmemmap) mappings. Convert vmemmap_populate() and implement vmemmap_free(). Cc: Vasily Gorbik Cc: Christian Borntraeger Cc: Gerald Schaefer Signed-off-by: David Hildenbrand Message-Id: <20200722094558.9828-4-david@redhat.com> Signed-off-by: Heiko Carstens --- arch/s390/mm/vmem.c | 181 +++++++++++++++++++------------------------- 1 file changed, 76 insertions(+), 105 deletions(-) diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 177daf389d39..43fe1e2eb90e 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -29,6 +29,15 @@ static void __ref *vmem_alloc_pages(unsigned int order) return (void *) memblock_phys_alloc(size, size); } +static void vmem_free_pages(unsigned long addr, int order) +{ + /* We don't expect boot memory to be removed ever. 
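+ * (Boot memory is allocated from memblock and marked PageReserved,
+ * which the checks below catch.)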
*/ + if (!slab_is_available() || + WARN_ON_ONCE(PageReserved(phys_to_page(addr)))) + return; + free_pages(addr, order); +} + void *vmem_crst_alloc(unsigned long val) { unsigned long *table; @@ -54,10 +63,12 @@ pte_t __ref *vmem_pte_alloc(void) return pte; } -static void modify_pte_table(pmd_t *pmd, unsigned long addr, unsigned long end, - bool add) +/* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */ +static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr, + unsigned long end, bool add, bool direct) { unsigned long prot, pages = 0; + int ret = -ENOMEM; pte_t *pte; prot = pgprot_val(PAGE_KERNEL); @@ -69,20 +80,34 @@ static void modify_pte_table(pmd_t *pmd, unsigned long addr, unsigned long end, if (!add) { if (pte_none(*pte)) continue; + if (!direct) + vmem_free_pages(pfn_to_phys(pte_pfn(*pte)), 0); pte_clear(&init_mm, addr, pte); } else if (pte_none(*pte)) { - pte_val(*pte) = addr | prot; + if (!direct) { + void *new_page = vmemmap_alloc_block(PAGE_SIZE, + NUMA_NO_NODE); + + if (!new_page) + goto out; + pte_val(*pte) = __pa(new_page) | prot; + } else + pte_val(*pte) = addr | prot; } else continue; pages++; } - - update_page_count(PG_DIRECT_MAP_4K, add ? pages : -pages); + ret = 0; +out: + if (direct) + update_page_count(PG_DIRECT_MAP_4K, add ? pages : -pages); + return ret; } -static int modify_pmd_table(pud_t *pud, unsigned long addr, unsigned long end, - bool add) +/* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */ +static int __ref modify_pmd_table(pud_t *pud, unsigned long addr, + unsigned long end, bool add, bool direct) { unsigned long next, prot, pages = 0; int ret = -ENOMEM; @@ -103,6 +128,9 @@ static int modify_pmd_table(pud_t *pud, unsigned long addr, unsigned long end, if (pmd_large(*pmd) && !add) { if (IS_ALIGNED(addr, PMD_SIZE) && IS_ALIGNED(next, PMD_SIZE)) { + if (!direct) + vmem_free_pages(pmd_deref(*pmd), + get_order(PMD_SIZE)); pmd_clear(pmd); pages++; } @@ -111,11 +139,27 @@ static int modify_pmd_table(pud_t *pud, unsigned long addr, unsigned long end, } else if (pmd_none(*pmd)) { if (IS_ALIGNED(addr, PMD_SIZE) && IS_ALIGNED(next, PMD_SIZE) && - MACHINE_HAS_EDAT1 && addr && + MACHINE_HAS_EDAT1 && addr && direct && !debug_pagealloc_enabled()) { pmd_val(*pmd) = addr | prot; pages++; continue; + } else if (!direct && MACHINE_HAS_EDAT1) { + void *new_page; + + /* + * Use 1MB frames for vmemmap if available. We + * always use large frames even if they are only + * partially used. Otherwise we would have also + * page tables since vmemmap_populate gets + * called for each section separately. + */ + new_page = vmemmap_alloc_block(PMD_SIZE, + NUMA_NO_NODE); + if (!new_page) + goto out; + pmd_val(*pmd) = __pa(new_page) | prot; + continue; } pte = vmem_pte_alloc(); if (!pte) @@ -124,16 +168,19 @@ static int modify_pmd_table(pud_t *pud, unsigned long addr, unsigned long end, } else if (pmd_large(*pmd)) continue; - modify_pte_table(pmd, addr, next, add); + ret = modify_pte_table(pmd, addr, next, add, direct); + if (ret) + goto out; } ret = 0; out: - update_page_count(PG_DIRECT_MAP_1M, add ? pages : -pages); + if (direct) + update_page_count(PG_DIRECT_MAP_1M, add ? 
pages : -pages); return ret; } static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end, - bool add) + bool add, bool direct) { unsigned long next, prot, pages = 0; int ret = -ENOMEM; @@ -162,7 +209,7 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end, } else if (pud_none(*pud)) { if (IS_ALIGNED(addr, PUD_SIZE) && IS_ALIGNED(next, PUD_SIZE) && - MACHINE_HAS_EDAT2 && addr && + MACHINE_HAS_EDAT2 && addr && direct && !debug_pagealloc_enabled()) { pud_val(*pud) = addr | prot; pages++; @@ -175,18 +222,19 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end, } else if (pud_large(*pud)) continue; - ret = modify_pmd_table(pud, addr, next, add); + ret = modify_pmd_table(pud, addr, next, add, direct); if (ret) goto out; } ret = 0; out: - update_page_count(PG_DIRECT_MAP_2G, add ? pages : -pages); + if (direct) + update_page_count(PG_DIRECT_MAP_2G, add ? pages : -pages); return ret; } static int modify_p4d_table(pgd_t *pgd, unsigned long addr, unsigned long end, - bool add) + bool add, bool direct) { unsigned long next; int ret = -ENOMEM; @@ -206,7 +254,7 @@ static int modify_p4d_table(pgd_t *pgd, unsigned long addr, unsigned long end, goto out; } - ret = modify_pud_table(p4d, addr, next, add); + ret = modify_pud_table(p4d, addr, next, add, direct); if (ret) goto out; } @@ -215,7 +263,8 @@ out: return ret; } -static int modify_pagetable(unsigned long start, unsigned long end, bool add) +static int modify_pagetable(unsigned long start, unsigned long end, bool add, + bool direct) { unsigned long addr, next; int ret = -ENOMEM; @@ -239,7 +288,7 @@ static int modify_pagetable(unsigned long start, unsigned long end, bool add) pgd_populate(&init_mm, pgd, p4d); } - ret = modify_p4d_table(pgd, addr, next, add); + ret = modify_p4d_table(pgd, addr, next, add, direct); if (ret) goto out; } @@ -250,14 +299,14 @@ out: return ret; } -static int add_pagetable(unsigned long start, unsigned long end) +static int add_pagetable(unsigned long start, unsigned long end, bool direct) { - return modify_pagetable(start, end, true); + return modify_pagetable(start, end, true, direct); } -static int remove_pagetable(unsigned long start, unsigned long end) +static int remove_pagetable(unsigned long start, unsigned long end, bool direct) { - return modify_pagetable(start, end, false); + return modify_pagetable(start, end, false, direct); } /* @@ -265,7 +314,7 @@ static int remove_pagetable(unsigned long start, unsigned long end) */ static int vmem_add_range(unsigned long start, unsigned long size) { - return add_pagetable(start, start + size); + return add_pagetable(start, start + size, true); } /* @@ -274,7 +323,7 @@ static int vmem_add_range(unsigned long start, unsigned long size) */ static void vmem_remove_range(unsigned long start, unsigned long size) { - remove_pagetable(start, start + size); + remove_pagetable(start, start + size, true); } /* @@ -283,92 +332,14 @@ static void vmem_remove_range(unsigned long start, unsigned long size) int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, struct vmem_altmap *altmap) { - unsigned long pgt_prot, sgt_prot; - unsigned long address = start; - pgd_t *pg_dir; - p4d_t *p4_dir; - pud_t *pu_dir; - pmd_t *pm_dir; - pte_t *pt_dir; - int ret = -ENOMEM; - - pgt_prot = pgprot_val(PAGE_KERNEL); - sgt_prot = pgprot_val(SEGMENT_KERNEL); - if (!MACHINE_HAS_NX) { - pgt_prot &= ~_PAGE_NOEXEC; - sgt_prot &= ~_SEGMENT_ENTRY_NOEXEC; - } - for (address = start; address < end;) { - pg_dir = 
pgd_offset_k(address); - if (pgd_none(*pg_dir)) { - p4_dir = vmem_crst_alloc(_REGION2_ENTRY_EMPTY); - if (!p4_dir) - goto out; - pgd_populate(&init_mm, pg_dir, p4_dir); - } - - p4_dir = p4d_offset(pg_dir, address); - if (p4d_none(*p4_dir)) { - pu_dir = vmem_crst_alloc(_REGION3_ENTRY_EMPTY); - if (!pu_dir) - goto out; - p4d_populate(&init_mm, p4_dir, pu_dir); - } - - pu_dir = pud_offset(p4_dir, address); - if (pud_none(*pu_dir)) { - pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY); - if (!pm_dir) - goto out; - pud_populate(&init_mm, pu_dir, pm_dir); - } - - pm_dir = pmd_offset(pu_dir, address); - if (pmd_none(*pm_dir)) { - /* Use 1MB frames for vmemmap if available. We always - * use large frames even if they are only partially - * used. - * Otherwise we would have also page tables since - * vmemmap_populate gets called for each section - * separately. */ - if (MACHINE_HAS_EDAT1) { - void *new_page; - - new_page = vmemmap_alloc_block(PMD_SIZE, node); - if (!new_page) - goto out; - pmd_val(*pm_dir) = __pa(new_page) | sgt_prot; - address = (address + PMD_SIZE) & PMD_MASK; - continue; - } - pt_dir = vmem_pte_alloc(); - if (!pt_dir) - goto out; - pmd_populate(&init_mm, pm_dir, pt_dir); - } else if (pmd_large(*pm_dir)) { - address = (address + PMD_SIZE) & PMD_MASK; - continue; - } - - pt_dir = pte_offset_kernel(pm_dir, address); - if (pte_none(*pt_dir)) { - void *new_page; - - new_page = vmemmap_alloc_block(PAGE_SIZE, node); - if (!new_page) - goto out; - pte_val(*pt_dir) = __pa(new_page) | pgt_prot; - } - address += PAGE_SIZE; - } - ret = 0; -out: - return ret; + /* We don't care about the node, just use NUMA_NO_NODE on allocations */ + return add_pagetable(start, end, false); } void vmemmap_free(unsigned long start, unsigned long end, struct vmem_altmap *altmap) { + remove_pagetable(start, end, false); } void vmem_remove_mapping(unsigned long start, unsigned long size) From c00f05a92424c7788fdbf0909b823f8027596d66 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 22 Jul 2020 11:45:53 +0200 Subject: [PATCH 42/48] s390/vmemmap: cleanup when vmemmap_populate() fails Cleanup what we partially added in case vmemmap_populate() fails. For vmem, this is already handled by vmem_add_mapping(). Cc: Vasily Gorbik Cc: Christian Borntraeger Cc: Gerald Schaefer Signed-off-by: David Hildenbrand Message-Id: <20200722094558.9828-5-david@redhat.com> Signed-off-by: Heiko Carstens --- arch/s390/mm/vmem.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 43fe1e2eb90e..be32a38bb91f 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -332,8 +332,13 @@ static void vmem_remove_range(unsigned long start, unsigned long size) int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, struct vmem_altmap *altmap) { + int ret; + /* We don't care about the node, just use NUMA_NO_NODE on allocations */ - return add_pagetable(start, end, false); + ret = add_pagetable(start, end, false); + if (ret) + remove_pagetable(start, end, false); + return ret; } void vmemmap_free(unsigned long start, unsigned long end, From aa18e0e65800bf3250b23914a28e0e3fd9cadec2 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 22 Jul 2020 11:45:54 +0200 Subject: [PATCH 43/48] s390/vmemmap: take the vmem_mutex when populating/freeing Let's synchronize all accesses to the 1:1 and vmemmap mappings. This will be especially relevant when wanting to cleanup empty page tables that could be shared by both. 
Avoid races when removing tables that might be just about to get reused. Cc: Vasily Gorbik Cc: Christian Borntraeger Cc: Gerald Schaefer Signed-off-by: David Hildenbrand Message-Id: <20200722094558.9828-6-david@redhat.com> Signed-off-by: Heiko Carstens --- arch/s390/mm/vmem.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index be32a38bb91f..a2b79681df69 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -334,17 +334,21 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, { int ret; + mutex_lock(&vmem_mutex); /* We don't care about the node, just use NUMA_NO_NODE on allocations */ ret = add_pagetable(start, end, false); if (ret) remove_pagetable(start, end, false); + mutex_unlock(&vmem_mutex); return ret; } void vmemmap_free(unsigned long start, unsigned long end, struct vmem_altmap *altmap) { + mutex_lock(&vmem_mutex); remove_pagetable(start, end, false); + mutex_unlock(&vmem_mutex); } void vmem_remove_mapping(unsigned long start, unsigned long size) From b9ff81003cf1a0b12b8d60b6ef33a34e84dfe7ac Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 22 Jul 2020 11:45:55 +0200 Subject: [PATCH 44/48] s390/vmem: cleanup empty page tables Let's clean up empty page tables. Consider only page tables that fully fall into the identity mapping and the vmemmap range. As there are no valid accesses to vmem/vmemmap within non-populated ranges, the single TLB flush at the end should be sufficient. Cc: Vasily Gorbik Cc: Christian Borntraeger Cc: Gerald Schaefer Signed-off-by: David Hildenbrand Message-Id: <20200722094558.9828-7-david@redhat.com> Signed-off-by: Heiko Carstens --- arch/s390/mm/vmem.c | 102 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 101 insertions(+), 1 deletion(-) diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index a2b79681df69..b831f9f9130a 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -63,6 +63,15 @@ pte_t __ref *vmem_pte_alloc(void) return pte; } +static void vmem_pte_free(unsigned long *table) +{ + /* We don't expect boot memory to be removed ever.
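+ * (Early page tables come from memblock; only tables allocated via
+ * page_table_alloc() may be handed to page_table_free().)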
*/ + if (!slab_is_available() || + WARN_ON_ONCE(PageReserved(virt_to_page(table)))) + return; + page_table_free(&init_mm, table); +} + /* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */ static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr, unsigned long end, bool add, bool direct) @@ -105,6 +114,21 @@ out: return ret; } +static void try_free_pte_table(pmd_t *pmd, unsigned long start) +{ + pte_t *pte; + int i; + + /* We can safely assume this is fully in 1:1 mapping & vmemmap area */ + pte = pte_offset_kernel(pmd, start); + for (i = 0; i < PTRS_PER_PTE; i++, pte++) + if (!pte_none(*pte)) + return; + + vmem_pte_free(__va(pmd_deref(*pmd))); + pmd_clear(pmd); +} + /* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr, unsigned long end, bool add, bool direct) @@ -171,6 +195,8 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr, ret = modify_pte_table(pmd, addr, next, add, direct); if (ret) goto out; + if (!add) + try_free_pte_table(pmd, addr & PMD_MASK); } ret = 0; out: @@ -179,6 +205,29 @@ out: return ret; } +static void try_free_pmd_table(pud_t *pud, unsigned long start) +{ + const unsigned long end = start + PUD_SIZE; + pmd_t *pmd; + int i; + + /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */ + if (end > VMALLOC_START) + return; +#ifdef CONFIG_KASAN + if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end) + return; +#endif + + pmd = pmd_offset(pud, start); + for (i = 0; i < PTRS_PER_PMD; i++, pmd++) + if (!pmd_none(*pmd)) + return; + + vmem_free_pages(pud_deref(*pud), CRST_ALLOC_ORDER); + pud_clear(pud); +} + static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end, bool add, bool direct) { @@ -225,6 +274,8 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end, ret = modify_pmd_table(pud, addr, next, add, direct); if (ret) goto out; + if (!add) + try_free_pmd_table(pud, addr & PUD_MASK); } ret = 0; out: @@ -233,6 +284,29 @@ out: return ret; } +static void try_free_pud_table(p4d_t *p4d, unsigned long start) +{ + const unsigned long end = start + P4D_SIZE; + pud_t *pud; + int i; + + /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */ + if (end > VMALLOC_START) + return; +#ifdef CONFIG_KASAN + if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end) + return; +#endif + + pud = pud_offset(p4d, start); + for (i = 0; i < PTRS_PER_PUD; i++, pud++) + if (!pud_none(*pud)) + return; + + vmem_free_pages(p4d_deref(*p4d), CRST_ALLOC_ORDER); + p4d_clear(p4d); +} + static int modify_p4d_table(pgd_t *pgd, unsigned long addr, unsigned long end, bool add, bool direct) { @@ -257,12 +331,37 @@ static int modify_p4d_table(pgd_t *pgd, unsigned long addr, unsigned long end, ret = modify_pud_table(p4d, addr, next, add, direct); if (ret) goto out; + if (!add) + try_free_pud_table(p4d, addr & P4D_MASK); } ret = 0; out: return ret; } +static void try_free_p4d_table(pgd_t *pgd, unsigned long start) +{ + const unsigned long end = start + PGDIR_SIZE; + p4d_t *p4d; + int i; + + /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */ + if (end > VMALLOC_START) + return; +#ifdef CONFIG_KASAN + if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end) + return; +#endif + + p4d = p4d_offset(pgd, start); + for (i = 0; i < PTRS_PER_P4D; i++, p4d++) + if (!p4d_none(*p4d)) + return; + + vmem_free_pages(pgd_deref(*pgd), CRST_ALLOC_ORDER); + pgd_clear(pgd); +} + static int 
modify_pagetable(unsigned long start, unsigned long end, bool add, bool direct) { @@ -291,6 +390,8 @@ static int modify_pagetable(unsigned long start, unsigned long end, bool add, ret = modify_p4d_table(pgd, addr, next, add, direct); if (ret) goto out; + if (!add) + try_free_p4d_table(pgd, addr & PGDIR_MASK); } ret = 0; out: @@ -319,7 +420,6 @@ static int vmem_add_range(unsigned long start, unsigned long size) /* * Remove a physical memory range from the 1:1 mapping. - * Currently only invalidates page table entries. */ static void vmem_remove_range(unsigned long start, unsigned long size) { From f2057b4266a6be469ea0630971cf3cd933e42cce Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 22 Jul 2020 11:45:56 +0200 Subject: [PATCH 45/48] s390/vmemmap: fallback to PTEs if mapping large PMD fails Let's fall back to single pages if short on huge pages. No need to stop memory hotplug. Cc: Vasily Gorbik Cc: Christian Borntraeger Cc: Gerald Schaefer Signed-off-by: David Hildenbrand Message-Id: <20200722094558.9828-8-david@redhat.com> Signed-off-by: Heiko Carstens --- arch/s390/mm/vmem.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index b831f9f9130a..e82a63de19db 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -180,10 +180,10 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr, */ new_page = vmemmap_alloc_block(PMD_SIZE, NUMA_NO_NODE); - if (!new_page) - goto out; - pmd_val(*pmd) = __pa(new_page) | prot; - continue; + if (new_page) { + pmd_val(*pmd) = __pa(new_page) | prot; + continue; + } } pte = vmem_pte_alloc(); if (!pte) From cd5781d63eaf6dbf89532d8c7c214786b767ee16 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 22 Jul 2020 11:45:57 +0200 Subject: [PATCH 46/48] s390/vmemmap: remember unused sub-pmd ranges With a memmap size of 56 bytes or 72 bytes per page, the memmap for a 256 MB section won't span full PMDs. As we populate single sections and depopulate single sections, the depopulation step would not be able to free all vmemmap pmds anymore. Do it similarly to x86, marking the unused memmap ranges in a special way (padding them with 0xFD). This allows us to add/remove sections, cleaning up all allocated vmemmap pages even if the memmap size is not a multiple of 16 bytes per page. A 56 byte memmap can, for example, be created with !CONFIG_MEMCG and !CONFIG_SLUB. Cc: Vasily Gorbik Cc: Christian Borntraeger Cc: Gerald Schaefer Signed-off-by: David Hildenbrand Message-Id: <20200722094558.9828-9-david@redhat.com> Signed-off-by: Heiko Carstens --- arch/s390/mm/vmem.c | 51 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index e82a63de19db..df361bbacda1 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -72,6 +72,42 @@ static void vmem_pte_free(unsigned long *table) page_table_free(&init_mm, table); } +#define PAGE_UNUSED 0xFD + +static void vmemmap_use_sub_pmd(unsigned long start, unsigned long end) +{ + /* + * As we expect to add in the same granularity as we remove, it's + * sufficient to mark only some piece used to block the memmap page from + * getting removed (just in case the memmap never gets initialized, + * e.g., because the memory block never gets onlined).
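+ * A zeroed struct page differs from PAGE_UNUSED, so memchr_inv() in
+ * vmemmap_unuse_sub_pmd() will consider the memmap page still in use.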
+ */ + memset(__va(start), 0, sizeof(struct page)); +} + +static void vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end) +{ + void *page = __va(ALIGN_DOWN(start, PMD_SIZE)); + + /* Could be our memmap page is filled with PAGE_UNUSED already ... */ + vmemmap_use_sub_pmd(start, end); + + /* Mark the unused parts of the new memmap page PAGE_UNUSED. */ + if (!IS_ALIGNED(start, PMD_SIZE)) + memset(page, PAGE_UNUSED, start - __pa(page)); + if (!IS_ALIGNED(end, PMD_SIZE)) + memset(__va(end), PAGE_UNUSED, __pa(page) + PMD_SIZE - end); +} + +/* Returns true if the PMD is completely unused and can be freed. */ +static bool vmemmap_unuse_sub_pmd(unsigned long start, unsigned long end) +{ + void *page = __va(ALIGN_DOWN(start, PMD_SIZE)); + + memset(__va(start), PAGE_UNUSED, end - start); + return !memchr_inv(page, PAGE_UNUSED, PMD_SIZE); +} + /* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */ static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr, unsigned long end, bool add, bool direct) @@ -157,6 +193,11 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr, get_order(PMD_SIZE)); pmd_clear(pmd); pages++; + } else if (!direct && + vmemmap_unuse_sub_pmd(addr, next)) { + vmem_free_pages(pmd_deref(*pmd), + get_order(PMD_SIZE)); + pmd_clear(pmd); } continue; } @@ -182,6 +223,11 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr, NUMA_NO_NODE); if (new_page) { pmd_val(*pmd) = __pa(new_page) | prot; + if (!IS_ALIGNED(addr, PMD_SIZE) || + !IS_ALIGNED(next, PMD_SIZE)) { + vmemmap_use_new_sub_pmd(addr, + next); + } continue; } } @@ -189,8 +235,11 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr, if (!pte) goto out; pmd_populate(&init_mm, pmd, pte); - } else if (pmd_large(*pmd)) + } else if (pmd_large(*pmd)) { + if (!direct) + vmemmap_use_sub_pmd(addr, next); continue; + } ret = modify_pte_table(pmd, addr, next, add, direct); if (ret) From 2c114df071935762ffa88144cdab03d84beaa702 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 22 Jul 2020 11:45:58 +0200 Subject: [PATCH 47/48] s390/vmemmap: avoid memset(PAGE_UNUSED) when adding consecutive sections Let's avoid memset(PAGE_UNUSED) when adding consecutive sections, whereby the vmemmap of a single section does not span full PMDs. Cc: Vasily Gorbik Cc: Christian Borntraeger Cc: Gerald Schaefer Signed-off-by: David Hildenbrand Message-Id: <20200722094558.9828-10-david@redhat.com> Signed-off-by: Heiko Carstens --- arch/s390/mm/vmem.c | 45 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 42 insertions(+), 3 deletions(-) diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index df361bbacda1..70ebfc7958a6 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -74,7 +74,22 @@ static void vmem_pte_free(unsigned long *table) #define PAGE_UNUSED 0xFD -static void vmemmap_use_sub_pmd(unsigned long start, unsigned long end) +/* + * The unused vmemmap range, which was not yet memset(PAGE_UNUSED) ranges + * from unused_pmd_start to next PMD_SIZE boundary. 
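+ * A value of 0 means that there is no such pending range.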
+ */ +static unsigned long unused_pmd_start; + +static void vmemmap_flush_unused_pmd(void) +{ + if (!unused_pmd_start) + return; + memset(__va(unused_pmd_start), PAGE_UNUSED, + ALIGN(unused_pmd_start, PMD_SIZE) - unused_pmd_start); + unused_pmd_start = 0; +} + +static void __vmemmap_use_sub_pmd(unsigned long start, unsigned long end) { /* * As we expect to add in the same granularity as we remove, it's @@ -85,18 +100,41 @@ static void vmemmap_use_sub_pmd(unsigned long start, unsigned long end) memset(__va(start), 0, sizeof(struct page)); } +static void vmemmap_use_sub_pmd(unsigned long start, unsigned long end) +{ + /* + * We only optimize if the new used range directly follows the + * previously unused range (esp., when populating consecutive sections). + */ + if (unused_pmd_start == start) { + unused_pmd_start = end; + if (likely(IS_ALIGNED(unused_pmd_start, PMD_SIZE))) + unused_pmd_start = 0; + return; + } + vmemmap_flush_unused_pmd(); + __vmemmap_use_sub_pmd(start, end); +} + static void vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end) { void *page = __va(ALIGN_DOWN(start, PMD_SIZE)); + vmemmap_flush_unused_pmd(); + /* Could be our memmap page is filled with PAGE_UNUSED already ... */ - vmemmap_use_sub_pmd(start, end); + __vmemmap_use_sub_pmd(start, end); /* Mark the unused parts of the new memmap page PAGE_UNUSED. */ if (!IS_ALIGNED(start, PMD_SIZE)) memset(page, PAGE_UNUSED, start - __pa(page)); + /* + * We want to avoid memset(PAGE_UNUSED) when populating the vmemmap of + * consecutive sections. Remember for the last added PMD the last + * unused range in the populated PMD. + */ if (!IS_ALIGNED(end, PMD_SIZE)) - memset(__va(end), PAGE_UNUSED, __pa(page) + PMD_SIZE - end); + unused_pmd_start = end; } /* Returns true if the PMD is completely unused and can be freed. 
*/ @@ -104,6 +142,7 @@ static bool vmemmap_unuse_sub_pmd(unsigned long start, unsigned long end) { void *page = __va(ALIGN_DOWN(start, PMD_SIZE)); + vmemmap_flush_unused_pmd(); memset(__va(start), PAGE_UNUSED, end - start); return !memchr_inv(page, PAGE_UNUSED, PMD_SIZE); } From 9a996c67a65d937b23408e56935ef23404c9418e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 23 Jul 2020 21:42:36 +0200 Subject: [PATCH 48/48] s390/vmemmap: coding style updates Signed-off-by: Heiko Carstens --- arch/s390/mm/vmem.c | 55 +++++++++++++++++---------------------------- 1 file changed, 20 insertions(+), 35 deletions(-) diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 70ebfc7958a6..1aed1a4dfc2d 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -169,17 +169,17 @@ static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr, pte_clear(&init_mm, addr, pte); } else if (pte_none(*pte)) { if (!direct) { - void *new_page = vmemmap_alloc_block(PAGE_SIZE, - NUMA_NO_NODE); + void *new_page = vmemmap_alloc_block(PAGE_SIZE, NUMA_NO_NODE); if (!new_page) goto out; pte_val(*pte) = __pa(new_page) | prot; - } else + } else { pte_val(*pte) = addr | prot; - } else + } + } else { continue; - + } pages++; } ret = 0; @@ -196,10 +196,10 @@ static void try_free_pte_table(pmd_t *pmd, unsigned long start) /* We can safely assume this is fully in 1:1 mapping & vmemmap area */ pte = pte_offset_kernel(pmd, start); - for (i = 0; i < PTRS_PER_PTE; i++, pte++) + for (i = 0; i < PTRS_PER_PTE; i++, pte++) { if (!pte_none(*pte)) return; - + } vmem_pte_free(__va(pmd_deref(*pmd))); pmd_clear(pmd); } @@ -220,7 +220,6 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr, pmd = pmd_offset(pud, addr); for (; addr < end; addr = next, pmd++) { next = pmd_addr_end(addr, end); - if (!add) { if (pmd_none(*pmd)) continue; @@ -228,14 +227,11 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr, if (IS_ALIGNED(addr, PMD_SIZE) && IS_ALIGNED(next, PMD_SIZE)) { if (!direct) - vmem_free_pages(pmd_deref(*pmd), - get_order(PMD_SIZE)); + vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE)); pmd_clear(pmd); pages++; - } else if (!direct && - vmemmap_unuse_sub_pmd(addr, next)) { - vmem_free_pages(pmd_deref(*pmd), - get_order(PMD_SIZE)); + } else if (!direct && vmemmap_unuse_sub_pmd(addr, next)) { + vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE)); pmd_clear(pmd); } continue; @@ -258,14 +254,12 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr, * page tables since vmemmap_populate gets * called for each section separately. 
*/ - new_page = vmemmap_alloc_block(PMD_SIZE, - NUMA_NO_NODE); + new_page = vmemmap_alloc_block(PMD_SIZE, NUMA_NO_NODE); if (new_page) { pmd_val(*pmd) = __pa(new_page) | prot; if (!IS_ALIGNED(addr, PMD_SIZE) || !IS_ALIGNED(next, PMD_SIZE)) { - vmemmap_use_new_sub_pmd(addr, - next); + vmemmap_use_new_sub_pmd(addr, next); } continue; } @@ -279,7 +273,6 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr, vmemmap_use_sub_pmd(addr, next); continue; } - ret = modify_pte_table(pmd, addr, next, add, direct); if (ret) goto out; @@ -306,12 +299,10 @@ static void try_free_pmd_table(pud_t *pud, unsigned long start) if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end) return; #endif - pmd = pmd_offset(pud, start); for (i = 0; i < PTRS_PER_PMD; i++, pmd++) if (!pmd_none(*pmd)) return; - vmem_free_pages(pud_deref(*pud), CRST_ALLOC_ORDER); pud_clear(pud); } @@ -327,11 +318,9 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end, prot = pgprot_val(REGION3_KERNEL); if (!MACHINE_HAS_NX) prot &= ~_REGION_ENTRY_NOEXEC; - pud = pud_offset(p4d, addr); for (; addr < end; addr = next, pud++) { next = pud_addr_end(addr, end); - if (!add) { if (pud_none(*pud)) continue; @@ -356,9 +345,9 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end, if (!pmd) goto out; pud_populate(&init_mm, pud, pmd); - } else if (pud_large(*pud)) + } else if (pud_large(*pud)) { continue; - + } ret = modify_pmd_table(pud, addr, next, add, direct); if (ret) goto out; @@ -387,10 +376,10 @@ static void try_free_pud_table(p4d_t *p4d, unsigned long start) #endif pud = pud_offset(p4d, start); - for (i = 0; i < PTRS_PER_PUD; i++, pud++) + for (i = 0; i < PTRS_PER_PUD; i++, pud++) { if (!pud_none(*pud)) return; - + } vmem_free_pages(p4d_deref(*p4d), CRST_ALLOC_ORDER); p4d_clear(p4d); } @@ -406,7 +395,6 @@ static int modify_p4d_table(pgd_t *pgd, unsigned long addr, unsigned long end, p4d = p4d_offset(pgd, addr); for (; addr < end; addr = next, p4d++) { next = p4d_addr_end(addr, end); - if (!add) { if (p4d_none(*p4d)) continue; @@ -415,7 +403,6 @@ static int modify_p4d_table(pgd_t *pgd, unsigned long addr, unsigned long end, if (!pud) goto out; } - ret = modify_pud_table(p4d, addr, next, add, direct); if (ret) goto out; @@ -442,10 +429,10 @@ static void try_free_p4d_table(pgd_t *pgd, unsigned long start) #endif p4d = p4d_offset(pgd, start); - for (i = 0; i < PTRS_PER_P4D; i++, p4d++) + for (i = 0; i < PTRS_PER_P4D; i++, p4d++) { if (!p4d_none(*p4d)) return; - + } vmem_free_pages(pgd_deref(*pgd), CRST_ALLOC_ORDER); pgd_clear(pgd); } @@ -460,7 +447,6 @@ static int modify_pagetable(unsigned long start, unsigned long end, bool add, if (WARN_ON_ONCE(!PAGE_ALIGNED(start | end))) return -EINVAL; - for (addr = start; addr < end; addr = next) { next = pgd_addr_end(addr, end); pgd = pgd_offset_k(addr); @@ -474,7 +460,6 @@ static int modify_pagetable(unsigned long start, unsigned long end, bool add, goto out; pgd_populate(&init_mm, pgd, p4d); } - ret = modify_p4d_table(pgd, addr, next, add, direct); if (ret) goto out; @@ -518,7 +503,7 @@ static void vmem_remove_range(unsigned long start, unsigned long size) * Add a backed mem_map array to the virtual mem_map array. 
*/ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, - struct vmem_altmap *altmap) + struct vmem_altmap *altmap) { int ret; @@ -532,7 +517,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, } void vmemmap_free(unsigned long start, unsigned long end, - struct vmem_altmap *altmap) + struct vmem_altmap *altmap) { mutex_lock(&vmem_mutex); remove_pagetable(start, end, false);