From 8cf57d7217c32133d25615324c0ab4aaacf4d9c4 Mon Sep 17 00:00:00 2001 From: Anastasia Eskova Date: Fri, 28 Apr 2023 06:42:42 -0400 Subject: [PATCH 01/66] s390: add support for user-defined certificates Enable receiving the user-defined certificates from the s390x hypervisor via new diagnose 0x320 calls, and make them available to the Linux root user as 'cert_store_key' type keys in a so-called 'cert_store' keyring. New user-space interfaces: /sys/firmware/cert_store/refresh Writing to this attribute re-fetches certificates via DIAG 0x320 /sys/firmware/cert_store/cs_status Reading from this attribute returns either of: "uninitialized" If no certificate has been retrieved yet "ok" If certificates have been successfully retrieved "failed ()" If certificate retrieval failed with reason code New debug trace areas: /sys/kernel/debug/s390dbf/cert_store_msg /sys/kernel/debug/s390dbf/cert_store_hexdump Usage example: To initiate request for certificates available to the system as root: $ echo 1 > /sys/firmware/cert_store/refresh Upon success the '/sys/firmware/cert_store/cs_status' contains the value 'ok'. $ cat /sys/firmware/cert_store/cs_status ok Get the ID of the keyring 'cert_store': $ keyctl search @us keyring cert_store OR $ keyctl link @us @s; keyctl request keyring cert_store Obtain list of IDs of certificates: $ keyctl rlist Display certificate content as hex-dump: $ keyctl read Read certificate contents as binary data: $ keyctl pipe >cert_data Display certificate description: $ keyctl describe The certificate description has the following format: <64 bytes certificate name in EBCDIC> ':' ':' The certificate description in /proc/keys has certificate name represented in ASCII. Users can read but cannot update the content of the certificate. Signed-off-by: Anastasia Eskova Reviewed-by: Peter Oberparleiter Acked-by: Heiko Carstens Signed-off-by: Heiko Carstens --- arch/s390/Kconfig | 10 + arch/s390/include/asm/diag.h | 1 + arch/s390/include/asm/sclp.h | 1 + arch/s390/kernel/Makefile | 2 +- arch/s390/kernel/cert_store.c | 810 +++++++++++++++++++++++++++++++++ arch/s390/kernel/diag.c | 1 + drivers/s390/char/sclp_early.c | 1 + 7 files changed, 825 insertions(+), 1 deletion(-) create mode 100644 arch/s390/kernel/cert_store.c diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 5b39918b7042..c0afca69904e 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -512,6 +512,16 @@ config KEXEC_SIG verification for the corresponding kernel image type being loaded in order for this to work. +config CERT_STORE + bool "Get user certificates via DIAG320" + depends on KEYS + help + Enable this option if you want to access user-provided secure boot + certificates via DIAG 0x320. + + These certificates will be made available via the keyring named + 'cert_store'. + config KERNEL_NOBP def_bool n prompt "Enable modified branch prediction for the kernel by default" diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h index 902e0330dd91..fb5a886ff47f 100644 --- a/arch/s390/include/asm/diag.h +++ b/arch/s390/include/asm/diag.h @@ -36,6 +36,7 @@ enum diag_stat_enum { DIAG_STAT_X304, DIAG_STAT_X308, DIAG_STAT_X318, + DIAG_STAT_X320, DIAG_STAT_X500, NR_DIAG_STAT }; diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index dac7da88f61f..5742d23bba13 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -86,6 +86,7 @@ struct sclp_info { unsigned char has_kss : 1; unsigned char has_gisaf : 1; unsigned char has_diag318 : 1; + unsigned char has_diag320 : 1; unsigned char has_sipl : 1; unsigned char has_sipl_eckd : 1; unsigned char has_dirq : 1; diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 6b2a051e1f8a..8d7514c72bb8 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -68,7 +68,7 @@ obj-$(CONFIG_JUMP_LABEL) += jump_label.o obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file.o kexec_image.o obj-$(CONFIG_KEXEC_FILE) += kexec_elf.o - +obj-$(CONFIG_CERT_STORE) += cert_store.o obj-$(CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT) += ima_arch.o obj-$(CONFIG_PERF_EVENTS) += perf_event.o diff --git a/arch/s390/kernel/cert_store.c b/arch/s390/kernel/cert_store.c new file mode 100644 index 000000000000..1cbeb9ce0eb1 --- /dev/null +++ b/arch/s390/kernel/cert_store.c @@ -0,0 +1,810 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * DIAG 0x320 support and certificate store handling + * + * Copyright IBM Corp. 2023 + * Author(s): Anastasia Eskova + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DIAG_MAX_RETRIES 10 + +#define VCE_FLAGS_VALID_MASK 0x80 + +#define ISM_LEN_DWORDS 4 +#define VCSSB_LEN_BYTES 128 +#define VCSSB_LEN_NO_CERTS 4 +#define VCB_LEN_NO_CERTS 64 +#define VC_NAME_LEN_BYTES 64 + +#define CERT_STORE_KEY_TYPE_NAME "cert_store_key" +#define CERT_STORE_KEYRING_NAME "cert_store" + +static debug_info_t *cert_store_dbf; +static debug_info_t *cert_store_hexdump; + +#define pr_dbf_msg(fmt, ...) \ + debug_sprintf_event(cert_store_dbf, 3, fmt "\n", ## __VA_ARGS__) + +enum diag320_subcode { + DIAG320_SUBCODES = 0, + DIAG320_STORAGE = 1, + DIAG320_CERT_BLOCK = 2, +}; + +enum diag320_rc { + DIAG320_RC_OK = 0x0001, + DIAG320_RC_CS_NOMATCH = 0x0306, +}; + +/* Verification Certificates Store Support Block (VCSSB). */ +struct vcssb { + u32 vcssb_length; + u8 pad_0x04[3]; + u8 version; + u8 pad_0x08[8]; + u32 cs_token; + u8 pad_0x14[12]; + u16 total_vc_index_count; + u16 max_vc_index_count; + u8 pad_0x24[28]; + u32 max_vce_length; + u32 max_vcxe_length; + u8 pad_0x48[8]; + u32 max_single_vcb_length; + u32 total_vcb_length; + u32 max_single_vcxb_length; + u32 total_vcxb_length; + u8 pad_0x60[32]; +} __packed __aligned(8); + +/* Verification Certificate Entry (VCE) Header. */ +struct vce_header { + u32 vce_length; + u8 flags; + u8 key_type; + u16 vc_index; + u8 vc_name[VC_NAME_LEN_BYTES]; /* EBCDIC */ + u8 vc_format; + u8 pad_0x49; + u16 key_id_length; + u8 pad_0x4c; + u8 vc_hash_type; + u16 vc_hash_length; + u8 pad_0x50[4]; + u32 vc_length; + u8 pad_0x58[8]; + u16 vc_hash_offset; + u16 vc_offset; + u8 pad_0x64[28]; +} __packed __aligned(4); + +/* Verification Certificate Block (VCB) Header. */ +struct vcb_header { + u32 vcb_input_length; + u8 pad_0x04[4]; + u16 first_vc_index; + u16 last_vc_index; + u32 pad_0x0c; + u32 cs_token; + u8 pad_0x14[12]; + u32 vcb_output_length; + u8 pad_0x24[3]; + u8 version; + u16 stored_vc_count; + u16 remaining_vc_count; + u8 pad_0x2c[20]; +} __packed __aligned(4); + +/* Verification Certificate Block (VCB). */ +struct vcb { + struct vcb_header vcb_hdr; + u8 vcb_buf[]; +} __packed __aligned(4); + +/* Verification Certificate Entry (VCE). */ +struct vce { + struct vce_header vce_hdr; + u8 cert_data_buf[]; +} __packed __aligned(4); + +static void cert_store_key_describe(const struct key *key, struct seq_file *m) +{ + char ascii[VC_NAME_LEN_BYTES + 1]; + + /* + * First 64 bytes of the key description is key name in EBCDIC CP 500. + * Convert it to ASCII for displaying in /proc/keys. + */ + strscpy(ascii, key->description, sizeof(ascii)); + EBCASC_500(ascii, VC_NAME_LEN_BYTES); + seq_puts(m, ascii); + + seq_puts(m, &key->description[VC_NAME_LEN_BYTES]); + if (key_is_positive(key)) + seq_printf(m, ": %u", key->datalen); +} + +/* + * Certificate store key type takes over properties of + * user key but cannot be updated. + */ +static struct key_type key_type_cert_store_key = { + .name = CERT_STORE_KEY_TYPE_NAME, + .preparse = user_preparse, + .free_preparse = user_free_preparse, + .instantiate = generic_key_instantiate, + .revoke = user_revoke, + .destroy = user_destroy, + .describe = cert_store_key_describe, + .read = user_read, +}; + +/* Logging functions. */ +static void pr_dbf_vcb(const struct vcb *b) +{ + pr_dbf_msg("VCB Header:"); + pr_dbf_msg("vcb_input_length: %d", b->vcb_hdr.vcb_input_length); + pr_dbf_msg("first_vc_index: %d", b->vcb_hdr.first_vc_index); + pr_dbf_msg("last_vc_index: %d", b->vcb_hdr.last_vc_index); + pr_dbf_msg("cs_token: %d", b->vcb_hdr.cs_token); + pr_dbf_msg("vcb_output_length: %d", b->vcb_hdr.vcb_output_length); + pr_dbf_msg("version: %d", b->vcb_hdr.version); + pr_dbf_msg("stored_vc_count: %d", b->vcb_hdr.stored_vc_count); + pr_dbf_msg("remaining_vc_count: %d", b->vcb_hdr.remaining_vc_count); +} + +static void pr_dbf_vce(const struct vce *e) +{ + unsigned char vc_name[VC_NAME_LEN_BYTES + 1]; + char log_string[VC_NAME_LEN_BYTES + 40]; + + pr_dbf_msg("VCE Header:"); + pr_dbf_msg("vce_hdr.vce_length: %d", e->vce_hdr.vce_length); + pr_dbf_msg("vce_hdr.flags: %d", e->vce_hdr.flags); + pr_dbf_msg("vce_hdr.key_type: %d", e->vce_hdr.key_type); + pr_dbf_msg("vce_hdr.vc_index: %d", e->vce_hdr.vc_index); + pr_dbf_msg("vce_hdr.vc_format: %d", e->vce_hdr.vc_format); + pr_dbf_msg("vce_hdr.key_id_length: %d", e->vce_hdr.key_id_length); + pr_dbf_msg("vce_hdr.vc_hash_type: %d", e->vce_hdr.vc_hash_type); + pr_dbf_msg("vce_hdr.vc_hash_length: %d", e->vce_hdr.vc_hash_length); + pr_dbf_msg("vce_hdr.vc_hash_offset: %d", e->vce_hdr.vc_hash_offset); + pr_dbf_msg("vce_hdr.vc_length: %d", e->vce_hdr.vc_length); + pr_dbf_msg("vce_hdr.vc_offset: %d", e->vce_hdr.vc_offset); + + /* Certificate name in ASCII. */ + memcpy(vc_name, e->vce_hdr.vc_name, VC_NAME_LEN_BYTES); + EBCASC_500(vc_name, VC_NAME_LEN_BYTES); + vc_name[VC_NAME_LEN_BYTES] = '\0'; + + snprintf(log_string, sizeof(log_string), + "index: %d vce_hdr.vc_name (ASCII): %s", + e->vce_hdr.vc_index, vc_name); + debug_text_event(cert_store_hexdump, 3, log_string); + + /* Certificate data. */ + debug_text_event(cert_store_hexdump, 3, "VCE: Certificate data start"); + debug_event(cert_store_hexdump, 3, (u8 *)e->cert_data_buf, 128); + debug_text_event(cert_store_hexdump, 3, "VCE: Certificate data end"); + debug_event(cert_store_hexdump, 3, + (u8 *)e->cert_data_buf + e->vce_hdr.vce_length - 128, 128); +} + +static void pr_dbf_vcssb(const struct vcssb *s) +{ + debug_text_event(cert_store_hexdump, 3, "DIAG320 Subcode1"); + debug_event(cert_store_hexdump, 3, (u8 *)s, VCSSB_LEN_BYTES); + + pr_dbf_msg("VCSSB:"); + pr_dbf_msg("vcssb_length: %u", s->vcssb_length); + pr_dbf_msg("version: %u", s->version); + pr_dbf_msg("cs_token: %u", s->cs_token); + pr_dbf_msg("total_vc_index_count: %u", s->total_vc_index_count); + pr_dbf_msg("max_vc_index_count: %u", s->max_vc_index_count); + pr_dbf_msg("max_vce_length: %u", s->max_vce_length); + pr_dbf_msg("max_vcxe_length: %u", s->max_vce_length); + pr_dbf_msg("max_single_vcb_length: %u", s->max_single_vcb_length); + pr_dbf_msg("total_vcb_length: %u", s->total_vcb_length); + pr_dbf_msg("max_single_vcxb_length: %u", s->max_single_vcxb_length); + pr_dbf_msg("total_vcxb_length: %u", s->total_vcxb_length); +} + +static int __diag320(unsigned long subcode, void *addr) +{ + union register_pair rp = { .even = (unsigned long)addr, }; + + asm volatile( + " diag %[rp],%[subcode],0x320\n" + "0: nopr %%r7\n" + EX_TABLE(0b, 0b) + : [rp] "+d" (rp.pair) + : [subcode] "d" (subcode) + : "cc", "memory"); + + return rp.odd; +} + +static int diag320(unsigned long subcode, void *addr) +{ + diag_stat_inc(DIAG_STAT_X320); + + return __diag320(subcode, addr); +} + +/* + * Calculate SHA256 hash of the VCE certificate and compare it to hash stored in + * VCE. Return -EINVAL if hashes don't match. + */ +static int check_certificate_hash(const struct vce *vce) +{ + u8 hash[SHA256_DIGEST_SIZE]; + u16 vc_hash_length; + u8 *vce_hash; + + vce_hash = (u8 *)vce + vce->vce_hdr.vc_hash_offset; + vc_hash_length = vce->vce_hdr.vc_hash_length; + sha256((u8 *)vce + vce->vce_hdr.vc_offset, vce->vce_hdr.vc_length, hash); + if (memcmp(vce_hash, hash, vc_hash_length) == 0) + return 0; + + pr_dbf_msg("SHA256 hash of received certificate does not match"); + debug_text_event(cert_store_hexdump, 3, "VCE hash:"); + debug_event(cert_store_hexdump, 3, vce_hash, SHA256_DIGEST_SIZE); + debug_text_event(cert_store_hexdump, 3, "Calculated hash:"); + debug_event(cert_store_hexdump, 3, hash, SHA256_DIGEST_SIZE); + + return -EINVAL; +} + +static int check_certificate_valid(const struct vce *vce) +{ + if (!(vce->vce_hdr.flags & VCE_FLAGS_VALID_MASK)) { + pr_dbf_msg("Certificate entry is invalid"); + return -EINVAL; + } + if (vce->vce_hdr.vc_format != 1) { + pr_dbf_msg("Certificate format is not supported"); + return -EINVAL; + } + if (vce->vce_hdr.vc_hash_type != 1) { + pr_dbf_msg("Hash type is not supported"); + return -EINVAL; + } + + return check_certificate_hash(vce); +} + +static struct key *get_user_session_keyring(void) +{ + key_ref_t us_keyring_ref; + + us_keyring_ref = lookup_user_key(KEY_SPEC_USER_SESSION_KEYRING, + KEY_LOOKUP_CREATE, KEY_NEED_LINK); + if (IS_ERR(us_keyring_ref)) { + pr_dbf_msg("Couldn't get user session keyring: %ld", + PTR_ERR(us_keyring_ref)); + return ERR_PTR(-ENOKEY); + } + key_ref_put(us_keyring_ref); + return key_ref_to_ptr(us_keyring_ref); +} + +/* Invalidate all keys from cert_store keyring. */ +static int invalidate_keyring_keys(struct key *keyring) +{ + unsigned long num_keys, key_index; + size_t keyring_payload_len; + key_serial_t *key_array; + struct key *current_key; + int rc; + + keyring_payload_len = key_type_keyring.read(keyring, NULL, 0); + num_keys = keyring_payload_len / sizeof(key_serial_t); + key_array = kcalloc(num_keys, sizeof(key_serial_t), GFP_KERNEL); + if (!key_array) + return -ENOMEM; + + rc = key_type_keyring.read(keyring, (char *)key_array, keyring_payload_len); + if (rc != keyring_payload_len) { + pr_dbf_msg("Couldn't read keyring payload"); + goto out; + } + + for (key_index = 0; key_index < num_keys; key_index++) { + current_key = key_lookup(key_array[key_index]); + pr_dbf_msg("Invalidating key %08x", current_key->serial); + + key_invalidate(current_key); + key_put(current_key); + rc = key_unlink(keyring, current_key); + if (rc) { + pr_dbf_msg("Couldn't unlink key %08x: %d", current_key->serial, rc); + break; + } + } +out: + kfree(key_array); + return rc; +} + +static struct key *find_cs_keyring(void) +{ + key_ref_t cs_keyring_ref; + struct key *cs_keyring; + + cs_keyring_ref = keyring_search(make_key_ref(get_user_session_keyring(), true), + &key_type_keyring, CERT_STORE_KEYRING_NAME, + false); + if (!IS_ERR(cs_keyring_ref)) { + cs_keyring = key_ref_to_ptr(cs_keyring_ref); + key_ref_put(cs_keyring_ref); + goto found; + } + /* Search default locations: thread, process, session keyrings */ + cs_keyring = request_key(&key_type_keyring, CERT_STORE_KEYRING_NAME, NULL); + if (IS_ERR(cs_keyring)) + return NULL; + key_put(cs_keyring); +found: + return cs_keyring; +} + +static void cleanup_cs_keys(void) +{ + struct key *cs_keyring; + + cs_keyring = find_cs_keyring(); + if (!cs_keyring) + return; + + pr_dbf_msg("Found cert_store keyring. Purging..."); + /* + * Remove cert_store_key_type in case invalidation + * of old cert_store keys failed (= severe error). + */ + if (invalidate_keyring_keys(cs_keyring)) + unregister_key_type(&key_type_cert_store_key); + + keyring_clear(cs_keyring); + key_invalidate(cs_keyring); + key_put(cs_keyring); + key_unlink(get_user_session_keyring(), cs_keyring); +} + +static struct key *create_cs_keyring(void) +{ + static struct key *cs_keyring; + + /* Cleanup previous cs_keyring and all associated keys if any. */ + cleanup_cs_keys(); + cs_keyring = keyring_alloc(CERT_STORE_KEYRING_NAME, GLOBAL_ROOT_UID, + GLOBAL_ROOT_GID, current_cred(), + (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW | KEY_USR_READ, + KEY_ALLOC_NOT_IN_QUOTA | KEY_ALLOC_SET_KEEP, + NULL, get_user_session_keyring()); + if (IS_ERR(cs_keyring)) { + pr_dbf_msg("Can't allocate cert_store keyring"); + return NULL; + } + + pr_dbf_msg("Successfully allocated cert_store keyring: %08x", cs_keyring->serial); + + /* + * In case a previous clean-up ran into an + * error and unregistered key type. + */ + register_key_type(&key_type_cert_store_key); + + return cs_keyring; +} + +/* + * Allocate memory and create key description in format + * [key name in EBCDIC]:[VCE index]:[CS token]. + * Return a pointer to key description or NULL if memory + * allocation failed. Memory should be freed by caller. + */ +static char *get_key_description(struct vcssb *vcssb, const struct vce *vce) +{ + size_t len, name_len; + u32 cs_token; + char *desc; + + cs_token = vcssb->cs_token; + /* Description string contains "%64s:%04u:%08u\0". */ + name_len = sizeof(vce->vce_hdr.vc_name); + len = name_len + 1 + 4 + 1 + 8 + 1; + desc = kmalloc(len, GFP_KERNEL); + if (!desc) + return NULL; + + memcpy(desc, vce->vce_hdr.vc_name, name_len); + sprintf(desc + name_len, ":%04u:%08u", vce->vce_hdr.vc_index, cs_token); + + return desc; +} + +/* + * Create a key of type "cert_store_key" using the data from VCE for key + * payload and key description. Link the key to "cert_store" keyring. + */ +static int create_key_from_vce(struct vcssb *vcssb, struct vce *vce, + struct key *keyring) +{ + key_ref_t newkey; + char *desc; + int rc; + + desc = get_key_description(vcssb, vce); + if (!desc) + return -ENOMEM; + + newkey = key_create_or_update( + make_key_ref(keyring, true), CERT_STORE_KEY_TYPE_NAME, + desc, (u8 *)vce + vce->vce_hdr.vc_offset, + vce->vce_hdr.vc_length, + (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW | KEY_USR_READ, + KEY_ALLOC_NOT_IN_QUOTA); + + rc = PTR_ERR_OR_ZERO(newkey); + if (rc) { + pr_dbf_msg("Couldn't create a key from Certificate Entry (%d)", rc); + rc = -ENOKEY; + goto out; + } + + key_ref_put(newkey); +out: + kfree(desc); + return rc; +} + +/* Get Verification Certificate Storage Size block with DIAG320 subcode2. */ +static int get_vcssb(struct vcssb *vcssb) +{ + int diag320_rc; + + memset(vcssb, 0, sizeof(*vcssb)); + vcssb->vcssb_length = VCSSB_LEN_BYTES; + diag320_rc = diag320(DIAG320_STORAGE, vcssb); + pr_dbf_vcssb(vcssb); + + if (diag320_rc != DIAG320_RC_OK) { + pr_dbf_msg("Diag 320 Subcode 1 returned bad RC: %04x", diag320_rc); + return -EIO; + } + if (vcssb->vcssb_length == VCSSB_LEN_NO_CERTS) { + pr_dbf_msg("No certificates available for current configuration"); + return -ENOKEY; + } + + return 0; +} + +static u32 get_4k_mult_vcb_size(struct vcssb *vcssb) +{ + return round_up(vcssb->max_single_vcb_length, PAGE_SIZE); +} + +/* Fill input fields of single-entry VCB that will be read by LPAR. */ +static void fill_vcb_input(struct vcssb *vcssb, struct vcb *vcb, u16 index) +{ + memset(vcb, 0, sizeof(*vcb)); + vcb->vcb_hdr.vcb_input_length = get_4k_mult_vcb_size(vcssb); + vcb->vcb_hdr.cs_token = vcssb->cs_token; + + /* Request single entry. */ + vcb->vcb_hdr.first_vc_index = index; + vcb->vcb_hdr.last_vc_index = index; +} + +static void extract_vce_from_sevcb(struct vcb *vcb, struct vce *vce) +{ + struct vce *extracted_vce; + + extracted_vce = (struct vce *)vcb->vcb_buf; + memcpy(vce, vcb->vcb_buf, extracted_vce->vce_hdr.vce_length); + pr_dbf_vce(vce); +} + +static int get_sevcb(struct vcssb *vcssb, u16 index, struct vcb *vcb) +{ + int rc, diag320_rc; + + fill_vcb_input(vcssb, vcb, index); + + diag320_rc = diag320(DIAG320_CERT_BLOCK, vcb); + pr_dbf_msg("Diag 320 Subcode2 RC %2x", diag320_rc); + pr_dbf_vcb(vcb); + + switch (diag320_rc) { + case DIAG320_RC_OK: + rc = 0; + if (vcb->vcb_hdr.vcb_output_length == VCB_LEN_NO_CERTS) { + pr_dbf_msg("No certificate entry for index %u", index); + rc = -ENOKEY; + } else if (vcb->vcb_hdr.remaining_vc_count != 0) { + /* Retry on insufficient space. */ + pr_dbf_msg("Couldn't get all requested certificates"); + rc = -EAGAIN; + } + break; + case DIAG320_RC_CS_NOMATCH: + pr_dbf_msg("Certificate Store token mismatch"); + rc = -EAGAIN; + break; + default: + pr_dbf_msg("Diag 320 Subcode2 returned bad rc (0x%4x)", diag320_rc); + rc = -EINVAL; + break; + } + + return rc; +} + +/* + * Allocate memory for single-entry VCB, get VCB via DIAG320 subcode 2 call, + * extract VCE and create a key from its' certificate. + */ +static int create_key_from_sevcb(struct vcssb *vcssb, u16 index, + struct key *keyring) +{ + struct vcb *vcb; + struct vce *vce; + int rc; + + rc = -ENOMEM; + vcb = vmalloc(get_4k_mult_vcb_size(vcssb)); + vce = vmalloc(vcssb->max_single_vcb_length - sizeof(vcb->vcb_hdr)); + if (!vcb || !vce) + goto out; + + rc = get_sevcb(vcssb, index, vcb); + if (rc) + goto out; + + extract_vce_from_sevcb(vcb, vce); + rc = check_certificate_valid(vce); + if (rc) + goto out; + + rc = create_key_from_vce(vcssb, vce, keyring); + if (rc) + goto out; + + pr_dbf_msg("Successfully created key from Certificate Entry %d", index); +out: + vfree(vce); + vfree(vcb); + return rc; +} + +/* + * Request a single-entry VCB for each VCE available for the partition. + * Create a key from it and link it to cert_store keyring. If no keys + * could be created (i.e. VCEs were invalid) return -ENOKEY. + */ +static int add_certificates_to_keyring(struct vcssb *vcssb, struct key *keyring) +{ + int rc, index, count, added; + + count = 0; + added = 0; + /* Certificate Store entries indices start with 1 and have no gaps. */ + for (index = 1; index < vcssb->total_vc_index_count + 1; index++) { + pr_dbf_msg("Creating key from VCE %u", index); + rc = create_key_from_sevcb(vcssb, index, keyring); + count++; + + if (rc == -EAGAIN) + return rc; + + if (rc) + pr_dbf_msg("Creating key from VCE %u failed (%d)", index, rc); + else + added++; + } + + if (added == 0) { + pr_dbf_msg("Processed %d entries. No keys created", count); + return -ENOKEY; + } + + pr_info("Added %d of %d keys to cert_store keyring", added, count); + + /* + * Do not allow to link more keys to certificate store keyring after all + * the VCEs were processed. + */ + rc = keyring_restrict(make_key_ref(keyring, true), NULL, NULL); + if (rc) + pr_dbf_msg("Failed to set restriction to cert_store keyring (%d)", rc); + + return 0; +} + +/* + * Check which DIAG320 subcodes are installed. + * Return -ENOENT if subcodes 1 or 2 are not available. + */ +static int query_diag320_subcodes(void) +{ + unsigned long ism[ISM_LEN_DWORDS]; + int rc; + + rc = diag320(0, ism); + if (rc != DIAG320_RC_OK) { + pr_dbf_msg("DIAG320 subcode query returned %04x", rc); + return -ENOENT; + } + + debug_text_event(cert_store_hexdump, 3, "DIAG320 Subcode 0"); + debug_event(cert_store_hexdump, 3, ism, sizeof(ism)); + + if (!test_bit_inv(1, ism) || !test_bit_inv(2, ism)) { + pr_dbf_msg("Not all required DIAG320 subcodes are installed"); + return -ENOENT; + } + + return 0; +} + +/* + * Check if Certificate Store is supported by the firmware and DIAG320 subcodes + * 1 and 2 are installed. Create cert_store keyring and link all certificates + * available for the current partition to it as "cert_store_key" type + * keys. On refresh or error invalidate cert_store keyring and destroy + * all keys of "cert_store_key" type. + */ +static int fill_cs_keyring(void) +{ + struct key *cs_keyring; + struct vcssb *vcssb; + int rc; + + rc = -ENOMEM; + vcssb = kmalloc(VCSSB_LEN_BYTES, GFP_KERNEL); + if (!vcssb) + goto cleanup_keys; + + rc = -ENOENT; + if (!sclp.has_diag320) { + pr_dbf_msg("Certificate Store is not supported"); + goto cleanup_keys; + } + + rc = query_diag320_subcodes(); + if (rc) + goto cleanup_keys; + + rc = get_vcssb(vcssb); + if (rc) + goto cleanup_keys; + + cs_keyring = create_cs_keyring(); + if (!cs_keyring) + goto cleanup_keys; + + rc = add_certificates_to_keyring(vcssb, cs_keyring); + if (rc) + goto cleanup_cs_keyring; + + goto out; + +cleanup_cs_keyring: + key_put(cs_keyring); +cleanup_keys: + cleanup_cs_keys(); +out: + kfree(vcssb); + return rc; +} + +static DEFINE_MUTEX(cs_refresh_lock); +static int cs_status_val = -1; + +static ssize_t cs_status_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + if (cs_status_val == -1) + return sysfs_emit(buf, "uninitialized\n"); + else if (cs_status_val == 0) + return sysfs_emit(buf, "ok\n"); + + return sysfs_emit(buf, "failed (%d)\n", cs_status_val); +} + +static struct kobj_attribute cs_status_attr = __ATTR_RO(cs_status); + +static ssize_t refresh_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + int rc, retries; + + pr_dbf_msg("Refresh certificate store information requested"); + rc = mutex_lock_interruptible(&cs_refresh_lock); + if (rc) + return rc; + + for (retries = 0; retries < DIAG_MAX_RETRIES; retries++) { + /* Request certificates from certificate store. */ + rc = fill_cs_keyring(); + if (rc) + pr_dbf_msg("Failed to refresh certificate store information (%d)", rc); + if (rc != -EAGAIN) + break; + } + cs_status_val = rc; + mutex_unlock(&cs_refresh_lock); + + return rc ?: count; +} + +static struct kobj_attribute refresh_attr = __ATTR_WO(refresh); + +static const struct attribute *cert_store_attrs[] __initconst = { + &cs_status_attr.attr, + &refresh_attr.attr, + NULL, +}; + +static struct kobject *cert_store_kobj; + +static int __init cert_store_init(void) +{ + int rc = -ENOMEM; + + cert_store_dbf = debug_register("cert_store_msg", 10, 1, 64); + if (!cert_store_dbf) + goto cleanup_dbf; + + cert_store_hexdump = debug_register("cert_store_hexdump", 3, 1, 128); + if (!cert_store_hexdump) + goto cleanup_dbf; + + debug_register_view(cert_store_hexdump, &debug_hex_ascii_view); + debug_register_view(cert_store_dbf, &debug_sprintf_view); + + /* Create directory /sys/firmware/cert_store. */ + cert_store_kobj = kobject_create_and_add("cert_store", firmware_kobj); + if (!cert_store_kobj) + goto cleanup_dbf; + + rc = sysfs_create_files(cert_store_kobj, cert_store_attrs); + if (rc) + goto cleanup_kobj; + + register_key_type(&key_type_cert_store_key); + + return rc; + +cleanup_kobj: + kobject_put(cert_store_kobj); +cleanup_dbf: + debug_unregister(cert_store_dbf); + debug_unregister(cert_store_hexdump); + + return rc; +} +device_initcall(cert_store_init); diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c index 82079f2d8583..cca56b649ca3 100644 --- a/arch/s390/kernel/diag.c +++ b/arch/s390/kernel/diag.c @@ -50,6 +50,7 @@ static const struct diag_desc diag_map[NR_DIAG_STAT] = { [DIAG_STAT_X304] = { .code = 0x304, .name = "Partition-Resource Service" }, [DIAG_STAT_X308] = { .code = 0x308, .name = "List-Directed IPL" }, [DIAG_STAT_X318] = { .code = 0x318, .name = "CP Name and Version Codes" }, + [DIAG_STAT_X320] = { .code = 0x320, .name = "Certificate Store" }, [DIAG_STAT_X500] = { .code = 0x500, .name = "Virtio Service" }, }; diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index f480d6c7fd39..fdc8668f3fba 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -55,6 +55,7 @@ static void __init sclp_early_facilities_detect(void) S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_GUEST; if (sccb->cpuoff > 134) { sclp.has_diag318 = !!(sccb->byte_134 & 0x80); + sclp.has_diag320 = !!(sccb->byte_134 & 0x04); sclp.has_iplcc = !!(sccb->byte_134 & 0x02); } if (sccb->cpuoff > 137) { From c83cd4fe31d52bca0587370d9e98f00072aefa27 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 2 Jul 2023 21:20:09 +0200 Subject: [PATCH 02/66] s390/diag: handle diag 204 subcode 4 address correctly Diagnose 204 subcode 4 requires a real (physical) address, but a virtual address is passed to the inline assembly. Convert the address to a physical address for only this specific case. Acked-by: Alexander Gordeev Reviewed-by: Christian Borntraeger Signed-off-by: Heiko Carstens --- arch/s390/include/asm/diag.h | 2 ++ arch/s390/kernel/diag.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h index fb5a886ff47f..bed804137537 100644 --- a/arch/s390/include/asm/diag.h +++ b/arch/s390/include/asm/diag.h @@ -109,6 +109,8 @@ enum diag204_sc { DIAG204_SUBC_STIB7 = 7 }; +#define DIAG204_SUBCODE_MASK 0xffff + /* The two available diag 204 data formats */ enum diag204_format { DIAG204_INFO_SIMPLE = 0, diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c index cca56b649ca3..f3a0f39cbd6c 100644 --- a/arch/s390/kernel/diag.c +++ b/arch/s390/kernel/diag.c @@ -171,6 +171,8 @@ static inline int __diag204(unsigned long *subcode, unsigned long size, void *ad int diag204(unsigned long subcode, unsigned long size, void *addr) { diag_stat_inc(DIAG_STAT_X204); + if ((subcode & DIAG204_SUBCODE_MASK) == DIAG204_SUBC_STIB4) + addr = (void *)__pa(addr); size = __diag204(&subcode, size, addr); if (subcode) return -1; From 86e74965bbdf534b9c7f1f678b963492de41276e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 3 Jul 2023 14:34:25 +0200 Subject: [PATCH 03/66] s390/sthyi: enforce 4k alignment of vmalloc'ed area vmalloc() does not guarantee any alignment, unless it is explicitly requested with e.g. __vmalloc_node(). Using diag204() with subcode 7 requires a 4k aligned virtual buffer. Therefore switch to __vmalloc_node(). Note: with the current vmalloc() implementation callers would still get a 4k aligned area, even though this is quite non-obvious looking at the code. So changing this in sthyi doesn't fix a real bug. It is just to make sure the code will not suffer from some obscure options, like it happened in the past with kmalloc() where debug options changed the assumed alignment of allocated memory areas. Acked-by: Alexander Gordeev Signed-off-by: Heiko Carstens --- arch/s390/kernel/sthyi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/sthyi.c b/arch/s390/kernel/sthyi.c index 4d141e2c132e..98ebedbb5761 100644 --- a/arch/s390/kernel/sthyi.c +++ b/arch/s390/kernel/sthyi.c @@ -317,7 +317,9 @@ static void fill_diag(struct sthyi_sctns *sctns) if (pages <= 0) return; - diag204_buf = vmalloc(array_size(pages, PAGE_SIZE)); + diag204_buf = __vmalloc_node(array_size(pages, PAGE_SIZE), + PAGE_SIZE, GFP_KERNEL, NUMA_NO_NODE, + __builtin_return_address(0)); if (!diag204_buf) return; From 5ac8c72462cdad56e37981eb2172c5baa1ea40d6 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Wed, 28 Jun 2023 12:36:08 +0200 Subject: [PATCH 04/66] s390/zcrypt: remove CEX2 and CEX3 device drivers Remove the legacy device driver code for CEX2 and CEX3 cards. The last machines which are able to handle CEX2 crypto cards are z10 EC first available 2008 and z10 BC first available 2009. The last machines able to handle a CEX3 crypto card are z196 first available 2010 and z114 first available 2011. Please note that this does not imply to drop CEX2 and CEX3 support in general. With older kernels on hardware up to the aforementioned machine models these crypto cards will get support by IBM. The removal of the CEX2 and CEX3 device drivers code opens up some simplifications, for example support for crypto cards without rng support can be removed also. Signed-off-by: Harald Freudenberger Acked-by: Heiko Carstens Signed-off-by: Heiko Carstens --- drivers/crypto/Kconfig | 7 +- drivers/s390/crypto/Makefile | 2 +- drivers/s390/crypto/ap_bus.c | 25 +- drivers/s390/crypto/ap_bus.h | 19 +- drivers/s390/crypto/ap_queue.c | 47 +-- drivers/s390/crypto/zcrypt_cex2a.c | 227 ------------- drivers/s390/crypto/zcrypt_cex2a.h | 134 -------- drivers/s390/crypto/zcrypt_cex2c.c | 421 ------------------------- drivers/s390/crypto/zcrypt_cex2c.h | 18 -- drivers/s390/crypto/zcrypt_msgtype50.c | 64 ++-- drivers/s390/crypto/zcrypt_msgtype50.h | 3 +- drivers/s390/crypto/zcrypt_msgtype6.c | 14 +- 12 files changed, 42 insertions(+), 939 deletions(-) diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 44e44b8d9ce6..c761952f0dc6 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -70,10 +70,9 @@ config ZCRYPT select HW_RANDOM help Select this option if you want to enable support for - s390 cryptographic adapters like: - + Crypto Express 2 up to 7 Coprocessor (CEXxC) - + Crypto Express 2 up to 7 Accelerator (CEXxA) - + Crypto Express 4 up to 7 EP11 Coprocessor (CEXxP) + s390 cryptographic adapters like Crypto Express 4 up + to 8 in Coprocessor (CEXxC), EP11 Coprocessor (CEXxP) + or Accelerator (CEXxA) mode. config ZCRYPT_DEBUG bool "Enable debug features for s390 cryptographic adapters" diff --git a/drivers/s390/crypto/Makefile b/drivers/s390/crypto/Makefile index 22d2db690cd3..0edacd101c12 100644 --- a/drivers/s390/crypto/Makefile +++ b/drivers/s390/crypto/Makefile @@ -11,7 +11,7 @@ zcrypt-objs += zcrypt_msgtype6.o zcrypt_msgtype50.o zcrypt-objs += zcrypt_ccamisc.o zcrypt_ep11misc.o obj-$(CONFIG_ZCRYPT) += zcrypt.o # adapter drivers depend on ap.o and zcrypt.o -obj-$(CONFIG_ZCRYPT) += zcrypt_cex2c.o zcrypt_cex2a.o zcrypt_cex4.o +obj-$(CONFIG_ZCRYPT) += zcrypt_cex4.o # pkey kernel module pkey-objs := pkey_api.o diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 420120be300f..b1d2fedea086 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0+ /* - * Copyright IBM Corp. 2006, 2021 + * Copyright IBM Corp. 2006, 2023 * Author(s): Cornelia Huck * Martin Schwidefsky * Ralph Wuerthner @@ -387,23 +387,6 @@ static int ap_queue_info(ap_qid_t qid, int *q_type, unsigned int *q_fac, *q_ml = tapq_info.ml; *q_decfg = status.response_code == AP_RESPONSE_DECONFIGURED; *q_cstop = status.response_code == AP_RESPONSE_CHECKSTOPPED; - switch (*q_type) { - /* For CEX2 and CEX3 the available functions - * are not reflected by the facilities bits. - * Instead it is coded into the type. So here - * modify the function bits based on the type. - */ - case AP_DEVICE_TYPE_CEX2A: - case AP_DEVICE_TYPE_CEX3A: - *q_fac |= 0x08000000; - break; - case AP_DEVICE_TYPE_CEX2C: - case AP_DEVICE_TYPE_CEX3C: - *q_fac |= 0x10000000; - break; - default: - break; - } return 1; default: /* @@ -1678,8 +1661,8 @@ static int ap_get_compatible_type(ap_qid_t qid, int rawtype, unsigned int func) { int comp_type = 0; - /* < CEX2A is not supported */ - if (rawtype < AP_DEVICE_TYPE_CEX2A) { + /* < CEX4 is not supported */ + if (rawtype < AP_DEVICE_TYPE_CEX4) { AP_DBF_WARN("%s queue=%02x.%04x unsupported type %d\n", __func__, AP_QID_CARD(qid), AP_QID_QUEUE(qid), rawtype); @@ -1701,7 +1684,7 @@ static int ap_get_compatible_type(ap_qid_t qid, int rawtype, unsigned int func) apinfo.cat = AP_DEVICE_TYPE_CEX8; status = ap_qact(qid, 0, &apinfo); if (status.response_code == AP_RESPONSE_NORMAL && - apinfo.cat >= AP_DEVICE_TYPE_CEX2A && + apinfo.cat >= AP_DEVICE_TYPE_CEX4 && apinfo.cat <= AP_DEVICE_TYPE_CEX8) comp_type = apinfo.cat; } diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index 0d7b7eb374ad..47bbe9babc59 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0+ */ /* - * Copyright IBM Corp. 2006, 2019 + * Copyright IBM Corp. 2006, 2023 * Author(s): Cornelia Huck * Martin Schwidefsky * Ralph Wuerthner @@ -67,15 +67,8 @@ static inline int ap_test_bit(unsigned int *ptr, unsigned int nr) #define AP_RESPONSE_INVALID_DOMAIN 0x42 /* - * Known device types + * Supported AP device types */ -#define AP_DEVICE_TYPE_PCICC 3 -#define AP_DEVICE_TYPE_PCICA 4 -#define AP_DEVICE_TYPE_PCIXCC 5 -#define AP_DEVICE_TYPE_CEX2A 6 -#define AP_DEVICE_TYPE_CEX2C 7 -#define AP_DEVICE_TYPE_CEX3A 8 -#define AP_DEVICE_TYPE_CEX3C 9 #define AP_DEVICE_TYPE_CEX4 10 #define AP_DEVICE_TYPE_CEX5 11 #define AP_DEVICE_TYPE_CEX6 12 @@ -272,14 +265,6 @@ static inline void ap_release_message(struct ap_message *ap_msg) kfree_sensitive(ap_msg->private); } -/* - * Note: don't use ap_send/ap_recv after using ap_queue_message - * for the first time. Otherwise the ap message queue will get - * confused. - */ -int ap_send(ap_qid_t qid, unsigned long psmid, void *msg, size_t msglen); -int ap_recv(ap_qid_t qid, unsigned long *psmid, void *msg, size_t msglen); - enum ap_sm_wait ap_sm_event(struct ap_queue *aq, enum ap_sm_event event); enum ap_sm_wait ap_sm_event_loop(struct ap_queue *aq, enum ap_sm_event event); diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c index 30df83735adf..1336e632adc4 100644 --- a/drivers/s390/crypto/ap_queue.c +++ b/drivers/s390/crypto/ap_queue.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright IBM Corp. 2016 + * Copyright IBM Corp. 2016, 2023 * Author(s): Martin Schwidefsky * * Adjunct processor bus, queue related code. @@ -93,51 +93,6 @@ __ap_send(ap_qid_t qid, unsigned long psmid, void *msg, size_t msglen, return ap_nqap(qid, psmid, msg, msglen); } -int ap_send(ap_qid_t qid, unsigned long psmid, void *msg, size_t msglen) -{ - struct ap_queue_status status; - - status = __ap_send(qid, psmid, msg, msglen, 0); - if (status.async) - return -EPERM; - switch (status.response_code) { - case AP_RESPONSE_NORMAL: - return 0; - case AP_RESPONSE_Q_FULL: - case AP_RESPONSE_RESET_IN_PROGRESS: - return -EBUSY; - case AP_RESPONSE_REQ_FAC_NOT_INST: - return -EINVAL; - default: /* Device is gone. */ - return -ENODEV; - } -} -EXPORT_SYMBOL(ap_send); - -int ap_recv(ap_qid_t qid, unsigned long *psmid, void *msg, size_t msglen) -{ - struct ap_queue_status status; - - if (!msg) - return -EINVAL; - status = ap_dqap(qid, psmid, msg, msglen, NULL, NULL, NULL); - if (status.async) - return -EPERM; - switch (status.response_code) { - case AP_RESPONSE_NORMAL: - return 0; - case AP_RESPONSE_NO_PENDING_REPLY: - if (status.queue_empty) - return -ENOENT; - return -EBUSY; - case AP_RESPONSE_RESET_IN_PROGRESS: - return -EBUSY; - default: - return -ENODEV; - } -} -EXPORT_SYMBOL(ap_recv); - /* State machine definitions and helpers */ static enum ap_sm_wait ap_sm_nop(struct ap_queue *aq) diff --git a/drivers/s390/crypto/zcrypt_cex2a.c b/drivers/s390/crypto/zcrypt_cex2a.c index 83f692c9c197..e69de29bb2d1 100644 --- a/drivers/s390/crypto/zcrypt_cex2a.c +++ b/drivers/s390/crypto/zcrypt_cex2a.c @@ -1,227 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * Copyright IBM Corp. 2001, 2012 - * Author(s): Robert Burroughs - * Eric Rossman (edrossma@us.ibm.com) - * - * Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com) - * Major cleanup & driver split: Martin Schwidefsky - * Ralph Wuerthner - * MSGTYPE restruct: Holger Dengler - */ - -#include -#include -#include -#include -#include -#include -#include - -#include "ap_bus.h" -#include "zcrypt_api.h" -#include "zcrypt_error.h" -#include "zcrypt_cex2a.h" -#include "zcrypt_msgtype50.h" - -#define CEX2A_MIN_MOD_SIZE 1 /* 8 bits */ -#define CEX2A_MAX_MOD_SIZE 256 /* 2048 bits */ -#define CEX3A_MIN_MOD_SIZE CEX2A_MIN_MOD_SIZE -#define CEX3A_MAX_MOD_SIZE 512 /* 4096 bits */ - -#define CEX2A_MAX_MESSAGE_SIZE 0x390 /* sizeof(struct type50_crb2_msg) */ -#define CEX2A_MAX_RESPONSE_SIZE 0x110 /* max outputdatalength + type80_hdr */ - -#define CEX3A_MAX_RESPONSE_SIZE 0x210 /* 512 bit modulus - * (max outputdatalength) + - * type80_hdr - */ -#define CEX3A_MAX_MESSAGE_SIZE sizeof(struct type50_crb3_msg) - -#define CEX2A_CLEANUP_TIME (15 * HZ) -#define CEX3A_CLEANUP_TIME CEX2A_CLEANUP_TIME - -MODULE_AUTHOR("IBM Corporation"); -MODULE_DESCRIPTION("CEX2A/CEX3A Cryptographic Coprocessor device driver, " \ - "Copyright IBM Corp. 2001, 2018"); -MODULE_LICENSE("GPL"); - -static struct ap_device_id zcrypt_cex2a_card_ids[] = { - { .dev_type = AP_DEVICE_TYPE_CEX2A, - .match_flags = AP_DEVICE_ID_MATCH_CARD_TYPE }, - { .dev_type = AP_DEVICE_TYPE_CEX3A, - .match_flags = AP_DEVICE_ID_MATCH_CARD_TYPE }, - { /* end of list */ }, -}; - -MODULE_DEVICE_TABLE(ap, zcrypt_cex2a_card_ids); - -static struct ap_device_id zcrypt_cex2a_queue_ids[] = { - { .dev_type = AP_DEVICE_TYPE_CEX2A, - .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE }, - { .dev_type = AP_DEVICE_TYPE_CEX3A, - .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE }, - { /* end of list */ }, -}; - -MODULE_DEVICE_TABLE(ap, zcrypt_cex2a_queue_ids); - -/* - * Probe function for CEX2A card devices. It always accepts the AP device - * since the bus_match already checked the card type. - * @ap_dev: pointer to the AP device. - */ -static int zcrypt_cex2a_card_probe(struct ap_device *ap_dev) -{ - /* - * Normalized speed ratings per crypto adapter - * MEX_1k, MEX_2k, MEX_4k, CRT_1k, CRT_2k, CRT_4k, RNG, SECKEY - */ - static const int CEX2A_SPEED_IDX[] = { - 800, 1000, 2000, 900, 1200, 2400, 0, 0}; - static const int CEX3A_SPEED_IDX[] = { - 400, 500, 1000, 450, 550, 1200, 0, 0}; - - struct ap_card *ac = to_ap_card(&ap_dev->device); - struct zcrypt_card *zc; - int rc = 0; - - zc = zcrypt_card_alloc(); - if (!zc) - return -ENOMEM; - zc->card = ac; - dev_set_drvdata(&ap_dev->device, zc); - - if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX2A) { - zc->min_mod_size = CEX2A_MIN_MOD_SIZE; - zc->max_mod_size = CEX2A_MAX_MOD_SIZE; - zc->speed_rating = CEX2A_SPEED_IDX; - zc->max_exp_bit_length = CEX2A_MAX_MOD_SIZE; - zc->type_string = "CEX2A"; - zc->user_space_type = ZCRYPT_CEX2A; - } else if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX3A) { - zc->min_mod_size = CEX2A_MIN_MOD_SIZE; - zc->max_mod_size = CEX2A_MAX_MOD_SIZE; - zc->max_exp_bit_length = CEX2A_MAX_MOD_SIZE; - if (ap_test_bit(&ac->functions, AP_FUNC_MEX4K) && - ap_test_bit(&ac->functions, AP_FUNC_CRT4K)) { - zc->max_mod_size = CEX3A_MAX_MOD_SIZE; - zc->max_exp_bit_length = CEX3A_MAX_MOD_SIZE; - } - zc->speed_rating = CEX3A_SPEED_IDX; - zc->type_string = "CEX3A"; - zc->user_space_type = ZCRYPT_CEX3A; - } else { - zcrypt_card_free(zc); - return -ENODEV; - } - zc->online = 1; - - rc = zcrypt_card_register(zc); - if (rc) - zcrypt_card_free(zc); - - return rc; -} - -/* - * This is called to remove the CEX2A card driver information - * if an AP card device is removed. - */ -static void zcrypt_cex2a_card_remove(struct ap_device *ap_dev) -{ - struct zcrypt_card *zc = dev_get_drvdata(&ap_dev->device); - - zcrypt_card_unregister(zc); -} - -static struct ap_driver zcrypt_cex2a_card_driver = { - .probe = zcrypt_cex2a_card_probe, - .remove = zcrypt_cex2a_card_remove, - .ids = zcrypt_cex2a_card_ids, - .flags = AP_DRIVER_FLAG_DEFAULT, -}; - -/* - * Probe function for CEX2A queue devices. It always accepts the AP device - * since the bus_match already checked the queue type. - * @ap_dev: pointer to the AP device. - */ -static int zcrypt_cex2a_queue_probe(struct ap_device *ap_dev) -{ - struct ap_queue *aq = to_ap_queue(&ap_dev->device); - struct zcrypt_queue *zq = NULL; - int rc; - - switch (ap_dev->device_type) { - case AP_DEVICE_TYPE_CEX2A: - zq = zcrypt_queue_alloc(CEX2A_MAX_RESPONSE_SIZE); - if (!zq) - return -ENOMEM; - break; - case AP_DEVICE_TYPE_CEX3A: - zq = zcrypt_queue_alloc(CEX3A_MAX_RESPONSE_SIZE); - if (!zq) - return -ENOMEM; - break; - } - if (!zq) - return -ENODEV; - zq->ops = zcrypt_msgtype(MSGTYPE50_NAME, MSGTYPE50_VARIANT_DEFAULT); - zq->queue = aq; - zq->online = 1; - atomic_set(&zq->load, 0); - ap_queue_init_state(aq); - ap_queue_init_reply(aq, &zq->reply); - aq->request_timeout = CEX2A_CLEANUP_TIME; - dev_set_drvdata(&ap_dev->device, zq); - rc = zcrypt_queue_register(zq); - if (rc) - zcrypt_queue_free(zq); - - return rc; -} - -/* - * This is called to remove the CEX2A queue driver information - * if an AP queue device is removed. - */ -static void zcrypt_cex2a_queue_remove(struct ap_device *ap_dev) -{ - struct zcrypt_queue *zq = dev_get_drvdata(&ap_dev->device); - - zcrypt_queue_unregister(zq); -} - -static struct ap_driver zcrypt_cex2a_queue_driver = { - .probe = zcrypt_cex2a_queue_probe, - .remove = zcrypt_cex2a_queue_remove, - .ids = zcrypt_cex2a_queue_ids, - .flags = AP_DRIVER_FLAG_DEFAULT, -}; - -int __init zcrypt_cex2a_init(void) -{ - int rc; - - rc = ap_driver_register(&zcrypt_cex2a_card_driver, - THIS_MODULE, "cex2acard"); - if (rc) - return rc; - - rc = ap_driver_register(&zcrypt_cex2a_queue_driver, - THIS_MODULE, "cex2aqueue"); - if (rc) - ap_driver_unregister(&zcrypt_cex2a_card_driver); - - return rc; -} - -void __exit zcrypt_cex2a_exit(void) -{ - ap_driver_unregister(&zcrypt_cex2a_queue_driver); - ap_driver_unregister(&zcrypt_cex2a_card_driver); -} - -module_init(zcrypt_cex2a_init); -module_exit(zcrypt_cex2a_exit); diff --git a/drivers/s390/crypto/zcrypt_cex2a.h b/drivers/s390/crypto/zcrypt_cex2a.h index 7842214d9d09..e69de29bb2d1 100644 --- a/drivers/s390/crypto/zcrypt_cex2a.h +++ b/drivers/s390/crypto/zcrypt_cex2a.h @@ -1,134 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0+ */ -/* - * Copyright IBM Corp. 2001, 2006 - * Author(s): Robert Burroughs - * Eric Rossman (edrossma@us.ibm.com) - * - * Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com) - * Major cleanup & driver split: Martin Schwidefsky - */ - -#ifndef _ZCRYPT_CEX2A_H_ -#define _ZCRYPT_CEX2A_H_ - -/** - * The type 50 message family is associated with CEXxA cards. - * - * The four members of the family are described below. - * - * Note that all unsigned char arrays are right-justified and left-padded - * with zeroes. - * - * Note that all reserved fields must be zeroes. - */ -struct type50_hdr { - unsigned char reserved1; - unsigned char msg_type_code; /* 0x50 */ - unsigned short msg_len; - unsigned char reserved2; - unsigned char ignored; - unsigned short reserved3; -} __packed; - -#define TYPE50_TYPE_CODE 0x50 - -#define TYPE50_MEB1_FMT 0x0001 -#define TYPE50_MEB2_FMT 0x0002 -#define TYPE50_MEB3_FMT 0x0003 -#define TYPE50_CRB1_FMT 0x0011 -#define TYPE50_CRB2_FMT 0x0012 -#define TYPE50_CRB3_FMT 0x0013 - -/* Mod-Exp, with a small modulus */ -struct type50_meb1_msg { - struct type50_hdr header; - unsigned short keyblock_type; /* 0x0001 */ - unsigned char reserved[6]; - unsigned char exponent[128]; - unsigned char modulus[128]; - unsigned char message[128]; -} __packed; - -/* Mod-Exp, with a large modulus */ -struct type50_meb2_msg { - struct type50_hdr header; - unsigned short keyblock_type; /* 0x0002 */ - unsigned char reserved[6]; - unsigned char exponent[256]; - unsigned char modulus[256]; - unsigned char message[256]; -} __packed; - -/* Mod-Exp, with a larger modulus */ -struct type50_meb3_msg { - struct type50_hdr header; - unsigned short keyblock_type; /* 0x0003 */ - unsigned char reserved[6]; - unsigned char exponent[512]; - unsigned char modulus[512]; - unsigned char message[512]; -} __packed; - -/* CRT, with a small modulus */ -struct type50_crb1_msg { - struct type50_hdr header; - unsigned short keyblock_type; /* 0x0011 */ - unsigned char reserved[6]; - unsigned char p[64]; - unsigned char q[64]; - unsigned char dp[64]; - unsigned char dq[64]; - unsigned char u[64]; - unsigned char message[128]; -} __packed; - -/* CRT, with a large modulus */ -struct type50_crb2_msg { - struct type50_hdr header; - unsigned short keyblock_type; /* 0x0012 */ - unsigned char reserved[6]; - unsigned char p[128]; - unsigned char q[128]; - unsigned char dp[128]; - unsigned char dq[128]; - unsigned char u[128]; - unsigned char message[256]; -} __packed; - -/* CRT, with a larger modulus */ -struct type50_crb3_msg { - struct type50_hdr header; - unsigned short keyblock_type; /* 0x0013 */ - unsigned char reserved[6]; - unsigned char p[256]; - unsigned char q[256]; - unsigned char dp[256]; - unsigned char dq[256]; - unsigned char u[256]; - unsigned char message[512]; -} __packed; - -/** - * The type 80 response family is associated with a CEXxA cards. - * - * Note that all unsigned char arrays are right-justified and left-padded - * with zeroes. - * - * Note that all reserved fields must be zeroes. - */ - -#define TYPE80_RSP_CODE 0x80 - -struct type80_hdr { - unsigned char reserved1; - unsigned char type; /* 0x80 */ - unsigned short len; - unsigned char code; /* 0x00 */ - unsigned char reserved2[3]; - unsigned char reserved3[8]; -} __packed; - -int zcrypt_cex2a_init(void); -void zcrypt_cex2a_exit(void); - -#endif /* _ZCRYPT_CEX2A_H_ */ diff --git a/drivers/s390/crypto/zcrypt_cex2c.c b/drivers/s390/crypto/zcrypt_cex2c.c index 251b5bd3d19c..e69de29bb2d1 100644 --- a/drivers/s390/crypto/zcrypt_cex2c.c +++ b/drivers/s390/crypto/zcrypt_cex2c.c @@ -1,421 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * Copyright IBM Corp. 2001, 2018 - * Author(s): Robert Burroughs - * Eric Rossman (edrossma@us.ibm.com) - * - * Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com) - * Major cleanup & driver split: Martin Schwidefsky - * Ralph Wuerthner - * MSGTYPE restruct: Holger Dengler - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "ap_bus.h" -#include "zcrypt_api.h" -#include "zcrypt_error.h" -#include "zcrypt_msgtype6.h" -#include "zcrypt_cex2c.h" -#include "zcrypt_cca_key.h" -#include "zcrypt_ccamisc.h" - -#define CEX2C_MIN_MOD_SIZE 16 /* 128 bits */ -#define CEX2C_MAX_MOD_SIZE 256 /* 2048 bits */ -#define CEX3C_MIN_MOD_SIZE 16 /* 128 bits */ -#define CEX3C_MAX_MOD_SIZE 512 /* 4096 bits */ -#define CEX2C_MAX_XCRB_MESSAGE_SIZE (12 * 1024) -#define CEX2C_CLEANUP_TIME (15 * HZ) - -MODULE_AUTHOR("IBM Corporation"); -MODULE_DESCRIPTION("CEX2C/CEX3C Cryptographic Coprocessor device driver, " \ - "Copyright IBM Corp. 2001, 2018"); -MODULE_LICENSE("GPL"); - -static struct ap_device_id zcrypt_cex2c_card_ids[] = { - { .dev_type = AP_DEVICE_TYPE_CEX2C, - .match_flags = AP_DEVICE_ID_MATCH_CARD_TYPE }, - { .dev_type = AP_DEVICE_TYPE_CEX3C, - .match_flags = AP_DEVICE_ID_MATCH_CARD_TYPE }, - { /* end of list */ }, -}; - -MODULE_DEVICE_TABLE(ap, zcrypt_cex2c_card_ids); - -static struct ap_device_id zcrypt_cex2c_queue_ids[] = { - { .dev_type = AP_DEVICE_TYPE_CEX2C, - .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE }, - { .dev_type = AP_DEVICE_TYPE_CEX3C, - .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE }, - { /* end of list */ }, -}; - -MODULE_DEVICE_TABLE(ap, zcrypt_cex2c_queue_ids); - -/* - * CCA card additional device attributes - */ -static ssize_t cca_serialnr_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct zcrypt_card *zc = dev_get_drvdata(dev); - struct cca_info ci; - struct ap_card *ac = to_ap_card(dev); - - memset(&ci, 0, sizeof(ci)); - - if (ap_domain_index >= 0) - cca_get_info(ac->id, ap_domain_index, &ci, zc->online); - - return sysfs_emit(buf, "%s\n", ci.serial); -} - -static struct device_attribute dev_attr_cca_serialnr = - __ATTR(serialnr, 0444, cca_serialnr_show, NULL); - -static struct attribute *cca_card_attrs[] = { - &dev_attr_cca_serialnr.attr, - NULL, -}; - -static const struct attribute_group cca_card_attr_grp = { - .attrs = cca_card_attrs, -}; - - /* - * CCA queue additional device attributes - */ -static ssize_t cca_mkvps_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct zcrypt_queue *zq = dev_get_drvdata(dev); - int n = 0; - struct cca_info ci; - static const char * const cao_state[] = { "invalid", "valid" }; - static const char * const new_state[] = { "empty", "partial", "full" }; - - memset(&ci, 0, sizeof(ci)); - - cca_get_info(AP_QID_CARD(zq->queue->qid), - AP_QID_QUEUE(zq->queue->qid), - &ci, zq->online); - - if (ci.new_aes_mk_state >= '1' && ci.new_aes_mk_state <= '3') - n = sysfs_emit(buf, "AES NEW: %s 0x%016llx\n", - new_state[ci.new_aes_mk_state - '1'], - ci.new_aes_mkvp); - else - n = sysfs_emit(buf, "AES NEW: - -\n"); - - if (ci.cur_aes_mk_state >= '1' && ci.cur_aes_mk_state <= '2') - n += sysfs_emit_at(buf, n, "AES CUR: %s 0x%016llx\n", - cao_state[ci.cur_aes_mk_state - '1'], - ci.cur_aes_mkvp); - else - n += sysfs_emit_at(buf, n, "AES CUR: - -\n"); - - if (ci.old_aes_mk_state >= '1' && ci.old_aes_mk_state <= '2') - n += sysfs_emit_at(buf, n, "AES OLD: %s 0x%016llx\n", - cao_state[ci.old_aes_mk_state - '1'], - ci.old_aes_mkvp); - else - n += sysfs_emit_at(buf, n, "AES OLD: - -\n"); - - if (ci.new_apka_mk_state >= '1' && ci.new_apka_mk_state <= '3') - n += sysfs_emit_at(buf, n, "APKA NEW: %s 0x%016llx\n", - new_state[ci.new_apka_mk_state - '1'], - ci.new_apka_mkvp); - else - n += sysfs_emit_at(buf, n, "APKA NEW: - -\n"); - - if (ci.cur_apka_mk_state >= '1' && ci.cur_apka_mk_state <= '2') - n += sysfs_emit_at(buf, n, "APKA CUR: %s 0x%016llx\n", - cao_state[ci.cur_apka_mk_state - '1'], - ci.cur_apka_mkvp); - else - n += sysfs_emit_at(buf, n, "APKA CUR: - -\n"); - - if (ci.old_apka_mk_state >= '1' && ci.old_apka_mk_state <= '2') - n += sysfs_emit_at(buf, n, "APKA OLD: %s 0x%016llx\n", - cao_state[ci.old_apka_mk_state - '1'], - ci.old_apka_mkvp); - else - n += sysfs_emit_at(buf, n, "APKA OLD: - -\n"); - - return n; -} - -static struct device_attribute dev_attr_cca_mkvps = - __ATTR(mkvps, 0444, cca_mkvps_show, NULL); - -static struct attribute *cca_queue_attrs[] = { - &dev_attr_cca_mkvps.attr, - NULL, -}; - -static const struct attribute_group cca_queue_attr_grp = { - .attrs = cca_queue_attrs, -}; - -/* - * Large random number detection function. Its sends a message to a CEX2C/CEX3C - * card to find out if large random numbers are supported. - * @ap_dev: pointer to the AP device. - * - * Returns 1 if large random numbers are supported, 0 if not and < 0 on error. - */ -static int zcrypt_cex2c_rng_supported(struct ap_queue *aq) -{ - struct ap_message ap_msg; - unsigned long psmid; - unsigned int domain; - struct { - struct type86_hdr hdr; - struct type86_fmt2_ext fmt2; - struct CPRBX cprbx; - } __packed *reply; - struct { - struct type6_hdr hdr; - struct CPRBX cprbx; - char function_code[2]; - short int rule_length; - char rule[8]; - short int verb_length; - short int key_length; - } __packed *msg; - int rc, i; - - ap_init_message(&ap_msg); - ap_msg.msg = (void *)get_zeroed_page(GFP_KERNEL); - if (!ap_msg.msg) - return -ENOMEM; - ap_msg.bufsize = PAGE_SIZE; - - rng_type6cprb_msgx(&ap_msg, 4, &domain); - - msg = ap_msg.msg; - msg->cprbx.domain = AP_QID_QUEUE(aq->qid); - - rc = ap_send(aq->qid, 0x0102030405060708UL, ap_msg.msg, ap_msg.len); - if (rc) - goto out_free; - - /* Wait for the test message to complete. */ - for (i = 0; i < 2 * HZ; i++) { - msleep(1000 / HZ); - rc = ap_recv(aq->qid, &psmid, ap_msg.msg, ap_msg.bufsize); - if (rc == 0 && psmid == 0x0102030405060708UL) - break; - } - - if (i >= 2 * HZ) { - /* Got no answer. */ - rc = -ENODEV; - goto out_free; - } - - reply = ap_msg.msg; - if (reply->cprbx.ccp_rtcode == 0 && reply->cprbx.ccp_rscode == 0) - rc = 1; - else - rc = 0; -out_free: - free_page((unsigned long)ap_msg.msg); - return rc; -} - -/* - * Probe function for CEX2C/CEX3C card devices. It always accepts the - * AP device since the bus_match already checked the hardware type. - * @ap_dev: pointer to the AP card device. - */ -static int zcrypt_cex2c_card_probe(struct ap_device *ap_dev) -{ - /* - * Normalized speed ratings per crypto adapter - * MEX_1k, MEX_2k, MEX_4k, CRT_1k, CRT_2k, CRT_4k, RNG, SECKEY - */ - static const int CEX2C_SPEED_IDX[] = { - 1000, 1400, 2400, 1100, 1500, 2600, 100, 12}; - static const int CEX3C_SPEED_IDX[] = { - 500, 700, 1400, 550, 800, 1500, 80, 10}; - - struct ap_card *ac = to_ap_card(&ap_dev->device); - struct zcrypt_card *zc; - int rc = 0; - - zc = zcrypt_card_alloc(); - if (!zc) - return -ENOMEM; - zc->card = ac; - dev_set_drvdata(&ap_dev->device, zc); - switch (ac->ap_dev.device_type) { - case AP_DEVICE_TYPE_CEX2C: - zc->user_space_type = ZCRYPT_CEX2C; - zc->type_string = "CEX2C"; - zc->speed_rating = CEX2C_SPEED_IDX; - zc->min_mod_size = CEX2C_MIN_MOD_SIZE; - zc->max_mod_size = CEX2C_MAX_MOD_SIZE; - zc->max_exp_bit_length = CEX2C_MAX_MOD_SIZE; - break; - case AP_DEVICE_TYPE_CEX3C: - zc->user_space_type = ZCRYPT_CEX3C; - zc->type_string = "CEX3C"; - zc->speed_rating = CEX3C_SPEED_IDX; - zc->min_mod_size = CEX3C_MIN_MOD_SIZE; - zc->max_mod_size = CEX3C_MAX_MOD_SIZE; - zc->max_exp_bit_length = CEX3C_MAX_MOD_SIZE; - break; - default: - zcrypt_card_free(zc); - return -ENODEV; - } - zc->online = 1; - - rc = zcrypt_card_register(zc); - if (rc) { - zcrypt_card_free(zc); - return rc; - } - - if (ap_test_bit(&ac->functions, AP_FUNC_COPRO)) { - rc = sysfs_create_group(&ap_dev->device.kobj, - &cca_card_attr_grp); - if (rc) { - zcrypt_card_unregister(zc); - zcrypt_card_free(zc); - } - } - - return rc; -} - -/* - * This is called to remove the CEX2C/CEX3C card driver information - * if an AP card device is removed. - */ -static void zcrypt_cex2c_card_remove(struct ap_device *ap_dev) -{ - struct zcrypt_card *zc = dev_get_drvdata(&ap_dev->device); - struct ap_card *ac = to_ap_card(&ap_dev->device); - - if (ap_test_bit(&ac->functions, AP_FUNC_COPRO)) - sysfs_remove_group(&ap_dev->device.kobj, &cca_card_attr_grp); - - zcrypt_card_unregister(zc); -} - -static struct ap_driver zcrypt_cex2c_card_driver = { - .probe = zcrypt_cex2c_card_probe, - .remove = zcrypt_cex2c_card_remove, - .ids = zcrypt_cex2c_card_ids, - .flags = AP_DRIVER_FLAG_DEFAULT, -}; - -/* - * Probe function for CEX2C/CEX3C queue devices. It always accepts the - * AP device since the bus_match already checked the hardware type. - * @ap_dev: pointer to the AP card device. - */ -static int zcrypt_cex2c_queue_probe(struct ap_device *ap_dev) -{ - struct ap_queue *aq = to_ap_queue(&ap_dev->device); - struct zcrypt_queue *zq; - int rc; - - zq = zcrypt_queue_alloc(CEX2C_MAX_XCRB_MESSAGE_SIZE); - if (!zq) - return -ENOMEM; - zq->queue = aq; - zq->online = 1; - atomic_set(&zq->load, 0); - ap_rapq(aq->qid, 0); - rc = zcrypt_cex2c_rng_supported(aq); - if (rc < 0) { - zcrypt_queue_free(zq); - return rc; - } - if (rc) - zq->ops = zcrypt_msgtype(MSGTYPE06_NAME, - MSGTYPE06_VARIANT_DEFAULT); - else - zq->ops = zcrypt_msgtype(MSGTYPE06_NAME, - MSGTYPE06_VARIANT_NORNG); - ap_queue_init_state(aq); - ap_queue_init_reply(aq, &zq->reply); - aq->request_timeout = CEX2C_CLEANUP_TIME; - dev_set_drvdata(&ap_dev->device, zq); - rc = zcrypt_queue_register(zq); - if (rc) { - zcrypt_queue_free(zq); - return rc; - } - - if (ap_test_bit(&aq->card->functions, AP_FUNC_COPRO)) { - rc = sysfs_create_group(&ap_dev->device.kobj, - &cca_queue_attr_grp); - if (rc) { - zcrypt_queue_unregister(zq); - zcrypt_queue_free(zq); - } - } - - return rc; -} - -/* - * This is called to remove the CEX2C/CEX3C queue driver information - * if an AP queue device is removed. - */ -static void zcrypt_cex2c_queue_remove(struct ap_device *ap_dev) -{ - struct zcrypt_queue *zq = dev_get_drvdata(&ap_dev->device); - struct ap_queue *aq = to_ap_queue(&ap_dev->device); - - if (ap_test_bit(&aq->card->functions, AP_FUNC_COPRO)) - sysfs_remove_group(&ap_dev->device.kobj, &cca_queue_attr_grp); - - zcrypt_queue_unregister(zq); -} - -static struct ap_driver zcrypt_cex2c_queue_driver = { - .probe = zcrypt_cex2c_queue_probe, - .remove = zcrypt_cex2c_queue_remove, - .ids = zcrypt_cex2c_queue_ids, - .flags = AP_DRIVER_FLAG_DEFAULT, -}; - -int __init zcrypt_cex2c_init(void) -{ - int rc; - - rc = ap_driver_register(&zcrypt_cex2c_card_driver, - THIS_MODULE, "cex2card"); - if (rc) - return rc; - - rc = ap_driver_register(&zcrypt_cex2c_queue_driver, - THIS_MODULE, "cex2cqueue"); - if (rc) - ap_driver_unregister(&zcrypt_cex2c_card_driver); - - return rc; -} - -void zcrypt_cex2c_exit(void) -{ - ap_driver_unregister(&zcrypt_cex2c_queue_driver); - ap_driver_unregister(&zcrypt_cex2c_card_driver); -} - -module_init(zcrypt_cex2c_init); -module_exit(zcrypt_cex2c_exit); diff --git a/drivers/s390/crypto/zcrypt_cex2c.h b/drivers/s390/crypto/zcrypt_cex2c.h index 6ec405c2bec2..e69de29bb2d1 100644 --- a/drivers/s390/crypto/zcrypt_cex2c.h +++ b/drivers/s390/crypto/zcrypt_cex2c.h @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0+ */ -/* - * Copyright IBM Corp. 2001, 2018 - * Author(s): Robert Burroughs - * Eric Rossman (edrossma@us.ibm.com) - * - * Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com) - * Major cleanup & driver split: Martin Schwidefsky - * MSGTYPE restruct: Holger Dengler - */ - -#ifndef _ZCRYPT_CEX2C_H_ -#define _ZCRYPT_CEX2C_H_ - -int zcrypt_cex2c_init(void); -void zcrypt_cex2c_exit(void); - -#endif /* _ZCRYPT_CEX2C_H_ */ diff --git a/drivers/s390/crypto/zcrypt_msgtype50.c b/drivers/s390/crypto/zcrypt_msgtype50.c index 51f8f7a463f7..2e155de8abe5 100644 --- a/drivers/s390/crypto/zcrypt_msgtype50.c +++ b/drivers/s390/crypto/zcrypt_msgtype50.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0+ /* - * Copyright IBM Corp. 2001, 2012 + * Copyright IBM Corp. 2001, 2023 * Author(s): Robert Burroughs * Eric Rossman (edrossma@us.ibm.com) * @@ -28,15 +28,12 @@ /* >= CEX3A: 4096 bits */ #define CEX3A_MAX_MOD_SIZE 512 -/* CEX2A: max outputdatalength + type80_hdr */ -#define CEX2A_MAX_RESPONSE_SIZE 0x110 - /* >= CEX3A: 512 bit modulus, (max outputdatalength) + type80_hdr */ #define CEX3A_MAX_RESPONSE_SIZE 0x210 MODULE_AUTHOR("IBM Corporation"); MODULE_DESCRIPTION("Cryptographic Accelerator (message type 50), " \ - "Copyright IBM Corp. 2001, 2012"); + "Copyright IBM Corp. 2001, 2023"); MODULE_LICENSE("GPL"); /* @@ -366,20 +363,17 @@ static int convert_type80(struct zcrypt_queue *zq, ap_send_online_uevent(&zq->queue->ap_dev, zq->online); return -EAGAIN; } - if (zq->zcard->user_space_type == ZCRYPT_CEX2A) - BUG_ON(t80h->len > CEX2A_MAX_RESPONSE_SIZE); - else - BUG_ON(t80h->len > CEX3A_MAX_RESPONSE_SIZE); + BUG_ON(t80h->len > CEX3A_MAX_RESPONSE_SIZE); data = reply->msg + t80h->len - outputdatalength; if (copy_to_user(outputdata, data, outputdatalength)) return -EFAULT; return 0; } -static int convert_response_cex2a(struct zcrypt_queue *zq, - struct ap_message *reply, - char __user *outputdata, - unsigned int outputdatalength) +static int convert_response(struct zcrypt_queue *zq, + struct ap_message *reply, + char __user *outputdata, + unsigned int outputdatalength) { /* Response type byte is the second byte in the response. */ unsigned char rtype = ((unsigned char *)reply->msg)[1]; @@ -414,9 +408,9 @@ static int convert_response_cex2a(struct zcrypt_queue *zq, * @msg: pointer to the AP message * @reply: pointer to the AP reply message */ -static void zcrypt_cex2a_receive(struct ap_queue *aq, - struct ap_message *msg, - struct ap_message *reply) +static void zcrypt_msgtype50_receive(struct ap_queue *aq, + struct ap_message *msg, + struct ap_message *reply) { static struct error_hdr error_reply = { .type = TYPE82_RSP_CODE, @@ -456,19 +450,18 @@ static atomic_t zcrypt_step = ATOMIC_INIT(0); * CEXxA device to the request distributor * @mex: pointer to the modexpo request buffer */ -static long zcrypt_cex2a_modexpo(struct zcrypt_queue *zq, - struct ica_rsa_modexpo *mex, - struct ap_message *ap_msg) +static long zcrypt_msgtype50_modexpo(struct zcrypt_queue *zq, + struct ica_rsa_modexpo *mex, + struct ap_message *ap_msg) { struct completion work; int rc; - ap_msg->bufsize = (zq->zcard->user_space_type == ZCRYPT_CEX2A) ? - MSGTYPE50_CRB2_MAX_MSG_SIZE : MSGTYPE50_CRB3_MAX_MSG_SIZE; + ap_msg->bufsize = MSGTYPE50_CRB3_MAX_MSG_SIZE; ap_msg->msg = kmalloc(ap_msg->bufsize, GFP_KERNEL); if (!ap_msg->msg) return -ENOMEM; - ap_msg->receive = zcrypt_cex2a_receive; + ap_msg->receive = zcrypt_msgtype50_receive; ap_msg->psmid = (((unsigned long)current->pid) << 32) + atomic_inc_return(&zcrypt_step); ap_msg->private = &work; @@ -483,9 +476,9 @@ static long zcrypt_cex2a_modexpo(struct zcrypt_queue *zq, if (rc == 0) { rc = ap_msg->rc; if (rc == 0) - rc = convert_response_cex2a(zq, ap_msg, - mex->outputdata, - mex->outputdatalength); + rc = convert_response(zq, ap_msg, + mex->outputdata, + mex->outputdatalength); } else { /* Signal pending. */ ap_cancel_message(zq->queue, ap_msg); @@ -507,19 +500,18 @@ out: * CEXxA device to the request distributor * @crt: pointer to the modexpoc_crt request buffer */ -static long zcrypt_cex2a_modexpo_crt(struct zcrypt_queue *zq, - struct ica_rsa_modexpo_crt *crt, - struct ap_message *ap_msg) +static long zcrypt_msgtype50_modexpo_crt(struct zcrypt_queue *zq, + struct ica_rsa_modexpo_crt *crt, + struct ap_message *ap_msg) { struct completion work; int rc; - ap_msg->bufsize = (zq->zcard->user_space_type == ZCRYPT_CEX2A) ? - MSGTYPE50_CRB2_MAX_MSG_SIZE : MSGTYPE50_CRB3_MAX_MSG_SIZE; + ap_msg->bufsize = MSGTYPE50_CRB3_MAX_MSG_SIZE; ap_msg->msg = kmalloc(ap_msg->bufsize, GFP_KERNEL); if (!ap_msg->msg) return -ENOMEM; - ap_msg->receive = zcrypt_cex2a_receive; + ap_msg->receive = zcrypt_msgtype50_receive; ap_msg->psmid = (((unsigned long)current->pid) << 32) + atomic_inc_return(&zcrypt_step); ap_msg->private = &work; @@ -534,9 +526,9 @@ static long zcrypt_cex2a_modexpo_crt(struct zcrypt_queue *zq, if (rc == 0) { rc = ap_msg->rc; if (rc == 0) - rc = convert_response_cex2a(zq, ap_msg, - crt->outputdata, - crt->outputdatalength); + rc = convert_response(zq, ap_msg, + crt->outputdata, + crt->outputdatalength); } else { /* Signal pending. */ ap_cancel_message(zq->queue, ap_msg); @@ -555,8 +547,8 @@ out: * The crypto operations for message type 50. */ static struct zcrypt_ops zcrypt_msgtype50_ops = { - .rsa_modexpo = zcrypt_cex2a_modexpo, - .rsa_modexpo_crt = zcrypt_cex2a_modexpo_crt, + .rsa_modexpo = zcrypt_msgtype50_modexpo, + .rsa_modexpo_crt = zcrypt_msgtype50_modexpo_crt, .owner = THIS_MODULE, .name = MSGTYPE50_NAME, .variant = MSGTYPE50_VARIANT_DEFAULT, diff --git a/drivers/s390/crypto/zcrypt_msgtype50.h b/drivers/s390/crypto/zcrypt_msgtype50.h index eb49f06bed29..323e93b90b12 100644 --- a/drivers/s390/crypto/zcrypt_msgtype50.h +++ b/drivers/s390/crypto/zcrypt_msgtype50.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0+ */ /* - * Copyright IBM Corp. 2001, 2012 + * Copyright IBM Corp. 2001, 2023 * Author(s): Robert Burroughs * Eric Rossman (edrossma@us.ibm.com) * @@ -15,7 +15,6 @@ #define MSGTYPE50_NAME "zcrypt_msgtype50" #define MSGTYPE50_VARIANT_DEFAULT 0 -#define MSGTYPE50_CRB2_MAX_MSG_SIZE 0x390 /* sizeof(struct type50_crb2_msg) */ #define MSGTYPE50_CRB3_MAX_MSG_SIZE 0x710 /* sizeof(struct type50_crb3_msg) */ #define MSGTYPE_ADJUSTMENT 0x08 /* type04 extension (not needed in type50) */ diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c index e668ff5eb384..3c53abbdc342 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.c +++ b/drivers/s390/crypto/zcrypt_msgtype6.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0+ /* - * Copyright IBM Corp. 2001, 2022 + * Copyright IBM Corp. 2001, 2023 * Author(s): Robert Burroughs * Eric Rossman (edrossma@us.ibm.com) * @@ -42,7 +42,7 @@ struct response_type { MODULE_AUTHOR("IBM Corporation"); MODULE_DESCRIPTION("Cryptographic Coprocessor (message type 6), " \ - "Copyright IBM Corp. 2001, 2012"); + "Copyright IBM Corp. 2001, 2023"); MODULE_LICENSE("GPL"); struct function_and_rules_block { @@ -1348,14 +1348,6 @@ out: /* * The crypto operations for a CEXxC card. */ -static struct zcrypt_ops zcrypt_msgtype6_norng_ops = { - .owner = THIS_MODULE, - .name = MSGTYPE06_NAME, - .variant = MSGTYPE06_VARIANT_NORNG, - .rsa_modexpo = zcrypt_msgtype6_modexpo, - .rsa_modexpo_crt = zcrypt_msgtype6_modexpo_crt, - .send_cprb = zcrypt_msgtype6_send_cprb, -}; static struct zcrypt_ops zcrypt_msgtype6_ops = { .owner = THIS_MODULE, @@ -1378,14 +1370,12 @@ static struct zcrypt_ops zcrypt_msgtype6_ep11_ops = { void __init zcrypt_msgtype6_init(void) { - zcrypt_msgtype_register(&zcrypt_msgtype6_norng_ops); zcrypt_msgtype_register(&zcrypt_msgtype6_ops); zcrypt_msgtype_register(&zcrypt_msgtype6_ep11_ops); } void __exit zcrypt_msgtype6_exit(void) { - zcrypt_msgtype_unregister(&zcrypt_msgtype6_norng_ops); zcrypt_msgtype_unregister(&zcrypt_msgtype6_ops); zcrypt_msgtype_unregister(&zcrypt_msgtype6_ep11_ops); } From 83f95671943e6394eda4d20fa9458d4a2ae13c5c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 4 Jul 2023 15:47:11 +0200 Subject: [PATCH 05/66] s390/hypfs: simplify memory allocation Simplify memory allocation for diagnose 204 memory buffer: - allocate with __vmalloc_node() to enure page alignment - allocate real / physical memory area also within vmalloc area and handle vmalloc to real / physical address translation within diag204(). Acked-by: Alexander Gordeev Reviewed-by: Mete Durlu Signed-off-by: Heiko Carstens --- arch/s390/hypfs/hypfs_diag.c | 40 ++++++++---------------------------- arch/s390/kernel/diag.c | 24 ++++++++++++++++++++-- 2 files changed, 30 insertions(+), 34 deletions(-) diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c index c3be533c4cd3..c8083dc08db3 100644 --- a/arch/s390/hypfs/hypfs_diag.c +++ b/arch/s390/hypfs/hypfs_diag.c @@ -29,7 +29,6 @@ static enum diag204_sc diag204_store_sc; /* used subcode for store */ static enum diag204_format diag204_info_type; /* used diag 204 data format */ static void *diag204_buf; /* 4K aligned buffer for diag204 data */ -static void *diag204_buf_vmalloc; /* vmalloc pointer for diag204 data */ static int diag204_buf_pages; /* number of pages for diag204 data */ static struct dentry *dbfs_d204_file; @@ -212,14 +211,7 @@ static inline __u64 phys_cpu__ctidx(enum diag204_format type, void *hdr) static void diag204_free_buffer(void) { - if (!diag204_buf) - return; - if (diag204_buf_vmalloc) { - vfree(diag204_buf_vmalloc); - diag204_buf_vmalloc = NULL; - } else { - free_pages((unsigned long) diag204_buf, 0); - } + vfree(diag204_buf); diag204_buf = NULL; } @@ -228,26 +220,6 @@ static void *page_align_ptr(void *ptr) return (void *) PAGE_ALIGN((unsigned long) ptr); } -static void *diag204_alloc_vbuf(int pages) -{ - /* The buffer has to be page aligned! */ - diag204_buf_vmalloc = vmalloc(array_size(PAGE_SIZE, (pages + 1))); - if (!diag204_buf_vmalloc) - return ERR_PTR(-ENOMEM); - diag204_buf = page_align_ptr(diag204_buf_vmalloc); - diag204_buf_pages = pages; - return diag204_buf; -} - -static void *diag204_alloc_rbuf(void) -{ - diag204_buf = (void*)__get_free_pages(GFP_KERNEL,0); - if (!diag204_buf) - return ERR_PTR(-ENOMEM); - diag204_buf_pages = 1; - return diag204_buf; -} - static void *diag204_get_buffer(enum diag204_format fmt, int *pages) { if (diag204_buf) { @@ -256,15 +228,19 @@ static void *diag204_get_buffer(enum diag204_format fmt, int *pages) } if (fmt == DIAG204_INFO_SIMPLE) { *pages = 1; - return diag204_alloc_rbuf(); } else {/* DIAG204_INFO_EXT */ *pages = diag204((unsigned long)DIAG204_SUBC_RSI | (unsigned long)DIAG204_INFO_EXT, 0, NULL); if (*pages <= 0) return ERR_PTR(-ENOSYS); - else - return diag204_alloc_vbuf(*pages); } + diag204_buf = __vmalloc_node(array_size(*pages, PAGE_SIZE), + PAGE_SIZE, GFP_KERNEL, NUMA_NO_NODE, + __builtin_return_address(0)); + if (!diag204_buf) + return ERR_PTR(-ENOMEM); + diag204_buf_pages = *pages; + return diag204_buf; } /* diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c index f3a0f39cbd6c..f287713baf6d 100644 --- a/arch/s390/kernel/diag.c +++ b/arch/s390/kernel/diag.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -168,11 +169,30 @@ static inline int __diag204(unsigned long *subcode, unsigned long size, void *ad return rp.odd; } +/** + * diag204() - Issue diagnose 204 call. + * @subcode: Subcode of diagnose 204 to be executed. + * @size: Size of area in pages which @area points to, if given. + * @addr: Vmalloc'ed memory area where the result is written to. + * + * Execute diagnose 204 with the given subcode and write the result to the + * memory area specified with @addr. For subcodes which do not write a + * result to memory both @size and @addr must be zero. If @addr is + * specified it must be page aligned and must have been allocated with + * vmalloc(). Conversion to real / physical addresses will be handled by + * this function if required. + */ int diag204(unsigned long subcode, unsigned long size, void *addr) { - diag_stat_inc(DIAG_STAT_X204); + if (addr) { + if (WARN_ON_ONCE(!is_vmalloc_addr(addr))) + return -1; + if (WARN_ON_ONCE(!IS_ALIGNED((unsigned long)addr, PAGE_SIZE))) + return -1; + } if ((subcode & DIAG204_SUBCODE_MASK) == DIAG204_SUBC_STIB4) - addr = (void *)__pa(addr); + addr = (void *)pfn_to_phys(vmalloc_to_pfn(addr)); + diag_stat_inc(DIAG_STAT_X204); size = __diag204(&subcode, size, addr); if (subcode) return -1; From b7857acc1b1105da5f088fe2593f1a6e3a3d47ce Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 4 Jul 2023 15:47:12 +0200 Subject: [PATCH 06/66] s390/hypfs: remove open-coded PTR_ALIGN() Get rid of page_align_ptr() and use PTR_ALIGN() instead. Acked-by: Alexander Gordeev Signed-off-by: Heiko Carstens --- arch/s390/hypfs/hypfs_diag.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c index c8083dc08db3..889d83c77826 100644 --- a/arch/s390/hypfs/hypfs_diag.c +++ b/arch/s390/hypfs/hypfs_diag.c @@ -215,11 +215,6 @@ static void diag204_free_buffer(void) diag204_buf = NULL; } -static void *page_align_ptr(void *ptr) -{ - return (void *) PAGE_ALIGN((unsigned long) ptr); -} - static void *diag204_get_buffer(enum diag204_format fmt, int *pages) { if (diag204_buf) { @@ -379,7 +374,7 @@ static int dbfs_d204_create(void **data, void **data_free_ptr, size_t *size) base = vzalloc(buf_size); if (!base) return -ENOMEM; - d204 = page_align_ptr(base + sizeof(d204->hdr)) - sizeof(d204->hdr); + d204 = PTR_ALIGN(base + sizeof(d204->hdr), PAGE_SIZE) - sizeof(d204->hdr); rc = diag204_do_store(d204->buf, diag204_buf_pages); if (rc) { vfree(base); From 3325b4d85799957aa53514e69bed5c9df7771caf Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 4 Jul 2023 15:47:13 +0200 Subject: [PATCH 07/66] s390/hypfs: factor out filesystem code The s390_hypfs filesystem is deprecated and shouldn't be used due to its rather odd semantics. It creates a whole directory structure with static file contents so a user can read a consistent state while within that directory. Writing to its update attribute will remove and rebuild nearly the whole filesystem, so that again a user can read a consistent state, even if multiple files need to be read. Given that this wastes a lot of CPU cycles, and involves a lot of code, binary interfaces have been added quite a couple of years ago, which simply pass the binary data to user space, and let user space decode the data. This is the preferred and only way how the data should be retrieved. The assumption is that there are no users of the s390_hypfs filesystem. However instead of just removing the code, and having to revert in case there are actually users, factor the filesystem code out and make it only available via a new config option. This config option is supposed to be disabled. If it turns out there are no complaints the filesystem code can be removed probably in a couple of years. Acked-by: Alexander Gordeev Signed-off-by: Heiko Carstens --- arch/s390/Kbuild | 2 +- arch/s390/Kconfig | 15 +- arch/s390/hypfs/Makefile | 11 +- arch/s390/hypfs/hypfs.h | 10 +- arch/s390/hypfs/hypfs_dbfs.c | 31 ++- arch/s390/hypfs/hypfs_diag.c | 400 ++------------------------------ arch/s390/hypfs/hypfs_diag.h | 35 +++ arch/s390/hypfs/hypfs_diag_fs.c | 393 +++++++++++++++++++++++++++++++ arch/s390/hypfs/hypfs_vm.c | 175 +------------- arch/s390/hypfs/hypfs_vm.h | 50 ++++ arch/s390/hypfs/hypfs_vm_fs.c | 139 +++++++++++ arch/s390/hypfs/inode.c | 35 +-- 12 files changed, 709 insertions(+), 587 deletions(-) create mode 100644 arch/s390/hypfs/hypfs_diag.h create mode 100644 arch/s390/hypfs/hypfs_diag_fs.c create mode 100644 arch/s390/hypfs/hypfs_vm.h create mode 100644 arch/s390/hypfs/hypfs_vm_fs.c diff --git a/arch/s390/Kbuild b/arch/s390/Kbuild index 76e362277179..8e4d74f51115 100644 --- a/arch/s390/Kbuild +++ b/arch/s390/Kbuild @@ -3,7 +3,7 @@ obj-y += kernel/ obj-y += mm/ obj-$(CONFIG_KVM) += kvm/ obj-y += crypto/ -obj-$(CONFIG_S390_HYPFS_FS) += hypfs/ +obj-$(CONFIG_S390_HYPFS) += hypfs/ obj-$(CONFIG_APPLDATA_BASE) += appldata/ obj-y += net/ obj-$(CONFIG_PCI) += pci/ diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index c0afca69904e..ea0e8f34eb0d 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -877,13 +877,24 @@ config APPLDATA_NET_SUM This can also be compiled as a module, which will be called appldata_net_sum.o. -config S390_HYPFS_FS +config S390_HYPFS def_bool y + prompt "s390 hypervisor information" + help + This provides several binary files at (debugfs)/s390_hypfs/ to + provide accounting information in an s390 hypervisor environment. + +config S390_HYPFS_FS + def_bool n prompt "s390 hypervisor file system support" select SYS_HYPERVISOR + depends on S390_HYPFS help This is a virtual file system intended to provide accounting - information in an s390 hypervisor environment. + information in an s390 hypervisor environment. This file system + is deprecated and should not be used. + + Say N if you are unsure. source "arch/s390/kvm/Kconfig" diff --git a/arch/s390/hypfs/Makefile b/arch/s390/hypfs/Makefile index 06f601509ce9..c34854d298f8 100644 --- a/arch/s390/hypfs/Makefile +++ b/arch/s390/hypfs/Makefile @@ -3,7 +3,12 @@ # Makefile for the linux hypfs filesystem routines. # -obj-$(CONFIG_S390_HYPFS_FS) += s390_hypfs.o +obj-$(CONFIG_S390_HYPFS) += hypfs_dbfs.o +obj-$(CONFIG_S390_HYPFS) += hypfs_diag.o +obj-$(CONFIG_S390_HYPFS) += hypfs_diag0c.o +obj-$(CONFIG_S390_HYPFS) += hypfs_sprp.o +obj-$(CONFIG_S390_HYPFS) += hypfs_vm.o -s390_hypfs-objs := inode.o hypfs_diag.o hypfs_vm.o hypfs_dbfs.o hypfs_sprp.o -s390_hypfs-objs += hypfs_diag0c.o +obj-$(CONFIG_S390_HYPFS_FS) += hypfs_diag_fs.o +obj-$(CONFIG_S390_HYPFS_FS) += hypfs_vm_fs.o +obj-$(CONFIG_S390_HYPFS_FS) += inode.o diff --git a/arch/s390/hypfs/hypfs.h b/arch/s390/hypfs/hypfs.h index 05f3f9aee5fc..65f4036fd541 100644 --- a/arch/s390/hypfs/hypfs.h +++ b/arch/s390/hypfs/hypfs.h @@ -46,6 +46,15 @@ void hypfs_diag0c_exit(void); void hypfs_sprp_init(void); void hypfs_sprp_exit(void); +int __hypfs_fs_init(void); + +static inline int hypfs_fs_init(void) +{ + if (IS_ENABLED(CONFIG_S390_HYPFS_FS)) + return __hypfs_fs_init(); + return 0; +} + /* debugfs interface */ struct hypfs_dbfs_file; @@ -69,7 +78,6 @@ struct hypfs_dbfs_file { struct dentry *dentry; }; -extern void hypfs_dbfs_init(void); extern void hypfs_dbfs_exit(void); extern void hypfs_dbfs_create_file(struct hypfs_dbfs_file *df); extern void hypfs_dbfs_remove_file(struct hypfs_dbfs_file *df); diff --git a/arch/s390/hypfs/hypfs_dbfs.c b/arch/s390/hypfs/hypfs_dbfs.c index f4c7dbfaf8ee..4024599eb448 100644 --- a/arch/s390/hypfs/hypfs_dbfs.c +++ b/arch/s390/hypfs/hypfs_dbfs.c @@ -90,12 +90,33 @@ void hypfs_dbfs_remove_file(struct hypfs_dbfs_file *df) debugfs_remove(df->dentry); } -void hypfs_dbfs_init(void) +static int __init hypfs_dbfs_init(void) { - dbfs_dir = debugfs_create_dir("s390_hypfs", NULL); -} + int rc = -ENODATA; -void hypfs_dbfs_exit(void) -{ + dbfs_dir = debugfs_create_dir("s390_hypfs", NULL); + if (hypfs_diag_init()) + goto fail_dbfs_exit; + if (hypfs_vm_init()) + goto fail_hypfs_diag_exit; + hypfs_sprp_init(); + if (hypfs_diag0c_init()) + goto fail_hypfs_sprp_exit; + rc = hypfs_fs_init(); + if (rc) + goto fail_hypfs_diag0c_exit; + return 0; + +fail_hypfs_diag0c_exit: + hypfs_diag0c_exit(); +fail_hypfs_sprp_exit: + hypfs_sprp_exit(); + hypfs_vm_exit(); +fail_hypfs_diag_exit: + hypfs_diag_exit(); + pr_err("Initialization of hypfs failed with rc=%i\n", rc); +fail_dbfs_exit: debugfs_remove(dbfs_dir); + return rc; } +device_initcall(hypfs_dbfs_init) diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c index 889d83c77826..ea4c436f86a0 100644 --- a/arch/s390/hypfs/hypfs_diag.c +++ b/arch/s390/hypfs/hypfs_diag.c @@ -18,13 +18,11 @@ #include #include #include +#include "hypfs_diag.h" #include "hypfs.h" -#define TMP_SIZE 64 /* size of temporary buffers */ - #define DBFS_D204_HDR_VERSION 0 -static char *diag224_cpu_names; /* diag 224 name table */ static enum diag204_sc diag204_store_sc; /* used subcode for store */ static enum diag204_format diag204_info_type; /* used diag 204 data format */ @@ -33,172 +31,14 @@ static int diag204_buf_pages; /* number of pages for diag204 data */ static struct dentry *dbfs_d204_file; -/* - * DIAG 204 member access functions. - * - * Since we have two different diag 204 data formats for old and new s390 - * machines, we do not access the structs directly, but use getter functions for - * each struct member instead. This should make the code more readable. - */ - -/* Time information block */ - -static inline int info_blk_hdr__size(enum diag204_format type) +enum diag204_format diag204_get_info_type(void) { - if (type == DIAG204_INFO_SIMPLE) - return sizeof(struct diag204_info_blk_hdr); - else /* DIAG204_INFO_EXT */ - return sizeof(struct diag204_x_info_blk_hdr); + return diag204_info_type; } -static inline __u8 info_blk_hdr__npar(enum diag204_format type, void *hdr) +static void diag204_set_info_type(enum diag204_format type) { - if (type == DIAG204_INFO_SIMPLE) - return ((struct diag204_info_blk_hdr *)hdr)->npar; - else /* DIAG204_INFO_EXT */ - return ((struct diag204_x_info_blk_hdr *)hdr)->npar; -} - -static inline __u8 info_blk_hdr__flags(enum diag204_format type, void *hdr) -{ - if (type == DIAG204_INFO_SIMPLE) - return ((struct diag204_info_blk_hdr *)hdr)->flags; - else /* DIAG204_INFO_EXT */ - return ((struct diag204_x_info_blk_hdr *)hdr)->flags; -} - -/* Partition header */ - -static inline int part_hdr__size(enum diag204_format type) -{ - if (type == DIAG204_INFO_SIMPLE) - return sizeof(struct diag204_part_hdr); - else /* DIAG204_INFO_EXT */ - return sizeof(struct diag204_x_part_hdr); -} - -static inline __u8 part_hdr__rcpus(enum diag204_format type, void *hdr) -{ - if (type == DIAG204_INFO_SIMPLE) - return ((struct diag204_part_hdr *)hdr)->cpus; - else /* DIAG204_INFO_EXT */ - return ((struct diag204_x_part_hdr *)hdr)->rcpus; -} - -static inline void part_hdr__part_name(enum diag204_format type, void *hdr, - char *name) -{ - if (type == DIAG204_INFO_SIMPLE) - memcpy(name, ((struct diag204_part_hdr *)hdr)->part_name, - DIAG204_LPAR_NAME_LEN); - else /* DIAG204_INFO_EXT */ - memcpy(name, ((struct diag204_x_part_hdr *)hdr)->part_name, - DIAG204_LPAR_NAME_LEN); - EBCASC(name, DIAG204_LPAR_NAME_LEN); - name[DIAG204_LPAR_NAME_LEN] = 0; - strim(name); -} - -/* CPU info block */ - -static inline int cpu_info__size(enum diag204_format type) -{ - if (type == DIAG204_INFO_SIMPLE) - return sizeof(struct diag204_cpu_info); - else /* DIAG204_INFO_EXT */ - return sizeof(struct diag204_x_cpu_info); -} - -static inline __u8 cpu_info__ctidx(enum diag204_format type, void *hdr) -{ - if (type == DIAG204_INFO_SIMPLE) - return ((struct diag204_cpu_info *)hdr)->ctidx; - else /* DIAG204_INFO_EXT */ - return ((struct diag204_x_cpu_info *)hdr)->ctidx; -} - -static inline __u16 cpu_info__cpu_addr(enum diag204_format type, void *hdr) -{ - if (type == DIAG204_INFO_SIMPLE) - return ((struct diag204_cpu_info *)hdr)->cpu_addr; - else /* DIAG204_INFO_EXT */ - return ((struct diag204_x_cpu_info *)hdr)->cpu_addr; -} - -static inline __u64 cpu_info__acc_time(enum diag204_format type, void *hdr) -{ - if (type == DIAG204_INFO_SIMPLE) - return ((struct diag204_cpu_info *)hdr)->acc_time; - else /* DIAG204_INFO_EXT */ - return ((struct diag204_x_cpu_info *)hdr)->acc_time; -} - -static inline __u64 cpu_info__lp_time(enum diag204_format type, void *hdr) -{ - if (type == DIAG204_INFO_SIMPLE) - return ((struct diag204_cpu_info *)hdr)->lp_time; - else /* DIAG204_INFO_EXT */ - return ((struct diag204_x_cpu_info *)hdr)->lp_time; -} - -static inline __u64 cpu_info__online_time(enum diag204_format type, void *hdr) -{ - if (type == DIAG204_INFO_SIMPLE) - return 0; /* online_time not available in simple info */ - else /* DIAG204_INFO_EXT */ - return ((struct diag204_x_cpu_info *)hdr)->online_time; -} - -/* Physical header */ - -static inline int phys_hdr__size(enum diag204_format type) -{ - if (type == DIAG204_INFO_SIMPLE) - return sizeof(struct diag204_phys_hdr); - else /* DIAG204_INFO_EXT */ - return sizeof(struct diag204_x_phys_hdr); -} - -static inline __u8 phys_hdr__cpus(enum diag204_format type, void *hdr) -{ - if (type == DIAG204_INFO_SIMPLE) - return ((struct diag204_phys_hdr *)hdr)->cpus; - else /* DIAG204_INFO_EXT */ - return ((struct diag204_x_phys_hdr *)hdr)->cpus; -} - -/* Physical CPU info block */ - -static inline int phys_cpu__size(enum diag204_format type) -{ - if (type == DIAG204_INFO_SIMPLE) - return sizeof(struct diag204_phys_cpu); - else /* DIAG204_INFO_EXT */ - return sizeof(struct diag204_x_phys_cpu); -} - -static inline __u16 phys_cpu__cpu_addr(enum diag204_format type, void *hdr) -{ - if (type == DIAG204_INFO_SIMPLE) - return ((struct diag204_phys_cpu *)hdr)->cpu_addr; - else /* DIAG204_INFO_EXT */ - return ((struct diag204_x_phys_cpu *)hdr)->cpu_addr; -} - -static inline __u64 phys_cpu__mgm_time(enum diag204_format type, void *hdr) -{ - if (type == DIAG204_INFO_SIMPLE) - return ((struct diag204_phys_cpu *)hdr)->mgm_time; - else /* DIAG204_INFO_EXT */ - return ((struct diag204_x_phys_cpu *)hdr)->mgm_time; -} - -static inline __u64 phys_cpu__ctidx(enum diag204_format type, void *hdr) -{ - if (type == DIAG204_INFO_SIMPLE) - return ((struct diag204_phys_cpu *)hdr)->ctidx; - else /* DIAG204_INFO_EXT */ - return ((struct diag204_x_phys_cpu *)hdr)->ctidx; + diag204_info_type = type; } /* Diagnose 204 functions */ @@ -215,7 +55,7 @@ static void diag204_free_buffer(void) diag204_buf = NULL; } -static void *diag204_get_buffer(enum diag204_format fmt, int *pages) +void *diag204_get_buffer(enum diag204_format fmt, int *pages) { if (diag204_buf) { *pages = diag204_buf_pages; @@ -262,13 +102,13 @@ static int diag204_probe(void) if (diag204((unsigned long)DIAG204_SUBC_STIB7 | (unsigned long)DIAG204_INFO_EXT, pages, buf) >= 0) { diag204_store_sc = DIAG204_SUBC_STIB7; - diag204_info_type = DIAG204_INFO_EXT; + diag204_set_info_type(DIAG204_INFO_EXT); goto out; } if (diag204((unsigned long)DIAG204_SUBC_STIB6 | (unsigned long)DIAG204_INFO_EXT, pages, buf) >= 0) { diag204_store_sc = DIAG204_SUBC_STIB6; - diag204_info_type = DIAG204_INFO_EXT; + diag204_set_info_type(DIAG204_INFO_EXT); goto out; } diag204_free_buffer(); @@ -284,7 +124,7 @@ static int diag204_probe(void) if (diag204((unsigned long)DIAG204_SUBC_STIB4 | (unsigned long)DIAG204_INFO_SIMPLE, pages, buf) >= 0) { diag204_store_sc = DIAG204_SUBC_STIB4; - diag204_info_type = DIAG204_INFO_SIMPLE; + diag204_set_info_type(DIAG204_INFO_SIMPLE); goto out; } else { rc = -ENOSYS; @@ -298,60 +138,15 @@ fail_alloc: return rc; } -static int diag204_do_store(void *buf, int pages) +int diag204_store(void *buf, int pages) { int rc; - rc = diag204((unsigned long) diag204_store_sc | - (unsigned long) diag204_info_type, pages, buf); + rc = diag204((unsigned long)diag204_store_sc | + (unsigned long)diag204_get_info_type(), pages, buf); return rc < 0 ? -ENOSYS : 0; } -static void *diag204_store(void) -{ - void *buf; - int pages, rc; - - buf = diag204_get_buffer(diag204_info_type, &pages); - if (IS_ERR(buf)) - goto out; - rc = diag204_do_store(buf, pages); - if (rc) - return ERR_PTR(rc); -out: - return buf; -} - -/* Diagnose 224 functions */ - -static int diag224_get_name_table(void) -{ - /* memory must be below 2GB */ - diag224_cpu_names = (char *) __get_free_page(GFP_KERNEL | GFP_DMA); - if (!diag224_cpu_names) - return -ENOMEM; - if (diag224(diag224_cpu_names)) { - free_page((unsigned long) diag224_cpu_names); - return -EOPNOTSUPP; - } - EBCASC(diag224_cpu_names + 16, (*diag224_cpu_names + 1) * 16); - return 0; -} - -static void diag224_delete_name_table(void) -{ - free_page((unsigned long) diag224_cpu_names); -} - -static int diag224_idx2name(int index, char *name) -{ - memcpy(name, diag224_cpu_names + ((index + 1) * DIAG204_CPU_NAME_LEN), - DIAG204_CPU_NAME_LEN); - name[DIAG204_CPU_NAME_LEN] = 0; - strim(name); - return 0; -} - struct dbfs_d204_hdr { u64 len; /* Length of d204 buffer without header */ u16 version; /* Version of header */ @@ -375,7 +170,7 @@ static int dbfs_d204_create(void **data, void **data_free_ptr, size_t *size) if (!base) return -ENOMEM; d204 = PTR_ALIGN(base + sizeof(d204->hdr), PAGE_SIZE) - sizeof(d204->hdr); - rc = diag204_do_store(d204->buf, diag204_buf_pages); + rc = diag204_store(d204->buf, diag204_buf_pages); if (rc) { vfree(base); return rc; @@ -404,176 +199,21 @@ __init int hypfs_diag_init(void) return -ENODATA; } - if (diag204_info_type == DIAG204_INFO_EXT) + if (diag204_get_info_type() == DIAG204_INFO_EXT) hypfs_dbfs_create_file(&dbfs_file_d204); - if (MACHINE_IS_LPAR) { - rc = diag224_get_name_table(); - if (rc) { - pr_err("The hardware system does not provide all " - "functions required by hypfs\n"); - debugfs_remove(dbfs_d204_file); - return rc; - } + rc = hypfs_diag_fs_init(); + if (rc) { + pr_err("The hardware system does not provide all functions required by hypfs\n"); + debugfs_remove(dbfs_d204_file); } - return 0; + return rc; } void hypfs_diag_exit(void) { debugfs_remove(dbfs_d204_file); - diag224_delete_name_table(); + hypfs_diag_fs_exit(); diag204_free_buffer(); hypfs_dbfs_remove_file(&dbfs_file_d204); } - -/* - * Functions to create the directory structure - * ******************************************* - */ - -static int hypfs_create_cpu_files(struct dentry *cpus_dir, void *cpu_info) -{ - struct dentry *cpu_dir; - char buffer[TMP_SIZE]; - void *rc; - - snprintf(buffer, TMP_SIZE, "%d", cpu_info__cpu_addr(diag204_info_type, - cpu_info)); - cpu_dir = hypfs_mkdir(cpus_dir, buffer); - rc = hypfs_create_u64(cpu_dir, "mgmtime", - cpu_info__acc_time(diag204_info_type, cpu_info) - - cpu_info__lp_time(diag204_info_type, cpu_info)); - if (IS_ERR(rc)) - return PTR_ERR(rc); - rc = hypfs_create_u64(cpu_dir, "cputime", - cpu_info__lp_time(diag204_info_type, cpu_info)); - if (IS_ERR(rc)) - return PTR_ERR(rc); - if (diag204_info_type == DIAG204_INFO_EXT) { - rc = hypfs_create_u64(cpu_dir, "onlinetime", - cpu_info__online_time(diag204_info_type, - cpu_info)); - if (IS_ERR(rc)) - return PTR_ERR(rc); - } - diag224_idx2name(cpu_info__ctidx(diag204_info_type, cpu_info), buffer); - rc = hypfs_create_str(cpu_dir, "type", buffer); - return PTR_ERR_OR_ZERO(rc); -} - -static void *hypfs_create_lpar_files(struct dentry *systems_dir, void *part_hdr) -{ - struct dentry *cpus_dir; - struct dentry *lpar_dir; - char lpar_name[DIAG204_LPAR_NAME_LEN + 1]; - void *cpu_info; - int i; - - part_hdr__part_name(diag204_info_type, part_hdr, lpar_name); - lpar_name[DIAG204_LPAR_NAME_LEN] = 0; - lpar_dir = hypfs_mkdir(systems_dir, lpar_name); - if (IS_ERR(lpar_dir)) - return lpar_dir; - cpus_dir = hypfs_mkdir(lpar_dir, "cpus"); - if (IS_ERR(cpus_dir)) - return cpus_dir; - cpu_info = part_hdr + part_hdr__size(diag204_info_type); - for (i = 0; i < part_hdr__rcpus(diag204_info_type, part_hdr); i++) { - int rc; - rc = hypfs_create_cpu_files(cpus_dir, cpu_info); - if (rc) - return ERR_PTR(rc); - cpu_info += cpu_info__size(diag204_info_type); - } - return cpu_info; -} - -static int hypfs_create_phys_cpu_files(struct dentry *cpus_dir, void *cpu_info) -{ - struct dentry *cpu_dir; - char buffer[TMP_SIZE]; - void *rc; - - snprintf(buffer, TMP_SIZE, "%i", phys_cpu__cpu_addr(diag204_info_type, - cpu_info)); - cpu_dir = hypfs_mkdir(cpus_dir, buffer); - if (IS_ERR(cpu_dir)) - return PTR_ERR(cpu_dir); - rc = hypfs_create_u64(cpu_dir, "mgmtime", - phys_cpu__mgm_time(diag204_info_type, cpu_info)); - if (IS_ERR(rc)) - return PTR_ERR(rc); - diag224_idx2name(phys_cpu__ctidx(diag204_info_type, cpu_info), buffer); - rc = hypfs_create_str(cpu_dir, "type", buffer); - return PTR_ERR_OR_ZERO(rc); -} - -static void *hypfs_create_phys_files(struct dentry *parent_dir, void *phys_hdr) -{ - int i; - void *cpu_info; - struct dentry *cpus_dir; - - cpus_dir = hypfs_mkdir(parent_dir, "cpus"); - if (IS_ERR(cpus_dir)) - return cpus_dir; - cpu_info = phys_hdr + phys_hdr__size(diag204_info_type); - for (i = 0; i < phys_hdr__cpus(diag204_info_type, phys_hdr); i++) { - int rc; - rc = hypfs_create_phys_cpu_files(cpus_dir, cpu_info); - if (rc) - return ERR_PTR(rc); - cpu_info += phys_cpu__size(diag204_info_type); - } - return cpu_info; -} - -int hypfs_diag_create_files(struct dentry *root) -{ - struct dentry *systems_dir, *hyp_dir; - void *time_hdr, *part_hdr; - int i, rc; - void *buffer, *ptr; - - buffer = diag204_store(); - if (IS_ERR(buffer)) - return PTR_ERR(buffer); - - systems_dir = hypfs_mkdir(root, "systems"); - if (IS_ERR(systems_dir)) { - rc = PTR_ERR(systems_dir); - goto err_out; - } - time_hdr = (struct x_info_blk_hdr *)buffer; - part_hdr = time_hdr + info_blk_hdr__size(diag204_info_type); - for (i = 0; i < info_blk_hdr__npar(diag204_info_type, time_hdr); i++) { - part_hdr = hypfs_create_lpar_files(systems_dir, part_hdr); - if (IS_ERR(part_hdr)) { - rc = PTR_ERR(part_hdr); - goto err_out; - } - } - if (info_blk_hdr__flags(diag204_info_type, time_hdr) & - DIAG204_LPAR_PHYS_FLG) { - ptr = hypfs_create_phys_files(root, part_hdr); - if (IS_ERR(ptr)) { - rc = PTR_ERR(ptr); - goto err_out; - } - } - hyp_dir = hypfs_mkdir(root, "hyp"); - if (IS_ERR(hyp_dir)) { - rc = PTR_ERR(hyp_dir); - goto err_out; - } - ptr = hypfs_create_str(hyp_dir, "type", "LPAR Hypervisor"); - if (IS_ERR(ptr)) { - rc = PTR_ERR(ptr); - goto err_out; - } - rc = 0; - -err_out: - return rc; -} diff --git a/arch/s390/hypfs/hypfs_diag.h b/arch/s390/hypfs/hypfs_diag.h new file mode 100644 index 000000000000..7090eff27fef --- /dev/null +++ b/arch/s390/hypfs/hypfs_diag.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Hypervisor filesystem for Linux on s390. Diag 204 and 224 + * implementation. + * + * Copyright IBM Corp. 2006, 2008 + * Author(s): Michael Holzheu + */ + +#ifndef _S390_HYPFS_DIAG_H_ +#define _S390_HYPFS_DIAG_H_ + +#include + +enum diag204_format diag204_get_info_type(void); +void *diag204_get_buffer(enum diag204_format fmt, int *pages); +int diag204_store(void *buf, int pages); + +int __hypfs_diag_fs_init(void); +void __hypfs_diag_fs_exit(void); + +static inline int hypfs_diag_fs_init(void) +{ + if (IS_ENABLED(CONFIG_S390_HYPFS_FS)) + return __hypfs_diag_fs_init(); + return 0; +} + +static inline void hypfs_diag_fs_exit(void) +{ + if (IS_ENABLED(CONFIG_S390_HYPFS_FS)) + __hypfs_diag_fs_exit(); +} + +#endif /* _S390_HYPFS_DIAG_H_ */ diff --git a/arch/s390/hypfs/hypfs_diag_fs.c b/arch/s390/hypfs/hypfs_diag_fs.c new file mode 100644 index 000000000000..00a6d370a280 --- /dev/null +++ b/arch/s390/hypfs/hypfs_diag_fs.c @@ -0,0 +1,393 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Hypervisor filesystem for Linux on s390. Diag 204 and 224 + * implementation. + * + * Copyright IBM Corp. 2006, 2008 + * Author(s): Michael Holzheu + */ + +#define KMSG_COMPONENT "hypfs" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include "hypfs_diag.h" +#include "hypfs.h" + +#define TMP_SIZE 64 /* size of temporary buffers */ + +static char *diag224_cpu_names; /* diag 224 name table */ +static int diag224_idx2name(int index, char *name); + +/* + * DIAG 204 member access functions. + * + * Since we have two different diag 204 data formats for old and new s390 + * machines, we do not access the structs directly, but use getter functions for + * each struct member instead. This should make the code more readable. + */ + +/* Time information block */ + +static inline int info_blk_hdr__size(enum diag204_format type) +{ + if (type == DIAG204_INFO_SIMPLE) + return sizeof(struct diag204_info_blk_hdr); + else /* DIAG204_INFO_EXT */ + return sizeof(struct diag204_x_info_blk_hdr); +} + +static inline __u8 info_blk_hdr__npar(enum diag204_format type, void *hdr) +{ + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_info_blk_hdr *)hdr)->npar; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_info_blk_hdr *)hdr)->npar; +} + +static inline __u8 info_blk_hdr__flags(enum diag204_format type, void *hdr) +{ + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_info_blk_hdr *)hdr)->flags; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_info_blk_hdr *)hdr)->flags; +} + +/* Partition header */ + +static inline int part_hdr__size(enum diag204_format type) +{ + if (type == DIAG204_INFO_SIMPLE) + return sizeof(struct diag204_part_hdr); + else /* DIAG204_INFO_EXT */ + return sizeof(struct diag204_x_part_hdr); +} + +static inline __u8 part_hdr__rcpus(enum diag204_format type, void *hdr) +{ + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_part_hdr *)hdr)->cpus; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_part_hdr *)hdr)->rcpus; +} + +static inline void part_hdr__part_name(enum diag204_format type, void *hdr, + char *name) +{ + if (type == DIAG204_INFO_SIMPLE) + memcpy(name, ((struct diag204_part_hdr *)hdr)->part_name, + DIAG204_LPAR_NAME_LEN); + else /* DIAG204_INFO_EXT */ + memcpy(name, ((struct diag204_x_part_hdr *)hdr)->part_name, + DIAG204_LPAR_NAME_LEN); + EBCASC(name, DIAG204_LPAR_NAME_LEN); + name[DIAG204_LPAR_NAME_LEN] = 0; + strim(name); +} + +/* CPU info block */ + +static inline int cpu_info__size(enum diag204_format type) +{ + if (type == DIAG204_INFO_SIMPLE) + return sizeof(struct diag204_cpu_info); + else /* DIAG204_INFO_EXT */ + return sizeof(struct diag204_x_cpu_info); +} + +static inline __u8 cpu_info__ctidx(enum diag204_format type, void *hdr) +{ + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_cpu_info *)hdr)->ctidx; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_cpu_info *)hdr)->ctidx; +} + +static inline __u16 cpu_info__cpu_addr(enum diag204_format type, void *hdr) +{ + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_cpu_info *)hdr)->cpu_addr; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_cpu_info *)hdr)->cpu_addr; +} + +static inline __u64 cpu_info__acc_time(enum diag204_format type, void *hdr) +{ + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_cpu_info *)hdr)->acc_time; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_cpu_info *)hdr)->acc_time; +} + +static inline __u64 cpu_info__lp_time(enum diag204_format type, void *hdr) +{ + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_cpu_info *)hdr)->lp_time; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_cpu_info *)hdr)->lp_time; +} + +static inline __u64 cpu_info__online_time(enum diag204_format type, void *hdr) +{ + if (type == DIAG204_INFO_SIMPLE) + return 0; /* online_time not available in simple info */ + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_cpu_info *)hdr)->online_time; +} + +/* Physical header */ + +static inline int phys_hdr__size(enum diag204_format type) +{ + if (type == DIAG204_INFO_SIMPLE) + return sizeof(struct diag204_phys_hdr); + else /* DIAG204_INFO_EXT */ + return sizeof(struct diag204_x_phys_hdr); +} + +static inline __u8 phys_hdr__cpus(enum diag204_format type, void *hdr) +{ + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_phys_hdr *)hdr)->cpus; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_phys_hdr *)hdr)->cpus; +} + +/* Physical CPU info block */ + +static inline int phys_cpu__size(enum diag204_format type) +{ + if (type == DIAG204_INFO_SIMPLE) + return sizeof(struct diag204_phys_cpu); + else /* DIAG204_INFO_EXT */ + return sizeof(struct diag204_x_phys_cpu); +} + +static inline __u16 phys_cpu__cpu_addr(enum diag204_format type, void *hdr) +{ + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_phys_cpu *)hdr)->cpu_addr; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_phys_cpu *)hdr)->cpu_addr; +} + +static inline __u64 phys_cpu__mgm_time(enum diag204_format type, void *hdr) +{ + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_phys_cpu *)hdr)->mgm_time; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_phys_cpu *)hdr)->mgm_time; +} + +static inline __u64 phys_cpu__ctidx(enum diag204_format type, void *hdr) +{ + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_phys_cpu *)hdr)->ctidx; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_phys_cpu *)hdr)->ctidx; +} + +/* + * Functions to create the directory structure + * ******************************************* + */ + +static int hypfs_create_cpu_files(struct dentry *cpus_dir, void *cpu_info) +{ + struct dentry *cpu_dir; + char buffer[TMP_SIZE]; + void *rc; + + snprintf(buffer, TMP_SIZE, "%d", cpu_info__cpu_addr(diag204_get_info_type(), + cpu_info)); + cpu_dir = hypfs_mkdir(cpus_dir, buffer); + rc = hypfs_create_u64(cpu_dir, "mgmtime", + cpu_info__acc_time(diag204_get_info_type(), cpu_info) - + cpu_info__lp_time(diag204_get_info_type(), cpu_info)); + if (IS_ERR(rc)) + return PTR_ERR(rc); + rc = hypfs_create_u64(cpu_dir, "cputime", + cpu_info__lp_time(diag204_get_info_type(), cpu_info)); + if (IS_ERR(rc)) + return PTR_ERR(rc); + if (diag204_get_info_type() == DIAG204_INFO_EXT) { + rc = hypfs_create_u64(cpu_dir, "onlinetime", + cpu_info__online_time(diag204_get_info_type(), + cpu_info)); + if (IS_ERR(rc)) + return PTR_ERR(rc); + } + diag224_idx2name(cpu_info__ctidx(diag204_get_info_type(), cpu_info), buffer); + rc = hypfs_create_str(cpu_dir, "type", buffer); + return PTR_ERR_OR_ZERO(rc); +} + +static void *hypfs_create_lpar_files(struct dentry *systems_dir, void *part_hdr) +{ + struct dentry *cpus_dir; + struct dentry *lpar_dir; + char lpar_name[DIAG204_LPAR_NAME_LEN + 1]; + void *cpu_info; + int i; + + part_hdr__part_name(diag204_get_info_type(), part_hdr, lpar_name); + lpar_name[DIAG204_LPAR_NAME_LEN] = 0; + lpar_dir = hypfs_mkdir(systems_dir, lpar_name); + if (IS_ERR(lpar_dir)) + return lpar_dir; + cpus_dir = hypfs_mkdir(lpar_dir, "cpus"); + if (IS_ERR(cpus_dir)) + return cpus_dir; + cpu_info = part_hdr + part_hdr__size(diag204_get_info_type()); + for (i = 0; i < part_hdr__rcpus(diag204_get_info_type(), part_hdr); i++) { + int rc; + + rc = hypfs_create_cpu_files(cpus_dir, cpu_info); + if (rc) + return ERR_PTR(rc); + cpu_info += cpu_info__size(diag204_get_info_type()); + } + return cpu_info; +} + +static int hypfs_create_phys_cpu_files(struct dentry *cpus_dir, void *cpu_info) +{ + struct dentry *cpu_dir; + char buffer[TMP_SIZE]; + void *rc; + + snprintf(buffer, TMP_SIZE, "%i", phys_cpu__cpu_addr(diag204_get_info_type(), + cpu_info)); + cpu_dir = hypfs_mkdir(cpus_dir, buffer); + if (IS_ERR(cpu_dir)) + return PTR_ERR(cpu_dir); + rc = hypfs_create_u64(cpu_dir, "mgmtime", + phys_cpu__mgm_time(diag204_get_info_type(), cpu_info)); + if (IS_ERR(rc)) + return PTR_ERR(rc); + diag224_idx2name(phys_cpu__ctidx(diag204_get_info_type(), cpu_info), buffer); + rc = hypfs_create_str(cpu_dir, "type", buffer); + return PTR_ERR_OR_ZERO(rc); +} + +static void *hypfs_create_phys_files(struct dentry *parent_dir, void *phys_hdr) +{ + int i; + void *cpu_info; + struct dentry *cpus_dir; + + cpus_dir = hypfs_mkdir(parent_dir, "cpus"); + if (IS_ERR(cpus_dir)) + return cpus_dir; + cpu_info = phys_hdr + phys_hdr__size(diag204_get_info_type()); + for (i = 0; i < phys_hdr__cpus(diag204_get_info_type(), phys_hdr); i++) { + int rc; + + rc = hypfs_create_phys_cpu_files(cpus_dir, cpu_info); + if (rc) + return ERR_PTR(rc); + cpu_info += phys_cpu__size(diag204_get_info_type()); + } + return cpu_info; +} + +int hypfs_diag_create_files(struct dentry *root) +{ + struct dentry *systems_dir, *hyp_dir; + void *time_hdr, *part_hdr; + void *buffer, *ptr; + int i, rc, pages; + + buffer = diag204_get_buffer(diag204_get_info_type(), &pages); + if (IS_ERR(buffer)) + return PTR_ERR(buffer); + rc = diag204_store(buffer, pages); + if (rc) + return rc; + + systems_dir = hypfs_mkdir(root, "systems"); + if (IS_ERR(systems_dir)) { + rc = PTR_ERR(systems_dir); + goto err_out; + } + time_hdr = (struct x_info_blk_hdr *)buffer; + part_hdr = time_hdr + info_blk_hdr__size(diag204_get_info_type()); + for (i = 0; i < info_blk_hdr__npar(diag204_get_info_type(), time_hdr); i++) { + part_hdr = hypfs_create_lpar_files(systems_dir, part_hdr); + if (IS_ERR(part_hdr)) { + rc = PTR_ERR(part_hdr); + goto err_out; + } + } + if (info_blk_hdr__flags(diag204_get_info_type(), time_hdr) & + DIAG204_LPAR_PHYS_FLG) { + ptr = hypfs_create_phys_files(root, part_hdr); + if (IS_ERR(ptr)) { + rc = PTR_ERR(ptr); + goto err_out; + } + } + hyp_dir = hypfs_mkdir(root, "hyp"); + if (IS_ERR(hyp_dir)) { + rc = PTR_ERR(hyp_dir); + goto err_out; + } + ptr = hypfs_create_str(hyp_dir, "type", "LPAR Hypervisor"); + if (IS_ERR(ptr)) { + rc = PTR_ERR(ptr); + goto err_out; + } + rc = 0; + +err_out: + return rc; +} + +/* Diagnose 224 functions */ + +static int diag224_idx2name(int index, char *name) +{ + memcpy(name, diag224_cpu_names + ((index + 1) * DIAG204_CPU_NAME_LEN), + DIAG204_CPU_NAME_LEN); + name[DIAG204_CPU_NAME_LEN] = 0; + strim(name); + return 0; +} + +static int diag224_get_name_table(void) +{ + /* memory must be below 2GB */ + diag224_cpu_names = (char *)__get_free_page(GFP_KERNEL | GFP_DMA); + if (!diag224_cpu_names) + return -ENOMEM; + if (diag224(diag224_cpu_names)) { + free_page((unsigned long)diag224_cpu_names); + return -EOPNOTSUPP; + } + EBCASC(diag224_cpu_names + 16, (*diag224_cpu_names + 1) * 16); + return 0; +} + +static void diag224_delete_name_table(void) +{ + free_page((unsigned long)diag224_cpu_names); +} + +int __init __hypfs_diag_fs_init(void) +{ + if (MACHINE_IS_LPAR) + return diag224_get_name_table(); + return 0; +} + +void __hypfs_diag_fs_exit(void) +{ + diag224_delete_name_table(); +} diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c index a3d881ca0a98..3db40ad853e0 100644 --- a/arch/s390/hypfs/hypfs_vm.c +++ b/arch/s390/hypfs/hypfs_vm.c @@ -14,47 +14,15 @@ #include #include #include +#include "hypfs_vm.h" #include "hypfs.h" -#define NAME_LEN 8 #define DBFS_D2FC_HDR_VERSION 0 static char local_guest[] = " "; static char all_guests[] = "* "; static char *all_groups = all_guests; -static char *guest_query; - -struct diag2fc_data { - __u32 version; - __u32 flags; - __u64 used_cpu; - __u64 el_time; - __u64 mem_min_kb; - __u64 mem_max_kb; - __u64 mem_share_kb; - __u64 mem_used_kb; - __u32 pcpus; - __u32 lcpus; - __u32 vcpus; - __u32 ocpus; - __u32 cpu_max; - __u32 cpu_shares; - __u32 cpu_use_samp; - __u32 cpu_delay_samp; - __u32 page_wait_samp; - __u32 idle_samp; - __u32 other_samp; - __u32 total_samp; - char guest_name[NAME_LEN]; -}; - -struct diag2fc_parm_list { - char userid[NAME_LEN]; - char aci_grp[NAME_LEN]; - __u64 addr; - __u32 size; - __u32 fmt; -}; +char *diag2fc_guest_query; static int diag2fc(int size, char* query, void *addr) { @@ -62,10 +30,10 @@ static int diag2fc(int size, char* query, void *addr) unsigned long rc; struct diag2fc_parm_list parm_list; - memcpy(parm_list.userid, query, NAME_LEN); - ASCEBC(parm_list.userid, NAME_LEN); - memcpy(parm_list.aci_grp, all_groups, NAME_LEN); - ASCEBC(parm_list.aci_grp, NAME_LEN); + memcpy(parm_list.userid, query, DIAG2FC_NAME_LEN); + ASCEBC(parm_list.userid, DIAG2FC_NAME_LEN); + memcpy(parm_list.aci_grp, all_groups, DIAG2FC_NAME_LEN); + ASCEBC(parm_list.aci_grp, DIAG2FC_NAME_LEN); parm_list.addr = (unsigned long)addr; parm_list.size = size; parm_list.fmt = 0x02; @@ -87,7 +55,7 @@ static int diag2fc(int size, char* query, void *addr) /* * Allocate buffer for "query" and store diag 2fc at "offset" */ -static void *diag2fc_store(char *query, unsigned int *count, int offset) +void *diag2fc_store(char *query, unsigned int *count, int offset) { void *data; int size; @@ -108,132 +76,11 @@ static void *diag2fc_store(char *query, unsigned int *count, int offset) return data; } -static void diag2fc_free(const void *data) +void diag2fc_free(const void *data) { vfree(data); } -#define ATTRIBUTE(dir, name, member) \ -do { \ - void *rc; \ - rc = hypfs_create_u64(dir, name, member); \ - if (IS_ERR(rc)) \ - return PTR_ERR(rc); \ -} while(0) - -static int hypfs_vm_create_guest(struct dentry *systems_dir, - struct diag2fc_data *data) -{ - char guest_name[NAME_LEN + 1] = {}; - struct dentry *guest_dir, *cpus_dir, *samples_dir, *mem_dir; - int dedicated_flag, capped_value; - - capped_value = (data->flags & 0x00000006) >> 1; - dedicated_flag = (data->flags & 0x00000008) >> 3; - - /* guest dir */ - memcpy(guest_name, data->guest_name, NAME_LEN); - EBCASC(guest_name, NAME_LEN); - strim(guest_name); - guest_dir = hypfs_mkdir(systems_dir, guest_name); - if (IS_ERR(guest_dir)) - return PTR_ERR(guest_dir); - ATTRIBUTE(guest_dir, "onlinetime_us", data->el_time); - - /* logical cpu information */ - cpus_dir = hypfs_mkdir(guest_dir, "cpus"); - if (IS_ERR(cpus_dir)) - return PTR_ERR(cpus_dir); - ATTRIBUTE(cpus_dir, "cputime_us", data->used_cpu); - ATTRIBUTE(cpus_dir, "capped", capped_value); - ATTRIBUTE(cpus_dir, "dedicated", dedicated_flag); - ATTRIBUTE(cpus_dir, "count", data->vcpus); - /* - * Note: The "weight_min" attribute got the wrong name. - * The value represents the number of non-stopped (operating) - * CPUS. - */ - ATTRIBUTE(cpus_dir, "weight_min", data->ocpus); - ATTRIBUTE(cpus_dir, "weight_max", data->cpu_max); - ATTRIBUTE(cpus_dir, "weight_cur", data->cpu_shares); - - /* memory information */ - mem_dir = hypfs_mkdir(guest_dir, "mem"); - if (IS_ERR(mem_dir)) - return PTR_ERR(mem_dir); - ATTRIBUTE(mem_dir, "min_KiB", data->mem_min_kb); - ATTRIBUTE(mem_dir, "max_KiB", data->mem_max_kb); - ATTRIBUTE(mem_dir, "used_KiB", data->mem_used_kb); - ATTRIBUTE(mem_dir, "share_KiB", data->mem_share_kb); - - /* samples */ - samples_dir = hypfs_mkdir(guest_dir, "samples"); - if (IS_ERR(samples_dir)) - return PTR_ERR(samples_dir); - ATTRIBUTE(samples_dir, "cpu_using", data->cpu_use_samp); - ATTRIBUTE(samples_dir, "cpu_delay", data->cpu_delay_samp); - ATTRIBUTE(samples_dir, "mem_delay", data->page_wait_samp); - ATTRIBUTE(samples_dir, "idle", data->idle_samp); - ATTRIBUTE(samples_dir, "other", data->other_samp); - ATTRIBUTE(samples_dir, "total", data->total_samp); - return 0; -} - -int hypfs_vm_create_files(struct dentry *root) -{ - struct dentry *dir, *file; - struct diag2fc_data *data; - unsigned int count = 0; - int rc, i; - - data = diag2fc_store(guest_query, &count, 0); - if (IS_ERR(data)) - return PTR_ERR(data); - - /* Hypervisor Info */ - dir = hypfs_mkdir(root, "hyp"); - if (IS_ERR(dir)) { - rc = PTR_ERR(dir); - goto failed; - } - file = hypfs_create_str(dir, "type", "z/VM Hypervisor"); - if (IS_ERR(file)) { - rc = PTR_ERR(file); - goto failed; - } - - /* physical cpus */ - dir = hypfs_mkdir(root, "cpus"); - if (IS_ERR(dir)) { - rc = PTR_ERR(dir); - goto failed; - } - file = hypfs_create_u64(dir, "count", data->lcpus); - if (IS_ERR(file)) { - rc = PTR_ERR(file); - goto failed; - } - - /* guests */ - dir = hypfs_mkdir(root, "systems"); - if (IS_ERR(dir)) { - rc = PTR_ERR(dir); - goto failed; - } - - for (i = 0; i < count; i++) { - rc = hypfs_vm_create_guest(dir, &(data[i])); - if (rc) - goto failed; - } - diag2fc_free(data); - return 0; - -failed: - diag2fc_free(data); - return rc; -} - struct dbfs_d2fc_hdr { u64 len; /* Length of d2fc buffer without header */ u16 version; /* Version of header */ @@ -252,7 +99,7 @@ static int dbfs_diag2fc_create(void **data, void **data_free_ptr, size_t *size) struct dbfs_d2fc *d2fc; unsigned int count; - d2fc = diag2fc_store(guest_query, &count, sizeof(d2fc->hdr)); + d2fc = diag2fc_store(diag2fc_guest_query, &count, sizeof(d2fc->hdr)); if (IS_ERR(d2fc)) return PTR_ERR(d2fc); store_tod_clock_ext(&d2fc->hdr.tod_ext); @@ -277,9 +124,9 @@ int hypfs_vm_init(void) if (!MACHINE_IS_VM) return 0; if (diag2fc(0, all_guests, NULL) > 0) - guest_query = all_guests; + diag2fc_guest_query = all_guests; else if (diag2fc(0, local_guest, NULL) > 0) - guest_query = local_guest; + diag2fc_guest_query = local_guest; else return -EACCES; hypfs_dbfs_create_file(&dbfs_file_2fc); diff --git a/arch/s390/hypfs/hypfs_vm.h b/arch/s390/hypfs/hypfs_vm.h new file mode 100644 index 000000000000..fe2e5851addd --- /dev/null +++ b/arch/s390/hypfs/hypfs_vm.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Hypervisor filesystem for Linux on s390. z/VM implementation. + * + * Copyright IBM Corp. 2006 + * Author(s): Michael Holzheu + */ + +#ifndef _S390_HYPFS_VM_H_ +#define _S390_HYPFS_VM_H_ + +#define DIAG2FC_NAME_LEN 8 + +struct diag2fc_data { + __u32 version; + __u32 flags; + __u64 used_cpu; + __u64 el_time; + __u64 mem_min_kb; + __u64 mem_max_kb; + __u64 mem_share_kb; + __u64 mem_used_kb; + __u32 pcpus; + __u32 lcpus; + __u32 vcpus; + __u32 ocpus; + __u32 cpu_max; + __u32 cpu_shares; + __u32 cpu_use_samp; + __u32 cpu_delay_samp; + __u32 page_wait_samp; + __u32 idle_samp; + __u32 other_samp; + __u32 total_samp; + char guest_name[DIAG2FC_NAME_LEN]; +}; + +struct diag2fc_parm_list { + char userid[DIAG2FC_NAME_LEN]; + char aci_grp[DIAG2FC_NAME_LEN]; + __u64 addr; + __u32 size; + __u32 fmt; +}; + +void *diag2fc_store(char *query, unsigned int *count, int offset); +void diag2fc_free(const void *data); +extern char *diag2fc_guest_query; + +#endif /* _S390_HYPFS_VM_H_ */ diff --git a/arch/s390/hypfs/hypfs_vm_fs.c b/arch/s390/hypfs/hypfs_vm_fs.c new file mode 100644 index 000000000000..6011289afa8c --- /dev/null +++ b/arch/s390/hypfs/hypfs_vm_fs.c @@ -0,0 +1,139 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Hypervisor filesystem for Linux on s390. z/VM implementation. + * + * Copyright IBM Corp. 2006 + * Author(s): Michael Holzheu + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "hypfs_vm.h" +#include "hypfs.h" + +#define ATTRIBUTE(dir, name, member) \ +do { \ + void *rc; \ + rc = hypfs_create_u64(dir, name, member); \ + if (IS_ERR(rc)) \ + return PTR_ERR(rc); \ +} while (0) + +static int hypfs_vm_create_guest(struct dentry *systems_dir, + struct diag2fc_data *data) +{ + char guest_name[DIAG2FC_NAME_LEN + 1] = {}; + struct dentry *guest_dir, *cpus_dir, *samples_dir, *mem_dir; + int dedicated_flag, capped_value; + + capped_value = (data->flags & 0x00000006) >> 1; + dedicated_flag = (data->flags & 0x00000008) >> 3; + + /* guest dir */ + memcpy(guest_name, data->guest_name, DIAG2FC_NAME_LEN); + EBCASC(guest_name, DIAG2FC_NAME_LEN); + strim(guest_name); + guest_dir = hypfs_mkdir(systems_dir, guest_name); + if (IS_ERR(guest_dir)) + return PTR_ERR(guest_dir); + ATTRIBUTE(guest_dir, "onlinetime_us", data->el_time); + + /* logical cpu information */ + cpus_dir = hypfs_mkdir(guest_dir, "cpus"); + if (IS_ERR(cpus_dir)) + return PTR_ERR(cpus_dir); + ATTRIBUTE(cpus_dir, "cputime_us", data->used_cpu); + ATTRIBUTE(cpus_dir, "capped", capped_value); + ATTRIBUTE(cpus_dir, "dedicated", dedicated_flag); + ATTRIBUTE(cpus_dir, "count", data->vcpus); + /* + * Note: The "weight_min" attribute got the wrong name. + * The value represents the number of non-stopped (operating) + * CPUS. + */ + ATTRIBUTE(cpus_dir, "weight_min", data->ocpus); + ATTRIBUTE(cpus_dir, "weight_max", data->cpu_max); + ATTRIBUTE(cpus_dir, "weight_cur", data->cpu_shares); + + /* memory information */ + mem_dir = hypfs_mkdir(guest_dir, "mem"); + if (IS_ERR(mem_dir)) + return PTR_ERR(mem_dir); + ATTRIBUTE(mem_dir, "min_KiB", data->mem_min_kb); + ATTRIBUTE(mem_dir, "max_KiB", data->mem_max_kb); + ATTRIBUTE(mem_dir, "used_KiB", data->mem_used_kb); + ATTRIBUTE(mem_dir, "share_KiB", data->mem_share_kb); + + /* samples */ + samples_dir = hypfs_mkdir(guest_dir, "samples"); + if (IS_ERR(samples_dir)) + return PTR_ERR(samples_dir); + ATTRIBUTE(samples_dir, "cpu_using", data->cpu_use_samp); + ATTRIBUTE(samples_dir, "cpu_delay", data->cpu_delay_samp); + ATTRIBUTE(samples_dir, "mem_delay", data->page_wait_samp); + ATTRIBUTE(samples_dir, "idle", data->idle_samp); + ATTRIBUTE(samples_dir, "other", data->other_samp); + ATTRIBUTE(samples_dir, "total", data->total_samp); + return 0; +} + +int hypfs_vm_create_files(struct dentry *root) +{ + struct dentry *dir, *file; + struct diag2fc_data *data; + unsigned int count = 0; + int rc, i; + + data = diag2fc_store(diag2fc_guest_query, &count, 0); + if (IS_ERR(data)) + return PTR_ERR(data); + + /* Hypervisor Info */ + dir = hypfs_mkdir(root, "hyp"); + if (IS_ERR(dir)) { + rc = PTR_ERR(dir); + goto failed; + } + file = hypfs_create_str(dir, "type", "z/VM Hypervisor"); + if (IS_ERR(file)) { + rc = PTR_ERR(file); + goto failed; + } + + /* physical cpus */ + dir = hypfs_mkdir(root, "cpus"); + if (IS_ERR(dir)) { + rc = PTR_ERR(dir); + goto failed; + } + file = hypfs_create_u64(dir, "count", data->lcpus); + if (IS_ERR(file)) { + rc = PTR_ERR(file); + goto failed; + } + + /* guests */ + dir = hypfs_mkdir(root, "systems"); + if (IS_ERR(dir)) { + rc = PTR_ERR(dir); + goto failed; + } + + for (i = 0; i < count; i++) { + rc = hypfs_vm_create_guest(dir, &data[i]); + if (rc) + goto failed; + } + diag2fc_free(data); + return 0; + +failed: + diag2fc_free(data); + return rc; +} diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index ee919bfc8186..0d53483fec34 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -460,45 +460,18 @@ static const struct super_operations hypfs_s_ops = { .show_options = hypfs_show_options, }; -static int __init hypfs_init(void) +int __init __hypfs_fs_init(void) { int rc; - hypfs_dbfs_init(); - - if (hypfs_diag_init()) { - rc = -ENODATA; - goto fail_dbfs_exit; - } - if (hypfs_vm_init()) { - rc = -ENODATA; - goto fail_hypfs_diag_exit; - } - hypfs_sprp_init(); - if (hypfs_diag0c_init()) { - rc = -ENODATA; - goto fail_hypfs_sprp_exit; - } rc = sysfs_create_mount_point(hypervisor_kobj, "s390"); if (rc) - goto fail_hypfs_diag0c_exit; + return rc; rc = register_filesystem(&hypfs_type); if (rc) - goto fail_filesystem; + goto fail; return 0; - -fail_filesystem: +fail: sysfs_remove_mount_point(hypervisor_kobj, "s390"); -fail_hypfs_diag0c_exit: - hypfs_diag0c_exit(); -fail_hypfs_sprp_exit: - hypfs_sprp_exit(); - hypfs_vm_exit(); -fail_hypfs_diag_exit: - hypfs_diag_exit(); - pr_err("Initialization of hypfs failed with rc=%i\n", rc); -fail_dbfs_exit: - hypfs_dbfs_exit(); return rc; } -device_initcall(hypfs_init) From 1256e70a082ad855efe351146dfa39207d5e3e70 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Thu, 29 Jun 2023 12:02:19 +0200 Subject: [PATCH 08/66] s390/ftrace: enable HAVE_FUNCTION_GRAPH_RETVAL Add support for tracing return values in the function graph tracer. This requires return_to_handler() to record gpr2 and the frame pointer Signed-off-by: Sven Schnelle Reviewed-by: Heiko Carstens Signed-off-by: Heiko Carstens --- arch/s390/Kconfig | 1 + arch/s390/include/asm/ftrace.h | 17 +++++++++++++++++ arch/s390/kernel/asm-offsets.c | 7 +++++++ arch/s390/kernel/mcount.S | 8 ++++++-- 4 files changed, 31 insertions(+), 2 deletions(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index ea0e8f34eb0d..c5be7199067b 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -174,6 +174,7 @@ config S390 select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_ARG_ACCESS_API select HAVE_FUNCTION_ERROR_INJECTION + select HAVE_FUNCTION_GRAPH_RETVAL select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER select HAVE_GCC_PLUGINS diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h index e5c5cb1207e2..5a82b08f03cd 100644 --- a/arch/s390/include/asm/ftrace.h +++ b/arch/s390/include/asm/ftrace.h @@ -54,6 +54,23 @@ static __always_inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs * return NULL; } +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +struct fgraph_ret_regs { + unsigned long gpr2; + unsigned long fp; +}; + +static __always_inline unsigned long fgraph_ret_regs_return_value(struct fgraph_ret_regs *ret_regs) +{ + return ret_regs->gpr2; +} + +static __always_inline unsigned long fgraph_ret_regs_frame_pointer(struct fgraph_ret_regs *ret_regs) +{ + return ret_regs->fp; +} +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + static __always_inline unsigned long ftrace_regs_get_instruction_pointer(const struct ftrace_regs *fregs) { diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 81cf72088041..150809c9ffa5 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -177,5 +178,11 @@ int main(void) DEFINE(OLDMEM_SIZE, PARMAREA + offsetof(struct parmarea, oldmem_size)); DEFINE(COMMAND_LINE, PARMAREA + offsetof(struct parmarea, command_line)); DEFINE(MAX_COMMAND_LINE_SIZE, PARMAREA + offsetof(struct parmarea, max_command_line_size)); +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + /* function graph return value tracing */ + OFFSET(__FGRAPH_RET_GPR2, fgraph_ret_regs, gpr2); + OFFSET(__FGRAPH_RET_FP, fgraph_ret_regs, fp); + DEFINE(__FGRAPH_RET_SIZE, sizeof(struct fgraph_ret_regs)); +#endif return 0; } diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index dbece2803c50..fd27ff9f2cf3 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -128,10 +128,14 @@ SYM_CODE_END(ftrace_common) SYM_FUNC_START(return_to_handler) stmg %r2,%r5,32(%r15) lgr %r1,%r15 - aghi %r15,-STACK_FRAME_OVERHEAD + aghi %r15,-(STACK_FRAME_OVERHEAD+__FGRAPH_RET_SIZE) stg %r1,__SF_BACKCHAIN(%r15) + aghik %r3,%r15,STACK_FRAME_OVERHEAD + stg %r1,__FGRAPH_RET_FP(%r3) + stg %r2,__FGRAPH_RET_GPR2(%r3) + lgr %r2,%r3 brasl %r14,ftrace_return_to_handler - aghi %r15,STACK_FRAME_OVERHEAD + aghi %r15,STACK_FRAME_OVERHEAD+__FGRAPH_RET_SIZE lgr %r14,%r2 lmg %r2,%r5,32(%r15) BR_EX %r14 From b9b4568843bb6ceee85bf1280473f510701c82e2 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Thu, 6 Jul 2023 16:24:11 +0200 Subject: [PATCH 09/66] s390/kexec: make machine_kexec() depend on CONFIG_KEXEC_CORE Make machine_kexec.o and relocate_kernel.o depend on CONFIG_KEXEC_CORE option as other architectures do. Still generate machine_kexec_reloc.o unconditionally, since arch_kexec_do_relocs() function is neded by the decompressor. Suggested-by: Nathan Chancellor Reported-by: Nathan Chancellor Reported-by: Linux Kernel Functional Testing Reviewed-by: Heiko Carstens Signed-off-by: Alexander Gordeev Signed-off-by: Heiko Carstens --- arch/s390/kernel/Makefile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 8d7514c72bb8..0df2b88cc0da 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -37,9 +37,9 @@ CFLAGS_unwind_bc.o += -fno-optimize-sibling-calls obj-y := head64.o traps.o time.o process.o earlypgm.o early.o setup.o idle.o vtime.o obj-y += processor.o syscall.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o cpufeature.o -obj-y += sysinfo.o lgr.o os_info.o machine_kexec.o +obj-y += sysinfo.o lgr.o os_info.o obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o -obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o +obj-y += entry.o reipl.o kdebugfs.o alternative.o obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o @@ -63,6 +63,7 @@ obj-$(CONFIG_RETHOOK) += rethook.o obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o obj-$(CONFIG_FUNCTION_TRACER) += mcount.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o +obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o obj-$(CONFIG_UPROBES) += uprobes.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o From 04b8698ae879b88f96c083292f328bd31b555422 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Tue, 11 Jul 2023 17:59:42 +0200 Subject: [PATCH 10/66] s390/dcssblk: use IS_ALIGNED() for alignment checks Use IS_ALIGNED() instead of cumbersome bit manipulations. Reviewed-by: Gerald Schaefer Signed-off-by: Alexander Gordeev Signed-off-by: Heiko Carstens --- drivers/s390/block/dcssblk.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 09acf3853a77..4f157dc1608e 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -868,8 +868,8 @@ dcssblk_submit_bio(struct bio *bio) dev_info = bio->bi_bdev->bd_disk->private_data; if (dev_info == NULL) goto fail; - if ((bio->bi_iter.bi_sector & 7) != 0 || - (bio->bi_iter.bi_size & 4095) != 0) + if (!IS_ALIGNED(bio->bi_iter.bi_sector, 8) || + !IS_ALIGNED(bio->bi_iter.bi_size, PAGE_SIZE)) /* Request is not page-aligned. */ goto fail; /* verify data transfer direction */ @@ -891,7 +891,8 @@ dcssblk_submit_bio(struct bio *bio) bio_for_each_segment(bvec, bio, iter) { page_addr = (unsigned long)bvec_virt(&bvec); source_addr = dev_info->start + (index<<12) + bytes_done; - if (unlikely((page_addr & 4095) != 0) || (bvec.bv_len & 4095) != 0) + if (unlikely(!IS_ALIGNED(page_addr, PAGE_SIZE) || + !IS_ALIGNED(bvec.bv_len, PAGE_SIZE))) // More paranoia. goto fail; if (bio_data_dir(bio) == READ) { From 3b53d7b131bd79d97dd553af84846fde456e029f Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Tue, 11 Jul 2023 17:41:37 +0200 Subject: [PATCH 11/66] s390/dcssblk: fix virtual vs physical address confusion Fix virtual vs physical address confusion (which currently are the same). Reviewed-by: Gerald Schaefer Signed-off-by: Alexander Gordeev Signed-off-by: Heiko Carstens --- drivers/s390/block/dcssblk.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 4f157dc1608e..6eafd0a34483 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -860,7 +860,7 @@ dcssblk_submit_bio(struct bio *bio) struct bio_vec bvec; struct bvec_iter iter; unsigned long index; - unsigned long page_addr; + void *page_addr; unsigned long source_addr; unsigned long bytes_done; @@ -889,19 +889,16 @@ dcssblk_submit_bio(struct bio *bio) index = (bio->bi_iter.bi_sector >> 3); bio_for_each_segment(bvec, bio, iter) { - page_addr = (unsigned long)bvec_virt(&bvec); + page_addr = bvec_virt(&bvec); source_addr = dev_info->start + (index<<12) + bytes_done; - if (unlikely(!IS_ALIGNED(page_addr, PAGE_SIZE) || + if (unlikely(!IS_ALIGNED((unsigned long)page_addr, PAGE_SIZE) || !IS_ALIGNED(bvec.bv_len, PAGE_SIZE))) // More paranoia. goto fail; - if (bio_data_dir(bio) == READ) { - memcpy((void*)page_addr, (void*)source_addr, - bvec.bv_len); - } else { - memcpy((void*)source_addr, (void*)page_addr, - bvec.bv_len); - } + if (bio_data_dir(bio) == READ) + memcpy(page_addr, __va(source_addr), bvec.bv_len); + else + memcpy(__va(source_addr), page_addr, bvec.bv_len); bytes_done += bvec.bv_len; } bio_endio(bio); From 355e30ca1a707526b23a9b016fa4f740e9379c8d Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Sun, 2 Jul 2023 22:29:10 +0200 Subject: [PATCH 12/66] s390/mm: remove redundant check against VMEM_MAX_PHYS The value of ident_map_size could never exceed the value of vmemmap as secured by setup_kernel_memory_layout() function: /* make sure identity map doesn't overlay with vmemmap */ ident_map_size = min(ident_map_size, vmemmap_start); Since VMEM_MAX_PHYS macro is set to vmemmap and a newly added range is checked against ident_map_size in add_memory_merged() function anyway, the check against VMEM_MAX_PHYS is redundant. Reviewed-by: Heiko Carstens Signed-off-by: Alexander Gordeev Signed-off-by: Heiko Carstens --- drivers/s390/char/sclp_cmd.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/s390/char/sclp_cmd.c b/drivers/s390/char/sclp_cmd.c index 3c87057436d5..8b4575a0db9f 100644 --- a/drivers/s390/char/sclp_cmd.c +++ b/drivers/s390/char/sclp_cmd.c @@ -392,10 +392,6 @@ static void __init add_memory_merged(u16 rn) goto skip_add; start = rn2addr(first_rn); size = (unsigned long long) num * sclp.rzm; - if (start >= VMEM_MAX_PHYS) - goto skip_add; - if (start + size > VMEM_MAX_PHYS) - size = VMEM_MAX_PHYS - start; if (start >= ident_map_size) goto skip_add; if (start + size > ident_map_size) From 94fd522069e124297c094840473f0d9637c3d991 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Sun, 2 Jul 2023 23:11:18 +0200 Subject: [PATCH 13/66] s390/mm: rework arch_get_mappable_range() callback As per description in mm/memory_hotplug.c platforms should define arch_get_mappable_range() that provides maximum possible addressable physical memory range for which the linear mapping could be created. The current implementation uses VMEM_MAX_PHYS macro as the maximum mappable physical address and it is simply a cast to vmemmap. Since the address is in physical address space the natural upper limit of MAX_PHYSMEM_BITS is honoured: vmemmap_start = min(vmemmap_start, 1UL << MAX_PHYSMEM_BITS); Further, to make sure the identity mapping would not overlay with vmemmap, the size of identity mapping could be stripped like this: ident_map_size = min(ident_map_size, vmemmap_start); Similarily, any other memory that could be added (e.g DCSS segment) should not overlay with vmemmap as well and that is prevented by using vmemmap (VMEM_MAX_PHYS macro) as the upper limit. However, while the use of VMEM_MAX_PHYS brings the desired result it actually poses two issues: 1. As described, vmemmap is handled as a physical address, although it is actually a pointer to struct page in virtual address space. 2. As vmemmap is a virtual address it could have been located anywhere in the virtual address space. However, the desired necessity to honour MAX_PHYSMEM_BITS limit prevents that. Rework arch_get_mappable_range() callback in a way it does not use VMEM_MAX_PHYS macro and does not confuse the notion of virtual vs physical address spacees as result. That paves the way for moving vmemmap elsewhere and optimizing the virtual address space layout. Introduce max_mappable preserved boot variable and let function setup_kernel_memory_layout() set it up. As result, the rest of the code is does not need to know the virtual memory layout specifics. Reviewed-by: Heiko Carstens Signed-off-by: Alexander Gordeev Signed-off-by: Heiko Carstens --- arch/s390/boot/startup.c | 3 +++ arch/s390/include/asm/setup.h | 1 + arch/s390/kernel/setup.c | 1 + arch/s390/mm/vmem.c | 2 +- 4 files changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 64bd7ac3e35d..59abdd0ab56f 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -27,6 +27,7 @@ struct page *__bootdata_preserved(vmemmap); unsigned long __bootdata_preserved(vmemmap_size); unsigned long __bootdata_preserved(MODULES_VADDR); unsigned long __bootdata_preserved(MODULES_END); +unsigned long __bootdata_preserved(max_mappable); unsigned long __bootdata(ident_map_size); u64 __bootdata_preserved(stfle_fac_list[16]); @@ -222,6 +223,8 @@ static unsigned long setup_kernel_memory_layout(void) vmemmap_start = round_down(VMALLOC_START - pages * sizeof(struct page), rte_size); /* vmemmap_start is the future VMEM_MAX_PHYS, make sure it is within MAX_PHYSMEM */ vmemmap_start = min(vmemmap_start, 1UL << MAX_PHYSMEM_BITS); + /* maximum mappable address as seen by arch_get_mappable_range() */ + max_mappable = vmemmap_start; /* make sure identity map doesn't overlay with vmemmap */ ident_map_size = min(ident_map_size, vmemmap_start); vmemmap_size = SECTION_ALIGN_UP(ident_map_size / PAGE_SIZE) * sizeof(struct page); diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index f191255c60db..e795f425627a 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -74,6 +74,7 @@ extern unsigned int zlib_dfltcc_support; extern int noexec_disabled; extern unsigned long ident_map_size; +extern unsigned long max_mappable; /* The Write Back bit position in the physaddr is given by the SLPC PCI */ extern unsigned long mio_wb_bit_mask; diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 00d76448319d..393dd8385506 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -146,6 +146,7 @@ static u32 __amode31_ref *__ctl_linkage_stack = __ctl_linkage_stack_amode31; static u32 __amode31_ref *__ctl_duct = __ctl_duct_amode31; int __bootdata(noexec_disabled); +unsigned long __bootdata_preserved(max_mappable); unsigned long __bootdata(ident_map_size); struct physmem_info __bootdata(physmem_info); diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index b26649233d12..be69cb2d47eb 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -531,7 +531,7 @@ struct range arch_get_mappable_range(void) struct range mhp_range; mhp_range.start = 0; - mhp_range.end = VMEM_MAX_PHYS - 1; + mhp_range.end = max_mappable - 1; return mhp_range; } From 9916bf4edac6f8f499ce7c42dafb57e242865790 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Wed, 5 Jul 2023 14:17:11 +0200 Subject: [PATCH 14/66] s390/extmem: improve reporting of -ERANGE error Interface segment_warning() reports maximum mappable physical address for -ERANGE error. Currently that address is the value of VMEM_MAX_PHYS macro, but that well might change. A better way to obtain that address is calling arch_get_mappable_range() callback - one that is used by vmem_add_mapping() and generates -ERANGE error in the first place. Reviewed-by: Heiko Carstens Signed-off-by: Alexander Gordeev Signed-off-by: Heiko Carstens --- arch/s390/mm/extmem.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c index 1bc42ce26599..e41869f5cc95 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c @@ -640,10 +640,13 @@ void segment_warning(int rc, char *seg_name) pr_err("There is not enough memory to load or query " "DCSS %s\n", seg_name); break; - case -ERANGE: - pr_err("DCSS %s exceeds the kernel mapping range (%lu) " - "and cannot be loaded\n", seg_name, VMEM_MAX_PHYS); + case -ERANGE: { + struct range mhp_range = arch_get_mappable_range(); + + pr_err("DCSS %s exceeds the kernel mapping range (%llu) " + "and cannot be loaded\n", seg_name, mhp_range.end + 1); break; + } default: break; } From e7e828ebeb5d80d42c9ac514db5fb3d33367cf10 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Wed, 5 Jul 2023 15:50:19 +0200 Subject: [PATCH 15/66] s390/mm: get rid of VMEM_MAX_PHYS macro There are no users of VMEM_MAX_PHYS macro left, remove it. Reviewed-by: Heiko Carstens Signed-off-by: Alexander Gordeev Signed-off-by: Heiko Carstens --- arch/s390/boot/startup.c | 1 - arch/s390/include/asm/pgtable.h | 2 -- 2 files changed, 3 deletions(-) diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 59abdd0ab56f..a1f792fcc710 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -221,7 +221,6 @@ static unsigned long setup_kernel_memory_layout(void) pages = SECTION_ALIGN_UP(pages); /* keep vmemmap_start aligned to a top level region table entry */ vmemmap_start = round_down(VMALLOC_START - pages * sizeof(struct page), rte_size); - /* vmemmap_start is the future VMEM_MAX_PHYS, make sure it is within MAX_PHYSMEM */ vmemmap_start = min(vmemmap_start, 1UL << MAX_PHYSMEM_BITS); /* maximum mappable address as seen by arch_get_mappable_range() */ max_mappable = vmemmap_start; diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index c55f3c3365af..30909fe27c24 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -89,8 +89,6 @@ extern unsigned long __bootdata_preserved(VMALLOC_END); extern struct page *__bootdata_preserved(vmemmap); extern unsigned long __bootdata_preserved(vmemmap_size); -#define VMEM_MAX_PHYS ((unsigned long) vmemmap) - extern unsigned long __bootdata_preserved(MODULES_VADDR); extern unsigned long __bootdata_preserved(MODULES_END); #define MODULES_VADDR MODULES_VADDR From 5216d853cb154a4866c9984fd41f71583e8bdd39 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Fri, 14 Jul 2023 10:42:56 +0200 Subject: [PATCH 16/66] s390/hypfs: stop using ENOSYS error code ENOSYS should only be returned to userspace when a syscall is not implemented. The only known user, 'hyptop' is not explicitely checking for -ENOSYS, therefore use EOPNOTSUPP instead. It is very unlikely that there are other users, so this change should have no impact on userspace. Signed-off-by: Sven Schnelle Acked-by: Heiko Carstens Signed-off-by: Heiko Carstens --- arch/s390/hypfs/hypfs_diag.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c index ea4c436f86a0..279b7bba4d43 100644 --- a/arch/s390/hypfs/hypfs_diag.c +++ b/arch/s390/hypfs/hypfs_diag.c @@ -67,7 +67,7 @@ void *diag204_get_buffer(enum diag204_format fmt, int *pages) *pages = diag204((unsigned long)DIAG204_SUBC_RSI | (unsigned long)DIAG204_INFO_EXT, 0, NULL); if (*pages <= 0) - return ERR_PTR(-ENOSYS); + return ERR_PTR(-EOPNOTSUPP); } diag204_buf = __vmalloc_node(array_size(*pages, PAGE_SIZE), PAGE_SIZE, GFP_KERNEL, NUMA_NO_NODE, @@ -127,7 +127,7 @@ static int diag204_probe(void) diag204_set_info_type(DIAG204_INFO_SIMPLE); goto out; } else { - rc = -ENOSYS; + rc = -EOPNOTSUPP; goto fail_store; } out: @@ -144,7 +144,7 @@ int diag204_store(void *buf, int pages) rc = diag204((unsigned long)diag204_store_sc | (unsigned long)diag204_get_info_type(), pages, buf); - return rc < 0 ? -ENOSYS : 0; + return rc < 0 ? -EOPNOTSUPP : 0; } struct dbfs_d204_hdr { From e3123dfb5373939d65ac2b874189a773d37ac7f5 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Mon, 17 Jul 2023 10:14:32 +0200 Subject: [PATCH 17/66] s390/tracing: pass struct ftrace_regs to ftrace_trace_function ftrace_trace_function expects a struct ftrace_regs, but the s390 architecure code passes struct pt_regs. This isn't a problem with the current code because struct ftrace_regs contains only one member: struct pt_regs. To avoid issues in the future this should be fixed. Signed-off-by: Sven Schnelle Reviewed-by: Heiko Carstens Signed-off-by: Heiko Carstens --- arch/s390/kernel/asm-offsets.c | 2 ++ arch/s390/kernel/mcount.S | 57 +++++++++++++++++++--------------- 2 files changed, 34 insertions(+), 25 deletions(-) diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 150809c9ffa5..fa5f6885c74a 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -184,5 +184,7 @@ int main(void) OFFSET(__FGRAPH_RET_FP, fgraph_ret_regs, fp); DEFINE(__FGRAPH_RET_SIZE, sizeof(struct fgraph_ret_regs)); #endif + OFFSET(__FTRACE_REGS_PT_REGS, ftrace_regs, regs); + DEFINE(__FTRACE_REGS_SIZE, sizeof(struct ftrace_regs)); return 0; } diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index fd27ff9f2cf3..d2596e0df6fa 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -12,12 +12,19 @@ #include -#define STACK_FRAME_SIZE (STACK_FRAME_OVERHEAD + __PT_SIZE) -#define STACK_PTREGS (STACK_FRAME_OVERHEAD) -#define STACK_PTREGS_GPRS (STACK_PTREGS + __PT_GPRS) -#define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW) -#define STACK_PTREGS_ORIG_GPR2 (STACK_PTREGS + __PT_ORIG_GPR2) -#define STACK_PTREGS_FLAGS (STACK_PTREGS + __PT_FLAGS) +#define STACK_FRAME_SIZE_PTREGS (STACK_FRAME_OVERHEAD + __PT_SIZE) +#define STACK_PTREGS (STACK_FRAME_OVERHEAD) +#define STACK_PTREGS_GPRS (STACK_PTREGS + __PT_GPRS) +#define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW) + +#define STACK_FRAME_SIZE_FREGS (STACK_FRAME_OVERHEAD + __FTRACE_REGS_SIZE) +#define STACK_FREGS (STACK_FRAME_OVERHEAD) +#define STACK_FREGS_PTREGS (STACK_FRAME_OVERHEAD + __FTRACE_REGS_PT_REGS) +#define STACK_FREGS_PTREGS_GPRS (STACK_FREGS_PTREGS + __PT_GPRS) +#define STACK_FREGS_PTREGS_PSW (STACK_FREGS_PTREGS + __PT_PSW) +#define STACK_FREGS_PTREGS_ORIG_GPR2 (STACK_FREGS_PTREGS + __PT_ORIG_GPR2) +#define STACK_FREGS_PTREGS_FLAGS (STACK_FREGS_PTREGS + __PT_FLAGS) + /* packed stack: allocate just enough for r14, r15 and backchain */ #define TRACED_FUNC_FRAME_SIZE 24 @@ -53,23 +60,23 @@ SYM_CODE_END(ftrace_stub_direct_tramp) stg %r1,__SF_BACKCHAIN(%r15) stg %r0,(__SF_GPRS+8*8)(%r15) stg %r15,(__SF_GPRS+9*8)(%r15) - # allocate pt_regs and stack frame for ftrace_trace_function - aghi %r15,-STACK_FRAME_SIZE - stg %r1,(STACK_PTREGS_GPRS+15*8)(%r15) - xc STACK_PTREGS_ORIG_GPR2(8,%r15),STACK_PTREGS_ORIG_GPR2(%r15) + # allocate ftrace_regs and stack frame for ftrace_trace_function + aghi %r15,-STACK_FRAME_SIZE_FREGS + stg %r1,(STACK_FREGS_PTREGS_GPRS+15*8)(%r15) + xc STACK_FREGS_PTREGS_ORIG_GPR2(8,%r15),STACK_FREGS_PTREGS_ORIG_GPR2(%r15) .if \allregs == 1 - stg %r14,(STACK_PTREGS_PSW)(%r15) - mvghi STACK_PTREGS_FLAGS(%r15),_PIF_FTRACE_FULL_REGS + stg %r14,(STACK_FREGS_PTREGS_PSW)(%r15) + mvghi STACK_FREGS_PTREGS_FLAGS(%r15),_PIF_FTRACE_FULL_REGS .else - xc STACK_PTREGS_FLAGS(8,%r15),STACK_PTREGS_FLAGS(%r15) + xc STACK_FREGS_PTREGS_FLAGS(8,%r15),STACK_FREGS_PTREGS_FLAGS(%r15) .endif lg %r14,(__SF_GPRS+8*8)(%r1) # restore original return address aghi %r1,-TRACED_FUNC_FRAME_SIZE stg %r1,__SF_BACKCHAIN(%r15) - stg %r0,(STACK_PTREGS_PSW+8)(%r15) - stmg %r2,%r14,(STACK_PTREGS_GPRS+2*8)(%r15) + stg %r0,(STACK_FREGS_PTREGS_PSW+8)(%r15) + stmg %r2,%r14,(STACK_FREGS_PTREGS_GPRS+2*8)(%r15) .endm SYM_CODE_START(ftrace_regs_caller) @@ -96,30 +103,30 @@ SYM_CODE_START(ftrace_common) lg %r1,0(%r1) #endif lgr %r3,%r14 - la %r5,STACK_PTREGS(%r15) + la %r5,STACK_FREGS(%r15) BASR_EX %r14,%r1 #ifdef CONFIG_FUNCTION_GRAPH_TRACER # The j instruction gets runtime patched to a nop instruction. # See ftrace_enable_ftrace_graph_caller. SYM_INNER_LABEL(ftrace_graph_caller, SYM_L_GLOBAL) j .Lftrace_graph_caller_end - lmg %r2,%r3,(STACK_PTREGS_GPRS+14*8)(%r15) - lg %r4,(STACK_PTREGS_PSW+8)(%r15) + lmg %r2,%r3,(STACK_FREGS_PTREGS_GPRS+14*8)(%r15) + lg %r4,(STACK_FREGS_PTREGS_PSW+8)(%r15) brasl %r14,prepare_ftrace_return - stg %r2,(STACK_PTREGS_GPRS+14*8)(%r15) + stg %r2,(STACK_FREGS_PTREGS_GPRS+14*8)(%r15) .Lftrace_graph_caller_end: #endif - lg %r0,(STACK_PTREGS_PSW+8)(%r15) + lg %r0,(STACK_FREGS_PTREGS_PSW+8)(%r15) #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES - ltg %r1,STACK_PTREGS_ORIG_GPR2(%r15) + ltg %r1,STACK_FREGS_PTREGS_ORIG_GPR2(%r15) locgrz %r1,%r0 #else - lg %r1,STACK_PTREGS_ORIG_GPR2(%r15) + lg %r1,STACK_FREGS_PTREGS_ORIG_GPR2(%r15) ltgr %r1,%r1 jnz 0f lgr %r1,%r0 #endif -0: lmg %r2,%r15,(STACK_PTREGS_GPRS+2*8)(%r15) +0: lmg %r2,%r15,(STACK_FREGS_PTREGS_GPRS+2*8)(%r15) BR_EX %r1 SYM_CODE_END(ftrace_common) @@ -164,11 +171,11 @@ SYM_CODE_END(ftrace_shared_hotpatch_trampoline_exrl) SYM_CODE_START(arch_rethook_trampoline) stg %r14,(__SF_GPRS+8*8)(%r15) - lay %r15,-STACK_FRAME_SIZE(%r15) + lay %r15,-STACK_FRAME_SIZE_PTREGS(%r15) stmg %r0,%r14,STACK_PTREGS_GPRS(%r15) # store original stack pointer in backchain and pt_regs - lay %r7,STACK_FRAME_SIZE(%r15) + lay %r7,STACK_FRAME_SIZE_PTREGS(%r15) stg %r7,__SF_BACKCHAIN(%r15) stg %r7,STACK_PTREGS_GPRS+(15*8)(%r15) From 37002bc6b6039e1491140869c6801e0a2deee43e Mon Sep 17 00:00:00 2001 From: Costa Shulyupin Date: Tue, 18 Jul 2023 07:55:02 +0300 Subject: [PATCH 18/66] docs: move s390 under arch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit and fix all in-tree references. Architecture-specific documentation is being moved into Documentation/arch/ as a way of cleaning up the top-level documentation directory and making the docs hierarchy more closely match the source hierarchy. Signed-off-by: Costa Shulyupin Reviewed-by: Tony Krowiak Acked-by: Jonathan Corbet Acked-by: Heiko Carstens Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Randy Dunlap Link: https://lore.kernel.org/r/20230718045550.495428-1-costa.shul@redhat.com Signed-off-by: Heiko Carstens --- Documentation/admin-guide/kernel-parameters.txt | 4 ++-- Documentation/arch/index.rst | 2 +- Documentation/{ => arch}/s390/3270.ChangeLog | 0 Documentation/{ => arch}/s390/3270.rst | 4 ++-- Documentation/{ => arch}/s390/cds.rst | 2 +- Documentation/{ => arch}/s390/common_io.rst | 2 +- Documentation/{ => arch}/s390/config3270.sh | 0 Documentation/{ => arch}/s390/driver-model.rst | 0 Documentation/{ => arch}/s390/features.rst | 0 Documentation/{ => arch}/s390/index.rst | 0 Documentation/{ => arch}/s390/monreader.rst | 0 Documentation/{ => arch}/s390/pci.rst | 2 +- Documentation/{ => arch}/s390/qeth.rst | 0 Documentation/{ => arch}/s390/s390dbf.rst | 0 Documentation/{ => arch}/s390/text_files.rst | 0 Documentation/{ => arch}/s390/vfio-ap-locking.rst | 0 Documentation/{ => arch}/s390/vfio-ap.rst | 0 Documentation/{ => arch}/s390/vfio-ccw.rst | 2 +- Documentation/{ => arch}/s390/zfcpdump.rst | 0 Documentation/driver-api/s390-drivers.rst | 4 ++-- MAINTAINERS | 8 ++++---- arch/s390/Kconfig | 4 ++-- arch/s390/include/asm/debug.h | 4 ++-- drivers/s390/char/zcore.c | 2 +- 24 files changed, 20 insertions(+), 20 deletions(-) rename Documentation/{ => arch}/s390/3270.ChangeLog (100%) rename Documentation/{ => arch}/s390/3270.rst (99%) rename Documentation/{ => arch}/s390/cds.rst (99%) rename Documentation/{ => arch}/s390/common_io.rst (98%) rename Documentation/{ => arch}/s390/config3270.sh (100%) rename Documentation/{ => arch}/s390/driver-model.rst (100%) rename Documentation/{ => arch}/s390/features.rst (100%) rename Documentation/{ => arch}/s390/index.rst (100%) rename Documentation/{ => arch}/s390/monreader.rst (100%) rename Documentation/{ => arch}/s390/pci.rst (99%) rename Documentation/{ => arch}/s390/qeth.rst (100%) rename Documentation/{ => arch}/s390/s390dbf.rst (100%) rename Documentation/{ => arch}/s390/text_files.rst (100%) rename Documentation/{ => arch}/s390/vfio-ap-locking.rst (100%) rename Documentation/{ => arch}/s390/vfio-ap.rst (100%) rename Documentation/{ => arch}/s390/vfio-ccw.rst (99%) rename Documentation/{ => arch}/s390/zfcpdump.rst (100%) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index a1457995fd41..94b3dc21eec8 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -553,7 +553,7 @@ others). ccw_timeout_log [S390] - See Documentation/s390/common_io.rst for details. + See Documentation/arch/s390/common_io.rst for details. cgroup_disable= [KNL] Disable a particular controller or optional feature Format: {name of the controller(s) or feature(s) to disable} @@ -598,7 +598,7 @@ Setting checkreqprot to 1 is deprecated. cio_ignore= [S390] - See Documentation/s390/common_io.rst for details. + See Documentation/arch/s390/common_io.rst for details. clearcpuid=X[,X...] [X86] Disable CPUID feature X for the kernel. See diff --git a/Documentation/arch/index.rst b/Documentation/arch/index.rst index 8458b88e9b79..c9a209878cf3 100644 --- a/Documentation/arch/index.rst +++ b/Documentation/arch/index.rst @@ -21,7 +21,7 @@ implementation. parisc/index ../powerpc/index ../riscv/index - ../s390/index + s390/index sh/index sparc/index x86/index diff --git a/Documentation/s390/3270.ChangeLog b/Documentation/arch/s390/3270.ChangeLog similarity index 100% rename from Documentation/s390/3270.ChangeLog rename to Documentation/arch/s390/3270.ChangeLog diff --git a/Documentation/s390/3270.rst b/Documentation/arch/s390/3270.rst similarity index 99% rename from Documentation/s390/3270.rst rename to Documentation/arch/s390/3270.rst index e09e77954238..467eace91473 100644 --- a/Documentation/s390/3270.rst +++ b/Documentation/arch/s390/3270.rst @@ -116,7 +116,7 @@ Here are the installation steps in detail: as a 3270, not a 3215. 5. Run the 3270 configuration script config3270. It is - distributed in this same directory, Documentation/s390, as + distributed in this same directory, Documentation/arch/s390, as config3270.sh. Inspect the output script it produces, /tmp/mkdev3270, and then run that script. This will create the necessary character special device files and make the necessary @@ -125,7 +125,7 @@ Here are the installation steps in detail: Then notify /sbin/init that /etc/inittab has changed, by issuing the telinit command with the q operand:: - cd Documentation/s390 + cd Documentation/arch/s390 sh config3270.sh sh /tmp/mkdev3270 telinit q diff --git a/Documentation/s390/cds.rst b/Documentation/arch/s390/cds.rst similarity index 99% rename from Documentation/s390/cds.rst rename to Documentation/arch/s390/cds.rst index 7006d8209d2e..bcad2a14244a 100644 --- a/Documentation/s390/cds.rst +++ b/Documentation/arch/s390/cds.rst @@ -39,7 +39,7 @@ some of them are ESA/390 platform specific. Note: In order to write a driver for S/390, you also need to look into the interface - described in Documentation/s390/driver-model.rst. + described in Documentation/arch/s390/driver-model.rst. Note for porting drivers from 2.4: diff --git a/Documentation/s390/common_io.rst b/Documentation/arch/s390/common_io.rst similarity index 98% rename from Documentation/s390/common_io.rst rename to Documentation/arch/s390/common_io.rst index 846485681ce7..6dcb40cb7145 100644 --- a/Documentation/s390/common_io.rst +++ b/Documentation/arch/s390/common_io.rst @@ -136,5 +136,5 @@ debugfs entries The level of logging can be changed to be more or less verbose by piping to /sys/kernel/debug/s390dbf/cio_*/level a number between 0 and 6; see the - documentation on the S/390 debug feature (Documentation/s390/s390dbf.rst) + documentation on the S/390 debug feature (Documentation/arch/s390/s390dbf.rst) for details. diff --git a/Documentation/s390/config3270.sh b/Documentation/arch/s390/config3270.sh similarity index 100% rename from Documentation/s390/config3270.sh rename to Documentation/arch/s390/config3270.sh diff --git a/Documentation/s390/driver-model.rst b/Documentation/arch/s390/driver-model.rst similarity index 100% rename from Documentation/s390/driver-model.rst rename to Documentation/arch/s390/driver-model.rst diff --git a/Documentation/s390/features.rst b/Documentation/arch/s390/features.rst similarity index 100% rename from Documentation/s390/features.rst rename to Documentation/arch/s390/features.rst diff --git a/Documentation/s390/index.rst b/Documentation/arch/s390/index.rst similarity index 100% rename from Documentation/s390/index.rst rename to Documentation/arch/s390/index.rst diff --git a/Documentation/s390/monreader.rst b/Documentation/arch/s390/monreader.rst similarity index 100% rename from Documentation/s390/monreader.rst rename to Documentation/arch/s390/monreader.rst diff --git a/Documentation/s390/pci.rst b/Documentation/arch/s390/pci.rst similarity index 99% rename from Documentation/s390/pci.rst rename to Documentation/arch/s390/pci.rst index a1a72a47dc96..d5755484d8e7 100644 --- a/Documentation/s390/pci.rst +++ b/Documentation/arch/s390/pci.rst @@ -40,7 +40,7 @@ For example: Change the level of logging to be more or less verbose by piping a number between 0 and 6 to /sys/kernel/debug/s390dbf/pci_*/level. For details, see the documentation on the S/390 debug feature at - Documentation/s390/s390dbf.rst. + Documentation/arch/s390/s390dbf.rst. Sysfs entries ============= diff --git a/Documentation/s390/qeth.rst b/Documentation/arch/s390/qeth.rst similarity index 100% rename from Documentation/s390/qeth.rst rename to Documentation/arch/s390/qeth.rst diff --git a/Documentation/s390/s390dbf.rst b/Documentation/arch/s390/s390dbf.rst similarity index 100% rename from Documentation/s390/s390dbf.rst rename to Documentation/arch/s390/s390dbf.rst diff --git a/Documentation/s390/text_files.rst b/Documentation/arch/s390/text_files.rst similarity index 100% rename from Documentation/s390/text_files.rst rename to Documentation/arch/s390/text_files.rst diff --git a/Documentation/s390/vfio-ap-locking.rst b/Documentation/arch/s390/vfio-ap-locking.rst similarity index 100% rename from Documentation/s390/vfio-ap-locking.rst rename to Documentation/arch/s390/vfio-ap-locking.rst diff --git a/Documentation/s390/vfio-ap.rst b/Documentation/arch/s390/vfio-ap.rst similarity index 100% rename from Documentation/s390/vfio-ap.rst rename to Documentation/arch/s390/vfio-ap.rst diff --git a/Documentation/s390/vfio-ccw.rst b/Documentation/arch/s390/vfio-ccw.rst similarity index 99% rename from Documentation/s390/vfio-ccw.rst rename to Documentation/arch/s390/vfio-ccw.rst index 37026fa18179..42960b7b0d70 100644 --- a/Documentation/s390/vfio-ccw.rst +++ b/Documentation/arch/s390/vfio-ccw.rst @@ -440,6 +440,6 @@ Reference 1. ESA/s390 Principles of Operation manual (IBM Form. No. SA22-7832) 2. ESA/390 Common I/O Device Commands manual (IBM Form. No. SA22-7204) 3. https://en.wikipedia.org/wiki/Channel_I/O -4. Documentation/s390/cds.rst +4. Documentation/arch/s390/cds.rst 5. Documentation/driver-api/vfio.rst 6. Documentation/driver-api/vfio-mediated-device.rst diff --git a/Documentation/s390/zfcpdump.rst b/Documentation/arch/s390/zfcpdump.rst similarity index 100% rename from Documentation/s390/zfcpdump.rst rename to Documentation/arch/s390/zfcpdump.rst diff --git a/Documentation/driver-api/s390-drivers.rst b/Documentation/driver-api/s390-drivers.rst index 5158577bc29b..8c0845c4eee7 100644 --- a/Documentation/driver-api/s390-drivers.rst +++ b/Documentation/driver-api/s390-drivers.rst @@ -27,7 +27,7 @@ not strictly considered I/O devices. They are considered here as well, although they are not the focus of this document. Some additional information can also be found in the kernel source under -Documentation/s390/driver-model.rst. +Documentation/arch/s390/driver-model.rst. The css bus =========== @@ -38,7 +38,7 @@ into several categories: * Standard I/O subchannels, for use by the system. They have a child device on the ccw bus and are described below. * I/O subchannels bound to the vfio-ccw driver. See - Documentation/s390/vfio-ccw.rst. + Documentation/arch/s390/vfio-ccw.rst. * Message subchannels. No Linux driver currently exists. * CHSC subchannels (at most one). The chsc subchannel driver can be used to send asynchronous chsc commands. diff --git a/MAINTAINERS b/MAINTAINERS index d516295978a4..262736152862 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18596,7 +18596,7 @@ L: linux-s390@vger.kernel.org S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git F: Documentation/driver-api/s390-drivers.rst -F: Documentation/s390/ +F: Documentation/arch/s390/ F: arch/s390/ F: drivers/s390/ F: drivers/watchdog/diag288_wdt.c @@ -18657,7 +18657,7 @@ M: Niklas Schnelle M: Gerald Schaefer L: linux-s390@vger.kernel.org S: Supported -F: Documentation/s390/pci.rst +F: Documentation/arch/s390/pci.rst F: arch/s390/pci/ F: drivers/pci/hotplug/s390_pci_hpc.c @@ -18674,7 +18674,7 @@ M: Halil Pasic M: Jason Herne L: linux-s390@vger.kernel.org S: Supported -F: Documentation/s390/vfio-ap* +F: Documentation/arch/s390/vfio-ap* F: drivers/s390/crypto/vfio_ap* S390 VFIO-CCW DRIVER @@ -18684,7 +18684,7 @@ R: Halil Pasic L: linux-s390@vger.kernel.org L: kvm@vger.kernel.org S: Supported -F: Documentation/s390/vfio-ccw.rst +F: Documentation/arch/s390/vfio-ccw.rst F: drivers/s390/cio/vfio_ccw* F: include/uapi/linux/vfio_ccw.h diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index c5be7199067b..d9d50a7a2016 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -754,9 +754,9 @@ config CRASH_DUMP Crash dump kernels are loaded in the main kernel with kexec-tools into a specially reserved region and then later executed after a crash by kdump/kexec. - Refer to for more details on this. + Refer to for more details on this. This option also enables s390 zfcpdump. - See also + See also endmenu diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h index ac665b9670c5..ccd4e148b5ed 100644 --- a/arch/s390/include/asm/debug.h +++ b/arch/s390/include/asm/debug.h @@ -222,7 +222,7 @@ static inline debug_entry_t *debug_text_event(debug_info_t *id, int level, /* * IMPORTANT: Use "%s" in sprintf format strings with care! Only pointers are - * stored in the s390dbf. See Documentation/s390/s390dbf.rst for more details! + * stored in the s390dbf. See Documentation/arch/s390/s390dbf.rst for more details! */ extern debug_entry_t * __debug_sprintf_event(debug_info_t *id, int level, char *string, ...) @@ -350,7 +350,7 @@ static inline debug_entry_t *debug_text_exception(debug_info_t *id, int level, /* * IMPORTANT: Use "%s" in sprintf format strings with care! Only pointers are - * stored in the s390dbf. See Documentation/s390/s390dbf.rst for more details! + * stored in the s390dbf. See Documentation/arch/s390/s390dbf.rst for more details! */ extern debug_entry_t * __debug_sprintf_exception(debug_info_t *id, int level, char *string, ...) diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c index 942c73a11ca3..bc3be0330f1d 100644 --- a/drivers/s390/char/zcore.c +++ b/drivers/s390/char/zcore.c @@ -3,7 +3,7 @@ * zcore module to export memory content and register sets for creating system * dumps on SCSI/NVMe disks (zfcp/nvme dump). * - * For more information please refer to Documentation/s390/zfcpdump.rst + * For more information please refer to Documentation/arch/s390/zfcpdump.rst * * Copyright IBM Corp. 2003, 2008 * Author(s): Michael Holzheu From 305b9f4f7bebc12610035f8cd865a0db87df81b6 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 22 Jul 2023 02:13:58 +0900 Subject: [PATCH 19/66] s390: use obj-y to descend into drivers/s390/ The single build rule does not work with the drivers-y syntax. [1] Use the standard obj-y syntax. It moves the objects from drivers/s390/ to slightly lower address, but fixes the reported issue. [1]: https://lore.kernel.org/linux-kbuild/d57ba55f-20a3-b836-783d-b49c8a161b6e@kernel.org/T/#m27f781ab60acadfed8a9e9642f30d5414a5e2df3 Signed-off-by: Masahiro Yamada Tested-by: Jiri Slaby Link: https://lore.kernel.org/r/20230721171358.3612099-1-masahiroy@kernel.org Signed-off-by: Heiko Carstens --- arch/s390/Makefile | 1 - drivers/Makefile | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 5ed242897b0d..a53a36ee0731 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -119,7 +119,6 @@ export KBUILD_CFLAGS_DECOMPRESSOR OBJCOPYFLAGS := -O binary libs-y += arch/s390/lib/ -drivers-y += drivers/s390/ boot := arch/s390/boot syscalls := arch/s390/kernel/syscalls diff --git a/drivers/Makefile b/drivers/Makefile index 7241d80a7b29..a7459e77df37 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -195,3 +195,5 @@ obj-$(CONFIG_PECI) += peci/ obj-$(CONFIG_HTE) += hte/ obj-$(CONFIG_DRM_ACCEL) += accel/ obj-$(CONFIG_CDX_BUS) += cdx/ + +obj-$(CONFIG_S390) += s390/ From e810487385de409e826f249fcce13105b5513a65 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 24 Jul 2023 18:24:27 +0200 Subject: [PATCH 20/66] s390/diag: fix diagnose 8c description The comment above diag8c() describes diagnose 210, not diagnose 8c. Add a proper short description. Reviewed-by: Sven Schnelle Signed-off-by: Heiko Carstens --- arch/s390/kernel/diag.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c index f287713baf6d..f9f06cd8fcee 100644 --- a/arch/s390/kernel/diag.c +++ b/arch/s390/kernel/diag.c @@ -223,7 +223,7 @@ int diag210(struct diag210 *addr) EXPORT_SYMBOL(diag210); /* - * Diagnose 210: Get information about a virtual device + * Diagnose 8C: Access 3270 Display Device Information */ int diag8c(struct diag8c *addr, struct ccw_dev_id *devno) { From 7fb0ad1938ef129aa39d565bf2ad3728a39136ef Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 10 Jun 2023 16:25:28 +0200 Subject: [PATCH 21/66] s390/ebcdic: fix typo in comment s/ECBDIC/EBCDIC/ (C and B are swapped) Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/08ed63331699177b3354458da66a2f63c0217e49.1686407113.git.christophe.jaillet@wanadoo.fr Signed-off-by: Heiko Carstens --- arch/s390/kernel/ebcdic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/ebcdic.c b/arch/s390/kernel/ebcdic.c index 7f8246c9be08..0e51fa537262 100644 --- a/arch/s390/kernel/ebcdic.c +++ b/arch/s390/kernel/ebcdic.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * ECBDIC -> ASCII, ASCII -> ECBDIC, + * EBCDIC -> ASCII, ASCII -> EBCDIC, * upper to lower case (EBCDIC) conversion tables. * * S390 version From 7b27d9ef0f63da736d3a585a5d5098cea62a3ad7 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 26 Jul 2023 08:18:34 +0200 Subject: [PATCH 22/66] s390/ftrace: use la instead of aghik in return_to_handler() Nathan Chancellor reported the following build error when compiling the kernel with CONFIG_MARCH_Z10=y: arch/s390/kernel/mcount.S: Assembler messages: arch/s390/kernel/mcount.S:140: Error: Unrecognized opcode: `aghik' The aghik instruction is only available since z196. Use the la instruction instead which is available for all machines. Reported-by: Nathan Chancellor Closes: https://lore.kernel.org/all/20230725211105.GA224840@dev-arch.thelio-3990X Fixes: 1256e70a082a ("s390/ftrace: enable HAVE_FUNCTION_GRAPH_RETVAL") Reviewed-by: Sven Schnelle Tested-by: Nathan Chancellor # build Link: https://lore.kernel.org/r/20230726061834.1300984-1-hca@linux.ibm.com Signed-off-by: Heiko Carstens --- arch/s390/kernel/mcount.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index d2596e0df6fa..71c5fa05e7f1 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -137,7 +137,7 @@ SYM_FUNC_START(return_to_handler) lgr %r1,%r15 aghi %r15,-(STACK_FRAME_OVERHEAD+__FGRAPH_RET_SIZE) stg %r1,__SF_BACKCHAIN(%r15) - aghik %r3,%r15,STACK_FRAME_OVERHEAD + la %r3,STACK_FRAME_OVERHEAD(%r15) stg %r1,__FGRAPH_RET_FP(%r3) stg %r2,__FGRAPH_RET_GPR2(%r3) lgr %r2,%r3 From 8b46451c8bd63da543598ef8e0d67cb52281c6ef Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 26 Jul 2023 15:39:39 +0200 Subject: [PATCH 23/66] s390/defconfigs: set CONFIG_FUNCTION_GRAPH_RETVAL=y Enable recording and printing function return values for the function graph tracer. Signed-off-by: Heiko Carstens --- arch/s390/configs/debug_defconfig | 1 + arch/s390/configs/defconfig | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index aa95cf6dfabb..b042ccb8a8a6 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -836,6 +836,7 @@ CONFIG_RCU_CPU_STALL_TIMEOUT=300 # CONFIG_RCU_TRACE is not set CONFIG_LATENCYTOP=y CONFIG_BOOTTIME_TRACING=y +CONFIG_FUNCTION_GRAPH_RETVAL=y CONFIG_FPROBE=y CONFIG_FUNCTION_PROFILER=y CONFIG_STACK_TRACER=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index f041945f9148..0fa45c0d6bee 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -787,6 +787,7 @@ CONFIG_RCU_REF_SCALE_TEST=m CONFIG_RCU_CPU_STALL_TIMEOUT=60 CONFIG_LATENCYTOP=y CONFIG_BOOTTIME_TRACING=y +CONFIG_FUNCTION_GRAPH_RETVAL=y CONFIG_FPROBE=y CONFIG_FUNCTION_PROFILER=y CONFIG_STACK_TRACER=y From c28c07fe235ccaafe11003393de064b2a24dd2e3 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 24 Jul 2023 17:20:26 +0200 Subject: [PATCH 24/66] s390/mm: move pfault code to own C file The pfault code has nothing to do with regular fault handling. Therefore move it to an own C file. Also add an own pfault header file. This way changes to setup.h don't cause a recompile of the pfault code and vice versa. Reviewed-by: Sven Schnelle Signed-off-by: Heiko Carstens --- arch/s390/include/asm/pfault.h | 24 ++++ arch/s390/include/asm/setup.h | 8 -- arch/s390/kernel/machine_kexec.c | 1 + arch/s390/kernel/smp.c | 1 + arch/s390/mm/Makefile | 1 + arch/s390/mm/fault.c | 228 ----------------------------- arch/s390/mm/pfault.c | 239 +++++++++++++++++++++++++++++++ 7 files changed, 266 insertions(+), 236 deletions(-) create mode 100644 arch/s390/include/asm/pfault.h create mode 100644 arch/s390/mm/pfault.c diff --git a/arch/s390/include/asm/pfault.h b/arch/s390/include/asm/pfault.h new file mode 100644 index 000000000000..beabeebf2859 --- /dev/null +++ b/arch/s390/include/asm/pfault.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright IBM Corp. 1999, 2023 + */ +#ifndef _ASM_S390_PFAULT_H +#define _ASM_S390_PFAULT_H + +int __pfault_init(void); +void __pfault_fini(void); + +static inline int pfault_init(void) +{ + if (IS_ENABLED(CONFIG_PFAULT)) + return __pfault_init(); + return -1; +} + +static inline void pfault_fini(void) +{ + if (IS_ENABLED(CONFIG_PFAULT)) + __pfault_fini(); +} + +#endif /* _ASM_S390_PFAULT_H */ diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index e795f425627a..b30fe91166e3 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -118,14 +118,6 @@ extern unsigned int console_irq; #define SET_CONSOLE_VT220 do { console_mode = 4; } while (0) #define SET_CONSOLE_HVC do { console_mode = 5; } while (0) -#ifdef CONFIG_PFAULT -extern int pfault_init(void); -extern void pfault_fini(void); -#else /* CONFIG_PFAULT */ -#define pfault_init() ({-1;}) -#define pfault_fini() do { } while (0) -#endif /* CONFIG_PFAULT */ - #ifdef CONFIG_VMCP void vmcp_cma_reserve(void); #else diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 6d9276c096a6..12a2bd4fc88c 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index f9a2b755f510..9244130721d6 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index d90db06a8af5..352ff520fd94 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile @@ -10,3 +10,4 @@ obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_PTDUMP_CORE) += dump_pagetables.o obj-$(CONFIG_PGSTE) += gmap.o +obj-$(CONFIG_PFAULT) += pfault.o diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 2f123429a291..b5e1bea9194c 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -43,8 +43,6 @@ #include "../kernel/entry.h" #define __FAIL_ADDR_MASK -4096L -#define __SUBCODE_MASK 0x0600 -#define __PF_RES_FIELD 0x8000000000000000ULL /* * Allocate private vm_fault_reason from top. Please make sure it won't @@ -583,232 +581,6 @@ void do_dat_exception(struct pt_regs *regs) } NOKPROBE_SYMBOL(do_dat_exception); -#ifdef CONFIG_PFAULT -/* - * 'pfault' pseudo page faults routines. - */ -static int pfault_disable; - -static int __init nopfault(char *str) -{ - pfault_disable = 1; - return 1; -} - -__setup("nopfault", nopfault); - -struct pfault_refbk { - u16 refdiagc; - u16 reffcode; - u16 refdwlen; - u16 refversn; - u64 refgaddr; - u64 refselmk; - u64 refcmpmk; - u64 reserved; -} __attribute__ ((packed, aligned(8))); - -static struct pfault_refbk pfault_init_refbk = { - .refdiagc = 0x258, - .reffcode = 0, - .refdwlen = 5, - .refversn = 2, - .refgaddr = __LC_LPP, - .refselmk = 1ULL << 48, - .refcmpmk = 1ULL << 48, - .reserved = __PF_RES_FIELD -}; - -int pfault_init(void) -{ - int rc; - - if (pfault_disable) - return -1; - diag_stat_inc(DIAG_STAT_X258); - asm volatile( - " diag %1,%0,0x258\n" - "0: j 2f\n" - "1: la %0,8\n" - "2:\n" - EX_TABLE(0b,1b) - : "=d" (rc) - : "a" (&pfault_init_refbk), "m" (pfault_init_refbk) : "cc"); - return rc; -} - -static struct pfault_refbk pfault_fini_refbk = { - .refdiagc = 0x258, - .reffcode = 1, - .refdwlen = 5, - .refversn = 2, -}; - -void pfault_fini(void) -{ - - if (pfault_disable) - return; - diag_stat_inc(DIAG_STAT_X258); - asm volatile( - " diag %0,0,0x258\n" - "0: nopr %%r7\n" - EX_TABLE(0b,0b) - : : "a" (&pfault_fini_refbk), "m" (pfault_fini_refbk) : "cc"); -} - -static DEFINE_SPINLOCK(pfault_lock); -static LIST_HEAD(pfault_list); - -#define PF_COMPLETE 0x0080 - -/* - * The mechanism of our pfault code: if Linux is running as guest, runs a user - * space process and the user space process accesses a page that the host has - * paged out we get a pfault interrupt. - * - * This allows us, within the guest, to schedule a different process. Without - * this mechanism the host would have to suspend the whole virtual cpu until - * the page has been paged in. - * - * So when we get such an interrupt then we set the state of the current task - * to uninterruptible and also set the need_resched flag. Both happens within - * interrupt context(!). If we later on want to return to user space we - * recognize the need_resched flag and then call schedule(). It's not very - * obvious how this works... - * - * Of course we have a lot of additional fun with the completion interrupt (-> - * host signals that a page of a process has been paged in and the process can - * continue to run). This interrupt can arrive on any cpu and, since we have - * virtual cpus, actually appear before the interrupt that signals that a page - * is missing. - */ -static void pfault_interrupt(struct ext_code ext_code, - unsigned int param32, unsigned long param64) -{ - struct task_struct *tsk; - __u16 subcode; - pid_t pid; - - /* - * Get the external interruption subcode & pfault initial/completion - * signal bit. VM stores this in the 'cpu address' field associated - * with the external interrupt. - */ - subcode = ext_code.subcode; - if ((subcode & 0xff00) != __SUBCODE_MASK) - return; - inc_irq_stat(IRQEXT_PFL); - /* Get the token (= pid of the affected task). */ - pid = param64 & LPP_PID_MASK; - rcu_read_lock(); - tsk = find_task_by_pid_ns(pid, &init_pid_ns); - if (tsk) - get_task_struct(tsk); - rcu_read_unlock(); - if (!tsk) - return; - spin_lock(&pfault_lock); - if (subcode & PF_COMPLETE) { - /* signal bit is set -> a page has been swapped in by VM */ - if (tsk->thread.pfault_wait == 1) { - /* Initial interrupt was faster than the completion - * interrupt. pfault_wait is valid. Set pfault_wait - * back to zero and wake up the process. This can - * safely be done because the task is still sleeping - * and can't produce new pfaults. */ - tsk->thread.pfault_wait = 0; - list_del(&tsk->thread.list); - wake_up_process(tsk); - put_task_struct(tsk); - } else { - /* Completion interrupt was faster than initial - * interrupt. Set pfault_wait to -1 so the initial - * interrupt doesn't put the task to sleep. - * If the task is not running, ignore the completion - * interrupt since it must be a leftover of a PFAULT - * CANCEL operation which didn't remove all pending - * completion interrupts. */ - if (task_is_running(tsk)) - tsk->thread.pfault_wait = -1; - } - } else { - /* signal bit not set -> a real page is missing. */ - if (WARN_ON_ONCE(tsk != current)) - goto out; - if (tsk->thread.pfault_wait == 1) { - /* Already on the list with a reference: put to sleep */ - goto block; - } else if (tsk->thread.pfault_wait == -1) { - /* Completion interrupt was faster than the initial - * interrupt (pfault_wait == -1). Set pfault_wait - * back to zero and exit. */ - tsk->thread.pfault_wait = 0; - } else { - /* Initial interrupt arrived before completion - * interrupt. Let the task sleep. - * An extra task reference is needed since a different - * cpu may set the task state to TASK_RUNNING again - * before the scheduler is reached. */ - get_task_struct(tsk); - tsk->thread.pfault_wait = 1; - list_add(&tsk->thread.list, &pfault_list); -block: - /* Since this must be a userspace fault, there - * is no kernel task state to trample. Rely on the - * return to userspace schedule() to block. */ - __set_current_state(TASK_UNINTERRUPTIBLE); - set_tsk_need_resched(tsk); - set_preempt_need_resched(); - } - } -out: - spin_unlock(&pfault_lock); - put_task_struct(tsk); -} - -static int pfault_cpu_dead(unsigned int cpu) -{ - struct thread_struct *thread, *next; - struct task_struct *tsk; - - spin_lock_irq(&pfault_lock); - list_for_each_entry_safe(thread, next, &pfault_list, list) { - thread->pfault_wait = 0; - list_del(&thread->list); - tsk = container_of(thread, struct task_struct, thread); - wake_up_process(tsk); - put_task_struct(tsk); - } - spin_unlock_irq(&pfault_lock); - return 0; -} - -static int __init pfault_irq_init(void) -{ - int rc; - - rc = register_external_irq(EXT_IRQ_CP_SERVICE, pfault_interrupt); - if (rc) - goto out_extint; - rc = pfault_init() == 0 ? 0 : -EOPNOTSUPP; - if (rc) - goto out_pfault; - irq_subclass_register(IRQ_SUBCLASS_SERVICE_SIGNAL); - cpuhp_setup_state_nocalls(CPUHP_S390_PFAULT_DEAD, "s390/pfault:dead", - NULL, pfault_cpu_dead); - return 0; - -out_pfault: - unregister_external_irq(EXT_IRQ_CP_SERVICE, pfault_interrupt); -out_extint: - pfault_disable = 1; - return rc; -} -early_initcall(pfault_irq_init); - -#endif /* CONFIG_PFAULT */ - #if IS_ENABLED(CONFIG_PGSTE) void do_secure_storage_access(struct pt_regs *regs) diff --git a/arch/s390/mm/pfault.c b/arch/s390/mm/pfault.c new file mode 100644 index 000000000000..5c0547f8d5ee --- /dev/null +++ b/arch/s390/mm/pfault.c @@ -0,0 +1,239 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright IBM Corp. 1999, 2023 + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define __SUBCODE_MASK 0x0600 +#define __PF_RES_FIELD 0x8000000000000000ULL + +/* + * 'pfault' pseudo page faults routines. + */ +static int pfault_disable; + +static int __init nopfault(char *str) +{ + pfault_disable = 1; + return 1; +} + +__setup("nopfault", nopfault); + +struct pfault_refbk { + u16 refdiagc; + u16 reffcode; + u16 refdwlen; + u16 refversn; + u64 refgaddr; + u64 refselmk; + u64 refcmpmk; + u64 reserved; +} __attribute__ ((packed, aligned(8))); + +static struct pfault_refbk pfault_init_refbk = { + .refdiagc = 0x258, + .reffcode = 0, + .refdwlen = 5, + .refversn = 2, + .refgaddr = __LC_LPP, + .refselmk = 1ULL << 48, + .refcmpmk = 1ULL << 48, + .reserved = __PF_RES_FIELD +}; + +int __pfault_init(void) +{ + int rc; + + if (pfault_disable) + return -1; + diag_stat_inc(DIAG_STAT_X258); + asm volatile( + " diag %1,%0,0x258\n" + "0: j 2f\n" + "1: la %0,8\n" + "2:\n" + EX_TABLE(0b,1b) + : "=d" (rc) + : "a" (&pfault_init_refbk), "m" (pfault_init_refbk) : "cc"); + return rc; +} + +static struct pfault_refbk pfault_fini_refbk = { + .refdiagc = 0x258, + .reffcode = 1, + .refdwlen = 5, + .refversn = 2, +}; + +void __pfault_fini(void) +{ + + if (pfault_disable) + return; + diag_stat_inc(DIAG_STAT_X258); + asm volatile( + " diag %0,0,0x258\n" + "0: nopr %%r7\n" + EX_TABLE(0b,0b) + : : "a" (&pfault_fini_refbk), "m" (pfault_fini_refbk) : "cc"); +} + +static DEFINE_SPINLOCK(pfault_lock); +static LIST_HEAD(pfault_list); + +#define PF_COMPLETE 0x0080 + +/* + * The mechanism of our pfault code: if Linux is running as guest, runs a user + * space process and the user space process accesses a page that the host has + * paged out we get a pfault interrupt. + * + * This allows us, within the guest, to schedule a different process. Without + * this mechanism the host would have to suspend the whole virtual cpu until + * the page has been paged in. + * + * So when we get such an interrupt then we set the state of the current task + * to uninterruptible and also set the need_resched flag. Both happens within + * interrupt context(!). If we later on want to return to user space we + * recognize the need_resched flag and then call schedule(). It's not very + * obvious how this works... + * + * Of course we have a lot of additional fun with the completion interrupt (-> + * host signals that a page of a process has been paged in and the process can + * continue to run). This interrupt can arrive on any cpu and, since we have + * virtual cpus, actually appear before the interrupt that signals that a page + * is missing. + */ +static void pfault_interrupt(struct ext_code ext_code, + unsigned int param32, unsigned long param64) +{ + struct task_struct *tsk; + __u16 subcode; + pid_t pid; + + /* + * Get the external interruption subcode & pfault initial/completion + * signal bit. VM stores this in the 'cpu address' field associated + * with the external interrupt. + */ + subcode = ext_code.subcode; + if ((subcode & 0xff00) != __SUBCODE_MASK) + return; + inc_irq_stat(IRQEXT_PFL); + /* Get the token (= pid of the affected task). */ + pid = param64 & LPP_PID_MASK; + rcu_read_lock(); + tsk = find_task_by_pid_ns(pid, &init_pid_ns); + if (tsk) + get_task_struct(tsk); + rcu_read_unlock(); + if (!tsk) + return; + spin_lock(&pfault_lock); + if (subcode & PF_COMPLETE) { + /* signal bit is set -> a page has been swapped in by VM */ + if (tsk->thread.pfault_wait == 1) { + /* Initial interrupt was faster than the completion + * interrupt. pfault_wait is valid. Set pfault_wait + * back to zero and wake up the process. This can + * safely be done because the task is still sleeping + * and can't produce new pfaults. */ + tsk->thread.pfault_wait = 0; + list_del(&tsk->thread.list); + wake_up_process(tsk); + put_task_struct(tsk); + } else { + /* Completion interrupt was faster than initial + * interrupt. Set pfault_wait to -1 so the initial + * interrupt doesn't put the task to sleep. + * If the task is not running, ignore the completion + * interrupt since it must be a leftover of a PFAULT + * CANCEL operation which didn't remove all pending + * completion interrupts. */ + if (task_is_running(tsk)) + tsk->thread.pfault_wait = -1; + } + } else { + /* signal bit not set -> a real page is missing. */ + if (WARN_ON_ONCE(tsk != current)) + goto out; + if (tsk->thread.pfault_wait == 1) { + /* Already on the list with a reference: put to sleep */ + goto block; + } else if (tsk->thread.pfault_wait == -1) { + /* Completion interrupt was faster than the initial + * interrupt (pfault_wait == -1). Set pfault_wait + * back to zero and exit. */ + tsk->thread.pfault_wait = 0; + } else { + /* Initial interrupt arrived before completion + * interrupt. Let the task sleep. + * An extra task reference is needed since a different + * cpu may set the task state to TASK_RUNNING again + * before the scheduler is reached. */ + get_task_struct(tsk); + tsk->thread.pfault_wait = 1; + list_add(&tsk->thread.list, &pfault_list); +block: + /* Since this must be a userspace fault, there + * is no kernel task state to trample. Rely on the + * return to userspace schedule() to block. */ + __set_current_state(TASK_UNINTERRUPTIBLE); + set_tsk_need_resched(tsk); + set_preempt_need_resched(); + } + } +out: + spin_unlock(&pfault_lock); + put_task_struct(tsk); +} + +static int pfault_cpu_dead(unsigned int cpu) +{ + struct thread_struct *thread, *next; + struct task_struct *tsk; + + spin_lock_irq(&pfault_lock); + list_for_each_entry_safe(thread, next, &pfault_list, list) { + thread->pfault_wait = 0; + list_del(&thread->list); + tsk = container_of(thread, struct task_struct, thread); + wake_up_process(tsk); + put_task_struct(tsk); + } + spin_unlock_irq(&pfault_lock); + return 0; +} + +static int __init pfault_irq_init(void) +{ + int rc; + + rc = register_external_irq(EXT_IRQ_CP_SERVICE, pfault_interrupt); + if (rc) + goto out_extint; + rc = pfault_init() == 0 ? 0 : -EOPNOTSUPP; + if (rc) + goto out_pfault; + irq_subclass_register(IRQ_SUBCLASS_SERVICE_SIGNAL); + cpuhp_setup_state_nocalls(CPUHP_S390_PFAULT_DEAD, "s390/pfault:dead", + NULL, pfault_cpu_dead); + return 0; + +out_pfault: + unregister_external_irq(EXT_IRQ_CP_SERVICE, pfault_interrupt); +out_extint: + pfault_disable = 1; + return rc; +} +early_initcall(pfault_irq_init); From b60624bb0a94126a26b9ac9653b4668a1a70ba2a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 24 Jul 2023 17:20:27 +0200 Subject: [PATCH 25/66] s390/pfault: use UL instead of ULL Remove another leftover of the 31 bit area: replace the not needed "unsigned long long" suffix with "unsigned long", and stay consistent with the rest of the code. Reviewed-by: Sven Schnelle Signed-off-by: Heiko Carstens --- arch/s390/mm/pfault.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/s390/mm/pfault.c b/arch/s390/mm/pfault.c index 5c0547f8d5ee..05865e5616b2 100644 --- a/arch/s390/mm/pfault.c +++ b/arch/s390/mm/pfault.c @@ -13,7 +13,7 @@ #include #define __SUBCODE_MASK 0x0600 -#define __PF_RES_FIELD 0x8000000000000000ULL +#define __PF_RES_FIELD 0x8000000000000000UL /* * 'pfault' pseudo page faults routines. @@ -45,8 +45,8 @@ static struct pfault_refbk pfault_init_refbk = { .refdwlen = 5, .refversn = 2, .refgaddr = __LC_LPP, - .refselmk = 1ULL << 48, - .refcmpmk = 1ULL << 48, + .refselmk = 1UL << 48, + .refcmpmk = 1UL << 48, .reserved = __PF_RES_FIELD }; From c5b6eef58f88810b8e641d8d32d24041fa20e016 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 24 Jul 2023 17:20:28 +0200 Subject: [PATCH 26/66] s390/pfault: remove not needed packed and aligned attributes struct pfault_refbk is naturally packed and aligned; remove not needed packed and aligned attributes. Reviewed-by: Sven Schnelle Signed-off-by: Heiko Carstens --- arch/s390/mm/pfault.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/mm/pfault.c b/arch/s390/mm/pfault.c index 05865e5616b2..e1c8cc14575d 100644 --- a/arch/s390/mm/pfault.c +++ b/arch/s390/mm/pfault.c @@ -37,7 +37,7 @@ struct pfault_refbk { u64 refselmk; u64 refcmpmk; u64 reserved; -} __attribute__ ((packed, aligned(8))); +}; static struct pfault_refbk pfault_init_refbk = { .refdiagc = 0x258, From 28254f36e2944a501e8bf440193e5c4f910cf10d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 24 Jul 2023 17:20:29 +0200 Subject: [PATCH 27/66] s390/pfault: use early_param() instead if __setup() early_param() is the standard way of defining early kernel command line parameters. Use that instead of the old __setup() variant. Reviewed-by: Sven Schnelle Signed-off-by: Heiko Carstens --- arch/s390/mm/pfault.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/s390/mm/pfault.c b/arch/s390/mm/pfault.c index e1c8cc14575d..64cc42d37c8b 100644 --- a/arch/s390/mm/pfault.c +++ b/arch/s390/mm/pfault.c @@ -25,8 +25,7 @@ static int __init nopfault(char *str) pfault_disable = 1; return 1; } - -__setup("nopfault", nopfault); +early_param("nopfault", nopfault); struct pfault_refbk { u16 refdiagc; From 4c89eb874420a94680d1b842bb9c3785997713a4 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 24 Jul 2023 17:20:30 +0200 Subject: [PATCH 28/66] s390/pfault: cleanup inline assemblies Cleanup the pfault inline assemblies: - Use symbolic names for operands - Add extra linebreaks, and whitespace to improve readability In addition, change __pfault_init() to return -EOPNOTSUPP in case of an exception, and don't return a made up valid diag 258 return value (aka "8"). This allows to simplify the inline assembly, and makes debugging easier, in case something is broken. Reviewed-by: Sven Schnelle Signed-off-by: Heiko Carstens --- arch/s390/include/asm/pfault.h | 4 +++- arch/s390/mm/pfault.c | 28 ++++++++++++++-------------- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/arch/s390/include/asm/pfault.h b/arch/s390/include/asm/pfault.h index beabeebf2859..a1bee4a1e470 100644 --- a/arch/s390/include/asm/pfault.h +++ b/arch/s390/include/asm/pfault.h @@ -5,6 +5,8 @@ #ifndef _ASM_S390_PFAULT_H #define _ASM_S390_PFAULT_H +#include + int __pfault_init(void); void __pfault_fini(void); @@ -12,7 +14,7 @@ static inline int pfault_init(void) { if (IS_ENABLED(CONFIG_PFAULT)) return __pfault_init(); - return -1; + return -EOPNOTSUPP; } static inline void pfault_fini(void) diff --git a/arch/s390/mm/pfault.c b/arch/s390/mm/pfault.c index 64cc42d37c8b..1d65512c3351 100644 --- a/arch/s390/mm/pfault.c +++ b/arch/s390/mm/pfault.c @@ -51,20 +51,19 @@ static struct pfault_refbk pfault_init_refbk = { int __pfault_init(void) { - int rc; + int rc = -EOPNOTSUPP; if (pfault_disable) - return -1; + return rc; diag_stat_inc(DIAG_STAT_X258); asm volatile( - " diag %1,%0,0x258\n" - "0: j 2f\n" - "1: la %0,8\n" - "2:\n" - EX_TABLE(0b,1b) - : "=d" (rc) - : "a" (&pfault_init_refbk), "m" (pfault_init_refbk) : "cc"); - return rc; + " diag %[refbk],%[rc],0x258\n" + "0: nopr %%r7\n" + EX_TABLE(0b, 0b) + : [rc] "+d" (rc) + : [refbk] "a" (&pfault_init_refbk), "m" (pfault_init_refbk) + : "cc"); + return rc; } static struct pfault_refbk pfault_fini_refbk = { @@ -76,15 +75,16 @@ static struct pfault_refbk pfault_fini_refbk = { void __pfault_fini(void) { - if (pfault_disable) return; diag_stat_inc(DIAG_STAT_X258); asm volatile( - " diag %0,0,0x258\n" + " diag %[refbk],0,0x258\n" "0: nopr %%r7\n" - EX_TABLE(0b,0b) - : : "a" (&pfault_fini_refbk), "m" (pfault_fini_refbk) : "cc"); + EX_TABLE(0b, 0b) + : + : [refbk] "a" (&pfault_fini_refbk), "m" (pfault_fini_refbk) + : "cc"); } static DEFINE_SPINLOCK(pfault_lock); From 46a923fd86eb51acfc9e833ce0a27cc09f3e1c45 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 24 Jul 2023 17:20:31 +0200 Subject: [PATCH 29/66] s390/pfault: use consistent comment style Use consistent comment style within the whole pfault C code. Reviewed-by: Sven Schnelle Signed-off-by: Heiko Carstens --- arch/s390/mm/pfault.c | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/arch/s390/mm/pfault.c b/arch/s390/mm/pfault.c index 1d65512c3351..1aac13bb8f53 100644 --- a/arch/s390/mm/pfault.c +++ b/arch/s390/mm/pfault.c @@ -142,23 +142,27 @@ static void pfault_interrupt(struct ext_code ext_code, if (subcode & PF_COMPLETE) { /* signal bit is set -> a page has been swapped in by VM */ if (tsk->thread.pfault_wait == 1) { - /* Initial interrupt was faster than the completion + /* + * Initial interrupt was faster than the completion * interrupt. pfault_wait is valid. Set pfault_wait * back to zero and wake up the process. This can * safely be done because the task is still sleeping - * and can't produce new pfaults. */ + * and can't produce new pfaults. + */ tsk->thread.pfault_wait = 0; list_del(&tsk->thread.list); wake_up_process(tsk); put_task_struct(tsk); } else { - /* Completion interrupt was faster than initial + /* + * Completion interrupt was faster than initial * interrupt. Set pfault_wait to -1 so the initial * interrupt doesn't put the task to sleep. * If the task is not running, ignore the completion * interrupt since it must be a leftover of a PFAULT * CANCEL operation which didn't remove all pending - * completion interrupts. */ + * completion interrupts. + */ if (task_is_running(tsk)) tsk->thread.pfault_wait = -1; } @@ -170,23 +174,29 @@ static void pfault_interrupt(struct ext_code ext_code, /* Already on the list with a reference: put to sleep */ goto block; } else if (tsk->thread.pfault_wait == -1) { - /* Completion interrupt was faster than the initial + /* + * Completion interrupt was faster than the initial * interrupt (pfault_wait == -1). Set pfault_wait - * back to zero and exit. */ + * back to zero and exit. + */ tsk->thread.pfault_wait = 0; } else { - /* Initial interrupt arrived before completion + /* + * Initial interrupt arrived before completion * interrupt. Let the task sleep. * An extra task reference is needed since a different * cpu may set the task state to TASK_RUNNING again - * before the scheduler is reached. */ + * before the scheduler is reached. + */ get_task_struct(tsk); tsk->thread.pfault_wait = 1; list_add(&tsk->thread.list, &pfault_list); block: - /* Since this must be a userspace fault, there + /* + * Since this must be a userspace fault, there * is no kernel task state to trample. Rely on the - * return to userspace schedule() to block. */ + * return to userspace schedule() to block. + */ __set_current_state(TASK_UNINTERRUPTIBLE); set_tsk_need_resched(tsk); set_preempt_need_resched(); From 3e8fc2d492207353c5ee469241c8df36c9765471 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Fri, 28 Jul 2023 16:42:28 +0800 Subject: [PATCH 30/66] s390/cert_store: fix error return code in fill_cs_keyring() The 'rc' will be re-assigned to 0 after calling get_vcssb(), it needs be set to error code if create_cs_keyring() fails. [hca@linux.ibm.com: slightly changed coding style] Fixes: 8cf57d7217c3 ("s390: add support for user-defined certificates") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20230728084228.3186083-1-yangyingliang@huawei.com Signed-off-by: Heiko Carstens --- arch/s390/kernel/cert_store.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/kernel/cert_store.c b/arch/s390/kernel/cert_store.c index 1cbeb9ce0eb1..3986a044eb36 100644 --- a/arch/s390/kernel/cert_store.c +++ b/arch/s390/kernel/cert_store.c @@ -702,6 +702,7 @@ static int fill_cs_keyring(void) if (rc) goto cleanup_keys; + rc = -ENOMEM; cs_keyring = create_cs_keyring(); if (!cs_keyring) goto cleanup_keys; From 481daa505bc3eb4ac7991e2e7a981506639935fd Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Fri, 28 Jul 2023 12:04:30 +0200 Subject: [PATCH 31/66] s390/cert_store: select CRYPTO_LIB_SHA256 A build failure was reported when sha256() is not present: gcc-13.1.0-nolibc/s390-linux/bin/s390-linux-ld: arch/s390/kernel/cert_store.o: in function `check_certificate_hash': arch/s390/kernel/cert_store.c:267: undefined reference to `sha256' Therefore make CONFIG_CERT_STORE select CRYPTO_LIB_SHA256. Fixes: 8cf57d7217c3 ("s390: add support for user-defined certificates") Reported-by: Randy Dunlap Closes: https://lore.kernel.org/all/8ecb57fb-4560-bdfc-9e55-63e3b0937132@infradead.org/ Signed-off-by: Sven Schnelle Tested-by: Randy Dunlap # build-tested Acked-by: Randy Dunlap Link: https://lore.kernel.org/r/20230728100430.1567328-1-svens@linux.ibm.com Signed-off-by: Heiko Carstens --- arch/s390/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index d9d50a7a2016..18bf754e1fad 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -516,6 +516,7 @@ config KEXEC_SIG config CERT_STORE bool "Get user certificates via DIAG320" depends on KEYS + select CRYPTO_LIB_SHA256 help Enable this option if you want to access user-provided secure boot certificates via DIAG 0x320. From e1b9c2749af020e6c915eb07fcd53fa3a1a074e6 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 1 Aug 2023 15:05:30 +0200 Subject: [PATCH 32/66] s390/smp: ensure global control register contents are in sync Globally setting a bit in control registers is done with smp_ctl_set_clear_bit(). This is using on_each_cpu() to execute a function which actually sets the control register bit on each online CPU. This can be problematic since on_each_cpu() does not prevent that new CPUs come online while it is executed, which in turn means that control register updates could be missing on new CPUs. In order to prevent this problem make sure that global control register contents cannot change until new CPUs have initialized their control registers, and marked themselves online, so they are included in subsequent on_each_cpu() calls. Reviewed-by: Sven Schnelle Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens --- arch/s390/kernel/smp.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 9244130721d6..a4edb7ea66ea 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -253,8 +253,9 @@ static void pcpu_free_lowcore(struct pcpu *pcpu) static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) { - struct lowcore *lc = lowcore_ptr[cpu]; + struct lowcore *lc, *abs_lc; + lc = lowcore_ptr[cpu]; cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask); cpumask_set_cpu(cpu, mm_cpumask(&init_mm)); lc->cpu_nr = cpu; @@ -267,7 +268,9 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) lc->machine_flags = S390_lowcore.machine_flags; lc->user_timer = lc->system_timer = lc->steal_timer = lc->avg_steal_timer = 0; - __ctl_store(lc->cregs_save_area, 0, 15); + abs_lc = get_abs_lowcore(); + memcpy(lc->cregs_save_area, abs_lc->cregs_save_area, sizeof(lc->cregs_save_area)); + put_abs_lowcore(abs_lc); lc->cregs_save_area[1] = lc->kernel_asce; lc->cregs_save_area[7] = lc->user_asce; save_access_regs((unsigned int *) lc->access_regs_save_area); @@ -607,8 +610,8 @@ void smp_ctl_set_clear_bit(int cr, int bit, bool set) ctlreg = (ctlreg & parms.andval) | parms.orval; abs_lc->cregs_save_area[cr] = ctlreg; put_abs_lowcore(abs_lc); - spin_unlock(&ctl_lock); on_each_cpu(smp_ctl_bit_callback, &parms, 1); + spin_unlock(&ctl_lock); } EXPORT_SYMBOL(smp_ctl_set_clear_bit); @@ -928,12 +931,18 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle) rc = pcpu_alloc_lowcore(pcpu, cpu); if (rc) return rc; + /* + * Make sure global control register contents do not change + * until new CPU has initialized control registers. + */ + spin_lock(&ctl_lock); pcpu_prepare_secondary(pcpu, cpu); pcpu_attach_task(pcpu, tidle); pcpu_start_fn(pcpu, smp_start_secondary, NULL); /* Wait until cpu puts itself in the online & active maps */ while (!cpu_online(cpu)) cpu_relax(); + spin_unlock(&ctl_lock); return 0; } From 1e66317a7f575b07a837b7dcbf4bf636282ea8a2 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 7 Aug 2023 00:16:38 +0900 Subject: [PATCH 33/66] s390: remove unneeded #include There is no EXPORT_SYMBOL line there, hence #include is unneeded. Signed-off-by: Masahiro Yamada Link: https://lore.kernel.org/r/20230806151641.394720-1-masahiroy@kernel.org Signed-off-by: Heiko Carstens --- arch/s390/kernel/mcount.S | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index 71c5fa05e7f1..ae4d4fd9afcd 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -9,8 +9,6 @@ #include #include #include -#include - #define STACK_FRAME_SIZE_PTREGS (STACK_FRAME_OVERHEAD + __PT_SIZE) #define STACK_PTREGS (STACK_FRAME_OVERHEAD) From b8c723f1e62b40af4db91c04989272fb7056b30b Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 7 Aug 2023 00:16:39 +0900 Subject: [PATCH 34/66] s390: replace #include with #include Commit ddb5cdbafaaa ("kbuild: generate KSYMTAB entries by modpost") deprecated , which is now a wrapper of . Replace #include with #include . After all the lines are converted, and will be removed. Signed-off-by: Masahiro Yamada Link: https://lore.kernel.org/r/20230806151641.394720-2-masahiroy@kernel.org Signed-off-by: Heiko Carstens --- arch/s390/kernel/entry.S | 2 +- arch/s390/lib/mem.S | 2 +- arch/s390/lib/tishift.S | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index a660f4b6d654..49a11f6dd7ae 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -8,6 +8,7 @@ * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), */ +#include #include #include #include @@ -26,7 +27,6 @@ #include #include #include -#include #include _LPP_OFFSET = __LC_LPP diff --git a/arch/s390/lib/mem.S b/arch/s390/lib/mem.S index 5a9a55de2e10..08f60a42b9a6 100644 --- a/arch/s390/lib/mem.S +++ b/arch/s390/lib/mem.S @@ -5,8 +5,8 @@ * Copyright IBM Corp. 2012 */ +#include #include -#include #include GEN_BR_THUNK %r14 diff --git a/arch/s390/lib/tishift.S b/arch/s390/lib/tishift.S index de33cf02cfd2..96214f51f49b 100644 --- a/arch/s390/lib/tishift.S +++ b/arch/s390/lib/tishift.S @@ -1,8 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#include #include #include -#include .section .noinstr.text, "ax" From ee4ac5275fd82a877e2fe6aeefcc3c4268f43388 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 7 Aug 2023 00:16:40 +0900 Subject: [PATCH 35/66] s390: remove All *.S files under arch/s390/ have been converted to include instead of . Remove . Signed-off-by: Masahiro Yamada Link: https://lore.kernel.org/r/20230806151641.394720-3-masahiroy@kernel.org Signed-off-by: Heiko Carstens --- arch/s390/include/asm/Kbuild | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild index 1a18d7b82f86..4b904110d27c 100644 --- a/arch/s390/include/asm/Kbuild +++ b/arch/s390/include/asm/Kbuild @@ -5,6 +5,5 @@ generated-y += syscall_table.h generated-y += unistd_nr.h generic-y += asm-offsets.h -generic-y += export.h generic-y += kvm_types.h generic-y += mcs_spinlock.h From 8ddccc8a7d06f7ea4d8579970c95609d1b1de77b Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Thu, 6 Jul 2023 12:28:17 +0200 Subject: [PATCH 36/66] s390/boot: cleanup number of page table levels setup The separate vmalloc area size check against _REGION2_SIZE is needed in case user provided insanely large value using vmalloc= kernel command line parameter. That could lead to overflow and selecting 3 page table levels instead of 4. Use size_add() for the overflow check and get rid of the extra vmalloc area check. With the current values of CONFIG_MAX_PHYSMEM_BITS and PAGES_PER_SECTION the sum of maximal possible size of identity mapping and vmemmap area (derived from these macros) plus modules area size MODULES_LEN can not overflow. Thus, that sum is used as first addend while vmalloc area size is second addend for size_add(). Suggested-by: Heiko Carstens Acked-by: Heiko Carstens Signed-off-by: Alexander Gordeev Signed-off-by: Heiko Carstens --- arch/s390/boot/startup.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index a1f792fcc710..b058e2a575c1 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -177,6 +177,7 @@ static unsigned long setup_kernel_memory_layout(void) unsigned long asce_limit; unsigned long rte_size; unsigned long pages; + unsigned long vsize; unsigned long vmax; pages = ident_map_size / PAGE_SIZE; @@ -184,11 +185,9 @@ static unsigned long setup_kernel_memory_layout(void) vmemmap_size = SECTION_ALIGN_UP(pages) * sizeof(struct page); /* choose kernel address space layout: 4 or 3 levels. */ - vmemmap_start = round_up(ident_map_size, _REGION3_SIZE); - if (IS_ENABLED(CONFIG_KASAN) || - vmalloc_size > _REGION2_SIZE || - vmemmap_start + vmemmap_size + vmalloc_size + MODULES_LEN > - _REGION2_SIZE) { + vsize = round_up(ident_map_size, _REGION3_SIZE) + vmemmap_size + MODULES_LEN; + vsize = size_add(vsize, vmalloc_size); + if (IS_ENABLED(CONFIG_KASAN) || (vsize > _REGION2_SIZE)) { asce_limit = _REGION1_SIZE; rte_size = _REGION2_SIZE; } else { From a984f27ec26323204045c306f8b25bc61e042626 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Fri, 4 Aug 2023 15:39:06 +0200 Subject: [PATCH 37/66] s390/mm: define Real Memory Copy size and mask macros Make Real Memory Copy area size and mask explicit. This does not bring any functional change and only needed for clarity. Acked-by: Heiko Carstens Signed-off-by: Alexander Gordeev Signed-off-by: Heiko Carstens --- arch/s390/boot/startup.c | 2 +- arch/s390/include/asm/maccess.h | 3 +++ arch/s390/mm/dump_pagetables.c | 2 +- arch/s390/mm/maccess.c | 7 ++++--- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index b058e2a575c1..d61428190cdd 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -204,7 +204,7 @@ static unsigned long setup_kernel_memory_layout(void) /* force vmalloc and modules below kasan shadow */ vmax = min(vmax, KASAN_SHADOW_START); #endif - __memcpy_real_area = round_down(vmax - PAGE_SIZE, PAGE_SIZE); + __memcpy_real_area = round_down(vmax - MEMCPY_REAL_SIZE, PAGE_SIZE); __abs_lowcore = round_down(__memcpy_real_area - ABS_LOWCORE_MAP_SIZE, sizeof(struct lowcore)); MODULES_END = round_down(__abs_lowcore, _SEGMENT_SIZE); diff --git a/arch/s390/include/asm/maccess.h b/arch/s390/include/asm/maccess.h index cfec3141fdba..50225940d971 100644 --- a/arch/s390/include/asm/maccess.h +++ b/arch/s390/include/asm/maccess.h @@ -4,6 +4,9 @@ #include +#define MEMCPY_REAL_SIZE PAGE_SIZE +#define MEMCPY_REAL_MASK PAGE_MASK + struct iov_iter; extern unsigned long __memcpy_real_area; diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index ba5f80268878..afa5db750d92 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -297,7 +297,7 @@ static int pt_dump_init(void) address_markers[ABS_LOWCORE_NR].start_address = __abs_lowcore; address_markers[ABS_LOWCORE_END_NR].start_address = __abs_lowcore + ABS_LOWCORE_MAP_SIZE; address_markers[MEMCPY_REAL_NR].start_address = __memcpy_real_area; - address_markers[MEMCPY_REAL_END_NR].start_address = __memcpy_real_area + PAGE_SIZE; + address_markers[MEMCPY_REAL_END_NR].start_address = __memcpy_real_area + MEMCPY_REAL_SIZE; address_markers[VMEMMAP_NR].start_address = (unsigned long) vmemmap; address_markers[VMEMMAP_END_NR].start_address = (unsigned long)vmemmap + vmemmap_size; address_markers[VMALLOC_NR].start_address = VMALLOC_START; diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index cbe1df1e9c18..c805b3e2592b 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -86,11 +86,12 @@ size_t memcpy_real_iter(struct iov_iter *iter, unsigned long src, size_t count) void *chunk; pte_t pte; + BUILD_BUG_ON(MEMCPY_REAL_SIZE != PAGE_SIZE); while (count) { - phys = src & PAGE_MASK; - offset = src & ~PAGE_MASK; + phys = src & MEMCPY_REAL_MASK; + offset = src & ~MEMCPY_REAL_MASK; chunk = (void *)(__memcpy_real_area + offset); - len = min(count, PAGE_SIZE - offset); + len = min(count, MEMCPY_REAL_SIZE - offset); pte = mk_pte_phys(phys, PAGE_KERNEL_RO); mutex_lock(&memcpy_real_mutex); From 09cd4ffafb2fbb1a18a88890b13520c501409d99 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Fri, 4 Aug 2023 15:24:28 +0200 Subject: [PATCH 38/66] s390/boot: account Real Memory Copy and Lowcore areas Real Memory Copy and (absolute) Lowcore areas are not accounted when virtual memory layout is set up. Fixes: 4df29d2b9024 ("s390/smp: rework absolute lowcore access") Fixes: 2f0e8aae26a2 ("s390/mm: rework memcpy_real() to avoid DAT-off mode") Acked-by: Heiko Carstens Signed-off-by: Alexander Gordeev Signed-off-by: Heiko Carstens --- arch/s390/boot/startup.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index d61428190cdd..a81f92563037 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -185,7 +185,8 @@ static unsigned long setup_kernel_memory_layout(void) vmemmap_size = SECTION_ALIGN_UP(pages) * sizeof(struct page); /* choose kernel address space layout: 4 or 3 levels. */ - vsize = round_up(ident_map_size, _REGION3_SIZE) + vmemmap_size + MODULES_LEN; + vsize = round_up(ident_map_size, _REGION3_SIZE) + vmemmap_size + + MODULES_LEN + MEMCPY_REAL_SIZE + ABS_LOWCORE_MAP_SIZE; vsize = size_add(vsize, vmalloc_size); if (IS_ENABLED(CONFIG_KASAN) || (vsize > _REGION2_SIZE)) { asce_limit = _REGION1_SIZE; From 5cfdff02e97a46f90b3ba408af62ad3dcb0dc586 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Sat, 5 Aug 2023 10:59:09 +0200 Subject: [PATCH 39/66] s390/boot: fix multi-line comments style Make multi-line comment style consistent across the source. Acked-by: Heiko Carstens Signed-off-by: Alexander Gordeev Signed-off-by: Heiko Carstens --- arch/s390/boot/startup.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index a81f92563037..b9681cb22753 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -195,8 +195,9 @@ static unsigned long setup_kernel_memory_layout(void) asce_limit = _REGION2_SIZE; rte_size = _REGION3_SIZE; } + /* - * forcing modules and vmalloc area under the ultravisor + * Forcing modules and vmalloc area under the ultravisor * secure storage limit, so that any vmalloc allocation * we do could be used to back secure guest storage. */ @@ -288,8 +289,9 @@ void startup_kernel(void) setup_lpp(); safe_addr = mem_safe_offset(); + /* - * reserve decompressor memory together with decompression heap, buffer and + * Reserve decompressor memory together with decompression heap, buffer and * memory which might be occupied by uncompressed kernel at default 1Mb * position (if KASLR is off or failed). */ From 2d1494fb31405df0dfb6006fdb2b24e7880258cd Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sat, 12 Aug 2023 17:12:54 +0200 Subject: [PATCH 40/66] s390/mm: make virt_to_pfn() a static inline Making virt_to_pfn() a static inline taking a strongly typed (const void *) makes the contract of a passing a pointer of that type to the function explicit and exposes any misuse of the macro virt_to_pfn() acting polymorphic and accepting many types such as (void *), (unitptr_t) or (unsigned long) as arguments without warnings. For symmetry do the same with pfn_to_virt() reflecting the current layout in asm-generic/page.h. Doing this reveals a number of offenders in the arch code and the S390-specific drivers, so just bite the bullet and fix up all of those as well. Signed-off-by: Linus Walleij Reviewed-by: Alexander Gordeev Link: https://lore.kernel.org/r/20230812-virt-to-phys-s390-v2-1-6c40f31fe36f@linaro.org Signed-off-by: Heiko Carstens --- arch/s390/include/asm/kfence.h | 2 +- arch/s390/include/asm/page.h | 12 ++++++++++-- arch/s390/mm/cmm.c | 2 +- arch/s390/mm/vmem.c | 2 +- drivers/s390/block/scm_blk.c | 2 +- drivers/s390/char/vmcp.c | 2 +- 6 files changed, 15 insertions(+), 7 deletions(-) diff --git a/arch/s390/include/asm/kfence.h b/arch/s390/include/asm/kfence.h index d55ba878378b..e47fd8cbe701 100644 --- a/arch/s390/include/asm/kfence.h +++ b/arch/s390/include/asm/kfence.h @@ -35,7 +35,7 @@ static __always_inline void kfence_split_mapping(void) static inline bool kfence_protect_page(unsigned long addr, bool protect) { - __kernel_map_pages(virt_to_page(addr), 1, !protect); + __kernel_map_pages(virt_to_page((void *)addr), 1, !protect); return true; } diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index a9c138fcd2ad..cfec0743314e 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -191,8 +191,16 @@ int arch_make_page_accessible(struct page *page); #define phys_to_page(phys) pfn_to_page(phys_to_pfn(phys)) #define page_to_phys(page) pfn_to_phys(page_to_pfn(page)) -#define pfn_to_virt(pfn) __va(pfn_to_phys(pfn)) -#define virt_to_pfn(kaddr) (phys_to_pfn(__pa(kaddr))) +static inline void *pfn_to_virt(unsigned long pfn) +{ + return __va(pfn_to_phys(pfn)); +} + +static inline unsigned long virt_to_pfn(const void *kaddr) +{ + return phys_to_pfn(__pa(kaddr)); +} + #define pfn_to_kaddr(pfn) pfn_to_virt(pfn) #define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr)) diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index 5300c6867d5e..f47515313226 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -90,7 +90,7 @@ static long cmm_alloc_pages(long nr, long *counter, } else free_page((unsigned long) npa); } - diag10_range(virt_to_pfn(addr), 1); + diag10_range(virt_to_pfn((void *)addr), 1); pa->pages[pa->index++] = addr; (*counter)++; spin_unlock(&cmm_lock); diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index be69cb2d47eb..3391efb59641 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -36,7 +36,7 @@ static void vmem_free_pages(unsigned long addr, int order) { /* We don't expect boot memory to be removed ever. */ if (!slab_is_available() || - WARN_ON_ONCE(PageReserved(virt_to_page(addr)))) + WARN_ON_ONCE(PageReserved(virt_to_page((void *)addr)))) return; free_pages(addr, order); } diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c index 0c1df1d5f1ac..3a9cc8a4a230 100644 --- a/drivers/s390/block/scm_blk.c +++ b/drivers/s390/block/scm_blk.c @@ -134,7 +134,7 @@ static void scm_request_done(struct scm_request *scmrq) if ((msb->flags & MSB_FLAG_IDA) && aidaw && IS_ALIGNED(aidaw, PAGE_SIZE)) - mempool_free(virt_to_page(aidaw), aidaw_pool); + mempool_free(virt_to_page((void *)aidaw), aidaw_pool); } spin_lock_irqsave(&list_lock, flags); diff --git a/drivers/s390/char/vmcp.c b/drivers/s390/char/vmcp.c index 4cebfaaa22b4..eb0520a9d4af 100644 --- a/drivers/s390/char/vmcp.c +++ b/drivers/s390/char/vmcp.c @@ -89,7 +89,7 @@ static void vmcp_response_free(struct vmcp_session *session) order = get_order(session->bufsize); nr_pages = ALIGN(session->bufsize, PAGE_SIZE) >> PAGE_SHIFT; if (session->cma_alloc) { - page = virt_to_page((unsigned long)session->response); + page = virt_to_page(session->response); cma_release(vmcp_cma, page, nr_pages); session->cma_alloc = 0; } else { From c8f40a0bccefd613748d080147469a4652d6e74c Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Thu, 10 Aug 2023 10:22:36 +0200 Subject: [PATCH 41/66] s390/dcssblk: fix kernel crash with list_add corruption Commit fb08a1908cb1 ("dax: simplify the dax_device <-> gendisk association") introduced new logic for gendisk association, requiring drivers to explicitly call dax_add_host() and dax_remove_host(). For dcssblk driver, some dax_remove_host() calls were missing, e.g. in device remove path. The commit also broke error handling for out_dax case in device add path, resulting in an extra put_device() w/o the previous get_device() in that case. This lead to stale xarray entries after device add / remove cycles. In the case when a previously used struct gendisk pointer (xarray index) would be used again, because blk_alloc_disk() happened to return such a pointer, the xa_insert() in dax_add_host() would fail and go to out_dax, doing the extra put_device() in the error path. In combination with an already flawed error handling in dcssblk (device_register() cleanup), which needs to be addressed in a separate patch, this resulted in a missing device_del() / klist_del(), and eventually in the kernel crash with list_add corruption on a subsequent device_add() / klist_add(). Fix this by adding the missing dax_remove_host() calls, and also move the put_device() in the error path to restore the previous logic. Fixes: fb08a1908cb1 ("dax: simplify the dax_device <-> gendisk association") Cc: # 5.17+ Acked-by: Heiko Carstens Signed-off-by: Gerald Schaefer Signed-off-by: Heiko Carstens --- drivers/s390/block/dcssblk.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 6eafd0a34483..06bcb6c78909 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -412,6 +412,7 @@ removeseg: } list_del(&dev_info->lh); + dax_remove_host(dev_info->gd); kill_dax(dev_info->dax_dev); put_dax(dev_info->dax_dev); del_gendisk(dev_info->gd); @@ -707,9 +708,9 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char goto out; out_dax_host: + put_device(&dev_info->dev); dax_remove_host(dev_info->gd); out_dax: - put_device(&dev_info->dev); kill_dax(dev_info->dax_dev); put_dax(dev_info->dax_dev); put_dev: @@ -789,6 +790,7 @@ dcssblk_remove_store(struct device *dev, struct device_attribute *attr, const ch } list_del(&dev_info->lh); + dax_remove_host(dev_info->gd); kill_dax(dev_info->dax_dev); put_dax(dev_info->dax_dev); del_gendisk(dev_info->gd); From ea5717cb13468323a7c3dd394748301802991f39 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Tue, 15 Aug 2023 09:26:06 +0200 Subject: [PATCH 42/66] s390/ipl: add missing secure/has_secure file to ipl type 'unknown' OS installers are relying on /sys/firmware/ipl/has_secure to be present on machines supporting secure boot. This file is present for all IPL types, but not the unknown type, which prevents a secure installation when an LPAR is booted in HMC via FTP(s), because this is an unknown IPL type in linux. While at it, also add the secure file. Fixes: c9896acc7851 ("s390/ipl: Provide has_secure sysfs attribute") Cc: stable@vger.kernel.org Signed-off-by: Sven Schnelle Reviewed-by: Heiko Carstens Signed-off-by: Heiko Carstens --- arch/s390/kernel/ipl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 85a00d97a314..dfcb2b563e2b 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -640,6 +640,8 @@ static struct attribute_group ipl_ccw_attr_group_lpar = { static struct attribute *ipl_unknown_attrs[] = { &sys_ipl_type_attr.attr, + &sys_ipl_secure_attr.attr, + &sys_ipl_has_secure_attr.attr, NULL, }; From 7645dcddc2666f57e37ed3dda50b8b273b62c88a Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Tue, 15 Aug 2023 10:27:08 +0200 Subject: [PATCH 43/66] s390/ipl: add common ipl parameter attribute group All ipl types have 'secure','has_secure' and type parameters. Move these to a common ipl parameter group so that they don't need to be present in each ipl parameter group. Signed-off-by: Sven Schnelle Reviewed-by: Heiko Carstens Signed-off-by: Heiko Carstens --- arch/s390/kernel/ipl.c | 28 ++++++---------------------- 1 file changed, 6 insertions(+), 22 deletions(-) diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index dfcb2b563e2b..7f3a84e414a4 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -557,15 +557,12 @@ static struct kobj_attribute sys_ipl_ccw_loadparm_attr = __ATTR(loadparm, 0444, ipl_ccw_loadparm_show, NULL); static struct attribute *ipl_fcp_attrs[] = { - &sys_ipl_type_attr.attr, &sys_ipl_device_attr.attr, &sys_ipl_fcp_wwpn_attr.attr, &sys_ipl_fcp_lun_attr.attr, &sys_ipl_fcp_bootprog_attr.attr, &sys_ipl_fcp_br_lba_attr.attr, &sys_ipl_ccw_loadparm_attr.attr, - &sys_ipl_secure_attr.attr, - &sys_ipl_has_secure_attr.attr, NULL, }; @@ -575,14 +572,11 @@ static struct attribute_group ipl_fcp_attr_group = { }; static struct attribute *ipl_nvme_attrs[] = { - &sys_ipl_type_attr.attr, &sys_ipl_nvme_fid_attr.attr, &sys_ipl_nvme_nsid_attr.attr, &sys_ipl_nvme_bootprog_attr.attr, &sys_ipl_nvme_br_lba_attr.attr, &sys_ipl_ccw_loadparm_attr.attr, - &sys_ipl_secure_attr.attr, - &sys_ipl_has_secure_attr.attr, NULL, }; @@ -592,13 +586,10 @@ static struct attribute_group ipl_nvme_attr_group = { }; static struct attribute *ipl_eckd_attrs[] = { - &sys_ipl_type_attr.attr, &sys_ipl_eckd_bootprog_attr.attr, &sys_ipl_eckd_br_chr_attr.attr, &sys_ipl_ccw_loadparm_attr.attr, &sys_ipl_device_attr.attr, - &sys_ipl_secure_attr.attr, - &sys_ipl_has_secure_attr.attr, NULL, }; @@ -610,21 +601,15 @@ static struct attribute_group ipl_eckd_attr_group = { /* CCW ipl device attributes */ static struct attribute *ipl_ccw_attrs_vm[] = { - &sys_ipl_type_attr.attr, &sys_ipl_device_attr.attr, &sys_ipl_ccw_loadparm_attr.attr, &sys_ipl_vm_parm_attr.attr, - &sys_ipl_secure_attr.attr, - &sys_ipl_has_secure_attr.attr, NULL, }; static struct attribute *ipl_ccw_attrs_lpar[] = { - &sys_ipl_type_attr.attr, &sys_ipl_device_attr.attr, &sys_ipl_ccw_loadparm_attr.attr, - &sys_ipl_secure_attr.attr, - &sys_ipl_has_secure_attr.attr, NULL, }; @@ -636,17 +621,15 @@ static struct attribute_group ipl_ccw_attr_group_lpar = { .attrs = ipl_ccw_attrs_lpar }; -/* UNKNOWN ipl device attributes */ - -static struct attribute *ipl_unknown_attrs[] = { +static struct attribute *ipl_common_attrs[] = { &sys_ipl_type_attr.attr, &sys_ipl_secure_attr.attr, &sys_ipl_has_secure_attr.attr, NULL, }; -static struct attribute_group ipl_unknown_attr_group = { - .attrs = ipl_unknown_attrs, +static struct attribute_group ipl_common_attr_group = { + .attrs = ipl_common_attrs, }; static struct kset *ipl_kset; @@ -670,6 +653,9 @@ static int __init ipl_init(void) rc = -ENOMEM; goto out; } + rc = sysfs_create_group(&ipl_kset->kobj, &ipl_common_attr_group); + if (rc) + goto out; switch (ipl_info.type) { case IPL_TYPE_CCW: if (MACHINE_IS_VM) @@ -691,8 +677,6 @@ static int __init ipl_init(void) rc = sysfs_create_group(&ipl_kset->kobj, &ipl_nvme_attr_group); break; default: - rc = sysfs_create_group(&ipl_kset->kobj, - &ipl_unknown_attr_group); break; } out: From 37a08f010b7c423b5e4c9ed3b187d21166553007 Mon Sep 17 00:00:00 2001 From: Holger Dengler Date: Wed, 26 Jul 2023 11:33:45 +0200 Subject: [PATCH 44/66] s390/pkey: fix/harmonize internal keyblob headers Commit 'fa6999e326fe ("s390/pkey: support CCA and EP11 secure ECC private keys")' introduced PKEY_TYPE_EP11_AES as a supplement to PKEY_TYPE_EP11. All pkeys have an internal header/payload structure, which is opaque to the userspace. The header structures for PKEY_TYPE_EP11 and PKEY_TYPE_EP11_AES are nearly identical and there is no reason, why different structures are used. In preparation to fix the keyversion handling in the broken PKEY IOCTLs, the same header structure is used for PKEY_TYPE_EP11 and PKEY_TYPE_EP11_AES. This reduces the number of different code paths and increases the readability. Fixes: fa6999e326fe ("s390/pkey: support CCA and EP11 secure ECC private keys") Signed-off-by: Holger Dengler Reviewed-by: Ingo Franzki Signed-off-by: Heiko Carstens --- drivers/s390/crypto/pkey_api.c | 2 +- drivers/s390/crypto/zcrypt_ep11misc.c | 4 ++-- drivers/s390/crypto/zcrypt_ep11misc.h | 9 +-------- 3 files changed, 4 insertions(+), 11 deletions(-) diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c index e58bfd225323..ba8581e0809c 100644 --- a/drivers/s390/crypto/pkey_api.c +++ b/drivers/s390/crypto/pkey_api.c @@ -895,7 +895,7 @@ static int pkey_verifykey2(const u8 *key, size_t keylen, if (ktype) *ktype = PKEY_TYPE_EP11; if (ksize) - *ksize = kb->head.keybitlen; + *ksize = kb->head.bitlen; rc = ep11_findcard2(&_apqns, &_nr_apqns, *cardnr, *domain, ZCRYPT_CEX7, EP11_API_V, kb->wkvp); diff --git a/drivers/s390/crypto/zcrypt_ep11misc.c b/drivers/s390/crypto/zcrypt_ep11misc.c index 958f5ee47f1b..d7ecd6ce5b7a 100644 --- a/drivers/s390/crypto/zcrypt_ep11misc.c +++ b/drivers/s390/crypto/zcrypt_ep11misc.c @@ -787,7 +787,7 @@ int ep11_genaeskey(u16 card, u16 domain, u32 keybitsize, u32 keygenflags, kb->head.type = TOKTYPE_NON_CCA; kb->head.len = rep_pl->data_len; kb->head.version = TOKVER_EP11_AES; - kb->head.keybitlen = keybitsize; + kb->head.bitlen = keybitsize; out: kfree(req); @@ -1055,7 +1055,7 @@ static int ep11_unwrapkey(u16 card, u16 domain, kb->head.type = TOKTYPE_NON_CCA; kb->head.len = rep_pl->data_len; kb->head.version = TOKVER_EP11_AES; - kb->head.keybitlen = keybitsize; + kb->head.bitlen = keybitsize; out: kfree(req); diff --git a/drivers/s390/crypto/zcrypt_ep11misc.h b/drivers/s390/crypto/zcrypt_ep11misc.h index a3eddf51242d..67cc80d71ba3 100644 --- a/drivers/s390/crypto/zcrypt_ep11misc.h +++ b/drivers/s390/crypto/zcrypt_ep11misc.h @@ -29,14 +29,7 @@ struct ep11keyblob { union { u8 session[32]; /* only used for PKEY_TYPE_EP11: */ - struct { - u8 type; /* 0x00 (TOKTYPE_NON_CCA) */ - u8 res0; /* unused */ - u16 len; /* total length in bytes of this blob */ - u8 version; /* 0x03 (TOKVER_EP11_AES) */ - u8 res1; /* unused */ - u16 keybitlen; /* clear key bit len, 0 for unknown */ - } head; + struct ep11kblob_header head; }; u8 wkvp[16]; /* wrapping key verification pattern */ u64 attr; /* boolean key attributes */ From fb249ce7f7bfd8621a38e4ad401ba74b680786d4 Mon Sep 17 00:00:00 2001 From: Holger Dengler Date: Tue, 25 Jul 2023 09:49:55 +0200 Subject: [PATCH 45/66] s390/pkey: fix PKEY_TYPE_EP11_AES handling in PKEY_GENSECK2 IOCTL Commit 'fa6999e326fe ("s390/pkey: support CCA and EP11 secure ECC private keys")' introduced PKEY_TYPE_EP11_AES for the PKEY_GENSECK2 IOCTL, to enable userspace to generate securekey blobs of this type. Unfortunately, all PKEY_GENSECK2 IOCTL requests for PKEY_TYPE_EP11_AES return with an error (-EINVAL). Fix the handling for PKEY_TYPE_EP11_AES in PKEY_GENSECK2 IOCTL, so that userspace can generate securekey blobs of this type. The start of the header and the keyblob, as well as the length need special handling, depending on the internal keyversion. Add a helper function that splits an uninitialized buffer into start and size of the header as well as start and size of the payload, depending on the requested keyversion. Do the header-related calculations and the raw genkey request handling in separate functions. Use the raw genkey request function for internal purposes. Fixes: fa6999e326fe ("s390/pkey: support CCA and EP11 secure ECC private keys") Signed-off-by: Holger Dengler Reviewed-by: Ingo Franzki Signed-off-by: Heiko Carstens --- drivers/s390/crypto/pkey_api.c | 18 +++-- drivers/s390/crypto/zcrypt_ep11misc.c | 103 ++++++++++++++++++++++---- drivers/s390/crypto/zcrypt_ep11misc.h | 2 +- 3 files changed, 102 insertions(+), 21 deletions(-) diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c index ba8581e0809c..2661d6a9ea13 100644 --- a/drivers/s390/crypto/pkey_api.c +++ b/drivers/s390/crypto/pkey_api.c @@ -713,6 +713,11 @@ static int pkey_genseckey2(const struct pkey_apqn *apqns, size_t nr_apqns, if (*keybufsize < MINEP11AESKEYBLOBSIZE) return -EINVAL; break; + case PKEY_TYPE_EP11_AES: + if (*keybufsize < (sizeof(struct ep11kblob_header) + + MINEP11AESKEYBLOBSIZE)) + return -EINVAL; + break; default: return -EINVAL; } @@ -729,9 +734,10 @@ static int pkey_genseckey2(const struct pkey_apqn *apqns, size_t nr_apqns, for (i = 0, rc = -ENODEV; i < nr_apqns; i++) { card = apqns[i].card; dom = apqns[i].domain; - if (ktype == PKEY_TYPE_EP11) { + if (ktype == PKEY_TYPE_EP11 || + ktype == PKEY_TYPE_EP11_AES) { rc = ep11_genaeskey(card, dom, ksize, kflags, - keybuf, keybufsize); + keybuf, keybufsize, ktype); } else if (ktype == PKEY_TYPE_CCA_DATA) { rc = cca_genseckey(card, dom, ksize, keybuf); *keybufsize = (rc ? 0 : SECKEYBLOBSIZE); @@ -1466,7 +1472,7 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd, apqns = _copy_apqns_from_user(kgs.apqns, kgs.apqn_entries); if (IS_ERR(apqns)) return PTR_ERR(apqns); - kkey = kmalloc(klen, GFP_KERNEL); + kkey = kzalloc(klen, GFP_KERNEL); if (!kkey) { kfree(apqns); return -ENOMEM; @@ -2130,7 +2136,8 @@ static ssize_t pkey_ep11_aes_attr_read(enum pkey_key_size keybits, for (i = 0, rc = -ENODEV; i < nr_apqns; i++) { card = apqns[i] >> 16; dom = apqns[i] & 0xFFFF; - rc = ep11_genaeskey(card, dom, keybits, 0, buf, &keysize); + rc = ep11_genaeskey(card, dom, keybits, 0, buf, &keysize, + PKEY_TYPE_EP11); if (rc == 0) break; } @@ -2140,7 +2147,8 @@ static ssize_t pkey_ep11_aes_attr_read(enum pkey_key_size keybits, if (is_xts) { keysize = MAXEP11AESKEYBLOBSIZE; buf += MAXEP11AESKEYBLOBSIZE; - rc = ep11_genaeskey(card, dom, keybits, 0, buf, &keysize); + rc = ep11_genaeskey(card, dom, keybits, 0, buf, &keysize, + PKEY_TYPE_EP11); if (rc == 0) return 2 * MAXEP11AESKEYBLOBSIZE; } diff --git a/drivers/s390/crypto/zcrypt_ep11misc.c b/drivers/s390/crypto/zcrypt_ep11misc.c index d7ecd6ce5b7a..51f6753e01c5 100644 --- a/drivers/s390/crypto/zcrypt_ep11misc.c +++ b/drivers/s390/crypto/zcrypt_ep11misc.c @@ -113,6 +113,50 @@ static void __exit card_cache_free(void) spin_unlock_bh(&card_list_lock); } +static int ep11_kb_split(const u8 *kb, size_t kblen, u32 kbver, + struct ep11kblob_header **kbhdr, size_t *kbhdrsize, + u8 **kbpl, size_t *kbplsize) +{ + struct ep11kblob_header *hdr = NULL; + size_t hdrsize, plsize = 0; + int rc = -EINVAL; + u8 *pl = NULL; + + if (kblen < sizeof(struct ep11kblob_header)) + goto out; + hdr = (struct ep11kblob_header *)kb; + + switch (kbver) { + case TOKVER_EP11_AES: + /* header overlays the payload */ + hdrsize = 0; + break; + case TOKVER_EP11_ECC_WITH_HEADER: + case TOKVER_EP11_AES_WITH_HEADER: + /* payload starts after the header */ + hdrsize = sizeof(struct ep11kblob_header); + break; + default: + goto out; + } + + plsize = kblen - hdrsize; + pl = (u8 *)kb + hdrsize; + + if (kbhdr) + *kbhdr = hdr; + if (kbhdrsize) + *kbhdrsize = hdrsize; + if (kbpl) + *kbpl = pl; + if (kbplsize) + *kbplsize = plsize; + + rc = 0; +out: + return rc; +} + /* * Simple check if the key blob is a valid EP11 AES key blob with header. */ @@ -664,8 +708,9 @@ EXPORT_SYMBOL(ep11_get_domain_info); */ #define KEY_ATTR_DEFAULTS 0x00200c00 -int ep11_genaeskey(u16 card, u16 domain, u32 keybitsize, u32 keygenflags, - u8 *keybuf, size_t *keybufsize) +static int _ep11_genaeskey(u16 card, u16 domain, + u32 keybitsize, u32 keygenflags, + u8 *keybuf, size_t *keybufsize) { struct keygen_req_pl { struct pl_head head; @@ -701,7 +746,6 @@ int ep11_genaeskey(u16 card, u16 domain, u32 keybitsize, u32 keygenflags, struct ep11_cprb *req = NULL, *rep = NULL; struct ep11_target_dev target; struct ep11_urb *urb = NULL; - struct ep11keyblob *kb; int api, rc = -ENOMEM; switch (keybitsize) { @@ -780,14 +824,9 @@ int ep11_genaeskey(u16 card, u16 domain, u32 keybitsize, u32 keygenflags, goto out; } - /* copy key blob and set header values */ + /* copy key blob */ memcpy(keybuf, rep_pl->data, rep_pl->data_len); *keybufsize = rep_pl->data_len; - kb = (struct ep11keyblob *)keybuf; - kb->head.type = TOKTYPE_NON_CCA; - kb->head.len = rep_pl->data_len; - kb->head.version = TOKVER_EP11_AES; - kb->head.bitlen = keybitsize; out: kfree(req); @@ -795,6 +834,43 @@ out: kfree(urb); return rc; } + +int ep11_genaeskey(u16 card, u16 domain, u32 keybitsize, u32 keygenflags, + u8 *keybuf, size_t *keybufsize, u32 keybufver) +{ + struct ep11kblob_header *hdr; + size_t hdr_size, pl_size; + u8 *pl; + int rc; + + switch (keybufver) { + case TOKVER_EP11_AES: + case TOKVER_EP11_AES_WITH_HEADER: + break; + default: + return -EINVAL; + } + + rc = ep11_kb_split(keybuf, *keybufsize, keybufver, + &hdr, &hdr_size, &pl, &pl_size); + if (rc) + return rc; + + rc = _ep11_genaeskey(card, domain, keybitsize, keygenflags, + pl, &pl_size); + if (rc) + return rc; + + *keybufsize = hdr_size + pl_size; + + /* update header information */ + hdr->type = TOKTYPE_NON_CCA; + hdr->len = *keybufsize; + hdr->version = keybufver; + hdr->bitlen = keybitsize; + + return 0; +} EXPORT_SYMBOL(ep11_genaeskey); static int ep11_cryptsingle(u16 card, u16 domain, @@ -1201,7 +1277,6 @@ int ep11_clr2keyblob(u16 card, u16 domain, u32 keybitsize, u32 keygenflags, const u8 *clrkey, u8 *keybuf, size_t *keybufsize) { int rc; - struct ep11keyblob *kb; u8 encbuf[64], *kek = NULL; size_t clrkeylen, keklen, encbuflen = sizeof(encbuf); @@ -1223,17 +1298,15 @@ int ep11_clr2keyblob(u16 card, u16 domain, u32 keybitsize, u32 keygenflags, } /* Step 1: generate AES 256 bit random kek key */ - rc = ep11_genaeskey(card, domain, 256, - 0x00006c00, /* EN/DECRYPT, WRAP/UNWRAP */ - kek, &keklen); + rc = _ep11_genaeskey(card, domain, 256, + 0x00006c00, /* EN/DECRYPT, WRAP/UNWRAP */ + kek, &keklen); if (rc) { DEBUG_ERR( "%s generate kek key failed, rc=%d\n", __func__, rc); goto out; } - kb = (struct ep11keyblob *)kek; - memset(&kb->head, 0, sizeof(kb->head)); /* Step 2: encrypt clear key value with the kek key */ rc = ep11_cryptsingle(card, domain, 0, 0, def_iv, kek, keklen, diff --git a/drivers/s390/crypto/zcrypt_ep11misc.h b/drivers/s390/crypto/zcrypt_ep11misc.h index 67cc80d71ba3..2eecbd7be6e5 100644 --- a/drivers/s390/crypto/zcrypt_ep11misc.h +++ b/drivers/s390/crypto/zcrypt_ep11misc.h @@ -107,7 +107,7 @@ int ep11_get_domain_info(u16 card, u16 domain, struct ep11_domain_info *info); * Generate (random) EP11 AES secure key. */ int ep11_genaeskey(u16 card, u16 domain, u32 keybitsize, u32 keygenflags, - u8 *keybuf, size_t *keybufsize); + u8 *keybuf, size_t *keybufsize, u32 keybufver); /* * Generate EP11 AES secure key with given clear key value. From da2863f15945de100b95c72d5656541d30956c5d Mon Sep 17 00:00:00 2001 From: Holger Dengler Date: Tue, 25 Jul 2023 11:24:47 +0200 Subject: [PATCH 46/66] s390/pkey: fix PKEY_TYPE_EP11_AES handling in PKEY_CLR2SECK2 IOCTL Commit 'fa6999e326fe ("s390/pkey: support CCA and EP11 secure ECC private keys")' introduced PKEY_TYPE_EP11_AES for the PKEY_CLR2SECK2 IOCTL to convert an AES clearkey into a securekey of this type. Unfortunately, all PKEY_CLR2SECK2 IOCTL requests with type PKEY_TYPE_EP11_AES return with an error (-EINVAL). Fix the handling for PKEY_TYPE_EP11_AES in PKEY_CLR2SECK2 IOCTL, so that userspace can convert clearkey blobs into PKEY_TYPE_EP11_AES securekey blobs. Cc: stable@vger.kernel.org # v5.10+ Fixes: fa6999e326fe ("s390/pkey: support CCA and EP11 secure ECC private keys") Signed-off-by: Holger Dengler Reviewed-by: Ingo Franzki Signed-off-by: Heiko Carstens --- drivers/s390/crypto/pkey_api.c | 16 +++++-- drivers/s390/crypto/zcrypt_ep11misc.c | 61 ++++++++++++++++++++------- drivers/s390/crypto/zcrypt_ep11misc.h | 3 +- 3 files changed, 60 insertions(+), 20 deletions(-) diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c index 2661d6a9ea13..7543757c82e2 100644 --- a/drivers/s390/crypto/pkey_api.c +++ b/drivers/s390/crypto/pkey_api.c @@ -272,7 +272,8 @@ static int pkey_clr2ep11key(const u8 *clrkey, size_t clrkeylen, card = apqns[i] >> 16; dom = apqns[i] & 0xFFFF; rc = ep11_clr2keyblob(card, dom, clrkeylen * 8, - 0, clrkey, keybuf, keybuflen); + 0, clrkey, keybuf, keybuflen, + PKEY_TYPE_EP11); if (rc == 0) break; } @@ -775,6 +776,11 @@ static int pkey_clr2seckey2(const struct pkey_apqn *apqns, size_t nr_apqns, if (*keybufsize < MINEP11AESKEYBLOBSIZE) return -EINVAL; break; + case PKEY_TYPE_EP11_AES: + if (*keybufsize < (sizeof(struct ep11kblob_header) + + MINEP11AESKEYBLOBSIZE)) + return -EINVAL; + break; default: return -EINVAL; } @@ -793,9 +799,11 @@ static int pkey_clr2seckey2(const struct pkey_apqn *apqns, size_t nr_apqns, for (i = 0, rc = -ENODEV; i < nr_apqns; i++) { card = apqns[i].card; dom = apqns[i].domain; - if (ktype == PKEY_TYPE_EP11) { + if (ktype == PKEY_TYPE_EP11 || + ktype == PKEY_TYPE_EP11_AES) { rc = ep11_clr2keyblob(card, dom, ksize, kflags, - clrkey, keybuf, keybufsize); + clrkey, keybuf, keybufsize, + ktype); } else if (ktype == PKEY_TYPE_CCA_DATA) { rc = cca_clr2seckey(card, dom, ksize, clrkey, keybuf); @@ -1514,7 +1522,7 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd, apqns = _copy_apqns_from_user(kcs.apqns, kcs.apqn_entries); if (IS_ERR(apqns)) return PTR_ERR(apqns); - kkey = kmalloc(klen, GFP_KERNEL); + kkey = kzalloc(klen, GFP_KERNEL); if (!kkey) { kfree(apqns); return -ENOMEM; diff --git a/drivers/s390/crypto/zcrypt_ep11misc.c b/drivers/s390/crypto/zcrypt_ep11misc.c index 51f6753e01c5..355d30bc0aac 100644 --- a/drivers/s390/crypto/zcrypt_ep11misc.c +++ b/drivers/s390/crypto/zcrypt_ep11misc.c @@ -1000,12 +1000,12 @@ out: return rc; } -static int ep11_unwrapkey(u16 card, u16 domain, - const u8 *kek, size_t keksize, - const u8 *enckey, size_t enckeysize, - u32 mech, const u8 *iv, - u32 keybitsize, u32 keygenflags, - u8 *keybuf, size_t *keybufsize) +static int _ep11_unwrapkey(u16 card, u16 domain, + const u8 *kek, size_t keksize, + const u8 *enckey, size_t enckeysize, + u32 mech, const u8 *iv, + u32 keybitsize, u32 keygenflags, + u8 *keybuf, size_t *keybufsize) { struct uw_req_pl { struct pl_head head; @@ -1042,7 +1042,6 @@ static int ep11_unwrapkey(u16 card, u16 domain, struct ep11_cprb *req = NULL, *rep = NULL; struct ep11_target_dev target; struct ep11_urb *urb = NULL; - struct ep11keyblob *kb; size_t req_pl_size; int api, rc = -ENOMEM; u8 *p; @@ -1124,14 +1123,9 @@ static int ep11_unwrapkey(u16 card, u16 domain, goto out; } - /* copy key blob and set header values */ + /* copy key blob */ memcpy(keybuf, rep_pl->data, rep_pl->data_len); *keybufsize = rep_pl->data_len; - kb = (struct ep11keyblob *)keybuf; - kb->head.type = TOKTYPE_NON_CCA; - kb->head.len = rep_pl->data_len; - kb->head.version = TOKVER_EP11_AES; - kb->head.bitlen = keybitsize; out: kfree(req); @@ -1140,6 +1134,42 @@ out: return rc; } +static int ep11_unwrapkey(u16 card, u16 domain, + const u8 *kek, size_t keksize, + const u8 *enckey, size_t enckeysize, + u32 mech, const u8 *iv, + u32 keybitsize, u32 keygenflags, + u8 *keybuf, size_t *keybufsize, + u8 keybufver) +{ + struct ep11kblob_header *hdr; + size_t hdr_size, pl_size; + u8 *pl; + int rc; + + rc = ep11_kb_split(keybuf, *keybufsize, keybufver, + &hdr, &hdr_size, &pl, &pl_size); + if (rc) + return rc; + + rc = _ep11_unwrapkey(card, domain, kek, keksize, enckey, enckeysize, + mech, iv, keybitsize, keygenflags, + pl, &pl_size); + if (rc) + return rc; + + *keybufsize = hdr_size + pl_size; + + /* update header information */ + hdr = (struct ep11kblob_header *)keybuf; + hdr->type = TOKTYPE_NON_CCA; + hdr->len = *keybufsize; + hdr->version = keybufver; + hdr->bitlen = keybitsize; + + return 0; +} + static int ep11_wrapkey(u16 card, u16 domain, const u8 *key, size_t keysize, u32 mech, const u8 *iv, @@ -1274,7 +1304,8 @@ out: } int ep11_clr2keyblob(u16 card, u16 domain, u32 keybitsize, u32 keygenflags, - const u8 *clrkey, u8 *keybuf, size_t *keybufsize) + const u8 *clrkey, u8 *keybuf, size_t *keybufsize, + u32 keytype) { int rc; u8 encbuf[64], *kek = NULL; @@ -1321,7 +1352,7 @@ int ep11_clr2keyblob(u16 card, u16 domain, u32 keybitsize, u32 keygenflags, /* Step 3: import the encrypted key value as a new key */ rc = ep11_unwrapkey(card, domain, kek, keklen, encbuf, encbuflen, 0, def_iv, - keybitsize, 0, keybuf, keybufsize); + keybitsize, 0, keybuf, keybufsize, keytype); if (rc) { DEBUG_ERR( "%s importing key value as new key failed,, rc=%d\n", diff --git a/drivers/s390/crypto/zcrypt_ep11misc.h b/drivers/s390/crypto/zcrypt_ep11misc.h index 2eecbd7be6e5..b611cf64231d 100644 --- a/drivers/s390/crypto/zcrypt_ep11misc.h +++ b/drivers/s390/crypto/zcrypt_ep11misc.h @@ -113,7 +113,8 @@ int ep11_genaeskey(u16 card, u16 domain, u32 keybitsize, u32 keygenflags, * Generate EP11 AES secure key with given clear key value. */ int ep11_clr2keyblob(u16 cardnr, u16 domain, u32 keybitsize, u32 keygenflags, - const u8 *clrkey, u8 *keybuf, size_t *keybufsize); + const u8 *clrkey, u8 *keybuf, size_t *keybufsize, + u32 keytype); /* * Build a list of ep11 apqns meeting the following constrains: From d1fdfb0b2f339cf882c0b5431084a1950b8b73b9 Mon Sep 17 00:00:00 2001 From: Holger Dengler Date: Wed, 26 Jul 2023 16:22:19 +0200 Subject: [PATCH 47/66] s390/pkey: fix PKEY_TYPE_EP11_AES handling in PKEY_KBLOB2PROTK[23] Commit 'fa6999e326fe ("s390/pkey: support CCA and EP11 secure ECC private keys")' introduced a new PKEY_TYPE_EP11_AES type for the PKEY_KBLOB2PROTK2 and a new IOCTL, PKEY_KBLOB2PROTK3, which both allows userspace to convert opaque securekey blobs of this type into protectedkey blobs. Unfortunately, all PKEY_KBLOB2PROTK2 and PKEY_KBLOB2PROTK3 IOCTL requests with this keyblobs of this type return with an error (-EINVAL). Fix PKEY_TYPE_EP11_AES handling in PKEY_KBLOB2PROTK2 and PKEY_KBLOB2PROTK3 IOCTLs, so that userspace can convert PKEY_TYPE_EP11_AES keyblobs into protectedkey blobs. Add a helper function to decode the start and size of the internal header as well as start and size of the keyblob payload of an existing keyblob. Also validate the length of header and keyblob, as well as the keyblob magic. Introduce another helper function, which handles a raw key wrapping request and do the keyblob decoding in the calling function. Remove all other header-related calculations. Fixes: fa6999e326fe ("s390/pkey: support CCA and EP11 secure ECC private keys") Signed-off-by: Holger Dengler Reviewed-by: Ingo Franzki Signed-off-by: Heiko Carstens --- drivers/s390/crypto/pkey_api.c | 33 +++---- drivers/s390/crypto/zcrypt_ep11misc.c | 123 ++++++++++++++++---------- drivers/s390/crypto/zcrypt_ep11misc.h | 6 ++ 3 files changed, 100 insertions(+), 62 deletions(-) diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c index 7543757c82e2..75d7f0d5f14e 100644 --- a/drivers/s390/crypto/pkey_api.c +++ b/drivers/s390/crypto/pkey_api.c @@ -288,10 +288,9 @@ out: /* * Find card and transform EP11 secure key into protected key. */ -static int pkey_ep11key2pkey(const u8 *key, u8 *protkey, - u32 *protkeylen, u32 *protkeytype) +static int pkey_ep11key2pkey(const u8 *key, size_t keylen, + u8 *protkey, u32 *protkeylen, u32 *protkeytype) { - struct ep11keyblob *kb = (struct ep11keyblob *)key; u32 nr_apqns, *apqns = NULL; u16 card, dom; int i, rc; @@ -300,7 +299,8 @@ static int pkey_ep11key2pkey(const u8 *key, u8 *protkey, /* build a list of apqns suitable for this key */ rc = ep11_findcard2(&apqns, &nr_apqns, 0xFFFF, 0xFFFF, - ZCRYPT_CEX7, EP11_API_V, kb->wkvp); + ZCRYPT_CEX7, EP11_API_V, + ep11_kb_wkvp(key, keylen)); if (rc) goto out; @@ -308,7 +308,7 @@ static int pkey_ep11key2pkey(const u8 *key, u8 *protkey, for (rc = -ENODEV, i = 0; i < nr_apqns; i++) { card = apqns[i] >> 16; dom = apqns[i] & 0xFFFF; - rc = ep11_kblob2protkey(card, dom, key, kb->head.len, + rc = ep11_kblob2protkey(card, dom, key, keylen, protkey, protkeylen, protkeytype); if (rc == 0) break; @@ -496,7 +496,7 @@ try_via_ep11: tmpbuf, &tmpbuflen); if (rc) goto failure; - rc = pkey_ep11key2pkey(tmpbuf, + rc = pkey_ep11key2pkey(tmpbuf, tmpbuflen, protkey, protkeylen, protkeytype); if (!rc) goto out; @@ -612,7 +612,7 @@ static int pkey_nonccatok2pkey(const u8 *key, u32 keylen, rc = ep11_check_aes_key(debug_info, 3, key, keylen, 1); if (rc) goto out; - rc = pkey_ep11key2pkey(key, + rc = pkey_ep11key2pkey(key, keylen, protkey, protkeylen, protkeytype); break; } @@ -621,7 +621,7 @@ static int pkey_nonccatok2pkey(const u8 *key, u32 keylen, rc = ep11_check_aes_key_with_hdr(debug_info, 3, key, keylen, 1); if (rc) goto out; - rc = pkey_ep11key2pkey(key + sizeof(struct ep11kblob_header), + rc = pkey_ep11key2pkey(key, keylen, protkey, protkeylen, protkeytype); break; default: @@ -963,10 +963,12 @@ static int pkey_keyblob2pkey2(const struct pkey_apqn *apqns, size_t nr_apqns, } } else if (hdr->type == TOKTYPE_NON_CCA) { if (hdr->version == TOKVER_EP11_AES) { - if (keylen < sizeof(struct ep11keyblob)) - return -EINVAL; if (ep11_check_aes_key(debug_info, 3, key, keylen, 1)) return -EINVAL; + } else if (hdr->version == TOKVER_EP11_AES_WITH_HEADER) { + if (ep11_check_aes_key_with_hdr(debug_info, 3, + key, keylen, 1)) + return -EINVAL; } else { return pkey_nonccatok2pkey(key, keylen, protkey, protkeylen, @@ -994,10 +996,7 @@ static int pkey_keyblob2pkey2(const struct pkey_apqn *apqns, size_t nr_apqns, protkey, protkeylen, protkeytype); } else { - /* EP11 AES secure key blob */ - struct ep11keyblob *kb = (struct ep11keyblob *)key; - - rc = ep11_kblob2protkey(card, dom, key, kb->head.len, + rc = ep11_kblob2protkey(card, dom, key, keylen, protkey, protkeylen, protkeytype); } @@ -1257,12 +1256,14 @@ static int pkey_keyblob2pkey3(const struct pkey_apqn *apqns, size_t nr_apqns, hdr->version == TOKVER_EP11_ECC_WITH_HEADER) && is_ep11_keyblob(key + sizeof(struct ep11kblob_header))) rc = ep11_kblob2protkey(card, dom, key, hdr->len, - protkey, protkeylen, protkeytype); + protkey, protkeylen, + protkeytype); else if (hdr->type == TOKTYPE_NON_CCA && hdr->version == TOKVER_EP11_AES && is_ep11_keyblob(key)) rc = ep11_kblob2protkey(card, dom, key, hdr->len, - protkey, protkeylen, protkeytype); + protkey, protkeylen, + protkeytype); else if (hdr->type == TOKTYPE_CCA_INTERNAL && hdr->version == TOKVER_CCA_AES) rc = cca_sec2protkey(card, dom, key, protkey, diff --git a/drivers/s390/crypto/zcrypt_ep11misc.c b/drivers/s390/crypto/zcrypt_ep11misc.c index 355d30bc0aac..669ad6f5d5b0 100644 --- a/drivers/s390/crypto/zcrypt_ep11misc.c +++ b/drivers/s390/crypto/zcrypt_ep11misc.c @@ -157,6 +157,65 @@ out: return rc; } +static int ep11_kb_decode(const u8 *kb, size_t kblen, + struct ep11kblob_header **kbhdr, size_t *kbhdrsize, + struct ep11keyblob **kbpl, size_t *kbplsize) +{ + struct ep11kblob_header *tmph, *hdr = NULL; + size_t hdrsize = 0, plsize = 0; + struct ep11keyblob *pl = NULL; + int rc = -EINVAL; + u8 *tmpp; + + if (kblen < sizeof(struct ep11kblob_header)) + goto out; + tmph = (struct ep11kblob_header *)kb; + + if (tmph->type != TOKTYPE_NON_CCA && + tmph->len > kblen) + goto out; + + if (ep11_kb_split(kb, kblen, tmph->version, + &hdr, &hdrsize, &tmpp, &plsize)) + goto out; + + if (plsize < sizeof(struct ep11keyblob)) + goto out; + + if (!is_ep11_keyblob(tmpp)) + goto out; + + pl = (struct ep11keyblob *)tmpp; + plsize = hdr->len - hdrsize; + + if (kbhdr) + *kbhdr = hdr; + if (kbhdrsize) + *kbhdrsize = hdrsize; + if (kbpl) + *kbpl = pl; + if (kbplsize) + *kbplsize = plsize; + + rc = 0; +out: + return rc; +} + +/* + * For valid ep11 keyblobs, returns a reference to the wrappingkey verification + * pattern. Otherwise NULL. + */ +const u8 *ep11_kb_wkvp(const u8 *keyblob, size_t keybloblen) +{ + struct ep11keyblob *kb; + + if (ep11_kb_decode(keyblob, keybloblen, NULL, NULL, &kb, NULL)) + return NULL; + return kb->wkvp; +} +EXPORT_SYMBOL(ep11_kb_wkvp); + /* * Simple check if the key blob is a valid EP11 AES key blob with header. */ @@ -1170,10 +1229,10 @@ static int ep11_unwrapkey(u16 card, u16 domain, return 0; } -static int ep11_wrapkey(u16 card, u16 domain, - const u8 *key, size_t keysize, - u32 mech, const u8 *iv, - u8 *databuf, size_t *datasize) +static int _ep11_wrapkey(u16 card, u16 domain, + const u8 *key, size_t keysize, + u32 mech, const u8 *iv, + u8 *databuf, size_t *datasize) { struct wk_req_pl { struct pl_head head; @@ -1203,20 +1262,10 @@ static int ep11_wrapkey(u16 card, u16 domain, struct ep11_cprb *req = NULL, *rep = NULL; struct ep11_target_dev target; struct ep11_urb *urb = NULL; - struct ep11keyblob *kb; size_t req_pl_size; int api, rc = -ENOMEM; - bool has_header = false; u8 *p; - /* maybe the session field holds a header with key info */ - kb = (struct ep11keyblob *)key; - if (kb->head.type == TOKTYPE_NON_CCA && - kb->head.version == TOKVER_EP11_AES) { - has_header = true; - keysize = min_t(size_t, kb->head.len, keysize); - } - /* request cprb and payload */ req_pl_size = sizeof(struct wk_req_pl) + (iv ? 16 : 0) + ASN1TAGLEN(keysize) + 4; @@ -1241,11 +1290,6 @@ static int ep11_wrapkey(u16 card, u16 domain, } /* key blob */ p += asn1tag_write(p, 0x04, key, keysize); - /* maybe the key argument needs the head data cleaned out */ - if (has_header) { - kb = (struct ep11keyblob *)(p - keysize); - memset(&kb->head, 0, sizeof(kb->head)); - } /* empty kek tag */ *p++ = 0x04; *p++ = 0; @@ -1366,11 +1410,12 @@ out: } EXPORT_SYMBOL(ep11_clr2keyblob); -int ep11_kblob2protkey(u16 card, u16 dom, const u8 *keyblob, size_t keybloblen, +int ep11_kblob2protkey(u16 card, u16 dom, + const u8 *keyblob, size_t keybloblen, u8 *protkey, u32 *protkeylen, u32 *protkeytype) { - int rc = -EIO; - u8 *wkbuf = NULL; + struct ep11kblob_header *hdr; + struct ep11keyblob *key; size_t wkbuflen, keylen; struct wk_info { u16 version; @@ -1381,31 +1426,17 @@ int ep11_kblob2protkey(u16 card, u16 dom, const u8 *keyblob, size_t keybloblen, u8 res2[8]; u8 pkey[]; } __packed * wki; - const u8 *key; - struct ep11kblob_header *hdr; + u8 *wkbuf = NULL; + int rc = -EIO; - /* key with or without header ? */ - hdr = (struct ep11kblob_header *)keyblob; - if (hdr->type == TOKTYPE_NON_CCA && - (hdr->version == TOKVER_EP11_AES_WITH_HEADER || - hdr->version == TOKVER_EP11_ECC_WITH_HEADER) && - is_ep11_keyblob(keyblob + sizeof(struct ep11kblob_header))) { - /* EP11 AES or ECC key with header */ - key = keyblob + sizeof(struct ep11kblob_header); - keylen = hdr->len - sizeof(struct ep11kblob_header); - } else if (hdr->type == TOKTYPE_NON_CCA && - hdr->version == TOKVER_EP11_AES && - is_ep11_keyblob(keyblob)) { - /* EP11 AES key (old style) */ - key = keyblob; - keylen = hdr->len; - } else if (is_ep11_keyblob(keyblob)) { - /* raw EP11 key blob */ - key = keyblob; - keylen = keybloblen; - } else { + if (ep11_kb_decode((u8 *)keyblob, keybloblen, &hdr, NULL, &key, &keylen)) return -EINVAL; + + if (hdr->version == TOKVER_EP11_AES) { + /* wipe overlayed header */ + memset(hdr, 0, sizeof(*hdr)); } + /* !!! hdr is no longer a valid header !!! */ /* alloc temp working buffer */ wkbuflen = (keylen + AES_BLOCK_SIZE) & (~(AES_BLOCK_SIZE - 1)); @@ -1414,8 +1445,8 @@ int ep11_kblob2protkey(u16 card, u16 dom, const u8 *keyblob, size_t keybloblen, return -ENOMEM; /* ep11 secure key -> protected key + info */ - rc = ep11_wrapkey(card, dom, key, keylen, - 0, def_iv, wkbuf, &wkbuflen); + rc = _ep11_wrapkey(card, dom, (u8 *)key, keylen, + 0, def_iv, wkbuf, &wkbuflen); if (rc) { DEBUG_ERR( "%s rewrapping ep11 key to pkey failed, rc=%d\n", diff --git a/drivers/s390/crypto/zcrypt_ep11misc.h b/drivers/s390/crypto/zcrypt_ep11misc.h index b611cf64231d..a0de1cccebbe 100644 --- a/drivers/s390/crypto/zcrypt_ep11misc.h +++ b/drivers/s390/crypto/zcrypt_ep11misc.h @@ -48,6 +48,12 @@ static inline bool is_ep11_keyblob(const u8 *key) return (kb->version == EP11_STRUCT_MAGIC); } +/* + * For valid ep11 keyblobs, returns a reference to the wrappingkey verification + * pattern. Otherwise NULL. + */ +const u8 *ep11_kb_wkvp(const u8 *kblob, size_t kbloblen); + /* * Simple check if the key blob is a valid EP11 AES key blob with header. * If checkcpacfexport is enabled, the key is also checked for the From 745742dbca11a1b63684ec7032a81aaedcf51fb0 Mon Sep 17 00:00:00 2001 From: Holger Dengler Date: Tue, 25 Jul 2023 13:05:36 +0200 Subject: [PATCH 48/66] s390/pkey: fix PKEY_TYPE_EP11_AES handling in PKEY_VERIFYKEY2 IOCTL Commit 'fa6999e326fe ("s390/pkey: support CCA and EP11 secure ECC private keys")' introduced a new PKEY_TYPE_EP11_AES type for the PKEY_VERIFYKEY2 IOCTL to verify keyblobs of this type. Unfortunately, all PKEY_VERIFYKEY2 IOCTL requests with keyblobs of this type return with an error (-EINVAL). Fix PKEY_TYPE_EP11_AES handling in PKEY_VERIFYKEY2 IOCTL, so that userspace can verify keyblobs of this type. Fixes: fa6999e326fe ("s390/pkey: support CCA and EP11 secure ECC private keys") Signed-off-by: Holger Dengler Reviewed-by: Ingo Franzki Signed-off-by: Heiko Carstens --- drivers/s390/crypto/pkey_api.c | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c index 75d7f0d5f14e..8d6f35ccc561 100644 --- a/drivers/s390/crypto/pkey_api.c +++ b/drivers/s390/crypto/pkey_api.c @@ -912,7 +912,8 @@ static int pkey_verifykey2(const u8 *key, size_t keylen, *ksize = kb->head.bitlen; rc = ep11_findcard2(&_apqns, &_nr_apqns, *cardnr, *domain, - ZCRYPT_CEX7, EP11_API_V, kb->wkvp); + ZCRYPT_CEX7, EP11_API_V, + ep11_kb_wkvp(key, keylen)); if (rc) goto out; @@ -922,6 +923,30 @@ static int pkey_verifykey2(const u8 *key, size_t keylen, *cardnr = ((struct pkey_apqn *)_apqns)->card; *domain = ((struct pkey_apqn *)_apqns)->domain; + } else if (hdr->type == TOKTYPE_NON_CCA && + hdr->version == TOKVER_EP11_AES_WITH_HEADER) { + struct ep11kblob_header *kh = (struct ep11kblob_header *)key; + + rc = ep11_check_aes_key_with_hdr(debug_info, 3, + key, keylen, 1); + if (rc) + goto out; + if (ktype) + *ktype = PKEY_TYPE_EP11_AES; + if (ksize) + *ksize = kh->bitlen; + + rc = ep11_findcard2(&_apqns, &_nr_apqns, *cardnr, *domain, + ZCRYPT_CEX7, EP11_API_V, + ep11_kb_wkvp(key, keylen)); + if (rc) + goto out; + + if (flags) + *flags = PKEY_FLAGS_MATCH_CUR_MKVP; + + *cardnr = ((struct pkey_apqn *)_apqns)->card; + *domain = ((struct pkey_apqn *)_apqns)->domain; } else { rc = -EINVAL; } From b9352e4b9b9eff949bcc6907b8569b3a1d992f1e Mon Sep 17 00:00:00 2001 From: Holger Dengler Date: Fri, 4 Aug 2023 16:02:58 +0200 Subject: [PATCH 49/66] s390/pkey: fix PKEY_TYPE_EP11_AES handling for sysfs attributes Commit 'fa6999e326fe ("s390/pkey: support CCA and EP11 secure ECC private keys")' introduced a new PKEY_TYPE_EP11_AES securekey type as a supplement to the existing PKEY_TYPE_EP11 (which won't work in environments with session-bound keys). The pkey EP11 securekey attributes use PKEY_TYPE_EP11_AES (instead of PKEY_TYPE_EP11) keyblobs, to make the generated keyblobs usable also in environments, where session-bound keys are required. There should be no negative impacts to userspace because the internal structure of the keyblobs is opaque. The increased size of the generated keyblobs is reflected by the changed size of the attributes. Fixes: fa6999e326fe ("s390/pkey: support CCA and EP11 secure ECC private keys") Signed-off-by: Holger Dengler Reviewed-by: Ingo Franzki Signed-off-by: Heiko Carstens --- arch/s390/include/uapi/asm/pkey.h | 2 +- drivers/s390/crypto/pkey_api.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/s390/include/uapi/asm/pkey.h b/arch/s390/include/uapi/asm/pkey.h index 5faf0a1d2c16..5ad76471e73f 100644 --- a/arch/s390/include/uapi/asm/pkey.h +++ b/arch/s390/include/uapi/asm/pkey.h @@ -26,7 +26,7 @@ #define MAXCLRKEYSIZE 32 /* a clear key value may be up to 32 bytes */ #define MAXAESCIPHERKEYSIZE 136 /* our aes cipher keys have always 136 bytes */ #define MINEP11AESKEYBLOBSIZE 256 /* min EP11 AES key blob size */ -#define MAXEP11AESKEYBLOBSIZE 320 /* max EP11 AES key blob size */ +#define MAXEP11AESKEYBLOBSIZE 336 /* max EP11 AES key blob size */ /* Minimum size of a key blob */ #define MINKEYBLOBSIZE SECKEYBLOBSIZE diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c index 8d6f35ccc561..396a159afdf5 100644 --- a/drivers/s390/crypto/pkey_api.c +++ b/drivers/s390/crypto/pkey_api.c @@ -2142,7 +2142,7 @@ static struct attribute_group ccacipher_attr_group = { * (i.e. off != 0 or count < key blob size) -EINVAL is returned. * This function and the sysfs attributes using it provide EP11 key blobs * padded to the upper limit of MAXEP11AESKEYBLOBSIZE which is currently - * 320 bytes. + * 336 bytes. */ static ssize_t pkey_ep11_aes_attr_read(enum pkey_key_size keybits, bool is_xts, char *buf, loff_t off, @@ -2171,7 +2171,7 @@ static ssize_t pkey_ep11_aes_attr_read(enum pkey_key_size keybits, card = apqns[i] >> 16; dom = apqns[i] & 0xFFFF; rc = ep11_genaeskey(card, dom, keybits, 0, buf, &keysize, - PKEY_TYPE_EP11); + PKEY_TYPE_EP11_AES); if (rc == 0) break; } @@ -2182,7 +2182,7 @@ static ssize_t pkey_ep11_aes_attr_read(enum pkey_key_size keybits, keysize = MAXEP11AESKEYBLOBSIZE; buf += MAXEP11AESKEYBLOBSIZE; rc = ep11_genaeskey(card, dom, keybits, 0, buf, &keysize, - PKEY_TYPE_EP11); + PKEY_TYPE_EP11_AES); if (rc == 0) return 2 * MAXEP11AESKEYBLOBSIZE; } From cba33db3fc4dbf2e54294b0e499d2335a3a00d78 Mon Sep 17 00:00:00 2001 From: Holger Dengler Date: Wed, 9 Aug 2023 14:23:45 +0200 Subject: [PATCH 50/66] s390/paes: fix PKEY_TYPE_EP11_AES handling for secure keyblobs Commit 'fa6999e326fe ("s390/pkey: support CCA and EP11 secure ECC private keys")' introduced PKEY_TYPE_EP11_AES securekey blobs as a supplement to the PKEY_TYPE_EP11 (which won't work in environments with session-bound keys). This new keyblobs has a different maximum size, so fix paes crypto module to accept also these larger keyblobs. Fixes: fa6999e326fe ("s390/pkey: support CCA and EP11 secure ECC private keys") Signed-off-by: Holger Dengler Reviewed-by: Ingo Franzki Signed-off-by: Heiko Carstens --- arch/s390/crypto/paes_s390.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c index 38349150c96e..8b541e44151d 100644 --- a/arch/s390/crypto/paes_s390.c +++ b/arch/s390/crypto/paes_s390.c @@ -35,7 +35,7 @@ * and padding is also possible, the limits need to be generous. */ #define PAES_MIN_KEYSIZE 16 -#define PAES_MAX_KEYSIZE 320 +#define PAES_MAX_KEYSIZE MAXEP11AESKEYBLOBSIZE static u8 *ctrblk; static DEFINE_MUTEX(ctrblk_lock); From 386cb81e4ba7811573765aaaeb91b472639c2bae Mon Sep 17 00:00:00 2001 From: Holger Dengler Date: Fri, 11 Aug 2023 16:56:20 +0200 Subject: [PATCH 51/66] s390/zcrypt_ep11misc: support API ordinal 6 with empty pin-blob Secure execution guest environments require an empty pinblob in all key generation and unwrap requests. Empty pinblobs are only available in EP11 API ordinal 6 or higher. Add an empty pinblob to key generation and unwrap requests, if the AP secure binding facility is available. In all other cases, stay with the empty pin tag (no pinblob) and the current API ordinals. The EP11 API ordinal also needs to be considered when the pkey module tries to figure out the list of eligible cards for key operations with protected keys in secure execution environment. These changes are transparent to userspace but required for running an secure execution guest with handling key generate and key derive (e.g. secure key to protected key) correct. Especially using EP11 secure keys with the kernel dm-crypt layer requires this patch. Co-developed-by: Harald Freudenberger Signed-off-by: Harald Freudenberger Signed-off-by: Holger Dengler Reviewed-by: Ingo Franzki Signed-off-by: Heiko Carstens --- drivers/s390/crypto/ap_bus.c | 9 ++++ drivers/s390/crypto/ap_bus.h | 1 + drivers/s390/crypto/pkey_api.c | 27 ++++++++---- drivers/s390/crypto/zcrypt_ep11misc.c | 60 ++++++++++++++++++++------- drivers/s390/crypto/zcrypt_ep11misc.h | 4 +- 5 files changed, 76 insertions(+), 25 deletions(-) diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index b1d2fedea086..339812efe822 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -218,6 +218,15 @@ int ap_sb_available(void) return 0; } +/* + * ap_is_se_guest(): Check for SE guest with AP pass-through support. + */ +bool ap_is_se_guest(void) +{ + return is_prot_virt_guest() && ap_sb_available(); +} +EXPORT_SYMBOL(ap_is_se_guest); + /* * ap_fetch_qci_info(): Fetch cryptographic config info * diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index 47bbe9babc59..be54b070c031 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -274,6 +274,7 @@ void ap_flush_queue(struct ap_queue *aq); void *ap_airq_ptr(void); int ap_sb_available(void); +bool ap_is_se_guest(void); void ap_wait(enum ap_sm_wait wait); void ap_request_timeout(struct timer_list *t); void ap_bus_force_rescan(void); diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c index 396a159afdf5..6cfb6b2340c9 100644 --- a/drivers/s390/crypto/pkey_api.c +++ b/drivers/s390/crypto/pkey_api.c @@ -263,7 +263,9 @@ static int pkey_clr2ep11key(const u8 *clrkey, size_t clrkeylen, /* build a list of apqns suitable for ep11 keys with cpacf support */ rc = ep11_findcard2(&apqns, &nr_apqns, 0xFFFF, 0xFFFF, - ZCRYPT_CEX7, EP11_API_V, NULL); + ZCRYPT_CEX7, + ap_is_se_guest() ? EP11_API_V6 : EP11_API_V4, + NULL); if (rc) goto out; @@ -299,7 +301,8 @@ static int pkey_ep11key2pkey(const u8 *key, size_t keylen, /* build a list of apqns suitable for this key */ rc = ep11_findcard2(&apqns, &nr_apqns, 0xFFFF, 0xFFFF, - ZCRYPT_CEX7, EP11_API_V, + ZCRYPT_CEX7, + ap_is_se_guest() ? EP11_API_V6 : EP11_API_V4, ep11_kb_wkvp(key, keylen)); if (rc) goto out; @@ -902,6 +905,7 @@ static int pkey_verifykey2(const u8 *key, size_t keylen, } else if (hdr->type == TOKTYPE_NON_CCA && hdr->version == TOKVER_EP11_AES) { struct ep11keyblob *kb = (struct ep11keyblob *)key; + int api; rc = ep11_check_aes_key(debug_info, 3, key, keylen, 1); if (rc) @@ -911,8 +915,9 @@ static int pkey_verifykey2(const u8 *key, size_t keylen, if (ksize) *ksize = kb->head.bitlen; + api = ap_is_se_guest() ? EP11_API_V6 : EP11_API_V4; rc = ep11_findcard2(&_apqns, &_nr_apqns, *cardnr, *domain, - ZCRYPT_CEX7, EP11_API_V, + ZCRYPT_CEX7, api, ep11_kb_wkvp(key, keylen)); if (rc) goto out; @@ -926,6 +931,7 @@ static int pkey_verifykey2(const u8 *key, size_t keylen, } else if (hdr->type == TOKTYPE_NON_CCA && hdr->version == TOKVER_EP11_AES_WITH_HEADER) { struct ep11kblob_header *kh = (struct ep11kblob_header *)key; + int api; rc = ep11_check_aes_key_with_hdr(debug_info, 3, key, keylen, 1); @@ -936,8 +942,9 @@ static int pkey_verifykey2(const u8 *key, size_t keylen, if (ksize) *ksize = kh->bitlen; + api = ap_is_se_guest() ? EP11_API_V6 : EP11_API_V4; rc = ep11_findcard2(&_apqns, &_nr_apqns, *cardnr, *domain, - ZCRYPT_CEX7, EP11_API_V, + ZCRYPT_CEX7, api, ep11_kb_wkvp(key, keylen)); if (rc) goto out; @@ -1056,7 +1063,7 @@ static int pkey_apqns4key(const u8 *key, size_t keylen, u32 flags, return -EINVAL; if (kb->attr & EP11_BLOB_PKEY_EXTRACTABLE) { minhwtype = ZCRYPT_CEX7; - api = EP11_API_V; + api = ap_is_se_guest() ? EP11_API_V6 : EP11_API_V4; } rc = ep11_findcard2(&_apqns, &_nr_apqns, 0xFFFF, 0xFFFF, minhwtype, api, kb->wkvp); @@ -1072,7 +1079,7 @@ static int pkey_apqns4key(const u8 *key, size_t keylen, u32 flags, return -EINVAL; if (kb->attr & EP11_BLOB_PKEY_EXTRACTABLE) { minhwtype = ZCRYPT_CEX7; - api = EP11_API_V; + api = ap_is_se_guest() ? EP11_API_V6 : EP11_API_V4; } rc = ep11_findcard2(&_apqns, &_nr_apqns, 0xFFFF, 0xFFFF, minhwtype, api, kb->wkvp); @@ -1182,11 +1189,13 @@ static int pkey_apqns4keytype(enum pkey_key_type ktype, ktype == PKEY_TYPE_EP11_AES || ktype == PKEY_TYPE_EP11_ECC) { u8 *wkvp = NULL; + int api; if (flags & PKEY_FLAGS_MATCH_CUR_MKVP) wkvp = cur_mkvp; + api = ap_is_se_guest() ? EP11_API_V6 : EP11_API_V4; rc = ep11_findcard2(&_apqns, &_nr_apqns, 0xFFFF, 0xFFFF, - ZCRYPT_CEX7, EP11_API_V, wkvp); + ZCRYPT_CEX7, api, wkvp); if (rc) goto out; @@ -2160,7 +2169,9 @@ static ssize_t pkey_ep11_aes_attr_read(enum pkey_key_size keybits, /* build a list of apqns able to generate an cipher key */ rc = ep11_findcard2(&apqns, &nr_apqns, 0xFFFF, 0xFFFF, - ZCRYPT_CEX7, EP11_API_V, NULL); + ZCRYPT_CEX7, + ap_is_se_guest() ? EP11_API_V6 : EP11_API_V4, + NULL); if (rc) return rc; diff --git a/drivers/s390/crypto/zcrypt_ep11misc.c b/drivers/s390/crypto/zcrypt_ep11misc.c index 669ad6f5d5b0..0a877f9792c2 100644 --- a/drivers/s390/crypto/zcrypt_ep11misc.c +++ b/drivers/s390/crypto/zcrypt_ep11misc.c @@ -29,6 +29,8 @@ #define DEBUG_WARN(...) ZCRYPT_DBF(DBF_WARN, ##__VA_ARGS__) #define DEBUG_ERR(...) ZCRYPT_DBF(DBF_ERR, ##__VA_ARGS__) +#define EP11_PINBLOB_V1_BYTES 56 + /* default iv used here */ static const u8 def_iv[16] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff }; @@ -592,7 +594,7 @@ static int ep11_query_info(u16 cardnr, u16 domain, u32 query_type, struct ep11_cprb *req = NULL, *rep = NULL; struct ep11_target_dev target; struct ep11_urb *urb = NULL; - int api = 1, rc = -ENOMEM; + int api = EP11_API_V1, rc = -ENOMEM; /* request cprb and payload */ req = alloc_cprb(sizeof(struct ep11_info_req_pl)); @@ -789,8 +791,7 @@ static int _ep11_genaeskey(u16 card, u16 domain, u32 attr_bool_bits; u32 attr_val_len_type; u32 attr_val_len_value; - u8 pin_tag; - u8 pin_len; + /* followed by empty pin tag or empty pinblob tag */ } __packed * req_pl; struct keygen_rep_pl { struct pl_head head; @@ -803,9 +804,11 @@ static int _ep11_genaeskey(u16 card, u16 domain, u8 data[512]; } __packed * rep_pl; struct ep11_cprb *req = NULL, *rep = NULL; + size_t req_pl_size, pinblob_size = 0; struct ep11_target_dev target; struct ep11_urb *urb = NULL; int api, rc = -ENOMEM; + u8 *p; switch (keybitsize) { case 128: @@ -821,12 +824,22 @@ static int _ep11_genaeskey(u16 card, u16 domain, } /* request cprb and payload */ - req = alloc_cprb(sizeof(struct keygen_req_pl)); + api = (!keygenflags || keygenflags & 0x00200000) ? + EP11_API_V4 : EP11_API_V1; + if (ap_is_se_guest()) { + /* + * genkey within SE environment requires API ordinal 6 + * with empty pinblob + */ + api = EP11_API_V6; + pinblob_size = EP11_PINBLOB_V1_BYTES; + } + req_pl_size = sizeof(struct keygen_req_pl) + ASN1TAGLEN(pinblob_size); + req = alloc_cprb(req_pl_size); if (!req) goto out; req_pl = (struct keygen_req_pl *)(((u8 *)req) + sizeof(*req)); - api = (!keygenflags || keygenflags & 0x00200000) ? 4 : 1; - prep_head(&req_pl->head, sizeof(*req_pl), api, 21); /* GenerateKey */ + prep_head(&req_pl->head, req_pl_size, api, 21); /* GenerateKey */ req_pl->var_tag = 0x04; req_pl->var_len = sizeof(u32); req_pl->keybytes_tag = 0x04; @@ -842,7 +855,10 @@ static int _ep11_genaeskey(u16 card, u16 domain, req_pl->attr_bool_bits = keygenflags ? keygenflags : KEY_ATTR_DEFAULTS; req_pl->attr_val_len_type = 0x00000161; /* CKA_VALUE_LEN */ req_pl->attr_val_len_value = keybitsize / 8; - req_pl->pin_tag = 0x04; + p = ((u8 *)req_pl) + sizeof(*req_pl); + /* pin tag */ + *p++ = 0x04; + *p++ = pinblob_size; /* reply cprb and payload */ rep = alloc_cprb(sizeof(struct keygen_rep_pl)); @@ -857,7 +873,7 @@ static int _ep11_genaeskey(u16 card, u16 domain, target.ap_id = card; target.dom_id = domain; prep_urb(urb, &target, 1, - req, sizeof(*req) + sizeof(*req_pl), + req, sizeof(*req) + req_pl_size, rep, sizeof(*rep) + sizeof(*rep_pl)); rc = zcrypt_send_ep11_cprb(urb); @@ -965,7 +981,7 @@ static int ep11_cryptsingle(u16 card, u16 domain, struct ep11_target_dev target; struct ep11_urb *urb = NULL; size_t req_pl_size, rep_pl_size; - int n, api = 1, rc = -ENOMEM; + int n, api = EP11_API_V1, rc = -ENOMEM; u8 *p; /* the simple asn1 coding used has length limits */ @@ -1084,7 +1100,7 @@ static int _ep11_unwrapkey(u16 card, u16 domain, * maybe followed by iv data * followed by kek tag + kek blob * followed by empty mac tag - * followed by empty pin tag + * followed by empty pin tag or empty pinblob tag * followed by encryted key tag + bytes */ } __packed * req_pl; @@ -1099,20 +1115,30 @@ static int _ep11_unwrapkey(u16 card, u16 domain, u8 data[512]; } __packed * rep_pl; struct ep11_cprb *req = NULL, *rep = NULL; + size_t req_pl_size, pinblob_size = 0; struct ep11_target_dev target; struct ep11_urb *urb = NULL; - size_t req_pl_size; int api, rc = -ENOMEM; u8 *p; /* request cprb and payload */ + api = (!keygenflags || keygenflags & 0x00200000) ? + EP11_API_V4 : EP11_API_V1; + if (ap_is_se_guest()) { + /* + * unwrap within SE environment requires API ordinal 6 + * with empty pinblob + */ + api = EP11_API_V6; + pinblob_size = EP11_PINBLOB_V1_BYTES; + } req_pl_size = sizeof(struct uw_req_pl) + (iv ? 16 : 0) - + ASN1TAGLEN(keksize) + 4 + ASN1TAGLEN(enckeysize); + + ASN1TAGLEN(keksize) + ASN1TAGLEN(0) + + ASN1TAGLEN(pinblob_size) + ASN1TAGLEN(enckeysize); req = alloc_cprb(req_pl_size); if (!req) goto out; req_pl = (struct uw_req_pl *)(((u8 *)req) + sizeof(*req)); - api = (!keygenflags || keygenflags & 0x00200000) ? 4 : 1; prep_head(&req_pl->head, req_pl_size, api, 34); /* UnwrapKey */ req_pl->attr_tag = 0x04; req_pl->attr_len = 7 * sizeof(u32); @@ -1137,9 +1163,10 @@ static int _ep11_unwrapkey(u16 card, u16 domain, /* empty mac key tag */ *p++ = 0x04; *p++ = 0; - /* empty pin tag */ + /* pin tag */ *p++ = 0x04; - *p++ = 0; + *p++ = pinblob_size; + p += pinblob_size; /* encrypted key value tag and bytes */ p += asn1tag_write(p, 0x04, enckey, enckeysize); @@ -1275,7 +1302,8 @@ static int _ep11_wrapkey(u16 card, u16 domain, if (!mech || mech == 0x80060001) req->flags |= 0x20; /* CPACF_WRAP needs special bit */ req_pl = (struct wk_req_pl *)(((u8 *)req) + sizeof(*req)); - api = (!mech || mech == 0x80060001) ? 4 : 1; /* CKM_IBM_CPACF_WRAP */ + api = (!mech || mech == 0x80060001) ? /* CKM_IBM_CPACF_WRAP */ + EP11_API_V4 : EP11_API_V1; prep_head(&req_pl->head, req_pl_size, api, 33); /* WrapKey */ req_pl->var_tag = 0x04; req_pl->var_len = sizeof(u32); diff --git a/drivers/s390/crypto/zcrypt_ep11misc.h b/drivers/s390/crypto/zcrypt_ep11misc.h index a0de1cccebbe..9d17fd5228a7 100644 --- a/drivers/s390/crypto/zcrypt_ep11misc.h +++ b/drivers/s390/crypto/zcrypt_ep11misc.h @@ -12,7 +12,9 @@ #include #include -#define EP11_API_V 4 /* highest known and supported EP11 API version */ +#define EP11_API_V1 1 /* min EP11 API, default if no higher api required */ +#define EP11_API_V4 4 /* supported EP11 API for the ep11misc cprbs */ +#define EP11_API_V6 6 /* min EP11 API for some cprbs in SE environment */ #define EP11_STRUCT_MAGIC 0x1234 #define EP11_BLOB_PKEY_EXTRACTABLE 0x00200000 From 979fe44af819d76fc02a5a97ad1cb74f7d83578e Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Wed, 16 Aug 2023 15:29:42 +0200 Subject: [PATCH 52/66] s390/ipl: fix virtual vs physical address confusion The value of ipl_cert_list_addr boot variable contains a physical address, which is used directly. That works because virtual and physical address spaces are currently the same, but otherwise it is wrong. While at it, fix also a comment for the platform keyring. Signed-off-by: Alexander Gordeev Reviewed-by: Mimi Zohar Acked-by: Jarkko Sakkinen Link: https://lore.kernel.org/r/20230816132942.2540411-1-agordeev@linux.ibm.com Signed-off-by: Heiko Carstens --- arch/s390/kernel/machine_kexec_file.c | 4 ++-- arch/s390/kernel/setup.c | 2 +- security/integrity/platform_certs/load_ipl_s390.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c index 2df94d32140c..8d207b82d9fe 100644 --- a/arch/s390/kernel/machine_kexec_file.c +++ b/arch/s390/kernel/machine_kexec_file.c @@ -188,7 +188,7 @@ static int kexec_file_add_ipl_report(struct kimage *image, data->memsz = ALIGN(data->memsz, PAGE_SIZE); buf.mem = data->memsz; - ptr = (void *)ipl_cert_list_addr; + ptr = __va(ipl_cert_list_addr); end = ptr + ipl_cert_list_size; ncerts = 0; while (ptr < end) { @@ -200,7 +200,7 @@ static int kexec_file_add_ipl_report(struct kimage *image, addr = data->memsz + data->report->size; addr += ncerts * sizeof(struct ipl_rb_certificate_entry); - ptr = (void *)ipl_cert_list_addr; + ptr = __va(ipl_cert_list_addr); while (ptr < end) { len = *(unsigned int *)ptr; ptr += sizeof(len); diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 393dd8385506..c744104e4a9c 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -875,7 +875,7 @@ static void __init log_component_list(void) pr_info("Linux is running with Secure-IPL enabled\n"); else pr_info("Linux is running with Secure-IPL disabled\n"); - ptr = (void *) early_ipl_comp_list_addr; + ptr = __va(early_ipl_comp_list_addr); end = (void *) ptr + early_ipl_comp_list_size; pr_info("The IPL report contains the following components:\n"); while (ptr < end) { diff --git a/security/integrity/platform_certs/load_ipl_s390.c b/security/integrity/platform_certs/load_ipl_s390.c index e769dcb7ea94..c7c381a9ddaa 100644 --- a/security/integrity/platform_certs/load_ipl_s390.c +++ b/security/integrity/platform_certs/load_ipl_s390.c @@ -22,8 +22,8 @@ static int __init load_ipl_certs(void) if (!ipl_cert_list_addr) return 0; - /* Copy the certificates to the system keyring */ - ptr = (void *) ipl_cert_list_addr; + /* Copy the certificates to the platform keyring */ + ptr = __va(ipl_cert_list_addr); end = ptr + ipl_cert_list_size; while ((void *) ptr < end) { len = *(unsigned int *) ptr; From cfd012107f11ec4af010f11eca341edc831abf6c Mon Sep 17 00:00:00 2001 From: Justin Stitt Date: Fri, 11 Aug 2023 21:56:15 +0000 Subject: [PATCH 53/66] s390/ipl: refactor deprecated strncpy `strncpy` is deprecated for use on NUL-terminated destination strings [1]. Use `strscpy` which has the same behavior as `strncpy` here with the extra safeguard of guaranteeing NUL-termination of destination strings. In it's current form, this may result in silent truncation if the src string has the same size as the destination string. [hca@linux.ibm.com: use strscpy() instead of strscpy_pad()] Link: www.kernel.org/doc/html/latest/process/deprecated.html#strncpy-on-nul-terminated-strings[1] Link: https://github.com/KSPP/linux/issues/90 Cc: linux-hardening@vger.kernel.org Signed-off-by: Justin Stitt Link: https://lore.kernel.org/r/20230811-arch-s390-kernel-v1-1-7edbeeab3809@google.com Signed-off-by: Heiko Carstens --- arch/s390/kernel/ipl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 7f3a84e414a4..05e51666db03 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -266,7 +266,7 @@ static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \ struct kobj_attribute *attr, \ const char *buf, size_t len) \ { \ - strncpy(_value, buf, sizeof(_value) - 1); \ + strscpy(_value, buf, sizeof(_value)); \ strim(_value); \ return len; \ } \ From 680b7ddd7e2ab7638d431722432f6d02d75dade1 Mon Sep 17 00:00:00 2001 From: Tony Krowiak Date: Tue, 15 Aug 2023 14:43:22 -0400 Subject: [PATCH 54/66] s390/vfio-ap: no need to check the 'E' and 'I' bits in APQSW after TAPQ After a ZAPQ is executed to reset a queue, if the queue is not empty or interrupts are still enabled, the vfio_ap driver will wait for the reset operation to complete by repeatedly executing the TAPQ instruction and checking the 'E' and 'I' bits in the APQSW to verify that the queue is empty and interrupts are disabled. This is unnecessary because it is sufficient to check only the response code in the APQSW. If the reset is still in progress, the response code will be 02; however, if the reset has completed successfully, the response code will be 00. Signed-off-by: Tony Krowiak Acked-by: Janosch Frank Tested-by: Viktor Mihajlovski Link: https://lore.kernel.org/r/20230815184333.6554-2-akrowiak@linux.ibm.com Signed-off-by: Heiko Carstens --- drivers/s390/crypto/vfio_ap_ops.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index b441745b0418..3fd80533194b 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -1608,19 +1608,10 @@ static int apq_status_check(int apqn, struct ap_queue_status *status) { switch (status->response_code) { case AP_RESPONSE_NORMAL: - case AP_RESPONSE_RESET_IN_PROGRESS: - if (status->queue_empty && !status->irq_enabled) - return 0; - return -EBUSY; case AP_RESPONSE_DECONFIGURED: - /* - * If the AP queue is deconfigured, any subsequent AP command - * targeting the queue will fail with the same response code. On the - * other hand, when an AP adapter is deconfigured, the associated - * queues are reset, so let's return a value indicating the reset - * for which we're waiting completed successfully. - */ return 0; + case AP_RESPONSE_RESET_IN_PROGRESS: + return -EBUSY; default: WARN(true, "failed to verify reset of queue %02x.%04x: TAPQ rc=%u\n", From 7aa7b2a80cb70d528785f06a54d6c8148826006d Mon Sep 17 00:00:00 2001 From: Tony Krowiak Date: Tue, 15 Aug 2023 14:43:23 -0400 Subject: [PATCH 55/66] s390/vfio-ap: clean up irq resources if possible The architecture does not specify whether interrupts are disabled as part of the asynchronous reset or upon return from the PQAP/ZAPQ instruction. If, however, PQAP/ZAPQ completes with APQSW response code 0 and the interrupt bit in the status word is also 0, we know the interrupts are disabled and we can go ahead and clean up the corresponding resources; otherwise, we must wait until the asynchronous reset has completed. Signed-off-by: Tony Krowiak Suggested-by: Halil Pasic Reviewed-by: Jason J. Herne Acked-by: Halil Pasic Acked-by: Janosch Frank Tested-by: Viktor Mihajlovski Link: https://lore.kernel.org/r/20230815184333.6554-3-akrowiak@linux.ibm.com Signed-off-by: Heiko Carstens --- drivers/s390/crypto/vfio_ap_ops.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index 3fd80533194b..be92ba45226d 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -1654,9 +1654,13 @@ retry_zapq: switch (status.response_code) { case AP_RESPONSE_NORMAL: ret = 0; - /* if the reset has not completed, wait for it to take effect */ - if (!status.queue_empty || status.irq_enabled) + if (!status.irq_enabled) + vfio_ap_free_aqic_resources(q); + if (!status.queue_empty || status.irq_enabled) { ret = apq_reset_check(q); + if (status.irq_enabled && ret == 0) + vfio_ap_free_aqic_resources(q); + } break; case AP_RESPONSE_RESET_IN_PROGRESS: /* @@ -1675,6 +1679,7 @@ retry_zapq: * completed successfully. */ ret = 0; + vfio_ap_free_aqic_resources(q); break; default: WARN(true, @@ -1684,8 +1689,6 @@ retry_zapq: return -EIO; } - vfio_ap_free_aqic_resources(q); - return ret; } From 411b0109daa52d1cc5be39635631e22a5590c5d8 Mon Sep 17 00:00:00 2001 From: Tony Krowiak Date: Tue, 15 Aug 2023 14:43:24 -0400 Subject: [PATCH 56/66] s390/vfio-ap: wait for response code 05 to clear on queue reset Response code 05, AP busy, is a valid response code for a ZAPQ or TAPQ. Instead of returning error -EIO when a ZAPQ fails with response code 05, let's wait until the queue is no longer busy and try the ZAPQ again. Signed-off-by: Tony Krowiak Acked-by: Janosch Frank Tested-by: Viktor Mihajlovski Link: https://lore.kernel.org/r/20230815184333.6554-4-akrowiak@linux.ibm.com Signed-off-by: Heiko Carstens --- drivers/s390/crypto/vfio_ap_ops.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index be92ba45226d..3f67cfb53d0c 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -1611,6 +1611,7 @@ static int apq_status_check(int apqn, struct ap_queue_status *status) case AP_RESPONSE_DECONFIGURED: return 0; case AP_RESPONSE_RESET_IN_PROGRESS: + case AP_RESPONSE_BUSY: return -EBUSY; default: WARN(true, @@ -1663,6 +1664,7 @@ retry_zapq: } break; case AP_RESPONSE_RESET_IN_PROGRESS: + case AP_RESPONSE_BUSY: /* * There is a reset issued by another process in progress. Let's wait * for that to complete. Since we have no idea whether it was a RAPQ or From c51f8c6bb5c8a4878310d55e3a0b91747954b43d Mon Sep 17 00:00:00 2001 From: Tony Krowiak Date: Tue, 15 Aug 2023 14:43:25 -0400 Subject: [PATCH 57/66] s390/vfio-ap: allow deconfigured queue to be passed through to a guest When a queue is reset, the status response code returned from the reset operation is stored in the reset_rc field of the vfio_ap_queue structure representing the queue being reset. This field is later used to decide whether the queue should be passed through to a guest. If the reset_rc field is a non-zero value, the queue will be filtered from the list of queues passed through. When an adapter is deconfigured, all queues associated with that adapter are reset. That being the case, it is not necessary to filter those queues; so, if the status response code returned from a reset operation indicates the queue is deconfigured, the reset_rc field of the vfio_ap_queue structure will be set to zero so it will be passed through (i.e., not filtered). Signed-off-by: Tony Krowiak Reviewed-by: Jason J. Herne Acked-by: Halil Pasic Tested-by: Viktor Mihajlovski Link: https://lore.kernel.org/r/20230815184333.6554-5-akrowiak@linux.ibm.com Signed-off-by: Heiko Carstens --- drivers/s390/crypto/vfio_ap_ops.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index 3f67cfb53d0c..a489536c508a 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -1677,9 +1677,11 @@ retry_zapq: case AP_RESPONSE_DECONFIGURED: /* * When an AP adapter is deconfigured, the associated - * queues are reset, so let's return a value indicating the reset - * completed successfully. + * queues are reset, so let's set the status response code to 0 + * so the queue may be passed through (i.e., not filtered) and + * return a value indicating the reset completed successfully. */ + q->reset_rc = 0; ret = 0; vfio_ap_free_aqic_resources(q); break; From dd174833e44e7717f88f0925b1f78e9ba1d2626e Mon Sep 17 00:00:00 2001 From: Tony Krowiak Date: Tue, 15 Aug 2023 14:43:26 -0400 Subject: [PATCH 58/66] s390/vfio-ap: remove upper limit on wait for queue reset to complete The architecture does not define an upper limit on how long a queue reset (RAPQ/ZAPQ) can take to complete. In order to ensure both the security requirements and prevent resource leakage and corruption in the hypervisor, it is necessary to remove the upper limit (200ms) the vfio_ap driver currently waits for a reset to complete. This, of course, may result in a hang which is a less than desirable user experience, but until a firmware solution is provided, this is a necessary evil. Signed-off-by: Tony Krowiak Reviewed-by: Jason J. Herne Acked-by: Halil Pasic Tested-by: Viktor Mihajlovski Link: https://lore.kernel.org/r/20230815184333.6554-6-akrowiak@linux.ibm.com Signed-off-by: Heiko Carstens --- drivers/s390/crypto/vfio_ap_ops.c | 64 +++++++++++++++++-------------- 1 file changed, 35 insertions(+), 29 deletions(-) diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index a489536c508a..2517868aad56 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -30,7 +30,6 @@ #define AP_QUEUE_UNASSIGNED "unassigned" #define AP_QUEUE_IN_USE "in use" -#define MAX_RESET_CHECK_WAIT 200 /* Sleep max 200ms for reset check */ #define AP_RESET_INTERVAL 20 /* Reset sleep interval (20ms) */ static int vfio_ap_mdev_reset_queues(struct ap_queue_table *qtable); @@ -1622,58 +1621,66 @@ static int apq_status_check(int apqn, struct ap_queue_status *status) } } +#define WAIT_MSG "Waited %dms for reset of queue %02x.%04x (%u, %u, %u)" + static int apq_reset_check(struct vfio_ap_queue *q) { - int ret; - int iters = MAX_RESET_CHECK_WAIT / AP_RESET_INTERVAL; + int ret = -EBUSY, elapsed = 0; struct ap_queue_status status; - for (; iters > 0; iters--) { + while (true) { msleep(AP_RESET_INTERVAL); + elapsed += AP_RESET_INTERVAL; status = ap_tapq(q->apqn, NULL); ret = apq_status_check(q->apqn, &status); - if (ret != -EBUSY) + if (ret == -EIO) return ret; + if (ret == -EBUSY) { + pr_notice_ratelimited(WAIT_MSG, elapsed, + AP_QID_CARD(q->apqn), + AP_QID_QUEUE(q->apqn), + status.response_code, + status.queue_empty, + status.irq_enabled); + } else { + if (q->reset_rc == AP_RESPONSE_RESET_IN_PROGRESS || + q->reset_rc == AP_RESPONSE_BUSY) { + status = ap_zapq(q->apqn, 0); + q->reset_rc = status.response_code; + continue; + } + /* + * When an AP adapter is deconfigured, the associated + * queues are reset, so let's set the status response + * code to 0 so the queue may be passed through (i.e., + * not filtered). + */ + if (q->reset_rc == AP_RESPONSE_DECONFIGURED) + q->reset_rc = 0; + if (q->saved_isc != VFIO_AP_ISC_INVALID) + vfio_ap_free_aqic_resources(q); + break; + } } - WARN_ONCE(iters <= 0, - "timeout verifying reset of queue %02x.%04x (%u, %u, %u)", - AP_QID_CARD(q->apqn), AP_QID_QUEUE(q->apqn), - status.queue_empty, status.irq_enabled, status.response_code); return ret; } static int vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q) { struct ap_queue_status status; - int ret; + int ret = 0; if (!q) return 0; -retry_zapq: status = ap_zapq(q->apqn, 0); q->reset_rc = status.response_code; switch (status.response_code) { case AP_RESPONSE_NORMAL: - ret = 0; - if (!status.irq_enabled) - vfio_ap_free_aqic_resources(q); - if (!status.queue_empty || status.irq_enabled) { - ret = apq_reset_check(q); - if (status.irq_enabled && ret == 0) - vfio_ap_free_aqic_resources(q); - } - break; case AP_RESPONSE_RESET_IN_PROGRESS: case AP_RESPONSE_BUSY: - /* - * There is a reset issued by another process in progress. Let's wait - * for that to complete. Since we have no idea whether it was a RAPQ or - * ZAPQ, then if it completes successfully, let's issue the ZAPQ. - */ + /* Let's verify whether the ZAPQ completed successfully */ ret = apq_reset_check(q); - if (ret) - break; - goto retry_zapq; + break; case AP_RESPONSE_DECONFIGURED: /* * When an AP adapter is deconfigured, the associated @@ -1682,7 +1689,6 @@ retry_zapq: * return a value indicating the reset completed successfully. */ q->reset_rc = 0; - ret = 0; vfio_ap_free_aqic_resources(q); break; default: From 62aab082e9993163731656ce270cd3c1d29079af Mon Sep 17 00:00:00 2001 From: Tony Krowiak Date: Tue, 15 Aug 2023 14:43:27 -0400 Subject: [PATCH 59/66] s390/vfio-ap: store entire AP queue status word with the queue object Store the entire AP queue status word returned from the ZAPQ command with the struct vfio_ap_queue object instead of just the response code field. The other information contained in the status word is need by the apq_reset_check function to display a proper message to indicate that the vfio_ap driver is waiting for the ZAPQ to complete because the queue is not empty or IRQs are still enabled. Signed-off-by: Tony Krowiak Tested-by: Viktor Mihajlovski Link: https://lore.kernel.org/r/20230815184333.6554-7-akrowiak@linux.ibm.com Signed-off-by: Heiko Carstens --- drivers/s390/crypto/vfio_ap_ops.c | 27 +++++++++++++++------------ drivers/s390/crypto/vfio_ap_private.h | 4 ++-- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index 2517868aad56..43224f7a40ea 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -674,7 +674,7 @@ static bool vfio_ap_mdev_filter_matrix(unsigned long *apm, unsigned long *aqm, */ apqn = AP_MKQID(apid, apqi); q = vfio_ap_mdev_get_queue(matrix_mdev, apqn); - if (!q || q->reset_rc) { + if (!q || q->reset_status.response_code) { clear_bit_inv(apid, matrix_mdev->shadow_apcb.apm); break; @@ -1628,6 +1628,7 @@ static int apq_reset_check(struct vfio_ap_queue *q) int ret = -EBUSY, elapsed = 0; struct ap_queue_status status; + memcpy(&status, &q->reset_status, sizeof(status)); while (true) { msleep(AP_RESET_INTERVAL); elapsed += AP_RESET_INTERVAL; @@ -1643,20 +1644,20 @@ static int apq_reset_check(struct vfio_ap_queue *q) status.queue_empty, status.irq_enabled); } else { - if (q->reset_rc == AP_RESPONSE_RESET_IN_PROGRESS || - q->reset_rc == AP_RESPONSE_BUSY) { + if (q->reset_status.response_code == AP_RESPONSE_RESET_IN_PROGRESS || + q->reset_status.response_code == AP_RESPONSE_BUSY) { status = ap_zapq(q->apqn, 0); - q->reset_rc = status.response_code; + memcpy(&q->reset_status, &status, sizeof(status)); continue; } /* - * When an AP adapter is deconfigured, the associated - * queues are reset, so let's set the status response - * code to 0 so the queue may be passed through (i.e., - * not filtered). + * When an AP adapter is deconfigured, the + * associated queues are reset, so let's set the + * status response code to 0 so the queue may be + * passed through (i.e., not filtered) */ - if (q->reset_rc == AP_RESPONSE_DECONFIGURED) - q->reset_rc = 0; + if (status.response_code == AP_RESPONSE_DECONFIGURED) + q->reset_status.response_code = 0; if (q->saved_isc != VFIO_AP_ISC_INVALID) vfio_ap_free_aqic_resources(q); break; @@ -1673,7 +1674,7 @@ static int vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q) if (!q) return 0; status = ap_zapq(q->apqn, 0); - q->reset_rc = status.response_code; + memcpy(&q->reset_status, &status, sizeof(status)); switch (status.response_code) { case AP_RESPONSE_NORMAL: case AP_RESPONSE_RESET_IN_PROGRESS: @@ -1688,7 +1689,8 @@ static int vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q) * so the queue may be passed through (i.e., not filtered) and * return a value indicating the reset completed successfully. */ - q->reset_rc = 0; + q->reset_status.response_code = 0; + ret = 0; vfio_ap_free_aqic_resources(q); break; default: @@ -2042,6 +2044,7 @@ int vfio_ap_mdev_probe_queue(struct ap_device *apdev) q->apqn = to_ap_queue(&apdev->device)->qid; q->saved_isc = VFIO_AP_ISC_INVALID; + memset(&q->reset_status, 0, sizeof(q->reset_status)); matrix_mdev = get_update_locks_by_apqn(q->apqn); if (matrix_mdev) { diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h index 4642bbdbd1b2..d6eb3527e056 100644 --- a/drivers/s390/crypto/vfio_ap_private.h +++ b/drivers/s390/crypto/vfio_ap_private.h @@ -133,7 +133,7 @@ struct ap_matrix_mdev { * @apqn: the APQN of the AP queue device * @saved_isc: the guest ISC registered with the GIB interface * @mdev_qnode: allows the vfio_ap_queue struct to be added to a hashtable - * @reset_rc: the status response code from the last reset of the queue + * @reset_status: the status from the last reset of the queue */ struct vfio_ap_queue { struct ap_matrix_mdev *matrix_mdev; @@ -142,7 +142,7 @@ struct vfio_ap_queue { #define VFIO_AP_ISC_INVALID 0xff unsigned char saved_isc; struct hlist_node mdev_qnode; - unsigned int reset_rc; + struct ap_queue_status reset_status; }; int vfio_ap_mdev_register(void); From 9261f0438835a97254590046e1be83733cca440f Mon Sep 17 00:00:00 2001 From: Tony Krowiak Date: Tue, 15 Aug 2023 14:43:28 -0400 Subject: [PATCH 60/66] s390/vfio-ap: use work struct to verify queue reset Instead of waiting to verify that a queue is reset in the vfio_ap_mdev_reset_queue function, let's use a wait queue to check the the state of the reset. This way, when resetting all of the queues assigned to a matrix mdev, we don't have to wait for each queue to be reset before initiating a reset on the next queue to be reset. Signed-off-by: Tony Krowiak Reviewed-by: Jason J. Herne Suggested-by: Halil Pasic Acked-by: Janosch Frank Tested-by: Viktor Mihajlovski Link: https://lore.kernel.org/r/20230815184333.6554-8-akrowiak@linux.ibm.com Signed-off-by: Heiko Carstens --- drivers/s390/crypto/vfio_ap_ops.c | 48 +++++++++++++-------------- drivers/s390/crypto/vfio_ap_private.h | 2 ++ 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index 43224f7a40ea..3a59f1c5390f 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -35,7 +35,7 @@ static int vfio_ap_mdev_reset_queues(struct ap_queue_table *qtable); static struct vfio_ap_queue *vfio_ap_find_queue(int apqn); static const struct vfio_device_ops vfio_ap_matrix_dev_ops; -static int vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q); +static void vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q); /** * get_update_locks_for_kvm: Acquire the locks required to dynamically update a @@ -1623,11 +1623,13 @@ static int apq_status_check(int apqn, struct ap_queue_status *status) #define WAIT_MSG "Waited %dms for reset of queue %02x.%04x (%u, %u, %u)" -static int apq_reset_check(struct vfio_ap_queue *q) +static void apq_reset_check(struct work_struct *reset_work) { int ret = -EBUSY, elapsed = 0; struct ap_queue_status status; + struct vfio_ap_queue *q; + q = container_of(reset_work, struct vfio_ap_queue, reset_work); memcpy(&status, &q->reset_status, sizeof(status)); while (true) { msleep(AP_RESET_INTERVAL); @@ -1635,7 +1637,7 @@ static int apq_reset_check(struct vfio_ap_queue *q) status = ap_tapq(q->apqn, NULL); ret = apq_status_check(q->apqn, &status); if (ret == -EIO) - return ret; + return; if (ret == -EBUSY) { pr_notice_ratelimited(WAIT_MSG, elapsed, AP_QID_CARD(q->apqn), @@ -1663,34 +1665,32 @@ static int apq_reset_check(struct vfio_ap_queue *q) break; } } - return ret; } -static int vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q) +static void vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q) { struct ap_queue_status status; - int ret = 0; if (!q) - return 0; + return; status = ap_zapq(q->apqn, 0); memcpy(&q->reset_status, &status, sizeof(status)); switch (status.response_code) { case AP_RESPONSE_NORMAL: case AP_RESPONSE_RESET_IN_PROGRESS: case AP_RESPONSE_BUSY: - /* Let's verify whether the ZAPQ completed successfully */ - ret = apq_reset_check(q); + /* + * Let's verify whether the ZAPQ completed successfully on a work queue. + */ + queue_work(system_long_wq, &q->reset_work); break; case AP_RESPONSE_DECONFIGURED: /* * When an AP adapter is deconfigured, the associated * queues are reset, so let's set the status response code to 0 - * so the queue may be passed through (i.e., not filtered) and - * return a value indicating the reset completed successfully. + * so the queue may be passed through (i.e., not filtered). */ q->reset_status.response_code = 0; - ret = 0; vfio_ap_free_aqic_resources(q); break; default: @@ -1698,29 +1698,25 @@ static int vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q) "PQAP/ZAPQ for %02x.%04x failed with invalid rc=%u\n", AP_QID_CARD(q->apqn), AP_QID_QUEUE(q->apqn), status.response_code); - return -EIO; } - - return ret; } static int vfio_ap_mdev_reset_queues(struct ap_queue_table *qtable) { - int ret, loop_cursor, rc = 0; + int ret = 0, loop_cursor; struct vfio_ap_queue *q; + hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode) + vfio_ap_mdev_reset_queue(q); + hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode) { - ret = vfio_ap_mdev_reset_queue(q); - /* - * Regardless whether a queue turns out to be busy, or - * is not operational, we need to continue resetting - * the remaining queues. - */ - if (ret) - rc = ret; + flush_work(&q->reset_work); + + if (q->reset_status.response_code) + ret = -EIO; } - return rc; + return ret; } static int vfio_ap_mdev_open_device(struct vfio_device *vdev) @@ -2045,6 +2041,7 @@ int vfio_ap_mdev_probe_queue(struct ap_device *apdev) q->apqn = to_ap_queue(&apdev->device)->qid; q->saved_isc = VFIO_AP_ISC_INVALID; memset(&q->reset_status, 0, sizeof(q->reset_status)); + INIT_WORK(&q->reset_work, apq_reset_check); matrix_mdev = get_update_locks_by_apqn(q->apqn); if (matrix_mdev) { @@ -2094,6 +2091,7 @@ void vfio_ap_mdev_remove_queue(struct ap_device *apdev) } vfio_ap_mdev_reset_queue(q); + flush_work(&q->reset_work); dev_set_drvdata(&apdev->device, NULL); kfree(q); release_update_locks_for_mdev(matrix_mdev); diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h index d6eb3527e056..88aff8b81f2f 100644 --- a/drivers/s390/crypto/vfio_ap_private.h +++ b/drivers/s390/crypto/vfio_ap_private.h @@ -134,6 +134,7 @@ struct ap_matrix_mdev { * @saved_isc: the guest ISC registered with the GIB interface * @mdev_qnode: allows the vfio_ap_queue struct to be added to a hashtable * @reset_status: the status from the last reset of the queue + * @reset_work: work to wait for queue reset to complete */ struct vfio_ap_queue { struct ap_matrix_mdev *matrix_mdev; @@ -143,6 +144,7 @@ struct vfio_ap_queue { unsigned char saved_isc; struct hlist_node mdev_qnode; struct ap_queue_status reset_status; + struct work_struct reset_work; }; int vfio_ap_mdev_register(void); From e1f17f8ea93d8fc9d6d0562d38bb0a5fb3e8355e Mon Sep 17 00:00:00 2001 From: Tony Krowiak Date: Tue, 15 Aug 2023 14:43:29 -0400 Subject: [PATCH 61/66] s390/vfio-ap: handle queue state change in progress on reset A new APQSW response code (0xA) indicating the designated queue is in the process of being bound or associated to a configuration may be returned from the PQAP(ZAPQ) command. This patch introduces code that will verify when the PQAP(ZAPQ) command can be re-issued after receiving response code 0xA. Signed-off-by: Tony Krowiak Reviewed-by: Jason J. Herne Acked-by: Halil Pasic Tested-by: Viktor Mihajlovski Link: https://lore.kernel.org/r/20230815184333.6554-9-akrowiak@linux.ibm.com Signed-off-by: Heiko Carstens --- drivers/s390/crypto/vfio_ap_ops.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index 3a59f1c5390f..43dea259fe23 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -1647,7 +1647,8 @@ static void apq_reset_check(struct work_struct *reset_work) status.irq_enabled); } else { if (q->reset_status.response_code == AP_RESPONSE_RESET_IN_PROGRESS || - q->reset_status.response_code == AP_RESPONSE_BUSY) { + q->reset_status.response_code == AP_RESPONSE_BUSY || + q->reset_status.response_code == AP_RESPONSE_STATE_CHANGE_IN_PROGRESS) { status = ap_zapq(q->apqn, 0); memcpy(&q->reset_status, &status, sizeof(status)); continue; @@ -1679,6 +1680,7 @@ static void vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q) case AP_RESPONSE_NORMAL: case AP_RESPONSE_RESET_IN_PROGRESS: case AP_RESPONSE_BUSY: + case AP_RESPONSE_STATE_CHANGE_IN_PROGRESS: /* * Let's verify whether the ZAPQ completed successfully on a work queue. */ From 7847a19b5b6265f11e71c8499a3b608edac7f398 Mon Sep 17 00:00:00 2001 From: Tony Krowiak Date: Tue, 15 Aug 2023 14:43:30 -0400 Subject: [PATCH 62/66] s390/vfio-ap: check for TAPQ response codes 0x35 and 0x36 Check for response codes 0x35 and 0x36 which are asynchronous return codes indicating a failure of the guest to associate a secret with a queue. Since there can be no interaction with this queue from the guest (i.e., the vcpus are out of SIE for hot unplug, the guest is being shut down or an emulated subsystem reset of the guest is taking place), let's go ahead and re-issue the ZAPQ to reset and zeroize the queue. Signed-off-by: Tony Krowiak Reviewed-by: Jason J. Herne Reviewed-by: Halil Pasic Tested-by: Viktor Mihajlovski Link: https://lore.kernel.org/r/20230815184333.6554-10-akrowiak@linux.ibm.com Signed-off-by: Heiko Carstens --- drivers/s390/crypto/vfio_ap_ops.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index 43dea259fe23..8bda52c46df0 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -1612,6 +1612,16 @@ static int apq_status_check(int apqn, struct ap_queue_status *status) case AP_RESPONSE_RESET_IN_PROGRESS: case AP_RESPONSE_BUSY: return -EBUSY; + case AP_RESPONSE_ASSOC_SECRET_NOT_UNIQUE: + case AP_RESPONSE_ASSOC_FAILED: + /* + * These asynchronous response codes indicate a PQAP(AAPQ) + * instruction to associate a secret with the guest failed. All + * subsequent AP instructions will end with the asynchronous + * response code until the AP queue is reset; so, let's return + * a value indicating a reset needs to be performed again. + */ + return -EAGAIN; default: WARN(true, "failed to verify reset of queue %02x.%04x: TAPQ rc=%u\n", @@ -1648,7 +1658,8 @@ static void apq_reset_check(struct work_struct *reset_work) } else { if (q->reset_status.response_code == AP_RESPONSE_RESET_IN_PROGRESS || q->reset_status.response_code == AP_RESPONSE_BUSY || - q->reset_status.response_code == AP_RESPONSE_STATE_CHANGE_IN_PROGRESS) { + q->reset_status.response_code == AP_RESPONSE_STATE_CHANGE_IN_PROGRESS || + ret == -EAGAIN) { status = ap_zapq(q->apqn, 0); memcpy(&q->reset_status, &status, sizeof(status)); continue; From cf3fa16a6fd49216ae83502e61bea0d8322b51eb Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Tue, 15 Aug 2023 14:43:31 -0400 Subject: [PATCH 63/66] s390/uv: export uv_pin_shared for direct usage Export the uv_pin_shared function so that it can be called from other modules that carry a GPL-compatible license. Signed-off-by: Janosch Frank Signed-off-by: Tony Krowiak Tested-by: Viktor Mihajlovski Link: https://lore.kernel.org/r/20230815184333.6554-11-akrowiak@linux.ibm.com Signed-off-by: Heiko Carstens --- arch/s390/include/asm/uv.h | 6 ++++++ arch/s390/kernel/uv.c | 3 ++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h index d6bb2f4f78d1..d2cd42bb2c26 100644 --- a/arch/s390/include/asm/uv.h +++ b/arch/s390/include/asm/uv.h @@ -463,6 +463,7 @@ static inline int is_prot_virt_host(void) return prot_virt_host; } +int uv_pin_shared(unsigned long paddr); int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb); int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr); int uv_destroy_owned_page(unsigned long paddr); @@ -475,6 +476,11 @@ void setup_uv(void); #define is_prot_virt_host() 0 static inline void setup_uv(void) {} +static inline int uv_pin_shared(unsigned long paddr) +{ + return 0; +} + static inline int uv_destroy_owned_page(unsigned long paddr) { return 0; diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index 66f0eb1c872b..b771f1b4cdd1 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -88,7 +88,7 @@ fail: * Requests the Ultravisor to pin the page in the shared state. This will * cause an intercept when the guest attempts to unshare the pinned page. */ -static int uv_pin_shared(unsigned long paddr) +int uv_pin_shared(unsigned long paddr) { struct uv_cb_cfs uvcb = { .header.cmd = UVC_CMD_PIN_PAGE_SHARED, @@ -100,6 +100,7 @@ static int uv_pin_shared(unsigned long paddr) return -EINVAL; return 0; } +EXPORT_SYMBOL_GPL(uv_pin_shared); /* * Requests the Ultravisor to destroy a guest page and make it From fb5040ef7f707525d0681cf6bfe424ccd1aadab7 Mon Sep 17 00:00:00 2001 From: Tony Krowiak Date: Tue, 15 Aug 2023 14:43:32 -0400 Subject: [PATCH 64/66] KVM: s390: export kvm_s390_pv*_is_protected functions Export the kvm_s390_pv_is_protected and kvm_s390_pv_cpu_is_protected functions so that they can be called from other modules that carry a GPL-compatible license. Signed-off-by: Janosch Frank Signed-off-by: Tony Krowiak Tested-by: Viktor Mihajlovski Link: https://lore.kernel.org/r/20230815184333.6554-12-akrowiak@linux.ibm.com Signed-off-by: Heiko Carstens --- arch/s390/include/asm/kvm_host.h | 3 +++ arch/s390/kvm/kvm-s390.h | 12 ------------ arch/s390/kvm/pv.c | 14 ++++++++++++++ 3 files changed, 17 insertions(+), 12 deletions(-) diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 2bbc3d54959d..91bfecb91321 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -1028,6 +1028,9 @@ static inline int sie64a(struct kvm_s390_sie_block *sie_block, u64 *rsa) extern char sie_exit; +bool kvm_s390_pv_is_protected(struct kvm *kvm); +bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu); + extern int kvm_s390_gisc_register(struct kvm *kvm, u32 gisc); extern int kvm_s390_gisc_unregister(struct kvm *kvm, u32 gisc); diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 0261d42c7d01..a7ea80cfa445 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -270,18 +270,6 @@ static inline u64 kvm_s390_pv_cpu_get_handle(struct kvm_vcpu *vcpu) return vcpu->arch.pv.handle; } -static inline bool kvm_s390_pv_is_protected(struct kvm *kvm) -{ - lockdep_assert_held(&kvm->lock); - return !!kvm_s390_pv_get_handle(kvm); -} - -static inline bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu) -{ - lockdep_assert_held(&vcpu->mutex); - return !!kvm_s390_pv_cpu_get_handle(vcpu); -} - /* implemented in interrupt.c */ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu); void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu); diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c index 2f34c7c3c5ab..856140e9942e 100644 --- a/arch/s390/kvm/pv.c +++ b/arch/s390/kvm/pv.c @@ -18,6 +18,20 @@ #include #include "kvm-s390.h" +bool kvm_s390_pv_is_protected(struct kvm *kvm) +{ + lockdep_assert_held(&kvm->lock); + return !!kvm_s390_pv_get_handle(kvm); +} +EXPORT_SYMBOL_GPL(kvm_s390_pv_is_protected); + +bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu) +{ + lockdep_assert_held(&vcpu->mutex); + return !!kvm_s390_pv_cpu_get_handle(vcpu); +} +EXPORT_SYMBOL_GPL(kvm_s390_pv_cpu_is_protected); + /** * struct pv_vm_to_be_destroyed - Represents a protected VM that needs to * be destroyed From f88fb1335733029b4630fb93cfaad349a81e57b2 Mon Sep 17 00:00:00 2001 From: Tony Krowiak Date: Tue, 15 Aug 2023 14:43:33 -0400 Subject: [PATCH 65/66] s390/vfio-ap: make sure nib is shared Since the NIB is visible by HW, KVM and the (PV) guest it needs to be in non-secure or secure but shared storage. Return code 6 is used to indicate to a PV guest that its NIB would be on secure, unshared storage and therefore the NIB address is invalid. Unfortunately we have no easy way to check if a page is unshared after vfio_pin_pages() since it will automatically export an unshared page if the UV pin shared call did not succeed due to a page being in unshared state. Therefore we use the fact that UV pinning it a second time is a nop but trying to pin an exported page is an error (0x102). If we encounter this error, we do a vfio unpin and import the page again, since vfio_pin_pages() exported it. Signed-off-by: Janosch Frank Signed-off-by: Tony Krowiak Acked-by: Halil Pasic Tested-by: Viktor Mihajlovski Link: https://lore.kernel.org/r/20230815184333.6554-13-akrowiak@linux.ibm.com Signed-off-by: Heiko Carstens --- drivers/s390/crypto/vfio_ap_ops.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index 8bda52c46df0..0509f80622cd 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -359,6 +359,28 @@ static int vfio_ap_validate_nib(struct kvm_vcpu *vcpu, dma_addr_t *nib) return 0; } +static int ensure_nib_shared(unsigned long addr, struct gmap *gmap) +{ + int ret; + + /* + * The nib has to be located in shared storage since guest and + * host access it. vfio_pin_pages() will do a pin shared and + * if that fails (possibly because it's not a shared page) it + * calls export. We try to do a second pin shared here so that + * the UV gives us an error code if we try to pin a non-shared + * page. + * + * If the page is already pinned shared the UV will return a success. + */ + ret = uv_pin_shared(addr); + if (ret) { + /* vfio_pin_pages() likely exported the page so let's re-import */ + gmap_convert_to_secure(gmap, addr); + } + return ret; +} + /** * vfio_ap_irq_enable - Enable Interruption for a APQN * @@ -422,6 +444,14 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q, h_nib = page_to_phys(h_page) | (nib & ~PAGE_MASK); aqic_gisa.gisc = isc; + /* NIB in non-shared storage is a rc 6 for PV guests */ + if (kvm_s390_pv_cpu_is_protected(vcpu) && + ensure_nib_shared(h_nib & PAGE_MASK, kvm->arch.gmap)) { + vfio_unpin_pages(&q->matrix_mdev->vdev, nib, 1); + status.response_code = AP_RESPONSE_INVALID_ADDRESS; + return status; + } + nisc = kvm_s390_gisc_register(kvm, isc); if (nisc < 0) { VFIO_AP_DBF_WARN("%s: gisc registration failed: nisc=%d, isc=%d, apqn=%#04x\n", From dedf98dd1cfb61cfc74be9248b90a49b42c6dead Mon Sep 17 00:00:00 2001 From: Li Zetao Date: Tue, 15 Aug 2023 16:08:33 +0800 Subject: [PATCH 66/66] s390/pci: use builtin_misc_device macro to simplify the code Use the builtin_misc_device macro to simplify the code, which is the same as declaring with device_initcall(). Signed-off-by: Li Zetao Acked-by: Niklas Schnelle Link: https://lore.kernel.org/r/20230815080833.1103609-1-lizetao1@huawei.com Signed-off-by: Niklas Schnelle Signed-off-by: Heiko Carstens --- arch/s390/pci/pci_clp.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index ee367798e388..ee90a91ed888 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -666,9 +666,4 @@ static struct miscdevice clp_misc_device = { .fops = &clp_misc_fops, }; -static int __init clp_misc_init(void) -{ - return misc_register(&clp_misc_device); -} - -device_initcall(clp_misc_init); +builtin_misc_device(clp_misc_device);