Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull misc x86 fixes from Ingo Molnar:
 - topology enumeration fixes
 - KASAN fix
 - two entry fixes (not yet the big series related to KASLR)
 - remove obsolete code
 - instruction decoder fix
 - better /dev/mem sanity checks, hopefully working better this time
 - pkeys fixes
 - two ACPI fixes
 - 5-level paging related fixes
 - UMIP fixes that should make application visible faults more debuggable
 - boot fix for weird virtualization environment

* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (24 commits)
  x86/decoder: Add new TEST instruction pattern
  x86/PCI: Remove unused HyperTransport interrupt support
  x86/umip: Fix insn_get_code_seg_params()'s return value
  x86/boot/KASLR: Remove unused variable
  x86/entry/64: Add missing irqflags tracing to native_load_gs_index()
  x86/mm/kasan: Don't use vmemmap_populate() to initialize shadow
  x86/entry/64: Fix entry_SYSCALL_64_after_hwframe() IRQ tracing
  x86/pkeys/selftests: Fix protection keys write() warning
  x86/pkeys/selftests: Rename 'si_pkey' to 'siginfo_pkey'
  x86/mpx/selftests: Fix up weird arrays
  x86/pkeys: Update documentation about availability
  x86/umip: Print a warning into the syslog if UMIP-protected instructions are used
  x86/smpboot: Fix __max_logical_packages estimate
  x86/topology: Avoid wasting 128k for package id array
  perf/x86/intel/uncore: Cache logical pkg id in uncore driver
  x86/acpi: Reduce code duplication in mp_override_legacy_irq()
  x86/acpi: Handle SCI interrupts above legacy space gracefully
  x86/boot: Fix boot failure when SMP MP-table is based at 0
  x86/mm: Limit mmap() of /dev/mem to valid physical addresses
  x86/selftests: Add test for mapping placement for 5-level paging
  ...
This commit is contained in:
Linus Torvalds 2017-11-26 14:11:54 -08:00
commit 02fc87b117
38 changed files with 476 additions and 620 deletions

View File

@ -1,5 +1,10 @@
Memory Protection Keys for Userspace (PKU aka PKEYs) is a CPU feature Memory Protection Keys for Userspace (PKU aka PKEYs) is a feature
which will be found on future Intel CPUs. which is found on Intel's Skylake "Scalable Processor" Server CPUs.
It will be available in future non-server parts.
For anyone wishing to test or use this feature, it is available in
Amazon's EC2 C5 instances and is known to work there using an Ubuntu
17.04 image.
Memory Protection Keys provides a mechanism for enforcing page-based Memory Protection Keys provides a mechanism for enforcing page-based
protections, but without requiring modification of the page tables protections, but without requiring modification of the page tables

View File

@ -1804,14 +1804,20 @@ config X86_SMAP
If unsure, say Y. If unsure, say Y.
config X86_INTEL_UMIP config X86_INTEL_UMIP
def_bool n def_bool y
depends on CPU_SUP_INTEL depends on CPU_SUP_INTEL
prompt "Intel User Mode Instruction Prevention" if EXPERT prompt "Intel User Mode Instruction Prevention" if EXPERT
---help--- ---help---
The User Mode Instruction Prevention (UMIP) is a security The User Mode Instruction Prevention (UMIP) is a security
feature in newer Intel processors. If enabled, a general feature in newer Intel processors. If enabled, a general
protection fault is issued if the instructions SGDT, SLDT, protection fault is issued if the SGDT, SLDT, SIDT, SMSW
SIDT, SMSW and STR are executed in user mode. or STR instructions are executed in user mode. These instructions
unnecessarily expose information about the hardware state.
The vast majority of applications do not use these instructions.
For the very few that do, software emulation is provided in
specific cases in protected and virtual-8086 modes. Emulated
results are dummy.
config X86_INTEL_MPX config X86_INTEL_MPX
prompt "Intel MPX (Memory Protection Extensions)" prompt "Intel MPX (Memory Protection Extensions)"

View File

@ -171,7 +171,6 @@ parse_memmap(char *p, unsigned long long *start, unsigned long long *size)
static void mem_avoid_memmap(char *str) static void mem_avoid_memmap(char *str)
{ {
static int i; static int i;
int rc;
if (i >= MAX_MEMMAP_REGIONS) if (i >= MAX_MEMMAP_REGIONS)
return; return;
@ -219,7 +218,7 @@ static int handle_mem_memmap(void)
return 0; return 0;
tmp_cmdline = malloc(len + 1); tmp_cmdline = malloc(len + 1);
if (!tmp_cmdline ) if (!tmp_cmdline)
error("Failed to allocate space for tmp_cmdline"); error("Failed to allocate space for tmp_cmdline");
memcpy(tmp_cmdline, args, len); memcpy(tmp_cmdline, args, len);
@ -363,7 +362,7 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size,
cmd_line |= boot_params->hdr.cmd_line_ptr; cmd_line |= boot_params->hdr.cmd_line_ptr;
/* Calculate size of cmd_line. */ /* Calculate size of cmd_line. */
ptr = (char *)(unsigned long)cmd_line; ptr = (char *)(unsigned long)cmd_line;
for (cmd_line_size = 0; ptr[cmd_line_size++]; ) for (cmd_line_size = 0; ptr[cmd_line_size++];)
; ;
mem_avoid[MEM_AVOID_CMDLINE].start = cmd_line; mem_avoid[MEM_AVOID_CMDLINE].start = cmd_line;
mem_avoid[MEM_AVOID_CMDLINE].size = cmd_line_size; mem_avoid[MEM_AVOID_CMDLINE].size = cmd_line_size;

View File

@ -51,15 +51,19 @@ ENTRY(native_usergs_sysret64)
END(native_usergs_sysret64) END(native_usergs_sysret64)
#endif /* CONFIG_PARAVIRT */ #endif /* CONFIG_PARAVIRT */
.macro TRACE_IRQS_IRETQ .macro TRACE_IRQS_FLAGS flags:req
#ifdef CONFIG_TRACE_IRQFLAGS #ifdef CONFIG_TRACE_IRQFLAGS
bt $9, EFLAGS(%rsp) /* interrupts off? */ bt $9, \flags /* interrupts off? */
jnc 1f jnc 1f
TRACE_IRQS_ON TRACE_IRQS_ON
1: 1:
#endif #endif
.endm .endm
.macro TRACE_IRQS_IRETQ
TRACE_IRQS_FLAGS EFLAGS(%rsp)
.endm
/* /*
* When dynamic function tracer is enabled it will add a breakpoint * When dynamic function tracer is enabled it will add a breakpoint
* to all locations that it is about to modify, sync CPUs, update * to all locations that it is about to modify, sync CPUs, update
@ -148,8 +152,6 @@ ENTRY(entry_SYSCALL_64)
movq %rsp, PER_CPU_VAR(rsp_scratch) movq %rsp, PER_CPU_VAR(rsp_scratch)
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
TRACE_IRQS_OFF
/* Construct struct pt_regs on stack */ /* Construct struct pt_regs on stack */
pushq $__USER_DS /* pt_regs->ss */ pushq $__USER_DS /* pt_regs->ss */
pushq PER_CPU_VAR(rsp_scratch) /* pt_regs->sp */ pushq PER_CPU_VAR(rsp_scratch) /* pt_regs->sp */
@ -170,6 +172,8 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */ sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */
UNWIND_HINT_REGS extra=0 UNWIND_HINT_REGS extra=0
TRACE_IRQS_OFF
/* /*
* If we need to do entry work or if we guess we'll need to do * If we need to do entry work or if we guess we'll need to do
* exit work, go straight to the slow path. * exit work, go straight to the slow path.
@ -943,11 +947,13 @@ ENTRY(native_load_gs_index)
FRAME_BEGIN FRAME_BEGIN
pushfq pushfq
DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI) DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI)
TRACE_IRQS_OFF
SWAPGS SWAPGS
.Lgs_change: .Lgs_change:
movl %edi, %gs movl %edi, %gs
2: ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE 2: ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE
SWAPGS SWAPGS
TRACE_IRQS_FLAGS (%rsp)
popfq popfq
FRAME_END FRAME_END
ret ret

View File

@ -975,10 +975,10 @@ static void uncore_pci_remove(struct pci_dev *pdev)
int i, phys_id, pkg; int i, phys_id, pkg;
phys_id = uncore_pcibus_to_physid(pdev->bus); phys_id = uncore_pcibus_to_physid(pdev->bus);
pkg = topology_phys_to_logical_pkg(phys_id);
box = pci_get_drvdata(pdev); box = pci_get_drvdata(pdev);
if (!box) { if (!box) {
pkg = topology_phys_to_logical_pkg(phys_id);
for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) { for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
if (uncore_extra_pci_dev[pkg].dev[i] == pdev) { if (uncore_extra_pci_dev[pkg].dev[i] == pdev) {
uncore_extra_pci_dev[pkg].dev[i] = NULL; uncore_extra_pci_dev[pkg].dev[i] = NULL;
@ -994,7 +994,7 @@ static void uncore_pci_remove(struct pci_dev *pdev)
return; return;
pci_set_drvdata(pdev, NULL); pci_set_drvdata(pdev, NULL);
pmu->boxes[pkg] = NULL; pmu->boxes[box->pkgid] = NULL;
if (atomic_dec_return(&pmu->activeboxes) == 0) if (atomic_dec_return(&pmu->activeboxes) == 0)
uncore_pmu_unregister(pmu); uncore_pmu_unregister(pmu);
uncore_box_exit(box); uncore_box_exit(box);

View File

@ -100,7 +100,7 @@ struct intel_uncore_extra_reg {
struct intel_uncore_box { struct intel_uncore_box {
int pci_phys_id; int pci_phys_id;
int pkgid; int pkgid; /* Logical package ID */
int n_active; /* number of active events */ int n_active; /* number of active events */
int n_events; int n_events;
int cpu; /* cpu to collect events */ int cpu; /* cpu to collect events */

View File

@ -1057,7 +1057,7 @@ static void snbep_qpi_enable_event(struct intel_uncore_box *box, struct perf_eve
if (reg1->idx != EXTRA_REG_NONE) { if (reg1->idx != EXTRA_REG_NONE) {
int idx = box->pmu->pmu_idx + SNBEP_PCI_QPI_PORT0_FILTER; int idx = box->pmu->pmu_idx + SNBEP_PCI_QPI_PORT0_FILTER;
int pkg = topology_phys_to_logical_pkg(box->pci_phys_id); int pkg = box->pkgid;
struct pci_dev *filter_pdev = uncore_extra_pci_dev[pkg].dev[idx]; struct pci_dev *filter_pdev = uncore_extra_pci_dev[pkg].dev[idx];
if (filter_pdev) { if (filter_pdev) {

View File

@ -309,6 +309,7 @@ static inline int mmap_is_ia32(void)
extern unsigned long task_size_32bit(void); extern unsigned long task_size_32bit(void);
extern unsigned long task_size_64bit(int full_addr_space); extern unsigned long task_size_64bit(int full_addr_space);
extern unsigned long get_mmap_base(int is_legacy); extern unsigned long get_mmap_base(int is_legacy);
extern bool mmap_address_hint_valid(unsigned long addr, unsigned long len);
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32

View File

@ -99,14 +99,6 @@ struct irq_alloc_info {
void *dmar_data; void *dmar_data;
}; };
#endif #endif
#ifdef CONFIG_HT_IRQ
struct {
int ht_pos;
int ht_idx;
struct pci_dev *ht_dev;
void *ht_update;
};
#endif
#ifdef CONFIG_X86_UV #ifdef CONFIG_X86_UV
struct { struct {
int uv_limit; int uv_limit;

View File

@ -1,46 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_HYPERTRANSPORT_H
#define _ASM_X86_HYPERTRANSPORT_H
/*
* Constants for x86 Hypertransport Interrupts.
*
* The HT_IRQ_LOW_* values are OR-ed together to form one 32-bit word and
* the HT_IRQ_HIGH_* values describe a second one.
*/
#define HT_IRQ_LOW_BASE 0xf8000000
/* Vector field: bits 16-23 (shift 16, mask 0x00ff0000). */
#define HT_IRQ_LOW_VECTOR_SHIFT 16
#define HT_IRQ_LOW_VECTOR_MASK 0x00ff0000
/* Shift @v into the vector field and drop any bits that fall outside it. */
#define HT_IRQ_LOW_VECTOR(v) \
(((v) << HT_IRQ_LOW_VECTOR_SHIFT) & HT_IRQ_LOW_VECTOR_MASK)
/* Destination ID field of the low word: bits 8-15 (shift 8, mask 0x0000ff00). */
#define HT_IRQ_LOW_DEST_ID_SHIFT 8
#define HT_IRQ_LOW_DEST_ID_MASK 0x0000ff00
/* Only the low 8 bits of @v survive the mask; see HT_IRQ_HIGH_DEST_ID(). */
#define HT_IRQ_LOW_DEST_ID(v) \
(((v) << HT_IRQ_LOW_DEST_ID_SHIFT) & HT_IRQ_LOW_DEST_ID_MASK)
#define HT_IRQ_LOW_DM_PHYSICAL 0x0000000
#define HT_IRQ_LOW_DM_LOGICAL 0x0000040
#define HT_IRQ_LOW_RQEOI_EDGE 0x0000000
#define HT_IRQ_LOW_RQEOI_LEVEL 0x0000020
#define HT_IRQ_LOW_MT_FIXED 0x0000000
#define HT_IRQ_LOW_MT_ARBITRATED 0x0000004
#define HT_IRQ_LOW_MT_SMI 0x0000008
#define HT_IRQ_LOW_MT_NMI 0x000000c
#define HT_IRQ_LOW_MT_INIT 0x0000010
#define HT_IRQ_LOW_MT_STARTUP 0x0000014
#define HT_IRQ_LOW_MT_EXTINT 0x0000018
#define HT_IRQ_LOW_MT_LINT1 0x000008c
#define HT_IRQ_LOW_MT_LINT0 0x0000098
#define HT_IRQ_LOW_IRQ_MASKED 0x0000001
/* Destination ID field of the high word: bits 0-23 (shift 0, mask 0x00ffffff). */
#define HT_IRQ_HIGH_DEST_ID_SHIFT 0
#define HT_IRQ_HIGH_DEST_ID_MASK 0x00ffffff
/*
* Discards the low 8 bits of @v (those are carried by HT_IRQ_LOW_DEST_ID())
* and keeps at most 24 of the remaining bits.
*/
#define HT_IRQ_HIGH_DEST_ID(v) \
((((v) >> 8) << HT_IRQ_HIGH_DEST_ID_SHIFT) & HT_IRQ_HIGH_DEST_ID_MASK)
#endif /* _ASM_X86_HYPERTRANSPORT_H */

View File

@ -18,6 +18,6 @@
void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs); void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs);
int insn_get_modrm_rm_off(struct insn *insn, struct pt_regs *regs); int insn_get_modrm_rm_off(struct insn *insn, struct pt_regs *regs);
unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx); unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx);
char insn_get_code_seg_params(struct pt_regs *regs); int insn_get_code_seg_params(struct pt_regs *regs);
#endif /* _ASM_X86_INSN_EVAL_H */ #endif /* _ASM_X86_INSN_EVAL_H */

View File

@ -111,6 +111,10 @@ build_mmio_write(__writeq, "q", unsigned long, "r", )
#endif #endif
#define ARCH_HAS_VALID_PHYS_ADDR_RANGE
extern int valid_phys_addr_range(phys_addr_t addr, size_t size);
extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size);
/** /**
* virt_to_phys - map virtual addresses to physical * virt_to_phys - map virtual addresses to physical
* @address: address to remap * @address: address to remap

View File

@ -56,10 +56,4 @@ extern void arch_init_msi_domain(struct irq_domain *domain);
static inline void arch_init_msi_domain(struct irq_domain *domain) { } static inline void arch_init_msi_domain(struct irq_domain *domain) { }
#endif #endif
#ifdef CONFIG_HT_IRQ
extern void arch_init_htirq_domain(struct irq_domain *domain);
#else
static inline void arch_init_htirq_domain(struct irq_domain *domain) { }
#endif
#endif #endif

View File

@ -132,6 +132,7 @@ struct cpuinfo_x86 {
/* Index into per_cpu list: */ /* Index into per_cpu list: */
u16 cpu_index; u16 cpu_index;
u32 microcode; u32 microcode;
unsigned initialized : 1;
} __randomize_layout; } __randomize_layout;
struct cpuid_regs { struct cpuid_regs {

View File

@ -342,13 +342,12 @@ acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long e
#ifdef CONFIG_X86_IO_APIC #ifdef CONFIG_X86_IO_APIC
#define MP_ISA_BUS 0 #define MP_ISA_BUS 0
static int __init mp_register_ioapic_irq(u8 bus_irq, u8 polarity,
u8 trigger, u32 gsi);
static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
u32 gsi) u32 gsi)
{ {
int ioapic;
int pin;
struct mpc_intsrc mp_irq;
/* /*
* Check bus_irq boundary. * Check bus_irq boundary.
*/ */
@ -357,14 +356,6 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
return; return;
} }
/*
* Convert 'gsi' to 'ioapic.pin'.
*/
ioapic = mp_find_ioapic(gsi);
if (ioapic < 0)
return;
pin = mp_find_ioapic_pin(ioapic, gsi);
/* /*
* TBD: This check is for faulty timer entries, where the override * TBD: This check is for faulty timer entries, where the override
* erroneously sets the trigger to level, resulting in a HUGE * erroneously sets the trigger to level, resulting in a HUGE
@ -373,16 +364,8 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
if ((bus_irq == 0) && (trigger == 3)) if ((bus_irq == 0) && (trigger == 3))
trigger = 1; trigger = 1;
mp_irq.type = MP_INTSRC; if (mp_register_ioapic_irq(bus_irq, polarity, trigger, gsi) < 0)
mp_irq.irqtype = mp_INT; return;
mp_irq.irqflag = (trigger << 2) | polarity;
mp_irq.srcbus = MP_ISA_BUS;
mp_irq.srcbusirq = bus_irq; /* IRQ */
mp_irq.dstapic = mpc_ioapic_id(ioapic); /* APIC ID */
mp_irq.dstirq = pin; /* INTIN# */
mp_save_irq(&mp_irq);
/* /*
* Reset default identity mapping if gsi is also an legacy IRQ, * Reset default identity mapping if gsi is also an legacy IRQ,
* otherwise there will be more than one entry with the same GSI * otherwise there will be more than one entry with the same GSI
@ -429,6 +412,34 @@ static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger,
return 0; return 0;
} }
/*
* mp_register_ioapic_irq - record an MP interrupt-source entry for a GSI
* @bus_irq: source bus IRQ number stored in the entry
* @polarity: polarity bits, placed in the low bits of irqflag
* @trigger: trigger bits, placed in irqflag shifted left by 2
* @gsi: global system interrupt used to look up the IOAPIC and its pin
*
* Looks up the IOAPIC that owns @gsi and the pin within it, fills a
* struct mpc_intsrc (source bus fixed to MP_ISA_BUS) and hands it to
* mp_save_irq().
*
* Returns 0 on success. If mp_find_ioapic() returns a negative value, a
* warning is printed and that value is returned unchanged; nothing is saved.
*/
static int __init mp_register_ioapic_irq(u8 bus_irq, u8 polarity,
u8 trigger, u32 gsi)
{
struct mpc_intsrc mp_irq;
int ioapic, pin;
/* Convert 'gsi' to 'ioapic.pin'(INTIN#) */
ioapic = mp_find_ioapic(gsi);
if (ioapic < 0) {
pr_warn("Failed to find ioapic for gsi : %u\n", gsi);
return ioapic;
}
pin = mp_find_ioapic_pin(ioapic, gsi);
mp_irq.type = MP_INTSRC;
mp_irq.irqtype = mp_INT;
/* Pack trigger and polarity into one flag field. */
mp_irq.irqflag = (trigger << 2) | polarity;
mp_irq.srcbus = MP_ISA_BUS;
mp_irq.srcbusirq = bus_irq;
mp_irq.dstapic = mpc_ioapic_id(ioapic);
mp_irq.dstirq = pin;
mp_save_irq(&mp_irq);
return 0;
}
static int __init static int __init
acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end) acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end)
{ {
@ -473,7 +484,11 @@ static void __init acpi_sci_ioapic_setup(u8 bus_irq, u16 polarity, u16 trigger,
if (acpi_sci_flags & ACPI_MADT_POLARITY_MASK) if (acpi_sci_flags & ACPI_MADT_POLARITY_MASK)
polarity = acpi_sci_flags & ACPI_MADT_POLARITY_MASK; polarity = acpi_sci_flags & ACPI_MADT_POLARITY_MASK;
if (bus_irq < NR_IRQS_LEGACY)
mp_override_legacy_irq(bus_irq, polarity, trigger, gsi); mp_override_legacy_irq(bus_irq, polarity, trigger, gsi);
else
mp_register_ioapic_irq(bus_irq, polarity, trigger, gsi);
acpi_penalize_sci_irq(bus_irq, trigger, polarity); acpi_penalize_sci_irq(bus_irq, trigger, polarity);
/* /*

View File

@ -12,7 +12,6 @@ obj-y += hw_nmi.o
obj-$(CONFIG_X86_IO_APIC) += io_apic.o obj-$(CONFIG_X86_IO_APIC) += io_apic.o
obj-$(CONFIG_PCI_MSI) += msi.o obj-$(CONFIG_PCI_MSI) += msi.o
obj-$(CONFIG_HT_IRQ) += htirq.o
obj-$(CONFIG_SMP) += ipi.o obj-$(CONFIG_SMP) += ipi.o
ifeq ($(CONFIG_X86_64),y) ifeq ($(CONFIG_X86_64),y)

View File

@ -1,198 +0,0 @@
/*
* Support Hypertransport IRQ
*
* Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
* Moved from arch/x86/kernel/apic/io_apic.c.
* Jiang Liu <jiang.liu@linux.intel.com>
* Add support of hierarchical irqdomain
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/device.h>
#include <linux/pci.h>
#include <linux/htirq.h>
#include <asm/irqdomain.h>
#include <asm/hw_irq.h>
#include <asm/apic.h>
#include <asm/hypertransport.h>
static struct irq_domain *htirq_domain;
/*
* Hypertransport interrupt support
*/
/*
* Set the affinity of an HT interrupt.
*
* The request is forwarded to the parent irq_data's chip first. Only if that
* call returns a non-negative value is the HT message re-read, its vector and
* destination-ID fields replaced with the values from irqd_cfg(data), and the
* message written back. The parent's return value is returned in all cases.
*/
static int
ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
{
struct irq_data *parent = data->parent_data;
int ret;
ret = parent->chip->irq_set_affinity(parent, mask, force);
if (ret >= 0) {
struct ht_irq_msg msg;
struct irq_cfg *cfg = irqd_cfg(data);
fetch_ht_irq_msg(data->irq, &msg);
/* Clear the old vector/destination bits before OR-ing in the new ones. */
msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK |
HT_IRQ_LOW_DEST_ID_MASK);
msg.address_lo |= HT_IRQ_LOW_VECTOR(cfg->vector) |
HT_IRQ_LOW_DEST_ID(cfg->dest_apicid);
msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
msg.address_hi |= HT_IRQ_HIGH_DEST_ID(cfg->dest_apicid);
write_ht_irq_msg(data->irq, &msg);
}
return ret;
}
/*
* irq_chip installed for every interrupt allocated by htirq_domain_alloc().
* Only .irq_set_affinity is implemented in this file; the other callbacks
* are defined elsewhere.
*/
static struct irq_chip ht_irq_chip = {
.name = "PCI-HT",
.irq_mask = mask_ht_irq,
.irq_unmask = unmask_ht_irq,
.irq_ack = irq_chip_ack_parent,
.irq_set_affinity = ht_set_affinity,
.irq_retrigger = irq_chip_retrigger_hierarchy,
.flags = IRQCHIP_SKIP_SET_WAKE,
};
/*
* irq_domain .alloc callback for the HT irq domain.
*
* @arg is interpreted as a struct irq_alloc_info carrying the ht_dev, ht_idx,
* ht_pos and ht_update fields. Exactly one interrupt can be allocated per
* call.
*
* Returns 0 on success, -EINVAL if more than one irq is requested or @arg is
* NULL, -EEXIST if the computed hwirq is already mapped in @domain, -ENOMEM
* if the per-irq configuration cannot be allocated, or the negative value
* returned by irq_domain_alloc_irqs_parent().
*/
static int htirq_domain_alloc(struct irq_domain *domain, unsigned int virq,
unsigned int nr_irqs, void *arg)
{
struct ht_irq_cfg *ht_cfg;
struct irq_alloc_info *info = arg;
struct pci_dev *dev;
irq_hw_number_t hwirq;
int ret;
if (nr_irqs > 1 || !info)
return -EINVAL;
dev = info->ht_dev;
/*
* hwirq layout: low 8 bits of ht_idx, PCI device id shifted left by 8,
* PCI domain number shifted left by 24.
*/
hwirq = (info->ht_idx & 0xFF) |
PCI_DEVID(dev->bus->number, dev->devfn) << 8 |
(pci_domain_nr(dev->bus) & 0xFFFFFFFF) << 24;
if (irq_find_mapping(domain, hwirq) > 0)
return -EEXIST;
ht_cfg = kmalloc(sizeof(*ht_cfg), GFP_KERNEL);
if (!ht_cfg)
return -ENOMEM;
ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, info);
if (ret < 0) {
/* Parent allocation failed: release the config allocated above. */
kfree(ht_cfg);
return ret;
}
/* Initialize msg to a value that will never match the first write. */
ht_cfg->msg.address_lo = 0xffffffff;
ht_cfg->msg.address_hi = 0xffffffff;
ht_cfg->dev = info->ht_dev;
ht_cfg->update = info->ht_update;
ht_cfg->pos = info->ht_pos;
ht_cfg->idx = 0x10 + (info->ht_idx * 2);
/*
* ht_cfg is passed for both pointer arguments; htirq_domain_free() later
* releases it through irq_data->chip_data.
*/
irq_domain_set_info(domain, virq, hwirq, &ht_irq_chip, ht_cfg,
handle_edge_irq, ht_cfg, "edge");
return 0;
}
/*
* irq_domain .free callback for the HT irq domain.
*
* Frees the chip_data pointer of @virq's irq_data and then calls
* irq_domain_free_irqs_top(). Calling it with @nr_irqs != 1 triggers BUG_ON(),
* matching the single-irq restriction in htirq_domain_alloc().
*/
static void htirq_domain_free(struct irq_domain *domain, unsigned int virq,
unsigned int nr_irqs)
{
struct irq_data *irq_data = irq_domain_get_irq_data(domain, virq);
BUG_ON(nr_irqs != 1);
kfree(irq_data->chip_data);
irq_domain_free_irqs_top(domain, virq, nr_irqs);
}
/*
* irq_domain .activate callback for the HT irq domain.
*
* Builds a complete HT message from the vector and destination APIC ID in
* irqd_cfg(irq_data) and writes it with write_ht_irq_msg(). The destination
* mode and message type bits are chosen from apic->irq_dest_mode and
* apic->irq_delivery_mode; HT_IRQ_LOW_IRQ_MASKED is always included in the
* written value. @domain and @early are not used. Always returns 0.
*/
static int htirq_domain_activate(struct irq_domain *domain,
struct irq_data *irq_data, bool early)
{
struct ht_irq_msg msg;
struct irq_cfg *cfg = irqd_cfg(irq_data);
msg.address_hi = HT_IRQ_HIGH_DEST_ID(cfg->dest_apicid);
msg.address_lo =
HT_IRQ_LOW_BASE |
HT_IRQ_LOW_DEST_ID(cfg->dest_apicid) |
HT_IRQ_LOW_VECTOR(cfg->vector) |
((apic->irq_dest_mode == 0) ?
HT_IRQ_LOW_DM_PHYSICAL :
HT_IRQ_LOW_DM_LOGICAL) |
HT_IRQ_LOW_RQEOI_EDGE |
((apic->irq_delivery_mode != dest_LowestPrio) ?
HT_IRQ_LOW_MT_FIXED :
HT_IRQ_LOW_MT_ARBITRATED) |
HT_IRQ_LOW_IRQ_MASKED;
write_ht_irq_msg(irq_data->irq, &msg);
return 0;
}
/*
* irq_domain .deactivate callback for the HT irq domain: writes an all-zero
* HT message for the interrupt. @domain is not used.
*/
static void htirq_domain_deactivate(struct irq_domain *domain,
struct irq_data *irq_data)
{
struct ht_irq_msg msg;
memset(&msg, 0, sizeof(msg));
write_ht_irq_msg(irq_data->irq, &msg);
}
/* Callback table handed to irq_domain_create_tree() in arch_init_htirq_domain(). */
static const struct irq_domain_ops htirq_domain_ops = {
.alloc = htirq_domain_alloc,
.free = htirq_domain_free,
.activate = htirq_domain_activate,
.deactivate = htirq_domain_deactivate,
};
/*
* arch_init_htirq_domain - create the HT irq domain below @parent
* @parent: irq domain stored as the new domain's parent
*
* Does nothing when disable_apic is set. Otherwise allocates a named fwnode
* ("PCI-HT"), creates a tree domain using htirq_domain_ops and stores it in
* htirq_domain. The fwnode is released right after the creation call,
* whether or not it succeeded. If either the fwnode or the domain cannot be
* allocated, a warning is printed and htirq_domain is left without a parent
* being set.
*/
void __init arch_init_htirq_domain(struct irq_domain *parent)
{
struct fwnode_handle *fn;
if (disable_apic)
return;
fn = irq_domain_alloc_named_fwnode("PCI-HT");
if (!fn)
goto warn;
htirq_domain = irq_domain_create_tree(fn, &htirq_domain_ops, NULL);
irq_domain_free_fwnode(fn);
if (!htirq_domain)
goto warn;
htirq_domain->parent = parent;
return;
warn:
pr_warn("Failed to initialize irqdomain for HTIRQ.\n");
}
/*
* arch_setup_ht_irq - allocate one interrupt from the HT irq domain
* @idx: stored in the allocation info as ht_idx
* @pos: stored in the allocation info as ht_pos
* @dev: PCI device; also supplies the NUMA node via dev_to_node()
* @update: stored in the allocation info as ht_update
*
* Returns -ENOSYS when htirq_domain has not been created, otherwise the
* result of irq_domain_alloc_irqs() for a single interrupt.
*/
int arch_setup_ht_irq(int idx, int pos, struct pci_dev *dev,
ht_irq_update_t *update)
{
struct irq_alloc_info info;
if (!htirq_domain)
return -ENOSYS;
init_irq_alloc_info(&info, NULL);
info.ht_idx = idx;
info.ht_pos = pos;
info.ht_dev = dev;
info.ht_update = update;
return irq_domain_alloc_irqs(htirq_domain, 1, dev_to_node(&dev->dev),
&info);
}
/* Release the single interrupt @irq via irq_domain_free_irqs(). */
void arch_teardown_ht_irq(unsigned int irq)
{
irq_domain_free_irqs(irq, 1);
}

View File

@ -1,5 +1,5 @@
/* /*
* Local APIC related interfaces to support IOAPIC, MSI, HT_IRQ etc. * Local APIC related interfaces to support IOAPIC, MSI, etc.
* *
* Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
* Moved from arch/x86/kernel/apic/io_apic.c. * Moved from arch/x86/kernel/apic/io_apic.c.
@ -601,7 +601,7 @@ int __init arch_probe_nr_irqs(void)
nr_irqs = NR_VECTORS * nr_cpu_ids; nr_irqs = NR_VECTORS * nr_cpu_ids;
nr = (gsi_top + nr_legacy_irqs()) + 8 * nr_cpu_ids; nr = (gsi_top + nr_legacy_irqs()) + 8 * nr_cpu_ids;
#if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ) #if defined(CONFIG_PCI_MSI)
/* /*
* for MSI and HT dyn irq * for MSI and HT dyn irq
*/ */
@ -663,7 +663,6 @@ int __init arch_early_irq_init(void)
irq_set_default_host(x86_vector_domain); irq_set_default_host(x86_vector_domain);
arch_init_msi_domain(x86_vector_domain); arch_init_msi_domain(x86_vector_domain);
arch_init_htirq_domain(x86_vector_domain);
BUG_ON(!alloc_cpumask_var(&vector_searchmask, GFP_KERNEL)); BUG_ON(!alloc_cpumask_var(&vector_searchmask, GFP_KERNEL));

View File

@ -341,6 +341,8 @@ static __always_inline void setup_umip(struct cpuinfo_x86 *c)
cr4_set_bits(X86_CR4_UMIP); cr4_set_bits(X86_CR4_UMIP);
pr_info("x86/cpu: Activated the Intel User Mode Instruction Prevention (UMIP) CPU feature\n");
return; return;
out: out:

View File

@ -431,6 +431,7 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
} }
static unsigned long mpf_base; static unsigned long mpf_base;
static bool mpf_found;
static unsigned long __init get_mpc_size(unsigned long physptr) static unsigned long __init get_mpc_size(unsigned long physptr)
{ {
@ -504,7 +505,7 @@ void __init default_get_smp_config(unsigned int early)
if (!smp_found_config) if (!smp_found_config)
return; return;
if (!mpf_base) if (!mpf_found)
return; return;
if (acpi_lapic && early) if (acpi_lapic && early)
@ -593,6 +594,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length)
smp_found_config = 1; smp_found_config = 1;
#endif #endif
mpf_base = base; mpf_base = base;
mpf_found = true;
pr_info("found SMP MP-table at [mem %#010lx-%#010lx] mapped at [%p]\n", pr_info("found SMP MP-table at [mem %#010lx-%#010lx] mapped at [%p]\n",
base, base + sizeof(*mpf) - 1, mpf); base, base + sizeof(*mpf) - 1, mpf);
@ -858,7 +860,7 @@ static int __init update_mp_table(void)
if (!enable_update_mptable) if (!enable_update_mptable)
return 0; return 0;
if (!mpf_base) if (!mpf_found)
return 0; return 0;
mpf = early_memremap(mpf_base, sizeof(*mpf)); mpf = early_memremap(mpf_base, sizeof(*mpf));

View File

@ -101,9 +101,6 @@ DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
EXPORT_PER_CPU_SYMBOL(cpu_info); EXPORT_PER_CPU_SYMBOL(cpu_info);
/* Logical package management. We might want to allocate that dynamically */ /* Logical package management. We might want to allocate that dynamically */
static int *physical_to_logical_pkg __read_mostly;
static unsigned long *physical_package_map __read_mostly;;
static unsigned int max_physical_pkg_id __read_mostly;
unsigned int __max_logical_packages __read_mostly; unsigned int __max_logical_packages __read_mostly;
EXPORT_SYMBOL(__max_logical_packages); EXPORT_SYMBOL(__max_logical_packages);
static unsigned int logical_packages __read_mostly; static unsigned int logical_packages __read_mostly;
@ -280,44 +277,6 @@ static void notrace start_secondary(void *unused)
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
} }
/**
* topology_update_package_map - Update the physical to logical package map
* @pkg: The physical package id as retrieved via CPUID
* @cpu: The cpu for which this is updated
*/
int topology_update_package_map(unsigned int pkg, unsigned int cpu)
{
unsigned int new;
/* Called from early boot ? */
if (!physical_package_map)
return 0;
if (pkg >= max_physical_pkg_id)
return -EINVAL;
/* Set the logical package id */
if (test_and_set_bit(pkg, physical_package_map))
goto found;
if (logical_packages >= __max_logical_packages) {
pr_warn("Package %u of CPU %u exceeds BIOS package data %u.\n",
logical_packages, cpu, __max_logical_packages);
return -ENOSPC;
}
new = logical_packages++;
if (new != pkg) {
pr_info("CPU %u Converting physical %u to logical package %u\n",
cpu, pkg, new);
}
physical_to_logical_pkg[pkg] = new;
found:
cpu_data(cpu).logical_proc_id = physical_to_logical_pkg[pkg];
return 0;
}
/** /**
* topology_phys_to_logical_pkg - Map a physical package id to a logical * topology_phys_to_logical_pkg - Map a physical package id to a logical
* *
@ -325,62 +284,40 @@ found:
*/ */
int topology_phys_to_logical_pkg(unsigned int phys_pkg) int topology_phys_to_logical_pkg(unsigned int phys_pkg)
{ {
if (phys_pkg >= max_physical_pkg_id) int cpu;
for_each_possible_cpu(cpu) {
struct cpuinfo_x86 *c = &cpu_data(cpu);
if (c->initialized && c->phys_proc_id == phys_pkg)
return c->logical_proc_id;
}
return -1; return -1;
return physical_to_logical_pkg[phys_pkg];
} }
EXPORT_SYMBOL(topology_phys_to_logical_pkg); EXPORT_SYMBOL(topology_phys_to_logical_pkg);
static void __init smp_init_package_map(struct cpuinfo_x86 *c, unsigned int cpu) /**
* topology_update_package_map - Update the physical to logical package map
* @pkg: The physical package id as retrieved via CPUID
* @cpu: The cpu for which this is updated
*/
int topology_update_package_map(unsigned int pkg, unsigned int cpu)
{ {
unsigned int ncpus; int new;
size_t size;
/* /* Already available somewhere? */
* Today neither Intel nor AMD support heterogenous systems. That new = topology_phys_to_logical_pkg(pkg);
* might change in the future.... if (new >= 0)
* goto found;
* While ideally we'd want '* smp_num_siblings' in the below @ncpus
* computation, this won't actually work since some Intel BIOSes new = logical_packages++;
* report inconsistent HT data when they disable HT. if (new != pkg) {
* pr_info("CPU %u Converting physical %u to logical package %u\n",
* In particular, they reduce the APIC-IDs to only include the cores, cpu, pkg, new);
* but leave the CPUID topology to say there are (2) siblings.
* This means we don't know how many threads there will be until
* after the APIC enumeration.
*
* By not including this we'll sometimes over-estimate the number of
* logical packages by the amount of !present siblings, but this is
* still better than MAX_LOCAL_APIC.
*
* We use total_cpus not nr_cpu_ids because nr_cpu_ids can be limited
* on the command line leading to a similar issue as the HT disable
* problem because the hyperthreads are usually enumerated after the
* primary cores.
*/
ncpus = boot_cpu_data.x86_max_cores;
if (!ncpus) {
pr_warn("x86_max_cores == zero !?!?");
ncpus = 1;
} }
found:
__max_logical_packages = DIV_ROUND_UP(total_cpus, ncpus); cpu_data(cpu).logical_proc_id = new;
logical_packages = 0; return 0;
/*
* Possibly larger than what we need as the number of apic ids per
* package can be smaller than the actual used apic ids.
*/
max_physical_pkg_id = DIV_ROUND_UP(MAX_LOCAL_APIC, ncpus);
size = max_physical_pkg_id * sizeof(unsigned int);
physical_to_logical_pkg = kmalloc(size, GFP_KERNEL);
memset(physical_to_logical_pkg, 0xff, size);
size = BITS_TO_LONGS(max_physical_pkg_id) * sizeof(unsigned long);
physical_package_map = kzalloc(size, GFP_KERNEL);
pr_info("Max logical packages: %u\n", __max_logical_packages);
topology_update_package_map(c->phys_proc_id, cpu);
} }
void __init smp_store_boot_cpu_info(void) void __init smp_store_boot_cpu_info(void)
@ -390,7 +327,8 @@ void __init smp_store_boot_cpu_info(void)
*c = boot_cpu_data; *c = boot_cpu_data;
c->cpu_index = id; c->cpu_index = id;
smp_init_package_map(c, id); topology_update_package_map(c->phys_proc_id, id);
c->initialized = true;
} }
/* /*
@ -401,6 +339,8 @@ void smp_store_cpu_info(int id)
{ {
struct cpuinfo_x86 *c = &cpu_data(id); struct cpuinfo_x86 *c = &cpu_data(id);
/* Copy boot_cpu_data only on the first bringup */
if (!c->initialized)
*c = boot_cpu_data; *c = boot_cpu_data;
c->cpu_index = id; c->cpu_index = id;
/* /*
@ -408,6 +348,7 @@ void smp_store_cpu_info(int id)
* bringing up AP or offlined CPU0. * bringing up AP or offlined CPU0.
*/ */
identify_secondary_cpu(c); identify_secondary_cpu(c);
c->initialized = true;
} }
static bool static bool
@ -1356,7 +1297,16 @@ void __init native_smp_prepare_boot_cpu(void)
void __init native_smp_cpus_done(unsigned int max_cpus) void __init native_smp_cpus_done(unsigned int max_cpus)
{ {
int ncpus;
pr_debug("Boot done\n"); pr_debug("Boot done\n");
/*
* Today neither Intel nor AMD support heterogeneous systems so
* extrapolate the boot cpu's data to all packages.
*/
ncpus = cpu_data(0).booted_cores * smp_num_siblings;
__max_logical_packages = DIV_ROUND_UP(nr_cpu_ids, ncpus);
pr_info("Max logical packages: %u\n", __max_logical_packages);
if (x86_has_numa_in_package) if (x86_has_numa_in_package)
set_sched_topology(x86_numa_in_package_topology); set_sched_topology(x86_numa_in_package_topology);

View File

@ -188,6 +188,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
if (len > TASK_SIZE) if (len > TASK_SIZE)
return -ENOMEM; return -ENOMEM;
/* No address checking. See comment at mmap_address_hint_valid() */
if (flags & MAP_FIXED) if (flags & MAP_FIXED)
return addr; return addr;
@ -197,12 +198,15 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
/* requesting a specific address */ /* requesting a specific address */
if (addr) { if (addr) {
addr = PAGE_ALIGN(addr); addr &= PAGE_MASK;
if (!mmap_address_hint_valid(addr, len))
goto get_unmapped_area;
vma = find_vma(mm, addr); vma = find_vma(mm, addr);
if (TASK_SIZE - len >= addr && if (!vma || addr + len <= vm_start_gap(vma))
(!vma || addr + len <= vm_start_gap(vma)))
return addr; return addr;
} }
get_unmapped_area:
info.flags = VM_UNMAPPED_AREA_TOPDOWN; info.flags = VM_UNMAPPED_AREA_TOPDOWN;
info.length = len; info.length = len;

View File

@ -78,7 +78,60 @@
#define UMIP_INST_SGDT 0 /* 0F 01 /0 */ #define UMIP_INST_SGDT 0 /* 0F 01 /0 */
#define UMIP_INST_SIDT 1 /* 0F 01 /1 */ #define UMIP_INST_SIDT 1 /* 0F 01 /1 */
#define UMIP_INST_SMSW 3 /* 0F 01 /4 */ #define UMIP_INST_SMSW 2 /* 0F 01 /4 */
#define UMIP_INST_SLDT 3 /* 0F 00 /0 */
#define UMIP_INST_STR 4 /* 0F 00 /1 */
/*
 * Mnemonics of the UMIP-protected instructions, indexed by the
 * UMIP_INST_* identifiers. Used to name the offending instruction in
 * log messages.
 */
const char * const umip_insns[5] = {
[UMIP_INST_SGDT] = "SGDT",
[UMIP_INST_SIDT] = "SIDT",
[UMIP_INST_SMSW] = "SMSW",
[UMIP_INST_SLDT] = "SLDT",
[UMIP_INST_STR] = "STR",
};
/*
 * Convenience wrappers around umip_printk() that fix the kernel log level
 * to KERN_ERR and KERN_WARNING respectively. @regs, @fmt and any further
 * arguments are forwarded unchanged.
 */
#define umip_pr_err(regs, fmt, ...) \
umip_printk(regs, KERN_ERR, fmt, ##__VA_ARGS__)
#define umip_pr_warning(regs, fmt, ...) \
umip_printk(regs, KERN_WARNING, fmt, ##__VA_ARGS__)
/**
 * umip_printk() - Emit a rate-limited, task-tagged log message
 * @regs:	Register set describing the context the message refers to
 * @log_level:	Kernel log level string the message is printed with
 * @fmt:	printf-style format string, followed by its arguments
 *
 * Formats @fmt and prints it prefixed with the name and process ID of the
 * current task and with the instruction and stack pointers stored in @regs.
 * It exists so that the use of UMIP-protected instructions by user space
 * can be reported without flooding the log: output is limited to bursts of
 * 5 messages every two minutes, and messages over that budget are dropped.
 *
 * Returns:
 *
 * None.
 */
static __printf(3, 4)
void umip_printk(const struct pt_regs *regs, const char *log_level,
		 const char *fmt, ...)
{
	/* Budget: 5 messages per two-minute interval */
	static DEFINE_RATELIMIT_STATE(umip_rs, 2 * 60 * HZ, 5);
	struct task_struct *task = current;
	struct va_format vaf;
	va_list ap;

	if (__ratelimit(&umip_rs)) {
		va_start(ap, fmt);

		/* Hand the caller's format and arguments to printk via %pV */
		vaf.fmt = fmt;
		vaf.va = &ap;
		printk("%s" pr_fmt("%s[%d] ip:%lx sp:%lx: %pV"), log_level,
		       task->comm, task_pid_nr(task), regs->ip, regs->sp,
		       &vaf);

		va_end(ap);
	}
}
/** /**
* identify_insn() - Identify a UMIP-protected instruction * identify_insn() - Identify a UMIP-protected instruction
@ -118,10 +171,16 @@ static int identify_insn(struct insn *insn)
default: default:
return -EINVAL; return -EINVAL;
} }
} } else if (insn->opcode.bytes[1] == 0x0) {
if (X86_MODRM_REG(insn->modrm.value) == 0)
/* SLDT AND STR are not emulated */ return UMIP_INST_SLDT;
else if (X86_MODRM_REG(insn->modrm.value) == 1)
return UMIP_INST_STR;
else
return -EINVAL; return -EINVAL;
} else {
return -EINVAL;
}
} }
/** /**
@ -228,10 +287,8 @@ static void force_sig_info_umip_fault(void __user *addr, struct pt_regs *regs)
if (!(show_unhandled_signals && unhandled_signal(tsk, SIGSEGV))) if (!(show_unhandled_signals && unhandled_signal(tsk, SIGSEGV)))
return; return;
pr_err_ratelimited("%s[%d] umip emulation segfault ip:%lx sp:%lx error:%x in %lx\n", umip_pr_err(regs, "segfault in emulation. error%x\n",
tsk->comm, task_pid_nr(tsk), regs->ip, X86_PF_USER | X86_PF_WRITE);
regs->sp, X86_PF_USER | X86_PF_WRITE,
regs->ip);
} }
/** /**
@ -262,15 +319,11 @@ bool fixup_umip_exception(struct pt_regs *regs)
unsigned char buf[MAX_INSN_SIZE]; unsigned char buf[MAX_INSN_SIZE];
void __user *uaddr; void __user *uaddr;
struct insn insn; struct insn insn;
char seg_defs; int seg_defs;
if (!regs) if (!regs)
return false; return false;
/* Do not emulate 64-bit processes. */
if (user_64bit_mode(regs))
return false;
/* /*
* If not in user-space long mode, a custom code segment could be in * If not in user-space long mode, a custom code segment could be in
* use. This is true in protected mode (if the process defined a local * use. This is true in protected mode (if the process defined a local
@ -322,6 +375,15 @@ bool fixup_umip_exception(struct pt_regs *regs)
if (umip_inst < 0) if (umip_inst < 0)
return false; return false;
umip_pr_warning(regs, "%s instruction cannot be used by applications.\n",
umip_insns[umip_inst]);
/* Do not emulate SLDT, STR or user long mode processes. */
if (umip_inst == UMIP_INST_STR || umip_inst == UMIP_INST_SLDT || user_64bit_mode(regs))
return false;
umip_pr_warning(regs, "For now, expensive software emulation returns the result.\n");
if (emulate_umip_insn(&insn, umip_inst, dummy_data, &dummy_data_size)) if (emulate_umip_insn(&insn, umip_inst, dummy_data, &dummy_data_size))
return false; return false;

View File

@ -733,11 +733,11 @@ static unsigned long get_seg_limit(struct pt_regs *regs, int seg_reg_idx)
* *
* Returns: * Returns:
* *
* A signed 8-bit value containing the default parameters on success. * An int containing ORed-in default parameters on success.
* *
* -EINVAL on error. * -EINVAL on error.
*/ */
char insn_get_code_seg_params(struct pt_regs *regs) int insn_get_code_seg_params(struct pt_regs *regs)
{ {
struct desc_struct *desc; struct desc_struct *desc;
short sel; short sel;

View File

@ -896,7 +896,7 @@ EndTable
GrpTable: Grp3_1 GrpTable: Grp3_1
0: TEST Eb,Ib 0: TEST Eb,Ib
1: 1: TEST Eb,Ib
2: NOT Eb 2: NOT Eb
3: NEG Eb 3: NEG Eb
4: MUL AL,Eb 4: MUL AL,Eb

View File

@ -158,6 +158,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
if (len > TASK_SIZE) if (len > TASK_SIZE)
return -ENOMEM; return -ENOMEM;
/* No address checking. See comment at mmap_address_hint_valid() */
if (flags & MAP_FIXED) { if (flags & MAP_FIXED) {
if (prepare_hugepage_range(file, addr, len)) if (prepare_hugepage_range(file, addr, len))
return -EINVAL; return -EINVAL;
@ -165,12 +166,16 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
} }
if (addr) { if (addr) {
addr = ALIGN(addr, huge_page_size(h)); addr &= huge_page_mask(h);
if (!mmap_address_hint_valid(addr, len))
goto get_unmapped_area;
vma = find_vma(mm, addr); vma = find_vma(mm, addr);
if (TASK_SIZE - len >= addr && if (!vma || addr + len <= vm_start_gap(vma))
(!vma || addr + len <= vm_start_gap(vma)))
return addr; return addr;
} }
get_unmapped_area:
if (mm->get_unmapped_area == arch_get_unmapped_area) if (mm->get_unmapped_area == arch_get_unmapped_area)
return hugetlb_get_unmapped_area_bottomup(file, addr, len, return hugetlb_get_unmapped_area_bottomup(file, addr, len,
pgoff, flags); pgoff, flags);

View File

@ -33,6 +33,8 @@
#include <linux/compat.h> #include <linux/compat.h>
#include <asm/elf.h> #include <asm/elf.h>
#include "physaddr.h"
struct va_alignment __read_mostly va_align = { struct va_alignment __read_mostly va_align = {
.flags = -1, .flags = -1,
}; };
@ -174,3 +176,63 @@ const char *arch_vma_name(struct vm_area_struct *vma)
return "[mpx]"; return "[mpx]";
return NULL; return NULL;
} }
/**
 * mmap_address_hint_valid - Decide whether an mmap() address hint is usable
 * @addr:	Address hint
 * @len:	Mapping length
 *
 * Returns true when a mapping of @len bytes placed at @addr would be
 * acceptable, false when the hint has to be ignored.
 *
 * Two conditions are checked:
 *
 * 1) @addr + @len must not exceed TASK_SIZE. On 32bit this is the only
 *    check that matters.
 *
 * 2) The mapping must not straddle DEFAULT_MAP_WINDOW, i.e. the border of
 *    the 47-bit virtual address space. This matters on 64bit with 5-level
 *    page tables:
 *
 *    Consider an application calling mmap() without MAP_FIXED where @addr
 *    is below the 47-bit border and @addr + @len is above it.
 *
 *    With 4-level paging the request succeeds, but the hint cannot result
 *    in a valid mapping and is ignored, so the returned address is always
 *    inside the 47-bit address space. Applications which cannot cope with
 *    addresses above 47-bit therefore keep working.
 *
 *    With 5-level paging the very same request would be granted at the
 *    hinted address and yield a mapping crossing the 47-bit border. An
 *    application unable to handle addresses above 47-bit would then
 *    misbehave in ways that are hard to diagnose.
 *
 *    Hence hints which would produce a mapping crossing the 47-bit border
 *    are treated as invalid.
 *
 * The MAP_FIXED case is deliberately different and is not routed through
 * this check: a fixed request with @addr < 47-bit and @addr + @len > 47-bit
 * fails with 4-level paging and succeeds with 5-level paging. It is
 * reasonable to expect that no application relies on such a fixed mapping
 * request failing, so no restriction is applied there.
 */
bool mmap_address_hint_valid(unsigned long addr, unsigned long len)
{
	bool start_above_window;
	bool end_above_window;

	/* The end of the mapping must stay within the task's address space */
	if (addr > TASK_SIZE - len)
		return false;

	/* Start and end have to lie on the same side of the window border */
	start_above_window = addr > DEFAULT_MAP_WINDOW;
	end_above_window = addr + len > DEFAULT_MAP_WINDOW;

	return start_above_window == end_above_window;
}
/*
 * Direct read/write access is only allowed for RAM: the range
 * [addr, addr + count) must end at or below the physical address of
 * high_memory.
 */
int valid_phys_addr_range(phys_addr_t addr, size_t count)
{
	phys_addr_t end = addr + count;

	return end <= __pa(high_memory);
}
/* Can we access it through mmap? Must be a valid physical address: */
int valid_mmap_phys_addr_range(unsigned long pfn, size_t count)
{
phys_addr_t addr = (phys_addr_t)pfn << PAGE_SHIFT;
return phys_addr_valid(addr + count - 1);
}

View File

@ -343,6 +343,10 @@ static int mmap_mem(struct file *file, struct vm_area_struct *vma)
size_t size = vma->vm_end - vma->vm_start; size_t size = vma->vm_end - vma->vm_start;
phys_addr_t offset = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT; phys_addr_t offset = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT;
/* Does it even fit in phys_addr_t? */
if (offset >> PAGE_SHIFT != vma->vm_pgoff)
return -EINVAL;
/* It's illegal to wrap around the end of the physical address space. */ /* It's illegal to wrap around the end of the physical address space. */
if (offset + (phys_addr_t)size - 1 < offset) if (offset + (phys_addr_t)size - 1 < offset)
return -EINVAL; return -EINVAL;

View File

@ -80,15 +80,6 @@ config XEN_PCIDEV_FRONTEND
The PCI device frontend driver allows the kernel to import arbitrary The PCI device frontend driver allows the kernel to import arbitrary
PCI devices from a PCI backend to support PCI driver domains. PCI devices from a PCI backend to support PCI driver domains.
config HT_IRQ
bool "Interrupts on hypertransport devices"
default y
depends on PCI && X86_LOCAL_APIC
help
This allows native hypertransport devices to use interrupts.
If unsure say Y.
config PCI_ATS config PCI_ATS
bool bool

View File

@ -21,9 +21,6 @@ obj-$(CONFIG_HOTPLUG_PCI) += hotplug/
# Build the PCI MSI interrupt support # Build the PCI MSI interrupt support
obj-$(CONFIG_PCI_MSI) += msi.o obj-$(CONFIG_PCI_MSI) += msi.o
# Build the Hypertransport interrupt support
obj-$(CONFIG_HT_IRQ) += htirq.o
obj-$(CONFIG_PCI_ATS) += ats.o obj-$(CONFIG_PCI_ATS) += ats.o
obj-$(CONFIG_PCI_IOV) += iov.o obj-$(CONFIG_PCI_IOV) += iov.o

View File

@ -1,135 +0,0 @@
// SPDX-License-Identifier: GPL-2.0
/*
* File: htirq.c
* Purpose: Hypertransport Interrupt Capability
*
* Copyright (C) 2006 Linux Networx
* Copyright (C) Eric Biederman <ebiederman@lnxi.com>
*/
#include <linux/irq.h>
#include <linux/pci.h>
#include <linux/spinlock.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/htirq.h>
/* Global ht irq lock.
*
* This is needed to serialize access to the data port in hypertransport
* irq capability.
*
* With multiple simultaneous hypertransport irq devices it might pay
* to make this more fine grained. But start with simple, stupid, and correct.
*/
static DEFINE_SPINLOCK(ht_irq_lock);
/*
 * Write a new hypertransport irq message for @irq to the device and cache it.
 *
 * The per-irq configuration is taken from the irq's handler data. Each
 * 32-bit half of @msg is only written when it differs from the cached copy.
 * A write consists of storing a register index at config offset pos + 2
 * followed by the value at pos + 4; both steps happen under ht_irq_lock.
 */
void write_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg)
{
struct ht_irq_cfg *cfg = irq_get_handler_data(irq);
unsigned long flags;
spin_lock_irqsave(&ht_irq_lock, flags);
/* Low word lives at register index idx */
if (cfg->msg.address_lo != msg->address_lo) {
pci_write_config_byte(cfg->dev, cfg->pos + 2, cfg->idx);
pci_write_config_dword(cfg->dev, cfg->pos + 4, msg->address_lo);
}
/* High word lives at register index idx + 1 */
if (cfg->msg.address_hi != msg->address_hi) {
pci_write_config_byte(cfg->dev, cfg->pos + 2, cfg->idx + 1);
pci_write_config_dword(cfg->dev, cfg->pos + 4, msg->address_hi);
}
/* Optional per-device hook, invoked while the lock is still held */
if (cfg->update)
cfg->update(cfg->dev, irq, msg);
spin_unlock_irqrestore(&ht_irq_lock, flags);
/* Remember what was written; note this happens after the lock is dropped */
cfg->msg = *msg;
}
void fetch_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg)
{
struct ht_irq_cfg *cfg = irq_get_handler_data(irq);
*msg = cfg->msg;
}
void mask_ht_irq(struct irq_data *data)
{
struct ht_irq_cfg *cfg = irq_data_get_irq_handler_data(data);
struct ht_irq_msg msg = cfg->msg;
msg.address_lo |= 1;
write_ht_irq_msg(data->irq, &msg);
}
void unmask_ht_irq(struct irq_data *data)
{
struct ht_irq_cfg *cfg = irq_data_get_irq_handler_data(data);
struct ht_irq_msg msg = cfg->msg;
msg.address_lo &= ~1;
write_ht_irq_msg(data->irq, &msg);
}
/**
 * __ht_create_irq - create an irq and attach it to a device.
 * @dev: The hypertransport device whose irq capability is used.
 * @idx: Index of the irq to attach to, out of those the device offers.
 * @update: Callback to invoke whenever the htirq message is changed.
 *
 * Fails with -EINVAL when @dev has no hypertransport irq capability or when
 * @idx is larger than the maximum index reported by the device. Otherwise
 * the result of arch_setup_ht_irq() is returned: the number of the new irq
 * or a negative error value.
 */
int __ht_create_irq(struct pci_dev *dev, int idx, ht_irq_update_t *update)
{
	unsigned long irqflags;
	int cap, last_idx, virq;
	u32 reg;

	cap = pci_find_ht_capability(dev, HT_CAPTYPE_IRQ);
	if (cap == 0)
		return -EINVAL;

	/* Select register 1 and read it back to learn the highest valid idx */
	spin_lock_irqsave(&ht_irq_lock, irqflags);
	pci_write_config_byte(dev, cap + 2, 1);
	pci_read_config_dword(dev, cap + 4, &reg);
	spin_unlock_irqrestore(&ht_irq_lock, irqflags);

	last_idx = (reg >> 16) & 0xff;
	if (idx > last_idx)
		return -EINVAL;

	virq = arch_setup_ht_irq(idx, cap, dev, update);
	if (virq > 0)
		dev_dbg(&dev->dev, "irq %d for HT\n", virq);

	return virq;
}
EXPORT_SYMBOL(__ht_create_irq);
/**
* ht_create_irq - create an irq and attach it to a device.
* @dev: The hypertransport device to find the irq capability on.
* @idx: Which of the possible irqs to attach to.
*
* ht_create_irq needs to be called for all hypertransport devices
* that generate irqs.
*
* The irq number of the new irq or a negative error value is returned.
*/
int ht_create_irq(struct pci_dev *dev, int idx)
{
return __ht_create_irq(dev, idx, NULL);
}
EXPORT_SYMBOL(ht_create_irq);
/**
 * ht_destroy_irq - destroy an irq created with ht_create_irq
 * @irq: irq to be destroyed
 *
 * Undoes ht_create_irq by handing @irq to arch_teardown_ht_irq(), which
 * removes it from existence. The irq should already be free when this is
 * called.
 */
void ht_destroy_irq(unsigned int irq)
{
	/* All of the teardown work is architecture specific */
	arch_teardown_ht_irq(irq);
}
EXPORT_SYMBOL(ht_destroy_irq);

View File

@ -1,39 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef LINUX_HTIRQ_H
#define LINUX_HTIRQ_H
struct pci_dev;
struct irq_data;
/* A hypertransport irq message, stored as its two 32-bit halves */
struct ht_irq_msg {
u32 address_lo; /* low 32 bits of the ht irq message */
u32 address_hi; /* high 32 bits of the ht irq message */
};
typedef void (ht_irq_update_t)(struct pci_dev *dev, int irq,
struct ht_irq_msg *msg);
/* Per-irq hypertransport state; retrieved via the irq's handler data */
struct ht_irq_cfg {
struct pci_dev *dev; /* device whose config space is written for this irq */
/* Update callback used to cope with buggy hardware */
ht_irq_update_t *update;
unsigned pos; /* config-space offset; presumably that of the HT irq capability */
unsigned idx; /* register index of the message's low word; idx + 1 is the high word */
struct ht_irq_msg msg; /* copy of the message last passed to write_ht_irq_msg() */
};
/* Helper functions.. */
void fetch_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg);
void write_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg);
void mask_ht_irq(struct irq_data *data);
void unmask_ht_irq(struct irq_data *data);
/* The arch hook for getting things started */
int arch_setup_ht_irq(int idx, int pos, struct pci_dev *dev,
ht_irq_update_t *update);
void arch_teardown_ht_irq(unsigned int irq);
/* For drivers of buggy hardware */
int __ht_create_irq(struct pci_dev *dev, int idx, ht_irq_update_t *update);
#endif /* LINUX_HTIRQ_H */

View File

@ -1485,12 +1485,6 @@ static inline void pcie_set_ecrc_checking(struct pci_dev *dev) { }
static inline void pcie_ecrc_get_policy(char *str) { } static inline void pcie_ecrc_get_policy(char *str) { }
#endif #endif
#ifdef CONFIG_HT_IRQ
/* The functions a driver should call */
int ht_create_irq(struct pci_dev *dev, int idx);
void ht_destroy_irq(unsigned int irq);
#endif /* CONFIG_HT_IRQ */
#ifdef CONFIG_PCI_ATS #ifdef CONFIG_PCI_ATS
/* Address Translation Service */ /* Address Translation Service */
void pci_ats_init(struct pci_dev *dev); void pci_ats_init(struct pci_dev *dev);

View File

@ -0,0 +1,177 @@
#include <stdio.h>
#include <sys/mman.h>
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
#define PAGE_SIZE 4096
#define LOW_ADDR ((void *) (1UL << 30))
#define HIGH_ADDR ((void *) (1UL << 50))
/* One mmap() placement scenario run by main() */
struct testcase {
void *addr; /* address hint passed to mmap() */
unsigned long size; /* length of the mapping in bytes */
unsigned long flags; /* flags passed to mmap() */
const char *msg; /* label printed in front of the result */
unsigned int low_addr_required:1; /* result must lie below 1UL << 47 */
unsigned int keep_mapped:1; /* do not munmap() the mapping afterwards */
};
/*
 * Scenarios are run in order and later entries depend on earlier ones:
 * an entry with keep_mapped set leaves its mapping in place so that the
 * "again" entry that follows finds the hinted range already occupied.
 *
 * The first half uses regular pages, the second half repeats the same
 * pattern with MAP_HUGETLB. Each .msg names the hint address and size that
 * are actually passed to mmap().
 */
static struct testcase testcases[] = {
	{
		.addr = NULL,
		.size = 2 * PAGE_SIZE,
		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
		.msg = "mmap(NULL)",
		.low_addr_required = 1,
	},
	{
		.addr = LOW_ADDR,
		.size = 2 * PAGE_SIZE,
		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
		.msg = "mmap(LOW_ADDR)",
		.low_addr_required = 1,
	},
	{
		.addr = HIGH_ADDR,
		.size = 2 * PAGE_SIZE,
		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
		.msg = "mmap(HIGH_ADDR)",
		.keep_mapped = 1,
	},
	{
		.addr = HIGH_ADDR,
		.size = 2 * PAGE_SIZE,
		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
		.msg = "mmap(HIGH_ADDR) again",
		.keep_mapped = 1,
	},
	{
		.addr = HIGH_ADDR,
		.size = 2 * PAGE_SIZE,
		.flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
		.msg = "mmap(HIGH_ADDR, MAP_FIXED)",
	},
	{
		.addr = (void*) -1,
		.size = 2 * PAGE_SIZE,
		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
		.msg = "mmap(-1)",
		.keep_mapped = 1,
	},
	{
		.addr = (void*) -1,
		.size = 2 * PAGE_SIZE,
		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
		.msg = "mmap(-1) again",
	},
	{
		/* Page-aligned hint; the mapping would cross the 47-bit border */
		.addr = (void *)((1UL << 47) - PAGE_SIZE),
		.size = 2 * PAGE_SIZE,
		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
		.msg = "mmap((1UL << 47) - PAGE_SIZE, 2 * PAGE_SIZE)",
		.low_addr_required = 1,
		.keep_mapped = 1,
	},
	{
		/* Same, but with a hint that is not page aligned */
		.addr = (void *)((1UL << 47) - PAGE_SIZE / 2),
		.size = 2 * PAGE_SIZE,
		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
		.msg = "mmap((1UL << 47) - PAGE_SIZE / 2, 2 * PAGE_SIZE)",
		.low_addr_required = 1,
		.keep_mapped = 1,
	},
	{
		.addr = (void *)((1UL << 47) - PAGE_SIZE),
		.size = 2 * PAGE_SIZE,
		.flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
		.msg = "mmap((1UL << 47) - PAGE_SIZE, 2 * PAGE_SIZE, MAP_FIXED)",
	},
	{
		.addr = NULL,
		.size = 2UL << 20,
		.flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
		.msg = "mmap(NULL, MAP_HUGETLB)",
		.low_addr_required = 1,
	},
	{
		.addr = LOW_ADDR,
		.size = 2UL << 20,
		.flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
		.msg = "mmap(LOW_ADDR, MAP_HUGETLB)",
		.low_addr_required = 1,
	},
	{
		.addr = HIGH_ADDR,
		.size = 2UL << 20,
		.flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
		.msg = "mmap(HIGH_ADDR, MAP_HUGETLB)",
		.keep_mapped = 1,
	},
	{
		.addr = HIGH_ADDR,
		.size = 2UL << 20,
		.flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
		.msg = "mmap(HIGH_ADDR, MAP_HUGETLB) again",
		.keep_mapped = 1,
	},
	{
		.addr = HIGH_ADDR,
		.size = 2UL << 20,
		.flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
		.msg = "mmap(HIGH_ADDR, MAP_FIXED | MAP_HUGETLB)",
	},
	{
		.addr = (void*) -1,
		.size = 2UL << 20,
		.flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
		.msg = "mmap(-1, MAP_HUGETLB)",
		.keep_mapped = 1,
	},
	{
		.addr = (void*) -1,
		.size = 2UL << 20,
		.flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
		.msg = "mmap(-1, MAP_HUGETLB) again",
	},
	{
		.addr = (void *)((1UL << 47) - PAGE_SIZE),
		.size = 4UL << 20,
		.flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
		.msg = "mmap((1UL << 47) - PAGE_SIZE, 4UL << 20, MAP_HUGETLB)",
		.low_addr_required = 1,
		.keep_mapped = 1,
	},
	{
		.addr = (void *)((1UL << 47) - (2UL << 20)),
		.size = 4UL << 20,
		.flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
		.msg = "mmap((1UL << 47) - (2UL << 20), 4UL << 20, MAP_FIXED | MAP_HUGETLB)",
	},
};
/*
 * Run every entry of testcases[] in order and print one result line each.
 *
 * A case fails when mmap() itself fails, or when the case requires a low
 * address and the mapping was placed at or above 1UL << 47. Mappings are
 * released again unless the case asks for them to be kept.
 *
 * Returns 0 when all cases passed and 1 when at least one failed, so that
 * a test harness can detect failures from the exit status.
 */
int main(int argc, char **argv)
{
	size_t i;
	int ret = 0;
	void *p;

	(void)argc;
	(void)argv;

	for (i = 0; i < ARRAY_SIZE(testcases); i++) {
		struct testcase *t = testcases + i;

		/* PROT_NONE: only the placement matters, the memory is never touched */
		p = mmap(t->addr, t->size, PROT_NONE, t->flags, -1, 0);

		printf("%s: %p - ", t->msg, p);

		if (p == MAP_FAILED) {
			printf("FAILED\n");
			ret = 1;
			continue;
		}

		if (t->low_addr_required && p >= (void *)(1UL << 47)) {
			printf("FAILED\n");
			ret = 1;
		} else {
			printf("OK\n");
		}

		if (!t->keep_mapped)
			munmap(p, t->size);
	}

	return ret;
}

View File

@ -11,7 +11,7 @@ TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_sysc
TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \ TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
test_FCMOV test_FCOMI test_FISTTP \ test_FCMOV test_FCOMI test_FISTTP \
vdso_restorer vdso_restorer
TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip 5lvl
TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY) TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY)
TARGETS_C_64BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_64BIT_ONLY) TARGETS_C_64BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_64BIT_ONLY)

View File

@ -52,14 +52,14 @@
struct mpx_bd_entry { struct mpx_bd_entry {
union { union {
char x[MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES]; char x[MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES];
void *contents[1]; void *contents[0];
}; };
} __attribute__((packed)); } __attribute__((packed));
struct mpx_bt_entry { struct mpx_bt_entry {
union { union {
char x[MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES]; char x[MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES];
unsigned long contents[1]; unsigned long contents[0];
}; };
} __attribute__((packed)); } __attribute__((packed));

View File

@ -30,6 +30,7 @@ static inline void sigsafe_printf(const char *format, ...)
if (!dprint_in_signal) { if (!dprint_in_signal) {
vprintf(format, ap); vprintf(format, ap);
} else { } else {
int ret;
int len = vsnprintf(dprint_in_signal_buffer, int len = vsnprintf(dprint_in_signal_buffer,
DPRINT_IN_SIGNAL_BUF_SIZE, DPRINT_IN_SIGNAL_BUF_SIZE,
format, ap); format, ap);
@ -39,7 +40,9 @@ static inline void sigsafe_printf(const char *format, ...)
*/ */
if (len > DPRINT_IN_SIGNAL_BUF_SIZE) if (len > DPRINT_IN_SIGNAL_BUF_SIZE)
len = DPRINT_IN_SIGNAL_BUF_SIZE; len = DPRINT_IN_SIGNAL_BUF_SIZE;
write(1, dprint_in_signal_buffer, len); ret = write(1, dprint_in_signal_buffer, len);
if (ret < 0)
abort();
} }
va_end(ap); va_end(ap);
} }

View File

@ -250,7 +250,7 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
unsigned long ip; unsigned long ip;
char *fpregs; char *fpregs;
u32 *pkru_ptr; u32 *pkru_ptr;
u64 si_pkey; u64 siginfo_pkey;
u32 *si_pkey_ptr; u32 *si_pkey_ptr;
int pkru_offset; int pkru_offset;
fpregset_t fpregset; fpregset_t fpregset;
@ -292,9 +292,9 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset); si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset);
dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr); dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr);
dump_mem(si_pkey_ptr - 8, 24); dump_mem(si_pkey_ptr - 8, 24);
si_pkey = *si_pkey_ptr; siginfo_pkey = *si_pkey_ptr;
pkey_assert(si_pkey < NR_PKEYS); pkey_assert(siginfo_pkey < NR_PKEYS);
last_si_pkey = si_pkey; last_si_pkey = siginfo_pkey;
if ((si->si_code == SEGV_MAPERR) || if ((si->si_code == SEGV_MAPERR) ||
(si->si_code == SEGV_ACCERR) || (si->si_code == SEGV_ACCERR) ||
@ -306,7 +306,7 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr); dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr);
/* need __rdpkru() version so we do not do shadow_pkru checking */ /* need __rdpkru() version so we do not do shadow_pkru checking */
dprintf1("signal pkru from pkru: %08x\n", __rdpkru()); dprintf1("signal pkru from pkru: %08x\n", __rdpkru());
dprintf1("si_pkey from siginfo: %jx\n", si_pkey); dprintf1("pkey from siginfo: %jx\n", siginfo_pkey);
*(u64 *)pkru_ptr = 0x00000000; *(u64 *)pkru_ptr = 0x00000000;
dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n"); dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n");
pkru_faults++; pkru_faults++;