From a903e3b64adfc2e126237f06e0b3ea7d2d8d13b5 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Tue, 17 Mar 2015 15:31:11 +0200 Subject: [PATCH 001/210] drm/omap: return error if dma_alloc_writecombine fails On a platform with no TILER (e.g. omap3, am43xx), when the user wants to allocate buffer with flag OMAP_BO_SCANOUT, the buffer needs to be allocated with dma_alloc_writecombine. For some reason the driver does not return an error if that alloc fails, instead it continues without backing memory. This leads to errors later when the user tries to use the buffer. This patch makes the driver return an error if dma_alloc_writecombine fails. Signed-off-by: Tomi Valkeinen Acked-by: Laurent Pinchart --- drivers/gpu/drm/omapdrm/omap_gem.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/omapdrm/omap_gem.c b/drivers/gpu/drm/omapdrm/omap_gem.c index 2ab77801cf5f..874eb6dcf94b 100644 --- a/drivers/gpu/drm/omapdrm/omap_gem.c +++ b/drivers/gpu/drm/omapdrm/omap_gem.c @@ -1378,11 +1378,7 @@ struct drm_gem_object *omap_gem_new(struct drm_device *dev, omap_obj = kzalloc(sizeof(*omap_obj), GFP_KERNEL); if (!omap_obj) - goto fail; - - spin_lock(&priv->list_lock); - list_add(&omap_obj->mm_list, &priv->obj_list); - spin_unlock(&priv->list_lock); + return NULL; obj = &omap_obj->base; @@ -1392,11 +1388,19 @@ struct drm_gem_object *omap_gem_new(struct drm_device *dev, */ omap_obj->vaddr = dma_alloc_writecombine(dev->dev, size, &omap_obj->paddr, GFP_KERNEL); - if (omap_obj->vaddr) - flags |= OMAP_BO_DMA; + if (!omap_obj->vaddr) { + kfree(omap_obj); + return NULL; + } + + flags |= OMAP_BO_DMA; } + spin_lock(&priv->list_lock); + list_add(&omap_obj->mm_list, &priv->obj_list); + spin_unlock(&priv->list_lock); + omap_obj->flags = flags; if (flags & OMAP_BO_TILED) { From 398f74569cebbf06bc6b069442bcd0e9616ca465 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Fri, 19 Jun 2015 21:37:56 +0100 Subject: [PATCH 002/210] ARM: 8393/1: smp: Fix suspicious RCU usage with ipi tracepoints John Stultz reports an RCU splat on boot with ARM ipi trace events enabled. =============================== [ INFO: suspicious RCU usage. ] 4.1.0-rc7-00033-gb5bed2f #153 Not tainted ------------------------------- include/trace/events/ipi.h:68 suspicious rcu_dereference_check() usage! other info that might help us debug this: RCU used illegally from idle CPU! rcu_scheduler_active = 1, debug_locks = 0 RCU used illegally from extended quiescent state! no locks held by swapper/0/0. stack backtrace: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.1.0-rc7-00033-gb5bed2f #153 Hardware name: Qualcomm (Flattened Device Tree) [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0x70/0xbc) [] (dump_stack) from [] (handle_IPI+0x428/0x604) [] (handle_IPI) from [] (gic_handle_irq+0x54/0x5c) [] (gic_handle_irq) from [] (__irq_svc+0x44/0x7c) Exception stack(0xc09f3f48 to 0xc09f3f90) 3f40: 00000001 00000001 00000000 c09f73b8 c09f4528 c0a5de9c 3f60: c076b4f0 00000000 00000000 c09ef108 c0a5cec1 00000001 00000000 c09f3f90 3f80: c026bf60 c0210ab8 20000113 ffffffff [] (__irq_svc) from [] (arch_cpu_idle+0x20/0x3c) [] (arch_cpu_idle) from [] (cpu_startup_entry+0x2c0/0x5dc) [] (cpu_startup_entry) from [] (start_kernel+0x358/0x3c4) [] (start_kernel) from [<8020807c>] (0x8020807c) At this point in the IPI handling path we haven't called irq_enter() yet, so RCU doesn't know that we're about to exit idle and properly warns that we're using RCU from an idle CPU. Use trace_ipi_entry_rcuidle() instead of trace_ipi_entry() so that RCU is informed about our exit from idle. Fixes: 365ec7b17327 ("ARM: add IPI tracepoints") Reported-by: John Stultz Tested-by: John Stultz Acked-by: Steven Rostedt Reviewed-by: Paul E. McKenney Signed-off-by: Stephen Boyd Signed-off-by: Russell King --- arch/arm/kernel/smp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index cca5b8758185..f11d82527076 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -576,7 +576,7 @@ void handle_IPI(int ipinr, struct pt_regs *regs) struct pt_regs *old_regs = set_irq_regs(regs); if ((unsigned)ipinr < NR_IPI) { - trace_ipi_entry(ipi_types[ipinr]); + trace_ipi_entry_rcuidle(ipi_types[ipinr]); __inc_irq_stat(cpu, ipi_irqs[ipinr]); } @@ -635,7 +635,7 @@ void handle_IPI(int ipinr, struct pt_regs *regs) } if ((unsigned)ipinr < NR_IPI) - trace_ipi_exit(ipi_types[ipinr]); + trace_ipi_exit_rcuidle(ipi_types[ipinr]); set_irq_regs(old_regs); } From 3de1f52a3ae823265299409e276f56cdadddc310 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Thu, 25 Jun 2015 01:04:20 +0100 Subject: [PATCH 003/210] ARM: 8394/1: update memblock limit after mapping lowmem The memblock limit is currently used in find_limits to find the bounds for ZONE_NORMAL. The memblock limit may need to be rounded down a PMD size to ensure allocations are fully mapped though. This has the side effect of reducing the amount of memory in ZONE_NORMAL. Once all lowmem is mapped, it's safe to change the memblock limit back to include the unaligned section. Adjust the memblock limit after lowmem mapping is complete. Before: # cat /proc/zoneinfo | grep managed managed 62907 managed 424 After: # cat /proc/zoneinfo | grep managed managed 63331 Signed-off-by: Laura Abbott Reviewed-by: Mark Rutland Tested-by: Mark Rutland Signed-off-by: Russell King --- arch/arm/mm/mmu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 7186382672b5..904d1532e6d0 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -1519,6 +1519,7 @@ void __init paging_init(const struct machine_desc *mdesc) build_mem_type_table(); prepare_page_table(); map_lowmem(); + memblock_set_current_limit(arm_lowmem_limit); dma_contiguous_remap(); devicemaps_init(mdesc); kmap_init(); From e48866647b486f31ff7c3927b48de8bbb1c6a4c0 Mon Sep 17 00:00:00 2001 From: Vitaly Andrianov Date: Fri, 26 Jun 2015 17:13:03 +0100 Subject: [PATCH 004/210] ARM: 8396/1: use phys_addr_t in pfn_to_kaddr() This patch fixes pfn_to_kaddr() to use phys_addr_t. Without this, this macro is broken on LPAE systems. For physical addresses above first 4GB result of shifting pfn with PAGE_SHIFT may be truncated. Signed-off-by: Vitaly Andrianov Acked-by: Nicolas Pitre Acked-by: Santosh Shilimkar Signed-off-by: Russell King --- arch/arm/include/asm/memory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index 184def0e1652..063ef314cca9 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -291,7 +291,7 @@ static inline void *phys_to_virt(phys_addr_t x) */ #define __pa(x) __virt_to_phys((unsigned long)(x)) #define __va(x) ((void *)__phys_to_virt((phys_addr_t)(x))) -#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) +#define pfn_to_kaddr(pfn) __va((phys_addr_t)(pfn) << PAGE_SHIFT) extern phys_addr_t (*arch_virt_to_idmap)(unsigned long x); From e6ae32c343981a50aa07c34767f2a967cc920edc Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 25 Jun 2015 10:34:38 +0100 Subject: [PATCH 005/210] ARM: fix DEBUG_SET_MODULE_RONX build dependencies randconfig testing reveals that DEBUG_SET_MODULE_RONX needs to depend on MMU otherwise these build errors are observed: kernel/built-in.o: In function `set_section_ro_nx': kernel/module.c:1738: undefined reference to `set_memory_nx' kernel/built-in.o: In function `set_page_attributes': kernel/module.c:1709: undefined reference to `set_memory_ro' This is because the pageattr functions are not built for !MMU configs as they don't have page tables. Signed-off-by: Russell King --- arch/arm/Kconfig.debug | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index 0c12ffb155a2..dfa21cb58f36 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -1590,7 +1590,7 @@ config PID_IN_CONTEXTIDR config DEBUG_SET_MODULE_RONX bool "Set loadable kernel module data as NX and text as RO" - depends on MODULES + depends on MODULES && MMU ---help--- This option helps catch unintended modifications to loadable kernel module's text and read-only data. It also prevents execution From b4d103d1a45fed0da2f31c905eb5e053c84a41c6 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 25 Jun 2015 10:49:45 +0100 Subject: [PATCH 006/210] ARM: add help text for HIGHPTE configuration entry Add some help text for the HIGHPTE configuration entry. This comes from the x86 entry, but reworded to be more a more accurate description of what this option does. Signed-off-by: Russell King --- arch/arm/Kconfig | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 45df48ba0b12..cc0ea0c6cfc9 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1661,6 +1661,12 @@ config HIGHMEM config HIGHPTE bool "Allocate 2nd-level pagetables from highmem" depends on HIGHMEM + help + The VM uses one page of physical memory for each page table. + For systems with a lot of processes, this can use a lot of + precious low memory, eventually leading to low memory being + consumed by page tables. Setting this option will allow + user-space 2nd level page tables to reside in high memory. config HW_PERF_EVENTS bool "Enable hardware performance counter support for perf events" From eeb3fee8f6cca5e7bf1647d9e327c7b40e384578 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 25 Jun 2015 10:52:58 +0100 Subject: [PATCH 007/210] ARM: add helpful message when truncating physical memory Add a nmessage to suggest that HIGHMEM is enabled when physical memory is truncated due to lack of virtual address space to map it in the low memory mapping. Signed-off-by: Russell King --- arch/arm/mm/mmu.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 904d1532e6d0..79de062c6077 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -1072,6 +1072,7 @@ void __init sanity_check_meminfo(void) int highmem = 0; phys_addr_t vmalloc_limit = __pa(vmalloc_min - 1) + 1; struct memblock_region *reg; + bool should_use_highmem = false; for_each_memblock(memory, reg) { phys_addr_t block_start = reg->base; @@ -1090,6 +1091,7 @@ void __init sanity_check_meminfo(void) pr_notice("Ignoring RAM at %pa-%pa (!CONFIG_HIGHMEM)\n", &block_start, &block_end); memblock_remove(reg->base, reg->size); + should_use_highmem = true; continue; } @@ -1100,6 +1102,7 @@ void __init sanity_check_meminfo(void) &block_start, &block_end, &vmalloc_limit); memblock_remove(vmalloc_limit, overlap_size); block_end = vmalloc_limit; + should_use_highmem = true; } } @@ -1134,6 +1137,9 @@ void __init sanity_check_meminfo(void) } } + if (should_use_highmem) + pr_notice("Consider using a HIGHMEM enabled kernel.\n"); + high_memory = __va(arm_lowmem_limit - 1) + 1; /* From 31f02455455d405320e2f749696bef4e02903b35 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 30 Jun 2015 12:07:17 -0400 Subject: [PATCH 008/210] sparse: fix misplaced __pmem definition Move the definition of __pmem outside of CONFIG_SPARSE_RCU_POINTER to fix: drivers/nvdimm/pmem.c:198:17: sparse: too many arguments for function __builtin_expect drivers/nvdimm/pmem.c:36:33: sparse: expected ; at end of declaration drivers/nvdimm/pmem.c:48:21: sparse: void declaration ...due to __pmem failing to be defined in some configurations when CONFIG_SPARSE_RCU_POINTER=y. Reported-by: kbuild test robot Reported-by: Dan Carpenter Signed-off-by: Dan Williams --- include/linux/compiler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 26fc8bc77f85..d8fbd500330e 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -17,11 +17,11 @@ # define __release(x) __context__(x,-1) # define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0) # define __percpu __attribute__((noderef, address_space(3))) +# define __pmem __attribute__((noderef, address_space(5))) #ifdef CONFIG_SPARSE_RCU_POINTER # define __rcu __attribute__((noderef, address_space(4))) #else # define __rcu -# define __pmem __attribute__((noderef, address_space(5))) #endif extern void __chk_user_ptr(const volatile void __user *); extern void __chk_io_ptr(const volatile void __iomem *); From af834d457d9ed69e14836b63d0da198fdd2ec706 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 30 Jun 2015 14:10:09 -0400 Subject: [PATCH 009/210] libnvdimm: smatch cleanups in __nd_ioctl Drop use of access_ok() since we are already using copy_{to|from}_user() which do their own access_ok(). Reported-by: Dan Carpenter Signed-off-by: Dan Williams --- drivers/nvdimm/bus.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 8eb22c0ca7ce..73442bd824a7 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -535,8 +535,6 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, __func__, dimm_name, cmd_name, i); return -ENXIO; } - if (!access_ok(VERIFY_READ, p + in_len, in_size)) - return -EFAULT; if (in_len < sizeof(in_env)) copy = min_t(u32, sizeof(in_env) - in_len, in_size); else @@ -557,8 +555,6 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, __func__, dimm_name, cmd_name, i); return -EFAULT; } - if (!access_ok(VERIFY_WRITE, p + in_len + out_len, out_size)) - return -EFAULT; if (out_len < sizeof(out_env)) copy = min_t(u32, sizeof(out_env) - out_len, out_size); else @@ -570,9 +566,6 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, } buf_len = out_len + in_len; - if (!access_ok(VERIFY_WRITE, p, sizeof(buf_len))) - return -EFAULT; - if (buf_len > ND_IOCTL_MAX_BUFLEN) { dev_dbg(dev, "%s:%s cmd: %s buf_len: %zu > %d\n", __func__, dimm_name, cmd_name, buf_len, From daa1dee405d7d3d3e816b84a692e838a5647a02a Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Sun, 28 Jun 2015 17:00:57 +0800 Subject: [PATCH 010/210] nvdimm: Fix return value of nvdimm_bus_init() if class_create() fails Return proper error if class_create() fails. Signed-off-by: Axel Lin Signed-off-by: Dan Williams --- drivers/nvdimm/bus.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 73442bd824a7..7e2c43f701bc 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -699,8 +699,10 @@ int __init nvdimm_bus_init(void) nvdimm_major = rc; nd_class = class_create(THIS_MODULE, "nd"); - if (IS_ERR(nd_class)) + if (IS_ERR(nd_class)) { + rc = PTR_ERR(nd_class); goto err_class; + } return 0; From 193ccca43850d2355e7690a93ab9d7d78d38f905 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 30 Jun 2015 16:09:39 -0400 Subject: [PATCH 011/210] nfit: fix smatch "use after null check" report drivers/acpi/nfit.c:1224 acpi_nfit_blk_region_enable() error: we previously assumed 'nfit_mem' could be null (see line 1223) drivers/acpi/nfit.c 1222 nfit_mem = nvdimm_provider_data(nvdimm); 1223 if (!nfit_mem || !nfit_mem->dcr || !nfit_mem->bdw) { ^^^^^^^^ Check. 1224 dev_dbg(dev, "%s: missing%s%s%s\n", __func__, 1225 nfit_mem ? "" : " nfit_mem", 1226 nfit_mem->dcr ? "" : " dcr", ^^^^^^^^^^^^^ Unchecked dereference. Reported-by: Dan Carpenter Acked-by: Ross Zwisler Signed-off-by: Dan Williams --- drivers/acpi/nfit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c index 2161fa178c8d..a20b7c883ca0 100644 --- a/drivers/acpi/nfit.c +++ b/drivers/acpi/nfit.c @@ -1223,8 +1223,8 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus, if (!nfit_mem || !nfit_mem->dcr || !nfit_mem->bdw) { dev_dbg(dev, "%s: missing%s%s%s\n", __func__, nfit_mem ? "" : " nfit_mem", - nfit_mem->dcr ? "" : " dcr", - nfit_mem->bdw ? "" : " bdw"); + (nfit_mem && nfit_mem->dcr) ? "" : " dcr", + (nfit_mem && nfit_mem->bdw) ? "" : " bdw"); return -ENXIO; } From fe7599079b03d521d376da88920cc7b87f71ae25 Mon Sep 17 00:00:00 2001 From: Yang Dongsheng Date: Wed, 3 Jun 2015 14:57:32 +0800 Subject: [PATCH 012/210] btrfs: qgroup: allow user to clear the limitation on qgroup Currently, we can only set a limitation on a qgroup, but we can not clear it. This patch provide a choice to user to clear a limitation on qgroup by passing a value of CLEAR_VALUE(-1) to kernel. Reported-by: Tsutomu Itoh Signed-off-by: Dongsheng Yang Tested-by: Tsutomu Itoh Signed-off-by: Chris Mason --- fs/btrfs/qgroup.c | 49 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index d5f1f033b7a0..e9ace099162c 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1349,6 +1349,11 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, struct btrfs_root *quota_root; struct btrfs_qgroup *qgroup; int ret = 0; + /* Sometimes we would want to clear the limit on this qgroup. + * To meet this requirement, we treat the -1 as a special value + * which tell kernel to clear the limit on this qgroup. + */ + const u64 CLEAR_VALUE = -1; mutex_lock(&fs_info->qgroup_ioctl_lock); quota_root = fs_info->quota_root; @@ -1364,14 +1369,42 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, } spin_lock(&fs_info->qgroup_lock); - if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER) - qgroup->max_rfer = limit->max_rfer; - if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) - qgroup->max_excl = limit->max_excl; - if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER) - qgroup->rsv_rfer = limit->rsv_rfer; - if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL) - qgroup->rsv_excl = limit->rsv_excl; + if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER) { + if (limit->max_rfer == CLEAR_VALUE) { + qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER; + limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER; + qgroup->max_rfer = 0; + } else { + qgroup->max_rfer = limit->max_rfer; + } + } + if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) { + if (limit->max_excl == CLEAR_VALUE) { + qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL; + limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL; + qgroup->max_excl = 0; + } else { + qgroup->max_excl = limit->max_excl; + } + } + if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER) { + if (limit->rsv_rfer == CLEAR_VALUE) { + qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER; + limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER; + qgroup->rsv_rfer = 0; + } else { + qgroup->rsv_rfer = limit->rsv_rfer; + } + } + if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL) { + if (limit->rsv_excl == CLEAR_VALUE) { + qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL; + limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL; + qgroup->rsv_excl = 0; + } else { + qgroup->rsv_excl = limit->rsv_excl; + } + } qgroup->lim_flags |= limit->flags; spin_unlock(&fs_info->qgroup_lock); From 65f5333875d7bbfc13436e224a181d10a80d1ada Mon Sep 17 00:00:00 2001 From: Zhao Lei Date: Mon, 8 Jun 2015 20:05:50 +0800 Subject: [PATCH 013/210] btrfs: cleanup noused initialization of dev in btrfs_end_bio() It is introduced by: c404e0dc2c843b154f9a36c3aec10d0a715d88eb Btrfs: fix use-after-free in the finishing procedure of the device replace But seems no relationship with that bug, this patch revirt these code block for cleanup. Signed-off-by: Zhao Lei Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 83966996aacb..d4cd4059bded 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -5715,7 +5715,6 @@ static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio, int e static void btrfs_end_bio(struct bio *bio, int err) { struct btrfs_bio *bbio = bio->bi_private; - struct btrfs_device *dev = bbio->stripes[0].dev; int is_orig_bio = 0; if (err) { @@ -5723,6 +5722,7 @@ static void btrfs_end_bio(struct bio *bio, int err) if (err == -EIO || err == -EREMOTEIO) { unsigned int stripe_index = btrfs_io_bio(bio)->stripe_index; + struct btrfs_device *dev; BUG_ON(stripe_index >= bbio->num_stripes); dev = bbio->stripes[stripe_index].dev; From e82afc52abff07a4acbc90f899598ebafb662831 Mon Sep 17 00:00:00 2001 From: Zhao Lei Date: Fri, 12 Jun 2015 20:36:58 +0800 Subject: [PATCH 014/210] btrfs: add error handling for scrub_workers_get() Although it is a rare case, we'd better free previous allocated memory on error. Signed-off-by: Zhao Lei Signed-off-by: Qu Wenruo Signed-off-by: Chris Mason --- fs/btrfs/scrub.c | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 9f2feabe99f2..94db0fa5225a 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -3571,7 +3571,6 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx, static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info, int is_dev_replace) { - int ret = 0; unsigned int flags = WQ_FREEZABLE | WQ_UNBOUND; int max_active = fs_info->thread_pool_size; @@ -3584,34 +3583,36 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info, fs_info->scrub_workers = btrfs_alloc_workqueue("btrfs-scrub", flags, max_active, 4); - if (!fs_info->scrub_workers) { - ret = -ENOMEM; - goto out; - } + if (!fs_info->scrub_workers) + goto fail_scrub_workers; + fs_info->scrub_wr_completion_workers = btrfs_alloc_workqueue("btrfs-scrubwrc", flags, max_active, 2); - if (!fs_info->scrub_wr_completion_workers) { - ret = -ENOMEM; - goto out; - } + if (!fs_info->scrub_wr_completion_workers) + goto fail_scrub_wr_completion_workers; + fs_info->scrub_nocow_workers = btrfs_alloc_workqueue("btrfs-scrubnc", flags, 1, 0); - if (!fs_info->scrub_nocow_workers) { - ret = -ENOMEM; - goto out; - } + if (!fs_info->scrub_nocow_workers) + goto fail_scrub_nocow_workers; fs_info->scrub_parity_workers = btrfs_alloc_workqueue("btrfs-scrubparity", flags, max_active, 2); - if (!fs_info->scrub_parity_workers) { - ret = -ENOMEM; - goto out; - } + if (!fs_info->scrub_parity_workers) + goto fail_scrub_parity_workers; } ++fs_info->scrub_workers_refcnt; -out: - return ret; + return 0; + +fail_scrub_parity_workers: + btrfs_destroy_workqueue(fs_info->scrub_nocow_workers); +fail_scrub_nocow_workers: + btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers); +fail_scrub_wr_completion_workers: + btrfs_destroy_workqueue(fs_info->scrub_workers); +fail_scrub_workers: + return -ENOMEM; } static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info) From 67c5e7d464bc466471b05e027abe8a6b29687ebd Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 11 Jun 2015 00:58:53 +0100 Subject: [PATCH 015/210] Btrfs: fix race between balance and unused block group deletion We have a race between deleting an unused block group and balancing the same block group that leads to an assertion failure/BUG(), producing the following trace: [181631.208236] BTRFS: assertion failed: 0, file: fs/btrfs/volumes.c, line: 2622 [181631.220591] ------------[ cut here ]------------ [181631.222959] kernel BUG at fs/btrfs/ctree.h:4062! [181631.223932] invalid opcode: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC [181631.224566] Modules linked in: btrfs dm_flakey dm_mod crc32c_generic xor raid6_pq nfsd auth_rpcgss oid_registry nfs_acl nfs lockd grace fscache sunrpc loop fuse acpi_cpufreq parpor$ [181631.224566] CPU: 8 PID: 17451 Comm: btrfs Tainted: G W 4.1.0-rc5-btrfs-next-10+ #1 [181631.224566] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.8.1-0-g4adadbd-20150316_085822-nilsson.home.kraxel.org 04/01/2014 [181631.224566] task: ffff880127e09590 ti: ffff8800b5824000 task.ti: ffff8800b5824000 [181631.224566] RIP: 0010:[] [] assfail.constprop.50+0x1e/0x20 [btrfs] [181631.224566] RSP: 0018:ffff8800b5827ae8 EFLAGS: 00010246 [181631.224566] RAX: 0000000000000040 RBX: ffff8800109fc218 RCX: ffffffff81095dce [181631.224566] RDX: 0000000000005124 RSI: ffffffff81464819 RDI: 00000000ffffffff [181631.224566] RBP: ffff8800b5827ae8 R08: 0000000000000001 R09: 0000000000000000 [181631.224566] R10: 0000000000000000 R11: 0000000000000000 R12: ffff8800109fc200 [181631.224566] R13: ffff880020095000 R14: ffff8800b1a13f38 R15: ffff880020095000 [181631.224566] FS: 00007f70ca0b0c80(0000) GS:ffff88013ec00000(0000) knlGS:0000000000000000 [181631.224566] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [181631.224566] CR2: 00007f2872ab6e68 CR3: 00000000a717c000 CR4: 00000000000006e0 [181631.224566] Stack: [181631.224566] ffff8800b5827ba8 ffffffffa03f3916 ffff8800b5827b38 ffffffffa03d080e [181631.224566] ffffffffa03d1423 ffff880020095000 ffff88001233c000 0000000000000001 [181631.224566] ffff880020095000 ffff8800b1a13f38 0000000a69c00000 0000000000000000 [181631.224566] Call Trace: [181631.224566] [] btrfs_remove_chunk+0xa4/0x6bb [btrfs] [181631.224566] [] ? join_transaction.isra.8+0xb9/0x3ba [btrfs] [181631.224566] [] ? wait_current_trans.isra.13+0x22/0xfc [btrfs] [181631.224566] [] btrfs_relocate_chunk.isra.29+0x8f/0xa7 [btrfs] [181631.224566] [] btrfs_balance+0xaa4/0xc52 [btrfs] [181631.224566] [] btrfs_ioctl_balance+0x23f/0x2b0 [btrfs] [181631.224566] [] ? trace_hardirqs_on+0xd/0xf [181631.224566] [] btrfs_ioctl+0xfe2/0x2220 [btrfs] [181631.224566] [] ? __this_cpu_preempt_check+0x13/0x15 [181631.224566] [] ? arch_local_irq_save+0x9/0xc [181631.224566] [] ? handle_mm_fault+0x834/0xcd2 [181631.224566] [] ? handle_mm_fault+0x834/0xcd2 [181631.224566] [] ? __do_page_fault+0x211/0x424 [181631.224566] [] do_vfs_ioctl+0x3c6/0x479 (...) The sequence of steps leading to this are: CPU 0 CPU 1 btrfs_balance() btrfs_relocate_chunk() btrfs_relocate_block_group(bg X) btrfs_lookup_block_group(bg X) cleaner_kthread locks fs_info->cleaner_mutex btrfs_delete_unused_bgs() finds bg X, which became unused in the previous transaction checks bg X ->ro == 0, so it proceeds sets bg X ->ro to 1 (btrfs_set_block_group_ro(bg X)) blocks on fs_info->cleaner_mutex btrfs_remove_chunk(bg X) unlocks fs_info->cleaner_mutex acquires fs_info->cleaner_mutex relocate_block_group() --> does nothing, no extents found in the extent tree from bg X unlocks fs_info->cleaner_mutex btrfs_relocate_block_group(bg X) returns btrfs_remove_chunk(bg X) extent map not found --> ASSERT(0) Fix this by using a new mutex to make sure these 2 operations, block group relocation and removal, are serialized. This issue is reproducible by running fstests generic/038 (which stresses chunk allocation and automatic removal of unused block groups) together with the following balance loop: while true; do btrfs balance start -dusage=0 ; done Signed-off-by: Filipe Manana Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 12 ++++++++++- fs/btrfs/extent-tree.c | 3 +++ fs/btrfs/volumes.c | 48 +++++++++++++++++++++++++++++++++++++----- 4 files changed, 58 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 80a9aefb0c46..aac314e14188 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1778,6 +1778,7 @@ struct btrfs_fs_info { spinlock_t unused_bgs_lock; struct list_head unused_bgs; struct mutex unused_bg_unpin_mutex; + struct mutex delete_unused_bgs_mutex; /* For btrfs to record security options */ struct security_mnt_opts security_opts; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b977fc8d8201..b59deb2c63f4 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1772,7 +1772,6 @@ static int cleaner_kthread(void *arg) } btrfs_run_delayed_iputs(root); - btrfs_delete_unused_bgs(root->fs_info); again = btrfs_clean_one_deleted_snapshot(root); mutex_unlock(&root->fs_info->cleaner_mutex); @@ -1781,6 +1780,16 @@ static int cleaner_kthread(void *arg) * needn't do anything special here. */ btrfs_run_defrag_inodes(root->fs_info); + + /* + * Acquires fs_info->delete_unused_bgs_mutex to avoid racing + * with relocation (btrfs_relocate_chunk) and relocation + * acquires fs_info->cleaner_mutex (btrfs_relocate_block_group) + * after acquiring fs_info->delete_unused_bgs_mutex. So we + * can't hold, nor need to, fs_info->cleaner_mutex when deleting + * unused block groups. + */ + btrfs_delete_unused_bgs(root->fs_info); sleep: if (!try_to_freeze() && !again) { set_current_state(TASK_INTERRUPTIBLE); @@ -2492,6 +2501,7 @@ int open_ctree(struct super_block *sb, spin_lock_init(&fs_info->unused_bgs_lock); rwlock_init(&fs_info->tree_mod_log_lock); mutex_init(&fs_info->unused_bg_unpin_mutex); + mutex_init(&fs_info->delete_unused_bgs_mutex); mutex_init(&fs_info->reloc_mutex); mutex_init(&fs_info->delalloc_root_mutex); seqlock_init(&fs_info->profiles_lock); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 38b76cc02f48..1c2bd1723e40 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -9889,6 +9889,8 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) } spin_unlock(&fs_info->unused_bgs_lock); + mutex_lock(&root->fs_info->delete_unused_bgs_mutex); + /* Don't want to race with allocators so take the groups_sem */ down_write(&space_info->groups_sem); spin_lock(&block_group->lock); @@ -9983,6 +9985,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) end_trans: btrfs_end_transaction(trans, root); next: + mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); btrfs_put_block_group(block_group); spin_lock(&fs_info->unused_bgs_lock); } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index d4cd4059bded..9b95503ddd00 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2766,6 +2766,20 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, root = root->fs_info->chunk_root; extent_root = root->fs_info->extent_root; + /* + * Prevent races with automatic removal of unused block groups. + * After we relocate and before we remove the chunk with offset + * chunk_offset, automatic removal of the block group can kick in, + * resulting in a failure when calling btrfs_remove_chunk() below. + * + * Make sure to acquire this mutex before doing a tree search (dev + * or chunk trees) to find chunks. Otherwise the cleaner kthread might + * call btrfs_remove_chunk() (through btrfs_delete_unused_bgs()) after + * we release the path used to search the chunk/dev tree and before + * the current task acquires this mutex and calls us. + */ + ASSERT(mutex_is_locked(&root->fs_info->delete_unused_bgs_mutex)); + ret = btrfs_can_relocate(extent_root, chunk_offset); if (ret) return -ENOSPC; @@ -2814,13 +2828,18 @@ again: key.type = BTRFS_CHUNK_ITEM_KEY; while (1) { + mutex_lock(&root->fs_info->delete_unused_bgs_mutex); ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0); - if (ret < 0) + if (ret < 0) { + mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); goto error; + } BUG_ON(ret == 0); /* Corruption */ ret = btrfs_previous_item(chunk_root, path, key.objectid, key.type); + if (ret) + mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); if (ret < 0) goto error; if (ret > 0) @@ -2843,6 +2862,7 @@ again: else BUG_ON(ret); } + mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); if (found_key.offset == 0) break; @@ -3299,9 +3319,12 @@ again: goto error; } + mutex_lock(&fs_info->delete_unused_bgs_mutex); ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0); - if (ret < 0) + if (ret < 0) { + mutex_unlock(&fs_info->delete_unused_bgs_mutex); goto error; + } /* * this shouldn't happen, it means the last relocate @@ -3313,6 +3336,7 @@ again: ret = btrfs_previous_item(chunk_root, path, 0, BTRFS_CHUNK_ITEM_KEY); if (ret) { + mutex_unlock(&fs_info->delete_unused_bgs_mutex); ret = 0; break; } @@ -3321,8 +3345,10 @@ again: slot = path->slots[0]; btrfs_item_key_to_cpu(leaf, &found_key, slot); - if (found_key.objectid != key.objectid) + if (found_key.objectid != key.objectid) { + mutex_unlock(&fs_info->delete_unused_bgs_mutex); break; + } chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); @@ -3335,10 +3361,13 @@ again: ret = should_balance_chunk(chunk_root, leaf, chunk, found_key.offset); btrfs_release_path(path); - if (!ret) + if (!ret) { + mutex_unlock(&fs_info->delete_unused_bgs_mutex); goto loop; + } if (counting) { + mutex_unlock(&fs_info->delete_unused_bgs_mutex); spin_lock(&fs_info->balance_lock); bctl->stat.expected++; spin_unlock(&fs_info->balance_lock); @@ -3348,6 +3377,7 @@ again: ret = btrfs_relocate_chunk(chunk_root, found_key.objectid, found_key.offset); + mutex_unlock(&fs_info->delete_unused_bgs_mutex); if (ret && ret != -ENOSPC) goto error; if (ret == -ENOSPC) { @@ -4087,11 +4117,16 @@ again: key.type = BTRFS_DEV_EXTENT_KEY; do { + mutex_lock(&root->fs_info->delete_unused_bgs_mutex); ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (ret < 0) + if (ret < 0) { + mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); goto done; + } ret = btrfs_previous_item(root, path, 0, key.type); + if (ret) + mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); if (ret < 0) goto done; if (ret) { @@ -4105,6 +4140,7 @@ again: btrfs_item_key_to_cpu(l, &key, path->slots[0]); if (key.objectid != device->devid) { + mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); btrfs_release_path(path); break; } @@ -4113,6 +4149,7 @@ again: length = btrfs_dev_extent_length(l, dev_extent); if (key.offset + length <= new_size) { + mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); btrfs_release_path(path); break; } @@ -4122,6 +4159,7 @@ again: btrfs_release_path(path); ret = btrfs_relocate_chunk(root, chunk_objectid, chunk_offset); + mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); if (ret && ret != -ENOSPC) goto done; if (ret == -ENOSPC) From c3f4a1685bb87e59c886ee68f7967eae07d4dffa Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Sat, 13 Jun 2015 06:52:56 +0100 Subject: [PATCH 016/210] Btrfs: use kmem_cache_free when freeing entry in inode cache The free space entries are allocated using kmem_cache_zalloc(), through __btrfs_add_free_space(), therefore we should use kmem_cache_free() and not kfree() to avoid any confusion and any potential problem. Looking at the kfree() definition at mm/slab.c it has the following comment: /* * (...) * * Don't free memory not originally allocated by kmalloc() * or you will run into trouble. */ So better be safe and use kmem_cache_free(). Cc: stable@vger.kernel.org Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/inode-map.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index f6a596d5a637..218df701e607 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -271,7 +271,7 @@ void btrfs_unpin_free_ino(struct btrfs_root *root) __btrfs_add_free_space(ctl, info->offset, count); free: rb_erase(&info->offset_index, rbroot); - kfree(info); + kmem_cache_free(btrfs_free_space_cachep, info); } } From ae9d8f17118551bedd797406a6768b87c2146234 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Sat, 13 Jun 2015 06:52:57 +0100 Subject: [PATCH 017/210] Btrfs: fix race between caching kthread and returning inode to inode cache While the inode cache caching kthread is calling btrfs_unpin_free_ino(), we could have a concurrent call to btrfs_return_ino() that adds a new entry to the root's free space cache of pinned inodes. This concurrent call does not acquire the fs_info->commit_root_sem before adding a new entry if the caching state is BTRFS_CACHE_FINISHED, which is a problem because the caching kthread calls btrfs_unpin_free_ino() after setting the caching state to BTRFS_CACHE_FINISHED and therefore races with the task calling btrfs_return_ino(), which is adding a new entry, while the former (caching kthread) is navigating the cache's rbtree, removing and freeing nodes from the cache's rbtree without acquiring the spinlock that protects the rbtree. This race resulted in memory corruption due to double free of struct btrfs_free_space objects because both tasks can end up doing freeing the same objects. Note that adding a new entry can result in merging it with other entries in the cache, in which case those entries are freed. This is particularly important as btrfs_free_space structures are also used for the block group free space caches. This memory corruption can be detected by a debugging kernel, which reports it with the following trace: [132408.501148] slab error in verify_redzone_free(): cache `btrfs_free_space': double free detected [132408.505075] CPU: 15 PID: 12248 Comm: btrfs-ino-cache Tainted: G W 4.1.0-rc5-btrfs-next-10+ #1 [132408.505075] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.8.1-0-g4adadbd-20150316_085822-nilsson.home.kraxel.org 04/01/2014 [132408.505075] ffff880023e7d320 ffff880163d73cd8 ffffffff8145eec7 ffffffff81095dce [132408.505075] ffff880009735d40 ffff880163d73ce8 ffffffff81154e1e ffff880163d73d68 [132408.505075] ffffffff81155733 ffffffffa054a95a ffff8801b6099f00 ffffffffa0505b5f [132408.505075] Call Trace: [132408.505075] [] dump_stack+0x4f/0x7b [132408.505075] [] ? console_unlock+0x356/0x3a2 [132408.505075] [] __slab_error.isra.28+0x25/0x36 [132408.505075] [] __cache_free+0xe2/0x4b6 [132408.505075] [] ? __btrfs_add_free_space+0x2f0/0x343 [btrfs] [132408.505075] [] ? btrfs_unpin_free_ino+0x8e/0x99 [btrfs] [132408.505075] [] ? time_hardirqs_off+0x15/0x28 [132408.505075] [] ? trace_hardirqs_off+0xd/0xf [132408.505075] [] ? kfree+0xb6/0x14e [132408.505075] [] kfree+0xe5/0x14e [132408.505075] [] btrfs_unpin_free_ino+0x8e/0x99 [btrfs] [132408.505075] [] caching_kthread+0x29e/0x2d9 [btrfs] [132408.505075] [] ? btrfs_unpin_free_ino+0x99/0x99 [btrfs] [132408.505075] [] kthread+0xef/0xf7 [132408.505075] [] ? time_hardirqs_on+0x15/0x28 [132408.505075] [] ? __kthread_parkme+0xad/0xad [132408.505075] [] ret_from_fork+0x42/0x70 [132408.505075] [] ? __kthread_parkme+0xad/0xad [132408.505075] ffff880023e7d320: redzone 1:0x9f911029d74e35b, redzone 2:0x9f911029d74e35b. [132409.501654] slab: double free detected in cache 'btrfs_free_space', objp ffff880023e7d320 [132409.503355] ------------[ cut here ]------------ [132409.504241] kernel BUG at mm/slab.c:2571! Therefore fix this by having btrfs_unpin_free_ino() acquire the lock that protects the rbtree while doing the searches and removing entries. Fixes: 1c70d8fb4dfa ("Btrfs: fix inode caching vs tree log") Cc: stable@vger.kernel.org Signed-off-by: Filipe Manana Signed-off-by: Chris Mason --- fs/btrfs/inode-map.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 218df701e607..d4a582ac3f73 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -246,6 +246,7 @@ void btrfs_unpin_free_ino(struct btrfs_root *root) { struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; struct rb_root *rbroot = &root->free_ino_pinned->free_space_offset; + spinlock_t *rbroot_lock = &root->free_ino_pinned->tree_lock; struct btrfs_free_space *info; struct rb_node *n; u64 count; @@ -254,23 +255,29 @@ void btrfs_unpin_free_ino(struct btrfs_root *root) return; while (1) { + bool add_to_ctl = true; + + spin_lock(rbroot_lock); n = rb_first(rbroot); - if (!n) + if (!n) { + spin_unlock(rbroot_lock); break; + } info = rb_entry(n, struct btrfs_free_space, offset_index); BUG_ON(info->bitmap); /* Logic error */ if (info->offset > root->ino_cache_progress) - goto free; + add_to_ctl = false; else if (info->offset + info->bytes > root->ino_cache_progress) count = root->ino_cache_progress - info->offset + 1; else count = info->bytes; - __btrfs_add_free_space(ctl, info->offset, count); -free: rb_erase(&info->offset_index, rbroot); + spin_unlock(rbroot_lock); + if (add_to_ctl) + __btrfs_add_free_space(ctl, info->offset, count); kmem_cache_free(btrfs_free_space_cachep, info); } } From da288d280d16f4d7e4ada331cb33d381b408b10c Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Sat, 13 Jun 2015 06:55:31 +0100 Subject: [PATCH 018/210] Btrfs: fix crash on close_ctree() if cleaner starts new transaction Often when running fstests btrfs/079 I was running into the following trace during umount on one of my qemu/kvm test vms: [ 8245.682441] WARNING: CPU: 8 PID: 25064 at fs/btrfs/extent-tree.c:138 btrfs_put_block_group+0x51/0x69 [btrfs]() [ 8245.685039] Modules linked in: btrfs dm_flakey dm_mod crc32c_generic xor raid6_pq nfsd auth_rpcgss oid_registry nfs_acl nfs lockd grace fscache sunrpc loop fuse parport_pc i2c_piix4 acpi_cpufreq processor psmouse i2c_core thermal_sys parport evdev serio_raw button pcspkr microcode ext4 crc16 jbd2 mbcache sg sr_mod cdrom sd_mod ata_generic virtio_scsi ata_piix libata floppy virtio_pci virtio_ring scsi_mod virtio e1000 [last unloaded: btrfs] [ 8245.693860] CPU: 8 PID: 25064 Comm: umount Tainted: G W 4.1.0-rc5-btrfs-next-10+ #1 [ 8245.695081] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.8.1-0-g4adadbd-20150316_085822-nilsson.home.kraxel.org 04/01/2014 [ 8245.697583] 0000000000000009 ffff88020d047ce8 ffffffff8145eec7 ffffffff81095dce [ 8245.699234] 0000000000000000 ffff88020d047d28 ffffffff8104b399 0000000000000028 [ 8245.700995] ffffffffa04db07b ffff8801c6036c00 ffff8801c6036d68 ffff880202eb40b0 [ 8245.702510] Call Trace: [ 8245.703006] [] dump_stack+0x4f/0x7b [ 8245.705393] [] ? console_unlock+0x356/0x3a2 [ 8245.706569] [] warn_slowpath_common+0xa1/0xbb [ 8245.707747] [] ? btrfs_put_block_group+0x51/0x69 [btrfs] [ 8245.709101] [] warn_slowpath_null+0x1a/0x1c [ 8245.710274] [] btrfs_put_block_group+0x51/0x69 [btrfs] [ 8245.711823] [] btrfs_free_block_groups+0x145/0x322 [btrfs] [ 8245.713251] [] close_ctree+0x1ef/0x325 [btrfs] [ 8245.714448] [] ? evict_inodes+0xdc/0xeb [ 8245.715539] [] btrfs_put_super+0x19/0x1b [btrfs] [ 8245.716835] [] generic_shutdown_super+0x73/0xef [ 8245.718015] [] kill_anon_super+0x13/0x1e [ 8245.719101] [] btrfs_kill_super+0x17/0x23 [btrfs] [ 8245.720316] [] deactivate_locked_super+0x3b/0x68 [ 8245.721517] [] deactivate_super+0x3f/0x43 [ 8245.722581] [] cleanup_mnt+0x59/0x78 [ 8245.723538] [] __cleanup_mnt+0x12/0x14 [ 8245.724572] [] task_work_run+0x8f/0xbc [ 8245.725598] [] do_notify_resume+0x45/0x53 [ 8245.726892] [] int_signal+0x12/0x17 [ 8245.737887] ---[ end trace a01d038397e99b92 ]--- [ 8245.769363] general protection fault: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC [ 8245.770737] Modules linked in: btrfs dm_flakey dm_mod crc32c_generic xor raid6_pq nfsd auth_rpcgss oid_registry nfs_acl nfs lockd grace fscache sunrpc loop fuse parport_pc i2c_piix4 acpi_cpufreq processor psmouse i2c_core thermal_sys parport evdev serio_raw button pcspkr microcode ext4 crc16 jbd2 mbcache sg sr_mod cdrom sd_mod ata_generic virtio_scsi ata_piix libata floppy virtio_pci virtio_ring scsi_mod virtio e1000 [last unloaded: btrfs] [ 8245.772641] CPU: 2 PID: 25064 Comm: umount Tainted: G W 4.1.0-rc5-btrfs-next-10+ #1 [ 8245.772641] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.8.1-0-g4adadbd-20150316_085822-nilsson.home.kraxel.org 04/01/2014 [ 8245.772641] task: ffff880013005810 ti: ffff88020d044000 task.ti: ffff88020d044000 [ 8245.772641] RIP: 0010:[] [] btrfs_queue_work+0x2c/0x14d [btrfs] [ 8245.772641] RSP: 0018:ffff88020d0478b8 EFLAGS: 00010202 [ 8245.772641] RAX: 0000000000000004 RBX: 6b6b6b6b6b6b6b6b RCX: ffffffffa0581488 [ 8245.772641] RDX: 0000000000000000 RSI: ffff880194b7bf48 RDI: ffff880144b6a7a0 [ 8245.772641] RBP: ffff88020d0478d8 R08: 0000000000000000 R09: 000000000000ffff [ 8245.772641] R10: 0000000000000004 R11: 0000000000000005 R12: ffff880194b7bf48 [ 8245.772641] R13: ffff880194b7bf48 R14: 0000000000000410 R15: 0000000000000000 [ 8245.772641] FS: 00007f991e77d840(0000) GS:ffff88023e280000(0000) knlGS:0000000000000000 [ 8245.772641] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 8245.772641] CR2: 00007fbbd325ee68 CR3: 000000021de8e000 CR4: 00000000000006e0 [ 8245.772641] Stack: [ 8245.772641] ffff880194b7bf00 ffff880202eb4000 ffff880194b7bf48 0000000000000410 [ 8245.772641] ffff88020d047958 ffffffffa04ec6d5 ffff8801629b2ee8 0000000082987570 [ 8245.772641] 0000000000a5813f 0000000000000001 ffff880013006100 0000000000000002 [ 8245.772641] Call Trace: [ 8245.772641] [] btrfs_wq_submit_bio+0xe1/0x17b [btrfs] [ 8245.772641] [] ? check_irq_usage+0x76/0x87 [ 8245.772641] [] btree_submit_bio_hook+0xb6/0xd9 [btrfs] [ 8245.772641] [] ? btree_csum_one_bio+0xad/0xad [btrfs] [ 8245.772641] [] ? btree_io_failed_hook+0x5e/0x5e [btrfs] [ 8245.772641] [] submit_one_bio+0x8c/0xc7 [btrfs] [ 8245.772641] [] submit_extent_page.isra.18+0x9d/0x186 [btrfs] [ 8245.772641] [] write_one_eb+0x117/0x1ae [btrfs] [ 8245.772641] [] ? end_extent_buffer_writeback+0x21/0x21 [btrfs] [ 8245.772641] [] btree_write_cache_pages+0x2ab/0x385 [btrfs] [ 8245.772641] [] btree_writepages+0x23/0x5c [btrfs] [ 8245.772641] [] do_writepages+0x23/0x2c [ 8245.772641] [] __writeback_single_inode+0xda/0x5bd [ 8245.772641] [] ? writeback_single_inode+0x2b/0x173 [ 8245.772641] [] writeback_single_inode+0xc8/0x173 [ 8245.772641] [] write_inode_now+0x8a/0x95 [ 8245.772641] [] ? _atomic_dec_and_lock+0x30/0x4e [ 8245.772641] [] iput+0x17d/0x26a [ 8245.772641] [] close_ctree+0x22a/0x325 [btrfs] [ 8245.772641] [] ? evict_inodes+0xdc/0xeb [ 8245.772641] [] btrfs_put_super+0x19/0x1b [btrfs] [ 8245.772641] [] generic_shutdown_super+0x73/0xef [ 8245.772641] [] kill_anon_super+0x13/0x1e [ 8245.772641] [] btrfs_kill_super+0x17/0x23 [btrfs] [ 8245.772641] [] deactivate_locked_super+0x3b/0x68 [ 8245.772641] [] deactivate_super+0x3f/0x43 [ 8245.772641] [] cleanup_mnt+0x59/0x78 [ 8245.772641] [] __cleanup_mnt+0x12/0x14 [ 8245.772641] [] task_work_run+0x8f/0xbc [ 8245.772641] [] do_notify_resume+0x45/0x53 [ 8245.772641] [] int_signal+0x12/0x17 [ 8245.772641] Code: 1f 44 00 00 55 48 89 e5 41 56 41 55 41 54 53 49 89 f4 48 8b 46 70 a8 04 74 09 48 8b 5f 08 48 85 db 75 03 48 8b 1f 49 89 5c 24 68 <83> 7b 5c ff 74 04 f0 ff 43 50 49 83 7c 24 08 00 74 2c 4c 8d 6b [ 8245.772641] RIP [] btrfs_queue_work+0x2c/0x14d [btrfs] [ 8245.772641] RSP [ 8245.845040] ---[ end trace a01d038397e99b93 ]--- For logical reasons such as the phase of the moon, this happened more often with "-o inode_cache" than without any mount options. After some debugging it turned out to be simple to understand what was happening: 1) close_ctree() is called; 2) It then stops the transaction kthread, which commits the current transaction; 3) It asks the cleaner kthread to stop, which is currently running btrfs_delete_unused_bgs(); 4) btrfs_delete_unused_bgs() finds an unused block group, starts a new transaction, deletes the block group, which implies COWing some tree nodes and leafs and dirtying their respective pages, and then finally it ends the transaction it started, without committing it; 5) The cleaner kthread stops; 6) close_ctree() releases (from memory) the block group objects, which produces the warning in the trace pasted above; 7) Then it invalidates all pages of the btree inode, by calling invalidate_inode_pages2(), which waits for any pages under writeback, and releases any non-dirty pages; 8) All work queues are destroyed (waiting first for their current tasks to finish execution); 9) A final iput() is called against the btree inode; 10) This iput triggers a writeback of the btree inode because it still has dirty pages; 11) This starts the whole chain of callbacks for the btree inode until it eventually reaches btrfs_wq_submit_bio() where it leads to a NULL pointer dereference because the work queues were already destroyed. Fix this by making the cleaner commit any transaction that it started after the transaction kthread was stopped. Signed-off-by: Filipe Manana Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b59deb2c63f4..e5aad7f535aa 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1751,6 +1751,7 @@ static int cleaner_kthread(void *arg) { struct btrfs_root *root = arg; int again; + struct btrfs_trans_handle *trans; do { again = 0; @@ -1798,6 +1799,34 @@ sleep: __set_current_state(TASK_RUNNING); } } while (!kthread_should_stop()); + + /* + * Transaction kthread is stopped before us and wakes us up. + * However we might have started a new transaction and COWed some + * tree blocks when deleting unused block groups for example. So + * make sure we commit the transaction we started to have a clean + * shutdown when evicting the btree inode - if it has dirty pages + * when we do the final iput() on it, eviction will trigger a + * writeback for it which will fail with null pointer dereferences + * since work queues and other resources were already released and + * destroyed by the time the iput/eviction/writeback is made. + */ + trans = btrfs_attach_transaction(root); + if (IS_ERR(trans)) { + if (PTR_ERR(trans) != -ENOENT) + btrfs_err(root->fs_info, + "cleaner transaction attach returned %ld", + PTR_ERR(trans)); + } else { + int ret; + + ret = btrfs_commit_transaction(trans, root); + if (ret) + btrfs_err(root->fs_info, + "cleaner open transaction commit returned %d", + ret); + } + return 0; } From e4545de5b035c7debb73d260c78377dbb69cbfb5 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 17 Jun 2015 12:49:23 +0100 Subject: [PATCH 019/210] Btrfs: fix fsync data loss after append write If we do an append write to a file (which increases its inode's i_size) that does not have the flag BTRFS_INODE_NEEDS_FULL_SYNC set in its inode, and the previous transaction added a new hard link to the file, which sets the flag BTRFS_INODE_COPY_EVERYTHING in the file's inode, and then fsync the file, the inode's new i_size isn't logged. This has the consequence that after the fsync log is replayed, the file size remains what it was before the append write operation, which means users/applications will not be able to read the data that was successsfully fsync'ed before. This happens because neither the inode item nor the delayed inode get their i_size updated when the append write is made - doing so would require starting a transaction in the buffered write path, something that we do not do intentionally for performance reasons. Fix this by making sure that when the flag BTRFS_INODE_COPY_EVERYTHING is set the inode is logged with its current i_size (log the in-memory inode into the log tree). This issue is not a recent regression and is easy to reproduce with the following test case for fstests: seq=`basename $0` seqres=$RESULT_DIR/$seq echo "QA output created by $seq" here=`pwd` tmp=/tmp/$$ status=1 # failure is the default! _cleanup() { _cleanup_flakey rm -f $tmp.* } trap "_cleanup; exit \$status" 0 1 2 3 15 # get standard environment, filters and checks . ./common/rc . ./common/filter . ./common/dmflakey # real QA test starts here _supported_fs generic _supported_os Linux _need_to_be_root _require_scratch _require_dm_flakey _require_metadata_journaling $SCRATCH_DEV _crash_and_mount() { # Simulate a crash/power loss. _load_flakey_table $FLAKEY_DROP_WRITES _unmount_flakey # Allow writes again and mount. This makes the fs replay its fsync log. _load_flakey_table $FLAKEY_ALLOW_WRITES _mount_flakey } rm -f $seqres.full _scratch_mkfs >> $seqres.full 2>&1 _init_flakey _mount_flakey # Create the test file with some initial data and then fsync it. # The fsync here is only needed to trigger the issue in btrfs, as it causes the # the flag BTRFS_INODE_NEEDS_FULL_SYNC to be removed from the btrfs inode. $XFS_IO_PROG -f -c "pwrite -S 0xaa 0 32k" \ -c "fsync" \ $SCRATCH_MNT/foo | _filter_xfs_io sync # Add a hard link to our file. # On btrfs this sets the flag BTRFS_INODE_COPY_EVERYTHING on the btrfs inode, # which is a necessary condition to trigger the issue. ln $SCRATCH_MNT/foo $SCRATCH_MNT/bar # Sync the filesystem to force a commit of the current btrfs transaction, this # is a necessary condition to trigger the bug on btrfs. sync # Now append more data to our file, increasing its size, and fsync the file. # In btrfs because the inode flag BTRFS_INODE_COPY_EVERYTHING was set and the # write path did not update the inode item in the btree nor the delayed inode # item (in memory struture) in the current transaction (created by the fsync # handler), the fsync did not record the inode's new i_size in the fsync # log/journal. This made the data unavailable after the fsync log/journal is # replayed. $XFS_IO_PROG -c "pwrite -S 0xbb 32K 32K" \ -c "fsync" \ $SCRATCH_MNT/foo | _filter_xfs_io echo "File content after fsync and before crash:" od -t x1 $SCRATCH_MNT/foo _crash_and_mount echo "File content after crash and log replay:" od -t x1 $SCRATCH_MNT/foo status=0 exit The expected file output before and after the crash/power failure expects the appended data to be available, which is: 0000000 aa aa aa aa aa aa aa aa aa aa aa aa aa aa aa aa * 0100000 bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb * 0200000 Cc: stable@vger.kernel.org Signed-off-by: Filipe Manana Reviewed-by: Liu Bo Signed-off-by: Chris Mason --- fs/btrfs/tree-log.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 1ce80c1c4eb6..76c4f9d1b80a 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4155,6 +4155,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, u64 ino = btrfs_ino(inode); struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; u64 logged_isize = 0; + bool need_log_inode_item = true; path = btrfs_alloc_path(); if (!path) @@ -4263,11 +4264,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, } else { if (inode_only == LOG_INODE_ALL) fast_search = true; - ret = log_inode_item(trans, log, dst_path, inode); - if (ret) { - err = ret; - goto out_unlock; - } goto log_extents; } @@ -4290,6 +4286,9 @@ again: if (min_key.type > max_key.type) break; + if (min_key.type == BTRFS_INODE_ITEM_KEY) + need_log_inode_item = false; + src = path->nodes[0]; if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) { ins_nr++; @@ -4360,6 +4359,11 @@ next_slot: log_extents: btrfs_release_path(path); btrfs_release_path(dst_path); + if (need_log_inode_item) { + err = log_inode_item(trans, log, dst_path, inode); + if (err) + goto out_unlock; + } if (fast_search) { /* * Some ordered extents started by fsync might have completed From 36283bf777d963fac099213297e155d071096994 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Sat, 20 Jun 2015 00:44:51 +0100 Subject: [PATCH 020/210] Btrfs: fix fsync xattr loss in the fast fsync path After commit 4f764e515361 ("Btrfs: remove deleted xattrs on fsync log replay"), we can end up in a situation where during log replay we end up deleting xattrs that were never deleted when their file was last fsynced. This happens in the fast fsync path (flag BTRFS_INODE_NEEDS_FULL_SYNC is not set in the inode) if the inode has the flag BTRFS_INODE_COPY_EVERYTHING set, the xattr was added in a past transaction and the leaf where the xattr is located was not updated (COWed or created) in the current transaction. In this scenario the xattr item never ends up in the log tree and therefore at log replay time, which makes the replay code delete the xattr from the fs/subvol tree as it thinks that xattr was deleted prior to the last fsync. Fix this by always logging all xattrs, which is the simplest and most reliable way to detect deleted xattrs and replay the deletes at log replay time. This issue is reproducible with the following test case for fstests: seq=`basename $0` seqres=$RESULT_DIR/$seq echo "QA output created by $seq" here=`pwd` tmp=/tmp/$$ status=1 # failure is the default! _cleanup() { _cleanup_flakey rm -f $tmp.* } trap "_cleanup; exit \$status" 0 1 2 3 15 # get standard environment, filters and checks . ./common/rc . ./common/filter . ./common/dmflakey . ./common/attr # real QA test starts here # We create a lot of xattrs for a single file. Only btrfs and xfs are currently # able to store such a large mount of xattrs per file, other filesystems such # as ext3/4 and f2fs for example, fail with ENOSPC even if we attempt to add # less than 1000 xattrs with very small values. _supported_fs btrfs xfs _supported_os Linux _need_to_be_root _require_scratch _require_dm_flakey _require_attrs _require_metadata_journaling $SCRATCH_DEV rm -f $seqres.full _scratch_mkfs >> $seqres.full 2>&1 _init_flakey _mount_flakey # Create the test file with some initial data and make sure everything is # durably persisted. $XFS_IO_PROG -f -c "pwrite -S 0xaa 0 32k" $SCRATCH_MNT/foo | _filter_xfs_io sync # Add many small xattrs to our file. # We create such a large amount because it's needed to trigger the issue found # in btrfs - we need to have an amount that causes the fs to have at least 3 # btree leafs with xattrs stored in them, and it must work on any leaf size # (maximum leaf/node size is 64Kb). num_xattrs=2000 for ((i = 1; i <= $num_xattrs; i++)); do name="user.attr_$(printf "%04d" $i)" $SETFATTR_PROG -n $name -v "val_$(printf "%04d" $i)" $SCRATCH_MNT/foo done # Sync the filesystem to force a commit of the current btrfs transaction, this # is a necessary condition to trigger the bug on btrfs. sync # Now update our file's data and fsync the file. # After a successful fsync, if the fsync log/journal is replayed we expect to # see all the xattrs we added before with the same values (and the updated file # data of course). Btrfs used to delete some of these xattrs when it replayed # its fsync log/journal. $XFS_IO_PROG -c "pwrite -S 0xbb 8K 16K" \ -c "fsync" \ $SCRATCH_MNT/foo | _filter_xfs_io # Simulate a crash/power loss. _load_flakey_table $FLAKEY_DROP_WRITES _unmount_flakey # Allow writes again and mount. This makes the fs replay its fsync log. _load_flakey_table $FLAKEY_ALLOW_WRITES _mount_flakey echo "File content after crash and log replay:" od -t x1 $SCRATCH_MNT/foo echo "File xattrs after crash and log replay:" for ((i = 1; i <= $num_xattrs; i++)); do name="user.attr_$(printf "%04d" $i)" echo -n "$name=" $GETFATTR_PROG --absolute-names -n $name --only-values $SCRATCH_MNT/foo echo done status=0 exit The golden output expects all xattrs to be available, and with the correct values, after the fsync log is replayed. Signed-off-by: Filipe Manana Signed-off-by: Chris Mason --- fs/btrfs/tree-log.c | 104 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 104 insertions(+) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 76c4f9d1b80a..66f87156882f 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4117,6 +4117,86 @@ static int logged_inode_size(struct btrfs_root *log, struct inode *inode, return 0; } +/* + * At the moment we always log all xattrs. This is to figure out at log replay + * time which xattrs must have their deletion replayed. If a xattr is missing + * in the log tree and exists in the fs/subvol tree, we delete it. This is + * because if a xattr is deleted, the inode is fsynced and a power failure + * happens, causing the log to be replayed the next time the fs is mounted, + * we want the xattr to not exist anymore (same behaviour as other filesystems + * with a journal, ext3/4, xfs, f2fs, etc). + */ +static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode, + struct btrfs_path *path, + struct btrfs_path *dst_path) +{ + int ret; + struct btrfs_key key; + const u64 ino = btrfs_ino(inode); + int ins_nr = 0; + int start_slot = 0; + + key.objectid = ino; + key.type = BTRFS_XATTR_ITEM_KEY; + key.offset = 0; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + return ret; + + while (true) { + int slot = path->slots[0]; + struct extent_buffer *leaf = path->nodes[0]; + int nritems = btrfs_header_nritems(leaf); + + if (slot >= nritems) { + if (ins_nr > 0) { + u64 last_extent = 0; + + ret = copy_items(trans, inode, dst_path, path, + &last_extent, start_slot, + ins_nr, 1, 0); + /* can't be 1, extent items aren't processed */ + ASSERT(ret <= 0); + if (ret < 0) + return ret; + ins_nr = 0; + } + ret = btrfs_next_leaf(root, path); + if (ret < 0) + return ret; + else if (ret > 0) + break; + continue; + } + + btrfs_item_key_to_cpu(leaf, &key, slot); + if (key.objectid != ino || key.type != BTRFS_XATTR_ITEM_KEY) + break; + + if (ins_nr == 0) + start_slot = slot; + ins_nr++; + path->slots[0]++; + cond_resched(); + } + if (ins_nr > 0) { + u64 last_extent = 0; + + ret = copy_items(trans, inode, dst_path, path, + &last_extent, start_slot, + ins_nr, 1, 0); + /* can't be 1, extent items aren't processed */ + ASSERT(ret <= 0); + if (ret < 0) + return ret; + } + + return 0; +} + /* log a single inode in the tree log. * At least one parent directory for this inode must exist in the tree * or be logged already. @@ -4289,6 +4369,25 @@ again: if (min_key.type == BTRFS_INODE_ITEM_KEY) need_log_inode_item = false; + /* Skip xattrs, we log them later with btrfs_log_all_xattrs() */ + if (min_key.type == BTRFS_XATTR_ITEM_KEY) { + if (ins_nr == 0) + goto next_slot; + ret = copy_items(trans, inode, dst_path, path, + &last_extent, ins_start_slot, + ins_nr, inode_only, logged_isize); + if (ret < 0) { + err = ret; + goto out_unlock; + } + ins_nr = 0; + if (ret) { + btrfs_release_path(path); + continue; + } + goto next_slot; + } + src = path->nodes[0]; if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) { ins_nr++; @@ -4356,6 +4455,11 @@ next_slot: ins_nr = 0; } + btrfs_release_path(path); + btrfs_release_path(dst_path); + err = btrfs_log_all_xattrs(trans, root, inode, path, dst_path); + if (err) + goto out_unlock; log_extents: btrfs_release_path(path); btrfs_release_path(dst_path); From a89ca6f24ffe435edad57de02eaabd37a2c6bff6 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 25 Jun 2015 04:17:46 +0100 Subject: [PATCH 021/210] Btrfs: fix fsync after truncate when no_holes feature is enabled When we have the no_holes feature enabled, if a we truncate a file to a smaller size, truncate it again but to a size greater than or equals to its original size and fsync it, the log tree will not have any information about the hole covering the range [truncate_1_offset, new_file_size[. Which means if the fsync log is replayed, the file will remain with the state it had before both truncate operations. Without the no_holes feature this does not happen, since when the inode is logged (full sync flag is set) it will find in the fs/subvol tree a leaf with a generation matching the current transaction id that has an explicit extent item representing the hole. Fix this by adding an explicit extent item representing a hole between the last extent and the inode's i_size if we are doing a full sync. The issue is easy to reproduce with the following test case for fstests: . ./common/rc . ./common/filter . ./common/dmflakey _need_to_be_root _supported_fs generic _supported_os Linux _require_scratch _require_dm_flakey # This test was motivated by an issue found in btrfs when the btrfs # no-holes feature is enabled (introduced in kernel 3.14). So enable # the feature if the fs being tested is btrfs. if [ $FSTYP == "btrfs" ]; then _require_btrfs_fs_feature "no_holes" _require_btrfs_mkfs_feature "no-holes" MKFS_OPTIONS="$MKFS_OPTIONS -O no-holes" fi rm -f $seqres.full _scratch_mkfs >>$seqres.full 2>&1 _init_flakey _mount_flakey # Create our test files and make sure everything is durably persisted. $XFS_IO_PROG -f -c "pwrite -S 0xaa 0 64K" \ -c "pwrite -S 0xbb 64K 61K" \ $SCRATCH_MNT/foo | _filter_xfs_io $XFS_IO_PROG -f -c "pwrite -S 0xee 0 64K" \ -c "pwrite -S 0xff 64K 61K" \ $SCRATCH_MNT/bar | _filter_xfs_io sync # Now truncate our file foo to a smaller size (64Kb) and then truncate # it to the size it had before the shrinking truncate (125Kb). Then # fsync our file. If a power failure happens after the fsync, we expect # our file to have a size of 125Kb, with the first 64Kb of data having # the value 0xaa and the second 61Kb of data having the value 0x00. $XFS_IO_PROG -c "truncate 64K" \ -c "truncate 125K" \ -c "fsync" \ $SCRATCH_MNT/foo # Do something similar to our file bar, but the first truncation sets # the file size to 0 and the second truncation expands the size to the # double of what it was initially. $XFS_IO_PROG -c "truncate 0" \ -c "truncate 253K" \ -c "fsync" \ $SCRATCH_MNT/bar _load_flakey_table $FLAKEY_DROP_WRITES _unmount_flakey # Allow writes again, mount to trigger log replay and validate file # contents. _load_flakey_table $FLAKEY_ALLOW_WRITES _mount_flakey # We expect foo to have a size of 125Kb, the first 64Kb of data all # having the value 0xaa and the remaining 61Kb to be a hole (all bytes # with value 0x00). echo "File foo content after log replay:" od -t x1 $SCRATCH_MNT/foo # We expect bar to have a size of 253Kb and no extents (any byte read # from bar has the value 0x00). echo "File bar content after log replay:" od -t x1 $SCRATCH_MNT/bar status=0 exit The expected file contents in the golden output are: File foo content after log replay: 0000000 aa aa aa aa aa aa aa aa aa aa aa aa aa aa aa aa * 0200000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 0372000 File bar content after log replay: 0000000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 0772000 Without this fix, their contents are: File foo content after log replay: 0000000 aa aa aa aa aa aa aa aa aa aa aa aa aa aa aa aa * 0200000 bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb * 0372000 File bar content after log replay: 0000000 ee ee ee ee ee ee ee ee ee ee ee ee ee ee ee ee * 0200000 ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff * 0372000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 0772000 A test case submission for fstests follows soon. Signed-off-by: Filipe Manana Reviewed-by: Liu Bo Signed-off-by: Chris Mason --- fs/btrfs/tree-log.c | 108 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 66f87156882f..9c45431e69ab 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4197,6 +4197,107 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans, return 0; } +/* + * If the no holes feature is enabled we need to make sure any hole between the + * last extent and the i_size of our inode is explicitly marked in the log. This + * is to make sure that doing something like: + * + * 1) create file with 128Kb of data + * 2) truncate file to 64Kb + * 3) truncate file to 256Kb + * 4) fsync file + * 5) + * 6) mount fs and trigger log replay + * + * Will give us a file with a size of 256Kb, the first 64Kb of data match what + * the file had in its first 64Kb of data at step 1 and the last 192Kb of the + * file correspond to a hole. The presence of explicit holes in a log tree is + * what guarantees that log replay will remove/adjust file extent items in the + * fs/subvol tree. + * + * Here we do not need to care about holes between extents, that is already done + * by copy_items(). We also only need to do this in the full sync path, where we + * lookup for extents from the fs/subvol tree only. In the fast path case, we + * lookup the list of modified extent maps and if any represents a hole, we + * insert a corresponding extent representing a hole in the log tree. + */ +static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode, + struct btrfs_path *path) +{ + int ret; + struct btrfs_key key; + u64 hole_start; + u64 hole_size; + struct extent_buffer *leaf; + struct btrfs_root *log = root->log_root; + const u64 ino = btrfs_ino(inode); + const u64 i_size = i_size_read(inode); + + if (!btrfs_fs_incompat(root->fs_info, NO_HOLES)) + return 0; + + key.objectid = ino; + key.type = BTRFS_EXTENT_DATA_KEY; + key.offset = (u64)-1; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + ASSERT(ret != 0); + if (ret < 0) + return ret; + + ASSERT(path->slots[0] > 0); + path->slots[0]--; + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + + if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY) { + /* inode does not have any extents */ + hole_start = 0; + hole_size = i_size; + } else { + struct btrfs_file_extent_item *extent; + u64 len; + + /* + * If there's an extent beyond i_size, an explicit hole was + * already inserted by copy_items(). + */ + if (key.offset >= i_size) + return 0; + + extent = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + + if (btrfs_file_extent_type(leaf, extent) == + BTRFS_FILE_EXTENT_INLINE) { + len = btrfs_file_extent_inline_len(leaf, + path->slots[0], + extent); + ASSERT(len == i_size); + return 0; + } + + len = btrfs_file_extent_num_bytes(leaf, extent); + /* Last extent goes beyond i_size, no need to log a hole. */ + if (key.offset + len > i_size) + return 0; + hole_start = key.offset + len; + hole_size = i_size - hole_start; + } + btrfs_release_path(path); + + /* Last extent ends at i_size. */ + if (hole_size == 0) + return 0; + + hole_size = ALIGN(hole_size, root->sectorsize); + ret = btrfs_insert_file_extent(trans, log, ino, hole_start, 0, 0, + hole_size, 0, hole_size, 0, 0, 0); + return ret; +} + /* log a single inode in the tree log. * At least one parent directory for this inode must exist in the tree * or be logged already. @@ -4460,6 +4561,13 @@ next_slot: err = btrfs_log_all_xattrs(trans, root, inode, path, dst_path); if (err) goto out_unlock; + if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) { + btrfs_release_path(path); + btrfs_release_path(dst_path); + err = btrfs_log_trailing_hole(trans, root, inode, path); + if (err) + goto out_unlock; + } log_extents: btrfs_release_path(path); btrfs_release_path(dst_path); From 207910ddeeda38fd54544d94f8c8ca5a9632cc25 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Tue, 30 Jun 2015 14:42:04 -0700 Subject: [PATCH 022/210] btrfs: pass unaligned length to btrfs_cmp_data() In the case that we dedupe the tail of a file, we might expand the dedupe len out to the end of our last block. We don't want to compare data past i_size however, so pass the original length to btrfs_cmp_data(). Signed-off-by: Mark Fasheh Reviewed-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index c86b835da7a8..55504338491d 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2943,7 +2943,8 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, goto out_unlock; } - ret = btrfs_cmp_data(src, loff, dst, dst_loff, len); + /* pass original length for comparison so we stay within i_size */ + ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen); if (ret == 0) ret = btrfs_clone(src, dst, loff, olen, len, dst_loff); From f441460202cb787c49963bcc1f54cb48c52f7512 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Tue, 30 Jun 2015 14:42:05 -0700 Subject: [PATCH 023/210] btrfs: fix deadlock with extent-same and readpage ->readpage() does page_lock() before extent_lock(), we do the opposite in extent-same. We want to reverse the order in btrfs_extent_same() but it's not quite straightforward since the page locks are taken inside btrfs_cmp_data(). So I split btrfs_cmp_data() into 3 parts with a small context structure that is passed between them. The first, btrfs_cmp_data_prepare() gathers up the pages needed (taking page lock as required) and puts them on our context structure. At this point, we are safe to lock the extent range. Afterwards, we use btrfs_cmp_data() to do the data compare as usual and btrfs_cmp_data_free() to clean up our context. Signed-off-by: Mark Fasheh Reviewed-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 148 +++++++++++++++++++++++++++++++++++++---------- 1 file changed, 117 insertions(+), 31 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 55504338491d..9ebe2dd31f2a 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2765,14 +2765,11 @@ out: return ret; } -static struct page *extent_same_get_page(struct inode *inode, u64 off) +static struct page *extent_same_get_page(struct inode *inode, pgoff_t index) { struct page *page; - pgoff_t index; struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; - index = off >> PAGE_CACHE_SHIFT; - page = grab_cache_page(inode->i_mapping, index); if (!page) return NULL; @@ -2793,6 +2790,20 @@ static struct page *extent_same_get_page(struct inode *inode, u64 off) return page; } +static int gather_extent_pages(struct inode *inode, struct page **pages, + int num_pages, u64 off) +{ + int i; + pgoff_t index = off >> PAGE_CACHE_SHIFT; + + for (i = 0; i < num_pages; i++) { + pages[i] = extent_same_get_page(inode, index + i); + if (!pages[i]) + return -ENOMEM; + } + return 0; +} + static inline void lock_extent_range(struct inode *inode, u64 off, u64 len) { /* do any pending delalloc/csum calc on src, one way or @@ -2818,52 +2829,120 @@ static inline void lock_extent_range(struct inode *inode, u64 off, u64 len) } } -static void btrfs_double_unlock(struct inode *inode1, u64 loff1, - struct inode *inode2, u64 loff2, u64 len) +static void btrfs_double_inode_unlock(struct inode *inode1, struct inode *inode2) { - unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1); - unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1); - mutex_unlock(&inode1->i_mutex); mutex_unlock(&inode2->i_mutex); } -static void btrfs_double_lock(struct inode *inode1, u64 loff1, - struct inode *inode2, u64 loff2, u64 len) +static void btrfs_double_inode_lock(struct inode *inode1, struct inode *inode2) +{ + if (inode1 < inode2) + swap(inode1, inode2); + + mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT); + if (inode1 != inode2) + mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD); +} + +static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1, + struct inode *inode2, u64 loff2, u64 len) +{ + unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1); + unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1); +} + +static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1, + struct inode *inode2, u64 loff2, u64 len) { if (inode1 < inode2) { swap(inode1, inode2); swap(loff1, loff2); } - - mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT); lock_extent_range(inode1, loff1, len); - if (inode1 != inode2) { - mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD); + if (inode1 != inode2) lock_extent_range(inode2, loff2, len); +} + +struct cmp_pages { + int num_pages; + struct page **src_pages; + struct page **dst_pages; +}; + +static void btrfs_cmp_data_free(struct cmp_pages *cmp) +{ + int i; + struct page *pg; + + for (i = 0; i < cmp->num_pages; i++) { + pg = cmp->src_pages[i]; + if (pg) + page_cache_release(pg); + pg = cmp->dst_pages[i]; + if (pg) + page_cache_release(pg); } + kfree(cmp->src_pages); + kfree(cmp->dst_pages); +} + +static int btrfs_cmp_data_prepare(struct inode *src, u64 loff, + struct inode *dst, u64 dst_loff, + u64 len, struct cmp_pages *cmp) +{ + int ret; + int num_pages = PAGE_CACHE_ALIGN(len) >> PAGE_CACHE_SHIFT; + struct page **src_pgarr, **dst_pgarr; + + /* + * We must gather up all the pages before we initiate our + * extent locking. We use an array for the page pointers. Size + * of the array is bounded by len, which is in turn bounded by + * BTRFS_MAX_DEDUPE_LEN. + */ + src_pgarr = kzalloc(num_pages * sizeof(struct page *), GFP_NOFS); + dst_pgarr = kzalloc(num_pages * sizeof(struct page *), GFP_NOFS); + if (!src_pgarr || !dst_pgarr) { + kfree(src_pgarr); + kfree(dst_pgarr); + return -ENOMEM; + } + cmp->num_pages = num_pages; + cmp->src_pages = src_pgarr; + cmp->dst_pages = dst_pgarr; + + ret = gather_extent_pages(src, cmp->src_pages, cmp->num_pages, loff); + if (ret) + goto out; + + ret = gather_extent_pages(dst, cmp->dst_pages, cmp->num_pages, dst_loff); + +out: + if (ret) + btrfs_cmp_data_free(cmp); + return 0; } static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst, - u64 dst_loff, u64 len) + u64 dst_loff, u64 len, struct cmp_pages *cmp) { int ret = 0; + int i; struct page *src_page, *dst_page; unsigned int cmp_len = PAGE_CACHE_SIZE; void *addr, *dst_addr; + i = 0; while (len) { if (len < PAGE_CACHE_SIZE) cmp_len = len; - src_page = extent_same_get_page(src, loff); - if (!src_page) - return -EINVAL; - dst_page = extent_same_get_page(dst, dst_loff); - if (!dst_page) { - page_cache_release(src_page); - return -EINVAL; - } + BUG_ON(i >= cmp->num_pages); + + src_page = cmp->src_pages[i]; + dst_page = cmp->dst_pages[i]; + addr = kmap_atomic(src_page); dst_addr = kmap_atomic(dst_page); @@ -2875,15 +2954,12 @@ static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst, kunmap_atomic(addr); kunmap_atomic(dst_addr); - page_cache_release(src_page); - page_cache_release(dst_page); if (ret) break; - loff += cmp_len; - dst_loff += cmp_len; len -= cmp_len; + i++; } return ret; @@ -2914,6 +2990,7 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, { int ret; u64 len = olen; + struct cmp_pages cmp; /* * btrfs_clone() can't handle extents in the same file @@ -2926,7 +3003,7 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, if (len == 0) return 0; - btrfs_double_lock(src, loff, dst, dst_loff, len); + btrfs_double_inode_lock(src, dst); ret = extent_same_check_offsets(src, loff, &len, olen); if (ret) @@ -2943,13 +3020,22 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, goto out_unlock; } + ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, &cmp); + if (ret) + goto out_unlock; + + btrfs_double_extent_lock(src, loff, dst, dst_loff, len); + /* pass original length for comparison so we stay within i_size */ - ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen); + ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen, &cmp); if (ret == 0) ret = btrfs_clone(src, dst, loff, olen, len, dst_loff); + btrfs_double_extent_unlock(src, loff, dst, dst_loff, len); + + btrfs_cmp_data_free(&cmp); out_unlock: - btrfs_double_unlock(src, loff, dst, dst_loff, len); + btrfs_double_inode_unlock(src, dst); return ret; } From 0efa9f48c7e6c15e75946dd2b1c82d3d19e13545 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Tue, 30 Jun 2015 14:42:07 -0700 Subject: [PATCH 024/210] btrfs: allow dedupe of same inode clone() supports cloning within an inode so extent-same can do the same now. This patch fixes up the locking in extent-same to know about the single-inode case. In addition to that, we add a check for overlapping ranges, which clone does not allow. Signed-off-by: Mark Fasheh Reviewed-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 76 ++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 60 insertions(+), 16 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 9ebe2dd31f2a..af064946c9b2 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2991,27 +2991,61 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, int ret; u64 len = olen; struct cmp_pages cmp; + int same_inode = 0; + u64 same_lock_start = 0; + u64 same_lock_len = 0; - /* - * btrfs_clone() can't handle extents in the same file - * yet. Once that works, we can drop this check and replace it - * with a check for the same inode, but overlapping extents. - */ if (src == dst) - return -EINVAL; + same_inode = 1; if (len == 0) return 0; - btrfs_double_inode_lock(src, dst); + if (same_inode) { + mutex_lock(&src->i_mutex); - ret = extent_same_check_offsets(src, loff, &len, olen); - if (ret) - goto out_unlock; + ret = extent_same_check_offsets(src, loff, &len, olen); + if (ret) + goto out_unlock; - ret = extent_same_check_offsets(dst, dst_loff, &len, olen); - if (ret) - goto out_unlock; + /* + * Single inode case wants the same checks, except we + * don't want our length pushed out past i_size as + * comparing that data range makes no sense. + * + * extent_same_check_offsets() will do this for an + * unaligned length at i_size, so catch it here and + * reject the request. + * + * This effectively means we require aligned extents + * for the single-inode case, whereas the other cases + * allow an unaligned length so long as it ends at + * i_size. + */ + if (len != olen) { + ret = -EINVAL; + goto out_unlock; + } + + /* Check for overlapping ranges */ + if (dst_loff + len > loff && dst_loff < loff + len) { + ret = -EINVAL; + goto out_unlock; + } + + same_lock_start = min_t(u64, loff, dst_loff); + same_lock_len = max_t(u64, loff, dst_loff) + len - same_lock_start; + } else { + btrfs_double_inode_lock(src, dst); + + ret = extent_same_check_offsets(src, loff, &len, olen); + if (ret) + goto out_unlock; + + ret = extent_same_check_offsets(dst, dst_loff, &len, olen); + if (ret) + goto out_unlock; + } /* don't make the dst file partly checksummed */ if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) != @@ -3024,18 +3058,28 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, if (ret) goto out_unlock; - btrfs_double_extent_lock(src, loff, dst, dst_loff, len); + if (same_inode) + lock_extent_range(src, same_lock_start, same_lock_len); + else + btrfs_double_extent_lock(src, loff, dst, dst_loff, len); /* pass original length for comparison so we stay within i_size */ ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen, &cmp); if (ret == 0) ret = btrfs_clone(src, dst, loff, olen, len, dst_loff); - btrfs_double_extent_unlock(src, loff, dst, dst_loff, len); + if (same_inode) + unlock_extent(&BTRFS_I(src)->io_tree, same_lock_start, + same_lock_start + same_lock_len - 1); + else + btrfs_double_extent_unlock(src, loff, dst, dst_loff, len); btrfs_cmp_data_free(&cmp); out_unlock: - btrfs_double_inode_unlock(src, dst); + if (same_inode) + mutex_unlock(&src->i_mutex); + else + btrfs_double_inode_unlock(src, dst); return ret; } From 1c919a5e13702caffbe2d2c7c305f9d0d2925160 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Tue, 30 Jun 2015 14:42:08 -0700 Subject: [PATCH 025/210] btrfs: don't update mtime/ctime on deduped inodes One issue users have reported is that dedupe changes mtime on files, resulting in tools like rsync thinking that their contents have changed when in fact the data is exactly the same. We also skip the ctime update as no user-visible metadata changes here and we want dedupe to be transparent to the user. Clone still wants time changes, so we special case this in the code. This was tested with the btrfs-extent-same tool. Signed-off-by: Mark Fasheh Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index af064946c9b2..5d91776e12a2 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -87,7 +87,8 @@ struct btrfs_ioctl_received_subvol_args_32 { static int btrfs_clone(struct inode *src, struct inode *inode, - u64 off, u64 olen, u64 olen_aligned, u64 destoff); + u64 off, u64 olen, u64 olen_aligned, u64 destoff, + int no_time_update); /* Mask out flags that are inappropriate for the given type of inode. */ static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) @@ -3066,7 +3067,7 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, /* pass original length for comparison so we stay within i_size */ ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen, &cmp); if (ret == 0) - ret = btrfs_clone(src, dst, loff, olen, len, dst_loff); + ret = btrfs_clone(src, dst, loff, olen, len, dst_loff, 1); if (same_inode) unlock_extent(&BTRFS_I(src)->io_tree, same_lock_start, @@ -3231,13 +3232,15 @@ static int clone_finish_inode_update(struct btrfs_trans_handle *trans, struct inode *inode, u64 endoff, const u64 destoff, - const u64 olen) + const u64 olen, + int no_time_update) { struct btrfs_root *root = BTRFS_I(inode)->root; int ret; inode_inc_iversion(inode); - inode->i_mtime = inode->i_ctime = CURRENT_TIME; + if (!no_time_update) + inode->i_mtime = inode->i_ctime = CURRENT_TIME; /* * We round up to the block size at eof when determining which * extents to clone above, but shouldn't round up the file size. @@ -3322,13 +3325,13 @@ static void clone_update_extent_map(struct inode *inode, * @inode: Inode to clone to * @off: Offset within source to start clone from * @olen: Original length, passed by user, of range to clone - * @olen_aligned: Block-aligned value of olen, extent_same uses - * identical values here + * @olen_aligned: Block-aligned value of olen * @destoff: Offset within @inode to start clone + * @no_time_update: Whether to update mtime/ctime on the target inode */ static int btrfs_clone(struct inode *src, struct inode *inode, const u64 off, const u64 olen, const u64 olen_aligned, - const u64 destoff) + const u64 destoff, int no_time_update) { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_path *path = NULL; @@ -3652,7 +3655,8 @@ process_slot: root->sectorsize); ret = clone_finish_inode_update(trans, inode, last_dest_end, - destoff, olen); + destoff, olen, + no_time_update); if (ret) goto out; if (new_key.offset + datal >= destoff + len) @@ -3690,7 +3694,7 @@ process_slot: clone_update_extent_map(inode, trans, NULL, last_dest_end, destoff + len - last_dest_end); ret = clone_finish_inode_update(trans, inode, destoff + len, - destoff, olen); + destoff, olen, no_time_update); } out: @@ -3827,7 +3831,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, lock_extent_range(inode, destoff, len); } - ret = btrfs_clone(src, inode, off, olen, len, destoff); + ret = btrfs_clone(src, inode, off, olen, len, destoff, 0); if (same_inode) { u64 lock_start = min_t(u64, off, destoff); From 61de718fceb6bc028dafe4d06a1f87a9e0998303 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 1 Jul 2015 12:13:10 +0100 Subject: [PATCH 026/210] Btrfs: fix memory corruption on failure to submit bio for direct IO If we fail to submit a bio for a direct IO request, we were grabbing the corresponding ordered extent and decrementing its reference count twice, once for our lookup reference and once for the ordered tree reference. This was a problem because it caused the ordered extent to be freed without removing it from the ordered tree and any lists it might be attached to, leaving dangling pointers to the ordered extent around. Example trace with CONFIG_DEBUG_PAGEALLOC=y: [161779.858707] BUG: unable to handle kernel paging request at 0000000087654330 [161779.859983] IP: [] rb_prev+0x22/0x3b [161779.860636] PGD 34d818067 PUD 0 [161779.860636] Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC (...) [161779.860636] Call Trace: [161779.860636] [] __tree_search+0xd9/0xf9 [btrfs] [161779.860636] [] tree_search+0x42/0x63 [btrfs] [161779.860636] [] ? btrfs_lookup_ordered_range+0x2d/0xa5 [btrfs] [161779.860636] [] btrfs_lookup_ordered_range+0x38/0xa5 [btrfs] [161779.860636] [] btrfs_get_blocks_direct+0x11b/0x615 [btrfs] [161779.860636] [] do_blockdev_direct_IO+0x5ff/0xb43 [161779.860636] [] ? btrfs_page_exists_in_range+0x1ad/0x1ad [btrfs] [161779.860636] [] ? btrfs_get_extent_fiemap+0x1bc/0x1bc [btrfs] [161779.860636] [] __blockdev_direct_IO+0x32/0x34 [161779.860636] [] ? btrfs_get_extent_fiemap+0x1bc/0x1bc [btrfs] [161779.860636] [] btrfs_direct_IO+0x198/0x21f [btrfs] [161779.860636] [] ? btrfs_get_extent_fiemap+0x1bc/0x1bc [btrfs] [161779.860636] [] generic_file_direct_write+0xb3/0x128 [161779.860636] [] ? btrfs_file_write_iter+0x15f/0x3e0 [btrfs] [161779.860636] [] btrfs_file_write_iter+0x201/0x3e0 [btrfs] (...) We were also not freeing the btrfs_dio_private we allocated previously, which kmemleak reported with the following trace in its sysfs file: unreferenced object 0xffff8803f553bf80 (size 96): comm "xfs_io", pid 4501, jiffies 4295039588 (age 173.936s) hex dump (first 32 bytes): 88 6c 9b f5 02 88 ff ff 00 00 00 00 00 00 00 00 .l.............. 00 00 00 00 00 00 00 00 00 00 c4 00 00 00 00 00 ................ backtrace: [] create_object+0x172/0x29a [] kmemleak_alloc+0x25/0x41 [] kmemleak_alloc_recursive.constprop.40+0x16/0x18 [] kmem_cache_alloc_trace+0xfb/0x148 [] btrfs_submit_direct+0x65/0x16a [btrfs] [] dio_bio_submit+0x62/0x8f [] do_blockdev_direct_IO+0x97e/0xb43 [] __blockdev_direct_IO+0x32/0x34 [] btrfs_direct_IO+0x198/0x21f [btrfs] [] generic_file_direct_write+0xb3/0x128 [] btrfs_file_write_iter+0x201/0x3e0 [btrfs] [] __vfs_write+0x7c/0xa5 [] vfs_write+0xa0/0xe4 [] SyS_pwrite64+0x64/0x82 [] system_call_fastpath+0x12/0x6f [] 0xffffffffffffffff For read requests we weren't doing any cleanup either (none of the work done by btrfs_endio_direct_read()), so a failure submitting a bio for a read request would leave a range in the inode's io_tree locked forever, blocking any future operations (both reads and writes) against that range. So fix this by making sure we do the same cleanup that we do for the case where the bio submission succeeds. Signed-off-by: Filipe Manana Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 65 +++++++++++++++++++++++++++++------------ fs/btrfs/ordered-data.c | 5 ++++ 2 files changed, 52 insertions(+), 18 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 855935f6671a..c0b2b6b51b2a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -8163,9 +8163,8 @@ out_err: static void btrfs_submit_direct(int rw, struct bio *dio_bio, struct inode *inode, loff_t file_offset) { - struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_dio_private *dip; - struct bio *io_bio; + struct btrfs_dio_private *dip = NULL; + struct bio *io_bio = NULL; struct btrfs_io_bio *btrfs_bio; int skip_sum; int write = rw & REQ_WRITE; @@ -8182,7 +8181,7 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio, dip = kzalloc(sizeof(*dip), GFP_NOFS); if (!dip) { ret = -ENOMEM; - goto free_io_bio; + goto free_ordered; } dip->private = dio_bio->bi_private; @@ -8210,25 +8209,55 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio, if (btrfs_bio->end_io) btrfs_bio->end_io(btrfs_bio, ret); -free_io_bio: - bio_put(io_bio); free_ordered: /* - * If this is a write, we need to clean up the reserved space and kill - * the ordered extent. + * If we arrived here it means either we failed to submit the dip + * or we either failed to clone the dio_bio or failed to allocate the + * dip. If we cloned the dio_bio and allocated the dip, we can just + * call bio_endio against our io_bio so that we get proper resource + * cleanup if we fail to submit the dip, otherwise, we must do the + * same as btrfs_endio_direct_[write|read] because we can't call these + * callbacks - they require an allocated dip and a clone of dio_bio. */ - if (write) { - struct btrfs_ordered_extent *ordered; - ordered = btrfs_lookup_ordered_extent(inode, file_offset); - if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) && - !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) - btrfs_free_reserved_extent(root, ordered->start, - ordered->disk_len, 1); - btrfs_put_ordered_extent(ordered); - btrfs_put_ordered_extent(ordered); + if (io_bio && dip) { + bio_endio(io_bio, ret); + /* + * The end io callbacks free our dip, do the final put on io_bio + * and all the cleanup and final put for dio_bio (through + * dio_end_io()). + */ + dip = NULL; + io_bio = NULL; + } else { + if (write) { + struct btrfs_ordered_extent *ordered; + + ordered = btrfs_lookup_ordered_extent(inode, + file_offset); + set_bit(BTRFS_ORDERED_IOERR, &ordered->flags); + /* + * Decrements our ref on the ordered extent and removes + * the ordered extent from the inode's ordered tree, + * doing all the proper resource cleanup such as for the + * reserved space and waking up any waiters for this + * ordered extent (through btrfs_remove_ordered_extent). + */ + btrfs_finish_ordered_io(ordered); + } else { + unlock_extent(&BTRFS_I(inode)->io_tree, file_offset, + file_offset + dio_bio->bi_iter.bi_size - 1); + } + clear_bit(BIO_UPTODATE, &dio_bio->bi_flags); + /* + * Releases and cleans up our dio_bio, no need to bio_put() + * nor bio_endio()/bio_io_error() against dio_bio. + */ + dio_end_io(dio_bio, ret); } - bio_endio(dio_bio, ret); + if (io_bio) + bio_put(io_bio); + kfree(dip); } static ssize_t check_direct_IO(struct btrfs_root *root, struct kiocb *iocb, diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 89656d799ff6..52170cf1757e 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -552,6 +552,10 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry) trace_btrfs_ordered_extent_put(entry->inode, entry); if (atomic_dec_and_test(&entry->refs)) { + ASSERT(list_empty(&entry->log_list)); + ASSERT(list_empty(&entry->trans_list)); + ASSERT(list_empty(&entry->root_extent_list)); + ASSERT(RB_EMPTY_NODE(&entry->rb_node)); if (entry->inode) btrfs_add_delayed_iput(entry->inode); while (!list_empty(&entry->list)) { @@ -579,6 +583,7 @@ void btrfs_remove_ordered_extent(struct inode *inode, spin_lock_irq(&tree->lock); node = &entry->rb_node; rb_erase(node, &tree->tree); + RB_CLEAR_NODE(node); if (tree->last == node) tree->last = NULL; set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); From 9c6429d96daec64f6b5b10a1c6b02c7264541ea1 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 10 Jun 2015 12:55:41 +0100 Subject: [PATCH 027/210] Btrfs: fix a comment in inode.c:evict_inode_truncate_pages() The comment was not correct about the part where it says the endio callback of the bio might have not yet been called - update it to mention that by that time the endio callback execution might still be in progress only. Signed-off-by: Filipe Manana Reviewed-by: Liu Bo Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c0b2b6b51b2a..53afda0ef4e3 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4989,8 +4989,9 @@ static void evict_inode_truncate_pages(struct inode *inode) /* * Keep looping until we have no more ranges in the io tree. * We can have ongoing bios started by readpages (called from readahead) - * that didn't get their end io callbacks called yet or they are still - * in progress ((extent_io.c:end_bio_extent_readpage()). This means some + * that have their endio callback (extent_io.c:end_bio_extent_readpage) + * still in progress (unlocked the pages in the bio but did not yet + * unlocked the ranges in the io tree). Therefore this means some * ranges can still be locked and eviction started because before * submitting those bios, which are executed by a separate task (work * queue kthread), inode references (inode->i_count) were not taken From ad9ee2053f3f2babebc09ebc4970daa66c56c7ee Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Wed, 17 Jun 2015 16:59:57 +0800 Subject: [PATCH 028/210] Btrfs: fix hang when failing to submit bio of directIO The hang is uncoverd by generic/019. btrfs_endio_direct_write() skips the "finish_ordered_fn" part when it hits an error, thus those added ordered extents will never get processed, which block processes that waiting for them via btrfs_start_ordered_extent(). This fixes the above, and meanwhile finish_ordered_fn will do the space accounting work. Signed-off-by: Liu Bo Reviewed-by: Filipe Manana Tested-by: Filipe Manana Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 53afda0ef4e3..0b9fb81ccf8a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7872,8 +7872,6 @@ static void btrfs_endio_direct_write(struct bio *bio, int err) struct bio *dio_bio; int ret; - if (err) - goto out_done; again: ret = btrfs_dec_test_first_ordered_pending(inode, &ordered, &ordered_offset, @@ -7896,7 +7894,6 @@ out_test: ordered = NULL; goto again; } -out_done: dio_bio = dip->dio_bio; kfree(dip); From ddba1bfc2369cd0566bcfdab47599834a32d1c19 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Wed, 17 Jun 2015 16:59:58 +0800 Subject: [PATCH 029/210] Btrfs: fix warning of bytes_may_use While running generic/019, dmesg got several warnings from btrfs_free_reserved_data_space(). Test generic/019 produces some disk failures so sumbit dio will get errors, in which case, btrfs_direct_IO() goes to the error handling and free bytes_may_use, but the problem is that bytes_may_use has been free'd during get_block(). This adds a runtime flag to show if we've gone through get_block(), if so, don't do the cleanup work. Signed-off-by: Liu Bo Reviewed-by: Filipe Manana Tested-by: Filipe Manana Signed-off-by: Chris Mason --- fs/btrfs/btrfs_inode.h | 2 ++ fs/btrfs/inode.c | 16 +++++++++++++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 0ef5cc13fae2..81220b2203c6 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -44,6 +44,8 @@ #define BTRFS_INODE_IN_DELALLOC_LIST 9 #define BTRFS_INODE_READDIO_NEED_LOCK 10 #define BTRFS_INODE_HAS_PROPS 11 +/* DIO is ready to submit */ +#define BTRFS_INODE_DIO_READY 12 /* * The following 3 bits are meant only for the btree inode. * When any of them is set, it means an error happened while writing an diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0b9fb81ccf8a..b33c0cf02668 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7547,6 +7547,7 @@ unlock: current->journal_info = outstanding_extents; btrfs_free_reserved_data_space(inode, len); + set_bit(BTRFS_INODE_DIO_READY, &BTRFS_I(inode)->runtime_flags); } /* @@ -8357,9 +8358,18 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, btrfs_submit_direct, flags); if (iov_iter_rw(iter) == WRITE) { current->journal_info = NULL; - if (ret < 0 && ret != -EIOCBQUEUED) - btrfs_delalloc_release_space(inode, count); - else if (ret >= 0 && (size_t)ret < count) + if (ret < 0 && ret != -EIOCBQUEUED) { + /* + * If the error comes from submitting stage, + * btrfs_get_blocsk_direct() has free'd data space, + * and metadata space will be handled by + * finish_ordered_fn, don't do that again to make + * sure bytes_may_use is correct. + */ + if (!test_and_clear_bit(BTRFS_INODE_DIO_READY, + &BTRFS_I(inode)->runtime_flags)) + btrfs_delalloc_release_space(inode, count); + } else if (ret >= 0 && (size_t)ret < count) btrfs_delalloc_release_space(inode, count - (size_t)ret); } From 9689457b5b0a2b69874c421a489d3fb50ca76b7b Mon Sep 17 00:00:00 2001 From: Shilong Wang Date: Sun, 12 Apr 2015 14:35:20 +0800 Subject: [PATCH 030/210] Btrfs: fix wrong check for btrfs_force_chunk_alloc() btrfs_force_chunk_alloc() return 1 for allocation chunk successfully. This problem exists since commit c87f08ca4. With this patch, we might fix some enospc problems for balances. Signed-off-by: Wang Shilong Reviewed-by: Filipe Manana Tested-by: Filipe Manana Signed-off-by: Chris Mason --- fs/btrfs/relocation.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 827951fbf7fc..88cbb5995667 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -4049,7 +4049,7 @@ restart: if (trans && progress && err == -ENOSPC) { ret = btrfs_force_chunk_alloc(trans, rc->extent_root, rc->block_group->flags); - if (ret == 0) { + if (ret == 1) { err = 0; progress = 0; goto restart; From 0f0ff9a9f3fa2ec6f427603fd521d5f3a0b076d1 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 1 Jul 2015 23:37:46 -0400 Subject: [PATCH 031/210] ext4: fix fencepost error in lazytime optimization Commit 8f4d8558391: "ext4: fix lazytime optimization" was not a complete fix. In the case where the inode number is a multiple of 16, and we could still end up updating an inode with dirty timestamps written to the wrong inode on disk. Oops. This can be easily reproduced by using generic/005 with a file system with metadata_csum and lazytime enabled. Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- fs/ext4/inode.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index e057c6fcc227..4ad73d3c1003 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4348,7 +4348,12 @@ static void ext4_update_other_inodes_time(struct super_block *sb, int inode_size = EXT4_INODE_SIZE(sb); oi.orig_ino = orig_ino; - ino = (orig_ino & ~(inodes_per_block - 1)) + 1; + /* + * Calculate the first inode in the inode table block. Inode + * numbers are one-based. That is, the first inode in a block + * (assuming 4k blocks and 256 byte inodes) is (n*16 + 1). + */ + ino = ((orig_ino - 1) & ~(inodes_per_block - 1)) + 1; for (i = 0; i < inodes_per_block; i++, ino++, buf += inode_size) { if (ino == orig_ino) continue; From bd7ade3cd9b0850264306f5c2b79024a417b6396 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Thu, 2 Jul 2015 01:32:44 -0400 Subject: [PATCH 032/210] bufferhead: Add _gfp version for sb_getblk() sb_getblk() is used during ext4 (and possibly other FSes) writeback paths. Sometimes such path require allocating memory and guaranteeing that such allocation won't block. Currently, however, there is no way to provide user flags for sb_getblk which could lead to deadlocks. This patch implements a sb_getblk_gfp with the only difference it can accept user-provided GFP flags. Signed-off-by: Nikolay Borisov Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- include/linux/buffer_head.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 73b45225a7ca..e6797ded700e 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -317,6 +317,13 @@ sb_getblk(struct super_block *sb, sector_t block) return __getblk_gfp(sb->s_bdev, block, sb->s_blocksize, __GFP_MOVABLE); } + +static inline struct buffer_head * +sb_getblk_gfp(struct super_block *sb, sector_t block, gfp_t gfp) +{ + return __getblk_gfp(sb->s_bdev, block, sb->s_blocksize, gfp); +} + static inline struct buffer_head * sb_find_get_block(struct super_block *sb, sector_t block) { From c45653c341f5c8a0ce19c8f0ad4678640849cb86 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Thu, 2 Jul 2015 01:34:07 -0400 Subject: [PATCH 033/210] ext4: avoid deadlocks in the writeback path by using sb_getblk_gfp Switch ext4 to using sb_getblk_gfp with GFP_NOFS added to fix possible deadlocks in the page writeback path. Signed-off-by: Nikolay Borisov Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- fs/ext4/extents.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index d86d2622f826..69af30e0d75d 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -503,7 +503,7 @@ __read_extent_tree_block(const char *function, unsigned int line, struct buffer_head *bh; int err; - bh = sb_getblk(inode->i_sb, pblk); + bh = sb_getblk_gfp(inode->i_sb, pblk, __GFP_MOVABLE | GFP_NOFS); if (unlikely(!bh)) return ERR_PTR(-ENOMEM); @@ -1088,7 +1088,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, err = -EIO; goto cleanup; } - bh = sb_getblk(inode->i_sb, newblock); + bh = sb_getblk_gfp(inode->i_sb, newblock, __GFP_MOVABLE | GFP_NOFS); if (unlikely(!bh)) { err = -ENOMEM; goto cleanup; @@ -1282,7 +1282,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, if (newblock == 0) return err; - bh = sb_getblk(inode->i_sb, newblock); + bh = sb_getblk_gfp(inode->i_sb, newblock, __GFP_MOVABLE | GFP_NOFS); if (unlikely(!bh)) return -ENOMEM; lock_buffer(bh); From c423bc85097327fba71f9a831280d09b64bb62b6 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Tue, 16 Jun 2015 14:54:51 +0300 Subject: [PATCH 034/210] drm/omap: check that plane is inside crtc DRM allows planes to be partially off-screen, but DSS hardware does not. This patch adds the necessary check to reject plane configs if the plane is not fully inside the crtc. Signed-off-by: Tomi Valkeinen Acked-by: Laurent Pinchart --- drivers/gpu/drm/omapdrm/omap_plane.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/gpu/drm/omapdrm/omap_plane.c b/drivers/gpu/drm/omapdrm/omap_plane.c index cfa8276c4deb..098904696a5c 100644 --- a/drivers/gpu/drm/omapdrm/omap_plane.c +++ b/drivers/gpu/drm/omapdrm/omap_plane.c @@ -17,6 +17,7 @@ * this program. If not, see . */ +#include #include #include @@ -153,9 +154,34 @@ static void omap_plane_atomic_disable(struct drm_plane *plane, dispc_ovl_enable(omap_plane->id, false); } +static int omap_plane_atomic_check(struct drm_plane *plane, + struct drm_plane_state *state) +{ + struct drm_crtc_state *crtc_state; + + if (!state->crtc) + return 0; + + crtc_state = drm_atomic_get_crtc_state(state->state, state->crtc); + if (IS_ERR(crtc_state)) + return PTR_ERR(crtc_state); + + if (state->crtc_x < 0 || state->crtc_y < 0) + return -EINVAL; + + if (state->crtc_x + state->crtc_w > crtc_state->adjusted_mode.hdisplay) + return -EINVAL; + + if (state->crtc_y + state->crtc_h > crtc_state->adjusted_mode.vdisplay) + return -EINVAL; + + return 0; +} + static const struct drm_plane_helper_funcs omap_plane_helper_funcs = { .prepare_fb = omap_plane_prepare_fb, .cleanup_fb = omap_plane_cleanup_fb, + .atomic_check = omap_plane_atomic_check, .atomic_update = omap_plane_atomic_update, .atomic_disable = omap_plane_atomic_disable, }; From 96cbd142310cc7281f94739b27d8e4c308d588c5 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Tue, 28 Apr 2015 14:01:32 +0300 Subject: [PATCH 035/210] drm/omap: increase DMM transaction timeout The DMM driver uses a timeout of 1 ms to wait for DMM transaction to finish. While DMM should always finish the operation within that time, the timeout is rather strict. Small misbehavior of the system (e.g. an irq taking too long) could trigger the timeout. As the DMM is a critical piece of code for display memory management, let's increase the timeout to 100 ms so that we are less likely to fail a memory allocation in case of system misbehaviors. 100 ms is just a guess of a reasonably large timeout. The HW should accomplish the task in less than 1 ms. Signed-off-by: Tomi Valkeinen --- drivers/gpu/drm/omapdrm/omap_dmm_tiler.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c index f2daad8c3d96..7841970de48d 100644 --- a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c +++ b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c @@ -285,7 +285,7 @@ static int dmm_txn_commit(struct dmm_txn *txn, bool wait) if (wait) { if (!wait_for_completion_timeout(&engine->compl, - msecs_to_jiffies(1))) { + msecs_to_jiffies(100))) { dev_err(dmm->dev, "timed out waiting for done\n"); ret = -ETIMEDOUT; } From 9c368506c96793f17934a61a0f06412afb7d2f8d Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Tue, 28 Apr 2015 14:01:35 +0300 Subject: [PATCH 036/210] drm/omap: fix omap_framebuffer_unpin() error handling omap_framebuffer_unpin() check the return value of omap_gem_put_paddr() and return immediately if omap_gem_put_paddr() fails. This patch removes the check for the return value, and also removes the return value of omap_framebuffer_unpin(), because: * Nothing checks the return value of omap_framebuffer_unpin(), and even something did check it, there's nothing the caller can do to handle the error. * If a omap_gem_put_paddr() fails, the framebuffer's other planes will be left unreleased. So it's better to call omap_gem_put_paddr() for all the planes, even if one would fail. Signed-off-by: Tomi Valkeinen Acked-by: Laurent Pinchart --- drivers/gpu/drm/omapdrm/omap_drv.h | 2 +- drivers/gpu/drm/omapdrm/omap_fb.c | 16 ++++------------ 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/omapdrm/omap_drv.h b/drivers/gpu/drm/omapdrm/omap_drv.h index ae2df41f216f..2ef89c0c3006 100644 --- a/drivers/gpu/drm/omapdrm/omap_drv.h +++ b/drivers/gpu/drm/omapdrm/omap_drv.h @@ -177,7 +177,7 @@ struct drm_framebuffer *omap_framebuffer_init(struct drm_device *dev, struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object **bos); struct drm_gem_object *omap_framebuffer_bo(struct drm_framebuffer *fb, int p); int omap_framebuffer_pin(struct drm_framebuffer *fb); -int omap_framebuffer_unpin(struct drm_framebuffer *fb); +void omap_framebuffer_unpin(struct drm_framebuffer *fb); void omap_framebuffer_update_scanout(struct drm_framebuffer *fb, struct omap_drm_window *win, struct omap_overlay_info *info); struct drm_connector *omap_framebuffer_get_next_connector( diff --git a/drivers/gpu/drm/omapdrm/omap_fb.c b/drivers/gpu/drm/omapdrm/omap_fb.c index 0b967e76df1a..51b1219af87f 100644 --- a/drivers/gpu/drm/omapdrm/omap_fb.c +++ b/drivers/gpu/drm/omapdrm/omap_fb.c @@ -287,10 +287,10 @@ fail: } /* unpin, no longer being scanned out: */ -int omap_framebuffer_unpin(struct drm_framebuffer *fb) +void omap_framebuffer_unpin(struct drm_framebuffer *fb) { struct omap_framebuffer *omap_fb = to_omap_framebuffer(fb); - int ret, i, n = drm_format_num_planes(fb->pixel_format); + int i, n = drm_format_num_planes(fb->pixel_format); mutex_lock(&omap_fb->lock); @@ -298,24 +298,16 @@ int omap_framebuffer_unpin(struct drm_framebuffer *fb) if (omap_fb->pin_count > 0) { mutex_unlock(&omap_fb->lock); - return 0; + return; } for (i = 0; i < n; i++) { struct plane *plane = &omap_fb->planes[i]; - ret = omap_gem_put_paddr(plane->bo); - if (ret) - goto fail; + omap_gem_put_paddr(plane->bo); plane->paddr = 0; } mutex_unlock(&omap_fb->lock); - - return 0; - -fail: - mutex_unlock(&omap_fb->lock); - return ret; } struct drm_gem_object *omap_framebuffer_bo(struct drm_framebuffer *fb, int p) From 393a949f51994a67e8e33b2f79c6f2020959a7e2 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Tue, 28 Apr 2015 14:01:36 +0300 Subject: [PATCH 037/210] drm/omap: fix omap_gem_put_paddr() error handling If tiler_unpin() call in omap_gem_put_paddr() fails, omap_gem_put_paddr() will immediately stop processing and return an error. This patch remoes that error checking, and also removes omap_gem_put_paddr()'s return value, because: * The caller of omap_gem_put_paddr() can do nothing if an error happens, so it's pointless to return an error value * If tiler_unpin() fails, the GEM object will possibly be left in an undefined state, where the DMM mapping may have been removed, but the GEM object still thinks everything is as it should be, leading to crashes later. * There's no point in returning an error from a "free" call, as the caller can do nothing about it. So it's better to clean up as much as possible. Signed-off-by: Tomi Valkeinen Acked-by: Laurent Pinchart --- drivers/gpu/drm/omapdrm/omap_drv.h | 2 +- drivers/gpu/drm/omapdrm/omap_gem.c | 8 +++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/omapdrm/omap_drv.h b/drivers/gpu/drm/omapdrm/omap_drv.h index 2ef89c0c3006..1ae8477e4289 100644 --- a/drivers/gpu/drm/omapdrm/omap_drv.h +++ b/drivers/gpu/drm/omapdrm/omap_drv.h @@ -211,7 +211,7 @@ void omap_gem_dma_sync(struct drm_gem_object *obj, enum dma_data_direction dir); int omap_gem_get_paddr(struct drm_gem_object *obj, dma_addr_t *paddr, bool remap); -int omap_gem_put_paddr(struct drm_gem_object *obj); +void omap_gem_put_paddr(struct drm_gem_object *obj); int omap_gem_get_pages(struct drm_gem_object *obj, struct page ***pages, bool remap); int omap_gem_put_pages(struct drm_gem_object *obj); diff --git a/drivers/gpu/drm/omapdrm/omap_gem.c b/drivers/gpu/drm/omapdrm/omap_gem.c index 874eb6dcf94b..7ed08fdc4c42 100644 --- a/drivers/gpu/drm/omapdrm/omap_gem.c +++ b/drivers/gpu/drm/omapdrm/omap_gem.c @@ -808,10 +808,10 @@ fail: /* Release physical address, when DMA is no longer being performed.. this * could potentially unpin and unmap buffers from TILER */ -int omap_gem_put_paddr(struct drm_gem_object *obj) +void omap_gem_put_paddr(struct drm_gem_object *obj) { struct omap_gem_object *omap_obj = to_omap_bo(obj); - int ret = 0; + int ret; mutex_lock(&obj->dev->struct_mutex); if (omap_obj->paddr_cnt > 0) { @@ -821,7 +821,6 @@ int omap_gem_put_paddr(struct drm_gem_object *obj) if (ret) { dev_err(obj->dev->dev, "could not unpin pages: %d\n", ret); - goto fail; } ret = tiler_release(omap_obj->block); if (ret) { @@ -832,9 +831,8 @@ int omap_gem_put_paddr(struct drm_gem_object *obj) omap_obj->block = NULL; } } -fail: + mutex_unlock(&obj->dev->struct_mutex); - return ret; } /* Get rotated scanout address (only valid if already pinned), at the From d642d3acd89454006fd6ca9cc4dd57f26959f9d1 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Fri, 8 May 2015 13:32:31 +0300 Subject: [PATCH 038/210] drm/omap: fix align_pitch() for 24 bits per pixel align_pitch() uses ALIGN() to ensure the pitch is aligned to SGX's requirement of 8 pixels. However, ALIGN() expects the alignment value to be a power of two, which is not the case for 24 bits per pixels. Use roundup() instead, which works for all alignments. This fixes the error seen with 24 bits per pixel modes: "buffer pitch (2176 bytes) is not a multiple of pixel size (3 bytes)" Signed-off-by: Tomi Valkeinen --- drivers/gpu/drm/omapdrm/omap_drv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/omapdrm/omap_drv.h b/drivers/gpu/drm/omapdrm/omap_drv.h index 1ae8477e4289..12081e61d45a 100644 --- a/drivers/gpu/drm/omapdrm/omap_drv.h +++ b/drivers/gpu/drm/omapdrm/omap_drv.h @@ -236,7 +236,7 @@ static inline int align_pitch(int pitch, int width, int bpp) /* PVR needs alignment to 8 pixels.. right now that is the most * restrictive stride requirement.. */ - return ALIGN(pitch, 8 * bytespp); + return roundup(pitch, 8 * bytespp); } /* map crtc to vblank mask */ From 743c16719f671c206923d23dae4ac57edfd9483c Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Tue, 1 Jul 2014 20:52:50 +0200 Subject: [PATCH 039/210] drm/omap: replace ALIGN(PAGE_SIZE) by PAGE_ALIGN use mm.h definition Cc: David Airlie Cc: Tomi Valkeinen Cc: dri-devel@lists.freedesktop.org Signed-off-by: Fabian Frederick Signed-off-by: Tomi Valkeinen --- drivers/gpu/drm/omapdrm/omap_fbdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/omapdrm/omap_fbdev.c b/drivers/gpu/drm/omapdrm/omap_fbdev.c index 23b5a84389e3..720d16bce7e8 100644 --- a/drivers/gpu/drm/omapdrm/omap_fbdev.c +++ b/drivers/gpu/drm/omapdrm/omap_fbdev.c @@ -135,7 +135,7 @@ static int omap_fbdev_create(struct drm_fb_helper *helper, fbdev->ywrap_enabled = priv->has_dmm && ywrap_enabled; if (fbdev->ywrap_enabled) { /* need to align pitch to page size if using DMM scrolling */ - mode_cmd.pitches[0] = ALIGN(mode_cmd.pitches[0], PAGE_SIZE); + mode_cmd.pitches[0] = PAGE_ALIGN(mode_cmd.pitches[0]); } /* allocate backing bo */ From 45a481c2176f6acca2efb6477c6018b2c3e3c60f Mon Sep 17 00:00:00 2001 From: Ray Jui Date: Mon, 29 Jun 2015 14:30:09 -0700 Subject: [PATCH 040/210] clk: iproc: fix memory leak from clock name of_property_read_string_index takes array of pointers and assign them to strings read from device tree property. No additional memory allocation is needed prior to calling of_property_read_string_index. In fact, since the array of pointers will be re-assigned to other strings, any memory that it points to prior to calling of_property_read_string_index will be leaked Reported-by: Dan Carpenter Signed-off-by: Ray Jui Fixes: 5fe225c105fd ("clk: iproc: add initial common clock support") Signed-off-by: Stephen Boyd --- drivers/clk/bcm/clk-iproc-asiu.c | 6 +----- drivers/clk/bcm/clk-iproc-pll.c | 8 +------- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/clk/bcm/clk-iproc-asiu.c b/drivers/clk/bcm/clk-iproc-asiu.c index e19c09cd9645..f630e1bbdcfe 100644 --- a/drivers/clk/bcm/clk-iproc-asiu.c +++ b/drivers/clk/bcm/clk-iproc-asiu.c @@ -222,10 +222,6 @@ void __init iproc_asiu_setup(struct device_node *node, struct iproc_asiu_clk *asiu_clk; const char *clk_name; - clk_name = kzalloc(IPROC_CLK_NAME_LEN, GFP_KERNEL); - if (WARN_ON(!clk_name)) - goto err_clk_register; - ret = of_property_read_string_index(node, "clock-output-names", i, &clk_name); if (WARN_ON(ret)) @@ -259,7 +255,7 @@ void __init iproc_asiu_setup(struct device_node *node, err_clk_register: for (i = 0; i < num_clks; i++) - kfree(asiu->clks[i].name); + clk_unregister(asiu->clk_data.clks[i]); iounmap(asiu->gate_base); err_iomap_gate: diff --git a/drivers/clk/bcm/clk-iproc-pll.c b/drivers/clk/bcm/clk-iproc-pll.c index 46fb84bc2674..a8d971b820b0 100644 --- a/drivers/clk/bcm/clk-iproc-pll.c +++ b/drivers/clk/bcm/clk-iproc-pll.c @@ -655,10 +655,6 @@ void __init iproc_pll_clk_setup(struct device_node *node, memset(&init, 0, sizeof(init)); parent_name = node->name; - clk_name = kzalloc(IPROC_CLK_NAME_LEN, GFP_KERNEL); - if (WARN_ON(!clk_name)) - goto err_clk_register; - ret = of_property_read_string_index(node, "clock-output-names", i, &clk_name); if (WARN_ON(ret)) @@ -690,10 +686,8 @@ void __init iproc_pll_clk_setup(struct device_node *node, return; err_clk_register: - for (i = 0; i < num_clks; i++) { - kfree(pll->clks[i].name); + for (i = 0; i < num_clks; i++) clk_unregister(pll->clk_data.clks[i]); - } err_pll_register: if (pll->asiu_base) From 69916d96094e1e16567c5f25515a13ed2896c730 Mon Sep 17 00:00:00 2001 From: Ray Jui Date: Mon, 29 Jun 2015 14:30:10 -0700 Subject: [PATCH 041/210] clk: iproc: fix bit manipulation arithmetic A 32-bit variable should be type casted to 64-bit before arithmetic operation and assigning it to a 64-bit variable Reported-by: Dan Carpenter Signed-off-by: Ray Jui Fixes: 5fe225c105fd ("clk: iproc: add initial common clock support") Signed-off-by: Stephen Boyd --- drivers/clk/bcm/clk-iproc-pll.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/clk/bcm/clk-iproc-pll.c b/drivers/clk/bcm/clk-iproc-pll.c index a8d971b820b0..2dda4e8295a9 100644 --- a/drivers/clk/bcm/clk-iproc-pll.c +++ b/drivers/clk/bcm/clk-iproc-pll.c @@ -366,7 +366,7 @@ static unsigned long iproc_pll_recalc_rate(struct clk_hw *hw, val = readl(pll->pll_base + ctrl->ndiv_int.offset); ndiv_int = (val >> ctrl->ndiv_int.shift) & bit_mask(ctrl->ndiv_int.width); - ndiv = ndiv_int << ctrl->ndiv_int.shift; + ndiv = (u64)ndiv_int << ctrl->ndiv_int.shift; if (ctrl->flags & IPROC_CLK_PLL_HAS_NDIV_FRAC) { val = readl(pll->pll_base + ctrl->ndiv_frac.offset); @@ -374,7 +374,8 @@ static unsigned long iproc_pll_recalc_rate(struct clk_hw *hw, bit_mask(ctrl->ndiv_frac.width); if (ndiv_frac != 0) - ndiv = (ndiv_int << ctrl->ndiv_int.shift) | ndiv_frac; + ndiv = ((u64)ndiv_int << ctrl->ndiv_int.shift) | + ndiv_frac; } val = readl(pll->pll_base + ctrl->pdiv.offset); From 15ab38273d21a45487116ad4c428593427954848 Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Sun, 28 Jun 2015 10:55:32 +0100 Subject: [PATCH 042/210] clk: stm32: Fix out-by-one error path in the index lookup If stm32f4_rcc_lookup() is called with primary == 0 and secondary == 192 then it will read beyond the end of the table array due to an out-by-one error in the range check. In addition to the fixing the inequality we also modify the r.h.s. to make it even more explicit that we are comparing against the size of table in bits. Reported-by: Dan Carpenter Signed-off-by: Daniel Thompson Acked-by: Maxime Coquelin Fixes: 358bdf892f6b ("clk: stm32: Add clock driver for STM32F4[23]xxx devices") Signed-off-by: Stephen Boyd --- drivers/clk/clk-stm32f4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/clk-stm32f4.c b/drivers/clk/clk-stm32f4.c index b9b12a742970..3f6f7ad39490 100644 --- a/drivers/clk/clk-stm32f4.c +++ b/drivers/clk/clk-stm32f4.c @@ -268,7 +268,7 @@ static int stm32f4_rcc_lookup_clk_idx(u8 primary, u8 secondary) memcpy(table, stm32f42xx_gate_map, sizeof(table)); /* only bits set in table can be used as indices */ - if (WARN_ON(secondary > 8 * sizeof(table) || + if (WARN_ON(secondary >= BITS_PER_BYTE * sizeof(table) || 0 == (table[BIT_ULL_WORD(secondary)] & BIT_ULL_MASK(secondary)))) return -EINVAL; From c76a024e82bdb83a0f7d57e006f8e7f8ddf983e5 Mon Sep 17 00:00:00 2001 From: David Dueck Date: Fri, 26 Jun 2015 15:30:22 +0200 Subject: [PATCH 043/210] clk: at91: do not leak resources Do not leak memory and free irqs in case of an error. Acked-by: Boris Brezillon Signed-off-by: David Dueck Signed-off-by: Stephen Boyd --- drivers/clk/at91/clk-h32mx.c | 4 +++- drivers/clk/at91/clk-main.c | 4 +++- drivers/clk/at91/clk-master.c | 8 ++++++-- drivers/clk/at91/clk-pll.c | 8 ++++++-- drivers/clk/at91/clk-system.c | 8 ++++++-- drivers/clk/at91/clk-utmi.c | 8 ++++++-- 6 files changed, 30 insertions(+), 10 deletions(-) diff --git a/drivers/clk/at91/clk-h32mx.c b/drivers/clk/at91/clk-h32mx.c index 152dcb3f7b5f..61566bcefa53 100644 --- a/drivers/clk/at91/clk-h32mx.c +++ b/drivers/clk/at91/clk-h32mx.c @@ -116,8 +116,10 @@ void __init of_sama5d4_clk_h32mx_setup(struct device_node *np, h32mxclk->pmc = pmc; clk = clk_register(NULL, &h32mxclk->hw); - if (!clk) + if (!clk) { + kfree(h32mxclk); return; + } of_clk_add_provider(np, of_clk_src_simple_get, clk); } diff --git a/drivers/clk/at91/clk-main.c b/drivers/clk/at91/clk-main.c index c2400456a044..27dfa965cfed 100644 --- a/drivers/clk/at91/clk-main.c +++ b/drivers/clk/at91/clk-main.c @@ -171,8 +171,10 @@ at91_clk_register_main_osc(struct at91_pmc *pmc, irq_set_status_flags(osc->irq, IRQ_NOAUTOEN); ret = request_irq(osc->irq, clk_main_osc_irq_handler, IRQF_TRIGGER_HIGH, name, osc); - if (ret) + if (ret) { + kfree(osc); return ERR_PTR(ret); + } if (bypass) pmc_write(pmc, AT91_CKGR_MOR, diff --git a/drivers/clk/at91/clk-master.c b/drivers/clk/at91/clk-master.c index f98eafe9b12d..5b3ded5205a2 100644 --- a/drivers/clk/at91/clk-master.c +++ b/drivers/clk/at91/clk-master.c @@ -165,12 +165,16 @@ at91_clk_register_master(struct at91_pmc *pmc, unsigned int irq, irq_set_status_flags(master->irq, IRQ_NOAUTOEN); ret = request_irq(master->irq, clk_master_irq_handler, IRQF_TRIGGER_HIGH, "clk-master", master); - if (ret) + if (ret) { + kfree(master); return ERR_PTR(ret); + } clk = clk_register(NULL, &master->hw); - if (IS_ERR(clk)) + if (IS_ERR(clk)) { + free_irq(master->irq, master); kfree(master); + } return clk; } diff --git a/drivers/clk/at91/clk-pll.c b/drivers/clk/at91/clk-pll.c index 6ec79dbc0840..23163be24b6f 100644 --- a/drivers/clk/at91/clk-pll.c +++ b/drivers/clk/at91/clk-pll.c @@ -338,12 +338,16 @@ at91_clk_register_pll(struct at91_pmc *pmc, unsigned int irq, const char *name, irq_set_status_flags(pll->irq, IRQ_NOAUTOEN); ret = request_irq(pll->irq, clk_pll_irq_handler, IRQF_TRIGGER_HIGH, id ? "clk-pllb" : "clk-plla", pll); - if (ret) + if (ret) { + kfree(pll); return ERR_PTR(ret); + } clk = clk_register(NULL, &pll->hw); - if (IS_ERR(clk)) + if (IS_ERR(clk)) { + free_irq(pll->irq, pll); kfree(pll); + } return clk; } diff --git a/drivers/clk/at91/clk-system.c b/drivers/clk/at91/clk-system.c index a76d03fd577b..58008b3e8bc1 100644 --- a/drivers/clk/at91/clk-system.c +++ b/drivers/clk/at91/clk-system.c @@ -130,13 +130,17 @@ at91_clk_register_system(struct at91_pmc *pmc, const char *name, irq_set_status_flags(sys->irq, IRQ_NOAUTOEN); ret = request_irq(sys->irq, clk_system_irq_handler, IRQF_TRIGGER_HIGH, name, sys); - if (ret) + if (ret) { + kfree(sys); return ERR_PTR(ret); + } } clk = clk_register(NULL, &sys->hw); - if (IS_ERR(clk)) + if (IS_ERR(clk)) { + free_irq(sys->irq, sys); kfree(sys); + } return clk; } diff --git a/drivers/clk/at91/clk-utmi.c b/drivers/clk/at91/clk-utmi.c index ae3263bc1476..30dd697b1668 100644 --- a/drivers/clk/at91/clk-utmi.c +++ b/drivers/clk/at91/clk-utmi.c @@ -118,12 +118,16 @@ at91_clk_register_utmi(struct at91_pmc *pmc, unsigned int irq, irq_set_status_flags(utmi->irq, IRQ_NOAUTOEN); ret = request_irq(utmi->irq, clk_utmi_irq_handler, IRQF_TRIGGER_HIGH, "clk-utmi", utmi); - if (ret) + if (ret) { + kfree(utmi); return ERR_PTR(ret); + } clk = clk_register(NULL, &utmi->hw); - if (IS_ERR(clk)) + if (IS_ERR(clk)) { + free_irq(utmi->irq, utmi); kfree(utmi); + } return clk; } From 13ee9fdba96577eb1583dcd7b15767ef623fae12 Mon Sep 17 00:00:00 2001 From: Szabolcs Nagy Date: Wed, 1 Jul 2015 23:08:10 +0100 Subject: [PATCH 044/210] ARM: 8397/1: fix vdsomunge not to depend on glibc specific error.h If the host toolchain is not glibc based then the arm kernel build fails with arch/arm/vdso/vdsomunge.c:53:19: fatal error: error.h: No such file or directory error.h is a glibc only header (ie not available in musl, newlib and bsd libcs). Changed the error reporting to standard conforming code to avoid depending on specific C implementations. Signed-off-by: Szabolcs Nagy Acked-by: Will Deacon Fixes: 8512287a8165 ("ARM: 8330/1: add VDSO user-space code") Cc: stable@vger.kernel.org Signed-off-by: Nathan Lynch Signed-off-by: Russell King --- arch/arm/vdso/vdsomunge.c | 56 +++++++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 23 deletions(-) diff --git a/arch/arm/vdso/vdsomunge.c b/arch/arm/vdso/vdsomunge.c index 9005b07296c8..aedec81d1198 100644 --- a/arch/arm/vdso/vdsomunge.c +++ b/arch/arm/vdso/vdsomunge.c @@ -45,13 +45,11 @@ * it does. */ -#define _GNU_SOURCE - #include #include #include -#include #include +#include #include #include #include @@ -82,11 +80,25 @@ #define EF_ARM_ABI_FLOAT_HARD 0x400 #endif +static int failed; +static const char *argv0; static const char *outfile; +static void fail(const char *fmt, ...) +{ + va_list ap; + + failed = 1; + fprintf(stderr, "%s: ", argv0); + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + exit(EXIT_FAILURE); +} + static void cleanup(void) { - if (error_message_count > 0 && outfile != NULL) + if (failed && outfile != NULL) unlink(outfile); } @@ -119,68 +131,66 @@ int main(int argc, char **argv) int infd; atexit(cleanup); + argv0 = argv[0]; if (argc != 3) - error(EXIT_FAILURE, 0, "Usage: %s [infile] [outfile]", argv[0]); + fail("Usage: %s [infile] [outfile]\n", argv[0]); infile = argv[1]; outfile = argv[2]; infd = open(infile, O_RDONLY); if (infd < 0) - error(EXIT_FAILURE, errno, "Cannot open %s", infile); + fail("Cannot open %s: %s\n", infile, strerror(errno)); if (fstat(infd, &stat) != 0) - error(EXIT_FAILURE, errno, "Failed stat for %s", infile); + fail("Failed stat for %s: %s\n", infile, strerror(errno)); inbuf = mmap(NULL, stat.st_size, PROT_READ, MAP_PRIVATE, infd, 0); if (inbuf == MAP_FAILED) - error(EXIT_FAILURE, errno, "Failed to map %s", infile); + fail("Failed to map %s: %s\n", infile, strerror(errno)); close(infd); inhdr = inbuf; if (memcmp(&inhdr->e_ident, ELFMAG, SELFMAG) != 0) - error(EXIT_FAILURE, 0, "Not an ELF file"); + fail("Not an ELF file\n"); if (inhdr->e_ident[EI_CLASS] != ELFCLASS32) - error(EXIT_FAILURE, 0, "Unsupported ELF class"); + fail("Unsupported ELF class\n"); swap = inhdr->e_ident[EI_DATA] != HOST_ORDER; if (read_elf_half(inhdr->e_type, swap) != ET_DYN) - error(EXIT_FAILURE, 0, "Not a shared object"); + fail("Not a shared object\n"); - if (read_elf_half(inhdr->e_machine, swap) != EM_ARM) { - error(EXIT_FAILURE, 0, "Unsupported architecture %#x", - inhdr->e_machine); - } + if (read_elf_half(inhdr->e_machine, swap) != EM_ARM) + fail("Unsupported architecture %#x\n", inhdr->e_machine); e_flags = read_elf_word(inhdr->e_flags, swap); if (EF_ARM_EABI_VERSION(e_flags) != EF_ARM_EABI_VER5) { - error(EXIT_FAILURE, 0, "Unsupported EABI version %#x", - EF_ARM_EABI_VERSION(e_flags)); + fail("Unsupported EABI version %#x\n", + EF_ARM_EABI_VERSION(e_flags)); } if (e_flags & EF_ARM_ABI_FLOAT_HARD) - error(EXIT_FAILURE, 0, - "Unexpected hard-float flag set in e_flags"); + fail("Unexpected hard-float flag set in e_flags\n"); clear_soft_float = !!(e_flags & EF_ARM_ABI_FLOAT_SOFT); outfd = open(outfile, O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR); if (outfd < 0) - error(EXIT_FAILURE, errno, "Cannot open %s", outfile); + fail("Cannot open %s: %s\n", outfile, strerror(errno)); if (ftruncate(outfd, stat.st_size) != 0) - error(EXIT_FAILURE, errno, "Cannot truncate %s", outfile); + fail("Cannot truncate %s: %s\n", outfile, strerror(errno)); outbuf = mmap(NULL, stat.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, outfd, 0); if (outbuf == MAP_FAILED) - error(EXIT_FAILURE, errno, "Failed to map %s", outfile); + fail("Failed to map %s: %s\n", outfile, strerror(errno)); close(outfd); @@ -195,7 +205,7 @@ int main(int argc, char **argv) } if (msync(outbuf, stat.st_size, MS_SYNC) != 0) - error(EXIT_FAILURE, errno, "Failed to sync %s", outfile); + fail("Failed to sync %s: %s\n", outfile, strerror(errno)); return EXIT_SUCCESS; } From 11b8b25ce4f8acfd3b438683c0c9ade27756c6e8 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 3 Jul 2015 12:42:36 +0100 Subject: [PATCH 045/210] ARM: fix lockdep unannotated irqs-off warning Wolfram Sang reported an unannotated irqs-off warning from lockdep: WARNING: CPU: 0 PID: 282 at kernel/locking/lockdep.c:3557 check_flags+0x84/0x1f4() DEBUG_LOCKS_WARN_ON(current->hardirqs_enabled) CPU: 0 PID: 282 Comm: rcS Tainted: G W 4.1.0-00002-g5b076054611833 #179 Hardware name: Generic Emma Mobile EV2 (Flattened Device Tree) Backtrace: [] (dump_backtrace) from [] (show_stack+0x18/0x1c) r6:c02dcc67 r5:00000009 r4:00000000 r3:00400000 [] (show_stack) from [] (dump_stack+0x20/0x28) [] (dump_stack) from [] (warn_slowpath_common+0x8c/0xb4) [] (warn_slowpath_common) from [] (warn_slowpath_fmt+0x38/0x40) r8:c780f470 r7:00000000 r6:00000000 r5:c03b0570 r4:c0b7ec04 [] (warn_slowpath_fmt) from [] (check_flags+0x84/0x1f4) r3:c02e13d8 r2:c02dceaa [] (check_flags) from [] (lock_acquire+0x4c/0xbc) r5:00000000 r4:60000193 [] (lock_acquire) from [] (_raw_spin_lock+0x34/0x44) r9:000a8d5c r8:00000001 r7:c7806000 r6:c780f460 r5:c03b06a0 r4:c780f460 [] (_raw_spin_lock) from [] (handle_fasteoi_irq+0x20/0x11c) r4:c780f400 [] (handle_fasteoi_irq) from [] (generic_handle_irq+0x28/0x38) r6:00000000 r5:c03b038c r4:00000012 r3:c005a8ac [] (generic_handle_irq) from [] (__handle_domain_irq+0x88/0xa8) r4:00000000 r3:00000026 [] (__handle_domain_irq) from [] (gic_handle_irq+0x40/0x58) r8:10c5347d r7:10c5347d r6:c35b1fb0 r5:c03a6304 r4:c8802000 r3:c35b1fb0 [] (gic_handle_irq) from [] (__irq_usr+0x48/0x60) Exception stack(0xc35b1fb0 to 0xc35b1ff8) 1fa0: 00000061 00000000 000ab736 00000066 1fc0: 00000061 000aa1f0 000a8d54 000a8d54 000a8d88 000a8d5c 000a8cc8 000a8d68 1fe0: 72727272 bef8a528 000398c0 00031334 20000010 ffffffff r6:ffffffff r5:20000010 r4:00031334 r3:00000061 ---[ end trace cb88537fdc8fa202 ]--- possible reason: unannotated irqs-off. irq event stamp: 769 hardirqs last enabled at (769): [] ret_fast_syscall+0x2c/0x54 hardirqs last disabled at (768): [] ret_fast_syscall+0xc/0x54 softirqs last enabled at (0): [] copy_process.part.65+0x2e8/0x11dc softirqs last disabled at (0): [< (null)>] (null) His kernel configuration had: CONFIG_PROVE_LOCKING=y CONFIG_TRACE_IRQFLAGS=y but no IRQSOFF_TRACER, which means entry from userspace can result in the kernel seeing IRQs off without being notified of that change of state. Change the IRQSOFF ifdef in the usr_entry macro to TRACE_IRQFLAGS instead. Tested-by: Wolfram Sang Signed-off-by: Russell King --- arch/arm/kernel/entry-armv.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 570306c49406..dba6cf65c9e4 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -408,7 +408,7 @@ ENDPROC(__fiq_abt) zero_fp .if \trace -#ifdef CONFIG_IRQSOFF_TRACER +#ifdef CONFIG_TRACE_IRQFLAGS bl trace_hardirqs_off #endif ct_user_exit save = 0 From ac5e2f170f033e48cfcdc2c4f74b27083eabffa5 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 1 Jul 2015 10:02:39 +0100 Subject: [PATCH 046/210] ARM: io: document ARM specific behaviour of ioremap*() implementations Add documentation of the ARM specific behaviour of the mappings setup by the ioremap() series of macros. Signed-off-by: Russell King --- arch/arm/include/asm/io.h | 42 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h index 1c3938f26beb..ae10717f61d4 100644 --- a/arch/arm/include/asm/io.h +++ b/arch/arm/include/asm/io.h @@ -348,11 +348,47 @@ static inline void memcpy_toio(volatile void __iomem *to, const void *from, #endif /* readl */ /* - * ioremap and friends. + * ioremap() and friends. * - * ioremap takes a PCI memory address, as specified in - * Documentation/io-mapping.txt. + * ioremap() takes a resource address, and size. Due to the ARM memory + * types, it is important to use the correct ioremap() function as each + * mapping has specific properties. * + * Function Memory type Cacheability Cache hint + * ioremap() Device n/a n/a + * ioremap_nocache() Device n/a n/a + * ioremap_cache() Normal Writeback Read allocate + * ioremap_wc() Normal Non-cacheable n/a + * ioremap_wt() Normal Non-cacheable n/a + * + * All device mappings have the following properties: + * - no access speculation + * - no repetition (eg, on return from an exception) + * - number, order and size of accesses are maintained + * - unaligned accesses are "unpredictable" + * - writes may be delayed before they hit the endpoint device + * + * ioremap_nocache() is the same as ioremap() as there are too many device + * drivers using this for device registers, and documentation which tells + * people to use it for such for this to be any different. This is not a + * safe fallback for memory-like mappings, or memory regions where the + * compiler may generate unaligned accesses - eg, via inlining its own + * memcpy. + * + * All normal memory mappings have the following properties: + * - reads can be repeated with no side effects + * - repeated reads return the last value written + * - reads can fetch additional locations without side effects + * - writes can be repeated (in certain cases) with no side effects + * - writes can be merged before accessing the target + * - unaligned accesses can be supported + * - ordering is not guaranteed without explicit dependencies or barrier + * instructions + * - writes may be delayed before they hit the endpoint memory + * + * The cache hint is only a performance hint: CPUs may alias these hints. + * Eg, a CPU not implementing read allocate but implementing write allocate + * will provide a write allocate mapping instead. */ #define ioremap(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE) #define ioremap_nocache(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE) From 1e2c727f6c022778d4562147433ca4c0b101f0ad Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 1 Jul 2015 10:17:55 +0100 Subject: [PATCH 047/210] ARM: io: fix ioremap_wt() implementation ioremap_wt() was added by aliasing it to ioremap_nocache(), which is a device mapping. Device mappings do not allow unaligned accesses, but it appears that GCC is able to inline its own memcpy() implementation which may use such accesses. The only user of this is pmem, which uses memcpy() on the region. Therefore, this is unsafe. We must implement ioremap_wt() correctly for ARM, or not at all. This patch adds a more correct implementation by re-using ioremap_wc() to provide a normal-memory non-cacheable mapping. Signed-off-by: Russell King --- arch/arm/include/asm/io.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h index ae10717f61d4..f1083ebf214d 100644 --- a/arch/arm/include/asm/io.h +++ b/arch/arm/include/asm/io.h @@ -394,7 +394,7 @@ static inline void memcpy_toio(volatile void __iomem *to, const void *from, #define ioremap_nocache(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE) #define ioremap_cache(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE_CACHED) #define ioremap_wc(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE_WC) -#define ioremap_wt(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE) +#define ioremap_wt(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE_WC) #define iounmap __arm_iounmap /* From 20a1080dff2f1be8933baa0d910c41882c7279ee Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 1 Jul 2015 10:06:32 +0100 Subject: [PATCH 048/210] ARM: io: convert ioremap*() to functions Convert the ioremap*() preprocessor macros to real functions, moving them out of line. This allows us to kill off __arm_ioremap(), and __arm_iounmap() helpers, and remove __arm_ioremap_pfn_caller() from global view. Signed-off-by: Russell King --- arch/arm/include/asm/io.h | 24 ++++++++++--------- arch/arm/mm/ioremap.c | 33 ++++++++++++++++++-------- arch/arm/mm/nommu.c | 49 +++++++++++++++++++++++++-------------- 3 files changed, 67 insertions(+), 39 deletions(-) diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h index f1083ebf214d..a5ed237c87d9 100644 --- a/arch/arm/include/asm/io.h +++ b/arch/arm/include/asm/io.h @@ -140,16 +140,11 @@ static inline u32 __raw_readl(const volatile void __iomem *addr) * The _caller variety takes a __builtin_return_address(0) value for * /proc/vmalloc to use - and should only be used in non-inline functions. */ -extern void __iomem *__arm_ioremap_pfn_caller(unsigned long, unsigned long, - size_t, unsigned int, void *); extern void __iomem *__arm_ioremap_caller(phys_addr_t, size_t, unsigned int, void *); - extern void __iomem *__arm_ioremap_pfn(unsigned long, unsigned long, size_t, unsigned int); -extern void __iomem *__arm_ioremap(phys_addr_t, size_t, unsigned int); extern void __iomem *__arm_ioremap_exec(phys_addr_t, size_t, bool cached); extern void __iounmap(volatile void __iomem *addr); -extern void __arm_iounmap(volatile void __iomem *addr); extern void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t, unsigned int, void *); @@ -390,12 +385,19 @@ static inline void memcpy_toio(volatile void __iomem *to, const void *from, * Eg, a CPU not implementing read allocate but implementing write allocate * will provide a write allocate mapping instead. */ -#define ioremap(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE) -#define ioremap_nocache(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE) -#define ioremap_cache(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE_CACHED) -#define ioremap_wc(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE_WC) -#define ioremap_wt(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE_WC) -#define iounmap __arm_iounmap +void __iomem *ioremap(resource_size_t res_cookie, size_t size); +#define ioremap ioremap +#define ioremap_nocache ioremap + +void __iomem *ioremap_cache(resource_size_t res_cookie, size_t size); +#define ioremap_cache ioremap_cache + +void __iomem *ioremap_wc(resource_size_t res_cookie, size_t size); +#define ioremap_wc ioremap_wc +#define ioremap_wt ioremap_wc + +void iounmap(volatile void __iomem *iomem_cookie); +#define iounmap iounmap /* * io{read,write}{16,32}be() macros diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index d1e5ad7ab3bc..0c81056c1dd7 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -255,7 +255,7 @@ remap_area_supersections(unsigned long virt, unsigned long pfn, } #endif -void __iomem * __arm_ioremap_pfn_caller(unsigned long pfn, +static void __iomem * __arm_ioremap_pfn_caller(unsigned long pfn, unsigned long offset, size_t size, unsigned int mtype, void *caller) { const struct mem_type *type; @@ -363,7 +363,7 @@ __arm_ioremap_pfn(unsigned long pfn, unsigned long offset, size_t size, unsigned int mtype) { return __arm_ioremap_pfn_caller(pfn, offset, size, mtype, - __builtin_return_address(0)); + __builtin_return_address(0)); } EXPORT_SYMBOL(__arm_ioremap_pfn); @@ -371,13 +371,26 @@ void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t, unsigned int, void *) = __arm_ioremap_caller; -void __iomem * -__arm_ioremap(phys_addr_t phys_addr, size_t size, unsigned int mtype) +void __iomem *ioremap(resource_size_t res_cookie, size_t size) { - return arch_ioremap_caller(phys_addr, size, mtype, - __builtin_return_address(0)); + return arch_ioremap_caller(res_cookie, size, MT_DEVICE, + __builtin_return_address(0)); } -EXPORT_SYMBOL(__arm_ioremap); +EXPORT_SYMBOL(ioremap); + +void __iomem *ioremap_cache(resource_size_t res_cookie, size_t size) +{ + return arch_ioremap_caller(res_cookie, size, MT_DEVICE_CACHED, + __builtin_return_address(0)); +} +EXPORT_SYMBOL(ioremap_cache); + +void __iomem *ioremap_wc(resource_size_t res_cookie, size_t size) +{ + return arch_ioremap_caller(res_cookie, size, MT_DEVICE_WC, + __builtin_return_address(0)); +} +EXPORT_SYMBOL(ioremap_wc); /* * Remap an arbitrary physical address space into the kernel virtual @@ -431,11 +444,11 @@ void __iounmap(volatile void __iomem *io_addr) void (*arch_iounmap)(volatile void __iomem *) = __iounmap; -void __arm_iounmap(volatile void __iomem *io_addr) +void iounmap(volatile void __iomem *cookie) { - arch_iounmap(io_addr); + arch_iounmap(cookie); } -EXPORT_SYMBOL(__arm_iounmap); +EXPORT_SYMBOL(iounmap); #ifdef CONFIG_PCI static int pci_ioremap_mem_type = MT_DEVICE; diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c index afd7e05d95f1..1dd10936d68d 100644 --- a/arch/arm/mm/nommu.c +++ b/arch/arm/mm/nommu.c @@ -351,30 +351,43 @@ void __iomem *__arm_ioremap_pfn(unsigned long pfn, unsigned long offset, } EXPORT_SYMBOL(__arm_ioremap_pfn); -void __iomem *__arm_ioremap_pfn_caller(unsigned long pfn, unsigned long offset, - size_t size, unsigned int mtype, void *caller) -{ - return __arm_ioremap_pfn(pfn, offset, size, mtype); -} - -void __iomem *__arm_ioremap(phys_addr_t phys_addr, size_t size, - unsigned int mtype) -{ - return (void __iomem *)phys_addr; -} -EXPORT_SYMBOL(__arm_ioremap); - -void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t, unsigned int, void *); - void __iomem *__arm_ioremap_caller(phys_addr_t phys_addr, size_t size, unsigned int mtype, void *caller) { - return __arm_ioremap(phys_addr, size, mtype); + return (void __iomem *)phys_addr; } +void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t, unsigned int, void *); + +void __iomem *ioremap(resource_size_t res_cookie, size_t size) +{ + return __arm_ioremap_caller(res_cookie, size, MT_DEVICE, + __builtin_return_address(0)); +} +EXPORT_SYMBOL(ioremap); + +void __iomem *ioremap_cache(resource_size_t res_cookie, size_t size) +{ + return __arm_ioremap_caller(res_cookie, size, MT_DEVICE_CACHED, + __builtin_return_address(0)); +} +EXPORT_SYMBOL(ioremap_cache); + +void __iomem *ioremap_wc(resource_size_t res_cookie, size_t size) +{ + return __arm_ioremap_caller(res_cookie, size, MT_DEVICE_WC, + __builtin_return_address(0)); +} +EXPORT_SYMBOL(ioremap_wc); + +void __iounmap(volatile void __iomem *addr) +{ +} +EXPORT_SYMBOL(__iounmap); + void (*arch_iounmap)(volatile void __iomem *); -void __arm_iounmap(volatile void __iomem *addr) +void iounmap(volatile void __iomem *addr) { } -EXPORT_SYMBOL(__arm_iounmap); +EXPORT_SYMBOL(iounmap); From 9ab79bb22cc77adcca5ebbadea6caec6a478f283 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 1 Jul 2015 15:23:10 +0100 Subject: [PATCH 049/210] ARM: pgtable: document mapping types Signed-off-by: Russell King --- arch/arm/include/asm/pgtable-2level.h | 31 ++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h index bfd662e49a25..aeddd28b3595 100644 --- a/arch/arm/include/asm/pgtable-2level.h +++ b/arch/arm/include/asm/pgtable-2level.h @@ -129,7 +129,36 @@ /* * These are the memory types, defined to be compatible with - * pre-ARMv6 CPUs cacheable and bufferable bits: XXCB + * pre-ARMv6 CPUs cacheable and bufferable bits: n/a,n/a,C,B + * ARMv6+ without TEX remapping, they are a table index. + * ARMv6+ with TEX remapping, they correspond to n/a,TEX(0),C,B + * + * MT type Pre-ARMv6 ARMv6+ type / cacheable status + * UNCACHED Uncached Strongly ordered + * BUFFERABLE Bufferable Normal memory / non-cacheable + * WRITETHROUGH Writethrough Normal memory / write through + * WRITEBACK Writeback Normal memory / write back, read alloc + * MINICACHE Minicache N/A + * WRITEALLOC Writeback Normal memory / write back, write alloc + * DEV_SHARED Uncached Device memory (shared) + * DEV_NONSHARED Uncached Device memory (non-shared) + * DEV_WC Bufferable Normal memory / non-cacheable + * DEV_CACHED Writeback Normal memory / write back, read alloc + * VECTORS Variable Normal memory / variable + * + * All normal memory mappings have the following properties: + * - reads can be repeated with no side effects + * - repeated reads return the last value written + * - reads can fetch additional locations without side effects + * - writes can be repeated (in certain cases) with no side effects + * - writes can be merged before accessing the target + * - unaligned accesses can be supported + * + * All device mappings have the following properties: + * - no access speculation + * - no repetition (eg, on return from an exception) + * - number, order and size of accesses are maintained + * - unaligned accesses are "unpredictable" */ #define L_PTE_MT_UNCACHED (_AT(pteval_t, 0x00) << 2) /* 0000 */ #define L_PTE_MT_BUFFERABLE (_AT(pteval_t, 0x01) << 2) /* 0001 */ From 1bd46782d08b01b73df0085b51ea1021b19b44fd Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 3 Jul 2015 15:22:54 +0100 Subject: [PATCH 050/210] ARM: avoid unwanted GCC memset()/memcpy() optimisations for IO variants We don't want GCC optimising our memset_io(), memcpy_fromio() or memcpy_toio() variants, so we must not call one of the standard functions. Provide a separate name for our assembly memcpy() and memset() functions, and use that instead, thereby bypassing GCC's ability to optimise these operations. GCCs optimisation may introduce unaligned accesses which are invalid for device mappings. Signed-off-by: Russell King --- arch/arm/include/asm/io.h | 9 ++++++--- arch/arm/kernel/armksyms.c | 6 ++++++ arch/arm/lib/memcpy.S | 2 ++ arch/arm/lib/memset.S | 2 ++ 4 files changed, 16 insertions(+), 3 deletions(-) diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h index a5ed237c87d9..485982084fe9 100644 --- a/arch/arm/include/asm/io.h +++ b/arch/arm/include/asm/io.h @@ -316,21 +316,24 @@ extern void _memset_io(volatile void __iomem *, int, size_t); static inline void memset_io(volatile void __iomem *dst, unsigned c, size_t count) { - memset((void __force *)dst, c, count); + extern void mmioset(void *, unsigned int, size_t); + mmioset((void __force *)dst, c, count); } #define memset_io(dst,c,count) memset_io(dst,c,count) static inline void memcpy_fromio(void *to, const volatile void __iomem *from, size_t count) { - memcpy(to, (const void __force *)from, count); + extern void mmiocpy(void *, const void *, size_t); + mmiocpy(to, (const void __force *)from, count); } #define memcpy_fromio(to,from,count) memcpy_fromio(to,from,count) static inline void memcpy_toio(volatile void __iomem *to, const void *from, size_t count) { - memcpy((void __force *)to, from, count); + extern void mmiocpy(void *, const void *, size_t); + mmiocpy((void __force *)to, from, count); } #define memcpy_toio(to,from,count) memcpy_toio(to,from,count) diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c index a88671cfe1ff..5e5a51a99e68 100644 --- a/arch/arm/kernel/armksyms.c +++ b/arch/arm/kernel/armksyms.c @@ -50,6 +50,9 @@ extern void __aeabi_ulcmp(void); extern void fpundefinstr(void); +void mmioset(void *, unsigned int, size_t); +void mmiocpy(void *, const void *, size_t); + /* platform dependent support */ EXPORT_SYMBOL(arm_delay_ops); @@ -88,6 +91,9 @@ EXPORT_SYMBOL(memmove); EXPORT_SYMBOL(memchr); EXPORT_SYMBOL(__memzero); +EXPORT_SYMBOL(mmioset); +EXPORT_SYMBOL(mmiocpy); + #ifdef CONFIG_MMU EXPORT_SYMBOL(copy_page); diff --git a/arch/arm/lib/memcpy.S b/arch/arm/lib/memcpy.S index 7797e81e40e0..64111bd4440b 100644 --- a/arch/arm/lib/memcpy.S +++ b/arch/arm/lib/memcpy.S @@ -61,8 +61,10 @@ /* Prototype: void *memcpy(void *dest, const void *src, size_t n); */ +ENTRY(mmiocpy) ENTRY(memcpy) #include "copy_template.S" ENDPROC(memcpy) +ENDPROC(mmiocpy) diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S index a4ee97b5a2bf..3c65e3bd790f 100644 --- a/arch/arm/lib/memset.S +++ b/arch/arm/lib/memset.S @@ -16,6 +16,7 @@ .text .align 5 +ENTRY(mmioset) ENTRY(memset) UNWIND( .fnstart ) ands r3, r0, #3 @ 1 unaligned? @@ -133,3 +134,4 @@ UNWIND( .fnstart ) b 1b UNWIND( .fnend ) ENDPROC(memset) +ENDPROC(mmioset) From 9705acd63b125dee8b15c705216d7186daea4625 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Fri, 3 Jul 2015 21:13:55 -0400 Subject: [PATCH 051/210] ext4: fix reservation release on invalidatepage for delalloc fs On delalloc enabled file system on invalidatepage operation in ext4_da_page_release_reservation() we want to clear the delayed buffer and remove the extent covering the delayed buffer from the extent status tree. However currently there is a bug where on the systems with page size > block size we will always remove extents from the start of the page regardless where the actual delayed buffers are positioned in the page. This leads to the errors like this: EXT4-fs warning (device loop0): ext4_da_release_space:1225: ext4_da_release_space: ino 13, to_free 1 with only 0 reserved data blocks This however can cause data loss on writeback time if the file system is in ENOSPC condition because we're releasing reservation for someones else delayed buffer. Fix this by only removing extents that corresponds to the part of the page we want to invalidate. This problem is reproducible by the following fio receipt (however I was only able to reproduce it with fio-2.1 or older. [global] bs=8k iodepth=1024 iodepth_batch=60 randrepeat=1 size=1m directory=/mnt/test numjobs=20 [job1] ioengine=sync bs=1k direct=1 rw=randread filename=file1:file2 [job2] ioengine=libaio rw=randwrite direct=1 filename=file1:file2 [job3] bs=1k ioengine=posixaio rw=randwrite direct=1 filename=file1:file2 [job5] bs=1k ioengine=sync rw=randread filename=file1:file2 [job7] ioengine=libaio rw=randwrite filename=file1:file2 [job8] ioengine=posixaio rw=randwrite filename=file1:file2 [job10] ioengine=mmap rw=randwrite bs=1k filename=file1:file2 [job11] ioengine=mmap rw=randwrite direct=1 filename=file1:file2 Signed-off-by: Lukas Czerner Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara Cc: stable@vger.kernel.org --- fs/ext4/inode.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 4ad73d3c1003..2334e86d7447 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1330,7 +1330,7 @@ static void ext4_da_page_release_reservation(struct page *page, unsigned int offset, unsigned int length) { - int to_release = 0; + int to_release = 0, contiguous_blks = 0; struct buffer_head *head, *bh; unsigned int curr_off = 0; struct inode *inode = page->mapping->host; @@ -1351,14 +1351,23 @@ static void ext4_da_page_release_reservation(struct page *page, if ((offset <= curr_off) && (buffer_delay(bh))) { to_release++; + contiguous_blks++; clear_buffer_delay(bh); + } else if (contiguous_blks) { + lblk = page->index << + (PAGE_CACHE_SHIFT - inode->i_blkbits); + lblk += (curr_off >> inode->i_blkbits) - + contiguous_blks; + ext4_es_remove_extent(inode, lblk, contiguous_blks); + contiguous_blks = 0; } curr_off = next_off; } while ((bh = bh->b_this_page) != head); - if (to_release) { + if (contiguous_blks) { lblk = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); - ext4_es_remove_extent(inode, lblk, to_release); + lblk += (curr_off >> inode->i_blkbits) - contiguous_blks; + ext4_es_remove_extent(inode, lblk, contiguous_blks); } /* If we have released all the blocks belonging to a cluster, then we From d6f123a9297496ad0b6335fe881504c4b5b2a5e5 Mon Sep 17 00:00:00 2001 From: Eryu Guan Date: Fri, 3 Jul 2015 23:56:50 -0400 Subject: [PATCH 052/210] ext4: be more strict when migrating to non-extent based file Currently the check in ext4_ind_migrate() is not enough before doing the real conversion: a) delayed allocated extents could bypass the check on eh->eh_entries and eh->eh_depth This can be demonstrated by this script xfs_io -fc "pwrite 0 4k" -c "pwrite 8k 4k" /mnt/ext4/testfile chattr -e /mnt/ext4/testfile where testfile has two extents but still be converted to non-extent based file format. b) only extent length is checked but not the offset, which would result in data lose (delalloc) or fs corruption (nodelalloc), because non-extent based file only supports at most (12 + 2^10 + 2^20 + 2^30) blocks This can be demostrated by xfs_io -fc "pwrite 5T 4k" /mnt/ext4/testfile chattr -e /mnt/ext4/testfile sync If delalloc is enabled, dmesg prints EXT4-fs warning (device dm-4): ext4_block_to_path:105: block 1342177280 > max in inode 53 EXT4-fs (dm-4): Delayed block allocation failed for inode 53 at logical offset 1342177280 with max blocks 1 with error 5 EXT4-fs (dm-4): This should not happen!! Data will be lost If delalloc is disabled, e2fsck -nf shows corruption Inode 53, i_size is 5497558142976, should be 4096. Fix? no Fix the two issues by a) forcing all delayed allocation blocks to be allocated before checking eh->eh_depth and eh->eh_entries b) limiting the last logical block of the extent is within direct map Signed-off-by: Eryu Guan Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- fs/ext4/migrate.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index b52374e42102..6d8b0c917364 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -620,6 +620,7 @@ int ext4_ind_migrate(struct inode *inode) struct ext4_inode_info *ei = EXT4_I(inode); struct ext4_extent *ex; unsigned int i, len; + ext4_lblk_t end; ext4_fsblk_t blk; handle_t *handle; int ret; @@ -633,6 +634,14 @@ int ext4_ind_migrate(struct inode *inode) EXT4_FEATURE_RO_COMPAT_BIGALLOC)) return -EOPNOTSUPP; + /* + * In order to get correct extent info, force all delayed allocation + * blocks to be allocated, otherwise delayed allocation blocks may not + * be reflected and bypass the checks on extent header. + */ + if (test_opt(inode->i_sb, DELALLOC)) + ext4_alloc_da_blocks(inode); + handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -654,7 +663,8 @@ int ext4_ind_migrate(struct inode *inode) else { len = le16_to_cpu(ex->ee_len); blk = ext4_ext_pblock(ex); - if (len > EXT4_NDIR_BLOCKS) { + end = le32_to_cpu(ex->ee_block) + len - 1; + if (end >= EXT4_NDIR_BLOCKS) { ret = -EOPNOTSUPP; goto errout; } From 8974fec7d72e3e02752fe0f27b4c3719c78d9a15 Mon Sep 17 00:00:00 2001 From: Eryu Guan Date: Sat, 4 Jul 2015 00:03:44 -0400 Subject: [PATCH 053/210] ext4: correctly migrate a file with a hole at the beginning Currently ext4_ind_migrate() doesn't correctly handle a file which contains a hole at the beginning of the file. This caused the migration to be done incorrectly, and then if there is a subsequent following delayed allocation write to the "hole", this would reclaim the same data blocks again and results in fs corruption. # assmuing 4k block size ext4, with delalloc enabled # skip the first block and write to the second block xfs_io -fc "pwrite 4k 4k" -c "fsync" /mnt/ext4/testfile # converting to indirect-mapped file, which would move the data blocks # to the beginning of the file, but extent status cache still marks # that region as a hole chattr -e /mnt/ext4/testfile # delayed allocation writes to the "hole", reclaim the same data block # again, results in i_blocks corruption xfs_io -c "pwrite 0 4k" /mnt/ext4/testfile umount /mnt/ext4 e2fsck -nf /dev/sda6 ... Inode 53, i_blocks is 16, should be 8. Fix? no ... Signed-off-by: Eryu Guan Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- fs/ext4/migrate.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 6d8b0c917364..6163ad21cb0e 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -620,7 +620,7 @@ int ext4_ind_migrate(struct inode *inode) struct ext4_inode_info *ei = EXT4_I(inode); struct ext4_extent *ex; unsigned int i, len; - ext4_lblk_t end; + ext4_lblk_t start, end; ext4_fsblk_t blk; handle_t *handle; int ret; @@ -659,11 +659,12 @@ int ext4_ind_migrate(struct inode *inode) goto errout; } if (eh->eh_entries == 0) - blk = len = 0; + blk = len = start = end = 0; else { len = le16_to_cpu(ex->ee_len); blk = ext4_ext_pblock(ex); - end = le32_to_cpu(ex->ee_block) + len - 1; + start = le32_to_cpu(ex->ee_block); + end = start + len - 1; if (end >= EXT4_NDIR_BLOCKS) { ret = -EOPNOTSUPP; goto errout; @@ -672,7 +673,7 @@ int ext4_ind_migrate(struct inode *inode) ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS); memset(ei->i_data, 0, sizeof(ei->i_data)); - for (i=0; i < len; i++) + for (i = start; i <= end; i++) ei->i_data[i] = cpu_to_le32(blk++); ext4_mark_inode_dirty(handle, inode); errout: From 7444a072c387a93ebee7066e8aee776954ab0e41 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Sun, 5 Jul 2015 12:33:44 -0400 Subject: [PATCH 054/210] ext4: replace open coded nofail allocation in ext4_free_blocks() ext4_free_blocks is looping around the allocation request and mimics __GFP_NOFAIL behavior without any allocation fallback strategy. Let's remove the open coded loop and replace it with __GFP_NOFAIL. Without the flag the allocator has no way to find out never-fail requirement and cannot help in any way. Signed-off-by: Michal Hocko Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- fs/ext4/mballoc.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 1c535fa67640..2299d629eeb1 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4815,18 +4815,12 @@ do_more: /* * blocks being freed are metadata. these blocks shouldn't * be used until this transaction is committed + * + * We use __GFP_NOFAIL because ext4_free_blocks() is not allowed + * to fail. */ - retry: - new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS); - if (!new_entry) { - /* - * We use a retry loop because - * ext4_free_blocks() is not allowed to fail. - */ - cond_resched(); - congestion_wait(BLK_RW_ASYNC, HZ/50); - goto retry; - } + new_entry = kmem_cache_alloc(ext4_free_data_cachep, + GFP_NOFS|__GFP_NOFAIL); new_entry->efd_start_cluster = bit; new_entry->efd_group = block_group; new_entry->efd_count = count_clusters; From 728abda6a6654ee7f4e903dc921c6307065e1644 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 5 Jul 2015 14:12:42 -0300 Subject: [PATCH 055/210] tools: Adopt {READ,WRITE_ONCE} from the kernel We need it to build rbtree.c after this cset: commit d72da4a4d973 Author: Peter Zijlstra Date: Wed May 27 11:09:36 2015 +0930 rbtree: Make lockless searches non-fatal Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-qlnzhezv5ddwst0w9fydju0y@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/compiler.h | 58 ++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h index f0e72674c52d..9098083869c8 100644 --- a/tools/include/linux/compiler.h +++ b/tools/include/linux/compiler.h @@ -41,4 +41,62 @@ #define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) +#include + +static __always_inline void __read_once_size(const volatile void *p, void *res, int size) +{ + switch (size) { + case 1: *(__u8 *)res = *(volatile __u8 *)p; break; + case 2: *(__u16 *)res = *(volatile __u16 *)p; break; + case 4: *(__u32 *)res = *(volatile __u32 *)p; break; + case 8: *(__u64 *)res = *(volatile __u64 *)p; break; + default: + barrier(); + __builtin_memcpy((void *)res, (const void *)p, size); + barrier(); + } +} + +static __always_inline void __write_once_size(volatile void *p, void *res, int size) +{ + switch (size) { + case 1: *(volatile __u8 *)p = *(__u8 *)res; break; + case 2: *(volatile __u16 *)p = *(__u16 *)res; break; + case 4: *(volatile __u32 *)p = *(__u32 *)res; break; + case 8: *(volatile __u64 *)p = *(__u64 *)res; break; + default: + barrier(); + __builtin_memcpy((void *)p, (const void *)res, size); + barrier(); + } +} + +/* + * Prevent the compiler from merging or refetching reads or writes. The + * compiler is also forbidden from reordering successive instances of + * READ_ONCE, WRITE_ONCE and ACCESS_ONCE (see below), but only when the + * compiler is aware of some particular ordering. One way to make the + * compiler aware of ordering is to put the two invocations of READ_ONCE, + * WRITE_ONCE or ACCESS_ONCE() in different C statements. + * + * In contrast to ACCESS_ONCE these two macros will also work on aggregate + * data types like structs or unions. If the size of the accessed data + * type exceeds the word size of the machine (e.g., 32 bits or 64 bits) + * READ_ONCE() and WRITE_ONCE() will fall back to memcpy and print a + * compile-time warning. + * + * Their two major use cases are: (1) Mediating communication between + * process-level code and irq/NMI handlers, all running on the same CPU, + * and (2) Ensuring that the compiler does not fold, spindle, or otherwise + * mutilate accesses that either do not require ordering or that interact + * with an explicit memory barrier or atomic instruction that provides the + * required ordering. + */ + +#define READ_ONCE(x) \ + ({ union { typeof(x) __val; char __c[1]; } __u; __read_once_size(&(x), __u.__c, sizeof(x)); __u.__val; }) + +#define WRITE_ONCE(x, val) \ + ({ union { typeof(x) __val; char __c[1]; } __u = { .__val = (val) }; __write_once_size(&(x), __u.__c, sizeof(x)); __u.__val; }) + #endif /* _TOOLS_LINUX_COMPILER_H */ From 4407f967441aa1adfc11f739e8e9ec0f38fa839f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 5 Jul 2015 14:17:04 -0300 Subject: [PATCH 056/210] perf tools: Copy rbtree.h from the kernel We were using the include/linux/rbtree.h directly from the kernel, which broke the build as soon as it started using rcupdate.h, to avoid dragging the rcu header files into tools/, for which there is no use so far, grab a copy of rbtree.h. This is the minimal fix, later patches will copy as well lib/rbtree.c and move rbtree.h into tools/include/, etc. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-dfmuj0j63w4by7vhlh4hhn74@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/MANIFEST | 1 - tools/perf/util/include/linux/rbtree.h | 92 +++++++++++++++++++++++++- 2 files changed, 90 insertions(+), 3 deletions(-) diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index fe50a1b34aa0..6504b727f450 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -51,7 +51,6 @@ include/asm-generic/bitops/fls64.h include/asm-generic/bitops/__fls.h include/asm-generic/bitops/fls.h include/linux/perf_event.h -include/linux/rbtree.h include/linux/list.h include/linux/hash.h include/linux/stringify.h diff --git a/tools/perf/util/include/linux/rbtree.h b/tools/perf/util/include/linux/rbtree.h index f06d89f0b867..112582253dd0 100644 --- a/tools/perf/util/include/linux/rbtree.h +++ b/tools/perf/util/include/linux/rbtree.h @@ -1,7 +1,95 @@ +/* + Red Black Trees + (C) 1999 Andrea Arcangeli + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + linux/include/linux/rbtree.h + + To use rbtrees you'll have to implement your own insert and search cores. + This will avoid us to use callbacks and to drop drammatically performances. + I know it's not the cleaner way, but in C (not in C++) to get + performances and genericity... + + See Documentation/rbtree.txt for documentation and samples. +*/ + #ifndef __TOOLS_LINUX_PERF_RBTREE_H #define __TOOLS_LINUX_PERF_RBTREE_H -#include -#include "../../../../include/linux/rbtree.h" + +#include +#include + +struct rb_node { + unsigned long __rb_parent_color; + struct rb_node *rb_right; + struct rb_node *rb_left; +} __attribute__((aligned(sizeof(long)))); + /* The alignment might seem pointless, but allegedly CRIS needs it */ + +struct rb_root { + struct rb_node *rb_node; +}; + + +#define rb_parent(r) ((struct rb_node *)((r)->__rb_parent_color & ~3)) + +#define RB_ROOT (struct rb_root) { NULL, } +#define rb_entry(ptr, type, member) container_of(ptr, type, member) + +#define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL) + +/* 'empty' nodes are nodes that are known not to be inserted in an rbtree */ +#define RB_EMPTY_NODE(node) \ + ((node)->__rb_parent_color == (unsigned long)(node)) +#define RB_CLEAR_NODE(node) \ + ((node)->__rb_parent_color = (unsigned long)(node)) + + +extern void rb_insert_color(struct rb_node *, struct rb_root *); +extern void rb_erase(struct rb_node *, struct rb_root *); + + +/* Find logical next and previous nodes in a tree */ +extern struct rb_node *rb_next(const struct rb_node *); +extern struct rb_node *rb_prev(const struct rb_node *); +extern struct rb_node *rb_first(const struct rb_root *); +extern struct rb_node *rb_last(const struct rb_root *); + +/* Postorder iteration - always visit the parent after its children */ +extern struct rb_node *rb_first_postorder(const struct rb_root *); +extern struct rb_node *rb_next_postorder(const struct rb_node *); + +/* Fast replacement of a single node without remove/rebalance/add/rebalance */ +extern void rb_replace_node(struct rb_node *victim, struct rb_node *new, + struct rb_root *root); + +static inline void rb_link_node(struct rb_node *node, struct rb_node *parent, + struct rb_node **rb_link) +{ + node->__rb_parent_color = (unsigned long)parent; + node->rb_left = node->rb_right = NULL; + + *rb_link = node; +} + +#define rb_entry_safe(ptr, type, member) \ + ({ typeof(ptr) ____ptr = (ptr); \ + ____ptr ? rb_entry(____ptr, type, member) : NULL; \ + }) + /* * Handy for checking that we are not deleting an entry that is From 3f735377bfd6567d80815a6242c147211963680a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 5 Jul 2015 22:48:21 -0300 Subject: [PATCH 057/210] tools: Copy lib/rbtree.c to tools/lib/ So that we can remove kernel specific stuff we've been stubbing out via a tools/include/linux/export.h that gets removed in this patch and to avoid breakages in the future like the one fixed recently where rcupdate.h started being used in rbtree.h. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-rxuzfsozpb8hv1emwpx06rm6@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/export.h | 10 - tools/lib/rbtree.c | 548 +++++++++++++++++++++++++++++++++++ tools/perf/MANIFEST | 2 +- tools/perf/util/Build | 2 +- 4 files changed, 550 insertions(+), 12 deletions(-) delete mode 100644 tools/include/linux/export.h create mode 100644 tools/lib/rbtree.c diff --git a/tools/include/linux/export.h b/tools/include/linux/export.h deleted file mode 100644 index d07e586b9ba0..000000000000 --- a/tools/include/linux/export.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef _TOOLS_LINUX_EXPORT_H_ -#define _TOOLS_LINUX_EXPORT_H_ - -#define EXPORT_SYMBOL(sym) -#define EXPORT_SYMBOL_GPL(sym) -#define EXPORT_SYMBOL_GPL_FUTURE(sym) -#define EXPORT_UNUSED_SYMBOL(sym) -#define EXPORT_UNUSED_SYMBOL_GPL(sym) - -#endif diff --git a/tools/lib/rbtree.c b/tools/lib/rbtree.c new file mode 100644 index 000000000000..17c2b596f043 --- /dev/null +++ b/tools/lib/rbtree.c @@ -0,0 +1,548 @@ +/* + Red Black Trees + (C) 1999 Andrea Arcangeli + (C) 2002 David Woodhouse + (C) 2012 Michel Lespinasse + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + linux/lib/rbtree.c +*/ + +#include + +/* + * red-black trees properties: http://en.wikipedia.org/wiki/Rbtree + * + * 1) A node is either red or black + * 2) The root is black + * 3) All leaves (NULL) are black + * 4) Both children of every red node are black + * 5) Every simple path from root to leaves contains the same number + * of black nodes. + * + * 4 and 5 give the O(log n) guarantee, since 4 implies you cannot have two + * consecutive red nodes in a path and every red node is therefore followed by + * a black. So if B is the number of black nodes on every simple path (as per + * 5), then the longest possible path due to 4 is 2B. + * + * We shall indicate color with case, where black nodes are uppercase and red + * nodes will be lowercase. Unknown color nodes shall be drawn as red within + * parentheses and have some accompanying text comment. + */ + +static inline void rb_set_black(struct rb_node *rb) +{ + rb->__rb_parent_color |= RB_BLACK; +} + +static inline struct rb_node *rb_red_parent(struct rb_node *red) +{ + return (struct rb_node *)red->__rb_parent_color; +} + +/* + * Helper function for rotations: + * - old's parent and color get assigned to new + * - old gets assigned new as a parent and 'color' as a color. + */ +static inline void +__rb_rotate_set_parents(struct rb_node *old, struct rb_node *new, + struct rb_root *root, int color) +{ + struct rb_node *parent = rb_parent(old); + new->__rb_parent_color = old->__rb_parent_color; + rb_set_parent_color(old, new, color); + __rb_change_child(old, new, parent, root); +} + +static __always_inline void +__rb_insert(struct rb_node *node, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) +{ + struct rb_node *parent = rb_red_parent(node), *gparent, *tmp; + + while (true) { + /* + * Loop invariant: node is red + * + * If there is a black parent, we are done. + * Otherwise, take some corrective action as we don't + * want a red root or two consecutive red nodes. + */ + if (!parent) { + rb_set_parent_color(node, NULL, RB_BLACK); + break; + } else if (rb_is_black(parent)) + break; + + gparent = rb_red_parent(parent); + + tmp = gparent->rb_right; + if (parent != tmp) { /* parent == gparent->rb_left */ + if (tmp && rb_is_red(tmp)) { + /* + * Case 1 - color flips + * + * G g + * / \ / \ + * p u --> P U + * / / + * n n + * + * However, since g's parent might be red, and + * 4) does not allow this, we need to recurse + * at g. + */ + rb_set_parent_color(tmp, gparent, RB_BLACK); + rb_set_parent_color(parent, gparent, RB_BLACK); + node = gparent; + parent = rb_parent(node); + rb_set_parent_color(node, parent, RB_RED); + continue; + } + + tmp = parent->rb_right; + if (node == tmp) { + /* + * Case 2 - left rotate at parent + * + * G G + * / \ / \ + * p U --> n U + * \ / + * n p + * + * This still leaves us in violation of 4), the + * continuation into Case 3 will fix that. + */ + parent->rb_right = tmp = node->rb_left; + node->rb_left = parent; + if (tmp) + rb_set_parent_color(tmp, parent, + RB_BLACK); + rb_set_parent_color(parent, node, RB_RED); + augment_rotate(parent, node); + parent = node; + tmp = node->rb_right; + } + + /* + * Case 3 - right rotate at gparent + * + * G P + * / \ / \ + * p U --> n g + * / \ + * n U + */ + gparent->rb_left = tmp; /* == parent->rb_right */ + parent->rb_right = gparent; + if (tmp) + rb_set_parent_color(tmp, gparent, RB_BLACK); + __rb_rotate_set_parents(gparent, parent, root, RB_RED); + augment_rotate(gparent, parent); + break; + } else { + tmp = gparent->rb_left; + if (tmp && rb_is_red(tmp)) { + /* Case 1 - color flips */ + rb_set_parent_color(tmp, gparent, RB_BLACK); + rb_set_parent_color(parent, gparent, RB_BLACK); + node = gparent; + parent = rb_parent(node); + rb_set_parent_color(node, parent, RB_RED); + continue; + } + + tmp = parent->rb_left; + if (node == tmp) { + /* Case 2 - right rotate at parent */ + parent->rb_left = tmp = node->rb_right; + node->rb_right = parent; + if (tmp) + rb_set_parent_color(tmp, parent, + RB_BLACK); + rb_set_parent_color(parent, node, RB_RED); + augment_rotate(parent, node); + parent = node; + tmp = node->rb_left; + } + + /* Case 3 - left rotate at gparent */ + gparent->rb_right = tmp; /* == parent->rb_left */ + parent->rb_left = gparent; + if (tmp) + rb_set_parent_color(tmp, gparent, RB_BLACK); + __rb_rotate_set_parents(gparent, parent, root, RB_RED); + augment_rotate(gparent, parent); + break; + } + } +} + +/* + * Inline version for rb_erase() use - we want to be able to inline + * and eliminate the dummy_rotate callback there + */ +static __always_inline void +____rb_erase_color(struct rb_node *parent, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) +{ + struct rb_node *node = NULL, *sibling, *tmp1, *tmp2; + + while (true) { + /* + * Loop invariants: + * - node is black (or NULL on first iteration) + * - node is not the root (parent is not NULL) + * - All leaf paths going through parent and node have a + * black node count that is 1 lower than other leaf paths. + */ + sibling = parent->rb_right; + if (node != sibling) { /* node == parent->rb_left */ + if (rb_is_red(sibling)) { + /* + * Case 1 - left rotate at parent + * + * P S + * / \ / \ + * N s --> p Sr + * / \ / \ + * Sl Sr N Sl + */ + parent->rb_right = tmp1 = sibling->rb_left; + sibling->rb_left = parent; + rb_set_parent_color(tmp1, parent, RB_BLACK); + __rb_rotate_set_parents(parent, sibling, root, + RB_RED); + augment_rotate(parent, sibling); + sibling = tmp1; + } + tmp1 = sibling->rb_right; + if (!tmp1 || rb_is_black(tmp1)) { + tmp2 = sibling->rb_left; + if (!tmp2 || rb_is_black(tmp2)) { + /* + * Case 2 - sibling color flip + * (p could be either color here) + * + * (p) (p) + * / \ / \ + * N S --> N s + * / \ / \ + * Sl Sr Sl Sr + * + * This leaves us violating 5) which + * can be fixed by flipping p to black + * if it was red, or by recursing at p. + * p is red when coming from Case 1. + */ + rb_set_parent_color(sibling, parent, + RB_RED); + if (rb_is_red(parent)) + rb_set_black(parent); + else { + node = parent; + parent = rb_parent(node); + if (parent) + continue; + } + break; + } + /* + * Case 3 - right rotate at sibling + * (p could be either color here) + * + * (p) (p) + * / \ / \ + * N S --> N Sl + * / \ \ + * sl Sr s + * \ + * Sr + */ + sibling->rb_left = tmp1 = tmp2->rb_right; + tmp2->rb_right = sibling; + parent->rb_right = tmp2; + if (tmp1) + rb_set_parent_color(tmp1, sibling, + RB_BLACK); + augment_rotate(sibling, tmp2); + tmp1 = sibling; + sibling = tmp2; + } + /* + * Case 4 - left rotate at parent + color flips + * (p and sl could be either color here. + * After rotation, p becomes black, s acquires + * p's color, and sl keeps its color) + * + * (p) (s) + * / \ / \ + * N S --> P Sr + * / \ / \ + * (sl) sr N (sl) + */ + parent->rb_right = tmp2 = sibling->rb_left; + sibling->rb_left = parent; + rb_set_parent_color(tmp1, sibling, RB_BLACK); + if (tmp2) + rb_set_parent(tmp2, parent); + __rb_rotate_set_parents(parent, sibling, root, + RB_BLACK); + augment_rotate(parent, sibling); + break; + } else { + sibling = parent->rb_left; + if (rb_is_red(sibling)) { + /* Case 1 - right rotate at parent */ + parent->rb_left = tmp1 = sibling->rb_right; + sibling->rb_right = parent; + rb_set_parent_color(tmp1, parent, RB_BLACK); + __rb_rotate_set_parents(parent, sibling, root, + RB_RED); + augment_rotate(parent, sibling); + sibling = tmp1; + } + tmp1 = sibling->rb_left; + if (!tmp1 || rb_is_black(tmp1)) { + tmp2 = sibling->rb_right; + if (!tmp2 || rb_is_black(tmp2)) { + /* Case 2 - sibling color flip */ + rb_set_parent_color(sibling, parent, + RB_RED); + if (rb_is_red(parent)) + rb_set_black(parent); + else { + node = parent; + parent = rb_parent(node); + if (parent) + continue; + } + break; + } + /* Case 3 - right rotate at sibling */ + sibling->rb_right = tmp1 = tmp2->rb_left; + tmp2->rb_left = sibling; + parent->rb_left = tmp2; + if (tmp1) + rb_set_parent_color(tmp1, sibling, + RB_BLACK); + augment_rotate(sibling, tmp2); + tmp1 = sibling; + sibling = tmp2; + } + /* Case 4 - left rotate at parent + color flips */ + parent->rb_left = tmp2 = sibling->rb_right; + sibling->rb_right = parent; + rb_set_parent_color(tmp1, sibling, RB_BLACK); + if (tmp2) + rb_set_parent(tmp2, parent); + __rb_rotate_set_parents(parent, sibling, root, + RB_BLACK); + augment_rotate(parent, sibling); + break; + } + } +} + +/* Non-inline version for rb_erase_augmented() use */ +void __rb_erase_color(struct rb_node *parent, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) +{ + ____rb_erase_color(parent, root, augment_rotate); +} + +/* + * Non-augmented rbtree manipulation functions. + * + * We use dummy augmented callbacks here, and have the compiler optimize them + * out of the rb_insert_color() and rb_erase() function definitions. + */ + +static inline void dummy_propagate(struct rb_node *node, struct rb_node *stop) {} +static inline void dummy_copy(struct rb_node *old, struct rb_node *new) {} +static inline void dummy_rotate(struct rb_node *old, struct rb_node *new) {} + +static const struct rb_augment_callbacks dummy_callbacks = { + dummy_propagate, dummy_copy, dummy_rotate +}; + +void rb_insert_color(struct rb_node *node, struct rb_root *root) +{ + __rb_insert(node, root, dummy_rotate); +} + +void rb_erase(struct rb_node *node, struct rb_root *root) +{ + struct rb_node *rebalance; + rebalance = __rb_erase_augmented(node, root, &dummy_callbacks); + if (rebalance) + ____rb_erase_color(rebalance, root, dummy_rotate); +} + +/* + * Augmented rbtree manipulation functions. + * + * This instantiates the same __always_inline functions as in the non-augmented + * case, but this time with user-defined callbacks. + */ + +void __rb_insert_augmented(struct rb_node *node, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) +{ + __rb_insert(node, root, augment_rotate); +} + +/* + * This function returns the first node (in sort order) of the tree. + */ +struct rb_node *rb_first(const struct rb_root *root) +{ + struct rb_node *n; + + n = root->rb_node; + if (!n) + return NULL; + while (n->rb_left) + n = n->rb_left; + return n; +} + +struct rb_node *rb_last(const struct rb_root *root) +{ + struct rb_node *n; + + n = root->rb_node; + if (!n) + return NULL; + while (n->rb_right) + n = n->rb_right; + return n; +} + +struct rb_node *rb_next(const struct rb_node *node) +{ + struct rb_node *parent; + + if (RB_EMPTY_NODE(node)) + return NULL; + + /* + * If we have a right-hand child, go down and then left as far + * as we can. + */ + if (node->rb_right) { + node = node->rb_right; + while (node->rb_left) + node=node->rb_left; + return (struct rb_node *)node; + } + + /* + * No right-hand children. Everything down and left is smaller than us, + * so any 'next' node must be in the general direction of our parent. + * Go up the tree; any time the ancestor is a right-hand child of its + * parent, keep going up. First time it's a left-hand child of its + * parent, said parent is our 'next' node. + */ + while ((parent = rb_parent(node)) && node == parent->rb_right) + node = parent; + + return parent; +} + +struct rb_node *rb_prev(const struct rb_node *node) +{ + struct rb_node *parent; + + if (RB_EMPTY_NODE(node)) + return NULL; + + /* + * If we have a left-hand child, go down and then right as far + * as we can. + */ + if (node->rb_left) { + node = node->rb_left; + while (node->rb_right) + node=node->rb_right; + return (struct rb_node *)node; + } + + /* + * No left-hand children. Go up till we find an ancestor which + * is a right-hand child of its parent. + */ + while ((parent = rb_parent(node)) && node == parent->rb_left) + node = parent; + + return parent; +} + +void rb_replace_node(struct rb_node *victim, struct rb_node *new, + struct rb_root *root) +{ + struct rb_node *parent = rb_parent(victim); + + /* Set the surrounding nodes to point to the replacement */ + __rb_change_child(victim, new, parent, root); + if (victim->rb_left) + rb_set_parent(victim->rb_left, new); + if (victim->rb_right) + rb_set_parent(victim->rb_right, new); + + /* Copy the pointers/colour from the victim to the replacement */ + *new = *victim; +} + +static struct rb_node *rb_left_deepest_node(const struct rb_node *node) +{ + for (;;) { + if (node->rb_left) + node = node->rb_left; + else if (node->rb_right) + node = node->rb_right; + else + return (struct rb_node *)node; + } +} + +struct rb_node *rb_next_postorder(const struct rb_node *node) +{ + const struct rb_node *parent; + if (!node) + return NULL; + parent = rb_parent(node); + + /* If we're sitting on node, we've already seen our children */ + if (parent && node == parent->rb_left && parent->rb_right) { + /* If we are the parent's left node, go to the parent's right + * node then all the way down to the left */ + return rb_left_deepest_node(parent->rb_right); + } else + /* Otherwise we are the parent's right node, and the parent + * should be next */ + return (struct rb_node *)parent; +} + +struct rb_node *rb_first_postorder(const struct rb_root *root) +{ + if (!root->rb_node) + return NULL; + + return rb_left_deepest_node(root->rb_node); +} diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index 6504b727f450..ff667e36ca52 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -18,6 +18,7 @@ tools/arch/x86/include/asm/atomic.h tools/arch/x86/include/asm/rmwcc.h tools/lib/traceevent tools/lib/api +tools/lib/rbtree.c tools/lib/symbol/kallsyms.c tools/lib/symbol/kallsyms.h tools/lib/util/find_next_bit.c @@ -55,7 +56,6 @@ include/linux/list.h include/linux/hash.h include/linux/stringify.h lib/hweight.c -lib/rbtree.c include/linux/swab.h arch/*/include/asm/unistd*.h arch/*/include/uapi/asm/unistd*.h diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 586a59d46022..601d11440596 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -139,7 +139,7 @@ $(OUTPUT)util/find_next_bit.o: ../lib/util/find_next_bit.c FORCE $(call rule_mkdir) $(call if_changed_dep,cc_o_c) -$(OUTPUT)util/rbtree.o: ../../lib/rbtree.c FORCE +$(OUTPUT)util/rbtree.o: ../lib/rbtree.c FORCE $(call rule_mkdir) $(call if_changed_dep,cc_o_c) From 03da23a34a039a74f550646e68d562655306e331 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 5 Jul 2015 22:54:01 -0300 Subject: [PATCH 058/210] tools: Move rbtree.h from tools/perf/ The previous step, copying the contents minus the rcupdate.h parts, was done as a minimal fix, now do the move from tools/perf/. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-52fllxtsgmtke66pmv98mcma@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/{perf/util => }/include/linux/rbtree.h | 0 tools/perf/MANIFEST | 1 + 2 files changed, 1 insertion(+) rename tools/{perf/util => }/include/linux/rbtree.h (100%) diff --git a/tools/perf/util/include/linux/rbtree.h b/tools/include/linux/rbtree.h similarity index 100% rename from tools/perf/util/include/linux/rbtree.h rename to tools/include/linux/rbtree.h diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index ff667e36ca52..9df268167b8c 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -45,6 +45,7 @@ tools/include/linux/kernel.h tools/include/linux/list.h tools/include/linux/log2.h tools/include/linux/poison.h +tools/include/linux/rbtree.h tools/include/linux/types.h include/asm-generic/bitops/arch_hweight.h include/asm-generic/bitops/const_hweight.h From 307bc971959aaa2df44032e7f6b0bda1f7e26890 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 5 Jul 2015 22:59:05 -0300 Subject: [PATCH 059/210] tools: Copy rbtree_augmented.h from the kernel To complete the transitioning to not to share the same files with the kernel, also moving it from tools/perf/include/linux/ to tools/include/linux to make the whoke rbtree kit to other tools/ living codebases. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-5bxyehixafckqm6ez25alnfo@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/rbtree_augmented.h | 245 ++++++++++++++++++ tools/perf/MANIFEST | 2 +- .../util/include/linux/rbtree_augmented.h | 2 - 3 files changed, 246 insertions(+), 3 deletions(-) create mode 100644 tools/include/linux/rbtree_augmented.h delete mode 100644 tools/perf/util/include/linux/rbtree_augmented.h diff --git a/tools/include/linux/rbtree_augmented.h b/tools/include/linux/rbtree_augmented.h new file mode 100644 index 000000000000..43be941db695 --- /dev/null +++ b/tools/include/linux/rbtree_augmented.h @@ -0,0 +1,245 @@ +/* + Red Black Trees + (C) 1999 Andrea Arcangeli + (C) 2002 David Woodhouse + (C) 2012 Michel Lespinasse + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + tools/linux/include/linux/rbtree_augmented.h + + Copied from: + linux/include/linux/rbtree_augmented.h +*/ + +#ifndef _TOOLS_LINUX_RBTREE_AUGMENTED_H +#define _TOOLS_LINUX_RBTREE_AUGMENTED_H + +#include +#include + +/* + * Please note - only struct rb_augment_callbacks and the prototypes for + * rb_insert_augmented() and rb_erase_augmented() are intended to be public. + * The rest are implementation details you are not expected to depend on. + * + * See Documentation/rbtree.txt for documentation and samples. + */ + +struct rb_augment_callbacks { + void (*propagate)(struct rb_node *node, struct rb_node *stop); + void (*copy)(struct rb_node *old, struct rb_node *new); + void (*rotate)(struct rb_node *old, struct rb_node *new); +}; + +extern void __rb_insert_augmented(struct rb_node *node, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)); +/* + * Fixup the rbtree and update the augmented information when rebalancing. + * + * On insertion, the user must update the augmented information on the path + * leading to the inserted node, then call rb_link_node() as usual and + * rb_augment_inserted() instead of the usual rb_insert_color() call. + * If rb_augment_inserted() rebalances the rbtree, it will callback into + * a user provided function to update the augmented information on the + * affected subtrees. + */ +static inline void +rb_insert_augmented(struct rb_node *node, struct rb_root *root, + const struct rb_augment_callbacks *augment) +{ + __rb_insert_augmented(node, root, augment->rotate); +} + +#define RB_DECLARE_CALLBACKS(rbstatic, rbname, rbstruct, rbfield, \ + rbtype, rbaugmented, rbcompute) \ +static inline void \ +rbname ## _propagate(struct rb_node *rb, struct rb_node *stop) \ +{ \ + while (rb != stop) { \ + rbstruct *node = rb_entry(rb, rbstruct, rbfield); \ + rbtype augmented = rbcompute(node); \ + if (node->rbaugmented == augmented) \ + break; \ + node->rbaugmented = augmented; \ + rb = rb_parent(&node->rbfield); \ + } \ +} \ +static inline void \ +rbname ## _copy(struct rb_node *rb_old, struct rb_node *rb_new) \ +{ \ + rbstruct *old = rb_entry(rb_old, rbstruct, rbfield); \ + rbstruct *new = rb_entry(rb_new, rbstruct, rbfield); \ + new->rbaugmented = old->rbaugmented; \ +} \ +static void \ +rbname ## _rotate(struct rb_node *rb_old, struct rb_node *rb_new) \ +{ \ + rbstruct *old = rb_entry(rb_old, rbstruct, rbfield); \ + rbstruct *new = rb_entry(rb_new, rbstruct, rbfield); \ + new->rbaugmented = old->rbaugmented; \ + old->rbaugmented = rbcompute(old); \ +} \ +rbstatic const struct rb_augment_callbacks rbname = { \ + rbname ## _propagate, rbname ## _copy, rbname ## _rotate \ +}; + + +#define RB_RED 0 +#define RB_BLACK 1 + +#define __rb_parent(pc) ((struct rb_node *)(pc & ~3)) + +#define __rb_color(pc) ((pc) & 1) +#define __rb_is_black(pc) __rb_color(pc) +#define __rb_is_red(pc) (!__rb_color(pc)) +#define rb_color(rb) __rb_color((rb)->__rb_parent_color) +#define rb_is_red(rb) __rb_is_red((rb)->__rb_parent_color) +#define rb_is_black(rb) __rb_is_black((rb)->__rb_parent_color) + +static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p) +{ + rb->__rb_parent_color = rb_color(rb) | (unsigned long)p; +} + +static inline void rb_set_parent_color(struct rb_node *rb, + struct rb_node *p, int color) +{ + rb->__rb_parent_color = (unsigned long)p | color; +} + +static inline void +__rb_change_child(struct rb_node *old, struct rb_node *new, + struct rb_node *parent, struct rb_root *root) +{ + if (parent) { + if (parent->rb_left == old) + parent->rb_left = new; + else + parent->rb_right = new; + } else + root->rb_node = new; +} + +extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)); + +static __always_inline struct rb_node * +__rb_erase_augmented(struct rb_node *node, struct rb_root *root, + const struct rb_augment_callbacks *augment) +{ + struct rb_node *child = node->rb_right, *tmp = node->rb_left; + struct rb_node *parent, *rebalance; + unsigned long pc; + + if (!tmp) { + /* + * Case 1: node to erase has no more than 1 child (easy!) + * + * Note that if there is one child it must be red due to 5) + * and node must be black due to 4). We adjust colors locally + * so as to bypass __rb_erase_color() later on. + */ + pc = node->__rb_parent_color; + parent = __rb_parent(pc); + __rb_change_child(node, child, parent, root); + if (child) { + child->__rb_parent_color = pc; + rebalance = NULL; + } else + rebalance = __rb_is_black(pc) ? parent : NULL; + tmp = parent; + } else if (!child) { + /* Still case 1, but this time the child is node->rb_left */ + tmp->__rb_parent_color = pc = node->__rb_parent_color; + parent = __rb_parent(pc); + __rb_change_child(node, tmp, parent, root); + rebalance = NULL; + tmp = parent; + } else { + struct rb_node *successor = child, *child2; + tmp = child->rb_left; + if (!tmp) { + /* + * Case 2: node's successor is its right child + * + * (n) (s) + * / \ / \ + * (x) (s) -> (x) (c) + * \ + * (c) + */ + parent = successor; + child2 = successor->rb_right; + augment->copy(node, successor); + } else { + /* + * Case 3: node's successor is leftmost under + * node's right child subtree + * + * (n) (s) + * / \ / \ + * (x) (y) -> (x) (y) + * / / + * (p) (p) + * / / + * (s) (c) + * \ + * (c) + */ + do { + parent = successor; + successor = tmp; + tmp = tmp->rb_left; + } while (tmp); + parent->rb_left = child2 = successor->rb_right; + successor->rb_right = child; + rb_set_parent(child, successor); + augment->copy(node, successor); + augment->propagate(parent, successor); + } + + successor->rb_left = tmp = node->rb_left; + rb_set_parent(tmp, successor); + + pc = node->__rb_parent_color; + tmp = __rb_parent(pc); + __rb_change_child(node, successor, tmp, root); + if (child2) { + successor->__rb_parent_color = pc; + rb_set_parent_color(child2, parent, RB_BLACK); + rebalance = NULL; + } else { + unsigned long pc2 = successor->__rb_parent_color; + successor->__rb_parent_color = pc; + rebalance = __rb_is_black(pc2) ? parent : NULL; + } + tmp = successor; + } + + augment->propagate(tmp, NULL); + return rebalance; +} + +static __always_inline void +rb_erase_augmented(struct rb_node *node, struct rb_root *root, + const struct rb_augment_callbacks *augment) +{ + struct rb_node *rebalance = __rb_erase_augmented(node, root, augment); + if (rebalance) + __rb_erase_color(rebalance, root, augment->rotate); +} + +#endif /* _TOOLS_LINUX_RBTREE_AUGMENTED_H */ diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index 9df268167b8c..09dc0aabb515 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -46,6 +46,7 @@ tools/include/linux/list.h tools/include/linux/log2.h tools/include/linux/poison.h tools/include/linux/rbtree.h +tools/include/linux/rbtree_augmented.h tools/include/linux/types.h include/asm-generic/bitops/arch_hweight.h include/asm-generic/bitops/const_hweight.h @@ -65,7 +66,6 @@ arch/*/lib/memcpy*.S arch/*/lib/memset*.S include/linux/poison.h include/linux/hw_breakpoint.h -include/linux/rbtree_augmented.h include/uapi/linux/perf_event.h include/uapi/linux/const.h include/uapi/linux/swab.h diff --git a/tools/perf/util/include/linux/rbtree_augmented.h b/tools/perf/util/include/linux/rbtree_augmented.h deleted file mode 100644 index 9d6fcdf1788b..000000000000 --- a/tools/perf/util/include/linux/rbtree_augmented.h +++ /dev/null @@ -1,2 +0,0 @@ -#include -#include "../../../../include/linux/rbtree_augmented.h" From d8ea782b56d9d2c46a47b3231cfd16ecfb538c60 Mon Sep 17 00:00:00 2001 From: Vaidyanathan Srinivasan Date: Mon, 29 Jun 2015 10:47:55 +0530 Subject: [PATCH 060/210] powerpc/powernv: Fix vma page prot flags in opal-prd driver opal-prd driver will mmap() firmware code/data area as private mapping to prd user space daemon. Write to this page will trigger COW faults. The new COW pages are normal kernel RAM pages accounted by the kernel and are not special. vma->vm_page_prot value will be used at page fault time for the new COW pages, while pgprot_t value passed in remap_pfn_range() is used for the initial page table entry. Hence: * Do not add _PAGE_SPECIAL in vma, but only for remap_pfn_range() * Also remap_pfn_range() will add the _PAGE_SPECIAL flag using pte_mkspecial() call, hence no need to specify in the driver This fix resolves the page accounting warning shown below: BUG: Bad rss-counter state mm:c0000007d34ac600 idx:1 val:19 The above warning is triggered since _PAGE_SPECIAL was incorrectly being set for the normal kernel COW pages. Signed-off-by: Vaidyanathan Srinivasan Acked-by: Jeremy Kerr Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal-prd.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c index 46cb3feb0a13..4ece8e40dd54 100644 --- a/arch/powerpc/platforms/powernv/opal-prd.c +++ b/arch/powerpc/platforms/powernv/opal-prd.c @@ -112,6 +112,7 @@ static int opal_prd_open(struct inode *inode, struct file *file) static int opal_prd_mmap(struct file *file, struct vm_area_struct *vma) { size_t addr, size; + pgprot_t page_prot; int rc; pr_devel("opal_prd_mmap(0x%016lx, 0x%016lx, 0x%lx, 0x%lx)\n", @@ -125,13 +126,11 @@ static int opal_prd_mmap(struct file *file, struct vm_area_struct *vma) if (!opal_prd_range_is_valid(addr, size)) return -EINVAL; - vma->vm_page_prot = __pgprot(pgprot_val(phys_mem_access_prot(file, - vma->vm_pgoff, - size, vma->vm_page_prot)) - | _PAGE_SPECIAL); + page_prot = phys_mem_access_prot(file, vma->vm_pgoff, + size, vma->vm_page_prot); rc = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, size, - vma->vm_page_prot); + page_prot); return rc; } From 5aac644a9944bea93b4f05ced1883a902a2535f6 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 3 Jun 2015 10:39:46 +0300 Subject: [PATCH 061/210] x86/tsc: Let high latency PIT fail fast in quick_pit_calibrate() If it takes longer than 12us to read the PIT counter lsb/msb, then the error margin will never fall below 500ppm within 50ms, and Fast TSC calibration will always fail. This patch detects when that will happen and fails fast. Note the failure message is not printed in that case because: 1. it will always happen on that class of hardware 2. the absence of the message is more informative than its presence Signed-off-by: Adrian Hunter Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Len Brown Cc: Andi Kleen Link: http://lkml.kernel.org/r/556EB717.9070607@intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/tsc.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 505449700e0c..7437b41f6a47 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -598,10 +598,19 @@ static unsigned long quick_pit_calibrate(void) if (!pit_expect_msb(0xff-i, &delta, &d2)) break; + delta -= tsc; + + /* + * Extrapolate the error and fail fast if the error will + * never be below 500 ppm. + */ + if (i == 1 && + d1 + d2 >= (delta * MAX_QUICK_PIT_ITERATIONS) >> 11) + return 0; + /* * Iterate until the error is less than 500 ppm */ - delta -= tsc; if (d1+d2 >= delta >> 11) continue; From 0d7b6b1182ef6f72be592688c8a22025a5b7b483 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Thu, 2 Jul 2015 14:29:58 +0300 Subject: [PATCH 062/210] drm/i915/chv: fix HW readout of the port PLL fractional divider MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ville noticed that the PLL HW readout code parsed the fractional divider value as if the fractional divider was always enabled. This may result in a port clock state check mismatch if the preceeding modeset disabled the fractional divider, but left a non-zero divider value in the register. Signed-off-by: Imre Deak Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 1b61f9810387..8aa803848f35 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -7887,7 +7887,7 @@ static void chv_crtc_clock_get(struct intel_crtc *crtc, int pipe = pipe_config->cpu_transcoder; enum dpio_channel port = vlv_pipe_to_channel(pipe); intel_clock_t clock; - u32 cmn_dw13, pll_dw0, pll_dw1, pll_dw2; + u32 cmn_dw13, pll_dw0, pll_dw1, pll_dw2, pll_dw3; int refclk = 100000; mutex_lock(&dev_priv->sb_lock); @@ -7895,10 +7895,13 @@ static void chv_crtc_clock_get(struct intel_crtc *crtc, pll_dw0 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW0(port)); pll_dw1 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW1(port)); pll_dw2 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW2(port)); + pll_dw3 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW3(port)); mutex_unlock(&dev_priv->sb_lock); clock.m1 = (pll_dw1 & 0x7) == DPIO_CHV_M1_DIV_BY_2 ? 2 : 0; - clock.m2 = ((pll_dw0 & 0xff) << 22) | (pll_dw2 & 0x3fffff); + clock.m2 = (pll_dw0 & 0xff) << 22; + if (pll_dw3 & DPIO_CHV_FRAC_DIV_EN) + clock.m2 |= pll_dw2 & 0x3fffff; clock.n = (pll_dw1 >> DPIO_CHV_N_DIV_SHIFT) & 0xf; clock.p1 = (cmn_dw13 >> DPIO_CHV_P1_DIV_SHIFT) & 0x7; clock.p2 = (cmn_dw13 >> DPIO_CHV_P2_DIV_SHIFT) & 0x1f; From 14f21189df33bc972455d6a0ed875aa68718d7fc Mon Sep 17 00:00:00 2001 From: Maninder Singh Date: Mon, 29 Jun 2015 16:05:11 +0530 Subject: [PATCH 063/210] cxl/vphb.c: Use phb pointer after NULL check static Anlaysis detected below error:- (error) Possible null pointer dereference: phb So, Use phb after NULL check. Signed-off-by: Maninder Singh Acked-by: Ian Munsie Signed-off-by: Michael Ellerman --- drivers/misc/cxl/vphb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c index b1d1983a84a5..2eba002b580b 100644 --- a/drivers/misc/cxl/vphb.c +++ b/drivers/misc/cxl/vphb.c @@ -112,9 +112,10 @@ static int cxl_pcie_config_info(struct pci_bus *bus, unsigned int devfn, unsigned long addr; phb = pci_bus_to_host(bus); - afu = (struct cxl_afu *)phb->private_data; if (phb == NULL) return PCIBIOS_DEVICE_NOT_FOUND; + afu = (struct cxl_afu *)phb->private_data; + if (cxl_pcie_cfg_record(bus->number, devfn) > afu->crs_num) return PCIBIOS_DEVICE_NOT_FOUND; if (offset >= (unsigned long)phb->cfg_data) From 27ea2c420cad57386c25668cb9a9f0f082635586 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Mon, 15 Jun 2015 13:25:19 +1000 Subject: [PATCH 064/210] powerpc: Set the correct kernel taint on machine check errors. This means the 'M' flag will work properly when the kernel prints a backtrace. Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/traps.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 6530f1b8874d..37de90f8a845 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -297,6 +297,8 @@ long machine_check_early(struct pt_regs *regs) __this_cpu_inc(irq_stat.mce_exceptions); + add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); + if (cur_cpu_spec && cur_cpu_spec->machine_check_early) handled = cur_cpu_spec->machine_check_early(regs); return handled; From 8c00d5c9d3e7452658dda55024485d52919ebfdd Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Thu, 2 Jul 2015 15:55:21 +1000 Subject: [PATCH 065/210] cxl: Test the correct mmio space before unmapping Before freeing p2n, test p2n, not p1n. Signed-off-by: Daniel Axtens Acked-by: Ian Munsie Signed-off-by: Michael Ellerman --- drivers/misc/cxl/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index c68ef5806dbe..32ad09705949 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -539,7 +539,7 @@ err: static void cxl_unmap_slice_regs(struct cxl_afu *afu) { - if (afu->p1n_mmio) + if (afu->p2n_mmio) iounmap(afu->p2n_mmio); if (afu->p1n_mmio) iounmap(afu->p1n_mmio); From eab861a7a52208868637f47a92caa926ddadd9c7 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 2 Jul 2015 14:56:20 +1000 Subject: [PATCH 066/210] powerpc: Add plain English description for alignment exception oopses If we take an alignment exception which we cannot fix, the oops currently prints: Unable to handle kernel paging request for unknown fault Lets print something more useful: Unable to handle kernel paging request for unaligned access at address 0xc0000000f77bba8f Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/mm/fault.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 6d535973b200..a67c6d781c52 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -529,6 +529,10 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) printk(KERN_ALERT "Unable to handle kernel paging request for " "instruction fetch\n"); break; + case 0x600: + printk(KERN_ALERT "Unable to handle kernel paging request for " + "unaligned access at address 0x%08lx\n", regs->dar); + break; default: printk(KERN_ALERT "Unable to handle kernel paging request for " "unknown fault\n"); From aaf6fd5c75eb4aa734ec2062deab371633c3655f Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Mon, 6 Jul 2015 09:40:34 +1000 Subject: [PATCH 067/210] powerpc/ppc4xx_hsta_msi: Include ppc-pci.h to fix reference to hose_list An earlier commit referenced 'hose_list' in sysdev/ppc4xx_hsta_msi.c. hose_list is defined in ppc-pci.h, which was not included in that file. Include it, fixing the build for the akebono defconfig used by the kbuild test robot. Fixes: f2c800aaceb6 ("powerpc/ppc4xx_hsta_msi: Move MSI-related ops to pci_controller_ops") Reported-by: kbuild test robot Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- arch/powerpc/sysdev/ppc4xx_hsta_msi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/sysdev/ppc4xx_hsta_msi.c b/arch/powerpc/sysdev/ppc4xx_hsta_msi.c index 2bc33674ebfc..87f9623ca805 100644 --- a/arch/powerpc/sysdev/ppc4xx_hsta_msi.c +++ b/arch/powerpc/sysdev/ppc4xx_hsta_msi.c @@ -18,6 +18,7 @@ #include #include #include +#include struct ppc4xx_hsta_msi { struct device *dev; From a8956a7b7232da5f4ce4a305c72a54cc5e4a8307 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Fri, 3 Jul 2015 17:39:12 +1000 Subject: [PATCH 068/210] powerpc/powernv: Fix opal-elog interrupt handler The conversion of opal events to a proper irqchip means that handlers are called until the relevant opal event has been cleared by processing it. Events that queue work should therefore use a threaded handler to mask the event until processing is complete. Signed-off-by: Alistair Popple Tested-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal-elog.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c index 4949ef0d9400..37f959bf392e 100644 --- a/arch/powerpc/platforms/powernv/opal-elog.c +++ b/arch/powerpc/platforms/powernv/opal-elog.c @@ -237,7 +237,7 @@ static struct elog_obj *create_elog_obj(uint64_t id, size_t size, uint64_t type) return elog; } -static void elog_work_fn(struct work_struct *work) +static irqreturn_t elog_event(int irq, void *data) { __be64 size; __be64 id; @@ -251,7 +251,7 @@ static void elog_work_fn(struct work_struct *work) rc = opal_get_elog_size(&id, &size, &type); if (rc != OPAL_SUCCESS) { pr_err("ELOG: OPAL log info read failed\n"); - return; + return IRQ_HANDLED; } elog_size = be64_to_cpu(size); @@ -270,16 +270,10 @@ static void elog_work_fn(struct work_struct *work) * entries. */ if (kset_find_obj(elog_kset, name)) - return; + return IRQ_HANDLED; create_elog_obj(log_id, elog_size, elog_type); -} -static DECLARE_WORK(elog_work, elog_work_fn); - -static irqreturn_t elog_event(int irq, void *data) -{ - schedule_work(&elog_work); return IRQ_HANDLED; } @@ -304,8 +298,8 @@ int __init opal_elog_init(void) return irq; } - rc = request_irq(irq, elog_event, - IRQ_TYPE_LEVEL_HIGH, "opal-elog", NULL); + rc = request_threaded_irq(irq, NULL, elog_event, + IRQF_TRIGGER_HIGH | IRQF_ONESHOT, "opal-elog", NULL); if (rc) { pr_err("%s: Can't request OPAL event irq (%d)\n", __func__, rc); From 57ffc5ca679f499f4704fd9b6a372916f59930ee Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 18 Jun 2015 12:32:49 +0200 Subject: [PATCH 069/210] perf: Fix AUX buffer refcounting Its currently possible to drop the last refcount to the aux buffer from NMI context, which results in the expected fireworks. The refcounting needs a bigger overhaul, but to cure the immediate problem, delay the freeing by using an irq_work. Reviewed-and-tested-by: Alexander Shishkin Reported-by: Vince Weaver Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20150618103249.GK19282@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- kernel/events/core.c | 8 -------- kernel/events/internal.h | 10 ++++++++++ kernel/events/ring_buffer.c | 27 +++++++++++++++++++++++++-- 3 files changed, 35 insertions(+), 10 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index e965cfae4207..d3dae3419b99 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4358,14 +4358,6 @@ static void ring_buffer_wakeup(struct perf_event *event) rcu_read_unlock(); } -static void rb_free_rcu(struct rcu_head *rcu_head) -{ - struct ring_buffer *rb; - - rb = container_of(rcu_head, struct ring_buffer, rcu_head); - rb_free(rb); -} - struct ring_buffer *ring_buffer_get(struct perf_event *event) { struct ring_buffer *rb; diff --git a/kernel/events/internal.h b/kernel/events/internal.h index 2deb24c7a40d..2bbad9c1274c 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -11,6 +11,7 @@ struct ring_buffer { atomic_t refcount; struct rcu_head rcu_head; + struct irq_work irq_work; #ifdef CONFIG_PERF_USE_VMALLOC struct work_struct work; int page_order; /* allocation order */ @@ -55,6 +56,15 @@ struct ring_buffer { }; extern void rb_free(struct ring_buffer *rb); + +static inline void rb_free_rcu(struct rcu_head *rcu_head) +{ + struct ring_buffer *rb; + + rb = container_of(rcu_head, struct ring_buffer, rcu_head); + rb_free(rb); +} + extern struct ring_buffer * rb_alloc(int nr_pages, long watermark, int cpu, int flags); extern void perf_event_wakeup(struct perf_event *event); diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 96472824a752..b2be01b1aa9d 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -221,6 +221,8 @@ void perf_output_end(struct perf_output_handle *handle) rcu_read_unlock(); } +static void rb_irq_work(struct irq_work *work); + static void ring_buffer_init(struct ring_buffer *rb, long watermark, int flags) { @@ -241,6 +243,16 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags) INIT_LIST_HEAD(&rb->event_list); spin_lock_init(&rb->event_lock); + init_irq_work(&rb->irq_work, rb_irq_work); +} + +static void ring_buffer_put_async(struct ring_buffer *rb) +{ + if (!atomic_dec_and_test(&rb->refcount)) + return; + + rb->rcu_head.next = (void *)rb; + irq_work_queue(&rb->irq_work); } /* @@ -319,7 +331,7 @@ err_put: rb_free_aux(rb); err: - ring_buffer_put(rb); + ring_buffer_put_async(rb); handle->event = NULL; return NULL; @@ -370,7 +382,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size, local_set(&rb->aux_nest, 0); rb_free_aux(rb); - ring_buffer_put(rb); + ring_buffer_put_async(rb); } /* @@ -557,7 +569,18 @@ static void __rb_free_aux(struct ring_buffer *rb) void rb_free_aux(struct ring_buffer *rb) { if (atomic_dec_and_test(&rb->aux_refcount)) + irq_work_queue(&rb->irq_work); +} + +static void rb_irq_work(struct irq_work *work) +{ + struct ring_buffer *rb = container_of(work, struct ring_buffer, irq_work); + + if (!atomic_read(&rb->aux_refcount)) __rb_free_aux(rb); + + if (rb->rcu_head.next == (void *)rb) + call_rcu(&rb->rcu_head, rb_free_rcu); } #ifndef CONFIG_PERF_USE_VMALLOC From ebf2d2689de551d90965090bb991fc640a0c0d41 Mon Sep 17 00:00:00 2001 From: Yann Droneaud Date: Mon, 22 Jun 2015 21:38:43 +0200 Subject: [PATCH 070/210] perf/x86: Fix copy_from_user_nmi() return if range is not ok Commit 0a196848ca36 ("perf: Fix arch_perf_out_copy_user default"), changes copy_from_user_nmi() to return the number of remaining bytes so that it behave like copy_from_user(). Unfortunately, when the range is outside of the process memory, the return value is still the number of byte copied, eg. 0, instead of the remaining bytes. As all users of copy_from_user_nmi() were modified as part of commit 0a196848ca36, the function should be fixed to return the total number of bytes if range is not correct. Signed-off-by: Yann Droneaud Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1435001923-30986-1-git-send-email-ydroneaud@opteya.com Signed-off-by: Ingo Molnar --- arch/x86/lib/usercopy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c index ddf9ecb53cc3..e342586db6e4 100644 --- a/arch/x86/lib/usercopy.c +++ b/arch/x86/lib/usercopy.c @@ -20,7 +20,7 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n) unsigned long ret; if (__range_not_ok(from, n, TASK_SIZE)) - return 0; + return n; /* * Even though this function is typically called from NMI/IRQ context From d0f77d4d04b222a817925d33ba3589b190bfa863 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Thu, 2 Jul 2015 12:09:33 +0300 Subject: [PATCH 071/210] x86/init: Clear 'init_level4_pgt' earlier Currently x86_64_start_kernel() has two KASAN related function calls. The first call maps shadow to early_level4_pgt, the second maps shadow to init_level4_pgt. If we move clear_page(init_level4_pgt) earlier, we could hide KASAN low level detail from generic x86_64 initialization code. The next patch will do it. Signed-off-by: Andrey Ryabinin Cc: # 4.0+ Cc: Alexander Popov Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Borislav Petkov Cc: Dmitry Vyukov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1435828178-10975-2-git-send-email-a.ryabinin@samsung.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/head64.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 5a4668136e98..65c8985e3eb2 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -166,6 +166,8 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data) /* clear bss before set_intr_gate with early_idt_handler */ clear_bss(); + clear_page(init_level4_pgt); + for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) set_intr_gate(i, early_idt_handler_array[i]); load_idt((const struct desc_ptr *)&idt_descr); @@ -177,7 +179,6 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data) */ load_ucode_bsp(); - clear_page(init_level4_pgt); /* set init_level4_pgt kernel high mapping*/ init_level4_pgt[511] = early_level4_pgt[511]; From 5d5aa3cfca5cf74cd928daf3674642e6004328d1 Mon Sep 17 00:00:00 2001 From: Alexander Popov Date: Thu, 2 Jul 2015 12:09:34 +0300 Subject: [PATCH 072/210] x86/kasan: Fix KASAN shadow region page tables Currently KASAN shadow region page tables created without respect of physical offset (phys_base). This causes kernel halt when phys_base is not zero. So let's initialize KASAN shadow region page tables in kasan_early_init() using __pa_nodebug() which considers phys_base. This patch also separates x86_64_start_kernel() from KASAN low level details by moving kasan_map_early_shadow(init_level4_pgt) into kasan_early_init(). Remove the comment before clear_bss() which stopped bringing much profit to the code readability. Otherwise describing all the new order dependencies would be too verbose. Signed-off-by: Alexander Popov Signed-off-by: Andrey Ryabinin Cc: # 4.0+ Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Borislav Petkov Cc: Dmitry Vyukov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1435828178-10975-3-git-send-email-a.ryabinin@samsung.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/kasan.h | 8 ++------ arch/x86/kernel/head64.c | 7 ++----- arch/x86/kernel/head_64.S | 29 ----------------------------- arch/x86/mm/kasan_init_64.c | 36 ++++++++++++++++++++++++++++++++++-- 4 files changed, 38 insertions(+), 42 deletions(-) diff --git a/arch/x86/include/asm/kasan.h b/arch/x86/include/asm/kasan.h index 8b22422fbad8..74a2a8dc9908 100644 --- a/arch/x86/include/asm/kasan.h +++ b/arch/x86/include/asm/kasan.h @@ -14,15 +14,11 @@ #ifndef __ASSEMBLY__ -extern pte_t kasan_zero_pte[]; -extern pte_t kasan_zero_pmd[]; -extern pte_t kasan_zero_pud[]; - #ifdef CONFIG_KASAN -void __init kasan_map_early_shadow(pgd_t *pgd); +void __init kasan_early_init(void); void __init kasan_init(void); #else -static inline void kasan_map_early_shadow(pgd_t *pgd) { } +static inline void kasan_early_init(void) { } static inline void kasan_init(void) { } #endif diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 65c8985e3eb2..f129a9af6357 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -161,13 +161,12 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data) /* Kill off the identity-map trampoline */ reset_early_page_tables(); - kasan_map_early_shadow(early_level4_pgt); - - /* clear bss before set_intr_gate with early_idt_handler */ clear_bss(); clear_page(init_level4_pgt); + kasan_early_init(); + for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) set_intr_gate(i, early_idt_handler_array[i]); load_idt((const struct desc_ptr *)&idt_descr); @@ -182,8 +181,6 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data) /* set init_level4_pgt kernel high mapping*/ init_level4_pgt[511] = early_level4_pgt[511]; - kasan_map_early_shadow(init_level4_pgt); - x86_64_start_reservations(real_mode_data); } diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index e5c27f729a38..1d40ca8a73f2 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -516,38 +516,9 @@ ENTRY(phys_base) /* This must match the first entry in level2_kernel_pgt */ .quad 0x0000000000000000 -#ifdef CONFIG_KASAN -#define FILL(VAL, COUNT) \ - .rept (COUNT) ; \ - .quad (VAL) ; \ - .endr - -NEXT_PAGE(kasan_zero_pte) - FILL(kasan_zero_page - __START_KERNEL_map + _KERNPG_TABLE, 512) -NEXT_PAGE(kasan_zero_pmd) - FILL(kasan_zero_pte - __START_KERNEL_map + _KERNPG_TABLE, 512) -NEXT_PAGE(kasan_zero_pud) - FILL(kasan_zero_pmd - __START_KERNEL_map + _KERNPG_TABLE, 512) - -#undef FILL -#endif - - #include "../../x86/xen/xen-head.S" __PAGE_ALIGNED_BSS NEXT_PAGE(empty_zero_page) .skip PAGE_SIZE -#ifdef CONFIG_KASAN -/* - * This page used as early shadow. We don't use empty_zero_page - * at early stages, stack instrumentation could write some garbage - * to this page. - * Latter we reuse it as zero shadow for large ranges of memory - * that allowed to access, but not instrumented by kasan - * (vmalloc/vmemmap ...). - */ -NEXT_PAGE(kasan_zero_page) - .skip PAGE_SIZE -#endif diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 4860906c6b9f..0e4a05fa34d7 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -11,7 +11,19 @@ extern pgd_t early_level4_pgt[PTRS_PER_PGD]; extern struct range pfn_mapped[E820_X_MAX]; -extern unsigned char kasan_zero_page[PAGE_SIZE]; +static pud_t kasan_zero_pud[PTRS_PER_PUD] __page_aligned_bss; +static pmd_t kasan_zero_pmd[PTRS_PER_PMD] __page_aligned_bss; +static pte_t kasan_zero_pte[PTRS_PER_PTE] __page_aligned_bss; + +/* + * This page used as early shadow. We don't use empty_zero_page + * at early stages, stack instrumentation could write some garbage + * to this page. + * Latter we reuse it as zero shadow for large ranges of memory + * that allowed to access, but not instrumented by kasan + * (vmalloc/vmemmap ...). + */ +static unsigned char kasan_zero_page[PAGE_SIZE] __page_aligned_bss; static int __init map_range(struct range *range) { @@ -36,7 +48,7 @@ static void __init clear_pgds(unsigned long start, pgd_clear(pgd_offset_k(start)); } -void __init kasan_map_early_shadow(pgd_t *pgd) +static void __init kasan_map_early_shadow(pgd_t *pgd) { int i; unsigned long start = KASAN_SHADOW_START; @@ -166,6 +178,26 @@ static struct notifier_block kasan_die_notifier = { }; #endif +void __init kasan_early_init(void) +{ + int i; + pteval_t pte_val = __pa_nodebug(kasan_zero_page) | __PAGE_KERNEL; + pmdval_t pmd_val = __pa_nodebug(kasan_zero_pte) | _KERNPG_TABLE; + pudval_t pud_val = __pa_nodebug(kasan_zero_pmd) | _KERNPG_TABLE; + + for (i = 0; i < PTRS_PER_PTE; i++) + kasan_zero_pte[i] = __pte(pte_val); + + for (i = 0; i < PTRS_PER_PMD; i++) + kasan_zero_pmd[i] = __pmd(pmd_val); + + for (i = 0; i < PTRS_PER_PUD; i++) + kasan_zero_pud[i] = __pud(pud_val); + + kasan_map_early_shadow(early_level4_pgt); + kasan_map_early_shadow(init_level4_pgt); +} + void __init kasan_init(void) { int i; From 241d2c54c62fa0939fc9a9512b48ac3434e90a89 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Thu, 2 Jul 2015 12:09:35 +0300 Subject: [PATCH 073/210] x86/kasan: Flush TLBs after switching CR3 load_cr3() doesn't cause tlb_flush if PGE enabled. This may cause tons of false positive reports spamming the kernel to death. To fix this __flush_tlb_all() should be called explicitly after CR3 changed. Signed-off-by: Andrey Ryabinin Cc: # 4.0+ Cc: Alexander Popov Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Borislav Petkov Cc: Dmitry Vyukov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1435828178-10975-4-git-send-email-a.ryabinin@samsung.com Signed-off-by: Ingo Molnar --- arch/x86/mm/kasan_init_64.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 0e4a05fa34d7..5d26642e4e8a 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -208,6 +208,7 @@ void __init kasan_init(void) memcpy(early_level4_pgt, init_level4_pgt, sizeof(early_level4_pgt)); load_cr3(early_level4_pgt); + __flush_tlb_all(); clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END); @@ -234,5 +235,6 @@ void __init kasan_init(void) memset(kasan_zero_page, 0, PAGE_SIZE); load_cr3(init_level4_pgt); + __flush_tlb_all(); init_task.kasan_depth = 0; } From d4f86beacc21d538dc41e1fc75a22e084f547edf Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Thu, 2 Jul 2015 12:09:36 +0300 Subject: [PATCH 074/210] x86/kasan: Fix boot crash on AMD processors While populating zero shadow wrong bits in upper level page tables used. __PAGE_KERNEL_RO that was used for pgd/pud/pmd has _PAGE_BIT_GLOBAL set. Global bit is present only in the lowest level of the page translation hierarchy (ptes), and it should be zero in upper levels. This bug seems doesn't cause any troubles on Intel cpus, while on AMDs it cause kernel crash on boot. Use _KERNPG_TABLE bits for pgds/puds/pmds to fix this. Reported-by: Borislav Petkov Signed-off-by: Andrey Ryabinin Cc: # 4.0+ Cc: Alexander Popov Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1435828178-10975-5-git-send-email-a.ryabinin@samsung.com Signed-off-by: Ingo Molnar --- arch/x86/mm/kasan_init_64.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 5d26642e4e8a..9a54dbe98064 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -85,7 +85,7 @@ static int __init zero_pmd_populate(pud_t *pud, unsigned long addr, while (IS_ALIGNED(addr, PMD_SIZE) && addr + PMD_SIZE <= end) { WARN_ON(!pmd_none(*pmd)); set_pmd(pmd, __pmd(__pa_nodebug(kasan_zero_pte) - | __PAGE_KERNEL_RO)); + | _KERNPG_TABLE)); addr += PMD_SIZE; pmd = pmd_offset(pud, addr); } @@ -111,7 +111,7 @@ static int __init zero_pud_populate(pgd_t *pgd, unsigned long addr, while (IS_ALIGNED(addr, PUD_SIZE) && addr + PUD_SIZE <= end) { WARN_ON(!pud_none(*pud)); set_pud(pud, __pud(__pa_nodebug(kasan_zero_pmd) - | __PAGE_KERNEL_RO)); + | _KERNPG_TABLE)); addr += PUD_SIZE; pud = pud_offset(pgd, addr); } @@ -136,7 +136,7 @@ static int __init zero_pgd_populate(unsigned long addr, unsigned long end) while (IS_ALIGNED(addr, PGDIR_SIZE) && addr + PGDIR_SIZE <= end) { WARN_ON(!pgd_none(*pgd)); set_pgd(pgd, __pgd(__pa_nodebug(kasan_zero_pud) - | __PAGE_KERNEL_RO)); + | _KERNPG_TABLE)); addr += PGDIR_SIZE; pgd = pgd_offset_k(addr); } From 8515522949951d81fe2d06c0a3292f171f2b8ec4 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Thu, 2 Jul 2015 12:09:37 +0300 Subject: [PATCH 075/210] x86/kasan: Add message about KASAN being initialized Print informational message to tell user that kernel runs with KASAN enabled. Add a "kasan: " prefix to all messages in kasan_init_64.c. Signed-off-by: Andrey Ryabinin Cc: Alexander Popov Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Borislav Petkov Cc: Dmitry Vyukov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1435828178-10975-6-git-send-email-a.ryabinin@samsung.com Signed-off-by: Ingo Molnar --- arch/x86/mm/kasan_init_64.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 9a54dbe98064..e1840f3db5b5 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -1,3 +1,4 @@ +#define pr_fmt(fmt) "kasan: " fmt #include #include #include @@ -237,4 +238,6 @@ void __init kasan_init(void) load_cr3(init_level4_pgt); __flush_tlb_all(); init_task.kasan_depth = 0; + + pr_info("Kernel address sanitizer initialized\n"); } From d6f2d75a7ae06ffd793bb504c4f0d1665548cffc Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Thu, 2 Jul 2015 12:09:38 +0300 Subject: [PATCH 076/210] x86/kasan: Move KASAN_SHADOW_OFFSET to the arch Kconfig KASAN_SHADOW_OFFSET is purely arch specific setting, so it should be in arch's Kconfig file. Signed-off-by: Andrey Ryabinin Cc: Alexander Popov Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Borislav Petkov Cc: Dmitry Vyukov Cc: Linus Torvalds Cc: Paul Bolle Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1435828178-10975-7-git-send-email-a.ryabinin@samsung.com Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 5 +++++ lib/Kconfig.kasan | 4 ---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 55bced17dc95..0b2929f49531 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -254,6 +254,11 @@ config ARCH_SUPPORTS_OPTIMIZED_INLINING config ARCH_SUPPORTS_DEBUG_PAGEALLOC def_bool y +config KASAN_SHADOW_OFFSET + hex + depends on KASAN + default 0xdffffc0000000000 + config HAVE_INTEL_TXT def_bool y depends on INTEL_IOMMU && ACPI diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index 777eda7d1ab4..39f24d6721e5 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -18,10 +18,6 @@ config KASAN For better error detection enable CONFIG_STACKTRACE, and add slub_debug=U to boot cmdline. -config KASAN_SHADOW_OFFSET - hex - default 0xdffffc0000000000 if X86_64 - choice prompt "Instrumentation type" depends on KASAN From 1db875631f8d5bbf497f67e47f254eece888d51d Mon Sep 17 00:00:00 2001 From: Zhu Guihua Date: Fri, 3 Jul 2015 17:37:18 +0800 Subject: [PATCH 077/210] x86/espfix: Add 'cpu' parameter to init_espfix_ap() Add a CPU index parameter to init_espfix_ap(), so that the parameter could be propagated to the function for espfix page allocation. Signed-off-by: Zhu Guihua Cc: Cc: Cc: Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/cde3fcf1b3211f3f03feb1a995bce3fee850f0fc.1435824469.git.zhugh.fnst@cn.fujitsu.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/espfix.h | 2 +- arch/x86/kernel/espfix_64.c | 7 +++---- arch/x86/kernel/smpboot.c | 2 +- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/espfix.h b/arch/x86/include/asm/espfix.h index 99efebb2f69d..ca3ce9ab9385 100644 --- a/arch/x86/include/asm/espfix.h +++ b/arch/x86/include/asm/espfix.h @@ -9,7 +9,7 @@ DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_stack); DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_waddr); extern void init_espfix_bsp(void); -extern void init_espfix_ap(void); +extern void init_espfix_ap(int cpu); #endif /* CONFIG_X86_64 */ diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c index f5d0730e7b08..1fb0e0003c94 100644 --- a/arch/x86/kernel/espfix_64.c +++ b/arch/x86/kernel/espfix_64.c @@ -131,12 +131,12 @@ void __init init_espfix_bsp(void) init_espfix_random(); /* The rest is the same as for any other processor */ - init_espfix_ap(); + init_espfix_ap(0); } -void init_espfix_ap(void) +void init_espfix_ap(int cpu) { - unsigned int cpu, page; + unsigned int page; unsigned long addr; pud_t pud, *pud_p; pmd_t pmd, *pmd_p; @@ -149,7 +149,6 @@ void init_espfix_ap(void) if (likely(this_cpu_read(espfix_stack))) return; /* Already initialized */ - cpu = smp_processor_id(); addr = espfix_base_addr(cpu); page = cpu/ESPFIX_STACKS_PER_PAGE; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 8add66b22f33..6f5abac6c28b 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -242,7 +242,7 @@ static void notrace start_secondary(void *unused) * Enable the espfix hack for this CPU */ #ifdef CONFIG_X86_ESPFIX64 - init_espfix_ap(); + init_espfix_ap(smp_processor_id()); #endif /* From 20d5e4a9cd453991e2590a4c25230a99b42dee0c Mon Sep 17 00:00:00 2001 From: Zhu Guihua Date: Fri, 3 Jul 2015 17:37:19 +0800 Subject: [PATCH 078/210] x86/espfix: Init espfix on the boot CPU side As we alloc pages with GFP_KERNEL in init_espfix_ap() which is called before we enable local irqs, so the lockdep sub-system would (correctly) trigger a warning about the potentially blocking API. So we allocate them on the boot CPU side when the secondary CPU is brought up by the boot CPU, and hand them over to the secondary CPU. And we use alloc_pages_node() with the secondary CPU's node, to make sure the espfix stack is NUMA-local to the CPU that is going to use it. Signed-off-by: Zhu Guihua Cc: Cc: Cc: Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/c97add2670e9abebb90095369f0cfc172373ac94.1435824469.git.zhugh.fnst@cn.fujitsu.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/espfix_64.c | 21 +++++++++++++-------- arch/x86/kernel/smpboot.c | 14 +++++++------- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c index 1fb0e0003c94..ce95676abd60 100644 --- a/arch/x86/kernel/espfix_64.c +++ b/arch/x86/kernel/espfix_64.c @@ -141,12 +141,12 @@ void init_espfix_ap(int cpu) pud_t pud, *pud_p; pmd_t pmd, *pmd_p; pte_t pte, *pte_p; - int n; + int n, node; void *stack_page; pteval_t ptemask; /* We only have to do this once... */ - if (likely(this_cpu_read(espfix_stack))) + if (likely(per_cpu(espfix_stack, cpu))) return; /* Already initialized */ addr = espfix_base_addr(cpu); @@ -164,12 +164,15 @@ void init_espfix_ap(int cpu) if (stack_page) goto unlock_done; + node = cpu_to_node(cpu); ptemask = __supported_pte_mask; pud_p = &espfix_pud_page[pud_index(addr)]; pud = *pud_p; if (!pud_present(pud)) { - pmd_p = (pmd_t *)__get_free_page(PGALLOC_GFP); + struct page *page = alloc_pages_node(node, PGALLOC_GFP, 0); + + pmd_p = (pmd_t *)page_address(page); pud = __pud(__pa(pmd_p) | (PGTABLE_PROT & ptemask)); paravirt_alloc_pmd(&init_mm, __pa(pmd_p) >> PAGE_SHIFT); for (n = 0; n < ESPFIX_PUD_CLONES; n++) @@ -179,7 +182,9 @@ void init_espfix_ap(int cpu) pmd_p = pmd_offset(&pud, addr); pmd = *pmd_p; if (!pmd_present(pmd)) { - pte_p = (pte_t *)__get_free_page(PGALLOC_GFP); + struct page *page = alloc_pages_node(node, PGALLOC_GFP, 0); + + pte_p = (pte_t *)page_address(page); pmd = __pmd(__pa(pte_p) | (PGTABLE_PROT & ptemask)); paravirt_alloc_pte(&init_mm, __pa(pte_p) >> PAGE_SHIFT); for (n = 0; n < ESPFIX_PMD_CLONES; n++) @@ -187,7 +192,7 @@ void init_espfix_ap(int cpu) } pte_p = pte_offset_kernel(&pmd, addr); - stack_page = (void *)__get_free_page(GFP_KERNEL); + stack_page = page_address(alloc_pages_node(node, GFP_KERNEL, 0)); pte = __pte(__pa(stack_page) | (__PAGE_KERNEL_RO & ptemask)); for (n = 0; n < ESPFIX_PTE_CLONES; n++) set_pte(&pte_p[n*PTE_STRIDE], pte); @@ -198,7 +203,7 @@ void init_espfix_ap(int cpu) unlock_done: mutex_unlock(&espfix_init_mutex); done: - this_cpu_write(espfix_stack, addr); - this_cpu_write(espfix_waddr, (unsigned long)stack_page - + (addr & ~PAGE_MASK)); + per_cpu(espfix_stack, cpu) = addr; + per_cpu(espfix_waddr, cpu) = (unsigned long)stack_page + + (addr & ~PAGE_MASK); } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 6f5abac6c28b..0bd8c1d507b3 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -238,13 +238,6 @@ static void notrace start_secondary(void *unused) */ check_tsc_sync_target(); - /* - * Enable the espfix hack for this CPU - */ -#ifdef CONFIG_X86_ESPFIX64 - init_espfix_ap(smp_processor_id()); -#endif - /* * We need to hold vector_lock so there the set of online cpus * does not change while we are assigning vectors to cpus. Holding @@ -854,6 +847,13 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle) initial_code = (unsigned long)start_secondary; stack_start = idle->thread.sp; + /* + * Enable the espfix hack for this CPU + */ +#ifdef CONFIG_X86_ESPFIX64 + init_espfix_ap(cpu); +#endif + /* So we see what's up */ announce_cpu(cpu, apicid); From 827a82ff399523a954253dfea401af748640f0f4 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 6 Jul 2015 10:14:34 -0400 Subject: [PATCH 079/210] x86/earlyprintk: Allow early_printk() to use console style parameters like '115200n8' When I enable early_printk on a kernel, I cut and paste the console= input and add to earlyprintk parameter. But I notice recently that ktest has not been detecting triple faults. The way it detects it, is by seeing the kernel banner "Linux version .." with a different kernel version pop up. Then I noticed that early printk was no longer working on my console, which was why ktest was not seeing it. I bisected it down and it was added to 4.0 with this commit: ea9e9d802902 ("Specify PCI based UART for earlyprintk") because it converted the simple_strtoul() that converts the baud number into a kstrtoul(). The problem with this is, I had as my baud rate, 115200n8 (acceptable for console=ttyS0), but because of the "n8", the kstrtoul() doesn't parse the baud rate and returns an error, which sets the baud rate to the default 9600. This explains the garbage on my screen. Now, earlyprintk= kernel parameter does not say it accepts that format. Thus, one answer would simply be me changing my kernel parameters to remove the "n8" since it isn't parsed anyway. But I wonder if other people run into this, and it seems strange that the two consoles for serial accepts different input. I could also extend this to have earlyprintk do something with that "n8" or whatever it has and have it match the console parsing (which, BTW, still uses simple_strtoul(), as I guess it has to). This patch just makes my old kernel parameter parsing work like it use to. Although, simple_strtoull() is considered obsolete, it is the only standard string parsing function that parses a number that is attached to text. Ironically, commit ea9e9d802902 also added several calls to simple_strtoul()! Signed-off-by: Steven Rostedt Cc: Alan Cox Cc: Andy Lutomirski Cc: Andy Shevchenko Cc: Borislav Petkov Cc: Brian Gerst Cc: David Cohen Cc: Denys Vlasenko Cc: Greg Kroah-Hartman Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stuart R. Anderson Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20150706101434.5f6a351b@gandalf.local.home [ Cleaned it up a bit. ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/early_printk.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 89427d8d4fc5..eec40f595ab9 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -175,7 +175,9 @@ static __init void early_serial_init(char *s) } if (*s) { - if (kstrtoul(s, 0, &baud) < 0 || baud == 0) + baud = simple_strtoull(s, &e, 0); + + if (baud == 0 || s == e) baud = DEFAULT_BAUD; } From 4b59246d9aeaac71f5f7a29459cd2abe32c527e6 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 6 Jul 2015 17:08:09 +0200 Subject: [PATCH 080/210] arm64: remove another unnecessary libfdt include path Patch 63a4aea55670 ("of: clean-up unnecessary libfdt include paths") removed all explicit libfdt include paths, since those are no longer necessary after the latest dtc upgrade. However, this one snuck in during the same merge window. Remove it. Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- arch/arm64/mm/Makefile | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile index 9d84feb41a16..773d37a14039 100644 --- a/arch/arm64/mm/Makefile +++ b/arch/arm64/mm/Makefile @@ -4,5 +4,3 @@ obj-y := dma-mapping.o extable.o fault.o init.o \ context.o proc.o pageattr.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_ARM64_PTDUMP) += dump.o - -CFLAGS_mmu.o := -I$(srctree)/scripts/dtc/libfdt/ From 3446af31b721d839c69f8fc70b8a434df6176a64 Mon Sep 17 00:00:00 2001 From: Suneel Garapati Date: Mon, 29 Jun 2015 07:02:08 +0200 Subject: [PATCH 081/210] arm64: defconfig: Add Ceva ahci to the defconfig The Ceva ahci controller is available on the Xilinx Zynq UltraScale+ MPSoC. Signed-off-by: Suneel Garapati Signed-off-by: Michal Simek [catalin.marinas@arm.com: removed unnecessary defconfig changes] Signed-off-by: Catalin Marinas --- arch/arm64/configs/defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index f38c94f1d898..4e17e7ede33d 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -83,6 +83,7 @@ CONFIG_BLK_DEV_SD=y CONFIG_ATA=y CONFIG_SATA_AHCI=y CONFIG_SATA_AHCI_PLATFORM=y +CONFIG_AHCI_CEVA=y CONFIG_AHCI_XGENE=y CONFIG_PATA_PLATFORM=y CONFIG_PATA_OF_PLATFORM=y From 6d451367bfa16fc103604bacd258f534c65d1540 Mon Sep 17 00:00:00 2001 From: Hai Li Date: Thu, 25 Jun 2015 18:35:33 -0400 Subject: [PATCH 082/210] clk: qcom: Use parent rate when set rate to pixel RCG clock Since the parent rate has been recalculated, pixel RCG clock should rely on it to find the correct M/N values during set_rate, instead of calling __clk_round_rate() to its parent again. Signed-off-by: Hai Li Tested-by: Archit Taneja Fixes: 99cbd064b059 ("clk: qcom: Support display RCG clocks") [sboyd@codeaurora.org: Silenced unused parent variable warning] Signed-off-by: Stephen Boyd --- drivers/clk/qcom/clk-rcg2.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/clk/qcom/clk-rcg2.c b/drivers/clk/qcom/clk-rcg2.c index b95d17fbb8d7..92936f0912d2 100644 --- a/drivers/clk/qcom/clk-rcg2.c +++ b/drivers/clk/qcom/clk-rcg2.c @@ -530,19 +530,16 @@ static int clk_pixel_set_rate(struct clk_hw *hw, unsigned long rate, struct clk_rcg2 *rcg = to_clk_rcg2(hw); struct freq_tbl f = *rcg->freq_tbl; const struct frac_entry *frac = frac_table_pixel; - unsigned long request, src_rate; + unsigned long request; int delta = 100000; u32 mask = BIT(rcg->hid_width) - 1; u32 hid_div; - int index = qcom_find_src_index(hw, rcg->parent_map, f.src); - struct clk *parent = clk_get_parent_by_index(hw->clk, index); for (; frac->num; frac++) { request = (rate * frac->den) / frac->num; - src_rate = __clk_round_rate(parent, request); - if ((src_rate < (request - delta)) || - (src_rate > (request + delta))) + if ((parent_rate < (request - delta)) || + (parent_rate > (request + delta))) continue; regmap_read(rcg->clkr.regmap, rcg->cmd_rcgr + CFG_REG, From c14bada8f71eef63d4465aa8e3a479104a06e7c7 Mon Sep 17 00:00:00 2001 From: Gabriel Fernandez Date: Tue, 23 Jun 2015 16:09:21 +0200 Subject: [PATCH 083/210] drivers: clk: st: Remove unused code Remove this duplicated code due to a bad copy / paste. Signed-off-by: Gabriel Fernandez Signed-off-by: Stephen Boyd --- drivers/clk/st/clkgen-fsyn.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/clk/st/clkgen-fsyn.c b/drivers/clk/st/clkgen-fsyn.c index e94197f04b0b..099ed6050d25 100644 --- a/drivers/clk/st/clkgen-fsyn.c +++ b/drivers/clk/st/clkgen-fsyn.c @@ -1082,10 +1082,6 @@ static const struct of_device_id quadfs_of_match[] = { .compatible = "st,stih407-quadfs660-D", .data = &st_fs660c32_D_407 }, - { - .compatible = "st,stih407-quadfs660-D", - .data = (void *)&st_fs660c32_D_407 - }, {} }; From c4d339c69f91c20def1366c1db8af6ac7a3013a9 Mon Sep 17 00:00:00 2001 From: Gabriel Fernandez Date: Tue, 23 Jun 2015 16:09:22 +0200 Subject: [PATCH 084/210] drivers: clk: st: Fix FSYN channel values This patch fixes the value for disabling the FSYN channel clock. The 'is_enabled' returned value is also fixed. Signed-off-by: Pankaj Dev Signed-off-by: Gabriel Fernandez Signed-off-by: Stephen Boyd --- drivers/clk/st/clkgen-fsyn.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/st/clkgen-fsyn.c b/drivers/clk/st/clkgen-fsyn.c index 099ed6050d25..95851c464478 100644 --- a/drivers/clk/st/clkgen-fsyn.c +++ b/drivers/clk/st/clkgen-fsyn.c @@ -489,7 +489,7 @@ static int quadfs_pll_is_enabled(struct clk_hw *hw) struct st_clk_quadfs_pll *pll = to_quadfs_pll(hw); u32 npda = CLKGEN_READ(pll, npda); - return !!npda; + return pll->data->powerup_polarity ? !npda : !!npda; } static int clk_fs660c32_vco_get_rate(unsigned long input, struct stm_fs *fs, @@ -774,7 +774,7 @@ static void quadfs_fsynth_disable(struct clk_hw *hw) if (fs->lock) spin_lock_irqsave(fs->lock, flags); - CLKGEN_WRITE(fs, nsb[fs->chan], !fs->data->standby_polarity); + CLKGEN_WRITE(fs, nsb[fs->chan], fs->data->standby_polarity); if (fs->lock) spin_unlock_irqrestore(fs->lock, flags); From 0f4f2afd4402883a51ad27a1d9e046643bb1e3cb Mon Sep 17 00:00:00 2001 From: Giuseppe Cavallaro Date: Tue, 23 Jun 2015 16:09:23 +0200 Subject: [PATCH 085/210] drivers: clk: st: Fix flexgen lock init While proving lock, the following warning happens and it is fixed after initializing lock in the setup function INFO: trying to register non-static key. the code is fine but needs lockdep annotation. turning off the locking correctness validator. CPU: 0 PID: 0 Comm: swapper/0 Not tainted 3.10.27-02861-g39df285-dirty #33 [] (unwind_backtrace+0x0/0xf4) from [] (show_stack+0x10/0x14) [] (show_stack+0x10/0x14) from [] (__lock_acquire+0x900/0xb14) [] (__lock_acquire+0x900/0xb14) from [] (lock_acquire+0x68/0x7c) [] (lock_acquire+0x68/0x7c) from [] (_raw_spin_lock_irqsave+0x48/0x5c) [] (_raw_spin_lock_irqsave+0x48/0x5c) from [] (clk_gate_endisable+0x28/0x88) [] (clk_gate_endisable+0x28/0x88) from [] (clk_gate_enable+0xc/0x14) [] (clk_gate_enable+0xc/0x14) from [] (flexgen_enable+0x28/0x40) [] (flexgen_enable+0x28/0x40) from [] (__clk_enable+0x5c/0x9c) [] (__clk_enable+0x5c/0x9c) from [] (clk_enable+0x18/0x2c) [] (clk_enable+0x18/0x2c) from [] (st_lpc_of_register+0xc0/0x248) [] (st_lpc_of_register+0xc0/0x248) from [] (clocksource_of_init+0x34/0x58) [] (clocksource_of_init+0x34/0x58) from [] (sti_timer_init+0x10/0x18) [] (sti_timer_init+0x10/0x18) from [] (time_init+0x20/0x30) [] (time_init+0x20/0x30) from [] (start_kernel+0x20c/0x2e8) [] (start_kernel+0x20c/0x2e8) from [<40008074>] (0x40008074) Signed-off-by: Giuseppe Cavallaro Signed-off-by: Gabriel Fernandez Fixes: b116517055b7 ("clk: st: STiH407: Support for Flexgen Clocks") Signed-off-by: Stephen Boyd --- drivers/clk/st/clk-flexgen.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/clk/st/clk-flexgen.c b/drivers/clk/st/clk-flexgen.c index 657ca14ba709..be06d2a79dce 100644 --- a/drivers/clk/st/clk-flexgen.c +++ b/drivers/clk/st/clk-flexgen.c @@ -303,6 +303,8 @@ static void __init st_of_flexgen_setup(struct device_node *np) if (!rlock) goto err; + spin_lock_init(rlock); + for (i = 0; i < clk_data->clk_num; i++) { struct clk *clk; const char *clk_name; From 18fee4538fe534c53fa95fe9eaa7f96586814e0a Mon Sep 17 00:00:00 2001 From: Pankaj Dev Date: Tue, 23 Jun 2015 16:09:24 +0200 Subject: [PATCH 086/210] drivers: clk: st: Add CLK_GET_RATE_NOCACHE flag to clocks Add the CLK_GET_RATE_NOCACHE flag to all the clocks with recalc ops, so that they reflect Hw rate after CPS wake-up when a clk_get_rate() is called Signed-off-by: Pankaj Dev Signed-off-by: Gabriel Fernandez Signed-off-by: Stephen Boyd --- drivers/clk/st/clk-flexgen.c | 2 +- drivers/clk/st/clkgen-fsyn.c | 2 +- drivers/clk/st/clkgen-mux.c | 8 +++++--- drivers/clk/st/clkgen-pll.c | 2 +- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/clk/st/clk-flexgen.c b/drivers/clk/st/clk-flexgen.c index be06d2a79dce..8dd8cce27361 100644 --- a/drivers/clk/st/clk-flexgen.c +++ b/drivers/clk/st/clk-flexgen.c @@ -190,7 +190,7 @@ static struct clk *clk_register_flexgen(const char *name, init.name = name; init.ops = &flexgen_ops; - init.flags = CLK_IS_BASIC | flexgen_flags; + init.flags = CLK_IS_BASIC | CLK_GET_RATE_NOCACHE | flexgen_flags; init.parent_names = parent_names; init.num_parents = num_parents; diff --git a/drivers/clk/st/clkgen-fsyn.c b/drivers/clk/st/clkgen-fsyn.c index 95851c464478..99c98c15d4a4 100644 --- a/drivers/clk/st/clkgen-fsyn.c +++ b/drivers/clk/st/clkgen-fsyn.c @@ -635,7 +635,7 @@ static struct clk * __init st_clk_register_quadfs_pll( init.name = name; init.ops = quadfs->pll_ops; - init.flags = CLK_IS_BASIC; + init.flags = CLK_IS_BASIC | CLK_GET_RATE_NOCACHE; init.parent_names = &parent_name; init.num_parents = 1; diff --git a/drivers/clk/st/clkgen-mux.c b/drivers/clk/st/clkgen-mux.c index 4fbe6e099587..bd30f8d4e902 100644 --- a/drivers/clk/st/clkgen-mux.c +++ b/drivers/clk/st/clkgen-mux.c @@ -237,7 +237,7 @@ static struct clk *clk_register_genamux(const char *name, init.name = name; init.ops = &clkgena_divmux_ops; - init.flags = CLK_IS_BASIC; + init.flags = CLK_IS_BASIC | CLK_GET_RATE_NOCACHE; init.parent_names = parent_names; init.num_parents = num_parents; @@ -513,7 +513,8 @@ static void __init st_of_clkgena_prediv_setup(struct device_node *np) 0, &clk_name)) return; - clk = clk_register_divider_table(NULL, clk_name, parent_name, 0, + clk = clk_register_divider_table(NULL, clk_name, parent_name, + CLK_GET_RATE_NOCACHE, reg + data->offset, data->shift, 1, 0, data->table, NULL); if (IS_ERR(clk)) @@ -786,7 +787,8 @@ static void __init st_of_clkgen_vcc_setup(struct device_node *np) &mux->hw, &clk_mux_ops, &div->hw, &clk_divider_ops, &gate->hw, &clk_gate_ops, - data->clk_flags); + data->clk_flags | + CLK_GET_RATE_NOCACHE); if (IS_ERR(clk)) { kfree(gate); kfree(div); diff --git a/drivers/clk/st/clkgen-pll.c b/drivers/clk/st/clkgen-pll.c index 106532207213..72d1c27eaffa 100644 --- a/drivers/clk/st/clkgen-pll.c +++ b/drivers/clk/st/clkgen-pll.c @@ -406,7 +406,7 @@ static struct clk * __init clkgen_pll_register(const char *parent_name, init.name = clk_name; init.ops = pll_data->ops; - init.flags = CLK_IS_BASIC; + init.flags = CLK_IS_BASIC | CLK_GET_RATE_NOCACHE; init.parent_names = &parent_name; init.num_parents = 1; From 3be6d8ce639d92e60d144fb99dd74a53fe3799bb Mon Sep 17 00:00:00 2001 From: Gabriel Fernandez Date: Tue, 23 Jun 2015 16:09:25 +0200 Subject: [PATCH 087/210] drivers: clk: st: Fix mux bit-setting for Cortex A9 clocks This patch fixes the mux bit-setting for ClockgenA9. Signed-off-by: Gabriel Fernandez Fixes: 13e6f2da1ddf ("clk: st: STiH407: Support for A9 MUX Clocks") Signed-off-by: Stephen Boyd --- drivers/clk/st/clkgen-mux.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/st/clkgen-mux.c b/drivers/clk/st/clkgen-mux.c index bd30f8d4e902..717c4a91a17b 100644 --- a/drivers/clk/st/clkgen-mux.c +++ b/drivers/clk/st/clkgen-mux.c @@ -583,7 +583,7 @@ static struct clkgen_mux_data stih416_a9_mux_data = { }; static struct clkgen_mux_data stih407_a9_mux_data = { .offset = 0x1a4, - .shift = 1, + .shift = 0, .width = 2, }; From 0294112ee3135fbd15eaa70015af8283642dd970 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 4 Jul 2015 03:09:03 +0200 Subject: [PATCH 088/210] ACPI / PNP: Reserve ACPI resources at the fs_initcall_sync stage This effectively reverts the following three commits: 7bc10388ccdd ACPI / resources: free memory on error in add_region_before() 0f1b414d1907 ACPI / PNP: Avoid conflicting resource reservations b9a5e5e18fbf ACPI / init: Fix the ordering of acpi_reserve_resources() (commit b9a5e5e18fbf introduced regressions some of which, but not all, were addressed by commit 0f1b414d1907 and commit 7bc10388ccdd was a fixup on top of the latter) and causes ACPI fixed hardware resources to be reserved at the fs_initcall_sync stage of system initialization. The story is as follows. First, a boot regression was reported due to an apparent resource reservation ordering change after a commit that shouldn't lead to such changes. Investigation led to the conclusion that the problem happened because acpi_reserve_resources() was executed at the device_initcall() stage of system initialization which wasn't strictly ordered with respect to driver initialization (and with respect to the initialization of the pcieport driver in particular), so a random change causing the device initcalls to be run in a different order might break things. The response to that was to attempt to run acpi_reserve_resources() as soon as we knew that ACPI would be in use (commit b9a5e5e18fbf). However, that turned out to be too early, because it caused resource reservations made by the PNP system driver to fail on at least one system and that failure was addressed by commit 0f1b414d1907. That fix still turned out to be insufficient, though, because calling acpi_reserve_resources() before the fs_initcall stage of system initialization caused a boot regression to happen on the eCAFE EC-800-H20G/S netbook. That meant that we only could call acpi_reserve_resources() at the fs_initcall initialization stage or later, but then we might just as well call it after the PNP initalization in which case commit 0f1b414d1907 wouldn't be necessary any more. For this reason, the changes made by commit 0f1b414d1907 are reverted (along with a memory leak fixup on top of that commit), the changes made by commit b9a5e5e18fbf that went too far are reverted too and acpi_reserve_resources() is changed into fs_initcall_sync, which will cause it to be executed after the PNP subsystem initialization (which is an fs_initcall) and before device initcalls (including the pcieport driver initialization) which should avoid the initial issue. Link: https://bugzilla.kernel.org/show_bug.cgi?id=100581 Link: http://marc.info/?t=143092384600002&r=1&w=2 Link: https://bugzilla.kernel.org/show_bug.cgi?id=99831 Link: http://marc.info/?t=143389402600001&r=1&w=2 Fixes: b9a5e5e18fbf "ACPI / init: Fix the ordering of acpi_reserve_resources()" Reported-by: Roland Dreier Cc: All applicable Signed-off-by: Rafael J. Wysocki --- drivers/acpi/osl.c | 12 ++- drivers/acpi/resource.c | 162 ---------------------------------------- drivers/pnp/system.c | 35 +++------ include/linux/acpi.h | 10 --- 4 files changed, 18 insertions(+), 201 deletions(-) diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index c262e4acd68d..3b8963f21b36 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -175,10 +175,14 @@ static void __init acpi_request_region (struct acpi_generic_address *gas, if (!addr || !length) return; - acpi_reserve_region(addr, length, gas->space_id, 0, desc); + /* Resources are never freed */ + if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_IO) + request_region(addr, length, desc); + else if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) + request_mem_region(addr, length, desc); } -static void __init acpi_reserve_resources(void) +static int __init acpi_reserve_resources(void) { acpi_request_region(&acpi_gbl_FADT.xpm1a_event_block, acpi_gbl_FADT.pm1_event_length, "ACPI PM1a_EVT_BLK"); @@ -207,7 +211,10 @@ static void __init acpi_reserve_resources(void) if (!(acpi_gbl_FADT.gpe1_block_length & 0x1)) acpi_request_region(&acpi_gbl_FADT.xgpe1_block, acpi_gbl_FADT.gpe1_block_length, "ACPI GPE1_BLK"); + + return 0; } +fs_initcall_sync(acpi_reserve_resources); void acpi_os_printf(const char *fmt, ...) { @@ -1862,7 +1869,6 @@ acpi_status __init acpi_os_initialize(void) acpi_status __init acpi_os_initialize1(void) { - acpi_reserve_resources(); kacpid_wq = alloc_workqueue("kacpid", 0, 1); kacpi_notify_wq = alloc_workqueue("kacpi_notify", 0, 1); kacpi_hotplug_wq = alloc_ordered_workqueue("kacpi_hotplug", 0); diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 10561ce16ed1..8244f013f210 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #ifdef CONFIG_X86 @@ -622,164 +621,3 @@ int acpi_dev_filter_resource_type(struct acpi_resource *ares, return (type & types) ? 0 : 1; } EXPORT_SYMBOL_GPL(acpi_dev_filter_resource_type); - -struct reserved_region { - struct list_head node; - u64 start; - u64 end; -}; - -static LIST_HEAD(reserved_io_regions); -static LIST_HEAD(reserved_mem_regions); - -static int request_range(u64 start, u64 end, u8 space_id, unsigned long flags, - char *desc) -{ - unsigned int length = end - start + 1; - struct resource *res; - - res = space_id == ACPI_ADR_SPACE_SYSTEM_IO ? - request_region(start, length, desc) : - request_mem_region(start, length, desc); - if (!res) - return -EIO; - - res->flags &= ~flags; - return 0; -} - -static int add_region_before(u64 start, u64 end, u8 space_id, - unsigned long flags, char *desc, - struct list_head *head) -{ - struct reserved_region *reg; - int error; - - reg = kmalloc(sizeof(*reg), GFP_KERNEL); - if (!reg) - return -ENOMEM; - - error = request_range(start, end, space_id, flags, desc); - if (error) { - kfree(reg); - return error; - } - - reg->start = start; - reg->end = end; - list_add_tail(®->node, head); - return 0; -} - -/** - * acpi_reserve_region - Reserve an I/O or memory region as a system resource. - * @start: Starting address of the region. - * @length: Length of the region. - * @space_id: Identifier of address space to reserve the region from. - * @flags: Resource flags to clear for the region after requesting it. - * @desc: Region description (for messages). - * - * Reserve an I/O or memory region as a system resource to prevent others from - * using it. If the new region overlaps with one of the regions (in the given - * address space) already reserved by this routine, only the non-overlapping - * parts of it will be reserved. - * - * Returned is either 0 (success) or a negative error code indicating a resource - * reservation problem. It is the code of the first encountered error, but the - * routine doesn't abort until it has attempted to request all of the parts of - * the new region that don't overlap with other regions reserved previously. - * - * The resources requested by this routine are never released. - */ -int acpi_reserve_region(u64 start, unsigned int length, u8 space_id, - unsigned long flags, char *desc) -{ - struct list_head *regions; - struct reserved_region *reg; - u64 end = start + length - 1; - int ret = 0, error = 0; - - if (space_id == ACPI_ADR_SPACE_SYSTEM_IO) - regions = &reserved_io_regions; - else if (space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) - regions = &reserved_mem_regions; - else - return -EINVAL; - - if (list_empty(regions)) - return add_region_before(start, end, space_id, flags, desc, regions); - - list_for_each_entry(reg, regions, node) - if (reg->start == end + 1) { - /* The new region can be prepended to this one. */ - ret = request_range(start, end, space_id, flags, desc); - if (!ret) - reg->start = start; - - return ret; - } else if (reg->start > end) { - /* No overlap. Add the new region here and get out. */ - return add_region_before(start, end, space_id, flags, - desc, ®->node); - } else if (reg->end == start - 1) { - goto combine; - } else if (reg->end >= start) { - goto overlap; - } - - /* The new region goes after the last existing one. */ - return add_region_before(start, end, space_id, flags, desc, regions); - - overlap: - /* - * The new region overlaps an existing one. - * - * The head part of the new region immediately preceding the existing - * overlapping one can be combined with it right away. - */ - if (reg->start > start) { - error = request_range(start, reg->start - 1, space_id, flags, desc); - if (error) - ret = error; - else - reg->start = start; - } - - combine: - /* - * The new region is adjacent to an existing one. If it extends beyond - * that region all the way to the next one, it is possible to combine - * all three of them. - */ - while (reg->end < end) { - struct reserved_region *next = NULL; - u64 a = reg->end + 1, b = end; - - if (!list_is_last(®->node, regions)) { - next = list_next_entry(reg, node); - if (next->start <= end) - b = next->start - 1; - } - error = request_range(a, b, space_id, flags, desc); - if (!error) { - if (next && next->start == b + 1) { - reg->end = next->end; - list_del(&next->node); - kfree(next); - } else { - reg->end = end; - break; - } - } else if (next) { - if (!ret) - ret = error; - - reg = next; - } else { - break; - } - } - - return ret ? ret : error; -} -EXPORT_SYMBOL_GPL(acpi_reserve_region); diff --git a/drivers/pnp/system.c b/drivers/pnp/system.c index 515f33882ab8..49c1720df59a 100644 --- a/drivers/pnp/system.c +++ b/drivers/pnp/system.c @@ -7,7 +7,6 @@ * Bjorn Helgaas */ -#include #include #include #include @@ -23,41 +22,25 @@ static const struct pnp_device_id pnp_dev_table[] = { {"", 0} }; -#ifdef CONFIG_ACPI -static bool __reserve_range(u64 start, unsigned int length, bool io, char *desc) -{ - u8 space_id = io ? ACPI_ADR_SPACE_SYSTEM_IO : ACPI_ADR_SPACE_SYSTEM_MEMORY; - return !acpi_reserve_region(start, length, space_id, IORESOURCE_BUSY, desc); -} -#else -static bool __reserve_range(u64 start, unsigned int length, bool io, char *desc) -{ - struct resource *res; - - res = io ? request_region(start, length, desc) : - request_mem_region(start, length, desc); - if (res) { - res->flags &= ~IORESOURCE_BUSY; - return true; - } - return false; -} -#endif - static void reserve_range(struct pnp_dev *dev, struct resource *r, int port) { char *regionid; const char *pnpid = dev_name(&dev->dev); resource_size_t start = r->start, end = r->end; - bool reserved; + struct resource *res; regionid = kmalloc(16, GFP_KERNEL); if (!regionid) return; snprintf(regionid, 16, "pnp %s", pnpid); - reserved = __reserve_range(start, end - start + 1, !!port, regionid); - if (!reserved) + if (port) + res = request_region(start, end - start + 1, regionid); + else + res = request_mem_region(start, end - start + 1, regionid); + if (res) + res->flags &= ~IORESOURCE_BUSY; + else kfree(regionid); /* @@ -66,7 +49,7 @@ static void reserve_range(struct pnp_dev *dev, struct resource *r, int port) * have double reservations. */ dev_info(&dev->dev, "%pR %s reserved\n", r, - reserved ? "has been" : "could not be"); + res ? "has been" : "could not be"); } static void reserve_resources_of_dev(struct pnp_dev *dev) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index c471dfc93b71..fedfd1bea3bf 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -309,9 +309,6 @@ int acpi_check_region(resource_size_t start, resource_size_t n, int acpi_resources_are_enforced(void); -int acpi_reserve_region(u64 start, unsigned int length, u8 space_id, - unsigned long flags, char *desc); - #ifdef CONFIG_HIBERNATION void __init acpi_no_s4_hw_signature(void); #endif @@ -507,13 +504,6 @@ static inline int acpi_check_region(resource_size_t start, resource_size_t n, return 0; } -static inline int acpi_reserve_region(u64 start, unsigned int length, - u8 space_id, unsigned long flags, - char *desc) -{ - return -ENXIO; -} - struct acpi_table_header; static inline int acpi_table_parse(char *id, int (*handler)(struct acpi_table_header *)) From d3e13ff3c1aa2403d9a5f371baac088daeb8f56d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 7 Jul 2015 00:31:47 +0200 Subject: [PATCH 089/210] ACPI / LPSS: Fix up acpi_lpss_create_device() Fix a return value (which should be a negative error code) and a memory leak (the list allocated by acpi_dev_get_resources() needs to be freed on ioremap() errors too) in acpi_lpss_create_device() introduced by commit 4483d59e29fe 'ACPI / LPSS: check the result of ioremap()'. Fixes: 4483d59e29fe 'ACPI / LPSS: check the result of ioremap()' Reported-by: Dan Carpenter Cc: 4.0+ # 4.0+ Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpi_lpss.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c index 569ee090343f..46b58abb08c5 100644 --- a/drivers/acpi/acpi_lpss.c +++ b/drivers/acpi/acpi_lpss.c @@ -352,13 +352,16 @@ static int acpi_lpss_create_device(struct acpi_device *adev, pdata->mmio_size = resource_size(rentry->res); pdata->mmio_base = ioremap(rentry->res->start, pdata->mmio_size); - if (!pdata->mmio_base) - goto err_out; break; } acpi_dev_free_resource_list(&resource_list); + if (!pdata->mmio_base) { + ret = -ENOMEM; + goto err_out; + } + pdata->dev_desc = dev_desc; if (dev_desc->setup) From 7b2a4635b84b4dbb07c93201a8c0aea82ed65e4f Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Tue, 30 Jun 2015 10:58:44 +0800 Subject: [PATCH 090/210] clk: mediatek: mt8173: Fix enabling of critical clocks On the MT8173 the clocks are provided by different units. To enable the critical clocks we must be sure that all parent clocks are already registered, otherwise the parents of the critical clocks end up being unused and get disabled later. To find a place where all parents are registered we try each time after we've registered some clocks if all known providers are present now and only then we enable the critical clocks Signed-off-by: Sascha Hauer Signed-off-by: James Liao [sboyd@codeaurora.org: Marked function and data __init] Signed-off-by: Stephen Boyd --- drivers/clk/mediatek/clk-mt8173.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/drivers/clk/mediatek/clk-mt8173.c b/drivers/clk/mediatek/clk-mt8173.c index 4b9e04cdf7e8..8b6523d15fb8 100644 --- a/drivers/clk/mediatek/clk-mt8173.c +++ b/drivers/clk/mediatek/clk-mt8173.c @@ -700,6 +700,22 @@ static const struct mtk_composite peri_clks[] __initconst = { MUX(CLK_PERI_UART3_SEL, "uart3_ck_sel", uart_ck_sel_parents, 0x40c, 3, 1), }; +static struct clk_onecell_data *mt8173_top_clk_data __initdata; +static struct clk_onecell_data *mt8173_pll_clk_data __initdata; + +static void __init mtk_clk_enable_critical(void) +{ + if (!mt8173_top_clk_data || !mt8173_pll_clk_data) + return; + + clk_prepare_enable(mt8173_pll_clk_data->clks[CLK_APMIXED_ARMCA15PLL]); + clk_prepare_enable(mt8173_pll_clk_data->clks[CLK_APMIXED_ARMCA7PLL]); + clk_prepare_enable(mt8173_top_clk_data->clks[CLK_TOP_MEM_SEL]); + clk_prepare_enable(mt8173_top_clk_data->clks[CLK_TOP_DDRPHYCFG_SEL]); + clk_prepare_enable(mt8173_top_clk_data->clks[CLK_TOP_CCI400_SEL]); + clk_prepare_enable(mt8173_top_clk_data->clks[CLK_TOP_RTC_SEL]); +} + static void __init mtk_topckgen_init(struct device_node *node) { struct clk_onecell_data *clk_data; @@ -712,19 +728,19 @@ static void __init mtk_topckgen_init(struct device_node *node) return; } - clk_data = mtk_alloc_clk_data(CLK_TOP_NR_CLK); + mt8173_top_clk_data = clk_data = mtk_alloc_clk_data(CLK_TOP_NR_CLK); mtk_clk_register_factors(root_clk_alias, ARRAY_SIZE(root_clk_alias), clk_data); mtk_clk_register_factors(top_divs, ARRAY_SIZE(top_divs), clk_data); mtk_clk_register_composites(top_muxes, ARRAY_SIZE(top_muxes), base, &mt8173_clk_lock, clk_data); - clk_prepare_enable(clk_data->clks[CLK_TOP_CCI400_SEL]); - r = of_clk_add_provider(node, of_clk_src_onecell_get, clk_data); if (r) pr_err("%s(): could not register clock provider: %d\n", __func__, r); + + mtk_clk_enable_critical(); } CLK_OF_DECLARE(mtk_topckgen, "mediatek,mt8173-topckgen", mtk_topckgen_init); @@ -818,13 +834,13 @@ static void __init mtk_apmixedsys_init(struct device_node *node) { struct clk_onecell_data *clk_data; - clk_data = mtk_alloc_clk_data(CLK_APMIXED_NR_CLK); + mt8173_pll_clk_data = clk_data = mtk_alloc_clk_data(CLK_APMIXED_NR_CLK); if (!clk_data) return; mtk_clk_register_plls(node, plls, ARRAY_SIZE(plls), clk_data); - clk_prepare_enable(clk_data->clks[CLK_APMIXED_ARMCA15PLL]); + mtk_clk_enable_critical(); } CLK_OF_DECLARE(mtk_apmixedsys, "mediatek,mt8173-apmixedsys", mtk_apmixedsys_init); From 93af5e93544328285a6f65f7d47bbea8979b28fb Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 26 Jun 2015 11:14:14 +0200 Subject: [PATCH 091/210] PM / Domains: Avoid infinite loops in attach/detach code If pm_genpd_{add,remove}_device() keeps on failing with -EAGAIN, we end up with an infinite loop in genpd_dev_pm_{at,de}tach(). This may happen due to a genpd.prepared_count imbalance. This is a bug elsewhere, but it will result in a system lock up, possibly during reboot of an otherwise functioning system. To avoid this, put a limit on the maximum number of loop iterations, using an exponential back-off mechanism. If the limit is reached, the operation will just fail. An error message is already printed. Signed-off-by: Geert Uytterhoeven Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index cdd547bd67df..0ee43c1056e0 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -6,6 +6,7 @@ * This file is released under the GPLv2. */ +#include #include #include #include @@ -19,6 +20,8 @@ #include #include +#define GENPD_RETRY_MAX_MS 250 /* Approximate */ + #define GENPD_DEV_CALLBACK(genpd, type, callback, dev) \ ({ \ type (*__routine)(struct device *__d); \ @@ -2131,6 +2134,7 @@ EXPORT_SYMBOL_GPL(of_genpd_get_from_provider); static void genpd_dev_pm_detach(struct device *dev, bool power_off) { struct generic_pm_domain *pd; + unsigned int i; int ret = 0; pd = pm_genpd_lookup_dev(dev); @@ -2139,10 +2143,12 @@ static void genpd_dev_pm_detach(struct device *dev, bool power_off) dev_dbg(dev, "removing from PM domain %s\n", pd->name); - while (1) { + for (i = 1; i < GENPD_RETRY_MAX_MS; i <<= 1) { ret = pm_genpd_remove_device(pd, dev); if (ret != -EAGAIN) break; + + mdelay(i); cond_resched(); } @@ -2183,6 +2189,7 @@ int genpd_dev_pm_attach(struct device *dev) { struct of_phandle_args pd_args; struct generic_pm_domain *pd; + unsigned int i; int ret; if (!dev->of_node) @@ -2218,10 +2225,12 @@ int genpd_dev_pm_attach(struct device *dev) dev_dbg(dev, "adding to PM domain %s\n", pd->name); - while (1) { + for (i = 1; i < GENPD_RETRY_MAX_MS; i <<= 1) { ret = pm_genpd_add_device(pd, dev); if (ret != -EAGAIN) break; + + mdelay(i); cond_resched(); } From b51f9b103f58db2c5c0a20d6cee26c8bc255d3ae Mon Sep 17 00:00:00 2001 From: Uwe Geuder Date: Mon, 29 Jun 2015 23:35:05 +0300 Subject: [PATCH 092/210] PM / hibernate: clarify resume documentation it was not the whole truth that kernel mode cannot be used with swap on LVM Signed-off-by: Uwe Geuder Acked-by: Pavel Machek Signed-off-by: Rafael J. Wysocki --- Documentation/power/swsusp.txt | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/Documentation/power/swsusp.txt b/Documentation/power/swsusp.txt index f732a8321e8a..8cc17ca71813 100644 --- a/Documentation/power/swsusp.txt +++ b/Documentation/power/swsusp.txt @@ -410,8 +410,17 @@ Documentation/usb/persist.txt. Q: Can I suspend-to-disk using a swap partition under LVM? -A: No. You can suspend successfully, but you'll not be able to -resume. uswsusp should be able to work with LVM. See suspend.sf.net. +A: Yes and No. You can suspend successfully, but the kernel will not be able +to resume on its own. You need an initramfs that can recognize the resume +situation, activate the logical volume containing the swap volume (but not +touch any filesystems!), and eventually call + +echo -n "$major:$minor" > /sys/power/resume + +where $major and $minor are the respective major and minor device numbers of +the swap volume. + +uswsusp works with LVM, too. See http://suspend.sourceforge.net/ Q: I upgraded the kernel from 2.6.15 to 2.6.16. Both kernels were compiled with the similar configuration files. Anyway I found that From 26095a01d359827eeccec5459c28ddd976183179 Mon Sep 17 00:00:00 2001 From: "Suthikulpanit, Suravee" Date: Tue, 7 Jul 2015 01:55:20 +0200 Subject: [PATCH 093/210] ACPI / scan: Add support for ACPI _CLS device matching Device drivers typically use ACPI _HIDs/_CIDs listed in struct device_driver acpi_match_table to match devices. However, for generic drivers, we do not want to list _HID for all supported devices. Also, certain classes of devices do not have _CID (e.g. SATA, USB). Instead, we can leverage ACPI _CLS, which specifies PCI-defined class code (i.e. base-class, subclass and programming interface). This patch adds support for matching ACPI devices using the _CLS method. To support loadable module, current design uses _HID or _CID to match device's modalias. With the new way of matching with _CLS this would requires modification to the current ACPI modalias key to include _CLS. This patch appends PCI-defined class-code to the existing ACPI modalias as following. acpi::::..::: E.g: # cat /sys/devices/platform/AMDI0600:00/modalias acpi:AMDI0600:010601: where bb is th base-class code, ss is te sub-class code, and pp is the programming interface code Since there would not be _HID/_CID in the ACPI matching table of the driver, this patch adds a field to acpi_device_id to specify the matching _CLS. static const struct acpi_device_id ahci_acpi_match[] = { { ACPI_DEVICE_CLASS(PCI_CLASS_STORAGE_SATA_AHCI, 0xffffff) }, {}, }; In this case, the corresponded entry in modules.alias file would be: alias acpi*:010601:* ahci_platform Acked-by: Mika Westerberg Reviewed-by: Hanjun Guo Signed-off-by: Suravee Suthikulpanit Signed-off-by: Rafael J. Wysocki --- drivers/acpi/scan.c | 32 +++++++++++++++++++++++++++++-- include/linux/acpi.h | 14 ++++++++++++++ include/linux/mod_devicetable.h | 2 ++ scripts/mod/devicetable-offsets.c | 2 ++ scripts/mod/file2alias.c | 32 +++++++++++++++++++++++++++++-- 5 files changed, 78 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 2649a068671d..ec256352f423 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1019,6 +1019,29 @@ static bool acpi_of_match_device(struct acpi_device *adev, return false; } +static bool __acpi_match_device_cls(const struct acpi_device_id *id, + struct acpi_hardware_id *hwid) +{ + int i, msk, byte_shift; + char buf[3]; + + if (!id->cls) + return false; + + /* Apply class-code bitmask, before checking each class-code byte */ + for (i = 1; i <= 3; i++) { + byte_shift = 8 * (3 - i); + msk = (id->cls_msk >> byte_shift) & 0xFF; + if (!msk) + continue; + + sprintf(buf, "%02x", (id->cls >> byte_shift) & msk); + if (strncmp(buf, &hwid->id[(i - 1) * 2], 2)) + return false; + } + return true; +} + static const struct acpi_device_id *__acpi_match_device( struct acpi_device *device, const struct acpi_device_id *ids, @@ -1036,9 +1059,12 @@ static const struct acpi_device_id *__acpi_match_device( list_for_each_entry(hwid, &device->pnp.ids, list) { /* First, check the ACPI/PNP IDs provided by the caller. */ - for (id = ids; id->id[0]; id++) - if (!strcmp((char *) id->id, hwid->id)) + for (id = ids; id->id[0] || id->cls; id++) { + if (id->id[0] && !strcmp((char *) id->id, hwid->id)) return id; + else if (id->cls && __acpi_match_device_cls(id, hwid)) + return id; + } /* * Next, check ACPI_DT_NAMESPACE_HID and try to match the @@ -2101,6 +2127,8 @@ static void acpi_set_pnp_ids(acpi_handle handle, struct acpi_device_pnp *pnp, if (info->valid & ACPI_VALID_UID) pnp->unique_id = kstrdup(info->unique_id.string, GFP_KERNEL); + if (info->valid & ACPI_VALID_CLS) + acpi_add_id(pnp, info->class_code.string); kfree(info); diff --git a/include/linux/acpi.h b/include/linux/acpi.h index c471dfc93b71..bdfdb9f65c23 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -58,6 +58,19 @@ static inline acpi_handle acpi_device_handle(struct acpi_device *adev) acpi_fwnode_handle(adev) : NULL) #define ACPI_HANDLE(dev) acpi_device_handle(ACPI_COMPANION(dev)) +/** + * ACPI_DEVICE_CLASS - macro used to describe an ACPI device with + * the PCI-defined class-code information + * + * @_cls : the class, subclass, prog-if triple for this device + * @_msk : the class mask for this device + * + * This macro is used to create a struct acpi_device_id that matches a + * specific PCI class. The .id and .driver_data fields will be left + * initialized with the default value. + */ +#define ACPI_DEVICE_CLASS(_cls, _msk) .cls = (_cls), .cls_msk = (_msk), + static inline bool has_acpi_companion(struct device *dev) { return is_acpi_node(dev->fwnode); @@ -446,6 +459,7 @@ struct platform_device *acpi_create_platform_device(struct acpi_device *); #define ACPI_COMPANION(dev) (NULL) #define ACPI_COMPANION_SET(dev, adev) do { } while (0) #define ACPI_HANDLE(dev) (NULL) +#define ACPI_DEVICE_CLASS(_cls, _msk) .cls = (0), .cls_msk = (0), struct fwnode_handle; diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 8183d6640ca7..34f25b7bf642 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -189,6 +189,8 @@ struct css_device_id { struct acpi_device_id { __u8 id[ACPI_ID_LEN]; kernel_ulong_t driver_data; + __u32 cls; + __u32 cls_msk; }; #define PNP_ID_LEN 8 diff --git a/scripts/mod/devicetable-offsets.c b/scripts/mod/devicetable-offsets.c index eff7de1fc82e..e70fcd12eeeb 100644 --- a/scripts/mod/devicetable-offsets.c +++ b/scripts/mod/devicetable-offsets.c @@ -63,6 +63,8 @@ int main(void) DEVID(acpi_device_id); DEVID_FIELD(acpi_device_id, id); + DEVID_FIELD(acpi_device_id, cls); + DEVID_FIELD(acpi_device_id, cls_msk); DEVID(pnp_device_id); DEVID_FIELD(pnp_device_id, id); diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index 84c86f3cd6cd..5f2088209132 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -523,12 +523,40 @@ static int do_serio_entry(const char *filename, } ADD_TO_DEVTABLE("serio", serio_device_id, do_serio_entry); -/* looks like: "acpi:ACPI0003 or acpi:PNP0C0B" or "acpi:LNXVIDEO" */ +/* looks like: "acpi:ACPI0003" or "acpi:PNP0C0B" or "acpi:LNXVIDEO" or + * "acpi:bbsspp" (bb=base-class, ss=sub-class, pp=prog-if) + * + * NOTE: Each driver should use one of the following : _HID, _CIDs + * or _CLS. Also, bb, ss, and pp can be substituted with ?? + * as don't care byte. + */ static int do_acpi_entry(const char *filename, void *symval, char *alias) { DEF_FIELD_ADDR(symval, acpi_device_id, id); - sprintf(alias, "acpi*:%s:*", *id); + DEF_FIELD_ADDR(symval, acpi_device_id, cls); + DEF_FIELD_ADDR(symval, acpi_device_id, cls_msk); + + if (id && strlen((const char *)*id)) + sprintf(alias, "acpi*:%s:*", *id); + else if (cls) { + int i, byte_shift, cnt = 0; + unsigned int msk; + + sprintf(&alias[cnt], "acpi*:"); + cnt = 6; + for (i = 1; i <= 3; i++) { + byte_shift = 8 * (3-i); + msk = (*cls_msk >> byte_shift) & 0xFF; + if (msk) + sprintf(&alias[cnt], "%02x", + (*cls >> byte_shift) & 0xFF); + else + sprintf(&alias[cnt], "??"); + cnt += 2; + } + sprintf(&alias[cnt], ":*"); + } return 1; } ADD_TO_DEVTABLE("acpi", acpi_device_id, do_acpi_entry); From 2051e9248688e4c36e4de2e93b88fa0d74e86261 Mon Sep 17 00:00:00 2001 From: "Suthikulpanit, Suravee" Date: Tue, 7 Jul 2015 01:55:21 +0200 Subject: [PATCH 094/210] ata: ahci_platform: Add ACPI _CLS matching This patch adds ACPI supports for AHCI platform driver, which uses _CLS method to match the device. The following is an example of ASL structure in DSDT for a SATA controller, which contains _CLS package to be matched by the ahci_platform driver: Device (AHC0) // AHCI Controller { Name(_HID, "AMDI0600") Name (_CCA, 1) Name (_CLS, Package (3) { 0x01, // Base Class: Mass Storage 0x06, // Sub-Class: serial ATA 0x01, // Interface: AHCI }) Name (_CRS, ResourceTemplate () { Memory32Fixed (ReadWrite, 0xE0300000, 0x00010000) Interrupt (ResourceConsumer, Level, ActiveHigh, Exclusive,,,) { 387 } }) } Also, since ATA driver should not require PCI support for ATA_ACPI, this patch removes dependency in the driver/ata/Kconfig. Acked-by: Tejun Heo Acked-by: Mika Westerberg Reviewed-by: Hanjun Guo Signed-off-by: Suravee Suthikulpanit Signed-off-by: Rafael J. Wysocki --- drivers/ata/Kconfig | 2 +- drivers/ata/ahci_platform.c | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index 6d17a3b65ef7..15e40ee62a94 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -48,7 +48,7 @@ config ATA_VERBOSE_ERROR config ATA_ACPI bool "ATA ACPI Support" - depends on ACPI && PCI + depends on ACPI default y help This option adds support for ATA-related ACPI objects. diff --git a/drivers/ata/ahci_platform.c b/drivers/ata/ahci_platform.c index 614c78f510f0..1befb114c384 100644 --- a/drivers/ata/ahci_platform.c +++ b/drivers/ata/ahci_platform.c @@ -20,6 +20,8 @@ #include #include #include +#include +#include #include "ahci.h" #define DRV_NAME "ahci" @@ -79,12 +81,19 @@ static const struct of_device_id ahci_of_match[] = { }; MODULE_DEVICE_TABLE(of, ahci_of_match); +static const struct acpi_device_id ahci_acpi_match[] = { + { ACPI_DEVICE_CLASS(PCI_CLASS_STORAGE_SATA_AHCI, 0xffffff) }, + {}, +}; +MODULE_DEVICE_TABLE(acpi, ahci_acpi_match); + static struct platform_driver ahci_driver = { .probe = ahci_probe, .remove = ata_platform_remove_one, .driver = { .name = DRV_NAME, .of_match_table = ahci_of_match, + .acpi_match_table = ahci_acpi_match, .pm = &ahci_pm_ops, }, }; From b32aadc1a8ed84afbe924cd2ced31cd6a2e67074 Mon Sep 17 00:00:00 2001 From: "Shreyas B. Prabhu" Date: Tue, 7 Jul 2015 01:39:23 +0530 Subject: [PATCH 095/210] powerpc/powernv: Fix race in updating core_idle_state core_idle_state is maintained for each core. It uses 0-7 bits to track whether a thread in the core has entered fastsleep or winkle. 8th bit is used as a lock bit. The lock bit is set in these 2 scenarios- - The thread is first in subcore to wakeup from sleep/winkle. - If its the last thread in the core about to enter sleep/winkle While the lock bit is set, if any other thread in the core wakes up, it loops until the lock bit is cleared before proceeding in the wakeup path. This helps prevent race conditions w.r.t fastsleep workaround and prevents threads from switching to process context before core/subcore resources are restored. But, in the path to sleep/winkle entry, we currently don't check for lock-bit. This exposes us to following race when running with subcore on- First thread in the subcorea Another thread in the same waking up core entering sleep/winkle lwarx r15,0,r14 ori r15,r15,PNV_CORE_IDLE_LOCK_BIT stwcx. r15,0,r14 [Code to restore subcore state] lwarx r15,0,r14 [clear thread bit] stwcx. r15,0,r14 andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS stw r15,0(r14) Here, after the thread entering sleep clears its thread bit in core_idle_state, the value is overwritten by the thread waking up. In such cases when the core enters fastsleep, code mistakes an idle thread as running. Because of this, the first thread waking up from fastsleep which is supposed to resync timebase skips it. So we can end up having a core with stale timebase value. This patch fixes the above race by looping on the lock bit even while entering the idle states. Signed-off-by: Shreyas B. Prabhu Fixes: 7b54e9f213f76 'powernv/powerpc: Add winkle support for offline cpus' Cc: stable@vger.kernel.org # 3.19+ Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/idle_power7.S | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S index ccde8f084ce4..112ccf497562 100644 --- a/arch/powerpc/kernel/idle_power7.S +++ b/arch/powerpc/kernel/idle_power7.S @@ -51,6 +51,22 @@ .text +/* + * Used by threads when the lock bit of core_idle_state is set. + * Threads will spin in HMT_LOW until the lock bit is cleared. + * r14 - pointer to core_idle_state + * r15 - used to load contents of core_idle_state + */ + +core_idle_lock_held: + HMT_LOW +3: lwz r15,0(r14) + andi. r15,r15,PNV_CORE_IDLE_LOCK_BIT + bne 3b + HMT_MEDIUM + lwarx r15,0,r14 + blr + /* * Pass requested state in r3: * r3 - PNV_THREAD_NAP/SLEEP/WINKLE @@ -150,6 +166,10 @@ power7_enter_nap_mode: ld r14,PACA_CORE_IDLE_STATE_PTR(r13) lwarx_loop1: lwarx r15,0,r14 + + andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT + bnel core_idle_lock_held + andc r15,r15,r7 /* Clear thread bit */ andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS @@ -294,7 +314,7 @@ lwarx_loop2: * workaround undo code or resyncing timebase or restoring context * In either case loop until the lock bit is cleared. */ - bne core_idle_lock_held + bnel core_idle_lock_held cmpwi cr2,r15,0 lbz r4,PACA_SUBCORE_SIBLING_MASK(r13) @@ -319,15 +339,6 @@ lwarx_loop2: isync b common_exit -core_idle_lock_held: - HMT_LOW -core_idle_lock_loop: - lwz r15,0(14) - andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT - bne core_idle_lock_loop - HMT_MEDIUM - b lwarx_loop2 - first_thread_in_subcore: /* First thread in subcore to wakeup */ ori r15,r15,PNV_CORE_IDLE_LOCK_BIT From 747d34e7313034768aac83d679db43cedc5ab778 Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Sat, 27 Jun 2015 23:11:44 +0200 Subject: [PATCH 096/210] clocksource/imx: Define clocksource for mx27 The rework of the imx clocksource driver missed to add an entry for imx27 which results in a boot failure on those machines. Add the proper CLOCKSOURCE_OF_DECLARE() entry for imx27 and map it to the imx21 init. Fixes: bef11c881ba5 'ARM: imx: initialize gpt device type for DT boot' Signed-off-by: Philippe Reynes Cc: linux-arm-kernel@lists.infradead.org Cc: daniel.lezcano@linaro.org Cc: fabio.estevam@freescale.com Cc: shawnguo@kernel.org Cc: kernel@pengutronix.de Link: http://lkml.kernel.org/r/1435439504-406-1-git-send-email-tremyfr@gmail.com Signed-off-by: Thomas Gleixner --- drivers/clocksource/timer-imx-gpt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clocksource/timer-imx-gpt.c b/drivers/clocksource/timer-imx-gpt.c index 879c78423546..2d59038dec43 100644 --- a/drivers/clocksource/timer-imx-gpt.c +++ b/drivers/clocksource/timer-imx-gpt.c @@ -529,6 +529,7 @@ static void __init imx6dl_timer_init_dt(struct device_node *np) CLOCKSOURCE_OF_DECLARE(imx1_timer, "fsl,imx1-gpt", imx1_timer_init_dt); CLOCKSOURCE_OF_DECLARE(imx21_timer, "fsl,imx21-gpt", imx21_timer_init_dt); +CLOCKSOURCE_OF_DECLARE(imx27_timer, "fsl,imx27-gpt", imx21_timer_init_dt); CLOCKSOURCE_OF_DECLARE(imx31_timer, "fsl,imx31-gpt", imx31_timer_init_dt); CLOCKSOURCE_OF_DECLARE(imx25_timer, "fsl,imx25-gpt", imx31_timer_init_dt); CLOCKSOURCE_OF_DECLARE(imx50_timer, "fsl,imx50-gpt", imx31_timer_init_dt); From 7c4a976cd55972b68c75a978f171b6db5df4ce66 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 7 Jul 2015 10:14:35 +0200 Subject: [PATCH 097/210] clockevents: Allow set-state callbacks to be optional Its mandatory for the drivers to provide set_state_{oneshot|periodic}() (only if related modes are supported) and set_state_shutdown() callbacks today, if they are implementing the new set-state interface. But this leads to unnecessary noop callbacks for drivers which don't want to implement them. Over that, it will lead to a full function call for nothing really useful. Lets make all set-state callbacks optional. Suggested-by: Daniel Lezcano Signed-off-by: Viresh Kumar Signed-off-by: Daniel Lezcano Link: http://lkml.kernel.org/r/1436256875-15562-1-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Thomas Gleixner --- kernel/time/clockevents.c | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 08ccc3da3ca0..50eb107f1198 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -120,19 +120,25 @@ static int __clockevents_switch_state(struct clock_event_device *dev, /* The clockevent device is getting replaced. Shut it down. */ case CLOCK_EVT_STATE_SHUTDOWN: - return dev->set_state_shutdown(dev); + if (dev->set_state_shutdown) + return dev->set_state_shutdown(dev); + return 0; case CLOCK_EVT_STATE_PERIODIC: /* Core internal bug */ if (!(dev->features & CLOCK_EVT_FEAT_PERIODIC)) return -ENOSYS; - return dev->set_state_periodic(dev); + if (dev->set_state_periodic) + return dev->set_state_periodic(dev); + return 0; case CLOCK_EVT_STATE_ONESHOT: /* Core internal bug */ if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT)) return -ENOSYS; - return dev->set_state_oneshot(dev); + if (dev->set_state_oneshot) + return dev->set_state_oneshot(dev); + return 0; case CLOCK_EVT_STATE_ONESHOT_STOPPED: /* Core internal bug */ @@ -471,18 +477,6 @@ static int clockevents_sanity_check(struct clock_event_device *dev) if (dev->features & CLOCK_EVT_FEAT_DUMMY) return 0; - /* New state-specific callbacks */ - if (!dev->set_state_shutdown) - return -EINVAL; - - if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) && - !dev->set_state_periodic) - return -EINVAL; - - if ((dev->features & CLOCK_EVT_FEAT_ONESHOT) && - !dev->set_state_oneshot) - return -EINVAL; - return 0; } From 3f8dc44d88d3e86178eb9322c779c599f3745b21 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Tue, 7 Jul 2015 11:01:17 +1000 Subject: [PATCH 098/210] cxl: Fix refcounting in kernel API Currently the kernel API AFU dev refcounting is done on context start and stop. This patch moves this refcounting to context init and release, bringing it inline with how the userspace API does it. Without this we've seen the refcounting on the AFU get out of whack between the user and kernel API usage. This causes the AFU structures to be freed when they are actually still in use. This fixes some kref warnings we've been seeing and spurious ErrIVTE IRQs. Signed-off-by: Michael Neuling Acked-by: Ian Munsie Signed-off-by: Michael Ellerman --- drivers/misc/cxl/api.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c index 0c77240ae2fc..729e0851167d 100644 --- a/drivers/misc/cxl/api.c +++ b/drivers/misc/cxl/api.c @@ -23,6 +23,7 @@ struct cxl_context *cxl_dev_context_init(struct pci_dev *dev) afu = cxl_pci_to_afu(dev); + get_device(&afu->dev); ctx = cxl_context_alloc(); if (IS_ERR(ctx)) return ctx; @@ -31,6 +32,7 @@ struct cxl_context *cxl_dev_context_init(struct pci_dev *dev) rc = cxl_context_init(ctx, afu, false, NULL); if (rc) { kfree(ctx); + put_device(&afu->dev); return ERR_PTR(-ENOMEM); } cxl_assign_psn_space(ctx); @@ -60,6 +62,8 @@ int cxl_release_context(struct cxl_context *ctx) if (ctx->status != CLOSED) return -EBUSY; + put_device(&ctx->afu->dev); + cxl_context_free(ctx); return 0; @@ -159,7 +163,6 @@ int cxl_start_context(struct cxl_context *ctx, u64 wed, } ctx->status = STARTED; - get_device(&ctx->afu->dev); out: mutex_unlock(&ctx->status_mutex); return rc; @@ -175,12 +178,7 @@ EXPORT_SYMBOL_GPL(cxl_process_element); /* Stop a context. Returns 0 on success, otherwise -Errno */ int cxl_stop_context(struct cxl_context *ctx) { - int rc; - - rc = __detach_context(ctx); - if (!rc) - put_device(&ctx->afu->dev); - return rc; + return __detach_context(ctx); } EXPORT_SYMBOL_GPL(cxl_stop_context); From 5a3f75e3f02836518ce49536e9c460ca8e1fa290 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 5 Jul 2015 17:12:32 +0000 Subject: [PATCH 099/210] x86/irq: Plug irq vector hotplug race Jin debugged a nasty cpu hotplug race which results in leaking a irq vector on the newly hotplugged cpu. cpu N cpu M native_cpu_up device_shutdown do_boot_cpu free_msi_irqs start_secondary arch_teardown_msi_irqs smp_callin default_teardown_msi_irqs setup_vector_irq arch_teardown_msi_irq __setup_vector_irq native_teardown_msi_irq lock(vector_lock) destroy_irq install vectors unlock(vector_lock) lock(vector_lock) ---> __clear_irq_vector unlock(vector_lock) lock(vector_lock) set_cpu_online unlock(vector_lock) This leaves the irq vector(s) which are torn down on CPU M stale in the vector array of CPU N, because CPU M does not see CPU N online yet. There is a similar issue with concurrent newly setup interrupts. The alloc/free protection of irq descriptors does not prevent the above race, because it merily prevents interrupt descriptors from going away or changing concurrently. Prevent this by moving the call to setup_vector_irq() into the vector_lock held region which protects set_cpu_online(): cpu N cpu M native_cpu_up device_shutdown do_boot_cpu free_msi_irqs start_secondary arch_teardown_msi_irqs smp_callin default_teardown_msi_irqs lock(vector_lock) arch_teardown_msi_irq setup_vector_irq() __setup_vector_irq native_teardown_msi_irq install vectors destroy_irq set_cpu_online unlock(vector_lock) lock(vector_lock) __clear_irq_vector unlock(vector_lock) So cpu M either sees the cpu N online before clearing the vector or cpu N installs the vectors after cpu M has cleared it. Reported-by: xiao jin Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Joerg Roedel Cc: Borislav Petkov Cc: Yanmin Zhang Link: http://lkml.kernel.org/r/20150705171102.141898931@linutronix.de --- arch/x86/kernel/apic/vector.c | 10 ++-------- arch/x86/kernel/smpboot.c | 13 +++++-------- 2 files changed, 7 insertions(+), 16 deletions(-) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 28eba2d38b15..f813261d9740 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -409,12 +409,6 @@ static void __setup_vector_irq(int cpu) int irq, vector; struct apic_chip_data *data; - /* - * vector_lock will make sure that we don't run into irq vector - * assignments that might be happening on another cpu in parallel, - * while we setup our initial vector to irq mappings. - */ - raw_spin_lock(&vector_lock); /* Mark the inuse vectors */ for_each_active_irq(irq) { data = apic_chip_data(irq_get_irq_data(irq)); @@ -436,16 +430,16 @@ static void __setup_vector_irq(int cpu) if (!cpumask_test_cpu(cpu, data->domain)) per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED; } - raw_spin_unlock(&vector_lock); } /* - * Setup the vector to irq mappings. + * Setup the vector to irq mappings. Must be called with vector_lock held. */ void setup_vector_irq(int cpu) { int irq; + lockdep_assert_held(&vector_lock); /* * On most of the platforms, legacy PIC delivers the interrupts on the * boot cpu. But there are certain platforms where PIC interrupts are diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 0bd8c1d507b3..d3010aa79daf 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -170,11 +170,6 @@ static void smp_callin(void) */ apic_ap_setup(); - /* - * Need to setup vector mappings before we enable interrupts. - */ - setup_vector_irq(smp_processor_id()); - /* * Save our processor parameters. Note: this information * is needed for clock calibration. @@ -239,11 +234,13 @@ static void notrace start_secondary(void *unused) check_tsc_sync_target(); /* - * We need to hold vector_lock so there the set of online cpus - * does not change while we are assigning vectors to cpus. Holding - * this lock ensures we don't half assign or remove an irq from a cpu. + * Lock vector_lock and initialize the vectors on this cpu + * before setting the cpu online. We must set it online with + * vector_lock held to prevent a concurrent setup/teardown + * from seeing a half valid vector space. */ lock_vector_lock(); + setup_vector_irq(smp_processor_id()); set_cpu_online(smp_processor_id(), true); unlock_vector_lock(); cpu_set_state_online(smp_processor_id()); From cbb24dc761d95fe39a7a122bb1b298e9604cae15 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 5 Jul 2015 17:12:33 +0000 Subject: [PATCH 100/210] x86/irq: Use proper locking in check_irq_vectors_for_cpu_disable() It's unsafe to examine fields in the irq descriptor w/o holding the descriptor lock. Add proper locking. While at it add a comment why the vector check can run lock less Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: xiao jin Cc: Joerg Roedel Cc: Borislav Petkov Cc: Yanmin Zhang Link: http://lkml.kernel.org/r/20150705171102.236544164@linutronix.de --- arch/x86/kernel/irq.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 88b366487b0e..85ca76e6241c 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -347,14 +347,22 @@ int check_irq_vectors_for_cpu_disable(void) if (!desc) continue; + /* + * Protect against concurrent action removal, + * affinity changes etc. + */ + raw_spin_lock(&desc->lock); data = irq_desc_get_irq_data(desc); cpumask_copy(&affinity_new, data->affinity); cpumask_clear_cpu(this_cpu, &affinity_new); /* Do not count inactive or per-cpu irqs. */ - if (!irq_has_action(irq) || irqd_is_per_cpu(data)) + if (!irq_has_action(irq) || irqd_is_per_cpu(data)) { + raw_spin_unlock(&desc->lock); continue; + } + raw_spin_unlock(&desc->lock); /* * A single irq may be mapped to multiple * cpu's vector_irq[] (for example IOAPIC cluster @@ -385,6 +393,9 @@ int check_irq_vectors_for_cpu_disable(void) * vector. If the vector is marked in the used vectors * bitmap or an irq is assigned to it, we don't count * it as available. + * + * As this is an inaccurate snapshot anyway, we can do + * this w/o holding vector_lock. */ for (vector = FIRST_EXTERNAL_VECTOR; vector < first_system_vector; vector++) { From 09cf92b784fae6109450c5d64f9908066d605249 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 5 Jul 2015 17:12:35 +0000 Subject: [PATCH 101/210] x86/irq: Retrieve irq data after locking irq_desc irq_data is protected by irq_desc->lock, so retrieving the irq chip from irq_data outside the lock is racy vs. an concurrent update. Move it into the lock held region. While at it add a comment why the vector walk does not require vector_lock. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: xiao jin Cc: Joerg Roedel Cc: Borislav Petkov Cc: Yanmin Zhang Link: http://lkml.kernel.org/r/20150705171102.331320612@linutronix.de --- arch/x86/kernel/irq.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 85ca76e6241c..c7dfe1be784e 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -497,6 +497,11 @@ void fixup_irqs(void) */ mdelay(1); + /* + * We can walk the vector array of this cpu without holding + * vector_lock because the cpu is already marked !online, so + * nothing else will touch it. + */ for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { unsigned int irr; @@ -508,9 +513,9 @@ void fixup_irqs(void) irq = __this_cpu_read(vector_irq[vector]); desc = irq_to_desc(irq); + raw_spin_lock(&desc->lock); data = irq_desc_get_irq_data(desc); chip = irq_data_get_irq_chip(data); - raw_spin_lock(&desc->lock); if (chip->irq_retrigger) { chip->irq_retrigger(data); __this_cpu_write(vector_irq[vector], VECTOR_RETRIGGERED); From 6d3dab7d84177f836b14961b4d252d0959d66768 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 7 Jul 2015 13:08:39 +0200 Subject: [PATCH 102/210] PM / wakeirq: Avoid setting power.wakeirq too hastily If dev_pm_attach_wake_irq() fails, the device's power.wakeirq field should not be set to point to the struct wake_irq passed to that function, as that object will be freed going forward. For this reason, make dev_pm_attach_wake_irq() first call device_wakeup_attach_irq() and only set the device's power.wakeirq field if that's successful. That requires device_wakeup_attach_irq() to be called under the device's power.lock lock, but since dev_pm_attach_wake_irq() is the only caller of it, the requisite changes are easy to make. Fixes: 4990d4fe327b (PM / Wakeirq: Add automated device wake IRQ handling) Reported-by: Felipe Balbi Tested-by: Tony Lindgren Signed-off-by: Rafael J. Wysocki --- drivers/base/power/wakeirq.c | 12 +++++------- drivers/base/power/wakeup.c | 31 ++++++++++--------------------- 2 files changed, 15 insertions(+), 28 deletions(-) diff --git a/drivers/base/power/wakeirq.c b/drivers/base/power/wakeirq.c index 7470004ca810..eb6e67451dec 100644 --- a/drivers/base/power/wakeirq.c +++ b/drivers/base/power/wakeirq.c @@ -45,14 +45,12 @@ static int dev_pm_attach_wake_irq(struct device *dev, int irq, return -EEXIST; } - dev->power.wakeirq = wirq; - spin_unlock_irqrestore(&dev->power.lock, flags); - err = device_wakeup_attach_irq(dev, wirq); - if (err) - return err; + if (!err) + dev->power.wakeirq = wirq; - return 0; + spin_unlock_irqrestore(&dev->power.lock, flags); + return err; } /** @@ -105,10 +103,10 @@ void dev_pm_clear_wake_irq(struct device *dev) return; spin_lock_irqsave(&dev->power.lock, flags); + device_wakeup_detach_irq(dev); dev->power.wakeirq = NULL; spin_unlock_irqrestore(&dev->power.lock, flags); - device_wakeup_detach_irq(dev); if (wirq->dedicated_irq) free_irq(wirq->irq, wirq); kfree(wirq); diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 7332ebc9cab0..15d27d782dc1 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -247,32 +247,25 @@ EXPORT_SYMBOL_GPL(device_wakeup_enable); * Attach a device wakeirq to the wakeup source so the device * wake IRQ can be configured automatically for suspend and * resume. + * + * Call under the device's power.lock lock. */ int device_wakeup_attach_irq(struct device *dev, struct wake_irq *wakeirq) { struct wakeup_source *ws; - int ret = 0; - spin_lock_irq(&dev->power.lock); ws = dev->power.wakeup; if (!ws) { dev_err(dev, "forgot to call call device_init_wakeup?\n"); - ret = -EINVAL; - goto unlock; + return -EINVAL; } - if (ws->wakeirq) { - ret = -EEXIST; - goto unlock; - } + if (ws->wakeirq) + return -EEXIST; ws->wakeirq = wakeirq; - -unlock: - spin_unlock_irq(&dev->power.lock); - - return ret; + return 0; } /** @@ -280,20 +273,16 @@ unlock: * @dev: Device to handle * * Removes a device wakeirq from the wakeup source. + * + * Call under the device's power.lock lock. */ void device_wakeup_detach_irq(struct device *dev) { struct wakeup_source *ws; - spin_lock_irq(&dev->power.lock); ws = dev->power.wakeup; - if (!ws) - goto unlock; - - ws->wakeirq = NULL; - -unlock: - spin_unlock_irq(&dev->power.lock); + if (ws) + ws->wakeirq = NULL; } /** From b6cfb277378ef831c0fa84bcff5049307294adc6 Mon Sep 17 00:00:00 2001 From: Al Stone Date: Mon, 6 Jul 2015 17:16:47 -0600 Subject: [PATCH 103/210] ACPI / ARM64: add BAD_MADT_GICC_ENTRY() macro The BAD_MADT_ENTRY() macro is designed to work for all of the subtables of the MADT. In the ACPI 5.1 version of the spec, the struct for the GICC subtable (struct acpi_madt_generic_interrupt) is 76 bytes long; in ACPI 6.0, the struct is 80 bytes long. But, there is only one definition in ACPICA for this struct -- and that is the 6.0 version. Hence, when BAD_MADT_ENTRY() compares the struct size to the length in the GICC subtable, it fails if 5.1 structs are in use, and there are systems in the wild that have them. This patch adds the BAD_MADT_GICC_ENTRY() that checks the GICC subtable only, accounting for the difference in specification versions that are possible. The BAD_MADT_ENTRY() will continue to work as is for all other MADT subtables. This code is being added to an arm64 header file since that is currently the only architecture using the GICC subtable of the MADT. As a GIC is specific to ARM, it is also unlikely the subtable will be used elsewhere. Fixes: aeb823bbacc2 ("ACPICA: ACPI 6.0: Add changes for FADT table.") Signed-off-by: Al Stone Acked-by: Will Deacon Acked-by: "Rafael J. Wysocki" [catalin.marinas@arm.com: extra brackets around macro arguments] Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/acpi.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index 39248d3adf5d..406485ed110a 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -19,6 +19,14 @@ #include #include +/* Macros for consistency checks of the GICC subtable of MADT */ +#define ACPI_MADT_GICC_LENGTH \ + (acpi_gbl_FADT.header.revision < 6 ? 76 : 80) + +#define BAD_MADT_GICC_ENTRY(entry, end) \ + (!(entry) || (unsigned long)(entry) + sizeof(*(entry)) > (end) || \ + (entry)->header.length != ACPI_MADT_GICC_LENGTH) + /* Basic configuration for ACPI */ #ifdef CONFIG_ACPI /* ACPI table mapping after acpi_gbl_permanent_mmap is set */ From 99e3e3ae332b6ca91d5c444ea7849f367f5e5a76 Mon Sep 17 00:00:00 2001 From: Al Stone Date: Mon, 6 Jul 2015 17:16:48 -0600 Subject: [PATCH 104/210] ACPI / ARM64 : use the new BAD_MADT_GICC_ENTRY macro For those parts of the arm64 ACPI code that need to check GICC subtables in the MADT, use the new BAD_MADT_GICC_ENTRY macro instead of the previous BAD_MADT_ENTRY. The new macro takes into account differences in the size of the GICC subtable that the old macro did not; this caused failures even though the subtable entries are valid. Fixes: aeb823bbacc2 ("ACPICA: ACPI 6.0: Add changes for FADT table.") Signed-off-by: Al Stone Reviewed-by: Hanjun Guo Acked-by: Will Deacon Acked-by: "Rafael J. Wysocki" Signed-off-by: Catalin Marinas --- arch/arm64/kernel/smp.c | 2 +- drivers/irqchip/irq-gic.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 695801a54ca5..50fb4696654e 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -438,7 +438,7 @@ acpi_parse_gic_cpu_interface(struct acpi_subtable_header *header, struct acpi_madt_generic_interrupt *processor; processor = (struct acpi_madt_generic_interrupt *)header; - if (BAD_MADT_ENTRY(processor, end)) + if (BAD_MADT_GICC_ENTRY(processor, end)) return -EINVAL; acpi_table_print_madt_entry(header); diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 8d7e1c8b6d56..4dd88264dff5 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -1055,7 +1055,7 @@ gic_acpi_parse_madt_cpu(struct acpi_subtable_header *header, processor = (struct acpi_madt_generic_interrupt *)header; - if (BAD_MADT_ENTRY(processor, end)) + if (BAD_MADT_GICC_ENTRY(processor, end)) return -EINVAL; /* From ef37566cf8d607844cee5a01597d828c8fd1a501 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 7 Jul 2015 17:15:39 +0100 Subject: [PATCH 105/210] arm64: Keep the ARM64 Kconfig selects sorted Move EDAC_SUPPORT to the right place. Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 0f6edb14b7e4..318175f62c24 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -23,9 +23,9 @@ config ARM64 select BUILDTIME_EXTABLE_SORT select CLONE_BACKWARDS select COMMON_CLK - select EDAC_SUPPORT select CPU_PM if (SUSPEND || CPU_IDLE) select DCACHE_WORD_ACCESS + select EDAC_SUPPORT select GENERIC_ALLOCATOR select GENERIC_CLOCKEVENTS select GENERIC_CLOCKEVENTS_BROADCAST if SMP From 8eb231261fdd20768db23863d00ef277de4b0543 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 7 Jul 2015 14:11:00 +0200 Subject: [PATCH 106/210] tick/broadcast: Prevent hrtimer recursion The hrtimer based broadcast vehicle can cause a hrtimer recursion which went unnoticed until we changed the hrtimer expiry code to keep track of the currently running timer. local_timer_interrupt() local_handler() hrtimer_interrupt() expire_hrtimers() broadcast_hrtimer() send_ipis() local_handler() hrtimer_interrupt() .... Solution is simple: Prevent the local handler call from the broadcast code when the broadcast 'device' is hrtimer based. [ Split out from a larger combo patch ] Tested-by: Sudeep Holla Signed-off-by: Thomas Gleixner Cc: Suzuki Poulose Cc: Lorenzo Pieralisi Cc: Catalin Marinas Cc: Rafael J. Wysocki Cc: Peter Zijlstra Cc: Preeti U Murthy Cc: Ingo Molnar Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1507070929360.3916@nanos --- kernel/time/tick-broadcast.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index d39f32cdd1b5..a76204089f78 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -265,8 +265,22 @@ static bool tick_do_broadcast(struct cpumask *mask) * Check, if the current cpu is in the mask */ if (cpumask_test_cpu(cpu, mask)) { + struct clock_event_device *bc = tick_broadcast_device.evtdev; + cpumask_clear_cpu(cpu, mask); - local = true; + /* + * We only run the local handler, if the broadcast + * device is not hrtimer based. Otherwise we run into + * a hrtimer recursion. + * + * local timer_interrupt() + * local_handler() + * expire_hrtimers() + * bc_handler() + * local_handler() + * expire_hrtimers() + */ + local = !(bc->features & CLOCK_EVT_FEAT_HRTIMER); } if (!cpumask_empty(mask)) { From e0454311903d3fd0f12a86c9e65d7b271c2bb05d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 7 Jul 2015 14:07:27 +0200 Subject: [PATCH 107/210] tick/broadcast: Sanity check the shutdown of the local clock_event The broadcast code shuts down the local clock event unconditionally even if no broadcast device is installed or if the broadcast device is hrtimer based. Add proper sanity checks. [ Split out from a larger combo patch ] Reported-and-tested-by: Sudeep Holla Signed-off-by: Thomas Gleixner Cc: Suzuki Poulose Cc: Lorenzo Pieralisi Cc: Catalin Marinas Cc: Rafael J. Wysocki Cc: Peter Zijlstra Cc: Preeti U Murthy Cc: Ingo Molnar Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1507070929360.3916@nanos --- kernel/time/tick-broadcast.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index a76204089f78..9877d0b0aefc 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -159,7 +159,7 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) { struct clock_event_device *bc = tick_broadcast_device.evtdev; unsigned long flags; - int ret; + int ret = 0; raw_spin_lock_irqsave(&tick_broadcast_lock, flags); @@ -221,13 +221,14 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) * If we kept the cpu in the broadcast mask, * tell the caller to leave the per cpu device * in shutdown state. The periodic interrupt - * is delivered by the broadcast device. + * is delivered by the broadcast device, if + * the broadcast device exists and is not + * hrtimer based. */ - ret = cpumask_test_cpu(cpu, tick_broadcast_mask); + if (bc && !(bc->features & CLOCK_EVT_FEAT_HRTIMER)) + ret = cpumask_test_cpu(cpu, tick_broadcast_mask); break; default: - /* Nothing to do */ - ret = 0; break; } } @@ -373,8 +374,16 @@ void tick_broadcast_control(enum tick_broadcast_mode mode) case TICK_BROADCAST_ON: cpumask_set_cpu(cpu, tick_broadcast_on); if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) { - if (tick_broadcast_device.mode == - TICKDEV_MODE_PERIODIC) + /* + * Only shutdown the cpu local device, if: + * + * - the broadcast device exists + * - the broadcast device is not a hrtimer based one + * - the broadcast device is in periodic mode to + * avoid a hickup during switch to oneshot mode + */ + if (bc && !(bc->features & CLOCK_EVT_FEAT_HRTIMER) && + tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) clockevents_shutdown(dev); } break; From f32dd117051185da6e923b35491a44d7debeeea5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 7 Jul 2015 16:29:38 +0200 Subject: [PATCH 108/210] tick/broadcast: Make idle check independent from mode and config Currently the broadcast busy check, which prevents the idle code from going into deep idle, works only in one shot mode. If NOHZ and HIGHRES are off (config or command line) there is no sanity check at all, so under certain conditions cpus are allowed to go into deep idle, where the local timer stops, and are not woken up again because there is no broadcast timer installed or a hrtimer based broadcast device is not evaluated. Move tick_broadcast_oneshot_control() into the common code and provide proper subfunctions for the various config combinations. The common check in tick_broadcast_oneshot_control() is for the C3STOP misfeature flag of the local clock event device. If its not set, idle can proceed. If set, further checks are necessary. Provide checks for the trivial cases: - If broadcast is disabled in the config, then return busy - If oneshot mode (NOHZ/HIGHES) is disabled in the config, return busy if the broadcast device is hrtimer based. - If oneshot mode is enabled in the config call the original tick_broadcast_oneshot_control() function. That function needs extra checks which will be implemented in seperate patches. [ Split out from a larger combo patch ] Reported-and-tested-by: Sudeep Holla Signed-off-by: Thomas Gleixner Cc: Suzuki Poulose Cc: Lorenzo Pieralisi Cc: Catalin Marinas Cc: Rafael J. Wysocki Cc: Peter Zijlstra Cc: Preeti U Murthy Cc: Ingo Molnar Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1507070929360.3916@nanos --- include/linux/tick.h | 4 ---- kernel/time/tick-broadcast.c | 26 +++++++++++--------------- kernel/time/tick-common.c | 21 +++++++++++++++++++++ kernel/time/tick-sched.h | 10 ++++++++++ 4 files changed, 42 insertions(+), 19 deletions(-) diff --git a/include/linux/tick.h b/include/linux/tick.h index 3741ba1a652c..6916dcb61857 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -67,11 +67,7 @@ extern void tick_broadcast_control(enum tick_broadcast_mode mode); static inline void tick_broadcast_control(enum tick_broadcast_mode mode) { } #endif /* BROADCAST */ -#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT) extern int tick_broadcast_oneshot_control(enum tick_broadcast_state state); -#else -static inline int tick_broadcast_oneshot_control(enum tick_broadcast_state state) { return 0; } -#endif static inline void tick_broadcast_enable(void) { diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 9877d0b0aefc..ef77b16ad5df 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -685,18 +685,7 @@ static void broadcast_shutdown_local(struct clock_event_device *bc, clockevents_switch_state(dev, CLOCK_EVT_STATE_SHUTDOWN); } -/** - * tick_broadcast_oneshot_control - Enter/exit broadcast oneshot mode - * @state: The target state (enter/exit) - * - * The system enters/leaves a state, where affected devices might stop - * Returns 0 on success, -EBUSY if the cpu is used to broadcast wakeups. - * - * Called with interrupts disabled, so clockevents_lock is not - * required here because the local clock event device cannot go away - * under us. - */ -int tick_broadcast_oneshot_control(enum tick_broadcast_state state) +int __tick_broadcast_oneshot_control(enum tick_broadcast_state state) { struct clock_event_device *bc, *dev; struct tick_device *td; @@ -717,9 +706,6 @@ int tick_broadcast_oneshot_control(enum tick_broadcast_state state) td = this_cpu_ptr(&tick_cpu_device); dev = td->evtdev; - if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) - return 0; - raw_spin_lock(&tick_broadcast_lock); bc = tick_broadcast_device.evtdev; cpu = smp_processor_id(); @@ -961,6 +947,16 @@ bool tick_broadcast_oneshot_available(void) return bc ? bc->features & CLOCK_EVT_FEAT_ONESHOT : false; } +#else +int __tick_broadcast_oneshot_control(enum tick_broadcast_state state) +{ + struct clock_event_device *bc = tick_broadcast_device.evtdev; + + if (!bc || (bc->features & CLOCK_EVT_FEAT_HRTIMER)) + return -EBUSY; + + return 0; +} #endif void __init tick_broadcast_init(void) diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 76446cb5dfe1..55e13efff1ab 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -343,6 +343,27 @@ out_bc: tick_install_broadcast_device(newdev); } +/** + * tick_broadcast_oneshot_control - Enter/exit broadcast oneshot mode + * @state: The target state (enter/exit) + * + * The system enters/leaves a state, where affected devices might stop + * Returns 0 on success, -EBUSY if the cpu is used to broadcast wakeups. + * + * Called with interrupts disabled, so clockevents_lock is not + * required here because the local clock event device cannot go away + * under us. + */ +int tick_broadcast_oneshot_control(enum tick_broadcast_state state) +{ + struct tick_device *td = this_cpu_ptr(&tick_cpu_device); + + if (!(td->evtdev->features & CLOCK_EVT_FEAT_C3STOP)) + return 0; + + return __tick_broadcast_oneshot_control(state); +} + #ifdef CONFIG_HOTPLUG_CPU /* * Transfer the do_timer job away from a dying cpu. diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h index 42fdf4958bcc..a4a8d4e9baa1 100644 --- a/kernel/time/tick-sched.h +++ b/kernel/time/tick-sched.h @@ -71,4 +71,14 @@ extern void tick_cancel_sched_timer(int cpu); static inline void tick_cancel_sched_timer(int cpu) { } #endif +#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST +extern int __tick_broadcast_oneshot_control(enum tick_broadcast_state state); +#else +static inline int +__tick_broadcast_oneshot_control(enum tick_broadcast_state state) +{ + return -EBUSY; +} +#endif + #endif From b78f3f3c898c824bf56ab55cfa59fc72be49c349 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 7 Jul 2015 16:34:32 +0200 Subject: [PATCH 109/210] tick/broadcast: Prevent deep idle if no broadcast device available Add a check for a installed broadcast device to the oneshot control function and return busy if not. [ Split out from a larger combo patch ] Reported-and-tested-by: Sudeep Holla Signed-off-by: Thomas Gleixner Cc: Suzuki Poulose Cc: Lorenzo Pieralisi Cc: Catalin Marinas Cc: Rafael J. Wysocki Cc: Peter Zijlstra Cc: Preeti U Murthy Cc: Ingo Molnar Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1507070929360.3916@nanos --- kernel/time/tick-broadcast.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index ef77b16ad5df..fad3f789beec 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -692,6 +692,13 @@ int __tick_broadcast_oneshot_control(enum tick_broadcast_state state) int cpu, ret = 0; ktime_t now; + /* + * If there is no broadcast device, tell the caller not to go + * into deep idle. + */ + if (!tick_broadcast_device.evtdev) + return -EBUSY; + /* * Periodic mode does not care about the enter/exit of power * states From e3ac79e087ffe8a1f953ed44a74acf7616cb0b25 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 7 Jul 2015 16:38:11 +0200 Subject: [PATCH 110/210] tick/broadcast: Move the check for periodic mode inside state handling We need to check more than the periodic mode for proper operation in all runtime combinations. To avoid code duplication move the check into the enter state handling. No functional change. [ Split out from a larger combo patch ] Reported-and-tested-by: Sudeep Holla Signed-off-by: Thomas Gleixner Cc: Suzuki Poulose Cc: Lorenzo Pieralisi Cc: Catalin Marinas Cc: Rafael J. Wysocki Cc: Peter Zijlstra Cc: Preeti U Murthy Cc: Ingo Molnar Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1507070929360.3916@nanos --- kernel/time/tick-broadcast.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index fad3f789beec..83aa92e87a85 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -688,7 +688,6 @@ static void broadcast_shutdown_local(struct clock_event_device *bc, int __tick_broadcast_oneshot_control(enum tick_broadcast_state state) { struct clock_event_device *bc, *dev; - struct tick_device *td; int cpu, ret = 0; ktime_t now; @@ -699,25 +698,20 @@ int __tick_broadcast_oneshot_control(enum tick_broadcast_state state) if (!tick_broadcast_device.evtdev) return -EBUSY; - /* - * Periodic mode does not care about the enter/exit of power - * states - */ - if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) - return 0; - - /* - * We are called with preemtion disabled from the depth of the - * idle code, so we can't be moved away. - */ - td = this_cpu_ptr(&tick_cpu_device); - dev = td->evtdev; + dev = this_cpu_ptr(&tick_cpu_device)->evtdev; raw_spin_lock(&tick_broadcast_lock); bc = tick_broadcast_device.evtdev; cpu = smp_processor_id(); if (state == TICK_BROADCAST_ENTER) { + /* + * If the broadcast device is in periodic mode, we + * return. + */ + if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) + goto out; + if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) { WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask)); broadcast_shutdown_local(bc, dev); From d33257264b0267a8fd20f6717abbb484c9e21130 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 7 Jul 2015 17:45:22 +0200 Subject: [PATCH 111/210] tick/broadcast: Return busy if periodic mode and hrtimer broadcast If the system is in periodic mode and the broadcast device is hrtimer based, return busy as we have no proper handling for this. [ Split out from a larger combo patch ] Tested-by: Sudeep Holla Signed-off-by: Thomas Gleixner Cc: Suzuki Poulose Cc: Lorenzo Pieralisi Cc: Catalin Marinas Cc: Rafael J. Wysocki Cc: Peter Zijlstra Cc: Preeti U Murthy Cc: Ingo Molnar Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1507070929360.3916@nanos --- kernel/time/tick-broadcast.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 83aa92e87a85..da7b40fdf6d0 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -709,8 +709,12 @@ int __tick_broadcast_oneshot_control(enum tick_broadcast_state state) * If the broadcast device is in periodic mode, we * return. */ - if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) + if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) { + /* If it is a hrtimer based broadcast, return busy */ + if (bc->features & CLOCK_EVT_FEAT_HRTIMER) + ret = -EBUSY; goto out; + } if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) { WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask)); From 0cc5281aa592d0020868f6ccaed359b4ad7b2684 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 7 Jul 2015 16:45:15 +0200 Subject: [PATCH 112/210] tick/broadcast: Return busy when IPI is pending Tell the idle code not to go deep if the broadcast IPI is about to arrive. [ Split out from a larger combo patch ] Tested-by: Sudeep Holla Signed-off-by: Thomas Gleixner Cc: Suzuki Poulose Cc: Lorenzo Pieralisi Cc: Catalin Marinas Cc: Rafael J. Wysocki Cc: Peter Zijlstra Cc: Preeti U Murthy Cc: Ingo Molnar Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1507070929360.3916@nanos --- kernel/time/tick-broadcast.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index da7b40fdf6d0..70b47bc928b9 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -725,11 +725,15 @@ int __tick_broadcast_oneshot_control(enum tick_broadcast_state state) * if the cpu local event is earlier than the * broadcast event. If the current CPU is in * the force mask, then we are going to be - * woken by the IPI right away. + * woken by the IPI right away; we return + * busy, so the CPU does not try to go deep + * idle. */ - if (!cpumask_test_cpu(cpu, tick_broadcast_force_mask) && - dev->next_event.tv64 < bc->next_event.tv64) + if (cpumask_test_cpu(cpu, tick_broadcast_force_mask)) { + ret = -EBUSY; + } else if (dev->next_event.tv64 < bc->next_event.tv64) { tick_broadcast_set_event(bc, cpu, dev->next_event); + } } /* * If the current CPU owns the hrtimer broadcast From d5113e13a550bc9c2b53cc9944b8a06453c4a0a1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 7 Jul 2015 16:43:04 +0200 Subject: [PATCH 113/210] tick/broadcast: Check for hrtimer broadcast active early If the current cpu is the one which has the hrtimer based broadcast queued then we better return busy immediately instead of going through loops and hoops to figure that out. [ Split out from a larger combo patch ] Tested-by: Sudeep Holla Signed-off-by: Thomas Gleixner Cc: Suzuki Poulose Cc: Lorenzo Pieralisi Cc: Catalin Marinas Cc: Rafael J. Wysocki Cc: Peter Zijlstra Cc: Preeti U Murthy Cc: Ingo Molnar Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1507070929360.3916@nanos --- kernel/time/tick-broadcast.c | 36 ++++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 70b47bc928b9..c8d731ac9563 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -705,6 +705,17 @@ int __tick_broadcast_oneshot_control(enum tick_broadcast_state state) cpu = smp_processor_id(); if (state == TICK_BROADCAST_ENTER) { + /* + * If the current CPU owns the hrtimer broadcast + * mechanism, it cannot go deep idle and we do not add + * the CPU to the broadcast mask. We don't have to go + * through the EXIT path as the local timer is not + * shutdown. + */ + ret = broadcast_needs_cpu(bc, cpu); + if (ret) + goto out; + /* * If the broadcast device is in periodic mode, we * return. @@ -718,7 +729,10 @@ int __tick_broadcast_oneshot_control(enum tick_broadcast_state state) if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) { WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask)); + + /* Conditionally shut down the local timer. */ broadcast_shutdown_local(bc, dev); + /* * We only reprogram the broadcast timer if we * did not mark ourself in the force mask and @@ -733,18 +747,20 @@ int __tick_broadcast_oneshot_control(enum tick_broadcast_state state) ret = -EBUSY; } else if (dev->next_event.tv64 < bc->next_event.tv64) { tick_broadcast_set_event(bc, cpu, dev->next_event); + /* + * In case of hrtimer broadcasts the + * programming might have moved the + * timer to this cpu. If yes, remove + * us from the broadcast mask and + * return busy. + */ + ret = broadcast_needs_cpu(bc, cpu); + if (ret) { + cpumask_clear_cpu(cpu, + tick_broadcast_oneshot_mask); + } } } - /* - * If the current CPU owns the hrtimer broadcast - * mechanism, it cannot go deep idle and we remove the - * CPU from the broadcast mask. We don't have to go - * through the EXIT path as the local timer is not - * shutdown. - */ - ret = broadcast_needs_cpu(bc, cpu); - if (ret) - cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask); } else { if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) { clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT); From c4288334818c81c946acb23d2319881f58c3d497 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 5 Jul 2015 20:53:17 +0000 Subject: [PATCH 114/210] tick/broadcast: Handle spurious interrupts gracefully Andriy reported that on a virtual machine the warning about negative expiry time in the clock events programming code triggered: hpet: hpet0 irq 40 for MSI hpet: hpet1 irq 41 for MSI Switching to clocksource hpet WARNING: at kernel/time/clockevents.c:239 [] clockevents_program_event+0xdb/0xf0 [] tick_handle_periodic_broadcast+0x41/0x50 [] timer_interrupt+0x15/0x20 When the second hpet is installed as a per cpu timer the broadcast event is not longer required and stopped, which sets the next_evt of the broadcast device to KTIME_MAX. If after that a spurious interrupt happens on the broadcast device, then the current code blindly handles it and tries to reprogram the broadcast device afterwards, which adds the period to next_evt. KTIME_MAX + period results in a negative expiry value causing the WARN_ON in the clockevents code to trigger. Add a proper check for the state of the broadcast device into the interrupt handler and return if the interrupt is spurious. [ Folded in pointer fix from Sudeep ] Reported-by: Andriy Gapon Signed-off-by: Thomas Gleixner Cc: Sudeep Holla Cc: Peter Zijlstra Cc: Preeti U Murthy Link: http://lkml.kernel.org/r/20150705205221.802094647@linutronix.de --- kernel/time/tick-broadcast.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index c8d731ac9563..ee3cf942d6eb 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -316,6 +316,13 @@ static void tick_handle_periodic_broadcast(struct clock_event_device *dev) bool bc_local; raw_spin_lock(&tick_broadcast_lock); + + /* Handle spurious interrupts gracefully */ + if (clockevent_state_shutdown(tick_broadcast_device.evtdev)) { + raw_spin_unlock(&tick_broadcast_lock); + return; + } + bc_local = tick_do_periodic_broadcast(); if (clockevent_state_oneshot(dev)) { From 539c4b88146cc320aee18b08ebb43ab57d523dcc Mon Sep 17 00:00:00 2001 From: duson Date: Tue, 7 Jul 2015 10:25:39 -0700 Subject: [PATCH 115/210] Input: elan_i2c - change the hover event from MT to ST The firmware only reports hover condition while the very first contact is approaching the surface; the hover is not reported for the subsequent contacts. Therefore we should not be using ABS_MT_DISTANCE to report hover but rather its single-touch counterpart ABS_DISTANCE. Signed-off-by: Duson Lin Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elan_i2c_core.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index 62641f2adaf7..5b5f403d8ce6 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -771,7 +771,7 @@ static const struct attribute_group *elan_sysfs_groups[] = { */ static void elan_report_contact(struct elan_tp_data *data, int contact_num, bool contact_valid, - bool hover_event, u8 *finger_data) + u8 *finger_data) { struct input_dev *input = data->input; unsigned int pos_x, pos_y; @@ -815,9 +815,7 @@ static void elan_report_contact(struct elan_tp_data *data, input_mt_report_slot_state(input, MT_TOOL_FINGER, true); input_report_abs(input, ABS_MT_POSITION_X, pos_x); input_report_abs(input, ABS_MT_POSITION_Y, data->max_y - pos_y); - input_report_abs(input, ABS_MT_DISTANCE, hover_event); - input_report_abs(input, ABS_MT_PRESSURE, - hover_event ? 0 : scaled_pressure); + input_report_abs(input, ABS_MT_PRESSURE, scaled_pressure); input_report_abs(input, ABS_TOOL_WIDTH, mk_x); input_report_abs(input, ABS_MT_TOUCH_MAJOR, major); input_report_abs(input, ABS_MT_TOUCH_MINOR, minor); @@ -839,14 +837,14 @@ static void elan_report_absolute(struct elan_tp_data *data, u8 *packet) hover_event = hover_info & 0x40; for (i = 0; i < ETP_MAX_FINGERS; i++) { contact_valid = tp_info & (1U << (3 + i)); - elan_report_contact(data, i, contact_valid, hover_event, - finger_data); + elan_report_contact(data, i, contact_valid, finger_data); if (contact_valid) finger_data += ETP_FINGER_DATA_LEN; } input_report_key(input, BTN_LEFT, tp_info & 0x01); + input_report_abs(input, ABS_DISTANCE, hover_event != 0); input_mt_report_pointer_emulation(input, true); input_sync(input); } @@ -922,6 +920,7 @@ static int elan_setup_input_device(struct elan_tp_data *data) input_abs_set_res(input, ABS_Y, data->y_res); input_set_abs_params(input, ABS_PRESSURE, 0, ETP_MAX_PRESSURE, 0, 0); input_set_abs_params(input, ABS_TOOL_WIDTH, 0, ETP_FINGER_WIDTH, 0, 0); + input_set_abs_params(input, ABS_DISTANCE, 0, 1, 0, 0); /* And MT parameters */ input_set_abs_params(input, ABS_MT_POSITION_X, 0, data->max_x, 0, 0); @@ -934,7 +933,6 @@ static int elan_setup_input_device(struct elan_tp_data *data) ETP_FINGER_WIDTH * max_width, 0, 0); input_set_abs_params(input, ABS_MT_TOUCH_MINOR, 0, ETP_FINGER_WIDTH * min_width, 0, 0); - input_set_abs_params(input, ABS_MT_DISTANCE, 0, 1, 0, 0); data->input = input; From 71eeedcf51544831ae356a773814401143ed32d4 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Tue, 7 Jul 2015 20:49:03 +0200 Subject: [PATCH 116/210] MIPS: Lemote 2F: Fix build caused by recent mass rename. CC arch/mips/loongson64/lemote-2f/clock.o /home/ralf/src/linux/linux-mips/arch/mips/loongson64/lemote-2f/clock.c:18:40: fatal error: asm/mach-loongson/loongson.h: No such file or directory #include ^ compilation terminated. Signed-off-by: Ralf Baechle --- arch/mips/loongson64/lemote-2f/clock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/loongson64/lemote-2f/clock.c b/arch/mips/loongson64/lemote-2f/clock.c index 462e34d46b4a..4c5b94af3643 100644 --- a/arch/mips/loongson64/lemote-2f/clock.c +++ b/arch/mips/loongson64/lemote-2f/clock.c @@ -15,7 +15,7 @@ #include #include -#include +#include static LIST_HEAD(clock_list); static DEFINE_SPINLOCK(clock_lock); From 0bb383a2d8f51e32ecc156f3f84067420ffe6d20 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Tue, 7 Jul 2015 20:56:04 +0200 Subject: [PATCH 117/210] MIPS, CPUFREQ: Fix spelling of Institute. Signed-off-by: Ralf Baechle --- arch/mips/include/asm/mach-loongson64/mmzone.h | 2 +- arch/mips/loongson64/common/bonito-irq.c | 2 +- arch/mips/loongson64/common/cmdline.c | 2 +- arch/mips/loongson64/common/cs5536/cs5536_mfgpt.c | 2 +- arch/mips/loongson64/common/env.c | 2 +- arch/mips/loongson64/common/irq.c | 2 +- arch/mips/loongson64/common/setup.c | 2 +- arch/mips/loongson64/fuloong-2e/irq.c | 2 +- arch/mips/loongson64/lemote-2f/clock.c | 2 +- arch/mips/loongson64/loongson-3/numa.c | 2 +- drivers/cpufreq/loongson2_cpufreq.c | 2 +- 11 files changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/mips/include/asm/mach-loongson64/mmzone.h b/arch/mips/include/asm/mach-loongson64/mmzone.h index 37c08a27b4f0..c9f7e231e66b 100644 --- a/arch/mips/include/asm/mach-loongson64/mmzone.h +++ b/arch/mips/include/asm/mach-loongson64/mmzone.h @@ -1,6 +1,6 @@ /* * Copyright (C) 2010 Loongson Inc. & Lemote Inc. & - * Insititute of Computing Technology + * Institute of Computing Technology * Author: Xiang Gao, gaoxiang@ict.ac.cn * Huacai Chen, chenhc@lemote.com * Xiaofu Meng, Shuangshuang Zhang diff --git a/arch/mips/loongson64/common/bonito-irq.c b/arch/mips/loongson64/common/bonito-irq.c index cc0e4fd548e6..4e116d23bab3 100644 --- a/arch/mips/loongson64/common/bonito-irq.c +++ b/arch/mips/loongson64/common/bonito-irq.c @@ -3,7 +3,7 @@ * Author: Jun Sun, jsun@mvista.com or jsun@junsun.net * Copyright (C) 2000, 2001 Ralf Baechle (ralf@gnu.org) * - * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology + * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology * Author: Fuxin Zhang, zhangfx@lemote.com * * This program is free software; you can redistribute it and/or modify it diff --git a/arch/mips/loongson64/common/cmdline.c b/arch/mips/loongson64/common/cmdline.c index 72fed003a536..01fbed137028 100644 --- a/arch/mips/loongson64/common/cmdline.c +++ b/arch/mips/loongson64/common/cmdline.c @@ -6,7 +6,7 @@ * Copyright 2003 ICT CAS * Author: Michael Guo * - * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology + * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology * Author: Fuxin Zhang, zhangfx@lemote.com * * Copyright (C) 2009 Lemote Inc. diff --git a/arch/mips/loongson64/common/cs5536/cs5536_mfgpt.c b/arch/mips/loongson64/common/cs5536/cs5536_mfgpt.c index 12c75db23420..875037063a80 100644 --- a/arch/mips/loongson64/common/cs5536/cs5536_mfgpt.c +++ b/arch/mips/loongson64/common/cs5536/cs5536_mfgpt.c @@ -1,7 +1,7 @@ /* * CS5536 General timer functions * - * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology + * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology * Author: Yanhua, yanh@lemote.com * * Copyright (C) 2009 Lemote Inc. diff --git a/arch/mips/loongson64/common/env.c b/arch/mips/loongson64/common/env.c index 22f04ca2ff3e..f6c44dd332e2 100644 --- a/arch/mips/loongson64/common/env.c +++ b/arch/mips/loongson64/common/env.c @@ -6,7 +6,7 @@ * Copyright 2003 ICT CAS * Author: Michael Guo * - * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology + * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology * Author: Fuxin Zhang, zhangfx@lemote.com * * Copyright (C) 2009 Lemote Inc. diff --git a/arch/mips/loongson64/common/irq.c b/arch/mips/loongson64/common/irq.c index 687003b19b45..d36d969a4a87 100644 --- a/arch/mips/loongson64/common/irq.c +++ b/arch/mips/loongson64/common/irq.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology + * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology * Author: Fuxin Zhang, zhangfx@lemote.com * * This program is free software; you can redistribute it and/or modify it diff --git a/arch/mips/loongson64/common/setup.c b/arch/mips/loongson64/common/setup.c index d477dd6bb326..2dc5122f0e09 100644 --- a/arch/mips/loongson64/common/setup.c +++ b/arch/mips/loongson64/common/setup.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology + * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology * Author: Fuxin Zhang, zhangfx@lemote.com * * This program is free software; you can redistribute it and/or modify it diff --git a/arch/mips/loongson64/fuloong-2e/irq.c b/arch/mips/loongson64/fuloong-2e/irq.c index ef5ec8f3de5f..892963f860b7 100644 --- a/arch/mips/loongson64/fuloong-2e/irq.c +++ b/arch/mips/loongson64/fuloong-2e/irq.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology + * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology * Author: Fuxin Zhang, zhangfx@lemote.com * * This program is free software; you can redistribute it and/or modify it diff --git a/arch/mips/loongson64/lemote-2f/clock.c b/arch/mips/loongson64/lemote-2f/clock.c index 4c5b94af3643..a78fb657068c 100644 --- a/arch/mips/loongson64/lemote-2f/clock.c +++ b/arch/mips/loongson64/lemote-2f/clock.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2006 - 2008 Lemote Inc. & Insititute of Computing Technology + * Copyright (C) 2006 - 2008 Lemote Inc. & Institute of Computing Technology * Author: Yanhua, yanh@lemote.com * * This file is subject to the terms and conditions of the GNU General Public diff --git a/arch/mips/loongson64/loongson-3/numa.c b/arch/mips/loongson64/loongson-3/numa.c index 12d14ed48778..6f9e010cec4d 100644 --- a/arch/mips/loongson64/loongson-3/numa.c +++ b/arch/mips/loongson64/loongson-3/numa.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2010 Loongson Inc. & Lemote Inc. & - * Insititute of Computing Technology + * Institute of Computing Technology * Author: Xiang Gao, gaoxiang@ict.ac.cn * Huacai Chen, chenhc@lemote.com * Xiaofu Meng, Shuangshuang Zhang diff --git a/drivers/cpufreq/loongson2_cpufreq.c b/drivers/cpufreq/loongson2_cpufreq.c index fc897babab55..e362860c2b50 100644 --- a/drivers/cpufreq/loongson2_cpufreq.c +++ b/drivers/cpufreq/loongson2_cpufreq.c @@ -3,7 +3,7 @@ * * The 2E revision of loongson processor not support this feature. * - * Copyright (C) 2006 - 2008 Lemote Inc. & Insititute of Computing Technology + * Copyright (C) 2006 - 2008 Lemote Inc. & Institute of Computing Technology * Author: Yanhua, yanh@lemote.com * * This file is subject to the terms and conditions of the GNU General Public From 37b64a42067a04a22468c4e52c12af00d72e462b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 7 Jul 2015 21:56:34 +0200 Subject: [PATCH 118/210] tick/broadcast: Unbreak CONFIG_GENERIC_CLOCKEVENTS=n build Making tick_broadcast_oneshot_control() independent from CONFIG_GENERIC_CLOCKEVENTS_BROADCAST broke the build for CONFIG_GENERIC_CLOCKEVENTS=n because the function is not defined there. Provide a proper stub inline. Fixes: f32dd1170511 'tick/broadcast: Make idle check independent from mode and config' Reported-by: kbuild test robot Signed-off-by: Thomas Gleixner --- include/linux/tick.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/linux/tick.h b/include/linux/tick.h index 6916dcb61857..edbfc9a5293e 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -67,7 +67,14 @@ extern void tick_broadcast_control(enum tick_broadcast_mode mode); static inline void tick_broadcast_control(enum tick_broadcast_mode mode) { } #endif /* BROADCAST */ +#ifdef CONFIG_GENERIC_CLOCKEVENTS extern int tick_broadcast_oneshot_control(enum tick_broadcast_state state); +#else +static inline int tick_broadcast_oneshot_control(enum tick_broadcast_state state) +{ + return 0; +} +#endif static inline void tick_broadcast_enable(void) { From 4f273959b850569253299987eee611927f048de7 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Wed, 8 Jul 2015 00:22:03 +0300 Subject: [PATCH 119/210] mei: nfc: fix deadlock on shutdown/suspend path In function mei_nfc_host_exit mei_cl_remove_device cannot be called under the device mutex as device removing flow invokes the device driver remove handler that calls in turn to mei_cl_disable_device which naturally acquires the device mutex. Also remove mei_cl_bus_remove_devices which has the same issue, but is never executed as currently the only device on the mei client bus is NFC and a new device cannot be easily added till the bus revamp is completed. This fixes regression caused by commit be9b720a0ccb ("mei_phy: move all nfc logic from mei driver to nfc") Prior to this change the nfc driver remove handler called to no-op disable function while actual nfc device was disabled directly from the mei driver. Reported-by: Linus Torvalds Acked-by: Greg Kroah-Hartman Cc: Samuel Ortiz Signed-off-by: Tomas Winkler Signed-off-by: Linus Torvalds --- drivers/misc/mei/bus.c | 16 ---------------- drivers/misc/mei/init.c | 2 -- drivers/misc/mei/nfc.c | 3 ++- 3 files changed, 2 insertions(+), 19 deletions(-) diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c index 357b6ae4d207..458aa5a09c52 100644 --- a/drivers/misc/mei/bus.c +++ b/drivers/misc/mei/bus.c @@ -552,22 +552,6 @@ void mei_cl_bus_rx_event(struct mei_cl *cl) schedule_work(&device->event_work); } -void mei_cl_bus_remove_devices(struct mei_device *dev) -{ - struct mei_cl *cl, *next; - - mutex_lock(&dev->device_lock); - list_for_each_entry_safe(cl, next, &dev->device_list, device_link) { - if (cl->device) - mei_cl_remove_device(cl->device); - - list_del(&cl->device_link); - mei_cl_unlink(cl); - kfree(cl); - } - mutex_unlock(&dev->device_lock); -} - int __init mei_cl_bus_init(void) { return bus_register(&mei_cl_bus_type); diff --git a/drivers/misc/mei/init.c b/drivers/misc/mei/init.c index 94514b2c7a50..00c3865ca3b1 100644 --- a/drivers/misc/mei/init.c +++ b/drivers/misc/mei/init.c @@ -333,8 +333,6 @@ void mei_stop(struct mei_device *dev) mei_nfc_host_exit(dev); - mei_cl_bus_remove_devices(dev); - mutex_lock(&dev->device_lock); mei_wd_stop(dev); diff --git a/drivers/misc/mei/nfc.c b/drivers/misc/mei/nfc.c index b983c4ecad38..290ef3037437 100644 --- a/drivers/misc/mei/nfc.c +++ b/drivers/misc/mei/nfc.c @@ -402,11 +402,12 @@ void mei_nfc_host_exit(struct mei_device *dev) cldev->priv_data = NULL; - mutex_lock(&dev->device_lock); /* Need to remove the device here * since mei_nfc_free will unlink the clients */ mei_cl_remove_device(cldev); + + mutex_lock(&dev->device_lock); mei_nfc_free(ndev); mutex_unlock(&dev->device_lock); } From 56551da9255f20ffd3a9711728a1a3ad4b7100af Mon Sep 17 00:00:00 2001 From: Pankaj Dev Date: Tue, 7 Jul 2015 09:40:50 +0200 Subject: [PATCH 120/210] drivers: clk: st: Incorrect register offset used for lock_status Incorrect register offset used for sthi407 clockgenC Signed-off-by: Pankaj Dev Signed-off-by: Gabriel Fernandez Fixes: 51306d56ba81 ("clk: st: STiH407: Support for clockgenC0") Signed-off-by: Stephen Boyd --- drivers/clk/st/clkgen-fsyn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/st/clkgen-fsyn.c b/drivers/clk/st/clkgen-fsyn.c index 99c98c15d4a4..d9eb2e1d8471 100644 --- a/drivers/clk/st/clkgen-fsyn.c +++ b/drivers/clk/st/clkgen-fsyn.c @@ -340,7 +340,7 @@ static const struct clkgen_quadfs_data st_fs660c32_C_407 = { CLKGEN_FIELD(0x30c, 0xf, 20), CLKGEN_FIELD(0x310, 0xf, 20) }, .lockstatus_present = true, - .lock_status = CLKGEN_FIELD(0x2A0, 0x1, 24), + .lock_status = CLKGEN_FIELD(0x2f0, 0x1, 24), .powerup_polarity = 1, .standby_polarity = 1, .pll_ops = &st_quadfs_pll_c32_ops, From 7928eb0370d1133d0d8cd2f5ddfca19c309079d5 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Wed, 8 Jul 2015 04:49:10 +0200 Subject: [PATCH 121/210] MIPS: O32: Do not handle require 32 bytes from the stack to be readable. Commit 46e12c07b3b9603c60fc1d421ff18618241cb081 (MIPS: O32 / 32-bit: Always copy 4 stack arguments.) change the O32 syscall handler to always load four arguments from the userspace stack even for syscalls that require fewer or no arguments to be copied. This removes a large table from kernel space and need to maintain it. It appeared that it was ok the implementation chosen requires 16 bytes of readable stack space above the user stack pointer. Turned out a few threading implementations munmap the user stack before the thread exits resulting in errors due to the unreadable stack. We now treat any failed load as a if the loaded value was zero and let the actual syscall deal with the situation. Signed-off-by: Ralf Baechle --- arch/mips/kernel/scall32-o32.S | 37 +++++++++++++++++++++++++--------- arch/mips/kernel/scall64-o32.S | 33 ++++++++++++++++++++++-------- 2 files changed, 52 insertions(+), 18 deletions(-) diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index 6e8de80bb446..4cc13508d967 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -73,10 +73,11 @@ NESTED(handle_sys, PT_SIZE, sp) .set noreorder .set nomacro -1: user_lw(t5, 16(t0)) # argument #5 from usp -4: user_lw(t6, 20(t0)) # argument #6 from usp -3: user_lw(t7, 24(t0)) # argument #7 from usp -2: user_lw(t8, 28(t0)) # argument #8 from usp +load_a4: user_lw(t5, 16(t0)) # argument #5 from usp +load_a5: user_lw(t6, 20(t0)) # argument #6 from usp +load_a6: user_lw(t7, 24(t0)) # argument #7 from usp +load_a7: user_lw(t8, 28(t0)) # argument #8 from usp +loads_done: sw t5, 16(sp) # argument #5 to ksp sw t6, 20(sp) # argument #6 to ksp @@ -85,10 +86,10 @@ NESTED(handle_sys, PT_SIZE, sp) .set pop .section __ex_table,"a" - PTR 1b,bad_stack - PTR 2b,bad_stack - PTR 3b,bad_stack - PTR 4b,bad_stack + PTR load_a4, bad_stack_a4 + PTR load_a5, bad_stack_a5 + PTR load_a6, bad_stack_a6 + PTR load_a7, bad_stack_a7 .previous lw t0, TI_FLAGS($28) # syscall tracing enabled? @@ -153,8 +154,8 @@ syscall_trace_entry: /* ------------------------------------------------------------------------ */ /* - * The stackpointer for a call with more than 4 arguments is bad. - * We probably should handle this case a bit more drastic. + * Our open-coded access area sanity test for the stack pointer + * failed. We probably should handle this case a bit more drastic. */ bad_stack: li v0, EFAULT @@ -163,6 +164,22 @@ bad_stack: sw t0, PT_R7(sp) j o32_syscall_exit +bad_stack_a4: + li t5, 0 + b load_a5 + +bad_stack_a5: + li t6, 0 + b load_a6 + +bad_stack_a6: + li t7, 0 + b load_a7 + +bad_stack_a7: + li t8, 0 + b loads_done + /* * The system call does not exist in this kernel */ diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index d07b210fbeff..66d618bb2fa2 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -69,16 +69,17 @@ NESTED(handle_sys, PT_SIZE, sp) daddu t1, t0, 32 bltz t1, bad_stack -1: lw a4, 16(t0) # argument #5 from usp -2: lw a5, 20(t0) # argument #6 from usp -3: lw a6, 24(t0) # argument #7 from usp -4: lw a7, 28(t0) # argument #8 from usp (for indirect syscalls) +load_a4: lw a4, 16(t0) # argument #5 from usp +load_a5: lw a5, 20(t0) # argument #6 from usp +load_a6: lw a6, 24(t0) # argument #7 from usp +load_a7: lw a7, 28(t0) # argument #8 from usp +loads_done: .section __ex_table,"a" - PTR 1b, bad_stack - PTR 2b, bad_stack - PTR 3b, bad_stack - PTR 4b, bad_stack + PTR load_a4, bad_stack_a4 + PTR load_a5, bad_stack_a5 + PTR load_a6, bad_stack_a6 + PTR load_a7, bad_stack_a7 .previous li t1, _TIF_WORK_SYSCALL_ENTRY @@ -167,6 +168,22 @@ bad_stack: sd t0, PT_R7(sp) j o32_syscall_exit +bad_stack_a4: + li a4, 0 + b load_a5 + +bad_stack_a5: + li a5, 0 + b load_a6 + +bad_stack_a6: + li a6, 0 + b load_a7 + +bad_stack_a7: + li a7, 0 + b loads_done + not_o32_scall: /* * This is not an o32 compatibility syscall, pass it on From 5caaf5346892d1e7f0b8b7223062644f8538483f Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Tue, 7 Jul 2015 15:45:46 +1000 Subject: [PATCH 122/210] cxl: Fail mmap if requested mapping is larger than assigned problem state area This patch makes the mmap call fail outright if the requested region is larger than the problem state area assigned to the context so the error is reported immediately rather than waiting for an attempt to access an address out of bounds. Although we never expect users to map more than the assigned problem state area and are not aware of anyone doing this (other than for testing), this does have the potential to break users if someone has used a larger range regardless. I'm submitting it for consideration, but if this change is not considered acceptable the previous patch is sufficient to prevent access out of bounds without breaking anyone. Signed-off-by: Ian Munsie Signed-off-by: Michael Ellerman --- drivers/misc/cxl/context.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c index 2a4c80ac322a..6236d4a1f7ad 100644 --- a/drivers/misc/cxl/context.c +++ b/drivers/misc/cxl/context.c @@ -145,8 +145,16 @@ static const struct vm_operations_struct cxl_mmap_vmops = { */ int cxl_context_iomap(struct cxl_context *ctx, struct vm_area_struct *vma) { + u64 start = vma->vm_pgoff << PAGE_SHIFT; u64 len = vma->vm_end - vma->vm_start; - len = min(len, ctx->psn_size); + + if (ctx->afu->current_mode == CXL_MODE_DEDICATED) { + if (start + len > ctx->afu->adapter->ps_size) + return -EINVAL; + } else { + if (start + len > ctx->psn_size) + return -EINVAL; + } if (ctx->afu->current_mode != CXL_MODE_DEDICATED) { /* make sure there is a valid per process space for this AFU */ From 10a5894f2dedd8a26b3132497445b314c0d952c4 Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Tue, 7 Jul 2015 15:45:45 +1000 Subject: [PATCH 123/210] cxl: Fix off by one error allowing subsequent mmap page to be accessed It was discovered that if a process mmaped their problem state area they were able to access one page more than expected, potentially allowing them to access the problem state area of an unrelated process. This was due to a simple off by one error in the mmap fault handler introduced in 0712dc7e73e59d79bcead5d5520acf4e9e917e87 ("cxl: Fix issues when unmapping contexts"), which is fixed in this patch. Cc: stable@vger.kernel.org Fixes: 0712dc7e73e5 ("cxl: Fix issues when unmapping contexts") Signed-off-by: Ian Munsie Signed-off-by: Michael Ellerman --- drivers/misc/cxl/context.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c index 6236d4a1f7ad..1287148629c0 100644 --- a/drivers/misc/cxl/context.c +++ b/drivers/misc/cxl/context.c @@ -113,11 +113,11 @@ static int cxl_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) if (ctx->afu->current_mode == CXL_MODE_DEDICATED) { area = ctx->afu->psn_phys; - if (offset > ctx->afu->adapter->ps_size) + if (offset >= ctx->afu->adapter->ps_size) return VM_FAULT_SIGBUS; } else { area = ctx->psn_phys; - if (offset > ctx->psn_size) + if (offset >= ctx->psn_size) return VM_FAULT_SIGBUS; } From 442053e57a4fc58b719b6ceab60f29ef9cf4404c Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Tue, 7 Jul 2015 12:21:10 -0400 Subject: [PATCH 124/210] powerpc/perf/24x7: Fix lockdep warning The sysfs attributes for the 24x7 counters are dynamically allocated. Initialize the attributes using sysfs_attr_init() to fix following warning which occurs when CONFIG_DEBUG_LOCK_VMALLOC=y. [ 0.346249] audit: initializing netlink subsys (disabled) [ 0.346284] audit: type=2000 audit(1436295254.340:1): initialized [ 0.346489] BUG: key c0000000efe90198 not in .data! [ 0.346491] DEBUG_LOCKS_WARN_ON(1) [ 0.346502] ------------[ cut here ]------------ [ 0.346504] WARNING: at ../kernel/locking/lockdep.c:3002 [ 0.346506] Modules linked in: Reported-by: Gustavo Luiz Duarte Signed-off-by: Sukadev Bhattiprolu Tested-by: Gustavo Luiz Duarte Signed-off-by: Michael Ellerman --- arch/powerpc/perf/hv-24x7.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index ec2eb20631d1..df956295c2a7 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -320,6 +320,8 @@ static struct attribute *device_str_attr_create_(char *name, char *str) if (!attr) return NULL; + sysfs_attr_init(&attr->attr.attr); + attr->var = str; attr->attr.attr.name = name; attr->attr.attr.mode = 0444; From 9958084a5275ca2e8f55c5b18729307f2f0cb53b Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 7 Jul 2015 19:16:23 +1000 Subject: [PATCH 125/210] powerpc: Update MAINTAINERS to point at shared tree Now that we have a shared powerpc tree on kernel.org, point folks at that as the primary place to look for powerpc stuff. Signed-off-by: Michael Ellerman --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 8133cefb6b6e..b29f8ebc10b0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6161,7 +6161,7 @@ M: Michael Ellerman W: http://www.penguinppc.org/ L: linuxppc-dev@lists.ozlabs.org Q: http://patchwork.ozlabs.org/project/linuxppc-dev/list/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git S: Supported F: Documentation/powerpc/ F: arch/powerpc/ From 69711ca19b06d1b33d8f21213b540b5d1c638dbf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Hinderer?= Date: Wed, 8 Jul 2015 00:02:01 +0200 Subject: [PATCH 126/210] x86/kconfig: Fix typo in the CONFIG_CMDLINE_BOOL help text MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Sébastien Hinderer Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Samuel Thibault Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 0b2929f49531..3dbb7e7909ca 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2020,7 +2020,7 @@ config CMDLINE_BOOL To compile command line arguments into the kernel, set this option to 'Y', then fill in the - the boot arguments in CONFIG_CMDLINE. + boot arguments in CONFIG_CMDLINE. Systems with fully functional boot loaders (i.e. non-embedded) should leave this option set to 'N'. From 2c3d99845eb7b1e9f3d2863099bc5a25c8461d3f Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 6 Jul 2015 15:15:01 +0100 Subject: [PATCH 127/210] drm/i915: Restore all GGTT VMAs on resume When rotated and partial views were added no one spotted the resume path which assumes only one GGTT VMA per object and hence is now skipping rebind of alternative views. Signed-off-by: Tvrtko Ursulin Cc: Daniel Vetter Cc: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_gtt.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 9daa2883ac18..dcc6a88c560e 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2546,6 +2546,8 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_gem_object *obj; struct i915_address_space *vm; + struct i915_vma *vma; + bool flush; i915_check_and_clear_faults(dev); @@ -2555,17 +2557,24 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev) dev_priv->gtt.base.total, true); + /* Cache flush objects bound into GGTT and rebind them. */ + vm = &dev_priv->gtt.base; list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { - struct i915_vma *vma = i915_gem_obj_to_vma(obj, - &dev_priv->gtt.base); - if (!vma) - continue; + flush = false; + list_for_each_entry(vma, &obj->vma_list, vma_link) { + if (vma->vm != vm) + continue; - i915_gem_clflush_object(obj, obj->pin_display); - WARN_ON(i915_vma_bind(vma, obj->cache_level, PIN_UPDATE)); + WARN_ON(i915_vma_bind(vma, obj->cache_level, + PIN_UPDATE)); + + flush = true; + } + + if (flush) + i915_gem_clflush_object(obj, obj->pin_display); } - if (INTEL_INFO(dev)->gen >= 8) { if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev)) chv_setup_private_ppat(dev_priv); From a899418167264c7bac574b1a0f1b2c26c5b0995a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 5 Jul 2015 17:12:30 +0000 Subject: [PATCH 128/210] hotplug: Prevent alloc/free of irq descriptors during cpu up/down When a cpu goes up some architectures (e.g. x86) have to walk the irq space to set up the vector space for the cpu. While this needs extra protection at the architecture level we can avoid a few race conditions by preventing the concurrent allocation/free of irq descriptors and the associated data. When a cpu goes down it moves the interrupts which are targeted to this cpu away by reassigning the affinities. While this happens interrupts can be allocated and freed, which opens a can of race conditions in the code which reassignes the affinities because interrupt descriptors might be freed underneath. Example: CPU1 CPU2 cpu_up/down irq_desc = irq_to_desc(irq); remove_from_radix_tree(desc); raw_spin_lock(&desc->lock); free(desc); We could protect the irq descriptors with RCU, but that would require a full tree change of all accesses to interrupt descriptors. But fortunately these kind of race conditions are rather limited to a few things like cpu hotplug. The normal setup/teardown is very well serialized. So the simpler and obvious solution is: Prevent allocation and freeing of interrupt descriptors accross cpu hotplug. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Cc: xiao jin Cc: Joerg Roedel Cc: Borislav Petkov Cc: Yanmin Zhang Link: http://lkml.kernel.org/r/20150705171102.063519515@linutronix.de --- include/linux/irqdesc.h | 7 ++++++- kernel/cpu.c | 22 +++++++++++++++++++++- kernel/irq/internals.h | 4 ---- 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 624a668e61f1..fcea4e48e21f 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -87,7 +87,12 @@ struct irq_desc { const char *name; } ____cacheline_internodealigned_in_smp; -#ifndef CONFIG_SPARSE_IRQ +#ifdef CONFIG_SPARSE_IRQ +extern void irq_lock_sparse(void); +extern void irq_unlock_sparse(void); +#else +static inline void irq_lock_sparse(void) { } +static inline void irq_unlock_sparse(void) { } extern struct irq_desc irq_desc[NR_IRQS]; #endif diff --git a/kernel/cpu.c b/kernel/cpu.c index 9c9c9fab16cc..6a374544d495 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include "smpboot.h" @@ -391,14 +392,20 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) smpboot_park_threads(cpu); + /* + * Prevent irq alloc/free while the dying cpu reorganizes the + * interrupt affinities. + */ + irq_lock_sparse(); + /* * So now all preempt/rcu users must observe !cpu_active(). */ - err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu)); if (err) { /* CPU didn't die: tell everyone. Can't complain. */ cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu); + irq_unlock_sparse(); goto out_release; } BUG_ON(cpu_online(cpu)); @@ -415,6 +422,9 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) smp_mb(); /* Read from cpu_dead_idle before __cpu_die(). */ per_cpu(cpu_dead_idle, cpu) = false; + /* Interrupts are moved away from the dying cpu, reenable alloc/free */ + irq_unlock_sparse(); + hotplug_cpu__broadcast_tick_pull(cpu); /* This actually kills the CPU. */ __cpu_die(cpu); @@ -517,8 +527,18 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen) goto out_notify; } + /* + * Some architectures have to walk the irq descriptors to + * setup the vector space for the cpu which comes online. + * Prevent irq alloc/free across the bringup. + */ + irq_lock_sparse(); + /* Arch-specific enabling code. */ ret = __cpu_up(cpu, idle); + + irq_unlock_sparse(); + if (ret != 0) goto out_notify; BUG_ON(!cpu_online(cpu)); diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 4834ee828c41..61008b8433ab 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -76,12 +76,8 @@ extern void unmask_threaded_irq(struct irq_desc *desc); #ifdef CONFIG_SPARSE_IRQ static inline void irq_mark_irq(unsigned int irq) { } -extern void irq_lock_sparse(void); -extern void irq_unlock_sparse(void); #else extern void irq_mark_irq(unsigned int irq); -static inline void irq_lock_sparse(void) { } -static inline void irq_unlock_sparse(void) { } #endif extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr); From 63fef06ada94172d7b4295fc205441bafd8f8fb3 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 7 Jul 2015 11:15:46 +0200 Subject: [PATCH 129/210] drm/i915: Check crtc->active in intel_crtc_disable_planes This was lost in commit ce22dba92de22e951dee2ff89937a39754d2dd91 Author: Maarten Lankhorst Date: Tue Apr 21 17:12:56 2015 +0300 drm/i915: Move toggling planes out of crtc enable/disable. and we still need that crtc->active check since the overall modeset flow doesn't yet take dpms state into account properly. Fixes WARNING backtraces on at least bdw/hsw due to the ips disabling code being upset about being run on a switched-off pipe. We don't need a corresponding change on the enable side since with the old setCrtc semantics we always force-enable the pipe after a modeset. And the dpms function intel_crtc_control already checks for ->active. Reported-by: Linus Torvalds Cc: Linus Torvalds Cc: Maarten Lankhorst Cc: Ander Conselvan de Oliveira Signed-off-by: Daniel Vetter Reviewed-by: Maarten Lankhorst Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 8aa803848f35..647b1404c441 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -4854,6 +4854,9 @@ static void intel_crtc_disable_planes(struct drm_crtc *crtc) struct intel_plane *intel_plane; int pipe = intel_crtc->pipe; + if (!intel_crtc->active) + return; + intel_crtc_wait_for_pending_flips(crtc); intel_pre_disable_primary(crtc); From dec4f799d0a4c9edae20512fa60b0a36f3299ca2 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 7 Jul 2015 11:15:47 +0200 Subject: [PATCH 130/210] drm/i915: Use crtc_state->active in primary check_plane func Since commit 8c7b5ccb729870e606321b3703e2c2e698c49a95 Author: Ander Conselvan de Oliveira Date: Tue Apr 21 17:13:19 2015 +0300 drm/i915: Use atomic helpers for computing changed flags we compute the plane state for a modeset before actually committing any changes, which means crtc->active won't be correct yet. Looking at future work in the modeset conversion targetting 4.3 the only places where crtc_state->active isn't accurate is when disabling other CRTCs than the one the modeset is for (when stealing connectors). Which isn't the case here. And that's also confirmed by an audit, we do unconditionally update crtc_state->active for the current pipe. We also don't need to update any other plane check functions since we only ever add the primary state to the modeset update right now. Cc: Ander Conselvan de Oliveira Cc: Maarten Lankhorst Cc: Jani Nikula Signed-off-by: Daniel Vetter Reviewed-by: Maarten Lankhorst Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 647b1404c441..ba9321998a41 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -13276,7 +13276,7 @@ intel_check_primary_plane(struct drm_plane *plane, if (ret) return ret; - if (intel_crtc->active) { + if (crtc_state->base.active) { struct intel_plane_state *old_state = to_intel_plane_state(plane->state); From 45820c294fe1b1a9df495d57f40585ef2d069a39 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 8 Jul 2015 09:33:38 -0700 Subject: [PATCH 131/210] Fix broken audit tests for exec arg len The "fix" in commit 0b08c5e5944 ("audit: Fix check of return value of strnlen_user()") didn't fix anything, it broke things. As reported by Steven Rostedt: "Yes, strnlen_user() returns 0 on fault, but if you look at what len is set to, than you would notice that on fault len would be -1" because we just subtracted one from the return value. So testing against 0 doesn't test for a fault condition, it tests against a perfectly valid empty string. Also fix up the usual braindamage wrt using WARN_ON() inside a conditional - make it part of the conditional and remove the explicit unlikely() (which is already part of the WARN_ON*() logic, exactly so that you don't have to write unreadable code. Reported-and-tested-by: Steven Rostedt Cc: Jan Kara Cc: Paul Moore Signed-off-by: Linus Torvalds --- kernel/auditsc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 09c65640cad6..e85bdfd15fed 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1021,8 +1021,7 @@ static int audit_log_single_execve_arg(struct audit_context *context, * for strings that are too long, we should not have created * any. */ - if (unlikely((len == 0) || len > MAX_ARG_STRLEN - 1)) { - WARN_ON(1); + if (WARN_ON_ONCE(len < 0 || len > MAX_ARG_STRLEN - 1)) { send_sig(SIGKILL, current, 0); return -1; } From 07f18f0bb8d8d65badd8b4988b40d329fc0cc6dc Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Fri, 3 Jul 2015 06:03:06 +0200 Subject: [PATCH 132/210] drm/radeon: Handle irqs only based on irq ring, not irq status regs. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Trying to resolve issues with missed vblanks and impossible values inside delivered kms pageflip completion events showed that radeon's irq handling sometimes doesn't handle valid irqs, but silently skips them. This was observed for vblank interrupts. Although those irqs have corresponding events queued in the gpu's irq ring at time of interrupt, and therefore the corresponding handling code gets triggered by these events, the handling code sometimes silently skipped processing the irq. The reason for those skips is that the handling code double-checks for each irq event if the corresponding irq status bits in the irq status registers are set. Sometimes those bits are not set at time of check for valid irqs, maybe due to some hardware race on some setups? The problem only seems to happen on some machine + card combos sometimes, e.g., never happened during my testing of different PC cards of the DCE-2/3/4 generation a year ago, but happens consistently now on two different Apple Mac cards (RV730, DCE-3, Apple iMac and Evergreen JUNIPER, DCE-4 in a Apple MacPro). It also doesn't happen at each interrupt but only occassionally every couple of hundred or thousand vblank interrupts. This results in XOrg warning messages like "[ 7084.472] (WW) RADEON(0): radeon_dri2_flip_event_handler: Pageflip completion event has impossible msc 420120 < target_msc 420121" as well as skipped frames and problems for applications that use kms pageflip events or vblank events, e.g., users of DRI2 and DRI3/Present, Waylands Weston compositor, etc. See also https://bugs.freedesktop.org/show_bug.cgi?id=85203 After some talking to Alex and Michel, we decided to fix this by turning the double-check for asserted irq status bits into a warning. Whenever a irq event is queued in the IH ring, always execute the corresponding interrupt handler. Still check the irq status bits, but only to log a DRM_DEBUG message on a mismatch. This fixed the problems reliably on both previously failing cards, RV-730 dual-head tested on both crtcs (pipes D1 and D2) and a triple-output Juniper HD-5770 card tested on all three available crtcs (D1/D2/D3). The r600 and evergreen irq handling is therefore tested, but the cik an si handling is only compile tested due to lack of hw. Reviewed-by: Christian König Signed-off-by: Mario Kleiner CC: Michel Dänzer CC: Alex Deucher CC: # v3.16+ Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/cik.c | 336 ++++++++++++++----------- drivers/gpu/drm/radeon/evergreen.c | 392 ++++++++++++++++------------- drivers/gpu/drm/radeon/r600.c | 155 +++++++----- drivers/gpu/drm/radeon/si.c | 336 ++++++++++++++----------- 4 files changed, 688 insertions(+), 531 deletions(-) diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 4ecf5caa8c6d..248953d2fdb7 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -7964,23 +7964,27 @@ restart_ih: case 1: /* D1 vblank/vline */ switch (src_data) { case 0: /* D1 vblank */ - if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[0]) { - drm_handle_vblank(rdev->ddev, 0); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[0])) - radeon_crtc_handle_vblank(rdev, 0); - rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D1 vblank\n"); + if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[0]) { + drm_handle_vblank(rdev->ddev, 0); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[0])) + radeon_crtc_handle_vblank(rdev, 0); + rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D1 vblank\n"); + break; case 1: /* D1 vline */ - if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT; - DRM_DEBUG("IH: D1 vline\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT; + DRM_DEBUG("IH: D1 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -7990,23 +7994,27 @@ restart_ih: case 2: /* D2 vblank/vline */ switch (src_data) { case 0: /* D2 vblank */ - if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[1]) { - drm_handle_vblank(rdev->ddev, 1); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[1])) - radeon_crtc_handle_vblank(rdev, 1); - rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D2 vblank\n"); + if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[1]) { + drm_handle_vblank(rdev->ddev, 1); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[1])) + radeon_crtc_handle_vblank(rdev, 1); + rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D2 vblank\n"); + break; case 1: /* D2 vline */ - if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT; - DRM_DEBUG("IH: D2 vline\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT; + DRM_DEBUG("IH: D2 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -8016,23 +8024,27 @@ restart_ih: case 3: /* D3 vblank/vline */ switch (src_data) { case 0: /* D3 vblank */ - if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[2]) { - drm_handle_vblank(rdev->ddev, 2); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[2])) - radeon_crtc_handle_vblank(rdev, 2); - rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D3 vblank\n"); + if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[2]) { + drm_handle_vblank(rdev->ddev, 2); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[2])) + radeon_crtc_handle_vblank(rdev, 2); + rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D3 vblank\n"); + break; case 1: /* D3 vline */ - if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT; - DRM_DEBUG("IH: D3 vline\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT; + DRM_DEBUG("IH: D3 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -8042,23 +8054,27 @@ restart_ih: case 4: /* D4 vblank/vline */ switch (src_data) { case 0: /* D4 vblank */ - if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[3]) { - drm_handle_vblank(rdev->ddev, 3); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[3])) - radeon_crtc_handle_vblank(rdev, 3); - rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D4 vblank\n"); + if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[3]) { + drm_handle_vblank(rdev->ddev, 3); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[3])) + radeon_crtc_handle_vblank(rdev, 3); + rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D4 vblank\n"); + break; case 1: /* D4 vline */ - if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT; - DRM_DEBUG("IH: D4 vline\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT; + DRM_DEBUG("IH: D4 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -8068,23 +8084,27 @@ restart_ih: case 5: /* D5 vblank/vline */ switch (src_data) { case 0: /* D5 vblank */ - if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[4]) { - drm_handle_vblank(rdev->ddev, 4); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[4])) - radeon_crtc_handle_vblank(rdev, 4); - rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D5 vblank\n"); + if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[4]) { + drm_handle_vblank(rdev->ddev, 4); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[4])) + radeon_crtc_handle_vblank(rdev, 4); + rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D5 vblank\n"); + break; case 1: /* D5 vline */ - if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT; - DRM_DEBUG("IH: D5 vline\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT; + DRM_DEBUG("IH: D5 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -8094,23 +8114,27 @@ restart_ih: case 6: /* D6 vblank/vline */ switch (src_data) { case 0: /* D6 vblank */ - if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[5]) { - drm_handle_vblank(rdev->ddev, 5); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[5])) - radeon_crtc_handle_vblank(rdev, 5); - rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D6 vblank\n"); + if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[5]) { + drm_handle_vblank(rdev->ddev, 5); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[5])) + radeon_crtc_handle_vblank(rdev, 5); + rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D6 vblank\n"); + break; case 1: /* D6 vline */ - if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT; - DRM_DEBUG("IH: D6 vline\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT; + DRM_DEBUG("IH: D6 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -8130,88 +8154,112 @@ restart_ih: case 42: /* HPD hotplug */ switch (src_data) { case 0: - if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD1\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD1\n"); + break; case 1: - if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD2\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD2\n"); + break; case 2: - if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD3\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD3\n"); + break; case 3: - if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD4\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD4\n"); + break; case 4: - if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD5\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD5\n"); + break; case 5: - if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD6\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD6\n"); + break; case 6: - if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 1\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 1\n"); + break; case 7: - if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 2\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 2\n"); + break; case 8: - if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 3\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 3\n"); + break; case 9: - if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 4\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 4\n"); + break; case 10: - if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 5\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 5\n"); + break; case 11: - if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 6\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 6\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 3a6d483a2c36..0acde1949c18 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -4924,7 +4924,7 @@ restart_ih: return IRQ_NONE; rptr = rdev->ih.rptr; - DRM_DEBUG("r600_irq_process start: rptr %d, wptr %d\n", rptr, wptr); + DRM_DEBUG("evergreen_irq_process start: rptr %d, wptr %d\n", rptr, wptr); /* Order reading of wptr vs. reading of IH ring data */ rmb(); @@ -4942,23 +4942,27 @@ restart_ih: case 1: /* D1 vblank/vline */ switch (src_data) { case 0: /* D1 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[0]) { - drm_handle_vblank(rdev->ddev, 0); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[0])) - radeon_crtc_handle_vblank(rdev, 0); - rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D1 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: D1 vblank - IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[0]) { + drm_handle_vblank(rdev->ddev, 0); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[0])) + radeon_crtc_handle_vblank(rdev, 0); + rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D1 vblank\n"); + break; case 1: /* D1 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT; - DRM_DEBUG("IH: D1 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)) + DRM_DEBUG("IH: D1 vline - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT; + DRM_DEBUG("IH: D1 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -4968,23 +4972,27 @@ restart_ih: case 2: /* D2 vblank/vline */ switch (src_data) { case 0: /* D2 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[1]) { - drm_handle_vblank(rdev->ddev, 1); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[1])) - radeon_crtc_handle_vblank(rdev, 1); - rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D2 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: D2 vblank - IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[1]) { + drm_handle_vblank(rdev->ddev, 1); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[1])) + radeon_crtc_handle_vblank(rdev, 1); + rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D2 vblank\n"); + break; case 1: /* D2 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT; - DRM_DEBUG("IH: D2 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)) + DRM_DEBUG("IH: D2 vline - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT; + DRM_DEBUG("IH: D2 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -4994,23 +5002,27 @@ restart_ih: case 3: /* D3 vblank/vline */ switch (src_data) { case 0: /* D3 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[2]) { - drm_handle_vblank(rdev->ddev, 2); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[2])) - radeon_crtc_handle_vblank(rdev, 2); - rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D3 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: D3 vblank - IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[2]) { + drm_handle_vblank(rdev->ddev, 2); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[2])) + radeon_crtc_handle_vblank(rdev, 2); + rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D3 vblank\n"); + break; case 1: /* D3 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT; - DRM_DEBUG("IH: D3 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)) + DRM_DEBUG("IH: D3 vline - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT; + DRM_DEBUG("IH: D3 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -5020,23 +5032,27 @@ restart_ih: case 4: /* D4 vblank/vline */ switch (src_data) { case 0: /* D4 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[3]) { - drm_handle_vblank(rdev->ddev, 3); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[3])) - radeon_crtc_handle_vblank(rdev, 3); - rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D4 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: D4 vblank - IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[3]) { + drm_handle_vblank(rdev->ddev, 3); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[3])) + radeon_crtc_handle_vblank(rdev, 3); + rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D4 vblank\n"); + break; case 1: /* D4 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT; - DRM_DEBUG("IH: D4 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)) + DRM_DEBUG("IH: D4 vline - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT; + DRM_DEBUG("IH: D4 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -5046,23 +5062,27 @@ restart_ih: case 5: /* D5 vblank/vline */ switch (src_data) { case 0: /* D5 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[4]) { - drm_handle_vblank(rdev->ddev, 4); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[4])) - radeon_crtc_handle_vblank(rdev, 4); - rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D5 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: D5 vblank - IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[4]) { + drm_handle_vblank(rdev->ddev, 4); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[4])) + radeon_crtc_handle_vblank(rdev, 4); + rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D5 vblank\n"); + break; case 1: /* D5 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT; - DRM_DEBUG("IH: D5 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)) + DRM_DEBUG("IH: D5 vline - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT; + DRM_DEBUG("IH: D5 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -5072,23 +5092,27 @@ restart_ih: case 6: /* D6 vblank/vline */ switch (src_data) { case 0: /* D6 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[5]) { - drm_handle_vblank(rdev->ddev, 5); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[5])) - radeon_crtc_handle_vblank(rdev, 5); - rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D6 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: D6 vblank - IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[5]) { + drm_handle_vblank(rdev->ddev, 5); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[5])) + radeon_crtc_handle_vblank(rdev, 5); + rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D6 vblank\n"); + break; case 1: /* D6 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT; - DRM_DEBUG("IH: D6 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)) + DRM_DEBUG("IH: D6 vline - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT; + DRM_DEBUG("IH: D6 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -5108,88 +5132,100 @@ restart_ih: case 42: /* HPD hotplug */ switch (src_data) { case 0: - if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD1\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD1\n"); break; case 1: - if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD2\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD2\n"); break; case 2: - if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD3\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD3\n"); break; case 3: - if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD4\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD4\n"); break; case 4: - if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD5\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD5\n"); break; case 5: - if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD6\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD6\n"); break; case 6: - if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 1\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 1\n"); break; case 7: - if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 2\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 2\n"); break; case 8: - if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 3\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 3\n"); break; case 9: - if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 4\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 4\n"); break; case 10: - if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 5\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 5\n"); break; case 11: - if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 6\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 6\n"); break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -5199,46 +5235,52 @@ restart_ih: case 44: /* hdmi */ switch (src_data) { case 0: - if (rdev->irq.stat_regs.evergreen.afmt_status1 & AFMT_AZ_FORMAT_WTRIG) { - rdev->irq.stat_regs.evergreen.afmt_status1 &= ~AFMT_AZ_FORMAT_WTRIG; - queue_hdmi = true; - DRM_DEBUG("IH: HDMI0\n"); - } + if (!(rdev->irq.stat_regs.evergreen.afmt_status1 & AFMT_AZ_FORMAT_WTRIG)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.afmt_status1 &= ~AFMT_AZ_FORMAT_WTRIG; + queue_hdmi = true; + DRM_DEBUG("IH: HDMI0\n"); break; case 1: - if (rdev->irq.stat_regs.evergreen.afmt_status2 & AFMT_AZ_FORMAT_WTRIG) { - rdev->irq.stat_regs.evergreen.afmt_status2 &= ~AFMT_AZ_FORMAT_WTRIG; - queue_hdmi = true; - DRM_DEBUG("IH: HDMI1\n"); - } + if (!(rdev->irq.stat_regs.evergreen.afmt_status2 & AFMT_AZ_FORMAT_WTRIG)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.afmt_status2 &= ~AFMT_AZ_FORMAT_WTRIG; + queue_hdmi = true; + DRM_DEBUG("IH: HDMI1\n"); break; case 2: - if (rdev->irq.stat_regs.evergreen.afmt_status3 & AFMT_AZ_FORMAT_WTRIG) { - rdev->irq.stat_regs.evergreen.afmt_status3 &= ~AFMT_AZ_FORMAT_WTRIG; - queue_hdmi = true; - DRM_DEBUG("IH: HDMI2\n"); - } + if (!(rdev->irq.stat_regs.evergreen.afmt_status3 & AFMT_AZ_FORMAT_WTRIG)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.afmt_status3 &= ~AFMT_AZ_FORMAT_WTRIG; + queue_hdmi = true; + DRM_DEBUG("IH: HDMI2\n"); break; case 3: - if (rdev->irq.stat_regs.evergreen.afmt_status4 & AFMT_AZ_FORMAT_WTRIG) { - rdev->irq.stat_regs.evergreen.afmt_status4 &= ~AFMT_AZ_FORMAT_WTRIG; - queue_hdmi = true; - DRM_DEBUG("IH: HDMI3\n"); - } + if (!(rdev->irq.stat_regs.evergreen.afmt_status4 & AFMT_AZ_FORMAT_WTRIG)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.afmt_status4 &= ~AFMT_AZ_FORMAT_WTRIG; + queue_hdmi = true; + DRM_DEBUG("IH: HDMI3\n"); break; case 4: - if (rdev->irq.stat_regs.evergreen.afmt_status5 & AFMT_AZ_FORMAT_WTRIG) { - rdev->irq.stat_regs.evergreen.afmt_status5 &= ~AFMT_AZ_FORMAT_WTRIG; - queue_hdmi = true; - DRM_DEBUG("IH: HDMI4\n"); - } + if (!(rdev->irq.stat_regs.evergreen.afmt_status5 & AFMT_AZ_FORMAT_WTRIG)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.afmt_status5 &= ~AFMT_AZ_FORMAT_WTRIG; + queue_hdmi = true; + DRM_DEBUG("IH: HDMI4\n"); break; case 5: - if (rdev->irq.stat_regs.evergreen.afmt_status6 & AFMT_AZ_FORMAT_WTRIG) { - rdev->irq.stat_regs.evergreen.afmt_status6 &= ~AFMT_AZ_FORMAT_WTRIG; - queue_hdmi = true; - DRM_DEBUG("IH: HDMI5\n"); - } + if (!(rdev->irq.stat_regs.evergreen.afmt_status6 & AFMT_AZ_FORMAT_WTRIG)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.afmt_status6 &= ~AFMT_AZ_FORMAT_WTRIG; + queue_hdmi = true; + DRM_DEBUG("IH: HDMI5\n"); break; default: DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data); diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 35dafd77a639..4ea5b10ff5f4 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -4086,23 +4086,27 @@ restart_ih: case 1: /* D1 vblank/vline */ switch (src_data) { case 0: /* D1 vblank */ - if (rdev->irq.stat_regs.r600.disp_int & LB_D1_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[0]) { - drm_handle_vblank(rdev->ddev, 0); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[0])) - radeon_crtc_handle_vblank(rdev, 0); - rdev->irq.stat_regs.r600.disp_int &= ~LB_D1_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D1 vblank\n"); + if (!(rdev->irq.stat_regs.r600.disp_int & LB_D1_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: D1 vblank - IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[0]) { + drm_handle_vblank(rdev->ddev, 0); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[0])) + radeon_crtc_handle_vblank(rdev, 0); + rdev->irq.stat_regs.r600.disp_int &= ~LB_D1_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D1 vblank\n"); + break; case 1: /* D1 vline */ - if (rdev->irq.stat_regs.r600.disp_int & LB_D1_VLINE_INTERRUPT) { - rdev->irq.stat_regs.r600.disp_int &= ~LB_D1_VLINE_INTERRUPT; - DRM_DEBUG("IH: D1 vline\n"); - } + if (!(rdev->irq.stat_regs.r600.disp_int & LB_D1_VLINE_INTERRUPT)) + DRM_DEBUG("IH: D1 vline - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.disp_int &= ~LB_D1_VLINE_INTERRUPT; + DRM_DEBUG("IH: D1 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -4112,23 +4116,27 @@ restart_ih: case 5: /* D2 vblank/vline */ switch (src_data) { case 0: /* D2 vblank */ - if (rdev->irq.stat_regs.r600.disp_int & LB_D2_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[1]) { - drm_handle_vblank(rdev->ddev, 1); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[1])) - radeon_crtc_handle_vblank(rdev, 1); - rdev->irq.stat_regs.r600.disp_int &= ~LB_D2_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D2 vblank\n"); + if (!(rdev->irq.stat_regs.r600.disp_int & LB_D2_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: D2 vblank - IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[1]) { + drm_handle_vblank(rdev->ddev, 1); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[1])) + radeon_crtc_handle_vblank(rdev, 1); + rdev->irq.stat_regs.r600.disp_int &= ~LB_D2_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D2 vblank\n"); + break; case 1: /* D1 vline */ - if (rdev->irq.stat_regs.r600.disp_int & LB_D2_VLINE_INTERRUPT) { - rdev->irq.stat_regs.r600.disp_int &= ~LB_D2_VLINE_INTERRUPT; - DRM_DEBUG("IH: D2 vline\n"); - } + if (!(rdev->irq.stat_regs.r600.disp_int & LB_D2_VLINE_INTERRUPT)) + DRM_DEBUG("IH: D2 vline - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.disp_int &= ~LB_D2_VLINE_INTERRUPT; + DRM_DEBUG("IH: D2 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -4148,46 +4156,53 @@ restart_ih: case 19: /* HPD/DAC hotplug */ switch (src_data) { case 0: - if (rdev->irq.stat_regs.r600.disp_int & DC_HPD1_INTERRUPT) { - rdev->irq.stat_regs.r600.disp_int &= ~DC_HPD1_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD1\n"); - } + if (!(rdev->irq.stat_regs.r600.disp_int & DC_HPD1_INTERRUPT)) + DRM_DEBUG("IH: HPD1 - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.disp_int &= ~DC_HPD1_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD1\n"); break; case 1: - if (rdev->irq.stat_regs.r600.disp_int & DC_HPD2_INTERRUPT) { - rdev->irq.stat_regs.r600.disp_int &= ~DC_HPD2_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD2\n"); - } + if (!(rdev->irq.stat_regs.r600.disp_int & DC_HPD2_INTERRUPT)) + DRM_DEBUG("IH: HPD2 - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.disp_int &= ~DC_HPD2_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD2\n"); break; case 4: - if (rdev->irq.stat_regs.r600.disp_int_cont & DC_HPD3_INTERRUPT) { - rdev->irq.stat_regs.r600.disp_int_cont &= ~DC_HPD3_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD3\n"); - } + if (!(rdev->irq.stat_regs.r600.disp_int_cont & DC_HPD3_INTERRUPT)) + DRM_DEBUG("IH: HPD3 - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.disp_int_cont &= ~DC_HPD3_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD3\n"); break; case 5: - if (rdev->irq.stat_regs.r600.disp_int_cont & DC_HPD4_INTERRUPT) { - rdev->irq.stat_regs.r600.disp_int_cont &= ~DC_HPD4_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD4\n"); - } + if (!(rdev->irq.stat_regs.r600.disp_int_cont & DC_HPD4_INTERRUPT)) + DRM_DEBUG("IH: HPD4 - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.disp_int_cont &= ~DC_HPD4_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD4\n"); break; case 10: - if (rdev->irq.stat_regs.r600.disp_int_cont2 & DC_HPD5_INTERRUPT) { - rdev->irq.stat_regs.r600.disp_int_cont2 &= ~DC_HPD5_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD5\n"); - } + if (!(rdev->irq.stat_regs.r600.disp_int_cont2 & DC_HPD5_INTERRUPT)) + DRM_DEBUG("IH: HPD5 - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.disp_int_cont2 &= ~DC_HPD5_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD5\n"); break; case 12: - if (rdev->irq.stat_regs.r600.disp_int_cont2 & DC_HPD6_INTERRUPT) { - rdev->irq.stat_regs.r600.disp_int_cont2 &= ~DC_HPD6_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD6\n"); - } + if (!(rdev->irq.stat_regs.r600.disp_int_cont2 & DC_HPD6_INTERRUPT)) + DRM_DEBUG("IH: HPD6 - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.disp_int_cont2 &= ~DC_HPD6_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD6\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -4197,18 +4212,22 @@ restart_ih: case 21: /* hdmi */ switch (src_data) { case 4: - if (rdev->irq.stat_regs.r600.hdmi0_status & HDMI0_AZ_FORMAT_WTRIG) { - rdev->irq.stat_regs.r600.hdmi0_status &= ~HDMI0_AZ_FORMAT_WTRIG; - queue_hdmi = true; - DRM_DEBUG("IH: HDMI0\n"); - } + if (!(rdev->irq.stat_regs.r600.hdmi0_status & HDMI0_AZ_FORMAT_WTRIG)) + DRM_DEBUG("IH: HDMI0 - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.hdmi0_status &= ~HDMI0_AZ_FORMAT_WTRIG; + queue_hdmi = true; + DRM_DEBUG("IH: HDMI0\n"); + break; case 5: - if (rdev->irq.stat_regs.r600.hdmi1_status & HDMI0_AZ_FORMAT_WTRIG) { - rdev->irq.stat_regs.r600.hdmi1_status &= ~HDMI0_AZ_FORMAT_WTRIG; - queue_hdmi = true; - DRM_DEBUG("IH: HDMI1\n"); - } + if (!(rdev->irq.stat_regs.r600.hdmi1_status & HDMI0_AZ_FORMAT_WTRIG)) + DRM_DEBUG("IH: HDMI1 - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.hdmi1_status &= ~HDMI0_AZ_FORMAT_WTRIG; + queue_hdmi = true; + DRM_DEBUG("IH: HDMI1\n"); + break; default: DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data); diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 26388b5dd6ed..07037e32dea3 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -6466,23 +6466,27 @@ restart_ih: case 1: /* D1 vblank/vline */ switch (src_data) { case 0: /* D1 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[0]) { - drm_handle_vblank(rdev->ddev, 0); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[0])) - radeon_crtc_handle_vblank(rdev, 0); - rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D1 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[0]) { + drm_handle_vblank(rdev->ddev, 0); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[0])) + radeon_crtc_handle_vblank(rdev, 0); + rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D1 vblank\n"); + break; case 1: /* D1 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT; - DRM_DEBUG("IH: D1 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT; + DRM_DEBUG("IH: D1 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -6492,23 +6496,27 @@ restart_ih: case 2: /* D2 vblank/vline */ switch (src_data) { case 0: /* D2 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[1]) { - drm_handle_vblank(rdev->ddev, 1); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[1])) - radeon_crtc_handle_vblank(rdev, 1); - rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D2 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[1]) { + drm_handle_vblank(rdev->ddev, 1); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[1])) + radeon_crtc_handle_vblank(rdev, 1); + rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D2 vblank\n"); + break; case 1: /* D2 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT; - DRM_DEBUG("IH: D2 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT; + DRM_DEBUG("IH: D2 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -6518,23 +6526,27 @@ restart_ih: case 3: /* D3 vblank/vline */ switch (src_data) { case 0: /* D3 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[2]) { - drm_handle_vblank(rdev->ddev, 2); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[2])) - radeon_crtc_handle_vblank(rdev, 2); - rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D3 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[2]) { + drm_handle_vblank(rdev->ddev, 2); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[2])) + radeon_crtc_handle_vblank(rdev, 2); + rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D3 vblank\n"); + break; case 1: /* D3 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT; - DRM_DEBUG("IH: D3 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT; + DRM_DEBUG("IH: D3 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -6544,23 +6556,27 @@ restart_ih: case 4: /* D4 vblank/vline */ switch (src_data) { case 0: /* D4 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[3]) { - drm_handle_vblank(rdev->ddev, 3); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[3])) - radeon_crtc_handle_vblank(rdev, 3); - rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D4 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[3]) { + drm_handle_vblank(rdev->ddev, 3); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[3])) + radeon_crtc_handle_vblank(rdev, 3); + rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D4 vblank\n"); + break; case 1: /* D4 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT; - DRM_DEBUG("IH: D4 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT; + DRM_DEBUG("IH: D4 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -6570,23 +6586,27 @@ restart_ih: case 5: /* D5 vblank/vline */ switch (src_data) { case 0: /* D5 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[4]) { - drm_handle_vblank(rdev->ddev, 4); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[4])) - radeon_crtc_handle_vblank(rdev, 4); - rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D5 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[4]) { + drm_handle_vblank(rdev->ddev, 4); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[4])) + radeon_crtc_handle_vblank(rdev, 4); + rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D5 vblank\n"); + break; case 1: /* D5 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT; - DRM_DEBUG("IH: D5 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT; + DRM_DEBUG("IH: D5 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -6596,23 +6616,27 @@ restart_ih: case 6: /* D6 vblank/vline */ switch (src_data) { case 0: /* D6 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[5]) { - drm_handle_vblank(rdev->ddev, 5); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[5])) - radeon_crtc_handle_vblank(rdev, 5); - rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D6 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[5]) { + drm_handle_vblank(rdev->ddev, 5); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[5])) + radeon_crtc_handle_vblank(rdev, 5); + rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D6 vblank\n"); + break; case 1: /* D6 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT; - DRM_DEBUG("IH: D6 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT; + DRM_DEBUG("IH: D6 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -6632,88 +6656,112 @@ restart_ih: case 42: /* HPD hotplug */ switch (src_data) { case 0: - if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD1\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD1\n"); + break; case 1: - if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD2\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD2\n"); + break; case 2: - if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD3\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD3\n"); + break; case 3: - if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD4\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD4\n"); + break; case 4: - if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD5\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD5\n"); + break; case 5: - if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD6\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD6\n"); + break; case 6: - if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 1\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 1\n"); + break; case 7: - if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 2\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 2\n"); + break; case 8: - if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 3\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 3\n"); + break; case 9: - if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 4\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 4\n"); + break; case 10: - if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 5\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 5\n"); + break; case 11: - if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 6\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 6\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); From bd833144a23dead304744dc748f5d72d7e92d315 Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Fri, 3 Jul 2015 06:03:07 +0200 Subject: [PATCH 133/210] drm/amdgpu: Handle irqs only based on irq ring, not irq status regs. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a translation of the patch ... "drm/radeon: Handle irqs only based on irq ring, not irq status regs." ... for the vblank irq handling, to fix the same problem described in that patch on the new driver. Only compile tested due to lack of suitable hw. Reviewed-by: Christian König Signed-off-by: Mario Kleiner CC: Michel Dänzer CC: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/dce_v10_0.c | 22 ++++++++++++++-------- drivers/gpu/drm/amd/amdgpu/dce_v11_0.c | 22 ++++++++++++++-------- drivers/gpu/drm/amd/amdgpu/dce_v8_0.c | 22 ++++++++++++++-------- 3 files changed, 42 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 5cde635978f9..6e77964f1b64 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -3403,19 +3403,25 @@ static int dce_v10_0_crtc_irq(struct amdgpu_device *adev, switch (entry->src_data) { case 0: /* vblank */ - if (disp_int & interrupt_status_offsets[crtc].vblank) { + if (disp_int & interrupt_status_offsets[crtc].vblank) dce_v10_0_crtc_vblank_int_ack(adev, crtc); - if (amdgpu_irq_enabled(adev, source, irq_type)) { - drm_handle_vblank(adev->ddev, crtc); - } - DRM_DEBUG("IH: D%d vblank\n", crtc + 1); + else + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (amdgpu_irq_enabled(adev, source, irq_type)) { + drm_handle_vblank(adev->ddev, crtc); } + DRM_DEBUG("IH: D%d vblank\n", crtc + 1); + break; case 1: /* vline */ - if (disp_int & interrupt_status_offsets[crtc].vline) { + if (disp_int & interrupt_status_offsets[crtc].vline) dce_v10_0_crtc_vline_int_ack(adev, crtc); - DRM_DEBUG("IH: D%d vline\n", crtc + 1); - } + else + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + DRM_DEBUG("IH: D%d vline\n", crtc + 1); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 95efd98b202d..7f7abb0e0be5 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -3402,19 +3402,25 @@ static int dce_v11_0_crtc_irq(struct amdgpu_device *adev, switch (entry->src_data) { case 0: /* vblank */ - if (disp_int & interrupt_status_offsets[crtc].vblank) { + if (disp_int & interrupt_status_offsets[crtc].vblank) dce_v11_0_crtc_vblank_int_ack(adev, crtc); - if (amdgpu_irq_enabled(adev, source, irq_type)) { - drm_handle_vblank(adev->ddev, crtc); - } - DRM_DEBUG("IH: D%d vblank\n", crtc + 1); + else + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (amdgpu_irq_enabled(adev, source, irq_type)) { + drm_handle_vblank(adev->ddev, crtc); } + DRM_DEBUG("IH: D%d vblank\n", crtc + 1); + break; case 1: /* vline */ - if (disp_int & interrupt_status_offsets[crtc].vline) { + if (disp_int & interrupt_status_offsets[crtc].vline) dce_v11_0_crtc_vline_int_ack(adev, crtc); - DRM_DEBUG("IH: D%d vline\n", crtc + 1); - } + else + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + DRM_DEBUG("IH: D%d vline\n", crtc + 1); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index aaca8d663f2c..08387dfd98a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -3237,19 +3237,25 @@ static int dce_v8_0_crtc_irq(struct amdgpu_device *adev, switch (entry->src_data) { case 0: /* vblank */ - if (disp_int & interrupt_status_offsets[crtc].vblank) { + if (disp_int & interrupt_status_offsets[crtc].vblank) WREG32(mmLB_VBLANK_STATUS + crtc_offsets[crtc], LB_VBLANK_STATUS__VBLANK_ACK_MASK); - if (amdgpu_irq_enabled(adev, source, irq_type)) { - drm_handle_vblank(adev->ddev, crtc); - } - DRM_DEBUG("IH: D%d vblank\n", crtc + 1); + else + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (amdgpu_irq_enabled(adev, source, irq_type)) { + drm_handle_vblank(adev->ddev, crtc); } + DRM_DEBUG("IH: D%d vblank\n", crtc + 1); + break; case 1: /* vline */ - if (disp_int & interrupt_status_offsets[crtc].vline) { + if (disp_int & interrupt_status_offsets[crtc].vline) WREG32(mmLB_VLINE_STATUS + crtc_offsets[crtc], LB_VLINE_STATUS__VLINE_ACK_MASK); - DRM_DEBUG("IH: D%d vline\n", crtc + 1); - } + else + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + DRM_DEBUG("IH: D%d vline\n", crtc + 1); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data); From 828202a382a06eda74202d4888d01a1078c68922 Mon Sep 17 00:00:00 2001 From: Grigori Goronzy Date: Fri, 3 Jul 2015 01:54:10 +0200 Subject: [PATCH 134/210] drm/radeon: use RCU query for GEM_BUSY syscall MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We don't need to call the (expensive) radeon_bo_wait, checking the fences via RCU is much faster. The reservation done by radeon_bo_wait does not save us from any race conditions. Reviewed-by: Christian König Signed-off-by: Grigori Goronzy Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_gem.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index ac3c1310b953..7199e19eb8ba 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -428,7 +428,6 @@ int radeon_gem_mmap_ioctl(struct drm_device *dev, void *data, int radeon_gem_busy_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { - struct radeon_device *rdev = dev->dev_private; struct drm_radeon_gem_busy *args = data; struct drm_gem_object *gobj; struct radeon_bo *robj; @@ -440,10 +439,16 @@ int radeon_gem_busy_ioctl(struct drm_device *dev, void *data, return -ENOENT; } robj = gem_to_radeon_bo(gobj); - r = radeon_bo_wait(robj, &cur_placement, true); + + r = reservation_object_test_signaled_rcu(robj->tbo.resv, true); + if (r == 0) + r = -EBUSY; + else + r = 0; + + cur_placement = ACCESS_ONCE(robj->tbo.mem.mem_type); args->domain = radeon_mem_type_to_domain(cur_placement); drm_gem_object_unreference_unlocked(gobj); - r = radeon_gem_handle_lockup(rdev, r); return r; } From 54e03986133468e02cb01b76215e4d53a9cf6380 Mon Sep 17 00:00:00 2001 From: Grigori Goronzy Date: Fri, 3 Jul 2015 01:54:11 +0200 Subject: [PATCH 135/210] drm/radeon: fix HDP flushing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This was regressed by commit 39e7f6f8, although I don't know of any actual issues caused by it. The storage domain is read without TTM locking now, but the lock never helped to prevent any races. Reviewed-by: Christian König Cc: stable@vger.kernel.org Signed-off-by: Grigori Goronzy Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_gem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index 7199e19eb8ba..013ec7106e55 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -476,6 +476,7 @@ int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data, r = ret; /* Flush HDP cache via MMIO if necessary */ + cur_placement = ACCESS_ONCE(robj->tbo.mem.mem_type); if (rdev->asic->mmio_hdp_flush && radeon_mem_type_to_domain(cur_placement) == RADEON_GEM_DOMAIN_VRAM) robj->rdev->asic->mmio_hdp_flush(rdev); From 5e3c4f907043a7fae1b48e86536dc7b9efa07e29 Mon Sep 17 00:00:00 2001 From: Grigori Goronzy Date: Fri, 3 Jul 2015 01:54:12 +0200 Subject: [PATCH 136/210] drm/radeon: default to 2048 MB GART size on SI+ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Newer ASICs have more VRAM on average and allocating more GART as well can have advantages. Also see commit edcd26e8. Ideally, we should scale GART size based on actual VRAM size, but that requires significant restructuring of initialization. v2: extract small helper, apply to error paths Reviewed-by: Christian König Signed-off-by: Grigori Goronzy Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_device.c | 32 +++++++++++++++----------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 2593b1168bd6..1a532a9bc3a3 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1079,6 +1079,22 @@ static bool radeon_check_pot_argument(int arg) return (arg & (arg - 1)) == 0; } +/** + * Determine a sensible default GART size according to ASIC family. + * + * @family ASIC family name + */ +static int radeon_gart_size_auto(enum radeon_family family) +{ + /* default to a larger gart size on newer asics */ + if (family >= CHIP_TAHITI) + return 2048; + else if (family >= CHIP_RV770) + return 1024; + else + return 512; +} + /** * radeon_check_arguments - validate module params * @@ -1097,27 +1113,17 @@ static void radeon_check_arguments(struct radeon_device *rdev) } if (radeon_gart_size == -1) { - /* default to a larger gart size on newer asics */ - if (rdev->family >= CHIP_RV770) - radeon_gart_size = 1024; - else - radeon_gart_size = 512; + radeon_gart_size = radeon_gart_size_auto(rdev->family); } /* gtt size must be power of two and greater or equal to 32M */ if (radeon_gart_size < 32) { dev_warn(rdev->dev, "gart size (%d) too small\n", radeon_gart_size); - if (rdev->family >= CHIP_RV770) - radeon_gart_size = 1024; - else - radeon_gart_size = 512; + radeon_gart_size = radeon_gart_size_auto(rdev->family); } else if (!radeon_check_pot_argument(radeon_gart_size)) { dev_warn(rdev->dev, "gart size (%d) must be a power of 2\n", radeon_gart_size); - if (rdev->family >= CHIP_RV770) - radeon_gart_size = 1024; - else - radeon_gart_size = 512; + radeon_gart_size = radeon_gart_size_auto(rdev->family); } rdev->mc.gtt_size = (uint64_t)radeon_gart_size << 20; From 866a92040392d844d151aa6a2ba15be2e6e5df4f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 3 Jul 2015 11:54:28 +0300 Subject: [PATCH 137/210] drm/radeon: fix underflow in r600_cp_dispatch_texture() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "if (pass_size > buf->total)" can underflow so I have changed the type of size and pass_size to unsigned to avoid this problem. Reviewed-by: Christian König Signed-off-by: Dan Carpenter Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/r600_cp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/r600_cp.c b/drivers/gpu/drm/radeon/r600_cp.c index 09e3f39925fa..98f9adaccc3d 100644 --- a/drivers/gpu/drm/radeon/r600_cp.c +++ b/drivers/gpu/drm/radeon/r600_cp.c @@ -2483,7 +2483,7 @@ int r600_cp_dispatch_texture(struct drm_device *dev, struct drm_buf *buf; u32 *buffer; const u8 __user *data; - int size, pass_size; + unsigned int size, pass_size; u64 src_offset, dst_offset; if (!radeon_check_offset(dev_priv, tex->offset)) { From 1b42804d27b1c2623309950e9b203b11f4c67f4f Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 7 Jul 2015 18:00:49 +0100 Subject: [PATCH 138/210] arm64: entry: handle debug exceptions in el*_inv Currently we enable debug exceptions before reading ESR_EL1 in both el0_inv and el1_inv. If a debug exception is taken before we read ESR_EL1, the value will have been corrupted. As el*_inv is typically fatal, an intervening debug exception results in misleading debug information being logged to the console, but is not otherwise harmful. As with the other entry paths, we can use the ESR_EL1 value stashed earlier in the exception entry (in x25 for el0_sync{,_compat}, and x1 for el1_sync), giving us better error reporting in this case. Signed-off-by: Mark Rutland Acked-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/entry.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index a7691a378668..f860bfda454a 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -352,8 +352,8 @@ el1_inv: // TODO: add support for undefined instructions in kernel mode enable_dbg mov x0, sp + mov x2, x1 mov x1, #BAD_SYNC - mrs x2, esr_el1 b bad_mode ENDPROC(el1_sync) @@ -553,7 +553,7 @@ el0_inv: ct_user_exit mov x0, sp mov x1, #BAD_SYNC - mrs x2, esr_el1 + mov x2, x25 bl bad_mode b ret_to_user ENDPROC(el0_sync) From 758556bdc1c8a8dffea0ea9f9df891878cc2468c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 9 Jul 2015 06:48:06 +0930 Subject: [PATCH 139/210] module: Fix load_module() error path The load_module() error path frees a module but forgot to take it out of the mod_tree, leaving a dangling entry in the tree, causing havoc. Cc: Mathieu Desnoyers Reported-by: Arthur Marsh Tested-by: Arthur Marsh Fixes: 93c2e105f6bc ("module: Optimize __module_address() using a latched RB-tree") Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Rusty Russell --- kernel/module.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/module.c b/kernel/module.c index 3e0e19763d24..4d2b82e610e2 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3557,6 +3557,7 @@ static int load_module(struct load_info *info, const char __user *uargs, mutex_lock(&module_mutex); /* Unlink carefully: kallsyms could be walking list. */ list_del_rcu(&mod->list); + mod_tree_remove(mod); wake_up_all(&module_wq); /* Wait for RCU-sched synchronizing before releasing mod->list. */ synchronize_sched(); From 673c2c34f684e9d4328459e426ab54d51a5865c5 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Wed, 8 Jul 2015 17:07:41 -0400 Subject: [PATCH 140/210] modpost: work correctly with tile coldtext sections The tilegx and tilepro compilers use .coldtext for their unlikely executed text section name, so an __attribute__((cold)) function will (when compiled with higher optimization levels) land in the .coldtext section. Modify modpost to add .coldtext to the set of OTHER_TEXT_SECTIONS so we don't get warnings about referencing such a section in an __ex_table block, and then also modify arch/tile/lib/memcpy_user_64.c so that it uses plain ".coldtext" instead of ".coldtext.memcpy". The latter naming is a relic of an earlier use of -ffunction-sections, which we no longer use by default. Signed-off-by: Chris Metcalf Acked-by: Rusty Russell --- arch/tile/lib/memcpy_user_64.c | 4 ++-- scripts/mod/modpost.c | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/tile/lib/memcpy_user_64.c b/arch/tile/lib/memcpy_user_64.c index 88c7016492c4..97bbb6060b25 100644 --- a/arch/tile/lib/memcpy_user_64.c +++ b/arch/tile/lib/memcpy_user_64.c @@ -28,7 +28,7 @@ #define _ST(p, inst, v) \ ({ \ asm("1: " #inst " %0, %1;" \ - ".pushsection .coldtext.memcpy,\"ax\";" \ + ".pushsection .coldtext,\"ax\";" \ "2: { move r0, %2; jrp lr };" \ ".section __ex_table,\"a\";" \ ".align 8;" \ @@ -41,7 +41,7 @@ ({ \ unsigned long __v; \ asm("1: " #inst " %0, %1;" \ - ".pushsection .coldtext.memcpy,\"ax\";" \ + ".pushsection .coldtext,\"ax\";" \ "2: { move r0, %2; jrp lr };" \ ".section __ex_table,\"a\";" \ ".align 8;" \ diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 91ee1b2e0f9a..12d3db3bd46b 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -886,7 +886,8 @@ static void check_section(const char *modname, struct elf_info *elf, #define TEXT_SECTIONS ".text", ".text.unlikely", ".sched.text", \ ".kprobes.text" #define OTHER_TEXT_SECTIONS ".ref.text", ".head.text", ".spinlock.text", \ - ".fixup", ".entry.text", ".exception.text", ".text.*" + ".fixup", ".entry.text", ".exception.text", ".text.*", \ + ".coldtext" #define INIT_SECTIONS ".init.*" #define MEM_INIT_SECTIONS ".meminit.*" From 19ee835cdb0b5a8eb11a68f25a51b8039d564488 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 29 Jun 2015 14:01:19 +0100 Subject: [PATCH 141/210] drm/i915: Declare the swizzling unknown for L-shaped configurations The old style of memory interleaving swizzled upto the end of the first even bank of memory, and then used the remainder as unswizzled on the unpaired bank - i.e. swizzling is not constant for all memory. This causes problems when we try to migrate memory and so the kernel prevents migration at all when we detect L-shaped inconsistent swizzling. However, this issue also extends to userspace who try to manually detile into memory as the swizzling for an individual page is unknown (it depends on its physical address only known to the kernel), userspace cannot correctly swizzle objects. v2: Mark the global swizzling as unknown rather than adjust the value reported to userspace. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91105 Signed-off-by: Chris Wilson Cc: Daniel Vetter Cc: stable@vger.kernel.org Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_tiling.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 633bd1fcab69..d61e74a08f82 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -183,8 +183,18 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev) if (IS_GEN4(dev)) { uint32_t ddc2 = I915_READ(DCC2); - if (!(ddc2 & DCC2_MODIFIED_ENHANCED_DISABLE)) + if (!(ddc2 & DCC2_MODIFIED_ENHANCED_DISABLE)) { + /* Since the swizzling may vary within an + * object, we have no idea what the swizzling + * is for any page in particular. Thus we + * cannot migrate tiled pages using the GPU, + * nor can we tell userspace what the exact + * swizzling is for any object. + */ dev_priv->quirks |= QUIRK_PIN_SWIZZLED_PAGES; + swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; + swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; + } } if (dcc == 0xffffffff) { From 52613921b31d8573a212a4b0854b390a18d9849c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 29 Jun 2015 20:28:35 +0300 Subject: [PATCH 142/210] Revert "drm/i915: Allocate context objects from stolen" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stolen gets trashed during hibernation, so storing contexts there is not a very good idea. On my IVB machines this leads to a totally dead GPU on resume. A reboot is required to resurrect it. So let's not store contexts where they will get trampled. This reverts commit 149c86e74fe44dcbac5e9f8d145c5fbc5dc21261. Cc: Chris Wilson Signed-off-by: Ville Syrjälä Acked-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_context.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 8867818b1401..d65cbe6afb92 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -157,9 +157,7 @@ i915_gem_alloc_context_obj(struct drm_device *dev, size_t size) struct drm_i915_gem_object *obj; int ret; - obj = i915_gem_object_create_stolen(dev, size); - if (obj == NULL) - obj = i915_gem_alloc_object(dev, size); + obj = i915_gem_alloc_object(dev, size); if (obj == NULL) return ERR_PTR(-ENOMEM); From 0ec62aaee919991d9e0e278f70776d560d95fc29 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 9 Jul 2015 10:51:46 +0200 Subject: [PATCH 143/210] cris: Replace do_posix_clock_monotonic_gettime() ktime_get_ts() is the proper interface today. Signed-off-by: Thomas Gleixner Cc: Jesper Nilsson --- arch/cris/arch-v32/drivers/sync_serial.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/cris/arch-v32/drivers/sync_serial.c b/arch/cris/arch-v32/drivers/sync_serial.c index 4dda9bd6b8fb..e989cee77414 100644 --- a/arch/cris/arch-v32/drivers/sync_serial.c +++ b/arch/cris/arch-v32/drivers/sync_serial.c @@ -1464,7 +1464,7 @@ static inline void handle_rx_packet(struct sync_port *port) if (port->write_ts_idx == NBR_IN_DESCR) port->write_ts_idx = 0; idx = port->write_ts_idx++; - do_posix_clock_monotonic_gettime(&port->timestamp[idx]); + ktime_get_ts(&port->timestamp[idx]); port->in_buffer_len += port->inbufchunk; } spin_unlock_irqrestore(&port->lock, flags); From 1f6823faa8c563431a94e614d2b63ce16bb6f658 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 9 Jul 2015 10:51:46 +0200 Subject: [PATCH 144/210] time: Get rid of do_posix_clock_monotonic_gettime All users gone. Remove it before we get another one. Signed-off-by: Thomas Gleixner --- include/linux/timekeeping.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 3aa72e648650..6e191e4e6ab6 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -145,7 +145,6 @@ static inline void getboottime(struct timespec *ts) } #endif -#define do_posix_clock_monotonic_gettime(ts) ktime_get_ts(ts) #define ktime_get_real_ts64(ts) getnstimeofday64(ts) /* From a6335fa11e08fc386740557b2a4c1b7ff34bc499 Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Thu, 2 Jul 2015 17:16:01 +0200 Subject: [PATCH 145/210] MIPS: bootmem: Don't use memory holes for page bitmap Commit f9a7febd leads to a fact that mapstart and therefore a page bitmap for bootmem allocator immediately follows initrd_end. This doesn't always work well on Octeon, where there are holes in PFN ranges (refer to 5b3b1688 and 4MB-aligned PFN allocation). Depending on the inird location it could happen, that mapstart would be in an area not allocated by plat_mem_setup() in arch/mips/cavium-octeon/setup.c, but in the alignment hole between initrd and the next PFN area. Later on this memory will be unconditionally made available to buddy allocator at the end of free_all_bootmem_core() (mm/bootmem.c). All of this results in Linux using the memory not designated for Linux in Octeon's plat_mem_setup(), which in turn means corruption of the memory used by another OS/baremetal code on the same SoC. It doesn't look to me as a problem of Octeon platform code, but rather as an inability of f9a7febd to deal correctly with the fragmented memory-mappings. Proposed fix moves the check for initrd address to the same calculation-loop in bootmem_init() (arch/mips/kernel/setup.c), which also accounts for kernel code location. This should result in mapstart located starting from the first PFN area after kernel code AND initrd. Signed-off-by: Alexander Sverdlin Cc: linux-mips@linux-mips.org Cc: David Daney Cc: Zubair Lutfullah Kakakhel Cc: Huacai Chen Cc: Andreas Herrmann Cc: Joe Perches Cc: Steven J. Hill Cc: Yusuf Khan Cc: Michael Kreuzer Cc: Aaro Koskinen Patchwork: https://patchwork.linux-mips.org/patch/10594/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/setup.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index be73c491182b..008b3378653a 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -337,6 +337,11 @@ static void __init bootmem_init(void) min_low_pfn = start; if (end <= reserved_end) continue; +#ifdef CONFIG_BLK_DEV_INITRD + /* mapstart should be after initrd_end */ + if (initrd_end && end <= (unsigned long)PFN_UP(__pa(initrd_end))) + continue; +#endif if (start >= mapstart) continue; mapstart = max(reserved_end, start); @@ -366,14 +371,6 @@ static void __init bootmem_init(void) max_low_pfn = PFN_DOWN(HIGHMEM_START); } -#ifdef CONFIG_BLK_DEV_INITRD - /* - * mapstart should be after initrd_end - */ - if (initrd_end) - mapstart = max(mapstart, (unsigned long)PFN_UP(__pa(initrd_end))); -#endif - /* * Initialize the boot-time allocator with low memory only. */ From 761b4493bbb7b614387794f39e3969716e9e0215 Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Wed, 24 Jun 2015 09:29:20 +0100 Subject: [PATCH 146/210] MIPS: kernel: traps: Fix broken indentation Fix broken indentation caused by the SMTC removal commit b633648c5ad3cfbda0b3daea50d2135d44899259 ("MIPS: MT: Remove SMTC support") Signed-off-by: Markos Chandras Fixes: b633648c5ad3c ("MIPS: MT: Remove SMTC support") Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/10581/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/traps.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 2a7b38ed23f0..e207a43b5f8f 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -2130,10 +2130,10 @@ void per_cpu_trap_init(bool is_boot_cpu) BUG_ON(current->mm); enter_lazy_tlb(&init_mm, current); - /* Boot CPU's cache setup in setup_arch(). */ - if (!is_boot_cpu) - cpu_cache_init(); - tlb_init(); + /* Boot CPU's cache setup in setup_arch(). */ + if (!is_boot_cpu) + cpu_cache_init(); + tlb_init(); TLBMISS_HANDLER_SETUP(); } From e9d92d223381f1f3be5d87322b576721d3b93612 Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Wed, 24 Jun 2015 09:52:00 +0100 Subject: [PATCH 147/210] MIPS: Fix branch emulation for BLTC and BGEC instructions Commits f1b44067c19258b7614e3cd09dfe8d8e12ff5895 ("MIPS: Emulate the new MIPS R6 B{L,G}T{Z,}{AL,}C instructions") and commit a8ff66f52d3f17b5ae793955270675c197f73d6c ("MIPS: Emulate the new MIPS R6 B{L,G}E{Z,}{AL,}C instructions") added support for emulating various branch compact instructions. However, it missed the case for those which use the old BLEZL and BGTZL opcodes leading to random crashes when the R6 emulator is disabled. We fix this by ensuring that the 'rt' field is not zero which is always true for these branch compact instructions. Fixes: f1b44067c192 ("MIPS: Emulate the new MIPS R6 B{L,G}T{Z,}{AL,}C instructions") Fixes: a8ff66f52d3f ("MIPS: Emulate the new MIPS R6 B{L,G}E{Z,}{AL,}C instructions") Cc: # 4.0+ Signed-off-by: Markos Chandras Cc: linux-mips@linux-mips.org Cc: Markos Chandras Patchwork: https://patchwork.linux-mips.org/patch/10582/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/branch.c | 4 ++-- arch/mips/math-emu/cp1emu.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/mips/kernel/branch.c b/arch/mips/kernel/branch.c index c0c5e5972256..d8f9b357b222 100644 --- a/arch/mips/kernel/branch.c +++ b/arch/mips/kernel/branch.c @@ -600,7 +600,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, break; case blezl_op: /* not really i_format */ - if (NO_R6EMU) + if (!insn.i_format.rt && NO_R6EMU) goto sigill_r6; case blez_op: /* @@ -635,7 +635,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, break; case bgtzl_op: - if (NO_R6EMU) + if (!insn.i_format.rt && NO_R6EMU) goto sigill_r6; case bgtz_op: /* diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c index 22b9b2cb9219..00c241ae04ce 100644 --- a/arch/mips/math-emu/cp1emu.c +++ b/arch/mips/math-emu/cp1emu.c @@ -551,7 +551,7 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, dec_insn.next_pc_inc; return 1; case blezl_op: - if (NO_R6EMU) + if (!insn.i_format.rt && NO_R6EMU) break; case blez_op: @@ -588,7 +588,7 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, dec_insn.next_pc_inc; return 1; case bgtzl_op: - if (NO_R6EMU) + if (!insn.i_format.rt && NO_R6EMU) break; case bgtz_op: /* From 143fefc8f315cd10e046e6860913c421c3385cb1 Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Wed, 24 Jun 2015 09:52:01 +0100 Subject: [PATCH 148/210] MIPS: Fix erroneous JR emulation for MIPS R6 Commit 5f9f41c474befb4ebbc40b27f65bb7d649241581 ("MIPS: kernel: Prepare the JR instruction for emulation on MIPS R6") added support for emulating the JR instruction on MIPS R6 cores but that introduced a bug which could be triggered when hitting a JALR opcode because the code used the wrong field in the 'r_format' struct to determine the instruction opcode. This lead to crashes because an emulated JALR instruction was treated as a JR one when the R6 emulator was turned off. Fixes: 5f9f41c474be ("MIPS: kernel: Prepare the JR instruction for emulation on MIPS R6") Cc: # 4.0+ Signed-off-by: Markos Chandras Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/10583/ Signed-off-by: Ralf Baechle --- arch/mips/math-emu/cp1emu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c index 00c241ae04ce..712f17a2ecf2 100644 --- a/arch/mips/math-emu/cp1emu.c +++ b/arch/mips/math-emu/cp1emu.c @@ -451,7 +451,7 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, /* Fall through */ case jr_op: /* For R6, JR already emulated in jalr_op */ - if (NO_R6EMU && insn.r_format.opcode == jr_op) + if (NO_R6EMU && insn.r_format.func == jr_op) break; *contpc = regs->regs[insn.r_format.rs]; return 1; From fd5ed3066bb2f47814fe53cdc56d11a678551ae1 Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Wed, 1 Jul 2015 09:13:28 +0100 Subject: [PATCH 149/210] MIPS: kernel: smp-cps: Fix 64-bit compatibility errors due to pointer casting Commit 1d8f1f5a780a ("MIPS: smp-cps: hotplug support") added hotplug support in the SMP/CPS implementation but it introduced a few build problems on 64-bit kernels due to pointer being casted to and from 'int' C types. We fix this problem by using 'unsigned long' instead which should match the size of the pointers in 32/64-bit kernels. Finally, we fix the comment since the CM base address is loaded to v1($3) instead of v0. Fixes the following build problems: arch/mips/kernel/smp-cps.c: In function 'wait_for_sibling_halt': arch/mips/kernel/smp-cps.c:366:17: error: cast from pointer to integer of different size [-Werror=pointer-to-int-cast] [...] arch/mips/kernel/smp-cps.c: In function 'cps_cpu_die': arch/mips/kernel/smp-cps.c:427:13: error: cast to pointer from integer of different size [-Werror=int-to-pointer-cast] cc1: all warnings being treated as errors Fixes: 1d8f1f5a780a ("MIPS: smp-cps: hotplug support") Cc: # 3.16+ Reviewed-by: Paul Burton Signed-off-by: Markos Chandras Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/10586/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/smp-cps.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c index 4251d390b5b6..c88937745b4e 100644 --- a/arch/mips/kernel/smp-cps.c +++ b/arch/mips/kernel/smp-cps.c @@ -133,7 +133,7 @@ static void __init cps_prepare_cpus(unsigned int max_cpus) /* * Patch the start of mips_cps_core_entry to provide: * - * v0 = CM base address + * v1 = CM base address * s0 = kseg0 CCA */ entry_code = (u32 *)&mips_cps_core_entry; @@ -369,7 +369,7 @@ void play_dead(void) static void wait_for_sibling_halt(void *ptr_cpu) { - unsigned cpu = (unsigned)ptr_cpu; + unsigned cpu = (unsigned long)ptr_cpu; unsigned vpe_id = cpu_vpe_id(&cpu_data[cpu]); unsigned halted; unsigned long flags; @@ -430,7 +430,7 @@ static void cps_cpu_die(unsigned int cpu) */ err = smp_call_function_single(cpu_death_sibling, wait_for_sibling_halt, - (void *)cpu, 1); + (void *)(unsigned long)cpu, 1); if (err) panic("Failed to call remote sibling CPU\n"); } From 81a02e34ded906357deac7003fbb0d36b6cc503f Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Wed, 1 Jul 2015 09:13:29 +0100 Subject: [PATCH 150/210] MIPS: kernel: cps-vec: Replace 'la' macro with PTR_LA The PTR_LA macro will pick the correct "la" or "dla" macro to load an address to a register. This gets rids of the following warnings (and others) when building a 64-bit CPS kernel: arch/mips/kernel/cps-vec.S:63: Warning: la used to load 64-bit address arch/mips/kernel/cps-vec.S:159: Warning: la used to load 64-bit address arch/mips/kernel/cps-vec.S:220: Warning: la used to load 64-bit address arch/mips/kernel/cps-vec.S:240: Warning: la used to load 64-bit address [...] Cc: # 3.16+ Reviewed-by: Paul Burton Signed-off-by: Markos Chandras Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/10587/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/cps-vec.S | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/mips/kernel/cps-vec.S b/arch/mips/kernel/cps-vec.S index 55b759a0019e..a4b2d81f45dd 100644 --- a/arch/mips/kernel/cps-vec.S +++ b/arch/mips/kernel/cps-vec.S @@ -60,7 +60,7 @@ LEAF(mips_cps_core_entry) nop /* This is an NMI */ - la k0, nmi_handler + PTR_LA k0, nmi_handler jr k0 nop @@ -156,7 +156,7 @@ dcache_done: ehb /* Jump to kseg0 */ - la t0, 1f + PTR_LA t0, 1f jr t0 nop @@ -217,7 +217,7 @@ LEAF(excep_intex) .org 0x480 LEAF(excep_ejtag) - la k0, ejtag_debug_handler + PTR_LA k0, ejtag_debug_handler jr k0 nop END(excep_ejtag) @@ -237,7 +237,7 @@ LEAF(mips_cps_core_init) /* ...and for the moment only 1 VPE */ dvpe - la t1, 1f + PTR_LA t1, 1f jr.hb t1 nop @@ -298,14 +298,14 @@ LEAF(mips_cps_core_init) LEAF(mips_cps_boot_vpes) /* Retrieve CM base address */ - la t0, mips_cm_base + PTR_LA t0, mips_cm_base lw t0, 0(t0) /* Calculate a pointer to this cores struct core_boot_config */ lw t0, GCR_CL_ID_OFS(t0) li t1, COREBOOTCFG_SIZE mul t0, t0, t1 - la t1, mips_cps_core_bootcfg + PTR_LA t1, mips_cps_core_bootcfg lw t1, 0(t1) addu t0, t0, t1 @@ -351,7 +351,7 @@ LEAF(mips_cps_boot_vpes) 1: /* Enter VPE configuration state */ dvpe - la t1, 1f + PTR_LA t1, 1f jr.hb t1 nop 1: mfc0 t1, CP0_MVPCONTROL @@ -445,7 +445,7 @@ LEAF(mips_cps_boot_vpes) /* This VPE should be offline, halt the TC */ li t0, TCHALT_H mtc0 t0, CP0_TCHALT - la t0, 1f + PTR_LA t0, 1f 1: jr.hb t0 nop @@ -466,10 +466,10 @@ LEAF(mips_cps_boot_vpes) .set noat lw $1, TI_CPU(gp) sll $1, $1, LONGLOG - la \dest, __per_cpu_offset + PTR_LA \dest, __per_cpu_offset addu $1, $1, \dest lw $1, 0($1) - la \dest, cps_cpu_state + PTR_LA \dest, cps_cpu_state addu \dest, \dest, $1 .set pop .endm From 977e043d5ea1270ce985e4c165724ff91dc3c3e2 Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Wed, 1 Jul 2015 09:13:30 +0100 Subject: [PATCH 151/210] MIPS: kernel: cps-vec: Replace mips32r2 ISA level with mips64r2 mips32r2 is a subset of mips64r2, so we replace mips32r2 with mips64r2 in preparation for 64-bit CPS support. Cc: # 3.16+ Reviewed-by: Paul Burton Signed-off-by: Markos Chandras Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/10588/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/cps-vec.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/mips/kernel/cps-vec.S b/arch/mips/kernel/cps-vec.S index a4b2d81f45dd..bbbd88e994f0 100644 --- a/arch/mips/kernel/cps-vec.S +++ b/arch/mips/kernel/cps-vec.S @@ -229,7 +229,7 @@ LEAF(mips_cps_core_init) nop .set push - .set mips32r2 + .set mips64r2 .set mt /* Only allow 1 TC per VPE to execute... */ @@ -346,7 +346,7 @@ LEAF(mips_cps_boot_vpes) nop .set push - .set mips32r2 + .set mips64r2 .set mt 1: /* Enter VPE configuration state */ From 0586ac75cd0746a4d5c43372dabcea8739ae0176 Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Wed, 1 Jul 2015 09:13:31 +0100 Subject: [PATCH 152/210] MIPS: kernel: cps-vec: Use ta0-ta3 pseudo-registers for 64-bit The cps-vec code assumes O32 ABI and uses t4-t7 in quite a few places. This breaks the build on 64-bit. As a result of which, use the pseudo-registers ta0-ta3 to make the code compatible with 64-bit. Cc: # 3.16+ Reviewed-by: Paul Burton Signed-off-by: Markos Chandras Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/10589/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/cps-vec.S | 42 +++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/arch/mips/kernel/cps-vec.S b/arch/mips/kernel/cps-vec.S index bbbd88e994f0..21f714a81ebd 100644 --- a/arch/mips/kernel/cps-vec.S +++ b/arch/mips/kernel/cps-vec.S @@ -250,25 +250,25 @@ LEAF(mips_cps_core_init) mfc0 t0, CP0_MVPCONF0 srl t0, t0, MVPCONF0_PVPE_SHIFT andi t0, t0, (MVPCONF0_PVPE >> MVPCONF0_PVPE_SHIFT) - addiu t7, t0, 1 + addiu ta3, t0, 1 /* If there's only 1, we're done */ beqz t0, 2f nop /* Loop through each VPE within this core */ - li t5, 1 + li ta1, 1 1: /* Operate on the appropriate TC */ - mtc0 t5, CP0_VPECONTROL + mtc0 ta1, CP0_VPECONTROL ehb /* Bind TC to VPE (1:1 TC:VPE mapping) */ - mttc0 t5, CP0_TCBIND + mttc0 ta1, CP0_TCBIND /* Set exclusive TC, non-active, master */ li t0, VPECONF0_MVP - sll t1, t5, VPECONF0_XTC_SHIFT + sll t1, ta1, VPECONF0_XTC_SHIFT or t0, t0, t1 mttc0 t0, CP0_VPECONF0 @@ -280,8 +280,8 @@ LEAF(mips_cps_core_init) mttc0 t0, CP0_TCHALT /* Next VPE */ - addiu t5, t5, 1 - slt t0, t5, t7 + addiu ta1, ta1, 1 + slt t0, ta1, ta3 bnez t0, 1b nop @@ -310,7 +310,7 @@ LEAF(mips_cps_boot_vpes) addu t0, t0, t1 /* Calculate this VPEs ID. If the core doesn't support MT use 0 */ - has_mt t6, 1f + has_mt ta2, 1f li t9, 0 /* Find the number of VPEs present in the core */ @@ -334,13 +334,13 @@ LEAF(mips_cps_boot_vpes) 1: /* Calculate a pointer to this VPEs struct vpe_boot_config */ li t1, VPEBOOTCFG_SIZE mul v0, t9, t1 - lw t7, COREBOOTCFG_VPECONFIG(t0) - addu v0, v0, t7 + lw ta3, COREBOOTCFG_VPECONFIG(t0) + addu v0, v0, ta3 #ifdef CONFIG_MIPS_MT /* If the core doesn't support MT then return */ - bnez t6, 1f + bnez ta2, 1f nop jr ra nop @@ -360,12 +360,12 @@ LEAF(mips_cps_boot_vpes) ehb /* Loop through each VPE */ - lw t6, COREBOOTCFG_VPEMASK(t0) - move t8, t6 - li t5, 0 + lw ta2, COREBOOTCFG_VPEMASK(t0) + move t8, ta2 + li ta1, 0 /* Check whether the VPE should be running. If not, skip it */ -1: andi t0, t6, 1 +1: andi t0, ta2, 1 beqz t0, 2f nop @@ -373,7 +373,7 @@ LEAF(mips_cps_boot_vpes) mfc0 t0, CP0_VPECONTROL ori t0, t0, VPECONTROL_TARGTC xori t0, t0, VPECONTROL_TARGTC - or t0, t0, t5 + or t0, t0, ta1 mtc0 t0, CP0_VPECONTROL ehb @@ -384,8 +384,8 @@ LEAF(mips_cps_boot_vpes) /* Calculate a pointer to the VPEs struct vpe_boot_config */ li t0, VPEBOOTCFG_SIZE - mul t0, t0, t5 - addu t0, t0, t7 + mul t0, t0, ta1 + addu t0, t0, ta3 /* Set the TC restart PC */ lw t1, VPEBOOTCFG_PC(t0) @@ -423,9 +423,9 @@ LEAF(mips_cps_boot_vpes) mttc0 t0, CP0_VPECONF0 /* Next VPE */ -2: srl t6, t6, 1 - addiu t5, t5, 1 - bnez t6, 1b +2: srl ta2, ta2, 1 + addiu ta1, ta1, 1 + bnez ta2, 1b nop /* Leave VPE configuration state */ From 717f14255a52ad445d6f0eca7d0f22f59d6ba1f8 Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Wed, 1 Jul 2015 09:13:32 +0100 Subject: [PATCH 153/210] MIPS: kernel: cps-vec: Replace KSEG0 with CKSEG0 In preparation for 64-bit CPS support, we replace KSEG0 with CKSEG0 so 64-bit kernels can be supported. Cc: # 3.16+ Reviewed-by: Paul Burton Signed-off-by: Markos Chandras Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/10590/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/cps-vec.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/mips/kernel/cps-vec.S b/arch/mips/kernel/cps-vec.S index 21f714a81ebd..2f95568e0da5 100644 --- a/arch/mips/kernel/cps-vec.S +++ b/arch/mips/kernel/cps-vec.S @@ -107,7 +107,7 @@ not_nmi: mul t1, t1, t0 mul t1, t1, t2 - li a0, KSEG0 + li a0, CKSEG0 add a1, a0, t1 1: cache Index_Store_Tag_I, 0(a0) add a0, a0, t0 @@ -134,7 +134,7 @@ icache_done: mul t1, t1, t0 mul t1, t1, t2 - li a0, KSEG0 + li a0, CKSEG0 addu a1, a0, t1 subu a1, a1, t0 1: cache Index_Store_Tag_D, 0(a0) From b677bc03d757c7d749527cccdd2afcf34ebeeb07 Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Wed, 1 Jul 2015 09:13:33 +0100 Subject: [PATCH 154/210] MIPS: cps-vec: Use macros for various arithmetics and memory operations Replace lw/sw and various arithmetic instructions with macros so the code can work on 64-bit kernels as well. Cc: # 3.16+ Reviewed-by: Paul Burton Signed-off-by: Markos Chandras Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/10591/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/cps-vec.S | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/arch/mips/kernel/cps-vec.S b/arch/mips/kernel/cps-vec.S index 2f95568e0da5..1b6ca634e646 100644 --- a/arch/mips/kernel/cps-vec.S +++ b/arch/mips/kernel/cps-vec.S @@ -108,9 +108,9 @@ not_nmi: mul t1, t1, t2 li a0, CKSEG0 - add a1, a0, t1 + PTR_ADD a1, a0, t1 1: cache Index_Store_Tag_I, 0(a0) - add a0, a0, t0 + PTR_ADD a0, a0, t0 bne a0, a1, 1b nop icache_done: @@ -135,11 +135,11 @@ icache_done: mul t1, t1, t2 li a0, CKSEG0 - addu a1, a0, t1 - subu a1, a1, t0 + PTR_ADDU a1, a0, t1 + PTR_SUBU a1, a1, t0 1: cache Index_Store_Tag_D, 0(a0) bne a0, a1, 1b - add a0, a0, t0 + PTR_ADD a0, a0, t0 dcache_done: /* Set Kseg0 CCA to that in s0 */ @@ -152,7 +152,7 @@ dcache_done: /* Enter the coherent domain */ li t0, 0xff - sw t0, GCR_CL_COHERENCE_OFS(v1) + PTR_S t0, GCR_CL_COHERENCE_OFS(v1) ehb /* Jump to kseg0 */ @@ -178,9 +178,9 @@ dcache_done: nop /* Off we go! */ - lw t1, VPEBOOTCFG_PC(v0) - lw gp, VPEBOOTCFG_GP(v0) - lw sp, VPEBOOTCFG_SP(v0) + PTR_L t1, VPEBOOTCFG_PC(v0) + PTR_L gp, VPEBOOTCFG_GP(v0) + PTR_L sp, VPEBOOTCFG_SP(v0) jr t1 nop END(mips_cps_core_entry) @@ -299,15 +299,15 @@ LEAF(mips_cps_core_init) LEAF(mips_cps_boot_vpes) /* Retrieve CM base address */ PTR_LA t0, mips_cm_base - lw t0, 0(t0) + PTR_L t0, 0(t0) /* Calculate a pointer to this cores struct core_boot_config */ - lw t0, GCR_CL_ID_OFS(t0) + PTR_L t0, GCR_CL_ID_OFS(t0) li t1, COREBOOTCFG_SIZE mul t0, t0, t1 PTR_LA t1, mips_cps_core_bootcfg - lw t1, 0(t1) - addu t0, t0, t1 + PTR_L t1, 0(t1) + PTR_ADDU t0, t0, t1 /* Calculate this VPEs ID. If the core doesn't support MT use 0 */ has_mt ta2, 1f @@ -334,8 +334,8 @@ LEAF(mips_cps_boot_vpes) 1: /* Calculate a pointer to this VPEs struct vpe_boot_config */ li t1, VPEBOOTCFG_SIZE mul v0, t9, t1 - lw ta3, COREBOOTCFG_VPECONFIG(t0) - addu v0, v0, ta3 + PTR_L ta3, COREBOOTCFG_VPECONFIG(t0) + PTR_ADDU v0, v0, ta3 #ifdef CONFIG_MIPS_MT @@ -360,7 +360,7 @@ LEAF(mips_cps_boot_vpes) ehb /* Loop through each VPE */ - lw ta2, COREBOOTCFG_VPEMASK(t0) + PTR_L ta2, COREBOOTCFG_VPEMASK(t0) move t8, ta2 li ta1, 0 From 1c885357da2d3cf62132e611c0beaf4cdf607dd9 Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Wed, 1 Jul 2015 09:31:14 +0100 Subject: [PATCH 155/210] Revert "MIPS: Kconfig: Disable SMP/CPS for 64-bit" This reverts commit 6ca716f2e5571d25a3899c6c5c91ff72ea6d6f5e. SMP/CPS is now supported on 64bit cores. Cc: # 4.1 Reviewed-by: Paul Burton Signed-off-by: Markos Chandras Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/10592/ Signed-off-by: Ralf Baechle --- arch/mips/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 2a14585c90d2..aab7e46cadd5 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -2231,7 +2231,7 @@ config MIPS_CMP config MIPS_CPS bool "MIPS Coherent Processing System support" - depends on SYS_SUPPORTS_MIPS_CPS && !64BIT + depends on SYS_SUPPORTS_MIPS_CPS select MIPS_CM select MIPS_CPC select MIPS_CPS_PM if HOTPLUG_CPU From a0f67441b06525a1e5fd713ba0d75af4e5d6b198 Mon Sep 17 00:00:00 2001 From: Maninder Singh Date: Thu, 9 Jul 2015 14:41:53 +0530 Subject: [PATCH 156/210] drm/amdkfd: validate pdd where it acquired first Currently pdd is validate after dereferencing it, which is not correct, Thus validate pdd before its first use. Signed-off-by: Maninder Singh Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 8a1f999daa24..9be007081b72 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -420,6 +420,12 @@ void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid) pqm_uninit(&p->pqm); pdd = kfd_get_process_device_data(dev, p); + + if (!pdd) { + mutex_unlock(&p->mutex); + return; + } + if (pdd->reset_wavefronts) { dbgdev_wave_reset_wavefronts(pdd->dev, p); pdd->reset_wavefronts = false; @@ -431,8 +437,7 @@ void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid) * We don't call amd_iommu_unbind_pasid() here * because the IOMMU called us. */ - if (pdd) - pdd->bound = false; + pdd->bound = false; mutex_unlock(&p->mutex); } From cd404af0c930104462aa91344f07d002cf8248ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Tue, 7 Jul 2015 16:27:28 +0900 Subject: [PATCH 157/210] drm/radeon: Clean up reference counting and pinning of the cursor BOs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take a GEM reference for and pin the new cursor BO, unpin and drop the GEM reference for the old cursor BO in radeon_crtc_cursor_set2, and use radeon_crtc->cursor_addr in radeon_set_cursor. This fixes radeon_cursor_reset accidentally incrementing the cursor BO pin count, and cleans up the code a little. Cc: stable@vger.kernel.org Reviewed-by: Grigori Goronzy Reviewed-by: Christian König Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_cursor.c | 84 ++++++++++++-------------- drivers/gpu/drm/radeon/radeon_mode.h | 1 - 2 files changed, 37 insertions(+), 48 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_cursor.c b/drivers/gpu/drm/radeon/radeon_cursor.c index 45e54060ee97..fa661744a1f5 100644 --- a/drivers/gpu/drm/radeon/radeon_cursor.c +++ b/drivers/gpu/drm/radeon/radeon_cursor.c @@ -205,8 +205,9 @@ static int radeon_cursor_move_locked(struct drm_crtc *crtc, int x, int y) | (x << 16) | y)); /* offset is from DISP(2)_BASE_ADDRESS */ - WREG32(RADEON_CUR_OFFSET + radeon_crtc->crtc_offset, (radeon_crtc->legacy_cursor_offset + - (yorigin * 256))); + WREG32(RADEON_CUR_OFFSET + radeon_crtc->crtc_offset, + radeon_crtc->cursor_addr - radeon_crtc->legacy_display_base_addr + + yorigin * 256); } radeon_crtc->cursor_x = x; @@ -227,51 +228,32 @@ int radeon_crtc_cursor_move(struct drm_crtc *crtc, return ret; } -static int radeon_set_cursor(struct drm_crtc *crtc, struct drm_gem_object *obj) +static void radeon_set_cursor(struct drm_crtc *crtc) { struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); struct radeon_device *rdev = crtc->dev->dev_private; - struct radeon_bo *robj = gem_to_radeon_bo(obj); - uint64_t gpu_addr; - int ret; - - ret = radeon_bo_reserve(robj, false); - if (unlikely(ret != 0)) - goto fail; - /* Only 27 bit offset for legacy cursor */ - ret = radeon_bo_pin_restricted(robj, RADEON_GEM_DOMAIN_VRAM, - ASIC_IS_AVIVO(rdev) ? 0 : 1 << 27, - &gpu_addr); - radeon_bo_unreserve(robj); - if (ret) - goto fail; if (ASIC_IS_DCE4(rdev)) { WREG32(EVERGREEN_CUR_SURFACE_ADDRESS_HIGH + radeon_crtc->crtc_offset, - upper_32_bits(gpu_addr)); + upper_32_bits(radeon_crtc->cursor_addr)); WREG32(EVERGREEN_CUR_SURFACE_ADDRESS + radeon_crtc->crtc_offset, - gpu_addr & 0xffffffff); + lower_32_bits(radeon_crtc->cursor_addr)); } else if (ASIC_IS_AVIVO(rdev)) { if (rdev->family >= CHIP_RV770) { if (radeon_crtc->crtc_id) - WREG32(R700_D2CUR_SURFACE_ADDRESS_HIGH, upper_32_bits(gpu_addr)); + WREG32(R700_D2CUR_SURFACE_ADDRESS_HIGH, + upper_32_bits(radeon_crtc->cursor_addr)); else - WREG32(R700_D1CUR_SURFACE_ADDRESS_HIGH, upper_32_bits(gpu_addr)); + WREG32(R700_D1CUR_SURFACE_ADDRESS_HIGH, + upper_32_bits(radeon_crtc->cursor_addr)); } WREG32(AVIVO_D1CUR_SURFACE_ADDRESS + radeon_crtc->crtc_offset, - gpu_addr & 0xffffffff); + lower_32_bits(radeon_crtc->cursor_addr)); } else { - radeon_crtc->legacy_cursor_offset = gpu_addr - radeon_crtc->legacy_display_base_addr; /* offset is from DISP(2)_BASE_ADDRESS */ - WREG32(RADEON_CUR_OFFSET + radeon_crtc->crtc_offset, radeon_crtc->legacy_cursor_offset); + WREG32(RADEON_CUR_OFFSET + radeon_crtc->crtc_offset, + radeon_crtc->cursor_addr - radeon_crtc->legacy_display_base_addr); } - - return 0; - -fail: - drm_gem_object_unreference_unlocked(obj); - - return ret; } int radeon_crtc_cursor_set2(struct drm_crtc *crtc, @@ -283,7 +265,9 @@ int radeon_crtc_cursor_set2(struct drm_crtc *crtc, int32_t hot_y) { struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); + struct radeon_device *rdev = crtc->dev->dev_private; struct drm_gem_object *obj; + struct radeon_bo *robj; int ret; if (!handle) { @@ -305,6 +289,23 @@ int radeon_crtc_cursor_set2(struct drm_crtc *crtc, return -ENOENT; } + robj = gem_to_radeon_bo(obj); + ret = radeon_bo_reserve(robj, false); + if (ret != 0) { + drm_gem_object_unreference_unlocked(obj); + return ret; + } + /* Only 27 bit offset for legacy cursor */ + ret = radeon_bo_pin_restricted(robj, RADEON_GEM_DOMAIN_VRAM, + ASIC_IS_AVIVO(rdev) ? 0 : 1 << 27, + &radeon_crtc->cursor_addr); + radeon_bo_unreserve(robj); + if (ret) { + DRM_ERROR("Failed to pin new cursor BO (%d)\n", ret); + drm_gem_object_unreference_unlocked(obj); + return ret; + } + radeon_crtc->cursor_width = width; radeon_crtc->cursor_height = height; @@ -323,13 +324,8 @@ int radeon_crtc_cursor_set2(struct drm_crtc *crtc, radeon_crtc->cursor_hot_y = hot_y; } - ret = radeon_set_cursor(crtc, obj); - - if (ret) - DRM_ERROR("radeon_set_cursor returned %d, not changing cursor\n", - ret); - else - radeon_show_cursor(crtc); + radeon_set_cursor(crtc); + radeon_show_cursor(crtc); radeon_lock_cursor(crtc, false); @@ -341,8 +337,7 @@ unpin: radeon_bo_unpin(robj); radeon_bo_unreserve(robj); } - if (radeon_crtc->cursor_bo != obj) - drm_gem_object_unreference_unlocked(radeon_crtc->cursor_bo); + drm_gem_object_unreference_unlocked(radeon_crtc->cursor_bo); } radeon_crtc->cursor_bo = obj; @@ -360,7 +355,6 @@ unpin: void radeon_cursor_reset(struct drm_crtc *crtc) { struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); - int ret; if (radeon_crtc->cursor_bo) { radeon_lock_cursor(crtc, true); @@ -368,12 +362,8 @@ void radeon_cursor_reset(struct drm_crtc *crtc) radeon_cursor_move_locked(crtc, radeon_crtc->cursor_x, radeon_crtc->cursor_y); - ret = radeon_set_cursor(crtc, radeon_crtc->cursor_bo); - if (ret) - DRM_ERROR("radeon_set_cursor returned %d, not showing " - "cursor\n", ret); - else - radeon_show_cursor(crtc); + radeon_set_cursor(crtc); + radeon_show_cursor(crtc); radeon_lock_cursor(crtc, false); } diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h index 6de5459316b5..07909d817381 100644 --- a/drivers/gpu/drm/radeon/radeon_mode.h +++ b/drivers/gpu/drm/radeon/radeon_mode.h @@ -343,7 +343,6 @@ struct radeon_crtc { int max_cursor_width; int max_cursor_height; uint32_t legacy_display_base_addr; - uint32_t legacy_cursor_offset; enum radeon_rmx_type rmx_type; u8 h_border; u8 v_border; From f3cbb17bcf676a2fc6aedebe9fbebd59e550c51a Mon Sep 17 00:00:00 2001 From: Grigori Goronzy Date: Tue, 7 Jul 2015 16:27:29 +0900 Subject: [PATCH 158/210] drm/radeon: unpin cursor BOs on suspend and pin them again on resume (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Everything is evicted from VRAM before suspend, so we need to make sure all BOs are unpinned and re-pinned after resume. Fixes broken mouse cursor after resume introduced by commit b9729b17. [Michel Dänzer: Add pinning BOs on resume] v2: [Alex Deucher: merge cursor unpin into fb unpin loop] Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=100541 Cc: stable@vger.kernel.org Reviewed-by: Christian König (v1) Signed-off-by: Grigori Goronzy Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_device.c | 34 +++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 1a532a9bc3a3..d8319dae8358 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1578,11 +1578,21 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend, bool fbcon) drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); } - /* unpin the front buffers */ + /* unpin the front buffers and cursors */ list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); struct radeon_framebuffer *rfb = to_radeon_framebuffer(crtc->primary->fb); struct radeon_bo *robj; + if (radeon_crtc->cursor_bo) { + struct radeon_bo *robj = gem_to_radeon_bo(radeon_crtc->cursor_bo); + r = radeon_bo_reserve(robj, false); + if (r == 0) { + radeon_bo_unpin(robj); + radeon_bo_unreserve(robj); + } + } + if (rfb == NULL || rfb->obj == NULL) { continue; } @@ -1645,6 +1655,7 @@ int radeon_resume_kms(struct drm_device *dev, bool resume, bool fbcon) { struct drm_connector *connector; struct radeon_device *rdev = dev->dev_private; + struct drm_crtc *crtc; int r; if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) @@ -1684,6 +1695,27 @@ int radeon_resume_kms(struct drm_device *dev, bool resume, bool fbcon) radeon_restore_bios_scratch_regs(rdev); + /* pin cursors */ + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); + + if (radeon_crtc->cursor_bo) { + struct radeon_bo *robj = gem_to_radeon_bo(radeon_crtc->cursor_bo); + r = radeon_bo_reserve(robj, false); + if (r == 0) { + /* Only 27 bit offset for legacy cursor */ + r = radeon_bo_pin_restricted(robj, + RADEON_GEM_DOMAIN_VRAM, + ASIC_IS_AVIVO(rdev) ? + 0 : 1 << 27, + &radeon_crtc->cursor_addr); + if (r != 0) + DRM_ERROR("Failed to pin cursor BO (%d)\n", r); + radeon_bo_unreserve(robj); + } + } + } + /* init dig PHYs, disp eng pll */ if (rdev->is_atom_bios) { radeon_atom_encoder_init(rdev); From 8991668ab4e26f985a8485719bce5d6d0623a644 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Tue, 7 Jul 2015 16:27:30 +0900 Subject: [PATCH 159/210] drm/radeon: Fold radeon_set_cursor() into radeon_show_cursor() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Grigori Goronzy Reviewed-by: Christian König Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_cursor.c | 49 ++++++++++---------------- 1 file changed, 19 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_cursor.c b/drivers/gpu/drm/radeon/radeon_cursor.c index fa661744a1f5..afaf346bd50e 100644 --- a/drivers/gpu/drm/radeon/radeon_cursor.c +++ b/drivers/gpu/drm/radeon/radeon_cursor.c @@ -91,15 +91,34 @@ static void radeon_show_cursor(struct drm_crtc *crtc) struct radeon_device *rdev = crtc->dev->dev_private; if (ASIC_IS_DCE4(rdev)) { + WREG32(EVERGREEN_CUR_SURFACE_ADDRESS_HIGH + radeon_crtc->crtc_offset, + upper_32_bits(radeon_crtc->cursor_addr)); + WREG32(EVERGREEN_CUR_SURFACE_ADDRESS + radeon_crtc->crtc_offset, + lower_32_bits(radeon_crtc->cursor_addr)); WREG32(RADEON_MM_INDEX, EVERGREEN_CUR_CONTROL + radeon_crtc->crtc_offset); WREG32(RADEON_MM_DATA, EVERGREEN_CURSOR_EN | EVERGREEN_CURSOR_MODE(EVERGREEN_CURSOR_24_8_PRE_MULT) | EVERGREEN_CURSOR_URGENT_CONTROL(EVERGREEN_CURSOR_URGENT_1_2)); } else if (ASIC_IS_AVIVO(rdev)) { + if (rdev->family >= CHIP_RV770) { + if (radeon_crtc->crtc_id) + WREG32(R700_D2CUR_SURFACE_ADDRESS_HIGH, + upper_32_bits(radeon_crtc->cursor_addr)); + else + WREG32(R700_D1CUR_SURFACE_ADDRESS_HIGH, + upper_32_bits(radeon_crtc->cursor_addr)); + } + + WREG32(AVIVO_D1CUR_SURFACE_ADDRESS + radeon_crtc->crtc_offset, + lower_32_bits(radeon_crtc->cursor_addr)); WREG32(RADEON_MM_INDEX, AVIVO_D1CUR_CONTROL + radeon_crtc->crtc_offset); WREG32(RADEON_MM_DATA, AVIVO_D1CURSOR_EN | (AVIVO_D1CURSOR_MODE_24BPP << AVIVO_D1CURSOR_MODE_SHIFT)); } else { + /* offset is from DISP(2)_BASE_ADDRESS */ + WREG32(RADEON_CUR_OFFSET + radeon_crtc->crtc_offset, + radeon_crtc->cursor_addr - radeon_crtc->legacy_display_base_addr); + switch (radeon_crtc->crtc_id) { case 0: WREG32(RADEON_MM_INDEX, RADEON_CRTC_GEN_CNTL); @@ -228,34 +247,6 @@ int radeon_crtc_cursor_move(struct drm_crtc *crtc, return ret; } -static void radeon_set_cursor(struct drm_crtc *crtc) -{ - struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); - struct radeon_device *rdev = crtc->dev->dev_private; - - if (ASIC_IS_DCE4(rdev)) { - WREG32(EVERGREEN_CUR_SURFACE_ADDRESS_HIGH + radeon_crtc->crtc_offset, - upper_32_bits(radeon_crtc->cursor_addr)); - WREG32(EVERGREEN_CUR_SURFACE_ADDRESS + radeon_crtc->crtc_offset, - lower_32_bits(radeon_crtc->cursor_addr)); - } else if (ASIC_IS_AVIVO(rdev)) { - if (rdev->family >= CHIP_RV770) { - if (radeon_crtc->crtc_id) - WREG32(R700_D2CUR_SURFACE_ADDRESS_HIGH, - upper_32_bits(radeon_crtc->cursor_addr)); - else - WREG32(R700_D1CUR_SURFACE_ADDRESS_HIGH, - upper_32_bits(radeon_crtc->cursor_addr)); - } - WREG32(AVIVO_D1CUR_SURFACE_ADDRESS + radeon_crtc->crtc_offset, - lower_32_bits(radeon_crtc->cursor_addr)); - } else { - /* offset is from DISP(2)_BASE_ADDRESS */ - WREG32(RADEON_CUR_OFFSET + radeon_crtc->crtc_offset, - radeon_crtc->cursor_addr - radeon_crtc->legacy_display_base_addr); - } -} - int radeon_crtc_cursor_set2(struct drm_crtc *crtc, struct drm_file *file_priv, uint32_t handle, @@ -324,7 +315,6 @@ int radeon_crtc_cursor_set2(struct drm_crtc *crtc, radeon_crtc->cursor_hot_y = hot_y; } - radeon_set_cursor(crtc); radeon_show_cursor(crtc); radeon_lock_cursor(crtc, false); @@ -362,7 +352,6 @@ void radeon_cursor_reset(struct drm_crtc *crtc) radeon_cursor_move_locked(crtc, radeon_crtc->cursor_x, radeon_crtc->cursor_y); - radeon_set_cursor(crtc); radeon_show_cursor(crtc); radeon_lock_cursor(crtc, false); From d57c0edfe00d3274b50f91ce3076ed0e82d28782 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 8 Jul 2015 14:08:12 -0400 Subject: [PATCH 160/210] Revert "Revert "drm/radeon: dont switch vt on suspend"" This reverts commit ac9134906b3f5c2b45dc80dab0fee792bd516d52. We've fixed the underlying problem with cursors, so re-enable this. --- drivers/gpu/drm/radeon/radeon_fb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c index 634793ea8418..aeb676708e60 100644 --- a/drivers/gpu/drm/radeon/radeon_fb.c +++ b/drivers/gpu/drm/radeon/radeon_fb.c @@ -257,6 +257,7 @@ static int radeonfb_create(struct drm_fb_helper *helper, } info->par = rfbdev; + info->skip_vt_switch = true; ret = radeon_framebuffer_init(rdev->ddev, &rfbdev->rfb, &mode_cmd, gobj); if (ret) { From eb99070b4ad105f8a516d99e95226180924eddc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 8 Jul 2015 09:56:13 +0200 Subject: [PATCH 161/210] drm/radeon: allways add the VM clear duplicate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to allways add the VM clear duplicate of the BO_VA, no matter what the old status was. Signed-off-by: Christian König Test-by: hadack@gmx.de Tested-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_vm.c | 34 ++++++++++++++---------------- 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index ec10533a49b8..031096599f84 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -493,29 +493,27 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev, } if (bo_va->it.start || bo_va->it.last) { - spin_lock(&vm->status_lock); - if (list_empty(&bo_va->vm_status)) { - /* add a clone of the bo_va to clear the old address */ - struct radeon_bo_va *tmp; - spin_unlock(&vm->status_lock); - tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL); - if (!tmp) { - mutex_unlock(&vm->mutex); - r = -ENOMEM; - goto error_unreserve; - } - tmp->it.start = bo_va->it.start; - tmp->it.last = bo_va->it.last; - tmp->vm = vm; - tmp->bo = radeon_bo_ref(bo_va->bo); - spin_lock(&vm->status_lock); - list_add(&tmp->vm_status, &vm->freed); + /* add a clone of the bo_va to clear the old address */ + struct radeon_bo_va *tmp; + tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL); + if (!tmp) { + mutex_unlock(&vm->mutex); + r = -ENOMEM; + goto error_unreserve; } - spin_unlock(&vm->status_lock); + tmp->it.start = bo_va->it.start; + tmp->it.last = bo_va->it.last; + tmp->vm = vm; + tmp->bo = radeon_bo_ref(bo_va->bo); interval_tree_remove(&bo_va->it, &vm->va); bo_va->it.start = 0; bo_va->it.last = 0; + + spin_lock(&vm->status_lock); + list_del_init(&bo_va->vm_status); + list_add(&tmp->vm_status, &vm->freed); + spin_unlock(&vm->status_lock); } if (soffset || eoffset) { From dbedff05d163f8d1b4a97b91c57b939830fc235f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 8 Jul 2015 09:56:14 +0200 Subject: [PATCH 162/210] drm/radeon: check if BO_VA is set before adding it to the invalidation list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise we try to clear BO_VAs without an address. Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=91141 Signed-off-by: Christian König Test-by: hadack@gmx.de Tested-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_vm.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index 031096599f84..48d97c040f49 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -507,22 +507,21 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev, tmp->bo = radeon_bo_ref(bo_va->bo); interval_tree_remove(&bo_va->it, &vm->va); + spin_lock(&vm->status_lock); bo_va->it.start = 0; bo_va->it.last = 0; - - spin_lock(&vm->status_lock); list_del_init(&bo_va->vm_status); list_add(&tmp->vm_status, &vm->freed); spin_unlock(&vm->status_lock); } if (soffset || eoffset) { + spin_lock(&vm->status_lock); bo_va->it.start = soffset; bo_va->it.last = eoffset - 1; - interval_tree_insert(&bo_va->it, &vm->va); - spin_lock(&vm->status_lock); list_add(&bo_va->vm_status, &vm->cleared); spin_unlock(&vm->status_lock); + interval_tree_insert(&bo_va->it, &vm->va); } bo_va->flags = flags; @@ -1156,7 +1155,8 @@ void radeon_vm_bo_invalidate(struct radeon_device *rdev, list_for_each_entry(bo_va, &bo->va, bo_list) { spin_lock(&bo_va->vm->status_lock); - if (list_empty(&bo_va->vm_status)) + if (list_empty(&bo_va->vm_status) && + (bo_va->it.start || bo_va->it.last)) list_add(&bo_va->vm_status, &bo_va->vm->invalidated); spin_unlock(&bo_va->vm->status_lock); } From 0f11770417d849558c727ed20e1cd07444e15a00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 8 Jul 2015 16:58:48 +0200 Subject: [PATCH 163/210] drm/amdgpu: fix timeout calculation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Christian König Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 975edb1000a2..ae43b58c9733 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -352,7 +352,7 @@ unsigned long amdgpu_gem_timeout(uint64_t timeout_ns) if (((int64_t)timeout_ns) < 0) return MAX_SCHEDULE_TIMEOUT; - timeout = ktime_sub_ns(ktime_get(), timeout_ns); + timeout = ktime_sub(ns_to_ktime(timeout_ns), ktime_get()); if (ktime_to_ns(timeout) < 0) return 0; From 355c822847fa70fa1df6a7451b7ecf76116efcd2 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 8 Jul 2015 12:58:20 -0400 Subject: [PATCH 164/210] drm/radeon: disable vce init on cayman (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cayman does not have vce. There were a few places in the shared cayman/TV code where we were trying to do vce stuff. v2: remove -ENOENT check Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/ni.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 8e5aeeb058a5..158872eb78e4 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -2162,18 +2162,20 @@ static int cayman_startup(struct radeon_device *rdev) DRM_ERROR("radeon: failed initializing UVD (%d).\n", r); } - ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX]; - if (ring->ring_size) - r = radeon_ring_init(rdev, ring, ring->ring_size, 0, 0x0); + if (rdev->family == CHIP_ARUBA) { + ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX]; + if (ring->ring_size) + r = radeon_ring_init(rdev, ring, ring->ring_size, 0, 0x0); - ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX]; - if (ring->ring_size) - r = radeon_ring_init(rdev, ring, ring->ring_size, 0, 0x0); + ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX]; + if (ring->ring_size) + r = radeon_ring_init(rdev, ring, ring->ring_size, 0, 0x0); - if (!r) - r = vce_v1_0_init(rdev); - else if (r != -ENOENT) - DRM_ERROR("radeon: failed initializing VCE (%d).\n", r); + if (!r) + r = vce_v1_0_init(rdev); + if (r) + DRM_ERROR("radeon: failed initializing VCE (%d).\n", r); + } r = radeon_ib_pool_init(rdev); if (r) { @@ -2396,7 +2398,8 @@ void cayman_fini(struct radeon_device *rdev) radeon_irq_kms_fini(rdev); uvd_v1_0_fini(rdev); radeon_uvd_fini(rdev); - radeon_vce_fini(rdev); + if (rdev->family == CHIP_ARUBA) + radeon_vce_fini(rdev); cayman_pcie_gart_fini(rdev); r600_vram_scratch_fini(rdev); radeon_gem_fini(rdev); From 757856d2b9568a701df9ea6a4be68effbb9d6f44 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 25 Jun 2015 17:47:45 +0300 Subject: [PATCH 165/210] libceph: enable ceph in a non-default network namespace Grab a reference on a network namespace of the 'rbd map' (in case of rbd) or 'mount' (in case of ceph) process and use that to open sockets instead of always using init_net and bailing if network namespace is anything but init_net. Be careful to not share struct ceph_client instances between different namespaces and don't add any code in the !CONFIG_NET_NS case. This is based on a patch from Hong Zhiguo . Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- include/linux/ceph/messenger.h | 3 +++ net/ceph/ceph_common.c | 16 ++++++++++------ net/ceph/messenger.c | 10 +++++++++- 3 files changed, 22 insertions(+), 7 deletions(-) diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index e15499422fdc..37753278987a 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -56,6 +57,7 @@ struct ceph_messenger { struct ceph_entity_addr my_enc_addr; atomic_t stopping; + possible_net_t net; bool nocrc; bool tcp_nodelay; @@ -267,6 +269,7 @@ extern void ceph_messenger_init(struct ceph_messenger *msgr, u64 required_features, bool nocrc, bool tcp_nodelay); +extern void ceph_messenger_fini(struct ceph_messenger *msgr); extern void ceph_con_init(struct ceph_connection *con, void *private, const struct ceph_connection_operations *ops, diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index cb7db320dd27..f30329f72641 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -16,8 +17,6 @@ #include #include #include -#include -#include #include @@ -131,6 +130,13 @@ int ceph_compare_options(struct ceph_options *new_opt, int i; int ret; + /* + * Don't bother comparing options if network namespaces don't + * match. + */ + if (!net_eq(current->nsproxy->net_ns, read_pnet(&client->msgr.net))) + return -1; + ret = memcmp(opt1, opt2, ofs); if (ret) return ret; @@ -335,9 +341,6 @@ ceph_parse_options(char *options, const char *dev_name, int err = -ENOMEM; substring_t argstr[MAX_OPT_ARGS]; - if (current->nsproxy->net_ns != &init_net) - return ERR_PTR(-EINVAL); - opt = kzalloc(sizeof(*opt), GFP_KERNEL); if (!opt) return ERR_PTR(-ENOMEM); @@ -608,6 +611,7 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private, fail_monc: ceph_monc_stop(&client->monc); fail: + ceph_messenger_fini(&client->msgr); kfree(client); return ERR_PTR(err); } @@ -621,8 +625,8 @@ void ceph_destroy_client(struct ceph_client *client) /* unmount */ ceph_osdc_stop(&client->osdc); - ceph_monc_stop(&client->monc); + ceph_messenger_fini(&client->msgr); ceph_debugfs_client_cleanup(client); diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 1679f47280e2..5c1f98ea6741 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -479,7 +480,7 @@ static int ceph_tcp_connect(struct ceph_connection *con) int ret; BUG_ON(con->sock); - ret = sock_create_kern(&init_net, con->peer_addr.in_addr.ss_family, + ret = sock_create_kern(read_pnet(&con->msgr->net), paddr->ss_family, SOCK_STREAM, IPPROTO_TCP, &sock); if (ret) return ret; @@ -2944,11 +2945,18 @@ void ceph_messenger_init(struct ceph_messenger *msgr, msgr->tcp_nodelay = tcp_nodelay; atomic_set(&msgr->stopping, 0); + write_pnet(&msgr->net, get_net(current->nsproxy->net_ns)); dout("%s %p\n", __func__, msgr); } EXPORT_SYMBOL(ceph_messenger_init); +void ceph_messenger_fini(struct ceph_messenger *msgr) +{ + put_net(read_pnet(&msgr->net)); +} +EXPORT_SYMBOL(ceph_messenger_fini); + static void clear_standby(struct ceph_connection *con) { /* come back from STANDBY? */ From c44bd69c0c8cfadf0239437635b2933efb1f6c4c Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 9 Jul 2015 13:57:52 +0300 Subject: [PATCH 166/210] libceph: treat sockaddr_storage with uninitialized family as blank addr_is_blank() should return true if family is neither AF_INET nor AF_INET6. This is what its counterpart entity_addr_t::is_blank_ip() is doing and it is the right thing to do: in process_banner() we check if our address is blank and if it is "learn" it from our peer. As it is, we never learn our address and always send out a blank one. This goes way back to ceph.git commit dd732cbfc1c9 ("use sockaddr_storage; and some ipv6 support groundwork") from 2009. While at at, do not open-code ipv6_addr_any() and use INADDR_ANY constant instead of 0. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- net/ceph/messenger.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 5c1f98ea6741..e3be1d22a247 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -1732,17 +1732,17 @@ static int verify_hello(struct ceph_connection *con) static bool addr_is_blank(struct sockaddr_storage *ss) { + struct in_addr *addr = &((struct sockaddr_in *)ss)->sin_addr; + struct in6_addr *addr6 = &((struct sockaddr_in6 *)ss)->sin6_addr; + switch (ss->ss_family) { case AF_INET: - return ((struct sockaddr_in *)ss)->sin_addr.s_addr == 0; + return addr->s_addr == htonl(INADDR_ANY); case AF_INET6: - return - ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[0] == 0 && - ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[1] == 0 && - ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[2] == 0 && - ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[3] == 0; + return ipv6_addr_any(addr6); + default: + return true; } - return false; } static int addr_port(struct sockaddr_storage *ss) From 398ecff5a562b7b69f77582f98a4b04d0de1f066 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 9 Jul 2015 11:46:14 -0400 Subject: [PATCH 167/210] MAINTAINERS: update ceph entries - The Ceph common code is used by both fs/ceph and drivers/block/rbd. Add a separate maintainers entry. - Add Ilya as libceph maintainer and cephfs submaintainer. - Attribute Documentation/ABI/testing/sysfs-bus-rbd to rbd. - ceph-devel@vger.kernel.org should be L, not M in rbd entry. Signed-off-by: Sage Weil Signed-off-by: Ilya Dryomov --- MAINTAINERS | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 8133cefb6b6e..58cbc2118e46 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2562,19 +2562,29 @@ F: arch/powerpc/include/uapi/asm/spu*.h F: arch/powerpc/oprofile/*cell* F: arch/powerpc/platforms/cell/ -CEPH DISTRIBUTED FILE SYSTEM CLIENT +CEPH COMMON CODE (LIBCEPH) +M: Ilya Dryomov M: "Yan, Zheng" M: Sage Weil L: ceph-devel@vger.kernel.org W: http://ceph.com/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git S: Supported -F: Documentation/filesystems/ceph.txt -F: fs/ceph/ F: net/ceph/ F: include/linux/ceph/ F: include/linux/crush/ +CEPH DISTRIBUTED FILE SYSTEM CLIENT (CEPH) +M: "Yan, Zheng" +M: Sage Weil +M: Ilya Dryomov +L: ceph-devel@vger.kernel.org +W: http://ceph.com/ +T: git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git +S: Supported +F: Documentation/filesystems/ceph.txt +F: fs/ceph/ + CERTIFIED WIRELESS USB (WUSB) SUBSYSTEM: L: linux-usb@vger.kernel.org S: Orphan @@ -8366,10 +8376,11 @@ RADOS BLOCK DEVICE (RBD) M: Ilya Dryomov M: Sage Weil M: Alex Elder -M: ceph-devel@vger.kernel.org +L: ceph-devel@vger.kernel.org W: http://ceph.com/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git S: Supported +F: Documentation/ABI/testing/sysfs-bus-rbd F: drivers/block/rbd.c F: drivers/block/rbd_types.h From 6e67b7ae2157bdeb6b56381e530fc74bb68b6149 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 9 Jul 2015 11:47:37 -0400 Subject: [PATCH 168/210] MAINTAINERS: add secondary tree for ceph modules The Ceph kernel code is primarily developed in the github tree, and only pushed to the korg tree before going to Linus. If Sage is unavailable and another maintainer needs to push something upstream, pull requests may originate from the github tree instead of Sage's korg tree. Signed-off-by: Sage Weil Signed-off-by: Ilya Dryomov --- MAINTAINERS | 3 +++ 1 file changed, 3 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 58cbc2118e46..0d70760e8135 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2569,6 +2569,7 @@ M: Sage Weil L: ceph-devel@vger.kernel.org W: http://ceph.com/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git +T: git git://github.com/ceph/ceph-client.git S: Supported F: net/ceph/ F: include/linux/ceph/ @@ -2581,6 +2582,7 @@ M: Ilya Dryomov L: ceph-devel@vger.kernel.org W: http://ceph.com/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git +T: git git://github.com/ceph/ceph-client.git S: Supported F: Documentation/filesystems/ceph.txt F: fs/ceph/ @@ -8379,6 +8381,7 @@ M: Alex Elder L: ceph-devel@vger.kernel.org W: http://ceph.com/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git +T: git git://github.com/ceph/ceph-client.git S: Supported F: Documentation/ABI/testing/sysfs-bus-rbd F: drivers/block/rbd.c From 6b7339f4c31ad69c8e9c0b2859276e22cf72176d Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 6 Jul 2015 23:18:37 +0300 Subject: [PATCH 169/210] mm: avoid setting up anonymous pages into file mapping Reading page fault handler code I've noticed that under right circumstances kernel would map anonymous pages into file mappings: if the VMA doesn't have vm_ops->fault() and the VMA wasn't fully populated on ->mmap(), kernel would handle page fault to not populated pte with do_anonymous_page(). Let's change page fault handler to use do_anonymous_page() only on anonymous VMA (->vm_ops == NULL) and make sure that the VMA is not shared. For file mappings without vm_ops->fault() or shred VMA without vm_ops, page fault on pte_none() entry would lead to SIGBUS. Signed-off-by: Kirill A. Shutemov Acked-by: Oleg Nesterov Cc: Andrew Morton Cc: Willy Tarreau Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- mm/memory.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index a84fbb772034..388dcf9aa283 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2670,6 +2670,10 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, pte_unmap(page_table); + /* File mapping without ->vm_ops ? */ + if (vma->vm_flags & VM_SHARED) + return VM_FAULT_SIGBUS; + /* Check if we need to add a guard page to the stack */ if (check_stack_guard_page(vma, address) < 0) return VM_FAULT_SIGSEGV; @@ -3099,6 +3103,9 @@ static int do_fault(struct mm_struct *mm, struct vm_area_struct *vma, - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; pte_unmap(page_table); + /* The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */ + if (!vma->vm_ops->fault) + return VM_FAULT_SIGBUS; if (!(flags & FAULT_FLAG_WRITE)) return do_read_fault(mm, vma, address, pmd, pgoff, flags, orig_pte); @@ -3244,13 +3251,12 @@ static int handle_pte_fault(struct mm_struct *mm, barrier(); if (!pte_present(entry)) { if (pte_none(entry)) { - if (vma->vm_ops) { - if (likely(vma->vm_ops->fault)) - return do_fault(mm, vma, address, pte, - pmd, flags, entry); - } - return do_anonymous_page(mm, vma, address, - pte, pmd, flags); + if (vma->vm_ops) + return do_fault(mm, vma, address, pte, pmd, + flags, entry); + + return do_anonymous_page(mm, vma, address, pte, pmd, + flags); } return do_swap_page(mm, vma, address, pte, pmd, flags, entry); From 6f957724b94cb19f5c1c97efd01dd4df8ced323c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 9 Jul 2015 11:20:01 -0700 Subject: [PATCH 170/210] Fix firmware loader uevent buffer NULL pointer dereference The firmware class uevent function accessed the "fw_priv->buf" buffer without the proper locking and testing for NULL. This is an old bug (looks like it goes back to 2012 and commit 1244691c73b2: "firmware loader: introduce firmware_buf"), but for some reason it's triggering only now in 4.2-rc1. Shuah Khan is trying to bisect what it is that causes this to trigger more easily, but in the meantime let's just fix the bug since others are hitting it too (at least Ingo reports having seen it as well). Reported-and-tested-by: Shuah Khan Acked-by: Ming Lei Cc: Greg Kroah-Hartman Signed-off-by: Linus Torvalds --- drivers/base/firmware_class.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c index 9c4288362a8e..894bda114224 100644 --- a/drivers/base/firmware_class.c +++ b/drivers/base/firmware_class.c @@ -563,10 +563,8 @@ static void fw_dev_release(struct device *dev) kfree(fw_priv); } -static int firmware_uevent(struct device *dev, struct kobj_uevent_env *env) +static int do_firmware_uevent(struct firmware_priv *fw_priv, struct kobj_uevent_env *env) { - struct firmware_priv *fw_priv = to_firmware_priv(dev); - if (add_uevent_var(env, "FIRMWARE=%s", fw_priv->buf->fw_id)) return -ENOMEM; if (add_uevent_var(env, "TIMEOUT=%i", loading_timeout)) @@ -577,6 +575,18 @@ static int firmware_uevent(struct device *dev, struct kobj_uevent_env *env) return 0; } +static int firmware_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + struct firmware_priv *fw_priv = to_firmware_priv(dev); + int err = 0; + + mutex_lock(&fw_lock); + if (fw_priv->buf) + err = do_firmware_uevent(fw_priv, env); + mutex_unlock(&fw_lock); + return err; +} + static struct class firmware_class = { .name = "firmware", .class_attrs = firmware_class_attrs, From 3324603524925c7727207027d1c15e597412d15e Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Thu, 9 Jul 2015 14:20:36 -0400 Subject: [PATCH 171/210] selinux: don't waste ebitmap space when importing NetLabel categories At present we don't create efficient ebitmaps when importing NetLabel category bitmaps. This can present a problem when comparing ebitmaps since ebitmap_cmp() is very strict about these things and considers these wasteful ebitmaps not equal when compared to their more efficient counterparts, even if their values are the same. This isn't likely to cause problems on 64-bit systems due to a bit of luck on how NetLabel/CIPSO works and the default ebitmap size, but it can be a problem on 32-bit systems. This patch fixes this problem by being a bit more intelligent when importing NetLabel category bitmaps by skipping over empty sections which should result in a nice, efficient ebitmap. Cc: stable@vger.kernel.org # 3.17 Signed-off-by: Paul Moore --- security/selinux/ss/ebitmap.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/security/selinux/ss/ebitmap.c b/security/selinux/ss/ebitmap.c index afe6a269ec17..57644b1dc42e 100644 --- a/security/selinux/ss/ebitmap.c +++ b/security/selinux/ss/ebitmap.c @@ -153,6 +153,12 @@ int ebitmap_netlbl_import(struct ebitmap *ebmap, if (offset == (u32)-1) return 0; + /* don't waste ebitmap space if the netlabel bitmap is empty */ + if (bitmap == 0) { + offset += EBITMAP_UNIT_SIZE; + continue; + } + if (e_iter == NULL || offset >= e_iter->startbit + EBITMAP_SIZE) { e_prev = e_iter; From 9abea2d64ce93b6909de7f83a7f681f572369708 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 9 Jul 2015 18:05:15 +0200 Subject: [PATCH 172/210] ioctl_compat: handle FITRIM The FITRIM ioctl has the same arguments on 32-bit and 64-bit architectures, so we can add it to the list of compatible ioctls and drop it from compat_ioctl method of various filesystems. Signed-off-by: Mikulas Patocka Cc: Al Viro Cc: Ted Ts'o Signed-off-by: Linus Torvalds --- fs/compat_ioctl.c | 1 + fs/ecryptfs/file.c | 1 - fs/ext4/ioctl.c | 1 - fs/jfs/ioctl.c | 3 --- fs/nilfs2/ioctl.c | 1 - fs/ocfs2/ioctl.c | 1 - 6 files changed, 1 insertion(+), 7 deletions(-) diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 6b8e2f091f5b..48851f6ea6ec 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -896,6 +896,7 @@ COMPATIBLE_IOCTL(FIGETBSZ) /* 'X' - originally XFS but some now in the VFS */ COMPATIBLE_IOCTL(FIFREEZE) COMPATIBLE_IOCTL(FITHAW) +COMPATIBLE_IOCTL(FITRIM) COMPATIBLE_IOCTL(KDGETKEYCODE) COMPATIBLE_IOCTL(KDSETKEYCODE) COMPATIBLE_IOCTL(KDGKBTYPE) diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 72afcc629d7b..feef8a9c4de7 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -325,7 +325,6 @@ ecryptfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return rc; switch (cmd) { - case FITRIM: case FS_IOC32_GETFLAGS: case FS_IOC32_SETFLAGS: case FS_IOC32_GETVERSION: diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index cb8451246b30..1346cfa355d0 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -755,7 +755,6 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return err; } case EXT4_IOC_MOVE_EXT: - case FITRIM: case EXT4_IOC_RESIZE_FS: case EXT4_IOC_PRECACHE_EXTENTS: case EXT4_IOC_SET_ENCRYPTION_POLICY: diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c index 93a1232894f6..8db8b7d61e40 100644 --- a/fs/jfs/ioctl.c +++ b/fs/jfs/ioctl.c @@ -180,9 +180,6 @@ long jfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) case JFS_IOC_SETFLAGS32: cmd = JFS_IOC_SETFLAGS; break; - case FITRIM: - cmd = FITRIM; - break; } return jfs_ioctl(filp, cmd, arg); } diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 9a20e513d7eb..aba43811d6ef 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -1369,7 +1369,6 @@ long nilfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) case NILFS_IOCTL_SYNC: case NILFS_IOCTL_RESIZE: case NILFS_IOCTL_SET_ALLOC_RANGE: - case FITRIM: break; default: return -ENOIOCTLCMD; diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index 53e6c40ed4c6..3cb097ccce60 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c @@ -980,7 +980,6 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) case OCFS2_IOC_GROUP_EXTEND: case OCFS2_IOC_GROUP_ADD: case OCFS2_IOC_GROUP_ADD64: - case FITRIM: break; case OCFS2_IOC_REFLINK: if (copy_from_user(&args, argp, sizeof(args))) From fc0a1f035cf15f704f4092da294963c57e94c1c0 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 26 Jun 2015 14:12:10 +0200 Subject: [PATCH 173/210] i2c: I2C_MT65XX should depend on HAS_DMA If NO_DMA=y: ERROR: "dma_unmap_single" [drivers/i2c/busses/i2c-mt65xx.ko] undefined! ERROR: "dma_mapping_error" [drivers/i2c/busses/i2c-mt65xx.ko] undefined! ERROR: "dma_map_single" [drivers/i2c/busses/i2c-mt65xx.ko] undefined! Add a dependency on HAS_DMA to fix this. Signed-off-by: Geert Uytterhoeven Signed-off-by: Wolfram Sang --- drivers/i2c/busses/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 35ac23768ce9..577d58d1f1a1 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -633,6 +633,7 @@ config I2C_MPC config I2C_MT65XX tristate "MediaTek I2C adapter" depends on ARCH_MEDIATEK || COMPILE_TEST + depends on HAS_DMA help This selects the MediaTek(R) Integrated Inter Circuit bus driver for MT65xx and MT81xx. From 724948106ed236fc528c720ae12c79af7e2aea4e Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Tue, 30 Jun 2015 11:08:46 +0800 Subject: [PATCH 174/210] i2c: xgene-slimpro: Fix missing mbox_free_channel call in probe error path Free requested mailbox channel before return error. Signed-off-by: Axel Lin Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-xgene-slimpro.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/busses/i2c-xgene-slimpro.c b/drivers/i2c/busses/i2c-xgene-slimpro.c index dcca7076231e..1c9cb65ac4cf 100644 --- a/drivers/i2c/busses/i2c-xgene-slimpro.c +++ b/drivers/i2c/busses/i2c-xgene-slimpro.c @@ -419,6 +419,7 @@ static int xgene_slimpro_i2c_probe(struct platform_device *pdev) rc = i2c_add_adapter(adapter); if (rc) { dev_err(&pdev->dev, "Adapter registeration failed\n"); + mbox_free_channel(ctx->mbox_chan); return rc; } From eb8173e3d7b94664ecd3acf34942c9c9d6f6cb73 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Tue, 30 Jun 2015 11:41:58 +0800 Subject: [PATCH 175/210] i2c: jz4780: Fix return value if probe fails Current code returns 0 if fails to read clock-frequency DT property, fix it. Also add checking return value of clk_prepare_enable and propagate return value of devm_request_irq. Signed-off-by: Axel Lin Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-jz4780.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/i2c/busses/i2c-jz4780.c b/drivers/i2c/busses/i2c-jz4780.c index 19b2d689a5ef..f325663c27c5 100644 --- a/drivers/i2c/busses/i2c-jz4780.c +++ b/drivers/i2c/busses/i2c-jz4780.c @@ -764,12 +764,15 @@ static int jz4780_i2c_probe(struct platform_device *pdev) if (IS_ERR(i2c->clk)) return PTR_ERR(i2c->clk); - clk_prepare_enable(i2c->clk); + ret = clk_prepare_enable(i2c->clk); + if (ret) + return ret; - if (of_property_read_u32(pdev->dev.of_node, "clock-frequency", - &clk_freq)) { + ret = of_property_read_u32(pdev->dev.of_node, "clock-frequency", + &clk_freq); + if (ret) { dev_err(&pdev->dev, "clock-frequency not specified in DT"); - return clk_freq; + goto err; } i2c->speed = clk_freq / 1000; @@ -790,10 +793,8 @@ static int jz4780_i2c_probe(struct platform_device *pdev) i2c->irq = platform_get_irq(pdev, 0); ret = devm_request_irq(&pdev->dev, i2c->irq, jz4780_i2c_irq, 0, dev_name(&pdev->dev), i2c); - if (ret) { - ret = -ENODEV; + if (ret) goto err; - } ret = i2c_add_adapter(&i2c->adap); if (ret < 0) { From 4f001fd30145a6a8f72f9544c982cfd3dcb7c6df Mon Sep 17 00:00:00 2001 From: Pantelis Antoniou Date: Sat, 24 Jan 2015 09:16:29 +0200 Subject: [PATCH 176/210] i2c: Mark instantiated device nodes with OF_POPULATE Mark (and unmark) device nodes with the POPULATE flag as appropriate. This is required to avoid multi probing when using I2C and device overlays containing a mux. This patch is also more careful with the release of the adapter device which caused a deadlock with muxes, and does not break the build on !OF since the node flag accessors are not defined then. Signed-off-by: Pantelis Antoniou Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-core.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index 069a41f116dd..e6d4935161e4 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -1012,6 +1012,8 @@ EXPORT_SYMBOL_GPL(i2c_new_device); */ void i2c_unregister_device(struct i2c_client *client) { + if (client->dev.of_node) + of_node_clear_flag(client->dev.of_node, OF_POPULATED); device_unregister(&client->dev); } EXPORT_SYMBOL_GPL(i2c_unregister_device); @@ -1320,8 +1322,11 @@ static void of_i2c_register_devices(struct i2c_adapter *adap) dev_dbg(&adap->dev, "of_i2c: walking child nodes\n"); - for_each_available_child_of_node(adap->dev.of_node, node) + for_each_available_child_of_node(adap->dev.of_node, node) { + if (of_node_test_and_set_flag(node, OF_POPULATED)) + continue; of_i2c_register_device(adap, node); + } } static int of_dev_node_match(struct device *dev, void *data) @@ -1853,6 +1858,11 @@ static int of_i2c_notify(struct notifier_block *nb, unsigned long action, if (adap == NULL) return NOTIFY_OK; /* not for us */ + if (of_node_test_and_set_flag(rd->dn, OF_POPULATED)) { + put_device(&adap->dev); + return NOTIFY_OK; + } + client = of_i2c_register_device(adap, rd->dn); put_device(&adap->dev); @@ -1863,6 +1873,10 @@ static int of_i2c_notify(struct notifier_block *nb, unsigned long action, } break; case OF_RECONFIG_CHANGE_REMOVE: + /* already depopulated? */ + if (!of_node_check_flag(rd->dn, OF_POPULATED)) + return NOTIFY_OK; + /* find our device by node */ client = of_find_i2c_device_by_node(rd->dn); if (client == NULL) From a27b5b97d6fe91f55058ad8ac28a8768700201ab Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Sun, 28 Jun 2015 15:16:57 +0200 Subject: [PATCH 177/210] hpfs: add fstrim support This patch adds support for fstrim to the HPFS filesystem. Signed-off-by: Mikulas Patocka Signed-off-by: Linus Torvalds --- fs/hpfs/alloc.c | 95 +++++++++++++++++++++++++++++++++++++++++++++++ fs/hpfs/dir.c | 1 + fs/hpfs/file.c | 1 + fs/hpfs/hpfs_fn.h | 4 ++ fs/hpfs/super.c | 27 ++++++++++++++ 5 files changed, 128 insertions(+) diff --git a/fs/hpfs/alloc.c b/fs/hpfs/alloc.c index f005046e1591..d6a4b55d2ab0 100644 --- a/fs/hpfs/alloc.c +++ b/fs/hpfs/alloc.c @@ -484,3 +484,98 @@ struct anode *hpfs_alloc_anode(struct super_block *s, secno near, anode_secno *a a->btree.first_free = cpu_to_le16(8); return a; } + +static unsigned find_run(__le32 *bmp, unsigned *idx) +{ + unsigned len; + while (tstbits(bmp, *idx, 1)) { + (*idx)++; + if (unlikely(*idx >= 0x4000)) + return 0; + } + len = 1; + while (!tstbits(bmp, *idx + len, 1)) + len++; + return len; +} + +static int do_trim(struct super_block *s, secno start, unsigned len, secno limit_start, secno limit_end, unsigned minlen, unsigned *result) +{ + int err; + secno end; + if (fatal_signal_pending(current)) + return -EINTR; + end = start + len; + if (start < limit_start) + start = limit_start; + if (end > limit_end) + end = limit_end; + if (start >= end) + return 0; + if (end - start < minlen) + return 0; + err = sb_issue_discard(s, start, end - start, GFP_NOFS, 0); + if (err) + return err; + *result += end - start; + return 0; +} + +int hpfs_trim_fs(struct super_block *s, u64 start, u64 end, u64 minlen, unsigned *result) +{ + int err = 0; + struct hpfs_sb_info *sbi = hpfs_sb(s); + unsigned idx, len, start_bmp, end_bmp; + __le32 *bmp; + struct quad_buffer_head qbh; + + *result = 0; + if (!end || end > sbi->sb_fs_size) + end = sbi->sb_fs_size; + if (start >= sbi->sb_fs_size) + return 0; + if (minlen > 0x4000) + return 0; + if (start < sbi->sb_dirband_start + sbi->sb_dirband_size && end > sbi->sb_dirband_start) { + hpfs_lock(s); + if (s->s_flags & MS_RDONLY) { + err = -EROFS; + goto unlock_1; + } + if (!(bmp = hpfs_map_dnode_bitmap(s, &qbh))) { + err = -EIO; + goto unlock_1; + } + idx = 0; + while ((len = find_run(bmp, &idx)) && !err) { + err = do_trim(s, sbi->sb_dirband_start + idx * 4, len * 4, start, end, minlen, result); + idx += len; + } + hpfs_brelse4(&qbh); +unlock_1: + hpfs_unlock(s); + } + start_bmp = start >> 14; + end_bmp = (end + 0x3fff) >> 14; + while (start_bmp < end_bmp && !err) { + hpfs_lock(s); + if (s->s_flags & MS_RDONLY) { + err = -EROFS; + goto unlock_2; + } + if (!(bmp = hpfs_map_bitmap(s, start_bmp, &qbh, "trim"))) { + err = -EIO; + goto unlock_2; + } + idx = 0; + while ((len = find_run(bmp, &idx)) && !err) { + err = do_trim(s, (start_bmp << 14) + idx, len, start, end, minlen, result); + idx += len; + } + hpfs_brelse4(&qbh); +unlock_2: + hpfs_unlock(s); + start_bmp++; + } + return err; +} diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c index 2a8e07425de0..dc540bfcee1d 100644 --- a/fs/hpfs/dir.c +++ b/fs/hpfs/dir.c @@ -327,4 +327,5 @@ const struct file_operations hpfs_dir_ops = .iterate = hpfs_readdir, .release = hpfs_dir_release, .fsync = hpfs_file_fsync, + .unlocked_ioctl = hpfs_ioctl, }; diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c index 6d8cfe9b52d6..7ca28d604bf7 100644 --- a/fs/hpfs/file.c +++ b/fs/hpfs/file.c @@ -203,6 +203,7 @@ const struct file_operations hpfs_file_ops = .release = hpfs_file_release, .fsync = hpfs_file_fsync, .splice_read = generic_file_splice_read, + .unlocked_ioctl = hpfs_ioctl, }; const struct inode_operations hpfs_file_iops = diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index bb04b58d1d69..c4867b5116dd 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -18,6 +18,8 @@ #include #include #include +#include +#include #include #include "hpfs.h" @@ -200,6 +202,7 @@ void hpfs_free_dnode(struct super_block *, secno); struct dnode *hpfs_alloc_dnode(struct super_block *, secno, dnode_secno *, struct quad_buffer_head *); struct fnode *hpfs_alloc_fnode(struct super_block *, secno, fnode_secno *, struct buffer_head **); struct anode *hpfs_alloc_anode(struct super_block *, secno, anode_secno *, struct buffer_head **); +int hpfs_trim_fs(struct super_block *, u64, u64, u64, unsigned *); /* anode.c */ @@ -318,6 +321,7 @@ __printf(2, 3) void hpfs_error(struct super_block *, const char *, ...); int hpfs_stop_cycles(struct super_block *, int, int *, int *, char *); unsigned hpfs_get_free_dnodes(struct super_block *); +long hpfs_ioctl(struct file *file, unsigned cmd, unsigned long arg); /* * local time (HPFS) to GMT (Unix) diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 7cd00d3a7c9b..037e3e597ff4 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -196,6 +196,33 @@ static int hpfs_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } + +long hpfs_ioctl(struct file *file, unsigned cmd, unsigned long arg) +{ + switch (cmd) { + case FITRIM: { + struct fstrim_range range; + secno n_trimmed; + int r; + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (copy_from_user(&range, (struct fstrim_range __user *)arg, sizeof(range))) + return -EFAULT; + r = hpfs_trim_fs(file_inode(file)->i_sb, range.start >> 9, (range.start + range.len) >> 9, (range.minlen + 511) >> 9, &n_trimmed); + if (r) + return r; + range.len = (u64)n_trimmed << 9; + if (copy_to_user((struct fstrim_range __user *)arg, &range, sizeof(range))) + return -EFAULT; + return 0; + } + default: { + return -ENOIOCTLCMD; + } + } +} + + static struct kmem_cache * hpfs_inode_cachep; static struct inode *hpfs_alloc_inode(struct super_block *sb) From d7b04097c250e6322ba73d3b03337074afeeb314 Mon Sep 17 00:00:00 2001 From: Firo Yang Date: Thu, 23 Apr 2015 17:28:45 +0800 Subject: [PATCH 178/210] hpfs: Remove unessary cast Avoid a pointless kmem_cache_alloc() return value cast in fs/hpfs/super.c::hpfs_alloc_inode() Signed-off-by: Firo Yang Signed-off-by: Mikulas Patocka Signed-off-by: Linus Torvalds --- fs/hpfs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 037e3e597ff4..0642516d36c8 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -228,7 +228,7 @@ static struct kmem_cache * hpfs_inode_cachep; static struct inode *hpfs_alloc_inode(struct super_block *sb) { struct hpfs_inode_info *ei; - ei = (struct hpfs_inode_info *)kmem_cache_alloc(hpfs_inode_cachep, GFP_NOFS); + ei = kmem_cache_alloc(hpfs_inode_cachep, GFP_NOFS); if (!ei) return NULL; ei->vfs_inode.i_version = 1; From ce657611baf902f14ae559ce4e0787ead6712067 Mon Sep 17 00:00:00 2001 From: Sanidhya Kashyap Date: Sat, 21 Mar 2015 12:57:50 -0400 Subject: [PATCH 179/210] hpfs: kstrdup() out of memory handling There is a possibility of nothing being allocated to the new_opts in case of memory pressure, therefore return ENOMEM for such case. Signed-off-by: Sanidhya Kashyap Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- fs/hpfs/super.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 0642516d36c8..cde044d41f69 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -451,11 +451,14 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data) int o; struct hpfs_sb_info *sbi = hpfs_sb(s); char *new_opts = kstrdup(data, GFP_KERNEL); - + + if (!new_opts) + return -ENOMEM; + sync_filesystem(s); *flags |= MS_NOATIME; - + hpfs_lock(s); uid = sbi->sb_uid; gid = sbi->sb_gid; umask = 0777 & ~sbi->sb_mode; From a28e4b2b18ccb90df402da3f21e1a83c9d4f8ec1 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 26 Mar 2015 20:47:10 -0700 Subject: [PATCH 180/210] hpfs: hpfs_error: Remove static buffer, use vsprintf extension %pV instead Removing unnecessary static buffers is good. Use the vsprintf %pV extension instead. Signed-off-by: Joe Perches Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org # v2.6.36+ Signed-off-by: Linus Torvalds --- fs/hpfs/super.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index cde044d41f69..68a9bed05628 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -52,17 +52,20 @@ static void unmark_dirty(struct super_block *s) } /* Filesystem error... */ -static char err_buf[1024]; - void hpfs_error(struct super_block *s, const char *fmt, ...) { + struct va_format vaf; va_list args; va_start(args, fmt); - vsnprintf(err_buf, sizeof(err_buf), fmt, args); + + vaf.fmt = fmt; + vaf.va = &args; + + pr_err("filesystem error: %pV", &vaf); + va_end(args); - pr_err("filesystem error: %s", err_buf); if (!hpfs_sb(s)->sb_was_error) { if (hpfs_sb(s)->sb_err == 2) { pr_cont("; crashing the system because you wanted it\n"); From 2c069a118fe1d80c47dca84e1561045fc7f3cc9e Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Fri, 10 Jul 2015 09:04:25 +1000 Subject: [PATCH 181/210] cxl: Check if afu is not null in cxl_slbia The pointer to an AFU in the adapter's list of AFUs can be null if we're in the process of removing AFUs. The afu_list_lock doesn't guard against this. Say we have 2 slices, and we're in the process of removing cxl. - We remove the AFUs in order (see cxl_remove). In cxl_remove_afu for AFU 0, we take the lock, set adapter->afu[0] = NULL, and release the lock. - Then we get an slbia. In cxl_slbia we take the lock, and set afu = adapter->afu[0], which is NULL. - Therefore our attempt to check afu->enabled will blow up. Therefore, check if afu is a null pointer before dereferencing it. Cc: stable@vger.kernel.org Signed-off-by: Daniel Axtens Acked-by: Michael Neuling Acked-by: Ian Munsie Signed-off-by: Michael Ellerman --- drivers/misc/cxl/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c index 833348e2c9cb..4a164ab8b35a 100644 --- a/drivers/misc/cxl/main.c +++ b/drivers/misc/cxl/main.c @@ -73,7 +73,7 @@ static inline void cxl_slbia_core(struct mm_struct *mm) spin_lock(&adapter->afu_list_lock); for (slice = 0; slice < adapter->slices; slice++) { afu = adapter->afu[slice]; - if (!afu->enabled) + if (!afu || !afu->enabled) continue; rcu_read_lock(); idr_for_each_entry(&afu->contexts_idr, ctx, id) From cccf34e9411c41b0cbfb41980fe55fc8e7c98fd2 Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Fri, 10 Jul 2015 09:29:10 +0100 Subject: [PATCH 182/210] MIPS: c-r4k: Fix cache flushing for MT cores MT_SMP is not the only SMP option for MT cores. The MT_SMP option allows more than one VPE per core to appear as a secondary CPU in the system. Because of how CM works, it propagates the address-based cache ops to the secondary cores but not the index-based ones. Because of that, the code does not use IPIs to flush the L1 caches on secondary cores because the CM would have done that already. However, the CM functionality is independent of the type of SMP kernel so even in non-MT kernels, IPIs are not necessary. As a result of which, we change the conditional to depend on the CM presence. Moreover, since VPEs on the same core share the same L1 caches, there is no need to send an IPI on all of them so we calculate a suitable cpumask with only one VPE per core. Signed-off-by: Markos Chandras Cc: # 3.15+ Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/10654/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/smp.h | 1 + arch/mips/kernel/smp.c | 44 ++++++++++++++++++++++++++++++++++++- arch/mips/mm/c-r4k.c | 14 +++++++++--- 3 files changed, 55 insertions(+), 4 deletions(-) diff --git a/arch/mips/include/asm/smp.h b/arch/mips/include/asm/smp.h index 2b25d1ba1ea0..16f1ea9ab191 100644 --- a/arch/mips/include/asm/smp.h +++ b/arch/mips/include/asm/smp.h @@ -23,6 +23,7 @@ extern int smp_num_siblings; extern cpumask_t cpu_sibling_map[]; extern cpumask_t cpu_core_map[]; +extern cpumask_t cpu_foreign_map; #define raw_smp_processor_id() (current_thread_info()->cpu) diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index faa46ebd9dda..d0744cc77ea7 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -63,6 +63,13 @@ EXPORT_SYMBOL(cpu_sibling_map); cpumask_t cpu_core_map[NR_CPUS] __read_mostly; EXPORT_SYMBOL(cpu_core_map); +/* + * A logcal cpu mask containing only one VPE per core to + * reduce the number of IPIs on large MT systems. + */ +cpumask_t cpu_foreign_map __read_mostly; +EXPORT_SYMBOL(cpu_foreign_map); + /* representing cpus for which sibling maps can be computed */ static cpumask_t cpu_sibling_setup_map; @@ -103,6 +110,29 @@ static inline void set_cpu_core_map(int cpu) } } +/* + * Calculate a new cpu_foreign_map mask whenever a + * new cpu appears or disappears. + */ +static inline void calculate_cpu_foreign_map(void) +{ + int i, k, core_present; + cpumask_t temp_foreign_map; + + /* Re-calculate the mask */ + for_each_online_cpu(i) { + core_present = 0; + for_each_cpu(k, &temp_foreign_map) + if (cpu_data[i].package == cpu_data[k].package && + cpu_data[i].core == cpu_data[k].core) + core_present = 1; + if (!core_present) + cpumask_set_cpu(i, &temp_foreign_map); + } + + cpumask_copy(&cpu_foreign_map, &temp_foreign_map); +} + struct plat_smp_ops *mp_ops; EXPORT_SYMBOL(mp_ops); @@ -146,6 +176,8 @@ asmlinkage void start_secondary(void) set_cpu_sibling_map(cpu); set_cpu_core_map(cpu); + calculate_cpu_foreign_map(); + cpumask_set_cpu(cpu, &cpu_callin_map); synchronise_count_slave(cpu); @@ -173,9 +205,18 @@ void __irq_entry smp_call_function_interrupt(void) static void stop_this_cpu(void *dummy) { /* - * Remove this CPU: + * Remove this CPU. Be a bit slow here and + * set the bits for every online CPU so we don't miss + * any IPI whilst taking this VPE down. */ + + cpumask_copy(&cpu_foreign_map, cpu_online_mask); + + /* Make it visible to every other CPU */ + smp_mb(); + set_cpu_online(smp_processor_id(), false); + calculate_cpu_foreign_map(); local_irq_disable(); while (1); } @@ -197,6 +238,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) mp_ops->prepare_cpus(max_cpus); set_cpu_sibling_map(0); set_cpu_core_map(0); + calculate_cpu_foreign_map(); #ifndef CONFIG_HOTPLUG_CPU init_cpu_present(cpu_possible_mask); #endif diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index 7f660dc67596..a5974ddba44b 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -37,6 +37,7 @@ #include /* for run_uncached() */ #include #include +#include /* * Special Variant of smp_call_function for use by cache functions: @@ -51,9 +52,16 @@ static inline void r4k_on_each_cpu(void (*func) (void *info), void *info) { preempt_disable(); -#ifndef CONFIG_MIPS_MT_SMP - smp_call_function(func, info, 1); -#endif + /* + * The Coherent Manager propagates address-based cache ops to other + * cores but not index-based ops. However, r4k_on_each_cpu is used + * in both cases so there is no easy way to tell what kind of op is + * executed to the other cores. The best we can probably do is + * to restrict that call when a CM is not present because both + * CM-based SMP protocols (CMP & CPS) restrict index-based cache ops. + */ + if (!mips_cm_present()) + smp_call_function_many(&cpu_foreign_map, func, info, 1); func(info); preempt_enable(); } From 6249ecbbb75cd635025cc681fcf51fb8659edbab Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 17 Apr 2015 10:44:15 +0100 Subject: [PATCH 183/210] MIPS: Malta: Make GIC FDC IRQ workaround Malta specific Wider testing reveals that the Fast Debug Channel (FDC) interrupt is routed through the GIC just fine on Pistachio SoC, even though it contains interAptiv cores. Clearly the FDC interrupt routing problems previously observed on interAptiv and proAptiv cores are specific to the Malta FPGA bitstreams. Move the workaround for interAptiv and proAptiv out of gic_get_c0_fdc_int() in the GIC irqchip driver into Malta's get_c0_fdc_int() platform callback, to allow the Pistachio SoC to use the FDC interrupt. Signed-off-by: James Hogan Cc: Ralf Baechle Cc: Andrew Bresticker Cc: Thomas Gleixner Cc: Jason Cooper Cc: linux-mips@linux-mips.org Reviewed-by: Andrew Bresticker Cc: James Hartley Patchwork: http://patchwork.linux-mips.org/patch/9748/ Signed-off-by: Ralf Baechle --- arch/mips/mti-malta/malta-time.c | 20 +++++++++++++------- drivers/irqchip/irq-mips-gic.c | 10 ---------- 2 files changed, 13 insertions(+), 17 deletions(-) diff --git a/arch/mips/mti-malta/malta-time.c b/arch/mips/mti-malta/malta-time.c index 185e68261f45..5625b190edc0 100644 --- a/arch/mips/mti-malta/malta-time.c +++ b/arch/mips/mti-malta/malta-time.c @@ -119,18 +119,24 @@ void read_persistent_clock(struct timespec *ts) int get_c0_fdc_int(void) { - int mips_cpu_fdc_irq; + /* + * Some cores claim the FDC is routable through the GIC, but it doesn't + * actually seem to be connected for those Malta bitstreams. + */ + switch (current_cpu_type()) { + case CPU_INTERAPTIV: + case CPU_PROAPTIV: + return -1; + }; if (cpu_has_veic) - mips_cpu_fdc_irq = -1; + return -1; else if (gic_present) - mips_cpu_fdc_irq = gic_get_c0_fdc_int(); + return gic_get_c0_fdc_int(); else if (cp0_fdc_irq >= 0) - mips_cpu_fdc_irq = MIPS_CPU_IRQ_BASE + cp0_fdc_irq; + return MIPS_CPU_IRQ_BASE + cp0_fdc_irq; else - mips_cpu_fdc_irq = -1; - - return mips_cpu_fdc_irq; + return -1; } int get_c0_perfcount_int(void) diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c index 4400edd1a6c7..b7d54d428b5e 100644 --- a/drivers/irqchip/irq-mips-gic.c +++ b/drivers/irqchip/irq-mips-gic.c @@ -257,16 +257,6 @@ int gic_get_c0_fdc_int(void) return MIPS_CPU_IRQ_BASE + cp0_fdc_irq; } - /* - * Some cores claim the FDC is routable but it doesn't actually seem to - * be connected. - */ - switch (current_cpu_type()) { - case CPU_INTERAPTIV: - case CPU_PROAPTIV: - return -1; - } - return irq_create_mapping(gic_irq_domain, GIC_LOCAL_TO_HWIRQ(GIC_LOCAL_INT_FDC)); } From 6b5e741e9a834a8cf2d5b895319045ab17ad37fe Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 17 Apr 2015 10:44:16 +0100 Subject: [PATCH 184/210] MIPS: Pistachio: Support CDMM & Fast Debug Channel Implement the mips_cdmm_phys_base() platform callback to provide a default Common Device Memory Map (CDMM) physical base address for the Pistachio SoC. This allows the CDMM in each VPE to be configured and probed for devices, such as the Fast Debug Channel (FDC). The physical address chosen is just below the default CPC address, which appears to also be unallocated. The FDC IRQ is also usable on Pistachio, and is routed through the GIC, so implement the get_c0_fdc_int() platform callback using gic_get_c0_fdc_int(), so the FDC driver doesn't have to fall back to polling. Signed-off-by: James Hogan Cc: Ralf Baechle Cc: Andrew Bresticker Cc: James Hartley Cc: linux-mips@linux-mips.org Reviewed-by: Andrew Bresticker Patchwork: http://patchwork.linux-mips.org/patch/9749/ Signed-off-by: Ralf Baechle --- arch/mips/pistachio/init.c | 8 +++++++- arch/mips/pistachio/time.c | 5 +++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/arch/mips/pistachio/init.c b/arch/mips/pistachio/init.c index d2dc836523a3..8bd8ebb20a72 100644 --- a/arch/mips/pistachio/init.c +++ b/arch/mips/pistachio/init.c @@ -63,13 +63,19 @@ void __init plat_mem_setup(void) plat_setup_iocoherency(); } -#define DEFAULT_CPC_BASE_ADDR 0x1bde0000 +#define DEFAULT_CPC_BASE_ADDR 0x1bde0000 +#define DEFAULT_CDMM_BASE_ADDR 0x1bdd0000 phys_addr_t mips_cpc_default_phys_base(void) { return DEFAULT_CPC_BASE_ADDR; } +phys_addr_t mips_cdmm_phys_base(void) +{ + return DEFAULT_CDMM_BASE_ADDR; +} + static void __init mips_nmi_setup(void) { void *base; diff --git a/arch/mips/pistachio/time.c b/arch/mips/pistachio/time.c index 67889fcea8aa..7c73fcb92a10 100644 --- a/arch/mips/pistachio/time.c +++ b/arch/mips/pistachio/time.c @@ -27,6 +27,11 @@ int get_c0_perfcount_int(void) return gic_get_c0_perfcount_int(); } +int get_c0_fdc_int(void) +{ + return gic_get_c0_fdc_int(); +} + void __init plat_time_init(void) { struct device_node *np; From 1e18ac7aeaec357048172695b1fbb461205b166f Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Thu, 9 Jul 2015 10:40:41 +0100 Subject: [PATCH 185/210] MIPS: c-r4k: Extend way_string array The L2 cache in the I6400 core has 16 ways, so extend the way_string array to take such caches into account. [ralf@linux-mips.org: Other already supported CPUs are free to support more than 8 ways of cache as well.] Signed-off-by: Paul Burton Signed-off-by: Markos Chandras Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/10640/ Signed-off-by: Ralf Baechle --- arch/mips/mm/c-r4k.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index a5974ddba44b..fbea4432f3f2 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -945,7 +945,9 @@ static void b5k_instruction_hazard(void) } static char *way_string[] = { NULL, "direct mapped", "2-way", - "3-way", "4-way", "5-way", "6-way", "7-way", "8-way" + "3-way", "4-way", "5-way", "6-way", "7-way", "8-way", + "9-way", "10-way", "11-way", "12-way", + "13-way", "14-way", "15-way", "16-way", }; static void probe_pcache(void) From 51d53674c3fff11e9231e66ca8616c65deadfb6c Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Thu, 9 Jul 2015 18:02:51 +0200 Subject: [PATCH 186/210] MIPS: O32: Use compat_sys_getsockopt. We were using the native syscall and that results in subtle breakage. This is the same issue as fixed in 077d0e65618f27b2199d622e12ada6d8f3dbd862 (MIPS: N32: Use compat getsockopt syscall) but that commit did fix it only for N32. Signed-off-by: Ralf Baechle Link: https://bugzilla.kernel.org/show_bug.cgi?id=100291 --- arch/mips/kernel/scall64-o32.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index 66d618bb2fa2..f543ff4feef9 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -400,7 +400,7 @@ EXPORT(sys32_call_table) PTR sys_connect /* 4170 */ PTR sys_getpeername PTR sys_getsockname - PTR sys_getsockopt + PTR compat_sys_getsockopt PTR sys_listen PTR compat_sys_recv /* 4175 */ PTR compat_sys_recvfrom From ad2daa85bd50b7ff5c851b80a0b813bdc8d14f8e Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 10 Jul 2015 15:46:32 +0100 Subject: [PATCH 187/210] arm64: entry32: remove pointless register assignment We currently set x27 in compat_sys_sigreturn_wrapper and compat_sys_rt_sigreturn_wrapper, similarly to what we do with r8/why on 32-bit ARM, in an attempt to prevent sigreturns from being restarted. However, on arm64 we have always used pt_regs::syscallno for syscall restarting (for both native and compat tasks), and x27 is never inspected again before being overwritten in kernel_exit. This patch removes the pointless register assignments. Signed-off-by: Mark Rutland Cc: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/entry32.S | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/kernel/entry32.S b/arch/arm64/kernel/entry32.S index bd9bfaa9269b..f332d5d1f6b4 100644 --- a/arch/arm64/kernel/entry32.S +++ b/arch/arm64/kernel/entry32.S @@ -32,13 +32,11 @@ ENTRY(compat_sys_sigreturn_wrapper) mov x0, sp - mov x27, #0 // prevent syscall restart handling (why) b compat_sys_sigreturn ENDPROC(compat_sys_sigreturn_wrapper) ENTRY(compat_sys_rt_sigreturn_wrapper) mov x0, sp - mov x27, #0 // prevent syscall restart handling (why) b compat_sys_rt_sigreturn ENDPROC(compat_sys_rt_sigreturn_wrapper) From dbf3c370862d73fcd2c74ca55e254bb02143238d Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 10 Jul 2015 10:11:07 -0700 Subject: [PATCH 188/210] Revert "Input: synaptics - allocate 3 slots to keep stability in image sensors" This reverts commit 63c4fda3c0bb841b1aad1298fc7fe94058fc79f8 as it causes issues with detecting 3-finger taps. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=100481 Cc: stable@vger.kernel.org Acked-by: Benjamin Tissoires --- drivers/input/mouse/synaptics.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 35c8d0ceabee..3a32caf06bf1 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -1199,7 +1199,7 @@ static void set_input_params(struct psmouse *psmouse, ABS_MT_POSITION_Y); /* Image sensors can report per-contact pressure */ input_set_abs_params(dev, ABS_MT_PRESSURE, 0, 255, 0, 0); - input_mt_init_slots(dev, 3, INPUT_MT_POINTER | INPUT_MT_TRACK); + input_mt_init_slots(dev, 2, INPUT_MT_POINTER | INPUT_MT_TRACK); /* Image sensors can signal 4 and 5 finger clicks */ __set_bit(BTN_TOOL_QUADTAP, dev->keybit); From b864bc17f1c326783f2388057e15d3e153125ab9 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 10 Jul 2015 11:06:10 -0600 Subject: [PATCH 189/210] pmem: add maintainer for include/linux/pmem.h The file include/linux/pmem.h was recently created to hold the PMEM API, and is logically part of the PMEM driver. Add an entry for this file to MAINTAINERS. Signed-off-by: Ross Zwisler Signed-off-by: Dan Williams --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 0e6b09150aad..a8306b25c13c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6134,6 +6134,7 @@ L: linux-nvdimm@lists.01.org Q: https://patchwork.kernel.org/project/linux-nvdimm/list/ S: Supported F: drivers/nvdimm/pmem.c +F: include/linux/pmem.h LINUX FOR IBM pSERIES (RS/6000) M: Paul Mackerras From b1b2e6235a44174151fa3bb22657f74972635618 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 10 Jul 2015 11:06:11 -0600 Subject: [PATCH 190/210] tools/testing/nvdimm: mock ioremap_wt In the 4.2-rc1 merge the default_memremap_pmem() implementation switched from ioremap_nocache() to ioremap_wt(). Add it to the list of mocked routines to restore the ability to run the unit tests. Signed-off-by: Dan Williams --- tools/testing/nvdimm/Kbuild | 1 + tools/testing/nvdimm/test/iomap.c | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild index 8e9b64520ec1..8e020601c773 100644 --- a/tools/testing/nvdimm/Kbuild +++ b/tools/testing/nvdimm/Kbuild @@ -1,3 +1,4 @@ +ldflags-y += --wrap=ioremap_wt ldflags-y += --wrap=ioremap_cache ldflags-y += --wrap=ioremap_nocache ldflags-y += --wrap=iounmap diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c index c85a6f6ba559..9f21b150396b 100644 --- a/tools/testing/nvdimm/test/iomap.c +++ b/tools/testing/nvdimm/test/iomap.c @@ -77,6 +77,12 @@ void __iomem *__wrap_ioremap_nocache(resource_size_t offset, unsigned long size) } EXPORT_SYMBOL(__wrap_ioremap_nocache); +void __iomem *__wrap_ioremap_wt(resource_size_t offset, unsigned long size) +{ + return __nfit_test_ioremap(offset, size, ioremap_wt); +} +EXPORT_SYMBOL(__wrap_ioremap_wt); + void __wrap_iounmap(volatile void __iomem *addr) { struct nfit_test_resource *nfit_res; From f7ec83684af020c961d7fab801f8e3ef7ce5de33 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 10 Jul 2015 11:06:12 -0600 Subject: [PATCH 191/210] tools/testing/nvdimm: fix return code for unimplemented commands The implementation for the new "DIMM Flags" DSM relies on the -ENOTTY return code to indicate that the flags are unimplimented and to fall back to a safe default. As is the -ENXIO error code erroneoously indicates to fail enabling a BLK region. Signed-off-by: Dan Williams --- tools/testing/nvdimm/test/nfit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 4b69b8368de0..092d4724fe16 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -155,7 +155,7 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, int i, rc; if (!nfit_mem || !test_bit(cmd, &nfit_mem->dsm_mask)) - return -ENXIO; + return -ENOTTY; /* lookup label space for the given dimm */ for (i = 0; i < ARRAY_SIZE(handle); i++) From 9d27a87ec9e1317d368b1e5e3f4808078baa8c4c Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 10 Jul 2015 14:07:03 -0400 Subject: [PATCH 192/210] tools/testing/nvdimm: add mock acpi_nfit_flush_address entries to nfit_test In preparation for fixing the BLK path to properly use "directed pcommit" enable the unit test infrastructure to emit mock "flush" tables. Writes to these flush addresses trigger a memory controller to flush its internal buffers to persistent media, similar to the x86 "pcommit" instruction. Signed-off-by: Dan Williams --- tools/testing/nvdimm/Kbuild | 2 ++ tools/testing/nvdimm/test/iomap.c | 21 +++++++++++++ tools/testing/nvdimm/test/nfit.c | 50 +++++++++++++++++++++++++++++-- 3 files changed, 71 insertions(+), 2 deletions(-) diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild index 8e020601c773..f56914c7929b 100644 --- a/tools/testing/nvdimm/Kbuild +++ b/tools/testing/nvdimm/Kbuild @@ -1,4 +1,6 @@ ldflags-y += --wrap=ioremap_wt +ldflags-y += --wrap=ioremap_wc +ldflags-y += --wrap=devm_ioremap_nocache ldflags-y += --wrap=ioremap_cache ldflags-y += --wrap=ioremap_nocache ldflags-y += --wrap=iounmap diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c index 9f21b150396b..64bfaa50831c 100644 --- a/tools/testing/nvdimm/test/iomap.c +++ b/tools/testing/nvdimm/test/iomap.c @@ -65,6 +65,21 @@ void __iomem *__nfit_test_ioremap(resource_size_t offset, unsigned long size, return fallback_fn(offset, size); } +void __iomem *__wrap_devm_ioremap_nocache(struct device *dev, + resource_size_t offset, unsigned long size) +{ + struct nfit_test_resource *nfit_res; + + rcu_read_lock(); + nfit_res = get_nfit_res(offset); + rcu_read_unlock(); + if (nfit_res) + return (void __iomem *) nfit_res->buf + offset + - nfit_res->res->start; + return devm_ioremap_nocache(dev, offset, size); +} +EXPORT_SYMBOL(__wrap_devm_ioremap_nocache); + void __iomem *__wrap_ioremap_cache(resource_size_t offset, unsigned long size) { return __nfit_test_ioremap(offset, size, ioremap_cache); @@ -83,6 +98,12 @@ void __iomem *__wrap_ioremap_wt(resource_size_t offset, unsigned long size) } EXPORT_SYMBOL(__wrap_ioremap_wt); +void __iomem *__wrap_ioremap_wc(resource_size_t offset, unsigned long size) +{ + return __nfit_test_ioremap(offset, size, ioremap_wc); +} +EXPORT_SYMBOL(__wrap_ioremap_wc); + void __wrap_iounmap(volatile void __iomem *addr) { struct nfit_test_resource *nfit_res; diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 092d4724fe16..d0bdae40ccc9 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -128,6 +128,8 @@ struct nfit_test { int num_pm; void **dimm; dma_addr_t *dimm_dma; + void **flush; + dma_addr_t *flush_dma; void **label; dma_addr_t *label_dma; void **spa_set; @@ -331,7 +333,8 @@ static int nfit_test0_alloc(struct nfit_test *t) + sizeof(struct acpi_nfit_system_address) * NUM_SPA + sizeof(struct acpi_nfit_memory_map) * NUM_MEM + sizeof(struct acpi_nfit_control_region) * NUM_DCR - + sizeof(struct acpi_nfit_data_region) * NUM_BDW; + + sizeof(struct acpi_nfit_data_region) * NUM_BDW + + sizeof(struct acpi_nfit_flush_address) * NUM_DCR; int i; t->nfit_buf = test_alloc(t, nfit_size, &t->nfit_dma); @@ -356,6 +359,10 @@ static int nfit_test0_alloc(struct nfit_test *t) if (!t->label[i]) return -ENOMEM; sprintf(t->label[i], "label%d", i); + + t->flush[i] = test_alloc(t, 8, &t->flush_dma[i]); + if (!t->flush[i]) + return -ENOMEM; } for (i = 0; i < NUM_DCR; i++) { @@ -408,6 +415,7 @@ static void nfit_test0_setup(struct nfit_test *t) struct acpi_nfit_system_address *spa; struct acpi_nfit_control_region *dcr; struct acpi_nfit_data_region *bdw; + struct acpi_nfit_flush_address *flush; unsigned int offset; nfit_test_init_header(nfit_buf, size); @@ -831,6 +839,39 @@ static void nfit_test0_setup(struct nfit_test *t) bdw->capacity = DIMM_SIZE; bdw->start_address = 0; + offset = offset + sizeof(struct acpi_nfit_data_region) * 4; + /* flush0 (dimm0) */ + flush = nfit_buf + offset; + flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; + flush->header.length = sizeof(struct acpi_nfit_flush_address); + flush->device_handle = handle[0]; + flush->hint_count = 1; + flush->hint_address[0] = t->flush_dma[0]; + + /* flush1 (dimm1) */ + flush = nfit_buf + offset + sizeof(struct acpi_nfit_flush_address) * 1; + flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; + flush->header.length = sizeof(struct acpi_nfit_flush_address); + flush->device_handle = handle[1]; + flush->hint_count = 1; + flush->hint_address[0] = t->flush_dma[1]; + + /* flush2 (dimm2) */ + flush = nfit_buf + offset + sizeof(struct acpi_nfit_flush_address) * 2; + flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; + flush->header.length = sizeof(struct acpi_nfit_flush_address); + flush->device_handle = handle[2]; + flush->hint_count = 1; + flush->hint_address[0] = t->flush_dma[2]; + + /* flush3 (dimm3) */ + flush = nfit_buf + offset + sizeof(struct acpi_nfit_flush_address) * 3; + flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; + flush->header.length = sizeof(struct acpi_nfit_flush_address); + flush->device_handle = handle[3]; + flush->hint_count = 1; + flush->hint_address[0] = t->flush_dma[3]; + acpi_desc = &t->acpi_desc; set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_dsm_force_en); set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_dsm_force_en); @@ -933,6 +974,10 @@ static int nfit_test_probe(struct platform_device *pdev) GFP_KERNEL); nfit_test->dimm_dma = devm_kcalloc(dev, num, sizeof(dma_addr_t), GFP_KERNEL); + nfit_test->flush = devm_kcalloc(dev, num, sizeof(void *), + GFP_KERNEL); + nfit_test->flush_dma = devm_kcalloc(dev, num, sizeof(dma_addr_t), + GFP_KERNEL); nfit_test->label = devm_kcalloc(dev, num, sizeof(void *), GFP_KERNEL); nfit_test->label_dma = devm_kcalloc(dev, num, @@ -943,7 +988,8 @@ static int nfit_test_probe(struct platform_device *pdev) sizeof(dma_addr_t), GFP_KERNEL); if (nfit_test->dimm && nfit_test->dimm_dma && nfit_test->label && nfit_test->label_dma && nfit_test->dcr - && nfit_test->dcr_dma) + && nfit_test->dcr_dma && nfit_test->flush + && nfit_test->flush_dma) /* pass */; else return -ENOMEM; From c2ad29540cb913bd9e526fae77c35c7fb45f24a3 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 10 Jul 2015 11:06:13 -0600 Subject: [PATCH 193/210] nfit: update block I/O path to use PMEM API Update the nfit block I/O path to use the new PMEM API and to adhere to the read/write flows outlined in the "NVDIMM Block Window Driver Writer's Guide": http://pmem.io/documents/NVDIMM_Driver_Writers_Guide.pdf This includes adding support for targeted NVDIMM flushes called "flush hints" in the ACPI 6.0 specification: http://www.uefi.org/sites/default/files/resources/ACPI_6.0.pdf For performance and media durability the mapping for a BLK aperture is moved to a write-combining mapping which is consistent with memcpy_to_pmem() and wmb_blk(). Signed-off-by: Ross Zwisler Acked-by: Rafael J. Wysocki Signed-off-by: Dan Williams --- drivers/acpi/nfit.c | 97 ++++++++++++++++++++++++++++++++++++++++----- drivers/acpi/nfit.h | 15 ++++++- 2 files changed, 100 insertions(+), 12 deletions(-) diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c index a20b7c883ca0..11aa513f2fda 100644 --- a/drivers/acpi/nfit.c +++ b/drivers/acpi/nfit.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include "nfit.h" @@ -305,6 +306,23 @@ static bool add_idt(struct acpi_nfit_desc *acpi_desc, return true; } +static bool add_flush(struct acpi_nfit_desc *acpi_desc, + struct acpi_nfit_flush_address *flush) +{ + struct device *dev = acpi_desc->dev; + struct nfit_flush *nfit_flush = devm_kzalloc(dev, sizeof(*nfit_flush), + GFP_KERNEL); + + if (!nfit_flush) + return false; + INIT_LIST_HEAD(&nfit_flush->list); + nfit_flush->flush = flush; + list_add_tail(&nfit_flush->list, &acpi_desc->flushes); + dev_dbg(dev, "%s: nfit_flush handle: %d hint_count: %d\n", __func__, + flush->device_handle, flush->hint_count); + return true; +} + static void *add_table(struct acpi_nfit_desc *acpi_desc, void *table, const void *end) { @@ -338,7 +356,8 @@ static void *add_table(struct acpi_nfit_desc *acpi_desc, void *table, return err; break; case ACPI_NFIT_TYPE_FLUSH_ADDRESS: - dev_dbg(dev, "%s: flush\n", __func__); + if (!add_flush(acpi_desc, table)) + return err; break; case ACPI_NFIT_TYPE_SMBIOS: dev_dbg(dev, "%s: smbios\n", __func__); @@ -389,6 +408,7 @@ static int nfit_mem_add(struct acpi_nfit_desc *acpi_desc, { u16 dcr = __to_nfit_memdev(nfit_mem)->region_index; struct nfit_memdev *nfit_memdev; + struct nfit_flush *nfit_flush; struct nfit_dcr *nfit_dcr; struct nfit_bdw *nfit_bdw; struct nfit_idt *nfit_idt; @@ -442,6 +462,14 @@ static int nfit_mem_add(struct acpi_nfit_desc *acpi_desc, nfit_mem->idt_bdw = nfit_idt->idt; break; } + + list_for_each_entry(nfit_flush, &acpi_desc->flushes, list) { + if (nfit_flush->flush->device_handle != + nfit_memdev->memdev->device_handle) + continue; + nfit_mem->nfit_flush = nfit_flush; + break; + } break; } @@ -978,6 +1006,24 @@ static u64 to_interleave_offset(u64 offset, struct nfit_blk_mmio *mmio) return mmio->base_offset + line_offset + table_offset + sub_line_offset; } +static void wmb_blk(struct nfit_blk *nfit_blk) +{ + + if (nfit_blk->nvdimm_flush) { + /* + * The first wmb() is needed to 'sfence' all previous writes + * such that they are architecturally visible for the platform + * buffer flush. Note that we've already arranged for pmem + * writes to avoid the cache via arch_memcpy_to_pmem(). The + * final wmb() ensures ordering for the NVDIMM flush write. + */ + wmb(); + writeq(1, nfit_blk->nvdimm_flush); + wmb(); + } else + wmb_pmem(); +} + static u64 read_blk_stat(struct nfit_blk *nfit_blk, unsigned int bw) { struct nfit_blk_mmio *mmio = &nfit_blk->mmio[DCR]; @@ -1012,6 +1058,7 @@ static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw, offset = to_interleave_offset(offset, mmio); writeq(cmd, mmio->base + offset); + wmb_blk(nfit_blk); /* FIXME: conditionally perform read-back if mandated by firmware */ } @@ -1026,7 +1073,6 @@ static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk, base_offset = nfit_blk->bdw_offset + dpa % L1_CACHE_BYTES + lane * mmio->size; - /* TODO: non-temporal access, flush hints, cache management etc... */ write_blk_ctl(nfit_blk, lane, dpa, len, rw); while (len) { unsigned int c; @@ -1045,13 +1091,19 @@ static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk, } if (rw) - memcpy(mmio->aperture + offset, iobuf + copied, c); + memcpy_to_pmem(mmio->aperture + offset, + iobuf + copied, c); else - memcpy(iobuf + copied, mmio->aperture + offset, c); + memcpy_from_pmem(iobuf + copied, + mmio->aperture + offset, c); copied += c; len -= c; } + + if (rw) + wmb_blk(nfit_blk); + rc = read_blk_stat(nfit_blk, lane) ? -EIO : 0; return rc; } @@ -1124,7 +1176,7 @@ static void nfit_spa_unmap(struct acpi_nfit_desc *acpi_desc, } static void __iomem *__nfit_spa_map(struct acpi_nfit_desc *acpi_desc, - struct acpi_nfit_system_address *spa) + struct acpi_nfit_system_address *spa, enum spa_map_type type) { resource_size_t start = spa->address; resource_size_t n = spa->length; @@ -1152,8 +1204,15 @@ static void __iomem *__nfit_spa_map(struct acpi_nfit_desc *acpi_desc, if (!res) goto err_mem; - /* TODO: cacheability based on the spa type */ - spa_map->iomem = ioremap_nocache(start, n); + if (type == SPA_MAP_APERTURE) { + /* + * TODO: memremap_pmem() support, but that requires cache + * flushing when the aperture is moved. + */ + spa_map->iomem = ioremap_wc(start, n); + } else + spa_map->iomem = ioremap_nocache(start, n); + if (!spa_map->iomem) goto err_map; @@ -1171,6 +1230,7 @@ static void __iomem *__nfit_spa_map(struct acpi_nfit_desc *acpi_desc, * nfit_spa_map - interleave-aware managed-mappings of acpi_nfit_system_address ranges * @nvdimm_bus: NFIT-bus that provided the spa table entry * @nfit_spa: spa table to map + * @type: aperture or control region * * In the case where block-data-window apertures and * dimm-control-regions are interleaved they will end up sharing a @@ -1180,12 +1240,12 @@ static void __iomem *__nfit_spa_map(struct acpi_nfit_desc *acpi_desc, * unbound. */ static void __iomem *nfit_spa_map(struct acpi_nfit_desc *acpi_desc, - struct acpi_nfit_system_address *spa) + struct acpi_nfit_system_address *spa, enum spa_map_type type) { void __iomem *iomem; mutex_lock(&acpi_desc->spa_map_mutex); - iomem = __nfit_spa_map(acpi_desc, spa); + iomem = __nfit_spa_map(acpi_desc, spa, type); mutex_unlock(&acpi_desc->spa_map_mutex); return iomem; @@ -1212,6 +1272,7 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus, struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus); struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); struct nd_blk_region *ndbr = to_nd_blk_region(dev); + struct nfit_flush *nfit_flush; struct nfit_blk_mmio *mmio; struct nfit_blk *nfit_blk; struct nfit_mem *nfit_mem; @@ -1237,7 +1298,8 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus, /* map block aperture memory */ nfit_blk->bdw_offset = nfit_mem->bdw->offset; mmio = &nfit_blk->mmio[BDW]; - mmio->base = nfit_spa_map(acpi_desc, nfit_mem->spa_bdw); + mmio->base = nfit_spa_map(acpi_desc, nfit_mem->spa_bdw, + SPA_MAP_APERTURE); if (!mmio->base) { dev_dbg(dev, "%s: %s failed to map bdw\n", __func__, nvdimm_name(nvdimm)); @@ -1259,7 +1321,8 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus, nfit_blk->cmd_offset = nfit_mem->dcr->command_offset; nfit_blk->stat_offset = nfit_mem->dcr->status_offset; mmio = &nfit_blk->mmio[DCR]; - mmio->base = nfit_spa_map(acpi_desc, nfit_mem->spa_dcr); + mmio->base = nfit_spa_map(acpi_desc, nfit_mem->spa_dcr, + SPA_MAP_CONTROL); if (!mmio->base) { dev_dbg(dev, "%s: %s failed to map dcr\n", __func__, nvdimm_name(nvdimm)); @@ -1277,6 +1340,17 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus, return rc; } + nfit_flush = nfit_mem->nfit_flush; + if (nfit_flush && nfit_flush->flush->hint_count != 0) { + nfit_blk->nvdimm_flush = devm_ioremap_nocache(dev, + nfit_flush->flush->hint_address[0], 8); + if (!nfit_blk->nvdimm_flush) + return -ENOMEM; + } + + if (!arch_has_pmem_api() && !nfit_blk->nvdimm_flush) + dev_warn(dev, "unable to guarantee persistence of writes\n"); + if (mmio->line_size == 0) return 0; @@ -1459,6 +1533,7 @@ int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, acpi_size sz) INIT_LIST_HEAD(&acpi_desc->dcrs); INIT_LIST_HEAD(&acpi_desc->bdws); INIT_LIST_HEAD(&acpi_desc->idts); + INIT_LIST_HEAD(&acpi_desc->flushes); INIT_LIST_HEAD(&acpi_desc->memdevs); INIT_LIST_HEAD(&acpi_desc->dimms); mutex_init(&acpi_desc->spa_map_mutex); diff --git a/drivers/acpi/nfit.h b/drivers/acpi/nfit.h index 81f2e8c5a79c..815cb561cdba 100644 --- a/drivers/acpi/nfit.h +++ b/drivers/acpi/nfit.h @@ -60,6 +60,11 @@ struct nfit_idt { struct list_head list; }; +struct nfit_flush { + struct acpi_nfit_flush_address *flush; + struct list_head list; +}; + struct nfit_memdev { struct acpi_nfit_memory_map *memdev; struct list_head list; @@ -77,6 +82,7 @@ struct nfit_mem { struct acpi_nfit_system_address *spa_bdw; struct acpi_nfit_interleave *idt_dcr; struct acpi_nfit_interleave *idt_bdw; + struct nfit_flush *nfit_flush; struct list_head list; struct acpi_device *adev; unsigned long dsm_mask; @@ -88,6 +94,7 @@ struct acpi_nfit_desc { struct mutex spa_map_mutex; struct list_head spa_maps; struct list_head memdevs; + struct list_head flushes; struct list_head dimms; struct list_head spas; struct list_head dcrs; @@ -109,7 +116,7 @@ struct nfit_blk { struct nfit_blk_mmio { union { void __iomem *base; - void *aperture; + void __pmem *aperture; }; u64 size; u64 base_offset; @@ -123,6 +130,12 @@ struct nfit_blk { u64 bdw_offset; /* post interleave offset */ u64 stat_offset; u64 cmd_offset; + void __iomem *nvdimm_flush; +}; + +enum spa_map_type { + SPA_MAP_CONTROL, + SPA_MAP_APERTURE, }; struct nfit_spa_mapping { From f0f2c072cf530d5b8890be5051cc8b36b0c54cce Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 10 Jul 2015 11:06:14 -0600 Subject: [PATCH 194/210] nfit: add support for NVDIMM "latch" flag Add support in the NFIT BLK I/O path for the "latch" flag defined in the "Get Block NVDIMM Flags" _DSM function: http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf This flag requires the driver to read back the command register after it is written in the block I/O path. This ensures that the hardware has fully processed the new command and moved the aperture appropriately. Signed-off-by: Ross Zwisler Acked-by: Rafael J. Wysocki Signed-off-by: Dan Williams --- drivers/acpi/nfit.c | 33 ++++++++++++++++++++++++++++++++- drivers/acpi/nfit.h | 5 +++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c index 11aa513f2fda..628a42c41ab1 100644 --- a/drivers/acpi/nfit.c +++ b/drivers/acpi/nfit.c @@ -1059,7 +1059,9 @@ static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw, writeq(cmd, mmio->base + offset); wmb_blk(nfit_blk); - /* FIXME: conditionally perform read-back if mandated by firmware */ + + if (nfit_blk->dimm_flags & ND_BLK_DCR_LATCH) + readq(mmio->base + offset); } static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk, @@ -1266,6 +1268,28 @@ static int nfit_blk_init_interleave(struct nfit_blk_mmio *mmio, return 0; } +static int acpi_nfit_blk_get_flags(struct nvdimm_bus_descriptor *nd_desc, + struct nvdimm *nvdimm, struct nfit_blk *nfit_blk) +{ + struct nd_cmd_dimm_flags flags; + int rc; + + memset(&flags, 0, sizeof(flags)); + rc = nd_desc->ndctl(nd_desc, nvdimm, ND_CMD_DIMM_FLAGS, &flags, + sizeof(flags)); + + if (rc >= 0 && flags.status == 0) + nfit_blk->dimm_flags = flags.flags; + else if (rc == -ENOTTY) { + /* fall back to a conservative default */ + nfit_blk->dimm_flags = ND_BLK_DCR_LATCH; + rc = 0; + } else + rc = -ENXIO; + + return rc; +} + static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus, struct device *dev) { @@ -1340,6 +1364,13 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus, return rc; } + rc = acpi_nfit_blk_get_flags(nd_desc, nvdimm, nfit_blk); + if (rc < 0) { + dev_dbg(dev, "%s: %s failed get DIMM flags\n", + __func__, nvdimm_name(nvdimm)); + return rc; + } + nfit_flush = nfit_mem->nfit_flush; if (nfit_flush && nfit_flush->flush->hint_count != 0) { nfit_blk->nvdimm_flush = devm_ioremap_nocache(dev, diff --git a/drivers/acpi/nfit.h b/drivers/acpi/nfit.h index 815cb561cdba..79b6d83875c1 100644 --- a/drivers/acpi/nfit.h +++ b/drivers/acpi/nfit.h @@ -40,6 +40,10 @@ enum nfit_uuids { NFIT_UUID_MAX, }; +enum { + ND_BLK_DCR_LATCH = 2, +}; + struct nfit_spa { struct acpi_nfit_system_address *spa; struct list_head list; @@ -131,6 +135,7 @@ struct nfit_blk { u64 stat_offset; u64 cmd_offset; void __iomem *nvdimm_flush; + u32 dimm_flags; }; enum spa_map_type { From cb908ed3495496b9973a2b9ed1a60f43933fdf01 Mon Sep 17 00:00:00 2001 From: Alex Ivanov Date: Mon, 15 Jun 2015 08:50:45 +0300 Subject: [PATCH 195/210] stifb: Implement hardware accelerated copyarea This patch adds hardware assisted scrolling. The code is based upon the following investigation: https://parisc.wiki.kernel.org/index.php/NGLE#Blitter A simple 'time ls -la /usr/bin' test shows 1.6x speed increase over soft copy and 2.3x increase over FBINFO_READS_FAST (prefer soft copy over screen redraw) on Artist framebuffer. Signed-off-by: Alex Ivanov Signed-off-by: Helge Deller --- drivers/video/fbdev/stifb.c | 40 +++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/drivers/video/fbdev/stifb.c b/drivers/video/fbdev/stifb.c index 86621fabbb8b..735355b0e023 100644 --- a/drivers/video/fbdev/stifb.c +++ b/drivers/video/fbdev/stifb.c @@ -121,6 +121,7 @@ static int __initdata stifb_bpp_pref[MAX_STI_ROMS]; #define REG_3 0x0004a0 #define REG_4 0x000600 #define REG_6 0x000800 +#define REG_7 0x000804 #define REG_8 0x000820 #define REG_9 0x000a04 #define REG_10 0x018000 @@ -135,6 +136,8 @@ static int __initdata stifb_bpp_pref[MAX_STI_ROMS]; #define REG_21 0x200218 #define REG_22 0x0005a0 #define REG_23 0x0005c0 +#define REG_24 0x000808 +#define REG_25 0x000b00 #define REG_26 0x200118 #define REG_27 0x200308 #define REG_32 0x21003c @@ -429,6 +432,9 @@ ARTIST_ENABLE_DISABLE_DISPLAY(struct stifb_info *fb, int enable) #define SET_LENXY_START_RECFILL(fb, lenxy) \ WRITE_WORD(lenxy, fb, REG_9) +#define SETUP_COPYAREA(fb) \ + WRITE_BYTE(0, fb, REG_16b1) + static void HYPER_ENABLE_DISABLE_DISPLAY(struct stifb_info *fb, int enable) { @@ -1004,6 +1010,36 @@ stifb_blank(int blank_mode, struct fb_info *info) return 0; } +static void +stifb_copyarea(struct fb_info *info, const struct fb_copyarea *area) +{ + struct stifb_info *fb = container_of(info, struct stifb_info, info); + + SETUP_COPYAREA(fb); + + SETUP_HW(fb); + if (fb->info.var.bits_per_pixel == 32) { + WRITE_WORD(0xBBA0A000, fb, REG_10); + + NGLE_REALLY_SET_IMAGE_PLANEMASK(fb, 0xffffffff); + } else { + WRITE_WORD(fb->id == S9000_ID_HCRX ? 0x13a02000 : 0x13a01000, fb, REG_10); + + NGLE_REALLY_SET_IMAGE_PLANEMASK(fb, 0xff); + } + + NGLE_QUICK_SET_IMAGE_BITMAP_OP(fb, + IBOvals(RopSrc, MaskAddrOffset(0), + BitmapExtent08, StaticReg(1), + DataDynamic, MaskOtc, BGx(0), FGx(0))); + + WRITE_WORD(((area->sx << 16) | area->sy), fb, REG_24); + WRITE_WORD(((area->width << 16) | area->height), fb, REG_7); + WRITE_WORD(((area->dx << 16) | area->dy), fb, REG_25); + + SETUP_FB(fb); +} + static void __init stifb_init_display(struct stifb_info *fb) { @@ -1069,7 +1105,7 @@ static struct fb_ops stifb_ops = { .fb_setcolreg = stifb_setcolreg, .fb_blank = stifb_blank, .fb_fillrect = cfb_fillrect, - .fb_copyarea = cfb_copyarea, + .fb_copyarea = stifb_copyarea, .fb_imageblit = cfb_imageblit, }; @@ -1258,7 +1294,7 @@ static int __init stifb_init_fb(struct sti_struct *sti, int bpp_pref) info->fbops = &stifb_ops; info->screen_base = ioremap_nocache(REGION_BASE(fb,1), fix->smem_len); info->screen_size = fix->smem_len; - info->flags = FBINFO_DEFAULT; + info->flags = FBINFO_DEFAULT | FBINFO_HWACCEL_COPYAREA; info->pseudo_palette = &fb->pseudo_palette; /* This has to be done !!! */ From 01ab60570427caa24b9debc369e452e86cd9beb4 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Wed, 1 Jul 2015 17:18:37 -0400 Subject: [PATCH 196/210] parisc: Fix some PTE/TLB race conditions and optimize __flush_tlb_range based on timing results The increased use of pdtlb/pitlb instructions seemed to increase the frequency of random segmentation faults building packages. Further, we had a number of cases where TLB inserts would repeatedly fail and all forward progress would stop. The Haskell ghc package caused a lot of trouble in this area. The final indication of a race in pte handling was this syslog entry on sibaris (C8000): swap_free: Unused swap offset entry 00000004 BUG: Bad page map in process mysqld pte:00000100 pmd:019bbec5 addr:00000000ec464000 vm_flags:00100073 anon_vma:0000000221023828 mapping: (null) index:ec464 CPU: 1 PID: 9176 Comm: mysqld Not tainted 4.0.0-2-parisc64-smp #1 Debian 4.0.5-1 Backtrace: [<0000000040173eb0>] show_stack+0x20/0x38 [<0000000040444424>] dump_stack+0x9c/0x110 [<00000000402a0d38>] print_bad_pte+0x1a8/0x278 [<00000000402a28b8>] unmap_single_vma+0x3d8/0x770 [<00000000402a4090>] zap_page_range+0xf0/0x198 [<00000000402ba2a4>] SyS_madvise+0x404/0x8c0 Note that the pte value is 0 except for the accessed bit 0x100. This bit shouldn't be set without the present bit. It should be noted that the madvise system call is probably a trigger for many of the random segmentation faults. In looking at the kernel code, I found the following problems: 1) The pte_clear define didn't take TLB lock when clearing a pte. 2) We didn't test pte present bit inside lock in exception support. 3) The pte and tlb locks needed to merged in order to ensure consistency between page table and TLB. This also has the effect of serializing TLB broadcasts on SMP systems. The attached change implements the above and a few other tweaks to try to improve performance. Based on the timing code, TLB purges are very slow (e.g., ~ 209 cycles per page on rp3440). Thus, I think it beneficial to test the split_tlb variable to avoid duplicate purges. Probably, all PA 2.0 machines have combined TLBs. I dropped using __flush_tlb_range in flush_tlb_mm as I realized all applications and most threads have a stack size that is too large to make this useful. I added some comments to this effect. Since implementing 1 through 3, I haven't had any random segmentation faults on mx3210 (rp3440) in about one week of building code and running as a Debian buildd. Signed-off-by: John David Anglin Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Helge Deller --- arch/parisc/include/asm/pgtable.h | 55 ++++++---- arch/parisc/include/asm/tlbflush.h | 53 +++++----- arch/parisc/kernel/cache.c | 105 ++++++++++++------- arch/parisc/kernel/entry.S | 163 ++++++++++++++--------------- arch/parisc/kernel/traps.c | 4 - 5 files changed, 212 insertions(+), 168 deletions(-) diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index 0a183756d6ec..f93c4a4e6580 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -16,7 +16,7 @@ #include #include -extern spinlock_t pa_dbit_lock; +extern spinlock_t pa_tlb_lock; /* * kern_addr_valid(ADDR) tests if ADDR is pointing to valid kernel @@ -33,6 +33,19 @@ extern spinlock_t pa_dbit_lock; */ #define kern_addr_valid(addr) (1) +/* Purge data and instruction TLB entries. Must be called holding + * the pa_tlb_lock. The TLB purge instructions are slow on SMP + * machines since the purge must be broadcast to all CPUs. + */ + +static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr) +{ + mtsp(mm->context, 1); + pdtlb(addr); + if (unlikely(split_tlb)) + pitlb(addr); +} + /* Certain architectures need to do special things when PTEs * within a page table are directly modified. Thus, the following * hook is made available. @@ -42,15 +55,20 @@ extern spinlock_t pa_dbit_lock; *(pteptr) = (pteval); \ } while(0) -extern void purge_tlb_entries(struct mm_struct *, unsigned long); +#define pte_inserted(x) \ + ((pte_val(x) & (_PAGE_PRESENT|_PAGE_ACCESSED)) \ + == (_PAGE_PRESENT|_PAGE_ACCESSED)) -#define set_pte_at(mm, addr, ptep, pteval) \ - do { \ +#define set_pte_at(mm, addr, ptep, pteval) \ + do { \ + pte_t old_pte; \ unsigned long flags; \ - spin_lock_irqsave(&pa_dbit_lock, flags); \ - set_pte(ptep, pteval); \ - purge_tlb_entries(mm, addr); \ - spin_unlock_irqrestore(&pa_dbit_lock, flags); \ + spin_lock_irqsave(&pa_tlb_lock, flags); \ + old_pte = *ptep; \ + set_pte(ptep, pteval); \ + if (pte_inserted(old_pte)) \ + purge_tlb_entries(mm, addr); \ + spin_unlock_irqrestore(&pa_tlb_lock, flags); \ } while (0) #endif /* !__ASSEMBLY__ */ @@ -268,7 +286,7 @@ extern unsigned long *empty_zero_page; #define pte_none(x) (pte_val(x) == 0) #define pte_present(x) (pte_val(x) & _PAGE_PRESENT) -#define pte_clear(mm,addr,xp) do { pte_val(*(xp)) = 0; } while (0) +#define pte_clear(mm, addr, xp) set_pte_at(mm, addr, xp, __pte(0)) #define pmd_flag(x) (pmd_val(x) & PxD_FLAG_MASK) #define pmd_address(x) ((unsigned long)(pmd_val(x) &~ PxD_FLAG_MASK) << PxD_VALUE_SHIFT) @@ -435,15 +453,15 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned if (!pte_young(*ptep)) return 0; - spin_lock_irqsave(&pa_dbit_lock, flags); + spin_lock_irqsave(&pa_tlb_lock, flags); pte = *ptep; if (!pte_young(pte)) { - spin_unlock_irqrestore(&pa_dbit_lock, flags); + spin_unlock_irqrestore(&pa_tlb_lock, flags); return 0; } set_pte(ptep, pte_mkold(pte)); purge_tlb_entries(vma->vm_mm, addr); - spin_unlock_irqrestore(&pa_dbit_lock, flags); + spin_unlock_irqrestore(&pa_tlb_lock, flags); return 1; } @@ -453,11 +471,12 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t old_pte; unsigned long flags; - spin_lock_irqsave(&pa_dbit_lock, flags); + spin_lock_irqsave(&pa_tlb_lock, flags); old_pte = *ptep; - pte_clear(mm,addr,ptep); - purge_tlb_entries(mm, addr); - spin_unlock_irqrestore(&pa_dbit_lock, flags); + set_pte(ptep, __pte(0)); + if (pte_inserted(old_pte)) + purge_tlb_entries(mm, addr); + spin_unlock_irqrestore(&pa_tlb_lock, flags); return old_pte; } @@ -465,10 +484,10 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { unsigned long flags; - spin_lock_irqsave(&pa_dbit_lock, flags); + spin_lock_irqsave(&pa_tlb_lock, flags); set_pte(ptep, pte_wrprotect(*ptep)); purge_tlb_entries(mm, addr); - spin_unlock_irqrestore(&pa_dbit_lock, flags); + spin_unlock_irqrestore(&pa_tlb_lock, flags); } #define pte_same(A,B) (pte_val(A) == pte_val(B)) diff --git a/arch/parisc/include/asm/tlbflush.h b/arch/parisc/include/asm/tlbflush.h index 9d086a599fa0..e84b96478193 100644 --- a/arch/parisc/include/asm/tlbflush.h +++ b/arch/parisc/include/asm/tlbflush.h @@ -13,6 +13,9 @@ * active at any one time on the Merced bus. This tlb purge * synchronisation is fairly lightweight and harmless so we activate * it on all systems not just the N class. + + * It is also used to ensure PTE updates are atomic and consistent + * with the TLB. */ extern spinlock_t pa_tlb_lock; @@ -24,20 +27,24 @@ extern void flush_tlb_all_local(void *); #define smp_flush_tlb_all() flush_tlb_all() +int __flush_tlb_range(unsigned long sid, + unsigned long start, unsigned long end); + +#define flush_tlb_range(vma, start, end) \ + __flush_tlb_range((vma)->vm_mm->context, start, end) + +#define flush_tlb_kernel_range(start, end) \ + __flush_tlb_range(0, start, end) + /* * flush_tlb_mm() * - * XXX This code is NOT valid for HP-UX compatibility processes, - * (although it will probably work 99% of the time). HP-UX - * processes are free to play with the space id's and save them - * over long periods of time, etc. so we have to preserve the - * space and just flush the entire tlb. We need to check the - * personality in order to do that, but the personality is not - * currently being set correctly. - * - * Of course, Linux processes could do the same thing, but - * we don't support that (and the compilers, dynamic linker, - * etc. do not do that). + * The code to switch to a new context is NOT valid for processes + * which play with the space id's. Thus, we have to preserve the + * space and just flush the entire tlb. However, the compilers, + * dynamic linker, etc, do not manipulate space id's, so there + * could be a significant performance benefit in switching contexts + * and not flushing the whole tlb. */ static inline void flush_tlb_mm(struct mm_struct *mm) @@ -45,10 +52,18 @@ static inline void flush_tlb_mm(struct mm_struct *mm) BUG_ON(mm == &init_mm); /* Should never happen */ #if 1 || defined(CONFIG_SMP) + /* Except for very small threads, flushing the whole TLB is + * faster than using __flush_tlb_range. The pdtlb and pitlb + * instructions are very slow because of the TLB broadcast. + * It might be faster to do local range flushes on all CPUs + * on PA 2.0 systems. + */ flush_tlb_all(); #else /* FIXME: currently broken, causing space id and protection ids - * to go out of sync, resulting in faults on userspace accesses. + * to go out of sync, resulting in faults on userspace accesses. + * This approach needs further investigation since running many + * small applications (e.g., GCC testsuite) is faster on HP-UX. */ if (mm) { if (mm->context != 0) @@ -65,22 +80,12 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, { unsigned long flags, sid; - /* For one page, it's not worth testing the split_tlb variable */ - - mb(); sid = vma->vm_mm->context; purge_tlb_start(flags); mtsp(sid, 1); pdtlb(addr); - pitlb(addr); + if (unlikely(split_tlb)) + pitlb(addr); purge_tlb_end(flags); } - -void __flush_tlb_range(unsigned long sid, - unsigned long start, unsigned long end); - -#define flush_tlb_range(vma,start,end) __flush_tlb_range((vma)->vm_mm->context,start,end) - -#define flush_tlb_kernel_range(start, end) __flush_tlb_range(0,start,end) - #endif diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index f6448c7c62b5..cda6dbbe9842 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -342,12 +342,15 @@ EXPORT_SYMBOL(flush_data_cache_local); EXPORT_SYMBOL(flush_kernel_icache_range_asm); #define FLUSH_THRESHOLD 0x80000 /* 0.5MB */ -int parisc_cache_flush_threshold __read_mostly = FLUSH_THRESHOLD; +static unsigned long parisc_cache_flush_threshold __read_mostly = FLUSH_THRESHOLD; + +#define FLUSH_TLB_THRESHOLD (2*1024*1024) /* 2MB initial TLB threshold */ +static unsigned long parisc_tlb_flush_threshold __read_mostly = FLUSH_TLB_THRESHOLD; void __init parisc_setup_cache_timing(void) { unsigned long rangetime, alltime; - unsigned long size; + unsigned long size, start; alltime = mfctl(16); flush_data_cache(); @@ -364,14 +367,43 @@ void __init parisc_setup_cache_timing(void) /* Racy, but if we see an intermediate value, it's ok too... */ parisc_cache_flush_threshold = size * alltime / rangetime; - parisc_cache_flush_threshold = (parisc_cache_flush_threshold + L1_CACHE_BYTES - 1) &~ (L1_CACHE_BYTES - 1); + parisc_cache_flush_threshold = L1_CACHE_ALIGN(parisc_cache_flush_threshold); if (!parisc_cache_flush_threshold) parisc_cache_flush_threshold = FLUSH_THRESHOLD; if (parisc_cache_flush_threshold > cache_info.dc_size) parisc_cache_flush_threshold = cache_info.dc_size; - printk(KERN_INFO "Setting cache flush threshold to %x (%d CPUs online)\n", parisc_cache_flush_threshold, num_online_cpus()); + printk(KERN_INFO "Setting cache flush threshold to %lu kB\n", + parisc_cache_flush_threshold/1024); + + /* calculate TLB flush threshold */ + + alltime = mfctl(16); + flush_tlb_all(); + alltime = mfctl(16) - alltime; + + size = PAGE_SIZE; + start = (unsigned long) _text; + rangetime = mfctl(16); + while (start < (unsigned long) _end) { + flush_tlb_kernel_range(start, start + PAGE_SIZE); + start += PAGE_SIZE; + size += PAGE_SIZE; + } + rangetime = mfctl(16) - rangetime; + + printk(KERN_DEBUG "Whole TLB flush %lu cycles, flushing %lu bytes %lu cycles\n", + alltime, size, rangetime); + + parisc_tlb_flush_threshold = size * alltime / rangetime; + parisc_tlb_flush_threshold *= num_online_cpus(); + parisc_tlb_flush_threshold = PAGE_ALIGN(parisc_tlb_flush_threshold); + if (!parisc_tlb_flush_threshold) + parisc_tlb_flush_threshold = FLUSH_TLB_THRESHOLD; + + printk(KERN_INFO "Setting TLB flush threshold to %lu kB\n", + parisc_tlb_flush_threshold/1024); } extern void purge_kernel_dcache_page_asm(unsigned long); @@ -403,48 +435,45 @@ void copy_user_page(void *vto, void *vfrom, unsigned long vaddr, } EXPORT_SYMBOL(copy_user_page); -void purge_tlb_entries(struct mm_struct *mm, unsigned long addr) +/* __flush_tlb_range() + * + * returns 1 if all TLBs were flushed. + */ +int __flush_tlb_range(unsigned long sid, unsigned long start, + unsigned long end) { - unsigned long flags; + unsigned long flags, size; - /* Note: purge_tlb_entries can be called at startup with - no context. */ - - purge_tlb_start(flags); - mtsp(mm->context, 1); - pdtlb(addr); - pitlb(addr); - purge_tlb_end(flags); -} -EXPORT_SYMBOL(purge_tlb_entries); - -void __flush_tlb_range(unsigned long sid, unsigned long start, - unsigned long end) -{ - unsigned long npages; - - npages = ((end - (start & PAGE_MASK)) + (PAGE_SIZE - 1)) >> PAGE_SHIFT; - if (npages >= 512) /* 2MB of space: arbitrary, should be tuned */ + size = (end - start); + if (size >= parisc_tlb_flush_threshold) { flush_tlb_all(); - else { - unsigned long flags; + return 1; + } + /* Purge TLB entries for small ranges using the pdtlb and + pitlb instructions. These instructions execute locally + but cause a purge request to be broadcast to other TLBs. */ + if (likely(!split_tlb)) { + while (start < end) { + purge_tlb_start(flags); + mtsp(sid, 1); + pdtlb(start); + purge_tlb_end(flags); + start += PAGE_SIZE; + } + return 0; + } + + /* split TLB case */ + while (start < end) { purge_tlb_start(flags); mtsp(sid, 1); - if (split_tlb) { - while (npages--) { - pdtlb(start); - pitlb(start); - start += PAGE_SIZE; - } - } else { - while (npages--) { - pdtlb(start); - start += PAGE_SIZE; - } - } + pdtlb(start); + pitlb(start); purge_tlb_end(flags); + start += PAGE_SIZE; } + return 0; } static void cacheflush_h_tmp_function(void *dummy) diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index 75819617f93b..c5ef4081b01d 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -45,7 +45,7 @@ .level 2.0 #endif - .import pa_dbit_lock,data + .import pa_tlb_lock,data /* space_to_prot macro creates a prot id from a space id */ @@ -420,8 +420,8 @@ SHLREG %r9,PxD_VALUE_SHIFT,\pmd extru \va,31-PAGE_SHIFT,ASM_BITS_PER_PTE,\index dep %r0,31,PAGE_SHIFT,\pmd /* clear offset */ - shladd \index,BITS_PER_PTE_ENTRY,\pmd,\pmd - LDREG %r0(\pmd),\pte /* pmd is now pte */ + shladd \index,BITS_PER_PTE_ENTRY,\pmd,\pmd /* pmd is now pte */ + LDREG %r0(\pmd),\pte bb,>=,n \pte,_PAGE_PRESENT_BIT,\fault .endm @@ -453,57 +453,53 @@ L2_ptep \pgd,\pte,\index,\va,\fault .endm - /* Acquire pa_dbit_lock lock. */ - .macro dbit_lock spc,tmp,tmp1 + /* Acquire pa_tlb_lock lock and recheck page is still present. */ + .macro tlb_lock spc,ptp,pte,tmp,tmp1,fault #ifdef CONFIG_SMP cmpib,COND(=),n 0,\spc,2f - load32 PA(pa_dbit_lock),\tmp + load32 PA(pa_tlb_lock),\tmp 1: LDCW 0(\tmp),\tmp1 cmpib,COND(=) 0,\tmp1,1b nop + LDREG 0(\ptp),\pte + bb,<,n \pte,_PAGE_PRESENT_BIT,2f + b \fault + stw \spc,0(\tmp) 2: #endif .endm - /* Release pa_dbit_lock lock without reloading lock address. */ - .macro dbit_unlock0 spc,tmp + /* Release pa_tlb_lock lock without reloading lock address. */ + .macro tlb_unlock0 spc,tmp #ifdef CONFIG_SMP or,COND(=) %r0,\spc,%r0 stw \spc,0(\tmp) #endif .endm - /* Release pa_dbit_lock lock. */ - .macro dbit_unlock1 spc,tmp + /* Release pa_tlb_lock lock. */ + .macro tlb_unlock1 spc,tmp #ifdef CONFIG_SMP - load32 PA(pa_dbit_lock),\tmp - dbit_unlock0 \spc,\tmp + load32 PA(pa_tlb_lock),\tmp + tlb_unlock0 \spc,\tmp #endif .endm /* Set the _PAGE_ACCESSED bit of the PTE. Be clever and * don't needlessly dirty the cache line if it was already set */ - .macro update_ptep spc,ptep,pte,tmp,tmp1 -#ifdef CONFIG_SMP - or,COND(=) %r0,\spc,%r0 - LDREG 0(\ptep),\pte -#endif + .macro update_accessed ptp,pte,tmp,tmp1 ldi _PAGE_ACCESSED,\tmp1 or \tmp1,\pte,\tmp and,COND(<>) \tmp1,\pte,%r0 - STREG \tmp,0(\ptep) + STREG \tmp,0(\ptp) .endm /* Set the dirty bit (and accessed bit). No need to be * clever, this is only used from the dirty fault */ - .macro update_dirty spc,ptep,pte,tmp -#ifdef CONFIG_SMP - or,COND(=) %r0,\spc,%r0 - LDREG 0(\ptep),\pte -#endif + .macro update_dirty ptp,pte,tmp ldi _PAGE_ACCESSED|_PAGE_DIRTY,\tmp or \tmp,\pte,\pte - STREG \pte,0(\ptep) + STREG \pte,0(\ptp) .endm /* bitshift difference between a PFN (based on kernel's PAGE_SIZE) @@ -1148,14 +1144,14 @@ dtlb_miss_20w: L3_ptep ptp,pte,t0,va,dtlb_check_alias_20w - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,dtlb_check_alias_20w + update_accessed ptp,pte,t0,t1 make_insert_tlb spc,pte,prot idtlbt pte,prot - dbit_unlock1 spc,t0 + tlb_unlock1 spc,t0 rfir nop @@ -1174,14 +1170,14 @@ nadtlb_miss_20w: L3_ptep ptp,pte,t0,va,nadtlb_check_alias_20w - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,nadtlb_check_alias_20w + update_accessed ptp,pte,t0,t1 make_insert_tlb spc,pte,prot idtlbt pte,prot - dbit_unlock1 spc,t0 + tlb_unlock1 spc,t0 rfir nop @@ -1202,20 +1198,20 @@ dtlb_miss_11: L2_ptep ptp,pte,t0,va,dtlb_check_alias_11 - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,dtlb_check_alias_11 + update_accessed ptp,pte,t0,t1 make_insert_tlb_11 spc,pte,prot - mfsp %sr1,t0 /* Save sr1 so we can use it in tlb inserts */ + mfsp %sr1,t1 /* Save sr1 so we can use it in tlb inserts */ mtsp spc,%sr1 idtlba pte,(%sr1,va) idtlbp prot,(%sr1,va) - mtsp t0, %sr1 /* Restore sr1 */ - dbit_unlock1 spc,t0 + mtsp t1, %sr1 /* Restore sr1 */ + tlb_unlock1 spc,t0 rfir nop @@ -1235,21 +1231,20 @@ nadtlb_miss_11: L2_ptep ptp,pte,t0,va,nadtlb_check_alias_11 - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,nadtlb_check_alias_11 + update_accessed ptp,pte,t0,t1 make_insert_tlb_11 spc,pte,prot - - mfsp %sr1,t0 /* Save sr1 so we can use it in tlb inserts */ + mfsp %sr1,t1 /* Save sr1 so we can use it in tlb inserts */ mtsp spc,%sr1 idtlba pte,(%sr1,va) idtlbp prot,(%sr1,va) - mtsp t0, %sr1 /* Restore sr1 */ - dbit_unlock1 spc,t0 + mtsp t1, %sr1 /* Restore sr1 */ + tlb_unlock1 spc,t0 rfir nop @@ -1269,16 +1264,16 @@ dtlb_miss_20: L2_ptep ptp,pte,t0,va,dtlb_check_alias_20 - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,dtlb_check_alias_20 + update_accessed ptp,pte,t0,t1 make_insert_tlb spc,pte,prot - f_extend pte,t0 + f_extend pte,t1 idtlbt pte,prot - dbit_unlock1 spc,t0 + tlb_unlock1 spc,t0 rfir nop @@ -1297,16 +1292,16 @@ nadtlb_miss_20: L2_ptep ptp,pte,t0,va,nadtlb_check_alias_20 - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,nadtlb_check_alias_20 + update_accessed ptp,pte,t0,t1 make_insert_tlb spc,pte,prot - f_extend pte,t0 + f_extend pte,t1 - idtlbt pte,prot - dbit_unlock1 spc,t0 + idtlbt pte,prot + tlb_unlock1 spc,t0 rfir nop @@ -1406,14 +1401,14 @@ itlb_miss_20w: L3_ptep ptp,pte,t0,va,itlb_fault - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,itlb_fault + update_accessed ptp,pte,t0,t1 make_insert_tlb spc,pte,prot iitlbt pte,prot - dbit_unlock1 spc,t0 + tlb_unlock1 spc,t0 rfir nop @@ -1430,14 +1425,14 @@ naitlb_miss_20w: L3_ptep ptp,pte,t0,va,naitlb_check_alias_20w - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,naitlb_check_alias_20w + update_accessed ptp,pte,t0,t1 make_insert_tlb spc,pte,prot iitlbt pte,prot - dbit_unlock1 spc,t0 + tlb_unlock1 spc,t0 rfir nop @@ -1458,20 +1453,20 @@ itlb_miss_11: L2_ptep ptp,pte,t0,va,itlb_fault - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,itlb_fault + update_accessed ptp,pte,t0,t1 make_insert_tlb_11 spc,pte,prot - mfsp %sr1,t0 /* Save sr1 so we can use it in tlb inserts */ + mfsp %sr1,t1 /* Save sr1 so we can use it in tlb inserts */ mtsp spc,%sr1 iitlba pte,(%sr1,va) iitlbp prot,(%sr1,va) - mtsp t0, %sr1 /* Restore sr1 */ - dbit_unlock1 spc,t0 + mtsp t1, %sr1 /* Restore sr1 */ + tlb_unlock1 spc,t0 rfir nop @@ -1482,20 +1477,20 @@ naitlb_miss_11: L2_ptep ptp,pte,t0,va,naitlb_check_alias_11 - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,naitlb_check_alias_11 + update_accessed ptp,pte,t0,t1 make_insert_tlb_11 spc,pte,prot - mfsp %sr1,t0 /* Save sr1 so we can use it in tlb inserts */ + mfsp %sr1,t1 /* Save sr1 so we can use it in tlb inserts */ mtsp spc,%sr1 iitlba pte,(%sr1,va) iitlbp prot,(%sr1,va) - mtsp t0, %sr1 /* Restore sr1 */ - dbit_unlock1 spc,t0 + mtsp t1, %sr1 /* Restore sr1 */ + tlb_unlock1 spc,t0 rfir nop @@ -1516,16 +1511,16 @@ itlb_miss_20: L2_ptep ptp,pte,t0,va,itlb_fault - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,itlb_fault + update_accessed ptp,pte,t0,t1 make_insert_tlb spc,pte,prot - f_extend pte,t0 + f_extend pte,t1 iitlbt pte,prot - dbit_unlock1 spc,t0 + tlb_unlock1 spc,t0 rfir nop @@ -1536,16 +1531,16 @@ naitlb_miss_20: L2_ptep ptp,pte,t0,va,naitlb_check_alias_20 - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,naitlb_check_alias_20 + update_accessed ptp,pte,t0,t1 make_insert_tlb spc,pte,prot - f_extend pte,t0 + f_extend pte,t1 iitlbt pte,prot - dbit_unlock1 spc,t0 + tlb_unlock1 spc,t0 rfir nop @@ -1568,14 +1563,14 @@ dbit_trap_20w: L3_ptep ptp,pte,t0,va,dbit_fault - dbit_lock spc,t0,t1 - update_dirty spc,ptp,pte,t1 + tlb_lock spc,ptp,pte,t0,t1,dbit_fault + update_dirty ptp,pte,t1 make_insert_tlb spc,pte,prot idtlbt pte,prot - dbit_unlock0 spc,t0 + tlb_unlock0 spc,t0 rfir nop #else @@ -1588,8 +1583,8 @@ dbit_trap_11: L2_ptep ptp,pte,t0,va,dbit_fault - dbit_lock spc,t0,t1 - update_dirty spc,ptp,pte,t1 + tlb_lock spc,ptp,pte,t0,t1,dbit_fault + update_dirty ptp,pte,t1 make_insert_tlb_11 spc,pte,prot @@ -1600,8 +1595,8 @@ dbit_trap_11: idtlbp prot,(%sr1,va) mtsp t1, %sr1 /* Restore sr1 */ - dbit_unlock0 spc,t0 + tlb_unlock0 spc,t0 rfir nop @@ -1612,16 +1607,16 @@ dbit_trap_20: L2_ptep ptp,pte,t0,va,dbit_fault - dbit_lock spc,t0,t1 - update_dirty spc,ptp,pte,t1 + tlb_lock spc,ptp,pte,t0,t1,dbit_fault + update_dirty ptp,pte,t1 make_insert_tlb spc,pte,prot f_extend pte,t1 - idtlbt pte,prot - dbit_unlock0 spc,t0 + idtlbt pte,prot + tlb_unlock0 spc,t0 rfir nop #endif diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index 6548fd1d2e62..b99b39f1da02 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -43,10 +43,6 @@ #include "../math-emu/math-emu.h" /* for handle_fpe() */ -#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) -DEFINE_SPINLOCK(pa_dbit_lock); -#endif - static void parisc_show_stack(struct task_struct *task, unsigned long *sp, struct pt_regs *regs); From 892e8cac99a71f6254f84fc662068d912e1943bf Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Fri, 10 Jul 2015 09:40:59 -0400 Subject: [PATCH 197/210] selinux: fix mprotect PROT_EXEC regression caused by mm change commit 66fc13039422ba7df2d01a8ee0873e4ef965b50b ("mm: shmem_zero_setup skip security check and lockdep conflict with XFS") caused a regression for SELinux by disabling any SELinux checking of mprotect PROT_EXEC on shared anonymous mappings. However, even before that regression, the checking on such mprotect PROT_EXEC calls was inconsistent with the checking on a mmap PROT_EXEC call for a shared anonymous mapping. On a mmap, the security hook is passed a NULL file and knows it is dealing with an anonymous mapping and therefore applies an execmem check and no file checks. On a mprotect, the security hook is passed a vma with a non-NULL vm_file (as this was set from the internally-created shmem file during mmap) and therefore applies the file-based execute check and no execmem check. Since the aforementioned commit now marks the shmem zero inode with the S_PRIVATE flag, the file checks are disabled and we have no checking at all on mprotect PROT_EXEC. Add a test to the mprotect hook logic for such private inodes, and apply an execmem check in that case. This makes the mmap and mprotect checking consistent for shared anonymous mappings, as well as for /dev/zero and ashmem. Cc: # 4.1.x Signed-off-by: Stephen Smalley Signed-off-by: Paul Moore --- security/selinux/hooks.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 312537d48050..692e3cc8ce23 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3271,7 +3271,8 @@ static int file_map_prot_check(struct file *file, unsigned long prot, int shared int rc = 0; if (default_noexec && - (prot & PROT_EXEC) && (!file || (!shared && (prot & PROT_WRITE)))) { + (prot & PROT_EXEC) && (!file || IS_PRIVATE(file_inode(file)) || + (!shared && (prot & PROT_WRITE)))) { /* * We are making executable an anonymous mapping or a * private file mapping that will also be writable. From c4d029f2d43b39de7b9299e8b58102a442ba86f8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 11 Jul 2015 14:26:34 +0200 Subject: [PATCH 198/210] tick/broadcast: Prevent NULL pointer dereference Dan reported that the recent changes to the broadcast code introduced a potential NULL dereference. Add the proper check. Fixes: e0454311903d "tick/broadcast: Sanity check the shutdown of the local clock_event" Reported-by: Dan Carpenter Signed-off-by: Thomas Gleixner --- kernel/time/tick-broadcast.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index ee3cf942d6eb..52b9e199b5ac 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -409,14 +409,16 @@ void tick_broadcast_control(enum tick_broadcast_mode mode) break; } - if (cpumask_empty(tick_broadcast_mask)) { - if (!bc_stopped) - clockevents_shutdown(bc); - } else if (bc_stopped) { - if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) - tick_broadcast_start_periodic(bc); - else - tick_broadcast_setup_oneshot(bc); + if (bc) { + if (cpumask_empty(tick_broadcast_mask)) { + if (!bc_stopped) + clockevents_shutdown(bc); + } else if (bc_stopped) { + if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) + tick_broadcast_start_periodic(bc); + else + tick_broadcast_setup_oneshot(bc); + } } raw_spin_unlock(&tick_broadcast_lock); } From 0a73d0a204a4a04a1e110539c5a524ae51f91d6d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 12 Jul 2015 10:34:29 -0400 Subject: [PATCH 199/210] 9p: don't leave a half-initialized inode sitting around Cc: stable@vger.kernel.org # all branches Signed-off-by: Al Viro --- fs/9p/vfs_inode.c | 3 +-- fs/9p/vfs_inode_dotl.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 510040b04c96..b1dc51888048 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -540,8 +540,7 @@ static struct inode *v9fs_qid_iget(struct super_block *sb, unlock_new_inode(inode); return inode; error: - unlock_new_inode(inode); - iput(inode); + iget_failed(inode); return ERR_PTR(retval); } diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 09e4433717b8..e8aa57dc8d6d 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -149,8 +149,7 @@ static struct inode *v9fs_qid_iget_dotl(struct super_block *sb, unlock_new_inode(inode); return inode; error: - unlock_new_inode(inode); - iput(inode); + iget_failed(inode); return ERR_PTR(retval); } From 9391dd00d13c853ab4f2a85435288ae2202e0e43 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 12 Jul 2015 10:39:45 -0400 Subject: [PATCH 200/210] fix a braino in ovl_d_select_inode() when opening a directory we want the overlayfs inode, not one from the topmost layer. Reported-By: Andrey Jr. Melnikov Tested-By: Andrey Jr. Melnikov Signed-off-by: Al Viro --- fs/overlayfs/inode.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index f140e3dbfb7b..d9da5a4e9382 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -343,6 +343,9 @@ struct inode *ovl_d_select_inode(struct dentry *dentry, unsigned file_flags) struct path realpath; enum ovl_path_type type; + if (d_is_dir(dentry)) + return d_backing_inode(dentry); + type = ovl_path_real(dentry, &realpath); if (ovl_open_need_copy_up(file_flags, type, realpath.dentry)) { err = ovl_want_write(dentry); From 75a6f82a0d10ef8f13cd8fe7212911a0252ab99e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 8 Jul 2015 02:42:38 +0100 Subject: [PATCH 201/210] freeing unlinked file indefinitely delayed Normally opening a file, unlinking it and then closing will have the inode freed upon close() (provided that it's not otherwise busy and has no remaining links, of course). However, there's one case where that does *not* happen. Namely, if you open it by fhandle with cold dcache, then unlink() and close(). In normal case you get d_delete() in unlink(2) notice that dentry is busy and unhash it; on the final dput() it will be forcibly evicted from dcache, triggering iput() and inode removal. In this case, though, we end up with *two* dentries - disconnected (created by open-by-fhandle) and regular one (used by unlink()). The latter will have its reference to inode dropped just fine, but the former will not - it's considered hashed (it is on the ->s_anon list), so it will stay around until the memory pressure will finally do it in. As the result, we have the final iput() delayed indefinitely. It's trivial to reproduce - void flush_dcache(void) { system("mount -o remount,rw /"); } static char buf[20 * 1024 * 1024]; main() { int fd; union { struct file_handle f; char buf[MAX_HANDLE_SZ]; } x; int m; x.f.handle_bytes = sizeof(x); chdir("/root"); mkdir("foo", 0700); fd = open("foo/bar", O_CREAT | O_RDWR, 0600); close(fd); name_to_handle_at(AT_FDCWD, "foo/bar", &x.f, &m, 0); flush_dcache(); fd = open_by_handle_at(AT_FDCWD, &x.f, O_RDWR); unlink("foo/bar"); write(fd, buf, sizeof(buf)); system("df ."); /* 20Mb eaten */ close(fd); system("df ."); /* should've freed those 20Mb */ flush_dcache(); system("df ."); /* should be the same as #2 */ } will spit out something like Filesystem 1K-blocks Used Available Use% Mounted on /dev/root 322023 303843 1131 100% / Filesystem 1K-blocks Used Available Use% Mounted on /dev/root 322023 303843 1131 100% / Filesystem 1K-blocks Used Available Use% Mounted on /dev/root 322023 283282 21692 93% / - inode gets freed only when dentry is finally evicted (here we trigger than by remount; normally it would've happened in response to memory pressure hell knows when). Cc: stable@vger.kernel.org # v2.6.38+; earlier ones need s/kill_it/unhash_it/ Acked-by: J. Bruce Fields Signed-off-by: Al Viro --- fs/dcache.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 7a3f3e5f9cea..5c8ea15e73a5 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -642,7 +642,7 @@ static inline bool fast_dput(struct dentry *dentry) /* * If we have a d_op->d_delete() operation, we sould not - * let the dentry count go to zero, so use "put__or_lock". + * let the dentry count go to zero, so use "put_or_lock". */ if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) return lockref_put_or_lock(&dentry->d_lockref); @@ -697,7 +697,7 @@ static inline bool fast_dput(struct dentry *dentry) */ smp_rmb(); d_flags = ACCESS_ONCE(dentry->d_flags); - d_flags &= DCACHE_REFERENCED | DCACHE_LRU_LIST; + d_flags &= DCACHE_REFERENCED | DCACHE_LRU_LIST | DCACHE_DISCONNECTED; /* Nothing to do? Dropping the reference was all we needed? */ if (d_flags == (DCACHE_REFERENCED | DCACHE_LRU_LIST) && !d_unhashed(dentry)) @@ -776,6 +776,9 @@ repeat: if (unlikely(d_unhashed(dentry))) goto kill_it; + if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) + goto kill_it; + if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) { if (dentry->d_op->d_delete(dentry)) goto kill_it; From 01e2d0627a9a6edb24c37db45db5ecb31e9de808 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 12 Jul 2015 15:00:20 -0700 Subject: [PATCH 202/210] Revert "drm/i915: Use crtc_state->active in primary check_plane func" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit dec4f799d0a4c9edae20512fa60b0a36f3299ca2. Jörg Otte reports a NULL pointder dereference due to this commit, as 'crtc_state' very much can be NULL: crtc_state = state->base.state ? intel_atomic_get_crtc_state(state->base.state, intel_crtc) : NULL; So the change to test 'crtc_state->base.active' cannot possibly be correct as-is. There may be some other minimal fix (like just checking crtc_state for NULL), but I'm just reverting it now for the rc2 release, and people like Daniel Vetter who actually know this code will figure out what the right solution is in the longer term. Reported-and-bisected-by: Jörg Otte Cc: Ander Conselvan de Oliveira Cc: Jani Nikula Cc: Daniel Vetter CC: Maarten Lankhorst Signed-off-by: Linus Torvalds --- drivers/gpu/drm/i915/intel_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index ba9321998a41..647b1404c441 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -13276,7 +13276,7 @@ intel_check_primary_plane(struct drm_plane *plane, if (ret) return ret; - if (crtc_state->base.active) { + if (intel_crtc->active) { struct intel_plane_state *old_state = to_intel_plane_state(plane->state); From bc0195aad0daa2ad5b0d76cce22b167bc3435590 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 12 Jul 2015 15:10:30 -0700 Subject: [PATCH 203/210] Linux 4.2-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 13270c0a9336..257ef5892ab7 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 2 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Hurr durr I'ma sheep # *DOCUMENTATION* From 3c20b827b1803335aacdab63fb5ed0fd2f4c04ca Mon Sep 17 00:00:00 2001 From: Adam YH Lee Date: Fri, 12 Jun 2015 13:37:22 -0700 Subject: [PATCH 204/210] ARM: dts: omap3: overo: Update LCD panel names For Gumstix Overo COMs, the u-boot bootloader typically passes an argument specifying the default display via the omapdss.def_disp parameter. When a default display is specified, DSS2 tries to match this name with either the device tree label (e.g. label=dvi) or, failing this, the device tree alias (e.g. label=display0). Update the panel names for the 'lcd43' and 'lcd35' displays in the device tree such that they match the names passed by u-boot. Signed-off-by: Ash Charles Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap3-overo-common-lcd35.dtsi | 2 +- arch/arm/boot/dts/omap3-overo-common-lcd43.dtsi | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/omap3-overo-common-lcd35.dtsi b/arch/arm/boot/dts/omap3-overo-common-lcd35.dtsi index 233c69e50ae3..df8908adb0cb 100644 --- a/arch/arm/boot/dts/omap3-overo-common-lcd35.dtsi +++ b/arch/arm/boot/dts/omap3-overo-common-lcd35.dtsi @@ -120,7 +120,7 @@ lcd0: display@0 { compatible = "lgphilips,lb035q02"; - label = "lcd"; + label = "lcd35"; reg = <1>; /* CS1 */ spi-max-frequency = <10000000>; diff --git a/arch/arm/boot/dts/omap3-overo-common-lcd43.dtsi b/arch/arm/boot/dts/omap3-overo-common-lcd43.dtsi index f5395b7da912..048fd216970a 100644 --- a/arch/arm/boot/dts/omap3-overo-common-lcd43.dtsi +++ b/arch/arm/boot/dts/omap3-overo-common-lcd43.dtsi @@ -98,7 +98,7 @@ lcd0: display@0 { compatible = "samsung,lte430wq-f0c", "panel-dpi"; - label = "lcd"; + label = "lcd43"; pinctrl-names = "default"; pinctrl-0 = <<e430_pins>; From 5aed4ee7cc1f214173b3a029452fe4be7bf89541 Mon Sep 17 00:00:00 2001 From: Adam YH Lee Date: Thu, 9 Jul 2015 15:37:57 -0700 Subject: [PATCH 205/210] ARM: dts: configure regulators for Gumstix Pepper Boot process is halting in midway because some of the necessary voltage regulators are deemed unused and subsequently powered off, leading to a completely unresponsive system. Most of the device nodes had correct voltage regulator attachments. Yet these nodes had to set stricter enforcement on them through 'regulator-boot-on' and 'regulator-always-on' to function correctly. The consumers of the regulators this commit affect are the followings: DCDC1: vdd_1v8 system supply, USB-PHY, and ADC DCDC2: Core domain DCDC3: MPU core domain LDO1: RTC LDO2: 3v3 IO domain LDO3: USB-PHY; not a boot-time requirement LDO4: LCD [16:23] All but LDO3 need to be always-on for the system to be functional. Additionally regulator-name properties have been added for the kernel to display the name from the schematic. This will improve diagnostics. Signed-off-by: Adam YH Lee Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am335x-pepper.dts | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/arm/boot/dts/am335x-pepper.dts b/arch/arm/boot/dts/am335x-pepper.dts index 0d35ab64641c..661f835addba 100644 --- a/arch/arm/boot/dts/am335x-pepper.dts +++ b/arch/arm/boot/dts/am335x-pepper.dts @@ -438,6 +438,7 @@ regulators { dcdc1_reg: regulator@0 { /* VDD_1V8 system supply */ + regulator-always-on; }; dcdc2_reg: regulator@1 { @@ -446,6 +447,7 @@ regulator-min-microvolt = <925000>; regulator-max-microvolt = <1325000>; regulator-boot-on; + regulator-always-on; }; dcdc3_reg: regulator@2 { @@ -454,25 +456,32 @@ regulator-min-microvolt = <925000>; regulator-max-microvolt = <1150000>; regulator-boot-on; + regulator-always-on; }; ldo1_reg: regulator@3 { /* VRTC 1.8V always-on supply */ + regulator-name = "vrtc,vdds"; regulator-always-on; }; ldo2_reg: regulator@4 { /* 3.3V rail */ + regulator-name = "vdd_3v3aux"; + regulator-always-on; }; ldo3_reg: regulator@5 { /* VDD_3V3A 3.3V rail */ + regulator-name = "vdd_3v3a"; regulator-min-microvolt = <3300000>; regulator-max-microvolt = <3300000>; }; ldo4_reg: regulator@6 { /* VDD_3V3B 3.3V rail */ + regulator-name = "vdd_3v3b"; + regulator-always-on; }; }; }; From 693f4f4c969dff9fa80e186e9077c74056f71829 Mon Sep 17 00:00:00 2001 From: Arun Bharadwaj Date: Thu, 4 Jun 2015 11:08:19 -0700 Subject: [PATCH 206/210] ARM: dts: Fix frequency scaling on Gumstix Pepper The device tree for Gumstix Pepper has DCDC2 and DCDC3 correctly labelled but the upper limit values are wrong. The confusion is due to the hardware quirk where the DCDC2 and DCDC3 wires are flipped in Pepper. Signed-off-by: Arun Bharadwaj Tested-by: Ash Charles Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am335x-pepper.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/am335x-pepper.dts b/arch/arm/boot/dts/am335x-pepper.dts index 661f835addba..206c042a7c48 100644 --- a/arch/arm/boot/dts/am335x-pepper.dts +++ b/arch/arm/boot/dts/am335x-pepper.dts @@ -445,7 +445,7 @@ /* VDD_CORE voltage limits 0.95V - 1.26V with +/-4% tolerance */ regulator-name = "vdd_core"; regulator-min-microvolt = <925000>; - regulator-max-microvolt = <1325000>; + regulator-max-microvolt = <1150000>; regulator-boot-on; regulator-always-on; }; @@ -454,7 +454,7 @@ /* VDD_MPU voltage limits 0.95V - 1.1V with +/-4% tolerance */ regulator-name = "vdd_mpu"; regulator-min-microvolt = <925000>; - regulator-max-microvolt = <1150000>; + regulator-max-microvolt = <1325000>; regulator-boot-on; regulator-always-on; }; From 22e3bcc62e528a0dca7f4fafd71feeb9cdb8a95b Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Fri, 10 Jul 2015 12:28:55 -0500 Subject: [PATCH 207/210] ARM: dts: OMAP4: Add #iommu-cells property to IOMMUs Add missing #iommu-cells property to the DSP and IPU IOMMU nodes for OMAP4 platforms. This property is required as per the generic iommu binding. Signed-off-by: Suman Anna Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap4.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/boot/dts/omap4.dtsi b/arch/arm/boot/dts/omap4.dtsi index f884d6adb71e..7d31c6ff246f 100644 --- a/arch/arm/boot/dts/omap4.dtsi +++ b/arch/arm/boot/dts/omap4.dtsi @@ -551,6 +551,7 @@ reg = <0x4a066000 0x100>; interrupts = ; ti,hwmods = "mmu_dsp"; + #iommu-cells = <0>; }; mmu_ipu: mmu@55082000 { @@ -558,6 +559,7 @@ reg = <0x55082000 0x100>; interrupts = ; ti,hwmods = "mmu_ipu"; + #iommu-cells = <0>; ti,iommu-bus-err-back; }; From c1b5d0f624970020162b2054cc435712b8a0d422 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Fri, 10 Jul 2015 12:28:56 -0500 Subject: [PATCH 208/210] ARM: dts: OMAP5: Add #iommu-cells property to IOMMUs Add missing #iommu-cells property to the DSP and IPU IOMMU nodes for OMAP5 platforms. This property is required as per the generic iommu binding. Signed-off-by: Suman Anna Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap5.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/boot/dts/omap5.dtsi b/arch/arm/boot/dts/omap5.dtsi index 7d24ae0306b5..c8fd648a7108 100644 --- a/arch/arm/boot/dts/omap5.dtsi +++ b/arch/arm/boot/dts/omap5.dtsi @@ -612,6 +612,7 @@ reg = <0x4a066000 0x100>; interrupts = ; ti,hwmods = "mmu_dsp"; + #iommu-cells = <0>; }; mmu_ipu: mmu@55082000 { @@ -619,6 +620,7 @@ reg = <0x55082000 0x100>; interrupts = ; ti,hwmods = "mmu_ipu"; + #iommu-cells = <0>; ti,iommu-bus-err-back; }; From f87d089d4469d05f565c8ce350b6631b14c4fff8 Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Fri, 10 Jul 2015 16:05:48 -0500 Subject: [PATCH 209/210] ARM: OMAP2+: Add HAVE_ARM_SCU for AM43XX CONFIG_HAVE_ARM_SCU only gets selected if CONFIG_SMP is selected in an OMAP system, however AM43XX needs this option regardless of CONFIG_SMP and also for an AM43XX only build as it is important for controlling power in the SoC. Without this we cannot suspend the CPU for SoC suspend or cpuidle. The ARM Cortex A9 needs SCU CPU Power Status bits to be set to off mode in order for the PRCM to transition the MPU to low power modes. Signed-off-by: Dave Gerlach Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig index ecc04ff13e95..4a023e8d1bdb 100644 --- a/arch/arm/mach-omap2/Kconfig +++ b/arch/arm/mach-omap2/Kconfig @@ -60,6 +60,7 @@ config SOC_AM43XX select ARM_GIC select MACH_OMAP_GENERIC select MIGHT_HAVE_CACHE_L2X0 + select HAVE_ARM_SCU config SOC_DRA7XX bool "TI DRA7XX" From 9908ac3daa3da2d236b5406b95d0865ddb8b29c4 Mon Sep 17 00:00:00 2001 From: Adam YH Lee Date: Tue, 14 Jul 2015 11:17:24 -0700 Subject: [PATCH 210/210] ARM: dts: Correct audio input route & set mic bias for am335x-pepper Audio-in was incorrectly routed to Line In. It should be Mic3L as per schematic. Using mic-bias voltage at 2.0v (<0x1>) does not work for some reason. There is no voltage seen on micbias (R127). Mic-bias voltage of 2.5v (<0x2>) works. I see voltage of 2.475v across GND and micbias. With these changes, I can record audio with a pair of proliferate TRRS earbuds. Signed-off-by: Adam YH Lee Acked-by: Ash Charles Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am335x-pepper.dts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/am335x-pepper.dts b/arch/arm/boot/dts/am335x-pepper.dts index 206c042a7c48..7106114c7464 100644 --- a/arch/arm/boot/dts/am335x-pepper.dts +++ b/arch/arm/boot/dts/am335x-pepper.dts @@ -74,6 +74,7 @@ audio_codec: tlv320aic3106@1b { compatible = "ti,tlv320aic3106"; reg = <0x1b>; + ai3x-micbias-vg = <0x2>; }; accel: lis331dlh@1d { @@ -153,7 +154,7 @@ ti,audio-routing = "Headphone Jack", "HPLOUT", "Headphone Jack", "HPROUT", - "LINE1L", "Line In"; + "MIC3L", "Mic3L Switch"; }; &mcasp0 {