diff --git a/.mailmap b/.mailmap index 59c9a841bf71..8284692f9610 100644 --- a/.mailmap +++ b/.mailmap @@ -20,6 +20,7 @@ Adam Oldham Adam Radford Adriana Reus Adrian Bunk +Ajay Kaher Akhil P Oommen Alan Cox Alan Cox @@ -36,6 +37,7 @@ Alexei Avshalom Lazar Alexei Starovoitov Alexei Starovoitov Alexei Starovoitov +Alexey Makhalov Alex Hung Alex Shi Alex Shi @@ -110,6 +112,7 @@ Brendan Higgins Brian Avery Brian King Brian Silverman +Bryan Tan Cai Huoqing Can Guo Carl Huang @@ -529,6 +532,7 @@ Rocky Liao Roman Gushchin Roman Gushchin Roman Gushchin +Ronak Doshi Muchun Song Muchun Song Ross Zwisler @@ -651,6 +655,7 @@ Viresh Kumar Viresh Kumar Viresh Kumar Viresh Kumar +Vishnu Dasa Vivek Aknurwar Vivien Didelot Vlad Dogaru diff --git a/MAINTAINERS b/MAINTAINERS index 75381386fe4c..793926018188 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16733,9 +16733,9 @@ F: include/uapi/linux/ppdev.h PARAVIRT_OPS INTERFACE M: Juergen Gross -R: Ajay Kaher -R: Alexey Makhalov -R: VMware PV-Drivers Reviewers +R: Ajay Kaher +R: Alexey Makhalov +R: Broadcom internal kernel review list L: virtualization@lists.linux.dev L: x86@kernel.org S: Supported @@ -23654,9 +23654,9 @@ S: Supported F: drivers/misc/vmw_balloon.c VMWARE HYPERVISOR INTERFACE -M: Ajay Kaher -M: Alexey Makhalov -R: VMware PV-Drivers Reviewers +M: Ajay Kaher +M: Alexey Makhalov +R: Broadcom internal kernel review list L: virtualization@lists.linux.dev L: x86@kernel.org S: Supported @@ -23665,33 +23665,34 @@ F: arch/x86/include/asm/vmware.h F: arch/x86/kernel/cpu/vmware.c VMWARE PVRDMA DRIVER -M: Bryan Tan -M: Vishnu Dasa -R: VMware PV-Drivers Reviewers +M: Bryan Tan +M: Vishnu Dasa +R: Broadcom internal kernel review list L: linux-rdma@vger.kernel.org S: Supported F: drivers/infiniband/hw/vmw_pvrdma/ VMWARE PVSCSI DRIVER -M: Vishal Bhakta -R: VMware PV-Drivers Reviewers +M: Vishal Bhakta +R: Broadcom internal kernel review list L: linux-scsi@vger.kernel.org S: Supported F: drivers/scsi/vmw_pvscsi.c F: drivers/scsi/vmw_pvscsi.h VMWARE VIRTUAL PTP CLOCK DRIVER -R: Ajay Kaher -R: Alexey Makhalov -R: VMware PV-Drivers Reviewers +M: Nick Shi +R: Ajay Kaher +R: Alexey Makhalov +R: Broadcom internal kernel review list L: netdev@vger.kernel.org S: Supported F: drivers/ptp/ptp_vmw.c VMWARE VMCI DRIVER -M: Bryan Tan -M: Vishnu Dasa -R: VMware PV-Drivers Reviewers +M: Bryan Tan +M: Vishnu Dasa +R: Broadcom internal kernel review list L: linux-kernel@vger.kernel.org S: Supported F: drivers/misc/vmw_vmci/ @@ -23706,16 +23707,16 @@ F: drivers/input/mouse/vmmouse.c F: drivers/input/mouse/vmmouse.h VMWARE VMXNET3 ETHERNET DRIVER -M: Ronak Doshi -R: VMware PV-Drivers Reviewers +M: Ronak Doshi +R: Broadcom internal kernel review list L: netdev@vger.kernel.org S: Supported F: drivers/net/vmxnet3/ VMWARE VSOCK VMCI TRANSPORT DRIVER -M: Bryan Tan -M: Vishnu Dasa -R: VMware PV-Drivers Reviewers +M: Bryan Tan +M: Vishnu Dasa +R: Broadcom internal kernel review list L: linux-kernel@vger.kernel.org S: Supported F: net/vmw_vsock/vmci_transport* diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c index 0d72183b5dd0..36b603d0cdde 100644 --- a/arch/x86/mm/pat/memtype.c +++ b/arch/x86/mm/pat/memtype.c @@ -947,6 +947,38 @@ static void free_pfn_range(u64 paddr, unsigned long size) memtype_free(paddr, paddr + size); } +static int get_pat_info(struct vm_area_struct *vma, resource_size_t *paddr, + pgprot_t *pgprot) +{ + unsigned long prot; + + VM_WARN_ON_ONCE(!(vma->vm_flags & VM_PAT)); + + /* + * We need the starting PFN and cachemode used for track_pfn_remap() + * that covered the whole VMA. For most mappings, we can obtain that + * information from the page tables. For COW mappings, we might now + * suddenly have anon folios mapped and follow_phys() will fail. + * + * Fallback to using vma->vm_pgoff, see remap_pfn_range_notrack(), to + * detect the PFN. If we need the cachemode as well, we're out of luck + * for now and have to fail fork(). + */ + if (!follow_phys(vma, vma->vm_start, 0, &prot, paddr)) { + if (pgprot) + *pgprot = __pgprot(prot); + return 0; + } + if (is_cow_mapping(vma->vm_flags)) { + if (pgprot) + return -EINVAL; + *paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; + return 0; + } + WARN_ON_ONCE(1); + return -EINVAL; +} + /* * track_pfn_copy is called when vma that is covering the pfnmap gets * copied through copy_page_range(). @@ -957,20 +989,13 @@ static void free_pfn_range(u64 paddr, unsigned long size) int track_pfn_copy(struct vm_area_struct *vma) { resource_size_t paddr; - unsigned long prot; unsigned long vma_size = vma->vm_end - vma->vm_start; pgprot_t pgprot; if (vma->vm_flags & VM_PAT) { - /* - * reserve the whole chunk covered by vma. We need the - * starting address and protection from pte. - */ - if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) { - WARN_ON_ONCE(1); + if (get_pat_info(vma, &paddr, &pgprot)) return -EINVAL; - } - pgprot = __pgprot(prot); + /* reserve the whole chunk covered by vma. */ return reserve_pfn_range(paddr, vma_size, &pgprot, 1); } @@ -1045,7 +1070,6 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, unsigned long size, bool mm_wr_locked) { resource_size_t paddr; - unsigned long prot; if (vma && !(vma->vm_flags & VM_PAT)) return; @@ -1053,11 +1077,8 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, /* free the chunk starting from pfn or the whole chunk */ paddr = (resource_size_t)pfn << PAGE_SHIFT; if (!paddr && !size) { - if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) { - WARN_ON_ONCE(1); + if (get_pat_info(vma, &paddr, NULL)) return; - } - size = vma->vm_end - vma->vm_start; } free_pfn_range(paddr, size); diff --git a/include/linux/secretmem.h b/include/linux/secretmem.h index 35f3a4a8ceb1..acf7e1a3f3de 100644 --- a/include/linux/secretmem.h +++ b/include/linux/secretmem.h @@ -13,10 +13,10 @@ static inline bool folio_is_secretmem(struct folio *folio) /* * Using folio_mapping() is quite slow because of the actual call * instruction. - * We know that secretmem pages are not compound and LRU so we can + * We know that secretmem pages are not compound, so we can * save a couple of cycles here. */ - if (folio_test_large(folio) || !folio_test_lru(folio)) + if (folio_test_large(folio)) return false; mapping = (struct address_space *) diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h index 3c6caa5abc7c..e9ec32fb97d4 100644 --- a/include/linux/stackdepot.h +++ b/include/linux/stackdepot.h @@ -44,10 +44,9 @@ typedef u32 depot_stack_handle_t; union handle_parts { depot_stack_handle_t handle; struct { - /* pool_index is offset by 1 */ - u32 pool_index : DEPOT_POOL_INDEX_BITS; - u32 offset : DEPOT_OFFSET_BITS; - u32 extra : STACK_DEPOT_EXTRA_BITS; + u32 pool_index_plus_1 : DEPOT_POOL_INDEX_BITS; + u32 offset : DEPOT_OFFSET_BITS; + u32 extra : STACK_DEPOT_EXTRA_BITS; }; }; diff --git a/init/initramfs.c b/init/initramfs.c index 3127e0bf7bbd..a298a3854a80 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -367,7 +367,7 @@ static int __init do_name(void) if (S_ISREG(mode)) { int ml = maybe_link(); if (ml >= 0) { - int openflags = O_WRONLY|O_CREAT; + int openflags = O_WRONLY|O_CREAT|O_LARGEFILE; if (ml != 1) openflags |= O_TRUNC; wfile = filp_open(collected, openflags, mode); diff --git a/lib/stackdepot.c b/lib/stackdepot.c index af6cc19a2003..68c97387aa54 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -330,7 +330,7 @@ static struct stack_record *depot_pop_free_pool(void **prealloc, size_t size) stack = current_pool + pool_offset; /* Pre-initialize handle once. */ - stack->handle.pool_index = pool_index + 1; + stack->handle.pool_index_plus_1 = pool_index + 1; stack->handle.offset = pool_offset >> DEPOT_STACK_ALIGN; stack->handle.extra = 0; INIT_LIST_HEAD(&stack->hash_list); @@ -441,7 +441,7 @@ static struct stack_record *depot_fetch_stack(depot_stack_handle_t handle) const int pools_num_cached = READ_ONCE(pools_num); union handle_parts parts = { .handle = handle }; void *pool; - u32 pool_index = parts.pool_index - 1; + u32 pool_index = parts.pool_index_plus_1 - 1; size_t offset = parts.offset << DEPOT_STACK_ALIGN; struct stack_record *stack; diff --git a/mm/memory.c b/mm/memory.c index 904f70b99498..d2155ced45f8 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -5973,6 +5973,10 @@ int follow_phys(struct vm_area_struct *vma, goto out; pte = ptep_get(ptep); + /* Never return PFNs of anon folios in COW mappings. */ + if (vm_normal_folio(vma, address, pte)) + goto unlock; + if ((flags & FOLL_WRITE) && !pte_write(pte)) goto unlock; diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 22aa63f4ef63..68fa001648cc 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -989,6 +989,27 @@ unsigned long vmalloc_nr_pages(void) return atomic_long_read(&nr_vmalloc_pages); } +static struct vmap_area *__find_vmap_area(unsigned long addr, struct rb_root *root) +{ + struct rb_node *n = root->rb_node; + + addr = (unsigned long)kasan_reset_tag((void *)addr); + + while (n) { + struct vmap_area *va; + + va = rb_entry(n, struct vmap_area, rb_node); + if (addr < va->va_start) + n = n->rb_left; + else if (addr >= va->va_end) + n = n->rb_right; + else + return va; + } + + return NULL; +} + /* Look up the first VA which satisfies addr < va_end, NULL if none. */ static struct vmap_area * __find_vmap_area_exceed_addr(unsigned long addr, struct rb_root *root) @@ -1025,47 +1046,39 @@ __find_vmap_area_exceed_addr(unsigned long addr, struct rb_root *root) static struct vmap_node * find_vmap_area_exceed_addr_lock(unsigned long addr, struct vmap_area **va) { - struct vmap_node *vn, *va_node = NULL; - struct vmap_area *va_lowest; + unsigned long va_start_lowest; + struct vmap_node *vn; int i; - for (i = 0; i < nr_vmap_nodes; i++) { +repeat: + for (i = 0, va_start_lowest = 0; i < nr_vmap_nodes; i++) { vn = &vmap_nodes[i]; spin_lock(&vn->busy.lock); - va_lowest = __find_vmap_area_exceed_addr(addr, &vn->busy.root); - if (va_lowest) { - if (!va_node || va_lowest->va_start < (*va)->va_start) { - if (va_node) - spin_unlock(&va_node->busy.lock); + *va = __find_vmap_area_exceed_addr(addr, &vn->busy.root); - *va = va_lowest; - va_node = vn; - continue; - } - } + if (*va) + if (!va_start_lowest || (*va)->va_start < va_start_lowest) + va_start_lowest = (*va)->va_start; spin_unlock(&vn->busy.lock); } - return va_node; -} + /* + * Check if found VA exists, it might have gone away. In this case we + * repeat the search because a VA has been removed concurrently and we + * need to proceed to the next one, which is a rare case. + */ + if (va_start_lowest) { + vn = addr_to_node(va_start_lowest); -static struct vmap_area *__find_vmap_area(unsigned long addr, struct rb_root *root) -{ - struct rb_node *n = root->rb_node; + spin_lock(&vn->busy.lock); + *va = __find_vmap_area(va_start_lowest, &vn->busy.root); - addr = (unsigned long)kasan_reset_tag((void *)addr); + if (*va) + return vn; - while (n) { - struct vmap_area *va; - - va = rb_entry(n, struct vmap_area, rb_node); - if (addr < va->va_start) - n = n->rb_left; - else if (addr >= va->va_end) - n = n->rb_right; - else - return va; + spin_unlock(&vn->busy.lock); + goto repeat; } return NULL; @@ -2343,6 +2356,9 @@ struct vmap_area *find_vmap_area(unsigned long addr) struct vmap_area *va; int i, j; + if (unlikely(!vmap_initialized)) + return NULL; + /* * An addr_to_node_id(addr) converts an address to a node index * where a VA is located. If VA spans several zones and passed diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h index c02990bbd56f..9007c420d52c 100644 --- a/tools/testing/selftests/mm/vm_util.h +++ b/tools/testing/selftests/mm/vm_util.h @@ -3,7 +3,7 @@ #include #include #include -#include /* ffsl() */ +#include /* ffsl() */ #include /* _SC_PAGESIZE */ #define BIT_ULL(nr) (1ULL << (nr))