5e31275cc9
Introduce per-VMA locking. The lock implementation relies on per-vma and per-mm sequence counters to note exclusive locking: - read lock - (implemented by vma_start_read) requires the vma (vm_lock_seq) and mm (mm_lock_seq) sequence counters to differ. If they match then there must be a vma exclusive lock held somewhere. - read unlock - (implemented by vma_end_read) is a trivial vma->lock unlock. - write lock - (vma_start_write) requires the mmap_lock to be held exclusively; the current mm counter is then assigned to the vma counter. This will allow multiple vmas to be locked under a single mmap_lock write lock (e.g. during vma merging). The vma counter is modified under exclusive vma lock. - write unlock - (vma_end_write_all) is a batch release of all vma locks held. It doesn't pair with a specific vma_start_write! It is done before exclusive mmap_lock is released by incrementing mm sequence counter (mm_lock_seq). - write downgrade - if the mmap_lock is downgraded to the read lock, all vma write locks are released as well (effectively the same as write unlock). Link: https://lkml.kernel.org/r/20230227173632.3292573-13-surenb@google.com Signed-off-by: Suren Baghdasaryan <surenb@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
59 lines
1.7 KiB
C
59 lines
1.7 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
#include <linux/mm_types.h>
|
|
#include <linux/maple_tree.h>
|
|
#include <linux/rwsem.h>
|
|
#include <linux/spinlock.h>
|
|
#include <linux/list.h>
|
|
#include <linux/cpumask.h>
|
|
#include <linux/mman.h>
|
|
#include <linux/pgtable.h>
|
|
|
|
#include <linux/atomic.h>
|
|
#include <linux/user_namespace.h>
|
|
#include <linux/ioasid.h>
|
|
#include <asm/mmu.h>
|
|
|
|
/*
 * Fallback for INIT_MM_CONTEXT: when no header included above has defined
 * it (presumably <asm/mmu.h> is where an override would come from - confirm
 * per architecture), make it expand to nothing so that its use inside the
 * init_mm initializer contributes no extra fields.
 */
#if !defined(INIT_MM_CONTEXT)
# define INIT_MM_CONTEXT(name)
#endif
|
|
|
|
/*
|
|
* For dynamically allocated mm_structs, there is a dynamically sized cpumask
|
|
* at the end of the structure, the size of which depends on the maximum CPU
|
|
* number the system can see. That way we allocate only as much memory for
|
|
* mm_cpumask() as needed for the hundreds, or thousands of processes that
|
|
* a system typically runs.
|
|
*
|
|
* Since there is only one init_mm in the entire system, keep it simple
|
|
* and size this cpu_bitmask to NR_CPUS.
|
|
*/
|
|
struct mm_struct init_mm = {
|
|
.mm_mt = MTREE_INIT_EXT(mm_mt, MM_MT_FLAGS, init_mm.mmap_lock),
|
|
.pgd = swapper_pg_dir,
|
|
.mm_users = ATOMIC_INIT(2),
|
|
.mm_count = ATOMIC_INIT(1),
|
|
.write_protect_seq = SEQCNT_ZERO(init_mm.write_protect_seq),
|
|
MMAP_LOCK_INITIALIZER(init_mm)
|
|
.page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock),
|
|
.arg_lock = __SPIN_LOCK_UNLOCKED(init_mm.arg_lock),
|
|
.mmlist = LIST_HEAD_INIT(init_mm.mmlist),
|
|
#ifdef CONFIG_PER_VMA_LOCK
|
|
.mm_lock_seq = 0,
|
|
#endif
|
|
.user_ns = &init_user_ns,
|
|
.cpu_bitmap = CPU_BITS_NONE,
|
|
#ifdef CONFIG_IOMMU_SVA
|
|
.pasid = INVALID_IOASID,
|
|
#endif
|
|
INIT_MM_CONTEXT(init_mm)
|
|
};
|
|
|
|
void setup_initial_init_mm(void *start_code, void *end_code,
|
|
void *end_data, void *brk)
|
|
{
|
|
init_mm.start_code = (unsigned long)start_code;
|
|
init_mm.end_code = (unsigned long)end_code;
|
|
init_mm.end_data = (unsigned long)end_data;
|
|
init_mm.brk = (unsigned long)brk;
|
|
}
|