f394576eb1
The top of the data structure provides an IO Address Space (IOAS) that is similar to a VFIO container. The IOAS allows map/unmap of memory into ranges of IOVA called iopt_areas. Multiple IOMMU domains (IO page tables) and in-kernel accesses (like VFIO mdevs) can be attached to the IOAS to access the PFNs that those IOVA areas cover.

The IO Address Space (IOAS) data structure is composed of:
 - struct io_pagetable holding the IOVA map
 - struct iopt_areas representing populated portions of IOVA
 - struct iopt_pages representing the storage of PFNs
 - struct iommu_domain representing each IO page table in the system IOMMU
 - struct iopt_pages_access representing in-kernel accesses of PFNs (i.e. VFIO mdevs)
 - struct xarray pinned_pfns holding a list of pages pinned by in-kernel accesses

This patch introduces the lowest part of the data structure - the movement of PFNs in a tiered storage scheme:
 1) iopt_pages::pinned_pfns xarray
 2) Multiple iommu_domains
 3) The origin of the PFNs, i.e. the userspace pointer

PFNs have to be copied between all combinations of tiers, depending on the configuration.

The interface is an iterator called a 'pfn_reader' which determines which tier each PFN is stored in and loads it into a list of PFNs held in a struct pfn_batch. Each step of the iterator will fill up the pfn_batch, then the caller can use the pfn_batch to send the PFNs to the required destination. Repeating this loop will read all the PFNs in an IOVA range.

The pfn_reader and pfn_batch also keep track of the pinned page accounting.

While PFNs are always stored and accessed as full PAGE_SIZE units, the iommu_domain tier can store with a sub-page offset/length to support IOMMUs with a smaller IOPTE size than PAGE_SIZE.
Link: https://lore.kernel.org/r/8-v6-a196d26f289e+11787-iommufd_jgg@nvidia.com Reviewed-by: Kevin Tian <kevin.tian@intel.com> Tested-by: Nicolin Chen <nicolinc@nvidia.com> Tested-by: Yi Liu <yi.l.liu@intel.com> Tested-by: Lixiao Yang <lixiao.yang@intel.com> Tested-by: Matthew Rosato <mjrosato@linux.ibm.com> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
54 lines
1.8 KiB
C
54 lines
1.8 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES.
 */
#ifndef __IOMMUFD_DOUBLE_SPAN_H
|
|
#define __IOMMUFD_DOUBLE_SPAN_H
|
|
|
|
#include <linux/interval_tree.h>
|
|
|
|
/*
 * This is a variation of the general interval_tree_span_iter that computes the
 * spans over the union of two different interval trees. Used ranges are broken
 * up and reported based on the tree that provides the interval. The first span
 * always takes priority. Like interval_tree_span_iter it is greedy and the same
 * value of is_used will not repeat on two iteration cycles.
 */
struct interval_tree_double_span_iter {
|
|
struct rb_root_cached *itrees[2];
|
|
struct interval_tree_span_iter spans[2];
|
|
union {
|
|
unsigned long start_hole;
|
|
unsigned long start_used;
|
|
};
|
|
union {
|
|
unsigned long last_hole;
|
|
unsigned long last_used;
|
|
};
|
|
/* 0 = hole, 1 = used span[0], 2 = used span[1], -1 done iteration */
|
|
int is_used;
|
|
};
|
|
|
|
/*
 * Out-of-line iterator operations; their definitions are not part of this
 * header.
 *
 * interval_tree_for_each_double_span() calls _first() once to begin a walk
 * over itree1 and itree2 for the range given by first_index and last_index,
 * and calls _next() after every loop body to move to the following span.
 *
 * NOTE(review): _update() presumably recomputes the current span fields of
 * iter from the two per-tree iterators - confirm against the implementation.
 */
void interval_tree_double_span_iter_update(
	struct interval_tree_double_span_iter *iter);
void interval_tree_double_span_iter_first(
	struct interval_tree_double_span_iter *iter,
	struct rb_root_cached *itree1, struct rb_root_cached *itree2,
	unsigned long first_index, unsigned long last_index);
void interval_tree_double_span_iter_next(
	struct interval_tree_double_span_iter *iter);

static inline bool
|
|
interval_tree_double_span_iter_done(struct interval_tree_double_span_iter *state)
|
|
{
|
|
return state->is_used == -1;
|
|
}
|
|
|
|
/*
 * Loop over every span reported for the two trees within the requested index
 * range.
 *
 * span is a pointer to a struct interval_tree_double_span_iter. It is
 * initialised by interval_tree_double_span_iter_first(), tested with
 * interval_tree_double_span_iter_done() before each pass and advanced with
 * interval_tree_double_span_iter_next() after each pass.
 *
 * span is expanded once in each of the three for-clauses, so it must be an
 * expression without side effects. The statement following the macro is the
 * loop body.
 */
#define interval_tree_for_each_double_span(span, itree1, itree2, first_index, \
					   last_index) \
	for (interval_tree_double_span_iter_first(span, itree1, itree2, \
						  first_index, last_index); \
	     !interval_tree_double_span_iter_done(span); \
	     interval_tree_double_span_iter_next(span))

#endif
|