drm/i915: support 64K GTT pages for discrete cards
Discrete cards optimise 64K GTT pages for local-memory, since everything
should be allocated at 64K granularity. We say goodbye to sparse entries,
and instead get a compact 256B page-table for 64K pages, which should be
more cache friendly. 4K pages for local-memory are no longer supported by
the HW.

v4: don't return uninitialized err in igt_ppgtt_compact

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Stuart Summers <stuart.summers@intel.com>
Signed-off-by: Ramalingam C <ramalingam.c@intel.com>
Signed-off-by: Robert Beckett <bob.beckett@collabora.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220218184752.7524-8-ramalingam.c@intel.com
commit 5189e3126e
parent 87bd701ee2
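For readers new to the compact layout, the sizes in the commit message fall out of simple arithmetic; here is a minimal standalone sketch (the constants and names are illustrative, not the driver's):

/* Illustrative arithmetic behind the compact 64K page-table; constants and
 * names here are standalone, not the driver's. */
#include <assert.h>
#include <stdint.h>

#define SZ_4K	4096u
#define SZ_64K	(64u * 1024u)
#define SZ_2M	(2u * 1024u * 1024u)
#define PTE_SIZE sizeof(uint64_t)	/* one GTT entry is a 64-bit PTE */

int main(void)
{
	/* A page table still covers the 2M addressed by one PDE. */
	unsigned int entries_4k  = SZ_2M / SZ_4K;	/* 512 entries */
	unsigned int entries_64k = SZ_2M / SZ_64K;	/*  32 entries */

	assert(entries_4k * PTE_SIZE == SZ_4K);		/* classic 4K page table */
	assert(entries_64k * PTE_SIZE == 256);		/* compact 256B page table */

	/* Hence the factor of 16 used when indexing a compact table. */
	assert(entries_4k / entries_64k == 16);
	return 0;
}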
@@ -1478,6 +1478,65 @@ out:
 	return err;
 }
 
+static int igt_ppgtt_compact(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct drm_i915_gem_object *obj;
+	int err;
+
+	/*
+	 * Simple test to catch issues with compact 64K pages -- since the pt is
+	 * compacted to 256B that gives us 32 entries per pt, however since the
+	 * backing page for the pt is 4K, any extra entries we might incorrectly
+	 * write out should be ignored by the HW. If ever hit such a case this
+	 * test should catch it since some of our writes would land in scratch.
+	 */
+
+	if (!HAS_64K_PAGES(i915)) {
+		pr_info("device lacks compact 64K page support, skipping\n");
+		return 0;
+	}
+
+	if (!HAS_LMEM(i915)) {
+		pr_info("device lacks LMEM support, skipping\n");
+		return 0;
+	}
+
+	/* We want the range to cover multiple page-table boundaries. */
+	obj = i915_gem_object_create_lmem(i915, SZ_4M, 0);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	err = i915_gem_object_pin_pages_unlocked(obj);
+	if (err)
+		goto out_put;
+
+	if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_64K) {
+		pr_info("LMEM compact unable to allocate huge-page(s)\n");
+		goto out_unpin;
+	}
+
+	/*
+	 * Disable 2M GTT pages by forcing the page-size to 64K for the GTT
+	 * insertion.
+	 */
+	obj->mm.page_sizes.sg = I915_GTT_PAGE_SIZE_64K;
+
+	err = igt_write_huge(i915, obj);
+	if (err)
+		pr_err("LMEM compact write-huge failed\n");
+
+out_unpin:
+	i915_gem_object_unpin_pages(obj);
+out_put:
+	i915_gem_object_put(obj);
+
+	if (err == -ENOMEM)
+		err = 0;
+
+	return err;
+}
+
 static int igt_tmpfs_fallback(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
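The SZ_4M object in the selftest above is picked so that the 64K writes span more than one page table; a rough standalone sketch of that coverage, assuming the usual 2M-per-page-table layout (names are illustrative):

/* Standalone sketch: why a SZ_4M object makes the 64K walk cross a
 * page-table boundary (each pt covers one PDE's worth, i.e. 2M). */
#include <stdio.h>

#define SZ_64K	(64u * 1024u)
#define SZ_2M	(2u * 1024u * 1024u)
#define SZ_4M	(4u * 1024u * 1024u)

int main(void)
{
	printf("64K PTEs written:    %u\n", SZ_4M / SZ_64K);	/* 64 */
	printf("page tables touched: %u\n", SZ_4M / SZ_2M);	/* 2  */
	return 0;
}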
@@ -1735,6 +1794,7 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(igt_tmpfs_fallback),
 		SUBTEST(igt_ppgtt_smoke_huge),
 		SUBTEST(igt_ppgtt_sanity_check),
+		SUBTEST(igt_ppgtt_compact),
 	};
 
 	if (!HAS_PPGTT(i915)) {
@@ -233,6 +233,8 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
 					   start, end, lvl);
 		} else {
 			unsigned int count;
+			unsigned int pte = gen8_pd_index(start, 0);
+			unsigned int num_ptes;
 			u64 *vaddr;
 
 			count = gen8_pt_count(start, end);
@@ -242,10 +244,18 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
 				  atomic_read(&pt->used));
 			GEM_BUG_ON(!count || count >= atomic_read(&pt->used));
 
+			num_ptes = count;
+			if (pt->is_compact) {
+				GEM_BUG_ON(num_ptes % 16);
+				GEM_BUG_ON(pte % 16);
+				num_ptes /= 16;
+				pte /= 16;
+			}
+
 			vaddr = px_vaddr(pt);
-			memset64(vaddr + gen8_pd_index(start, 0),
+			memset64(vaddr + pte,
 				 vm->scratch[0]->encode,
-				 count);
+				 num_ptes);
 
 			atomic_sub(count, &pt->used);
 			start += count;
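In the clear path above, one compact 64K entry stands in for sixteen 4K slots, which is why both the starting index and the entry count are divided by 16. A hypothetical standalone helper sketching that scaling (not driver code):

/* Hypothetical helper mirroring the scaling in __gen8_ppgtt_clear above:
 * a compact table folds sixteen 4K slots into one 64K entry, so both the
 * first index and the entry count shrink by 16. Not driver code. */
#include <assert.h>
#include <stdbool.h>

struct clear_range {
	unsigned int pte;	/* first entry to write with scratch */
	unsigned int num_ptes;	/* how many entries to write */
};

static struct clear_range scale_for_compact(unsigned int pte,
					    unsigned int count,
					    bool is_compact)
{
	struct clear_range r = { .pte = pte, .num_ptes = count };

	if (is_compact) {
		assert(r.num_ptes % 16 == 0);
		assert(r.pte % 16 == 0);
		r.num_ptes /= 16;
		r.pte /= 16;
	}
	return r;
}

int main(void)
{
	/* e.g. clearing 256 4K-granule slots from the start of a compact pt
	 * becomes a 16-entry memset of 64K PTEs. */
	struct clear_range r = scale_for_compact(0, 256, true);

	assert(r.pte == 0 && r.num_ptes == 16);
	return 0;
}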
@@ -453,6 +463,95 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
 	return idx;
 }
 
+static void
+xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm,
+			  struct i915_vma_resource *vma_res,
+			  struct sgt_dma *iter,
+			  enum i915_cache_level cache_level,
+			  u32 flags)
+{
+	const gen8_pte_t pte_encode = vm->pte_encode(0, cache_level, flags);
+	unsigned int rem = sg_dma_len(iter->sg);
+	u64 start = vma_res->start;
+
+	GEM_BUG_ON(!i915_vm_is_4lvl(vm));
+
+	do {
+		struct i915_page_directory * const pdp =
+			gen8_pdp_for_page_address(vm, start);
+		struct i915_page_directory * const pd =
+			i915_pd_entry(pdp, __gen8_pte_index(start, 2));
+		struct i915_page_table *pt =
+			i915_pt_entry(pd, __gen8_pte_index(start, 1));
+		gen8_pte_t encode = pte_encode;
+		unsigned int page_size;
+		gen8_pte_t *vaddr;
+		u16 index, max;
+
+		max = I915_PDES;
+
+		if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
+		    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
+		    rem >= I915_GTT_PAGE_SIZE_2M &&
+		    !__gen8_pte_index(start, 0)) {
+			index = __gen8_pte_index(start, 1);
+			encode |= GEN8_PDE_PS_2M;
+			page_size = I915_GTT_PAGE_SIZE_2M;
+
+			vaddr = px_vaddr(pd);
+		} else {
+			if (encode & GEN12_PPGTT_PTE_LM) {
+				GEM_BUG_ON(__gen8_pte_index(start, 0) % 16);
+				GEM_BUG_ON(rem < I915_GTT_PAGE_SIZE_64K);
+				GEM_BUG_ON(!IS_ALIGNED(iter->dma,
+						       I915_GTT_PAGE_SIZE_64K));
+
+				index = __gen8_pte_index(start, 0) / 16;
+				page_size = I915_GTT_PAGE_SIZE_64K;
+
+				max /= 16;
+
+				vaddr = px_vaddr(pd);
+				vaddr[__gen8_pte_index(start, 1)] |= GEN12_PDE_64K;
+
+				pt->is_compact = true;
+			} else {
+				GEM_BUG_ON(pt->is_compact);
+				index = __gen8_pte_index(start, 0);
+				page_size = I915_GTT_PAGE_SIZE;
+			}
+
+			vaddr = px_vaddr(pt);
+		}
+
+		do {
+			GEM_BUG_ON(rem < page_size);
+			vaddr[index++] = encode | iter->dma;
+
+			start += page_size;
+			iter->dma += page_size;
+			rem -= page_size;
+			if (iter->dma >= iter->max) {
+				iter->sg = __sg_next(iter->sg);
+				if (!iter->sg)
+					break;
+
+				rem = sg_dma_len(iter->sg);
+				if (!rem)
+					break;
+
+				iter->dma = sg_dma_address(iter->sg);
+				iter->max = iter->dma + rem;
+
+				if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
+					break;
+			}
+		} while (rem >= page_size && index < max);
+
+		vma_res->page_sizes_gtt |= page_size;
+	} while (iter->sg && sg_dma_len(iter->sg));
+}
+
 static void gen8_ppgtt_insert_huge(struct i915_address_space *vm,
 				   struct i915_vma_resource *vma_res,
 				   struct sgt_dma *iter,
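The insertion loop above chooses a page size per chunk: a 2M PDE entry when the DMA address, remaining length and GTT offset all line up, otherwise compact 64K PTEs for local memory, and plain 4K PTEs for system memory. A condensed, hypothetical sketch of that decision (standalone, not the driver's code):

/* Hypothetical distillation of the per-chunk page-size choice made in
 * xehpsdv_ppgtt_insert_huge; names and constants are illustrative only. */
#include <stdbool.h>
#include <stdint.h>

#define SZ_4K	4096ull
#define SZ_64K	(64ull * 1024)
#define SZ_2M	(2ull * 1024 * 1024)

static uint64_t pick_page_size(uint64_t gtt_offset, uint64_t dma_addr,
			       uint64_t remaining, bool is_lmem,
			       bool sg_has_2m)
{
	/* Whole-PDE insert: 2M-aligned DMA, at least 2M left to map, and the
	 * GTT offset sitting at the start of a page table's range. */
	if (sg_has_2m && !(dma_addr % SZ_2M) && remaining >= SZ_2M &&
	    !(gtt_offset % SZ_2M))
		return SZ_2M;

	/* Local memory must use the compact 64K layout on these parts. */
	if (is_lmem)
		return SZ_64K;

	/* System memory keeps using regular 4K PTEs. */
	return SZ_4K;
}

int main(void)
{
	/* A 2M-aligned local-memory chunk with 4M remaining -> 2M PDE entry. */
	return pick_page_size(0, 0, 2 * SZ_2M, true, true) == SZ_2M ? 0 : 1;
}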
@@ -586,7 +685,10 @@ static void gen8_ppgtt_insert(struct i915_address_space *vm,
 	struct sgt_dma iter = sgt_dma(vma_res);
 
 	if (vma_res->bi.page_sizes.sg > I915_GTT_PAGE_SIZE) {
-		gen8_ppgtt_insert_huge(vm, vma_res, &iter, cache_level, flags);
+		if (HAS_64K_PAGES(vm->i915))
+			xehpsdv_ppgtt_insert_huge(vm, vma_res, &iter, cache_level, flags);
+		else
+			gen8_ppgtt_insert_huge(vm, vma_res, &iter, cache_level, flags);
 	} else {
 		u64 idx = vma_res->start >> GEN8_PTE_SHIFT;
 
@@ -92,6 +92,8 @@ typedef u64 gen8_pte_t;
 
 #define GEN12_GGTT_PTE_LM	BIT_ULL(1)
 
+#define GEN12_PDE_64K BIT(6)
+
 /*
  * Cacheability Control is a 4-bit value. The low three bits are stored in bits
  * 3:1 of the PTE, while the fourth bit is stored in bit 11 of the PTE.
@@ -160,6 +162,7 @@ struct i915_page_table {
 		atomic_t used;
 		struct i915_page_table *stash;
 	};
+	bool is_compact;
 };
 
 struct i915_page_directory {
@@ -26,6 +26,7 @@ struct i915_page_table *alloc_pt(struct i915_address_space *vm)
 		return ERR_PTR(-ENOMEM);
 	}
 
+	pt->is_compact = false;
 	atomic_set(&pt->used, 0);
 	return pt;
 }