Merge tag 'drm-intel-gt-next-2022-11-03' of git://anongit.freedesktop.org/drm/drm-intel into drm-next

Driver Changes:

- Fix for #7306: [Arc A380] white flickering when using Arc as a
  secondary GPU (Matt A)
- Add Wa_18017747507 for DG2 (Wayne)
- Avoid spurious WARN on DG1 due to incorrect cache_dirty flag
  (Niranjana, Matt A)
- Corrections to CS timestamp support for Gen5 and earlier (Ville)

- Fix a build error seen with the clang compiler in the hwmon code (GG)
- Improvements to LMEM handling with RPM (Anshuman, Matt A)
- Cleanups in dmabuf code (Mike)

- Selftest improvements (Matt A)

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/Y2N11wu175p6qeEN@jlahtine-mobl.ger.corp.intel.com
Dave Airlie 2022-11-04 17:20:12 +10:00
commit 60ba8c5bd9
150 changed files with 6696 additions and 2060 deletions

View File

@ -0,0 +1,75 @@
What: /sys/devices/.../hwmon/hwmon<i>/in0_input
Date: February 2023
KernelVersion: 6.2
Contact: intel-gfx@lists.freedesktop.org
Description: RO. Current voltage in millivolts.
Only supported for particular Intel i915 graphics platforms.

What: /sys/devices/.../hwmon/hwmon<i>/power1_max
Date: February 2023
KernelVersion: 6.2
Contact: intel-gfx@lists.freedesktop.org
Description: RW. Card reactive sustained (PL1/Tau) power limit in microwatts.
The power controller will throttle the operating frequency
if the power averaged over a window (typically seconds)
exceeds this limit.
Only supported for particular Intel i915 graphics platforms.

What: /sys/devices/.../hwmon/hwmon<i>/power1_rated_max
Date: February 2023
KernelVersion: 6.2
Contact: intel-gfx@lists.freedesktop.org
Description: RO. Card default power limit (default TDP setting).
Only supported for particular Intel i915 graphics platforms.

What: /sys/devices/.../hwmon/hwmon<i>/power1_max_interval
Date: February 2023
KernelVersion: 6.2
Contact: intel-gfx@lists.freedesktop.org
Description: RW. Sustained power limit interval (Tau in PL1/Tau) in
milliseconds over which sustained power is averaged.
Only supported for particular Intel i915 graphics platforms.

What: /sys/devices/.../hwmon/hwmon<i>/power1_crit
Date: February 2023
KernelVersion: 6.2
Contact: intel-gfx@lists.freedesktop.org
Description: RW. Card reactive critical (I1) power limit in microwatts.
Card reactive critical (I1) power limit in microwatts is exposed
for client products. The power controller will throttle the
operating frequency if the power averaged over a window exceeds
this limit.
Only supported for particular Intel i915 graphics platforms.

What: /sys/devices/.../hwmon/hwmon<i>/curr1_crit
Date: February 2023
KernelVersion: 6.2
Contact: intel-gfx@lists.freedesktop.org
Description: RW. Card reactive critical (I1) power limit in milliamperes.
Card reactive critical (I1) power limit in milliamperes is
exposed for server products. The power controller will throttle
the operating frequency if the power averaged over a window
exceeds this limit.
Only supported for particular Intel i915 graphics platforms.

What: /sys/devices/.../hwmon/hwmon<i>/energy1_input
Date: February 2023
KernelVersion: 6.2
Contact: intel-gfx@lists.freedesktop.org
Description: RO. Energy input of device or gt in microjoules.
For i915 device level hwmon devices (name "i915") this
reflects energy input for the entire device. For gt level
hwmon devices (name "i915_gtN") this reflects energy input
for the gt.
Only supported for particular Intel i915 graphics platforms.
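
For reference, a minimal userspace sketch (not part of this commit) of reading the sustained power limit and energy counter documented above. The hwmon instance ("hwmon2") is an assumption; a real tool would enumerate /sys/class/hwmon/ and match the "name" attribute against "i915" or "i915_gtN".

#include <stdio.h>

/* Read a single integer value from a sysfs attribute; returns -1 on failure. */
static long long read_sysfs_ll(const char *path)
{
	FILE *f = fopen(path, "r");
	long long val = -1;

	if (!f)
		return -1;
	if (fscanf(f, "%lld", &val) != 1)
		val = -1;
	fclose(f);
	return val;
}

int main(void)
{
	/* Hypothetical hwmon instance; substitute the hwmon<i> for your card. */
	long long uw = read_sysfs_ll("/sys/class/hwmon/hwmon2/power1_max");
	long long uj = read_sysfs_ll("/sys/class/hwmon/hwmon2/energy1_input");

	if (uw >= 0)
		printf("PL1 sustained power limit: %lld uW\n", uw);
	if (uj >= 0)
		printf("Energy input: %lld uJ\n", uj);
	return 0;
}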

View File

@ -10224,6 +10224,7 @@ Q: http://patchwork.freedesktop.org/project/intel-gfx/
B: https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs
C: irc://irc.oftc.net/intel-gfx
T: git git://anongit.freedesktop.org/drm-intel
F: Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
F: Documentation/gpu/i915.rst
F: drivers/gpu/drm/i915/
F: include/drm/i915*

View File

@ -57,10 +57,28 @@ config DRM_I915_PREEMPT_TIMEOUT
default 640 # milliseconds
help
How long to wait (in milliseconds) for a preemption event to occur
when submitting a new context via execlists. If the current context
does not hit an arbitration point and yield to HW before the timer
expires, the HW will be reset to allow the more important context
to execute.
when submitting a new context. If the current context does not hit
an arbitration point and yield to HW before the timer expires, the
HW will be reset to allow the more important context to execute.
This is adjustable via
/sys/class/drm/card?/engine/*/preempt_timeout_ms
May be 0 to disable the timeout.
The compiled in default may get overridden at driver probe time on
certain platforms and certain engines which will be reflected in the
sysfs control.
config DRM_I915_PREEMPT_TIMEOUT_COMPUTE
int "Preempt timeout for compute engines (ms, jiffy granularity)"
default 7500 # milliseconds
help
How long to wait (in milliseconds) for a preemption event to occur
when submitting a new context to a compute capable engine. If the
current context does not hit an arbitration point and yield to HW
before the timer expires, the HW will be reset to allow the more
important context to execute.
This is adjustable via
/sys/class/drm/card?/engine/*/preempt_timeout_ms
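
For illustration only (not part of this change), a userspace sketch of adjusting the run-time control named in the help text above. The card index and engine directory ("card0", "rcs0") are assumptions and depend on the system.

#include <stdio.h>

int main(void)
{
	/* Hypothetical path; pick the card and engine directory for your setup. */
	const char *path = "/sys/class/drm/card0/engine/rcs0/preempt_timeout_ms";
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		return 1;
	}
	/* A value of 0 disables the timeout, as noted in the Kconfig help. */
	fprintf(f, "%u\n", 7500);
	fclose(f);
	return 0;
}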

View File

@ -209,6 +209,9 @@ i915-y += gt/uc/intel_uc.o \
# graphics system controller (GSC) support
i915-y += gt/intel_gsc.o
# graphics hardware monitoring (HWMON) support
i915-$(CONFIG_HWMON) += i915_hwmon.o
# modesetting core code
i915-y += \
display/hsw_ips.o \
@ -310,15 +313,18 @@ i915-y += \
i915-y += i915_perf.o
# Protected execution platform (PXP) support
i915-$(CONFIG_DRM_I915_PXP) += \
# Protected execution platform (PXP) support. Base support is required for HuC
i915-y += \
pxp/intel_pxp.o \
pxp/intel_pxp_tee.o \
pxp/intel_pxp_huc.o
i915-$(CONFIG_DRM_I915_PXP) += \
pxp/intel_pxp_cmd.o \
pxp/intel_pxp_debugfs.o \
pxp/intel_pxp_irq.o \
pxp/intel_pxp_pm.o \
pxp/intel_pxp_session.o \
pxp/intel_pxp_tee.o
pxp/intel_pxp_session.o
# Post-mortem debug and GPU hang state capture
i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) += i915_gpu_error.o

View File

@ -5,6 +5,7 @@
#include "gem/i915_gem_domain.h"
#include "gem/i915_gem_internal.h"
#include "gem/i915_gem_lmem.h"
#include "gt/gen8_ppgtt.h"
#include "i915_drv.h"

View File

@ -167,7 +167,6 @@ retry:
ret = i915_gem_object_attach_phys(obj, alignment);
else if (!ret && HAS_LMEM(dev_priv))
ret = i915_gem_object_migrate(obj, &ww, INTEL_REGION_LMEM_0);
/* TODO: Do we need to sync when migration becomes async? */
if (!ret)
ret = i915_gem_object_pin_pages(obj);
if (ret)

View File

@ -100,9 +100,9 @@ lpe_audio_platdev_create(struct drm_i915_private *dev_priv)
rsc[0].flags = IORESOURCE_IRQ;
rsc[0].name = "hdmi-lpe-audio-irq";
rsc[1].start = pci_resource_start(pdev, GTTMMADR_BAR) +
rsc[1].start = pci_resource_start(pdev, GEN4_GTTMMADR_BAR) +
I915_HDMI_LPE_AUDIO_BASE;
rsc[1].end = pci_resource_start(pdev, GTTMMADR_BAR) +
rsc[1].end = pci_resource_start(pdev, GEN4_GTTMMADR_BAR) +
I915_HDMI_LPE_AUDIO_BASE + I915_HDMI_LPE_AUDIO_SIZE - 1;
rsc[1].flags = IORESOURCE_MEM;
rsc[1].name = "hdmi-lpe-audio-mmio";

View File

@ -1452,7 +1452,7 @@ static void engines_idle_release(struct i915_gem_context *ctx,
int err;
/* serialises with execbuf */
set_bit(CONTEXT_CLOSED_BIT, &ce->flags);
intel_context_close(ce);
if (!intel_context_pin_if_active(ce))
continue;
@ -2298,7 +2298,6 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
}
args->ctx_id = id;
drm_dbg(&i915->drm, "HW context %d created\n", args->ctx_id);
return 0;

View File

@ -25,43 +25,44 @@ static struct drm_i915_gem_object *dma_buf_to_obj(struct dma_buf *buf)
return to_intel_bo(buf->priv);
}
static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachment,
static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attach,
enum dma_data_direction dir)
{
struct drm_i915_gem_object *obj = dma_buf_to_obj(attachment->dmabuf);
struct sg_table *st;
struct drm_i915_gem_object *obj = dma_buf_to_obj(attach->dmabuf);
struct sg_table *sgt;
struct scatterlist *src, *dst;
int ret, i;
/* Copy sg so that we make an independent mapping */
st = kmalloc(sizeof(struct sg_table), GFP_KERNEL);
if (st == NULL) {
/*
* Make a copy of the object's sgt, so that we can make an independent
* mapping
*/
sgt = kmalloc(sizeof(*sgt), GFP_KERNEL);
if (!sgt) {
ret = -ENOMEM;
goto err;
}
ret = sg_alloc_table(st, obj->mm.pages->nents, GFP_KERNEL);
ret = sg_alloc_table(sgt, obj->mm.pages->orig_nents, GFP_KERNEL);
if (ret)
goto err_free;
src = obj->mm.pages->sgl;
dst = st->sgl;
for (i = 0; i < obj->mm.pages->nents; i++) {
dst = sgt->sgl;
for_each_sg(obj->mm.pages->sgl, src, obj->mm.pages->orig_nents, i) {
sg_set_page(dst, sg_page(src), src->length, 0);
dst = sg_next(dst);
src = sg_next(src);
}
ret = dma_map_sgtable(attachment->dev, st, dir, DMA_ATTR_SKIP_CPU_SYNC);
ret = dma_map_sgtable(attach->dev, sgt, dir, DMA_ATTR_SKIP_CPU_SYNC);
if (ret)
goto err_free_sg;
return st;
return sgt;
err_free_sg:
sg_free_table(st);
sg_free_table(sgt);
err_free:
kfree(st);
kfree(sgt);
err:
return ERR_PTR(ret);
}
@ -236,15 +237,15 @@ struct dma_buf *i915_gem_prime_export(struct drm_gem_object *gem_obj, int flags)
static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj)
{
struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct sg_table *pages;
struct sg_table *sgt;
unsigned int sg_page_sizes;
assert_object_held(obj);
pages = dma_buf_map_attachment(obj->base.import_attach,
DMA_BIDIRECTIONAL);
if (IS_ERR(pages))
return PTR_ERR(pages);
sgt = dma_buf_map_attachment(obj->base.import_attach,
DMA_BIDIRECTIONAL);
if (IS_ERR(sgt))
return PTR_ERR(sgt);
/*
* DG1 is special here since it still snoops transactions even with
@ -261,16 +262,16 @@ static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj)
(!HAS_LLC(i915) && !IS_DG1(i915)))
wbinvd_on_all_cpus();
sg_page_sizes = i915_sg_dma_sizes(pages->sgl);
__i915_gem_object_set_pages(obj, pages, sg_page_sizes);
sg_page_sizes = i915_sg_dma_sizes(sgt->sgl);
__i915_gem_object_set_pages(obj, sgt, sg_page_sizes);
return 0;
}
static void i915_gem_object_put_pages_dmabuf(struct drm_i915_gem_object *obj,
struct sg_table *pages)
struct sg_table *sgt)
{
dma_buf_unmap_attachment(obj->base.import_attach, pages,
dma_buf_unmap_attachment(obj->base.import_attach, sgt,
DMA_BIDIRECTIONAL);
}
@ -313,7 +314,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
get_dma_buf(dma_buf);
obj = i915_gem_object_alloc();
if (obj == NULL) {
if (!obj) {
ret = -ENOMEM;
goto fail_detach;
}

View File

@ -2954,11 +2954,6 @@ await_fence_array(struct i915_execbuffer *eb,
int err;
for (n = 0; n < eb->num_fences; n++) {
struct drm_syncobj *syncobj;
unsigned int flags;
syncobj = ptr_unpack_bits(eb->fences[n].syncobj, &flags, 2);
if (!eb->fences[n].dma_fence)
continue;

View File

@ -6,7 +6,6 @@
#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <linux/swiotlb.h>
#include "i915_drv.h"
#include "i915_gem.h"
@ -38,22 +37,12 @@ static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj)
struct scatterlist *sg;
unsigned int sg_page_sizes;
unsigned int npages;
int max_order;
int max_order = MAX_ORDER;
unsigned int max_segment;
gfp_t gfp;
max_order = MAX_ORDER;
#ifdef CONFIG_SWIOTLB
if (is_swiotlb_active(obj->base.dev->dev)) {
unsigned int max_segment;
max_segment = swiotlb_max_segment();
if (max_segment) {
max_segment = max_t(unsigned int, max_segment,
PAGE_SIZE) >> PAGE_SHIFT;
max_order = min(max_order, ilog2(max_segment));
}
}
#endif
max_segment = i915_sg_segment_size(i915->drm.dev) >> PAGE_SHIFT;
max_order = min(max_order, get_order(max_segment));
gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE;
if (IS_I965GM(i915) || IS_I965G(i915)) {

View File

@ -413,7 +413,7 @@ retry:
vma->mmo = mmo;
if (CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)
intel_wakeref_auto(&to_gt(i915)->userfault_wakeref,
intel_wakeref_auto(&i915->runtime_pm.userfault_wakeref,
msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND));
if (write) {
@ -557,11 +557,13 @@ void i915_gem_object_runtime_pm_release_mmap_offset(struct drm_i915_gem_object *
drm_vma_node_unmap(&bo->base.vma_node, bdev->dev_mapping);
if (obj->userfault_count) {
/* rpm wakeref provide exclusive access */
list_del(&obj->userfault_link);
obj->userfault_count = 0;
}
/*
* We have exclusive access here via runtime suspend. All other callers
* must first grab the rpm wakeref.
*/
GEM_BUG_ON(!obj->userfault_count);
list_del(&obj->userfault_link);
obj->userfault_count = 0;
}
void i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj)
@ -587,13 +589,6 @@ void i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj)
spin_lock(&obj->mmo.lock);
}
spin_unlock(&obj->mmo.lock);
if (obj->userfault_count) {
mutex_lock(&to_gt(to_i915(obj->base.dev))->lmem_userfault_lock);
list_del(&obj->userfault_link);
mutex_unlock(&to_gt(to_i915(obj->base.dev))->lmem_userfault_lock);
obj->userfault_count = 0;
}
}
static struct i915_mmap_offset *

View File

@ -458,6 +458,16 @@ i915_gem_object_read_from_page_iomap(struct drm_i915_gem_object *obj, u64 offset
io_mapping_unmap(src_map);
}
static bool object_has_mappable_iomem(struct drm_i915_gem_object *obj)
{
GEM_BUG_ON(!i915_gem_object_has_iomem(obj));
if (IS_DGFX(to_i915(obj->base.dev)))
return i915_ttm_resource_mappable(i915_gem_to_ttm(obj)->resource);
return true;
}
/**
* i915_gem_object_read_from_page - read data from the page of a GEM object
* @obj: GEM object to read from
@ -480,7 +490,7 @@ int i915_gem_object_read_from_page(struct drm_i915_gem_object *obj, u64 offset,
if (i915_gem_object_has_struct_page(obj))
i915_gem_object_read_from_page_kmap(obj, offset, dst, size);
else if (i915_gem_object_has_iomem(obj))
else if (i915_gem_object_has_iomem(obj) && object_has_mappable_iomem(obj))
i915_gem_object_read_from_page_iomap(obj, offset, dst, size);
else
return -ENODEV;

View File

@ -482,6 +482,10 @@ void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
void *__must_check i915_gem_object_pin_map_unlocked(struct drm_i915_gem_object *obj,
enum i915_map_type type);
enum i915_map_type i915_coherent_map_type(struct drm_i915_private *i915,
struct drm_i915_gem_object *obj,
bool always_coherent);
void __i915_gem_object_flush_map(struct drm_i915_gem_object *obj,
unsigned long offset,
unsigned long size);

View File

@ -466,6 +466,18 @@ void *i915_gem_object_pin_map_unlocked(struct drm_i915_gem_object *obj,
return ret;
}
enum i915_map_type i915_coherent_map_type(struct drm_i915_private *i915,
struct drm_i915_gem_object *obj,
bool always_coherent)
{
if (i915_gem_object_is_lmem(obj))
return I915_MAP_WC;
if (HAS_LLC(i915) || always_coherent)
return I915_MAP_WB;
else
return I915_MAP_WC;
}
void __i915_gem_object_flush_map(struct drm_i915_gem_object *obj,
unsigned long offset,
unsigned long size)

View File

@ -22,9 +22,12 @@
void i915_gem_suspend(struct drm_i915_private *i915)
{
struct intel_gt *gt;
unsigned int i;
GEM_TRACE("%s\n", dev_name(i915->drm.dev));
intel_wakeref_auto(&to_gt(i915)->userfault_wakeref, 0);
intel_wakeref_auto(&i915->runtime_pm.userfault_wakeref, 0);
flush_workqueue(i915->wq);
/*
@ -36,7 +39,8 @@ void i915_gem_suspend(struct drm_i915_private *i915)
* state. Fortunately, the kernel_context is disposable and we do
* not rely on its state.
*/
intel_gt_suspend_prepare(to_gt(i915));
for_each_gt(gt, i915, i)
intel_gt_suspend_prepare(gt);
i915_gem_drain_freed_objects(i915);
}
@ -131,7 +135,9 @@ void i915_gem_suspend_late(struct drm_i915_private *i915)
&i915->mm.purge_list,
NULL
}, **phase;
struct intel_gt *gt;
unsigned long flags;
unsigned int i;
bool flush = false;
/*
@ -154,7 +160,8 @@ void i915_gem_suspend_late(struct drm_i915_private *i915)
* machine in an unusable condition.
*/
intel_gt_suspend_late(to_gt(i915));
for_each_gt(gt, i915, i)
intel_gt_suspend_late(gt);
spin_lock_irqsave(&i915->mm.obj_lock, flags);
for (phase = phases; *phase; phase++) {
@ -212,7 +219,8 @@ int i915_gem_freeze_late(struct drm_i915_private *i915)
void i915_gem_resume(struct drm_i915_private *i915)
{
int ret;
struct intel_gt *gt;
int ret, i, j;
GEM_TRACE("%s\n", dev_name(i915->drm.dev));
@ -224,8 +232,25 @@ void i915_gem_resume(struct drm_i915_private *i915)
* guarantee that the context image is complete. So let's just reset
* it and start again.
*/
intel_gt_resume(to_gt(i915));
for_each_gt(gt, i915, i)
if (intel_gt_resume(gt))
goto err_wedged;
ret = lmem_restore(i915, I915_TTM_BACKUP_ALLOW_GPU);
GEM_WARN_ON(ret);
return;
err_wedged:
for_each_gt(gt, i915, j) {
if (!intel_gt_is_wedged(gt)) {
dev_err(i915->drm.dev,
"Failed to re-initialize GPU[%u], declaring it wedged!\n",
j);
intel_gt_set_wedged(gt);
}
if (j == i)
break;
}
}

View File

@ -194,7 +194,7 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
struct intel_memory_region *mem = obj->mm.region;
struct address_space *mapping = obj->base.filp->f_mapping;
const unsigned long page_count = obj->base.size / PAGE_SIZE;
unsigned int max_segment = i915_sg_segment_size();
unsigned int max_segment = i915_sg_segment_size(i915->drm.dev);
struct sg_table *st;
struct sgt_iter sgt_iter;
struct page *page;
@ -369,14 +369,14 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
__start_cpu_write(obj);
/*
* On non-LLC platforms, force the flush-on-acquire if this is ever
* On non-LLC igfx platforms, force the flush-on-acquire if this is ever
* swapped-in. Our async flush path is not trust worthy enough yet(and
* happens in the wrong order), and with some tricks it's conceivable
* for userspace to change the cache-level to I915_CACHE_NONE after the
* pages are swapped-in, and since execbuf binds the object before doing
* the async flush, we have a race window.
*/
if (!HAS_LLC(i915))
if (!HAS_LLC(i915) && !IS_DGFX(i915))
obj->cache_dirty = true;
}

View File

@ -77,22 +77,26 @@ void i915_gem_stolen_remove_node(struct drm_i915_private *i915,
mutex_unlock(&i915->mm.stolen_lock);
}
static int i915_adjust_stolen(struct drm_i915_private *i915,
struct resource *dsm)
static bool valid_stolen_size(struct drm_i915_private *i915, struct resource *dsm)
{
return (dsm->start != 0 || HAS_LMEMBAR_SMEM_STOLEN(i915)) && dsm->end > dsm->start;
}
static int adjust_stolen(struct drm_i915_private *i915,
struct resource *dsm)
{
struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
struct intel_uncore *uncore = ggtt->vm.gt->uncore;
struct resource *r;
if (dsm->start == 0 || dsm->end <= dsm->start)
if (!valid_stolen_size(i915, dsm))
return -EINVAL;
/*
* Make sure we don't clobber the GTT if it's within stolen memory
*
* TODO: We have yet too encounter the case where the GTT wasn't at the
* end of stolen. With that assumption we could simplify this.
*/
/* Make sure we don't clobber the GTT if it's within stolen memory */
if (GRAPHICS_VER(i915) <= 4 &&
!IS_G33(i915) && !IS_PINEVIEW(i915) && !IS_G4X(i915)) {
struct resource stolen[2] = {*dsm, *dsm};
@ -131,12 +135,25 @@ static int i915_adjust_stolen(struct drm_i915_private *i915,
}
}
if (!valid_stolen_size(i915, dsm))
return -EINVAL;
return 0;
}
static int request_smem_stolen(struct drm_i915_private *i915,
struct resource *dsm)
{
struct resource *r;
/*
* With stolen lmem, we don't need to check if the address range
* overlaps with the non-stolen system memory range, since lmem is local
* to the gpu.
* With stolen lmem, we don't need to request system memory for the
* address range since it's local to the gpu.
*
* Starting MTL, in IGFX devices the stolen memory is exposed via
* LMEMBAR and shall be considered similar to stolen lmem.
*/
if (HAS_LMEM(i915))
if (HAS_LMEM(i915) || HAS_LMEMBAR_SMEM_STOLEN(i915))
return 0;
/*
@ -371,8 +388,6 @@ static void icl_get_stolen_reserved(struct drm_i915_private *i915,
drm_dbg(&i915->drm, "GEN6_STOLEN_RESERVED = 0x%016llx\n", reg_val);
*base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK;
switch (reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK) {
case GEN8_STOLEN_RESERVED_1M:
*size = 1024 * 1024;
@ -390,41 +405,30 @@ static void icl_get_stolen_reserved(struct drm_i915_private *i915,
*size = 8 * 1024 * 1024;
MISSING_CASE(reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK);
}
if (HAS_LMEMBAR_SMEM_STOLEN(i915))
/* the base is initialized to stolen top so subtract size to get base */
*base -= *size;
else
*base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK;
}
static int i915_gem_init_stolen(struct intel_memory_region *mem)
/*
* Initialize i915->dsm_reserved to contain the reserved space within the Data
* Stolen Memory. This is a range on the top of DSM that is reserved, not to
* be used by driver, so must be excluded from the region passed to the
* allocator later. In the spec this is also called as WOPCM.
*
* Our expectation is that the reserved space is at the top of the stolen
* region, as it has been the case for every platform, and *never* at the
* bottom, so the calculation here can be simplified.
*/
static int init_reserved_stolen(struct drm_i915_private *i915)
{
struct drm_i915_private *i915 = mem->i915;
struct intel_uncore *uncore = &i915->uncore;
resource_size_t reserved_base, stolen_top;
resource_size_t reserved_total, reserved_size;
mutex_init(&i915->mm.stolen_lock);
if (intel_vgpu_active(i915)) {
drm_notice(&i915->drm,
"%s, disabling use of stolen memory\n",
"iGVT-g active");
return 0;
}
if (i915_vtd_active(i915) && GRAPHICS_VER(i915) < 8) {
drm_notice(&i915->drm,
"%s, disabling use of stolen memory\n",
"DMAR active");
return 0;
}
if (resource_size(&mem->region) == 0)
return 0;
i915->dsm = mem->region;
if (i915_adjust_stolen(i915, &i915->dsm))
return 0;
GEM_BUG_ON(i915->dsm.start == 0);
GEM_BUG_ON(i915->dsm.end <= i915->dsm.start);
resource_size_t reserved_size;
int ret = 0;
stolen_top = i915->dsm.end + 1;
reserved_base = stolen_top;
@ -455,17 +459,16 @@ static int i915_gem_init_stolen(struct intel_memory_region *mem)
&reserved_base, &reserved_size);
}
/*
* Our expectation is that the reserved space is at the top of the
* stolen region and *never* at the bottom. If we see !reserved_base,
* it likely means we failed to read the registers correctly.
*/
/* No reserved stolen */
if (reserved_base == stolen_top)
goto bail_out;
if (!reserved_base) {
drm_err(&i915->drm,
"inconsistent reservation %pa + %pa; ignoring\n",
&reserved_base, &reserved_size);
reserved_base = stolen_top;
reserved_size = 0;
ret = -EINVAL;
goto bail_out;
}
i915->dsm_reserved =
@ -475,19 +478,55 @@ static int i915_gem_init_stolen(struct intel_memory_region *mem)
drm_err(&i915->drm,
"Stolen reserved area %pR outside stolen memory %pR\n",
&i915->dsm_reserved, &i915->dsm);
return 0;
ret = -EINVAL;
goto bail_out;
}
return 0;
bail_out:
i915->dsm_reserved =
(struct resource)DEFINE_RES_MEM(reserved_base, 0);
return ret;
}
static int i915_gem_init_stolen(struct intel_memory_region *mem)
{
struct drm_i915_private *i915 = mem->i915;
mutex_init(&i915->mm.stolen_lock);
if (intel_vgpu_active(i915)) {
drm_notice(&i915->drm,
"%s, disabling use of stolen memory\n",
"iGVT-g active");
return -ENOSPC;
}
if (i915_vtd_active(i915) && GRAPHICS_VER(i915) < 8) {
drm_notice(&i915->drm,
"%s, disabling use of stolen memory\n",
"DMAR active");
return -ENOSPC;
}
if (adjust_stolen(i915, &mem->region))
return -ENOSPC;
if (request_smem_stolen(i915, &mem->region))
return -ENOSPC;
i915->dsm = mem->region;
if (init_reserved_stolen(i915))
return -ENOSPC;
/* Exclude the reserved region from driver use */
mem->region.end = reserved_base - 1;
mem->region.end = i915->dsm_reserved.start - 1;
mem->io_size = min(mem->io_size, resource_size(&mem->region));
/* It is possible for the reserved area to end before the end of stolen
* memory, so just consider the start. */
reserved_total = stolen_top - reserved_base;
i915->stolen_usable_size =
resource_size(&i915->dsm) - reserved_total;
i915->stolen_usable_size = resource_size(&mem->region);
drm_dbg(&i915->drm,
"Memory reserved for graphics device: %lluK, usable: %lluK\n",
@ -495,7 +534,7 @@ static int i915_gem_init_stolen(struct intel_memory_region *mem)
(u64)i915->stolen_usable_size >> 10);
if (i915->stolen_usable_size == 0)
return 0;
return -ENOSPC;
/* Basic memrange allocator for stolen space. */
drm_mm_init(&i915->mm.stolen, 0, i915->stolen_usable_size);
@ -733,11 +772,17 @@ i915_gem_object_create_stolen(struct drm_i915_private *i915,
static int init_stolen_smem(struct intel_memory_region *mem)
{
int err;
/*
* Initialise stolen early so that we may reserve preallocated
* objects for the BIOS to KMS transition.
*/
return i915_gem_init_stolen(mem);
err = i915_gem_init_stolen(mem);
if (err)
drm_dbg(&mem->i915->drm, "Skip stolen region: failed to setup\n");
return 0;
}
static int release_stolen_smem(struct intel_memory_region *mem)
@ -754,27 +799,26 @@ static const struct intel_memory_region_ops i915_region_stolen_smem_ops = {
static int init_stolen_lmem(struct intel_memory_region *mem)
{
struct drm_i915_private *i915 = mem->i915;
int err;
if (GEM_WARN_ON(resource_size(&mem->region) == 0))
return -ENODEV;
return 0;
/*
* TODO: For stolen lmem we mostly just care about populating the dsm
* related bits and setting up the drm_mm allocator for the range.
* Perhaps split up i915_gem_init_stolen() for this.
*/
err = i915_gem_init_stolen(mem);
if (err)
return err;
if (mem->io_size && !io_mapping_init_wc(&mem->iomap,
mem->io_start,
mem->io_size)) {
err = -EIO;
goto err_cleanup;
if (err) {
drm_dbg(&mem->i915->drm, "Skip stolen region: failed to setup\n");
return 0;
}
if (mem->io_size &&
!io_mapping_init_wc(&mem->iomap, mem->io_start, mem->io_size))
goto err_cleanup;
drm_dbg(&i915->drm, "Stolen Local memory IO start: %pa\n",
&mem->io_start);
drm_dbg(&i915->drm, "Stolen Local DSM base: %pa\n", &mem->region.start);
return 0;
err_cleanup:
@ -796,6 +840,29 @@ static const struct intel_memory_region_ops i915_region_stolen_lmem_ops = {
.init_object = _i915_gem_object_stolen_init,
};
static int mtl_get_gms_size(struct intel_uncore *uncore)
{
u16 ggc, gms;
ggc = intel_uncore_read16(uncore, GGC);
/* check GGMS, should be fixed 0x3 (8MB) */
if ((ggc & GGMS_MASK) != GGMS_MASK)
return -EIO;
/* return valid GMS value, -EIO if invalid */
gms = REG_FIELD_GET(GMS_MASK, ggc);
switch (gms) {
case 0x0 ... 0x04:
return gms * 32;
case 0xf0 ... 0xfe:
return (gms - 0xf0 + 1) * 4;
default:
MISSING_CASE(gms);
return -EIO;
}
}
struct intel_memory_region *
i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
u16 instance)
@ -806,6 +873,7 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
struct intel_memory_region *mem;
resource_size_t io_start, io_size;
resource_size_t min_page_size;
int ret;
if (WARN_ON_ONCE(instance))
return ERR_PTR(-ENODEV);
@ -813,12 +881,8 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
if (!i915_pci_resource_valid(pdev, GEN12_LMEM_BAR))
return ERR_PTR(-ENXIO);
/* Use DSM base address instead for stolen memory */
dsm_base = intel_uncore_read64(uncore, GEN12_DSMBASE);
if (IS_DG1(uncore->i915)) {
if (HAS_LMEMBAR_SMEM_STOLEN(i915) || IS_DG1(i915)) {
lmem_size = pci_resource_len(pdev, GEN12_LMEM_BAR);
if (WARN_ON(lmem_size < dsm_base))
return ERR_PTR(-ENODEV);
} else {
resource_size_t lmem_range;
@ -827,13 +891,39 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
lmem_size *= SZ_1G;
}
dsm_size = lmem_size - dsm_base;
if (pci_resource_len(pdev, GEN12_LMEM_BAR) < lmem_size) {
if (HAS_LMEMBAR_SMEM_STOLEN(i915)) {
/*
* MTL dsm size is in GGC register.
* Also MTL uses offset to DSMBASE in ptes, so i915
* uses dsm_base = 0 to setup stolen region.
*/
ret = mtl_get_gms_size(uncore);
if (ret < 0) {
drm_err(&i915->drm, "invalid MTL GGC register setting\n");
return ERR_PTR(ret);
}
dsm_base = 0;
dsm_size = (resource_size_t)(ret * SZ_1M);
GEM_BUG_ON(pci_resource_len(pdev, GEN12_LMEM_BAR) != SZ_256M);
GEM_BUG_ON((dsm_size + SZ_8M) > lmem_size);
} else {
/* Use DSM base address instead for stolen memory */
dsm_base = intel_uncore_read64(uncore, GEN12_DSMBASE) & GEN12_BDSM_MASK;
if (WARN_ON(lmem_size < dsm_base))
return ERR_PTR(-ENODEV);
dsm_size = lmem_size - dsm_base;
}
io_size = dsm_size;
if (HAS_LMEMBAR_SMEM_STOLEN(i915)) {
io_start = pci_resource_start(pdev, GEN12_LMEM_BAR) + SZ_8M;
} else if (pci_resource_len(pdev, GEN12_LMEM_BAR) < lmem_size) {
io_start = 0;
io_size = 0;
} else {
io_start = pci_resource_start(pdev, GEN12_LMEM_BAR) + dsm_base;
io_size = dsm_size;
}
min_page_size = HAS_64K_PAGES(i915) ? I915_GTT_PAGE_SIZE_64K :
@ -847,16 +937,6 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
if (IS_ERR(mem))
return mem;
/*
* TODO: consider creating common helper to just print all the
* interesting stuff from intel_memory_region, which we can use for all
* our probed regions.
*/
drm_dbg(&i915->drm, "Stolen Local memory IO start: %pa\n",
&mem->io_start);
drm_dbg(&i915->drm, "Stolen Local DSM base: %pa\n", &dsm_base);
intel_memory_region_set_name(mem, "stolen-local");
mem->private = true;
@ -881,6 +961,7 @@ i915_gem_stolen_smem_setup(struct drm_i915_private *i915, u16 type,
intel_memory_region_set_name(mem, "stolen-system");
mem->private = true;
return mem;
}
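
As a worked example of the GGC decode in mtl_get_gms_size() earlier in this file: a GMS field of 0x02 selects 2 * 32 = 64 MB of data stolen memory, while 0xf1 selects (0xf1 - 0xf0 + 1) * 4 = 8 MB, which dsm_size then scales by SZ_1M. A standalone sketch of the same mapping (illustrative only, not part of the series):

#include <stdio.h>

/* Mirror of the GMS-to-megabytes mapping used by mtl_get_gms_size(). */
static int gms_to_mb(unsigned int gms)
{
	if (gms <= 0x04)
		return gms * 32;
	if (gms >= 0xf0 && gms <= 0xfe)
		return (gms - 0xf0 + 1) * 4;
	return -1; /* invalid encoding */
}

int main(void)
{
	printf("GMS 0x02 -> %d MB\n", gms_to_mb(0x02)); /* 64 MB */
	printf("GMS 0xf1 -> %d MB\n", gms_to_mb(0xf1)); /*  8 MB */
	return 0;
}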

View File

@ -189,7 +189,7 @@ static int i915_ttm_tt_shmem_populate(struct ttm_device *bdev,
struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev);
struct intel_memory_region *mr = i915->mm.regions[INTEL_MEMORY_SYSTEM];
struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
const unsigned int max_segment = i915_sg_segment_size();
const unsigned int max_segment = i915_sg_segment_size(i915->drm.dev);
const size_t size = (size_t)ttm->num_pages << PAGE_SHIFT;
struct file *filp = i915_tt->filp;
struct sgt_iter sgt_iter;
@ -279,7 +279,7 @@ static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo,
struct i915_ttm_tt *i915_tt;
int ret;
if (!obj)
if (i915_ttm_is_ghost_object(bo))
return NULL;
i915_tt = kzalloc(sizeof(*i915_tt), GFP_KERNEL);
@ -362,7 +362,7 @@ static bool i915_ttm_eviction_valuable(struct ttm_buffer_object *bo,
{
struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
if (!obj)
if (i915_ttm_is_ghost_object(bo))
return false;
/*
@ -509,18 +509,9 @@ static int i915_ttm_shrink(struct drm_i915_gem_object *obj, unsigned int flags)
static void i915_ttm_delete_mem_notify(struct ttm_buffer_object *bo)
{
struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
intel_wakeref_t wakeref = 0;
if (bo->resource && likely(obj)) {
/* ttm_bo_release() already has dma_resv_lock */
if (i915_ttm_cpu_maps_iomem(bo->resource))
wakeref = intel_runtime_pm_get(&to_i915(obj->base.dev)->runtime_pm);
if (bo->resource && !i915_ttm_is_ghost_object(bo)) {
__i915_gem_object_pages_fini(obj);
if (wakeref)
intel_runtime_pm_put(&to_i915(obj->base.dev)->runtime_pm, wakeref);
i915_ttm_free_cached_io_rsgt(obj);
}
}
@ -538,7 +529,7 @@ static struct i915_refct_sgt *i915_ttm_tt_get_st(struct ttm_tt *ttm)
ret = sg_alloc_table_from_pages_segment(st,
ttm->pages, ttm->num_pages,
0, (unsigned long)ttm->num_pages << PAGE_SHIFT,
i915_sg_segment_size(), GFP_KERNEL);
i915_sg_segment_size(i915_tt->dev), GFP_KERNEL);
if (ret) {
st->sgl = NULL;
return ERR_PTR(ret);
@ -624,7 +615,7 @@ static void i915_ttm_swap_notify(struct ttm_buffer_object *bo)
struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
int ret;
if (!obj)
if (i915_ttm_is_ghost_object(bo))
return;
ret = i915_ttm_move_notify(bo);
@ -657,7 +648,7 @@ static int i915_ttm_io_mem_reserve(struct ttm_device *bdev, struct ttm_resource
struct drm_i915_gem_object *obj = i915_ttm_to_gem(mem->bo);
bool unknown_state;
if (!obj)
if (i915_ttm_is_ghost_object(mem->bo))
return -EINVAL;
if (!kref_get_unless_zero(&obj->base.refcount))
@ -690,7 +681,7 @@ static unsigned long i915_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
unsigned long base;
unsigned int ofs;
GEM_BUG_ON(!obj);
GEM_BUG_ON(i915_ttm_is_ghost_object(bo));
GEM_WARN_ON(bo->ttm);
base = obj->mm.region->iomap.base - obj->mm.region->region.start;
@ -699,6 +690,50 @@ static unsigned long i915_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
return ((base + sg_dma_address(sg)) >> PAGE_SHIFT) + ofs;
}
static int i915_ttm_access_memory(struct ttm_buffer_object *bo,
unsigned long offset, void *buf,
int len, int write)
{
struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
resource_size_t iomap = obj->mm.region->iomap.base -
obj->mm.region->region.start;
unsigned long page = offset >> PAGE_SHIFT;
unsigned long bytes_left = len;
/*
* TODO: For now just let it fail if the resource is non-mappable,
* otherwise we need to perform the memcpy from the gpu here, without
* interfering with the object (like moving the entire thing).
*/
if (!i915_ttm_resource_mappable(bo->resource))
return -EIO;
offset -= page << PAGE_SHIFT;
do {
unsigned long bytes = min(bytes_left, PAGE_SIZE - offset);
void __iomem *ptr;
dma_addr_t daddr;
daddr = i915_gem_object_get_dma_address(obj, page);
ptr = ioremap_wc(iomap + daddr + offset, bytes);
if (!ptr)
return -EIO;
if (write)
memcpy_toio(ptr, buf, bytes);
else
memcpy_fromio(buf, ptr, bytes);
iounmap(ptr);
page++;
buf += bytes;
bytes_left -= bytes;
offset = 0;
} while (bytes_left);
return len;
}
/*
* All callbacks need to take care not to downcast a struct ttm_buffer_object
* without checking its subclass, since it might be a TTM ghost object.
@ -715,6 +750,7 @@ static struct ttm_device_funcs i915_ttm_bo_driver = {
.delete_mem_notify = i915_ttm_delete_mem_notify,
.io_mem_reserve = i915_ttm_io_mem_reserve,
.io_mem_pfn = i915_ttm_io_mem_pfn,
.access_memory = i915_ttm_access_memory,
};
/**
@ -990,13 +1026,12 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf)
struct vm_area_struct *area = vmf->vma;
struct ttm_buffer_object *bo = area->vm_private_data;
struct drm_device *dev = bo->base.dev;
struct drm_i915_gem_object *obj;
struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
intel_wakeref_t wakeref = 0;
vm_fault_t ret;
int idx;
obj = i915_ttm_to_gem(bo);
if (!obj)
if (i915_ttm_is_ghost_object(bo))
return VM_FAULT_SIGBUS;
/* Sanity check that we allow writing into this object */
@ -1035,7 +1070,8 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf)
}
if (err) {
drm_dbg(dev, "Unable to make resource CPU accessible\n");
drm_dbg(dev, "Unable to make resource CPU accessible(err = %pe)\n",
ERR_PTR(err));
dma_resv_unlock(bo->base.resv);
ret = VM_FAULT_SIGBUS;
goto out_rpm;
@ -1053,16 +1089,19 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf)
if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
goto out_rpm;
/* ttm_bo_vm_reserve() already has dma_resv_lock */
/*
* ttm_bo_vm_reserve() already has dma_resv_lock.
* userfault_count is protected by dma_resv lock and rpm wakeref.
*/
if (ret == VM_FAULT_NOPAGE && wakeref && !obj->userfault_count) {
obj->userfault_count = 1;
mutex_lock(&to_gt(to_i915(obj->base.dev))->lmem_userfault_lock);
list_add(&obj->userfault_link, &to_gt(to_i915(obj->base.dev))->lmem_userfault_list);
mutex_unlock(&to_gt(to_i915(obj->base.dev))->lmem_userfault_lock);
spin_lock(&to_i915(obj->base.dev)->runtime_pm.lmem_userfault_lock);
list_add(&obj->userfault_link, &to_i915(obj->base.dev)->runtime_pm.lmem_userfault_list);
spin_unlock(&to_i915(obj->base.dev)->runtime_pm.lmem_userfault_lock);
}
if (wakeref & CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)
intel_wakeref_auto(&to_gt(to_i915(obj->base.dev))->userfault_wakeref,
intel_wakeref_auto(&to_i915(obj->base.dev)->runtime_pm.userfault_wakeref,
msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND));
i915_ttm_adjust_lru(obj);
@ -1094,7 +1133,7 @@ static void ttm_vm_open(struct vm_area_struct *vma)
struct drm_i915_gem_object *obj =
i915_ttm_to_gem(vma->vm_private_data);
GEM_BUG_ON(!obj);
GEM_BUG_ON(i915_ttm_is_ghost_object(vma->vm_private_data));
i915_gem_object_get(obj);
}
@ -1103,7 +1142,7 @@ static void ttm_vm_close(struct vm_area_struct *vma)
struct drm_i915_gem_object *obj =
i915_ttm_to_gem(vma->vm_private_data);
GEM_BUG_ON(!obj);
GEM_BUG_ON(i915_ttm_is_ghost_object(vma->vm_private_data));
i915_gem_object_put(obj);
}
@ -1124,7 +1163,27 @@ static u64 i915_ttm_mmap_offset(struct drm_i915_gem_object *obj)
static void i915_ttm_unmap_virtual(struct drm_i915_gem_object *obj)
{
struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
intel_wakeref_t wakeref = 0;
assert_object_held_shared(obj);
if (i915_ttm_cpu_maps_iomem(bo->resource)) {
wakeref = intel_runtime_pm_get(&to_i915(obj->base.dev)->runtime_pm);
/* userfault_count is protected by obj lock and rpm wakeref. */
if (obj->userfault_count) {
spin_lock(&to_i915(obj->base.dev)->runtime_pm.lmem_userfault_lock);
list_del(&obj->userfault_link);
spin_unlock(&to_i915(obj->base.dev)->runtime_pm.lmem_userfault_lock);
obj->userfault_count = 0;
}
}
ttm_bo_unmap_virtual(i915_gem_to_ttm(obj));
if (wakeref)
intel_runtime_pm_put(&to_i915(obj->base.dev)->runtime_pm, wakeref);
}
static const struct drm_i915_gem_object_ops i915_gem_ttm_obj_ops = {

View File

@ -27,19 +27,27 @@ i915_gem_to_ttm(struct drm_i915_gem_object *obj)
*/
void i915_ttm_bo_destroy(struct ttm_buffer_object *bo);
/**
* i915_ttm_is_ghost_object - Check if the ttm bo is a ghost object.
* @bo: Pointer to the ttm buffer object
*
* Return: True if the ttm bo is not a i915 object but a ghost ttm object,
* False otherwise.
*/
static inline bool i915_ttm_is_ghost_object(struct ttm_buffer_object *bo)
{
return bo->destroy != i915_ttm_bo_destroy;
}
/**
* i915_ttm_to_gem - Convert a struct ttm_buffer_object to an embedding
* struct drm_i915_gem_object.
*
* Return: Pointer to the embedding struct ttm_buffer_object, or NULL
* if the object was not an i915 ttm object.
* Return: Pointer to the embedding struct ttm_buffer_object.
*/
static inline struct drm_i915_gem_object *
i915_ttm_to_gem(struct ttm_buffer_object *bo)
{
if (bo->destroy != i915_ttm_bo_destroy)
return NULL;
return container_of(bo, struct drm_i915_gem_object, __do_not_access);
}
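
Since i915_ttm_to_gem() now always returns the embedding object, callers in the hunks above (for example i915_ttm_swap_notify() and vm_fault_ttm()) switch to an explicit ghost-object check before converting. A sketch of that caller pattern, assuming it is compiled inside the i915 tree; the function name here is hypothetical:

#include "gem/i915_gem_ttm.h"

static void example_bo_callback(struct ttm_buffer_object *bo)
{
	struct drm_i915_gem_object *obj;

	/* Ghost objects carry no i915 state; bail out before converting. */
	if (i915_ttm_is_ghost_object(bo))
		return;

	/* Safe unconditionally now: never NULL for non-ghost objects. */
	obj = i915_ttm_to_gem(bo);

	/* ... operate on obj ... */
}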

View File

@ -560,7 +560,7 @@ int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
bool clear;
int ret;
if (GEM_WARN_ON(!obj)) {
if (GEM_WARN_ON(i915_ttm_is_ghost_object(bo))) {
ttm_bo_move_null(bo, dst_mem);
return 0;
}

View File

@ -129,7 +129,7 @@ static void i915_gem_object_userptr_drop_ref(struct drm_i915_gem_object *obj)
static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
{
const unsigned long num_pages = obj->base.size >> PAGE_SHIFT;
unsigned int max_segment = i915_sg_segment_size();
unsigned int max_segment = i915_sg_segment_size(obj->base.dev->dev);
struct sg_table *st;
unsigned int sg_page_sizes;
struct page **pvec;
@ -292,7 +292,7 @@ int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj)
if (!i915_gem_object_is_readonly(obj))
gup_flags |= FOLL_WRITE;
pinned = ret = 0;
pinned = 0;
while (pinned < num_pages) {
ret = pin_user_pages_fast(obj->userptr.ptr + pinned * PAGE_SIZE,
num_pages - pinned, gup_flags,
@ -302,7 +302,6 @@ int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj)
pinned += ret;
}
ret = 0;
ret = i915_gem_object_lock_interruptible(obj, NULL);
if (ret)

View File

@ -1161,7 +1161,8 @@ static int igt_write_huge(struct drm_i915_private *i915,
GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
size = obj->base.size;
if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K)
if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K &&
!HAS_64K_PAGES(i915))
size = round_up(size, I915_GTT_PAGE_SIZE_2M);
n = 0;
@ -1214,6 +1215,10 @@ static int igt_write_huge(struct drm_i915_private *i915,
* size and ensure the vma offset is at the start of the pt
* boundary, however to improve coverage we opt for testing both
* aligned and unaligned offsets.
*
* With PS64 this is no longer the case, but to ensure we
* sometimes get the compact layout for smaller objects, apply
* the round_up anyway.
*/
if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K)
offset_low = round_down(offset_low,
@ -1411,6 +1416,7 @@ static int igt_ppgtt_sanity_check(void *arg)
{ SZ_2M + SZ_4K, SZ_64K | SZ_4K },
{ SZ_2M + SZ_4K, SZ_2M | SZ_4K },
{ SZ_2M + SZ_64K, SZ_2M | SZ_64K },
{ SZ_2M + SZ_64K, SZ_64K },
};
int i, j;
int err;
@ -1540,6 +1546,154 @@ out_put:
return err;
}
static int igt_ppgtt_mixed(void *arg)
{
struct drm_i915_private *i915 = arg;
const unsigned long flags = PIN_OFFSET_FIXED | PIN_USER;
struct drm_i915_gem_object *obj, *on;
struct i915_gem_engines *engines;
struct i915_gem_engines_iter it;
struct i915_address_space *vm;
struct i915_gem_context *ctx;
struct intel_context *ce;
struct file *file;
I915_RND_STATE(prng);
LIST_HEAD(objects);
struct intel_memory_region *mr;
struct i915_vma *vma;
unsigned int count;
u32 i, addr;
int *order;
int n, err;
/*
* Sanity check mixing 4K and 64K pages within the same page-table via
* the new PS64 TLB hint.
*/
if (!HAS_64K_PAGES(i915)) {
pr_info("device lacks PS64, skipping\n");
return 0;
}
file = mock_file(i915);
if (IS_ERR(file))
return PTR_ERR(file);
ctx = hugepage_ctx(i915, file);
if (IS_ERR(ctx)) {
err = PTR_ERR(ctx);
goto out;
}
vm = i915_gem_context_get_eb_vm(ctx);
i = 0;
addr = 0;
do {
u32 sz;
sz = i915_prandom_u32_max_state(SZ_4M, &prng);
sz = max_t(u32, sz, SZ_4K);
mr = i915->mm.regions[INTEL_REGION_LMEM_0];
if (i & 1)
mr = i915->mm.regions[INTEL_REGION_SMEM];
obj = i915_gem_object_create_region(mr, sz, 0, 0);
if (IS_ERR(obj)) {
err = PTR_ERR(obj);
goto out_vm;
}
list_add_tail(&obj->st_link, &objects);
vma = i915_vma_instance(obj, vm, NULL);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
goto err_put;
}
addr = round_up(addr, mr->min_page_size);
err = i915_vma_pin(vma, 0, 0, addr | flags);
if (err)
goto err_put;
if (mr->type == INTEL_MEMORY_LOCAL &&
(vma->resource->page_sizes_gtt & I915_GTT_PAGE_SIZE_4K)) {
err = -EINVAL;
goto err_put;
}
addr += obj->base.size;
i++;
} while (addr <= SZ_16M);
n = 0;
count = 0;
for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
count++;
if (!intel_engine_can_store_dword(ce->engine))
continue;
n++;
}
i915_gem_context_unlock_engines(ctx);
if (!n)
goto err_put;
order = i915_random_order(count * count, &prng);
if (!order) {
err = -ENOMEM;
goto err_put;
}
i = 0;
addr = 0;
engines = i915_gem_context_lock_engines(ctx);
list_for_each_entry(obj, &objects, st_link) {
u32 rnd = i915_prandom_u32_max_state(UINT_MAX, &prng);
addr = round_up(addr, obj->mm.region->min_page_size);
ce = engines->engines[order[i] % engines->num_engines];
i = (i + 1) % (count * count);
if (!ce || !intel_engine_can_store_dword(ce->engine))
continue;
err = __igt_write_huge(ce, obj, obj->base.size, addr, 0, rnd);
if (err)
break;
err = __igt_write_huge(ce, obj, obj->base.size, addr,
offset_in_page(rnd) / sizeof(u32), rnd + 1);
if (err)
break;
err = __igt_write_huge(ce, obj, obj->base.size, addr,
(PAGE_SIZE / sizeof(u32)) - 1,
rnd + 2);
if (err)
break;
addr += obj->base.size;
cond_resched();
}
i915_gem_context_unlock_engines(ctx);
kfree(order);
err_put:
list_for_each_entry_safe(obj, on, &objects, st_link) {
list_del(&obj->st_link);
i915_gem_object_put(obj);
}
out_vm:
i915_vm_put(vm);
out:
fput(file);
return err;
}
static int igt_tmpfs_fallback(void *arg)
{
struct drm_i915_private *i915 = arg;
@ -1803,6 +1957,7 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915)
SUBTEST(igt_ppgtt_smoke_huge),
SUBTEST(igt_ppgtt_sanity_check),
SUBTEST(igt_ppgtt_compact),
SUBTEST(igt_ppgtt_mixed),
};
if (!HAS_PPGTT(i915)) {

View File

@ -179,97 +179,108 @@ out_file:
}
struct parallel_switch {
struct task_struct *tsk;
struct kthread_worker *worker;
struct kthread_work work;
struct intel_context *ce[2];
int result;
};
static int __live_parallel_switch1(void *data)
static void __live_parallel_switch1(struct kthread_work *work)
{
struct parallel_switch *arg = data;
struct parallel_switch *arg =
container_of(work, typeof(*arg), work);
IGT_TIMEOUT(end_time);
unsigned long count;
count = 0;
arg->result = 0;
do {
struct i915_request *rq = NULL;
int err, n;
int n;
err = 0;
for (n = 0; !err && n < ARRAY_SIZE(arg->ce); n++) {
for (n = 0; !arg->result && n < ARRAY_SIZE(arg->ce); n++) {
struct i915_request *prev = rq;
rq = i915_request_create(arg->ce[n]);
if (IS_ERR(rq)) {
i915_request_put(prev);
return PTR_ERR(rq);
arg->result = PTR_ERR(rq);
break;
}
i915_request_get(rq);
if (prev) {
err = i915_request_await_dma_fence(rq, &prev->fence);
arg->result =
i915_request_await_dma_fence(rq,
&prev->fence);
i915_request_put(prev);
}
i915_request_add(rq);
}
if (IS_ERR_OR_NULL(rq))
break;
if (i915_request_wait(rq, 0, HZ) < 0)
err = -ETIME;
arg->result = -ETIME;
i915_request_put(rq);
if (err)
return err;
count++;
} while (!__igt_timeout(end_time, NULL));
} while (!arg->result && !__igt_timeout(end_time, NULL));
pr_info("%s: %lu switches (sync)\n", arg->ce[0]->engine->name, count);
return 0;
pr_info("%s: %lu switches (sync) <%d>\n",
arg->ce[0]->engine->name, count, arg->result);
}
static int __live_parallel_switchN(void *data)
static void __live_parallel_switchN(struct kthread_work *work)
{
struct parallel_switch *arg = data;
struct parallel_switch *arg =
container_of(work, typeof(*arg), work);
struct i915_request *rq = NULL;
IGT_TIMEOUT(end_time);
unsigned long count;
int n;
count = 0;
arg->result = 0;
do {
for (n = 0; n < ARRAY_SIZE(arg->ce); n++) {
for (n = 0; !arg->result && n < ARRAY_SIZE(arg->ce); n++) {
struct i915_request *prev = rq;
int err = 0;
rq = i915_request_create(arg->ce[n]);
if (IS_ERR(rq)) {
i915_request_put(prev);
return PTR_ERR(rq);
arg->result = PTR_ERR(rq);
break;
}
i915_request_get(rq);
if (prev) {
err = i915_request_await_dma_fence(rq, &prev->fence);
arg->result =
i915_request_await_dma_fence(rq,
&prev->fence);
i915_request_put(prev);
}
i915_request_add(rq);
if (err) {
i915_request_put(rq);
return err;
}
}
count++;
} while (!__igt_timeout(end_time, NULL));
i915_request_put(rq);
} while (!arg->result && !__igt_timeout(end_time, NULL));
pr_info("%s: %lu switches (many)\n", arg->ce[0]->engine->name, count);
return 0;
if (!IS_ERR_OR_NULL(rq))
i915_request_put(rq);
pr_info("%s: %lu switches (many) <%d>\n",
arg->ce[0]->engine->name, count, arg->result);
}
static int live_parallel_switch(void *arg)
{
struct drm_i915_private *i915 = arg;
static int (* const func[])(void *arg) = {
static void (* const func[])(struct kthread_work *) = {
__live_parallel_switch1,
__live_parallel_switchN,
NULL,
@ -277,7 +288,7 @@ static int live_parallel_switch(void *arg)
struct parallel_switch *data = NULL;
struct i915_gem_engines *engines;
struct i915_gem_engines_iter it;
int (* const *fn)(void *arg);
void (* const *fn)(struct kthread_work *);
struct i915_gem_context *ctx;
struct intel_context *ce;
struct file *file;
@ -348,9 +359,22 @@ static int live_parallel_switch(void *arg)
}
}
for (n = 0; n < count; n++) {
struct kthread_worker *worker;
if (!data[n].ce[0])
continue;
worker = kthread_create_worker(0, "igt/parallel:%s",
data[n].ce[0]->engine->name);
if (IS_ERR(worker))
goto out;
data[n].worker = worker;
}
for (fn = func; !err && *fn; fn++) {
struct igt_live_test t;
int n;
err = igt_live_test_begin(&t, i915, __func__, "");
if (err)
@ -360,30 +384,17 @@ static int live_parallel_switch(void *arg)
if (!data[n].ce[0])
continue;
data[n].tsk = kthread_run(*fn, &data[n],
"igt/parallel:%s",
data[n].ce[0]->engine->name);
if (IS_ERR(data[n].tsk)) {
err = PTR_ERR(data[n].tsk);
break;
}
get_task_struct(data[n].tsk);
data[n].result = 0;
kthread_init_work(&data[n].work, *fn);
kthread_queue_work(data[n].worker, &data[n].work);
}
yield(); /* start all threads before we kthread_stop() */
for (n = 0; n < count; n++) {
int status;
if (IS_ERR_OR_NULL(data[n].tsk))
continue;
status = kthread_stop(data[n].tsk);
if (status && !err)
err = status;
put_task_struct(data[n].tsk);
data[n].tsk = NULL;
if (data[n].ce[0]) {
kthread_flush_work(&data[n].work);
if (data[n].result && !err)
err = data[n].result;
}
}
if (igt_live_test_end(&t))
@ -399,6 +410,9 @@ out:
intel_context_unpin(data[n].ce[m]);
intel_context_put(data[n].ce[m]);
}
if (data[n].worker)
kthread_destroy_worker(data[n].worker);
}
kfree(data);
out_file:

View File

@ -6,8 +6,12 @@
#include "i915_drv.h"
#include "i915_selftest.h"
#include "gem/i915_gem_context.h"
#include "mock_context.h"
#include "mock_dmabuf.h"
#include "igt_gem_utils.h"
#include "selftests/mock_drm.h"
#include "selftests/mock_gem_device.h"
static int igt_dmabuf_export(void *arg)
@ -140,6 +144,75 @@ out_ret:
return err;
}
static int verify_access(struct drm_i915_private *i915,
struct drm_i915_gem_object *native_obj,
struct drm_i915_gem_object *import_obj)
{
struct i915_gem_engines_iter it;
struct i915_gem_context *ctx;
struct intel_context *ce;
struct i915_vma *vma;
struct file *file;
u32 *vaddr;
int err = 0, i;
file = mock_file(i915);
if (IS_ERR(file))
return PTR_ERR(file);
ctx = live_context(i915, file);
if (IS_ERR(ctx)) {
err = PTR_ERR(ctx);
goto out_file;
}
for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
if (intel_engine_can_store_dword(ce->engine))
break;
}
i915_gem_context_unlock_engines(ctx);
if (!ce)
goto out_file;
vma = i915_vma_instance(import_obj, ce->vm, NULL);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
goto out_file;
}
err = i915_vma_pin(vma, 0, 0, PIN_USER);
if (err)
goto out_file;
err = igt_gpu_fill_dw(ce, vma, 0,
vma->size >> PAGE_SHIFT, 0xdeadbeaf);
i915_vma_unpin(vma);
if (err)
goto out_file;
err = i915_gem_object_wait(import_obj, 0, MAX_SCHEDULE_TIMEOUT);
if (err)
goto out_file;
vaddr = i915_gem_object_pin_map_unlocked(native_obj, I915_MAP_WB);
if (IS_ERR(vaddr)) {
err = PTR_ERR(vaddr);
goto out_file;
}
for (i = 0; i < native_obj->base.size / sizeof(u32); i += PAGE_SIZE / sizeof(u32)) {
if (vaddr[i] != 0xdeadbeaf) {
pr_err("Data mismatch [%d]=%u\n", i, vaddr[i]);
err = -EINVAL;
goto out_file;
}
}
out_file:
fput(file);
return err;
}
static int igt_dmabuf_import_same_driver(struct drm_i915_private *i915,
struct intel_memory_region **regions,
unsigned int num_regions)
@ -154,7 +227,7 @@ static int igt_dmabuf_import_same_driver(struct drm_i915_private *i915,
force_different_devices = true;
obj = __i915_gem_object_create_user(i915, PAGE_SIZE,
obj = __i915_gem_object_create_user(i915, SZ_8M,
regions, num_regions);
if (IS_ERR(obj)) {
pr_err("__i915_gem_object_create_user failed with err=%ld\n",
@ -206,6 +279,10 @@ static int igt_dmabuf_import_same_driver(struct drm_i915_private *i915,
i915_gem_object_unlock(import_obj);
err = verify_access(i915, obj, import_obj);
if (err)
goto out_import;
/* Now try to fake an importer */
import_attach = dma_buf_attach(dmabuf, obj->base.dev->dev);
if (IS_ERR(import_attach)) {

View File

@ -8,6 +8,7 @@
#include <linux/prime_numbers.h>
#include "gem/i915_gem_internal.h"
#include "gem/i915_gem_lmem.h"
#include "gem/i915_gem_region.h"
#include "gem/i915_gem_ttm.h"
#include "gem/i915_gem_ttm_move.h"

View File

@ -396,15 +396,17 @@ int gen8_emit_init_breadcrumb(struct i915_request *rq)
return 0;
}
static int __gen125_emit_bb_start(struct i915_request *rq,
u64 offset, u32 len,
const unsigned int flags,
u32 arb)
static int __xehp_emit_bb_start(struct i915_request *rq,
u64 offset, u32 len,
const unsigned int flags,
u32 arb)
{
struct intel_context *ce = rq->context;
u32 wa_offset = lrc_indirect_bb(ce);
u32 *cs;
GEM_BUG_ON(!ce->wa_bb_page);
cs = intel_ring_begin(rq, 12);
if (IS_ERR(cs))
return PTR_ERR(cs);
@ -435,18 +437,18 @@ static int __gen125_emit_bb_start(struct i915_request *rq,
return 0;
}
int gen125_emit_bb_start_noarb(struct i915_request *rq,
u64 offset, u32 len,
const unsigned int flags)
int xehp_emit_bb_start_noarb(struct i915_request *rq,
u64 offset, u32 len,
const unsigned int flags)
{
return __gen125_emit_bb_start(rq, offset, len, flags, MI_ARB_DISABLE);
return __xehp_emit_bb_start(rq, offset, len, flags, MI_ARB_DISABLE);
}
int gen125_emit_bb_start(struct i915_request *rq,
u64 offset, u32 len,
const unsigned int flags)
int xehp_emit_bb_start(struct i915_request *rq,
u64 offset, u32 len,
const unsigned int flags)
{
return __gen125_emit_bb_start(rq, offset, len, flags, MI_ARB_ENABLE);
return __xehp_emit_bb_start(rq, offset, len, flags, MI_ARB_ENABLE);
}
int gen8_emit_bb_start_noarb(struct i915_request *rq,
@ -583,6 +585,8 @@ u32 *gen8_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs)
u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
{
cs = gen8_emit_pipe_control(cs,
PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_TLB_INVALIDATE |
PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
PIPE_CONTROL_DC_FLUSH_ENABLE,
@ -600,15 +604,21 @@ u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
{
cs = gen8_emit_pipe_control(cs,
PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_TLB_INVALIDATE |
PIPE_CONTROL_TILE_CACHE_FLUSH |
PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
PIPE_CONTROL_DC_FLUSH_ENABLE,
0);
/*XXX: Look at gen8_emit_fini_breadcrumb_rcs */
cs = gen8_emit_ggtt_write_rcs(cs,
rq->fence.seqno,
hwsp_offset(rq),
PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_TILE_CACHE_FLUSH |
PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
PIPE_CONTROL_DC_FLUSH_ENABLE |
PIPE_CONTROL_FLUSH_ENABLE);
PIPE_CONTROL_FLUSH_ENABLE |
PIPE_CONTROL_CS_STALL);
return gen8_emit_fini_breadcrumb_tail(rq, cs);
}
@ -715,6 +725,7 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
{
struct drm_i915_private *i915 = rq->engine->i915;
u32 flags = (PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_TLB_INVALIDATE |
PIPE_CONTROL_TILE_CACHE_FLUSH |
PIPE_CONTROL_FLUSH_L3 |
PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
@ -731,11 +742,15 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
else if (rq->engine->class == COMPUTE_CLASS)
flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
cs = gen12_emit_pipe_control(cs, PIPE_CONTROL0_HDC_PIPELINE_FLUSH, flags, 0);
/*XXX: Look at gen8_emit_fini_breadcrumb_rcs */
cs = gen12_emit_ggtt_write_rcs(cs,
rq->fence.seqno,
hwsp_offset(rq),
PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
flags);
0,
PIPE_CONTROL_FLUSH_ENABLE |
PIPE_CONTROL_CS_STALL);
return gen12_emit_fini_breadcrumb_tail(rq, cs);
}

View File

@ -32,12 +32,12 @@ int gen8_emit_bb_start(struct i915_request *rq,
u64 offset, u32 len,
const unsigned int flags);
int gen125_emit_bb_start_noarb(struct i915_request *rq,
u64 offset, u32 len,
const unsigned int flags);
int gen125_emit_bb_start(struct i915_request *rq,
u64 offset, u32 len,
const unsigned int flags);
int xehp_emit_bb_start_noarb(struct i915_request *rq,
u64 offset, u32 len,
const unsigned int flags);
int xehp_emit_bb_start(struct i915_request *rq,
u64 offset, u32 len,
const unsigned int flags);
u32 *gen8_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs);
u32 *gen12_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs);

View File

@ -476,6 +476,7 @@ xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm,
const gen8_pte_t pte_encode = vm->pte_encode(0, cache_level, flags);
unsigned int rem = sg_dma_len(iter->sg);
u64 start = vma_res->start;
u64 end = start + vma_res->vma_size;
GEM_BUG_ON(!i915_vm_is_4lvl(vm));
@ -489,9 +490,10 @@ xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm,
gen8_pte_t encode = pte_encode;
unsigned int page_size;
gen8_pte_t *vaddr;
u16 index, max;
u16 index, max, nent, i;
max = I915_PDES;
nent = 1;
if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
@ -503,25 +505,37 @@ xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm,
vaddr = px_vaddr(pd);
} else {
if (encode & GEN12_PPGTT_PTE_LM) {
GEM_BUG_ON(__gen8_pte_index(start, 0) % 16);
GEM_BUG_ON(rem < I915_GTT_PAGE_SIZE_64K);
GEM_BUG_ON(!IS_ALIGNED(iter->dma,
I915_GTT_PAGE_SIZE_64K));
index = __gen8_pte_index(start, 0);
page_size = I915_GTT_PAGE_SIZE;
index = __gen8_pte_index(start, 0) / 16;
page_size = I915_GTT_PAGE_SIZE_64K;
if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_64K) {
/*
* Device local-memory on these platforms should
* always use 64K pages or larger (including GTT
* alignment), therefore if we know the whole
* page-table needs to be filled we can always
* safely use the compact-layout. Otherwise fall
* back to the TLB hint with PS64. If this is
* system memory we only bother with PS64.
*/
if ((encode & GEN12_PPGTT_PTE_LM) &&
end - start >= SZ_2M && !index) {
index = __gen8_pte_index(start, 0) / 16;
page_size = I915_GTT_PAGE_SIZE_64K;
max /= 16;
max /= 16;
vaddr = px_vaddr(pd);
vaddr[__gen8_pte_index(start, 1)] |= GEN12_PDE_64K;
vaddr = px_vaddr(pd);
vaddr[__gen8_pte_index(start, 1)] |= GEN12_PDE_64K;
pt->is_compact = true;
} else {
GEM_BUG_ON(pt->is_compact);
index = __gen8_pte_index(start, 0);
page_size = I915_GTT_PAGE_SIZE;
pt->is_compact = true;
} else if (IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
rem >= I915_GTT_PAGE_SIZE_64K &&
!(index % 16)) {
encode |= GEN12_PTE_PS64;
page_size = I915_GTT_PAGE_SIZE_64K;
nent = 16;
}
}
vaddr = px_vaddr(pt);
@ -529,7 +543,12 @@ xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm,
do {
GEM_BUG_ON(rem < page_size);
vaddr[index++] = encode | iter->dma;
for (i = 0; i < nent; i++) {
vaddr[index++] =
encode | (iter->dma + i *
I915_GTT_PAGE_SIZE);
}
start += page_size;
iter->dma += page_size;
@ -745,6 +764,8 @@ static void __xehpsdv_ppgtt_insert_entry_lm(struct i915_address_space *vm,
GEM_BUG_ON(!IS_ALIGNED(addr, SZ_64K));
GEM_BUG_ON(!IS_ALIGNED(offset, SZ_64K));
/* XXX: we don't strictly need to use this layout */
if (!pt->is_compact) {
vaddr = px_vaddr(pd);
vaddr[gen8_pd_index(idx, 1)] |= GEN12_PDE_64K;
@ -929,29 +950,18 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
*/
ppgtt->vm.has_read_only = !IS_GRAPHICS_VER(gt->i915, 11, 12);
if (HAS_LMEM(gt->i915)) {
if (HAS_LMEM(gt->i915))
ppgtt->vm.alloc_pt_dma = alloc_pt_lmem;
/*
* On some platforms the hw has dropped support for 4K GTT pages
* when dealing with LMEM, and due to the design of 64K GTT
* pages in the hw, we can only mark the *entire* page-table as
* operating in 64K GTT mode, since the enable bit is still on
* the pde, and not the pte. And since we still need to allow
* 4K GTT pages for SMEM objects, we can't have a "normal" 4K
* page-table with scratch pointing to LMEM, since that's
* undefined from the hw pov. The simplest solution is to just
* move the 64K scratch page to SMEM on such platforms and call
* it a day, since that should work for all configurations.
*/
if (HAS_64K_PAGES(gt->i915))
ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
else
ppgtt->vm.alloc_scratch_dma = alloc_pt_lmem;
} else {
else
ppgtt->vm.alloc_pt_dma = alloc_pt_dma;
ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
}
/*
* Using SMEM here instead of LMEM has the advantage of not reserving
* high performance memory for a "never" used filler page. It also
* removes the device access that would be required to initialise the
* scratch page, reducing pressure on an even scarcer resource.
*/
ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
ppgtt->vm.pte_encode = gen8_pte_encode;

View File

@ -276,6 +276,14 @@ static inline bool intel_context_is_barrier(const struct intel_context *ce)
return test_bit(CONTEXT_BARRIER_BIT, &ce->flags);
}
static inline void intel_context_close(struct intel_context *ce)
{
set_bit(CONTEXT_CLOSED_BIT, &ce->flags);
if (ce->ops->close)
ce->ops->close(ce);
}
static inline bool intel_context_is_closed(const struct intel_context *ce)
{
return test_bit(CONTEXT_CLOSED_BIT, &ce->flags);

View File

@ -43,6 +43,8 @@ struct intel_context_ops {
void (*revoke)(struct intel_context *ce, struct i915_request *rq,
unsigned int preempt_timeout_ms);
void (*close)(struct intel_context *ce);
int (*pre_pin)(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void **vaddr);
int (*pin)(struct intel_context *ce, void *vaddr);
void (*unpin)(struct intel_context *ce);
@ -197,8 +199,6 @@ struct intel_context {
* context's submissions is complete.
*/
struct i915_sw_fence blocked;
/** @number_committed_requests: number of committed requests */
int number_committed_requests;
/** @requests: list of active requests on this context */
struct list_head requests;
/** @prio: the context's current guc priority */
@ -208,6 +208,11 @@ struct intel_context {
* each priority bucket
*/
u32 prio_count[GUC_CLIENT_PRIORITY_NUM];
/**
* @sched_disable_delay_work: worker to disable scheduling on this
* context
*/
struct delayed_work sched_disable_delay_work;
} guc_state;
struct {

View File

@ -348,4 +348,10 @@ intel_engine_get_hung_context(struct intel_engine_cs *engine)
return engine->hung_ce;
}
u64 intel_clamp_heartbeat_interval_ms(struct intel_engine_cs *engine, u64 value);
u64 intel_clamp_max_busywait_duration_ns(struct intel_engine_cs *engine, u64 value);
u64 intel_clamp_preempt_timeout_ms(struct intel_engine_cs *engine, u64 value);
u64 intel_clamp_stop_timeout_ms(struct intel_engine_cs *engine, u64 value);
u64 intel_clamp_timeslice_duration_ms(struct intel_engine_cs *engine, u64 value);
#endif /* _INTEL_RINGBUFFER_H_ */

View File

@ -486,6 +486,17 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id,
engine->logical_mask = BIT(logical_instance);
__sprint_engine_name(engine);
if ((engine->class == COMPUTE_CLASS && !RCS_MASK(engine->gt) &&
__ffs(CCS_MASK(engine->gt)) == engine->instance) ||
engine->class == RENDER_CLASS)
engine->flags |= I915_ENGINE_FIRST_RENDER_COMPUTE;
/* features common between engines sharing EUs */
if (engine->class == RENDER_CLASS || engine->class == COMPUTE_CLASS) {
engine->flags |= I915_ENGINE_HAS_RCS_REG_STATE;
engine->flags |= I915_ENGINE_HAS_EU_PRIORITY;
}
engine->props.heartbeat_interval_ms =
CONFIG_DRM_I915_HEARTBEAT_INTERVAL;
engine->props.max_busywait_duration_ns =
@ -497,20 +508,34 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id,
engine->props.timeslice_duration_ms =
CONFIG_DRM_I915_TIMESLICE_DURATION;
/* Override to uninterruptible for OpenCL workloads. */
if (GRAPHICS_VER(i915) == 12 && engine->class == RENDER_CLASS)
engine->props.preempt_timeout_ms = 0;
/*
* Mid-thread pre-emption is not available in Gen12. Unfortunately,
* some compute workloads run quite long threads. That means they get
* reset due to not pre-empting in a timely manner. So, bump the
* pre-emption timeout value to be much higher for compute engines.
*/
if (GRAPHICS_VER(i915) == 12 && (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE))
engine->props.preempt_timeout_ms = CONFIG_DRM_I915_PREEMPT_TIMEOUT_COMPUTE;
if ((engine->class == COMPUTE_CLASS && !RCS_MASK(engine->gt) &&
__ffs(CCS_MASK(engine->gt)) == engine->instance) ||
engine->class == RENDER_CLASS)
engine->flags |= I915_ENGINE_FIRST_RENDER_COMPUTE;
/* Cap properties according to any system limits */
#define CLAMP_PROP(field) \
do { \
u64 clamp = intel_clamp_##field(engine, engine->props.field); \
if (clamp != engine->props.field) { \
drm_notice(&engine->i915->drm, \
"Warning, clamping %s to %lld to prevent overflow\n", \
#field, clamp); \
engine->props.field = clamp; \
} \
} while (0)
/* features common between engines sharing EUs */
if (engine->class == RENDER_CLASS || engine->class == COMPUTE_CLASS) {
engine->flags |= I915_ENGINE_HAS_RCS_REG_STATE;
engine->flags |= I915_ENGINE_HAS_EU_PRIORITY;
}
CLAMP_PROP(heartbeat_interval_ms);
CLAMP_PROP(max_busywait_duration_ns);
CLAMP_PROP(preempt_timeout_ms);
CLAMP_PROP(stop_timeout_ms);
CLAMP_PROP(timeslice_duration_ms);
#undef CLAMP_PROP
engine->defaults = engine->props; /* never to change again */
@ -534,6 +559,55 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id,
return 0;
}
u64 intel_clamp_heartbeat_interval_ms(struct intel_engine_cs *engine, u64 value)
{
value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT));
return value;
}
u64 intel_clamp_max_busywait_duration_ns(struct intel_engine_cs *engine, u64 value)
{
value = min(value, jiffies_to_nsecs(2));
return value;
}
u64 intel_clamp_preempt_timeout_ms(struct intel_engine_cs *engine, u64 value)
{
/*
* NB: The GuC API only supports 32bit values. However, the limit is further
* reduced due to internal calculations which would otherwise overflow.
*/
if (intel_guc_submission_is_wanted(&engine->gt->uc.guc))
value = min_t(u64, value, guc_policy_max_preempt_timeout_ms());
value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT));
return value;
}
u64 intel_clamp_stop_timeout_ms(struct intel_engine_cs *engine, u64 value)
{
value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT));
return value;
}
u64 intel_clamp_timeslice_duration_ms(struct intel_engine_cs *engine, u64 value)
{
/*
* NB: The GuC API only supports 32bit values. However, the limit is further
* reduced due to internal calculations which would otherwise overflow.
*/
if (intel_guc_submission_is_wanted(&engine->gt->uc.guc))
value = min_t(u64, value, guc_policy_max_exec_quantum_ms());
value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT));
return value;
}
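
The CLAMP_PROP() macro earlier in this hunk expands one clamp helper per property and warns when a configured value had to be reduced. A standalone sketch of the same expand-and-warn pattern, with hypothetical property names and limits rather than the driver's:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical engine properties, mirroring the shape of the driver struct. */
struct engine_props {
	uint64_t heartbeat_interval_ms;
	uint64_t timeslice_duration_ms;
};

/* Hypothetical clamp helpers: cap each property at an arbitrary limit. */
static uint64_t clamp_heartbeat_interval_ms(uint64_t v) { return v < 60000 ? v : 60000; }
static uint64_t clamp_timeslice_duration_ms(uint64_t v) { return v < 10000 ? v : 10000; }

/* Same expand-and-warn pattern as CLAMP_PROP() above. */
#define CLAMP_PROP(props, field)						\
	do {									\
		uint64_t clamp = clamp_##field((props)->field);			\
		if (clamp != (props)->field) {					\
			fprintf(stderr,						\
				"clamping %s to %" PRIu64 " to prevent overflow\n", \
				#field, clamp);					\
			(props)->field = clamp;					\
		}								\
	} while (0)

int main(void)
{
	struct engine_props props = {
		.heartbeat_interval_ms = 5000,
		.timeslice_duration_ms = 123456,	/* too large, will be clamped */
	};

	CLAMP_PROP(&props, heartbeat_interval_ms);
	CLAMP_PROP(&props, timeslice_duration_ms);

	printf("heartbeat=%" PRIu64 " timeslice=%" PRIu64 "\n",
	       props.heartbeat_interval_ms, props.timeslice_duration_ms);
	return 0;
}
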
static void __setup_engine_capabilities(struct intel_engine_cs *engine)
{
struct drm_i915_private *i915 = engine->i915;
@ -1274,8 +1348,13 @@ int intel_engines_init(struct intel_gt *gt)
return err;
err = setup(engine);
if (err)
if (err) {
intel_engine_cleanup_common(engine);
return err;
}
/* The backend should now be responsible for cleanup */
GEM_BUG_ON(engine->release == NULL);
err = engine_init_common(engine);
if (err)
@ -1554,11 +1633,11 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine,
for_each_ss_steering(iter, engine->gt, slice, subslice) {
instdone->sampler[slice][subslice] =
intel_gt_mcr_read(engine->gt,
GEN7_SAMPLER_INSTDONE,
GEN8_SAMPLER_INSTDONE,
slice, subslice);
instdone->row[slice][subslice] =
intel_gt_mcr_read(engine->gt,
GEN7_ROW_INSTDONE,
GEN8_ROW_INSTDONE,
slice, subslice);
}

View File

@ -22,9 +22,37 @@
static bool next_heartbeat(struct intel_engine_cs *engine)
{
struct i915_request *rq;
long delay;
delay = READ_ONCE(engine->props.heartbeat_interval_ms);
rq = engine->heartbeat.systole;
/*
* FIXME: The final period extension is disabled if the period has been
* modified from the default. This is to prevent issues with certain
* selftests which override the value and expect specific behaviour.
* Once the selftests have been updated to either cope with variable
* heartbeat periods (or to override the pre-emption timeout as well,
* or just to add a selftest specific override of the extension), the
* generic override can be removed.
*/
if (rq && rq->sched.attr.priority >= I915_PRIORITY_BARRIER &&
delay == engine->defaults.heartbeat_interval_ms) {
long longer;
/*
* The final try is at the highest priority possible. Up until now
* a pre-emption might not even have been attempted. So make sure
* this last attempt allows enough time for a pre-emption to occur.
*/
longer = READ_ONCE(engine->props.preempt_timeout_ms) * 2;
longer = intel_clamp_heartbeat_interval_ms(engine, longer);
if (longer > delay)
delay = longer;
}
if (!delay)
return false;
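
The block above stretches only the final, barrier-priority heartbeat period, and only while the interval is still at its default, so that at least two pre-emption timeouts fit before the engine is declared hung. A small sketch of just that arithmetic, with a hypothetical clamp standing in for intel_clamp_heartbeat_interval_ms():

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical upper bound standing in for the real clamp helper. */
static long clamp_heartbeat_ms(long ms)
{
	return ms < 3600000 ? ms : 3600000;
}

/*
 * Sketch of the final-period extension: only when the heartbeat interval is
 * still the default do we stretch the last period to cover two pre-emption
 * timeouts, so a pre-emption attempt has time to complete.
 */
static long final_heartbeat_delay(long delay_ms, long default_ms,
				  long preempt_timeout_ms, bool final_attempt)
{
	if (final_attempt && delay_ms == default_ms) {
		long longer = clamp_heartbeat_ms(preempt_timeout_ms * 2);

		if (longer > delay_ms)
			delay_ms = longer;
	}
	return delay_ms;
}

int main(void)
{
	/* 2500ms default heartbeat, 7500ms pre-emption timeout -> 15000ms final period. */
	printf("%ld\n", final_heartbeat_delay(2500, 2500, 7500, true));
	return 0;
}
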
@ -288,6 +316,17 @@ int intel_engine_set_heartbeat(struct intel_engine_cs *engine,
if (!delay && !intel_engine_has_preempt_reset(engine))
return -ENODEV;
/* FIXME: Remove together with equally marked hack in next_heartbeat. */
if (delay != engine->defaults.heartbeat_interval_ms &&
delay < 2 * engine->props.preempt_timeout_ms) {
if (intel_engine_uses_guc(engine))
drm_notice(&engine->i915->drm, "%s heartbeat interval adjusted to a non-default value which may downgrade individual engine resets to full GPU resets!\n",
engine->name);
else
drm_notice(&engine->i915->drm, "%s heartbeat interval adjusted to a non-default value which may cause engine resets to target innocent contexts!\n",
engine->name);
}
intel_engine_pm_get(engine);
err = mutex_lock_interruptible(&ce->timeline->mutex);

View File

@ -201,6 +201,7 @@
#define RING_CONTEXT_STATUS_PTR(base) _MMIO((base) + 0x3a0)
#define RING_CTX_TIMESTAMP(base) _MMIO((base) + 0x3a8) /* gen8+ */
#define RING_PREDICATE_RESULT(base) _MMIO((base) + 0x3b8)
#define MI_PREDICATE_RESULT_2_ENGINE(base) _MMIO((base) + 0x3bc)
#define RING_FORCE_TO_NONPRIV(base, i) _MMIO(((base) + 0x4D0) + (i) * 4)
#define RING_FORCE_TO_NONPRIV_DENY REG_BIT(30)
#define RING_FORCE_TO_NONPRIV_ADDRESS_MASK REG_GENMASK(25, 2)

View File

@ -3471,9 +3471,9 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) {
if (intel_engine_has_preemption(engine))
engine->emit_bb_start = gen125_emit_bb_start;
engine->emit_bb_start = xehp_emit_bb_start;
else
engine->emit_bb_start = gen125_emit_bb_start_noarb;
engine->emit_bb_start = xehp_emit_bb_start_noarb;
} else {
if (intel_engine_has_preemption(engine))
engine->emit_bb_start = gen8_emit_bb_start;

View File

@ -871,8 +871,8 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
u32 pte_flags;
int ret;
GEM_WARN_ON(pci_resource_len(pdev, GTTMMADR_BAR) != gen6_gttmmadr_size(i915));
phys_addr = pci_resource_start(pdev, GTTMMADR_BAR) + gen6_gttadr_offset(i915);
GEM_WARN_ON(pci_resource_len(pdev, GEN4_GTTMMADR_BAR) != gen6_gttmmadr_size(i915));
phys_addr = pci_resource_start(pdev, GEN4_GTTMMADR_BAR) + gen6_gttadr_offset(i915);
/*
* On BXT+/ICL+ writes larger than 64 bit to the GTT pagetable range
@ -931,11 +931,11 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
unsigned int size;
u16 snb_gmch_ctl;
if (!HAS_LMEM(i915)) {
if (!i915_pci_resource_valid(pdev, GTT_APERTURE_BAR))
if (!HAS_LMEM(i915) && !HAS_LMEMBAR_SMEM_STOLEN(i915)) {
if (!i915_pci_resource_valid(pdev, GEN4_GMADR_BAR))
return -ENXIO;
ggtt->gmadr = pci_resource(pdev, GTT_APERTURE_BAR);
ggtt->gmadr = pci_resource(pdev, GEN4_GMADR_BAR);
ggtt->mappable_end = resource_size(&ggtt->gmadr);
}
@ -986,7 +986,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
setup_private_pat(ggtt->vm.gt->uncore);
setup_private_pat(ggtt->vm.gt);
return ggtt_probe_common(ggtt, size);
}
@ -1089,10 +1089,10 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt)
unsigned int size;
u16 snb_gmch_ctl;
if (!i915_pci_resource_valid(pdev, GTT_APERTURE_BAR))
if (!i915_pci_resource_valid(pdev, GEN4_GMADR_BAR))
return -ENXIO;
ggtt->gmadr = pci_resource(pdev, GTT_APERTURE_BAR);
ggtt->gmadr = pci_resource(pdev, GEN4_GMADR_BAR);
ggtt->mappable_end = resource_size(&ggtt->gmadr);
/*
@ -1308,7 +1308,7 @@ void i915_ggtt_resume(struct i915_ggtt *ggtt)
wbinvd_on_all_cpus();
if (GRAPHICS_VER(ggtt->vm.i915) >= 8)
setup_private_pat(ggtt->vm.gt->uncore);
setup_private_pat(ggtt->vm.gt);
intel_ggtt_restore_fences(ggtt);
}

View File

@ -187,6 +187,10 @@
#define MI_BATCH_RESOURCE_STREAMER REG_BIT(10)
#define MI_BATCH_PREDICATE REG_BIT(15) /* HSW+ on RCS only*/
#define MI_OPCODE(x) (((x) >> 23) & 0x3f)
#define IS_MI_LRI_CMD(x) (MI_OPCODE(x) == MI_OPCODE(MI_INSTR(0x22, 0)))
#define MI_LRI_LEN(x) (((x) & 0xff) + 1)
/*
* 3D instructions used by the kernel
*/

View File

@ -7,6 +7,7 @@
#include <linux/mei_aux.h>
#include "i915_drv.h"
#include "i915_reg.h"
#include "gem/i915_gem_lmem.h"
#include "gem/i915_gem_region.h"
#include "gt/intel_gsc.h"
#include "gt/intel_gt.h"
@ -142,8 +143,14 @@ static void gsc_destroy_one(struct drm_i915_private *i915,
struct intel_gsc_intf *intf = &gsc->intf[intf_id];
if (intf->adev) {
auxiliary_device_delete(&intf->adev->aux_dev);
auxiliary_device_uninit(&intf->adev->aux_dev);
struct auxiliary_device *aux_dev = &intf->adev->aux_dev;
if (intf_id == 0)
intel_huc_unregister_gsc_notifier(&gsc_to_gt(gsc)->uc.huc,
aux_dev->dev.bus);
auxiliary_device_delete(aux_dev);
auxiliary_device_uninit(aux_dev);
intf->adev = NULL;
}
@ -242,14 +249,24 @@ add_device:
goto fail;
}
intf->adev = adev; /* needed by the notifier */
if (intf_id == 0)
intel_huc_register_gsc_notifier(&gsc_to_gt(gsc)->uc.huc,
aux_dev->dev.bus);
ret = auxiliary_device_add(aux_dev);
if (ret < 0) {
drm_err(&i915->drm, "gsc aux add failed %d\n", ret);
if (intf_id == 0)
intel_huc_unregister_gsc_notifier(&gsc_to_gt(gsc)->uc.huc,
aux_dev->dev.bus);
intf->adev = NULL;
/* adev will be freed with the put_device() and .release sequence */
auxiliary_device_uninit(aux_dev);
goto fail;
}
intf->adev = adev;
return;
fail:

View File

@ -40,8 +40,6 @@ void intel_gt_common_init_early(struct intel_gt *gt)
{
spin_lock_init(gt->irq_lock);
INIT_LIST_HEAD(&gt->lmem_userfault_list);
mutex_init(&gt->lmem_userfault_lock);
INIT_LIST_HEAD(&gt->closed_vma);
spin_lock_init(&gt->closed_lock);
@ -231,6 +229,16 @@ static void gen6_clear_engine_error_register(struct intel_engine_cs *engine)
GEN6_RING_FAULT_REG_POSTING_READ(engine);
}
i915_reg_t intel_gt_perf_limit_reasons_reg(struct intel_gt *gt)
{
/* GT0_PERF_LIMIT_REASONS is available only for Gen11+ */
if (GRAPHICS_VER(gt->i915) < 11)
return INVALID_MMIO_REG;
return gt->type == GT_MEDIA ?
MTL_MEDIA_PERF_LIMIT_REASONS : GT0_PERF_LIMIT_REASONS;
}
void
intel_gt_clear_error_registers(struct intel_gt *gt,
intel_engine_mask_t engine_mask)
@ -260,7 +268,11 @@ intel_gt_clear_error_registers(struct intel_gt *gt,
I915_MASTER_ERROR_INTERRUPT);
}
if (GRAPHICS_VER(i915) >= 12) {
if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
intel_gt_mcr_multicast_rmw(gt, XEHP_RING_FAULT_REG,
RING_FAULT_VALID, 0);
intel_gt_mcr_read_any(gt, XEHP_RING_FAULT_REG);
} else if (GRAPHICS_VER(i915) >= 12) {
rmw_clear(uncore, GEN12_RING_FAULT_REG, RING_FAULT_VALID);
intel_uncore_posting_read(uncore, GEN12_RING_FAULT_REG);
} else if (GRAPHICS_VER(i915) >= 8) {
@ -298,6 +310,42 @@ static void gen6_check_faults(struct intel_gt *gt)
}
}
static void xehp_check_faults(struct intel_gt *gt)
{
u32 fault;
/*
* Although the fault register now lives in an MCR register range,
* the GAM registers are special and we only truly need to read
* the "primary" GAM instance rather than handling each instance
* individually. intel_gt_mcr_read_any() will automatically steer
* toward the primary instance.
*/
fault = intel_gt_mcr_read_any(gt, XEHP_RING_FAULT_REG);
if (fault & RING_FAULT_VALID) {
u32 fault_data0, fault_data1;
u64 fault_addr;
fault_data0 = intel_gt_mcr_read_any(gt, XEHP_FAULT_TLB_DATA0);
fault_data1 = intel_gt_mcr_read_any(gt, XEHP_FAULT_TLB_DATA1);
fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) |
((u64)fault_data0 << 12);
drm_dbg(&gt->i915->drm, "Unexpected fault\n"
"\tAddr: 0x%08x_%08x\n"
"\tAddress space: %s\n"
"\tEngine ID: %d\n"
"\tSource ID: %d\n"
"\tType: %d\n",
upper_32_bits(fault_addr), lower_32_bits(fault_addr),
fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT",
GEN8_RING_FAULT_ENGINE_ID(fault),
RING_FAULT_SRCID(fault),
RING_FAULT_FAULT_TYPE(fault));
}
}
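
xehp_check_faults() rebuilds the faulting virtual address from two registers: FAULT_TLB_DATA1 supplies the high bits, shifted to bit 44, and FAULT_TLB_DATA0 supplies a page-granular address shifted up by 12. A standalone sketch of that reconstruction, reusing the same shift amounts but with made-up register values:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define FAULT_VA_HIGH_BITS	0xfu	/* low nibble of FAULT_TLB_DATA1 */

/* Rebuild a 48-bit fault VA from the two data registers, as in the hunk above. */
static uint64_t fault_va(uint32_t fault_data0, uint32_t fault_data1)
{
	return ((uint64_t)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) |
	       ((uint64_t)fault_data0 << 12);
}

int main(void)
{
	/* Made-up register contents purely for illustration. */
	uint32_t data0 = 0x00123456;	/* bits 43:12 of the address, page aligned */
	uint32_t data1 = 0x00000013;	/* bit 4 = GGTT/PPGTT select, bits 3:0 = VA[47:44] */
	uint64_t va = fault_va(data0, data1);

	printf("fault VA: 0x%08" PRIx32 "_%08" PRIx32 "\n",
	       (uint32_t)(va >> 32), (uint32_t)(va & 0xffffffff));
	return 0;
}
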
static void gen8_check_faults(struct intel_gt *gt)
{
struct intel_uncore *uncore = gt->uncore;
@ -344,7 +392,9 @@ void intel_gt_check_and_clear_faults(struct intel_gt *gt)
struct drm_i915_private *i915 = gt->i915;
/* From GEN8 onwards we only have one 'All Engine Fault Register' */
if (GRAPHICS_VER(i915) >= 8)
if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
xehp_check_faults(gt);
else if (GRAPHICS_VER(i915) >= 8)
gen8_check_faults(gt);
else if (GRAPHICS_VER(i915) >= 6)
gen6_check_faults(gt);
@ -807,7 +857,6 @@ static int intel_gt_tile_setup(struct intel_gt *gt, phys_addr_t phys_addr)
}
intel_uncore_init_early(gt->uncore, gt);
intel_wakeref_auto_init(&gt->userfault_wakeref, gt->uncore->rpm);
ret = intel_uncore_setup_mmio(gt->uncore, phys_addr);
if (ret)
@ -828,7 +877,7 @@ int intel_gt_probe_all(struct drm_i915_private *i915)
unsigned int i;
int ret;
mmio_bar = GRAPHICS_VER(i915) == 2 ? GEN2_GTTMMADR_BAR : GTTMMADR_BAR;
mmio_bar = intel_mmio_bar(GRAPHICS_VER(i915));
phys_addr = pci_resource_start(pdev, mmio_bar);
/*
@ -939,7 +988,10 @@ void intel_gt_info_print(const struct intel_gt_info *info,
}
struct reg_and_bit {
i915_reg_t reg;
union {
i915_reg_t reg;
i915_mcr_reg_t mcr_reg;
};
u32 bit;
};
@ -965,6 +1017,32 @@ get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8,
return rb;
}
/*
* HW architecture suggests a typical invalidation time of 40us,
* with pessimistic cases up to 100us and a recommendation to
* cap at 1ms. We go a bit higher just in case.
*/
#define TLB_INVAL_TIMEOUT_US 100
#define TLB_INVAL_TIMEOUT_MS 4
/*
* On Xe_HP the TLB invalidation registers are located at the same MMIO offsets
* but are now considered MCR registers. Since they exist within a GAM range,
* the primary instance of the register rolls up the status from each unit.
*/
static int wait_for_invalidate(struct intel_gt *gt, struct reg_and_bit rb)
{
if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
return intel_gt_mcr_wait_for_reg_fw(gt, rb.mcr_reg, rb.bit, 0,
TLB_INVAL_TIMEOUT_US,
TLB_INVAL_TIMEOUT_MS);
else
return __intel_wait_for_register_fw(gt->uncore, rb.reg, rb.bit, 0,
TLB_INVAL_TIMEOUT_US,
TLB_INVAL_TIMEOUT_MS,
NULL);
}
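
wait_for_invalidate() keeps the two timeouts from the old inline constants: a 100us fast phase that is safe in atomic context, then a 4ms phase that may sleep. A self-contained sketch of that two-stage polling shape, using a userspace clock and a fake register read (every name here is invented):

#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

/* Fake "register": pretends the valid bit clears after a few polls. */
static uint32_t fake_read(void)
{
	static int polls;

	return ++polls < 5 ? 0x1 : 0x0;
}

static uint64_t now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
}

/*
 * Two-stage wait in the spirit of wait_for_invalidate(): a tight busy-wait
 * for fast_us microseconds, then a sleeping poll for slow_ms milliseconds.
 */
static int wait_for_clear(uint32_t mask, unsigned int fast_us, unsigned int slow_ms)
{
	uint64_t deadline = now_ns() + (uint64_t)fast_us * 1000;

	while (now_ns() < deadline)		/* fast, atomic-context friendly phase */
		if (!(fake_read() & mask))
			return 0;

	deadline = now_ns() + (uint64_t)slow_ms * 1000000;
	while (now_ns() < deadline) {		/* slow phase, allowed to sleep */
		struct timespec nap = { .tv_sec = 0, .tv_nsec = 100000 };

		if (!(fake_read() & mask))
			return 0;
		nanosleep(&nap, NULL);
	}
	return -ETIMEDOUT;
}

int main(void)
{
	printf("wait returned %d\n", wait_for_clear(0x1, 100, 4));
	return 0;
}
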
static void mmio_invalidate_full(struct intel_gt *gt)
{
static const i915_reg_t gen8_regs[] = {
@ -980,6 +1058,13 @@ static void mmio_invalidate_full(struct intel_gt *gt)
[COPY_ENGINE_CLASS] = GEN12_BLT_TLB_INV_CR,
[COMPUTE_CLASS] = GEN12_COMPCTX_TLB_INV_CR,
};
static const i915_mcr_reg_t xehp_regs[] = {
[RENDER_CLASS] = XEHP_GFX_TLB_INV_CR,
[VIDEO_DECODE_CLASS] = XEHP_VD_TLB_INV_CR,
[VIDEO_ENHANCEMENT_CLASS] = XEHP_VE_TLB_INV_CR,
[COPY_ENGINE_CLASS] = XEHP_BLT_TLB_INV_CR,
[COMPUTE_CLASS] = XEHP_COMPCTX_TLB_INV_CR,
};
struct drm_i915_private *i915 = gt->i915;
struct intel_uncore *uncore = gt->uncore;
struct intel_engine_cs *engine;
@ -988,7 +1073,10 @@ static void mmio_invalidate_full(struct intel_gt *gt)
const i915_reg_t *regs;
unsigned int num = 0;
if (GRAPHICS_VER(i915) == 12) {
if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
regs = NULL;
num = ARRAY_SIZE(xehp_regs);
} else if (GRAPHICS_VER(i915) == 12) {
regs = gen12_regs;
num = ARRAY_SIZE(gen12_regs);
} else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) {
@ -1013,11 +1101,17 @@ static void mmio_invalidate_full(struct intel_gt *gt)
if (!intel_engine_pm_is_awake(engine))
continue;
rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
if (!i915_mmio_reg_offset(rb.reg))
continue;
if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
intel_gt_mcr_multicast_write_fw(gt,
xehp_regs[engine->class],
BIT(engine->instance));
} else {
rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
if (!i915_mmio_reg_offset(rb.reg))
continue;
intel_uncore_write_fw(uncore, rb.reg, rb.bit);
intel_uncore_write_fw(uncore, rb.reg, rb.bit);
}
awake |= engine->mask;
}
@ -1037,22 +1131,17 @@ static void mmio_invalidate_full(struct intel_gt *gt)
for_each_engine_masked(engine, gt, awake, tmp) {
struct reg_and_bit rb;
/*
* HW architecture suggest typical invalidation time at 40us,
* with pessimistic cases up to 100us and a recommendation to
* cap at 1ms. We go a bit higher just in case.
*/
const unsigned int timeout_us = 100;
const unsigned int timeout_ms = 4;
if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
rb.mcr_reg = xehp_regs[engine->class];
rb.bit = BIT(engine->instance);
} else {
rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
}
rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
if (__intel_wait_for_register_fw(uncore,
rb.reg, rb.bit, 0,
timeout_us, timeout_ms,
NULL))
if (wait_for_invalidate(gt, rb))
drm_err_ratelimited(&gt->i915->drm,
"%s TLB invalidation did not complete in %ums!\n",
engine->name, timeout_ms);
engine->name, TLB_INVAL_TIMEOUT_MS);
}
/*

View File

@ -60,6 +60,7 @@ void intel_gt_driver_late_release_all(struct drm_i915_private *i915);
int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout);
void intel_gt_check_and_clear_faults(struct intel_gt *gt);
i915_reg_t intel_gt_perf_limit_reasons_reg(struct intel_gt *gt);
void intel_gt_clear_error_registers(struct intel_gt *gt,
intel_engine_mask_t engine_mask);

View File

@ -107,7 +107,7 @@ static u32 gen9_read_clock_frequency(struct intel_uncore *uncore)
return freq;
}
static u32 gen5_read_clock_frequency(struct intel_uncore *uncore)
static u32 gen6_read_clock_frequency(struct intel_uncore *uncore)
{
/*
* PRMs say:
@ -119,7 +119,27 @@ static u32 gen5_read_clock_frequency(struct intel_uncore *uncore)
return 12500000;
}
static u32 gen2_read_clock_frequency(struct intel_uncore *uncore)
static u32 gen5_read_clock_frequency(struct intel_uncore *uncore)
{
/*
* 63:32 increments every 1000 ns
* 31:0 mbz
*/
return 1000000000 / 1000;
}
static u32 g4x_read_clock_frequency(struct intel_uncore *uncore)
{
/*
* 63:20 increments every 1/4 ns
* 19:0 mbz
*
* -> 63:32 increments every 1024 ns
*/
return 1000000000 / 1024;
}
static u32 gen4_read_clock_frequency(struct intel_uncore *uncore)
{
/*
* PRMs say:
@ -127,8 +147,10 @@ static u32 gen2_read_clock_frequency(struct intel_uncore *uncore)
* "The value in this register increments once every 16
* hclks." (through the “Clocking Configuration”
* (CLKCFG) MCHBAR register)
*
* Testing on actual hardware has shown there is no /16.
*/
return RUNTIME_INFO(uncore->i915)->rawclk_freq * 1000 / 16;
return RUNTIME_INFO(uncore->i915)->rawclk_freq * 1000;
}
static u32 read_clock_frequency(struct intel_uncore *uncore)
@ -137,10 +159,16 @@ static u32 read_clock_frequency(struct intel_uncore *uncore)
return gen11_read_clock_frequency(uncore);
else if (GRAPHICS_VER(uncore->i915) >= 9)
return gen9_read_clock_frequency(uncore);
else if (GRAPHICS_VER(uncore->i915) >= 5)
else if (GRAPHICS_VER(uncore->i915) >= 6)
return gen6_read_clock_frequency(uncore);
else if (GRAPHICS_VER(uncore->i915) == 5)
return gen5_read_clock_frequency(uncore);
else if (IS_G4X(uncore->i915))
return g4x_read_clock_frequency(uncore);
else if (GRAPHICS_VER(uncore->i915) == 4)
return gen4_read_clock_frequency(uncore);
else
return gen2_read_clock_frequency(uncore);
return 0;
}
void intel_gt_init_clock_frequency(struct intel_gt *gt)
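
The new helpers just report how fast the CS timestamp counter ticks: on ILK the upper dword advances every 1000 ns (1 MHz), on G4X every 1024 ns (roughly 976.6 kHz), and on other gen4 parts at the raw clock with the bogus /16 dropped. A tiny sketch of the same dispatch-and-arithmetic, with a hypothetical platform enum in place of the real GRAPHICS_VER()/IS_G4X() checks:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical platform identifiers; the driver keys off GRAPHICS_VER()/IS_G4X(). */
enum platform { PLAT_GEN5, PLAT_G4X, PLAT_GEN4 };

static uint32_t cs_timestamp_frequency(enum platform p, uint32_t rawclk_khz)
{
	switch (p) {
	case PLAT_GEN5:
		return 1000000000 / 1000;	/* upper dword ticks every 1000 ns -> 1 MHz */
	case PLAT_G4X:
		return 1000000000 / 1024;	/* ticks every 1024 ns -> ~976562 Hz */
	case PLAT_GEN4:
		return rawclk_khz * 1000;	/* rawclk, no /16 despite what the PRM says */
	}
	return 0;
}

int main(void)
{
	printf("gen5: %u Hz\n", cs_timestamp_frequency(PLAT_GEN5, 0));
	printf("g4x:  %u Hz\n", cs_timestamp_frequency(PLAT_G4X, 0));
	printf("gen4: %u Hz (assuming a 125000 kHz rawclk)\n",
	       cs_timestamp_frequency(PLAT_GEN4, 125000));
	return 0;
}
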

View File

@ -40,6 +40,9 @@ static const char * const intel_steering_types[] = {
"L3BANK",
"MSLICE",
"LNCF",
"GAM",
"DSS",
"OADDRM",
"INSTANCE 0",
};
@ -48,14 +51,23 @@ static const struct intel_mmio_range icl_l3bank_steering_table[] = {
{},
};
/*
* Although the bspec lists more "MSLICE" ranges than shown here, some of those
* are of a "GAM" subclass that has special rules. Thus we use a separate
* GAM table farther down for those.
*/
static const struct intel_mmio_range xehpsdv_mslice_steering_table[] = {
{ 0x004000, 0x004AFF },
{ 0x00C800, 0x00CFFF },
{ 0x00DD00, 0x00DDFF },
{ 0x00E900, 0x00FFFF }, /* 0xEA00 - 0xEFFF is unused */
{},
};
static const struct intel_mmio_range xehpsdv_gam_steering_table[] = {
{ 0x004000, 0x004AFF },
{ 0x00C800, 0x00CFFF },
{},
};
static const struct intel_mmio_range xehpsdv_lncf_steering_table[] = {
{ 0x00B000, 0x00B0FF },
{ 0x00D800, 0x00D8FF },
@ -89,9 +101,47 @@ static const struct intel_mmio_range pvc_instance0_steering_table[] = {
{},
};
static const struct intel_mmio_range xelpg_instance0_steering_table[] = {
{ 0x000B00, 0x000BFF }, /* SQIDI */
{ 0x001000, 0x001FFF }, /* SQIDI */
{ 0x004000, 0x0048FF }, /* GAM */
{ 0x008700, 0x0087FF }, /* SQIDI */
{ 0x00B000, 0x00B0FF }, /* NODE */
{ 0x00C800, 0x00CFFF }, /* GAM */
{ 0x00D880, 0x00D8FF }, /* NODE */
{ 0x00DD00, 0x00DDFF }, /* OAAL2 */
{},
};
static const struct intel_mmio_range xelpg_l3bank_steering_table[] = {
{ 0x00B100, 0x00B3FF },
{},
};
/* DSS steering is used for SLICE ranges as well */
static const struct intel_mmio_range xelpg_dss_steering_table[] = {
{ 0x005200, 0x0052FF }, /* SLICE */
{ 0x005500, 0x007FFF }, /* SLICE */
{ 0x008140, 0x00815F }, /* SLICE (0x8140-0x814F), DSS (0x8150-0x815F) */
{ 0x0094D0, 0x00955F }, /* SLICE (0x94D0-0x951F), DSS (0x9520-0x955F) */
{ 0x009680, 0x0096FF }, /* DSS */
{ 0x00D800, 0x00D87F }, /* SLICE */
{ 0x00DC00, 0x00DCFF }, /* SLICE */
{ 0x00DE80, 0x00E8FF }, /* DSS (0xE000-0xE0FF reserved) */
{},
};
static const struct intel_mmio_range xelpmp_oaddrm_steering_table[] = {
{ 0x393200, 0x39323F },
{ 0x393400, 0x3934FF },
{},
};
void intel_gt_mcr_init(struct intel_gt *gt)
{
struct drm_i915_private *i915 = gt->i915;
unsigned long fuse;
int i;
/*
* An mslice is unavailable only if both the meml3 for the slice is
@ -109,14 +159,36 @@ void intel_gt_mcr_init(struct intel_gt *gt)
drm_warn(&i915->drm, "mslice mask all zero!\n");
}
if (IS_PONTEVECCHIO(i915)) {
if (MEDIA_VER(i915) >= 13 && gt->type == GT_MEDIA) {
gt->steering_table[OADDRM] = xelpmp_oaddrm_steering_table;
} else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) {
fuse = REG_FIELD_GET(GT_L3_EXC_MASK,
intel_uncore_read(gt->uncore, XEHP_FUSE4));
/*
* Despite the register field being named "exclude mask" the
* bits actually represent enabled banks (two banks per bit).
*/
for_each_set_bit(i, &fuse, 3)
gt->info.l3bank_mask |= 0x3 << 2 * i;
gt->steering_table[INSTANCE0] = xelpg_instance0_steering_table;
gt->steering_table[L3BANK] = xelpg_l3bank_steering_table;
gt->steering_table[DSS] = xelpg_dss_steering_table;
} else if (IS_PONTEVECCHIO(i915)) {
gt->steering_table[INSTANCE0] = pvc_instance0_steering_table;
} else if (IS_DG2(i915)) {
gt->steering_table[MSLICE] = xehpsdv_mslice_steering_table;
gt->steering_table[LNCF] = dg2_lncf_steering_table;
/*
* No need to hook up the GAM table since it has a dedicated
* steering control register on DG2 and can use implicit
* steering.
*/
} else if (IS_XEHPSDV(i915)) {
gt->steering_table[MSLICE] = xehpsdv_mslice_steering_table;
gt->steering_table[LNCF] = xehpsdv_lncf_steering_table;
gt->steering_table[GAM] = xehpsdv_gam_steering_table;
} else if (GRAPHICS_VER(i915) >= 11 &&
GRAPHICS_VER_FULL(i915) < IP_VER(12, 50)) {
gt->steering_table[L3BANK] = icl_l3bank_steering_table;
@ -134,6 +206,19 @@ void intel_gt_mcr_init(struct intel_gt *gt)
}
}
/*
* Although the rest of the driver should use MCR-specific functions to
* read/write MCR registers, we still use the regular intel_uncore_* functions
* internally to implement those, so we need a way for the functions in this
* file to "cast" an i915_mcr_reg_t into an i915_reg_t.
*/
static i915_reg_t mcr_reg_cast(const i915_mcr_reg_t mcr)
{
i915_reg_t r = { .reg = mcr.reg };
return r;
}
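
mcr_reg_cast() exists because i915_mcr_reg_t is a distinct struct type, so an MCR register can no longer be handed to the plain intel_uncore_* helpers by accident; only this file converts back explicitly. A minimal illustration of that wrapper-type pattern, with both struct names and the accessor invented for the sketch:

#include <stdint.h>
#include <stdio.h>

/* Two distinct wrapper types: mixing them up is now a compile error. */
typedef struct { uint32_t reg; } plain_reg_t;
typedef struct { uint32_t reg; } mcr_reg_t;

#define PLAIN_REG(offset)	((const plain_reg_t){ .reg = (offset) })
#define MCR_REG(offset)		((const mcr_reg_t){ .reg = (offset) })

/* Only the MCR layer is allowed to "cast" back to the plain type. */
static plain_reg_t mcr_reg_cast(const mcr_reg_t mcr)
{
	plain_reg_t r = { .reg = mcr.reg };

	return r;
}

/* Stand-in for a raw MMIO accessor that only understands plain registers. */
static uint32_t mmio_read(plain_reg_t reg)
{
	printf("reading offset 0x%04x\n", reg.reg);
	return 0;
}

int main(void)
{
	mcr_reg_t fault_reg = MCR_REG(0xcec4);

	/* mmio_read(fault_reg) would not compile; the cast is explicit. */
	mmio_read(mcr_reg_cast(fault_reg));
	return 0;
}
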
/*
* rw_with_mcr_steering_fw - Access a register with specific MCR steering
* @uncore: pointer to struct intel_uncore
@ -148,14 +233,26 @@ void intel_gt_mcr_init(struct intel_gt *gt)
* Caller needs to make sure the relevant forcewake wells are up.
*/
static u32 rw_with_mcr_steering_fw(struct intel_uncore *uncore,
i915_reg_t reg, u8 rw_flag,
i915_mcr_reg_t reg, u8 rw_flag,
int group, int instance, u32 value)
{
u32 mcr_mask, mcr_ss, mcr, old_mcr, val = 0;
lockdep_assert_held(&uncore->lock);
if (GRAPHICS_VER(uncore->i915) >= 11) {
if (GRAPHICS_VER_FULL(uncore->i915) >= IP_VER(12, 70)) {
/*
* Always leave the hardware in multicast mode when doing reads
* (see comment about Wa_22013088509 below) and only change it
* to unicast mode when doing writes of a specific instance.
*
* No need to save old steering reg value.
*/
intel_uncore_write_fw(uncore, MTL_MCR_SELECTOR,
REG_FIELD_PREP(MTL_MCR_GROUPID, group) |
REG_FIELD_PREP(MTL_MCR_INSTANCEID, instance) |
(rw_flag == FW_REG_READ ? GEN11_MCR_MULTICAST : 0));
} else if (GRAPHICS_VER(uncore->i915) >= 11) {
mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK;
mcr_ss = GEN11_MCR_SLICE(group) | GEN11_MCR_SUBSLICE(instance);
@ -173,39 +270,53 @@ static u32 rw_with_mcr_steering_fw(struct intel_uncore *uncore,
*/
if (rw_flag == FW_REG_WRITE)
mcr_mask |= GEN11_MCR_MULTICAST;
mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR);
old_mcr = mcr;
mcr &= ~mcr_mask;
mcr |= mcr_ss;
intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr);
} else {
mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK;
mcr_ss = GEN8_MCR_SLICE(group) | GEN8_MCR_SUBSLICE(instance);
mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR);
old_mcr = mcr;
mcr &= ~mcr_mask;
mcr |= mcr_ss;
intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr);
}
old_mcr = mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR);
mcr &= ~mcr_mask;
mcr |= mcr_ss;
intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr);
if (rw_flag == FW_REG_READ)
val = intel_uncore_read_fw(uncore, reg);
val = intel_uncore_read_fw(uncore, mcr_reg_cast(reg));
else
intel_uncore_write_fw(uncore, reg, value);
intel_uncore_write_fw(uncore, mcr_reg_cast(reg), value);
mcr &= ~mcr_mask;
mcr |= old_mcr & mcr_mask;
intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr);
/*
* For pre-MTL platforms, we need to restore the old value of the
* steering control register to ensure that implicit steering continues
* to behave as expected. For MTL and beyond, we need only reinstate
* the 'multicast' bit (and only if we did a write that cleared it).
*/
if (GRAPHICS_VER_FULL(uncore->i915) >= IP_VER(12, 70) && rw_flag == FW_REG_WRITE)
intel_uncore_write_fw(uncore, MTL_MCR_SELECTOR, GEN11_MCR_MULTICAST);
else if (GRAPHICS_VER_FULL(uncore->i915) < IP_VER(12, 70))
intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, old_mcr);
return val;
}
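
rw_with_mcr_steering_fw() now has two restore strategies: pre-MTL parts save and restore the whole steering selector, while MTL-style parts only need the multicast bit put back, and only after a unicast write cleared it. A compact sketch of that save/steer/access/restore flow against a mock selector register (all helper names and the bit layout are invented):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MCR_MULTICAST	(1u << 31)	/* invented bit layout for the sketch */
#define MCR_GROUP(g)	(((g) & 0xf) << 8)
#define MCR_INSTANCE(i)	((i) & 0xf)

static uint32_t selector;	/* mock steering selector register */
static uint32_t target;		/* mock steered register */

/* Steer, access, then restore; "mtl_style" keeps multicast as the resting state. */
static uint32_t steered_rw(bool mtl_style, bool is_write,
			   int group, int instance, uint32_t value)
{
	uint32_t old = selector, val = 0;

	selector = MCR_GROUP(group) | MCR_INSTANCE(instance) |
		   (is_write ? 0 : MCR_MULTICAST);	/* reads stay multicast on MTL */

	if (is_write)
		target = value;
	else
		val = target;

	if (mtl_style) {
		if (is_write)
			selector = MCR_MULTICAST;	/* reads never cleared it */
	} else {
		selector = old;		/* restore the full previous steering */
	}

	return val;
}

int main(void)
{
	uint32_t val;

	steered_rw(true, true, 3, 1, 0xdeadbeef);
	val = steered_rw(true, false, 3, 1, 0);
	printf("read back 0x%08x, selector 0x%08x\n", val, selector);
	return 0;
}
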
static u32 rw_with_mcr_steering(struct intel_uncore *uncore,
i915_reg_t reg, u8 rw_flag,
i915_mcr_reg_t reg, u8 rw_flag,
int group, int instance,
u32 value)
{
enum forcewake_domains fw_domains;
u32 val;
fw_domains = intel_uncore_forcewake_for_reg(uncore, reg,
fw_domains = intel_uncore_forcewake_for_reg(uncore, mcr_reg_cast(reg),
rw_flag);
fw_domains |= intel_uncore_forcewake_for_reg(uncore,
GEN8_MCR_SELECTOR,
@ -233,7 +344,7 @@ static u32 rw_with_mcr_steering(struct intel_uncore *uncore,
* group/instance.
*/
u32 intel_gt_mcr_read(struct intel_gt *gt,
i915_reg_t reg,
i915_mcr_reg_t reg,
int group, int instance)
{
return rw_with_mcr_steering(gt->uncore, reg, FW_REG_READ, group, instance, 0);
@ -250,7 +361,7 @@ u32 intel_gt_mcr_read(struct intel_gt *gt,
* Write an MCR register in unicast mode after steering toward a specific
* group/instance.
*/
void intel_gt_mcr_unicast_write(struct intel_gt *gt, i915_reg_t reg, u32 value,
void intel_gt_mcr_unicast_write(struct intel_gt *gt, i915_mcr_reg_t reg, u32 value,
int group, int instance)
{
rw_with_mcr_steering(gt->uncore, reg, FW_REG_WRITE, group, instance, value);
@ -265,9 +376,16 @@ void intel_gt_mcr_unicast_write(struct intel_gt *gt, i915_reg_t reg, u32 value,
* Write an MCR register in multicast mode to update all instances.
*/
void intel_gt_mcr_multicast_write(struct intel_gt *gt,
i915_reg_t reg, u32 value)
i915_mcr_reg_t reg, u32 value)
{
intel_uncore_write(gt->uncore, reg, value);
/*
* Ensure we have multicast behavior, just in case some non-i915 agent
* left the hardware in unicast mode.
*/
if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
intel_uncore_write_fw(gt->uncore, MTL_MCR_SELECTOR, GEN11_MCR_MULTICAST);
intel_uncore_write(gt->uncore, mcr_reg_cast(reg), value);
}
/**
@ -281,9 +399,44 @@ void intel_gt_mcr_multicast_write(struct intel_gt *gt,
* domains; use intel_gt_mcr_multicast_write() in cases where forcewake should
* be obtained automatically.
*/
void intel_gt_mcr_multicast_write_fw(struct intel_gt *gt, i915_reg_t reg, u32 value)
void intel_gt_mcr_multicast_write_fw(struct intel_gt *gt, i915_mcr_reg_t reg, u32 value)
{
intel_uncore_write_fw(gt->uncore, reg, value);
/*
* Ensure we have multicast behavior, just in case some non-i915 agent
* left the hardware in unicast mode.
*/
if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
intel_uncore_write_fw(gt->uncore, MTL_MCR_SELECTOR, GEN11_MCR_MULTICAST);
intel_uncore_write_fw(gt->uncore, mcr_reg_cast(reg), value);
}
/**
* intel_gt_mcr_multicast_rmw - Performs a multicast RMW operation
* @gt: GT structure
* @reg: the MCR register to read and write
* @clear: bits to clear during RMW
* @set: bits to set during RMW
*
* Performs a read-modify-write on an MCR register in a multicast manner.
* This operation only makes sense on MCR registers where all instances are
* expected to have the same value. The read will target any non-terminated
* instance and the write will be applied to all instances.
*
* This function assumes the caller is already holding any necessary forcewake
* domains; use intel_gt_mcr_multicast_rmw() in cases where forcewake should
* be obtained automatically.
*
* Returns the old (unmodified) value read.
*/
u32 intel_gt_mcr_multicast_rmw(struct intel_gt *gt, i915_mcr_reg_t reg,
u32 clear, u32 set)
{
u32 val = intel_gt_mcr_read_any(gt, reg);
intel_gt_mcr_multicast_write(gt, reg, (val & ~clear) | set);
return val;
}
/*
@ -301,7 +454,7 @@ void intel_gt_mcr_multicast_write_fw(struct intel_gt *gt, i915_reg_t reg, u32 va
* for @type steering too.
*/
static bool reg_needs_read_steering(struct intel_gt *gt,
i915_reg_t reg,
i915_mcr_reg_t reg,
enum intel_steering_type type)
{
const u32 offset = i915_mmio_reg_offset(reg);
@ -332,6 +485,8 @@ static void get_nonterminated_steering(struct intel_gt *gt,
enum intel_steering_type type,
u8 *group, u8 *instance)
{
u32 dss;
switch (type) {
case L3BANK:
*group = 0; /* unused */
@ -351,6 +506,15 @@ static void get_nonterminated_steering(struct intel_gt *gt,
*group = __ffs(gt->info.mslice_mask) << 1;
*instance = 0; /* unused */
break;
case GAM:
*group = IS_DG2(gt->i915) ? 1 : 0;
*instance = 0;
break;
case DSS:
dss = intel_sseu_find_first_xehp_dss(&gt->info.sseu, 0, 0);
*group = dss / GEN_DSS_PER_GSLICE;
*instance = dss % GEN_DSS_PER_GSLICE;
break;
case INSTANCE0:
/*
* There are a lot of MCR types for which instance (0, 0)
@ -359,6 +523,13 @@ static void get_nonterminated_steering(struct intel_gt *gt,
*group = 0;
*instance = 0;
break;
case OADDRM:
if ((VDBOX_MASK(gt) | VEBOX_MASK(gt) | gt->info.sfc_mask) & BIT(0))
*group = 0;
else
*group = 1;
*instance = 0;
break;
default:
MISSING_CASE(type);
*group = 0;
@ -380,7 +551,7 @@ static void get_nonterminated_steering(struct intel_gt *gt,
* steering.
*/
void intel_gt_mcr_get_nonterminated_steering(struct intel_gt *gt,
i915_reg_t reg,
i915_mcr_reg_t reg,
u8 *group, u8 *instance)
{
int type;
@ -409,7 +580,7 @@ void intel_gt_mcr_get_nonterminated_steering(struct intel_gt *gt,
*
* Returns the value from a non-terminated instance of @reg.
*/
u32 intel_gt_mcr_read_any_fw(struct intel_gt *gt, i915_reg_t reg)
u32 intel_gt_mcr_read_any_fw(struct intel_gt *gt, i915_mcr_reg_t reg)
{
int type;
u8 group, instance;
@ -423,7 +594,7 @@ u32 intel_gt_mcr_read_any_fw(struct intel_gt *gt, i915_reg_t reg)
}
}
return intel_uncore_read_fw(gt->uncore, reg);
return intel_uncore_read_fw(gt->uncore, mcr_reg_cast(reg));
}
/**
@ -436,7 +607,7 @@ u32 intel_gt_mcr_read_any_fw(struct intel_gt *gt, i915_reg_t reg)
*
* Returns the value from a non-terminated instance of @reg.
*/
u32 intel_gt_mcr_read_any(struct intel_gt *gt, i915_reg_t reg)
u32 intel_gt_mcr_read_any(struct intel_gt *gt, i915_mcr_reg_t reg)
{
int type;
u8 group, instance;
@ -450,7 +621,7 @@ u32 intel_gt_mcr_read_any(struct intel_gt *gt, i915_reg_t reg)
}
}
return intel_uncore_read(gt->uncore, reg);
return intel_uncore_read(gt->uncore, mcr_reg_cast(reg));
}
static void report_steering_type(struct drm_printer *p,
@ -483,11 +654,20 @@ static void report_steering_type(struct drm_printer *p,
void intel_gt_mcr_report_steering(struct drm_printer *p, struct intel_gt *gt,
bool dump_table)
{
drm_printf(p, "Default steering: group=0x%x, instance=0x%x\n",
gt->default_steering.groupid,
gt->default_steering.instanceid);
/*
* Starting with MTL we no longer have default steering;
* all ranges are explicitly steered.
*/
if (GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 70))
drm_printf(p, "Default steering: group=0x%x, instance=0x%x\n",
gt->default_steering.groupid,
gt->default_steering.instanceid);
if (IS_PONTEVECCHIO(gt->i915)) {
if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) {
for (int i = 0; i < NUM_STEERING_TYPES; i++)
if (gt->steering_table[i])
report_steering_type(p, gt, i, dump_table);
} else if (IS_PONTEVECCHIO(gt->i915)) {
report_steering_type(p, gt, INSTANCE0, dump_table);
} else if (HAS_MSLICE_STEERING(gt->i915)) {
report_steering_type(p, gt, MSLICE, dump_table);
@ -520,3 +700,58 @@ void intel_gt_mcr_get_ss_steering(struct intel_gt *gt, unsigned int dss,
return;
}
}
/**
* intel_gt_mcr_wait_for_reg_fw - wait until MCR register matches expected state
* @gt: GT structure
* @reg: the register to read
* @mask: mask to apply to register value
* @value: value to wait for
* @fast_timeout_us: fast timeout in microseconds for atomic/tight wait
* @slow_timeout_ms: slow timeout in milliseconds
*
* This routine waits until the target register @reg contains the expected
* @value after applying the @mask, i.e. it waits until ::
*
* (intel_gt_mcr_read_any_fw(gt, reg) & mask) == value
*
* Otherwise, the wait will time out after @slow_timeout_ms milliseconds.
* For atomic context @slow_timeout_ms must be zero and @fast_timeout_us
* must not be larger than 20,000 microseconds.
*
* This function is basically an MCR-friendly version of
* __intel_wait_for_register_fw(). Generally this function will only be used
* on GAM registers which are a bit special --- although they're MCR registers,
* reads (e.g., waiting for status updates) are always directed to the primary
* instance.
*
* Note that this routine assumes the caller holds forcewake asserted; it is
* not suitable for very long waits.
*
* Return: 0 if the register matches the desired condition, or -ETIMEDOUT.
*/
int intel_gt_mcr_wait_for_reg_fw(struct intel_gt *gt,
i915_mcr_reg_t reg,
u32 mask,
u32 value,
unsigned int fast_timeout_us,
unsigned int slow_timeout_ms)
{
u32 reg_value = 0;
#define done (((reg_value = intel_gt_mcr_read_any_fw(gt, reg)) & mask) == value)
int ret;
/* Catch any overuse of this function */
might_sleep_if(slow_timeout_ms);
GEM_BUG_ON(fast_timeout_us > 20000);
GEM_BUG_ON(!fast_timeout_us && !slow_timeout_ms);
ret = -ETIMEDOUT;
if (fast_timeout_us && fast_timeout_us <= 20000)
ret = _wait_for_atomic(done, fast_timeout_us, 0);
if (ret && slow_timeout_ms)
ret = wait_for(done, slow_timeout_ms);
return ret;
#undef done
}

View File

@ -11,21 +11,24 @@
void intel_gt_mcr_init(struct intel_gt *gt);
u32 intel_gt_mcr_read(struct intel_gt *gt,
i915_reg_t reg,
i915_mcr_reg_t reg,
int group, int instance);
u32 intel_gt_mcr_read_any_fw(struct intel_gt *gt, i915_reg_t reg);
u32 intel_gt_mcr_read_any(struct intel_gt *gt, i915_reg_t reg);
u32 intel_gt_mcr_read_any_fw(struct intel_gt *gt, i915_mcr_reg_t reg);
u32 intel_gt_mcr_read_any(struct intel_gt *gt, i915_mcr_reg_t reg);
void intel_gt_mcr_unicast_write(struct intel_gt *gt,
i915_reg_t reg, u32 value,
i915_mcr_reg_t reg, u32 value,
int group, int instance);
void intel_gt_mcr_multicast_write(struct intel_gt *gt,
i915_reg_t reg, u32 value);
i915_mcr_reg_t reg, u32 value);
void intel_gt_mcr_multicast_write_fw(struct intel_gt *gt,
i915_reg_t reg, u32 value);
i915_mcr_reg_t reg, u32 value);
u32 intel_gt_mcr_multicast_rmw(struct intel_gt *gt, i915_mcr_reg_t reg,
u32 clear, u32 set);
void intel_gt_mcr_get_nonterminated_steering(struct intel_gt *gt,
i915_reg_t reg,
i915_mcr_reg_t reg,
u8 *group, u8 *instance);
void intel_gt_mcr_report_steering(struct drm_printer *p, struct intel_gt *gt,
@ -34,6 +37,13 @@ void intel_gt_mcr_report_steering(struct drm_printer *p, struct intel_gt *gt,
void intel_gt_mcr_get_ss_steering(struct intel_gt *gt, unsigned int dss,
unsigned int *group, unsigned int *instance);
int intel_gt_mcr_wait_for_reg_fw(struct intel_gt *gt,
i915_mcr_reg_t reg,
u32 mask,
u32 value,
unsigned int fast_timeout_us,
unsigned int slow_timeout_ms);
/*
* Helper for for_each_ss_steering loop. On pre-Xe_HP platforms, subslice
* presence is determined by using the group/instance as direct lookups in the

View File

@ -344,162 +344,7 @@ void intel_gt_pm_frequency_dump(struct intel_gt *gt, struct drm_printer *p)
drm_printf(p, "efficient (RPe) frequency: %d MHz\n",
intel_gpu_freq(rps, rps->efficient_freq));
} else if (GRAPHICS_VER(i915) >= 6) {
u32 rp_state_limits;
u32 gt_perf_status;
struct intel_rps_freq_caps caps;
u32 rpmodectl, rpinclimit, rpdeclimit;
u32 rpstat, cagf, reqf;
u32 rpcurupei, rpcurup, rpprevup;
u32 rpcurdownei, rpcurdown, rpprevdown;
u32 rpupei, rpupt, rpdownei, rpdownt;
u32 pm_ier, pm_imr, pm_isr, pm_iir, pm_mask;
rp_state_limits = intel_uncore_read(uncore, GEN6_RP_STATE_LIMITS);
gen6_rps_get_freq_caps(rps, &caps);
if (IS_GEN9_LP(i915))
gt_perf_status = intel_uncore_read(uncore, BXT_GT_PERF_STATUS);
else
gt_perf_status = intel_uncore_read(uncore, GEN6_GT_PERF_STATUS);
/* RPSTAT1 is in the GT power well */
intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
reqf = intel_uncore_read(uncore, GEN6_RPNSWREQ);
if (GRAPHICS_VER(i915) >= 9) {
reqf >>= 23;
} else {
reqf &= ~GEN6_TURBO_DISABLE;
if (IS_HASWELL(i915) || IS_BROADWELL(i915))
reqf >>= 24;
else
reqf >>= 25;
}
reqf = intel_gpu_freq(rps, reqf);
rpmodectl = intel_uncore_read(uncore, GEN6_RP_CONTROL);
rpinclimit = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD);
rpdeclimit = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD);
rpstat = intel_uncore_read(uncore, GEN6_RPSTAT1);
rpcurupei = intel_uncore_read(uncore, GEN6_RP_CUR_UP_EI) & GEN6_CURICONT_MASK;
rpcurup = intel_uncore_read(uncore, GEN6_RP_CUR_UP) & GEN6_CURBSYTAVG_MASK;
rpprevup = intel_uncore_read(uncore, GEN6_RP_PREV_UP) & GEN6_CURBSYTAVG_MASK;
rpcurdownei = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN_EI) & GEN6_CURIAVG_MASK;
rpcurdown = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN) & GEN6_CURBSYTAVG_MASK;
rpprevdown = intel_uncore_read(uncore, GEN6_RP_PREV_DOWN) & GEN6_CURBSYTAVG_MASK;
rpupei = intel_uncore_read(uncore, GEN6_RP_UP_EI);
rpupt = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD);
rpdownei = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
rpdownt = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD);
cagf = intel_rps_read_actual_frequency(rps);
intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
if (GRAPHICS_VER(i915) >= 11) {
pm_ier = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE);
pm_imr = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_MASK);
/*
* The equivalent to the PM ISR & IIR cannot be read
* without affecting the current state of the system
*/
pm_isr = 0;
pm_iir = 0;
} else if (GRAPHICS_VER(i915) >= 8) {
pm_ier = intel_uncore_read(uncore, GEN8_GT_IER(2));
pm_imr = intel_uncore_read(uncore, GEN8_GT_IMR(2));
pm_isr = intel_uncore_read(uncore, GEN8_GT_ISR(2));
pm_iir = intel_uncore_read(uncore, GEN8_GT_IIR(2));
} else {
pm_ier = intel_uncore_read(uncore, GEN6_PMIER);
pm_imr = intel_uncore_read(uncore, GEN6_PMIMR);
pm_isr = intel_uncore_read(uncore, GEN6_PMISR);
pm_iir = intel_uncore_read(uncore, GEN6_PMIIR);
}
pm_mask = intel_uncore_read(uncore, GEN6_PMINTRMSK);
drm_printf(p, "Video Turbo Mode: %s\n",
str_yes_no(rpmodectl & GEN6_RP_MEDIA_TURBO));
drm_printf(p, "HW control enabled: %s\n",
str_yes_no(rpmodectl & GEN6_RP_ENABLE));
drm_printf(p, "SW control enabled: %s\n",
str_yes_no((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) == GEN6_RP_MEDIA_SW_MODE));
drm_printf(p, "PM IER=0x%08x IMR=0x%08x, MASK=0x%08x\n",
pm_ier, pm_imr, pm_mask);
if (GRAPHICS_VER(i915) <= 10)
drm_printf(p, "PM ISR=0x%08x IIR=0x%08x\n",
pm_isr, pm_iir);
drm_printf(p, "pm_intrmsk_mbz: 0x%08x\n",
rps->pm_intrmsk_mbz);
drm_printf(p, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status);
drm_printf(p, "Render p-state ratio: %d\n",
(gt_perf_status & (GRAPHICS_VER(i915) >= 9 ? 0x1ff00 : 0xff00)) >> 8);
drm_printf(p, "Render p-state VID: %d\n",
gt_perf_status & 0xff);
drm_printf(p, "Render p-state limit: %d\n",
rp_state_limits & 0xff);
drm_printf(p, "RPSTAT1: 0x%08x\n", rpstat);
drm_printf(p, "RPMODECTL: 0x%08x\n", rpmodectl);
drm_printf(p, "RPINCLIMIT: 0x%08x\n", rpinclimit);
drm_printf(p, "RPDECLIMIT: 0x%08x\n", rpdeclimit);
drm_printf(p, "RPNSWREQ: %dMHz\n", reqf);
drm_printf(p, "CAGF: %dMHz\n", cagf);
drm_printf(p, "RP CUR UP EI: %d (%lldns)\n",
rpcurupei,
intel_gt_pm_interval_to_ns(gt, rpcurupei));
drm_printf(p, "RP CUR UP: %d (%lldns)\n",
rpcurup, intel_gt_pm_interval_to_ns(gt, rpcurup));
drm_printf(p, "RP PREV UP: %d (%lldns)\n",
rpprevup, intel_gt_pm_interval_to_ns(gt, rpprevup));
drm_printf(p, "Up threshold: %d%%\n",
rps->power.up_threshold);
drm_printf(p, "RP UP EI: %d (%lldns)\n",
rpupei, intel_gt_pm_interval_to_ns(gt, rpupei));
drm_printf(p, "RP UP THRESHOLD: %d (%lldns)\n",
rpupt, intel_gt_pm_interval_to_ns(gt, rpupt));
drm_printf(p, "RP CUR DOWN EI: %d (%lldns)\n",
rpcurdownei,
intel_gt_pm_interval_to_ns(gt, rpcurdownei));
drm_printf(p, "RP CUR DOWN: %d (%lldns)\n",
rpcurdown,
intel_gt_pm_interval_to_ns(gt, rpcurdown));
drm_printf(p, "RP PREV DOWN: %d (%lldns)\n",
rpprevdown,
intel_gt_pm_interval_to_ns(gt, rpprevdown));
drm_printf(p, "Down threshold: %d%%\n",
rps->power.down_threshold);
drm_printf(p, "RP DOWN EI: %d (%lldns)\n",
rpdownei, intel_gt_pm_interval_to_ns(gt, rpdownei));
drm_printf(p, "RP DOWN THRESHOLD: %d (%lldns)\n",
rpdownt, intel_gt_pm_interval_to_ns(gt, rpdownt));
drm_printf(p, "Lowest (RPN) frequency: %dMHz\n",
intel_gpu_freq(rps, caps.min_freq));
drm_printf(p, "Nominal (RP1) frequency: %dMHz\n",
intel_gpu_freq(rps, caps.rp1_freq));
drm_printf(p, "Max non-overclocked (RP0) frequency: %dMHz\n",
intel_gpu_freq(rps, caps.rp0_freq));
drm_printf(p, "Max overclocked frequency: %dMHz\n",
intel_gpu_freq(rps, rps->max_freq));
drm_printf(p, "Current freq: %d MHz\n",
intel_gpu_freq(rps, rps->cur_freq));
drm_printf(p, "Actual freq: %d MHz\n", cagf);
drm_printf(p, "Idle freq: %d MHz\n",
intel_gpu_freq(rps, rps->idle_freq));
drm_printf(p, "Min freq: %d MHz\n",
intel_gpu_freq(rps, rps->min_freq));
drm_printf(p, "Boost freq: %d MHz\n",
intel_gpu_freq(rps, rps->boost_freq));
drm_printf(p, "Max freq: %d MHz\n",
intel_gpu_freq(rps, rps->max_freq));
drm_printf(p,
"efficient (RPe) frequency: %d MHz\n",
intel_gpu_freq(rps, rps->efficient_freq));
gen6_rps_frequency_dump(rps, p);
} else {
drm_puts(p, "no P-state info available\n");
}
@ -655,6 +500,44 @@ static bool rps_eval(void *data)
DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(rps_boost);
static int perf_limit_reasons_get(void *data, u64 *val)
{
struct intel_gt *gt = data;
intel_wakeref_t wakeref;
with_intel_runtime_pm(gt->uncore->rpm, wakeref)
*val = intel_uncore_read(gt->uncore, intel_gt_perf_limit_reasons_reg(gt));
return 0;
}
static int perf_limit_reasons_clear(void *data, u64 val)
{
struct intel_gt *gt = data;
intel_wakeref_t wakeref;
/*
* Clear the upper 16 "log" bits; the lower 16 "status" bits are
* read-only. The upper 16 "log" bits are identical to the lower 16
* "status" bits except that the "log" bits remain set until cleared.
*/
with_intel_runtime_pm(gt->uncore->rpm, wakeref)
intel_uncore_rmw(gt->uncore, intel_gt_perf_limit_reasons_reg(gt),
GT0_PERF_LIMIT_REASONS_LOG_MASK, 0);
return 0;
}
static bool perf_limit_reasons_eval(void *data)
{
struct intel_gt *gt = data;
return i915_mmio_reg_valid(intel_gt_perf_limit_reasons_reg(gt));
}
DEFINE_SIMPLE_ATTRIBUTE(perf_limit_reasons_fops, perf_limit_reasons_get,
perf_limit_reasons_clear, "%llu\n");
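
perf_limit_reasons_clear() depends on the split described in the comment above it: the lower 16 bits are live, read-only status, while the upper 16 bits latch the same reasons until software clears them. A toy model of that sticky-log behaviour and of why the clearing RMW only touches the upper half (all of it invented for illustration):

#include <stdint.h>
#include <stdio.h>

#define STATUS_MASK	0x0000ffffu	/* live, read-only reasons */
#define LOG_MASK	0xffff0000u	/* sticky copy, cleared by software */

static uint32_t reg;	/* toy PERF_LIMIT_REASONS register */

/* Hardware side of the model: status changes, log bits latch high. */
static void hw_update(uint16_t live_reasons)
{
	reg = (reg & LOG_MASK) | live_reasons | ((uint32_t)live_reasons << 16);
}

/* Software side: read-modify-write that clears only the log half. */
static void clear_log(void)
{
	reg &= ~LOG_MASK;	/* status bits are read-only, leave them alone */
}

int main(void)
{
	hw_update(0x0004);	/* e.g. a thermal limit asserted */
	hw_update(0x0000);	/* limit gone, but the log half remembers it */
	printf("before clear: 0x%08x\n", reg);	/* 0x00040000 */

	clear_log();
	printf("after clear:  0x%08x\n", reg);	/* 0x00000000 */
	return 0;
}
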
void intel_gt_pm_debugfs_register(struct intel_gt *gt, struct dentry *root)
{
static const struct intel_gt_debugfs_file files[] = {
@ -664,6 +547,7 @@ void intel_gt_pm_debugfs_register(struct intel_gt *gt, struct dentry *root)
{ "forcewake_user", &forcewake_user_fops, NULL},
{ "llc", &llc_fops, llc_eval },
{ "rps_boost", &rps_boost_fops, rps_eval },
{ "perf_limit_reasons", &perf_limit_reasons_fops, perf_limit_reasons_eval },
};
intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), gt);

View File

@ -8,6 +8,19 @@
#include "i915_reg_defs.h"
#define MCR_REG(offset) ((const i915_mcr_reg_t){ .reg = (offset) })
/*
* The perf control registers are technically multicast registers, but the
* driver never needs to read/write them directly; we only use them to build
* lists of registers (where they're mixed in with other non-MCR registers)
* and then operate on the offset directly. For now we'll just define them
* as non-multicast so we can place them on the same list, but we may want
* to try to come up with a better way to handle heterogeneous lists of
* registers in the future.
*/
#define PERF_REG(offset) _MMIO(offset)
/* RPM unit config (Gen8+) */
#define RPM_CONFIG0 _MMIO(0xd00)
#define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT 3
@ -39,12 +52,17 @@
#define FORCEWAKE_ACK_RENDER_GEN9 _MMIO(0xd84)
#define FORCEWAKE_ACK_MEDIA_GEN9 _MMIO(0xd88)
#define FORCEWAKE_ACK_GSC _MMIO(0xdf8)
#define FORCEWAKE_ACK_GT_MTL _MMIO(0xdfc)
#define GMD_ID_GRAPHICS _MMIO(0xd8c)
#define GMD_ID_MEDIA _MMIO(MTL_MEDIA_GSI_BASE + 0xd8c)
#define MCFG_MCR_SELECTOR _MMIO(0xfd0)
#define MTL_MCR_SELECTOR _MMIO(0xfd4)
#define SF_MCR_SELECTOR _MMIO(0xfd8)
#define GEN8_MCR_SELECTOR _MMIO(0xfdc)
#define GAM_MCR_SELECTOR _MMIO(0xfe0)
#define GEN8_MCR_SLICE(slice) (((slice) & 3) << 26)
#define GEN8_MCR_SLICE_MASK GEN8_MCR_SLICE(3)
#define GEN8_MCR_SUBSLICE(subslice) (((subslice) & 3) << 24)
@ -54,6 +72,8 @@
#define GEN11_MCR_SLICE_MASK GEN11_MCR_SLICE(0xf)
#define GEN11_MCR_SUBSLICE(subslice) (((subslice) & 0x7) << 24)
#define GEN11_MCR_SUBSLICE_MASK GEN11_MCR_SUBSLICE(0x7)
#define MTL_MCR_GROUPID REG_GENMASK(11, 8)
#define MTL_MCR_INSTANCEID REG_GENMASK(3, 0)
#define IPEIR_I965 _MMIO(0x2064)
#define IPEHR_I965 _MMIO(0x2068)
@ -329,11 +349,12 @@
#define GEN7_TLB_RD_ADDR _MMIO(0x4700)
#define GEN12_PAT_INDEX(index) _MMIO(0x4800 + (index) * 4)
#define XEHP_PAT_INDEX(index) MCR_REG(0x4800 + (index) * 4)
#define XEHP_TILE0_ADDR_RANGE _MMIO(0x4900)
#define XEHP_TILE0_ADDR_RANGE MCR_REG(0x4900)
#define XEHP_TILE_LMEM_RANGE_SHIFT 8
#define XEHP_FLAT_CCS_BASE_ADDR _MMIO(0x4910)
#define XEHP_FLAT_CCS_BASE_ADDR MCR_REG(0x4910)
#define XEHP_CCS_BASE_SHIFT 8
#define GAMTARBMODE _MMIO(0x4a08)
@ -383,17 +404,18 @@
#define CHICKEN_RASTER_2 _MMIO(0x6208)
#define TBIMR_FAST_CLIP REG_BIT(5)
#define VFLSKPD _MMIO(0x62a8)
#define VFLSKPD MCR_REG(0x62a8)
#define DIS_OVER_FETCH_CACHE REG_BIT(1)
#define DIS_MULT_MISS_RD_SQUASH REG_BIT(0)
#define FF_MODE2 _MMIO(0x6604)
#define GEN12_FF_MODE2 _MMIO(0x6604)
#define XEHP_FF_MODE2 MCR_REG(0x6604)
#define FF_MODE2_GS_TIMER_MASK REG_GENMASK(31, 24)
#define FF_MODE2_GS_TIMER_224 REG_FIELD_PREP(FF_MODE2_GS_TIMER_MASK, 224)
#define FF_MODE2_TDS_TIMER_MASK REG_GENMASK(23, 16)
#define FF_MODE2_TDS_TIMER_128 REG_FIELD_PREP(FF_MODE2_TDS_TIMER_MASK, 4)
#define XEHPG_INSTDONE_GEOM_SVG _MMIO(0x666c)
#define XEHPG_INSTDONE_GEOM_SVG MCR_REG(0x666c)
#define CACHE_MODE_0_GEN7 _MMIO(0x7000) /* IVB+ */
#define RC_OP_FLUSH_ENABLE (1 << 0)
@ -421,6 +443,7 @@
#define HIZ_CHICKEN _MMIO(0x7018)
#define CHV_HZ_8X8_MODE_IN_1X REG_BIT(15)
#define DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE REG_BIT(14)
#define HZ_DEPTH_TEST_LE_GE_OPT_DISABLE REG_BIT(13)
#define BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE REG_BIT(3)
#define GEN8_L3CNTLREG _MMIO(0x7034)
@ -442,23 +465,16 @@
#define GEN8_HDC_CHICKEN1 _MMIO(0x7304)
#define GEN11_COMMON_SLICE_CHICKEN3 _MMIO(0x7304)
#define XEHP_COMMON_SLICE_CHICKEN3 MCR_REG(0x7304)
#define DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN REG_BIT(12)
#define XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE REG_BIT(12)
#define GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC REG_BIT(11)
#define GEN12_DISABLE_CPS_AWARE_COLOR_PIPE REG_BIT(9)
/* GEN9 chicken */
#define SLICE_ECO_CHICKEN0 _MMIO(0x7308)
#define PIXEL_MASK_CAMMING_DISABLE (1 << 14)
#define GEN9_SLICE_COMMON_ECO_CHICKEN0 _MMIO(0x7308)
#define DISABLE_PIXEL_MASK_CAMMING (1 << 14)
#define GEN9_SLICE_COMMON_ECO_CHICKEN1 _MMIO(0x731c)
#define GEN11_STATE_CACHE_REDIRECT_TO_CS (1 << 11)
#define SLICE_COMMON_ECO_CHICKEN1 _MMIO(0x731c)
#define XEHP_SLICE_COMMON_ECO_CHICKEN1 MCR_REG(0x731c)
#define MSC_MSAA_REODER_BUF_BYPASS_DISABLE REG_BIT(14)
#define GEN11_STATE_CACHE_REDIRECT_TO_CS (1 << 11)
#define GEN9_SLICE_PGCTL_ACK(slice) _MMIO(0x804c + (slice) * 0x4)
#define GEN10_SLICE_PGCTL_ACK(slice) _MMIO(0x804c + ((slice) / 3) * 0x34 + \
@ -485,9 +501,12 @@
#define VF_PREEMPTION _MMIO(0x83a4)
#define PREEMPTION_VERTEX_COUNT REG_GENMASK(15, 0)
#define VFG_PREEMPTION_CHICKEN _MMIO(0x83b4)
#define POLYGON_TRIFAN_LINELOOP_DISABLE REG_BIT(4)
#define GEN8_RC6_CTX_INFO _MMIO(0x8504)
#define GEN12_SQCM _MMIO(0x8724)
#define XEHP_SQCM MCR_REG(0x8724)
#define EN_32B_ACCESS REG_BIT(30)
#define HSW_IDICR _MMIO(0x9008)
@ -519,6 +538,8 @@
#define GEN6_MBCTL_BOOT_FETCH_MECH (1 << 0)
/* Fuse readout registers for GT */
#define XEHP_FUSE4 _MMIO(0x9114)
#define GT_L3_EXC_MASK REG_GENMASK(6, 4)
#define GEN10_MIRROR_FUSE3 _MMIO(0x9118)
#define GEN10_L3BANK_PAIR_COUNT 4
#define GEN10_L3BANK_MASK 0x0F
@ -647,6 +668,9 @@
#define GEN7_MISCCPCTL _MMIO(0x9424)
#define GEN7_DOP_CLOCK_GATE_ENABLE (1 << 0)
#define GEN8_MISCCPCTL MCR_REG(0x9424)
#define GEN8_DOP_CLOCK_GATE_ENABLE REG_BIT(0)
#define GEN12_DOP_CLOCK_GATE_RENDER_ENABLE REG_BIT(1)
#define GEN8_DOP_CLOCK_GATE_CFCLK_ENABLE (1 << 2)
#define GEN8_DOP_CLOCK_GATE_GUC_ENABLE (1 << 4)
@ -700,7 +724,8 @@
#define GAMTLBVEBOX0_CLKGATE_DIS REG_BIT(16)
#define LTCDD_CLKGATE_DIS REG_BIT(10)
#define SLICE_UNIT_LEVEL_CLKGATE _MMIO(0x94d4)
#define GEN11_SLICE_UNIT_LEVEL_CLKGATE _MMIO(0x94d4)
#define XEHP_SLICE_UNIT_LEVEL_CLKGATE MCR_REG(0x94d4)
#define SARBUNIT_CLKGATE_DIS (1 << 5)
#define RCCUNIT_CLKGATE_DIS (1 << 7)
#define MSCUNIT_CLKGATE_DIS (1 << 10)
@ -708,27 +733,27 @@
#define L3_CLKGATE_DIS REG_BIT(16)
#define L3_CR2X_CLKGATE_DIS REG_BIT(17)
#define SCCGCTL94DC _MMIO(0x94dc)
#define SCCGCTL94DC MCR_REG(0x94dc)
#define CG3DDISURB REG_BIT(14)
#define UNSLICE_UNIT_LEVEL_CLKGATE2 _MMIO(0x94e4)
#define VSUNIT_CLKGATE_DIS_TGL REG_BIT(19)
#define PSDUNIT_CLKGATE_DIS REG_BIT(5)
#define SUBSLICE_UNIT_LEVEL_CLKGATE _MMIO(0x9524)
#define GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE MCR_REG(0x9524)
#define DSS_ROUTER_CLKGATE_DIS REG_BIT(28)
#define GWUNIT_CLKGATE_DIS REG_BIT(16)
#define SUBSLICE_UNIT_LEVEL_CLKGATE2 _MMIO(0x9528)
#define SUBSLICE_UNIT_LEVEL_CLKGATE2 MCR_REG(0x9528)
#define CPSSUNIT_CLKGATE_DIS REG_BIT(9)
#define SSMCGCTL9530 _MMIO(0x9530)
#define SSMCGCTL9530 MCR_REG(0x9530)
#define RTFUNIT_CLKGATE_DIS REG_BIT(18)
#define GEN10_DFR_RATIO_EN_AND_CHICKEN _MMIO(0x9550)
#define GEN10_DFR_RATIO_EN_AND_CHICKEN MCR_REG(0x9550)
#define DFR_DISABLE (1 << 9)
#define INF_UNIT_LEVEL_CLKGATE _MMIO(0x9560)
#define INF_UNIT_LEVEL_CLKGATE MCR_REG(0x9560)
#define CGPSF_CLKGATE_DIS (1 << 3)
#define MICRO_BP0_0 _MMIO(0x9800)
@ -901,6 +926,8 @@
#define FORCEWAKE_MEDIA_VDBOX_GEN11(n) _MMIO(0xa540 + (n) * 4)
#define FORCEWAKE_MEDIA_VEBOX_GEN11(n) _MMIO(0xa560 + (n) * 4)
#define FORCEWAKE_REQ_GSC _MMIO(0xa618)
#define CHV_POWER_SS0_SIG1 _MMIO(0xa720)
#define CHV_POWER_SS0_SIG2 _MMIO(0xa724)
#define CHV_POWER_SS1_SIG1 _MMIO(0xa728)
@ -938,7 +965,8 @@
/* MOCS (Memory Object Control State) registers */
#define GEN9_LNCFCMOCS(i) _MMIO(0xb020 + (i) * 4) /* L3 Cache Control */
#define GEN9_LNCFCMOCS_REG_COUNT 32
#define XEHP_LNCFCMOCS(i) MCR_REG(0xb020 + (i) * 4)
#define LNCFCMOCS_REG_COUNT 32
#define GEN7_L3CNTLREG3 _MMIO(0xb024)
@ -954,15 +982,10 @@
#define GEN7_L3LOG(slice, i) _MMIO(0xb070 + (slice) * 0x200 + (i) * 4)
#define GEN7_L3LOG_SIZE 0x80
#define GEN10_SCRATCH_LNCF2 _MMIO(0xb0a0)
#define PMFLUSHDONE_LNICRSDROP (1 << 20)
#define PMFLUSH_GAPL3UNBLOCK (1 << 21)
#define PMFLUSHDONE_LNEBLK (1 << 22)
#define XEHP_L3NODEARBCFG _MMIO(0xb0b4)
#define XEHP_L3NODEARBCFG MCR_REG(0xb0b4)
#define XEHP_LNESPARE REG_BIT(19)
#define GEN8_L3SQCREG1 _MMIO(0xb100)
#define GEN8_L3SQCREG1 MCR_REG(0xb100)
/*
* Note that on CHV the following has an off-by-one error wrt. to BSpec.
* Using the formula in BSpec leads to a hang, while the formula here works
@ -973,31 +996,28 @@
#define L3_HIGH_PRIO_CREDITS(x) (((x) >> 1) << 14)
#define L3_PRIO_CREDITS_MASK ((0x1f << 19) | (0x1f << 14))
#define GEN10_L3_CHICKEN_MODE_REGISTER _MMIO(0xb114)
#define GEN11_I2M_WRITE_DISABLE (1 << 28)
#define GEN8_L3SQCREG4 _MMIO(0xb118)
#define GEN8_L3SQCREG4 MCR_REG(0xb118)
#define GEN11_LQSC_CLEAN_EVICT_DISABLE (1 << 6)
#define GEN8_LQSC_RO_PERF_DIS (1 << 27)
#define GEN8_LQSC_FLUSH_COHERENT_LINES (1 << 21)
#define GEN8_LQSQ_NONIA_COHERENT_ATOMICS_ENABLE REG_BIT(22)
#define GEN9_SCRATCH1 _MMIO(0xb11c)
#define GEN9_SCRATCH1 MCR_REG(0xb11c)
#define EVICTION_PERF_FIX_ENABLE REG_BIT(8)
#define BDW_SCRATCH1 _MMIO(0xb11c)
#define BDW_SCRATCH1 MCR_REG(0xb11c)
#define GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE (1 << 2)
#define GEN11_SCRATCH2 _MMIO(0xb140)
#define GEN11_SCRATCH2 MCR_REG(0xb140)
#define GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE (1 << 19)
#define GEN11_L3SQCREG5 _MMIO(0xb158)
#define XEHP_L3SQCREG5 MCR_REG(0xb158)
#define L3_PWM_TIMER_INIT_VAL_MASK REG_GENMASK(9, 0)
#define MLTICTXCTL _MMIO(0xb170)
#define MLTICTXCTL MCR_REG(0xb170)
#define TDONRENDER REG_BIT(2)
#define XEHP_L3SCQREG7 _MMIO(0xb188)
#define XEHP_L3SCQREG7 MCR_REG(0xb188)
#define BLEND_FILL_CACHING_OPT_DIS REG_BIT(3)
#define XEHPC_L3SCRUB _MMIO(0xb18c)
@ -1005,7 +1025,7 @@
#define SCRUB_RATE_PER_BANK_MASK REG_GENMASK(2, 0)
#define SCRUB_RATE_4B_PER_CLK REG_FIELD_PREP(SCRUB_RATE_PER_BANK_MASK, 0x6)
#define L3SQCREG1_CCS0 _MMIO(0xb200)
#define L3SQCREG1_CCS0 MCR_REG(0xb200)
#define FLUSHALLNONCOH REG_BIT(5)
#define GEN11_GLBLINVL _MMIO(0xb404)
@ -1030,11 +1050,14 @@
#define GEN9_BLT_MOCS(i) _MMIO(__GEN9_BCS0_MOCS0 + (i) * 4)
#define GEN12_FAULT_TLB_DATA0 _MMIO(0xceb8)
#define XEHP_FAULT_TLB_DATA0 MCR_REG(0xceb8)
#define GEN12_FAULT_TLB_DATA1 _MMIO(0xcebc)
#define XEHP_FAULT_TLB_DATA1 MCR_REG(0xcebc)
#define FAULT_VA_HIGH_BITS (0xf << 0)
#define FAULT_GTT_SEL (1 << 4)
#define GEN12_RING_FAULT_REG _MMIO(0xcec4)
#define XEHP_RING_FAULT_REG MCR_REG(0xcec4)
#define GEN8_RING_FAULT_ENGINE_ID(x) (((x) >> 12) & 0x7)
#define RING_FAULT_GTTSEL_MASK (1 << 11)
#define RING_FAULT_SRCID(x) (((x) >> 3) & 0xff)
@ -1042,16 +1065,21 @@
#define RING_FAULT_VALID (1 << 0)
#define GEN12_GFX_TLB_INV_CR _MMIO(0xced8)
#define XEHP_GFX_TLB_INV_CR MCR_REG(0xced8)
#define GEN12_VD_TLB_INV_CR _MMIO(0xcedc)
#define XEHP_VD_TLB_INV_CR MCR_REG(0xcedc)
#define GEN12_VE_TLB_INV_CR _MMIO(0xcee0)
#define XEHP_VE_TLB_INV_CR MCR_REG(0xcee0)
#define GEN12_BLT_TLB_INV_CR _MMIO(0xcee4)
#define XEHP_BLT_TLB_INV_CR MCR_REG(0xcee4)
#define GEN12_COMPCTX_TLB_INV_CR _MMIO(0xcf04)
#define XEHP_COMPCTX_TLB_INV_CR MCR_REG(0xcf04)
#define GEN12_MERT_MOD_CTRL _MMIO(0xcf28)
#define RENDER_MOD_CTRL _MMIO(0xcf2c)
#define COMP_MOD_CTRL _MMIO(0xcf30)
#define VDBX_MOD_CTRL _MMIO(0xcf34)
#define VEBX_MOD_CTRL _MMIO(0xcf38)
#define XEHP_MERT_MOD_CTRL MCR_REG(0xcf28)
#define RENDER_MOD_CTRL MCR_REG(0xcf2c)
#define COMP_MOD_CTRL MCR_REG(0xcf30)
#define VDBX_MOD_CTRL MCR_REG(0xcf34)
#define VEBX_MOD_CTRL MCR_REG(0xcf38)
#define FORCE_MISS_FTLB REG_BIT(3)
#define GEN12_GAMSTLB_CTRL _MMIO(0xcf4c)
@ -1066,48 +1094,52 @@
#define GEN12_GAM_DONE _MMIO(0xcf68)
#define GEN7_HALF_SLICE_CHICKEN1 _MMIO(0xe100) /* IVB GT1 + VLV */
#define GEN8_HALF_SLICE_CHICKEN1 MCR_REG(0xe100)
#define GEN7_MAX_PS_THREAD_DEP (8 << 12)
#define GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE (1 << 10)
#define GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE (1 << 4)
#define GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE (1 << 3)
#define GEN7_SAMPLER_INSTDONE _MMIO(0xe160)
#define GEN8_SAMPLER_INSTDONE MCR_REG(0xe160)
#define GEN7_ROW_INSTDONE _MMIO(0xe164)
#define GEN8_ROW_INSTDONE MCR_REG(0xe164)
#define HALF_SLICE_CHICKEN2 _MMIO(0xe180)
#define HALF_SLICE_CHICKEN2 MCR_REG(0xe180)
#define GEN8_ST_PO_DISABLE (1 << 13)
#define HALF_SLICE_CHICKEN3 _MMIO(0xe184)
#define HSW_HALF_SLICE_CHICKEN3 _MMIO(0xe184)
#define GEN8_HALF_SLICE_CHICKEN3 MCR_REG(0xe184)
#define HSW_SAMPLE_C_PERFORMANCE (1 << 9)
#define GEN8_CENTROID_PIXEL_OPT_DIS (1 << 8)
#define GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC (1 << 5)
#define GEN8_SAMPLER_POWER_BYPASS_DIS (1 << 1)
#define GEN9_HALF_SLICE_CHICKEN5 _MMIO(0xe188)
#define GEN9_HALF_SLICE_CHICKEN5 MCR_REG(0xe188)
#define GEN9_DG_MIRROR_FIX_ENABLE (1 << 5)
#define GEN9_CCS_TLB_PREFETCH_ENABLE (1 << 3)
#define GEN10_SAMPLER_MODE _MMIO(0xe18c)
#define GEN10_SAMPLER_MODE MCR_REG(0xe18c)
#define ENABLE_SMALLPL REG_BIT(15)
#define SC_DISABLE_POWER_OPTIMIZATION_EBB REG_BIT(9)
#define GEN11_SAMPLER_ENABLE_HEADLESS_MSG REG_BIT(5)
#define GEN9_HALF_SLICE_CHICKEN7 _MMIO(0xe194)
#define GEN9_HALF_SLICE_CHICKEN7 MCR_REG(0xe194)
#define DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA REG_BIT(15)
#define GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR REG_BIT(8)
#define GEN9_ENABLE_YV12_BUGFIX REG_BIT(4)
#define GEN9_ENABLE_GPGPU_PREEMPTION REG_BIT(2)
#define GEN10_CACHE_MODE_SS _MMIO(0xe420)
#define GEN10_CACHE_MODE_SS MCR_REG(0xe420)
#define ENABLE_EU_COUNT_FOR_TDL_FLUSH REG_BIT(10)
#define DISABLE_ECC REG_BIT(5)
#define FLOAT_BLEND_OPTIMIZATION_ENABLE REG_BIT(4)
#define ENABLE_PREFETCH_INTO_IC REG_BIT(3)
#define EU_PERF_CNTL0 _MMIO(0xe458)
#define EU_PERF_CNTL4 _MMIO(0xe45c)
#define EU_PERF_CNTL0 PERF_REG(0xe458)
#define EU_PERF_CNTL4 PERF_REG(0xe45c)
#define GEN9_ROW_CHICKEN4 _MMIO(0xe48c)
#define GEN9_ROW_CHICKEN4 MCR_REG(0xe48c)
#define GEN12_DISABLE_GRF_CLEAR REG_BIT(13)
#define XEHP_DIS_BBL_SYSPIPE REG_BIT(11)
#define GEN12_DISABLE_TDL_PUSH REG_BIT(9)
@ -1119,7 +1151,7 @@
#define HSW_ROW_CHICKEN3 _MMIO(0xe49c)
#define HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE (1 << 6)
#define GEN8_ROW_CHICKEN _MMIO(0xe4f0)
#define GEN8_ROW_CHICKEN MCR_REG(0xe4f0)
#define FLOW_CONTROL_ENABLE REG_BIT(15)
#define UGM_BACKUP_MODE REG_BIT(13)
#define MDQ_ARBITRATION_MODE REG_BIT(12)
@ -1130,42 +1162,43 @@
#define DISABLE_EARLY_EOT REG_BIT(1)
#define GEN7_ROW_CHICKEN2 _MMIO(0xe4f4)
#define GEN8_ROW_CHICKEN2 MCR_REG(0xe4f4)
#define GEN12_DISABLE_READ_SUPPRESSION REG_BIT(15)
#define GEN12_DISABLE_EARLY_READ REG_BIT(14)
#define GEN12_ENABLE_LARGE_GRF_MODE REG_BIT(12)
#define GEN12_PUSH_CONST_DEREF_HOLD_DIS REG_BIT(8)
#define GEN12_DISABLE_DOP_GATING REG_BIT(0)
#define RT_CTRL _MMIO(0xe530)
#define RT_CTRL MCR_REG(0xe530)
#define DIS_NULL_QUERY REG_BIT(10)
#define STACKID_CTRL REG_GENMASK(6, 5)
#define STACKID_CTRL_512 REG_FIELD_PREP(STACKID_CTRL, 0x2)
#define EU_PERF_CNTL1 _MMIO(0xe558)
#define EU_PERF_CNTL5 _MMIO(0xe55c)
#define EU_PERF_CNTL1 PERF_REG(0xe558)
#define EU_PERF_CNTL5 PERF_REG(0xe55c)
#define GEN12_HDC_CHICKEN0 _MMIO(0xe5f0)
#define XEHP_HDC_CHICKEN0 MCR_REG(0xe5f0)
#define LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK REG_GENMASK(13, 11)
#define ICL_HDC_MODE _MMIO(0xe5f4)
#define ICL_HDC_MODE MCR_REG(0xe5f4)
#define EU_PERF_CNTL2 _MMIO(0xe658)
#define EU_PERF_CNTL6 _MMIO(0xe65c)
#define EU_PERF_CNTL3 _MMIO(0xe758)
#define EU_PERF_CNTL2 PERF_REG(0xe658)
#define EU_PERF_CNTL6 PERF_REG(0xe65c)
#define EU_PERF_CNTL3 PERF_REG(0xe758)
#define LSC_CHICKEN_BIT_0 _MMIO(0xe7c8)
#define LSC_CHICKEN_BIT_0 MCR_REG(0xe7c8)
#define DISABLE_D8_D16_COASLESCE REG_BIT(30)
#define FORCE_1_SUB_MESSAGE_PER_FRAGMENT REG_BIT(15)
#define LSC_CHICKEN_BIT_0_UDW _MMIO(0xe7c8 + 4)
#define LSC_CHICKEN_BIT_0_UDW MCR_REG(0xe7c8 + 4)
#define DIS_CHAIN_2XSIMD8 REG_BIT(55 - 32)
#define FORCE_SLM_FENCE_SCOPE_TO_TILE REG_BIT(42 - 32)
#define FORCE_UGM_FENCE_SCOPE_TO_TILE REG_BIT(41 - 32)
#define MAXREQS_PER_BANK REG_GENMASK(39 - 32, 37 - 32)
#define DISABLE_128B_EVICTION_COMMAND_UDW REG_BIT(36 - 32)
#define SARB_CHICKEN1 _MMIO(0xe90c)
#define SARB_CHICKEN1 MCR_REG(0xe90c)
#define COMP_CKN_IN REG_GENMASK(30, 29)
#define GEN7_HALF_SLICE_CHICKEN1_GT2 _MMIO(0xf100)
#define GEN7_ROW_CHICKEN2_GT2 _MMIO(0xf4f4)
#define DOP_CLOCK_GATING_DISABLE (1 << 0)
#define PUSH_CONSTANT_DEREF_DISABLE (1 << 8)
@ -1513,6 +1546,9 @@
#define VLV_RENDER_C0_COUNT _MMIO(0x138118)
#define VLV_MEDIA_C0_COUNT _MMIO(0x13811c)
#define GEN12_RPSTAT1 _MMIO(0x1381b4)
#define GEN12_VOLTAGE_MASK REG_GENMASK(10, 0)
#define GEN11_GT_INTR_DW(x) _MMIO(0x190018 + ((x) * 4))
#define GEN11_CSME (31)
#define GEN11_GUNIT (28)
@ -1583,6 +1619,11 @@
#define GEN12_SFC_DONE(n) _MMIO(0x1cc000 + (n) * 0x1000)
#define GT0_PACKAGE_ENERGY_STATUS _MMIO(0x250004)
#define GT0_PACKAGE_RAPL_LIMIT _MMIO(0x250008)
#define GT0_PACKAGE_POWER_SKU_UNIT _MMIO(0x250068)
#define GT0_PLATFORM_ENERGY_STATUS _MMIO(0x25006c)
/*
* Standalone Media's non-engine GT registers are located at their regular GT
* offsets plus 0x380000. This extra offset is stored inside the intel_uncore

View File

@ -22,11 +22,9 @@ bool is_object_gt(struct kobject *kobj)
return !strncmp(kobj->name, "gt", 2);
}
struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev,
struct intel_gt *intel_gt_sysfs_get_drvdata(struct kobject *kobj,
const char *name)
{
struct kobject *kobj = &dev->kobj;
/*
* We are interested at knowing from where the interface
* has been called, whether it's called from gt/ or from
@ -38,6 +36,7 @@ struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev,
* "struct drm_i915_private *" type.
*/
if (!is_object_gt(kobj)) {
struct device *dev = kobj_to_dev(kobj);
struct drm_i915_private *i915 = kdev_minor_to_i915(dev);
return to_gt(i915);
@ -51,18 +50,18 @@ static struct kobject *gt_get_parent_obj(struct intel_gt *gt)
return &gt->i915->drm.primary->kdev->kobj;
}
static ssize_t id_show(struct device *dev,
struct device_attribute *attr,
static ssize_t id_show(struct kobject *kobj,
struct kobj_attribute *attr,
char *buf)
{
struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
return sysfs_emit(buf, "%u\n", gt->info.id);
}
static DEVICE_ATTR_RO(id);
static struct kobj_attribute attr_id = __ATTR_RO(id);
static struct attribute *id_attrs[] = {
&dev_attr_id.attr,
&attr_id.attr,
NULL,
};
ATTRIBUTE_GROUPS(id);

View File

@ -18,11 +18,6 @@ bool is_object_gt(struct kobject *kobj);
struct drm_i915_private *kobj_to_i915(struct kobject *kobj);
struct kobject *
intel_gt_create_kobj(struct intel_gt *gt,
struct kobject *dir,
const char *name);
static inline struct intel_gt *kobj_to_gt(struct kobject *kobj)
{
return container_of(kobj, struct intel_gt, sysfs_gt);
@ -30,7 +25,7 @@ static inline struct intel_gt *kobj_to_gt(struct kobject *kobj)
void intel_gt_sysfs_register(struct intel_gt *gt);
void intel_gt_sysfs_unregister(struct intel_gt *gt);
struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev,
struct intel_gt *intel_gt_sysfs_get_drvdata(struct kobject *kobj,
const char *name);
#endif /* SYSFS_GT_H */

View File

@ -24,14 +24,15 @@ enum intel_gt_sysfs_op {
};
static int
sysfs_gt_attribute_w_func(struct device *dev, struct device_attribute *attr,
sysfs_gt_attribute_w_func(struct kobject *kobj, struct attribute *attr,
int (func)(struct intel_gt *gt, u32 val), u32 val)
{
struct intel_gt *gt;
int ret;
if (!is_object_gt(&dev->kobj)) {
if (!is_object_gt(kobj)) {
int i;
struct device *dev = kobj_to_dev(kobj);
struct drm_i915_private *i915 = kdev_minor_to_i915(dev);
for_each_gt(gt, i915, i) {
@ -40,7 +41,7 @@ sysfs_gt_attribute_w_func(struct device *dev, struct device_attribute *attr,
break;
}
} else {
gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
gt = intel_gt_sysfs_get_drvdata(kobj, attr->name);
ret = func(gt, val);
}
@ -48,7 +49,7 @@ sysfs_gt_attribute_w_func(struct device *dev, struct device_attribute *attr,
}
static u32
sysfs_gt_attribute_r_func(struct device *dev, struct device_attribute *attr,
sysfs_gt_attribute_r_func(struct kobject *kobj, struct attribute *attr,
u32 (func)(struct intel_gt *gt),
enum intel_gt_sysfs_op op)
{
@ -57,8 +58,9 @@ sysfs_gt_attribute_r_func(struct device *dev, struct device_attribute *attr,
ret = (op == INTEL_GT_SYSFS_MAX) ? 0 : (u32) -1;
if (!is_object_gt(&dev->kobj)) {
if (!is_object_gt(kobj)) {
int i;
struct device *dev = kobj_to_dev(kobj);
struct drm_i915_private *i915 = kdev_minor_to_i915(dev);
for_each_gt(gt, i915, i) {
@ -77,7 +79,7 @@ sysfs_gt_attribute_r_func(struct device *dev, struct device_attribute *attr,
}
}
} else {
gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
gt = intel_gt_sysfs_get_drvdata(kobj, attr->name);
ret = func(gt);
}
@ -92,6 +94,76 @@ sysfs_gt_attribute_r_func(struct device *dev, struct device_attribute *attr,
#define sysfs_gt_attribute_r_max_func(d, a, f) \
sysfs_gt_attribute_r_func(d, a, f, INTEL_GT_SYSFS_MAX)
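Reviewer note: the two wrappers above differ only in how multi-GT values are folded together when an attribute is read through the legacy device-level path; the _MAX variant starts from 0 and keeps the largest per-GT value, the _MIN variant starts from (u32)-1 and keeps the smallest, matching the seed values in sysfs_gt_attribute_r_func. A conceptual, user-space sketch of that folding (the driver's exact loop body is elided in the hunk above, so this is an illustration, not a copy):

#include <stdint.h>

/* want_max selects INTEL_GT_SYSFS_MAX-style folding, otherwise MIN. */
static uint32_t aggregate_gts(const uint32_t *per_gt, int count, int want_max)
{
        uint32_t ret = want_max ? 0 : (uint32_t)-1;
        int i;

        for (i = 0; i < count; i++) {
                if (want_max)
                        ret = per_gt[i] > ret ? per_gt[i] : ret;
                else
                        ret = per_gt[i] < ret ? per_gt[i] : ret;
        }

        return ret;
}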
#define INTEL_GT_SYSFS_SHOW(_name, _attr_type) \
static ssize_t _name##_show_common(struct kobject *kobj, \
struct attribute *attr, char *buff) \
{ \
u32 val = sysfs_gt_attribute_r_##_attr_type##_func(kobj, attr, \
__##_name##_show); \
\
return sysfs_emit(buff, "%u\n", val); \
} \
static ssize_t _name##_show(struct kobject *kobj, \
struct kobj_attribute *attr, char *buff) \
{ \
return _name ##_show_common(kobj, &attr->attr, buff); \
} \
static ssize_t _name##_dev_show(struct device *dev, \
struct device_attribute *attr, char *buff) \
{ \
return _name##_show_common(&dev->kobj, &attr->attr, buff); \
}
#define INTEL_GT_SYSFS_STORE(_name, _func) \
static ssize_t _name##_store_common(struct kobject *kobj, \
struct attribute *attr, \
const char *buff, size_t count) \
{ \
int ret; \
u32 val; \
\
ret = kstrtou32(buff, 0, &val); \
if (ret) \
return ret; \
\
ret = sysfs_gt_attribute_w_func(kobj, attr, _func, val); \
\
return ret ?: count; \
} \
static ssize_t _name##_store(struct kobject *kobj, \
struct kobj_attribute *attr, const char *buff, \
size_t count) \
{ \
return _name##_store_common(kobj, &attr->attr, buff, count); \
} \
static ssize_t _name##_dev_store(struct device *dev, \
struct device_attribute *attr, \
const char *buff, size_t count) \
{ \
return _name##_store_common(&dev->kobj, &attr->attr, buff, count); \
}
#define INTEL_GT_SYSFS_SHOW_MAX(_name) INTEL_GT_SYSFS_SHOW(_name, max)
#define INTEL_GT_SYSFS_SHOW_MIN(_name) INTEL_GT_SYSFS_SHOW(_name, min)
#define INTEL_GT_ATTR_RW(_name) \
static struct kobj_attribute attr_##_name = __ATTR_RW(_name)
#define INTEL_GT_ATTR_RO(_name) \
static struct kobj_attribute attr_##_name = __ATTR_RO(_name)
#define INTEL_GT_DUAL_ATTR_RW(_name) \
static struct device_attribute dev_attr_##_name = __ATTR(_name, 0644, \
_name##_dev_show, \
_name##_dev_store); \
INTEL_GT_ATTR_RW(_name)
#define INTEL_GT_DUAL_ATTR_RO(_name) \
static struct device_attribute dev_attr_##_name = __ATTR(_name, 0444, \
_name##_dev_show, \
NULL); \
INTEL_GT_ATTR_RO(_name)
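Reviewer note: to make the generated symbols easier to follow, here is a hand expansion (whitespace adjusted) of INTEL_GT_SYSFS_SHOW_MIN(rc6_residency_ms) together with INTEL_GT_DUAL_ATTR_RO(rc6_residency_ms), as used further down; it is derived purely from the macro bodies above:

static ssize_t rc6_residency_ms_show_common(struct kobject *kobj,
                                            struct attribute *attr, char *buff)
{
        u32 val = sysfs_gt_attribute_r_min_func(kobj, attr,
                                                __rc6_residency_ms_show);

        return sysfs_emit(buff, "%u\n", val);
}

static ssize_t rc6_residency_ms_show(struct kobject *kobj,
                                     struct kobj_attribute *attr, char *buff)
{
        return rc6_residency_ms_show_common(kobj, &attr->attr, buff);
}

static ssize_t rc6_residency_ms_dev_show(struct device *dev,
                                         struct device_attribute *attr, char *buff)
{
        return rc6_residency_ms_show_common(&dev->kobj, &attr->attr, buff);
}

static struct device_attribute dev_attr_rc6_residency_ms =
        __ATTR(rc6_residency_ms, 0444, rc6_residency_ms_dev_show, NULL);
static struct kobj_attribute attr_rc6_residency_ms = __ATTR_RO(rc6_residency_ms);

So each attribute ends up with a kobj_attribute for the per-gt directory and a device_attribute for the legacy device-level files, both funnelling into the same _show_common helper.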
#ifdef CONFIG_PM
static u32 get_residency(struct intel_gt *gt, i915_reg_t reg)
{
@ -104,11 +176,8 @@ static u32 get_residency(struct intel_gt *gt, i915_reg_t reg)
return DIV_ROUND_CLOSEST_ULL(res, 1000);
}
static ssize_t rc6_enable_show(struct device *dev,
struct device_attribute *attr,
char *buff)
static u8 get_rc6_mask(struct intel_gt *gt)
{
struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
u8 mask = 0;
if (HAS_RC6(gt->i915))
@ -118,7 +187,25 @@ static ssize_t rc6_enable_show(struct device *dev,
if (HAS_RC6pp(gt->i915))
mask |= BIT(2);
return sysfs_emit(buff, "%x\n", mask);
return mask;
}
static ssize_t rc6_enable_show(struct kobject *kobj,
struct kobj_attribute *attr,
char *buff)
{
struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
return sysfs_emit(buff, "%x\n", get_rc6_mask(gt));
}
static ssize_t rc6_enable_dev_show(struct device *dev,
struct device_attribute *attr,
char *buff)
{
struct intel_gt *gt = intel_gt_sysfs_get_drvdata(&dev->kobj, attr->attr.name);
return sysfs_emit(buff, "%x\n", get_rc6_mask(gt));
}
static u32 __rc6_residency_ms_show(struct intel_gt *gt)
@ -126,97 +213,79 @@ static u32 __rc6_residency_ms_show(struct intel_gt *gt)
return get_residency(gt, GEN6_GT_GFX_RC6);
}
static ssize_t rc6_residency_ms_show(struct device *dev,
struct device_attribute *attr,
char *buff)
{
u32 rc6_residency = sysfs_gt_attribute_r_min_func(dev, attr,
__rc6_residency_ms_show);
return sysfs_emit(buff, "%u\n", rc6_residency);
}
static u32 __rc6p_residency_ms_show(struct intel_gt *gt)
{
return get_residency(gt, GEN6_GT_GFX_RC6p);
}
static ssize_t rc6p_residency_ms_show(struct device *dev,
struct device_attribute *attr,
char *buff)
{
u32 rc6p_residency = sysfs_gt_attribute_r_min_func(dev, attr,
__rc6p_residency_ms_show);
return sysfs_emit(buff, "%u\n", rc6p_residency);
}
static u32 __rc6pp_residency_ms_show(struct intel_gt *gt)
{
return get_residency(gt, GEN6_GT_GFX_RC6pp);
}
static ssize_t rc6pp_residency_ms_show(struct device *dev,
struct device_attribute *attr,
char *buff)
{
u32 rc6pp_residency = sysfs_gt_attribute_r_min_func(dev, attr,
__rc6pp_residency_ms_show);
return sysfs_emit(buff, "%u\n", rc6pp_residency);
}
static u32 __media_rc6_residency_ms_show(struct intel_gt *gt)
{
return get_residency(gt, VLV_GT_MEDIA_RC6);
}
static ssize_t media_rc6_residency_ms_show(struct device *dev,
struct device_attribute *attr,
char *buff)
{
u32 rc6_residency = sysfs_gt_attribute_r_min_func(dev, attr,
__media_rc6_residency_ms_show);
INTEL_GT_SYSFS_SHOW_MIN(rc6_residency_ms);
INTEL_GT_SYSFS_SHOW_MIN(rc6p_residency_ms);
INTEL_GT_SYSFS_SHOW_MIN(rc6pp_residency_ms);
INTEL_GT_SYSFS_SHOW_MIN(media_rc6_residency_ms);
return sysfs_emit(buff, "%u\n", rc6_residency);
}
static DEVICE_ATTR_RO(rc6_enable);
static DEVICE_ATTR_RO(rc6_residency_ms);
static DEVICE_ATTR_RO(rc6p_residency_ms);
static DEVICE_ATTR_RO(rc6pp_residency_ms);
static DEVICE_ATTR_RO(media_rc6_residency_ms);
INTEL_GT_DUAL_ATTR_RO(rc6_enable);
INTEL_GT_DUAL_ATTR_RO(rc6_residency_ms);
INTEL_GT_DUAL_ATTR_RO(rc6p_residency_ms);
INTEL_GT_DUAL_ATTR_RO(rc6pp_residency_ms);
INTEL_GT_DUAL_ATTR_RO(media_rc6_residency_ms);
static struct attribute *rc6_attrs[] = {
&attr_rc6_enable.attr,
&attr_rc6_residency_ms.attr,
NULL
};
static struct attribute *rc6p_attrs[] = {
&attr_rc6p_residency_ms.attr,
&attr_rc6pp_residency_ms.attr,
NULL
};
static struct attribute *media_rc6_attrs[] = {
&attr_media_rc6_residency_ms.attr,
NULL
};
static struct attribute *rc6_dev_attrs[] = {
&dev_attr_rc6_enable.attr,
&dev_attr_rc6_residency_ms.attr,
NULL
};
static struct attribute *rc6p_attrs[] = {
static struct attribute *rc6p_dev_attrs[] = {
&dev_attr_rc6p_residency_ms.attr,
&dev_attr_rc6pp_residency_ms.attr,
NULL
};
static struct attribute *media_rc6_attrs[] = {
static struct attribute *media_rc6_dev_attrs[] = {
&dev_attr_media_rc6_residency_ms.attr,
NULL
};
static const struct attribute_group rc6_attr_group[] = {
{ .attrs = rc6_attrs, },
{ .name = power_group_name, .attrs = rc6_attrs, },
{ .name = power_group_name, .attrs = rc6_dev_attrs, },
};
static const struct attribute_group rc6p_attr_group[] = {
{ .attrs = rc6p_attrs, },
{ .name = power_group_name, .attrs = rc6p_attrs, },
{ .name = power_group_name, .attrs = rc6p_dev_attrs, },
};
static const struct attribute_group media_rc6_attr_group[] = {
{ .attrs = media_rc6_attrs, },
{ .name = power_group_name, .attrs = media_rc6_attrs, },
{ .name = power_group_name, .attrs = media_rc6_dev_attrs, },
};
static int __intel_gt_sysfs_create_group(struct kobject *kobj,
@ -271,104 +340,34 @@ static u32 __act_freq_mhz_show(struct intel_gt *gt)
return intel_rps_read_actual_frequency(&gt->rps);
}
static ssize_t act_freq_mhz_show(struct device *dev,
struct device_attribute *attr, char *buff)
{
u32 actual_freq = sysfs_gt_attribute_r_max_func(dev, attr,
__act_freq_mhz_show);
return sysfs_emit(buff, "%u\n", actual_freq);
}
static u32 __cur_freq_mhz_show(struct intel_gt *gt)
{
return intel_rps_get_requested_frequency(&gt->rps);
}
static ssize_t cur_freq_mhz_show(struct device *dev,
struct device_attribute *attr, char *buff)
{
u32 cur_freq = sysfs_gt_attribute_r_max_func(dev, attr,
__cur_freq_mhz_show);
return sysfs_emit(buff, "%u\n", cur_freq);
}
static u32 __boost_freq_mhz_show(struct intel_gt *gt)
{
return intel_rps_get_boost_frequency(&gt->rps);
}
static ssize_t boost_freq_mhz_show(struct device *dev,
struct device_attribute *attr,
char *buff)
{
u32 boost_freq = sysfs_gt_attribute_r_max_func(dev, attr,
__boost_freq_mhz_show);
return sysfs_emit(buff, "%u\n", boost_freq);
}
static int __boost_freq_mhz_store(struct intel_gt *gt, u32 val)
{
return intel_rps_set_boost_frequency(&gt->rps, val);
}
static ssize_t boost_freq_mhz_store(struct device *dev,
struct device_attribute *attr,
const char *buff, size_t count)
{
ssize_t ret;
u32 val;
ret = kstrtou32(buff, 0, &val);
if (ret)
return ret;
return sysfs_gt_attribute_w_func(dev, attr,
__boost_freq_mhz_store, val) ?: count;
}
static u32 __rp0_freq_mhz_show(struct intel_gt *gt)
static u32 __RP0_freq_mhz_show(struct intel_gt *gt)
{
return intel_rps_get_rp0_frequency(&gt->rps);
}
static ssize_t RP0_freq_mhz_show(struct device *dev,
struct device_attribute *attr, char *buff)
{
u32 rp0_freq = sysfs_gt_attribute_r_max_func(dev, attr,
__rp0_freq_mhz_show);
return sysfs_emit(buff, "%u\n", rp0_freq);
}
static u32 __rp1_freq_mhz_show(struct intel_gt *gt)
{
return intel_rps_get_rp1_frequency(&gt->rps);
}
static ssize_t RP1_freq_mhz_show(struct device *dev,
struct device_attribute *attr, char *buff)
{
u32 rp1_freq = sysfs_gt_attribute_r_max_func(dev, attr,
__rp1_freq_mhz_show);
return sysfs_emit(buff, "%u\n", rp1_freq);
}
static u32 __rpn_freq_mhz_show(struct intel_gt *gt)
static u32 __RPn_freq_mhz_show(struct intel_gt *gt)
{
return intel_rps_get_rpn_frequency(&gt->rps);
}
static ssize_t RPn_freq_mhz_show(struct device *dev,
struct device_attribute *attr, char *buff)
static u32 __RP1_freq_mhz_show(struct intel_gt *gt)
{
u32 rpn_freq = sysfs_gt_attribute_r_max_func(dev, attr,
__rpn_freq_mhz_show);
return sysfs_emit(buff, "%u\n", rpn_freq);
return intel_rps_get_rp1_frequency(&gt->rps);
}
static u32 __max_freq_mhz_show(struct intel_gt *gt)
@ -376,71 +375,21 @@ static u32 __max_freq_mhz_show(struct intel_gt *gt)
return intel_rps_get_max_frequency(&gt->rps);
}
static ssize_t max_freq_mhz_show(struct device *dev,
struct device_attribute *attr, char *buff)
{
u32 max_freq = sysfs_gt_attribute_r_max_func(dev, attr,
__max_freq_mhz_show);
return sysfs_emit(buff, "%u\n", max_freq);
}
static int __set_max_freq(struct intel_gt *gt, u32 val)
{
return intel_rps_set_max_frequency(&gt->rps, val);
}
static ssize_t max_freq_mhz_store(struct device *dev,
struct device_attribute *attr,
const char *buff, size_t count)
{
int ret;
u32 val;
ret = kstrtou32(buff, 0, &val);
if (ret)
return ret;
ret = sysfs_gt_attribute_w_func(dev, attr, __set_max_freq, val);
return ret ?: count;
}
static u32 __min_freq_mhz_show(struct intel_gt *gt)
{
return intel_rps_get_min_frequency(&gt->rps);
}
static ssize_t min_freq_mhz_show(struct device *dev,
struct device_attribute *attr, char *buff)
{
u32 min_freq = sysfs_gt_attribute_r_min_func(dev, attr,
__min_freq_mhz_show);
return sysfs_emit(buff, "%u\n", min_freq);
}
static int __set_min_freq(struct intel_gt *gt, u32 val)
{
return intel_rps_set_min_frequency(&gt->rps, val);
}
static ssize_t min_freq_mhz_store(struct device *dev,
struct device_attribute *attr,
const char *buff, size_t count)
{
int ret;
u32 val;
ret = kstrtou32(buff, 0, &val);
if (ret)
return ret;
ret = sysfs_gt_attribute_w_func(dev, attr, __set_min_freq, val);
return ret ?: count;
}
static u32 __vlv_rpe_freq_mhz_show(struct intel_gt *gt)
{
struct intel_rps *rps = &gt->rps;
@ -448,23 +397,31 @@ static u32 __vlv_rpe_freq_mhz_show(struct intel_gt *gt)
return intel_gpu_freq(rps, rps->efficient_freq);
}
static ssize_t vlv_rpe_freq_mhz_show(struct device *dev,
struct device_attribute *attr, char *buff)
{
u32 rpe_freq = sysfs_gt_attribute_r_max_func(dev, attr,
__vlv_rpe_freq_mhz_show);
INTEL_GT_SYSFS_SHOW_MAX(act_freq_mhz);
INTEL_GT_SYSFS_SHOW_MAX(boost_freq_mhz);
INTEL_GT_SYSFS_SHOW_MAX(cur_freq_mhz);
INTEL_GT_SYSFS_SHOW_MAX(RP0_freq_mhz);
INTEL_GT_SYSFS_SHOW_MAX(RP1_freq_mhz);
INTEL_GT_SYSFS_SHOW_MAX(RPn_freq_mhz);
INTEL_GT_SYSFS_SHOW_MAX(max_freq_mhz);
INTEL_GT_SYSFS_SHOW_MIN(min_freq_mhz);
INTEL_GT_SYSFS_SHOW_MAX(vlv_rpe_freq_mhz);
INTEL_GT_SYSFS_STORE(boost_freq_mhz, __boost_freq_mhz_store);
INTEL_GT_SYSFS_STORE(max_freq_mhz, __set_max_freq);
INTEL_GT_SYSFS_STORE(min_freq_mhz, __set_min_freq);
return sysfs_emit(buff, "%u\n", rpe_freq);
}
#define INTEL_GT_RPS_SYSFS_ATTR(_name, _mode, _show, _store, _show_dev, _store_dev) \
static struct device_attribute dev_attr_gt_##_name = __ATTR(gt_##_name, _mode, \
_show_dev, _store_dev); \
static struct kobj_attribute attr_rps_##_name = __ATTR(rps_##_name, _mode, \
_show, _store)
#define INTEL_GT_RPS_SYSFS_ATTR(_name, _mode, _show, _store) \
static struct device_attribute dev_attr_gt_##_name = __ATTR(gt_##_name, _mode, _show, _store); \
static struct device_attribute dev_attr_rps_##_name = __ATTR(rps_##_name, _mode, _show, _store)
#define INTEL_GT_RPS_SYSFS_ATTR_RO(_name) \
INTEL_GT_RPS_SYSFS_ATTR(_name, 0444, _name##_show, NULL)
#define INTEL_GT_RPS_SYSFS_ATTR_RW(_name) \
INTEL_GT_RPS_SYSFS_ATTR(_name, 0644, _name##_show, _name##_store)
#define INTEL_GT_RPS_SYSFS_ATTR_RO(_name) \
INTEL_GT_RPS_SYSFS_ATTR(_name, 0444, _name##_show, NULL, \
_name##_dev_show, NULL)
#define INTEL_GT_RPS_SYSFS_ATTR_RW(_name) \
INTEL_GT_RPS_SYSFS_ATTR(_name, 0644, _name##_show, _name##_store, \
_name##_dev_show, _name##_dev_store)
/* The below macros generate static structures */
INTEL_GT_RPS_SYSFS_ATTR_RO(act_freq_mhz);
@ -475,32 +432,31 @@ INTEL_GT_RPS_SYSFS_ATTR_RO(RP1_freq_mhz);
INTEL_GT_RPS_SYSFS_ATTR_RO(RPn_freq_mhz);
INTEL_GT_RPS_SYSFS_ATTR_RW(max_freq_mhz);
INTEL_GT_RPS_SYSFS_ATTR_RW(min_freq_mhz);
INTEL_GT_RPS_SYSFS_ATTR_RO(vlv_rpe_freq_mhz);
static DEVICE_ATTR_RO(vlv_rpe_freq_mhz);
#define GEN6_ATTR(s) { \
&dev_attr_##s##_act_freq_mhz.attr, \
&dev_attr_##s##_cur_freq_mhz.attr, \
&dev_attr_##s##_boost_freq_mhz.attr, \
&dev_attr_##s##_max_freq_mhz.attr, \
&dev_attr_##s##_min_freq_mhz.attr, \
&dev_attr_##s##_RP0_freq_mhz.attr, \
&dev_attr_##s##_RP1_freq_mhz.attr, \
&dev_attr_##s##_RPn_freq_mhz.attr, \
#define GEN6_ATTR(p, s) { \
&p##attr_##s##_act_freq_mhz.attr, \
&p##attr_##s##_cur_freq_mhz.attr, \
&p##attr_##s##_boost_freq_mhz.attr, \
&p##attr_##s##_max_freq_mhz.attr, \
&p##attr_##s##_min_freq_mhz.attr, \
&p##attr_##s##_RP0_freq_mhz.attr, \
&p##attr_##s##_RP1_freq_mhz.attr, \
&p##attr_##s##_RPn_freq_mhz.attr, \
NULL, \
}
#define GEN6_RPS_ATTR GEN6_ATTR(rps)
#define GEN6_GT_ATTR GEN6_ATTR(gt)
#define GEN6_RPS_ATTR GEN6_ATTR(, rps)
#define GEN6_GT_ATTR GEN6_ATTR(dev_, gt)
static const struct attribute * const gen6_rps_attrs[] = GEN6_RPS_ATTR;
static const struct attribute * const gen6_gt_attrs[] = GEN6_GT_ATTR;
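Reviewer note: expanding the first entries shows how the prefix argument of GEN6_ATTR selects between the kobj_attribute and device_attribute variants created by INTEL_GT_RPS_SYSFS_ATTR above (remaining entries follow the same pattern):

/* GEN6_RPS_ATTR == GEN6_ATTR(, rps): per-gt kobj_attribute list */
static const struct attribute * const gen6_rps_attrs[] = {
        &attr_rps_act_freq_mhz.attr,
        &attr_rps_cur_freq_mhz.attr,
        /* ... */
        NULL,
};

/* GEN6_GT_ATTR == GEN6_ATTR(dev_, gt): legacy device_attribute list */
static const struct attribute * const gen6_gt_attrs[] = {
        &dev_attr_gt_act_freq_mhz.attr,
        &dev_attr_gt_cur_freq_mhz.attr,
        /* ... */
        NULL,
};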
static ssize_t punit_req_freq_mhz_show(struct device *dev,
struct device_attribute *attr,
static ssize_t punit_req_freq_mhz_show(struct kobject *kobj,
struct kobj_attribute *attr,
char *buff)
{
struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
u32 preq = intel_rps_read_punit_req_frequency(&gt->rps);
return sysfs_emit(buff, "%u\n", preq);
@ -508,20 +464,20 @@ static ssize_t punit_req_freq_mhz_show(struct device *dev,
struct intel_gt_bool_throttle_attr {
struct attribute attr;
ssize_t (*show)(struct device *dev, struct device_attribute *attr,
ssize_t (*show)(struct kobject *kobj, struct kobj_attribute *attr,
char *buf);
i915_reg_t reg32;
i915_reg_t (*reg32)(struct intel_gt *gt);
u32 mask;
};
static ssize_t throttle_reason_bool_show(struct device *dev,
struct device_attribute *attr,
static ssize_t throttle_reason_bool_show(struct kobject *kobj,
struct kobj_attribute *attr,
char *buff)
{
struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
struct intel_gt_bool_throttle_attr *t_attr =
(struct intel_gt_bool_throttle_attr *) attr;
bool val = rps_read_mask_mmio(&gt->rps, t_attr->reg32, t_attr->mask);
bool val = rps_read_mask_mmio(&gt->rps, t_attr->reg32(gt), t_attr->mask);
return sysfs_emit(buff, "%u\n", val);
}
@ -530,11 +486,11 @@ static ssize_t throttle_reason_bool_show(struct device *dev,
struct intel_gt_bool_throttle_attr attr_##sysfs_func__ = { \
.attr = { .name = __stringify(sysfs_func__), .mode = 0444 }, \
.show = throttle_reason_bool_show, \
.reg32 = GT0_PERF_LIMIT_REASONS, \
.reg32 = intel_gt_perf_limit_reasons_reg, \
.mask = mask__, \
}
static DEVICE_ATTR_RO(punit_req_freq_mhz);
INTEL_GT_ATTR_RO(punit_req_freq_mhz);
static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_status, GT0_PERF_LIMIT_REASONS_MASK);
static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_pl1, POWER_LIMIT_1_MASK);
static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_pl2, POWER_LIMIT_2_MASK);
@ -597,8 +553,8 @@ static const struct attribute *throttle_reason_attrs[] = {
#define U8_8_VAL_MASK 0xffff
#define U8_8_SCALE_TO_VALUE "0.00390625"
static ssize_t freq_factor_scale_show(struct device *dev,
struct device_attribute *attr,
static ssize_t freq_factor_scale_show(struct kobject *kobj,
struct kobj_attribute *attr,
char *buff)
{
return sysfs_emit(buff, "%s\n", U8_8_SCALE_TO_VALUE);
@ -610,11 +566,11 @@ static u32 media_ratio_mode_to_factor(u32 mode)
return !mode ? mode : 256 / mode;
}
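Reviewer note, a quick worked example of the factor encoding using the u8.8 scale exposed above (0.00390625 = 1/256): mode 0 stays 0 and denotes the dynamic setting, mode 1 maps to a factor of 256, which reads back as 256 x 0.00390625 = 1.0, and mode 2 maps to 128, which reads back as 0.5.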
static ssize_t media_freq_factor_show(struct device *dev,
struct device_attribute *attr,
static ssize_t media_freq_factor_show(struct kobject *kobj,
struct kobj_attribute *attr,
char *buff)
{
struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
struct intel_guc_slpc *slpc = &gt->uc.guc.slpc;
intel_wakeref_t wakeref;
u32 mode;
@ -641,11 +597,11 @@ static ssize_t media_freq_factor_show(struct device *dev,
return sysfs_emit(buff, "%u\n", media_ratio_mode_to_factor(mode));
}
static ssize_t media_freq_factor_store(struct device *dev,
struct device_attribute *attr,
static ssize_t media_freq_factor_store(struct kobject *kobj,
struct kobj_attribute *attr,
const char *buff, size_t count)
{
struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
struct intel_guc_slpc *slpc = &gt->uc.guc.slpc;
u32 factor, mode;
int err;
@ -670,11 +626,11 @@ static ssize_t media_freq_factor_store(struct device *dev,
return err ?: count;
}
static ssize_t media_RP0_freq_mhz_show(struct device *dev,
struct device_attribute *attr,
static ssize_t media_RP0_freq_mhz_show(struct kobject *kobj,
struct kobj_attribute *attr,
char *buff)
{
struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
u32 val;
int err;
@ -691,11 +647,11 @@ static ssize_t media_RP0_freq_mhz_show(struct device *dev,
return sysfs_emit(buff, "%u\n", val);
}
static ssize_t media_RPn_freq_mhz_show(struct device *dev,
struct device_attribute *attr,
static ssize_t media_RPn_freq_mhz_show(struct kobject *kobj,
struct kobj_attribute *attr,
char *buff)
{
struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
u32 val;
int err;
@ -712,17 +668,17 @@ static ssize_t media_RPn_freq_mhz_show(struct device *dev,
return sysfs_emit(buff, "%u\n", val);
}
static DEVICE_ATTR_RW(media_freq_factor);
static struct device_attribute dev_attr_media_freq_factor_scale =
INTEL_GT_ATTR_RW(media_freq_factor);
static struct kobj_attribute attr_media_freq_factor_scale =
__ATTR(media_freq_factor.scale, 0444, freq_factor_scale_show, NULL);
static DEVICE_ATTR_RO(media_RP0_freq_mhz);
static DEVICE_ATTR_RO(media_RPn_freq_mhz);
INTEL_GT_ATTR_RO(media_RP0_freq_mhz);
INTEL_GT_ATTR_RO(media_RPn_freq_mhz);
static const struct attribute *media_perf_power_attrs[] = {
&dev_attr_media_freq_factor.attr,
&dev_attr_media_freq_factor_scale.attr,
&dev_attr_media_RP0_freq_mhz.attr,
&dev_attr_media_RPn_freq_mhz.attr,
&attr_media_freq_factor.attr,
&attr_media_freq_factor_scale.attr,
&attr_media_RP0_freq_mhz.attr,
&attr_media_RPn_freq_mhz.attr,
NULL
};
@ -754,20 +710,29 @@ static const struct attribute * const rps_defaults_attrs[] = {
NULL
};
static int intel_sysfs_rps_init(struct intel_gt *gt, struct kobject *kobj,
const struct attribute * const *attrs)
static int intel_sysfs_rps_init(struct intel_gt *gt, struct kobject *kobj)
{
const struct attribute * const *attrs;
struct attribute *vlv_attr;
int ret;
if (GRAPHICS_VER(gt->i915) < 6)
return 0;
if (is_object_gt(kobj)) {
attrs = gen6_rps_attrs;
vlv_attr = &attr_rps_vlv_rpe_freq_mhz.attr;
} else {
attrs = gen6_gt_attrs;
vlv_attr = &dev_attr_gt_vlv_rpe_freq_mhz.attr;
}
ret = sysfs_create_files(kobj, attrs);
if (ret)
return ret;
if (IS_VALLEYVIEW(gt->i915) || IS_CHERRYVIEW(gt->i915))
ret = sysfs_create_file(kobj, &dev_attr_vlv_rpe_freq_mhz.attr);
ret = sysfs_create_file(kobj, vlv_attr);
return ret;
}
@ -778,9 +743,7 @@ void intel_gt_sysfs_pm_init(struct intel_gt *gt, struct kobject *kobj)
intel_sysfs_rc6_init(gt, kobj);
ret = is_object_gt(kobj) ?
intel_sysfs_rps_init(gt, kobj, gen6_rps_attrs) :
intel_sysfs_rps_init(gt, kobj, gen6_gt_attrs);
ret = intel_sysfs_rps_init(gt, kobj);
if (ret)
drm_warn(&gt->i915->drm,
"failed to create gt%u RPS sysfs files (%pe)",
@ -790,13 +753,13 @@ void intel_gt_sysfs_pm_init(struct intel_gt *gt, struct kobject *kobj)
if (!is_object_gt(kobj))
return;
ret = sysfs_create_file(kobj, &dev_attr_punit_req_freq_mhz.attr);
ret = sysfs_create_file(kobj, &attr_punit_req_freq_mhz.attr);
if (ret)
drm_warn(&gt->i915->drm,
"failed to create gt%u punit_req_freq_mhz sysfs (%pe)",
gt->info.id, ERR_PTR(ret));
if (GRAPHICS_VER(gt->i915) >= 11) {
if (i915_mmio_reg_valid(intel_gt_perf_limit_reasons_reg(gt))) {
ret = sysfs_create_files(kobj, throttle_reason_attrs);
if (ret)
drm_warn(&gt->i915->drm,

View File

@ -20,6 +20,7 @@
#include "intel_gsc.h"
#include "i915_vma.h"
#include "i915_perf_types.h"
#include "intel_engine_types.h"
#include "intel_gt_buffer_pool_types.h"
#include "intel_hwconfig.h"
@ -59,6 +60,9 @@ enum intel_steering_type {
L3BANK,
MSLICE,
LNCF,
GAM,
DSS,
OADDRM,
/*
* On some platforms there are multiple types of MCR registers that
@ -141,20 +145,6 @@ struct intel_gt {
struct intel_wakeref wakeref;
atomic_t user_wakeref;
/**
* Protects access to lmem usefault list.
* It is required, if we are outside of the runtime suspend path,
* access to @lmem_userfault_list requires always first grabbing the
* runtime pm, to ensure we can't race against runtime suspend.
* Once we have that we also need to grab @lmem_userfault_lock,
* at which point we have exclusive access.
* The runtime suspend path is special since it doesn't really hold any locks,
* but instead has exclusive access by virtue of all other accesses requiring
* holding the runtime pm wakeref.
*/
struct mutex lmem_userfault_lock;
struct list_head lmem_userfault_list;
struct list_head closed_vma;
spinlock_t closed_lock; /* guards the list of closed_vma */
@ -170,9 +160,6 @@ struct intel_gt {
*/
intel_wakeref_t awake;
/* Manual runtime pm autosuspend delay for user GGTT/lmem mmaps */
struct intel_wakeref_auto userfault_wakeref;
u32 clock_frequency;
u32 clock_period_ns;
@ -286,6 +273,8 @@ struct intel_gt {
/* sysfs defaults per gt */
struct gt_defaults defaults;
struct kobject *sysfs_defaults;
struct i915_perf_gt perf;
};
struct intel_gt_definition {

View File

@ -15,6 +15,7 @@
#include "i915_trace.h"
#include "i915_utils.h"
#include "intel_gt.h"
#include "intel_gt_mcr.h"
#include "intel_gt_regs.h"
#include "intel_gtt.h"
@ -269,11 +270,7 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass)
memset64(vm->min_alignment, I915_GTT_MIN_ALIGNMENT,
ARRAY_SIZE(vm->min_alignment));
if (HAS_64K_PAGES(vm->i915) && NEEDS_COMPACT_PT(vm->i915) &&
subclass == VM_CLASS_PPGTT) {
vm->min_alignment[INTEL_MEMORY_LOCAL] = I915_GTT_PAGE_SIZE_2M;
vm->min_alignment[INTEL_MEMORY_STOLEN_LOCAL] = I915_GTT_PAGE_SIZE_2M;
} else if (HAS_64K_PAGES(vm->i915)) {
if (HAS_64K_PAGES(vm->i915)) {
vm->min_alignment[INTEL_MEMORY_LOCAL] = I915_GTT_PAGE_SIZE_64K;
vm->min_alignment[INTEL_MEMORY_STOLEN_LOCAL] = I915_GTT_PAGE_SIZE_64K;
}
@ -343,7 +340,8 @@ int setup_scratch_page(struct i915_address_space *vm)
*/
size = I915_GTT_PAGE_SIZE_4K;
if (i915_vm_is_4lvl(vm) &&
HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K))
HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K) &&
!HAS_64K_PAGES(vm->i915))
size = I915_GTT_PAGE_SIZE_64K;
do {
@ -385,18 +383,6 @@ skip:
if (size == I915_GTT_PAGE_SIZE_4K)
return -ENOMEM;
/*
* If we need 64K minimum GTT pages for device local-memory,
* like on XEHPSDV, then we need to fail the allocation here,
* otherwise we can't safely support the insertion of
* local-memory pages for this vm, since the HW expects the
* correct physical alignment and size when the page-table is
* operating in 64K GTT mode, which includes any scratch PTEs,
* since userspace can still touch them.
*/
if (HAS_64K_PAGES(vm->i915))
return -ENOMEM;
size = I915_GTT_PAGE_SIZE_4K;
} while (1);
}
@ -493,6 +479,18 @@ static void tgl_setup_private_ppat(struct intel_uncore *uncore)
intel_uncore_write(uncore, GEN12_PAT_INDEX(7), GEN8_PPAT_WB);
}
static void xehp_setup_private_ppat(struct intel_gt *gt)
{
intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(0), GEN8_PPAT_WB);
intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(1), GEN8_PPAT_WC);
intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(2), GEN8_PPAT_WT);
intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(3), GEN8_PPAT_UC);
intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(4), GEN8_PPAT_WB);
intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(5), GEN8_PPAT_WB);
intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(6), GEN8_PPAT_WB);
intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(7), GEN8_PPAT_WB);
}
static void icl_setup_private_ppat(struct intel_uncore *uncore)
{
intel_uncore_write(uncore,
@ -585,13 +583,16 @@ static void chv_setup_private_ppat(struct intel_uncore *uncore)
intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
}
void setup_private_pat(struct intel_uncore *uncore)
void setup_private_pat(struct intel_gt *gt)
{
struct drm_i915_private *i915 = uncore->i915;
struct intel_uncore *uncore = gt->uncore;
struct drm_i915_private *i915 = gt->i915;
GEM_BUG_ON(GRAPHICS_VER(i915) < 8);
if (GRAPHICS_VER(i915) >= 12)
if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
xehp_setup_private_ppat(gt);
else if (GRAPHICS_VER(i915) >= 12)
tgl_setup_private_ppat(uncore);
else if (GRAPHICS_VER(i915) >= 11)
icl_setup_private_ppat(uncore);

View File

@ -93,6 +93,7 @@ typedef u64 gen8_pte_t;
#define GEN12_GGTT_PTE_LM BIT_ULL(1)
#define GEN12_PDE_64K BIT(6)
#define GEN12_PTE_PS64 BIT(8)
/*
* Cacheability Control is a 4-bit value. The low three bits are stored in bits
@ -667,7 +668,7 @@ void ppgtt_unbind_vma(struct i915_address_space *vm,
void gtt_write_workarounds(struct intel_gt *gt);
void setup_private_pat(struct intel_uncore *uncore);
void setup_private_pat(struct intel_gt *gt);
int i915_vm_alloc_pt_stash(struct i915_address_space *vm,
struct i915_vm_pt_stash *stash,

View File

@ -20,6 +20,30 @@
#include "intel_ring.h"
#include "shmem_utils.h"
/*
* The per-platform tables are u8-encoded in @data. Decode @data and set the
* addresses' offset and commands in @regs. The following encoding is used
* for each byte. There are 2 steps: decoding commands and decoding addresses.
*
* Commands:
* [7]: create NOPs - number of NOPs are set in lower bits
* [6]: When creating MI_LOAD_REGISTER_IMM command, allow to set
* MI_LRI_FORCE_POSTED
* [5:0]: Number of NOPs or registers to set values to in case of
* MI_LOAD_REGISTER_IMM
*
* Addresses: these are decoded after a MI_LOAD_REGISTER_IMM command by "count"
* number of registers. They are set by using the REG/REG16 macros: the former
* is used for offsets smaller than 0x200 while the latter is for values bigger
* than that. Those macros already set all the bits documented below correctly:
*
* [7]: When a register offset needs more than 6 bits, use additional bytes, to
* follow, for the lower bits
* [6:0]: Register offset, without considering the engine base.
*
* This function only tweaks the commands and register offsets. Values are not
* filled out.
*/
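Reviewer note: since the encoding is compact, a minimal user-space sketch of a decoder may help when reading the tables below. It is based only on the comment above; the END terminator value (assumed to be a 0 byte), the NOP count width, and the dword granularity of the stored offsets are assumptions, not taken from the driver:

#include <stdint.h>
#include <stdio.h>

/*
 * Decode one u8-encoded table as described in the comment above.
 * Assumptions: END is a 0 byte, the NOP count lives in the low bits,
 * and register offsets are stored in dwords (shifted left by 2 here
 * before printing).
 */
static void decode_offsets(const uint8_t *data)
{
        while (*data) {
                uint8_t cmd = *data++;

                if (cmd & 0x80) {               /* [7]: emit NOPs */
                        printf("NOP x%u\n", cmd & 0x7f);
                        continue;
                }

                /* MI_LOAD_REGISTER_IMM: [6] force posted, [5:0] count */
                unsigned int count = cmd & 0x3f;

                printf("LRI %u%s\n", count, (cmd & 0x40) ? ", posted" : "");
                while (count--) {
                        uint32_t offset = 0;
                        uint8_t v;

                        do {                    /* [7]: more offset bytes follow */
                                v = *data++;
                                offset = (offset << 7) | (v & 0x7f);
                        } while (v & 0x80);

                        printf("  reg 0x%x\n", offset << 2);
                }
        }
}

int main(void)
{
        static const uint8_t demo[] = { 0x81, 0x00 };  /* NOP(1), END */

        decode_offsets(demo);
        return 0;
}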
static void set_offsets(u32 *regs,
const u8 *data,
const struct intel_engine_cs *engine,
@ -264,6 +288,39 @@ static const u8 dg2_xcs_offsets[] = {
END
};
static const u8 mtl_xcs_offsets[] = {
NOP(1),
LRI(13, POSTED),
REG16(0x244),
REG(0x034),
REG(0x030),
REG(0x038),
REG(0x03c),
REG(0x168),
REG(0x140),
REG(0x110),
REG(0x1c0),
REG(0x1c4),
REG(0x1c8),
REG(0x180),
REG16(0x2b4),
NOP(4),
NOP(1),
LRI(9, POSTED),
REG16(0x3a8),
REG16(0x28c),
REG16(0x288),
REG16(0x284),
REG16(0x280),
REG16(0x27c),
REG16(0x278),
REG16(0x274),
REG16(0x270),
END
};
static const u8 gen8_rcs_offsets[] = {
NOP(1),
LRI(14, POSTED),
@ -606,6 +663,49 @@ static const u8 dg2_rcs_offsets[] = {
END
};
static const u8 mtl_rcs_offsets[] = {
NOP(1),
LRI(15, POSTED),
REG16(0x244),
REG(0x034),
REG(0x030),
REG(0x038),
REG(0x03c),
REG(0x168),
REG(0x140),
REG(0x110),
REG(0x1c0),
REG(0x1c4),
REG(0x1c8),
REG(0x180),
REG16(0x2b4),
REG(0x120),
REG(0x124),
NOP(1),
LRI(9, POSTED),
REG16(0x3a8),
REG16(0x28c),
REG16(0x288),
REG16(0x284),
REG16(0x280),
REG16(0x27c),
REG16(0x278),
REG16(0x274),
REG16(0x270),
NOP(2),
LRI(2, POSTED),
REG16(0x5a8),
REG16(0x5ac),
NOP(6),
LRI(1, 0),
REG(0x0c8),
END
};
#undef END
#undef REG16
#undef REG
@ -624,7 +724,9 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine)
!intel_engine_has_relative_mmio(engine));
if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE) {
if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 70))
return mtl_rcs_offsets;
else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
return dg2_rcs_offsets;
else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
return xehp_rcs_offsets;
@ -637,7 +739,9 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine)
else
return gen8_rcs_offsets;
} else {
if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 70))
return mtl_xcs_offsets;
else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
return dg2_xcs_offsets;
else if (GRAPHICS_VER(engine->i915) >= 12)
return gen12_xcs_offsets;
@ -745,19 +849,18 @@ static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine)
static u32
lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine)
{
switch (GRAPHICS_VER(engine->i915)) {
default:
MISSING_CASE(GRAPHICS_VER(engine->i915));
fallthrough;
case 12:
if (GRAPHICS_VER(engine->i915) >= 12)
return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
case 11:
else if (GRAPHICS_VER(engine->i915) >= 11)
return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
case 9:
else if (GRAPHICS_VER(engine->i915) >= 9)
return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
case 8:
else if (GRAPHICS_VER(engine->i915) >= 8)
return GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
}
GEM_BUG_ON(GRAPHICS_VER(engine->i915) < 8);
return 0;
}
static void
@ -1012,7 +1115,7 @@ __lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine)
if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
context_size += I915_GTT_PAGE_SIZE; /* for redzone */
if (GRAPHICS_VER(engine->i915) == 12) {
if (GRAPHICS_VER(engine->i915) >= 12) {
ce->wa_bb_page = context_size / PAGE_SIZE;
context_size += PAGE_SIZE;
}
@ -1718,24 +1821,16 @@ void lrc_init_wa_ctx(struct intel_engine_cs *engine)
unsigned int i;
int err;
if (!(engine->flags & I915_ENGINE_HAS_RCS_REG_STATE))
if (GRAPHICS_VER(engine->i915) >= 11 ||
!(engine->flags & I915_ENGINE_HAS_RCS_REG_STATE))
return;
switch (GRAPHICS_VER(engine->i915)) {
case 12:
case 11:
return;
case 9:
if (GRAPHICS_VER(engine->i915) == 9) {
wa_bb_fn[0] = gen9_init_indirectctx_bb;
wa_bb_fn[1] = NULL;
break;
case 8:
} else if (GRAPHICS_VER(engine->i915) == 8) {
wa_bb_fn[0] = gen8_init_indirectctx_bb;
wa_bb_fn[1] = NULL;
break;
default:
MISSING_CASE(GRAPHICS_VER(engine->i915));
return;
}
err = lrc_create_wa_ctx(engine);

View File

@ -110,6 +110,8 @@ enum {
#define XEHP_SW_CTX_ID_WIDTH 16
#define XEHP_SW_COUNTER_SHIFT 58
#define XEHP_SW_COUNTER_WIDTH 6
#define GEN12_GUC_SW_CTX_ID_SHIFT 39
#define GEN12_GUC_SW_CTX_ID_WIDTH 16
static inline void lrc_runtime_start(struct intel_context *ce)
{

View File

@ -10,6 +10,7 @@
#include "intel_gtt.h"
#include "intel_migrate.h"
#include "intel_ring.h"
#include "gem/i915_gem_lmem.h"
struct insert_pte_data {
u64 offset;

View File

@ -7,6 +7,7 @@
#include "intel_engine.h"
#include "intel_gt.h"
#include "intel_gt_mcr.h"
#include "intel_gt_regs.h"
#include "intel_mocs.h"
#include "intel_ring.h"
@ -609,14 +610,17 @@ static u32 l3cc_combine(u16 low, u16 high)
0; \
i++)
static void init_l3cc_table(struct intel_uncore *uncore,
static void init_l3cc_table(struct intel_gt *gt,
const struct drm_i915_mocs_table *table)
{
unsigned int i;
u32 l3cc;
for_each_l3cc(l3cc, table, i)
intel_uncore_write_fw(uncore, GEN9_LNCFCMOCS(i), l3cc);
if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
intel_gt_mcr_multicast_write_fw(gt, XEHP_LNCFCMOCS(i), l3cc);
else
intel_uncore_write_fw(gt->uncore, GEN9_LNCFCMOCS(i), l3cc);
}
void intel_mocs_init_engine(struct intel_engine_cs *engine)
@ -636,7 +640,7 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
init_mocs_table(engine, &table);
if (flags & HAS_RENDER_L3CC && engine->class == RENDER_CLASS)
init_l3cc_table(engine->uncore, &table);
init_l3cc_table(engine->gt, &table);
}
static u32 global_mocs_offset(void)
@ -672,7 +676,7 @@ void intel_mocs_init(struct intel_gt *gt)
* memory transactions including guc transactions
*/
if (flags & HAS_RENDER_L3CC)
init_l3cc_table(gt->uncore, &table);
init_l3cc_table(gt, &table);
}
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)

View File

@ -1278,7 +1278,7 @@ static void intel_gt_reset_global(struct intel_gt *gt,
kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event);
/* Use a watchdog to ensure that our reset completes */
intel_wedge_on_timeout(&w, gt, 5 * HZ) {
intel_wedge_on_timeout(&w, gt, 60 * HZ) {
intel_display_prepare_reset(gt->i915);
intel_gt_reset(gt, engine_mask, reason);

View File

@ -625,9 +625,7 @@ static void gen5_rps_disable(struct intel_rps *rps)
rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);
/* Ack interrupts, disable EFC interrupt */
intel_uncore_write(uncore, MEMINTREN,
intel_uncore_read(uncore, MEMINTREN) &
~MEMINT_EVAL_CHG_EN);
intel_uncore_rmw(uncore, MEMINTREN, MEMINT_EVAL_CHG_EN, 0);
intel_uncore_write(uncore, MEMINTRSTS, MEMINT_EVAL_CHG);
/* Go back to the starting frequency */
@ -1016,9 +1014,15 @@ void intel_rps_boost(struct i915_request *rq)
if (rps_uses_slpc(rps)) {
slpc = rps_to_slpc(rps);
if (slpc->min_freq_softlimit >= slpc->boost_freq)
return;
/* Return if old value is non zero */
if (!atomic_fetch_inc(&slpc->num_waiters))
if (!atomic_fetch_inc(&slpc->num_waiters)) {
GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n",
rq->fence.context, rq->fence.seqno);
schedule_work(&slpc->boost_work);
}
return;
}
@ -1085,15 +1089,25 @@ static u32 intel_rps_read_state_cap(struct intel_rps *rps)
return intel_uncore_read(uncore, GEN6_RP_STATE_CAP);
}
/**
* gen6_rps_get_freq_caps - Get freq caps exposed by HW
* @rps: the intel_rps structure
* @caps: returned freq caps
*
* Returned "caps" frequencies should be converted to MHz using
* intel_gpu_freq()
*/
void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps)
static void
mtl_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps)
{
struct intel_uncore *uncore = rps_to_uncore(rps);
u32 rp_state_cap = rps_to_gt(rps)->type == GT_MEDIA ?
intel_uncore_read(uncore, MTL_MEDIAP_STATE_CAP) :
intel_uncore_read(uncore, MTL_RP_STATE_CAP);
u32 rpe = rps_to_gt(rps)->type == GT_MEDIA ?
intel_uncore_read(uncore, MTL_MPE_FREQUENCY) :
intel_uncore_read(uncore, MTL_GT_RPE_FREQUENCY);
/* MTL values are in units of 16.67 MHz */
caps->rp0_freq = REG_FIELD_GET(MTL_RP0_CAP_MASK, rp_state_cap);
caps->min_freq = REG_FIELD_GET(MTL_RPN_CAP_MASK, rp_state_cap);
caps->rp1_freq = REG_FIELD_GET(MTL_RPE_MASK, rpe);
}
static void
__gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps)
{
struct drm_i915_private *i915 = rps_to_i915(rps);
u32 rp_state_cap;
@ -1128,6 +1142,24 @@ void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *c
}
}
/**
* gen6_rps_get_freq_caps - Get freq caps exposed by HW
* @rps: the intel_rps structure
* @caps: returned freq caps
*
* Returned "caps" frequencies should be converted to MHz using
* intel_gpu_freq()
*/
void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps)
{
struct drm_i915_private *i915 = rps_to_i915(rps);
if (IS_METEORLAKE(i915))
return mtl_get_freq_caps(rps, caps);
else
return __gen6_rps_get_freq_caps(rps, caps);
}
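Reviewer note: as the kernel-doc above says, callers convert the raw caps to MHz themselves; on MTL the raw values are in 16.67 MHz units, which intel_gpu_freq() is expected to account for. A hedged usage sketch, assuming the usual driver headers are in scope, along the lines of the dump helpers further down:

/* Sketch: print the RP0/RP1/RPn range of a gt in MHz. */
static void print_freq_caps(struct intel_gt *gt, struct drm_printer *p)
{
        struct intel_rps_freq_caps caps;

        gen6_rps_get_freq_caps(&gt->rps, &caps);
        drm_printf(p, "RP0 %d MHz, RP1 %d MHz, RPn %d MHz\n",
                   intel_gpu_freq(&gt->rps, caps.rp0_freq),
                   intel_gpu_freq(&gt->rps, caps.rp1_freq),
                   intel_gpu_freq(&gt->rps, caps.min_freq));
}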
static void gen6_rps_init(struct intel_rps *rps)
{
struct drm_i915_private *i915 = rps_to_i915(rps);
@ -2191,6 +2223,213 @@ u32 intel_rps_get_rpn_frequency(struct intel_rps *rps)
return intel_gpu_freq(rps, rps->min_freq);
}
static void rps_frequency_dump(struct intel_rps *rps, struct drm_printer *p)
{
struct intel_gt *gt = rps_to_gt(rps);
struct drm_i915_private *i915 = gt->i915;
struct intel_uncore *uncore = gt->uncore;
struct intel_rps_freq_caps caps;
u32 rp_state_limits;
u32 gt_perf_status;
u32 rpmodectl, rpinclimit, rpdeclimit;
u32 rpstat, cagf, reqf;
u32 rpcurupei, rpcurup, rpprevup;
u32 rpcurdownei, rpcurdown, rpprevdown;
u32 rpupei, rpupt, rpdownei, rpdownt;
u32 pm_ier, pm_imr, pm_isr, pm_iir, pm_mask;
rp_state_limits = intel_uncore_read(uncore, GEN6_RP_STATE_LIMITS);
gen6_rps_get_freq_caps(rps, &caps);
if (IS_GEN9_LP(i915))
gt_perf_status = intel_uncore_read(uncore, BXT_GT_PERF_STATUS);
else
gt_perf_status = intel_uncore_read(uncore, GEN6_GT_PERF_STATUS);
/* RPSTAT1 is in the GT power well */
intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
reqf = intel_uncore_read(uncore, GEN6_RPNSWREQ);
if (GRAPHICS_VER(i915) >= 9) {
reqf >>= 23;
} else {
reqf &= ~GEN6_TURBO_DISABLE;
if (IS_HASWELL(i915) || IS_BROADWELL(i915))
reqf >>= 24;
else
reqf >>= 25;
}
reqf = intel_gpu_freq(rps, reqf);
rpmodectl = intel_uncore_read(uncore, GEN6_RP_CONTROL);
rpinclimit = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD);
rpdeclimit = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD);
rpstat = intel_uncore_read(uncore, GEN6_RPSTAT1);
rpcurupei = intel_uncore_read(uncore, GEN6_RP_CUR_UP_EI) & GEN6_CURICONT_MASK;
rpcurup = intel_uncore_read(uncore, GEN6_RP_CUR_UP) & GEN6_CURBSYTAVG_MASK;
rpprevup = intel_uncore_read(uncore, GEN6_RP_PREV_UP) & GEN6_CURBSYTAVG_MASK;
rpcurdownei = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN_EI) & GEN6_CURIAVG_MASK;
rpcurdown = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN) & GEN6_CURBSYTAVG_MASK;
rpprevdown = intel_uncore_read(uncore, GEN6_RP_PREV_DOWN) & GEN6_CURBSYTAVG_MASK;
rpupei = intel_uncore_read(uncore, GEN6_RP_UP_EI);
rpupt = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD);
rpdownei = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
rpdownt = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD);
cagf = intel_rps_read_actual_frequency(rps);
intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
if (GRAPHICS_VER(i915) >= 11) {
pm_ier = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE);
pm_imr = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_MASK);
/*
* The equivalent to the PM ISR & IIR cannot be read
* without affecting the current state of the system
*/
pm_isr = 0;
pm_iir = 0;
} else if (GRAPHICS_VER(i915) >= 8) {
pm_ier = intel_uncore_read(uncore, GEN8_GT_IER(2));
pm_imr = intel_uncore_read(uncore, GEN8_GT_IMR(2));
pm_isr = intel_uncore_read(uncore, GEN8_GT_ISR(2));
pm_iir = intel_uncore_read(uncore, GEN8_GT_IIR(2));
} else {
pm_ier = intel_uncore_read(uncore, GEN6_PMIER);
pm_imr = intel_uncore_read(uncore, GEN6_PMIMR);
pm_isr = intel_uncore_read(uncore, GEN6_PMISR);
pm_iir = intel_uncore_read(uncore, GEN6_PMIIR);
}
pm_mask = intel_uncore_read(uncore, GEN6_PMINTRMSK);
drm_printf(p, "Video Turbo Mode: %s\n",
str_yes_no(rpmodectl & GEN6_RP_MEDIA_TURBO));
drm_printf(p, "HW control enabled: %s\n",
str_yes_no(rpmodectl & GEN6_RP_ENABLE));
drm_printf(p, "SW control enabled: %s\n",
str_yes_no((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) == GEN6_RP_MEDIA_SW_MODE));
drm_printf(p, "PM IER=0x%08x IMR=0x%08x, MASK=0x%08x\n",
pm_ier, pm_imr, pm_mask);
if (GRAPHICS_VER(i915) <= 10)
drm_printf(p, "PM ISR=0x%08x IIR=0x%08x\n",
pm_isr, pm_iir);
drm_printf(p, "pm_intrmsk_mbz: 0x%08x\n",
rps->pm_intrmsk_mbz);
drm_printf(p, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status);
drm_printf(p, "Render p-state ratio: %d\n",
(gt_perf_status & (GRAPHICS_VER(i915) >= 9 ? 0x1ff00 : 0xff00)) >> 8);
drm_printf(p, "Render p-state VID: %d\n",
gt_perf_status & 0xff);
drm_printf(p, "Render p-state limit: %d\n",
rp_state_limits & 0xff);
drm_printf(p, "RPSTAT1: 0x%08x\n", rpstat);
drm_printf(p, "RPMODECTL: 0x%08x\n", rpmodectl);
drm_printf(p, "RPINCLIMIT: 0x%08x\n", rpinclimit);
drm_printf(p, "RPDECLIMIT: 0x%08x\n", rpdeclimit);
drm_printf(p, "RPNSWREQ: %dMHz\n", reqf);
drm_printf(p, "CAGF: %dMHz\n", cagf);
drm_printf(p, "RP CUR UP EI: %d (%lldns)\n",
rpcurupei,
intel_gt_pm_interval_to_ns(gt, rpcurupei));
drm_printf(p, "RP CUR UP: %d (%lldns)\n",
rpcurup, intel_gt_pm_interval_to_ns(gt, rpcurup));
drm_printf(p, "RP PREV UP: %d (%lldns)\n",
rpprevup, intel_gt_pm_interval_to_ns(gt, rpprevup));
drm_printf(p, "Up threshold: %d%%\n",
rps->power.up_threshold);
drm_printf(p, "RP UP EI: %d (%lldns)\n",
rpupei, intel_gt_pm_interval_to_ns(gt, rpupei));
drm_printf(p, "RP UP THRESHOLD: %d (%lldns)\n",
rpupt, intel_gt_pm_interval_to_ns(gt, rpupt));
drm_printf(p, "RP CUR DOWN EI: %d (%lldns)\n",
rpcurdownei,
intel_gt_pm_interval_to_ns(gt, rpcurdownei));
drm_printf(p, "RP CUR DOWN: %d (%lldns)\n",
rpcurdown,
intel_gt_pm_interval_to_ns(gt, rpcurdown));
drm_printf(p, "RP PREV DOWN: %d (%lldns)\n",
rpprevdown,
intel_gt_pm_interval_to_ns(gt, rpprevdown));
drm_printf(p, "Down threshold: %d%%\n",
rps->power.down_threshold);
drm_printf(p, "RP DOWN EI: %d (%lldns)\n",
rpdownei, intel_gt_pm_interval_to_ns(gt, rpdownei));
drm_printf(p, "RP DOWN THRESHOLD: %d (%lldns)\n",
rpdownt, intel_gt_pm_interval_to_ns(gt, rpdownt));
drm_printf(p, "Lowest (RPN) frequency: %dMHz\n",
intel_gpu_freq(rps, caps.min_freq));
drm_printf(p, "Nominal (RP1) frequency: %dMHz\n",
intel_gpu_freq(rps, caps.rp1_freq));
drm_printf(p, "Max non-overclocked (RP0) frequency: %dMHz\n",
intel_gpu_freq(rps, caps.rp0_freq));
drm_printf(p, "Max overclocked frequency: %dMHz\n",
intel_gpu_freq(rps, rps->max_freq));
drm_printf(p, "Current freq: %d MHz\n",
intel_gpu_freq(rps, rps->cur_freq));
drm_printf(p, "Actual freq: %d MHz\n", cagf);
drm_printf(p, "Idle freq: %d MHz\n",
intel_gpu_freq(rps, rps->idle_freq));
drm_printf(p, "Min freq: %d MHz\n",
intel_gpu_freq(rps, rps->min_freq));
drm_printf(p, "Boost freq: %d MHz\n",
intel_gpu_freq(rps, rps->boost_freq));
drm_printf(p, "Max freq: %d MHz\n",
intel_gpu_freq(rps, rps->max_freq));
drm_printf(p,
"efficient (RPe) frequency: %d MHz\n",
intel_gpu_freq(rps, rps->efficient_freq));
}
static void slpc_frequency_dump(struct intel_rps *rps, struct drm_printer *p)
{
struct intel_gt *gt = rps_to_gt(rps);
struct intel_uncore *uncore = gt->uncore;
struct intel_rps_freq_caps caps;
u32 pm_mask;
gen6_rps_get_freq_caps(rps, &caps);
pm_mask = intel_uncore_read(uncore, GEN6_PMINTRMSK);
drm_printf(p, "PM MASK=0x%08x\n", pm_mask);
drm_printf(p, "pm_intrmsk_mbz: 0x%08x\n",
rps->pm_intrmsk_mbz);
drm_printf(p, "RPSTAT1: 0x%08x\n", intel_uncore_read(uncore, GEN6_RPSTAT1));
drm_printf(p, "RPNSWREQ: %dMHz\n", intel_rps_get_requested_frequency(rps));
drm_printf(p, "Lowest (RPN) frequency: %dMHz\n",
intel_gpu_freq(rps, caps.min_freq));
drm_printf(p, "Nominal (RP1) frequency: %dMHz\n",
intel_gpu_freq(rps, caps.rp1_freq));
drm_printf(p, "Max non-overclocked (RP0) frequency: %dMHz\n",
intel_gpu_freq(rps, caps.rp0_freq));
drm_printf(p, "Current freq: %d MHz\n",
intel_rps_get_requested_frequency(rps));
drm_printf(p, "Actual freq: %d MHz\n",
intel_rps_read_actual_frequency(rps));
drm_printf(p, "Min freq: %d MHz\n",
intel_rps_get_min_frequency(rps));
drm_printf(p, "Boost freq: %d MHz\n",
intel_rps_get_boost_frequency(rps));
drm_printf(p, "Max freq: %d MHz\n",
intel_rps_get_max_frequency(rps));
drm_printf(p,
"efficient (RPe) frequency: %d MHz\n",
intel_gpu_freq(rps, caps.rp1_freq));
}
void gen6_rps_frequency_dump(struct intel_rps *rps, struct drm_printer *p)
{
if (rps_uses_slpc(rps))
return slpc_frequency_dump(rps, p);
else
return rps_frequency_dump(rps, p);
}
static int set_max_freq(struct intel_rps *rps, u32 val)
{
struct drm_i915_private *i915 = rps_to_i915(rps);

View File

@ -10,6 +10,7 @@
#include "i915_reg_defs.h"
struct i915_request;
struct drm_printer;
void intel_rps_init_early(struct intel_rps *rps);
void intel_rps_init(struct intel_rps *rps);
@ -54,6 +55,8 @@ void intel_rps_lower_unslice(struct intel_rps *rps);
u32 intel_rps_read_throttle_reason(struct intel_rps *rps);
bool rps_read_mask_mmio(struct intel_rps *rps, i915_reg_t reg32, u32 mask);
void gen6_rps_frequency_dump(struct intel_rps *rps, struct drm_printer *p);
void gen5_rps_irq_handler(struct intel_rps *rps);
void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir);
void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir);

View File

@ -677,8 +677,8 @@ u32 intel_sseu_make_rpcs(struct intel_gt *gt,
* If i915/perf is active, we want a stable powergating configuration
* on the system. Use the configuration pinned by i915/perf.
*/
if (i915->perf.exclusive_stream)
req_sseu = &i915->perf.sseu;
if (gt->perf.exclusive_stream)
req_sseu = &gt->perf.sseu;
slices = hweight8(req_sseu->slice_mask);
subslices = hweight8(req_sseu->subslice_mask);

File diff suppressed because it is too large

View File

@ -11,11 +11,16 @@
#include "i915_reg_defs.h"
struct i915_wa {
i915_reg_t reg;
union {
i915_reg_t reg;
i915_mcr_reg_t mcr_reg;
};
u32 clr;
u32 set;
u32 read;
bool masked_reg;
u32 masked_reg:1;
u32 is_mcr:1;
};
struct i915_wa_list {

View File

@ -39,6 +39,16 @@ static int perf_end(struct intel_gt *gt)
return igt_flush_test(gt->i915);
}
static i915_reg_t timestamp_reg(struct intel_engine_cs *engine)
{
struct drm_i915_private *i915 = engine->i915;
if (GRAPHICS_VER(i915) == 5 || IS_G4X(i915))
return RING_TIMESTAMP_UDW(engine->mmio_base);
else
return RING_TIMESTAMP(engine->mmio_base);
}
static int write_timestamp(struct i915_request *rq, int slot)
{
struct intel_timeline *tl =
@ -55,7 +65,7 @@ static int write_timestamp(struct i915_request *rq, int slot)
if (GRAPHICS_VER(rq->engine->i915) >= 8)
cmd++;
*cs++ = cmd;
*cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
*cs++ = i915_mmio_reg_offset(timestamp_reg(rq->engine));
*cs++ = tl->hwsp_offset + slot * sizeof(u32);
*cs++ = 0;
@ -125,7 +135,7 @@ static int perf_mi_bb_start(void *arg)
enum intel_engine_id id;
int err = 0;
if (GRAPHICS_VER(gt->i915) < 7) /* for per-engine CS_TIMESTAMP */
if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? */
return 0;
perf_begin(gt);
@ -135,6 +145,9 @@ static int perf_mi_bb_start(void *arg)
u32 cycles[COUNT];
int i;
if (GRAPHICS_VER(engine->i915) < 7 && engine->id != RCS0)
continue;
intel_engine_pm_get(engine);
batch = create_empty_batch(ce);
@ -249,7 +262,7 @@ static int perf_mi_noop(void *arg)
enum intel_engine_id id;
int err = 0;
if (GRAPHICS_VER(gt->i915) < 7) /* for per-engine CS_TIMESTAMP */
if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? */
return 0;
perf_begin(gt);
@ -259,6 +272,9 @@ static int perf_mi_noop(void *arg)
u32 cycles[COUNT];
int i;
if (GRAPHICS_VER(engine->i915) < 7 && engine->id != RCS0)
continue;
intel_engine_pm_get(engine);
base = create_empty_batch(ce);

View File

@ -85,8 +85,6 @@ static int wait_for_reset(struct intel_engine_cs *engine,
break;
} while (time_before(jiffies, timeout));
flush_scheduled_work();
if (rq->fence.error != -EIO) {
pr_err("%s: hanging request %llx:%lld not reset\n",
engine->name,
@ -3475,12 +3473,14 @@ static int random_priority(struct rnd_state *rnd)
struct preempt_smoke {
struct intel_gt *gt;
struct kthread_work work;
struct i915_gem_context **contexts;
struct intel_engine_cs *engine;
struct drm_i915_gem_object *batch;
unsigned int ncontext;
struct rnd_state prng;
unsigned long count;
int result;
};
static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
@ -3540,34 +3540,31 @@ unpin:
return err;
}
static int smoke_crescendo_thread(void *arg)
static void smoke_crescendo_work(struct kthread_work *work)
{
struct preempt_smoke *smoke = arg;
struct preempt_smoke *smoke = container_of(work, typeof(*smoke), work);
IGT_TIMEOUT(end_time);
unsigned long count;
count = 0;
do {
struct i915_gem_context *ctx = smoke_context(smoke);
int err;
err = smoke_submit(smoke,
ctx, count % I915_PRIORITY_MAX,
smoke->batch);
if (err)
return err;
smoke->result = smoke_submit(smoke, ctx,
count % I915_PRIORITY_MAX,
smoke->batch);
count++;
} while (count < smoke->ncontext && !__igt_timeout(end_time, NULL));
} while (!smoke->result && count < smoke->ncontext &&
!__igt_timeout(end_time, NULL));
smoke->count = count;
return 0;
}
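
The hunks in this file and in the hangcheck selftest further below replace raw kthreads with kthread_worker. The lifecycle they all follow, shown as a standalone, illustrative sketch (not part of the patch):

/* Illustrative kthread_worker lifecycle; needs <linux/kthread.h>.
 * Error handling is trimmed to the minimum. */
static void example_work_fn(struct kthread_work *work)
{
        /* do the per-engine work here */
}

static int example_run(void)
{
        struct kthread_worker *worker;
        struct kthread_work work;

        worker = kthread_create_worker(0, "igt/example");
        if (IS_ERR(worker))
                return PTR_ERR(worker);

        kthread_init_work(&work, example_work_fn);
        kthread_queue_work(worker, &work);

        kthread_flush_work(&work);      /* wait for the work to finish */
        kthread_destroy_worker(worker); /* stop and free the worker */
        return 0;
}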
static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
#define BATCH BIT(0)
{
struct task_struct *tsk[I915_NUM_ENGINES] = {};
struct kthread_worker *worker[I915_NUM_ENGINES] = {};
struct preempt_smoke *arg;
struct intel_engine_cs *engine;
enum intel_engine_id id;
@ -3578,6 +3575,8 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
if (!arg)
return -ENOMEM;
memset(arg, 0, I915_NUM_ENGINES * sizeof(*arg));
for_each_engine(engine, smoke->gt, id) {
arg[id] = *smoke;
arg[id].engine = engine;
@ -3585,31 +3584,28 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
arg[id].batch = NULL;
arg[id].count = 0;
tsk[id] = kthread_run(smoke_crescendo_thread, arg,
"igt/smoke:%d", id);
if (IS_ERR(tsk[id])) {
err = PTR_ERR(tsk[id]);
worker[id] = kthread_create_worker(0, "igt/smoke:%d", id);
if (IS_ERR(worker[id])) {
err = PTR_ERR(worker[id]);
break;
}
get_task_struct(tsk[id]);
}
yield(); /* start all threads before we kthread_stop() */
kthread_init_work(&arg[id].work, smoke_crescendo_work);
kthread_queue_work(worker[id], &arg[id].work);
}
count = 0;
for_each_engine(engine, smoke->gt, id) {
int status;
if (IS_ERR_OR_NULL(tsk[id]))
if (IS_ERR_OR_NULL(worker[id]))
continue;
status = kthread_stop(tsk[id]);
if (status && !err)
err = status;
kthread_flush_work(&arg[id].work);
if (arg[id].result && !err)
err = arg[id].result;
count += arg[id].count;
put_task_struct(tsk[id]);
kthread_destroy_worker(worker[id]);
}
pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",

View File

@ -36,6 +36,19 @@ static int cmp_u32(const void *A, const void *B)
return 0;
}
static u32 read_timestamp(struct intel_engine_cs *engine)
{
struct drm_i915_private *i915 = engine->i915;
/* On i965 the first read tends to give a stale value */
ENGINE_READ_FW(engine, RING_TIMESTAMP);
if (GRAPHICS_VER(i915) == 5 || IS_G4X(i915))
return ENGINE_READ_FW(engine, RING_TIMESTAMP_UDW);
else
return ENGINE_READ_FW(engine, RING_TIMESTAMP);
}
static void measure_clocks(struct intel_engine_cs *engine,
u32 *out_cycles, ktime_t *out_dt)
{
@ -45,13 +58,13 @@ static void measure_clocks(struct intel_engine_cs *engine,
for (i = 0; i < 5; i++) {
local_irq_disable();
cycles[i] = -ENGINE_READ_FW(engine, RING_TIMESTAMP);
cycles[i] = -read_timestamp(engine);
dt[i] = ktime_get();
udelay(1000);
dt[i] = ktime_sub(ktime_get(), dt[i]);
cycles[i] += ENGINE_READ_FW(engine, RING_TIMESTAMP);
cycles[i] += read_timestamp(engine);
local_irq_enable();
}
@ -78,25 +91,6 @@ static int live_gt_clocks(void *arg)
if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? */
return 0;
if (GRAPHICS_VER(gt->i915) == 5)
/*
* XXX CS_TIMESTAMP low dword is dysfunctional?
*
* Ville's experiments indicate the high dword still works,
* but at a correspondingly reduced frequency.
*/
return 0;
if (GRAPHICS_VER(gt->i915) == 4)
/*
* XXX CS_TIMESTAMP appears gibberish
*
* Ville's experiments indicate that it mostly appears 'stuck'
* in that we see the register report the same cycle count
* for a couple of reads.
*/
return 0;
intel_gt_pm_get(gt);
intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);

View File

@ -866,10 +866,13 @@ static int igt_reset_active_engine(void *arg)
}
struct active_engine {
struct task_struct *task;
struct kthread_worker *worker;
struct kthread_work work;
struct intel_engine_cs *engine;
unsigned long resets;
unsigned int flags;
bool stop;
int result;
};
#define TEST_ACTIVE BIT(0)
@ -900,10 +903,10 @@ static int active_request_put(struct i915_request *rq)
return err;
}
static int active_engine(void *data)
static void active_engine(struct kthread_work *work)
{
I915_RND_STATE(prng);
struct active_engine *arg = data;
struct active_engine *arg = container_of(work, typeof(*arg), work);
struct intel_engine_cs *engine = arg->engine;
struct i915_request *rq[8] = {};
struct intel_context *ce[ARRAY_SIZE(rq)];
@ -913,16 +916,17 @@ static int active_engine(void *data)
for (count = 0; count < ARRAY_SIZE(ce); count++) {
ce[count] = intel_context_create(engine);
if (IS_ERR(ce[count])) {
err = PTR_ERR(ce[count]);
pr_err("[%s] Create context #%ld failed: %d!\n", engine->name, count, err);
arg->result = PTR_ERR(ce[count]);
pr_err("[%s] Create context #%ld failed: %d!\n",
engine->name, count, arg->result);
while (--count)
intel_context_put(ce[count]);
return err;
return;
}
}
count = 0;
while (!kthread_should_stop()) {
while (!READ_ONCE(arg->stop)) {
unsigned int idx = count++ & (ARRAY_SIZE(rq) - 1);
struct i915_request *old = rq[idx];
struct i915_request *new;
@ -967,7 +971,7 @@ static int active_engine(void *data)
intel_context_put(ce[count]);
}
return err;
arg->result = err;
}
static int __igt_reset_engines(struct intel_gt *gt,
@ -1022,7 +1026,7 @@ static int __igt_reset_engines(struct intel_gt *gt,
memset(threads, 0, sizeof(*threads) * I915_NUM_ENGINES);
for_each_engine(other, gt, tmp) {
struct task_struct *tsk;
struct kthread_worker *worker;
threads[tmp].resets =
i915_reset_engine_count(global, other);
@ -1036,19 +1040,21 @@ static int __igt_reset_engines(struct intel_gt *gt,
threads[tmp].engine = other;
threads[tmp].flags = flags;
tsk = kthread_run(active_engine, &threads[tmp],
"igt/%s", other->name);
if (IS_ERR(tsk)) {
err = PTR_ERR(tsk);
pr_err("[%s] Thread spawn failed: %d!\n", engine->name, err);
worker = kthread_create_worker(0, "igt/%s",
other->name);
if (IS_ERR(worker)) {
err = PTR_ERR(worker);
pr_err("[%s] Worker create failed: %d!\n",
engine->name, err);
goto unwind;
}
threads[tmp].task = tsk;
get_task_struct(tsk);
}
threads[tmp].worker = worker;
yield(); /* start all threads before we begin */
kthread_init_work(&threads[tmp].work, active_engine);
kthread_queue_work(threads[tmp].worker,
&threads[tmp].work);
}
st_engine_heartbeat_disable_no_pm(engine);
GEM_BUG_ON(test_and_set_bit(I915_RESET_ENGINE + id,
@ -1197,17 +1203,20 @@ unwind:
for_each_engine(other, gt, tmp) {
int ret;
if (!threads[tmp].task)
if (!threads[tmp].worker)
continue;
ret = kthread_stop(threads[tmp].task);
WRITE_ONCE(threads[tmp].stop, true);
kthread_flush_work(&threads[tmp].work);
ret = READ_ONCE(threads[tmp].result);
if (ret) {
pr_err("kthread for other engine %s failed, err=%d\n",
other->name, ret);
if (!err)
err = ret;
}
put_task_struct(threads[tmp].task);
kthread_destroy_worker(threads[tmp].worker);
/* GuC based resets are not logged per engine */
if (!using_guc) {

View File

@ -6,6 +6,7 @@
#include <linux/sort.h>
#include "gem/i915_gem_internal.h"
#include "gem/i915_gem_lmem.h"
#include "selftests/i915_random.h"

View File

@ -1107,21 +1107,27 @@ static u64 __measure_power(int duration_ms)
return div64_u64(1000 * 1000 * dE, dt);
}
static u64 measure_power_at(struct intel_rps *rps, int *freq)
static u64 measure_power(struct intel_rps *rps, int *freq)
{
u64 x[5];
int i;
*freq = rps_set_check(rps, *freq);
for (i = 0; i < 5; i++)
x[i] = __measure_power(5);
*freq = (*freq + read_cagf(rps)) / 2;
*freq = (*freq + intel_rps_read_actual_frequency(rps)) / 2;
/* A simple triangle filter for better result stability */
sort(x, 5, sizeof(*x), cmp_u64, NULL);
return div_u64(x[1] + 2 * x[2] + x[3], 4);
}
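
As a worked example of the triangle filter above: with sorted samples {8, 9, 10, 11, 50} the result is (9 + 2*10 + 11) / 4 = 10, so the lowest and highest samples (including a noisy outlier like the 50) are dropped entirely while the three central samples are weighted 1:2:1.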
static u64 measure_power_at(struct intel_rps *rps, int *freq)
{
*freq = rps_set_check(rps, *freq);
return measure_power(rps, freq);
}
int live_rps_power(void *arg)
{
struct intel_gt *gt = arg;

View File

@ -11,7 +11,8 @@
enum test_type {
VARY_MIN,
VARY_MAX,
MAX_GRANTED
MAX_GRANTED,
SLPC_POWER,
};
static int slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 freq)
@ -41,6 +42,39 @@ static int slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 freq)
return ret;
}
static int slpc_set_freq(struct intel_gt *gt, u32 freq)
{
int err;
struct intel_guc_slpc *slpc = &gt->uc.guc.slpc;
err = slpc_set_max_freq(slpc, freq);
if (err) {
pr_err("Unable to update max freq");
return err;
}
err = slpc_set_min_freq(slpc, freq);
if (err) {
pr_err("Unable to update min freq");
return err;
}
return err;
}
static u64 measure_power_at_freq(struct intel_gt *gt, int *freq, u64 *power)
{
int err = 0;
err = slpc_set_freq(gt, *freq);
if (err)
return err;
*freq = intel_rps_read_actual_frequency(&gt->rps);
*power = measure_power(&gt->rps, freq);
return err;
}
static int vary_max_freq(struct intel_guc_slpc *slpc, struct intel_rps *rps,
u32 *max_act_freq)
{
@ -113,6 +147,58 @@ static int vary_min_freq(struct intel_guc_slpc *slpc, struct intel_rps *rps,
return err;
}
static int slpc_power(struct intel_gt *gt, struct intel_engine_cs *engine)
{
struct intel_guc_slpc *slpc = &gt->uc.guc.slpc;
struct {
u64 power;
int freq;
} min, max;
int err = 0;
/*
* Our fundamental assumption is that running at lower frequency
* actually saves power. Let's see if our RAPL measurement supports
* that theory.
*/
if (!librapl_supported(gt->i915))
return 0;
min.freq = slpc->min_freq;
err = measure_power_at_freq(gt, &min.freq, &min.power);
if (err)
return err;
max.freq = slpc->rp0_freq;
err = measure_power_at_freq(gt, &max.freq, &max.power);
if (err)
return err;
pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n",
engine->name,
min.power, min.freq,
max.power, max.freq);
if (10 * min.freq >= 9 * max.freq) {
pr_notice("Could not control frequency, ran at [%uMHz, %uMhz]\n",
min.freq, max.freq);
}
if (11 * min.power > 10 * max.power) {
pr_err("%s: did not conserve power when setting lower frequency!\n",
engine->name);
err = -EINVAL;
}
/* Restore min/max frequencies */
slpc_set_max_freq(slpc, slpc->rp0_freq);
slpc_set_min_freq(slpc, slpc->min_freq);
return err;
}
static int max_granted_freq(struct intel_guc_slpc *slpc, struct intel_rps *rps, u32 *max_act_freq)
{
struct intel_gt *gt = rps_to_gt(rps);
@ -153,6 +239,11 @@ static int run_test(struct intel_gt *gt, int test_type)
if (!intel_uc_uses_guc_slpc(&gt->uc))
return 0;
if (slpc->min_freq == slpc->rp0_freq) {
pr_err("Min/Max are fused to the same value\n");
return -EINVAL;
}
if (igt_spinner_init(&spin, gt))
return -ENOMEM;
@ -167,17 +258,14 @@ static int run_test(struct intel_gt *gt, int test_type)
}
/*
* FIXME: With efficient frequency enabled, GuC can request
* frequencies higher than the SLPC max. While this is fixed
* in GuC, we level set these tests with RPn as min.
* Set min frequency to RPn so that we can test the whole
* range of RPn-RP0. This also turns off efficient freq
* usage and makes results more predictable.
*/
err = slpc_set_min_freq(slpc, slpc->min_freq);
if (err)
if (err) {
pr_err("Unable to update min freq!");
return err;
if (slpc->min_freq == slpc->rp0_freq) {
pr_err("Min/Max are fused to the same value\n");
return -EINVAL;
}
intel_gt_pm_wait_for_idle(gt);
@ -233,17 +321,23 @@ static int run_test(struct intel_gt *gt, int test_type)
err = max_granted_freq(slpc, rps, &max_act_freq);
break;
case SLPC_POWER:
err = slpc_power(gt, engine);
break;
}
pr_info("Max actual frequency for %s was %d\n",
engine->name, max_act_freq);
if (test_type != SLPC_POWER) {
pr_info("Max actual frequency for %s was %d\n",
engine->name, max_act_freq);
/* Actual frequency should rise above min */
if (max_act_freq <= slpc_min_freq) {
pr_err("Actual freq did not rise above min\n");
pr_err("Perf Limit Reasons: 0x%x\n",
intel_uncore_read(gt->uncore, GT0_PERF_LIMIT_REASONS));
err = -EINVAL;
/* Actual frequency should rise above min */
if (max_act_freq <= slpc->min_freq) {
pr_err("Actual freq did not rise above min\n");
pr_err("Perf Limit Reasons: 0x%x\n",
intel_uncore_read(gt->uncore, GT0_PERF_LIMIT_REASONS));
err = -EINVAL;
}
}
igt_spinner_end(&spin);
@ -270,26 +364,66 @@ static int run_test(struct intel_gt *gt, int test_type)
static int live_slpc_vary_min(void *arg)
{
struct drm_i915_private *i915 = arg;
struct intel_gt *gt = to_gt(i915);
struct intel_gt *gt;
unsigned int i;
int ret = 0;
return run_test(gt, VARY_MIN);
for_each_gt(gt, i915, i) {
ret = run_test(gt, VARY_MIN);
if (ret)
return ret;
}
return ret;
}
static int live_slpc_vary_max(void *arg)
{
struct drm_i915_private *i915 = arg;
struct intel_gt *gt = to_gt(i915);
struct intel_gt *gt;
unsigned int i;
int ret = 0;
return run_test(gt, VARY_MAX);
for_each_gt(gt, i915, i) {
ret = run_test(gt, VARY_MAX);
if (ret)
return ret;
}
return ret;
}
/* check if pcode can grant RP0 */
static int live_slpc_max_granted(void *arg)
{
struct drm_i915_private *i915 = arg;
struct intel_gt *gt = to_gt(i915);
struct intel_gt *gt;
unsigned int i;
int ret = 0;
return run_test(gt, MAX_GRANTED);
for_each_gt(gt, i915, i) {
ret = run_test(gt, MAX_GRANTED);
if (ret)
return ret;
}
return ret;
}
static int live_slpc_power(void *arg)
{
struct drm_i915_private *i915 = arg;
struct intel_gt *gt;
unsigned int i;
int ret = 0;
for_each_gt(gt, i915, i) {
ret = run_test(gt, SLPC_POWER);
if (ret)
return ret;
}
return ret;
}
int intel_slpc_live_selftests(struct drm_i915_private *i915)
@ -298,10 +432,16 @@ int intel_slpc_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_slpc_vary_max),
SUBTEST(live_slpc_vary_min),
SUBTEST(live_slpc_max_granted),
SUBTEST(live_slpc_power),
};
if (intel_gt_is_wedged(to_gt(i915)))
return 0;
struct intel_gt *gt;
unsigned int i;
for_each_gt(gt, i915, i) {
if (intel_gt_is_wedged(gt))
return 0;
}
return i915_live_subtests(tests, i915);
}

View File

@ -991,7 +991,7 @@ static bool pardon_reg(struct drm_i915_private *i915, i915_reg_t reg)
/* Alas, we must pardon some whitelists. Mistakes already made */
static const struct regmask pardon[] = {
{ GEN9_CTX_PREEMPT_REG, 9 },
{ GEN8_L3SQCREG4, 9 },
{ _MMIO(0xb118), 9 }, /* GEN8_L3SQCREG4 */
};
return find_reg(i915, reg, pardon, ARRAY_SIZE(pardon));

View File

@ -144,7 +144,7 @@ max_spin_store(struct kobject *kobj, struct kobj_attribute *attr,
const char *buf, size_t count)
{
struct intel_engine_cs *engine = kobj_to_engine(kobj);
unsigned long long duration;
unsigned long long duration, clamped;
int err;
/*
@ -168,7 +168,8 @@ max_spin_store(struct kobject *kobj, struct kobj_attribute *attr,
if (err)
return err;
if (duration > jiffies_to_nsecs(2))
clamped = intel_clamp_max_busywait_duration_ns(engine, duration);
if (duration != clamped)
return -EINVAL;
WRITE_ONCE(engine->props.max_busywait_duration_ns, duration);
@ -203,7 +204,7 @@ timeslice_store(struct kobject *kobj, struct kobj_attribute *attr,
const char *buf, size_t count)
{
struct intel_engine_cs *engine = kobj_to_engine(kobj);
unsigned long long duration;
unsigned long long duration, clamped;
int err;
/*
@ -218,7 +219,8 @@ timeslice_store(struct kobject *kobj, struct kobj_attribute *attr,
if (err)
return err;
if (duration > jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT))
clamped = intel_clamp_timeslice_duration_ms(engine, duration);
if (duration != clamped)
return -EINVAL;
WRITE_ONCE(engine->props.timeslice_duration_ms, duration);
@ -256,7 +258,7 @@ stop_store(struct kobject *kobj, struct kobj_attribute *attr,
const char *buf, size_t count)
{
struct intel_engine_cs *engine = kobj_to_engine(kobj);
unsigned long long duration;
unsigned long long duration, clamped;
int err;
/*
@ -272,7 +274,8 @@ stop_store(struct kobject *kobj, struct kobj_attribute *attr,
if (err)
return err;
if (duration > jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT))
clamped = intel_clamp_stop_timeout_ms(engine, duration);
if (duration != clamped)
return -EINVAL;
WRITE_ONCE(engine->props.stop_timeout_ms, duration);
@ -306,7 +309,7 @@ preempt_timeout_store(struct kobject *kobj, struct kobj_attribute *attr,
const char *buf, size_t count)
{
struct intel_engine_cs *engine = kobj_to_engine(kobj);
unsigned long long timeout;
unsigned long long timeout, clamped;
int err;
/*
@ -322,7 +325,8 @@ preempt_timeout_store(struct kobject *kobj, struct kobj_attribute *attr,
if (err)
return err;
if (timeout > jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT))
clamped = intel_clamp_preempt_timeout_ms(engine, timeout);
if (timeout != clamped)
return -EINVAL;
WRITE_ONCE(engine->props.preempt_timeout_ms, timeout);
@ -362,7 +366,7 @@ heartbeat_store(struct kobject *kobj, struct kobj_attribute *attr,
const char *buf, size_t count)
{
struct intel_engine_cs *engine = kobj_to_engine(kobj);
unsigned long long delay;
unsigned long long delay, clamped;
int err;
/*
@ -379,7 +383,8 @@ heartbeat_store(struct kobject *kobj, struct kobj_attribute *attr,
if (err)
return err;
if (delay >= jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT))
clamped = intel_clamp_heartbeat_interval_ms(engine, delay);
if (delay != clamped)
return -EINVAL;
err = intel_engine_set_heartbeat(engine, delay);

View File

@ -117,6 +117,7 @@ enum intel_guc_action {
INTEL_GUC_ACTION_ENTER_S_STATE = 0x501,
INTEL_GUC_ACTION_EXIT_S_STATE = 0x502,
INTEL_GUC_ACTION_GLOBAL_SCHED_POLICY_CHANGE = 0x506,
INTEL_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV = 0x509,
INTEL_GUC_ACTION_SCHED_CONTEXT = 0x1000,
INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET = 0x1001,
INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_DONE = 0x1002,

View File

@ -128,6 +128,15 @@ enum slpc_media_ratio_mode {
SLPC_MEDIA_RATIO_MODE_FIXED_ONE_TO_TWO = 2,
};
enum slpc_gucrc_mode {
SLPC_GUCRC_MODE_HW = 0,
SLPC_GUCRC_MODE_GUCRC_NO_RC6 = 1,
SLPC_GUCRC_MODE_GUCRC_STATIC_TIMEOUT = 2,
SLPC_GUCRC_MODE_GUCRC_DYNAMIC_HYSTERESIS = 3,
SLPC_GUCRC_MODE_MAX,
};
enum slpc_event_id {
SLPC_EVENT_RESET = 0,
SLPC_EVENT_SHUTDOWN = 1,

View File

@ -81,10 +81,17 @@
#define GUC_KLV_SELF_CFG_G2H_CTB_SIZE_KEY 0x0907
#define GUC_KLV_SELF_CFG_G2H_CTB_SIZE_LEN 1u
/*
* Global scheduling policy update keys.
*/
enum {
GUC_SCHEDULING_POLICIES_KLV_ID_RENDER_COMPUTE_YIELD = 0x1001,
};
/*
* Per context scheduling policy update keys.
*/
enum {
GUC_CONTEXT_POLICIES_KLV_ID_EXECUTION_QUANTUM = 0x2001,
GUC_CONTEXT_POLICIES_KLV_ID_PREEMPTION_TIMEOUT = 0x2002,
GUC_CONTEXT_POLICIES_KLV_ID_SCHEDULING_PRIORITY = 0x2003,

View File

@ -441,6 +441,7 @@ err_log:
err_fw:
intel_uc_fw_fini(&guc->fw);
out:
intel_uc_fw_change_status(&guc->fw, INTEL_UC_FIRMWARE_INIT_FAIL);
i915_probe_error(gt->i915, "failed with %d\n", ret);
return ret;
}

View File

@ -112,6 +112,10 @@ struct intel_guc {
* refs
*/
struct list_head guc_id_list;
/**
* @guc_ids_in_use: Number of single-lrc guc_ids in use
*/
unsigned int guc_ids_in_use;
/**
* @destroyed_contexts: list of contexts waiting to be destroyed
* (deregistered with the GuC)
@ -132,6 +136,16 @@ struct intel_guc {
* @reset_fail_mask: mask of engines that failed to reset
*/
intel_engine_mask_t reset_fail_mask;
/**
* @sched_disable_delay_ms: schedule disable delay, in ms, for
* contexts
*/
unsigned int sched_disable_delay_ms;
/**
* @sched_disable_gucid_threshold: threshold of min remaining available
* guc_ids before we start bypassing the schedule disable delay
*/
unsigned int sched_disable_gucid_threshold;
} submission_state;
/**
@ -466,4 +480,6 @@ void intel_guc_write_barrier(struct intel_guc *guc);
void intel_guc_dump_time_info(struct intel_guc *guc, struct drm_printer *p);
int intel_guc_sched_disable_gucid_threshold_max(struct intel_guc *guc);
#endif

View File

@ -5,6 +5,7 @@
#include <linux/bsearch.h>
#include "gem/i915_gem_lmem.h"
#include "gt/intel_engine_regs.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_mcr.h"
@ -277,24 +278,16 @@ __mmio_reg_add(struct temp_regset *regset, struct guc_mmio_reg *reg)
return slot;
}
#define GUC_REGSET_STEERING(group, instance) ( \
FIELD_PREP(GUC_REGSET_STEERING_GROUP, (group)) | \
FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, (instance)) | \
GUC_REGSET_NEEDS_STEERING \
)
static long __must_check guc_mmio_reg_add(struct intel_gt *gt,
struct temp_regset *regset,
i915_reg_t reg, u32 flags)
u32 offset, u32 flags)
{
u32 count = regset->storage_used - (regset->registers - regset->storage);
u32 offset = i915_mmio_reg_offset(reg);
struct guc_mmio_reg entry = {
.offset = offset,
.flags = flags,
};
struct guc_mmio_reg *slot;
u8 group, inst;
/*
* The mmio list is built using separate lists within the driver.
@ -306,17 +299,6 @@ static long __must_check guc_mmio_reg_add(struct intel_gt *gt,
sizeof(entry), guc_mmio_reg_cmp))
return 0;
/*
* The GuC doesn't have a default steering, so we need to explicitly
* steer all registers that need steering. However, we do not keep track
* of all the steering ranges, only of those that have a chance of using
* a non-default steering from the i915 pov. Instead of adding such
* tracking, it is easier to just program the default steering for all
* regs that don't need a non-default one.
*/
intel_gt_mcr_get_nonterminated_steering(gt, reg, &group, &inst);
entry.flags |= GUC_REGSET_STEERING(group, inst);
slot = __mmio_reg_add(regset, &entry);
if (IS_ERR(slot))
return PTR_ERR(slot);
@ -334,6 +316,38 @@ static long __must_check guc_mmio_reg_add(struct intel_gt *gt,
#define GUC_MMIO_REG_ADD(gt, regset, reg, masked) \
guc_mmio_reg_add(gt, \
regset, \
i915_mmio_reg_offset(reg), \
(masked) ? GUC_REGSET_MASKED : 0)
#define GUC_REGSET_STEERING(group, instance) ( \
FIELD_PREP(GUC_REGSET_STEERING_GROUP, (group)) | \
FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, (instance)) | \
GUC_REGSET_NEEDS_STEERING \
)
static long __must_check guc_mcr_reg_add(struct intel_gt *gt,
struct temp_regset *regset,
i915_mcr_reg_t reg, u32 flags)
{
u8 group, inst;
/*
* The GuC doesn't have a default steering, so we need to explicitly
* steer all registers that need steering. However, we do not keep track
* of all the steering ranges, only of those that have a chance of using
* a non-default steering from the i915 pov. Instead of adding such
* tracking, it is easier to just program the default steering for all
* regs that don't need a non-default one.
*/
intel_gt_mcr_get_nonterminated_steering(gt, reg, &group, &inst);
flags |= GUC_REGSET_STEERING(group, inst);
return guc_mmio_reg_add(gt, regset, i915_mmio_reg_offset(reg), flags);
}
#define GUC_MCR_REG_ADD(gt, regset, reg, masked) \
guc_mcr_reg_add(gt, \
regset, \
(reg), \
(masked) ? GUC_REGSET_MASKED : 0)
@ -372,8 +386,21 @@ static int guc_mmio_regset_init(struct temp_regset *regset,
false);
/* add in local MOCS registers */
for (i = 0; i < GEN9_LNCFCMOCS_REG_COUNT; i++)
ret |= GUC_MMIO_REG_ADD(gt, regset, GEN9_LNCFCMOCS(i), false);
for (i = 0; i < LNCFCMOCS_REG_COUNT; i++)
if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
ret |= GUC_MCR_REG_ADD(gt, regset, XEHP_LNCFCMOCS(i), false);
else
ret |= GUC_MMIO_REG_ADD(gt, regset, GEN9_LNCFCMOCS(i), false);
if (GRAPHICS_VER(engine->i915) >= 12) {
ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL0, false);
ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL1, false);
ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL2, false);
ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL3, false);
ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL4, false);
ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL5, false);
ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL6, false);
}
return ret ? -1 : 0;
}

View File

@ -169,6 +169,8 @@ static struct __guc_mmio_reg_descr_group default_lists[] = {
MAKE_REGLIST(default_global_regs, PF, GLOBAL, 0),
MAKE_REGLIST(default_rc_class_regs, PF, ENGINE_CLASS, GUC_RENDER_CLASS),
MAKE_REGLIST(xe_lpd_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_RENDER_CLASS),
MAKE_REGLIST(default_rc_class_regs, PF, ENGINE_CLASS, GUC_COMPUTE_CLASS),
MAKE_REGLIST(xe_lpd_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_COMPUTE_CLASS),
MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_VIDEO_CLASS),
MAKE_REGLIST(xe_lpd_vd_inst_regs, PF, ENGINE_INSTANCE, GUC_VIDEO_CLASS),
MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_VIDEOENHANCE_CLASS),
@ -182,6 +184,8 @@ static const struct __guc_mmio_reg_descr_group xe_lpd_lists[] = {
MAKE_REGLIST(xe_lpd_global_regs, PF, GLOBAL, 0),
MAKE_REGLIST(xe_lpd_rc_class_regs, PF, ENGINE_CLASS, GUC_RENDER_CLASS),
MAKE_REGLIST(xe_lpd_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_RENDER_CLASS),
MAKE_REGLIST(xe_lpd_rc_class_regs, PF, ENGINE_CLASS, GUC_COMPUTE_CLASS),
MAKE_REGLIST(xe_lpd_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_COMPUTE_CLASS),
MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_VIDEO_CLASS),
MAKE_REGLIST(xe_lpd_vd_inst_regs, PF, ENGINE_INSTANCE, GUC_VIDEO_CLASS),
MAKE_REGLIST(xe_lpd_vec_class_regs, PF, ENGINE_CLASS, GUC_VIDEOENHANCE_CLASS),
@ -240,19 +244,19 @@ static void guc_capture_free_extlists(struct __guc_mmio_reg_descr_group *reglist
struct __ext_steer_reg {
const char *name;
i915_reg_t reg;
i915_mcr_reg_t reg;
};
static const struct __ext_steer_reg xe_extregs[] = {
{"GEN7_SAMPLER_INSTDONE", GEN7_SAMPLER_INSTDONE},
{"GEN7_ROW_INSTDONE", GEN7_ROW_INSTDONE}
{"GEN8_SAMPLER_INSTDONE", GEN8_SAMPLER_INSTDONE},
{"GEN8_ROW_INSTDONE", GEN8_ROW_INSTDONE}
};
static void __fill_ext_reg(struct __guc_mmio_reg_descr *ext,
const struct __ext_steer_reg *extlist,
int slice_id, int subslice_id)
{
ext->reg = extlist->reg;
ext->reg = _MMIO(i915_mmio_reg_offset(extlist->reg));
ext->flags = FIELD_PREP(GUC_REGSET_STEERING_GROUP, slice_id);
ext->flags |= FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, subslice_id);
ext->regname = extlist->name;
@ -419,6 +423,44 @@ guc_capture_get_device_reglist(struct intel_guc *guc)
return default_lists;
}
static const char *
__stringify_type(u32 type)
{
switch (type) {
case GUC_CAPTURE_LIST_TYPE_GLOBAL:
return "Global";
case GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS:
return "Class";
case GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE:
return "Instance";
default:
break;
}
return "unknown";
}
static const char *
__stringify_engclass(u32 class)
{
switch (class) {
case GUC_RENDER_CLASS:
return "Render";
case GUC_VIDEO_CLASS:
return "Video";
case GUC_VIDEOENHANCE_CLASS:
return "VideoEnhance";
case GUC_BLITTER_CLASS:
return "Blitter";
case GUC_COMPUTE_CLASS:
return "Compute";
default:
break;
}
return "unknown";
}
static int
guc_capture_list_init(struct intel_guc *guc, u32 owner, u32 type, u32 classid,
struct guc_mmio_reg *ptr, u16 num_entries)
@ -482,32 +524,55 @@ guc_cap_list_num_regs(struct intel_guc_state_capture *gc, u32 owner, u32 type, u
return num_regs;
}
int
intel_guc_capture_getlistsize(struct intel_guc *guc, u32 owner, u32 type, u32 classid,
size_t *size)
static int
guc_capture_getlistsize(struct intel_guc *guc, u32 owner, u32 type, u32 classid,
size_t *size, bool is_purpose_est)
{
struct intel_guc_state_capture *gc = guc->capture;
struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
struct __guc_capture_ads_cache *cache = &gc->ads_cache[owner][type][classid];
int num_regs;
if (!gc->reglists)
if (!gc->reglists) {
drm_warn(&i915->drm, "GuC-capture: No reglist on this device\n");
return -ENODEV;
}
if (cache->is_valid) {
*size = cache->size;
return cache->status;
}
if (!is_purpose_est && owner == GUC_CAPTURE_LIST_INDEX_PF &&
!guc_capture_get_one_list(gc->reglists, owner, type, classid)) {
if (type == GUC_CAPTURE_LIST_TYPE_GLOBAL)
drm_warn(&i915->drm, "Missing GuC-Err-Cap reglist Global!\n");
else
drm_warn(&i915->drm, "Missing GuC-Err-Cap reglist %s(%u):%s(%u)!\n",
__stringify_type(type), type,
__stringify_engclass(classid), classid);
return -ENODATA;
}
num_regs = guc_cap_list_num_regs(gc, owner, type, classid);
/* intentional empty lists can exist depending on hw config */
if (!num_regs)
return -ENODATA;
*size = PAGE_ALIGN((sizeof(struct guc_debug_capture_list)) +
(num_regs * sizeof(struct guc_mmio_reg)));
if (size)
*size = PAGE_ALIGN((sizeof(struct guc_debug_capture_list)) +
(num_regs * sizeof(struct guc_mmio_reg)));
return 0;
}
int
intel_guc_capture_getlistsize(struct intel_guc *guc, u32 owner, u32 type, u32 classid,
size_t *size)
{
return guc_capture_getlistsize(guc, owner, type, classid, size, false);
}
static void guc_capture_create_prealloc_nodes(struct intel_guc *guc);
int
@ -606,7 +671,7 @@ guc_capture_output_min_size_est(struct intel_guc *guc)
struct intel_gt *gt = guc_to_gt(guc);
struct intel_engine_cs *engine;
enum intel_engine_id id;
int worst_min_size = 0, num_regs = 0;
int worst_min_size = 0;
size_t tmp = 0;
if (!guc->capture)
@ -627,21 +692,19 @@ guc_capture_output_min_size_est(struct intel_guc *guc)
worst_min_size += sizeof(struct guc_state_capture_group_header_t) +
(3 * sizeof(struct guc_state_capture_header_t));
if (!intel_guc_capture_getlistsize(guc, 0, GUC_CAPTURE_LIST_TYPE_GLOBAL, 0, &tmp))
num_regs += tmp;
if (!guc_capture_getlistsize(guc, 0, GUC_CAPTURE_LIST_TYPE_GLOBAL, 0, &tmp, true))
worst_min_size += tmp;
if (!intel_guc_capture_getlistsize(guc, 0, GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS,
engine->class, &tmp)) {
num_regs += tmp;
if (!guc_capture_getlistsize(guc, 0, GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS,
engine->class, &tmp, true)) {
worst_min_size += tmp;
}
if (!intel_guc_capture_getlistsize(guc, 0, GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE,
engine->class, &tmp)) {
num_regs += tmp;
if (!guc_capture_getlistsize(guc, 0, GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE,
engine->class, &tmp, true)) {
worst_min_size += tmp;
}
}
worst_min_size += (num_regs * sizeof(struct guc_mmio_reg));
return worst_min_size;
}
@ -658,15 +721,23 @@ static void check_guc_capture_size(struct intel_guc *guc)
int spare_size = min_size * GUC_CAPTURE_OVERBUFFER_MULTIPLIER;
u32 buffer_size = intel_guc_log_section_size_capture(&guc->log);
/*
* NOTE: min_size is much smaller than the capture region allocation (DG2: <80K vs 1MB)
* Additionally, its based on space needed to fit all engines getting reset at once
* within the same G2H handler task slot. This is very unlikely. However, if GuC really
* does run out of space for whatever reason, we will see an separate warning message
* when processing the G2H event capture-notification, search for:
* INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE.
*/
if (min_size < 0)
drm_warn(&i915->drm, "Failed to calculate GuC error state capture buffer minimum size: %d!\n",
min_size);
else if (min_size > buffer_size)
drm_warn(&i915->drm, "GuC error state capture buffer is too small: %d < %d\n",
drm_warn(&i915->drm, "GuC error state capture buffer maybe small: %d < %d\n",
buffer_size, min_size);
else if (spare_size > buffer_size)
drm_notice(&i915->drm, "GuC error state capture buffer maybe too small: %d < %d (min = %d)\n",
buffer_size, spare_size, min_size);
drm_dbg(&i915->drm, "GuC error state capture buffer lacks spare size: %d < %d (min = %d)\n",
buffer_size, spare_size, min_size);
}
/*

View File

@ -71,12 +71,73 @@ static bool intel_eval_slpc_support(void *data)
return intel_guc_slpc_is_used(guc);
}
static int guc_sched_disable_delay_ms_get(void *data, u64 *val)
{
struct intel_guc *guc = data;
if (!intel_guc_submission_is_used(guc))
return -ENODEV;
*val = (u64)guc->submission_state.sched_disable_delay_ms;
return 0;
}
static int guc_sched_disable_delay_ms_set(void *data, u64 val)
{
struct intel_guc *guc = data;
if (!intel_guc_submission_is_used(guc))
return -ENODEV;
/* clamp to a practical limit; 1 minute is a reasonable maximum delay */
guc->submission_state.sched_disable_delay_ms = min_t(u64, val, 60000);
return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(guc_sched_disable_delay_ms_fops,
guc_sched_disable_delay_ms_get,
guc_sched_disable_delay_ms_set, "%lld\n");
static int guc_sched_disable_gucid_threshold_get(void *data, u64 *val)
{
struct intel_guc *guc = data;
if (!intel_guc_submission_is_used(guc))
return -ENODEV;
*val = guc->submission_state.sched_disable_gucid_threshold;
return 0;
}
static int guc_sched_disable_gucid_threshold_set(void *data, u64 val)
{
struct intel_guc *guc = data;
if (!intel_guc_submission_is_used(guc))
return -ENODEV;
if (val > intel_guc_sched_disable_gucid_threshold_max(guc))
guc->submission_state.sched_disable_gucid_threshold =
intel_guc_sched_disable_gucid_threshold_max(guc);
else
guc->submission_state.sched_disable_gucid_threshold = val;
return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(guc_sched_disable_gucid_threshold_fops,
guc_sched_disable_gucid_threshold_get,
guc_sched_disable_gucid_threshold_set, "%lld\n");
void intel_guc_debugfs_register(struct intel_guc *guc, struct dentry *root)
{
static const struct intel_gt_debugfs_file files[] = {
{ "guc_info", &guc_info_fops, NULL },
{ "guc_registered_contexts", &guc_registered_contexts_fops, NULL },
{ "guc_slpc_info", &guc_slpc_info_fops, &intel_eval_slpc_support},
{ "guc_sched_disable_delay_ms", &guc_sched_disable_delay_ms_fops, NULL },
{ "guc_sched_disable_gucid_threshold", &guc_sched_disable_gucid_threshold_fops,
NULL },
};
if (!intel_guc_is_supported(guc))

View File

@ -10,12 +10,15 @@
*/
#include "gt/intel_gt.h"
#include "gt/intel_gt_mcr.h"
#include "gt/intel_gt_regs.h"
#include "intel_guc_fw.h"
#include "i915_drv.h"
static void guc_prepare_xfer(struct intel_uncore *uncore)
static void guc_prepare_xfer(struct intel_gt *gt)
{
struct intel_uncore *uncore = gt->uncore;
u32 shim_flags = GUC_ENABLE_READ_CACHE_LOGIC |
GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA |
GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA |
@ -35,8 +38,9 @@ static void guc_prepare_xfer(struct intel_uncore *uncore)
if (GRAPHICS_VER(uncore->i915) == 9) {
/* DOP Clock Gating Enable for GuC clocks */
intel_uncore_rmw(uncore, GEN7_MISCCPCTL,
0, GEN8_DOP_CLOCK_GATE_GUC_ENABLE);
intel_gt_mcr_multicast_write(gt, GEN8_MISCCPCTL,
GEN8_DOP_CLOCK_GATE_GUC_ENABLE |
intel_gt_mcr_read_any(gt, GEN8_MISCCPCTL));
/* allows for 5us (in 10ns units) before GT can go to RC6 */
intel_uncore_write(uncore, GUC_ARAT_C6DIS, 0x1FF);
@ -168,7 +172,7 @@ int intel_guc_fw_upload(struct intel_guc *guc)
struct intel_uncore *uncore = gt->uncore;
int ret;
guc_prepare_xfer(uncore);
guc_prepare_xfer(gt);
/*
* Note that GuC needs the CSS header plus uKernel code to be copied

View File

@ -290,6 +290,25 @@ struct guc_update_context_policy {
struct guc_klv_generic_dw_t klv[GUC_CONTEXT_POLICIES_KLV_NUM_IDS];
} __packed;
/* Format of the UPDATE_SCHEDULING_POLICIES H2G data packet */
struct guc_update_scheduling_policy_header {
u32 action;
} __packed;
/*
* Can't dynamically allocate memory for the scheduling policy KLV because
* it will be sent from within the reset path. Need a fixed size lump on
* the stack instead :(.
*
* Currently, there is only one KLV defined, which has 1 word of KL + 2 words of V.
*/
#define MAX_SCHEDULING_POLICY_SIZE 3
struct guc_update_scheduling_policy {
struct guc_update_scheduling_policy_header header;
u32 data[MAX_SCHEDULING_POLICY_SIZE];
} __packed;
#define GUC_POWER_UNSPECIFIED 0
#define GUC_POWER_D0 1
#define GUC_POWER_D1 2
@ -298,6 +317,9 @@ struct guc_update_context_policy {
/* Scheduling policy settings */
#define GLOBAL_SCHEDULE_POLICY_RC_YIELD_DURATION 100 /* in ms */
#define GLOBAL_SCHEDULE_POLICY_RC_YIELD_RATIO 50 /* in percent */
#define GLOBAL_POLICY_MAX_NUM_WI 15
/* Don't reset an engine upon preemption failure */
@ -305,6 +327,27 @@ struct guc_update_context_policy {
#define GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US 500000
/*
* GuC converts the timeout to clock ticks internally. Different platforms have
* different GuC clocks. Thus, the maximum value before overflow is platform
* dependent. Current worst case scenario is about 110s. So, the spec says to
* limit to 100s to be safe.
*/
#define GUC_POLICY_MAX_EXEC_QUANTUM_US (100 * 1000 * 1000UL)
#define GUC_POLICY_MAX_PREEMPT_TIMEOUT_US (100 * 1000 * 1000UL)
static inline u32 guc_policy_max_exec_quantum_ms(void)
{
BUILD_BUG_ON(GUC_POLICY_MAX_EXEC_QUANTUM_US >= UINT_MAX);
return GUC_POLICY_MAX_EXEC_QUANTUM_US / 1000;
}
static inline u32 guc_policy_max_preempt_timeout_ms(void)
{
BUILD_BUG_ON(GUC_POLICY_MAX_PREEMPT_TIMEOUT_US >= UINT_MAX);
return GUC_POLICY_MAX_PREEMPT_TIMEOUT_US / 1000;
}
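
These caps are what the intel_clamp_*() checks in the sysfs hunks earlier in this series bound against when GuC submission is in use; reduced to the GuC-side limit alone, a hypothetical clamp is just:

/* Hypothetical helper: bound a requested preemption timeout by the GuC
 * limit. The in-tree intel_clamp_*() helpers may apply further checks. */
static u32 clamp_preempt_timeout_ms(u32 requested_ms)
{
        return min_t(u32, requested_ms, guc_policy_max_preempt_timeout_ms());
}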
struct guc_policies {
u32 submission_queue_depth[GUC_MAX_ENGINE_CLASSES];
/* In micro seconds. How much time to allow before DPC processing is

View File

@ -16,15 +16,15 @@
#if defined(CONFIG_DRM_I915_DEBUG_GUC)
#define GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE SZ_2M
#define GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE SZ_16M
#define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE SZ_4M
#define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE SZ_1M
#elif defined(CONFIG_DRM_I915_DEBUG_GEM)
#define GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE SZ_1M
#define GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE SZ_2M
#define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE SZ_4M
#define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE SZ_1M
#else
#define GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE SZ_8K
#define GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE SZ_64K
#define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE SZ_2M
#define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE SZ_1M
#endif
static void guc_log_copy_debuglogs_for_relay(struct intel_guc_log *log);

View File

@ -137,6 +137,17 @@ static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value)
return ret > 0 ? -EPROTO : ret;
}
static int guc_action_slpc_unset_param(struct intel_guc *guc, u8 id)
{
u32 request[] = {
GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
SLPC_EVENT(SLPC_EVENT_PARAMETER_UNSET, 1),
id,
};
return intel_guc_send(guc, request, ARRAY_SIZE(request));
}
static bool slpc_is_running(struct intel_guc_slpc *slpc)
{
return slpc_get_state(slpc) == SLPC_GLOBAL_STATE_RUNNING;
@ -190,6 +201,15 @@ static int slpc_set_param(struct intel_guc_slpc *slpc, u8 id, u32 value)
return ret;
}
static int slpc_unset_param(struct intel_guc_slpc *slpc, u8 id)
{
struct intel_guc *guc = slpc_to_guc(slpc);
GEM_BUG_ON(id >= SLPC_MAX_PARAM);
return guc_action_slpc_unset_param(guc, id);
}
static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq)
{
struct drm_i915_private *i915 = slpc_to_i915(slpc);
@ -263,6 +283,7 @@ int intel_guc_slpc_init(struct intel_guc_slpc *slpc)
slpc->max_freq_softlimit = 0;
slpc->min_freq_softlimit = 0;
slpc->min_is_rpmax = false;
slpc->boost_freq = 0;
atomic_set(&slpc->num_waiters, 0);
@ -588,6 +609,39 @@ static int slpc_set_softlimits(struct intel_guc_slpc *slpc)
return 0;
}
static bool is_slpc_min_freq_rpmax(struct intel_guc_slpc *slpc)
{
struct drm_i915_private *i915 = slpc_to_i915(slpc);
int slpc_min_freq;
int ret;
ret = intel_guc_slpc_get_min_freq(slpc, &slpc_min_freq);
if (ret) {
drm_err(&i915->drm,
"Failed to get min freq: (%d)\n",
ret);
return false;
}
if (slpc_min_freq == SLPC_MAX_FREQ_MHZ)
return true;
else
return false;
}
static void update_server_min_softlimit(struct intel_guc_slpc *slpc)
{
/* For server parts, SLPC min will be at RPMax.
* Use min softlimit to clamp it to RP0 instead.
*/
if (!slpc->min_freq_softlimit &&
is_slpc_min_freq_rpmax(slpc)) {
slpc->min_is_rpmax = true;
slpc->min_freq_softlimit = slpc->rp0_freq;
(slpc_to_gt(slpc))->defaults.min_freq = slpc->min_freq_softlimit;
}
}
static int slpc_use_fused_rp0(struct intel_guc_slpc *slpc)
{
/* Force SLPC to used platform rp0 */
@ -610,6 +664,52 @@ static void slpc_get_rp_values(struct intel_guc_slpc *slpc)
slpc->boost_freq = slpc->rp0_freq;
}
/**
* intel_guc_slpc_override_gucrc_mode() - override GUCRC mode
* @slpc: pointer to intel_guc_slpc.
* @mode: new value of the mode.
*
* This function will override the GUCRC mode.
*
* Return: 0 on success, non-zero error code on failure.
*/
int intel_guc_slpc_override_gucrc_mode(struct intel_guc_slpc *slpc, u32 mode)
{
int ret;
struct drm_i915_private *i915 = slpc_to_i915(slpc);
intel_wakeref_t wakeref;
if (mode >= SLPC_GUCRC_MODE_MAX)
return -EINVAL;
with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
ret = slpc_set_param(slpc, SLPC_PARAM_PWRGATE_RC_MODE, mode);
if (ret)
drm_err(&i915->drm,
"Override gucrc mode %d failed %d\n",
mode, ret);
}
return ret;
}
int intel_guc_slpc_unset_gucrc_mode(struct intel_guc_slpc *slpc)
{
struct drm_i915_private *i915 = slpc_to_i915(slpc);
intel_wakeref_t wakeref;
int ret = 0;
with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
ret = slpc_unset_param(slpc, SLPC_PARAM_PWRGATE_RC_MODE);
if (ret)
drm_err(&i915->drm,
"Unsetting gucrc mode failed %d\n",
ret);
}
return ret;
}
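
Taken together, the two helpers above give callers a simple override/restore pattern; an illustrative (not in-tree) usage sketch:

/* Sketch only: temporarily force a GuC RC mode (e.g. no RC6), do some
 * work, then hand control back to the firmware default. */
static int with_gucrc_override(struct intel_guc_slpc *slpc)
{
        int err;

        err = intel_guc_slpc_override_gucrc_mode(slpc, SLPC_GUCRC_MODE_GUCRC_NO_RC6);
        if (err)
                return err;

        /* ... run the workload or measurement with the override in place ... */

        return intel_guc_slpc_unset_gucrc_mode(slpc);
}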
/*
* intel_guc_slpc_enable() - Start SLPC
* @slpc: pointer to intel_guc_slpc.
@ -647,6 +747,9 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc)
slpc_get_rp_values(slpc);
/* Handle the case where min=max=RPmax */
update_server_min_softlimit(slpc);
/* Set SLPC max limit to RP0 */
ret = slpc_use_fused_rp0(slpc);
if (unlikely(ret)) {

View File

@ -9,6 +9,8 @@
#include "intel_guc_submission.h"
#include "intel_guc_slpc_types.h"
#define SLPC_MAX_FREQ_MHZ 4250
struct intel_gt;
struct drm_printer;
@ -42,5 +44,7 @@ int intel_guc_slpc_set_media_ratio_mode(struct intel_guc_slpc *slpc, u32 val);
void intel_guc_pm_intrmsk_enable(struct intel_gt *gt);
void intel_guc_slpc_boost(struct intel_guc_slpc *slpc);
void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc);
int intel_guc_slpc_unset_gucrc_mode(struct intel_guc_slpc *slpc);
int intel_guc_slpc_override_gucrc_mode(struct intel_guc_slpc *slpc, u32 mode);
#endif

View File

@ -19,6 +19,9 @@ struct intel_guc_slpc {
bool supported;
bool selected;
/* Indicates this is a server part */
bool min_is_rpmax;
/* platform frequency limits */
u32 min_freq;
u32 rp0_freq;

View File

@ -6,6 +6,7 @@
#include <linux/circ_buf.h>
#include "gem/i915_gem_context.h"
#include "gem/i915_gem_lmem.h"
#include "gt/gen8_engine_cs.h"
#include "gt/intel_breadcrumbs.h"
#include "gt/intel_context.h"
@ -65,7 +66,13 @@
* corresponding G2H returns indicating the scheduling disable operation has
* completed it is safe to unpin the context. While a disable is in flight it
* isn't safe to resubmit the context so a fence is used to stall all future
* requests of that context until the G2H is returned.
* requests of that context until the G2H is returned. Because this interaction
* with the GuC takes a non-zero amount of time we delay the disabling of
* scheduling after the pin count goes to zero by a configurable period of time
* (see SCHED_DISABLE_DELAY_MS). The thought is this gives the user a window of
* time to resubmit something on the context before doing this costly operation.
* This delay is only done if the context isn't closed and the guc_id usage is
* less than a threshold (see NUM_SCHED_DISABLE_GUC_IDS_THRESHOLD).
*
* Context deregistration:
* Before a context can be destroyed or if we steal its guc_id we must
@ -163,7 +170,8 @@ guc_create_parallel(struct intel_engine_cs **engines,
#define SCHED_STATE_PENDING_ENABLE BIT(5)
#define SCHED_STATE_REGISTERED BIT(6)
#define SCHED_STATE_POLICY_REQUIRED BIT(7)
#define SCHED_STATE_BLOCKED_SHIFT 8
#define SCHED_STATE_CLOSED BIT(8)
#define SCHED_STATE_BLOCKED_SHIFT 9
#define SCHED_STATE_BLOCKED BIT(SCHED_STATE_BLOCKED_SHIFT)
#define SCHED_STATE_BLOCKED_MASK (0xfff << SCHED_STATE_BLOCKED_SHIFT)
@ -173,12 +181,20 @@ static inline void init_sched_state(struct intel_context *ce)
ce->guc_state.sched_state &= SCHED_STATE_BLOCKED_MASK;
}
/*
* Kernel contexts can have SCHED_STATE_REGISTERED after suspend.
* A context close can race with the submission path, so SCHED_STATE_CLOSED
* can be set immediately before we try to register.
*/
#define SCHED_STATE_VALID_INIT \
(SCHED_STATE_BLOCKED_MASK | \
SCHED_STATE_CLOSED | \
SCHED_STATE_REGISTERED)
__maybe_unused
static bool sched_state_is_init(struct intel_context *ce)
{
/* Kernel contexts can have SCHED_STATE_REGISTERED after suspend. */
return !(ce->guc_state.sched_state &
~(SCHED_STATE_BLOCKED_MASK | SCHED_STATE_REGISTERED));
return !(ce->guc_state.sched_state & ~SCHED_STATE_VALID_INIT);
}
static inline bool
@ -319,6 +335,17 @@ static inline void clr_context_policy_required(struct intel_context *ce)
ce->guc_state.sched_state &= ~SCHED_STATE_POLICY_REQUIRED;
}
static inline bool context_close_done(struct intel_context *ce)
{
return ce->guc_state.sched_state & SCHED_STATE_CLOSED;
}
static inline void set_context_close_done(struct intel_context *ce)
{
lockdep_assert_held(&ce->guc_state.lock);
ce->guc_state.sched_state |= SCHED_STATE_CLOSED;
}
static inline u32 context_blocked(struct intel_context *ce)
{
return (ce->guc_state.sched_state & SCHED_STATE_BLOCKED_MASK) >>
@ -343,25 +370,6 @@ static inline void decr_context_blocked(struct intel_context *ce)
ce->guc_state.sched_state -= SCHED_STATE_BLOCKED;
}
static inline bool context_has_committed_requests(struct intel_context *ce)
{
return !!ce->guc_state.number_committed_requests;
}
static inline void incr_context_committed_requests(struct intel_context *ce)
{
lockdep_assert_held(&ce->guc_state.lock);
++ce->guc_state.number_committed_requests;
GEM_BUG_ON(ce->guc_state.number_committed_requests < 0);
}
static inline void decr_context_committed_requests(struct intel_context *ce)
{
lockdep_assert_held(&ce->guc_state.lock);
--ce->guc_state.number_committed_requests;
GEM_BUG_ON(ce->guc_state.number_committed_requests < 0);
}
static struct intel_context *
request_to_scheduling_context(struct i915_request *rq)
{
@ -1067,6 +1075,12 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
xa_unlock(&guc->context_lookup);
if (test_bit(CONTEXT_GUC_INIT, &ce->flags) &&
(cancel_delayed_work(&ce->guc_state.sched_disable_delay_work))) {
/* successful cancel so jump straight to close it */
intel_context_sched_disable_unpin(ce);
}
spin_lock(&ce->guc_state.lock);
/*
@ -1994,6 +2008,9 @@ static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
if (unlikely(ret < 0))
return ret;
if (!intel_context_is_parent(ce))
++guc->submission_state.guc_ids_in_use;
ce->guc_id.id = ret;
return 0;
}
@ -2003,14 +2020,16 @@ static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce)
GEM_BUG_ON(intel_context_is_child(ce));
if (!context_guc_id_invalid(ce)) {
if (intel_context_is_parent(ce))
if (intel_context_is_parent(ce)) {
bitmap_release_region(guc->submission_state.guc_ids_bitmap,
ce->guc_id.id,
order_base_2(ce->parallel.number_children
+ 1));
else
} else {
--guc->submission_state.guc_ids_in_use;
ida_simple_remove(&guc->submission_state.guc_ids,
ce->guc_id.id);
}
clr_ctx_id_mapping(guc, ce->guc_id.id);
set_context_guc_id_invalid(ce);
}
@ -2429,6 +2448,10 @@ static int guc_context_policy_init_v70(struct intel_context *ce, bool loop)
int ret;
/* NB: For both of these, zero means disabled. */
GEM_BUG_ON(overflows_type(engine->props.timeslice_duration_ms * 1000,
execution_quantum));
GEM_BUG_ON(overflows_type(engine->props.preempt_timeout_ms * 1000,
preemption_timeout));
execution_quantum = engine->props.timeslice_duration_ms * 1000;
preemption_timeout = engine->props.preempt_timeout_ms * 1000;
@ -2462,6 +2485,10 @@ static void guc_context_policy_init_v69(struct intel_engine_cs *engine,
desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE_V69;
/* NB: For both of these, zero means disabled. */
GEM_BUG_ON(overflows_type(engine->props.timeslice_duration_ms * 1000,
desc->execution_quantum));
GEM_BUG_ON(overflows_type(engine->props.preempt_timeout_ms * 1000,
desc->preemption_timeout));
desc->execution_quantum = engine->props.timeslice_duration_ms * 1000;
desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000;
}
@ -2998,41 +3025,104 @@ guc_context_revoke(struct intel_context *ce, struct i915_request *rq,
}
}
static void guc_context_sched_disable(struct intel_context *ce)
static void do_sched_disable(struct intel_guc *guc, struct intel_context *ce,
unsigned long flags)
__releases(ce->guc_state.lock)
{
struct intel_guc *guc = ce_to_guc(ce);
unsigned long flags;
struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm;
intel_wakeref_t wakeref;
u16 guc_id;
GEM_BUG_ON(intel_context_is_child(ce));
spin_lock_irqsave(&ce->guc_state.lock, flags);
/*
* We have to check if the context has been disabled by another thread,
* check if submission has been disabled to seal a race with reset and
* finally check if any more requests have been committed to the
* context, ensuring that a request doesn't slip through the
* 'context_pending_disable' fence.
*/
if (unlikely(!context_enabled(ce) || submission_disabled(guc) ||
context_has_committed_requests(ce))) {
clr_context_enabled(ce);
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
goto unpin;
}
lockdep_assert_held(&ce->guc_state.lock);
guc_id = prep_context_pending_disable(ce);
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
with_intel_runtime_pm(runtime_pm, wakeref)
__guc_context_sched_disable(guc, ce, guc_id);
}
return;
unpin:
intel_context_sched_disable_unpin(ce);
static bool bypass_sched_disable(struct intel_guc *guc,
struct intel_context *ce)
{
lockdep_assert_held(&ce->guc_state.lock);
GEM_BUG_ON(intel_context_is_child(ce));
if (submission_disabled(guc) || context_guc_id_invalid(ce) ||
!ctx_id_mapped(guc, ce->guc_id.id)) {
clr_context_enabled(ce);
return true;
}
return !context_enabled(ce);
}
static void __delay_sched_disable(struct work_struct *wrk)
{
struct intel_context *ce =
container_of(wrk, typeof(*ce), guc_state.sched_disable_delay_work.work);
struct intel_guc *guc = ce_to_guc(ce);
unsigned long flags;
spin_lock_irqsave(&ce->guc_state.lock, flags);
if (bypass_sched_disable(guc, ce)) {
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
intel_context_sched_disable_unpin(ce);
} else {
do_sched_disable(guc, ce, flags);
}
}
static bool guc_id_pressure(struct intel_guc *guc, struct intel_context *ce)
{
/*
* parent contexts are perma-pinned; if we are unpinning, do the schedule
* disable immediately.
*/
if (intel_context_is_parent(ce))
return true;
/*
* If we are beyond the threshold of available guc_ids, do the schedule disable immediately.
*/
return guc->submission_state.guc_ids_in_use >
guc->submission_state.sched_disable_gucid_threshold;
}
static void guc_context_sched_disable(struct intel_context *ce)
{
struct intel_guc *guc = ce_to_guc(ce);
u64 delay = guc->submission_state.sched_disable_delay_ms;
unsigned long flags;
spin_lock_irqsave(&ce->guc_state.lock, flags);
if (bypass_sched_disable(guc, ce)) {
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
intel_context_sched_disable_unpin(ce);
} else if (!intel_context_is_closed(ce) && !guc_id_pressure(guc, ce) &&
delay) {
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
mod_delayed_work(system_unbound_wq,
&ce->guc_state.sched_disable_delay_work,
msecs_to_jiffies(delay));
} else {
do_sched_disable(guc, ce, flags);
}
}
static void guc_context_close(struct intel_context *ce)
{
unsigned long flags;
if (test_bit(CONTEXT_GUC_INIT, &ce->flags) &&
cancel_delayed_work(&ce->guc_state.sched_disable_delay_work))
__delay_sched_disable(&ce->guc_state.sched_disable_delay_work.work);
spin_lock_irqsave(&ce->guc_state.lock, flags);
set_context_close_done(ce);
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
}
static inline void guc_lrc_desc_unpin(struct intel_context *ce)
@ -3071,7 +3161,6 @@ static void __guc_context_destroy(struct intel_context *ce)
ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_HIGH] ||
ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] ||
ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_NORMAL]);
GEM_BUG_ON(ce->guc_state.number_committed_requests);
lrc_fini(ce);
intel_context_fini(ce);
@ -3340,8 +3429,6 @@ static void remove_from_context(struct i915_request *rq)
guc_prio_fini(rq, ce);
decr_context_committed_requests(ce);
spin_unlock_irq(&ce->guc_state.lock);
atomic_dec(&ce->guc_id.ref);
@ -3351,6 +3438,8 @@ static void remove_from_context(struct i915_request *rq)
static const struct intel_context_ops guc_context_ops = {
.alloc = guc_context_alloc,
.close = guc_context_close,
.pre_pin = guc_context_pre_pin,
.pin = guc_context_pin,
.unpin = guc_context_unpin,
@ -3433,6 +3522,10 @@ static void guc_context_init(struct intel_context *ce)
rcu_read_unlock();
ce->guc_state.prio = map_i915_prio_to_guc_prio(prio);
INIT_DELAYED_WORK(&ce->guc_state.sched_disable_delay_work,
__delay_sched_disable);
set_bit(CONTEXT_GUC_INIT, &ce->flags);
}
@ -3470,6 +3563,26 @@ static int guc_request_alloc(struct i915_request *rq)
if (unlikely(!test_bit(CONTEXT_GUC_INIT, &ce->flags)))
guc_context_init(ce);
/*
* If the context gets closed while the execbuf is ongoing, the context
* close code will race with the below code to cancel the delayed work.
* If the context close wins the race and cancels the work, it will
* immediately call the sched disable (see guc_context_close), so there
* is a chance we can get past this check while the sched_disable code
* is being executed. To make sure that code completes before we check
* the status further down, we wait for the close process to complete.
* Else, this code path could send a request down thinking that the
* context is still in a schedule-enable mode while the GuC ends up
* dropping the request completely because the disable did go from the
* context_close path right to GuC just prior. In the event the CT is
* full, we could potentially need to wait up to 1.5 seconds.
*/
if (cancel_delayed_work_sync(&ce->guc_state.sched_disable_delay_work))
intel_context_sched_disable_unpin(ce);
else if (intel_context_is_closed(ce))
if (wait_for(context_close_done(ce), 1500))
drm_warn(&guc_to_gt(guc)->i915->drm,
"timed out waiting on context sched close before realloc\n");
/*
* Call pin_guc_id here rather than in the pinning step as with
* dma_resv, contexts can be repeatedly pinned / unpinned trashing the
@ -3524,7 +3637,6 @@ out:
list_add_tail(&rq->guc_fence_link, &ce->guc_state.fences);
}
incr_context_committed_requests(ce);
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
return 0;
@ -3600,6 +3712,8 @@ static int guc_virtual_context_alloc(struct intel_context *ce)
static const struct intel_context_ops virtual_guc_context_ops = {
.alloc = guc_virtual_context_alloc,
.close = guc_context_close,
.pre_pin = guc_virtual_context_pre_pin,
.pin = guc_virtual_context_pin,
.unpin = guc_virtual_context_unpin,
@ -3689,6 +3803,8 @@ static void guc_child_context_destroy(struct kref *kref)
static const struct intel_context_ops virtual_parent_context_ops = {
.alloc = guc_virtual_context_alloc,
.close = guc_context_close,
.pre_pin = guc_context_pre_pin,
.pin = guc_parent_context_pin,
.unpin = guc_parent_context_unpin,
@ -4093,7 +4209,7 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine)
engine->emit_bb_start = gen8_emit_bb_start;
if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
engine->emit_bb_start = gen125_emit_bb_start;
engine->emit_bb_start = xehp_emit_bb_start;
}
static void rcs_submission_override(struct intel_engine_cs *engine)
@ -4177,6 +4293,98 @@ int intel_guc_submission_setup(struct intel_engine_cs *engine)
return 0;
}
struct scheduling_policy {
/* internal data */
u32 max_words, num_words;
u32 count;
/* API data */
struct guc_update_scheduling_policy h2g;
};
static u32 __guc_scheduling_policy_action_size(struct scheduling_policy *policy)
{
u32 *start = (void *)&policy->h2g;
u32 *end = policy->h2g.data + policy->num_words;
size_t delta = end - start;
return delta;
}
static struct scheduling_policy *__guc_scheduling_policy_start_klv(struct scheduling_policy *policy)
{
policy->h2g.header.action = INTEL_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV;
policy->max_words = ARRAY_SIZE(policy->h2g.data);
policy->num_words = 0;
policy->count = 0;
return policy;
}
static void __guc_scheduling_policy_add_klv(struct scheduling_policy *policy,
u32 action, u32 *data, u32 len)
{
u32 *klv_ptr = policy->h2g.data + policy->num_words;
GEM_BUG_ON((policy->num_words + 1 + len) > policy->max_words);
*(klv_ptr++) = FIELD_PREP(GUC_KLV_0_KEY, action) |
FIELD_PREP(GUC_KLV_0_LEN, len);
memcpy(klv_ptr, data, sizeof(u32) * len);
policy->num_words += 1 + len;
policy->count++;
}
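For illustration only, here is a minimal userspace sketch of the key-length-value (KLV) packing performed by __guc_scheduling_policy_add_klv() above. It assumes a header word with a 16-bit key in the upper half and a 16-bit length in the lower half (the real layout is whatever GUC_KLV_0_KEY/GUC_KLV_0_LEN define); the key 0x1001 and the data values are made up.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Pack one KLV entry at buf[pos]: header word followed by 'len' data words. */
static unsigned int klv_add(uint32_t *buf, unsigned int pos,
			    uint16_t key, const uint32_t *data, uint16_t len)
{
	buf[pos++] = ((uint32_t)key << 16) | len;	/* assumed 16/16 split */
	memcpy(&buf[pos], data, sizeof(uint32_t) * len);
	return pos + len;				/* index of next free word */
}

int main(void)
{
	uint32_t buf[8] = {0};
	uint32_t yield[] = { 100, 50 };			/* hypothetical duration/ratio */
	unsigned int n = klv_add(buf, 0, 0x1001, yield, 2);

	for (unsigned int i = 0; i < n; i++)
		printf("word[%u] = 0x%08x\n", i, buf[i]);
	return 0;
}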
static int __guc_action_set_scheduling_policies(struct intel_guc *guc,
struct scheduling_policy *policy)
{
int ret;
ret = intel_guc_send(guc, (u32 *)&policy->h2g,
__guc_scheduling_policy_action_size(policy));
if (ret < 0)
return ret;
if (ret != policy->count) {
drm_warn(&guc_to_gt(guc)->i915->drm, "GuC global scheduler policy processed %d of %d KLVs!",
ret, policy->count);
if (ret > policy->count)
return -EPROTO;
}
return 0;
}
static int guc_init_global_schedule_policy(struct intel_guc *guc)
{
struct scheduling_policy policy;
struct intel_gt *gt = guc_to_gt(guc);
intel_wakeref_t wakeref;
int ret = 0;
if (GET_UC_VER(guc) < MAKE_UC_VER(70, 3, 0))
return 0;
__guc_scheduling_policy_start_klv(&policy);
with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref) {
u32 yield[] = {
GLOBAL_SCHEDULE_POLICY_RC_YIELD_DURATION,
GLOBAL_SCHEDULE_POLICY_RC_YIELD_RATIO,
};
__guc_scheduling_policy_add_klv(&policy,
GUC_SCHEDULING_POLICIES_KLV_ID_RENDER_COMPUTE_YIELD,
yield, ARRAY_SIZE(yield));
ret = __guc_action_set_scheduling_policies(guc, &policy);
if (ret)
i915_probe_error(gt->i915,
"Failed to configure global scheduling policies: %pe!\n",
ERR_PTR(ret));
}
return ret;
}
void intel_guc_submission_enable(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
@ -4189,6 +4397,7 @@ void intel_guc_submission_enable(struct intel_guc *guc)
guc_init_lrc_mapping(guc);
guc_init_engine_stats(guc);
guc_init_global_schedule_policy(guc);
}
void intel_guc_submission_disable(struct intel_guc *guc)
@ -4219,6 +4428,26 @@ static bool __guc_submission_selected(struct intel_guc *guc)
return i915->params.enable_guc & ENABLE_GUC_SUBMISSION;
}
int intel_guc_sched_disable_gucid_threshold_max(struct intel_guc *guc)
{
return guc->submission_state.num_guc_ids - NUMBER_MULTI_LRC_GUC_ID(guc);
}
/*
* This default value of 33 millisecs (+1 millisec round up) ensures 30fps or higher
* workloads are able to enjoy the latency reduction when delaying the schedule-disable
* operation. This matches the 30fps game-render + encode (real world) workload this
* knob was tested against.
*/
#define SCHED_DISABLE_DELAY_MS 34
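As a purely illustrative aside (not driver code), the arithmetic behind the 34 ms default can be sketched in a few lines of userspace C: a 30 fps frame interval truncates to 33 ms, plus the 1 ms round-up mentioned above.

#include <stdio.h>

int main(void)
{
	unsigned int fps = 30;
	unsigned int frame_ms = 1000 / fps;	/* 33 ms, truncated */
	unsigned int delay_ms = frame_ms + 1;	/* +1 ms round-up -> 34 ms */

	printf("frame interval ~%u ms, sched-disable delay %u ms\n",
	       frame_ms, delay_ms);
	return 0;
}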
/*
* A threshold of 75% is a reasonable starting point considering that real-world apps
* generally don't get anywhere near this.
*/
#define NUM_SCHED_DISABLE_GUCIDS_DEFAULT_THRESHOLD(__guc) \
(((intel_guc_sched_disable_gucid_threshold_max(__guc)) * 3) / 4)
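Similarly, a small illustrative sketch of the 75% threshold computation, using made-up numbers for the guc_id budget and the multi-LRC reservation rather than the driver's real constants.

#include <stdio.h>

int main(void)
{
	unsigned int num_guc_ids = 65535;		/* hypothetical budget */
	unsigned int multi_lrc_ids = num_guc_ids / 16;	/* hypothetical reservation */
	unsigned int max = num_guc_ids - multi_lrc_ids;
	unsigned int threshold = max * 3 / 4;

	printf("delayed sched-disable is used while fewer than %u of %u ids are pinned\n",
	       threshold, max);
	return 0;
}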
void intel_guc_submission_init_early(struct intel_guc *guc)
{
xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ);
@ -4235,7 +4464,10 @@ void intel_guc_submission_init_early(struct intel_guc *guc)
spin_lock_init(&guc->timestamp.lock);
INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping);
guc->submission_state.sched_disable_delay_ms = SCHED_DISABLE_DELAY_MS;
guc->submission_state.num_guc_ids = GUC_MAX_CONTEXT_ID;
guc->submission_state.sched_disable_gucid_threshold =
NUM_SCHED_DISABLE_GUCIDS_DEFAULT_THRESHOLD(guc);
guc->submission_supported = __guc_submission_supported(guc);
guc->submission_selected = __guc_submission_selected(guc);
}


@ -10,6 +10,9 @@
#include "intel_huc.h"
#include "i915_drv.h"
#include <linux/device/bus.h>
#include <linux/mei_aux.h>
/**
* DOC: HuC
*
@ -42,6 +45,172 @@
* HuC-specific commands.
*/
/*
* MEI-GSC load is an async process. The probing of the exposed aux device
* (see intel_gsc.c) usually happens a few seconds after i915 probe, depending
* on when the kernel schedules it. Unless something goes terribly wrong, this is
* guaranteed to happen during boot, so the big timeout is a safety net
* that we never expect to need.
* MEI-PXP + HuC load usually takes ~300ms, but if the GSC needs to be resumed
* and/or reset, this can take longer. Note that the kernel might schedule
* other work between the i915 init/resume and the MEI one, which can add to
* the delay.
*/
#define GSC_INIT_TIMEOUT_MS 10000
#define PXP_INIT_TIMEOUT_MS 5000
static int sw_fence_dummy_notify(struct i915_sw_fence *sf,
enum i915_sw_fence_notify state)
{
return NOTIFY_DONE;
}
static void __delayed_huc_load_complete(struct intel_huc *huc)
{
if (!i915_sw_fence_done(&huc->delayed_load.fence))
i915_sw_fence_complete(&huc->delayed_load.fence);
}
static void delayed_huc_load_complete(struct intel_huc *huc)
{
hrtimer_cancel(&huc->delayed_load.timer);
__delayed_huc_load_complete(huc);
}
static void __gsc_init_error(struct intel_huc *huc)
{
huc->delayed_load.status = INTEL_HUC_DELAYED_LOAD_ERROR;
__delayed_huc_load_complete(huc);
}
static void gsc_init_error(struct intel_huc *huc)
{
hrtimer_cancel(&huc->delayed_load.timer);
__gsc_init_error(huc);
}
static void gsc_init_done(struct intel_huc *huc)
{
hrtimer_cancel(&huc->delayed_load.timer);
/* MEI-GSC init is done, now we wait for MEI-PXP to bind */
huc->delayed_load.status = INTEL_HUC_WAITING_ON_PXP;
if (!i915_sw_fence_done(&huc->delayed_load.fence))
hrtimer_start(&huc->delayed_load.timer,
ms_to_ktime(PXP_INIT_TIMEOUT_MS),
HRTIMER_MODE_REL);
}
static enum hrtimer_restart huc_delayed_load_timer_callback(struct hrtimer *hrtimer)
{
struct intel_huc *huc = container_of(hrtimer, struct intel_huc, delayed_load.timer);
if (!intel_huc_is_authenticated(huc)) {
if (huc->delayed_load.status == INTEL_HUC_WAITING_ON_GSC)
drm_notice(&huc_to_gt(huc)->i915->drm,
"timed out waiting for MEI GSC init to load HuC\n");
else if (huc->delayed_load.status == INTEL_HUC_WAITING_ON_PXP)
drm_notice(&huc_to_gt(huc)->i915->drm,
"timed out waiting for MEI PXP init to load HuC\n");
else
MISSING_CASE(huc->delayed_load.status);
__gsc_init_error(huc);
}
return HRTIMER_NORESTART;
}
static void huc_delayed_load_start(struct intel_huc *huc)
{
ktime_t delay;
GEM_BUG_ON(intel_huc_is_authenticated(huc));
/*
* On resume we don't have to wait for MEI-GSC to be re-probed, but we
* do need to wait for MEI-PXP to reset & re-bind
*/
switch (huc->delayed_load.status) {
case INTEL_HUC_WAITING_ON_GSC:
delay = ms_to_ktime(GSC_INIT_TIMEOUT_MS);
break;
case INTEL_HUC_WAITING_ON_PXP:
delay = ms_to_ktime(PXP_INIT_TIMEOUT_MS);
break;
default:
gsc_init_error(huc);
return;
}
/*
* This fence is always complete unless we're waiting for the
* GSC device to come up to load the HuC. We arm the fence here
* and complete it once the PXP bind callback confirms that the HuC
* has been loaded.
*/
GEM_BUG_ON(!i915_sw_fence_done(&huc->delayed_load.fence));
i915_sw_fence_fini(&huc->delayed_load.fence);
i915_sw_fence_reinit(&huc->delayed_load.fence);
i915_sw_fence_await(&huc->delayed_load.fence);
i915_sw_fence_commit(&huc->delayed_load.fence);
hrtimer_start(&huc->delayed_load.timer, delay, HRTIMER_MODE_REL);
}
static int gsc_notifier(struct notifier_block *nb, unsigned long action, void *data)
{
struct device *dev = data;
struct intel_huc *huc = container_of(nb, struct intel_huc, delayed_load.nb);
struct intel_gsc_intf *intf = &huc_to_gt(huc)->gsc.intf[0];
if (!intf->adev || &intf->adev->aux_dev.dev != dev)
return 0;
switch (action) {
case BUS_NOTIFY_BOUND_DRIVER: /* mei driver bound to aux device */
gsc_init_done(huc);
break;
case BUS_NOTIFY_DRIVER_NOT_BOUND: /* mei driver fails to be bound */
case BUS_NOTIFY_UNBIND_DRIVER: /* mei driver about to be unbound */
drm_info(&huc_to_gt(huc)->i915->drm,
"mei driver not bound, disabling HuC load\n");
gsc_init_error(huc);
break;
}
return 0;
}
void intel_huc_register_gsc_notifier(struct intel_huc *huc, struct bus_type *bus)
{
int ret;
if (!intel_huc_is_loaded_by_gsc(huc))
return;
huc->delayed_load.nb.notifier_call = gsc_notifier;
ret = bus_register_notifier(bus, &huc->delayed_load.nb);
if (ret) {
drm_err(&huc_to_gt(huc)->i915->drm,
"failed to register GSC notifier\n");
huc->delayed_load.nb.notifier_call = NULL;
gsc_init_error(huc);
}
}
void intel_huc_unregister_gsc_notifier(struct intel_huc *huc, struct bus_type *bus)
{
if (!huc->delayed_load.nb.notifier_call)
return;
delayed_huc_load_complete(huc);
bus_unregister_notifier(bus, &huc->delayed_load.nb);
huc->delayed_load.nb.notifier_call = NULL;
}
void intel_huc_init_early(struct intel_huc *huc)
{
struct drm_i915_private *i915 = huc_to_gt(huc)->i915;
@ -57,6 +226,17 @@ void intel_huc_init_early(struct intel_huc *huc)
huc->status.mask = HUC_FW_VERIFIED;
huc->status.value = HUC_FW_VERIFIED;
}
/*
* Initialize the fence as already completed, since it only needs to be
* armed while a delayed HuC load or reload is in progress.
*/
i915_sw_fence_init(&huc->delayed_load.fence,
sw_fence_dummy_notify);
i915_sw_fence_commit(&huc->delayed_load.fence);
hrtimer_init(&huc->delayed_load.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
huc->delayed_load.timer.function = huc_delayed_load_timer_callback;
}
#define HUC_LOAD_MODE_STRING(x) (x ? "GSC" : "legacy")
@ -113,6 +293,7 @@ int intel_huc_init(struct intel_huc *huc)
return 0;
out:
intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_INIT_FAIL);
drm_info(&i915->drm, "HuC init failed with %d\n", err);
return err;
}
@ -122,9 +303,50 @@ void intel_huc_fini(struct intel_huc *huc)
if (!intel_uc_fw_is_loadable(&huc->fw))
return;
delayed_huc_load_complete(huc);
i915_sw_fence_fini(&huc->delayed_load.fence);
intel_uc_fw_fini(&huc->fw);
}
void intel_huc_suspend(struct intel_huc *huc)
{
if (!intel_uc_fw_is_loadable(&huc->fw))
return;
/*
* in the unlikely case that we're suspending before the GSC has
* completed its loading sequence, just stop waiting. We'll restart
* on resume.
*/
delayed_huc_load_complete(huc);
}
int intel_huc_wait_for_auth_complete(struct intel_huc *huc)
{
struct intel_gt *gt = huc_to_gt(huc);
int ret;
ret = __intel_wait_for_register(gt->uncore,
huc->status.reg,
huc->status.mask,
huc->status.value,
2, 50, NULL);
/* mark the load process as complete even if the wait failed */
delayed_huc_load_complete(huc);
if (ret) {
drm_err(&gt->i915->drm, "HuC: Firmware not verified %d\n", ret);
intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_LOAD_FAIL);
return ret;
}
intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_RUNNING);
drm_info(&gt->i915->drm, "HuC authenticated\n");
return 0;
}
/**
* intel_huc_auth() - Authenticate HuC uCode
* @huc: intel_huc structure
@ -161,27 +383,18 @@ int intel_huc_auth(struct intel_huc *huc)
}
/* Check authentication status, it should be done by now */
ret = __intel_wait_for_register(gt->uncore,
huc->status.reg,
huc->status.mask,
huc->status.value,
2, 50, NULL);
if (ret) {
DRM_ERROR("HuC: Firmware not verified %d\n", ret);
ret = intel_huc_wait_for_auth_complete(huc);
if (ret)
goto fail;
}
intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_RUNNING);
drm_info(&gt->i915->drm, "HuC authenticated\n");
return 0;
fail:
i915_probe_error(gt->i915, "HuC: Authentication failed %d\n", ret);
intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_LOAD_FAIL);
return ret;
}
static bool huc_is_authenticated(struct intel_huc *huc)
bool intel_huc_is_authenticated(struct intel_huc *huc)
{
struct intel_gt *gt = huc_to_gt(huc);
intel_wakeref_t wakeref;
@ -200,13 +413,8 @@ static bool huc_is_authenticated(struct intel_huc *huc)
* This function reads the status register to verify if HuC
* firmware was successfully loaded.
*
* Returns:
* * -ENODEV if HuC is not present on this platform,
* * -EOPNOTSUPP if HuC firmware is disabled,
* * -ENOPKG if HuC firmware was not installed,
* * -ENOEXEC if HuC firmware is invalid or mismatched,
* * 0 if HuC firmware is not running,
* * 1 if HuC firmware is authenticated and running.
* The return values match what is expected for the I915_PARAM_HUC_STATUS
* getparam.
*/
int intel_huc_check_status(struct intel_huc *huc)
{
@ -219,11 +427,21 @@ int intel_huc_check_status(struct intel_huc *huc)
return -ENOPKG;
case INTEL_UC_FIRMWARE_ERROR:
return -ENOEXEC;
case INTEL_UC_FIRMWARE_INIT_FAIL:
return -ENOMEM;
case INTEL_UC_FIRMWARE_LOAD_FAIL:
return -EIO;
default:
break;
}
return huc_is_authenticated(huc);
return intel_huc_is_authenticated(huc);
}
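Since the comment above notes that the return values match the I915_PARAM_HUC_STATUS getparam, a minimal userspace usage sketch may help; it assumes the kernel uapi header <drm/i915_drm.h> is installed and that a render node exists at /dev/dri/renderD128 (adjust the path as needed).

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <drm/i915_drm.h>

int main(void)
{
	int value = 0;
	struct drm_i915_getparam gp = {
		.param = I915_PARAM_HUC_STATUS,
		.value = &value,
	};
	int fd = open("/dev/dri/renderD128", O_RDWR);	/* assumed render node */

	if (fd < 0)
		return 1;
	if (ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp) == 0)
		printf("HuC status: %d\n", value);	/* <0 error, 0 not running, 1 running */
	close(fd);
	return 0;
}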
static bool huc_has_delayed_load(struct intel_huc *huc)
{
return intel_huc_is_loaded_by_gsc(huc) &&
(huc->delayed_load.status != INTEL_HUC_DELAYED_LOAD_ERROR);
}
void intel_huc_update_auth_status(struct intel_huc *huc)
@ -231,9 +449,11 @@ void intel_huc_update_auth_status(struct intel_huc *huc)
if (!intel_uc_fw_is_loadable(&huc->fw))
return;
if (huc_is_authenticated(huc))
if (intel_huc_is_authenticated(huc))
intel_uc_fw_change_status(&huc->fw,
INTEL_UC_FIRMWARE_RUNNING);
else if (huc_has_delayed_load(huc))
huc_delayed_load_start(huc);
}
/**


@ -7,9 +7,21 @@
#define _INTEL_HUC_H_
#include "i915_reg_defs.h"
#include "i915_sw_fence.h"
#include "intel_uc_fw.h"
#include "intel_huc_fw.h"
#include <linux/notifier.h>
#include <linux/hrtimer.h>
struct bus_type;
enum intel_huc_delayed_load_status {
INTEL_HUC_WAITING_ON_GSC = 0,
INTEL_HUC_WAITING_ON_PXP,
INTEL_HUC_DELAYED_LOAD_ERROR,
};
struct intel_huc {
/* Generic uC firmware management */
struct intel_uc_fw fw;
@ -20,14 +32,27 @@ struct intel_huc {
u32 mask;
u32 value;
} status;
struct {
struct i915_sw_fence fence;
struct hrtimer timer;
struct notifier_block nb;
enum intel_huc_delayed_load_status status;
} delayed_load;
};
void intel_huc_init_early(struct intel_huc *huc);
int intel_huc_init(struct intel_huc *huc);
void intel_huc_fini(struct intel_huc *huc);
void intel_huc_suspend(struct intel_huc *huc);
int intel_huc_auth(struct intel_huc *huc);
int intel_huc_wait_for_auth_complete(struct intel_huc *huc);
int intel_huc_check_status(struct intel_huc *huc);
void intel_huc_update_auth_status(struct intel_huc *huc);
bool intel_huc_is_authenticated(struct intel_huc *huc);
void intel_huc_register_gsc_notifier(struct intel_huc *huc, struct bus_type *bus);
void intel_huc_unregister_gsc_notifier(struct intel_huc *huc, struct bus_type *bus);
static inline int intel_huc_sanitize(struct intel_huc *huc)
{
@ -56,6 +81,12 @@ static inline bool intel_huc_is_loaded_by_gsc(const struct intel_huc *huc)
return huc->fw.loaded_via_gsc;
}
static inline bool intel_huc_wait_required(struct intel_huc *huc)
{
return intel_huc_is_used(huc) && intel_huc_is_loaded_by_gsc(huc) &&
!intel_huc_is_authenticated(huc);
}
void intel_huc_load_status(struct intel_huc *huc, struct drm_printer *p);
#endif


@ -3,9 +3,43 @@
* Copyright © 2014-2019 Intel Corporation
*/
#include "gt/intel_gsc.h"
#include "gt/intel_gt.h"
#include "intel_huc.h"
#include "intel_huc_fw.h"
#include "i915_drv.h"
#include "pxp/intel_pxp_huc.h"
int intel_huc_fw_load_and_auth_via_gsc(struct intel_huc *huc)
{
int ret;
if (!intel_huc_is_loaded_by_gsc(huc))
return -ENODEV;
if (!intel_uc_fw_is_loadable(&huc->fw))
return -ENOEXEC;
/*
* If we abort a suspend, HuC might still be loaded when the mei
* component gets re-bound and this function is called again. If so, just
* mark the HuC as loaded.
*/
if (intel_huc_is_authenticated(huc)) {
intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_RUNNING);
return 0;
}
GEM_WARN_ON(intel_uc_fw_is_loaded(&huc->fw));
ret = intel_pxp_huc_load_and_auth(&huc_to_gt(huc)->pxp);
if (ret)
return ret;
intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_TRANSFERRED);
return intel_huc_wait_for_auth_complete(huc);
}
/**
* intel_huc_fw_upload() - load HuC uCode to device via DMA transfer


@ -8,6 +8,7 @@
struct intel_huc;
int intel_huc_fw_load_and_auth_via_gsc(struct intel_huc *huc);
int intel_huc_fw_upload(struct intel_huc *huc);
#endif


@ -93,7 +93,8 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
fw_def(BROXTON, 0, guc_mmp(bxt, 70, 1, 1)) \
fw_def(SKYLAKE, 0, guc_mmp(skl, 70, 1, 1))
#define INTEL_HUC_FIRMWARE_DEFS(fw_def, huc_raw, huc_mmp) \
#define INTEL_HUC_FIRMWARE_DEFS(fw_def, huc_raw, huc_mmp, huc_gsc) \
fw_def(DG2, 0, huc_gsc(dg2)) \
fw_def(ALDERLAKE_P, 0, huc_raw(tgl)) \
fw_def(ALDERLAKE_P, 0, huc_mmp(tgl, 7, 9, 3)) \
fw_def(ALDERLAKE_S, 0, huc_raw(tgl)) \
@ -141,6 +142,9 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
#define MAKE_HUC_FW_PATH_BLANK(prefix_) \
__MAKE_UC_FW_PATH_BLANK(prefix_, "_huc")
#define MAKE_HUC_FW_PATH_GSC(prefix_) \
__MAKE_UC_FW_PATH_BLANK(prefix_, "_huc_gsc")
#define MAKE_HUC_FW_PATH_MMP(prefix_, major_, minor_, patch_) \
__MAKE_UC_FW_PATH_MMP(prefix_, "_huc_", major_, minor_, patch_)
@ -153,7 +157,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
MODULE_FIRMWARE(uc_);
INTEL_GUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH_MAJOR, MAKE_GUC_FW_PATH_MMP)
INTEL_HUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_HUC_FW_PATH_BLANK, MAKE_HUC_FW_PATH_MMP)
INTEL_HUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_HUC_FW_PATH_BLANK, MAKE_HUC_FW_PATH_MMP, MAKE_HUC_FW_PATH_GSC)
/*
* The next expansion of the table macros (in __uc_fw_auto_select below) provides
@ -168,6 +172,7 @@ struct __packed uc_fw_blob {
u8 major;
u8 minor;
u8 patch;
bool loaded_via_gsc;
};
#define UC_FW_BLOB_BASE(major_, minor_, patch_, path_) \
@ -176,16 +181,16 @@ struct __packed uc_fw_blob {
.patch = patch_, \
.path = path_,
#define UC_FW_BLOB_NEW(major_, minor_, patch_, path_) \
#define UC_FW_BLOB_NEW(major_, minor_, patch_, gsc_, path_) \
{ UC_FW_BLOB_BASE(major_, minor_, patch_, path_) \
.legacy = false }
.legacy = false, .loaded_via_gsc = gsc_ }
#define UC_FW_BLOB_OLD(major_, minor_, patch_, path_) \
{ UC_FW_BLOB_BASE(major_, minor_, patch_, path_) \
.legacy = true }
#define GUC_FW_BLOB(prefix_, major_, minor_) \
UC_FW_BLOB_NEW(major_, minor_, 0, \
UC_FW_BLOB_NEW(major_, minor_, 0, false, \
MAKE_GUC_FW_PATH_MAJOR(prefix_, major_, minor_))
#define GUC_FW_BLOB_MMP(prefix_, major_, minor_, patch_) \
@ -193,12 +198,15 @@ struct __packed uc_fw_blob {
MAKE_GUC_FW_PATH_MMP(prefix_, major_, minor_, patch_))
#define HUC_FW_BLOB(prefix_) \
UC_FW_BLOB_NEW(0, 0, 0, MAKE_HUC_FW_PATH_BLANK(prefix_))
UC_FW_BLOB_NEW(0, 0, 0, false, MAKE_HUC_FW_PATH_BLANK(prefix_))
#define HUC_FW_BLOB_MMP(prefix_, major_, minor_, patch_) \
UC_FW_BLOB_OLD(major_, minor_, patch_, \
MAKE_HUC_FW_PATH_MMP(prefix_, major_, minor_, patch_))
#define HUC_FW_BLOB_GSC(prefix_) \
UC_FW_BLOB_NEW(0, 0, 0, true, MAKE_HUC_FW_PATH_GSC(prefix_))
struct __packed uc_fw_platform_requirement {
enum intel_platform p;
u8 rev; /* first platform rev using this FW */
@ -224,7 +232,7 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw)
INTEL_GUC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB, GUC_FW_BLOB_MMP)
};
static const struct uc_fw_platform_requirement blobs_huc[] = {
INTEL_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB, HUC_FW_BLOB_MMP)
INTEL_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB, HUC_FW_BLOB_MMP, HUC_FW_BLOB_GSC)
};
static const struct fw_blobs_by_type blobs_all[INTEL_UC_FW_NUM_TYPES] = {
[INTEL_UC_FW_TYPE_GUC] = { blobs_guc, ARRAY_SIZE(blobs_guc) },
@ -272,6 +280,7 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw)
uc_fw->file_wanted.path = blob->path;
uc_fw->file_wanted.major_ver = blob->major;
uc_fw->file_wanted.minor_ver = blob->minor;
uc_fw->loaded_via_gsc = blob->loaded_via_gsc;
found = true;
break;
}
@ -904,7 +913,6 @@ int intel_uc_fw_init(struct intel_uc_fw *uc_fw)
out_unpin:
i915_gem_object_unpin_pages(uc_fw->obj);
out:
intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_INIT_FAIL);
return err;
}


@ -354,9 +354,9 @@ void intel_vgpu_init_cfg_space(struct intel_vgpu *vgpu,
memset(vgpu_cfg_space(vgpu) + INTEL_GVT_PCI_OPREGION, 0, 4);
vgpu->cfg_space.bar[INTEL_GVT_PCI_BAR_GTTMMIO].size =
pci_resource_len(pdev, GTTMMADR_BAR);
pci_resource_len(pdev, GEN4_GTTMMADR_BAR);
vgpu->cfg_space.bar[INTEL_GVT_PCI_BAR_APERTURE].size =
pci_resource_len(pdev, GTT_APERTURE_BAR);
pci_resource_len(pdev, GEN4_GMADR_BAR);
memset(vgpu_cfg_space(vgpu) + PCI_ROM_ADDRESS, 0, 4);


@ -734,7 +734,7 @@ static i915_reg_t force_nonpriv_white_list[] = {
_MMIO(0x770c),
_MMIO(0x83a8),
_MMIO(0xb110),
GEN8_L3SQCREG4,//_MMIO(0xb118)
_MMIO(0xb118),
_MMIO(0xe100),
_MMIO(0xe18c),
_MMIO(0xe48c),
@ -2257,7 +2257,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
MMIO_DFH(_MMIO(0x2438), D_ALL, F_CMD_ACCESS, NULL, NULL);
MMIO_DFH(_MMIO(0x243c), D_ALL, F_CMD_ACCESS, NULL, NULL);
MMIO_DFH(_MMIO(0x7018), D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
MMIO_DFH(HALF_SLICE_CHICKEN3, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
MMIO_DFH(HSW_HALF_SLICE_CHICKEN3, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
MMIO_DFH(GEN7_HALF_SLICE_CHICKEN1, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
/* display */


@ -106,15 +106,15 @@ static struct engine_mmio gen9_engine_mmio_list[] __cacheline_aligned = {
{RCS0, GEN8_CS_CHICKEN1, 0xffff, true}, /* 0x2580 */
{RCS0, COMMON_SLICE_CHICKEN2, 0xffff, true}, /* 0x7014 */
{RCS0, GEN9_CS_DEBUG_MODE1, 0xffff, false}, /* 0x20ec */
{RCS0, GEN8_L3SQCREG4, 0, false}, /* 0xb118 */
{RCS0, GEN9_SCRATCH1, 0, false}, /* 0xb11c */
{RCS0, _MMIO(0xb118), 0, false}, /* GEN8_L3SQCREG4 */
{RCS0, _MMIO(0xb11c), 0, false}, /* GEN9_SCRATCH1 */
{RCS0, GEN9_SCRATCH_LNCF1, 0, false}, /* 0xb008 */
{RCS0, GEN7_HALF_SLICE_CHICKEN1, 0xffff, true}, /* 0xe100 */
{RCS0, HALF_SLICE_CHICKEN2, 0xffff, true}, /* 0xe180 */
{RCS0, HALF_SLICE_CHICKEN3, 0xffff, true}, /* 0xe184 */
{RCS0, GEN9_HALF_SLICE_CHICKEN5, 0xffff, true}, /* 0xe188 */
{RCS0, GEN9_HALF_SLICE_CHICKEN7, 0xffff, true}, /* 0xe194 */
{RCS0, GEN8_ROW_CHICKEN, 0xffff, true}, /* 0xe4f0 */
{RCS0, _MMIO(0xe180), 0xffff, true}, /* HALF_SLICE_CHICKEN2 */
{RCS0, _MMIO(0xe184), 0xffff, true}, /* GEN8_HALF_SLICE_CHICKEN3 */
{RCS0, _MMIO(0xe188), 0xffff, true}, /* GEN9_HALF_SLICE_CHICKEN5 */
{RCS0, _MMIO(0xe194), 0xffff, true}, /* GEN9_HALF_SLICE_CHICKEN7 */
{RCS0, _MMIO(0xe4f0), 0xffff, true}, /* GEN8_ROW_CHICKEN */
{RCS0, TRVATTL3PTRDW(0), 0, true}, /* 0x4de0 */
{RCS0, TRVATTL3PTRDW(1), 0, true}, /* 0x4de4 */
{RCS0, TRNULLDETCT, 0, true}, /* 0x4de8 */


@ -81,6 +81,7 @@
#include "i915_drm_client.h"
#include "i915_drv.h"
#include "i915_getparam.h"
#include "i915_hwmon.h"
#include "i915_ioc32.h"
#include "i915_ioctl.h"
#include "i915_irq.h"
@ -764,6 +765,8 @@ static void i915_driver_register(struct drm_i915_private *dev_priv)
for_each_gt(gt, dev_priv, i)
intel_gt_driver_register(gt);
i915_hwmon_register(dev_priv);
intel_display_driver_register(dev_priv);
intel_power_domains_enable(dev_priv);
@ -796,6 +799,8 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv)
for_each_gt(gt, dev_priv, i)
intel_gt_driver_unregister(gt);
i915_hwmon_unregister(dev_priv);
i915_perf_unregister(dev_priv);
i915_pmu_unregister(dev_priv);
@ -1656,7 +1661,8 @@ static int intel_runtime_suspend(struct device *kdev)
intel_runtime_pm_enable_interrupts(dev_priv);
intel_gt_runtime_resume(to_gt(dev_priv));
for_each_gt(gt, dev_priv, i)
intel_gt_runtime_resume(gt);
enable_rpm_wakeref_asserts(rpm);


@ -40,7 +40,6 @@
#include "display/intel_display_core.h"
#include "gem/i915_gem_context_types.h"
#include "gem/i915_gem_lmem.h"
#include "gem/i915_gem_shrinker.h"
#include "gem/i915_gem_stolen.h"
@ -350,6 +349,8 @@ struct drm_i915_private {
struct i915_perf perf;
struct i915_hwmon *hwmon;
/* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
struct intel_gt gt0;
@ -898,19 +899,17 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
#define HAS_RUNTIME_PM(dev_priv) (INTEL_INFO(dev_priv)->has_runtime_pm)
#define HAS_64BIT_RELOC(dev_priv) (INTEL_INFO(dev_priv)->has_64bit_reloc)
#define HAS_OA_BPC_REPORTING(dev_priv) \
(INTEL_INFO(dev_priv)->has_oa_bpc_reporting)
#define HAS_OA_SLICE_CONTRIB_LIMITS(dev_priv) \
(INTEL_INFO(dev_priv)->has_oa_slice_contrib_limits)
/*
* Set this flag, when platform requires 64K GTT page sizes or larger for
* device local memory access.
*/
#define HAS_64K_PAGES(dev_priv) (INTEL_INFO(dev_priv)->has_64k_pages)
/*
* Set this flag when platform doesn't allow both 64k pages and 4k pages in
* the same PT. this flag means we need to support compact PT layout for the
* ppGTT when using the 64K GTT pages.
*/
#define NEEDS_COMPACT_PT(dev_priv) (INTEL_INFO(dev_priv)->needs_compact_pt)
#define HAS_IPC(dev_priv) (INTEL_INFO(dev_priv)->display.has_ipc)
#define HAS_REGION(i915, i) (RUNTIME_INFO(i915)->memory_regions & (i))
@ -976,6 +975,9 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
#define HAS_ONE_EU_PER_FUSE_BIT(i915) (INTEL_INFO(i915)->has_one_eu_per_fuse_bit)
#define HAS_LMEMBAR_SMEM_STOLEN(i915) (!HAS_LMEM(i915) && \
GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
/* intel_device_info.c */
static inline struct intel_device_info *
mkwrite_device_info(struct drm_i915_private *dev_priv)
@ -983,16 +985,4 @@ mkwrite_device_info(struct drm_i915_private *dev_priv)
return (struct intel_device_info *)INTEL_INFO(dev_priv);
}
static inline enum i915_map_type
i915_coherent_map_type(struct drm_i915_private *i915,
struct drm_i915_gem_object *obj, bool always_coherent)
{
if (i915_gem_object_is_lmem(obj))
return I915_MAP_WC;
if (HAS_LLC(i915) || always_coherent)
return I915_MAP_WB;
else
return I915_MAP_WC;
}
#endif


@ -843,7 +843,7 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915)
__i915_gem_object_release_mmap_gtt(obj);
list_for_each_entry_safe(obj, on,
&to_gt(i915)->lmem_userfault_list, userfault_link)
&i915->runtime_pm.lmem_userfault_list, userfault_link)
i915_gem_object_runtime_pm_release_mmap_offset(obj);
/*
@ -1128,6 +1128,8 @@ void i915_gem_drain_workqueue(struct drm_i915_private *i915)
int i915_gem_init(struct drm_i915_private *dev_priv)
{
struct intel_gt *gt;
unsigned int i;
int ret;
/* We need to fallback to 4K pages if host doesn't support huge gtt. */
@ -1158,9 +1160,11 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
*/
intel_init_clock_gating(dev_priv);
ret = intel_gt_init(to_gt(dev_priv));
if (ret)
goto err_unlock;
for_each_gt(gt, dev_priv, i) {
ret = intel_gt_init(gt);
if (ret)
goto err_unlock;
}
return 0;
@ -1173,8 +1177,13 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
err_unlock:
i915_gem_drain_workqueue(dev_priv);
if (ret != -EIO)
intel_uc_cleanup_firmwares(&to_gt(dev_priv)->uc);
if (ret != -EIO) {
for_each_gt(gt, dev_priv, i) {
intel_gt_driver_remove(gt);
intel_gt_driver_release(gt);
intel_uc_cleanup_firmwares(&gt->uc);
}
}
if (ret == -EIO) {
/*
@ -1182,10 +1191,12 @@ err_unlock:
* as wedged. But we only want to do this when the GPU is angry,
* for all other failure, such as an allocation failure, bail.
*/
if (!intel_gt_is_wedged(to_gt(dev_priv))) {
i915_probe_error(dev_priv,
"Failed to initialize GPU, declaring it wedged!\n");
intel_gt_set_wedged(to_gt(dev_priv));
for_each_gt(gt, dev_priv, i) {
if (!intel_gt_is_wedged(gt)) {
i915_probe_error(dev_priv,
"Failed to initialize GPU, declaring it wedged!\n");
intel_gt_set_wedged(gt);
}
}
/* Minimal basic recovery for KMS */
@ -1213,23 +1224,27 @@ void i915_gem_driver_unregister(struct drm_i915_private *i915)
void i915_gem_driver_remove(struct drm_i915_private *dev_priv)
{
intel_wakeref_auto_fini(&to_gt(dev_priv)->userfault_wakeref);
struct intel_gt *gt;
unsigned int i;
i915_gem_suspend_late(dev_priv);
intel_gt_driver_remove(to_gt(dev_priv));
for_each_gt(gt, dev_priv, i)
intel_gt_driver_remove(gt);
dev_priv->uabi_engines = RB_ROOT;
/* Flush any outstanding unpin_work. */
i915_gem_drain_workqueue(dev_priv);
i915_gem_drain_freed_objects(dev_priv);
}
void i915_gem_driver_release(struct drm_i915_private *dev_priv)
{
intel_gt_driver_release(to_gt(dev_priv));
struct intel_gt *gt;
unsigned int i;
intel_uc_cleanup_firmwares(&to_gt(dev_priv)->uc);
for_each_gt(gt, dev_priv, i) {
intel_gt_driver_release(gt);
intel_uc_cleanup_firmwares(&gt->uc);
}
/* Flush any outstanding work, including i915_gem_context.release_work. */
i915_gem_drain_workqueue(dev_priv);
@ -1259,7 +1274,7 @@ void i915_gem_init_early(struct drm_i915_private *dev_priv)
void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
{
i915_gem_drain_freed_objects(dev_priv);
i915_gem_drain_workqueue(dev_priv);
GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list));
GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));
drm_WARN_ON(&dev_priv->drm, dev_priv->mm.shrink_count);


@ -175,6 +175,9 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
case I915_PARAM_PERF_REVISION:
value = i915_perf_ioctl_version();
break;
case I915_PARAM_OA_TIMESTAMP_FREQUENCY:
value = i915_perf_oa_timestamp_frequency(i915);
break;
default:
DRM_DEBUG("Unknown parameter %d\n", param->param);
return -EINVAL;


@ -1221,7 +1221,10 @@ static void engine_record_registers(struct intel_engine_coredump *ee)
if (GRAPHICS_VER(i915) >= 6) {
ee->rc_psmi = ENGINE_READ(engine, RING_PSMI_CTL);
if (GRAPHICS_VER(i915) >= 12)
if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
ee->fault_reg = intel_gt_mcr_read_any(engine->gt,
XEHP_RING_FAULT_REG);
else if (GRAPHICS_VER(i915) >= 12)
ee->fault_reg = intel_uncore_read(engine->uncore,
GEN12_RING_FAULT_REG);
else if (GRAPHICS_VER(i915) >= 8)
@ -1820,7 +1823,12 @@ static void gt_record_global_regs(struct intel_gt_coredump *gt)
if (GRAPHICS_VER(i915) == 7)
gt->err_int = intel_uncore_read(uncore, GEN7_ERR_INT);
if (GRAPHICS_VER(i915) >= 12) {
if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
gt->fault_data0 = intel_gt_mcr_read_any((struct intel_gt *)gt->_gt,
XEHP_FAULT_TLB_DATA0);
gt->fault_data1 = intel_gt_mcr_read_any((struct intel_gt *)gt->_gt,
XEHP_FAULT_TLB_DATA1);
} else if (GRAPHICS_VER(i915) >= 12) {
gt->fault_data0 = intel_uncore_read(uncore,
GEN12_FAULT_TLB_DATA0);
gt->fault_data1 = intel_uncore_read(uncore,


@ -0,0 +1,732 @@
// SPDX-License-Identifier: MIT
/*
* Copyright © 2022 Intel Corporation
*/
#include <linux/hwmon.h>
#include <linux/hwmon-sysfs.h>
#include <linux/types.h>
#include "i915_drv.h"
#include "i915_hwmon.h"
#include "i915_reg.h"
#include "intel_mchbar_regs.h"
#include "intel_pcode.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_regs.h"
/*
* SF_* - scale factors for particular quantities according to hwmon spec.
* - voltage - millivolts
* - power - microwatts
* - curr - milliamperes
* - energy - microjoules
* - time - milliseconds
*/
#define SF_VOLTAGE 1000
#define SF_POWER 1000000
#define SF_CURR 1000
#define SF_ENERGY 1000000
#define SF_TIME 1000
struct hwm_reg {
i915_reg_t gt_perf_status;
i915_reg_t pkg_power_sku_unit;
i915_reg_t pkg_power_sku;
i915_reg_t pkg_rapl_limit;
i915_reg_t energy_status_all;
i915_reg_t energy_status_tile;
};
struct hwm_energy_info {
u32 reg_val_prev;
long accum_energy; /* Accumulated energy for energy1_input */
};
struct hwm_drvdata {
struct i915_hwmon *hwmon;
struct intel_uncore *uncore;
struct device *hwmon_dev;
struct hwm_energy_info ei; /* Energy info for energy1_input */
char name[12];
int gt_n;
};
struct i915_hwmon {
struct hwm_drvdata ddat;
struct hwm_drvdata ddat_gt[I915_MAX_GT];
struct mutex hwmon_lock; /* counter overflow logic and rmw */
struct hwm_reg rg;
int scl_shift_power;
int scl_shift_energy;
int scl_shift_time;
};
static void
hwm_locked_with_pm_intel_uncore_rmw(struct hwm_drvdata *ddat,
i915_reg_t reg, u32 clear, u32 set)
{
struct i915_hwmon *hwmon = ddat->hwmon;
struct intel_uncore *uncore = ddat->uncore;
intel_wakeref_t wakeref;
mutex_lock(&hwmon->hwmon_lock);
with_intel_runtime_pm(uncore->rpm, wakeref)
intel_uncore_rmw(uncore, reg, clear, set);
mutex_unlock(&hwmon->hwmon_lock);
}
/*
* This function's return type of u64 allows for the case where the scaling
* of the field taken from the 32-bit register value might cause a result to
* exceed 32 bits.
*/
static u64
hwm_field_read_and_scale(struct hwm_drvdata *ddat, i915_reg_t rgadr,
u32 field_msk, int nshift, u32 scale_factor)
{
struct intel_uncore *uncore = ddat->uncore;
intel_wakeref_t wakeref;
u32 reg_value;
with_intel_runtime_pm(uncore->rpm, wakeref)
reg_value = intel_uncore_read(uncore, rgadr);
reg_value = REG_FIELD_GET(field_msk, reg_value);
return mul_u64_u32_shr(reg_value, scale_factor, nshift);
}
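An illustrative userspace sketch of the scaling performed above: a raw register field is converted to hwmon units as (field * scale_factor) >> shift, computed in 64 bits to avoid overflow. The field value and shift below are made up.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t field = 200;		/* hypothetical raw power field */
	uint32_t sf_power = 1000000;	/* microwatts per watt */
	unsigned int shift = 0xa;	/* hypothetical unit shift (1/1024 W) */
	uint64_t uw = ((uint64_t)field * sf_power) >> shift;

	printf("%u raw units -> %llu microwatts\n",
	       field, (unsigned long long)uw);
	return 0;
}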
static void
hwm_field_scale_and_write(struct hwm_drvdata *ddat, i915_reg_t rgadr,
int nshift, unsigned int scale_factor, long lval)
{
u32 nval;
/* Computation in 64 bits to avoid overflow. Round to nearest. */
nval = DIV_ROUND_CLOSEST_ULL((u64)lval << nshift, scale_factor);
hwm_locked_with_pm_intel_uncore_rmw(ddat, rgadr,
PKG_PWR_LIM_1,
REG_FIELD_PREP(PKG_PWR_LIM_1, nval));
}
/*
* hwm_energy - Obtain energy value
*
* The underlying energy hardware register is 32 bits wide and subject to
* overflow. How long before overflow? For example, with a scaling bit
* shift of 14 bits (see register *PACKAGE_POWER_SKU_UNIT) and
* a power draw of 1000 watts, the 32-bit counter will overflow in
* approximately 4.36 minutes.
*
* Examples:
* 1 watt: (2^32 >> 14) / 1 W / (60 * 60 * 24) secs/day -> 3 days
* 1000 watts: (2^32 >> 14) / 1000 W / 60 secs/min -> 4.36 minutes
*
* The function significantly increases overflow duration (from 4.36
* minutes) by accumulating the energy register into a 'long' as allowed by
* the hwmon API. Using x86_64 128 bit arithmetic (see mul_u64_u32_shr()),
* a 'long' of 63 bits, SF_ENERGY of 1e6 (~20 bits) and
* hwmon->scl_shift_energy of 14 bits we have 57 (63 - 20 + 14) bits before
* energy1_input overflows. This at 1000 W is an overflow duration of 278 years.
*/
static void
hwm_energy(struct hwm_drvdata *ddat, long *energy)
{
struct intel_uncore *uncore = ddat->uncore;
struct i915_hwmon *hwmon = ddat->hwmon;
struct hwm_energy_info *ei = &ddat->ei;
intel_wakeref_t wakeref;
i915_reg_t rgaddr;
u32 reg_val;
if (ddat->gt_n >= 0)
rgaddr = hwmon->rg.energy_status_tile;
else
rgaddr = hwmon->rg.energy_status_all;
mutex_lock(&hwmon->hwmon_lock);
with_intel_runtime_pm(uncore->rpm, wakeref)
reg_val = intel_uncore_read(uncore, rgaddr);
if (reg_val >= ei->reg_val_prev)
ei->accum_energy += reg_val - ei->reg_val_prev;
else
ei->accum_energy += UINT_MAX - ei->reg_val_prev + reg_val;
ei->reg_val_prev = reg_val;
*energy = mul_u64_u32_shr(ei->accum_energy, SF_ENERGY,
hwmon->scl_shift_energy);
mutex_unlock(&hwmon->hwmon_lock);
}
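The wraparound handling in hwm_energy() above can be illustrated with a small userspace sketch that widens a wrapping 32-bit counter into a 64-bit accumulator; the register read is simulated with made-up values and the wrap formula mirrors the driver's.

#include <stdint.h>
#include <stdio.h>

struct accum {
	uint32_t prev;
	uint64_t total;
};

static void accumulate(struct accum *a, uint32_t reg_val)
{
	if (reg_val >= a->prev)
		a->total += reg_val - a->prev;
	else					/* 32-bit counter wrapped */
		a->total += (uint64_t)UINT32_MAX - a->prev + reg_val;
	a->prev = reg_val;
}

int main(void)
{
	struct accum a = { .prev = 0xfffffff0u, .total = 0 };

	accumulate(&a, 0x00000010u);		/* simulated read across the wrap */
	printf("accumulated %llu ticks\n", (unsigned long long)a.total);
	return 0;
}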
static ssize_t
hwm_power1_max_interval_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct hwm_drvdata *ddat = dev_get_drvdata(dev);
struct i915_hwmon *hwmon = ddat->hwmon;
intel_wakeref_t wakeref;
u32 r, x, y, x_w = 2; /* 2 bits */
u64 tau4, out;
with_intel_runtime_pm(ddat->uncore->rpm, wakeref)
r = intel_uncore_read(ddat->uncore, hwmon->rg.pkg_rapl_limit);
x = REG_FIELD_GET(PKG_PWR_LIM_1_TIME_X, r);
y = REG_FIELD_GET(PKG_PWR_LIM_1_TIME_Y, r);
/*
* tau = 1.x * power(2,y), x = bits(23:22), y = bits(21:17)
* = (4 | x) << (y - 2)
* where the (y - 2) shift accounts for the 2 fractional bits of the 1.x
* fixed point representation. However, because y can be < 2, we compute
* tau4 = (4 | x) << y
* but add 2 when doing the final right shift to account for units
*/
tau4 = ((1 << x_w) | x) << y;
/* val in hwmon interface units (millisec) */
out = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w);
return sysfs_emit(buf, "%llu\n", out);
}
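For reference, a standalone sketch of the tau decoding above: tau = 1.x * 2^y is carried as tau4 = (4 | x) << y and the two fractional bits are removed in the final shift. The register field values and unit shift below are made up.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	unsigned int x = 1, y = 5, x_w = 2;	/* hypothetical register fields */
	unsigned int scl_shift_time = 0xa;	/* hypothetical unit shift */
	uint64_t tau4 = ((uint64_t)((1u << x_w) | x)) << y;	/* (4 | x) << y */
	uint64_t ms = (tau4 * 1000) >> (scl_shift_time + x_w);	/* SF_TIME = 1000 */

	printf("x=%u y=%u -> tau4=%llu, interval=%llu ms\n", x, y,
	       (unsigned long long)tau4, (unsigned long long)ms);
	return 0;
}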
static ssize_t
hwm_power1_max_interval_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
struct hwm_drvdata *ddat = dev_get_drvdata(dev);
struct i915_hwmon *hwmon = ddat->hwmon;
u32 x, y, rxy, x_w = 2; /* 2 bits */
u64 tau4, r, max_win;
unsigned long val;
int ret;
ret = kstrtoul(buf, 0, &val);
if (ret)
return ret;
/*
* Max HW supported tau in '1.x * power(2,y)' format, x = 0, y = 0x12
* The hwmon->scl_shift_time default of 0xa results in a max tau of 256 seconds
*/
#define PKG_MAX_WIN_DEFAULT 0x12ull
/*
* val must be < max in hwmon interface units. The steps below are
* explained in hwm_power1_max_interval_show()
*/
r = FIELD_PREP(PKG_MAX_WIN, PKG_MAX_WIN_DEFAULT);
x = REG_FIELD_GET(PKG_MAX_WIN_X, r);
y = REG_FIELD_GET(PKG_MAX_WIN_Y, r);
tau4 = ((1 << x_w) | x) << y;
max_win = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w);
if (val > max_win)
return -EINVAL;
/* val in hw units */
val = DIV_ROUND_CLOSEST_ULL((u64)val << hwmon->scl_shift_time, SF_TIME);
/* Convert to 1.x * power(2,y) */
if (!val)
return -EINVAL;
y = ilog2(val);
/* x = (val - (1 << y)) >> (y - 2); */
x = (val - (1ul << y)) << x_w >> y;
rxy = REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_X, x) | REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_Y, y);
hwm_locked_with_pm_intel_uncore_rmw(ddat, hwmon->rg.pkg_rapl_limit,
PKG_PWR_LIM_1_TIME, rxy);
return count;
}
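The inverse conversion in the store path can be sketched the same way: a value in hardware time units is re-encoded as 1.x * 2^y with a 2-bit fraction via y = ilog2(val) and x = (val - (1 << y)) << 2 >> y. The input value is made up.

#include <stdio.h>

int main(void)
{
	unsigned int val = 40;				/* hypothetical value in hw units */
	unsigned int x_w = 2;
	unsigned int y = 31 - __builtin_clz(val);	/* ilog2(val), val must be non-zero */
	unsigned int x = (val - (1u << y)) << x_w >> y;

	printf("val=%u -> x=%u y=%u (1.x * 2^y with x in quarters)\n", val, x, y);
	return 0;
}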
static SENSOR_DEVICE_ATTR(power1_max_interval, 0664,
hwm_power1_max_interval_show,
hwm_power1_max_interval_store, 0);
static struct attribute *hwm_attributes[] = {
&sensor_dev_attr_power1_max_interval.dev_attr.attr,
NULL
};
static umode_t hwm_attributes_visible(struct kobject *kobj,
struct attribute *attr, int index)
{
struct device *dev = kobj_to_dev(kobj);
struct hwm_drvdata *ddat = dev_get_drvdata(dev);
struct i915_hwmon *hwmon = ddat->hwmon;
if (attr == &sensor_dev_attr_power1_max_interval.dev_attr.attr)
return i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit) ? attr->mode : 0;
return 0;
}
static const struct attribute_group hwm_attrgroup = {
.attrs = hwm_attributes,
.is_visible = hwm_attributes_visible,
};
static const struct attribute_group *hwm_groups[] = {
&hwm_attrgroup,
NULL
};
static const struct hwmon_channel_info *hwm_info[] = {
HWMON_CHANNEL_INFO(in, HWMON_I_INPUT),
HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_CRIT),
HWMON_CHANNEL_INFO(energy, HWMON_E_INPUT),
HWMON_CHANNEL_INFO(curr, HWMON_C_CRIT),
NULL
};
static const struct hwmon_channel_info *hwm_gt_info[] = {
HWMON_CHANNEL_INFO(energy, HWMON_E_INPUT),
NULL
};
/* I1 is exposed as power_crit or as curr_crit depending on bit 31 */
static int hwm_pcode_read_i1(struct drm_i915_private *i915, u32 *uval)
{
return snb_pcode_read_p(&i915->uncore, PCODE_POWER_SETUP,
POWER_SETUP_SUBCOMMAND_READ_I1, 0, uval);
}
static int hwm_pcode_write_i1(struct drm_i915_private *i915, u32 uval)
{
return snb_pcode_write_p(&i915->uncore, PCODE_POWER_SETUP,
POWER_SETUP_SUBCOMMAND_WRITE_I1, 0, uval);
}
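To illustrate the bit-31 selection mentioned in the comment above, here is a hedged userspace sketch; the flag bit position matches the comment, but the data mask, fixed point shift and sample value are assumptions for illustration only.

#include <stdint.h>
#include <stdio.h>

#define I1_IS_WATTS	(1u << 31)	/* bit 31 per the comment above */
#define I1_DATA_MASK	0x7fffu		/* assumed data field */
#define I1_SHIFT	6		/* assumed fixed point shift */

int main(void)
{
	uint32_t uval = I1_IS_WATTS | 2560;	/* made-up pcode readout */
	uint32_t data = uval & I1_DATA_MASK;

	if (uval & I1_IS_WATTS)
		printf("I1 limit: %u/%u W (power1_crit)\n", data, 1u << I1_SHIFT);
	else
		printf("I1 limit: %u/%u A (curr1_crit)\n", data, 1u << I1_SHIFT);
	return 0;
}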
static umode_t
hwm_in_is_visible(const struct hwm_drvdata *ddat, u32 attr)
{
struct drm_i915_private *i915 = ddat->uncore->i915;
switch (attr) {
case hwmon_in_input:
return IS_DG1(i915) || IS_DG2(i915) ? 0444 : 0;
default:
return 0;
}
}
static int
hwm_in_read(struct hwm_drvdata *ddat, u32 attr, long *val)
{
struct i915_hwmon *hwmon = ddat->hwmon;
intel_wakeref_t wakeref;
u32 reg_value;
switch (attr) {
case hwmon_in_input:
with_intel_runtime_pm(ddat->uncore->rpm, wakeref)
reg_value = intel_uncore_read(ddat->uncore, hwmon->rg.gt_perf_status);
/* HW register value in units of 2.5 millivolt */
*val = DIV_ROUND_CLOSEST(REG_FIELD_GET(GEN12_VOLTAGE_MASK, reg_value) * 25, 10);
return 0;
default:
return -EOPNOTSUPP;
}
}
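A quick illustrative sketch of the voltage conversion above: the register field counts in units of 2.5 mV, so millivolts = field * 25 / 10, rounded to nearest. The field value is made up.

#include <stdio.h>

int main(void)
{
	unsigned int field = 300;			/* hypothetical register field */
	unsigned int mv = (field * 25 + 5) / 10;	/* 2.5 mV units -> mV, rounded */

	printf("%u units of 2.5 mV -> %u mV\n", field, mv);
	return 0;
}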
static umode_t
hwm_power_is_visible(const struct hwm_drvdata *ddat, u32 attr, int chan)
{
struct drm_i915_private *i915 = ddat->uncore->i915;
struct i915_hwmon *hwmon = ddat->hwmon;
u32 uval;
switch (attr) {
case hwmon_power_max:
return i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit) ? 0664 : 0;
case hwmon_power_rated_max:
return i915_mmio_reg_valid(hwmon->rg.pkg_power_sku) ? 0444 : 0;
case hwmon_power_crit:
return (hwm_pcode_read_i1(i915, &uval) ||
!(uval & POWER_SETUP_I1_WATTS)) ? 0 : 0644;
default:
return 0;
}
}
static int
hwm_power_read(struct hwm_drvdata *ddat, u32 attr, int chan, long *val)
{
struct i915_hwmon *hwmon = ddat->hwmon;
int ret;
u32 uval;
switch (attr) {
case hwmon_power_max:
*val = hwm_field_read_and_scale(ddat,
hwmon->rg.pkg_rapl_limit,
PKG_PWR_LIM_1,
hwmon->scl_shift_power,
SF_POWER);
return 0;
case hwmon_power_rated_max:
*val = hwm_field_read_and_scale(ddat,
hwmon->rg.pkg_power_sku,
PKG_PKG_TDP,
hwmon->scl_shift_power,
SF_POWER);
return 0;
case hwmon_power_crit:
ret = hwm_pcode_read_i1(ddat->uncore->i915, &uval);
if (ret)
return ret;
if (!(uval & POWER_SETUP_I1_WATTS))
return -ENODEV;
*val = mul_u64_u32_shr(REG_FIELD_GET(POWER_SETUP_I1_DATA_MASK, uval),
SF_POWER, POWER_SETUP_I1_SHIFT);
return 0;
default:
return -EOPNOTSUPP;
}
}
static int
hwm_power_write(struct hwm_drvdata *ddat, u32 attr, int chan, long val)
{
struct i915_hwmon *hwmon = ddat->hwmon;
u32 uval;
switch (attr) {
case hwmon_power_max:
hwm_field_scale_and_write(ddat,
hwmon->rg.pkg_rapl_limit,
hwmon->scl_shift_power,
SF_POWER, val);
return 0;
case hwmon_power_crit:
uval = DIV_ROUND_CLOSEST_ULL(val << POWER_SETUP_I1_SHIFT, SF_POWER);
return hwm_pcode_write_i1(ddat->uncore->i915, uval);
default:
return -EOPNOTSUPP;
}
}
static umode_t
hwm_energy_is_visible(const struct hwm_drvdata *ddat, u32 attr)
{
struct i915_hwmon *hwmon = ddat->hwmon;
i915_reg_t rgaddr;
switch (attr) {
case hwmon_energy_input:
if (ddat->gt_n >= 0)
rgaddr = hwmon->rg.energy_status_tile;
else
rgaddr = hwmon->rg.energy_status_all;
return i915_mmio_reg_valid(rgaddr) ? 0444 : 0;
default:
return 0;
}
}
static int
hwm_energy_read(struct hwm_drvdata *ddat, u32 attr, long *val)
{
switch (attr) {
case hwmon_energy_input:
hwm_energy(ddat, val);
return 0;
default:
return -EOPNOTSUPP;
}
}
static umode_t
hwm_curr_is_visible(const struct hwm_drvdata *ddat, u32 attr)
{
struct drm_i915_private *i915 = ddat->uncore->i915;
u32 uval;
switch (attr) {
case hwmon_curr_crit:
return (hwm_pcode_read_i1(i915, &uval) ||
(uval & POWER_SETUP_I1_WATTS)) ? 0 : 0644;
default:
return 0;
}
}
static int
hwm_curr_read(struct hwm_drvdata *ddat, u32 attr, long *val)
{
int ret;
u32 uval;
switch (attr) {
case hwmon_curr_crit:
ret = hwm_pcode_read_i1(ddat->uncore->i915, &uval);
if (ret)
return ret;
if (uval & POWER_SETUP_I1_WATTS)
return -ENODEV;
*val = mul_u64_u32_shr(REG_FIELD_GET(POWER_SETUP_I1_DATA_MASK, uval),
SF_CURR, POWER_SETUP_I1_SHIFT);
return 0;
default:
return -EOPNOTSUPP;
}
}
static int
hwm_curr_write(struct hwm_drvdata *ddat, u32 attr, long val)
{
u32 uval;
switch (attr) {
case hwmon_curr_crit:
uval = DIV_ROUND_CLOSEST_ULL(val << POWER_SETUP_I1_SHIFT, SF_CURR);
return hwm_pcode_write_i1(ddat->uncore->i915, uval);
default:
return -EOPNOTSUPP;
}
}
static umode_t
hwm_is_visible(const void *drvdata, enum hwmon_sensor_types type,
u32 attr, int channel)
{
struct hwm_drvdata *ddat = (struct hwm_drvdata *)drvdata;
switch (type) {
case hwmon_in:
return hwm_in_is_visible(ddat, attr);
case hwmon_power:
return hwm_power_is_visible(ddat, attr, channel);
case hwmon_energy:
return hwm_energy_is_visible(ddat, attr);
case hwmon_curr:
return hwm_curr_is_visible(ddat, attr);
default:
return 0;
}
}
static int
hwm_read(struct device *dev, enum hwmon_sensor_types type, u32 attr,
int channel, long *val)
{
struct hwm_drvdata *ddat = dev_get_drvdata(dev);
switch (type) {
case hwmon_in:
return hwm_in_read(ddat, attr, val);
case hwmon_power:
return hwm_power_read(ddat, attr, channel, val);
case hwmon_energy:
return hwm_energy_read(ddat, attr, val);
case hwmon_curr:
return hwm_curr_read(ddat, attr, val);
default:
return -EOPNOTSUPP;
}
}
static int
hwm_write(struct device *dev, enum hwmon_sensor_types type, u32 attr,
int channel, long val)
{
struct hwm_drvdata *ddat = dev_get_drvdata(dev);
switch (type) {
case hwmon_power:
return hwm_power_write(ddat, attr, channel, val);
case hwmon_curr:
return hwm_curr_write(ddat, attr, val);
default:
return -EOPNOTSUPP;
}
}
static const struct hwmon_ops hwm_ops = {
.is_visible = hwm_is_visible,
.read = hwm_read,
.write = hwm_write,
};
static const struct hwmon_chip_info hwm_chip_info = {
.ops = &hwm_ops,
.info = hwm_info,
};
static umode_t
hwm_gt_is_visible(const void *drvdata, enum hwmon_sensor_types type,
u32 attr, int channel)
{
struct hwm_drvdata *ddat = (struct hwm_drvdata *)drvdata;
switch (type) {
case hwmon_energy:
return hwm_energy_is_visible(ddat, attr);
default:
return 0;
}
}
static int
hwm_gt_read(struct device *dev, enum hwmon_sensor_types type, u32 attr,
int channel, long *val)
{
struct hwm_drvdata *ddat = dev_get_drvdata(dev);
switch (type) {
case hwmon_energy:
return hwm_energy_read(ddat, attr, val);
default:
return -EOPNOTSUPP;
}
}
static const struct hwmon_ops hwm_gt_ops = {
.is_visible = hwm_gt_is_visible,
.read = hwm_gt_read,
};
static const struct hwmon_chip_info hwm_gt_chip_info = {
.ops = &hwm_gt_ops,
.info = hwm_gt_info,
};
static void
hwm_get_preregistration_info(struct drm_i915_private *i915)
{
struct i915_hwmon *hwmon = i915->hwmon;
struct intel_uncore *uncore = &i915->uncore;
struct hwm_drvdata *ddat = &hwmon->ddat;
intel_wakeref_t wakeref;
u32 val_sku_unit = 0;
struct intel_gt *gt;
long energy;
int i;
/* Available for all Gen12+/dGfx */
hwmon->rg.gt_perf_status = GEN12_RPSTAT1;
if (IS_DG1(i915) || IS_DG2(i915)) {
hwmon->rg.pkg_power_sku_unit = PCU_PACKAGE_POWER_SKU_UNIT;
hwmon->rg.pkg_power_sku = PCU_PACKAGE_POWER_SKU;
hwmon->rg.pkg_rapl_limit = PCU_PACKAGE_RAPL_LIMIT;
hwmon->rg.energy_status_all = PCU_PACKAGE_ENERGY_STATUS;
hwmon->rg.energy_status_tile = INVALID_MMIO_REG;
} else if (IS_XEHPSDV(i915)) {
hwmon->rg.pkg_power_sku_unit = GT0_PACKAGE_POWER_SKU_UNIT;
hwmon->rg.pkg_power_sku = INVALID_MMIO_REG;
hwmon->rg.pkg_rapl_limit = GT0_PACKAGE_RAPL_LIMIT;
hwmon->rg.energy_status_all = GT0_PLATFORM_ENERGY_STATUS;
hwmon->rg.energy_status_tile = GT0_PACKAGE_ENERGY_STATUS;
} else {
hwmon->rg.pkg_power_sku_unit = INVALID_MMIO_REG;
hwmon->rg.pkg_power_sku = INVALID_MMIO_REG;
hwmon->rg.pkg_rapl_limit = INVALID_MMIO_REG;
hwmon->rg.energy_status_all = INVALID_MMIO_REG;
hwmon->rg.energy_status_tile = INVALID_MMIO_REG;
}
with_intel_runtime_pm(uncore->rpm, wakeref) {
/*
* The contents of register hwmon->rg.pkg_power_sku_unit do not change,
* so read it once and store the shift values.
*/
if (i915_mmio_reg_valid(hwmon->rg.pkg_power_sku_unit))
val_sku_unit = intel_uncore_read(uncore,
hwmon->rg.pkg_power_sku_unit);
}
hwmon->scl_shift_power = REG_FIELD_GET(PKG_PWR_UNIT, val_sku_unit);
hwmon->scl_shift_energy = REG_FIELD_GET(PKG_ENERGY_UNIT, val_sku_unit);
hwmon->scl_shift_time = REG_FIELD_GET(PKG_TIME_UNIT, val_sku_unit);
/*
* Initialize 'struct hwm_energy_info', i.e. set fields to the
* first value read from the energy register
*/
if (i915_mmio_reg_valid(hwmon->rg.energy_status_all))
hwm_energy(ddat, &energy);
if (i915_mmio_reg_valid(hwmon->rg.energy_status_tile)) {
for_each_gt(gt, i915, i)
hwm_energy(&hwmon->ddat_gt[i], &energy);
}
}
void i915_hwmon_register(struct drm_i915_private *i915)
{
struct device *dev = i915->drm.dev;
struct i915_hwmon *hwmon;
struct device *hwmon_dev;
struct hwm_drvdata *ddat;
struct hwm_drvdata *ddat_gt;
struct intel_gt *gt;
int i;
/* hwmon is available only for dGfx */
if (!IS_DGFX(i915))
return;
hwmon = devm_kzalloc(dev, sizeof(*hwmon), GFP_KERNEL);
if (!hwmon)
return;
i915->hwmon = hwmon;
mutex_init(&hwmon->hwmon_lock);
ddat = &hwmon->ddat;
ddat->hwmon = hwmon;
ddat->uncore = &i915->uncore;
snprintf(ddat->name, sizeof(ddat->name), "i915");
ddat->gt_n = -1;
for_each_gt(gt, i915, i) {
ddat_gt = hwmon->ddat_gt + i;
ddat_gt->hwmon = hwmon;
ddat_gt->uncore = gt->uncore;
snprintf(ddat_gt->name, sizeof(ddat_gt->name), "i915_gt%u", i);
ddat_gt->gt_n = i;
}
hwm_get_preregistration_info(i915);
/* hwmon_dev points to device hwmon<i> */
hwmon_dev = devm_hwmon_device_register_with_info(dev, ddat->name,
ddat,
&hwm_chip_info,
hwm_groups);
if (IS_ERR(hwmon_dev)) {
i915->hwmon = NULL;
return;
}
ddat->hwmon_dev = hwmon_dev;
for_each_gt(gt, i915, i) {
ddat_gt = hwmon->ddat_gt + i;
/*
* Create per-gt directories only if a per-gt attribute is
* visible. Currently this is only energy.
*/
if (!hwm_gt_is_visible(ddat_gt, hwmon_energy, hwmon_energy_input, 0))
continue;
hwmon_dev = devm_hwmon_device_register_with_info(dev, ddat_gt->name,
ddat_gt,
&hwm_gt_chip_info,
NULL);
if (!IS_ERR(hwmon_dev))
ddat_gt->hwmon_dev = hwmon_dev;
}
}
void i915_hwmon_unregister(struct drm_i915_private *i915)
{
fetch_and_zero(&i915->hwmon);
}
