Merge tag 'drm-intel-gt-next-2022-11-03' of git://anongit.freedesktop.org/drm/drm-intel into drm-next
Driver Changes:

- Fix for #7306: [Arc A380] white flickering when using Arc as a secondary GPU (Matt A)
- Add Wa_18017747507 for DG2 (Wayne)
- Avoid spurious WARN on DG1 due to incorrect cache_dirty flag (Niranjana, Matt A)
- Corrections to CS timestamp support for Gen5 and earlier (Ville)
- Fix a build error seen with the clang compiler in hwmon (GG)
- Improvements to LMEM handling with RPM (Anshuman, Matt A)
- Cleanups in dmabuf code (Mike)
- Selftest improvements (Matt A)

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/Y2N11wu175p6qeEN@jlahtine-mobl.ger.corp.intel.com
commit 60ba8c5bd9

 Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon (new file, 75 lines)
@@ -0,0 +1,75 @@
What:		/sys/devices/.../hwmon/hwmon<i>/in0_input
Date:		February 2023
KernelVersion:	6.2
Contact:	intel-gfx@lists.freedesktop.org
Description:	RO. Current Voltage in millivolt.

		Only supported for particular Intel i915 graphics platforms.

What:		/sys/devices/.../hwmon/hwmon<i>/power1_max
Date:		February 2023
KernelVersion:	6.2
Contact:	intel-gfx@lists.freedesktop.org
Description:	RW. Card reactive sustained (PL1/Tau) power limit in microwatts.

		The power controller will throttle the operating frequency
		if the power averaged over a window (typically seconds)
		exceeds this limit.

		Only supported for particular Intel i915 graphics platforms.

What:		/sys/devices/.../hwmon/hwmon<i>/power1_rated_max
Date:		February 2023
KernelVersion:	6.2
Contact:	intel-gfx@lists.freedesktop.org
Description:	RO. Card default power limit (default TDP setting).

		Only supported for particular Intel i915 graphics platforms.

What:		/sys/devices/.../hwmon/hwmon<i>/power1_max_interval
Date:		February 2023
KernelVersion:	6.2
Contact:	intel-gfx@lists.freedesktop.org
Description:	RW. Sustained power limit interval (Tau in PL1/Tau) in
		milliseconds over which sustained power is averaged.

		Only supported for particular Intel i915 graphics platforms.

What:		/sys/devices/.../hwmon/hwmon<i>/power1_crit
Date:		February 2023
KernelVersion:	6.2
Contact:	intel-gfx@lists.freedesktop.org
Description:	RW. Card reactive critical (I1) power limit in microwatts.

		Card reactive critical (I1) power limit in microwatts is exposed
		for client products. The power controller will throttle the
		operating frequency if the power averaged over a window exceeds
		this limit.

		Only supported for particular Intel i915 graphics platforms.

What:		/sys/devices/.../hwmon/hwmon<i>/curr1_crit
Date:		February 2023
KernelVersion:	6.2
Contact:	intel-gfx@lists.freedesktop.org
Description:	RW. Card reactive critical (I1) power limit in milliamperes.

		Card reactive critical (I1) power limit in milliamperes is
		exposed for server products. The power controller will throttle
		the operating frequency if the power averaged over a window
		exceeds this limit.

		Only supported for particular Intel i915 graphics platforms.

What:		/sys/devices/.../hwmon/hwmon<i>/energy1_input
Date:		February 2023
KernelVersion:	6.2
Contact:	intel-gfx@lists.freedesktop.org
Description:	RO. Energy input of device or gt in microjoules.

		For i915 device level hwmon devices (name "i915") this
		reflects energy input for the entire device. For gt level
		hwmon devices (name "i915_gtN") this reflects energy input
		for the gt.

		Only supported for particular Intel i915 graphics platforms.
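For illustration only, a minimal userspace sketch that reads two of the attributes documented above. The hwmon index used here is an assumption; on a real system it has to be discovered by matching the hwmon "name" attribute against "i915" (or "i915_gtN" for per-gt devices).

	#include <stdio.h>

	/* Read a single integer attribute from an assumed i915 hwmon directory. */
	static long read_attr(const char *path)
	{
		FILE *f = fopen(path, "r");
		long v = -1;

		if (!f)
			return -1;
		if (fscanf(f, "%ld", &v) != 1)
			v = -1;
		fclose(f);
		return v;
	}

	int main(void)
	{
		/* Assumed index; discover it via the hwmon "name" attribute in practice. */
		const char *base = "/sys/class/hwmon/hwmon2";
		char path[256];
		long uw, uj;

		snprintf(path, sizeof(path), "%s/power1_max", base);
		uw = read_attr(path);	/* PL1/Tau sustained power limit, microwatts */

		snprintf(path, sizeof(path), "%s/energy1_input", base);
		uj = read_attr(path);	/* accumulated energy, microjoules */

		printf("power1_max=%ld uW energy1_input=%ld uJ\n", uw, uj);
		return 0;
	}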
@@ -10224,6 +10224,7 @@ Q:	http://patchwork.freedesktop.org/project/intel-gfx/
B:	https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs
C:	irc://irc.oftc.net/intel-gfx
T:	git git://anongit.freedesktop.org/drm-intel
F:	Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
F:	Documentation/gpu/i915.rst
F:	drivers/gpu/drm/i915/
F:	include/drm/i915*
@@ -57,10 +57,28 @@ config DRM_I915_PREEMPT_TIMEOUT
	default 640 # milliseconds
	help
	  How long to wait (in milliseconds) for a preemption event to occur
	  when submitting a new context via execlists. If the current context
	  does not hit an arbitration point and yield to HW before the timer
	  expires, the HW will be reset to allow the more important context
	  to execute.
	  when submitting a new context. If the current context does not hit
	  an arbitration point and yield to HW before the timer expires, the
	  HW will be reset to allow the more important context to execute.

	  This is adjustable via
	  /sys/class/drm/card?/engine/*/preempt_timeout_ms

	  May be 0 to disable the timeout.

	  The compiled in default may get overridden at driver probe time on
	  certain platforms and certain engines which will be reflected in the
	  sysfs control.

config DRM_I915_PREEMPT_TIMEOUT_COMPUTE
	int "Preempt timeout for compute engines (ms, jiffy granularity)"
	default 7500 # milliseconds
	help
	  How long to wait (in milliseconds) for a preemption event to occur
	  when submitting a new context to a compute capable engine. If the
	  current context does not hit an arbitration point and yield to HW
	  before the timer expires, the HW will be reset to allow the more
	  important context to execute.

	  This is adjustable via
	  /sys/class/drm/card?/engine/*/preempt_timeout_ms
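As a hedged sketch of the adjustment described in the help text above: the card/engine path below is an assumption that differs per system, and writing the attribute normally requires root.

	#include <stdio.h>

	int main(void)
	{
		/* Assumed engine path; enumerate /sys/class/drm/cardN/engine/ in practice. */
		const char *attr = "/sys/class/drm/card0/engine/rcs0/preempt_timeout_ms";
		FILE *f = fopen(attr, "w");

		if (!f) {
			perror("fopen");
			return 1;
		}
		fprintf(f, "0\n");	/* 0 disables the preemption timeout, per the help text */
		fclose(f);
		return 0;
	}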
@@ -209,6 +209,9 @@ i915-y += gt/uc/intel_uc.o \
# graphics system controller (GSC) support
i915-y += gt/intel_gsc.o

# graphics hardware monitoring (HWMON) support
i915-$(CONFIG_HWMON) += i915_hwmon.o

# modesetting core code
i915-y += \
	display/hsw_ips.o \
@@ -310,15 +313,18 @@ i915-y += \

i915-y += i915_perf.o

# Protected execution platform (PXP) support
i915-$(CONFIG_DRM_I915_PXP) += \
# Protected execution platform (PXP) support. Base support is required for HuC
i915-y += \
	pxp/intel_pxp.o \
	pxp/intel_pxp_tee.o \
	pxp/intel_pxp_huc.o

i915-$(CONFIG_DRM_I915_PXP) += \
	pxp/intel_pxp_cmd.o \
	pxp/intel_pxp_debugfs.o \
	pxp/intel_pxp_irq.o \
	pxp/intel_pxp_pm.o \
	pxp/intel_pxp_session.o \
	pxp/intel_pxp_tee.o
	pxp/intel_pxp_session.o

# Post-mortem debug and GPU hang state capture
i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) += i915_gpu_error.o
@ -5,6 +5,7 @@
|
||||
|
||||
#include "gem/i915_gem_domain.h"
|
||||
#include "gem/i915_gem_internal.h"
|
||||
#include "gem/i915_gem_lmem.h"
|
||||
#include "gt/gen8_ppgtt.h"
|
||||
|
||||
#include "i915_drv.h"
|
||||
|
@ -167,7 +167,6 @@ retry:
|
||||
ret = i915_gem_object_attach_phys(obj, alignment);
|
||||
else if (!ret && HAS_LMEM(dev_priv))
|
||||
ret = i915_gem_object_migrate(obj, &ww, INTEL_REGION_LMEM_0);
|
||||
/* TODO: Do we need to sync when migration becomes async? */
|
||||
if (!ret)
|
||||
ret = i915_gem_object_pin_pages(obj);
|
||||
if (ret)
|
||||
|
@ -100,9 +100,9 @@ lpe_audio_platdev_create(struct drm_i915_private *dev_priv)
|
||||
rsc[0].flags = IORESOURCE_IRQ;
|
||||
rsc[0].name = "hdmi-lpe-audio-irq";
|
||||
|
||||
rsc[1].start = pci_resource_start(pdev, GTTMMADR_BAR) +
|
||||
rsc[1].start = pci_resource_start(pdev, GEN4_GTTMMADR_BAR) +
|
||||
I915_HDMI_LPE_AUDIO_BASE;
|
||||
rsc[1].end = pci_resource_start(pdev, GTTMMADR_BAR) +
|
||||
rsc[1].end = pci_resource_start(pdev, GEN4_GTTMMADR_BAR) +
|
||||
I915_HDMI_LPE_AUDIO_BASE + I915_HDMI_LPE_AUDIO_SIZE - 1;
|
||||
rsc[1].flags = IORESOURCE_MEM;
|
||||
rsc[1].name = "hdmi-lpe-audio-mmio";
|
||||
|
@ -1452,7 +1452,7 @@ static void engines_idle_release(struct i915_gem_context *ctx,
|
||||
int err;
|
||||
|
||||
/* serialises with execbuf */
|
||||
set_bit(CONTEXT_CLOSED_BIT, &ce->flags);
|
||||
intel_context_close(ce);
|
||||
if (!intel_context_pin_if_active(ce))
|
||||
continue;
|
||||
|
||||
@ -2298,7 +2298,6 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
|
||||
}
|
||||
|
||||
args->ctx_id = id;
|
||||
drm_dbg(&i915->drm, "HW context %d created\n", args->ctx_id);
|
||||
|
||||
return 0;
|
||||
|
||||
|
@ -25,43 +25,44 @@ static struct drm_i915_gem_object *dma_buf_to_obj(struct dma_buf *buf)
|
||||
return to_intel_bo(buf->priv);
|
||||
}
|
||||
|
||||
static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachment,
|
||||
static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attach,
|
||||
enum dma_data_direction dir)
|
||||
{
|
||||
struct drm_i915_gem_object *obj = dma_buf_to_obj(attachment->dmabuf);
|
||||
struct sg_table *st;
|
||||
struct drm_i915_gem_object *obj = dma_buf_to_obj(attach->dmabuf);
|
||||
struct sg_table *sgt;
|
||||
struct scatterlist *src, *dst;
|
||||
int ret, i;
|
||||
|
||||
/* Copy sg so that we make an independent mapping */
|
||||
st = kmalloc(sizeof(struct sg_table), GFP_KERNEL);
|
||||
if (st == NULL) {
|
||||
/*
|
||||
* Make a copy of the object's sgt, so that we can make an independent
|
||||
* mapping
|
||||
*/
|
||||
sgt = kmalloc(sizeof(*sgt), GFP_KERNEL);
|
||||
if (!sgt) {
|
||||
ret = -ENOMEM;
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = sg_alloc_table(st, obj->mm.pages->nents, GFP_KERNEL);
|
||||
ret = sg_alloc_table(sgt, obj->mm.pages->orig_nents, GFP_KERNEL);
|
||||
if (ret)
|
||||
goto err_free;
|
||||
|
||||
src = obj->mm.pages->sgl;
|
||||
dst = st->sgl;
|
||||
for (i = 0; i < obj->mm.pages->nents; i++) {
|
||||
dst = sgt->sgl;
|
||||
for_each_sg(obj->mm.pages->sgl, src, obj->mm.pages->orig_nents, i) {
|
||||
sg_set_page(dst, sg_page(src), src->length, 0);
|
||||
dst = sg_next(dst);
|
||||
src = sg_next(src);
|
||||
}
|
||||
|
||||
ret = dma_map_sgtable(attachment->dev, st, dir, DMA_ATTR_SKIP_CPU_SYNC);
|
||||
ret = dma_map_sgtable(attach->dev, sgt, dir, DMA_ATTR_SKIP_CPU_SYNC);
|
||||
if (ret)
|
||||
goto err_free_sg;
|
||||
|
||||
return st;
|
||||
return sgt;
|
||||
|
||||
err_free_sg:
|
||||
sg_free_table(st);
|
||||
sg_free_table(sgt);
|
||||
err_free:
|
||||
kfree(st);
|
||||
kfree(sgt);
|
||||
err:
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
@ -236,15 +237,15 @@ struct dma_buf *i915_gem_prime_export(struct drm_gem_object *gem_obj, int flags)
|
||||
static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj)
|
||||
{
|
||||
struct drm_i915_private *i915 = to_i915(obj->base.dev);
|
||||
struct sg_table *pages;
|
||||
struct sg_table *sgt;
|
||||
unsigned int sg_page_sizes;
|
||||
|
||||
assert_object_held(obj);
|
||||
|
||||
pages = dma_buf_map_attachment(obj->base.import_attach,
|
||||
DMA_BIDIRECTIONAL);
|
||||
if (IS_ERR(pages))
|
||||
return PTR_ERR(pages);
|
||||
sgt = dma_buf_map_attachment(obj->base.import_attach,
|
||||
DMA_BIDIRECTIONAL);
|
||||
if (IS_ERR(sgt))
|
||||
return PTR_ERR(sgt);
|
||||
|
||||
/*
|
||||
* DG1 is special here since it still snoops transactions even with
|
||||
@ -261,16 +262,16 @@ static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj)
|
||||
(!HAS_LLC(i915) && !IS_DG1(i915)))
|
||||
wbinvd_on_all_cpus();
|
||||
|
||||
sg_page_sizes = i915_sg_dma_sizes(pages->sgl);
|
||||
__i915_gem_object_set_pages(obj, pages, sg_page_sizes);
|
||||
sg_page_sizes = i915_sg_dma_sizes(sgt->sgl);
|
||||
__i915_gem_object_set_pages(obj, sgt, sg_page_sizes);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void i915_gem_object_put_pages_dmabuf(struct drm_i915_gem_object *obj,
|
||||
struct sg_table *pages)
|
||||
struct sg_table *sgt)
|
||||
{
|
||||
dma_buf_unmap_attachment(obj->base.import_attach, pages,
|
||||
dma_buf_unmap_attachment(obj->base.import_attach, sgt,
|
||||
DMA_BIDIRECTIONAL);
|
||||
}
|
||||
|
||||
@ -313,7 +314,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
|
||||
get_dma_buf(dma_buf);
|
||||
|
||||
obj = i915_gem_object_alloc();
|
||||
if (obj == NULL) {
|
||||
if (!obj) {
|
||||
ret = -ENOMEM;
|
||||
goto fail_detach;
|
||||
}
|
||||
|
@ -2954,11 +2954,6 @@ await_fence_array(struct i915_execbuffer *eb,
|
||||
int err;
|
||||
|
||||
for (n = 0; n < eb->num_fences; n++) {
|
||||
struct drm_syncobj *syncobj;
|
||||
unsigned int flags;
|
||||
|
||||
syncobj = ptr_unpack_bits(eb->fences[n].syncobj, &flags, 2);
|
||||
|
||||
if (!eb->fences[n].dma_fence)
|
||||
continue;
|
||||
|
||||
|
@ -6,7 +6,6 @@
|
||||
|
||||
#include <linux/scatterlist.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/swiotlb.h>
|
||||
|
||||
#include "i915_drv.h"
|
||||
#include "i915_gem.h"
|
||||
@ -38,22 +37,12 @@ static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj)
|
||||
struct scatterlist *sg;
|
||||
unsigned int sg_page_sizes;
|
||||
unsigned int npages;
|
||||
int max_order;
|
||||
int max_order = MAX_ORDER;
|
||||
unsigned int max_segment;
|
||||
gfp_t gfp;
|
||||
|
||||
max_order = MAX_ORDER;
|
||||
#ifdef CONFIG_SWIOTLB
|
||||
if (is_swiotlb_active(obj->base.dev->dev)) {
|
||||
unsigned int max_segment;
|
||||
|
||||
max_segment = swiotlb_max_segment();
|
||||
if (max_segment) {
|
||||
max_segment = max_t(unsigned int, max_segment,
|
||||
PAGE_SIZE) >> PAGE_SHIFT;
|
||||
max_order = min(max_order, ilog2(max_segment));
|
||||
}
|
||||
}
|
||||
#endif
|
||||
max_segment = i915_sg_segment_size(i915->drm.dev) >> PAGE_SHIFT;
|
||||
max_order = min(max_order, get_order(max_segment));
|
||||
|
||||
gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE;
|
||||
if (IS_I965GM(i915) || IS_I965G(i915)) {
|
||||
|
@ -413,7 +413,7 @@ retry:
|
||||
vma->mmo = mmo;
|
||||
|
||||
if (CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)
|
||||
intel_wakeref_auto(&to_gt(i915)->userfault_wakeref,
|
||||
intel_wakeref_auto(&i915->runtime_pm.userfault_wakeref,
|
||||
msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND));
|
||||
|
||||
if (write) {
|
||||
@ -557,11 +557,13 @@ void i915_gem_object_runtime_pm_release_mmap_offset(struct drm_i915_gem_object *
|
||||
|
||||
drm_vma_node_unmap(&bo->base.vma_node, bdev->dev_mapping);
|
||||
|
||||
if (obj->userfault_count) {
|
||||
/* rpm wakeref provide exclusive access */
|
||||
list_del(&obj->userfault_link);
|
||||
obj->userfault_count = 0;
|
||||
}
|
||||
/*
|
||||
* We have exclusive access here via runtime suspend. All other callers
|
||||
* must first grab the rpm wakeref.
|
||||
*/
|
||||
GEM_BUG_ON(!obj->userfault_count);
|
||||
list_del(&obj->userfault_link);
|
||||
obj->userfault_count = 0;
|
||||
}
|
||||
|
||||
void i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj)
|
||||
@ -587,13 +589,6 @@ void i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj)
|
||||
spin_lock(&obj->mmo.lock);
|
||||
}
|
||||
spin_unlock(&obj->mmo.lock);
|
||||
|
||||
if (obj->userfault_count) {
|
||||
mutex_lock(&to_gt(to_i915(obj->base.dev))->lmem_userfault_lock);
|
||||
list_del(&obj->userfault_link);
|
||||
mutex_unlock(&to_gt(to_i915(obj->base.dev))->lmem_userfault_lock);
|
||||
obj->userfault_count = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static struct i915_mmap_offset *
|
||||
|
@ -458,6 +458,16 @@ i915_gem_object_read_from_page_iomap(struct drm_i915_gem_object *obj, u64 offset
|
||||
io_mapping_unmap(src_map);
|
||||
}
|
||||
|
||||
static bool object_has_mappable_iomem(struct drm_i915_gem_object *obj)
|
||||
{
|
||||
GEM_BUG_ON(!i915_gem_object_has_iomem(obj));
|
||||
|
||||
if (IS_DGFX(to_i915(obj->base.dev)))
|
||||
return i915_ttm_resource_mappable(i915_gem_to_ttm(obj)->resource);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* i915_gem_object_read_from_page - read data from the page of a GEM object
|
||||
* @obj: GEM object to read from
|
||||
@ -480,7 +490,7 @@ int i915_gem_object_read_from_page(struct drm_i915_gem_object *obj, u64 offset,
|
||||
|
||||
if (i915_gem_object_has_struct_page(obj))
|
||||
i915_gem_object_read_from_page_kmap(obj, offset, dst, size);
|
||||
else if (i915_gem_object_has_iomem(obj))
|
||||
else if (i915_gem_object_has_iomem(obj) && object_has_mappable_iomem(obj))
|
||||
i915_gem_object_read_from_page_iomap(obj, offset, dst, size);
|
||||
else
|
||||
return -ENODEV;
|
||||
|
@ -482,6 +482,10 @@ void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
|
||||
void *__must_check i915_gem_object_pin_map_unlocked(struct drm_i915_gem_object *obj,
|
||||
enum i915_map_type type);
|
||||
|
||||
enum i915_map_type i915_coherent_map_type(struct drm_i915_private *i915,
|
||||
struct drm_i915_gem_object *obj,
|
||||
bool always_coherent);
|
||||
|
||||
void __i915_gem_object_flush_map(struct drm_i915_gem_object *obj,
|
||||
unsigned long offset,
|
||||
unsigned long size);
|
||||
|
@ -466,6 +466,18 @@ void *i915_gem_object_pin_map_unlocked(struct drm_i915_gem_object *obj,
|
||||
return ret;
|
||||
}
|
||||
|
||||
enum i915_map_type i915_coherent_map_type(struct drm_i915_private *i915,
|
||||
struct drm_i915_gem_object *obj,
|
||||
bool always_coherent)
|
||||
{
|
||||
if (i915_gem_object_is_lmem(obj))
|
||||
return I915_MAP_WC;
|
||||
if (HAS_LLC(i915) || always_coherent)
|
||||
return I915_MAP_WB;
|
||||
else
|
||||
return I915_MAP_WC;
|
||||
}
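/*
 * Hedged usage sketch (the call site below is an assumption, not part of
 * this diff): the helper resolves to WC for LMEM objects, WB when the
 * platform has an LLC or the caller demands coherence, and WC otherwise.
 *
 *	enum i915_map_type type = i915_coherent_map_type(i915, obj, false);
 *	void *vaddr = i915_gem_object_pin_map_unlocked(obj, type);
 *	if (IS_ERR(vaddr))
 *		return PTR_ERR(vaddr);
 */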
|
||||
|
||||
void __i915_gem_object_flush_map(struct drm_i915_gem_object *obj,
|
||||
unsigned long offset,
|
||||
unsigned long size)
|
||||
|
@ -22,9 +22,12 @@
|
||||
|
||||
void i915_gem_suspend(struct drm_i915_private *i915)
|
||||
{
|
||||
struct intel_gt *gt;
|
||||
unsigned int i;
|
||||
|
||||
GEM_TRACE("%s\n", dev_name(i915->drm.dev));
|
||||
|
||||
intel_wakeref_auto(&to_gt(i915)->userfault_wakeref, 0);
|
||||
intel_wakeref_auto(&i915->runtime_pm.userfault_wakeref, 0);
|
||||
flush_workqueue(i915->wq);
|
||||
|
||||
/*
|
||||
@ -36,7 +39,8 @@ void i915_gem_suspend(struct drm_i915_private *i915)
|
||||
* state. Fortunately, the kernel_context is disposable and we do
|
||||
* not rely on its state.
|
||||
*/
|
||||
intel_gt_suspend_prepare(to_gt(i915));
|
||||
for_each_gt(gt, i915, i)
|
||||
intel_gt_suspend_prepare(gt);
|
||||
|
||||
i915_gem_drain_freed_objects(i915);
|
||||
}
|
||||
@ -131,7 +135,9 @@ void i915_gem_suspend_late(struct drm_i915_private *i915)
|
||||
&i915->mm.purge_list,
|
||||
NULL
|
||||
}, **phase;
|
||||
struct intel_gt *gt;
|
||||
unsigned long flags;
|
||||
unsigned int i;
|
||||
bool flush = false;
|
||||
|
||||
/*
|
||||
@ -154,7 +160,8 @@ void i915_gem_suspend_late(struct drm_i915_private *i915)
|
||||
* machine in an unusable condition.
|
||||
*/
|
||||
|
||||
intel_gt_suspend_late(to_gt(i915));
|
||||
for_each_gt(gt, i915, i)
|
||||
intel_gt_suspend_late(gt);
|
||||
|
||||
spin_lock_irqsave(&i915->mm.obj_lock, flags);
|
||||
for (phase = phases; *phase; phase++) {
|
||||
@ -212,7 +219,8 @@ int i915_gem_freeze_late(struct drm_i915_private *i915)
|
||||
|
||||
void i915_gem_resume(struct drm_i915_private *i915)
|
||||
{
|
||||
int ret;
|
||||
struct intel_gt *gt;
|
||||
int ret, i, j;
|
||||
|
||||
GEM_TRACE("%s\n", dev_name(i915->drm.dev));
|
||||
|
||||
@ -224,8 +232,25 @@ void i915_gem_resume(struct drm_i915_private *i915)
|
||||
* guarantee that the context image is complete. So let's just reset
|
||||
* it and start again.
|
||||
*/
|
||||
intel_gt_resume(to_gt(i915));
|
||||
for_each_gt(gt, i915, i)
|
||||
if (intel_gt_resume(gt))
|
||||
goto err_wedged;
|
||||
|
||||
ret = lmem_restore(i915, I915_TTM_BACKUP_ALLOW_GPU);
|
||||
GEM_WARN_ON(ret);
|
||||
|
||||
return;
|
||||
|
||||
err_wedged:
|
||||
for_each_gt(gt, i915, j) {
|
||||
if (!intel_gt_is_wedged(gt)) {
|
||||
dev_err(i915->drm.dev,
|
||||
"Failed to re-initialize GPU[%u], declaring it wedged!\n",
|
||||
j);
|
||||
intel_gt_set_wedged(gt);
|
||||
}
|
||||
|
||||
if (j == i)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -194,7 +194,7 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
|
||||
struct intel_memory_region *mem = obj->mm.region;
|
||||
struct address_space *mapping = obj->base.filp->f_mapping;
|
||||
const unsigned long page_count = obj->base.size / PAGE_SIZE;
|
||||
unsigned int max_segment = i915_sg_segment_size();
|
||||
unsigned int max_segment = i915_sg_segment_size(i915->drm.dev);
|
||||
struct sg_table *st;
|
||||
struct sgt_iter sgt_iter;
|
||||
struct page *page;
|
||||
@ -369,14 +369,14 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
|
||||
|
||||
__start_cpu_write(obj);
|
||||
/*
|
||||
* On non-LLC platforms, force the flush-on-acquire if this is ever
|
||||
* On non-LLC igfx platforms, force the flush-on-acquire if this is ever
|
||||
* swapped-in. Our async flush path is not trustworthy enough yet (and
|
||||
* happens in the wrong order), and with some tricks it's conceivable
|
||||
* for userspace to change the cache-level to I915_CACHE_NONE after the
|
||||
* pages are swapped-in, and since execbuf binds the object before doing
|
||||
* the async flush, we have a race window.
|
||||
*/
|
||||
if (!HAS_LLC(i915))
|
||||
if (!HAS_LLC(i915) && !IS_DGFX(i915))
|
||||
obj->cache_dirty = true;
|
||||
}
|
||||
|
||||
|
@ -77,22 +77,26 @@ void i915_gem_stolen_remove_node(struct drm_i915_private *i915,
|
||||
mutex_unlock(&i915->mm.stolen_lock);
|
||||
}
|
||||
|
||||
static int i915_adjust_stolen(struct drm_i915_private *i915,
|
||||
struct resource *dsm)
|
||||
static bool valid_stolen_size(struct drm_i915_private *i915, struct resource *dsm)
|
||||
{
|
||||
return (dsm->start != 0 || HAS_LMEMBAR_SMEM_STOLEN(i915)) && dsm->end > dsm->start;
|
||||
}
|
||||
|
||||
static int adjust_stolen(struct drm_i915_private *i915,
|
||||
struct resource *dsm)
|
||||
{
|
||||
struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
|
||||
struct intel_uncore *uncore = ggtt->vm.gt->uncore;
|
||||
struct resource *r;
|
||||
|
||||
if (dsm->start == 0 || dsm->end <= dsm->start)
|
||||
if (!valid_stolen_size(i915, dsm))
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* Make sure we don't clobber the GTT if it's within stolen memory
|
||||
*
|
||||
* TODO: We have yet to encounter the case where the GTT wasn't at the
|
||||
* end of stolen. With that assumption we could simplify this.
|
||||
*/
|
||||
|
||||
/* Make sure we don't clobber the GTT if it's within stolen memory */
|
||||
if (GRAPHICS_VER(i915) <= 4 &&
|
||||
!IS_G33(i915) && !IS_PINEVIEW(i915) && !IS_G4X(i915)) {
|
||||
struct resource stolen[2] = {*dsm, *dsm};
|
||||
@ -131,12 +135,25 @@ static int i915_adjust_stolen(struct drm_i915_private *i915,
|
||||
}
|
||||
}
|
||||
|
||||
if (!valid_stolen_size(i915, dsm))
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int request_smem_stolen(struct drm_i915_private *i915,
|
||||
struct resource *dsm)
|
||||
{
|
||||
struct resource *r;
|
||||
|
||||
/*
|
||||
* With stolen lmem, we don't need to check if the address range
|
||||
* overlaps with the non-stolen system memory range, since lmem is local
|
||||
* to the gpu.
|
||||
* With stolen lmem, we don't need to request system memory for the
|
||||
* address range since it's local to the gpu.
|
||||
*
|
||||
* Starting MTL, in IGFX devices the stolen memory is exposed via
|
||||
* LMEMBAR and shall be considered similar to stolen lmem.
|
||||
*/
|
||||
if (HAS_LMEM(i915))
|
||||
if (HAS_LMEM(i915) || HAS_LMEMBAR_SMEM_STOLEN(i915))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
@ -371,8 +388,6 @@ static void icl_get_stolen_reserved(struct drm_i915_private *i915,
|
||||
|
||||
drm_dbg(&i915->drm, "GEN6_STOLEN_RESERVED = 0x%016llx\n", reg_val);
|
||||
|
||||
*base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK;
|
||||
|
||||
switch (reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK) {
|
||||
case GEN8_STOLEN_RESERVED_1M:
|
||||
*size = 1024 * 1024;
|
||||
@ -390,41 +405,30 @@ static void icl_get_stolen_reserved(struct drm_i915_private *i915,
|
||||
*size = 8 * 1024 * 1024;
|
||||
MISSING_CASE(reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK);
|
||||
}
|
||||
|
||||
if (HAS_LMEMBAR_SMEM_STOLEN(i915))
|
||||
/* the base is initialized to stolen top so subtract size to get base */
|
||||
*base -= *size;
|
||||
else
|
||||
*base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK;
|
||||
}
|
||||
|
||||
static int i915_gem_init_stolen(struct intel_memory_region *mem)
|
||||
/*
|
||||
* Initialize i915->dsm_reserved to contain the reserved space within the Data
|
||||
* Stolen Memory. This is a range on the top of DSM that is reserved, not to
|
||||
* be used by driver, so must be excluded from the region passed to the
|
||||
* allocator later. In the spec this is also called WOPCM.
|
||||
*
|
||||
* Our expectation is that the reserved space is at the top of the stolen
|
||||
* region, as it has been the case for every platform, and *never* at the
|
||||
* bottom, so the calculation here can be simplified.
|
||||
*/
|
||||
static int init_reserved_stolen(struct drm_i915_private *i915)
|
||||
{
|
||||
struct drm_i915_private *i915 = mem->i915;
|
||||
struct intel_uncore *uncore = &i915->uncore;
|
||||
resource_size_t reserved_base, stolen_top;
|
||||
resource_size_t reserved_total, reserved_size;
|
||||
|
||||
mutex_init(&i915->mm.stolen_lock);
|
||||
|
||||
if (intel_vgpu_active(i915)) {
|
||||
drm_notice(&i915->drm,
|
||||
"%s, disabling use of stolen memory\n",
|
||||
"iGVT-g active");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (i915_vtd_active(i915) && GRAPHICS_VER(i915) < 8) {
|
||||
drm_notice(&i915->drm,
|
||||
"%s, disabling use of stolen memory\n",
|
||||
"DMAR active");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (resource_size(&mem->region) == 0)
|
||||
return 0;
|
||||
|
||||
i915->dsm = mem->region;
|
||||
|
||||
if (i915_adjust_stolen(i915, &i915->dsm))
|
||||
return 0;
|
||||
|
||||
GEM_BUG_ON(i915->dsm.start == 0);
|
||||
GEM_BUG_ON(i915->dsm.end <= i915->dsm.start);
|
||||
resource_size_t reserved_size;
|
||||
int ret = 0;
|
||||
|
||||
stolen_top = i915->dsm.end + 1;
|
||||
reserved_base = stolen_top;
|
||||
@ -455,17 +459,16 @@ static int i915_gem_init_stolen(struct intel_memory_region *mem)
|
||||
&reserved_base, &reserved_size);
|
||||
}
|
||||
|
||||
/*
|
||||
* Our expectation is that the reserved space is at the top of the
|
||||
* stolen region and *never* at the bottom. If we see !reserved_base,
|
||||
* it likely means we failed to read the registers correctly.
|
||||
*/
|
||||
/* No reserved stolen */
|
||||
if (reserved_base == stolen_top)
|
||||
goto bail_out;
|
||||
|
||||
if (!reserved_base) {
|
||||
drm_err(&i915->drm,
|
||||
"inconsistent reservation %pa + %pa; ignoring\n",
|
||||
&reserved_base, &reserved_size);
|
||||
reserved_base = stolen_top;
|
||||
reserved_size = 0;
|
||||
ret = -EINVAL;
|
||||
goto bail_out;
|
||||
}
|
||||
|
||||
i915->dsm_reserved =
|
||||
@ -475,19 +478,55 @@ static int i915_gem_init_stolen(struct intel_memory_region *mem)
|
||||
drm_err(&i915->drm,
|
||||
"Stolen reserved area %pR outside stolen memory %pR\n",
|
||||
&i915->dsm_reserved, &i915->dsm);
|
||||
return 0;
|
||||
ret = -EINVAL;
|
||||
goto bail_out;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
bail_out:
|
||||
i915->dsm_reserved =
|
||||
(struct resource)DEFINE_RES_MEM(reserved_base, 0);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int i915_gem_init_stolen(struct intel_memory_region *mem)
|
||||
{
|
||||
struct drm_i915_private *i915 = mem->i915;
|
||||
|
||||
mutex_init(&i915->mm.stolen_lock);
|
||||
|
||||
if (intel_vgpu_active(i915)) {
|
||||
drm_notice(&i915->drm,
|
||||
"%s, disabling use of stolen memory\n",
|
||||
"iGVT-g active");
|
||||
return -ENOSPC;
|
||||
}
|
||||
|
||||
if (i915_vtd_active(i915) && GRAPHICS_VER(i915) < 8) {
|
||||
drm_notice(&i915->drm,
|
||||
"%s, disabling use of stolen memory\n",
|
||||
"DMAR active");
|
||||
return -ENOSPC;
|
||||
}
|
||||
|
||||
if (adjust_stolen(i915, &mem->region))
|
||||
return -ENOSPC;
|
||||
|
||||
if (request_smem_stolen(i915, &mem->region))
|
||||
return -ENOSPC;
|
||||
|
||||
i915->dsm = mem->region;
|
||||
|
||||
if (init_reserved_stolen(i915))
|
||||
return -ENOSPC;
|
||||
|
||||
/* Exclude the reserved region from driver use */
|
||||
mem->region.end = reserved_base - 1;
|
||||
mem->region.end = i915->dsm_reserved.start - 1;
|
||||
mem->io_size = min(mem->io_size, resource_size(&mem->region));
|
||||
|
||||
/* It is possible for the reserved area to end before the end of stolen
|
||||
* memory, so just consider the start. */
|
||||
reserved_total = stolen_top - reserved_base;
|
||||
|
||||
i915->stolen_usable_size =
|
||||
resource_size(&i915->dsm) - reserved_total;
|
||||
i915->stolen_usable_size = resource_size(&mem->region);
|
||||
|
||||
drm_dbg(&i915->drm,
|
||||
"Memory reserved for graphics device: %lluK, usable: %lluK\n",
|
||||
@ -495,7 +534,7 @@ static int i915_gem_init_stolen(struct intel_memory_region *mem)
|
||||
(u64)i915->stolen_usable_size >> 10);
|
||||
|
||||
if (i915->stolen_usable_size == 0)
|
||||
return 0;
|
||||
return -ENOSPC;
|
||||
|
||||
/* Basic memrange allocator for stolen space. */
|
||||
drm_mm_init(&i915->mm.stolen, 0, i915->stolen_usable_size);
|
||||
@ -733,11 +772,17 @@ i915_gem_object_create_stolen(struct drm_i915_private *i915,
|
||||
|
||||
static int init_stolen_smem(struct intel_memory_region *mem)
|
||||
{
|
||||
int err;
|
||||
|
||||
/*
|
||||
* Initialise stolen early so that we may reserve preallocated
|
||||
* objects for the BIOS to KMS transition.
|
||||
*/
|
||||
return i915_gem_init_stolen(mem);
|
||||
err = i915_gem_init_stolen(mem);
|
||||
if (err)
|
||||
drm_dbg(&mem->i915->drm, "Skip stolen region: failed to setup\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int release_stolen_smem(struct intel_memory_region *mem)
|
||||
@ -754,27 +799,26 @@ static const struct intel_memory_region_ops i915_region_stolen_smem_ops = {
|
||||
|
||||
static int init_stolen_lmem(struct intel_memory_region *mem)
|
||||
{
|
||||
struct drm_i915_private *i915 = mem->i915;
|
||||
int err;
|
||||
|
||||
if (GEM_WARN_ON(resource_size(&mem->region) == 0))
|
||||
return -ENODEV;
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* TODO: For stolen lmem we mostly just care about populating the dsm
|
||||
* related bits and setting up the drm_mm allocator for the range.
|
||||
* Perhaps split up i915_gem_init_stolen() for this.
|
||||
*/
|
||||
err = i915_gem_init_stolen(mem);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (mem->io_size && !io_mapping_init_wc(&mem->iomap,
|
||||
mem->io_start,
|
||||
mem->io_size)) {
|
||||
err = -EIO;
|
||||
goto err_cleanup;
|
||||
if (err) {
|
||||
drm_dbg(&mem->i915->drm, "Skip stolen region: failed to setup\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (mem->io_size &&
|
||||
!io_mapping_init_wc(&mem->iomap, mem->io_start, mem->io_size))
|
||||
goto err_cleanup;
|
||||
|
||||
drm_dbg(&i915->drm, "Stolen Local memory IO start: %pa\n",
|
||||
&mem->io_start);
|
||||
drm_dbg(&i915->drm, "Stolen Local DSM base: %pa\n", &mem->region.start);
|
||||
|
||||
return 0;
|
||||
|
||||
err_cleanup:
|
||||
@ -796,6 +840,29 @@ static const struct intel_memory_region_ops i915_region_stolen_lmem_ops = {
|
||||
.init_object = _i915_gem_object_stolen_init,
|
||||
};
|
||||
|
||||
static int mtl_get_gms_size(struct intel_uncore *uncore)
|
||||
{
|
||||
u16 ggc, gms;
|
||||
|
||||
ggc = intel_uncore_read16(uncore, GGC);
|
||||
|
||||
/* check GGMS, should be fixed 0x3 (8MB) */
|
||||
if ((ggc & GGMS_MASK) != GGMS_MASK)
|
||||
return -EIO;
|
||||
|
||||
/* return valid GMS value, -EIO if invalid */
|
||||
gms = REG_FIELD_GET(GMS_MASK, ggc);
|
||||
switch (gms) {
|
||||
case 0x0 ... 0x04:
|
||||
return gms * 32;
|
||||
case 0xf0 ... 0xfe:
|
||||
return (gms - 0xf0 + 1) * 4;
|
||||
default:
|
||||
MISSING_CASE(gms);
|
||||
return -EIO;
|
||||
}
|
||||
}
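/*
 * Worked decode of the switch above (illustrative values, not taken from
 * the diff): a GMS field of 0x02 yields 2 * 32 = 64 MB of stolen memory,
 * and 0xf1 yields (0xf1 - 0xf0 + 1) * 4 = 8 MB; any other encoding returns
 * -EIO. The fixed GGMS field must still read back as 0x3 (8 MB of GSM).
 */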
|
||||
|
||||
struct intel_memory_region *
|
||||
i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
|
||||
u16 instance)
|
||||
@ -806,6 +873,7 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
|
||||
struct intel_memory_region *mem;
|
||||
resource_size_t io_start, io_size;
|
||||
resource_size_t min_page_size;
|
||||
int ret;
|
||||
|
||||
if (WARN_ON_ONCE(instance))
|
||||
return ERR_PTR(-ENODEV);
|
||||
@ -813,12 +881,8 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
|
||||
if (!i915_pci_resource_valid(pdev, GEN12_LMEM_BAR))
|
||||
return ERR_PTR(-ENXIO);
|
||||
|
||||
/* Use DSM base address instead for stolen memory */
|
||||
dsm_base = intel_uncore_read64(uncore, GEN12_DSMBASE);
|
||||
if (IS_DG1(uncore->i915)) {
|
||||
if (HAS_LMEMBAR_SMEM_STOLEN(i915) || IS_DG1(i915)) {
|
||||
lmem_size = pci_resource_len(pdev, GEN12_LMEM_BAR);
|
||||
if (WARN_ON(lmem_size < dsm_base))
|
||||
return ERR_PTR(-ENODEV);
|
||||
} else {
|
||||
resource_size_t lmem_range;
|
||||
|
||||
@ -827,13 +891,39 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
|
||||
lmem_size *= SZ_1G;
|
||||
}
|
||||
|
||||
dsm_size = lmem_size - dsm_base;
|
||||
if (pci_resource_len(pdev, GEN12_LMEM_BAR) < lmem_size) {
|
||||
if (HAS_LMEMBAR_SMEM_STOLEN(i915)) {
|
||||
/*
|
||||
* MTL dsm size is in GGC register.
|
||||
* Also MTL uses offset to DSMBASE in ptes, so i915
|
||||
* uses dsm_base = 0 to setup stolen region.
|
||||
*/
|
||||
ret = mtl_get_gms_size(uncore);
|
||||
if (ret < 0) {
|
||||
drm_err(&i915->drm, "invalid MTL GGC register setting\n");
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
dsm_base = 0;
|
||||
dsm_size = (resource_size_t)(ret * SZ_1M);
|
||||
|
||||
GEM_BUG_ON(pci_resource_len(pdev, GEN12_LMEM_BAR) != SZ_256M);
|
||||
GEM_BUG_ON((dsm_size + SZ_8M) > lmem_size);
|
||||
} else {
|
||||
/* Use DSM base address instead for stolen memory */
|
||||
dsm_base = intel_uncore_read64(uncore, GEN12_DSMBASE) & GEN12_BDSM_MASK;
|
||||
if (WARN_ON(lmem_size < dsm_base))
|
||||
return ERR_PTR(-ENODEV);
|
||||
dsm_size = lmem_size - dsm_base;
|
||||
}
|
||||
|
||||
io_size = dsm_size;
|
||||
if (HAS_LMEMBAR_SMEM_STOLEN(i915)) {
|
||||
io_start = pci_resource_start(pdev, GEN12_LMEM_BAR) + SZ_8M;
|
||||
} else if (pci_resource_len(pdev, GEN12_LMEM_BAR) < lmem_size) {
|
||||
io_start = 0;
|
||||
io_size = 0;
|
||||
} else {
|
||||
io_start = pci_resource_start(pdev, GEN12_LMEM_BAR) + dsm_base;
|
||||
io_size = dsm_size;
|
||||
}
|
||||
|
||||
min_page_size = HAS_64K_PAGES(i915) ? I915_GTT_PAGE_SIZE_64K :
|
||||
@ -847,16 +937,6 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
|
||||
if (IS_ERR(mem))
|
||||
return mem;
|
||||
|
||||
/*
|
||||
* TODO: consider creating common helper to just print all the
|
||||
* interesting stuff from intel_memory_region, which we can use for all
|
||||
* our probed regions.
|
||||
*/
|
||||
|
||||
drm_dbg(&i915->drm, "Stolen Local memory IO start: %pa\n",
|
||||
&mem->io_start);
|
||||
drm_dbg(&i915->drm, "Stolen Local DSM base: %pa\n", &dsm_base);
|
||||
|
||||
intel_memory_region_set_name(mem, "stolen-local");
|
||||
|
||||
mem->private = true;
|
||||
@ -881,6 +961,7 @@ i915_gem_stolen_smem_setup(struct drm_i915_private *i915, u16 type,
|
||||
intel_memory_region_set_name(mem, "stolen-system");
|
||||
|
||||
mem->private = true;
|
||||
|
||||
return mem;
|
||||
}
|
||||
|
||||
|
@ -189,7 +189,7 @@ static int i915_ttm_tt_shmem_populate(struct ttm_device *bdev,
|
||||
struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev);
|
||||
struct intel_memory_region *mr = i915->mm.regions[INTEL_MEMORY_SYSTEM];
|
||||
struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
|
||||
const unsigned int max_segment = i915_sg_segment_size();
|
||||
const unsigned int max_segment = i915_sg_segment_size(i915->drm.dev);
|
||||
const size_t size = (size_t)ttm->num_pages << PAGE_SHIFT;
|
||||
struct file *filp = i915_tt->filp;
|
||||
struct sgt_iter sgt_iter;
|
||||
@ -279,7 +279,7 @@ static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo,
|
||||
struct i915_ttm_tt *i915_tt;
|
||||
int ret;
|
||||
|
||||
if (!obj)
|
||||
if (i915_ttm_is_ghost_object(bo))
|
||||
return NULL;
|
||||
|
||||
i915_tt = kzalloc(sizeof(*i915_tt), GFP_KERNEL);
|
||||
@ -362,7 +362,7 @@ static bool i915_ttm_eviction_valuable(struct ttm_buffer_object *bo,
|
||||
{
|
||||
struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
|
||||
|
||||
if (!obj)
|
||||
if (i915_ttm_is_ghost_object(bo))
|
||||
return false;
|
||||
|
||||
/*
|
||||
@ -509,18 +509,9 @@ static int i915_ttm_shrink(struct drm_i915_gem_object *obj, unsigned int flags)
|
||||
static void i915_ttm_delete_mem_notify(struct ttm_buffer_object *bo)
|
||||
{
|
||||
struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
|
||||
intel_wakeref_t wakeref = 0;
|
||||
|
||||
if (bo->resource && likely(obj)) {
|
||||
/* ttm_bo_release() already has dma_resv_lock */
|
||||
if (i915_ttm_cpu_maps_iomem(bo->resource))
|
||||
wakeref = intel_runtime_pm_get(&to_i915(obj->base.dev)->runtime_pm);
|
||||
|
||||
if (bo->resource && !i915_ttm_is_ghost_object(bo)) {
|
||||
__i915_gem_object_pages_fini(obj);
|
||||
|
||||
if (wakeref)
|
||||
intel_runtime_pm_put(&to_i915(obj->base.dev)->runtime_pm, wakeref);
|
||||
|
||||
i915_ttm_free_cached_io_rsgt(obj);
|
||||
}
|
||||
}
|
||||
@ -538,7 +529,7 @@ static struct i915_refct_sgt *i915_ttm_tt_get_st(struct ttm_tt *ttm)
|
||||
ret = sg_alloc_table_from_pages_segment(st,
|
||||
ttm->pages, ttm->num_pages,
|
||||
0, (unsigned long)ttm->num_pages << PAGE_SHIFT,
|
||||
i915_sg_segment_size(), GFP_KERNEL);
|
||||
i915_sg_segment_size(i915_tt->dev), GFP_KERNEL);
|
||||
if (ret) {
|
||||
st->sgl = NULL;
|
||||
return ERR_PTR(ret);
|
||||
@ -624,7 +615,7 @@ static void i915_ttm_swap_notify(struct ttm_buffer_object *bo)
|
||||
struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
|
||||
int ret;
|
||||
|
||||
if (!obj)
|
||||
if (i915_ttm_is_ghost_object(bo))
|
||||
return;
|
||||
|
||||
ret = i915_ttm_move_notify(bo);
|
||||
@ -657,7 +648,7 @@ static int i915_ttm_io_mem_reserve(struct ttm_device *bdev, struct ttm_resource
|
||||
struct drm_i915_gem_object *obj = i915_ttm_to_gem(mem->bo);
|
||||
bool unknown_state;
|
||||
|
||||
if (!obj)
|
||||
if (i915_ttm_is_ghost_object(mem->bo))
|
||||
return -EINVAL;
|
||||
|
||||
if (!kref_get_unless_zero(&obj->base.refcount))
|
||||
@ -690,7 +681,7 @@ static unsigned long i915_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
|
||||
unsigned long base;
|
||||
unsigned int ofs;
|
||||
|
||||
GEM_BUG_ON(!obj);
|
||||
GEM_BUG_ON(i915_ttm_is_ghost_object(bo));
|
||||
GEM_WARN_ON(bo->ttm);
|
||||
|
||||
base = obj->mm.region->iomap.base - obj->mm.region->region.start;
|
||||
@ -699,6 +690,50 @@ static unsigned long i915_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
|
||||
return ((base + sg_dma_address(sg)) >> PAGE_SHIFT) + ofs;
|
||||
}
|
||||
|
||||
static int i915_ttm_access_memory(struct ttm_buffer_object *bo,
|
||||
unsigned long offset, void *buf,
|
||||
int len, int write)
|
||||
{
|
||||
struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
|
||||
resource_size_t iomap = obj->mm.region->iomap.base -
|
||||
obj->mm.region->region.start;
|
||||
unsigned long page = offset >> PAGE_SHIFT;
|
||||
unsigned long bytes_left = len;
|
||||
|
||||
/*
|
||||
* TODO: For now just let it fail if the resource is non-mappable,
|
||||
* otherwise we need to perform the memcpy from the gpu here, without
|
||||
* interfering with the object (like moving the entire thing).
|
||||
*/
|
||||
if (!i915_ttm_resource_mappable(bo->resource))
|
||||
return -EIO;
|
||||
|
||||
offset -= page << PAGE_SHIFT;
|
||||
do {
|
||||
unsigned long bytes = min(bytes_left, PAGE_SIZE - offset);
|
||||
void __iomem *ptr;
|
||||
dma_addr_t daddr;
|
||||
|
||||
daddr = i915_gem_object_get_dma_address(obj, page);
|
||||
ptr = ioremap_wc(iomap + daddr + offset, bytes);
|
||||
if (!ptr)
|
||||
return -EIO;
|
||||
|
||||
if (write)
|
||||
memcpy_toio(ptr, buf, bytes);
|
||||
else
|
||||
memcpy_fromio(buf, ptr, bytes);
|
||||
iounmap(ptr);
|
||||
|
||||
page++;
|
||||
buf += bytes;
|
||||
bytes_left -= bytes;
|
||||
offset = 0;
|
||||
} while (bytes_left);
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
/*
|
||||
* All callbacks need to take care not to downcast a struct ttm_buffer_object
|
||||
* without checking its subclass, since it might be a TTM ghost object.
|
||||
@ -715,6 +750,7 @@ static struct ttm_device_funcs i915_ttm_bo_driver = {
|
||||
.delete_mem_notify = i915_ttm_delete_mem_notify,
|
||||
.io_mem_reserve = i915_ttm_io_mem_reserve,
|
||||
.io_mem_pfn = i915_ttm_io_mem_pfn,
|
||||
.access_memory = i915_ttm_access_memory,
|
||||
};
|
||||
|
||||
/**
|
||||
@ -990,13 +1026,12 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf)
|
||||
struct vm_area_struct *area = vmf->vma;
|
||||
struct ttm_buffer_object *bo = area->vm_private_data;
|
||||
struct drm_device *dev = bo->base.dev;
|
||||
struct drm_i915_gem_object *obj;
|
||||
struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
|
||||
intel_wakeref_t wakeref = 0;
|
||||
vm_fault_t ret;
|
||||
int idx;
|
||||
|
||||
obj = i915_ttm_to_gem(bo);
|
||||
if (!obj)
|
||||
if (i915_ttm_is_ghost_object(bo))
|
||||
return VM_FAULT_SIGBUS;
|
||||
|
||||
/* Sanity check that we allow writing into this object */
|
||||
@ -1035,7 +1070,8 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf)
|
||||
}
|
||||
|
||||
if (err) {
|
||||
drm_dbg(dev, "Unable to make resource CPU accessible\n");
|
||||
drm_dbg(dev, "Unable to make resource CPU accessible(err = %pe)\n",
|
||||
ERR_PTR(err));
|
||||
dma_resv_unlock(bo->base.resv);
|
||||
ret = VM_FAULT_SIGBUS;
|
||||
goto out_rpm;
|
||||
@ -1053,16 +1089,19 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf)
|
||||
if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
|
||||
goto out_rpm;
|
||||
|
||||
/* ttm_bo_vm_reserve() already has dma_resv_lock */
|
||||
/*
|
||||
* ttm_bo_vm_reserve() already has dma_resv_lock.
|
||||
* userfault_count is protected by dma_resv lock and rpm wakeref.
|
||||
*/
|
||||
if (ret == VM_FAULT_NOPAGE && wakeref && !obj->userfault_count) {
|
||||
obj->userfault_count = 1;
|
||||
mutex_lock(&to_gt(to_i915(obj->base.dev))->lmem_userfault_lock);
|
||||
list_add(&obj->userfault_link, &to_gt(to_i915(obj->base.dev))->lmem_userfault_list);
|
||||
mutex_unlock(&to_gt(to_i915(obj->base.dev))->lmem_userfault_lock);
|
||||
spin_lock(&to_i915(obj->base.dev)->runtime_pm.lmem_userfault_lock);
|
||||
list_add(&obj->userfault_link, &to_i915(obj->base.dev)->runtime_pm.lmem_userfault_list);
|
||||
spin_unlock(&to_i915(obj->base.dev)->runtime_pm.lmem_userfault_lock);
|
||||
}
|
||||
|
||||
if (wakeref & CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)
|
||||
intel_wakeref_auto(&to_gt(to_i915(obj->base.dev))->userfault_wakeref,
|
||||
intel_wakeref_auto(&to_i915(obj->base.dev)->runtime_pm.userfault_wakeref,
|
||||
msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND));
|
||||
|
||||
i915_ttm_adjust_lru(obj);
|
||||
@ -1094,7 +1133,7 @@ static void ttm_vm_open(struct vm_area_struct *vma)
|
||||
struct drm_i915_gem_object *obj =
|
||||
i915_ttm_to_gem(vma->vm_private_data);
|
||||
|
||||
GEM_BUG_ON(!obj);
|
||||
GEM_BUG_ON(i915_ttm_is_ghost_object(vma->vm_private_data));
|
||||
i915_gem_object_get(obj);
|
||||
}
|
||||
|
||||
@ -1103,7 +1142,7 @@ static void ttm_vm_close(struct vm_area_struct *vma)
|
||||
struct drm_i915_gem_object *obj =
|
||||
i915_ttm_to_gem(vma->vm_private_data);
|
||||
|
||||
GEM_BUG_ON(!obj);
|
||||
GEM_BUG_ON(i915_ttm_is_ghost_object(vma->vm_private_data));
|
||||
i915_gem_object_put(obj);
|
||||
}
|
||||
|
||||
@ -1124,7 +1163,27 @@ static u64 i915_ttm_mmap_offset(struct drm_i915_gem_object *obj)
|
||||
|
||||
static void i915_ttm_unmap_virtual(struct drm_i915_gem_object *obj)
|
||||
{
|
||||
struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
|
||||
intel_wakeref_t wakeref = 0;
|
||||
|
||||
assert_object_held_shared(obj);
|
||||
|
||||
if (i915_ttm_cpu_maps_iomem(bo->resource)) {
|
||||
wakeref = intel_runtime_pm_get(&to_i915(obj->base.dev)->runtime_pm);
|
||||
|
||||
/* userfault_count is protected by obj lock and rpm wakeref. */
|
||||
if (obj->userfault_count) {
|
||||
spin_lock(&to_i915(obj->base.dev)->runtime_pm.lmem_userfault_lock);
|
||||
list_del(&obj->userfault_link);
|
||||
spin_unlock(&to_i915(obj->base.dev)->runtime_pm.lmem_userfault_lock);
|
||||
obj->userfault_count = 0;
|
||||
}
|
||||
}
|
||||
|
||||
ttm_bo_unmap_virtual(i915_gem_to_ttm(obj));
|
||||
|
||||
if (wakeref)
|
||||
intel_runtime_pm_put(&to_i915(obj->base.dev)->runtime_pm, wakeref);
|
||||
}
|
||||
|
||||
static const struct drm_i915_gem_object_ops i915_gem_ttm_obj_ops = {
|
||||
|
@ -27,19 +27,27 @@ i915_gem_to_ttm(struct drm_i915_gem_object *obj)
|
||||
*/
|
||||
void i915_ttm_bo_destroy(struct ttm_buffer_object *bo);
|
||||
|
||||
/**
|
||||
* i915_ttm_is_ghost_object - Check if the ttm bo is a ghost object.
|
||||
* @bo: Pointer to the ttm buffer object
|
||||
*
|
||||
* Return: True if the ttm bo is not an i915 object but a ghost ttm object,
|
||||
* False otherwise.
|
||||
*/
|
||||
static inline bool i915_ttm_is_ghost_object(struct ttm_buffer_object *bo)
|
||||
{
|
||||
return bo->destroy != i915_ttm_bo_destroy;
|
||||
}
|
||||
|
||||
/**
|
||||
* i915_ttm_to_gem - Convert a struct ttm_buffer_object to an embedding
|
||||
* struct drm_i915_gem_object.
|
||||
*
|
||||
* Return: Pointer to the embedding struct ttm_buffer_object, or NULL
|
||||
* if the object was not an i915 ttm object.
|
||||
* Return: Pointer to the embedding struct ttm_buffer_object.
|
||||
*/
|
||||
static inline struct drm_i915_gem_object *
|
||||
i915_ttm_to_gem(struct ttm_buffer_object *bo)
|
||||
{
|
||||
if (bo->destroy != i915_ttm_bo_destroy)
|
||||
return NULL;
|
||||
|
||||
return container_of(bo, struct drm_i915_gem_object, __do_not_access);
|
||||
}
|
||||
|
||||
|
@ -560,7 +560,7 @@ int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
|
||||
bool clear;
|
||||
int ret;
|
||||
|
||||
if (GEM_WARN_ON(!obj)) {
|
||||
if (GEM_WARN_ON(i915_ttm_is_ghost_object(bo))) {
|
||||
ttm_bo_move_null(bo, dst_mem);
|
||||
return 0;
|
||||
}
|
||||
|
@ -129,7 +129,7 @@ static void i915_gem_object_userptr_drop_ref(struct drm_i915_gem_object *obj)
|
||||
static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
|
||||
{
|
||||
const unsigned long num_pages = obj->base.size >> PAGE_SHIFT;
|
||||
unsigned int max_segment = i915_sg_segment_size();
|
||||
unsigned int max_segment = i915_sg_segment_size(obj->base.dev->dev);
|
||||
struct sg_table *st;
|
||||
unsigned int sg_page_sizes;
|
||||
struct page **pvec;
|
||||
@ -292,7 +292,7 @@ int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj)
|
||||
if (!i915_gem_object_is_readonly(obj))
|
||||
gup_flags |= FOLL_WRITE;
|
||||
|
||||
pinned = ret = 0;
|
||||
pinned = 0;
|
||||
while (pinned < num_pages) {
|
||||
ret = pin_user_pages_fast(obj->userptr.ptr + pinned * PAGE_SIZE,
|
||||
num_pages - pinned, gup_flags,
|
||||
@ -302,7 +302,6 @@ int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj)
|
||||
|
||||
pinned += ret;
|
||||
}
|
||||
ret = 0;
|
||||
|
||||
ret = i915_gem_object_lock_interruptible(obj, NULL);
|
||||
if (ret)
|
||||
|
@ -1161,7 +1161,8 @@ static int igt_write_huge(struct drm_i915_private *i915,
|
||||
GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
|
||||
|
||||
size = obj->base.size;
|
||||
if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K)
|
||||
if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K &&
|
||||
!HAS_64K_PAGES(i915))
|
||||
size = round_up(size, I915_GTT_PAGE_SIZE_2M);
|
||||
|
||||
n = 0;
|
||||
@ -1214,6 +1215,10 @@ static int igt_write_huge(struct drm_i915_private *i915,
|
||||
* size and ensure the vma offset is at the start of the pt
|
||||
* boundary, however to improve coverage we opt for testing both
|
||||
* aligned and unaligned offsets.
|
||||
*
|
||||
* With PS64 this is no longer the case, but to ensure we
|
||||
* sometimes get the compact layout for smaller objects, apply
|
||||
* the round_up anyway.
|
||||
*/
|
||||
if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K)
|
||||
offset_low = round_down(offset_low,
|
||||
@ -1411,6 +1416,7 @@ static int igt_ppgtt_sanity_check(void *arg)
|
||||
{ SZ_2M + SZ_4K, SZ_64K | SZ_4K },
|
||||
{ SZ_2M + SZ_4K, SZ_2M | SZ_4K },
|
||||
{ SZ_2M + SZ_64K, SZ_2M | SZ_64K },
|
||||
{ SZ_2M + SZ_64K, SZ_64K },
|
||||
};
|
||||
int i, j;
|
||||
int err;
|
||||
@ -1540,6 +1546,154 @@ out_put:
|
||||
return err;
|
||||
}
|
||||
|
||||
static int igt_ppgtt_mixed(void *arg)
|
||||
{
|
||||
struct drm_i915_private *i915 = arg;
|
||||
const unsigned long flags = PIN_OFFSET_FIXED | PIN_USER;
|
||||
struct drm_i915_gem_object *obj, *on;
|
||||
struct i915_gem_engines *engines;
|
||||
struct i915_gem_engines_iter it;
|
||||
struct i915_address_space *vm;
|
||||
struct i915_gem_context *ctx;
|
||||
struct intel_context *ce;
|
||||
struct file *file;
|
||||
I915_RND_STATE(prng);
|
||||
LIST_HEAD(objects);
|
||||
struct intel_memory_region *mr;
|
||||
struct i915_vma *vma;
|
||||
unsigned int count;
|
||||
u32 i, addr;
|
||||
int *order;
|
||||
int n, err;
|
||||
|
||||
/*
|
||||
* Sanity check mixing 4K and 64K pages within the same page-table via
|
||||
* the new PS64 TLB hint.
|
||||
*/
|
||||
|
||||
if (!HAS_64K_PAGES(i915)) {
|
||||
pr_info("device lacks PS64, skipping\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
file = mock_file(i915);
|
||||
if (IS_ERR(file))
|
||||
return PTR_ERR(file);
|
||||
|
||||
ctx = hugepage_ctx(i915, file);
|
||||
if (IS_ERR(ctx)) {
|
||||
err = PTR_ERR(ctx);
|
||||
goto out;
|
||||
}
|
||||
vm = i915_gem_context_get_eb_vm(ctx);
|
||||
|
||||
i = 0;
|
||||
addr = 0;
|
||||
do {
|
||||
u32 sz;
|
||||
|
||||
sz = i915_prandom_u32_max_state(SZ_4M, &prng);
|
||||
sz = max_t(u32, sz, SZ_4K);
|
||||
|
||||
mr = i915->mm.regions[INTEL_REGION_LMEM_0];
|
||||
if (i & 1)
|
||||
mr = i915->mm.regions[INTEL_REGION_SMEM];
|
||||
|
||||
obj = i915_gem_object_create_region(mr, sz, 0, 0);
|
||||
if (IS_ERR(obj)) {
|
||||
err = PTR_ERR(obj);
|
||||
goto out_vm;
|
||||
}
|
||||
|
||||
list_add_tail(&obj->st_link, &objects);
|
||||
|
||||
vma = i915_vma_instance(obj, vm, NULL);
|
||||
if (IS_ERR(vma)) {
|
||||
err = PTR_ERR(vma);
|
||||
goto err_put;
|
||||
}
|
||||
|
||||
addr = round_up(addr, mr->min_page_size);
|
||||
err = i915_vma_pin(vma, 0, 0, addr | flags);
|
||||
if (err)
|
||||
goto err_put;
|
||||
|
||||
if (mr->type == INTEL_MEMORY_LOCAL &&
|
||||
(vma->resource->page_sizes_gtt & I915_GTT_PAGE_SIZE_4K)) {
|
||||
err = -EINVAL;
|
||||
goto err_put;
|
||||
}
|
||||
|
||||
addr += obj->base.size;
|
||||
i++;
|
||||
} while (addr <= SZ_16M);
|
||||
|
||||
n = 0;
|
||||
count = 0;
|
||||
for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
|
||||
count++;
|
||||
if (!intel_engine_can_store_dword(ce->engine))
|
||||
continue;
|
||||
|
||||
n++;
|
||||
}
|
||||
i915_gem_context_unlock_engines(ctx);
|
||||
if (!n)
|
||||
goto err_put;
|
||||
|
||||
order = i915_random_order(count * count, &prng);
|
||||
if (!order) {
|
||||
err = -ENOMEM;
|
||||
goto err_put;
|
||||
}
|
||||
|
||||
i = 0;
|
||||
addr = 0;
|
||||
engines = i915_gem_context_lock_engines(ctx);
|
||||
list_for_each_entry(obj, &objects, st_link) {
|
||||
u32 rnd = i915_prandom_u32_max_state(UINT_MAX, &prng);
|
||||
|
||||
addr = round_up(addr, obj->mm.region->min_page_size);
|
||||
|
||||
ce = engines->engines[order[i] % engines->num_engines];
|
||||
i = (i + 1) % (count * count);
|
||||
if (!ce || !intel_engine_can_store_dword(ce->engine))
|
||||
continue;
|
||||
|
||||
err = __igt_write_huge(ce, obj, obj->base.size, addr, 0, rnd);
|
||||
if (err)
|
||||
break;
|
||||
|
||||
err = __igt_write_huge(ce, obj, obj->base.size, addr,
|
||||
offset_in_page(rnd) / sizeof(u32), rnd + 1);
|
||||
if (err)
|
||||
break;
|
||||
|
||||
err = __igt_write_huge(ce, obj, obj->base.size, addr,
|
||||
(PAGE_SIZE / sizeof(u32)) - 1,
|
||||
rnd + 2);
|
||||
if (err)
|
||||
break;
|
||||
|
||||
addr += obj->base.size;
|
||||
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
i915_gem_context_unlock_engines(ctx);
|
||||
kfree(order);
|
||||
err_put:
|
||||
list_for_each_entry_safe(obj, on, &objects, st_link) {
|
||||
list_del(&obj->st_link);
|
||||
i915_gem_object_put(obj);
|
||||
}
|
||||
out_vm:
|
||||
i915_vm_put(vm);
|
||||
out:
|
||||
fput(file);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int igt_tmpfs_fallback(void *arg)
|
||||
{
|
||||
struct drm_i915_private *i915 = arg;
|
||||
@ -1803,6 +1957,7 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915)
|
||||
SUBTEST(igt_ppgtt_smoke_huge),
|
||||
SUBTEST(igt_ppgtt_sanity_check),
|
||||
SUBTEST(igt_ppgtt_compact),
|
||||
SUBTEST(igt_ppgtt_mixed),
|
||||
};
|
||||
|
||||
if (!HAS_PPGTT(i915)) {
|
||||
|
@ -179,97 +179,108 @@ out_file:
|
||||
}
|
||||
|
||||
struct parallel_switch {
|
||||
struct task_struct *tsk;
|
||||
struct kthread_worker *worker;
|
||||
struct kthread_work work;
|
||||
struct intel_context *ce[2];
|
||||
int result;
|
||||
};
|
||||
|
||||
static int __live_parallel_switch1(void *data)
|
||||
static void __live_parallel_switch1(struct kthread_work *work)
|
||||
{
|
||||
struct parallel_switch *arg = data;
|
||||
struct parallel_switch *arg =
|
||||
container_of(work, typeof(*arg), work);
|
||||
IGT_TIMEOUT(end_time);
|
||||
unsigned long count;
|
||||
|
||||
count = 0;
|
||||
arg->result = 0;
|
||||
do {
|
||||
struct i915_request *rq = NULL;
|
||||
int err, n;
|
||||
int n;
|
||||
|
||||
err = 0;
|
||||
for (n = 0; !err && n < ARRAY_SIZE(arg->ce); n++) {
|
||||
for (n = 0; !arg->result && n < ARRAY_SIZE(arg->ce); n++) {
|
||||
struct i915_request *prev = rq;
|
||||
|
||||
rq = i915_request_create(arg->ce[n]);
|
||||
if (IS_ERR(rq)) {
|
||||
i915_request_put(prev);
|
||||
return PTR_ERR(rq);
|
||||
arg->result = PTR_ERR(rq);
|
||||
break;
|
||||
}
|
||||
|
||||
i915_request_get(rq);
|
||||
if (prev) {
|
||||
err = i915_request_await_dma_fence(rq, &prev->fence);
|
||||
arg->result =
|
||||
i915_request_await_dma_fence(rq,
|
||||
&prev->fence);
|
||||
i915_request_put(prev);
|
||||
}
|
||||
|
||||
i915_request_add(rq);
|
||||
}
|
||||
|
||||
if (IS_ERR_OR_NULL(rq))
|
||||
break;
|
||||
|
||||
if (i915_request_wait(rq, 0, HZ) < 0)
|
||||
err = -ETIME;
|
||||
arg->result = -ETIME;
|
||||
|
||||
i915_request_put(rq);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
count++;
|
||||
} while (!__igt_timeout(end_time, NULL));
|
||||
} while (!arg->result && !__igt_timeout(end_time, NULL));
|
||||
|
||||
pr_info("%s: %lu switches (sync)\n", arg->ce[0]->engine->name, count);
|
||||
return 0;
|
||||
pr_info("%s: %lu switches (sync) <%d>\n",
|
||||
arg->ce[0]->engine->name, count, arg->result);
|
||||
}
|
||||
|
||||
static int __live_parallel_switchN(void *data)
|
||||
static void __live_parallel_switchN(struct kthread_work *work)
|
||||
{
|
||||
struct parallel_switch *arg = data;
|
||||
struct parallel_switch *arg =
|
||||
container_of(work, typeof(*arg), work);
|
||||
struct i915_request *rq = NULL;
|
||||
IGT_TIMEOUT(end_time);
|
||||
unsigned long count;
|
||||
int n;
|
||||
|
||||
count = 0;
|
||||
arg->result = 0;
|
||||
do {
|
||||
for (n = 0; n < ARRAY_SIZE(arg->ce); n++) {
|
||||
for (n = 0; !arg->result && n < ARRAY_SIZE(arg->ce); n++) {
|
||||
struct i915_request *prev = rq;
|
||||
int err = 0;
|
||||
|
||||
rq = i915_request_create(arg->ce[n]);
|
||||
if (IS_ERR(rq)) {
|
||||
i915_request_put(prev);
|
||||
return PTR_ERR(rq);
|
||||
arg->result = PTR_ERR(rq);
|
||||
break;
|
||||
}
|
||||
|
||||
i915_request_get(rq);
|
||||
if (prev) {
|
||||
err = i915_request_await_dma_fence(rq, &prev->fence);
|
||||
arg->result =
|
||||
i915_request_await_dma_fence(rq,
|
||||
&prev->fence);
|
||||
i915_request_put(prev);
|
||||
}
|
||||
|
||||
i915_request_add(rq);
|
||||
if (err) {
|
||||
i915_request_put(rq);
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
count++;
|
||||
} while (!__igt_timeout(end_time, NULL));
|
||||
i915_request_put(rq);
|
||||
} while (!arg->result && !__igt_timeout(end_time, NULL));
|
||||
|
||||
pr_info("%s: %lu switches (many)\n", arg->ce[0]->engine->name, count);
|
||||
return 0;
|
||||
if (!IS_ERR_OR_NULL(rq))
|
||||
i915_request_put(rq);
|
||||
|
||||
pr_info("%s: %lu switches (many) <%d>\n",
|
||||
arg->ce[0]->engine->name, count, arg->result);
|
||||
}
|
||||
|
||||
static int live_parallel_switch(void *arg)
|
||||
{
|
||||
struct drm_i915_private *i915 = arg;
|
||||
static int (* const func[])(void *arg) = {
|
||||
static void (* const func[])(struct kthread_work *) = {
|
||||
__live_parallel_switch1,
|
||||
__live_parallel_switchN,
|
||||
NULL,
|
||||
@ -277,7 +288,7 @@ static int live_parallel_switch(void *arg)
|
||||
struct parallel_switch *data = NULL;
|
||||
struct i915_gem_engines *engines;
|
||||
struct i915_gem_engines_iter it;
|
||||
int (* const *fn)(void *arg);
|
||||
void (* const *fn)(struct kthread_work *);
|
||||
struct i915_gem_context *ctx;
|
||||
struct intel_context *ce;
|
||||
struct file *file;
|
||||
@ -348,9 +359,22 @@ static int live_parallel_switch(void *arg)
|
||||
}
|
||||
}
|
||||
|
||||
for (n = 0; n < count; n++) {
|
||||
struct kthread_worker *worker;
|
||||
|
||||
if (!data[n].ce[0])
|
||||
continue;
|
||||
|
||||
worker = kthread_create_worker(0, "igt/parallel:%s",
|
||||
data[n].ce[0]->engine->name);
|
||||
if (IS_ERR(worker))
|
||||
goto out;
|
||||
|
||||
data[n].worker = worker;
|
||||
}
|
||||
|
||||
for (fn = func; !err && *fn; fn++) {
|
||||
struct igt_live_test t;
|
||||
int n;
|
||||
|
||||
err = igt_live_test_begin(&t, i915, __func__, "");
|
||||
if (err)
|
||||
@ -360,30 +384,17 @@ static int live_parallel_switch(void *arg)
|
||||
if (!data[n].ce[0])
|
||||
continue;
|
||||
|
||||
data[n].tsk = kthread_run(*fn, &data[n],
|
||||
"igt/parallel:%s",
|
||||
data[n].ce[0]->engine->name);
|
||||
if (IS_ERR(data[n].tsk)) {
|
||||
err = PTR_ERR(data[n].tsk);
|
||||
break;
|
||||
}
|
||||
get_task_struct(data[n].tsk);
|
||||
data[n].result = 0;
|
||||
kthread_init_work(&data[n].work, *fn);
|
||||
kthread_queue_work(data[n].worker, &data[n].work);
|
||||
}
|
||||
|
||||
yield(); /* start all threads before we kthread_stop() */
|
||||
|
||||
for (n = 0; n < count; n++) {
|
||||
int status;
|
||||
|
||||
if (IS_ERR_OR_NULL(data[n].tsk))
|
||||
continue;
|
||||
|
||||
status = kthread_stop(data[n].tsk);
|
||||
if (status && !err)
|
||||
err = status;
|
||||
|
||||
put_task_struct(data[n].tsk);
|
||||
data[n].tsk = NULL;
|
||||
if (data[n].ce[0]) {
|
||||
kthread_flush_work(&data[n].work);
|
||||
if (data[n].result && !err)
|
||||
err = data[n].result;
|
||||
}
|
||||
}
|
||||
|
||||
if (igt_live_test_end(&t))
|
||||
@ -399,6 +410,9 @@ out:
|
||||
intel_context_unpin(data[n].ce[m]);
|
||||
intel_context_put(data[n].ce[m]);
|
||||
}
|
||||
|
||||
if (data[n].worker)
|
||||
kthread_destroy_worker(data[n].worker);
|
||||
}
|
||||
kfree(data);
|
||||
out_file:
|
||||
|
@ -6,8 +6,12 @@
|
||||
|
||||
#include "i915_drv.h"
|
||||
#include "i915_selftest.h"
|
||||
#include "gem/i915_gem_context.h"
|
||||
|
||||
#include "mock_context.h"
|
||||
#include "mock_dmabuf.h"
|
||||
#include "igt_gem_utils.h"
|
||||
#include "selftests/mock_drm.h"
|
||||
#include "selftests/mock_gem_device.h"
|
||||
|
||||
static int igt_dmabuf_export(void *arg)
|
||||
@ -140,6 +144,75 @@ out_ret:
|
||||
return err;
|
||||
}
|
||||
|
||||
static int verify_access(struct drm_i915_private *i915,
			 struct drm_i915_gem_object *native_obj,
			 struct drm_i915_gem_object *import_obj)
{
	struct i915_gem_engines_iter it;
	struct i915_gem_context *ctx;
	struct intel_context *ce;
	struct i915_vma *vma;
	struct file *file;
	u32 *vaddr;
	int err = 0, i;

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	ctx = live_context(i915, file);
	if (IS_ERR(ctx)) {
		err = PTR_ERR(ctx);
		goto out_file;
	}

	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
		if (intel_engine_can_store_dword(ce->engine))
			break;
	}
	i915_gem_context_unlock_engines(ctx);
	if (!ce)
		goto out_file;

	vma = i915_vma_instance(import_obj, ce->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto out_file;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto out_file;

	err = igt_gpu_fill_dw(ce, vma, 0,
			      vma->size >> PAGE_SHIFT, 0xdeadbeaf);
	i915_vma_unpin(vma);
	if (err)
		goto out_file;

	err = i915_gem_object_wait(import_obj, 0, MAX_SCHEDULE_TIMEOUT);
	if (err)
		goto out_file;

	vaddr = i915_gem_object_pin_map_unlocked(native_obj, I915_MAP_WB);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto out_file;
	}

	for (i = 0; i < native_obj->base.size / sizeof(u32); i += PAGE_SIZE / sizeof(u32)) {
		if (vaddr[i] != 0xdeadbeaf) {
			pr_err("Data mismatch [%d]=%u\n", i, vaddr[i]);
			err = -EINVAL;
			goto out_file;
		}
	}

out_file:
	fput(file);
	return err;
}
|
||||
|
||||
static int igt_dmabuf_import_same_driver(struct drm_i915_private *i915,
|
||||
struct intel_memory_region **regions,
|
||||
unsigned int num_regions)
|
||||
@ -154,7 +227,7 @@ static int igt_dmabuf_import_same_driver(struct drm_i915_private *i915,
|
||||
|
||||
force_different_devices = true;
|
||||
|
||||
obj = __i915_gem_object_create_user(i915, PAGE_SIZE,
|
||||
obj = __i915_gem_object_create_user(i915, SZ_8M,
|
||||
regions, num_regions);
|
||||
if (IS_ERR(obj)) {
|
||||
pr_err("__i915_gem_object_create_user failed with err=%ld\n",
|
||||
@ -206,6 +279,10 @@ static int igt_dmabuf_import_same_driver(struct drm_i915_private *i915,
|
||||
|
||||
i915_gem_object_unlock(import_obj);
|
||||
|
||||
err = verify_access(i915, obj, import_obj);
|
||||
if (err)
|
||||
goto out_import;
|
||||
|
||||
/* Now try a fake an importer */
|
||||
import_attach = dma_buf_attach(dmabuf, obj->base.dev->dev);
|
||||
if (IS_ERR(import_attach)) {
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include <linux/prime_numbers.h>
|
||||
|
||||
#include "gem/i915_gem_internal.h"
|
||||
#include "gem/i915_gem_lmem.h"
|
||||
#include "gem/i915_gem_region.h"
|
||||
#include "gem/i915_gem_ttm.h"
|
||||
#include "gem/i915_gem_ttm_move.h"
|
||||
|
@ -396,15 +396,17 @@ int gen8_emit_init_breadcrumb(struct i915_request *rq)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __gen125_emit_bb_start(struct i915_request *rq,
|
||||
u64 offset, u32 len,
|
||||
const unsigned int flags,
|
||||
u32 arb)
|
||||
static int __xehp_emit_bb_start(struct i915_request *rq,
|
||||
u64 offset, u32 len,
|
||||
const unsigned int flags,
|
||||
u32 arb)
|
||||
{
|
||||
struct intel_context *ce = rq->context;
|
||||
u32 wa_offset = lrc_indirect_bb(ce);
|
||||
u32 *cs;
|
||||
|
||||
GEM_BUG_ON(!ce->wa_bb_page);
|
||||
|
||||
cs = intel_ring_begin(rq, 12);
|
||||
if (IS_ERR(cs))
|
||||
return PTR_ERR(cs);
|
||||
@ -435,18 +437,18 @@ static int __gen125_emit_bb_start(struct i915_request *rq,
|
||||
return 0;
|
||||
}
|
||||
|
||||
int gen125_emit_bb_start_noarb(struct i915_request *rq,
|
||||
u64 offset, u32 len,
|
||||
const unsigned int flags)
|
||||
int xehp_emit_bb_start_noarb(struct i915_request *rq,
|
||||
u64 offset, u32 len,
|
||||
const unsigned int flags)
|
||||
{
|
||||
return __gen125_emit_bb_start(rq, offset, len, flags, MI_ARB_DISABLE);
|
||||
return __xehp_emit_bb_start(rq, offset, len, flags, MI_ARB_DISABLE);
|
||||
}
|
||||
|
||||
int gen125_emit_bb_start(struct i915_request *rq,
|
||||
u64 offset, u32 len,
|
||||
const unsigned int flags)
|
||||
int xehp_emit_bb_start(struct i915_request *rq,
|
||||
u64 offset, u32 len,
|
||||
const unsigned int flags)
|
||||
{
|
||||
return __gen125_emit_bb_start(rq, offset, len, flags, MI_ARB_ENABLE);
|
||||
return __xehp_emit_bb_start(rq, offset, len, flags, MI_ARB_ENABLE);
|
||||
}
|
||||
|
||||
int gen8_emit_bb_start_noarb(struct i915_request *rq,
|
||||
@ -583,6 +585,8 @@ u32 *gen8_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs)
|
||||
u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
|
||||
{
|
||||
cs = gen8_emit_pipe_control(cs,
|
||||
PIPE_CONTROL_CS_STALL |
|
||||
PIPE_CONTROL_TLB_INVALIDATE |
|
||||
PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
|
||||
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
|
||||
PIPE_CONTROL_DC_FLUSH_ENABLE,
|
||||
@ -600,15 +604,21 @@ u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
|
||||
|
||||
u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
|
||||
{
|
||||
cs = gen8_emit_pipe_control(cs,
|
||||
PIPE_CONTROL_CS_STALL |
|
||||
PIPE_CONTROL_TLB_INVALIDATE |
|
||||
PIPE_CONTROL_TILE_CACHE_FLUSH |
|
||||
PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
|
||||
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
|
||||
PIPE_CONTROL_DC_FLUSH_ENABLE,
|
||||
0);
|
||||
|
||||
/*XXX: Look at gen8_emit_fini_breadcrumb_rcs */
|
||||
cs = gen8_emit_ggtt_write_rcs(cs,
|
||||
rq->fence.seqno,
|
||||
hwsp_offset(rq),
|
||||
PIPE_CONTROL_CS_STALL |
|
||||
PIPE_CONTROL_TILE_CACHE_FLUSH |
|
||||
PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
|
||||
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
|
||||
PIPE_CONTROL_DC_FLUSH_ENABLE |
|
||||
PIPE_CONTROL_FLUSH_ENABLE);
|
||||
PIPE_CONTROL_FLUSH_ENABLE |
|
||||
PIPE_CONTROL_CS_STALL);
|
||||
|
||||
return gen8_emit_fini_breadcrumb_tail(rq, cs);
|
||||
}
|
||||
@ -715,6 +725,7 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
|
||||
{
|
||||
struct drm_i915_private *i915 = rq->engine->i915;
|
||||
u32 flags = (PIPE_CONTROL_CS_STALL |
|
||||
PIPE_CONTROL_TLB_INVALIDATE |
|
||||
PIPE_CONTROL_TILE_CACHE_FLUSH |
|
||||
PIPE_CONTROL_FLUSH_L3 |
|
||||
PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
|
||||
@ -731,11 +742,15 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
|
||||
else if (rq->engine->class == COMPUTE_CLASS)
|
||||
flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
|
||||
|
||||
cs = gen12_emit_pipe_control(cs, PIPE_CONTROL0_HDC_PIPELINE_FLUSH, flags, 0);
|
||||
|
||||
/*XXX: Look at gen8_emit_fini_breadcrumb_rcs */
|
||||
cs = gen12_emit_ggtt_write_rcs(cs,
|
||||
rq->fence.seqno,
|
||||
hwsp_offset(rq),
|
||||
PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
|
||||
flags);
|
||||
0,
|
||||
PIPE_CONTROL_FLUSH_ENABLE |
|
||||
PIPE_CONTROL_CS_STALL);
|
||||
|
||||
return gen12_emit_fini_breadcrumb_tail(rq, cs);
|
||||
}
|
||||
|
@ -32,12 +32,12 @@ int gen8_emit_bb_start(struct i915_request *rq,
|
||||
u64 offset, u32 len,
|
||||
const unsigned int flags);
|
||||
|
||||
int gen125_emit_bb_start_noarb(struct i915_request *rq,
|
||||
u64 offset, u32 len,
|
||||
const unsigned int flags);
|
||||
int gen125_emit_bb_start(struct i915_request *rq,
|
||||
u64 offset, u32 len,
|
||||
const unsigned int flags);
|
||||
int xehp_emit_bb_start_noarb(struct i915_request *rq,
|
||||
u64 offset, u32 len,
|
||||
const unsigned int flags);
|
||||
int xehp_emit_bb_start(struct i915_request *rq,
|
||||
u64 offset, u32 len,
|
||||
const unsigned int flags);
|
||||
|
||||
u32 *gen8_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs);
|
||||
u32 *gen12_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs);
|
||||
|
@ -476,6 +476,7 @@ xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm,
|
||||
const gen8_pte_t pte_encode = vm->pte_encode(0, cache_level, flags);
|
||||
unsigned int rem = sg_dma_len(iter->sg);
|
||||
u64 start = vma_res->start;
|
||||
u64 end = start + vma_res->vma_size;
|
||||
|
||||
GEM_BUG_ON(!i915_vm_is_4lvl(vm));
|
||||
|
||||
@ -489,9 +490,10 @@ xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm,
|
||||
gen8_pte_t encode = pte_encode;
|
||||
unsigned int page_size;
|
||||
gen8_pte_t *vaddr;
|
||||
u16 index, max;
|
||||
u16 index, max, nent, i;
|
||||
|
||||
max = I915_PDES;
|
||||
nent = 1;
|
||||
|
||||
if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
|
||||
IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
|
||||
@ -503,25 +505,37 @@ xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm,
|
||||
|
||||
vaddr = px_vaddr(pd);
|
||||
} else {
|
||||
if (encode & GEN12_PPGTT_PTE_LM) {
|
||||
GEM_BUG_ON(__gen8_pte_index(start, 0) % 16);
|
||||
GEM_BUG_ON(rem < I915_GTT_PAGE_SIZE_64K);
|
||||
GEM_BUG_ON(!IS_ALIGNED(iter->dma,
|
||||
I915_GTT_PAGE_SIZE_64K));
|
||||
index = __gen8_pte_index(start, 0);
|
||||
page_size = I915_GTT_PAGE_SIZE;
|
||||
|
||||
index = __gen8_pte_index(start, 0) / 16;
|
||||
page_size = I915_GTT_PAGE_SIZE_64K;
|
||||
if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_64K) {
|
||||
/*
|
||||
* Device local-memory on these platforms should
|
||||
* always use 64K pages or larger (including GTT
|
||||
* alignment), therefore if we know the whole
|
||||
* page-table needs to be filled we can always
|
||||
* safely use the compact-layout. Otherwise fall
|
||||
* back to the TLB hint with PS64. If this is
|
||||
* system memory we only bother with PS64.
|
||||
*/
|
||||
if ((encode & GEN12_PPGTT_PTE_LM) &&
|
||||
end - start >= SZ_2M && !index) {
|
||||
index = __gen8_pte_index(start, 0) / 16;
|
||||
page_size = I915_GTT_PAGE_SIZE_64K;
|
||||
|
||||
max /= 16;
|
||||
max /= 16;
|
||||
|
||||
vaddr = px_vaddr(pd);
|
||||
vaddr[__gen8_pte_index(start, 1)] |= GEN12_PDE_64K;
|
||||
vaddr = px_vaddr(pd);
|
||||
vaddr[__gen8_pte_index(start, 1)] |= GEN12_PDE_64K;
|
||||
|
||||
pt->is_compact = true;
|
||||
} else {
|
||||
GEM_BUG_ON(pt->is_compact);
|
||||
index = __gen8_pte_index(start, 0);
|
||||
page_size = I915_GTT_PAGE_SIZE;
|
||||
pt->is_compact = true;
|
||||
} else if (IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
|
||||
rem >= I915_GTT_PAGE_SIZE_64K &&
|
||||
!(index % 16)) {
|
||||
encode |= GEN12_PTE_PS64;
|
||||
page_size = I915_GTT_PAGE_SIZE_64K;
|
||||
nent = 16;
|
||||
}
|
||||
}
|
||||
|
||||
vaddr = px_vaddr(pt);
|
||||
@ -529,7 +543,12 @@ xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm,
|
||||
|
||||
do {
|
||||
GEM_BUG_ON(rem < page_size);
|
||||
vaddr[index++] = encode | iter->dma;
|
||||
|
||||
for (i = 0; i < nent; i++) {
|
||||
vaddr[index++] =
|
||||
encode | (iter->dma + i *
|
||||
I915_GTT_PAGE_SIZE);
|
||||
}
|
||||
|
||||
start += page_size;
|
||||
iter->dma += page_size;
|
||||
@ -745,6 +764,8 @@ static void __xehpsdv_ppgtt_insert_entry_lm(struct i915_address_space *vm,
|
||||
GEM_BUG_ON(!IS_ALIGNED(addr, SZ_64K));
|
||||
GEM_BUG_ON(!IS_ALIGNED(offset, SZ_64K));
|
||||
|
||||
/* XXX: we don't strictly need to use this layout */
|
||||
|
||||
if (!pt->is_compact) {
|
||||
vaddr = px_vaddr(pd);
|
||||
vaddr[gen8_pd_index(idx, 1)] |= GEN12_PDE_64K;
|
||||
@ -929,29 +950,18 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
|
||||
*/
|
||||
ppgtt->vm.has_read_only = !IS_GRAPHICS_VER(gt->i915, 11, 12);
|
||||
|
||||
if (HAS_LMEM(gt->i915)) {
|
||||
if (HAS_LMEM(gt->i915))
|
||||
ppgtt->vm.alloc_pt_dma = alloc_pt_lmem;
|
||||
|
||||
/*
|
||||
* On some platforms the hw has dropped support for 4K GTT pages
|
||||
* when dealing with LMEM, and due to the design of 64K GTT
|
||||
* pages in the hw, we can only mark the *entire* page-table as
|
||||
* operating in 64K GTT mode, since the enable bit is still on
|
||||
* the pde, and not the pte. And since we still need to allow
|
||||
* 4K GTT pages for SMEM objects, we can't have a "normal" 4K
|
||||
* page-table with scratch pointing to LMEM, since that's
|
||||
* undefined from the hw pov. The simplest solution is to just
|
||||
* move the 64K scratch page to SMEM on such platforms and call
|
||||
* it a day, since that should work for all configurations.
|
||||
*/
|
||||
if (HAS_64K_PAGES(gt->i915))
|
||||
ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
|
||||
else
|
||||
ppgtt->vm.alloc_scratch_dma = alloc_pt_lmem;
|
||||
} else {
|
||||
else
|
||||
ppgtt->vm.alloc_pt_dma = alloc_pt_dma;
|
||||
ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
|
||||
}
|
||||
|
||||
/*
|
||||
* Using SMEM here instead of LMEM has the advantage of not reserving
|
||||
* high performance memory for a "never" used filler page. It also
|
||||
* removes the device access that would be required to initialise the
|
||||
* scratch page, reducing pressure on an even scarcer resource.
|
||||
*/
|
||||
ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
|
||||
|
||||
ppgtt->vm.pte_encode = gen8_pte_encode;
|
||||
|
||||
|
@ -276,6 +276,14 @@ static inline bool intel_context_is_barrier(const struct intel_context *ce)
|
||||
return test_bit(CONTEXT_BARRIER_BIT, &ce->flags);
|
||||
}
|
||||
|
||||
static inline void intel_context_close(struct intel_context *ce)
|
||||
{
|
||||
set_bit(CONTEXT_CLOSED_BIT, &ce->flags);
|
||||
|
||||
if (ce->ops->close)
|
||||
ce->ops->close(ce);
|
||||
}
|
||||
|
||||
static inline bool intel_context_is_closed(const struct intel_context *ce)
|
||||
{
|
||||
return test_bit(CONTEXT_CLOSED_BIT, &ce->flags);
|
||||
|
@ -43,6 +43,8 @@ struct intel_context_ops {
|
||||
void (*revoke)(struct intel_context *ce, struct i915_request *rq,
|
||||
unsigned int preempt_timeout_ms);
|
||||
|
||||
void (*close)(struct intel_context *ce);
|
||||
|
||||
int (*pre_pin)(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void **vaddr);
|
||||
int (*pin)(struct intel_context *ce, void *vaddr);
|
||||
void (*unpin)(struct intel_context *ce);
|
||||
@ -197,8 +199,6 @@ struct intel_context {
|
||||
* context's submissions is complete.
|
||||
*/
|
||||
struct i915_sw_fence blocked;
|
||||
/** @number_committed_requests: number of committed requests */
|
||||
int number_committed_requests;
|
||||
/** @requests: list of active requests on this context */
|
||||
struct list_head requests;
|
||||
/** @prio: the context's current guc priority */
|
||||
@ -208,6 +208,11 @@ struct intel_context {
|
||||
* each priority bucket
|
||||
*/
|
||||
u32 prio_count[GUC_CLIENT_PRIORITY_NUM];
|
||||
/**
|
||||
* @sched_disable_delay_work: worker to disable scheduling on this
|
||||
* context
|
||||
*/
|
||||
struct delayed_work sched_disable_delay_work;
|
||||
} guc_state;
|
||||
|
||||
struct {
|
||||
|
@@ -348,4 +348,10 @@ intel_engine_get_hung_context(struct intel_engine_cs *engine)
	return engine->hung_ce;
}

u64 intel_clamp_heartbeat_interval_ms(struct intel_engine_cs *engine, u64 value);
u64 intel_clamp_max_busywait_duration_ns(struct intel_engine_cs *engine, u64 value);
u64 intel_clamp_preempt_timeout_ms(struct intel_engine_cs *engine, u64 value);
u64 intel_clamp_stop_timeout_ms(struct intel_engine_cs *engine, u64 value);
u64 intel_clamp_timeslice_duration_ms(struct intel_engine_cs *engine, u64 value);

#endif /* _INTEL_RINGBUFFER_H_ */
|
||||
|
@ -486,6 +486,17 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id,
|
||||
engine->logical_mask = BIT(logical_instance);
|
||||
__sprint_engine_name(engine);
|
||||
|
||||
if ((engine->class == COMPUTE_CLASS && !RCS_MASK(engine->gt) &&
|
||||
__ffs(CCS_MASK(engine->gt)) == engine->instance) ||
|
||||
engine->class == RENDER_CLASS)
|
||||
engine->flags |= I915_ENGINE_FIRST_RENDER_COMPUTE;
|
||||
|
||||
/* features common between engines sharing EUs */
|
||||
if (engine->class == RENDER_CLASS || engine->class == COMPUTE_CLASS) {
|
||||
engine->flags |= I915_ENGINE_HAS_RCS_REG_STATE;
|
||||
engine->flags |= I915_ENGINE_HAS_EU_PRIORITY;
|
||||
}
|
||||
|
||||
engine->props.heartbeat_interval_ms =
|
||||
CONFIG_DRM_I915_HEARTBEAT_INTERVAL;
|
||||
engine->props.max_busywait_duration_ns =
|
||||
@ -497,20 +508,34 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id,
|
||||
engine->props.timeslice_duration_ms =
|
||||
CONFIG_DRM_I915_TIMESLICE_DURATION;
|
||||
|
||||
/* Override to uninterruptible for OpenCL workloads. */
|
||||
if (GRAPHICS_VER(i915) == 12 && engine->class == RENDER_CLASS)
|
||||
engine->props.preempt_timeout_ms = 0;
|
||||
/*
|
||||
* Mid-thread pre-emption is not available in Gen12. Unfortunately,
|
||||
* some compute workloads run quite long threads. That means they get
|
||||
* reset due to not pre-empting in a timely manner. So, bump the
|
||||
* pre-emption timeout value to be much higher for compute engines.
|
||||
*/
|
||||
if (GRAPHICS_VER(i915) == 12 && (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE))
|
||||
engine->props.preempt_timeout_ms = CONFIG_DRM_I915_PREEMPT_TIMEOUT_COMPUTE;
|
||||
|
||||
if ((engine->class == COMPUTE_CLASS && !RCS_MASK(engine->gt) &&
|
||||
__ffs(CCS_MASK(engine->gt)) == engine->instance) ||
|
||||
engine->class == RENDER_CLASS)
|
||||
engine->flags |= I915_ENGINE_FIRST_RENDER_COMPUTE;
|
||||
/* Cap properties according to any system limits */
|
||||
#define CLAMP_PROP(field) \
|
||||
do { \
|
||||
u64 clamp = intel_clamp_##field(engine, engine->props.field); \
|
||||
if (clamp != engine->props.field) { \
|
||||
drm_notice(&engine->i915->drm, \
|
||||
"Warning, clamping %s to %lld to prevent overflow\n", \
|
||||
#field, clamp); \
|
||||
engine->props.field = clamp; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
/* features common between engines sharing EUs */
|
||||
if (engine->class == RENDER_CLASS || engine->class == COMPUTE_CLASS) {
|
||||
engine->flags |= I915_ENGINE_HAS_RCS_REG_STATE;
|
||||
engine->flags |= I915_ENGINE_HAS_EU_PRIORITY;
|
||||
}
|
||||
CLAMP_PROP(heartbeat_interval_ms);
|
||||
CLAMP_PROP(max_busywait_duration_ns);
|
||||
CLAMP_PROP(preempt_timeout_ms);
|
||||
CLAMP_PROP(stop_timeout_ms);
|
||||
CLAMP_PROP(timeslice_duration_ms);
|
||||
|
||||
#undef CLAMP_PROP
|
||||
|
||||
engine->defaults = engine->props; /* never to change again */
|
||||
|
||||
@ -534,6 +559,55 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id,
|
||||
return 0;
|
||||
}
|
||||
|
||||
u64 intel_clamp_heartbeat_interval_ms(struct intel_engine_cs *engine, u64 value)
{
	value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT));

	return value;
}

u64 intel_clamp_max_busywait_duration_ns(struct intel_engine_cs *engine, u64 value)
{
	value = min(value, jiffies_to_nsecs(2));

	return value;
}

u64 intel_clamp_preempt_timeout_ms(struct intel_engine_cs *engine, u64 value)
{
	/*
	 * NB: The GuC API only supports 32bit values. However, the limit is further
	 * reduced due to internal calculations which would otherwise overflow.
	 */
	if (intel_guc_submission_is_wanted(&engine->gt->uc.guc))
		value = min_t(u64, value, guc_policy_max_preempt_timeout_ms());

	value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT));

	return value;
}

u64 intel_clamp_stop_timeout_ms(struct intel_engine_cs *engine, u64 value)
{
	value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT));

	return value;
}

u64 intel_clamp_timeslice_duration_ms(struct intel_engine_cs *engine, u64 value)
{
	/*
	 * NB: The GuC API only supports 32bit values. However, the limit is further
	 * reduced due to internal calculations which would otherwise overflow.
	 */
	if (intel_guc_submission_is_wanted(&engine->gt->uc.guc))
		value = min_t(u64, value, guc_policy_max_exec_quantum_ms());

	value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT));

	return value;
}
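The clamp helpers above exist so that externally supplied timeouts are capped before any jiffies or GuC conversion can overflow. A rough illustration of how such a helper would typically be used before a value is stored (hypothetical function, not code from this series):

	/* Hypothetical example: cap a requested preempt timeout before
	 * committing it, so later jiffies/GuC conversions cannot overflow. */
	static void example_store_preempt_timeout(struct intel_engine_cs *engine,
						  u64 requested_ms)
	{
		u64 clamped = intel_clamp_preempt_timeout_ms(engine, requested_ms);

		if (clamped != requested_ms)
			drm_dbg(&engine->i915->drm,
				"preempt timeout clamped from %llu to %llu ms\n",
				requested_ms, clamped);

		WRITE_ONCE(engine->props.preempt_timeout_ms, clamped);
	}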
|
||||
|
||||
static void __setup_engine_capabilities(struct intel_engine_cs *engine)
|
||||
{
|
||||
struct drm_i915_private *i915 = engine->i915;
|
||||
@ -1274,8 +1348,13 @@ int intel_engines_init(struct intel_gt *gt)
|
||||
return err;
|
||||
|
||||
err = setup(engine);
|
||||
if (err)
|
||||
if (err) {
|
||||
intel_engine_cleanup_common(engine);
|
||||
return err;
|
||||
}
|
||||
|
||||
/* The backend should now be responsible for cleanup */
|
||||
GEM_BUG_ON(engine->release == NULL);
|
||||
|
||||
err = engine_init_common(engine);
|
||||
if (err)
|
||||
@ -1554,11 +1633,11 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine,
|
||||
for_each_ss_steering(iter, engine->gt, slice, subslice) {
|
||||
instdone->sampler[slice][subslice] =
|
||||
intel_gt_mcr_read(engine->gt,
|
||||
GEN7_SAMPLER_INSTDONE,
|
||||
GEN8_SAMPLER_INSTDONE,
|
||||
slice, subslice);
|
||||
instdone->row[slice][subslice] =
|
||||
intel_gt_mcr_read(engine->gt,
|
||||
GEN7_ROW_INSTDONE,
|
||||
GEN8_ROW_INSTDONE,
|
||||
slice, subslice);
|
||||
}
|
||||
|
||||
|
@ -22,9 +22,37 @@
|
||||
|
||||
static bool next_heartbeat(struct intel_engine_cs *engine)
|
||||
{
|
||||
struct i915_request *rq;
|
||||
long delay;
|
||||
|
||||
delay = READ_ONCE(engine->props.heartbeat_interval_ms);
|
||||
|
||||
rq = engine->heartbeat.systole;
|
||||
|
||||
/*
|
||||
* FIXME: The final period extension is disabled if the period has been
|
||||
* modified from the default. This is to prevent issues with certain
|
||||
* selftests which override the value and expect specific behaviour.
|
||||
* Once the selftests have been updated to either cope with variable
|
||||
* heartbeat periods (or to override the pre-emption timeout as well,
|
||||
* or just to add a selftest specific override of the extension), the
|
||||
* generic override can be removed.
|
||||
*/
|
||||
if (rq && rq->sched.attr.priority >= I915_PRIORITY_BARRIER &&
|
||||
delay == engine->defaults.heartbeat_interval_ms) {
|
||||
long longer;
|
||||
|
||||
/*
|
||||
* The final try is at the highest priority possible. Up until now
|
||||
* a pre-emption might not even have been attempted. So make sure
|
||||
* this last attempt allows enough time for a pre-emption to occur.
|
||||
*/
|
||||
longer = READ_ONCE(engine->props.preempt_timeout_ms) * 2;
|
||||
longer = intel_clamp_heartbeat_interval_ms(engine, longer);
|
||||
if (longer > delay)
|
||||
delay = longer;
|
||||
}
|
||||
|
||||
if (!delay)
|
||||
return false;
|
||||
|
||||
@ -288,6 +316,17 @@ int intel_engine_set_heartbeat(struct intel_engine_cs *engine,
|
||||
if (!delay && !intel_engine_has_preempt_reset(engine))
|
||||
return -ENODEV;
|
||||
|
||||
/* FIXME: Remove together with equally marked hack in next_heartbeat. */
|
||||
if (delay != engine->defaults.heartbeat_interval_ms &&
|
||||
delay < 2 * engine->props.preempt_timeout_ms) {
|
||||
if (intel_engine_uses_guc(engine))
|
||||
drm_notice(&engine->i915->drm, "%s heartbeat interval adjusted to a non-default value which may downgrade individual engine resets to full GPU resets!\n",
|
||||
engine->name);
|
||||
else
|
||||
drm_notice(&engine->i915->drm, "%s heartbeat interval adjusted to a non-default value which may cause engine resets to target innocent contexts!\n",
|
||||
engine->name);
|
||||
}
|
||||
|
||||
intel_engine_pm_get(engine);
|
||||
|
||||
err = mutex_lock_interruptible(&ce->timeline->mutex);
|
||||
|
@ -201,6 +201,7 @@
|
||||
#define RING_CONTEXT_STATUS_PTR(base) _MMIO((base) + 0x3a0)
|
||||
#define RING_CTX_TIMESTAMP(base) _MMIO((base) + 0x3a8) /* gen8+ */
|
||||
#define RING_PREDICATE_RESULT(base) _MMIO((base) + 0x3b8)
|
||||
#define MI_PREDICATE_RESULT_2_ENGINE(base) _MMIO((base) + 0x3bc)
|
||||
#define RING_FORCE_TO_NONPRIV(base, i) _MMIO(((base) + 0x4D0) + (i) * 4)
|
||||
#define RING_FORCE_TO_NONPRIV_DENY REG_BIT(30)
|
||||
#define RING_FORCE_TO_NONPRIV_ADDRESS_MASK REG_GENMASK(25, 2)
|
||||
|
@ -3471,9 +3471,9 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
|
||||
|
||||
if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) {
|
||||
if (intel_engine_has_preemption(engine))
|
||||
engine->emit_bb_start = gen125_emit_bb_start;
|
||||
engine->emit_bb_start = xehp_emit_bb_start;
|
||||
else
|
||||
engine->emit_bb_start = gen125_emit_bb_start_noarb;
|
||||
engine->emit_bb_start = xehp_emit_bb_start_noarb;
|
||||
} else {
|
||||
if (intel_engine_has_preemption(engine))
|
||||
engine->emit_bb_start = gen8_emit_bb_start;
|
||||
|
@ -871,8 +871,8 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
|
||||
u32 pte_flags;
|
||||
int ret;
|
||||
|
||||
GEM_WARN_ON(pci_resource_len(pdev, GTTMMADR_BAR) != gen6_gttmmadr_size(i915));
|
||||
phys_addr = pci_resource_start(pdev, GTTMMADR_BAR) + gen6_gttadr_offset(i915);
|
||||
GEM_WARN_ON(pci_resource_len(pdev, GEN4_GTTMMADR_BAR) != gen6_gttmmadr_size(i915));
|
||||
phys_addr = pci_resource_start(pdev, GEN4_GTTMMADR_BAR) + gen6_gttadr_offset(i915);
|
||||
|
||||
/*
|
||||
* On BXT+/ICL+ writes larger than 64 bit to the GTT pagetable range
|
||||
@ -931,11 +931,11 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
|
||||
unsigned int size;
|
||||
u16 snb_gmch_ctl;
|
||||
|
||||
if (!HAS_LMEM(i915)) {
|
||||
if (!i915_pci_resource_valid(pdev, GTT_APERTURE_BAR))
|
||||
if (!HAS_LMEM(i915) && !HAS_LMEMBAR_SMEM_STOLEN(i915)) {
|
||||
if (!i915_pci_resource_valid(pdev, GEN4_GMADR_BAR))
|
||||
return -ENXIO;
|
||||
|
||||
ggtt->gmadr = pci_resource(pdev, GTT_APERTURE_BAR);
|
||||
ggtt->gmadr = pci_resource(pdev, GEN4_GMADR_BAR);
|
||||
ggtt->mappable_end = resource_size(&ggtt->gmadr);
|
||||
}
|
||||
|
||||
@ -986,7 +986,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
|
||||
|
||||
ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
|
||||
|
||||
setup_private_pat(ggtt->vm.gt->uncore);
|
||||
setup_private_pat(ggtt->vm.gt);
|
||||
|
||||
return ggtt_probe_common(ggtt, size);
|
||||
}
|
||||
@ -1089,10 +1089,10 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt)
|
||||
unsigned int size;
|
||||
u16 snb_gmch_ctl;
|
||||
|
||||
if (!i915_pci_resource_valid(pdev, GTT_APERTURE_BAR))
|
||||
if (!i915_pci_resource_valid(pdev, GEN4_GMADR_BAR))
|
||||
return -ENXIO;
|
||||
|
||||
ggtt->gmadr = pci_resource(pdev, GTT_APERTURE_BAR);
|
||||
ggtt->gmadr = pci_resource(pdev, GEN4_GMADR_BAR);
|
||||
ggtt->mappable_end = resource_size(&ggtt->gmadr);
|
||||
|
||||
/*
|
||||
@ -1308,7 +1308,7 @@ void i915_ggtt_resume(struct i915_ggtt *ggtt)
|
||||
wbinvd_on_all_cpus();
|
||||
|
||||
if (GRAPHICS_VER(ggtt->vm.i915) >= 8)
|
||||
setup_private_pat(ggtt->vm.gt->uncore);
|
||||
setup_private_pat(ggtt->vm.gt);
|
||||
|
||||
intel_ggtt_restore_fences(ggtt);
|
||||
}
|
||||
|
@ -187,6 +187,10 @@
|
||||
#define MI_BATCH_RESOURCE_STREAMER REG_BIT(10)
|
||||
#define MI_BATCH_PREDICATE REG_BIT(15) /* HSW+ on RCS only*/
|
||||
|
||||
#define MI_OPCODE(x) (((x) >> 23) & 0x3f)
|
||||
#define IS_MI_LRI_CMD(x) (MI_OPCODE(x) == MI_OPCODE(MI_INSTR(0x22, 0)))
|
||||
#define MI_LRI_LEN(x) (((x) & 0xff) + 1)
|
||||
|
||||
/*
|
||||
* 3D instructions used by the kernel
|
||||
*/
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include <linux/mei_aux.h>
|
||||
#include "i915_drv.h"
|
||||
#include "i915_reg.h"
|
||||
#include "gem/i915_gem_lmem.h"
|
||||
#include "gem/i915_gem_region.h"
|
||||
#include "gt/intel_gsc.h"
|
||||
#include "gt/intel_gt.h"
|
||||
@ -142,8 +143,14 @@ static void gsc_destroy_one(struct drm_i915_private *i915,
|
||||
struct intel_gsc_intf *intf = &gsc->intf[intf_id];
|
||||
|
||||
if (intf->adev) {
|
||||
auxiliary_device_delete(&intf->adev->aux_dev);
|
||||
auxiliary_device_uninit(&intf->adev->aux_dev);
|
||||
struct auxiliary_device *aux_dev = &intf->adev->aux_dev;
|
||||
|
||||
if (intf_id == 0)
|
||||
intel_huc_unregister_gsc_notifier(&gsc_to_gt(gsc)->uc.huc,
|
||||
aux_dev->dev.bus);
|
||||
|
||||
auxiliary_device_delete(aux_dev);
|
||||
auxiliary_device_uninit(aux_dev);
|
||||
intf->adev = NULL;
|
||||
}
|
||||
|
||||
@ -242,14 +249,24 @@ add_device:
|
||||
goto fail;
|
||||
}
|
||||
|
||||
intf->adev = adev; /* needed by the notifier */
|
||||
|
||||
if (intf_id == 0)
|
||||
intel_huc_register_gsc_notifier(&gsc_to_gt(gsc)->uc.huc,
|
||||
aux_dev->dev.bus);
|
||||
|
||||
ret = auxiliary_device_add(aux_dev);
|
||||
if (ret < 0) {
|
||||
drm_err(&i915->drm, "gsc aux add failed %d\n", ret);
|
||||
if (intf_id == 0)
|
||||
intel_huc_unregister_gsc_notifier(&gsc_to_gt(gsc)->uc.huc,
|
||||
aux_dev->dev.bus);
|
||||
intf->adev = NULL;
|
||||
|
||||
/* adev will be freed with the put_device() and .release sequence */
|
||||
auxiliary_device_uninit(aux_dev);
|
||||
goto fail;
|
||||
}
|
||||
intf->adev = adev;
|
||||
|
||||
return;
|
||||
fail:
|
||||
|
@ -40,8 +40,6 @@ void intel_gt_common_init_early(struct intel_gt *gt)
|
||||
{
|
||||
spin_lock_init(gt->irq_lock);
|
||||
|
||||
	INIT_LIST_HEAD(&gt->lmem_userfault_list);
	mutex_init(&gt->lmem_userfault_lock);
	INIT_LIST_HEAD(&gt->closed_vma);
	spin_lock_init(&gt->closed_lock);
|
||||
|
||||
@ -231,6 +229,16 @@ static void gen6_clear_engine_error_register(struct intel_engine_cs *engine)
|
||||
GEN6_RING_FAULT_REG_POSTING_READ(engine);
|
||||
}
|
||||
|
||||
i915_reg_t intel_gt_perf_limit_reasons_reg(struct intel_gt *gt)
{
	/* GT0_PERF_LIMIT_REASONS is available only for Gen11+ */
	if (GRAPHICS_VER(gt->i915) < 11)
		return INVALID_MMIO_REG;

	return gt->type == GT_MEDIA ?
		MTL_MEDIA_PERF_LIMIT_REASONS : GT0_PERF_LIMIT_REASONS;
}
|
||||
|
||||
void
|
||||
intel_gt_clear_error_registers(struct intel_gt *gt,
|
||||
intel_engine_mask_t engine_mask)
|
||||
@ -260,7 +268,11 @@ intel_gt_clear_error_registers(struct intel_gt *gt,
|
||||
I915_MASTER_ERROR_INTERRUPT);
|
||||
}
|
||||
|
||||
if (GRAPHICS_VER(i915) >= 12) {
|
||||
if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
|
||||
intel_gt_mcr_multicast_rmw(gt, XEHP_RING_FAULT_REG,
|
||||
RING_FAULT_VALID, 0);
|
||||
intel_gt_mcr_read_any(gt, XEHP_RING_FAULT_REG);
|
||||
} else if (GRAPHICS_VER(i915) >= 12) {
|
||||
rmw_clear(uncore, GEN12_RING_FAULT_REG, RING_FAULT_VALID);
|
||||
intel_uncore_posting_read(uncore, GEN12_RING_FAULT_REG);
|
||||
} else if (GRAPHICS_VER(i915) >= 8) {
|
||||
@ -298,6 +310,42 @@ static void gen6_check_faults(struct intel_gt *gt)
|
||||
}
|
||||
}
|
||||
|
||||
static void xehp_check_faults(struct intel_gt *gt)
{
	u32 fault;

	/*
	 * Although the fault register now lives in an MCR register range,
	 * the GAM registers are special and we only truly need to read
	 * the "primary" GAM instance rather than handling each instance
	 * individually. intel_gt_mcr_read_any() will automatically steer
	 * toward the primary instance.
	 */
	fault = intel_gt_mcr_read_any(gt, XEHP_RING_FAULT_REG);
	if (fault & RING_FAULT_VALID) {
		u32 fault_data0, fault_data1;
		u64 fault_addr;

		fault_data0 = intel_gt_mcr_read_any(gt, XEHP_FAULT_TLB_DATA0);
		fault_data1 = intel_gt_mcr_read_any(gt, XEHP_FAULT_TLB_DATA1);

		fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) |
			     ((u64)fault_data0 << 12);

		drm_dbg(&gt->i915->drm, "Unexpected fault\n"
			"\tAddr: 0x%08x_%08x\n"
			"\tAddress space: %s\n"
			"\tEngine ID: %d\n"
			"\tSource ID: %d\n"
			"\tType: %d\n",
			upper_32_bits(fault_addr), lower_32_bits(fault_addr),
			fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT",
			GEN8_RING_FAULT_ENGINE_ID(fault),
			RING_FAULT_SRCID(fault),
			RING_FAULT_FAULT_TYPE(fault));
	}
}
|
||||
|
||||
static void gen8_check_faults(struct intel_gt *gt)
|
||||
{
|
||||
struct intel_uncore *uncore = gt->uncore;
|
||||
@ -344,7 +392,9 @@ void intel_gt_check_and_clear_faults(struct intel_gt *gt)
|
||||
struct drm_i915_private *i915 = gt->i915;
|
||||
|
||||
/* From GEN8 onwards we only have one 'All Engine Fault Register' */
|
||||
if (GRAPHICS_VER(i915) >= 8)
|
||||
if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
|
||||
xehp_check_faults(gt);
|
||||
else if (GRAPHICS_VER(i915) >= 8)
|
||||
gen8_check_faults(gt);
|
||||
else if (GRAPHICS_VER(i915) >= 6)
|
||||
gen6_check_faults(gt);
|
||||
@ -807,7 +857,6 @@ static int intel_gt_tile_setup(struct intel_gt *gt, phys_addr_t phys_addr)
|
||||
}
|
||||
|
||||
intel_uncore_init_early(gt->uncore, gt);
|
||||
	intel_wakeref_auto_init(&gt->userfault_wakeref, gt->uncore->rpm);
|
||||
|
||||
ret = intel_uncore_setup_mmio(gt->uncore, phys_addr);
|
||||
if (ret)
|
||||
@ -828,7 +877,7 @@ int intel_gt_probe_all(struct drm_i915_private *i915)
|
||||
unsigned int i;
|
||||
int ret;
|
||||
|
||||
mmio_bar = GRAPHICS_VER(i915) == 2 ? GEN2_GTTMMADR_BAR : GTTMMADR_BAR;
|
||||
mmio_bar = intel_mmio_bar(GRAPHICS_VER(i915));
|
||||
phys_addr = pci_resource_start(pdev, mmio_bar);
|
||||
|
||||
/*
|
||||
@ -939,7 +988,10 @@ void intel_gt_info_print(const struct intel_gt_info *info,
|
||||
}
|
||||
|
||||
struct reg_and_bit {
|
||||
i915_reg_t reg;
|
||||
union {
|
||||
i915_reg_t reg;
|
||||
i915_mcr_reg_t mcr_reg;
|
||||
};
|
||||
u32 bit;
|
||||
};
|
||||
|
||||
@ -965,6 +1017,32 @@ get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8,
|
||||
return rb;
|
||||
}
|
||||
|
||||
/*
 * HW architecture suggests typical invalidation time at 40us,
 * with pessimistic cases up to 100us and a recommendation to
 * cap at 1ms. We go a bit higher just in case.
 */
#define TLB_INVAL_TIMEOUT_US 100
#define TLB_INVAL_TIMEOUT_MS 4

/*
 * On Xe_HP the TLB invalidation registers are located at the same MMIO offsets
 * but are now considered MCR registers. Since they exist within a GAM range,
 * the primary instance of the register rolls up the status from each unit.
 */
static int wait_for_invalidate(struct intel_gt *gt, struct reg_and_bit rb)
{
	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
		return intel_gt_mcr_wait_for_reg_fw(gt, rb.mcr_reg, rb.bit, 0,
						    TLB_INVAL_TIMEOUT_US,
						    TLB_INVAL_TIMEOUT_MS);
	else
		return __intel_wait_for_register_fw(gt->uncore, rb.reg, rb.bit, 0,
						    TLB_INVAL_TIMEOUT_US,
						    TLB_INVAL_TIMEOUT_MS,
						    NULL);
}
|
||||
|
||||
static void mmio_invalidate_full(struct intel_gt *gt)
|
||||
{
|
||||
static const i915_reg_t gen8_regs[] = {
|
||||
@ -980,6 +1058,13 @@ static void mmio_invalidate_full(struct intel_gt *gt)
|
||||
[COPY_ENGINE_CLASS] = GEN12_BLT_TLB_INV_CR,
|
||||
[COMPUTE_CLASS] = GEN12_COMPCTX_TLB_INV_CR,
|
||||
};
|
||||
static const i915_mcr_reg_t xehp_regs[] = {
|
||||
[RENDER_CLASS] = XEHP_GFX_TLB_INV_CR,
|
||||
[VIDEO_DECODE_CLASS] = XEHP_VD_TLB_INV_CR,
|
||||
[VIDEO_ENHANCEMENT_CLASS] = XEHP_VE_TLB_INV_CR,
|
||||
[COPY_ENGINE_CLASS] = XEHP_BLT_TLB_INV_CR,
|
||||
[COMPUTE_CLASS] = XEHP_COMPCTX_TLB_INV_CR,
|
||||
};
|
||||
struct drm_i915_private *i915 = gt->i915;
|
||||
struct intel_uncore *uncore = gt->uncore;
|
||||
struct intel_engine_cs *engine;
|
||||
@ -988,7 +1073,10 @@ static void mmio_invalidate_full(struct intel_gt *gt)
|
||||
const i915_reg_t *regs;
|
||||
unsigned int num = 0;
|
||||
|
||||
if (GRAPHICS_VER(i915) == 12) {
|
||||
if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
|
||||
regs = NULL;
|
||||
num = ARRAY_SIZE(xehp_regs);
|
||||
} else if (GRAPHICS_VER(i915) == 12) {
|
||||
regs = gen12_regs;
|
||||
num = ARRAY_SIZE(gen12_regs);
|
||||
} else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) {
|
||||
@ -1013,11 +1101,17 @@ static void mmio_invalidate_full(struct intel_gt *gt)
|
||||
if (!intel_engine_pm_is_awake(engine))
|
||||
continue;
|
||||
|
||||
rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
|
||||
if (!i915_mmio_reg_offset(rb.reg))
|
||||
continue;
|
||||
if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
|
||||
intel_gt_mcr_multicast_write_fw(gt,
|
||||
xehp_regs[engine->class],
|
||||
BIT(engine->instance));
|
||||
} else {
|
||||
rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
|
||||
if (!i915_mmio_reg_offset(rb.reg))
|
||||
continue;
|
||||
|
||||
intel_uncore_write_fw(uncore, rb.reg, rb.bit);
|
||||
intel_uncore_write_fw(uncore, rb.reg, rb.bit);
|
||||
}
|
||||
awake |= engine->mask;
|
||||
}
|
||||
|
||||
@ -1037,22 +1131,17 @@ static void mmio_invalidate_full(struct intel_gt *gt)
|
||||
for_each_engine_masked(engine, gt, awake, tmp) {
|
||||
struct reg_and_bit rb;
|
||||
|
||||
/*
|
||||
* HW architecture suggest typical invalidation time at 40us,
|
||||
* with pessimistic cases up to 100us and a recommendation to
|
||||
* cap at 1ms. We go a bit higher just in case.
|
||||
*/
|
||||
const unsigned int timeout_us = 100;
|
||||
const unsigned int timeout_ms = 4;
|
||||
if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
|
||||
rb.mcr_reg = xehp_regs[engine->class];
|
||||
rb.bit = BIT(engine->instance);
|
||||
} else {
|
||||
rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
|
||||
}
|
||||
|
||||
rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
|
||||
if (__intel_wait_for_register_fw(uncore,
|
||||
rb.reg, rb.bit, 0,
|
||||
timeout_us, timeout_ms,
|
||||
NULL))
|
||||
if (wait_for_invalidate(gt, rb))
|
||||
drm_err_ratelimited(>->i915->drm,
|
||||
"%s TLB invalidation did not complete in %ums!\n",
|
||||
engine->name, timeout_ms);
|
||||
engine->name, TLB_INVAL_TIMEOUT_MS);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -60,6 +60,7 @@ void intel_gt_driver_late_release_all(struct drm_i915_private *i915);
|
||||
int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout);
|
||||
|
||||
void intel_gt_check_and_clear_faults(struct intel_gt *gt);
|
||||
i915_reg_t intel_gt_perf_limit_reasons_reg(struct intel_gt *gt);
|
||||
void intel_gt_clear_error_registers(struct intel_gt *gt,
|
||||
intel_engine_mask_t engine_mask);
|
||||
|
||||
|
@ -107,7 +107,7 @@ static u32 gen9_read_clock_frequency(struct intel_uncore *uncore)
|
||||
return freq;
|
||||
}
|
||||
|
||||
static u32 gen5_read_clock_frequency(struct intel_uncore *uncore)
|
||||
static u32 gen6_read_clock_frequency(struct intel_uncore *uncore)
|
||||
{
|
||||
/*
|
||||
* PRMs say:
|
||||
@ -119,7 +119,27 @@ static u32 gen5_read_clock_frequency(struct intel_uncore *uncore)
|
||||
return 12500000;
|
||||
}
|
||||
|
||||
static u32 gen2_read_clock_frequency(struct intel_uncore *uncore)
|
||||
static u32 gen5_read_clock_frequency(struct intel_uncore *uncore)
|
||||
{
|
||||
/*
|
||||
* 63:32 increments every 1000 ns
|
||||
* 31:0 mbz
|
||||
*/
|
||||
return 1000000000 / 1000;
|
||||
}
|
||||
|
||||
static u32 g4x_read_clock_frequency(struct intel_uncore *uncore)
|
||||
{
|
||||
/*
|
||||
* 63:20 increments every 1/4 ns
|
||||
* 19:0 mbz
|
||||
*
|
||||
* -> 63:32 increments every 1024 ns
|
||||
*/
|
||||
return 1000000000 / 1024;
|
||||
}
|
||||
|
||||
static u32 gen4_read_clock_frequency(struct intel_uncore *uncore)
|
||||
{
|
||||
/*
|
||||
* PRMs say:
|
||||
@ -127,8 +147,10 @@ static u32 gen2_read_clock_frequency(struct intel_uncore *uncore)
|
||||
* "The value in this register increments once every 16
|
||||
* hclks." (through the “Clocking Configuration”
|
||||
* (“CLKCFG”) MCHBAR register)
|
||||
*
|
||||
* Testing on actual hardware has shown there is no /16.
|
||||
*/
|
||||
return RUNTIME_INFO(uncore->i915)->rawclk_freq * 1000 / 16;
|
||||
return RUNTIME_INFO(uncore->i915)->rawclk_freq * 1000;
|
||||
}
|
||||
|
||||
static u32 read_clock_frequency(struct intel_uncore *uncore)
|
||||
@ -137,10 +159,16 @@ static u32 read_clock_frequency(struct intel_uncore *uncore)
|
||||
return gen11_read_clock_frequency(uncore);
|
||||
else if (GRAPHICS_VER(uncore->i915) >= 9)
|
||||
return gen9_read_clock_frequency(uncore);
|
||||
else if (GRAPHICS_VER(uncore->i915) >= 5)
|
||||
else if (GRAPHICS_VER(uncore->i915) >= 6)
|
||||
return gen6_read_clock_frequency(uncore);
|
||||
else if (GRAPHICS_VER(uncore->i915) == 5)
|
||||
return gen5_read_clock_frequency(uncore);
|
||||
else if (IS_G4X(uncore->i915))
|
||||
return g4x_read_clock_frequency(uncore);
|
||||
else if (GRAPHICS_VER(uncore->i915) == 4)
|
||||
return gen4_read_clock_frequency(uncore);
|
||||
else
|
||||
return gen2_read_clock_frequency(uncore);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void intel_gt_init_clock_frequency(struct intel_gt *gt)
|
||||
|
@ -40,6 +40,9 @@ static const char * const intel_steering_types[] = {
|
||||
"L3BANK",
|
||||
"MSLICE",
|
||||
"LNCF",
|
||||
"GAM",
|
||||
"DSS",
|
||||
"OADDRM",
|
||||
"INSTANCE 0",
|
||||
};
|
||||
|
||||
@ -48,14 +51,23 @@ static const struct intel_mmio_range icl_l3bank_steering_table[] = {
|
||||
{},
|
||||
};
|
||||
|
||||
/*
|
||||
* Although the bspec lists more "MSLICE" ranges than shown here, some of those
|
||||
* are of a "GAM" subclass that has special rules. Thus we use a separate
|
||||
* GAM table farther down for those.
|
||||
*/
|
||||
static const struct intel_mmio_range xehpsdv_mslice_steering_table[] = {
|
||||
{ 0x004000, 0x004AFF },
|
||||
{ 0x00C800, 0x00CFFF },
|
||||
{ 0x00DD00, 0x00DDFF },
|
||||
{ 0x00E900, 0x00FFFF }, /* 0xEA00 - OxEFFF is unused */
|
||||
{},
|
||||
};
|
||||
|
||||
static const struct intel_mmio_range xehpsdv_gam_steering_table[] = {
|
||||
{ 0x004000, 0x004AFF },
|
||||
{ 0x00C800, 0x00CFFF },
|
||||
{},
|
||||
};
|
||||
|
||||
static const struct intel_mmio_range xehpsdv_lncf_steering_table[] = {
|
||||
{ 0x00B000, 0x00B0FF },
|
||||
{ 0x00D800, 0x00D8FF },
|
||||
@ -89,9 +101,47 @@ static const struct intel_mmio_range pvc_instance0_steering_table[] = {
|
||||
{},
|
||||
};
|
||||
|
||||
static const struct intel_mmio_range xelpg_instance0_steering_table[] = {
|
||||
{ 0x000B00, 0x000BFF }, /* SQIDI */
|
||||
{ 0x001000, 0x001FFF }, /* SQIDI */
|
||||
{ 0x004000, 0x0048FF }, /* GAM */
|
||||
{ 0x008700, 0x0087FF }, /* SQIDI */
|
||||
{ 0x00B000, 0x00B0FF }, /* NODE */
|
||||
{ 0x00C800, 0x00CFFF }, /* GAM */
|
||||
{ 0x00D880, 0x00D8FF }, /* NODE */
|
||||
{ 0x00DD00, 0x00DDFF }, /* OAAL2 */
|
||||
{},
|
||||
};
|
||||
|
||||
static const struct intel_mmio_range xelpg_l3bank_steering_table[] = {
|
||||
{ 0x00B100, 0x00B3FF },
|
||||
{},
|
||||
};
|
||||
|
||||
/* DSS steering is used for SLICE ranges as well */
|
||||
static const struct intel_mmio_range xelpg_dss_steering_table[] = {
|
||||
{ 0x005200, 0x0052FF }, /* SLICE */
|
||||
{ 0x005500, 0x007FFF }, /* SLICE */
|
||||
{ 0x008140, 0x00815F }, /* SLICE (0x8140-0x814F), DSS (0x8150-0x815F) */
|
||||
{ 0x0094D0, 0x00955F }, /* SLICE (0x94D0-0x951F), DSS (0x9520-0x955F) */
|
||||
{ 0x009680, 0x0096FF }, /* DSS */
|
||||
{ 0x00D800, 0x00D87F }, /* SLICE */
|
||||
{ 0x00DC00, 0x00DCFF }, /* SLICE */
|
||||
{ 0x00DE80, 0x00E8FF }, /* DSS (0xE000-0xE0FF reserved) */
|
||||
{},
|
||||
};
|
||||
|
||||
static const struct intel_mmio_range xelpmp_oaddrm_steering_table[] = {
|
||||
{ 0x393200, 0x39323F },
|
||||
{ 0x393400, 0x3934FF },
|
||||
{},
|
||||
};
|
||||
|
||||
void intel_gt_mcr_init(struct intel_gt *gt)
|
||||
{
|
||||
struct drm_i915_private *i915 = gt->i915;
|
||||
unsigned long fuse;
|
||||
int i;
|
||||
|
||||
/*
|
||||
* An mslice is unavailable only if both the meml3 for the slice is
|
||||
@ -109,14 +159,36 @@ void intel_gt_mcr_init(struct intel_gt *gt)
|
||||
drm_warn(&i915->drm, "mslice mask all zero!\n");
|
||||
}
|
||||
|
||||
if (IS_PONTEVECCHIO(i915)) {
|
||||
if (MEDIA_VER(i915) >= 13 && gt->type == GT_MEDIA) {
|
||||
gt->steering_table[OADDRM] = xelpmp_oaddrm_steering_table;
|
||||
} else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) {
|
||||
fuse = REG_FIELD_GET(GT_L3_EXC_MASK,
|
||||
intel_uncore_read(gt->uncore, XEHP_FUSE4));
|
||||
|
||||
/*
|
||||
* Despite the register field being named "exclude mask" the
|
||||
* bits actually represent enabled banks (two banks per bit).
|
||||
*/
|
||||
for_each_set_bit(i, &fuse, 3)
|
||||
gt->info.l3bank_mask |= 0x3 << 2 * i;
|
||||
|
||||
gt->steering_table[INSTANCE0] = xelpg_instance0_steering_table;
|
||||
gt->steering_table[L3BANK] = xelpg_l3bank_steering_table;
|
||||
gt->steering_table[DSS] = xelpg_dss_steering_table;
|
||||
} else if (IS_PONTEVECCHIO(i915)) {
|
||||
gt->steering_table[INSTANCE0] = pvc_instance0_steering_table;
|
||||
} else if (IS_DG2(i915)) {
|
||||
gt->steering_table[MSLICE] = xehpsdv_mslice_steering_table;
|
||||
gt->steering_table[LNCF] = dg2_lncf_steering_table;
|
||||
/*
|
||||
* No need to hook up the GAM table since it has a dedicated
|
||||
* steering control register on DG2 and can use implicit
|
||||
* steering.
|
||||
*/
|
||||
} else if (IS_XEHPSDV(i915)) {
|
||||
gt->steering_table[MSLICE] = xehpsdv_mslice_steering_table;
|
||||
gt->steering_table[LNCF] = xehpsdv_lncf_steering_table;
|
||||
gt->steering_table[GAM] = xehpsdv_gam_steering_table;
|
||||
} else if (GRAPHICS_VER(i915) >= 11 &&
|
||||
GRAPHICS_VER_FULL(i915) < IP_VER(12, 50)) {
|
||||
gt->steering_table[L3BANK] = icl_l3bank_steering_table;
|
||||
@ -134,6 +206,19 @@ void intel_gt_mcr_init(struct intel_gt *gt)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Although the rest of the driver should use MCR-specific functions to
|
||||
* read/write MCR registers, we still use the regular intel_uncore_* functions
|
||||
* internally to implement those, so we need a way for the functions in this
|
||||
* file to "cast" an i915_mcr_reg_t into an i915_reg_t.
|
||||
*/
|
||||
static i915_reg_t mcr_reg_cast(const i915_mcr_reg_t mcr)
|
||||
{
|
||||
i915_reg_t r = { .reg = mcr.reg };
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
/*
|
||||
* rw_with_mcr_steering_fw - Access a register with specific MCR steering
|
||||
* @uncore: pointer to struct intel_uncore
|
||||
@ -148,14 +233,26 @@ void intel_gt_mcr_init(struct intel_gt *gt)
|
||||
* Caller needs to make sure the relevant forcewake wells are up.
|
||||
*/
|
||||
static u32 rw_with_mcr_steering_fw(struct intel_uncore *uncore,
|
||||
i915_reg_t reg, u8 rw_flag,
|
||||
i915_mcr_reg_t reg, u8 rw_flag,
|
||||
int group, int instance, u32 value)
|
||||
{
|
||||
u32 mcr_mask, mcr_ss, mcr, old_mcr, val = 0;
|
||||
|
||||
lockdep_assert_held(&uncore->lock);
|
||||
|
||||
if (GRAPHICS_VER(uncore->i915) >= 11) {
|
||||
if (GRAPHICS_VER_FULL(uncore->i915) >= IP_VER(12, 70)) {
|
||||
/*
|
||||
* Always leave the hardware in multicast mode when doing reads
|
||||
* (see comment about Wa_22013088509 below) and only change it
|
||||
* to unicast mode when doing writes of a specific instance.
|
||||
*
|
||||
* No need to save old steering reg value.
|
||||
*/
|
||||
intel_uncore_write_fw(uncore, MTL_MCR_SELECTOR,
|
||||
REG_FIELD_PREP(MTL_MCR_GROUPID, group) |
|
||||
REG_FIELD_PREP(MTL_MCR_INSTANCEID, instance) |
|
||||
(rw_flag == FW_REG_READ ? GEN11_MCR_MULTICAST : 0));
|
||||
} else if (GRAPHICS_VER(uncore->i915) >= 11) {
|
||||
mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK;
|
||||
mcr_ss = GEN11_MCR_SLICE(group) | GEN11_MCR_SUBSLICE(instance);
|
||||
|
||||
@ -173,39 +270,53 @@ static u32 rw_with_mcr_steering_fw(struct intel_uncore *uncore,
|
||||
*/
|
||||
if (rw_flag == FW_REG_WRITE)
|
||||
mcr_mask |= GEN11_MCR_MULTICAST;
|
||||
|
||||
mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR);
|
||||
old_mcr = mcr;
|
||||
|
||||
mcr &= ~mcr_mask;
|
||||
mcr |= mcr_ss;
|
||||
intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr);
|
||||
} else {
|
||||
mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK;
|
||||
mcr_ss = GEN8_MCR_SLICE(group) | GEN8_MCR_SUBSLICE(instance);
|
||||
|
||||
mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR);
|
||||
old_mcr = mcr;
|
||||
|
||||
mcr &= ~mcr_mask;
|
||||
mcr |= mcr_ss;
|
||||
intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr);
|
||||
}
|
||||
|
||||
old_mcr = mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR);
|
||||
|
||||
mcr &= ~mcr_mask;
|
||||
mcr |= mcr_ss;
|
||||
intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr);
|
||||
|
||||
if (rw_flag == FW_REG_READ)
|
||||
val = intel_uncore_read_fw(uncore, reg);
|
||||
val = intel_uncore_read_fw(uncore, mcr_reg_cast(reg));
|
||||
else
|
||||
intel_uncore_write_fw(uncore, reg, value);
|
||||
intel_uncore_write_fw(uncore, mcr_reg_cast(reg), value);
|
||||
|
||||
mcr &= ~mcr_mask;
|
||||
mcr |= old_mcr & mcr_mask;
|
||||
|
||||
intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr);
|
||||
/*
|
||||
* For pre-MTL platforms, we need to restore the old value of the
|
||||
* steering control register to ensure that implicit steering continues
|
||||
* to behave as expected. For MTL and beyond, we need only reinstate
|
||||
* the 'multicast' bit (and only if we did a write that cleared it).
|
||||
*/
|
||||
if (GRAPHICS_VER_FULL(uncore->i915) >= IP_VER(12, 70) && rw_flag == FW_REG_WRITE)
|
||||
intel_uncore_write_fw(uncore, MTL_MCR_SELECTOR, GEN11_MCR_MULTICAST);
|
||||
else if (GRAPHICS_VER_FULL(uncore->i915) < IP_VER(12, 70))
|
||||
intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, old_mcr);
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
static u32 rw_with_mcr_steering(struct intel_uncore *uncore,
|
||||
i915_reg_t reg, u8 rw_flag,
|
||||
i915_mcr_reg_t reg, u8 rw_flag,
|
||||
int group, int instance,
|
||||
u32 value)
|
||||
{
|
||||
enum forcewake_domains fw_domains;
|
||||
u32 val;
|
||||
|
||||
fw_domains = intel_uncore_forcewake_for_reg(uncore, reg,
|
||||
fw_domains = intel_uncore_forcewake_for_reg(uncore, mcr_reg_cast(reg),
|
||||
rw_flag);
|
||||
fw_domains |= intel_uncore_forcewake_for_reg(uncore,
|
||||
GEN8_MCR_SELECTOR,
|
||||
@ -233,7 +344,7 @@ static u32 rw_with_mcr_steering(struct intel_uncore *uncore,
|
||||
* group/instance.
|
||||
*/
|
||||
u32 intel_gt_mcr_read(struct intel_gt *gt,
|
||||
i915_reg_t reg,
|
||||
i915_mcr_reg_t reg,
|
||||
int group, int instance)
|
||||
{
|
||||
return rw_with_mcr_steering(gt->uncore, reg, FW_REG_READ, group, instance, 0);
|
||||
@ -250,7 +361,7 @@ u32 intel_gt_mcr_read(struct intel_gt *gt,
|
||||
* Write an MCR register in unicast mode after steering toward a specific
|
||||
* group/instance.
|
||||
*/
|
||||
void intel_gt_mcr_unicast_write(struct intel_gt *gt, i915_reg_t reg, u32 value,
|
||||
void intel_gt_mcr_unicast_write(struct intel_gt *gt, i915_mcr_reg_t reg, u32 value,
|
||||
int group, int instance)
|
||||
{
|
||||
rw_with_mcr_steering(gt->uncore, reg, FW_REG_WRITE, group, instance, value);
|
||||
@ -265,9 +376,16 @@ void intel_gt_mcr_unicast_write(struct intel_gt *gt, i915_reg_t reg, u32 value,
|
||||
* Write an MCR register in multicast mode to update all instances.
|
||||
*/
|
||||
void intel_gt_mcr_multicast_write(struct intel_gt *gt,
|
||||
i915_reg_t reg, u32 value)
|
||||
i915_mcr_reg_t reg, u32 value)
|
||||
{
|
||||
intel_uncore_write(gt->uncore, reg, value);
|
||||
/*
|
||||
* Ensure we have multicast behavior, just in case some non-i915 agent
|
||||
* left the hardware in unicast mode.
|
||||
*/
|
||||
if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
|
||||
intel_uncore_write_fw(gt->uncore, MTL_MCR_SELECTOR, GEN11_MCR_MULTICAST);
|
||||
|
||||
intel_uncore_write(gt->uncore, mcr_reg_cast(reg), value);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -281,9 +399,44 @@ void intel_gt_mcr_multicast_write(struct intel_gt *gt,
|
||||
* domains; use intel_gt_mcr_multicast_write() in cases where forcewake should
|
||||
* be obtained automatically.
|
||||
*/
|
||||
void intel_gt_mcr_multicast_write_fw(struct intel_gt *gt, i915_reg_t reg, u32 value)
|
||||
void intel_gt_mcr_multicast_write_fw(struct intel_gt *gt, i915_mcr_reg_t reg, u32 value)
|
||||
{
|
||||
intel_uncore_write_fw(gt->uncore, reg, value);
|
||||
/*
|
||||
* Ensure we have multicast behavior, just in case some non-i915 agent
|
||||
* left the hardware in unicast mode.
|
||||
*/
|
||||
if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
|
||||
intel_uncore_write_fw(gt->uncore, MTL_MCR_SELECTOR, GEN11_MCR_MULTICAST);
|
||||
|
||||
intel_uncore_write_fw(gt->uncore, mcr_reg_cast(reg), value);
|
||||
}
|
||||
|
||||
/**
 * intel_gt_mcr_multicast_rmw - Performs a multicast RMW operation
 * @gt: GT structure
 * @reg: the MCR register to read and write
 * @clear: bits to clear during RMW
 * @set: bits to set during RMW
 *
 * Performs a read-modify-write on an MCR register in a multicast manner.
 * This operation only makes sense on MCR registers where all instances are
 * expected to have the same value. The read will target any non-terminated
 * instance and the write will be applied to all instances.
 *
 * This function assumes the caller is already holding any necessary forcewake
 * domains; use intel_gt_mcr_multicast_rmw() in cases where forcewake should
 * be obtained automatically.
 *
 * Returns the old (unmodified) value read.
 */
u32 intel_gt_mcr_multicast_rmw(struct intel_gt *gt, i915_mcr_reg_t reg,
			       u32 clear, u32 set)
{
	u32 val = intel_gt_mcr_read_any(gt, reg);

	intel_gt_mcr_multicast_write(gt, reg, (val & ~clear) | set);

	return val;
}
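Earlier in this series the same helper is what clears the sticky RING_FAULT_VALID bit across every instance of XEHP_RING_FAULT_REG in intel_gt_clear_error_registers(). A minimal usage sketch (the wrapper function here is made up for illustration):

	static void example_clear_xehp_fault(struct intel_gt *gt)
	{
		/* read one instance, then write all instances with the bit cleared */
		intel_gt_mcr_multicast_rmw(gt, XEHP_RING_FAULT_REG,
					   RING_FAULT_VALID, 0);
	}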
|
||||
|
||||
/*
|
||||
@ -301,7 +454,7 @@ void intel_gt_mcr_multicast_write_fw(struct intel_gt *gt, i915_reg_t reg, u32 va
|
||||
* for @type steering too.
|
||||
*/
|
||||
static bool reg_needs_read_steering(struct intel_gt *gt,
|
||||
i915_reg_t reg,
|
||||
i915_mcr_reg_t reg,
|
||||
enum intel_steering_type type)
|
||||
{
|
||||
const u32 offset = i915_mmio_reg_offset(reg);
|
||||
@ -332,6 +485,8 @@ static void get_nonterminated_steering(struct intel_gt *gt,
|
||||
enum intel_steering_type type,
|
||||
u8 *group, u8 *instance)
|
||||
{
|
||||
u32 dss;
|
||||
|
||||
switch (type) {
|
||||
case L3BANK:
|
||||
*group = 0; /* unused */
|
||||
@ -351,6 +506,15 @@ static void get_nonterminated_steering(struct intel_gt *gt,
|
||||
*group = __ffs(gt->info.mslice_mask) << 1;
|
||||
*instance = 0; /* unused */
|
||||
break;
|
||||
case GAM:
|
||||
*group = IS_DG2(gt->i915) ? 1 : 0;
|
||||
*instance = 0;
|
||||
break;
|
||||
case DSS:
|
||||
dss = intel_sseu_find_first_xehp_dss(&gt->info.sseu, 0, 0);
|
||||
*group = dss / GEN_DSS_PER_GSLICE;
|
||||
*instance = dss % GEN_DSS_PER_GSLICE;
|
||||
break;
|
||||
case INSTANCE0:
|
||||
/*
|
||||
* There are a lot of MCR types for which instance (0, 0)
|
||||
@ -359,6 +523,13 @@ static void get_nonterminated_steering(struct intel_gt *gt,
|
||||
*group = 0;
|
||||
*instance = 0;
|
||||
break;
|
||||
case OADDRM:
|
||||
if ((VDBOX_MASK(gt) | VEBOX_MASK(gt) | gt->info.sfc_mask) & BIT(0))
|
||||
*group = 0;
|
||||
else
|
||||
*group = 1;
|
||||
*instance = 0;
|
||||
break;
|
||||
default:
|
||||
MISSING_CASE(type);
|
||||
*group = 0;
|
||||
@ -380,7 +551,7 @@ static void get_nonterminated_steering(struct intel_gt *gt,
|
||||
* steering.
|
||||
*/
|
||||
void intel_gt_mcr_get_nonterminated_steering(struct intel_gt *gt,
|
||||
i915_reg_t reg,
|
||||
i915_mcr_reg_t reg,
|
||||
u8 *group, u8 *instance)
|
||||
{
|
||||
int type;
|
||||
@ -409,7 +580,7 @@ void intel_gt_mcr_get_nonterminated_steering(struct intel_gt *gt,
|
||||
*
|
||||
* Returns the value from a non-terminated instance of @reg.
|
||||
*/
|
||||
u32 intel_gt_mcr_read_any_fw(struct intel_gt *gt, i915_reg_t reg)
|
||||
u32 intel_gt_mcr_read_any_fw(struct intel_gt *gt, i915_mcr_reg_t reg)
|
||||
{
|
||||
int type;
|
||||
u8 group, instance;
|
||||
@ -423,7 +594,7 @@ u32 intel_gt_mcr_read_any_fw(struct intel_gt *gt, i915_reg_t reg)
|
||||
}
|
||||
}
|
||||
|
||||
return intel_uncore_read_fw(gt->uncore, reg);
|
||||
return intel_uncore_read_fw(gt->uncore, mcr_reg_cast(reg));
|
||||
}
|
||||
|
||||
/**
|
||||
@ -436,7 +607,7 @@ u32 intel_gt_mcr_read_any_fw(struct intel_gt *gt, i915_reg_t reg)
|
||||
*
|
||||
* Returns the value from a non-terminated instance of @reg.
|
||||
*/
|
||||
u32 intel_gt_mcr_read_any(struct intel_gt *gt, i915_reg_t reg)
|
||||
u32 intel_gt_mcr_read_any(struct intel_gt *gt, i915_mcr_reg_t reg)
|
||||
{
|
||||
int type;
|
||||
u8 group, instance;
|
||||
@ -450,7 +621,7 @@ u32 intel_gt_mcr_read_any(struct intel_gt *gt, i915_reg_t reg)
|
||||
}
|
||||
}
|
||||
|
||||
return intel_uncore_read(gt->uncore, reg);
|
||||
return intel_uncore_read(gt->uncore, mcr_reg_cast(reg));
|
||||
}
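A brief, hedged sketch of how the two converted read helpers differ in practice: the _fw variant expects the caller to already hold the needed forcewake domains, while the plain variant acquires them itself.

/* Illustrative only: pick the right read helper based on forcewake state. */
static u32 example_read_mcr(struct intel_gt *gt, i915_mcr_reg_t reg, bool caller_holds_fw)
{
	return caller_holds_fw ? intel_gt_mcr_read_any_fw(gt, reg) :
				 intel_gt_mcr_read_any(gt, reg);
}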
|
||||
|
||||
static void report_steering_type(struct drm_printer *p,
|
||||
@ -483,11 +654,20 @@ static void report_steering_type(struct drm_printer *p,
|
||||
void intel_gt_mcr_report_steering(struct drm_printer *p, struct intel_gt *gt,
|
||||
bool dump_table)
|
||||
{
|
||||
drm_printf(p, "Default steering: group=0x%x, instance=0x%x\n",
|
||||
gt->default_steering.groupid,
|
||||
gt->default_steering.instanceid);
|
||||
/*
|
||||
* Starting with MTL we no longer have default steering;
|
||||
* all ranges are explicitly steered.
|
||||
*/
|
||||
if (GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 70))
|
||||
drm_printf(p, "Default steering: group=0x%x, instance=0x%x\n",
|
||||
gt->default_steering.groupid,
|
||||
gt->default_steering.instanceid);
|
||||
|
||||
if (IS_PONTEVECCHIO(gt->i915)) {
|
||||
if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) {
|
||||
for (int i = 0; i < NUM_STEERING_TYPES; i++)
|
||||
if (gt->steering_table[i])
|
||||
report_steering_type(p, gt, i, dump_table);
|
||||
} else if (IS_PONTEVECCHIO(gt->i915)) {
|
||||
report_steering_type(p, gt, INSTANCE0, dump_table);
|
||||
} else if (HAS_MSLICE_STEERING(gt->i915)) {
|
||||
report_steering_type(p, gt, MSLICE, dump_table);
|
||||
@ -520,3 +700,58 @@ void intel_gt_mcr_get_ss_steering(struct intel_gt *gt, unsigned int dss,
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* intel_gt_mcr_wait_for_reg_fw - wait until MCR register matches expected state
|
||||
* @gt: GT structure
|
||||
* @reg: the register to read
|
||||
* @mask: mask to apply to register value
|
||||
* @value: value to wait for
|
||||
* @fast_timeout_us: fast timeout in microsecond for atomic/tight wait
|
||||
* @slow_timeout_ms: slow timeout in millisecond
|
||||
*
|
||||
* This routine waits until the target register @reg contains the expected
|
||||
* @value after applying the @mask, i.e. it waits until ::
|
||||
*
|
||||
* (intel_gt_mcr_read_any_fw(gt, reg) & mask) == value
|
||||
*
|
||||
* Otherwise, the wait will timeout after @slow_timeout_ms milliseconds.
|
||||
* For atomic context @slow_timeout_ms must be zero and @fast_timeout_us
|
||||
* must not be larger than 20,000 microseconds.
|
||||
*
|
||||
* This function is basically an MCR-friendly version of
|
||||
* __intel_wait_for_register_fw(). Generally this function will only be used
|
||||
* on GAM registers which are a bit special --- although they're MCR registers,
|
||||
* reads (e.g., waiting for status updates) are always directed to the primary
|
||||
* instance.
|
||||
*
|
||||
* Note that this routine assumes the caller holds forcewake asserted; it is
|
||||
* not suitable for very long waits.
|
||||
*
|
||||
* Return: 0 if the register matches the desired condition, or -ETIMEDOUT.
|
||||
*/
|
||||
int intel_gt_mcr_wait_for_reg_fw(struct intel_gt *gt,
|
||||
i915_mcr_reg_t reg,
|
||||
u32 mask,
|
||||
u32 value,
|
||||
unsigned int fast_timeout_us,
|
||||
unsigned int slow_timeout_ms)
|
||||
{
|
||||
u32 reg_value = 0;
|
||||
#define done (((reg_value = intel_gt_mcr_read_any_fw(gt, reg)) & mask) == value)
|
||||
int ret;
|
||||
|
||||
/* Catch any overuse of this function */
|
||||
might_sleep_if(slow_timeout_ms);
|
||||
GEM_BUG_ON(fast_timeout_us > 20000);
|
||||
GEM_BUG_ON(!fast_timeout_us && !slow_timeout_ms);
|
||||
|
||||
ret = -ETIMEDOUT;
|
||||
if (fast_timeout_us && fast_timeout_us <= 20000)
|
||||
ret = _wait_for_atomic(done, fast_timeout_us, 0);
|
||||
if (ret && slow_timeout_ms)
|
||||
ret = wait_for(done, slow_timeout_ms);
|
||||
|
||||
return ret;
|
||||
#undef done
|
||||
}
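A hedged usage sketch for the new polling helper; the timeouts and completion bit are made up, and the caller is assumed to already hold forcewake as the kernel-doc above requires.

/* Illustrative only: poll an MCR register for a completion bit. */
static int example_wait_for_done(struct intel_gt *gt, i915_mcr_reg_t reg, u32 done_bit)
{
	/* Spin for up to 100 us, then sleep-wait for up to 5 ms. */
	return intel_gt_mcr_wait_for_reg_fw(gt, reg, done_bit, done_bit, 100, 5);
}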
|
||||
|
@ -11,21 +11,24 @@
|
||||
void intel_gt_mcr_init(struct intel_gt *gt);
|
||||
|
||||
u32 intel_gt_mcr_read(struct intel_gt *gt,
|
||||
i915_reg_t reg,
|
||||
i915_mcr_reg_t reg,
|
||||
int group, int instance);
|
||||
u32 intel_gt_mcr_read_any_fw(struct intel_gt *gt, i915_reg_t reg);
|
||||
u32 intel_gt_mcr_read_any(struct intel_gt *gt, i915_reg_t reg);
|
||||
u32 intel_gt_mcr_read_any_fw(struct intel_gt *gt, i915_mcr_reg_t reg);
|
||||
u32 intel_gt_mcr_read_any(struct intel_gt *gt, i915_mcr_reg_t reg);
|
||||
|
||||
void intel_gt_mcr_unicast_write(struct intel_gt *gt,
|
||||
i915_reg_t reg, u32 value,
|
||||
i915_mcr_reg_t reg, u32 value,
|
||||
int group, int instance);
|
||||
void intel_gt_mcr_multicast_write(struct intel_gt *gt,
|
||||
i915_reg_t reg, u32 value);
|
||||
i915_mcr_reg_t reg, u32 value);
|
||||
void intel_gt_mcr_multicast_write_fw(struct intel_gt *gt,
|
||||
i915_reg_t reg, u32 value);
|
||||
i915_mcr_reg_t reg, u32 value);
|
||||
|
||||
u32 intel_gt_mcr_multicast_rmw(struct intel_gt *gt, i915_mcr_reg_t reg,
|
||||
u32 clear, u32 set);
|
||||
|
||||
void intel_gt_mcr_get_nonterminated_steering(struct intel_gt *gt,
|
||||
i915_reg_t reg,
|
||||
i915_mcr_reg_t reg,
|
||||
u8 *group, u8 *instance);
|
||||
|
||||
void intel_gt_mcr_report_steering(struct drm_printer *p, struct intel_gt *gt,
|
||||
@ -34,6 +37,13 @@ void intel_gt_mcr_report_steering(struct drm_printer *p, struct intel_gt *gt,
|
||||
void intel_gt_mcr_get_ss_steering(struct intel_gt *gt, unsigned int dss,
|
||||
unsigned int *group, unsigned int *instance);
|
||||
|
||||
int intel_gt_mcr_wait_for_reg_fw(struct intel_gt *gt,
|
||||
i915_mcr_reg_t reg,
|
||||
u32 mask,
|
||||
u32 value,
|
||||
unsigned int fast_timeout_us,
|
||||
unsigned int slow_timeout_ms);
|
||||
|
||||
/*
|
||||
* Helper for for_each_ss_steering loop. On pre-Xe_HP platforms, subslice
|
||||
* presence is determined by using the group/instance as direct lookups in the
|
||||
|
@ -344,162 +344,7 @@ void intel_gt_pm_frequency_dump(struct intel_gt *gt, struct drm_printer *p)
|
||||
drm_printf(p, "efficient (RPe) frequency: %d MHz\n",
|
||||
intel_gpu_freq(rps, rps->efficient_freq));
|
||||
} else if (GRAPHICS_VER(i915) >= 6) {
|
||||
u32 rp_state_limits;
|
||||
u32 gt_perf_status;
|
||||
struct intel_rps_freq_caps caps;
|
||||
u32 rpmodectl, rpinclimit, rpdeclimit;
|
||||
u32 rpstat, cagf, reqf;
|
||||
u32 rpcurupei, rpcurup, rpprevup;
|
||||
u32 rpcurdownei, rpcurdown, rpprevdown;
|
||||
u32 rpupei, rpupt, rpdownei, rpdownt;
|
||||
u32 pm_ier, pm_imr, pm_isr, pm_iir, pm_mask;
|
||||
|
||||
rp_state_limits = intel_uncore_read(uncore, GEN6_RP_STATE_LIMITS);
|
||||
gen6_rps_get_freq_caps(rps, &caps);
|
||||
if (IS_GEN9_LP(i915))
|
||||
gt_perf_status = intel_uncore_read(uncore, BXT_GT_PERF_STATUS);
|
||||
else
|
||||
gt_perf_status = intel_uncore_read(uncore, GEN6_GT_PERF_STATUS);
|
||||
|
||||
/* RPSTAT1 is in the GT power well */
|
||||
intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
|
||||
|
||||
reqf = intel_uncore_read(uncore, GEN6_RPNSWREQ);
|
||||
if (GRAPHICS_VER(i915) >= 9) {
|
||||
reqf >>= 23;
|
||||
} else {
|
||||
reqf &= ~GEN6_TURBO_DISABLE;
|
||||
if (IS_HASWELL(i915) || IS_BROADWELL(i915))
|
||||
reqf >>= 24;
|
||||
else
|
||||
reqf >>= 25;
|
||||
}
|
||||
reqf = intel_gpu_freq(rps, reqf);
|
||||
|
||||
rpmodectl = intel_uncore_read(uncore, GEN6_RP_CONTROL);
|
||||
rpinclimit = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD);
|
||||
rpdeclimit = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD);
|
||||
|
||||
rpstat = intel_uncore_read(uncore, GEN6_RPSTAT1);
|
||||
rpcurupei = intel_uncore_read(uncore, GEN6_RP_CUR_UP_EI) & GEN6_CURICONT_MASK;
|
||||
rpcurup = intel_uncore_read(uncore, GEN6_RP_CUR_UP) & GEN6_CURBSYTAVG_MASK;
|
||||
rpprevup = intel_uncore_read(uncore, GEN6_RP_PREV_UP) & GEN6_CURBSYTAVG_MASK;
|
||||
rpcurdownei = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN_EI) & GEN6_CURIAVG_MASK;
|
||||
rpcurdown = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN) & GEN6_CURBSYTAVG_MASK;
|
||||
rpprevdown = intel_uncore_read(uncore, GEN6_RP_PREV_DOWN) & GEN6_CURBSYTAVG_MASK;
|
||||
|
||||
rpupei = intel_uncore_read(uncore, GEN6_RP_UP_EI);
|
||||
rpupt = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD);
|
||||
|
||||
rpdownei = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
|
||||
rpdownt = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD);
|
||||
|
||||
cagf = intel_rps_read_actual_frequency(rps);
|
||||
|
||||
intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
|
||||
|
||||
if (GRAPHICS_VER(i915) >= 11) {
|
||||
pm_ier = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE);
|
||||
pm_imr = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_MASK);
|
||||
/*
|
||||
* The equivalent to the PM ISR & IIR cannot be read
|
||||
* without affecting the current state of the system
|
||||
*/
|
||||
pm_isr = 0;
|
||||
pm_iir = 0;
|
||||
} else if (GRAPHICS_VER(i915) >= 8) {
|
||||
pm_ier = intel_uncore_read(uncore, GEN8_GT_IER(2));
|
||||
pm_imr = intel_uncore_read(uncore, GEN8_GT_IMR(2));
|
||||
pm_isr = intel_uncore_read(uncore, GEN8_GT_ISR(2));
|
||||
pm_iir = intel_uncore_read(uncore, GEN8_GT_IIR(2));
|
||||
} else {
|
||||
pm_ier = intel_uncore_read(uncore, GEN6_PMIER);
|
||||
pm_imr = intel_uncore_read(uncore, GEN6_PMIMR);
|
||||
pm_isr = intel_uncore_read(uncore, GEN6_PMISR);
|
||||
pm_iir = intel_uncore_read(uncore, GEN6_PMIIR);
|
||||
}
|
||||
pm_mask = intel_uncore_read(uncore, GEN6_PMINTRMSK);
|
||||
|
||||
drm_printf(p, "Video Turbo Mode: %s\n",
|
||||
str_yes_no(rpmodectl & GEN6_RP_MEDIA_TURBO));
|
||||
drm_printf(p, "HW control enabled: %s\n",
|
||||
str_yes_no(rpmodectl & GEN6_RP_ENABLE));
|
||||
drm_printf(p, "SW control enabled: %s\n",
|
||||
str_yes_no((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) == GEN6_RP_MEDIA_SW_MODE));
|
||||
|
||||
drm_printf(p, "PM IER=0x%08x IMR=0x%08x, MASK=0x%08x\n",
|
||||
pm_ier, pm_imr, pm_mask);
|
||||
if (GRAPHICS_VER(i915) <= 10)
|
||||
drm_printf(p, "PM ISR=0x%08x IIR=0x%08x\n",
|
||||
pm_isr, pm_iir);
|
||||
drm_printf(p, "pm_intrmsk_mbz: 0x%08x\n",
|
||||
rps->pm_intrmsk_mbz);
|
||||
drm_printf(p, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status);
|
||||
drm_printf(p, "Render p-state ratio: %d\n",
|
||||
(gt_perf_status & (GRAPHICS_VER(i915) >= 9 ? 0x1ff00 : 0xff00)) >> 8);
|
||||
drm_printf(p, "Render p-state VID: %d\n",
|
||||
gt_perf_status & 0xff);
|
||||
drm_printf(p, "Render p-state limit: %d\n",
|
||||
rp_state_limits & 0xff);
|
||||
drm_printf(p, "RPSTAT1: 0x%08x\n", rpstat);
|
||||
drm_printf(p, "RPMODECTL: 0x%08x\n", rpmodectl);
|
||||
drm_printf(p, "RPINCLIMIT: 0x%08x\n", rpinclimit);
|
||||
drm_printf(p, "RPDECLIMIT: 0x%08x\n", rpdeclimit);
|
||||
drm_printf(p, "RPNSWREQ: %dMHz\n", reqf);
|
||||
drm_printf(p, "CAGF: %dMHz\n", cagf);
|
||||
drm_printf(p, "RP CUR UP EI: %d (%lldns)\n",
|
||||
rpcurupei,
|
||||
intel_gt_pm_interval_to_ns(gt, rpcurupei));
|
||||
drm_printf(p, "RP CUR UP: %d (%lldns)\n",
|
||||
rpcurup, intel_gt_pm_interval_to_ns(gt, rpcurup));
|
||||
drm_printf(p, "RP PREV UP: %d (%lldns)\n",
|
||||
rpprevup, intel_gt_pm_interval_to_ns(gt, rpprevup));
|
||||
drm_printf(p, "Up threshold: %d%%\n",
|
||||
rps->power.up_threshold);
|
||||
drm_printf(p, "RP UP EI: %d (%lldns)\n",
|
||||
rpupei, intel_gt_pm_interval_to_ns(gt, rpupei));
|
||||
drm_printf(p, "RP UP THRESHOLD: %d (%lldns)\n",
|
||||
rpupt, intel_gt_pm_interval_to_ns(gt, rpupt));
|
||||
|
||||
drm_printf(p, "RP CUR DOWN EI: %d (%lldns)\n",
|
||||
rpcurdownei,
|
||||
intel_gt_pm_interval_to_ns(gt, rpcurdownei));
|
||||
drm_printf(p, "RP CUR DOWN: %d (%lldns)\n",
|
||||
rpcurdown,
|
||||
intel_gt_pm_interval_to_ns(gt, rpcurdown));
|
||||
drm_printf(p, "RP PREV DOWN: %d (%lldns)\n",
|
||||
rpprevdown,
|
||||
intel_gt_pm_interval_to_ns(gt, rpprevdown));
|
||||
drm_printf(p, "Down threshold: %d%%\n",
|
||||
rps->power.down_threshold);
|
||||
drm_printf(p, "RP DOWN EI: %d (%lldns)\n",
|
||||
rpdownei, intel_gt_pm_interval_to_ns(gt, rpdownei));
|
||||
drm_printf(p, "RP DOWN THRESHOLD: %d (%lldns)\n",
|
||||
rpdownt, intel_gt_pm_interval_to_ns(gt, rpdownt));
|
||||
|
||||
drm_printf(p, "Lowest (RPN) frequency: %dMHz\n",
|
||||
intel_gpu_freq(rps, caps.min_freq));
|
||||
drm_printf(p, "Nominal (RP1) frequency: %dMHz\n",
|
||||
intel_gpu_freq(rps, caps.rp1_freq));
|
||||
drm_printf(p, "Max non-overclocked (RP0) frequency: %dMHz\n",
|
||||
intel_gpu_freq(rps, caps.rp0_freq));
|
||||
drm_printf(p, "Max overclocked frequency: %dMHz\n",
|
||||
intel_gpu_freq(rps, rps->max_freq));
|
||||
|
||||
drm_printf(p, "Current freq: %d MHz\n",
|
||||
intel_gpu_freq(rps, rps->cur_freq));
|
||||
drm_printf(p, "Actual freq: %d MHz\n", cagf);
|
||||
drm_printf(p, "Idle freq: %d MHz\n",
|
||||
intel_gpu_freq(rps, rps->idle_freq));
|
||||
drm_printf(p, "Min freq: %d MHz\n",
|
||||
intel_gpu_freq(rps, rps->min_freq));
|
||||
drm_printf(p, "Boost freq: %d MHz\n",
|
||||
intel_gpu_freq(rps, rps->boost_freq));
|
||||
drm_printf(p, "Max freq: %d MHz\n",
|
||||
intel_gpu_freq(rps, rps->max_freq));
|
||||
drm_printf(p,
|
||||
"efficient (RPe) frequency: %d MHz\n",
|
||||
intel_gpu_freq(rps, rps->efficient_freq));
|
||||
gen6_rps_frequency_dump(rps, p);
|
||||
} else {
|
||||
drm_puts(p, "no P-state info available\n");
|
||||
}
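The removed block reads live RPS registers inside a forcewake bracket before handing the work to gen6_rps_frequency_dump(); a minimal sketch of that pattern, with the register choice purely illustrative.

/* Illustrative only: GT-power-well registers are read under forcewake. */
static u32 example_read_rpstat(struct intel_gt *gt)
{
	struct intel_uncore *uncore = gt->uncore;
	u32 val;

	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
	val = intel_uncore_read(uncore, GEN6_RPSTAT1);
	intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);

	return val;
}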
|
||||
@ -655,6 +500,44 @@ static bool rps_eval(void *data)
|
||||
|
||||
DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(rps_boost);
|
||||
|
||||
static int perf_limit_reasons_get(void *data, u64 *val)
|
||||
{
|
||||
struct intel_gt *gt = data;
|
||||
intel_wakeref_t wakeref;
|
||||
|
||||
with_intel_runtime_pm(gt->uncore->rpm, wakeref)
|
||||
*val = intel_uncore_read(gt->uncore, intel_gt_perf_limit_reasons_reg(gt));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int perf_limit_reasons_clear(void *data, u64 val)
|
||||
{
|
||||
struct intel_gt *gt = data;
|
||||
intel_wakeref_t wakeref;
|
||||
|
||||
/*
|
||||
* Clear the upper 16 "log" bits, the lower 16 "status" bits are
|
||||
* read-only. The upper 16 "log" bits are identical to the lower 16
|
||||
* "status" bits except that the "log" bits remain set until cleared.
|
||||
*/
|
||||
with_intel_runtime_pm(gt->uncore->rpm, wakeref)
|
||||
intel_uncore_rmw(gt->uncore, intel_gt_perf_limit_reasons_reg(gt),
|
||||
GT0_PERF_LIMIT_REASONS_LOG_MASK, 0);
|
||||
|
||||
return 0;
|
||||
}
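As a hedged illustration of the log/status split described in the comment above (the helper is hypothetical): each sticky "log" bit sits 16 bits above its live "status" counterpart.

/* Illustrative only: map a live status bit to its sticky log bit. */
static bool example_reason_was_hit(u32 perf_limit_reasons, u32 status_bit)
{
	return perf_limit_reasons & (status_bit << 16);
}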
|
||||
|
||||
static bool perf_limit_reasons_eval(void *data)
|
||||
{
|
||||
struct intel_gt *gt = data;
|
||||
|
||||
return i915_mmio_reg_valid(intel_gt_perf_limit_reasons_reg(gt));
|
||||
}
|
||||
|
||||
DEFINE_SIMPLE_ATTRIBUTE(perf_limit_reasons_fops, perf_limit_reasons_get,
|
||||
perf_limit_reasons_clear, "%llu\n");
|
||||
|
||||
void intel_gt_pm_debugfs_register(struct intel_gt *gt, struct dentry *root)
|
||||
{
|
||||
static const struct intel_gt_debugfs_file files[] = {
|
||||
@ -664,6 +547,7 @@ void intel_gt_pm_debugfs_register(struct intel_gt *gt, struct dentry *root)
|
||||
{ "forcewake_user", &forcewake_user_fops, NULL},
|
||||
{ "llc", &llc_fops, llc_eval },
|
||||
{ "rps_boost", &rps_boost_fops, rps_eval },
|
||||
{ "perf_limit_reasons", &perf_limit_reasons_fops, perf_limit_reasons_eval },
|
||||
};
|
||||
|
||||
intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), gt);
|
||||
|
@ -8,6 +8,19 @@
|
||||
|
||||
#include "i915_reg_defs.h"
|
||||
|
||||
#define MCR_REG(offset) ((const i915_mcr_reg_t){ .reg = (offset) })
|
||||
|
||||
/*
|
||||
* The perf control registers are technically multicast registers, but the
|
||||
* driver never needs to read/write them directly; we only use them to build
|
||||
* lists of registers (where they're mixed in with other non-MCR registers)
|
||||
* and then operate on the offset directly. For now we'll just define them
|
||||
* as non-multicast so we can place them on the same list, but we may want
|
||||
* to try to come up with a better way to handle heterogeneous lists of
|
||||
* registers in the future.
|
||||
*/
|
||||
#define PERF_REG(offset) _MMIO(offset)
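The comment above motivates keeping two register flavors: true multicast registers get the new i915_mcr_reg_t via MCR_REG(), while the perf control registers stay plain _MMIO. A hedged sketch of why the distinct type matters; the types below are illustrative stand-ins, not the driver's definitions.

/* Illustrative only: structurally identical but distinct register types. */
typedef struct { u32 reg; } demo_reg_t;		/* stand-in for i915_reg_t */
typedef struct { u32 reg; } demo_mcr_reg_t;	/* stand-in for i915_mcr_reg_t */

static inline u32 demo_plain_offset(demo_reg_t reg)
{
	return reg.reg;	/* a plain accessor only accepts demo_reg_t */
}

/*
 * demo_plain_offset((demo_mcr_reg_t){ .reg = 0xe458 }) would not compile,
 * which is the point: multicast registers can no longer be handed to
 * non-steering accessors by accident.
 */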
|
||||
|
||||
/* RPM unit config (Gen8+) */
|
||||
#define RPM_CONFIG0 _MMIO(0xd00)
|
||||
#define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT 3
|
||||
@ -39,12 +52,17 @@
|
||||
#define FORCEWAKE_ACK_RENDER_GEN9 _MMIO(0xd84)
|
||||
#define FORCEWAKE_ACK_MEDIA_GEN9 _MMIO(0xd88)
|
||||
|
||||
#define FORCEWAKE_ACK_GSC _MMIO(0xdf8)
|
||||
#define FORCEWAKE_ACK_GT_MTL _MMIO(0xdfc)
|
||||
|
||||
#define GMD_ID_GRAPHICS _MMIO(0xd8c)
|
||||
#define GMD_ID_MEDIA _MMIO(MTL_MEDIA_GSI_BASE + 0xd8c)
|
||||
|
||||
#define MCFG_MCR_SELECTOR _MMIO(0xfd0)
|
||||
#define MTL_MCR_SELECTOR _MMIO(0xfd4)
|
||||
#define SF_MCR_SELECTOR _MMIO(0xfd8)
|
||||
#define GEN8_MCR_SELECTOR _MMIO(0xfdc)
|
||||
#define GAM_MCR_SELECTOR _MMIO(0xfe0)
|
||||
#define GEN8_MCR_SLICE(slice) (((slice) & 3) << 26)
|
||||
#define GEN8_MCR_SLICE_MASK GEN8_MCR_SLICE(3)
|
||||
#define GEN8_MCR_SUBSLICE(subslice) (((subslice) & 3) << 24)
|
||||
@ -54,6 +72,8 @@
|
||||
#define GEN11_MCR_SLICE_MASK GEN11_MCR_SLICE(0xf)
|
||||
#define GEN11_MCR_SUBSLICE(subslice) (((subslice) & 0x7) << 24)
|
||||
#define GEN11_MCR_SUBSLICE_MASK GEN11_MCR_SUBSLICE(0x7)
|
||||
#define MTL_MCR_GROUPID REG_GENMASK(11, 8)
|
||||
#define MTL_MCR_INSTANCEID REG_GENMASK(3, 0)
|
||||
|
||||
#define IPEIR_I965 _MMIO(0x2064)
|
||||
#define IPEHR_I965 _MMIO(0x2068)
|
||||
@ -329,11 +349,12 @@
|
||||
#define GEN7_TLB_RD_ADDR _MMIO(0x4700)
|
||||
|
||||
#define GEN12_PAT_INDEX(index) _MMIO(0x4800 + (index) * 4)
|
||||
#define XEHP_PAT_INDEX(index) MCR_REG(0x4800 + (index) * 4)
|
||||
|
||||
#define XEHP_TILE0_ADDR_RANGE _MMIO(0x4900)
|
||||
#define XEHP_TILE0_ADDR_RANGE MCR_REG(0x4900)
|
||||
#define XEHP_TILE_LMEM_RANGE_SHIFT 8
|
||||
|
||||
#define XEHP_FLAT_CCS_BASE_ADDR _MMIO(0x4910)
|
||||
#define XEHP_FLAT_CCS_BASE_ADDR MCR_REG(0x4910)
|
||||
#define XEHP_CCS_BASE_SHIFT 8
|
||||
|
||||
#define GAMTARBMODE _MMIO(0x4a08)
|
||||
@ -383,17 +404,18 @@
|
||||
#define CHICKEN_RASTER_2 _MMIO(0x6208)
|
||||
#define TBIMR_FAST_CLIP REG_BIT(5)
|
||||
|
||||
#define VFLSKPD _MMIO(0x62a8)
|
||||
#define VFLSKPD MCR_REG(0x62a8)
|
||||
#define DIS_OVER_FETCH_CACHE REG_BIT(1)
|
||||
#define DIS_MULT_MISS_RD_SQUASH REG_BIT(0)
|
||||
|
||||
#define FF_MODE2 _MMIO(0x6604)
|
||||
#define GEN12_FF_MODE2 _MMIO(0x6604)
|
||||
#define XEHP_FF_MODE2 MCR_REG(0x6604)
|
||||
#define FF_MODE2_GS_TIMER_MASK REG_GENMASK(31, 24)
|
||||
#define FF_MODE2_GS_TIMER_224 REG_FIELD_PREP(FF_MODE2_GS_TIMER_MASK, 224)
|
||||
#define FF_MODE2_TDS_TIMER_MASK REG_GENMASK(23, 16)
|
||||
#define FF_MODE2_TDS_TIMER_128 REG_FIELD_PREP(FF_MODE2_TDS_TIMER_MASK, 4)
|
||||
|
||||
#define XEHPG_INSTDONE_GEOM_SVG _MMIO(0x666c)
|
||||
#define XEHPG_INSTDONE_GEOM_SVG MCR_REG(0x666c)
|
||||
|
||||
#define CACHE_MODE_0_GEN7 _MMIO(0x7000) /* IVB+ */
|
||||
#define RC_OP_FLUSH_ENABLE (1 << 0)
|
||||
@ -421,6 +443,7 @@
|
||||
#define HIZ_CHICKEN _MMIO(0x7018)
|
||||
#define CHV_HZ_8X8_MODE_IN_1X REG_BIT(15)
|
||||
#define DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE REG_BIT(14)
|
||||
#define HZ_DEPTH_TEST_LE_GE_OPT_DISABLE REG_BIT(13)
|
||||
#define BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE REG_BIT(3)
|
||||
|
||||
#define GEN8_L3CNTLREG _MMIO(0x7034)
|
||||
@ -442,23 +465,16 @@
|
||||
#define GEN8_HDC_CHICKEN1 _MMIO(0x7304)
|
||||
|
||||
#define GEN11_COMMON_SLICE_CHICKEN3 _MMIO(0x7304)
|
||||
#define XEHP_COMMON_SLICE_CHICKEN3 MCR_REG(0x7304)
|
||||
#define DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN REG_BIT(12)
|
||||
#define XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE REG_BIT(12)
|
||||
#define GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC REG_BIT(11)
|
||||
#define GEN12_DISABLE_CPS_AWARE_COLOR_PIPE REG_BIT(9)
|
||||
|
||||
/* GEN9 chicken */
|
||||
#define SLICE_ECO_CHICKEN0 _MMIO(0x7308)
|
||||
#define PIXEL_MASK_CAMMING_DISABLE (1 << 14)
|
||||
|
||||
#define GEN9_SLICE_COMMON_ECO_CHICKEN0 _MMIO(0x7308)
|
||||
#define DISABLE_PIXEL_MASK_CAMMING (1 << 14)
|
||||
|
||||
#define GEN9_SLICE_COMMON_ECO_CHICKEN1 _MMIO(0x731c)
|
||||
#define GEN11_STATE_CACHE_REDIRECT_TO_CS (1 << 11)
|
||||
|
||||
#define SLICE_COMMON_ECO_CHICKEN1 _MMIO(0x731c)
|
||||
#define XEHP_SLICE_COMMON_ECO_CHICKEN1 MCR_REG(0x731c)
|
||||
#define MSC_MSAA_REODER_BUF_BYPASS_DISABLE REG_BIT(14)
|
||||
#define GEN11_STATE_CACHE_REDIRECT_TO_CS (1 << 11)
|
||||
|
||||
#define GEN9_SLICE_PGCTL_ACK(slice) _MMIO(0x804c + (slice) * 0x4)
|
||||
#define GEN10_SLICE_PGCTL_ACK(slice) _MMIO(0x804c + ((slice) / 3) * 0x34 + \
|
||||
@ -485,9 +501,12 @@
|
||||
#define VF_PREEMPTION _MMIO(0x83a4)
|
||||
#define PREEMPTION_VERTEX_COUNT REG_GENMASK(15, 0)
|
||||
|
||||
#define VFG_PREEMPTION_CHICKEN _MMIO(0x83b4)
|
||||
#define POLYGON_TRIFAN_LINELOOP_DISABLE REG_BIT(4)
|
||||
|
||||
#define GEN8_RC6_CTX_INFO _MMIO(0x8504)
|
||||
|
||||
#define GEN12_SQCM _MMIO(0x8724)
|
||||
#define XEHP_SQCM MCR_REG(0x8724)
|
||||
#define EN_32B_ACCESS REG_BIT(30)
|
||||
|
||||
#define HSW_IDICR _MMIO(0x9008)
|
||||
@ -519,6 +538,8 @@
|
||||
#define GEN6_MBCTL_BOOT_FETCH_MECH (1 << 0)
|
||||
|
||||
/* Fuse readout registers for GT */
|
||||
#define XEHP_FUSE4 _MMIO(0x9114)
|
||||
#define GT_L3_EXC_MASK REG_GENMASK(6, 4)
|
||||
#define GEN10_MIRROR_FUSE3 _MMIO(0x9118)
|
||||
#define GEN10_L3BANK_PAIR_COUNT 4
|
||||
#define GEN10_L3BANK_MASK 0x0F
|
||||
@ -647,6 +668,9 @@
|
||||
|
||||
#define GEN7_MISCCPCTL _MMIO(0x9424)
|
||||
#define GEN7_DOP_CLOCK_GATE_ENABLE (1 << 0)
|
||||
|
||||
#define GEN8_MISCCPCTL MCR_REG(0x9424)
|
||||
#define GEN8_DOP_CLOCK_GATE_ENABLE REG_BIT(0)
|
||||
#define GEN12_DOP_CLOCK_GATE_RENDER_ENABLE REG_BIT(1)
|
||||
#define GEN8_DOP_CLOCK_GATE_CFCLK_ENABLE (1 << 2)
|
||||
#define GEN8_DOP_CLOCK_GATE_GUC_ENABLE (1 << 4)
|
||||
@ -700,7 +724,8 @@
|
||||
#define GAMTLBVEBOX0_CLKGATE_DIS REG_BIT(16)
|
||||
#define LTCDD_CLKGATE_DIS REG_BIT(10)
|
||||
|
||||
#define SLICE_UNIT_LEVEL_CLKGATE _MMIO(0x94d4)
|
||||
#define GEN11_SLICE_UNIT_LEVEL_CLKGATE _MMIO(0x94d4)
|
||||
#define XEHP_SLICE_UNIT_LEVEL_CLKGATE MCR_REG(0x94d4)
|
||||
#define SARBUNIT_CLKGATE_DIS (1 << 5)
|
||||
#define RCCUNIT_CLKGATE_DIS (1 << 7)
|
||||
#define MSCUNIT_CLKGATE_DIS (1 << 10)
|
||||
@ -708,27 +733,27 @@
|
||||
#define L3_CLKGATE_DIS REG_BIT(16)
|
||||
#define L3_CR2X_CLKGATE_DIS REG_BIT(17)
|
||||
|
||||
#define SCCGCTL94DC _MMIO(0x94dc)
|
||||
#define SCCGCTL94DC MCR_REG(0x94dc)
|
||||
#define CG3DDISURB REG_BIT(14)
|
||||
|
||||
#define UNSLICE_UNIT_LEVEL_CLKGATE2 _MMIO(0x94e4)
|
||||
#define VSUNIT_CLKGATE_DIS_TGL REG_BIT(19)
|
||||
#define PSDUNIT_CLKGATE_DIS REG_BIT(5)
|
||||
|
||||
#define SUBSLICE_UNIT_LEVEL_CLKGATE _MMIO(0x9524)
|
||||
#define GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE MCR_REG(0x9524)
|
||||
#define DSS_ROUTER_CLKGATE_DIS REG_BIT(28)
|
||||
#define GWUNIT_CLKGATE_DIS REG_BIT(16)
|
||||
|
||||
#define SUBSLICE_UNIT_LEVEL_CLKGATE2 _MMIO(0x9528)
|
||||
#define SUBSLICE_UNIT_LEVEL_CLKGATE2 MCR_REG(0x9528)
|
||||
#define CPSSUNIT_CLKGATE_DIS REG_BIT(9)
|
||||
|
||||
#define SSMCGCTL9530 _MMIO(0x9530)
|
||||
#define SSMCGCTL9530 MCR_REG(0x9530)
|
||||
#define RTFUNIT_CLKGATE_DIS REG_BIT(18)
|
||||
|
||||
#define GEN10_DFR_RATIO_EN_AND_CHICKEN _MMIO(0x9550)
|
||||
#define GEN10_DFR_RATIO_EN_AND_CHICKEN MCR_REG(0x9550)
|
||||
#define DFR_DISABLE (1 << 9)
|
||||
|
||||
#define INF_UNIT_LEVEL_CLKGATE _MMIO(0x9560)
|
||||
#define INF_UNIT_LEVEL_CLKGATE MCR_REG(0x9560)
|
||||
#define CGPSF_CLKGATE_DIS (1 << 3)
|
||||
|
||||
#define MICRO_BP0_0 _MMIO(0x9800)
|
||||
@ -901,6 +926,8 @@
|
||||
#define FORCEWAKE_MEDIA_VDBOX_GEN11(n) _MMIO(0xa540 + (n) * 4)
|
||||
#define FORCEWAKE_MEDIA_VEBOX_GEN11(n) _MMIO(0xa560 + (n) * 4)
|
||||
|
||||
#define FORCEWAKE_REQ_GSC _MMIO(0xa618)
|
||||
|
||||
#define CHV_POWER_SS0_SIG1 _MMIO(0xa720)
|
||||
#define CHV_POWER_SS0_SIG2 _MMIO(0xa724)
|
||||
#define CHV_POWER_SS1_SIG1 _MMIO(0xa728)
|
||||
@ -938,7 +965,8 @@
|
||||
|
||||
/* MOCS (Memory Object Control State) registers */
|
||||
#define GEN9_LNCFCMOCS(i) _MMIO(0xb020 + (i) * 4) /* L3 Cache Control */
|
||||
#define GEN9_LNCFCMOCS_REG_COUNT 32
|
||||
#define XEHP_LNCFCMOCS(i) MCR_REG(0xb020 + (i) * 4)
|
||||
#define LNCFCMOCS_REG_COUNT 32
|
||||
|
||||
#define GEN7_L3CNTLREG3 _MMIO(0xb024)
|
||||
|
||||
@ -954,15 +982,10 @@
|
||||
#define GEN7_L3LOG(slice, i) _MMIO(0xb070 + (slice) * 0x200 + (i) * 4)
|
||||
#define GEN7_L3LOG_SIZE 0x80
|
||||
|
||||
#define GEN10_SCRATCH_LNCF2 _MMIO(0xb0a0)
|
||||
#define PMFLUSHDONE_LNICRSDROP (1 << 20)
|
||||
#define PMFLUSH_GAPL3UNBLOCK (1 << 21)
|
||||
#define PMFLUSHDONE_LNEBLK (1 << 22)
|
||||
|
||||
#define XEHP_L3NODEARBCFG _MMIO(0xb0b4)
|
||||
#define XEHP_L3NODEARBCFG MCR_REG(0xb0b4)
|
||||
#define XEHP_LNESPARE REG_BIT(19)
|
||||
|
||||
#define GEN8_L3SQCREG1 _MMIO(0xb100)
|
||||
#define GEN8_L3SQCREG1 MCR_REG(0xb100)
|
||||
/*
|
||||
* Note that on CHV the following has an off-by-one error wrt. BSpec.
|
||||
* Using the formula in BSpec leads to a hang, while the formula here works
|
||||
@ -973,31 +996,28 @@
|
||||
#define L3_HIGH_PRIO_CREDITS(x) (((x) >> 1) << 14)
|
||||
#define L3_PRIO_CREDITS_MASK ((0x1f << 19) | (0x1f << 14))
|
||||
|
||||
#define GEN10_L3_CHICKEN_MODE_REGISTER _MMIO(0xb114)
|
||||
#define GEN11_I2M_WRITE_DISABLE (1 << 28)
|
||||
|
||||
#define GEN8_L3SQCREG4 _MMIO(0xb118)
|
||||
#define GEN8_L3SQCREG4 MCR_REG(0xb118)
|
||||
#define GEN11_LQSC_CLEAN_EVICT_DISABLE (1 << 6)
|
||||
#define GEN8_LQSC_RO_PERF_DIS (1 << 27)
|
||||
#define GEN8_LQSC_FLUSH_COHERENT_LINES (1 << 21)
|
||||
#define GEN8_LQSQ_NONIA_COHERENT_ATOMICS_ENABLE REG_BIT(22)
|
||||
|
||||
#define GEN9_SCRATCH1 _MMIO(0xb11c)
|
||||
#define GEN9_SCRATCH1 MCR_REG(0xb11c)
|
||||
#define EVICTION_PERF_FIX_ENABLE REG_BIT(8)
|
||||
|
||||
#define BDW_SCRATCH1 _MMIO(0xb11c)
|
||||
#define BDW_SCRATCH1 MCR_REG(0xb11c)
|
||||
#define GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE (1 << 2)
|
||||
|
||||
#define GEN11_SCRATCH2 _MMIO(0xb140)
|
||||
#define GEN11_SCRATCH2 MCR_REG(0xb140)
|
||||
#define GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE (1 << 19)
|
||||
|
||||
#define GEN11_L3SQCREG5 _MMIO(0xb158)
|
||||
#define XEHP_L3SQCREG5 MCR_REG(0xb158)
|
||||
#define L3_PWM_TIMER_INIT_VAL_MASK REG_GENMASK(9, 0)
|
||||
|
||||
#define MLTICTXCTL _MMIO(0xb170)
|
||||
#define MLTICTXCTL MCR_REG(0xb170)
|
||||
#define TDONRENDER REG_BIT(2)
|
||||
|
||||
#define XEHP_L3SCQREG7 _MMIO(0xb188)
|
||||
#define XEHP_L3SCQREG7 MCR_REG(0xb188)
|
||||
#define BLEND_FILL_CACHING_OPT_DIS REG_BIT(3)
|
||||
|
||||
#define XEHPC_L3SCRUB _MMIO(0xb18c)
|
||||
@ -1005,7 +1025,7 @@
|
||||
#define SCRUB_RATE_PER_BANK_MASK REG_GENMASK(2, 0)
|
||||
#define SCRUB_RATE_4B_PER_CLK REG_FIELD_PREP(SCRUB_RATE_PER_BANK_MASK, 0x6)
|
||||
|
||||
#define L3SQCREG1_CCS0 _MMIO(0xb200)
|
||||
#define L3SQCREG1_CCS0 MCR_REG(0xb200)
|
||||
#define FLUSHALLNONCOH REG_BIT(5)
|
||||
|
||||
#define GEN11_GLBLINVL _MMIO(0xb404)
|
||||
@ -1030,11 +1050,14 @@
|
||||
#define GEN9_BLT_MOCS(i) _MMIO(__GEN9_BCS0_MOCS0 + (i) * 4)
|
||||
|
||||
#define GEN12_FAULT_TLB_DATA0 _MMIO(0xceb8)
|
||||
#define XEHP_FAULT_TLB_DATA0 MCR_REG(0xceb8)
|
||||
#define GEN12_FAULT_TLB_DATA1 _MMIO(0xcebc)
|
||||
#define XEHP_FAULT_TLB_DATA1 MCR_REG(0xcebc)
|
||||
#define FAULT_VA_HIGH_BITS (0xf << 0)
|
||||
#define FAULT_GTT_SEL (1 << 4)
|
||||
|
||||
#define GEN12_RING_FAULT_REG _MMIO(0xcec4)
|
||||
#define XEHP_RING_FAULT_REG MCR_REG(0xcec4)
|
||||
#define GEN8_RING_FAULT_ENGINE_ID(x) (((x) >> 12) & 0x7)
|
||||
#define RING_FAULT_GTTSEL_MASK (1 << 11)
|
||||
#define RING_FAULT_SRCID(x) (((x) >> 3) & 0xff)
|
||||
@ -1042,16 +1065,21 @@
|
||||
#define RING_FAULT_VALID (1 << 0)
|
||||
|
||||
#define GEN12_GFX_TLB_INV_CR _MMIO(0xced8)
|
||||
#define XEHP_GFX_TLB_INV_CR MCR_REG(0xced8)
|
||||
#define GEN12_VD_TLB_INV_CR _MMIO(0xcedc)
|
||||
#define XEHP_VD_TLB_INV_CR MCR_REG(0xcedc)
|
||||
#define GEN12_VE_TLB_INV_CR _MMIO(0xcee0)
|
||||
#define XEHP_VE_TLB_INV_CR MCR_REG(0xcee0)
|
||||
#define GEN12_BLT_TLB_INV_CR _MMIO(0xcee4)
|
||||
#define XEHP_BLT_TLB_INV_CR MCR_REG(0xcee4)
|
||||
#define GEN12_COMPCTX_TLB_INV_CR _MMIO(0xcf04)
|
||||
#define XEHP_COMPCTX_TLB_INV_CR MCR_REG(0xcf04)
|
||||
|
||||
#define GEN12_MERT_MOD_CTRL _MMIO(0xcf28)
|
||||
#define RENDER_MOD_CTRL _MMIO(0xcf2c)
|
||||
#define COMP_MOD_CTRL _MMIO(0xcf30)
|
||||
#define VDBX_MOD_CTRL _MMIO(0xcf34)
|
||||
#define VEBX_MOD_CTRL _MMIO(0xcf38)
|
||||
#define XEHP_MERT_MOD_CTRL MCR_REG(0xcf28)
|
||||
#define RENDER_MOD_CTRL MCR_REG(0xcf2c)
|
||||
#define COMP_MOD_CTRL MCR_REG(0xcf30)
|
||||
#define VDBX_MOD_CTRL MCR_REG(0xcf34)
|
||||
#define VEBX_MOD_CTRL MCR_REG(0xcf38)
|
||||
#define FORCE_MISS_FTLB REG_BIT(3)
|
||||
|
||||
#define GEN12_GAMSTLB_CTRL _MMIO(0xcf4c)
|
||||
@ -1066,48 +1094,52 @@
|
||||
#define GEN12_GAM_DONE _MMIO(0xcf68)
|
||||
|
||||
#define GEN7_HALF_SLICE_CHICKEN1 _MMIO(0xe100) /* IVB GT1 + VLV */
|
||||
#define GEN8_HALF_SLICE_CHICKEN1 MCR_REG(0xe100)
|
||||
#define GEN7_MAX_PS_THREAD_DEP (8 << 12)
|
||||
#define GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE (1 << 10)
|
||||
#define GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE (1 << 4)
|
||||
#define GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE (1 << 3)
|
||||
|
||||
#define GEN7_SAMPLER_INSTDONE _MMIO(0xe160)
|
||||
#define GEN8_SAMPLER_INSTDONE MCR_REG(0xe160)
|
||||
#define GEN7_ROW_INSTDONE _MMIO(0xe164)
|
||||
#define GEN8_ROW_INSTDONE MCR_REG(0xe164)
|
||||
|
||||
#define HALF_SLICE_CHICKEN2 _MMIO(0xe180)
|
||||
#define HALF_SLICE_CHICKEN2 MCR_REG(0xe180)
|
||||
#define GEN8_ST_PO_DISABLE (1 << 13)
|
||||
|
||||
#define HALF_SLICE_CHICKEN3 _MMIO(0xe184)
|
||||
#define HSW_HALF_SLICE_CHICKEN3 _MMIO(0xe184)
|
||||
#define GEN8_HALF_SLICE_CHICKEN3 MCR_REG(0xe184)
|
||||
#define HSW_SAMPLE_C_PERFORMANCE (1 << 9)
|
||||
#define GEN8_CENTROID_PIXEL_OPT_DIS (1 << 8)
|
||||
#define GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC (1 << 5)
|
||||
#define GEN8_SAMPLER_POWER_BYPASS_DIS (1 << 1)
|
||||
|
||||
#define GEN9_HALF_SLICE_CHICKEN5 _MMIO(0xe188)
|
||||
#define GEN9_HALF_SLICE_CHICKEN5 MCR_REG(0xe188)
|
||||
#define GEN9_DG_MIRROR_FIX_ENABLE (1 << 5)
|
||||
#define GEN9_CCS_TLB_PREFETCH_ENABLE (1 << 3)
|
||||
|
||||
#define GEN10_SAMPLER_MODE _MMIO(0xe18c)
|
||||
#define GEN10_SAMPLER_MODE MCR_REG(0xe18c)
|
||||
#define ENABLE_SMALLPL REG_BIT(15)
|
||||
#define SC_DISABLE_POWER_OPTIMIZATION_EBB REG_BIT(9)
|
||||
#define GEN11_SAMPLER_ENABLE_HEADLESS_MSG REG_BIT(5)
|
||||
|
||||
#define GEN9_HALF_SLICE_CHICKEN7 _MMIO(0xe194)
|
||||
#define GEN9_HALF_SLICE_CHICKEN7 MCR_REG(0xe194)
|
||||
#define DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA REG_BIT(15)
|
||||
#define GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR REG_BIT(8)
|
||||
#define GEN9_ENABLE_YV12_BUGFIX REG_BIT(4)
|
||||
#define GEN9_ENABLE_GPGPU_PREEMPTION REG_BIT(2)
|
||||
|
||||
#define GEN10_CACHE_MODE_SS _MMIO(0xe420)
|
||||
#define GEN10_CACHE_MODE_SS MCR_REG(0xe420)
|
||||
#define ENABLE_EU_COUNT_FOR_TDL_FLUSH REG_BIT(10)
|
||||
#define DISABLE_ECC REG_BIT(5)
|
||||
#define FLOAT_BLEND_OPTIMIZATION_ENABLE REG_BIT(4)
|
||||
#define ENABLE_PREFETCH_INTO_IC REG_BIT(3)
|
||||
|
||||
#define EU_PERF_CNTL0 _MMIO(0xe458)
|
||||
#define EU_PERF_CNTL4 _MMIO(0xe45c)
|
||||
#define EU_PERF_CNTL0 PERF_REG(0xe458)
|
||||
#define EU_PERF_CNTL4 PERF_REG(0xe45c)
|
||||
|
||||
#define GEN9_ROW_CHICKEN4 _MMIO(0xe48c)
|
||||
#define GEN9_ROW_CHICKEN4 MCR_REG(0xe48c)
|
||||
#define GEN12_DISABLE_GRF_CLEAR REG_BIT(13)
|
||||
#define XEHP_DIS_BBL_SYSPIPE REG_BIT(11)
|
||||
#define GEN12_DISABLE_TDL_PUSH REG_BIT(9)
|
||||
@ -1119,7 +1151,7 @@
|
||||
#define HSW_ROW_CHICKEN3 _MMIO(0xe49c)
|
||||
#define HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE (1 << 6)
|
||||
|
||||
#define GEN8_ROW_CHICKEN _MMIO(0xe4f0)
|
||||
#define GEN8_ROW_CHICKEN MCR_REG(0xe4f0)
|
||||
#define FLOW_CONTROL_ENABLE REG_BIT(15)
|
||||
#define UGM_BACKUP_MODE REG_BIT(13)
|
||||
#define MDQ_ARBITRATION_MODE REG_BIT(12)
|
||||
@ -1130,42 +1162,43 @@
|
||||
#define DISABLE_EARLY_EOT REG_BIT(1)
|
||||
|
||||
#define GEN7_ROW_CHICKEN2 _MMIO(0xe4f4)
|
||||
|
||||
#define GEN8_ROW_CHICKEN2 MCR_REG(0xe4f4)
|
||||
#define GEN12_DISABLE_READ_SUPPRESSION REG_BIT(15)
|
||||
#define GEN12_DISABLE_EARLY_READ REG_BIT(14)
|
||||
#define GEN12_ENABLE_LARGE_GRF_MODE REG_BIT(12)
|
||||
#define GEN12_PUSH_CONST_DEREF_HOLD_DIS REG_BIT(8)
|
||||
#define GEN12_DISABLE_DOP_GATING REG_BIT(0)
|
||||
|
||||
#define RT_CTRL _MMIO(0xe530)
|
||||
#define RT_CTRL MCR_REG(0xe530)
|
||||
#define DIS_NULL_QUERY REG_BIT(10)
|
||||
#define STACKID_CTRL REG_GENMASK(6, 5)
|
||||
#define STACKID_CTRL_512 REG_FIELD_PREP(STACKID_CTRL, 0x2)
|
||||
|
||||
#define EU_PERF_CNTL1 _MMIO(0xe558)
|
||||
#define EU_PERF_CNTL5 _MMIO(0xe55c)
|
||||
#define EU_PERF_CNTL1 PERF_REG(0xe558)
|
||||
#define EU_PERF_CNTL5 PERF_REG(0xe55c)
|
||||
|
||||
#define GEN12_HDC_CHICKEN0 _MMIO(0xe5f0)
|
||||
#define XEHP_HDC_CHICKEN0 MCR_REG(0xe5f0)
|
||||
#define LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK REG_GENMASK(13, 11)
|
||||
#define ICL_HDC_MODE _MMIO(0xe5f4)
|
||||
#define ICL_HDC_MODE MCR_REG(0xe5f4)
|
||||
|
||||
#define EU_PERF_CNTL2 _MMIO(0xe658)
|
||||
#define EU_PERF_CNTL6 _MMIO(0xe65c)
|
||||
#define EU_PERF_CNTL3 _MMIO(0xe758)
|
||||
#define EU_PERF_CNTL2 PERF_REG(0xe658)
|
||||
#define EU_PERF_CNTL6 PERF_REG(0xe65c)
|
||||
#define EU_PERF_CNTL3 PERF_REG(0xe758)
|
||||
|
||||
#define LSC_CHICKEN_BIT_0 _MMIO(0xe7c8)
|
||||
#define LSC_CHICKEN_BIT_0 MCR_REG(0xe7c8)
|
||||
#define DISABLE_D8_D16_COASLESCE REG_BIT(30)
|
||||
#define FORCE_1_SUB_MESSAGE_PER_FRAGMENT REG_BIT(15)
|
||||
#define LSC_CHICKEN_BIT_0_UDW _MMIO(0xe7c8 + 4)
|
||||
#define LSC_CHICKEN_BIT_0_UDW MCR_REG(0xe7c8 + 4)
|
||||
#define DIS_CHAIN_2XSIMD8 REG_BIT(55 - 32)
|
||||
#define FORCE_SLM_FENCE_SCOPE_TO_TILE REG_BIT(42 - 32)
|
||||
#define FORCE_UGM_FENCE_SCOPE_TO_TILE REG_BIT(41 - 32)
|
||||
#define MAXREQS_PER_BANK REG_GENMASK(39 - 32, 37 - 32)
|
||||
#define DISABLE_128B_EVICTION_COMMAND_UDW REG_BIT(36 - 32)
|
||||
|
||||
#define SARB_CHICKEN1 _MMIO(0xe90c)
|
||||
#define SARB_CHICKEN1 MCR_REG(0xe90c)
|
||||
#define COMP_CKN_IN REG_GENMASK(30, 29)
|
||||
|
||||
#define GEN7_HALF_SLICE_CHICKEN1_GT2 _MMIO(0xf100)
|
||||
|
||||
#define GEN7_ROW_CHICKEN2_GT2 _MMIO(0xf4f4)
|
||||
#define DOP_CLOCK_GATING_DISABLE (1 << 0)
|
||||
#define PUSH_CONSTANT_DEREF_DISABLE (1 << 8)
|
||||
@ -1513,6 +1546,9 @@
|
||||
#define VLV_RENDER_C0_COUNT _MMIO(0x138118)
|
||||
#define VLV_MEDIA_C0_COUNT _MMIO(0x13811c)
|
||||
|
||||
#define GEN12_RPSTAT1 _MMIO(0x1381b4)
|
||||
#define GEN12_VOLTAGE_MASK REG_GENMASK(10, 0)
|
||||
|
||||
#define GEN11_GT_INTR_DW(x) _MMIO(0x190018 + ((x) * 4))
|
||||
#define GEN11_CSME (31)
|
||||
#define GEN11_GUNIT (28)
|
||||
@ -1583,6 +1619,11 @@
|
||||
|
||||
#define GEN12_SFC_DONE(n) _MMIO(0x1cc000 + (n) * 0x1000)
|
||||
|
||||
#define GT0_PACKAGE_ENERGY_STATUS _MMIO(0x250004)
|
||||
#define GT0_PACKAGE_RAPL_LIMIT _MMIO(0x250008)
|
||||
#define GT0_PACKAGE_POWER_SKU_UNIT _MMIO(0x250068)
|
||||
#define GT0_PLATFORM_ENERGY_STATUS _MMIO(0x25006c)
|
||||
|
||||
/*
|
||||
* Standalone Media's non-engine GT registers are located at their regular GT
|
||||
* offsets plus 0x380000. This extra offset is stored inside the intel_uncore
|
||||
|
@ -22,11 +22,9 @@ bool is_object_gt(struct kobject *kobj)
|
||||
return !strncmp(kobj->name, "gt", 2);
|
||||
}
|
||||
|
||||
struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev,
|
||||
struct intel_gt *intel_gt_sysfs_get_drvdata(struct kobject *kobj,
|
||||
const char *name)
|
||||
{
|
||||
struct kobject *kobj = &dev->kobj;
|
||||
|
||||
/*
|
||||
* We are interested in knowing from where the interface
|
||||
* has been called, whether it's called from gt/ or from
|
||||
@ -38,6 +36,7 @@ struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev,
|
||||
* "struct drm_i915_private *" type.
|
||||
*/
|
||||
if (!is_object_gt(kobj)) {
|
||||
struct device *dev = kobj_to_dev(kobj);
|
||||
struct drm_i915_private *i915 = kdev_minor_to_i915(dev);
|
||||
|
||||
return to_gt(i915);
|
||||
@ -51,18 +50,18 @@ static struct kobject *gt_get_parent_obj(struct intel_gt *gt)
|
||||
return &gt->i915->drm.primary->kdev->kobj;
|
||||
}
|
||||
|
||||
static ssize_t id_show(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
static ssize_t id_show(struct kobject *kobj,
|
||||
struct kobj_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
|
||||
struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
|
||||
|
||||
return sysfs_emit(buf, "%u\n", gt->info.id);
|
||||
}
|
||||
static DEVICE_ATTR_RO(id);
|
||||
static struct kobj_attribute attr_id = __ATTR_RO(id);
|
||||
|
||||
static struct attribute *id_attrs[] = {
|
||||
&dev_attr_id.attr,
|
||||
&attr_id.attr,
|
||||
NULL,
|
||||
};
|
||||
ATTRIBUTE_GROUPS(id);
|
||||
|
@ -18,11 +18,6 @@ bool is_object_gt(struct kobject *kobj);
|
||||
|
||||
struct drm_i915_private *kobj_to_i915(struct kobject *kobj);
|
||||
|
||||
struct kobject *
|
||||
intel_gt_create_kobj(struct intel_gt *gt,
|
||||
struct kobject *dir,
|
||||
const char *name);
|
||||
|
||||
static inline struct intel_gt *kobj_to_gt(struct kobject *kobj)
|
||||
{
|
||||
return container_of(kobj, struct intel_gt, sysfs_gt);
|
||||
@ -30,7 +25,7 @@ static inline struct intel_gt *kobj_to_gt(struct kobject *kobj)
|
||||
|
||||
void intel_gt_sysfs_register(struct intel_gt *gt);
|
||||
void intel_gt_sysfs_unregister(struct intel_gt *gt);
|
||||
struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev,
|
||||
struct intel_gt *intel_gt_sysfs_get_drvdata(struct kobject *kobj,
|
||||
const char *name);
|
||||
|
||||
#endif /* SYSFS_GT_H */
|
||||
|
@ -24,14 +24,15 @@ enum intel_gt_sysfs_op {
|
||||
};
|
||||
|
||||
static int
|
||||
sysfs_gt_attribute_w_func(struct device *dev, struct device_attribute *attr,
|
||||
sysfs_gt_attribute_w_func(struct kobject *kobj, struct attribute *attr,
|
||||
int (func)(struct intel_gt *gt, u32 val), u32 val)
|
||||
{
|
||||
struct intel_gt *gt;
|
||||
int ret;
|
||||
|
||||
if (!is_object_gt(&dev->kobj)) {
|
||||
if (!is_object_gt(kobj)) {
|
||||
int i;
|
||||
struct device *dev = kobj_to_dev(kobj);
|
||||
struct drm_i915_private *i915 = kdev_minor_to_i915(dev);
|
||||
|
||||
for_each_gt(gt, i915, i) {
|
||||
@ -40,7 +41,7 @@ sysfs_gt_attribute_w_func(struct device *dev, struct device_attribute *attr,
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
|
||||
gt = intel_gt_sysfs_get_drvdata(kobj, attr->name);
|
||||
ret = func(gt, val);
|
||||
}
|
||||
|
||||
@ -48,7 +49,7 @@ sysfs_gt_attribute_w_func(struct device *dev, struct device_attribute *attr,
|
||||
}
|
||||
|
||||
static u32
|
||||
sysfs_gt_attribute_r_func(struct device *dev, struct device_attribute *attr,
|
||||
sysfs_gt_attribute_r_func(struct kobject *kobj, struct attribute *attr,
|
||||
u32 (func)(struct intel_gt *gt),
|
||||
enum intel_gt_sysfs_op op)
|
||||
{
|
||||
@ -57,8 +58,9 @@ sysfs_gt_attribute_r_func(struct device *dev, struct device_attribute *attr,
|
||||
|
||||
ret = (op == INTEL_GT_SYSFS_MAX) ? 0 : (u32) -1;
|
||||
|
||||
if (!is_object_gt(&dev->kobj)) {
|
||||
if (!is_object_gt(kobj)) {
|
||||
int i;
|
||||
struct device *dev = kobj_to_dev(kobj);
|
||||
struct drm_i915_private *i915 = kdev_minor_to_i915(dev);
|
||||
|
||||
for_each_gt(gt, i915, i) {
|
||||
@ -77,7 +79,7 @@ sysfs_gt_attribute_r_func(struct device *dev, struct device_attribute *attr,
|
||||
}
|
||||
}
|
||||
} else {
|
||||
gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
|
||||
gt = intel_gt_sysfs_get_drvdata(kobj, attr->name);
|
||||
ret = func(gt);
|
||||
}
|
||||
|
||||
@ -92,6 +94,76 @@ sysfs_gt_attribute_r_func(struct device *dev, struct device_attribute *attr,
|
||||
#define sysfs_gt_attribute_r_max_func(d, a, f) \
|
||||
sysfs_gt_attribute_r_func(d, a, f, INTEL_GT_SYSFS_MAX)
|
||||
|
||||
#define INTEL_GT_SYSFS_SHOW(_name, _attr_type) \
|
||||
static ssize_t _name##_show_common(struct kobject *kobj, \
|
||||
struct attribute *attr, char *buff) \
|
||||
{ \
|
||||
u32 val = sysfs_gt_attribute_r_##_attr_type##_func(kobj, attr, \
|
||||
__##_name##_show); \
|
||||
\
|
||||
return sysfs_emit(buff, "%u\n", val); \
|
||||
} \
|
||||
static ssize_t _name##_show(struct kobject *kobj, \
|
||||
struct kobj_attribute *attr, char *buff) \
|
||||
{ \
|
||||
return _name ##_show_common(kobj, &attr->attr, buff); \
|
||||
} \
|
||||
static ssize_t _name##_dev_show(struct device *dev, \
|
||||
struct device_attribute *attr, char *buff) \
|
||||
{ \
|
||||
return _name##_show_common(&dev->kobj, &attr->attr, buff); \
|
||||
}
|
||||
|
||||
#define INTEL_GT_SYSFS_STORE(_name, _func) \
|
||||
static ssize_t _name##_store_common(struct kobject *kobj, \
|
||||
struct attribute *attr, \
|
||||
const char *buff, size_t count) \
|
||||
{ \
|
||||
int ret; \
|
||||
u32 val; \
|
||||
\
|
||||
ret = kstrtou32(buff, 0, &val); \
|
||||
if (ret) \
|
||||
return ret; \
|
||||
\
|
||||
ret = sysfs_gt_attribute_w_func(kobj, attr, _func, val); \
|
||||
\
|
||||
return ret ?: count; \
|
||||
} \
|
||||
static ssize_t _name##_store(struct kobject *kobj, \
|
||||
struct kobj_attribute *attr, const char *buff, \
|
||||
size_t count) \
|
||||
{ \
|
||||
return _name##_store_common(kobj, &attr->attr, buff, count); \
|
||||
} \
|
||||
static ssize_t _name##_dev_store(struct device *dev, \
|
||||
struct device_attribute *attr, \
|
||||
const char *buff, size_t count) \
|
||||
{ \
|
||||
return _name##_store_common(&dev->kobj, &attr->attr, buff, count); \
|
||||
}
|
||||
|
||||
#define INTEL_GT_SYSFS_SHOW_MAX(_name) INTEL_GT_SYSFS_SHOW(_name, max)
|
||||
#define INTEL_GT_SYSFS_SHOW_MIN(_name) INTEL_GT_SYSFS_SHOW(_name, min)
|
||||
|
||||
#define INTEL_GT_ATTR_RW(_name) \
|
||||
static struct kobj_attribute attr_##_name = __ATTR_RW(_name)
|
||||
|
||||
#define INTEL_GT_ATTR_RO(_name) \
|
||||
static struct kobj_attribute attr_##_name = __ATTR_RO(_name)
|
||||
|
||||
#define INTEL_GT_DUAL_ATTR_RW(_name) \
|
||||
static struct device_attribute dev_attr_##_name = __ATTR(_name, 0644, \
|
||||
_name##_dev_show, \
|
||||
_name##_dev_store); \
|
||||
INTEL_GT_ATTR_RW(_name)
|
||||
|
||||
#define INTEL_GT_DUAL_ATTR_RO(_name) \
|
||||
static struct device_attribute dev_attr_##_name = __ATTR(_name, 0444, \
|
||||
_name##_dev_show, \
|
||||
NULL); \
|
||||
INTEL_GT_ATTR_RO(_name)
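To make the macro layering concrete, a hedged sketch of wiring up a single read-only per-GT attribute; the attribute name and worker are invented for illustration.

/* Illustrative only: one worker feeds both the kobject and device variants. */
static u32 __example_stat_show(struct intel_gt *gt)
{
	return gt->info.id;	/* any per-GT value would do here */
}

INTEL_GT_SYSFS_SHOW_MIN(example_stat);	/* generates example_stat_show() and example_stat_dev_show() */
INTEL_GT_DUAL_ATTR_RO(example_stat);	/* generates attr_example_stat and dev_attr_example_stat */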
|
||||
|
||||
#ifdef CONFIG_PM
|
||||
static u32 get_residency(struct intel_gt *gt, i915_reg_t reg)
|
||||
{
|
||||
@ -104,11 +176,8 @@ static u32 get_residency(struct intel_gt *gt, i915_reg_t reg)
|
||||
return DIV_ROUND_CLOSEST_ULL(res, 1000);
|
||||
}
|
||||
|
||||
static ssize_t rc6_enable_show(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buff)
|
||||
static u8 get_rc6_mask(struct intel_gt *gt)
|
||||
{
|
||||
struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
|
||||
u8 mask = 0;
|
||||
|
||||
if (HAS_RC6(gt->i915))
|
||||
@ -118,7 +187,25 @@ static ssize_t rc6_enable_show(struct device *dev,
|
||||
if (HAS_RC6pp(gt->i915))
|
||||
mask |= BIT(2);
|
||||
|
||||
return sysfs_emit(buff, "%x\n", mask);
|
||||
return mask;
|
||||
}
|
||||
|
||||
static ssize_t rc6_enable_show(struct kobject *kobj,
|
||||
struct kobj_attribute *attr,
|
||||
char *buff)
|
||||
{
|
||||
struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
|
||||
|
||||
return sysfs_emit(buff, "%x\n", get_rc6_mask(gt));
|
||||
}
|
||||
|
||||
static ssize_t rc6_enable_dev_show(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buff)
|
||||
{
|
||||
struct intel_gt *gt = intel_gt_sysfs_get_drvdata(&dev->kobj, attr->attr.name);
|
||||
|
||||
return sysfs_emit(buff, "%x\n", get_rc6_mask(gt));
|
||||
}
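For reference, a hedged sketch of decoding the mask reported by rc6_enable; the BIT(2) assignment for RC6pp is visible above, while RC6 occupying bit 0 is an assumption here.

/* Illustrative only: bit 0 = RC6, bit 1 = RC6p, bit 2 = RC6pp (assumed layout). */
static bool example_rc6_supported(u8 rc6_enable_mask)
{
	return rc6_enable_mask & BIT(0);
}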
|
||||
|
||||
static u32 __rc6_residency_ms_show(struct intel_gt *gt)
|
||||
@ -126,97 +213,79 @@ static u32 __rc6_residency_ms_show(struct intel_gt *gt)
|
||||
return get_residency(gt, GEN6_GT_GFX_RC6);
|
||||
}
|
||||
|
||||
static ssize_t rc6_residency_ms_show(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buff)
|
||||
{
|
||||
u32 rc6_residency = sysfs_gt_attribute_r_min_func(dev, attr,
|
||||
__rc6_residency_ms_show);
|
||||
|
||||
return sysfs_emit(buff, "%u\n", rc6_residency);
|
||||
}
|
||||
|
||||
static u32 __rc6p_residency_ms_show(struct intel_gt *gt)
|
||||
{
|
||||
return get_residency(gt, GEN6_GT_GFX_RC6p);
|
||||
}
|
||||
|
||||
static ssize_t rc6p_residency_ms_show(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buff)
|
||||
{
|
||||
u32 rc6p_residency = sysfs_gt_attribute_r_min_func(dev, attr,
|
||||
__rc6p_residency_ms_show);
|
||||
|
||||
return sysfs_emit(buff, "%u\n", rc6p_residency);
|
||||
}
|
||||
|
||||
static u32 __rc6pp_residency_ms_show(struct intel_gt *gt)
|
||||
{
|
||||
return get_residency(gt, GEN6_GT_GFX_RC6pp);
|
||||
}
|
||||
|
||||
static ssize_t rc6pp_residency_ms_show(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buff)
|
||||
{
|
||||
u32 rc6pp_residency = sysfs_gt_attribute_r_min_func(dev, attr,
|
||||
__rc6pp_residency_ms_show);
|
||||
|
||||
return sysfs_emit(buff, "%u\n", rc6pp_residency);
|
||||
}
|
||||
|
||||
static u32 __media_rc6_residency_ms_show(struct intel_gt *gt)
|
||||
{
|
||||
return get_residency(gt, VLV_GT_MEDIA_RC6);
|
||||
}
|
||||
|
||||
static ssize_t media_rc6_residency_ms_show(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buff)
|
||||
{
|
||||
u32 rc6_residency = sysfs_gt_attribute_r_min_func(dev, attr,
|
||||
__media_rc6_residency_ms_show);
|
||||
INTEL_GT_SYSFS_SHOW_MIN(rc6_residency_ms);
|
||||
INTEL_GT_SYSFS_SHOW_MIN(rc6p_residency_ms);
|
||||
INTEL_GT_SYSFS_SHOW_MIN(rc6pp_residency_ms);
|
||||
INTEL_GT_SYSFS_SHOW_MIN(media_rc6_residency_ms);
|
||||
|
||||
return sysfs_emit(buff, "%u\n", rc6_residency);
|
||||
}
|
||||
|
||||
static DEVICE_ATTR_RO(rc6_enable);
|
||||
static DEVICE_ATTR_RO(rc6_residency_ms);
|
||||
static DEVICE_ATTR_RO(rc6p_residency_ms);
|
||||
static DEVICE_ATTR_RO(rc6pp_residency_ms);
|
||||
static DEVICE_ATTR_RO(media_rc6_residency_ms);
|
||||
INTEL_GT_DUAL_ATTR_RO(rc6_enable);
|
||||
INTEL_GT_DUAL_ATTR_RO(rc6_residency_ms);
|
||||
INTEL_GT_DUAL_ATTR_RO(rc6p_residency_ms);
|
||||
INTEL_GT_DUAL_ATTR_RO(rc6pp_residency_ms);
|
||||
INTEL_GT_DUAL_ATTR_RO(media_rc6_residency_ms);
|
||||
|
||||
static struct attribute *rc6_attrs[] = {
|
||||
&attr_rc6_enable.attr,
|
||||
&attr_rc6_residency_ms.attr,
|
||||
NULL
|
||||
};
|
||||
|
||||
static struct attribute *rc6p_attrs[] = {
|
||||
&attr_rc6p_residency_ms.attr,
|
||||
&attr_rc6pp_residency_ms.attr,
|
||||
NULL
|
||||
};
|
||||
|
||||
static struct attribute *media_rc6_attrs[] = {
|
||||
&attr_media_rc6_residency_ms.attr,
|
||||
NULL
|
||||
};
|
||||
|
||||
static struct attribute *rc6_dev_attrs[] = {
|
||||
&dev_attr_rc6_enable.attr,
|
||||
&dev_attr_rc6_residency_ms.attr,
|
||||
NULL
|
||||
};
|
||||
|
||||
static struct attribute *rc6p_attrs[] = {
|
||||
static struct attribute *rc6p_dev_attrs[] = {
|
||||
&dev_attr_rc6p_residency_ms.attr,
|
||||
&dev_attr_rc6pp_residency_ms.attr,
|
||||
NULL
|
||||
};
|
||||
|
||||
static struct attribute *media_rc6_attrs[] = {
|
||||
static struct attribute *media_rc6_dev_attrs[] = {
|
||||
&dev_attr_media_rc6_residency_ms.attr,
|
||||
NULL
|
||||
};
|
||||
|
||||
static const struct attribute_group rc6_attr_group[] = {
|
||||
{ .attrs = rc6_attrs, },
|
||||
{ .name = power_group_name, .attrs = rc6_attrs, },
|
||||
{ .name = power_group_name, .attrs = rc6_dev_attrs, },
|
||||
};
|
||||
|
||||
static const struct attribute_group rc6p_attr_group[] = {
|
||||
{ .attrs = rc6p_attrs, },
|
||||
{ .name = power_group_name, .attrs = rc6p_attrs, },
|
||||
{ .name = power_group_name, .attrs = rc6p_dev_attrs, },
|
||||
};
|
||||
|
||||
static const struct attribute_group media_rc6_attr_group[] = {
|
||||
{ .attrs = media_rc6_attrs, },
|
||||
{ .name = power_group_name, .attrs = media_rc6_attrs, },
|
||||
{ .name = power_group_name, .attrs = media_rc6_dev_attrs, },
|
||||
};
|
||||
|
||||
static int __intel_gt_sysfs_create_group(struct kobject *kobj,
|
||||
@ -271,104 +340,34 @@ static u32 __act_freq_mhz_show(struct intel_gt *gt)
|
||||
return intel_rps_read_actual_frequency(&gt->rps);
|
||||
}
|
||||
|
||||
static ssize_t act_freq_mhz_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buff)
|
||||
{
|
||||
u32 actual_freq = sysfs_gt_attribute_r_max_func(dev, attr,
|
||||
__act_freq_mhz_show);
|
||||
|
||||
return sysfs_emit(buff, "%u\n", actual_freq);
|
||||
}
|
||||
|
||||
static u32 __cur_freq_mhz_show(struct intel_gt *gt)
|
||||
{
|
||||
return intel_rps_get_requested_frequency(&gt->rps);
|
||||
}
|
||||
|
||||
static ssize_t cur_freq_mhz_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buff)
|
||||
{
|
||||
u32 cur_freq = sysfs_gt_attribute_r_max_func(dev, attr,
|
||||
__cur_freq_mhz_show);
|
||||
|
||||
return sysfs_emit(buff, "%u\n", cur_freq);
|
||||
}
|
||||
|
||||
static u32 __boost_freq_mhz_show(struct intel_gt *gt)
|
||||
{
|
||||
return intel_rps_get_boost_frequency(&gt->rps);
|
||||
}
|
||||
|
||||
static ssize_t boost_freq_mhz_show(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buff)
|
||||
{
|
||||
u32 boost_freq = sysfs_gt_attribute_r_max_func(dev, attr,
|
||||
__boost_freq_mhz_show);
|
||||
|
||||
return sysfs_emit(buff, "%u\n", boost_freq);
|
||||
}
|
||||
|
||||
static int __boost_freq_mhz_store(struct intel_gt *gt, u32 val)
|
||||
{
|
||||
return intel_rps_set_boost_frequency(&gt->rps, val);
|
||||
}
|
||||
|
||||
static ssize_t boost_freq_mhz_store(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
const char *buff, size_t count)
|
||||
{
|
||||
ssize_t ret;
|
||||
u32 val;
|
||||
|
||||
ret = kstrtou32(buff, 0, &val);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return sysfs_gt_attribute_w_func(dev, attr,
|
||||
__boost_freq_mhz_store, val) ?: count;
|
||||
}
|
||||
|
||||
static u32 __rp0_freq_mhz_show(struct intel_gt *gt)
|
||||
static u32 __RP0_freq_mhz_show(struct intel_gt *gt)
|
||||
{
|
||||
return intel_rps_get_rp0_frequency(&gt->rps);
|
||||
}
|
||||
|
||||
static ssize_t RP0_freq_mhz_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buff)
|
||||
{
|
||||
u32 rp0_freq = sysfs_gt_attribute_r_max_func(dev, attr,
|
||||
__rp0_freq_mhz_show);
|
||||
|
||||
return sysfs_emit(buff, "%u\n", rp0_freq);
|
||||
}
|
||||
|
||||
static u32 __rp1_freq_mhz_show(struct intel_gt *gt)
|
||||
{
|
||||
return intel_rps_get_rp1_frequency(&gt->rps);
|
||||
}
|
||||
|
||||
static ssize_t RP1_freq_mhz_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buff)
|
||||
{
|
||||
u32 rp1_freq = sysfs_gt_attribute_r_max_func(dev, attr,
|
||||
__rp1_freq_mhz_show);
|
||||
|
||||
return sysfs_emit(buff, "%u\n", rp1_freq);
|
||||
}
|
||||
|
||||
static u32 __rpn_freq_mhz_show(struct intel_gt *gt)
|
||||
static u32 __RPn_freq_mhz_show(struct intel_gt *gt)
|
||||
{
|
||||
return intel_rps_get_rpn_frequency(&gt->rps);
|
||||
}
|
||||
|
||||
static ssize_t RPn_freq_mhz_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buff)
|
||||
static u32 __RP1_freq_mhz_show(struct intel_gt *gt)
|
||||
{
|
||||
u32 rpn_freq = sysfs_gt_attribute_r_max_func(dev, attr,
|
||||
__rpn_freq_mhz_show);
|
||||
|
||||
return sysfs_emit(buff, "%u\n", rpn_freq);
|
||||
return intel_rps_get_rp1_frequency(&gt->rps);
|
||||
}
|
||||
|
||||
static u32 __max_freq_mhz_show(struct intel_gt *gt)
|
||||
@ -376,71 +375,21 @@ static u32 __max_freq_mhz_show(struct intel_gt *gt)
|
||||
return intel_rps_get_max_frequency(&gt->rps);
|
||||
}
|
||||
|
||||
static ssize_t max_freq_mhz_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buff)
|
||||
{
|
||||
u32 max_freq = sysfs_gt_attribute_r_max_func(dev, attr,
|
||||
__max_freq_mhz_show);
|
||||
|
||||
return sysfs_emit(buff, "%u\n", max_freq);
|
||||
}
|
||||
|
||||
static int __set_max_freq(struct intel_gt *gt, u32 val)
|
||||
{
|
||||
return intel_rps_set_max_frequency(&gt->rps, val);
|
||||
}
|
||||
|
||||
static ssize_t max_freq_mhz_store(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
const char *buff, size_t count)
|
||||
{
|
||||
int ret;
|
||||
u32 val;
|
||||
|
||||
ret = kstrtou32(buff, 0, &val);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = sysfs_gt_attribute_w_func(dev, attr, __set_max_freq, val);
|
||||
|
||||
return ret ?: count;
|
||||
}
|
||||
|
||||
static u32 __min_freq_mhz_show(struct intel_gt *gt)
|
||||
{
|
||||
return intel_rps_get_min_frequency(>->rps);
|
||||
}
|
||||
|
||||
static ssize_t min_freq_mhz_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buff)
|
||||
{
|
||||
u32 min_freq = sysfs_gt_attribute_r_min_func(dev, attr,
|
||||
__min_freq_mhz_show);
|
||||
|
||||
return sysfs_emit(buff, "%u\n", min_freq);
|
||||
}
|
||||
|
||||
static int __set_min_freq(struct intel_gt *gt, u32 val)
|
||||
{
|
||||
return intel_rps_set_min_frequency(&gt->rps, val);
|
||||
}
|
||||
|
||||
static ssize_t min_freq_mhz_store(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
const char *buff, size_t count)
|
||||
{
|
||||
int ret;
|
||||
u32 val;
|
||||
|
||||
ret = kstrtou32(buff, 0, &val);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = sysfs_gt_attribute_w_func(dev, attr, __set_min_freq, val);
|
||||
|
||||
return ret ?: count;
|
||||
}
|
||||
|
||||
static u32 __vlv_rpe_freq_mhz_show(struct intel_gt *gt)
|
||||
{
|
||||
struct intel_rps *rps = &gt->rps;
|
||||
@ -448,23 +397,31 @@ static u32 __vlv_rpe_freq_mhz_show(struct intel_gt *gt)
|
||||
return intel_gpu_freq(rps, rps->efficient_freq);
|
||||
}
|
||||
|
||||
static ssize_t vlv_rpe_freq_mhz_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buff)
|
||||
{
|
||||
u32 rpe_freq = sysfs_gt_attribute_r_max_func(dev, attr,
|
||||
__vlv_rpe_freq_mhz_show);
|
||||
INTEL_GT_SYSFS_SHOW_MAX(act_freq_mhz);
|
||||
INTEL_GT_SYSFS_SHOW_MAX(boost_freq_mhz);
|
||||
INTEL_GT_SYSFS_SHOW_MAX(cur_freq_mhz);
|
||||
INTEL_GT_SYSFS_SHOW_MAX(RP0_freq_mhz);
|
||||
INTEL_GT_SYSFS_SHOW_MAX(RP1_freq_mhz);
|
||||
INTEL_GT_SYSFS_SHOW_MAX(RPn_freq_mhz);
|
||||
INTEL_GT_SYSFS_SHOW_MAX(max_freq_mhz);
|
||||
INTEL_GT_SYSFS_SHOW_MIN(min_freq_mhz);
|
||||
INTEL_GT_SYSFS_SHOW_MAX(vlv_rpe_freq_mhz);
|
||||
INTEL_GT_SYSFS_STORE(boost_freq_mhz, __boost_freq_mhz_store);
|
||||
INTEL_GT_SYSFS_STORE(max_freq_mhz, __set_max_freq);
|
||||
INTEL_GT_SYSFS_STORE(min_freq_mhz, __set_min_freq);
|
||||
|
||||
return sysfs_emit(buff, "%u\n", rpe_freq);
|
||||
}
|
||||
#define INTEL_GT_RPS_SYSFS_ATTR(_name, _mode, _show, _store, _show_dev, _store_dev) \
|
||||
static struct device_attribute dev_attr_gt_##_name = __ATTR(gt_##_name, _mode, \
|
||||
_show_dev, _store_dev); \
|
||||
static struct kobj_attribute attr_rps_##_name = __ATTR(rps_##_name, _mode, \
|
||||
_show, _store)
|
||||
|
||||
#define INTEL_GT_RPS_SYSFS_ATTR(_name, _mode, _show, _store) \
|
||||
static struct device_attribute dev_attr_gt_##_name = __ATTR(gt_##_name, _mode, _show, _store); \
|
||||
static struct device_attribute dev_attr_rps_##_name = __ATTR(rps_##_name, _mode, _show, _store)
|
||||
|
||||
#define INTEL_GT_RPS_SYSFS_ATTR_RO(_name) \
|
||||
INTEL_GT_RPS_SYSFS_ATTR(_name, 0444, _name##_show, NULL)
|
||||
#define INTEL_GT_RPS_SYSFS_ATTR_RW(_name) \
|
||||
INTEL_GT_RPS_SYSFS_ATTR(_name, 0644, _name##_show, _name##_store)
|
||||
#define INTEL_GT_RPS_SYSFS_ATTR_RO(_name) \
|
||||
INTEL_GT_RPS_SYSFS_ATTR(_name, 0444, _name##_show, NULL, \
|
||||
_name##_dev_show, NULL)
|
||||
#define INTEL_GT_RPS_SYSFS_ATTR_RW(_name) \
|
||||
INTEL_GT_RPS_SYSFS_ATTR(_name, 0644, _name##_show, _name##_store, \
|
||||
_name##_dev_show, _name##_dev_store)
|
||||
|
||||
/* The below macros generate static structures */
|
||||
INTEL_GT_RPS_SYSFS_ATTR_RO(act_freq_mhz);
|
||||
@ -475,32 +432,31 @@ INTEL_GT_RPS_SYSFS_ATTR_RO(RP1_freq_mhz);
|
||||
INTEL_GT_RPS_SYSFS_ATTR_RO(RPn_freq_mhz);
|
||||
INTEL_GT_RPS_SYSFS_ATTR_RW(max_freq_mhz);
|
||||
INTEL_GT_RPS_SYSFS_ATTR_RW(min_freq_mhz);
|
||||
INTEL_GT_RPS_SYSFS_ATTR_RO(vlv_rpe_freq_mhz);
|
||||
|
||||
static DEVICE_ATTR_RO(vlv_rpe_freq_mhz);
|
||||
|
||||
#define GEN6_ATTR(s) { \
|
||||
&dev_attr_##s##_act_freq_mhz.attr, \
|
||||
&dev_attr_##s##_cur_freq_mhz.attr, \
|
||||
&dev_attr_##s##_boost_freq_mhz.attr, \
|
||||
&dev_attr_##s##_max_freq_mhz.attr, \
|
||||
&dev_attr_##s##_min_freq_mhz.attr, \
|
||||
&dev_attr_##s##_RP0_freq_mhz.attr, \
|
||||
&dev_attr_##s##_RP1_freq_mhz.attr, \
|
||||
&dev_attr_##s##_RPn_freq_mhz.attr, \
|
||||
#define GEN6_ATTR(p, s) { \
        &p##attr_##s##_act_freq_mhz.attr, \
        &p##attr_##s##_cur_freq_mhz.attr, \
        &p##attr_##s##_boost_freq_mhz.attr, \
        &p##attr_##s##_max_freq_mhz.attr, \
        &p##attr_##s##_min_freq_mhz.attr, \
        &p##attr_##s##_RP0_freq_mhz.attr, \
        &p##attr_##s##_RP1_freq_mhz.attr, \
        &p##attr_##s##_RPn_freq_mhz.attr, \
        NULL, \
}

#define GEN6_RPS_ATTR GEN6_ATTR(rps)
#define GEN6_GT_ATTR GEN6_ATTR(gt)
#define GEN6_RPS_ATTR GEN6_ATTR(, rps)
#define GEN6_GT_ATTR GEN6_ATTR(dev_, gt)

static const struct attribute * const gen6_rps_attrs[] = GEN6_RPS_ATTR;
static const struct attribute * const gen6_gt_attrs[] = GEN6_GT_ATTR;
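/*
 * Editorial aside (not part of the patch): with the new prefix argument,
 * GEN6_ATTR(dev_, gt) expands to an initializer equivalent to
 *
 *      {
 *              &dev_attr_gt_act_freq_mhz.attr,
 *              &dev_attr_gt_cur_freq_mhz.attr,
 *              ...
 *              &dev_attr_gt_RPn_freq_mhz.attr,
 *              NULL,
 *      }
 *
 * while GEN6_ATTR(, rps) produces the &attr_rps_*_freq_mhz.attr equivalents,
 * so the same list shape serves both the legacy device_attribute names and
 * the per-gt kobj_attribute names generated above.
 */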
|
||||
|
||||
static ssize_t punit_req_freq_mhz_show(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
static ssize_t punit_req_freq_mhz_show(struct kobject *kobj,
|
||||
struct kobj_attribute *attr,
|
||||
char *buff)
|
||||
{
|
||||
struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
|
||||
struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
|
||||
u32 preq = intel_rps_read_punit_req_frequency(>->rps);
|
||||
|
||||
return sysfs_emit(buff, "%u\n", preq);
|
||||
@ -508,20 +464,20 @@ static ssize_t punit_req_freq_mhz_show(struct device *dev,
|
||||
|
||||
struct intel_gt_bool_throttle_attr {
        struct attribute attr;
        ssize_t (*show)(struct device *dev, struct device_attribute *attr,
        ssize_t (*show)(struct kobject *kobj, struct kobj_attribute *attr,
                        char *buf);
        i915_reg_t reg32;
        i915_reg_t (*reg32)(struct intel_gt *gt);
        u32 mask;
};

static ssize_t throttle_reason_bool_show(struct device *dev,
                                         struct device_attribute *attr,
static ssize_t throttle_reason_bool_show(struct kobject *kobj,
                                         struct kobj_attribute *attr,
                                         char *buff)
{
        struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
        struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
        struct intel_gt_bool_throttle_attr *t_attr =
                                (struct intel_gt_bool_throttle_attr *) attr;
        bool val = rps_read_mask_mmio(&gt->rps, t_attr->reg32, t_attr->mask);
        bool val = rps_read_mask_mmio(&gt->rps, t_attr->reg32(gt), t_attr->mask);

        return sysfs_emit(buff, "%u\n", val);
}

@@ -530,11 +486,11 @@ static ssize_t throttle_reason_bool_show(struct device *dev,
        struct intel_gt_bool_throttle_attr attr_##sysfs_func__ = { \
                .attr = { .name = __stringify(sysfs_func__), .mode = 0444 }, \
                .show = throttle_reason_bool_show, \
                .reg32 = GT0_PERF_LIMIT_REASONS, \
                .reg32 = intel_gt_perf_limit_reasons_reg, \
                .mask = mask__, \
        }
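/*
 * Editorial aside (not part of the patch): after this change, an invocation
 * such as
 *
 *      static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_pl1, POWER_LIMIT_1_MASK);
 *
 * roughly expands to
 *
 *      static struct intel_gt_bool_throttle_attr attr_throttle_reason_pl1 = {
 *              .attr = { .name = "throttle_reason_pl1", .mode = 0444 },
 *              .show = throttle_reason_bool_show,
 *              .reg32 = intel_gt_perf_limit_reasons_reg,
 *              .mask = POWER_LIMIT_1_MASK,
 *      };
 *
 * i.e. the limit-reasons register is now resolved per-gt at read time through
 * the function pointer instead of being hard-coded to GT0_PERF_LIMIT_REASONS.
 */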
|
||||
|
||||
static DEVICE_ATTR_RO(punit_req_freq_mhz);
|
||||
INTEL_GT_ATTR_RO(punit_req_freq_mhz);
|
||||
static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_status, GT0_PERF_LIMIT_REASONS_MASK);
|
||||
static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_pl1, POWER_LIMIT_1_MASK);
|
||||
static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_pl2, POWER_LIMIT_2_MASK);
|
||||
@@ -597,8 +553,8 @@ static const struct attribute *throttle_reason_attrs[] = {
#define U8_8_VAL_MASK 0xffff
#define U8_8_SCALE_TO_VALUE "0.00390625"

static ssize_t freq_factor_scale_show(struct device *dev,
                                      struct device_attribute *attr,
static ssize_t freq_factor_scale_show(struct kobject *kobj,
                                      struct kobj_attribute *attr,
                                      char *buff)
{
        return sysfs_emit(buff, "%s\n", U8_8_SCALE_TO_VALUE);
@@ -610,11 +566,11 @@ static u32 media_ratio_mode_to_factor(u32 mode)
        return !mode ? mode : 256 / mode;
}
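/*
 * Worked example (editorial aside): the factor is a u8.8 fixed-point value
 * and U8_8_SCALE_TO_VALUE is its resolution, 1/256 = 0.00390625. So
 * media_ratio_mode_to_factor(1) = 256, which scales back to
 * 256 * 0.00390625 = 1.0 (media clock equal to GT clock), mode 2 gives
 * 128 -> 0.5, and mode 0 is passed straight through as 0.
 */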
|
||||
|
||||
static ssize_t media_freq_factor_show(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
static ssize_t media_freq_factor_show(struct kobject *kobj,
|
||||
struct kobj_attribute *attr,
|
||||
char *buff)
|
||||
{
|
||||
struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
|
||||
struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
|
||||
struct intel_guc_slpc *slpc = >->uc.guc.slpc;
|
||||
intel_wakeref_t wakeref;
|
||||
u32 mode;
|
||||
@ -641,11 +597,11 @@ static ssize_t media_freq_factor_show(struct device *dev,
|
||||
return sysfs_emit(buff, "%u\n", media_ratio_mode_to_factor(mode));
|
||||
}
|
||||
|
||||
static ssize_t media_freq_factor_store(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
static ssize_t media_freq_factor_store(struct kobject *kobj,
|
||||
struct kobj_attribute *attr,
|
||||
const char *buff, size_t count)
|
||||
{
|
||||
struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
|
||||
struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
|
||||
struct intel_guc_slpc *slpc = >->uc.guc.slpc;
|
||||
u32 factor, mode;
|
||||
int err;
|
||||
@ -670,11 +626,11 @@ static ssize_t media_freq_factor_store(struct device *dev,
|
||||
return err ?: count;
|
||||
}
|
||||
|
||||
static ssize_t media_RP0_freq_mhz_show(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
static ssize_t media_RP0_freq_mhz_show(struct kobject *kobj,
|
||||
struct kobj_attribute *attr,
|
||||
char *buff)
|
||||
{
|
||||
struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
|
||||
struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
|
||||
u32 val;
|
||||
int err;
|
||||
|
||||
@ -691,11 +647,11 @@ static ssize_t media_RP0_freq_mhz_show(struct device *dev,
|
||||
return sysfs_emit(buff, "%u\n", val);
|
||||
}
|
||||
|
||||
static ssize_t media_RPn_freq_mhz_show(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
static ssize_t media_RPn_freq_mhz_show(struct kobject *kobj,
|
||||
struct kobj_attribute *attr,
|
||||
char *buff)
|
||||
{
|
||||
struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
|
||||
struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
|
||||
u32 val;
|
||||
int err;
|
||||
|
||||
@ -712,17 +668,17 @@ static ssize_t media_RPn_freq_mhz_show(struct device *dev,
|
||||
return sysfs_emit(buff, "%u\n", val);
|
||||
}
|
||||
|
||||
static DEVICE_ATTR_RW(media_freq_factor);
|
||||
static struct device_attribute dev_attr_media_freq_factor_scale =
|
||||
INTEL_GT_ATTR_RW(media_freq_factor);
|
||||
static struct kobj_attribute attr_media_freq_factor_scale =
|
||||
__ATTR(media_freq_factor.scale, 0444, freq_factor_scale_show, NULL);
|
||||
static DEVICE_ATTR_RO(media_RP0_freq_mhz);
|
||||
static DEVICE_ATTR_RO(media_RPn_freq_mhz);
|
||||
INTEL_GT_ATTR_RO(media_RP0_freq_mhz);
|
||||
INTEL_GT_ATTR_RO(media_RPn_freq_mhz);
|
||||
|
||||
static const struct attribute *media_perf_power_attrs[] = {
|
||||
&dev_attr_media_freq_factor.attr,
|
||||
&dev_attr_media_freq_factor_scale.attr,
|
||||
&dev_attr_media_RP0_freq_mhz.attr,
|
||||
&dev_attr_media_RPn_freq_mhz.attr,
|
||||
&attr_media_freq_factor.attr,
|
||||
&attr_media_freq_factor_scale.attr,
|
||||
&attr_media_RP0_freq_mhz.attr,
|
||||
&attr_media_RPn_freq_mhz.attr,
|
||||
NULL
|
||||
};
|
||||
|
||||
@ -754,20 +710,29 @@ static const struct attribute * const rps_defaults_attrs[] = {
|
||||
NULL
|
||||
};
|
||||
|
||||
static int intel_sysfs_rps_init(struct intel_gt *gt, struct kobject *kobj,
|
||||
const struct attribute * const *attrs)
|
||||
static int intel_sysfs_rps_init(struct intel_gt *gt, struct kobject *kobj)
|
||||
{
|
||||
const struct attribute * const *attrs;
|
||||
struct attribute *vlv_attr;
|
||||
int ret;
|
||||
|
||||
if (GRAPHICS_VER(gt->i915) < 6)
|
||||
return 0;
|
||||
|
||||
if (is_object_gt(kobj)) {
|
||||
attrs = gen6_rps_attrs;
|
||||
vlv_attr = &attr_rps_vlv_rpe_freq_mhz.attr;
|
||||
} else {
|
||||
attrs = gen6_gt_attrs;
|
||||
vlv_attr = &dev_attr_gt_vlv_rpe_freq_mhz.attr;
|
||||
}
|
||||
|
||||
ret = sysfs_create_files(kobj, attrs);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (IS_VALLEYVIEW(gt->i915) || IS_CHERRYVIEW(gt->i915))
|
||||
ret = sysfs_create_file(kobj, &dev_attr_vlv_rpe_freq_mhz.attr);
|
||||
ret = sysfs_create_file(kobj, vlv_attr);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -778,9 +743,7 @@ void intel_gt_sysfs_pm_init(struct intel_gt *gt, struct kobject *kobj)
|
||||
|
||||
intel_sysfs_rc6_init(gt, kobj);
|
||||
|
||||
ret = is_object_gt(kobj) ?
|
||||
intel_sysfs_rps_init(gt, kobj, gen6_rps_attrs) :
|
||||
intel_sysfs_rps_init(gt, kobj, gen6_gt_attrs);
|
||||
ret = intel_sysfs_rps_init(gt, kobj);
|
||||
if (ret)
|
||||
drm_warn(>->i915->drm,
|
||||
"failed to create gt%u RPS sysfs files (%pe)",
|
||||
@ -790,13 +753,13 @@ void intel_gt_sysfs_pm_init(struct intel_gt *gt, struct kobject *kobj)
|
||||
if (!is_object_gt(kobj))
|
||||
return;
|
||||
|
||||
ret = sysfs_create_file(kobj, &dev_attr_punit_req_freq_mhz.attr);
|
||||
ret = sysfs_create_file(kobj, &attr_punit_req_freq_mhz.attr);
|
||||
if (ret)
|
||||
drm_warn(>->i915->drm,
|
||||
"failed to create gt%u punit_req_freq_mhz sysfs (%pe)",
|
||||
gt->info.id, ERR_PTR(ret));
|
||||
|
||||
if (GRAPHICS_VER(gt->i915) >= 11) {
|
||||
if (i915_mmio_reg_valid(intel_gt_perf_limit_reasons_reg(gt))) {
|
||||
ret = sysfs_create_files(kobj, throttle_reason_attrs);
|
||||
if (ret)
|
||||
drm_warn(>->i915->drm,
|
||||
|
@ -20,6 +20,7 @@
|
||||
#include "intel_gsc.h"
|
||||
|
||||
#include "i915_vma.h"
|
||||
#include "i915_perf_types.h"
|
||||
#include "intel_engine_types.h"
|
||||
#include "intel_gt_buffer_pool_types.h"
|
||||
#include "intel_hwconfig.h"
|
||||
@ -59,6 +60,9 @@ enum intel_steering_type {
|
||||
L3BANK,
|
||||
MSLICE,
|
||||
LNCF,
|
||||
GAM,
|
||||
DSS,
|
||||
OADDRM,
|
||||
|
||||
/*
|
||||
* On some platforms there are multiple types of MCR registers that
|
||||
@ -141,20 +145,6 @@ struct intel_gt {
|
||||
struct intel_wakeref wakeref;
|
||||
atomic_t user_wakeref;
|
||||
|
||||
/**
|
||||
* Protects access to lmem usefault list.
|
||||
* It is required, if we are outside of the runtime suspend path,
|
||||
* access to @lmem_userfault_list requires always first grabbing the
|
||||
* runtime pm, to ensure we can't race against runtime suspend.
|
||||
* Once we have that we also need to grab @lmem_userfault_lock,
|
||||
* at which point we have exclusive access.
|
||||
* The runtime suspend path is special since it doesn't really hold any locks,
|
||||
* but instead has exclusive access by virtue of all other accesses requiring
|
||||
* holding the runtime pm wakeref.
|
||||
*/
|
||||
struct mutex lmem_userfault_lock;
|
||||
struct list_head lmem_userfault_list;
|
||||
|
||||
struct list_head closed_vma;
|
||||
spinlock_t closed_lock; /* guards the list of closed_vma */
|
||||
|
||||
@ -170,9 +160,6 @@ struct intel_gt {
|
||||
*/
|
||||
intel_wakeref_t awake;
|
||||
|
||||
/* Manual runtime pm autosuspend delay for user GGTT/lmem mmaps */
|
||||
struct intel_wakeref_auto userfault_wakeref;
|
||||
|
||||
u32 clock_frequency;
|
||||
u32 clock_period_ns;
|
||||
|
||||
@ -286,6 +273,8 @@ struct intel_gt {
|
||||
/* sysfs defaults per gt */
|
||||
struct gt_defaults defaults;
|
||||
struct kobject *sysfs_defaults;
|
||||
|
||||
struct i915_perf_gt perf;
|
||||
};
|
||||
|
||||
struct intel_gt_definition {
|
||||
|
@ -15,6 +15,7 @@
|
||||
#include "i915_trace.h"
|
||||
#include "i915_utils.h"
|
||||
#include "intel_gt.h"
|
||||
#include "intel_gt_mcr.h"
|
||||
#include "intel_gt_regs.h"
|
||||
#include "intel_gtt.h"
|
||||
|
||||
@ -269,11 +270,7 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass)
|
||||
memset64(vm->min_alignment, I915_GTT_MIN_ALIGNMENT,
|
||||
ARRAY_SIZE(vm->min_alignment));
|
||||
|
||||
if (HAS_64K_PAGES(vm->i915) && NEEDS_COMPACT_PT(vm->i915) &&
|
||||
subclass == VM_CLASS_PPGTT) {
|
||||
vm->min_alignment[INTEL_MEMORY_LOCAL] = I915_GTT_PAGE_SIZE_2M;
|
||||
vm->min_alignment[INTEL_MEMORY_STOLEN_LOCAL] = I915_GTT_PAGE_SIZE_2M;
|
||||
} else if (HAS_64K_PAGES(vm->i915)) {
|
||||
if (HAS_64K_PAGES(vm->i915)) {
|
||||
vm->min_alignment[INTEL_MEMORY_LOCAL] = I915_GTT_PAGE_SIZE_64K;
|
||||
vm->min_alignment[INTEL_MEMORY_STOLEN_LOCAL] = I915_GTT_PAGE_SIZE_64K;
|
||||
}
|
||||
@ -343,7 +340,8 @@ int setup_scratch_page(struct i915_address_space *vm)
|
||||
*/
|
||||
size = I915_GTT_PAGE_SIZE_4K;
|
||||
if (i915_vm_is_4lvl(vm) &&
|
||||
HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K))
|
||||
HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K) &&
|
||||
!HAS_64K_PAGES(vm->i915))
|
||||
size = I915_GTT_PAGE_SIZE_64K;
|
||||
|
||||
do {
|
||||
@ -385,18 +383,6 @@ skip:
|
||||
if (size == I915_GTT_PAGE_SIZE_4K)
|
||||
return -ENOMEM;
|
||||
|
||||
/*
|
||||
* If we need 64K minimum GTT pages for device local-memory,
|
||||
* like on XEHPSDV, then we need to fail the allocation here,
|
||||
* otherwise we can't safely support the insertion of
|
||||
* local-memory pages for this vm, since the HW expects the
|
||||
* correct physical alignment and size when the page-table is
|
||||
* operating in 64K GTT mode, which includes any scratch PTEs,
|
||||
* since userspace can still touch them.
|
||||
*/
|
||||
if (HAS_64K_PAGES(vm->i915))
|
||||
return -ENOMEM;
|
||||
|
||||
size = I915_GTT_PAGE_SIZE_4K;
|
||||
} while (1);
|
||||
}
|
||||
@ -493,6 +479,18 @@ static void tgl_setup_private_ppat(struct intel_uncore *uncore)
|
||||
intel_uncore_write(uncore, GEN12_PAT_INDEX(7), GEN8_PPAT_WB);
|
||||
}
|
||||
|
||||
static void xehp_setup_private_ppat(struct intel_gt *gt)
|
||||
{
|
||||
intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(0), GEN8_PPAT_WB);
|
||||
intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(1), GEN8_PPAT_WC);
|
||||
intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(2), GEN8_PPAT_WT);
|
||||
intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(3), GEN8_PPAT_UC);
|
||||
intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(4), GEN8_PPAT_WB);
|
||||
intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(5), GEN8_PPAT_WB);
|
||||
intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(6), GEN8_PPAT_WB);
|
||||
intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(7), GEN8_PPAT_WB);
|
||||
}
|
||||
|
||||
static void icl_setup_private_ppat(struct intel_uncore *uncore)
|
||||
{
|
||||
intel_uncore_write(uncore,
|
||||
@ -585,13 +583,16 @@ static void chv_setup_private_ppat(struct intel_uncore *uncore)
|
||||
intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
|
||||
}
|
||||
|
||||
void setup_private_pat(struct intel_uncore *uncore)
|
||||
void setup_private_pat(struct intel_gt *gt)
|
||||
{
|
||||
struct drm_i915_private *i915 = uncore->i915;
|
||||
struct intel_uncore *uncore = gt->uncore;
|
||||
struct drm_i915_private *i915 = gt->i915;
|
||||
|
||||
GEM_BUG_ON(GRAPHICS_VER(i915) < 8);
|
||||
|
||||
if (GRAPHICS_VER(i915) >= 12)
|
||||
if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
|
||||
xehp_setup_private_ppat(gt);
|
||||
else if (GRAPHICS_VER(i915) >= 12)
|
||||
tgl_setup_private_ppat(uncore);
|
||||
else if (GRAPHICS_VER(i915) >= 11)
|
||||
icl_setup_private_ppat(uncore);
|
||||
|
@ -93,6 +93,7 @@ typedef u64 gen8_pte_t;
|
||||
#define GEN12_GGTT_PTE_LM BIT_ULL(1)
|
||||
|
||||
#define GEN12_PDE_64K BIT(6)
|
||||
#define GEN12_PTE_PS64 BIT(8)
|
||||
|
||||
/*
|
||||
* Cacheability Control is a 4-bit value. The low three bits are stored in bits
|
||||
@ -667,7 +668,7 @@ void ppgtt_unbind_vma(struct i915_address_space *vm,
|
||||
|
||||
void gtt_write_workarounds(struct intel_gt *gt);
|
||||
|
||||
void setup_private_pat(struct intel_uncore *uncore);
|
||||
void setup_private_pat(struct intel_gt *gt);
|
||||
|
||||
int i915_vm_alloc_pt_stash(struct i915_address_space *vm,
|
||||
struct i915_vm_pt_stash *stash,
|
||||
|
@ -20,6 +20,30 @@
|
||||
#include "intel_ring.h"
|
||||
#include "shmem_utils.h"
|
||||
|
||||
/*
 * The per-platform tables are u8-encoded in @data. Decode @data and set the
 * addresses' offset and commands in @regs. The following encoding is used
 * for each byte. There are 2 steps: decoding commands and decoding addresses.
 *
 * Commands:
 * [7]: create NOPs - number of NOPs are set in lower bits
 * [6]: when creating an MI_LOAD_REGISTER_IMM command, allows setting
 *      MI_LRI_FORCE_POSTED
 * [5:0]: number of NOPs, or of registers to set values to in the case of
 *        MI_LOAD_REGISTER_IMM
 *
 * Addresses: these are decoded after a MI_LOAD_REGISTER_IMM command by "count"
 * number of registers. They are set by using the REG/REG16 macros: the former
 * is used for offsets smaller than 0x200 while the latter is for values bigger
 * than that. Those macros already set all the bits documented below correctly:
 *
 * [7]: when a register offset needs more than 6 bits, additional bytes follow
 *      for the lower bits
 * [6:0]: register offset, without considering the engine base.
 *
 * This function only tweaks the commands and register offsets. Values are not
 * filled out.
 */
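/*
 * Editorial aside - a minimal sketch (not the driver's code, names are
 * hypothetical) of how one command byte in the encoding described above
 * could be interpreted:
 */
struct example_decoded_cmd {
        bool is_nop;            /* bit [7]: this byte describes a NOP block */
        bool force_posted;      /* bit [6]: MI_LRI_FORCE_POSTED requested (LRI only) */
        unsigned int count;     /* bits [5:0]: number of NOPs or LRI register slots */
};

static struct example_decoded_cmd example_decode_cmd_byte(u8 byte)
{
        struct example_decoded_cmd cmd = {
                .is_nop = byte & BIT(7),
                .force_posted = byte & BIT(6),
                .count = byte & 0x3f,
        };

        /* For an LRI, 'count' encoded register offsets follow this byte. */
        return cmd;
}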
|
||||
static void set_offsets(u32 *regs,
|
||||
const u8 *data,
|
||||
const struct intel_engine_cs *engine,
|
||||
@ -264,6 +288,39 @@ static const u8 dg2_xcs_offsets[] = {
|
||||
END
|
||||
};
|
||||
|
||||
static const u8 mtl_xcs_offsets[] = {
|
||||
NOP(1),
|
||||
LRI(13, POSTED),
|
||||
REG16(0x244),
|
||||
REG(0x034),
|
||||
REG(0x030),
|
||||
REG(0x038),
|
||||
REG(0x03c),
|
||||
REG(0x168),
|
||||
REG(0x140),
|
||||
REG(0x110),
|
||||
REG(0x1c0),
|
||||
REG(0x1c4),
|
||||
REG(0x1c8),
|
||||
REG(0x180),
|
||||
REG16(0x2b4),
|
||||
NOP(4),
|
||||
|
||||
NOP(1),
|
||||
LRI(9, POSTED),
|
||||
REG16(0x3a8),
|
||||
REG16(0x28c),
|
||||
REG16(0x288),
|
||||
REG16(0x284),
|
||||
REG16(0x280),
|
||||
REG16(0x27c),
|
||||
REG16(0x278),
|
||||
REG16(0x274),
|
||||
REG16(0x270),
|
||||
|
||||
END
|
||||
};
|
||||
|
||||
static const u8 gen8_rcs_offsets[] = {
|
||||
NOP(1),
|
||||
LRI(14, POSTED),
|
||||
@ -606,6 +663,49 @@ static const u8 dg2_rcs_offsets[] = {
|
||||
END
|
||||
};
|
||||
|
||||
static const u8 mtl_rcs_offsets[] = {
|
||||
NOP(1),
|
||||
LRI(15, POSTED),
|
||||
REG16(0x244),
|
||||
REG(0x034),
|
||||
REG(0x030),
|
||||
REG(0x038),
|
||||
REG(0x03c),
|
||||
REG(0x168),
|
||||
REG(0x140),
|
||||
REG(0x110),
|
||||
REG(0x1c0),
|
||||
REG(0x1c4),
|
||||
REG(0x1c8),
|
||||
REG(0x180),
|
||||
REG16(0x2b4),
|
||||
REG(0x120),
|
||||
REG(0x124),
|
||||
|
||||
NOP(1),
|
||||
LRI(9, POSTED),
|
||||
REG16(0x3a8),
|
||||
REG16(0x28c),
|
||||
REG16(0x288),
|
||||
REG16(0x284),
|
||||
REG16(0x280),
|
||||
REG16(0x27c),
|
||||
REG16(0x278),
|
||||
REG16(0x274),
|
||||
REG16(0x270),
|
||||
|
||||
NOP(2),
|
||||
LRI(2, POSTED),
|
||||
REG16(0x5a8),
|
||||
REG16(0x5ac),
|
||||
|
||||
NOP(6),
|
||||
LRI(1, 0),
|
||||
REG(0x0c8),
|
||||
|
||||
END
|
||||
};
|
||||
|
||||
#undef END
|
||||
#undef REG16
|
||||
#undef REG
|
||||
@ -624,7 +724,9 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine)
|
||||
!intel_engine_has_relative_mmio(engine));
|
||||
|
||||
if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE) {
|
||||
if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
|
||||
if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 70))
|
||||
return mtl_rcs_offsets;
|
||||
else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
|
||||
return dg2_rcs_offsets;
|
||||
else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
|
||||
return xehp_rcs_offsets;
|
||||
@ -637,7 +739,9 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine)
|
||||
else
|
||||
return gen8_rcs_offsets;
|
||||
} else {
|
||||
if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
|
||||
if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 70))
|
||||
return mtl_xcs_offsets;
|
||||
else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
|
||||
return dg2_xcs_offsets;
|
||||
else if (GRAPHICS_VER(engine->i915) >= 12)
|
||||
return gen12_xcs_offsets;
|
||||
@ -745,19 +849,18 @@ static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine)
|
||||
static u32
|
||||
lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine)
|
||||
{
|
||||
switch (GRAPHICS_VER(engine->i915)) {
|
||||
default:
|
||||
MISSING_CASE(GRAPHICS_VER(engine->i915));
|
||||
fallthrough;
|
||||
case 12:
|
||||
if (GRAPHICS_VER(engine->i915) >= 12)
|
||||
return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
|
||||
case 11:
|
||||
else if (GRAPHICS_VER(engine->i915) >= 11)
|
||||
return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
|
||||
case 9:
|
||||
else if (GRAPHICS_VER(engine->i915) >= 9)
|
||||
return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
|
||||
case 8:
|
||||
else if (GRAPHICS_VER(engine->i915) >= 8)
|
||||
return GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
|
||||
}
|
||||
|
||||
GEM_BUG_ON(GRAPHICS_VER(engine->i915) < 8);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
@ -1012,7 +1115,7 @@ __lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine)
|
||||
if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
|
||||
context_size += I915_GTT_PAGE_SIZE; /* for redzone */
|
||||
|
||||
if (GRAPHICS_VER(engine->i915) == 12) {
|
||||
if (GRAPHICS_VER(engine->i915) >= 12) {
|
||||
ce->wa_bb_page = context_size / PAGE_SIZE;
|
||||
context_size += PAGE_SIZE;
|
||||
}
|
||||
@ -1718,24 +1821,16 @@ void lrc_init_wa_ctx(struct intel_engine_cs *engine)
|
||||
unsigned int i;
|
||||
int err;
|
||||
|
||||
if (!(engine->flags & I915_ENGINE_HAS_RCS_REG_STATE))
|
||||
if (GRAPHICS_VER(engine->i915) >= 11 ||
|
||||
!(engine->flags & I915_ENGINE_HAS_RCS_REG_STATE))
|
||||
return;
|
||||
|
||||
switch (GRAPHICS_VER(engine->i915)) {
|
||||
case 12:
|
||||
case 11:
|
||||
return;
|
||||
case 9:
|
||||
if (GRAPHICS_VER(engine->i915) == 9) {
|
||||
wa_bb_fn[0] = gen9_init_indirectctx_bb;
|
||||
wa_bb_fn[1] = NULL;
|
||||
break;
|
||||
case 8:
|
||||
} else if (GRAPHICS_VER(engine->i915) == 8) {
|
||||
wa_bb_fn[0] = gen8_init_indirectctx_bb;
|
||||
wa_bb_fn[1] = NULL;
|
||||
break;
|
||||
default:
|
||||
MISSING_CASE(GRAPHICS_VER(engine->i915));
|
||||
return;
|
||||
}
|
||||
|
||||
err = lrc_create_wa_ctx(engine);
|
||||
|
@ -110,6 +110,8 @@ enum {
|
||||
#define XEHP_SW_CTX_ID_WIDTH 16
|
||||
#define XEHP_SW_COUNTER_SHIFT 58
|
||||
#define XEHP_SW_COUNTER_WIDTH 6
|
||||
#define GEN12_GUC_SW_CTX_ID_SHIFT 39
|
||||
#define GEN12_GUC_SW_CTX_ID_WIDTH 16
|
||||
|
||||
static inline void lrc_runtime_start(struct intel_context *ce)
|
||||
{
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include "intel_gtt.h"
|
||||
#include "intel_migrate.h"
|
||||
#include "intel_ring.h"
|
||||
#include "gem/i915_gem_lmem.h"
|
||||
|
||||
struct insert_pte_data {
|
||||
u64 offset;
|
||||
|
@ -7,6 +7,7 @@
|
||||
|
||||
#include "intel_engine.h"
|
||||
#include "intel_gt.h"
|
||||
#include "intel_gt_mcr.h"
|
||||
#include "intel_gt_regs.h"
|
||||
#include "intel_mocs.h"
|
||||
#include "intel_ring.h"
|
||||
@ -609,14 +610,17 @@ static u32 l3cc_combine(u16 low, u16 high)
|
||||
0; \
|
||||
i++)
|
||||
|
||||
static void init_l3cc_table(struct intel_uncore *uncore,
|
||||
static void init_l3cc_table(struct intel_gt *gt,
|
||||
const struct drm_i915_mocs_table *table)
|
||||
{
|
||||
unsigned int i;
|
||||
u32 l3cc;
|
||||
|
||||
for_each_l3cc(l3cc, table, i)
|
||||
intel_uncore_write_fw(uncore, GEN9_LNCFCMOCS(i), l3cc);
|
||||
if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
|
||||
intel_gt_mcr_multicast_write_fw(gt, XEHP_LNCFCMOCS(i), l3cc);
|
||||
else
|
||||
intel_uncore_write_fw(gt->uncore, GEN9_LNCFCMOCS(i), l3cc);
|
||||
}
|
||||
|
||||
void intel_mocs_init_engine(struct intel_engine_cs *engine)
|
||||
@ -636,7 +640,7 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
|
||||
init_mocs_table(engine, &table);
|
||||
|
||||
if (flags & HAS_RENDER_L3CC && engine->class == RENDER_CLASS)
|
||||
init_l3cc_table(engine->uncore, &table);
|
||||
init_l3cc_table(engine->gt, &table);
|
||||
}
|
||||
|
||||
static u32 global_mocs_offset(void)
|
||||
@ -672,7 +676,7 @@ void intel_mocs_init(struct intel_gt *gt)
|
||||
* memory transactions including guc transactions
|
||||
*/
|
||||
if (flags & HAS_RENDER_L3CC)
|
||||
init_l3cc_table(gt->uncore, &table);
|
||||
init_l3cc_table(gt, &table);
|
||||
}
|
||||
|
||||
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
|
||||
|
@ -1278,7 +1278,7 @@ static void intel_gt_reset_global(struct intel_gt *gt,
|
||||
kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event);
|
||||
|
||||
/* Use a watchdog to ensure that our reset completes */
|
||||
intel_wedge_on_timeout(&w, gt, 5 * HZ) {
|
||||
intel_wedge_on_timeout(&w, gt, 60 * HZ) {
|
||||
intel_display_prepare_reset(gt->i915);
|
||||
|
||||
intel_gt_reset(gt, engine_mask, reason);
|
||||
|
@ -625,9 +625,7 @@ static void gen5_rps_disable(struct intel_rps *rps)
|
||||
rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);
|
||||
|
||||
/* Ack interrupts, disable EFC interrupt */
|
||||
intel_uncore_write(uncore, MEMINTREN,
|
||||
intel_uncore_read(uncore, MEMINTREN) &
|
||||
~MEMINT_EVAL_CHG_EN);
|
||||
intel_uncore_rmw(uncore, MEMINTREN, MEMINT_EVAL_CHG_EN, 0);
|
||||
intel_uncore_write(uncore, MEMINTRSTS, MEMINT_EVAL_CHG);
|
||||
|
||||
/* Go back to the starting frequency */
|
||||
@@ -1016,9 +1014,15 @@ void intel_rps_boost(struct i915_request *rq)
        if (rps_uses_slpc(rps)) {
                slpc = rps_to_slpc(rps);

                if (slpc->min_freq_softlimit >= slpc->boost_freq)
                        return;

                /* Return if old value is non zero */
                if (!atomic_fetch_inc(&slpc->num_waiters))
                if (!atomic_fetch_inc(&slpc->num_waiters)) {
                        GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n",
                                 rq->fence.context, rq->fence.seqno);
                        schedule_work(&slpc->boost_work);
                }

                return;
        }
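/*
 * Editorial note: atomic_fetch_inc() returns the value *before* the
 * increment, so with num_waiters starting at 0:
 *
 *      first boosting request:  atomic_fetch_inc() -> 0, boost_work is queued
 *      later boosting requests: atomic_fetch_inc() -> 1, 2, ... nothing queued
 *
 * i.e. only the first waiter kicks the SLPC boost worker; the rest just bump
 * the waiter count.
 */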
|
||||
@ -1085,15 +1089,25 @@ static u32 intel_rps_read_state_cap(struct intel_rps *rps)
|
||||
return intel_uncore_read(uncore, GEN6_RP_STATE_CAP);
|
||||
}
|
||||
|
||||
/**
 * gen6_rps_get_freq_caps - Get freq caps exposed by HW
 * @rps: the intel_rps structure
 * @caps: returned freq caps
 *
 * Returned "caps" frequencies should be converted to MHz using
 * intel_gpu_freq()
 */
void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps)
static void
mtl_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps)
{
        struct intel_uncore *uncore = rps_to_uncore(rps);
        u32 rp_state_cap = rps_to_gt(rps)->type == GT_MEDIA ?
                                intel_uncore_read(uncore, MTL_MEDIAP_STATE_CAP) :
                                intel_uncore_read(uncore, MTL_RP_STATE_CAP);
        u32 rpe = rps_to_gt(rps)->type == GT_MEDIA ?
                        intel_uncore_read(uncore, MTL_MPE_FREQUENCY) :
                        intel_uncore_read(uncore, MTL_GT_RPE_FREQUENCY);

        /* MTL values are in units of 16.67 MHz */
        caps->rp0_freq = REG_FIELD_GET(MTL_RP0_CAP_MASK, rp_state_cap);
        caps->min_freq = REG_FIELD_GET(MTL_RPN_CAP_MASK, rp_state_cap);
        caps->rp1_freq = REG_FIELD_GET(MTL_RPE_MASK, rpe);
}
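/*
 * Worked example (editorial aside): the raw cap fields are left here in
 * hardware units of 16.67 MHz; conversion to MHz happens later via
 * intel_gpu_freq(), as the kernel-doc above notes. For instance, a raw rp0
 * value of 90 would correspond to roughly 90 * 16.67 MHz = 1500 MHz.
 */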
|
||||
|
||||
static void
|
||||
__gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps)
|
||||
{
|
||||
struct drm_i915_private *i915 = rps_to_i915(rps);
|
||||
u32 rp_state_cap;
|
||||
@ -1128,6 +1142,24 @@ void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *c
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* gen6_rps_get_freq_caps - Get freq caps exposed by HW
|
||||
* @rps: the intel_rps structure
|
||||
* @caps: returned freq caps
|
||||
*
|
||||
* Returned "caps" frequencies should be converted to MHz using
|
||||
* intel_gpu_freq()
|
||||
*/
|
||||
void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps)
|
||||
{
|
||||
struct drm_i915_private *i915 = rps_to_i915(rps);
|
||||
|
||||
if (IS_METEORLAKE(i915))
|
||||
return mtl_get_freq_caps(rps, caps);
|
||||
else
|
||||
return __gen6_rps_get_freq_caps(rps, caps);
|
||||
}
|
||||
|
||||
static void gen6_rps_init(struct intel_rps *rps)
|
||||
{
|
||||
struct drm_i915_private *i915 = rps_to_i915(rps);
|
||||
@ -2191,6 +2223,213 @@ u32 intel_rps_get_rpn_frequency(struct intel_rps *rps)
|
||||
return intel_gpu_freq(rps, rps->min_freq);
|
||||
}
|
||||
|
||||
static void rps_frequency_dump(struct intel_rps *rps, struct drm_printer *p)
|
||||
{
|
||||
struct intel_gt *gt = rps_to_gt(rps);
|
||||
struct drm_i915_private *i915 = gt->i915;
|
||||
struct intel_uncore *uncore = gt->uncore;
|
||||
struct intel_rps_freq_caps caps;
|
||||
u32 rp_state_limits;
|
||||
u32 gt_perf_status;
|
||||
u32 rpmodectl, rpinclimit, rpdeclimit;
|
||||
u32 rpstat, cagf, reqf;
|
||||
u32 rpcurupei, rpcurup, rpprevup;
|
||||
u32 rpcurdownei, rpcurdown, rpprevdown;
|
||||
u32 rpupei, rpupt, rpdownei, rpdownt;
|
||||
u32 pm_ier, pm_imr, pm_isr, pm_iir, pm_mask;
|
||||
|
||||
rp_state_limits = intel_uncore_read(uncore, GEN6_RP_STATE_LIMITS);
|
||||
gen6_rps_get_freq_caps(rps, &caps);
|
||||
if (IS_GEN9_LP(i915))
|
||||
gt_perf_status = intel_uncore_read(uncore, BXT_GT_PERF_STATUS);
|
||||
else
|
||||
gt_perf_status = intel_uncore_read(uncore, GEN6_GT_PERF_STATUS);
|
||||
|
||||
/* RPSTAT1 is in the GT power well */
|
||||
intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
|
||||
|
||||
reqf = intel_uncore_read(uncore, GEN6_RPNSWREQ);
|
||||
if (GRAPHICS_VER(i915) >= 9) {
|
||||
reqf >>= 23;
|
||||
} else {
|
||||
reqf &= ~GEN6_TURBO_DISABLE;
|
||||
if (IS_HASWELL(i915) || IS_BROADWELL(i915))
|
||||
reqf >>= 24;
|
||||
else
|
||||
reqf >>= 25;
|
||||
}
|
||||
reqf = intel_gpu_freq(rps, reqf);
|
||||
|
||||
rpmodectl = intel_uncore_read(uncore, GEN6_RP_CONTROL);
|
||||
rpinclimit = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD);
|
||||
rpdeclimit = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD);
|
||||
|
||||
rpstat = intel_uncore_read(uncore, GEN6_RPSTAT1);
|
||||
rpcurupei = intel_uncore_read(uncore, GEN6_RP_CUR_UP_EI) & GEN6_CURICONT_MASK;
|
||||
rpcurup = intel_uncore_read(uncore, GEN6_RP_CUR_UP) & GEN6_CURBSYTAVG_MASK;
|
||||
rpprevup = intel_uncore_read(uncore, GEN6_RP_PREV_UP) & GEN6_CURBSYTAVG_MASK;
|
||||
rpcurdownei = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN_EI) & GEN6_CURIAVG_MASK;
|
||||
rpcurdown = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN) & GEN6_CURBSYTAVG_MASK;
|
||||
rpprevdown = intel_uncore_read(uncore, GEN6_RP_PREV_DOWN) & GEN6_CURBSYTAVG_MASK;
|
||||
|
||||
rpupei = intel_uncore_read(uncore, GEN6_RP_UP_EI);
|
||||
rpupt = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD);
|
||||
|
||||
rpdownei = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
|
||||
rpdownt = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD);
|
||||
|
||||
cagf = intel_rps_read_actual_frequency(rps);
|
||||
|
||||
intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
|
||||
|
||||
if (GRAPHICS_VER(i915) >= 11) {
|
||||
pm_ier = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE);
|
||||
pm_imr = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_MASK);
|
||||
/*
|
||||
* The equivalent to the PM ISR & IIR cannot be read
|
||||
* without affecting the current state of the system
|
||||
*/
|
||||
pm_isr = 0;
|
||||
pm_iir = 0;
|
||||
} else if (GRAPHICS_VER(i915) >= 8) {
|
||||
pm_ier = intel_uncore_read(uncore, GEN8_GT_IER(2));
|
||||
pm_imr = intel_uncore_read(uncore, GEN8_GT_IMR(2));
|
||||
pm_isr = intel_uncore_read(uncore, GEN8_GT_ISR(2));
|
||||
pm_iir = intel_uncore_read(uncore, GEN8_GT_IIR(2));
|
||||
} else {
|
||||
pm_ier = intel_uncore_read(uncore, GEN6_PMIER);
|
||||
pm_imr = intel_uncore_read(uncore, GEN6_PMIMR);
|
||||
pm_isr = intel_uncore_read(uncore, GEN6_PMISR);
|
||||
pm_iir = intel_uncore_read(uncore, GEN6_PMIIR);
|
||||
}
|
||||
pm_mask = intel_uncore_read(uncore, GEN6_PMINTRMSK);
|
||||
|
||||
drm_printf(p, "Video Turbo Mode: %s\n",
|
||||
str_yes_no(rpmodectl & GEN6_RP_MEDIA_TURBO));
|
||||
drm_printf(p, "HW control enabled: %s\n",
|
||||
str_yes_no(rpmodectl & GEN6_RP_ENABLE));
|
||||
drm_printf(p, "SW control enabled: %s\n",
|
||||
str_yes_no((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) == GEN6_RP_MEDIA_SW_MODE));
|
||||
|
||||
drm_printf(p, "PM IER=0x%08x IMR=0x%08x, MASK=0x%08x\n",
|
||||
pm_ier, pm_imr, pm_mask);
|
||||
if (GRAPHICS_VER(i915) <= 10)
|
||||
drm_printf(p, "PM ISR=0x%08x IIR=0x%08x\n",
|
||||
pm_isr, pm_iir);
|
||||
drm_printf(p, "pm_intrmsk_mbz: 0x%08x\n",
|
||||
rps->pm_intrmsk_mbz);
|
||||
drm_printf(p, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status);
|
||||
drm_printf(p, "Render p-state ratio: %d\n",
|
||||
(gt_perf_status & (GRAPHICS_VER(i915) >= 9 ? 0x1ff00 : 0xff00)) >> 8);
|
||||
drm_printf(p, "Render p-state VID: %d\n",
|
||||
gt_perf_status & 0xff);
|
||||
drm_printf(p, "Render p-state limit: %d\n",
|
||||
rp_state_limits & 0xff);
|
||||
drm_printf(p, "RPSTAT1: 0x%08x\n", rpstat);
|
||||
drm_printf(p, "RPMODECTL: 0x%08x\n", rpmodectl);
|
||||
drm_printf(p, "RPINCLIMIT: 0x%08x\n", rpinclimit);
|
||||
drm_printf(p, "RPDECLIMIT: 0x%08x\n", rpdeclimit);
|
||||
drm_printf(p, "RPNSWREQ: %dMHz\n", reqf);
|
||||
drm_printf(p, "CAGF: %dMHz\n", cagf);
|
||||
drm_printf(p, "RP CUR UP EI: %d (%lldns)\n",
|
||||
rpcurupei,
|
||||
intel_gt_pm_interval_to_ns(gt, rpcurupei));
|
||||
drm_printf(p, "RP CUR UP: %d (%lldns)\n",
|
||||
rpcurup, intel_gt_pm_interval_to_ns(gt, rpcurup));
|
||||
drm_printf(p, "RP PREV UP: %d (%lldns)\n",
|
||||
rpprevup, intel_gt_pm_interval_to_ns(gt, rpprevup));
|
||||
drm_printf(p, "Up threshold: %d%%\n",
|
||||
rps->power.up_threshold);
|
||||
drm_printf(p, "RP UP EI: %d (%lldns)\n",
|
||||
rpupei, intel_gt_pm_interval_to_ns(gt, rpupei));
|
||||
drm_printf(p, "RP UP THRESHOLD: %d (%lldns)\n",
|
||||
rpupt, intel_gt_pm_interval_to_ns(gt, rpupt));
|
||||
|
||||
drm_printf(p, "RP CUR DOWN EI: %d (%lldns)\n",
|
||||
rpcurdownei,
|
||||
intel_gt_pm_interval_to_ns(gt, rpcurdownei));
|
||||
drm_printf(p, "RP CUR DOWN: %d (%lldns)\n",
|
||||
rpcurdown,
|
||||
intel_gt_pm_interval_to_ns(gt, rpcurdown));
|
||||
drm_printf(p, "RP PREV DOWN: %d (%lldns)\n",
|
||||
rpprevdown,
|
||||
intel_gt_pm_interval_to_ns(gt, rpprevdown));
|
||||
drm_printf(p, "Down threshold: %d%%\n",
|
||||
rps->power.down_threshold);
|
||||
drm_printf(p, "RP DOWN EI: %d (%lldns)\n",
|
||||
rpdownei, intel_gt_pm_interval_to_ns(gt, rpdownei));
|
||||
drm_printf(p, "RP DOWN THRESHOLD: %d (%lldns)\n",
|
||||
rpdownt, intel_gt_pm_interval_to_ns(gt, rpdownt));
|
||||
|
||||
drm_printf(p, "Lowest (RPN) frequency: %dMHz\n",
|
||||
intel_gpu_freq(rps, caps.min_freq));
|
||||
drm_printf(p, "Nominal (RP1) frequency: %dMHz\n",
|
||||
intel_gpu_freq(rps, caps.rp1_freq));
|
||||
drm_printf(p, "Max non-overclocked (RP0) frequency: %dMHz\n",
|
||||
intel_gpu_freq(rps, caps.rp0_freq));
|
||||
drm_printf(p, "Max overclocked frequency: %dMHz\n",
|
||||
intel_gpu_freq(rps, rps->max_freq));
|
||||
|
||||
drm_printf(p, "Current freq: %d MHz\n",
|
||||
intel_gpu_freq(rps, rps->cur_freq));
|
||||
drm_printf(p, "Actual freq: %d MHz\n", cagf);
|
||||
drm_printf(p, "Idle freq: %d MHz\n",
|
||||
intel_gpu_freq(rps, rps->idle_freq));
|
||||
drm_printf(p, "Min freq: %d MHz\n",
|
||||
intel_gpu_freq(rps, rps->min_freq));
|
||||
drm_printf(p, "Boost freq: %d MHz\n",
|
||||
intel_gpu_freq(rps, rps->boost_freq));
|
||||
drm_printf(p, "Max freq: %d MHz\n",
|
||||
intel_gpu_freq(rps, rps->max_freq));
|
||||
drm_printf(p,
|
||||
"efficient (RPe) frequency: %d MHz\n",
|
||||
intel_gpu_freq(rps, rps->efficient_freq));
|
||||
}
|
||||
|
||||
static void slpc_frequency_dump(struct intel_rps *rps, struct drm_printer *p)
|
||||
{
|
||||
struct intel_gt *gt = rps_to_gt(rps);
|
||||
struct intel_uncore *uncore = gt->uncore;
|
||||
struct intel_rps_freq_caps caps;
|
||||
u32 pm_mask;
|
||||
|
||||
gen6_rps_get_freq_caps(rps, &caps);
|
||||
pm_mask = intel_uncore_read(uncore, GEN6_PMINTRMSK);
|
||||
|
||||
drm_printf(p, "PM MASK=0x%08x\n", pm_mask);
|
||||
drm_printf(p, "pm_intrmsk_mbz: 0x%08x\n",
|
||||
rps->pm_intrmsk_mbz);
|
||||
drm_printf(p, "RPSTAT1: 0x%08x\n", intel_uncore_read(uncore, GEN6_RPSTAT1));
|
||||
drm_printf(p, "RPNSWREQ: %dMHz\n", intel_rps_get_requested_frequency(rps));
|
||||
drm_printf(p, "Lowest (RPN) frequency: %dMHz\n",
|
||||
intel_gpu_freq(rps, caps.min_freq));
|
||||
drm_printf(p, "Nominal (RP1) frequency: %dMHz\n",
|
||||
intel_gpu_freq(rps, caps.rp1_freq));
|
||||
drm_printf(p, "Max non-overclocked (RP0) frequency: %dMHz\n",
|
||||
intel_gpu_freq(rps, caps.rp0_freq));
|
||||
drm_printf(p, "Current freq: %d MHz\n",
|
||||
intel_rps_get_requested_frequency(rps));
|
||||
drm_printf(p, "Actual freq: %d MHz\n",
|
||||
intel_rps_read_actual_frequency(rps));
|
||||
drm_printf(p, "Min freq: %d MHz\n",
|
||||
intel_rps_get_min_frequency(rps));
|
||||
drm_printf(p, "Boost freq: %d MHz\n",
|
||||
intel_rps_get_boost_frequency(rps));
|
||||
drm_printf(p, "Max freq: %d MHz\n",
|
||||
intel_rps_get_max_frequency(rps));
|
||||
drm_printf(p,
|
||||
"efficient (RPe) frequency: %d MHz\n",
|
||||
intel_gpu_freq(rps, caps.rp1_freq));
|
||||
}
|
||||
|
||||
void gen6_rps_frequency_dump(struct intel_rps *rps, struct drm_printer *p)
|
||||
{
|
||||
if (rps_uses_slpc(rps))
|
||||
return slpc_frequency_dump(rps, p);
|
||||
else
|
||||
return rps_frequency_dump(rps, p);
|
||||
}
|
||||
|
||||
static int set_max_freq(struct intel_rps *rps, u32 val)
|
||||
{
|
||||
struct drm_i915_private *i915 = rps_to_i915(rps);
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include "i915_reg_defs.h"
|
||||
|
||||
struct i915_request;
|
||||
struct drm_printer;
|
||||
|
||||
void intel_rps_init_early(struct intel_rps *rps);
|
||||
void intel_rps_init(struct intel_rps *rps);
|
||||
@ -54,6 +55,8 @@ void intel_rps_lower_unslice(struct intel_rps *rps);
|
||||
u32 intel_rps_read_throttle_reason(struct intel_rps *rps);
|
||||
bool rps_read_mask_mmio(struct intel_rps *rps, i915_reg_t reg32, u32 mask);
|
||||
|
||||
void gen6_rps_frequency_dump(struct intel_rps *rps, struct drm_printer *p);
|
||||
|
||||
void gen5_rps_irq_handler(struct intel_rps *rps);
|
||||
void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir);
|
||||
void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir);
|
||||
|
@@ -677,8 +677,8 @@ u32 intel_sseu_make_rpcs(struct intel_gt *gt,
         * If i915/perf is active, we want a stable powergating configuration
         * on the system. Use the configuration pinned by i915/perf.
         */
        if (i915->perf.exclusive_stream)
                req_sseu = &i915->perf.sseu;
        if (gt->perf.exclusive_stream)
                req_sseu = &gt->perf.sseu;

        slices = hweight8(req_sseu->slice_mask);
        subslices = hweight8(req_sseu->subslice_mask);
|
||||
|
File diff suppressed because it is too large
@ -11,11 +11,16 @@
|
||||
#include "i915_reg_defs.h"
|
||||
|
||||
struct i915_wa {
        i915_reg_t reg;
        union {
                i915_reg_t reg;
                i915_mcr_reg_t mcr_reg;
        };
        u32 clr;
        u32 set;
        u32 read;
        bool masked_reg;

        u32 masked_reg:1;
        u32 is_mcr:1;
};
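/*
 * Editorial aside (sketch only, helper names hypothetical): with the union
 * above, only one of @reg and @mcr_reg is meaningful for a given entry, and
 * the new @is_mcr bit records which one, so a consumer would branch roughly
 * like:
 *
 *      if (wa->is_mcr)
 *              apply_mcr_wa(gt, wa->mcr_reg, wa->clr, wa->set);
 *      else
 *              apply_wa(uncore, wa->reg, wa->clr, wa->set);
 *
 * where apply_mcr_wa()/apply_wa() stand in for the multicast-aware and plain
 * register-write paths respectively.
 */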
|
||||
|
||||
struct i915_wa_list {
|
||||
|
@ -39,6 +39,16 @@ static int perf_end(struct intel_gt *gt)
|
||||
return igt_flush_test(gt->i915);
|
||||
}
|
||||
|
||||
static i915_reg_t timestamp_reg(struct intel_engine_cs *engine)
|
||||
{
|
||||
struct drm_i915_private *i915 = engine->i915;
|
||||
|
||||
if (GRAPHICS_VER(i915) == 5 || IS_G4X(i915))
|
||||
return RING_TIMESTAMP_UDW(engine->mmio_base);
|
||||
else
|
||||
return RING_TIMESTAMP(engine->mmio_base);
|
||||
}
|
||||
|
||||
static int write_timestamp(struct i915_request *rq, int slot)
|
||||
{
|
||||
struct intel_timeline *tl =
|
||||
@ -55,7 +65,7 @@ static int write_timestamp(struct i915_request *rq, int slot)
|
||||
if (GRAPHICS_VER(rq->engine->i915) >= 8)
|
||||
cmd++;
|
||||
*cs++ = cmd;
|
||||
*cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
|
||||
*cs++ = i915_mmio_reg_offset(timestamp_reg(rq->engine));
|
||||
*cs++ = tl->hwsp_offset + slot * sizeof(u32);
|
||||
*cs++ = 0;
|
||||
|
||||
@ -125,7 +135,7 @@ static int perf_mi_bb_start(void *arg)
|
||||
enum intel_engine_id id;
|
||||
int err = 0;
|
||||
|
||||
if (GRAPHICS_VER(gt->i915) < 7) /* for per-engine CS_TIMESTAMP */
|
||||
if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? */
|
||||
return 0;
|
||||
|
||||
perf_begin(gt);
|
||||
@ -135,6 +145,9 @@ static int perf_mi_bb_start(void *arg)
|
||||
u32 cycles[COUNT];
|
||||
int i;
|
||||
|
||||
if (GRAPHICS_VER(engine->i915) < 7 && engine->id != RCS0)
|
||||
continue;
|
||||
|
||||
intel_engine_pm_get(engine);
|
||||
|
||||
batch = create_empty_batch(ce);
|
||||
@ -249,7 +262,7 @@ static int perf_mi_noop(void *arg)
|
||||
enum intel_engine_id id;
|
||||
int err = 0;
|
||||
|
||||
if (GRAPHICS_VER(gt->i915) < 7) /* for per-engine CS_TIMESTAMP */
|
||||
if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? */
|
||||
return 0;
|
||||
|
||||
perf_begin(gt);
|
||||
@ -259,6 +272,9 @@ static int perf_mi_noop(void *arg)
|
||||
u32 cycles[COUNT];
|
||||
int i;
|
||||
|
||||
if (GRAPHICS_VER(engine->i915) < 7 && engine->id != RCS0)
|
||||
continue;
|
||||
|
||||
intel_engine_pm_get(engine);
|
||||
|
||||
base = create_empty_batch(ce);
|
||||
|
@ -85,8 +85,6 @@ static int wait_for_reset(struct intel_engine_cs *engine,
|
||||
break;
|
||||
} while (time_before(jiffies, timeout));
|
||||
|
||||
flush_scheduled_work();
|
||||
|
||||
if (rq->fence.error != -EIO) {
|
||||
pr_err("%s: hanging request %llx:%lld not reset\n",
|
||||
engine->name,
|
||||
@ -3475,12 +3473,14 @@ static int random_priority(struct rnd_state *rnd)
|
||||
|
||||
struct preempt_smoke {
|
||||
struct intel_gt *gt;
|
||||
struct kthread_work work;
|
||||
struct i915_gem_context **contexts;
|
||||
struct intel_engine_cs *engine;
|
||||
struct drm_i915_gem_object *batch;
|
||||
unsigned int ncontext;
|
||||
struct rnd_state prng;
|
||||
unsigned long count;
|
||||
int result;
|
||||
};
|
||||
|
||||
static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
|
||||
@ -3540,34 +3540,31 @@ unpin:
|
||||
return err;
|
||||
}
|
||||
|
||||
static int smoke_crescendo_thread(void *arg)
static void smoke_crescendo_work(struct kthread_work *work)
{
        struct preempt_smoke *smoke = arg;
        struct preempt_smoke *smoke = container_of(work, typeof(*smoke), work);
        IGT_TIMEOUT(end_time);
        unsigned long count;

        count = 0;
        do {
                struct i915_gem_context *ctx = smoke_context(smoke);
                int err;

                err = smoke_submit(smoke,
                                   ctx, count % I915_PRIORITY_MAX,
                                   smoke->batch);
                if (err)
                        return err;
                smoke->result = smoke_submit(smoke, ctx,
                                             count % I915_PRIORITY_MAX,
                                             smoke->batch);

                count++;
        } while (count < smoke->ncontext && !__igt_timeout(end_time, NULL));
        } while (!smoke->result && count < smoke->ncontext &&
                 !__igt_timeout(end_time, NULL));

        smoke->count = count;
        return 0;
}
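/*
 * Editorial aside - a self-contained sketch (names hypothetical) of the
 * kthread_worker pattern this selftest is being converted to: the work item
 * reports status through its containing structure instead of a kthread
 * return value, and the caller flushes the work before destroying the
 * worker. Assumes <linux/kthread.h> and <linux/err.h>.
 */
struct example_job {
        struct kthread_work work;
        int result;
};

static void example_job_fn(struct kthread_work *work)
{
        struct example_job *job = container_of(work, typeof(*job), work);

        job->result = 0;        /* do the real work here */
}

static int example_run_job(void)
{
        struct kthread_worker *worker;
        struct example_job job = { .result = -EINVAL };

        worker = kthread_create_worker(0, "example");
        if (IS_ERR(worker))
                return PTR_ERR(worker);

        kthread_init_work(&job.work, example_job_fn);
        kthread_queue_work(worker, &job.work);
        kthread_flush_work(&job.work);
        kthread_destroy_worker(worker);

        return job.result;
}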
|
||||
|
||||
static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
|
||||
#define BATCH BIT(0)
|
||||
{
|
||||
struct task_struct *tsk[I915_NUM_ENGINES] = {};
|
||||
struct kthread_worker *worker[I915_NUM_ENGINES] = {};
|
||||
struct preempt_smoke *arg;
|
||||
struct intel_engine_cs *engine;
|
||||
enum intel_engine_id id;
|
||||
@ -3578,6 +3575,8 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
|
||||
if (!arg)
|
||||
return -ENOMEM;
|
||||
|
||||
memset(arg, 0, I915_NUM_ENGINES * sizeof(*arg));
|
||||
|
||||
for_each_engine(engine, smoke->gt, id) {
|
||||
arg[id] = *smoke;
|
||||
arg[id].engine = engine;
|
||||
@ -3585,31 +3584,28 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
|
||||
arg[id].batch = NULL;
|
||||
arg[id].count = 0;
|
||||
|
||||
tsk[id] = kthread_run(smoke_crescendo_thread, arg,
|
||||
"igt/smoke:%d", id);
|
||||
if (IS_ERR(tsk[id])) {
|
||||
err = PTR_ERR(tsk[id]);
|
||||
worker[id] = kthread_create_worker(0, "igt/smoke:%d", id);
|
||||
if (IS_ERR(worker[id])) {
|
||||
err = PTR_ERR(worker[id]);
|
||||
break;
|
||||
}
|
||||
get_task_struct(tsk[id]);
|
||||
}
|
||||
|
||||
yield(); /* start all threads before we kthread_stop() */
|
||||
kthread_init_work(&arg[id].work, smoke_crescendo_work);
|
||||
kthread_queue_work(worker[id], &arg[id].work);
|
||||
}
|
||||
|
||||
count = 0;
|
||||
for_each_engine(engine, smoke->gt, id) {
|
||||
int status;
|
||||
|
||||
if (IS_ERR_OR_NULL(tsk[id]))
|
||||
if (IS_ERR_OR_NULL(worker[id]))
|
||||
continue;
|
||||
|
||||
status = kthread_stop(tsk[id]);
|
||||
if (status && !err)
|
||||
err = status;
|
||||
kthread_flush_work(&arg[id].work);
|
||||
if (arg[id].result && !err)
|
||||
err = arg[id].result;
|
||||
|
||||
count += arg[id].count;
|
||||
|
||||
put_task_struct(tsk[id]);
|
||||
kthread_destroy_worker(worker[id]);
|
||||
}
|
||||
|
||||
pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
|
||||
|
@ -36,6 +36,19 @@ static int cmp_u32(const void *A, const void *B)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static u32 read_timestamp(struct intel_engine_cs *engine)
|
||||
{
|
||||
struct drm_i915_private *i915 = engine->i915;
|
||||
|
||||
/* On i965 the first read tends to give a stale value */
|
||||
ENGINE_READ_FW(engine, RING_TIMESTAMP);
|
||||
|
||||
if (GRAPHICS_VER(i915) == 5 || IS_G4X(i915))
|
||||
return ENGINE_READ_FW(engine, RING_TIMESTAMP_UDW);
|
||||
else
|
||||
return ENGINE_READ_FW(engine, RING_TIMESTAMP);
|
||||
}
|
||||
|
||||
static void measure_clocks(struct intel_engine_cs *engine,
|
||||
u32 *out_cycles, ktime_t *out_dt)
|
||||
{
|
||||
@ -45,13 +58,13 @@ static void measure_clocks(struct intel_engine_cs *engine,
|
||||
|
||||
for (i = 0; i < 5; i++) {
|
||||
local_irq_disable();
|
||||
cycles[i] = -ENGINE_READ_FW(engine, RING_TIMESTAMP);
|
||||
cycles[i] = -read_timestamp(engine);
|
||||
dt[i] = ktime_get();
|
||||
|
||||
udelay(1000);
|
||||
|
||||
dt[i] = ktime_sub(ktime_get(), dt[i]);
|
||||
cycles[i] += ENGINE_READ_FW(engine, RING_TIMESTAMP);
|
||||
cycles[i] += read_timestamp(engine);
|
||||
local_irq_enable();
|
||||
}
|
||||
|
||||
@ -78,25 +91,6 @@ static int live_gt_clocks(void *arg)
|
||||
if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? */
|
||||
return 0;
|
||||
|
||||
if (GRAPHICS_VER(gt->i915) == 5)
|
||||
/*
|
||||
* XXX CS_TIMESTAMP low dword is dysfunctional?
|
||||
*
|
||||
* Ville's experiments indicate the high dword still works,
|
||||
* but at a correspondingly reduced frequency.
|
||||
*/
|
||||
return 0;
|
||||
|
||||
if (GRAPHICS_VER(gt->i915) == 4)
|
||||
/*
|
||||
* XXX CS_TIMESTAMP appears gibberish
|
||||
*
|
||||
* Ville's experiments indicate that it mostly appears 'stuck'
|
||||
* in that we see the register report the same cycle count
|
||||
* for a couple of reads.
|
||||
*/
|
||||
return 0;
|
||||
|
||||
intel_gt_pm_get(gt);
|
||||
intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
|
||||
|
||||
|
@ -866,10 +866,13 @@ static int igt_reset_active_engine(void *arg)
|
||||
}
|
||||
|
||||
struct active_engine {
|
||||
struct task_struct *task;
|
||||
struct kthread_worker *worker;
|
||||
struct kthread_work work;
|
||||
struct intel_engine_cs *engine;
|
||||
unsigned long resets;
|
||||
unsigned int flags;
|
||||
bool stop;
|
||||
int result;
|
||||
};
|
||||
|
||||
#define TEST_ACTIVE BIT(0)
|
||||
@ -900,10 +903,10 @@ static int active_request_put(struct i915_request *rq)
|
||||
return err;
|
||||
}
|
||||
|
||||
static int active_engine(void *data)
|
||||
static void active_engine(struct kthread_work *work)
|
||||
{
|
||||
I915_RND_STATE(prng);
|
||||
struct active_engine *arg = data;
|
||||
struct active_engine *arg = container_of(work, typeof(*arg), work);
|
||||
struct intel_engine_cs *engine = arg->engine;
|
||||
struct i915_request *rq[8] = {};
|
||||
struct intel_context *ce[ARRAY_SIZE(rq)];
|
||||
@ -913,16 +916,17 @@ static int active_engine(void *data)
|
||||
for (count = 0; count < ARRAY_SIZE(ce); count++) {
|
||||
ce[count] = intel_context_create(engine);
|
||||
if (IS_ERR(ce[count])) {
|
||||
err = PTR_ERR(ce[count]);
|
||||
pr_err("[%s] Create context #%ld failed: %d!\n", engine->name, count, err);
|
||||
arg->result = PTR_ERR(ce[count]);
|
||||
pr_err("[%s] Create context #%ld failed: %d!\n",
|
||||
engine->name, count, arg->result);
|
||||
while (--count)
|
||||
intel_context_put(ce[count]);
|
||||
return err;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
count = 0;
|
||||
while (!kthread_should_stop()) {
|
||||
while (!READ_ONCE(arg->stop)) {
|
||||
unsigned int idx = count++ & (ARRAY_SIZE(rq) - 1);
|
||||
struct i915_request *old = rq[idx];
|
||||
struct i915_request *new;
|
||||
@ -967,7 +971,7 @@ static int active_engine(void *data)
|
||||
intel_context_put(ce[count]);
|
||||
}
|
||||
|
||||
return err;
|
||||
arg->result = err;
|
||||
}
|
||||
|
||||
static int __igt_reset_engines(struct intel_gt *gt,
|
||||
@ -1022,7 +1026,7 @@ static int __igt_reset_engines(struct intel_gt *gt,
|
||||
|
||||
memset(threads, 0, sizeof(*threads) * I915_NUM_ENGINES);
|
||||
for_each_engine(other, gt, tmp) {
|
||||
struct task_struct *tsk;
|
||||
struct kthread_worker *worker;
|
||||
|
||||
threads[tmp].resets =
|
||||
i915_reset_engine_count(global, other);
|
||||
@ -1036,19 +1040,21 @@ static int __igt_reset_engines(struct intel_gt *gt,
|
||||
threads[tmp].engine = other;
|
||||
threads[tmp].flags = flags;
|
||||
|
||||
tsk = kthread_run(active_engine, &threads[tmp],
|
||||
"igt/%s", other->name);
|
||||
if (IS_ERR(tsk)) {
|
||||
err = PTR_ERR(tsk);
|
||||
pr_err("[%s] Thread spawn failed: %d!\n", engine->name, err);
|
||||
worker = kthread_create_worker(0, "igt/%s",
|
||||
other->name);
|
||||
if (IS_ERR(worker)) {
|
||||
err = PTR_ERR(worker);
|
||||
pr_err("[%s] Worker create failed: %d!\n",
|
||||
engine->name, err);
|
||||
goto unwind;
|
||||
}
|
||||
|
||||
threads[tmp].task = tsk;
|
||||
get_task_struct(tsk);
|
||||
}
|
||||
threads[tmp].worker = worker;
|
||||
|
||||
yield(); /* start all threads before we begin */
|
||||
kthread_init_work(&threads[tmp].work, active_engine);
|
||||
kthread_queue_work(threads[tmp].worker,
|
||||
&threads[tmp].work);
|
||||
}
|
||||
|
||||
st_engine_heartbeat_disable_no_pm(engine);
|
||||
GEM_BUG_ON(test_and_set_bit(I915_RESET_ENGINE + id,
|
||||
@ -1197,17 +1203,20 @@ unwind:
|
||||
for_each_engine(other, gt, tmp) {
|
||||
int ret;
|
||||
|
||||
if (!threads[tmp].task)
|
||||
if (!threads[tmp].worker)
|
||||
continue;
|
||||
|
||||
ret = kthread_stop(threads[tmp].task);
|
||||
WRITE_ONCE(threads[tmp].stop, true);
|
||||
kthread_flush_work(&threads[tmp].work);
|
||||
ret = READ_ONCE(threads[tmp].result);
|
||||
if (ret) {
|
||||
pr_err("kthread for other engine %s failed, err=%d\n",
|
||||
other->name, ret);
|
||||
if (!err)
|
||||
err = ret;
|
||||
}
|
||||
put_task_struct(threads[tmp].task);
|
||||
|
||||
kthread_destroy_worker(threads[tmp].worker);
|
||||
|
||||
/* GuC based resets are not logged per engine */
|
||||
if (!using_guc) {
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include <linux/sort.h>
|
||||
|
||||
#include "gem/i915_gem_internal.h"
|
||||
#include "gem/i915_gem_lmem.h"
|
||||
|
||||
#include "selftests/i915_random.h"
|
||||
|
||||
|
@ -1107,21 +1107,27 @@ static u64 __measure_power(int duration_ms)
|
||||
return div64_u64(1000 * 1000 * dE, dt);
|
||||
}
|
||||
|
||||
static u64 measure_power_at(struct intel_rps *rps, int *freq)
static u64 measure_power(struct intel_rps *rps, int *freq)
{
        u64 x[5];
        int i;

        *freq = rps_set_check(rps, *freq);
        for (i = 0; i < 5; i++)
                x[i] = __measure_power(5);
        *freq = (*freq + read_cagf(rps)) / 2;

        *freq = (*freq + intel_rps_read_actual_frequency(rps)) / 2;

        /* A simple triangle filter for better result stability */
        sort(x, 5, sizeof(*x), cmp_u64, NULL);
        return div_u64(x[1] + 2 * x[2] + x[3], 4);
}
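/*
 * Worked example (editorial aside): with the five sorted samples
 * {90, 100, 102, 104, 300}, the lowest and highest readings are discarded
 * and the filter returns (100 + 2 * 102 + 104) / 4 = 102, so a single
 * outlier sample barely moves the reported power.
 */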

static u64 measure_power_at(struct intel_rps *rps, int *freq)
{
        *freq = rps_set_check(rps, *freq);
        return measure_power(rps, freq);
}
|
||||
|
||||
int live_rps_power(void *arg)
|
||||
{
|
||||
struct intel_gt *gt = arg;
|
||||
|
@ -11,7 +11,8 @@
|
||||
enum test_type {
|
||||
VARY_MIN,
|
||||
VARY_MAX,
|
||||
MAX_GRANTED
|
||||
MAX_GRANTED,
|
||||
SLPC_POWER,
|
||||
};
|
||||
|
||||
static int slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 freq)
|
||||
@ -41,6 +42,39 @@ static int slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 freq)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int slpc_set_freq(struct intel_gt *gt, u32 freq)
|
||||
{
|
||||
int err;
|
||||
struct intel_guc_slpc *slpc = >->uc.guc.slpc;
|
||||
|
||||
err = slpc_set_max_freq(slpc, freq);
|
||||
if (err) {
|
||||
pr_err("Unable to update max freq");
|
||||
return err;
|
||||
}
|
||||
|
||||
err = slpc_set_min_freq(slpc, freq);
|
||||
if (err) {
|
||||
pr_err("Unable to update min freq");
|
||||
return err;
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static u64 measure_power_at_freq(struct intel_gt *gt, int *freq, u64 *power)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
err = slpc_set_freq(gt, *freq);
|
||||
if (err)
|
||||
return err;
|
||||
*freq = intel_rps_read_actual_frequency(>->rps);
|
||||
*power = measure_power(>->rps, freq);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int vary_max_freq(struct intel_guc_slpc *slpc, struct intel_rps *rps,
|
||||
u32 *max_act_freq)
|
||||
{
|
||||
@ -113,6 +147,58 @@ static int vary_min_freq(struct intel_guc_slpc *slpc, struct intel_rps *rps,
|
||||
return err;
|
||||
}
|
||||
|
||||
static int slpc_power(struct intel_gt *gt, struct intel_engine_cs *engine)
|
||||
{
|
||||
struct intel_guc_slpc *slpc = >->uc.guc.slpc;
|
||||
struct {
|
||||
u64 power;
|
||||
int freq;
|
||||
} min, max;
|
||||
int err = 0;
|
||||
|
||||
        /*
         * Our fundamental assumption is that running at lower frequency
         * actually saves power. Let's see if our RAPL measurement supports
         * that theory.
         */
        if (!librapl_supported(gt->i915))
                return 0;
|
||||
|
||||
min.freq = slpc->min_freq;
|
||||
err = measure_power_at_freq(gt, &min.freq, &min.power);
|
||||
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
max.freq = slpc->rp0_freq;
|
||||
err = measure_power_at_freq(gt, &max.freq, &max.power);
|
||||
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n",
|
||||
engine->name,
|
||||
min.power, min.freq,
|
||||
max.power, max.freq);
|
||||
|
||||
if (10 * min.freq >= 9 * max.freq) {
|
||||
pr_notice("Could not control frequency, ran at [%uMHz, %uMhz]\n",
|
||||
min.freq, max.freq);
|
||||
}
|
||||
|
||||
if (11 * min.power > 10 * max.power) {
|
||||
pr_err("%s: did not conserve power when setting lower frequency!\n",
|
||||
engine->name);
|
||||
err = -EINVAL;
|
||||
}
|
||||
|
||||
/* Restore min/max frequencies */
|
||||
slpc_set_max_freq(slpc, slpc->rp0_freq);
|
||||
slpc_set_min_freq(slpc, slpc->min_freq);
|
||||
|
||||
return err;
|
||||
}
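/*
 * Editorial aside on the two thresholds above: the first check warns when
 * min.freq >= 0.9 * max.freq, i.e. the test could not spread the operating
 * points by even ~10%; the second fails when min.power > (10/11) * max.power,
 * i.e. dropping to the minimum frequency saved less than about 9% power.
 * For example, with max.power = 1100 mW the test tolerates min.power of up
 * to 1000 mW before returning -EINVAL.
 */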
|
||||
|
||||
static int max_granted_freq(struct intel_guc_slpc *slpc, struct intel_rps *rps, u32 *max_act_freq)
|
||||
{
|
||||
struct intel_gt *gt = rps_to_gt(rps);
|
||||
@ -153,6 +239,11 @@ static int run_test(struct intel_gt *gt, int test_type)
|
||||
if (!intel_uc_uses_guc_slpc(>->uc))
|
||||
return 0;
|
||||
|
||||
if (slpc->min_freq == slpc->rp0_freq) {
|
||||
pr_err("Min/Max are fused to the same value\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (igt_spinner_init(&spin, gt))
|
||||
return -ENOMEM;
|
||||
|
||||
@ -167,17 +258,14 @@ static int run_test(struct intel_gt *gt, int test_type)
|
||||
}
|
||||
|
||||
/*
|
||||
* FIXME: With efficient frequency enabled, GuC can request
|
||||
* frequencies higher than the SLPC max. While this is fixed
|
||||
* in GuC, we level set these tests with RPn as min.
|
||||
* Set min frequency to RPn so that we can test the whole
|
||||
* range of RPn-RP0. This also turns off efficient freq
|
||||
* usage and makes results more predictable.
|
||||
*/
|
||||
err = slpc_set_min_freq(slpc, slpc->min_freq);
|
||||
if (err)
|
||||
if (err) {
|
||||
pr_err("Unable to update min freq!");
|
||||
return err;
|
||||
|
||||
if (slpc->min_freq == slpc->rp0_freq) {
|
||||
pr_err("Min/Max are fused to the same value\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
intel_gt_pm_wait_for_idle(gt);
|
||||
@ -233,17 +321,23 @@ static int run_test(struct intel_gt *gt, int test_type)
|
||||
|
||||
err = max_granted_freq(slpc, rps, &max_act_freq);
|
||||
break;
|
||||
|
||||
case SLPC_POWER:
|
||||
err = slpc_power(gt, engine);
|
||||
break;
|
||||
}
|
||||
|
||||
pr_info("Max actual frequency for %s was %d\n",
|
||||
engine->name, max_act_freq);
|
||||
if (test_type != SLPC_POWER) {
|
||||
pr_info("Max actual frequency for %s was %d\n",
|
||||
engine->name, max_act_freq);
|
||||
|
||||
/* Actual frequency should rise above min */
|
||||
if (max_act_freq <= slpc_min_freq) {
|
||||
pr_err("Actual freq did not rise above min\n");
|
||||
pr_err("Perf Limit Reasons: 0x%x\n",
|
||||
intel_uncore_read(gt->uncore, GT0_PERF_LIMIT_REASONS));
|
||||
err = -EINVAL;
|
||||
/* Actual frequency should rise above min */
|
||||
if (max_act_freq <= slpc->min_freq) {
|
||||
pr_err("Actual freq did not rise above min\n");
|
||||
pr_err("Perf Limit Reasons: 0x%x\n",
|
||||
intel_uncore_read(gt->uncore, GT0_PERF_LIMIT_REASONS));
|
||||
err = -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
igt_spinner_end(&spin);
|
||||
@ -270,26 +364,66 @@ static int run_test(struct intel_gt *gt, int test_type)
|
||||
static int live_slpc_vary_min(void *arg)
|
||||
{
|
||||
struct drm_i915_private *i915 = arg;
|
||||
struct intel_gt *gt = to_gt(i915);
|
||||
struct intel_gt *gt;
|
||||
unsigned int i;
|
||||
int ret;
|
||||
|
||||
return run_test(gt, VARY_MIN);
|
||||
for_each_gt(gt, i915, i) {
|
||||
ret = run_test(gt, VARY_MIN);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int live_slpc_vary_max(void *arg)
|
||||
{
|
||||
struct drm_i915_private *i915 = arg;
|
||||
struct intel_gt *gt = to_gt(i915);
|
||||
struct intel_gt *gt;
|
||||
unsigned int i;
|
||||
int ret;
|
||||
|
||||
return run_test(gt, VARY_MAX);
|
||||
for_each_gt(gt, i915, i) {
|
||||
ret = run_test(gt, VARY_MAX);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* check if pcode can grant RP0 */
|
||||
static int live_slpc_max_granted(void *arg)
|
||||
{
|
||||
struct drm_i915_private *i915 = arg;
|
||||
struct intel_gt *gt = to_gt(i915);
|
||||
struct intel_gt *gt;
|
||||
unsigned int i;
|
||||
int ret;
|
||||
|
||||
return run_test(gt, MAX_GRANTED);
|
||||
for_each_gt(gt, i915, i) {
|
||||
ret = run_test(gt, MAX_GRANTED);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int live_slpc_power(void *arg)
|
||||
{
|
||||
struct drm_i915_private *i915 = arg;
|
||||
struct intel_gt *gt;
|
||||
unsigned int i;
|
||||
int ret;
|
||||
|
||||
for_each_gt(gt, i915, i) {
|
||||
ret = run_test(gt, SLPC_POWER);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int intel_slpc_live_selftests(struct drm_i915_private *i915)
|
||||
@ -298,10 +432,16 @@ int intel_slpc_live_selftests(struct drm_i915_private *i915)
|
||||
SUBTEST(live_slpc_vary_max),
|
||||
SUBTEST(live_slpc_vary_min),
|
||||
SUBTEST(live_slpc_max_granted),
|
||||
SUBTEST(live_slpc_power),
|
||||
};
|
||||
|
||||
if (intel_gt_is_wedged(to_gt(i915)))
|
||||
return 0;
|
||||
struct intel_gt *gt;
|
||||
unsigned int i;
|
||||
|
||||
for_each_gt(gt, i915, i) {
|
||||
if (intel_gt_is_wedged(gt))
|
||||
return 0;
|
||||
}
|
||||
|
||||
return i915_live_subtests(tests, i915);
|
||||
}
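/*
 * Illustrative, standalone sketch (not part of the driver): the SLPC_POWER
 * subtest above encodes its pass criteria as integer ratio comparisons so
 * that no floating point is needed in kernel context. The helpers below
 * restate those two checks; every name here is made up for the example.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Frequency control check: min must sit below ~90% of max (10*min < 9*max). */
static bool freq_was_controlled(uint32_t min_freq, uint32_t max_freq)
{
	return 10 * (uint64_t)min_freq < 9 * (uint64_t)max_freq;
}

/* Power check: running at min must save at least ~9% power
 * (11*min_power <= 10*max_power, i.e. min/max <= 10/11). */
static bool power_scales_down(uint64_t min_power, uint64_t max_power)
{
	return 11 * min_power <= 10 * max_power;
}

int main(void)
{
	/* example numbers only: 350 MHz/9 W at min vs 1600 MHz/22 W at max */
	printf("controlled=%d saves_power=%d\n",
	       freq_was_controlled(350, 1600),
	       power_scales_down(9000000, 22000000));
	return 0;
}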
|
||||
|
@ -991,7 +991,7 @@ static bool pardon_reg(struct drm_i915_private *i915, i915_reg_t reg)
|
||||
/* Alas, we must pardon some whitelists. Mistakes already made */
|
||||
static const struct regmask pardon[] = {
|
||||
{ GEN9_CTX_PREEMPT_REG, 9 },
|
||||
{ GEN8_L3SQCREG4, 9 },
|
||||
{ _MMIO(0xb118), 9 }, /* GEN8_L3SQCREG4 */
|
||||
};
|
||||
|
||||
return find_reg(i915, reg, pardon, ARRAY_SIZE(pardon));
|
||||
|
@ -144,7 +144,7 @@ max_spin_store(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
const char *buf, size_t count)
|
||||
{
|
||||
struct intel_engine_cs *engine = kobj_to_engine(kobj);
|
||||
unsigned long long duration;
|
||||
unsigned long long duration, clamped;
|
||||
int err;
|
||||
|
||||
/*
|
||||
@ -168,7 +168,8 @@ max_spin_store(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (duration > jiffies_to_nsecs(2))
|
||||
clamped = intel_clamp_max_busywait_duration_ns(engine, duration);
|
||||
if (duration != clamped)
|
||||
return -EINVAL;
|
||||
|
||||
WRITE_ONCE(engine->props.max_busywait_duration_ns, duration);
|
||||
@ -203,7 +204,7 @@ timeslice_store(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
const char *buf, size_t count)
|
||||
{
|
||||
struct intel_engine_cs *engine = kobj_to_engine(kobj);
|
||||
unsigned long long duration;
|
||||
unsigned long long duration, clamped;
|
||||
int err;
|
||||
|
||||
/*
|
||||
@ -218,7 +219,8 @@ timeslice_store(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (duration > jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT))
|
||||
clamped = intel_clamp_timeslice_duration_ms(engine, duration);
|
||||
if (duration != clamped)
|
||||
return -EINVAL;
|
||||
|
||||
WRITE_ONCE(engine->props.timeslice_duration_ms, duration);
|
||||
@ -256,7 +258,7 @@ stop_store(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
const char *buf, size_t count)
|
||||
{
|
||||
struct intel_engine_cs *engine = kobj_to_engine(kobj);
|
||||
unsigned long long duration;
|
||||
unsigned long long duration, clamped;
|
||||
int err;
|
||||
|
||||
/*
|
||||
@ -272,7 +274,8 @@ stop_store(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (duration > jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT))
|
||||
clamped = intel_clamp_stop_timeout_ms(engine, duration);
|
||||
if (duration != clamped)
|
||||
return -EINVAL;
|
||||
|
||||
WRITE_ONCE(engine->props.stop_timeout_ms, duration);
|
||||
@ -306,7 +309,7 @@ preempt_timeout_store(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
const char *buf, size_t count)
|
||||
{
|
||||
struct intel_engine_cs *engine = kobj_to_engine(kobj);
|
||||
unsigned long long timeout;
|
||||
unsigned long long timeout, clamped;
|
||||
int err;
|
||||
|
||||
/*
|
||||
@ -322,7 +325,8 @@ preempt_timeout_store(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (timeout > jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT))
|
||||
clamped = intel_clamp_preempt_timeout_ms(engine, timeout);
|
||||
if (timeout != clamped)
|
||||
return -EINVAL;
|
||||
|
||||
WRITE_ONCE(engine->props.preempt_timeout_ms, timeout);
|
||||
@ -362,7 +366,7 @@ heartbeat_store(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
const char *buf, size_t count)
|
||||
{
|
||||
struct intel_engine_cs *engine = kobj_to_engine(kobj);
|
||||
unsigned long long delay;
|
||||
unsigned long long delay, clamped;
|
||||
int err;
|
||||
|
||||
/*
|
||||
@ -379,7 +383,8 @@ heartbeat_store(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (delay >= jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT))
|
||||
clamped = intel_clamp_heartbeat_interval_ms(engine, delay);
|
||||
if (delay != clamped)
|
||||
return -EINVAL;
|
||||
|
||||
err = intel_engine_set_heartbeat(engine, delay);
|
||||
|
@ -117,6 +117,7 @@ enum intel_guc_action {
|
||||
INTEL_GUC_ACTION_ENTER_S_STATE = 0x501,
|
||||
INTEL_GUC_ACTION_EXIT_S_STATE = 0x502,
|
||||
INTEL_GUC_ACTION_GLOBAL_SCHED_POLICY_CHANGE = 0x506,
|
||||
INTEL_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV = 0x509,
|
||||
INTEL_GUC_ACTION_SCHED_CONTEXT = 0x1000,
|
||||
INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET = 0x1001,
|
||||
INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_DONE = 0x1002,
|
||||
|
@ -128,6 +128,15 @@ enum slpc_media_ratio_mode {
|
||||
SLPC_MEDIA_RATIO_MODE_FIXED_ONE_TO_TWO = 2,
|
||||
};
|
||||
|
||||
enum slpc_gucrc_mode {
|
||||
SLPC_GUCRC_MODE_HW = 0,
|
||||
SLPC_GUCRC_MODE_GUCRC_NO_RC6 = 1,
|
||||
SLPC_GUCRC_MODE_GUCRC_STATIC_TIMEOUT = 2,
|
||||
SLPC_GUCRC_MODE_GUCRC_DYNAMIC_HYSTERESIS = 3,
|
||||
|
||||
SLPC_GUCRC_MODE_MAX,
|
||||
};
|
||||
|
||||
enum slpc_event_id {
|
||||
SLPC_EVENT_RESET = 0,
|
||||
SLPC_EVENT_SHUTDOWN = 1,
|
||||
|
@ -81,10 +81,17 @@
|
||||
#define GUC_KLV_SELF_CFG_G2H_CTB_SIZE_KEY 0x0907
|
||||
#define GUC_KLV_SELF_CFG_G2H_CTB_SIZE_LEN 1u
|
||||
|
||||
/*
|
||||
* Global scheduling policy update keys.
|
||||
*/
|
||||
enum {
|
||||
GUC_SCHEDULING_POLICIES_KLV_ID_RENDER_COMPUTE_YIELD = 0x1001,
|
||||
};
|
||||
|
||||
/*
|
||||
* Per context scheduling policy update keys.
|
||||
*/
|
||||
enum {
|
||||
GUC_CONTEXT_POLICIES_KLV_ID_EXECUTION_QUANTUM = 0x2001,
|
||||
GUC_CONTEXT_POLICIES_KLV_ID_PREEMPTION_TIMEOUT = 0x2002,
|
||||
GUC_CONTEXT_POLICIES_KLV_ID_SCHEDULING_PRIORITY = 0x2003,
|
||||
|
@ -441,6 +441,7 @@ err_log:
|
||||
err_fw:
|
||||
intel_uc_fw_fini(&guc->fw);
|
||||
out:
|
||||
intel_uc_fw_change_status(&guc->fw, INTEL_UC_FIRMWARE_INIT_FAIL);
|
||||
i915_probe_error(gt->i915, "failed with %d\n", ret);
|
||||
return ret;
|
||||
}
|
||||
|
@ -112,6 +112,10 @@ struct intel_guc {
|
||||
* refs
|
||||
*/
|
||||
struct list_head guc_id_list;
|
||||
/**
|
||||
* @guc_ids_in_use: Number of single-lrc guc_ids in use
|
||||
*/
|
||||
unsigned int guc_ids_in_use;
|
||||
/**
|
||||
* @destroyed_contexts: list of contexts waiting to be destroyed
|
||||
* (deregistered with the GuC)
|
||||
@ -132,6 +136,16 @@ struct intel_guc {
|
||||
* @reset_fail_mask: mask of engines that failed to reset
|
||||
*/
|
||||
intel_engine_mask_t reset_fail_mask;
|
||||
/**
|
||||
* @sched_disable_delay_ms: schedule disable delay, in ms, for
|
||||
* contexts
|
||||
*/
|
||||
unsigned int sched_disable_delay_ms;
|
||||
/**
|
||||
* @sched_disable_gucid_threshold: threshold of min remaining available
|
||||
* guc_ids before we start bypassing the schedule disable delay
|
||||
*/
|
||||
unsigned int sched_disable_gucid_threshold;
|
||||
} submission_state;
|
||||
|
||||
/**
|
||||
@ -466,4 +480,6 @@ void intel_guc_write_barrier(struct intel_guc *guc);
|
||||
|
||||
void intel_guc_dump_time_info(struct intel_guc *guc, struct drm_printer *p);
|
||||
|
||||
int intel_guc_sched_disable_gucid_threshold_max(struct intel_guc *guc);
|
||||
|
||||
#endif
|
||||
|
@ -5,6 +5,7 @@
|
||||
|
||||
#include <linux/bsearch.h>
|
||||
|
||||
#include "gem/i915_gem_lmem.h"
|
||||
#include "gt/intel_engine_regs.h"
|
||||
#include "gt/intel_gt.h"
|
||||
#include "gt/intel_gt_mcr.h"
|
||||
@ -277,24 +278,16 @@ __mmio_reg_add(struct temp_regset *regset, struct guc_mmio_reg *reg)
|
||||
return slot;
|
||||
}
|
||||
|
||||
#define GUC_REGSET_STEERING(group, instance) ( \
|
||||
FIELD_PREP(GUC_REGSET_STEERING_GROUP, (group)) | \
|
||||
FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, (instance)) | \
|
||||
GUC_REGSET_NEEDS_STEERING \
|
||||
)
|
||||
|
||||
static long __must_check guc_mmio_reg_add(struct intel_gt *gt,
|
||||
struct temp_regset *regset,
|
||||
i915_reg_t reg, u32 flags)
|
||||
u32 offset, u32 flags)
|
||||
{
|
||||
u32 count = regset->storage_used - (regset->registers - regset->storage);
|
||||
u32 offset = i915_mmio_reg_offset(reg);
|
||||
struct guc_mmio_reg entry = {
|
||||
.offset = offset,
|
||||
.flags = flags,
|
||||
};
|
||||
struct guc_mmio_reg *slot;
|
||||
u8 group, inst;
|
||||
|
||||
/*
|
||||
* The mmio list is built using separate lists within the driver.
|
||||
@ -306,17 +299,6 @@ static long __must_check guc_mmio_reg_add(struct intel_gt *gt,
|
||||
sizeof(entry), guc_mmio_reg_cmp))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* The GuC doesn't have a default steering, so we need to explicitly
|
||||
* steer all registers that need steering. However, we do not keep track
|
||||
* of all the steering ranges, only of those that have a chance of using
|
||||
* a non-default steering from the i915 pov. Instead of adding such
|
||||
* tracking, it is easier to just program the default steering for all
|
||||
* regs that don't need a non-default one.
|
||||
*/
|
||||
intel_gt_mcr_get_nonterminated_steering(gt, reg, &group, &inst);
|
||||
entry.flags |= GUC_REGSET_STEERING(group, inst);
|
||||
|
||||
slot = __mmio_reg_add(regset, &entry);
|
||||
if (IS_ERR(slot))
|
||||
return PTR_ERR(slot);
|
||||
@ -334,6 +316,38 @@ static long __must_check guc_mmio_reg_add(struct intel_gt *gt,
|
||||
|
||||
#define GUC_MMIO_REG_ADD(gt, regset, reg, masked) \
|
||||
guc_mmio_reg_add(gt, \
|
||||
regset, \
|
||||
i915_mmio_reg_offset(reg), \
|
||||
(masked) ? GUC_REGSET_MASKED : 0)
|
||||
|
||||
#define GUC_REGSET_STEERING(group, instance) ( \
|
||||
FIELD_PREP(GUC_REGSET_STEERING_GROUP, (group)) | \
|
||||
FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, (instance)) | \
|
||||
GUC_REGSET_NEEDS_STEERING \
|
||||
)
|
||||
|
||||
static long __must_check guc_mcr_reg_add(struct intel_gt *gt,
|
||||
struct temp_regset *regset,
|
||||
i915_mcr_reg_t reg, u32 flags)
|
||||
{
|
||||
u8 group, inst;
|
||||
|
||||
/*
|
||||
* The GuC doesn't have a default steering, so we need to explicitly
|
||||
* steer all registers that need steering. However, we do not keep track
|
||||
* of all the steering ranges, only of those that have a chance of using
|
||||
* a non-default steering from the i915 pov. Instead of adding such
|
||||
* tracking, it is easier to just program the default steering for all
|
||||
* regs that don't need a non-default one.
|
||||
*/
|
||||
intel_gt_mcr_get_nonterminated_steering(gt, reg, &group, &inst);
|
||||
flags |= GUC_REGSET_STEERING(group, inst);
|
||||
|
||||
return guc_mmio_reg_add(gt, regset, i915_mmio_reg_offset(reg), flags);
|
||||
}
|
||||
|
||||
#define GUC_MCR_REG_ADD(gt, regset, reg, masked) \
|
||||
guc_mcr_reg_add(gt, \
|
||||
regset, \
|
||||
(reg), \
|
||||
(masked) ? GUC_REGSET_MASKED : 0)
|
||||
@ -372,8 +386,21 @@ static int guc_mmio_regset_init(struct temp_regset *regset,
|
||||
false);
|
||||
|
||||
/* add in local MOCS registers */
|
||||
for (i = 0; i < GEN9_LNCFCMOCS_REG_COUNT; i++)
|
||||
ret |= GUC_MMIO_REG_ADD(gt, regset, GEN9_LNCFCMOCS(i), false);
|
||||
for (i = 0; i < LNCFCMOCS_REG_COUNT; i++)
|
||||
if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
|
||||
ret |= GUC_MCR_REG_ADD(gt, regset, XEHP_LNCFCMOCS(i), false);
|
||||
else
|
||||
ret |= GUC_MMIO_REG_ADD(gt, regset, GEN9_LNCFCMOCS(i), false);
|
||||
|
||||
if (GRAPHICS_VER(engine->i915) >= 12) {
|
||||
ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL0, false);
|
||||
ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL1, false);
|
||||
ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL2, false);
|
||||
ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL3, false);
|
||||
ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL4, false);
|
||||
ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL5, false);
|
||||
ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL6, false);
|
||||
}
|
||||
|
||||
return ret ? -1 : 0;
|
||||
}
|
||||
|
@ -169,6 +169,8 @@ static struct __guc_mmio_reg_descr_group default_lists[] = {
|
||||
MAKE_REGLIST(default_global_regs, PF, GLOBAL, 0),
|
||||
MAKE_REGLIST(default_rc_class_regs, PF, ENGINE_CLASS, GUC_RENDER_CLASS),
|
||||
MAKE_REGLIST(xe_lpd_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_RENDER_CLASS),
|
||||
MAKE_REGLIST(default_rc_class_regs, PF, ENGINE_CLASS, GUC_COMPUTE_CLASS),
|
||||
MAKE_REGLIST(xe_lpd_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_COMPUTE_CLASS),
|
||||
MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_VIDEO_CLASS),
|
||||
MAKE_REGLIST(xe_lpd_vd_inst_regs, PF, ENGINE_INSTANCE, GUC_VIDEO_CLASS),
|
||||
MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_VIDEOENHANCE_CLASS),
|
||||
@ -182,6 +184,8 @@ static const struct __guc_mmio_reg_descr_group xe_lpd_lists[] = {
|
||||
MAKE_REGLIST(xe_lpd_global_regs, PF, GLOBAL, 0),
|
||||
MAKE_REGLIST(xe_lpd_rc_class_regs, PF, ENGINE_CLASS, GUC_RENDER_CLASS),
|
||||
MAKE_REGLIST(xe_lpd_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_RENDER_CLASS),
|
||||
MAKE_REGLIST(xe_lpd_rc_class_regs, PF, ENGINE_CLASS, GUC_COMPUTE_CLASS),
|
||||
MAKE_REGLIST(xe_lpd_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_COMPUTE_CLASS),
|
||||
MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_VIDEO_CLASS),
|
||||
MAKE_REGLIST(xe_lpd_vd_inst_regs, PF, ENGINE_INSTANCE, GUC_VIDEO_CLASS),
|
||||
MAKE_REGLIST(xe_lpd_vec_class_regs, PF, ENGINE_CLASS, GUC_VIDEOENHANCE_CLASS),
|
||||
@ -240,19 +244,19 @@ static void guc_capture_free_extlists(struct __guc_mmio_reg_descr_group *reglist
|
||||
|
||||
struct __ext_steer_reg {
|
||||
const char *name;
|
||||
i915_reg_t reg;
|
||||
i915_mcr_reg_t reg;
|
||||
};
|
||||
|
||||
static const struct __ext_steer_reg xe_extregs[] = {
|
||||
{"GEN7_SAMPLER_INSTDONE", GEN7_SAMPLER_INSTDONE},
|
||||
{"GEN7_ROW_INSTDONE", GEN7_ROW_INSTDONE}
|
||||
{"GEN8_SAMPLER_INSTDONE", GEN8_SAMPLER_INSTDONE},
|
||||
{"GEN8_ROW_INSTDONE", GEN8_ROW_INSTDONE}
|
||||
};
|
||||
|
||||
static void __fill_ext_reg(struct __guc_mmio_reg_descr *ext,
|
||||
const struct __ext_steer_reg *extlist,
|
||||
int slice_id, int subslice_id)
|
||||
{
|
||||
ext->reg = extlist->reg;
|
||||
ext->reg = _MMIO(i915_mmio_reg_offset(extlist->reg));
|
||||
ext->flags = FIELD_PREP(GUC_REGSET_STEERING_GROUP, slice_id);
|
||||
ext->flags |= FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, subslice_id);
|
||||
ext->regname = extlist->name;
|
||||
@ -419,6 +423,44 @@ guc_capture_get_device_reglist(struct intel_guc *guc)
|
||||
return default_lists;
|
||||
}
|
||||
|
||||
static const char *
|
||||
__stringify_type(u32 type)
|
||||
{
|
||||
switch (type) {
|
||||
case GUC_CAPTURE_LIST_TYPE_GLOBAL:
|
||||
return "Global";
|
||||
case GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS:
|
||||
return "Class";
|
||||
case GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE:
|
||||
return "Instance";
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return "unknown";
|
||||
}
|
||||
|
||||
static const char *
|
||||
__stringify_engclass(u32 class)
|
||||
{
|
||||
switch (class) {
|
||||
case GUC_RENDER_CLASS:
|
||||
return "Render";
|
||||
case GUC_VIDEO_CLASS:
|
||||
return "Video";
|
||||
case GUC_VIDEOENHANCE_CLASS:
|
||||
return "VideoEnhance";
|
||||
case GUC_BLITTER_CLASS:
|
||||
return "Blitter";
|
||||
case GUC_COMPUTE_CLASS:
|
||||
return "Compute";
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return "unknown";
|
||||
}
|
||||
|
||||
static int
|
||||
guc_capture_list_init(struct intel_guc *guc, u32 owner, u32 type, u32 classid,
|
||||
struct guc_mmio_reg *ptr, u16 num_entries)
|
||||
@ -482,32 +524,55 @@ guc_cap_list_num_regs(struct intel_guc_state_capture *gc, u32 owner, u32 type, u
|
||||
return num_regs;
|
||||
}
|
||||
|
||||
int
|
||||
intel_guc_capture_getlistsize(struct intel_guc *guc, u32 owner, u32 type, u32 classid,
|
||||
size_t *size)
|
||||
static int
|
||||
guc_capture_getlistsize(struct intel_guc *guc, u32 owner, u32 type, u32 classid,
|
||||
size_t *size, bool is_purpose_est)
|
||||
{
|
||||
struct intel_guc_state_capture *gc = guc->capture;
|
||||
struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
|
||||
struct __guc_capture_ads_cache *cache = &gc->ads_cache[owner][type][classid];
|
||||
int num_regs;
|
||||
|
||||
if (!gc->reglists)
|
||||
if (!gc->reglists) {
|
||||
drm_warn(&i915->drm, "GuC-capture: No reglist on this device\n");
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
if (cache->is_valid) {
|
||||
*size = cache->size;
|
||||
return cache->status;
|
||||
}
|
||||
|
||||
if (!is_purpose_est && owner == GUC_CAPTURE_LIST_INDEX_PF &&
|
||||
!guc_capture_get_one_list(gc->reglists, owner, type, classid)) {
|
||||
if (type == GUC_CAPTURE_LIST_TYPE_GLOBAL)
|
||||
drm_warn(&i915->drm, "Missing GuC-Err-Cap reglist Global!\n");
|
||||
else
|
||||
drm_warn(&i915->drm, "Missing GuC-Err-Cap reglist %s(%u):%s(%u)!\n",
|
||||
__stringify_type(type), type,
|
||||
__stringify_engclass(classid), classid);
|
||||
return -ENODATA;
|
||||
}
|
||||
|
||||
num_regs = guc_cap_list_num_regs(gc, owner, type, classid);
|
||||
/* intentional empty lists can exist depending on hw config */
|
||||
if (!num_regs)
|
||||
return -ENODATA;
|
||||
|
||||
*size = PAGE_ALIGN((sizeof(struct guc_debug_capture_list)) +
|
||||
(num_regs * sizeof(struct guc_mmio_reg)));
|
||||
if (size)
|
||||
*size = PAGE_ALIGN((sizeof(struct guc_debug_capture_list)) +
|
||||
(num_regs * sizeof(struct guc_mmio_reg)));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
intel_guc_capture_getlistsize(struct intel_guc *guc, u32 owner, u32 type, u32 classid,
|
||||
size_t *size)
|
||||
{
|
||||
return guc_capture_getlistsize(guc, owner, type, classid, size, false);
|
||||
}
|
||||
|
||||
static void guc_capture_create_prealloc_nodes(struct intel_guc *guc);
|
||||
|
||||
int
|
||||
@ -606,7 +671,7 @@ guc_capture_output_min_size_est(struct intel_guc *guc)
|
||||
struct intel_gt *gt = guc_to_gt(guc);
|
||||
struct intel_engine_cs *engine;
|
||||
enum intel_engine_id id;
|
||||
int worst_min_size = 0, num_regs = 0;
|
||||
int worst_min_size = 0;
|
||||
size_t tmp = 0;
|
||||
|
||||
if (!guc->capture)
|
||||
@ -627,21 +692,19 @@ guc_capture_output_min_size_est(struct intel_guc *guc)
|
||||
worst_min_size += sizeof(struct guc_state_capture_group_header_t) +
|
||||
(3 * sizeof(struct guc_state_capture_header_t));
|
||||
|
||||
if (!intel_guc_capture_getlistsize(guc, 0, GUC_CAPTURE_LIST_TYPE_GLOBAL, 0, &tmp))
|
||||
num_regs += tmp;
|
||||
if (!guc_capture_getlistsize(guc, 0, GUC_CAPTURE_LIST_TYPE_GLOBAL, 0, &tmp, true))
|
||||
worst_min_size += tmp;
|
||||
|
||||
if (!intel_guc_capture_getlistsize(guc, 0, GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS,
|
||||
engine->class, &tmp)) {
|
||||
num_regs += tmp;
|
||||
if (!guc_capture_getlistsize(guc, 0, GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS,
|
||||
engine->class, &tmp, true)) {
|
||||
worst_min_size += tmp;
|
||||
}
|
||||
if (!intel_guc_capture_getlistsize(guc, 0, GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE,
|
||||
engine->class, &tmp)) {
|
||||
num_regs += tmp;
|
||||
if (!guc_capture_getlistsize(guc, 0, GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE,
|
||||
engine->class, &tmp, true)) {
|
||||
worst_min_size += tmp;
|
||||
}
|
||||
}
|
||||
|
||||
worst_min_size += (num_regs * sizeof(struct guc_mmio_reg));
|
||||
|
||||
return worst_min_size;
|
||||
}
|
||||
|
||||
@ -658,15 +721,23 @@ static void check_guc_capture_size(struct intel_guc *guc)
|
||||
int spare_size = min_size * GUC_CAPTURE_OVERBUFFER_MULTIPLIER;
|
||||
u32 buffer_size = intel_guc_log_section_size_capture(&guc->log);
|
||||
|
||||
/*
|
||||
* NOTE: min_size is much smaller than the capture region allocation (DG2: <80K vs 1MB)
|
||||
* Additionally, it's based on space needed to fit all engines getting reset at once
|
||||
* within the same G2H handler task slot. This is very unlikely. However, if GuC really
|
||||
* does run out of space for whatever reason, we will see a separate warning message
|
||||
* when processing the G2H event capture-notification, search for:
|
||||
* INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE.
|
||||
*/
|
||||
if (min_size < 0)
|
||||
drm_warn(&i915->drm, "Failed to calculate GuC error state capture buffer minimum size: %d!\n",
|
||||
min_size);
|
||||
else if (min_size > buffer_size)
|
||||
drm_warn(&i915->drm, "GuC error state capture buffer is too small: %d < %d\n",
|
||||
drm_warn(&i915->drm, "GuC error state capture buffer maybe small: %d < %d\n",
|
||||
buffer_size, min_size);
|
||||
else if (spare_size > buffer_size)
|
||||
drm_notice(&i915->drm, "GuC error state capture buffer maybe too small: %d < %d (min = %d)\n",
|
||||
buffer_size, spare_size, min_size);
|
||||
drm_dbg(&i915->drm, "GuC error state capture buffer lacks spare size: %d < %d (min = %d)\n",
|
||||
buffer_size, spare_size, min_size);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -71,12 +71,73 @@ static bool intel_eval_slpc_support(void *data)
|
||||
return intel_guc_slpc_is_used(guc);
|
||||
}
|
||||
|
||||
static int guc_sched_disable_delay_ms_get(void *data, u64 *val)
|
||||
{
|
||||
struct intel_guc *guc = data;
|
||||
|
||||
if (!intel_guc_submission_is_used(guc))
|
||||
return -ENODEV;
|
||||
|
||||
*val = (u64)guc->submission_state.sched_disable_delay_ms;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int guc_sched_disable_delay_ms_set(void *data, u64 val)
|
||||
{
|
||||
struct intel_guc *guc = data;
|
||||
|
||||
if (!intel_guc_submission_is_used(guc))
|
||||
return -ENODEV;
|
||||
|
||||
/* clamp to a practical limit, 1 minute is reasonable for a longest delay */
|
||||
guc->submission_state.sched_disable_delay_ms = min_t(u64, val, 60000);
|
||||
|
||||
return 0;
|
||||
}
|
||||
DEFINE_SIMPLE_ATTRIBUTE(guc_sched_disable_delay_ms_fops,
|
||||
guc_sched_disable_delay_ms_get,
|
||||
guc_sched_disable_delay_ms_set, "%lld\n");
|
||||
|
||||
static int guc_sched_disable_gucid_threshold_get(void *data, u64 *val)
|
||||
{
|
||||
struct intel_guc *guc = data;
|
||||
|
||||
if (!intel_guc_submission_is_used(guc))
|
||||
return -ENODEV;
|
||||
|
||||
*val = guc->submission_state.sched_disable_gucid_threshold;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int guc_sched_disable_gucid_threshold_set(void *data, u64 val)
|
||||
{
|
||||
struct intel_guc *guc = data;
|
||||
|
||||
if (!intel_guc_submission_is_used(guc))
|
||||
return -ENODEV;
|
||||
|
||||
if (val > intel_guc_sched_disable_gucid_threshold_max(guc))
|
||||
guc->submission_state.sched_disable_gucid_threshold =
|
||||
intel_guc_sched_disable_gucid_threshold_max(guc);
|
||||
else
|
||||
guc->submission_state.sched_disable_gucid_threshold = val;
|
||||
|
||||
return 0;
|
||||
}
|
||||
DEFINE_SIMPLE_ATTRIBUTE(guc_sched_disable_gucid_threshold_fops,
|
||||
guc_sched_disable_gucid_threshold_get,
|
||||
guc_sched_disable_gucid_threshold_set, "%lld\n");
|
||||
|
||||
void intel_guc_debugfs_register(struct intel_guc *guc, struct dentry *root)
|
||||
{
|
||||
static const struct intel_gt_debugfs_file files[] = {
|
||||
{ "guc_info", &guc_info_fops, NULL },
|
||||
{ "guc_registered_contexts", &guc_registered_contexts_fops, NULL },
|
||||
{ "guc_slpc_info", &guc_slpc_info_fops, &intel_eval_slpc_support},
|
||||
{ "guc_sched_disable_delay_ms", &guc_sched_disable_delay_ms_fops, NULL },
|
||||
{ "guc_sched_disable_gucid_threshold", &guc_sched_disable_gucid_threshold_fops,
|
||||
NULL },
|
||||
};
|
||||
|
||||
if (!intel_guc_is_supported(guc))
|
||||
|
@ -10,12 +10,15 @@
|
||||
*/
|
||||
|
||||
#include "gt/intel_gt.h"
|
||||
#include "gt/intel_gt_mcr.h"
|
||||
#include "gt/intel_gt_regs.h"
|
||||
#include "intel_guc_fw.h"
|
||||
#include "i915_drv.h"
|
||||
|
||||
static void guc_prepare_xfer(struct intel_uncore *uncore)
|
||||
static void guc_prepare_xfer(struct intel_gt *gt)
|
||||
{
|
||||
struct intel_uncore *uncore = gt->uncore;
|
||||
|
||||
u32 shim_flags = GUC_ENABLE_READ_CACHE_LOGIC |
|
||||
GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA |
|
||||
GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA |
|
||||
@ -35,8 +38,9 @@ static void guc_prepare_xfer(struct intel_uncore *uncore)
|
||||
|
||||
if (GRAPHICS_VER(uncore->i915) == 9) {
|
||||
/* DOP Clock Gating Enable for GuC clocks */
|
||||
intel_uncore_rmw(uncore, GEN7_MISCCPCTL,
|
||||
0, GEN8_DOP_CLOCK_GATE_GUC_ENABLE);
|
||||
intel_gt_mcr_multicast_write(gt, GEN8_MISCCPCTL,
|
||||
GEN8_DOP_CLOCK_GATE_GUC_ENABLE |
|
||||
intel_gt_mcr_read_any(gt, GEN8_MISCCPCTL));
|
||||
|
||||
/* allows for 5us (in 10ns units) before GT can go to RC6 */
|
||||
intel_uncore_write(uncore, GUC_ARAT_C6DIS, 0x1FF);
|
||||
@ -168,7 +172,7 @@ int intel_guc_fw_upload(struct intel_guc *guc)
|
||||
struct intel_uncore *uncore = gt->uncore;
|
||||
int ret;
|
||||
|
||||
guc_prepare_xfer(uncore);
|
||||
guc_prepare_xfer(gt);
|
||||
|
||||
/*
|
||||
* Note that GuC needs the CSS header plus uKernel code to be copied
|
||||
|
@ -290,6 +290,25 @@ struct guc_update_context_policy {
|
||||
struct guc_klv_generic_dw_t klv[GUC_CONTEXT_POLICIES_KLV_NUM_IDS];
|
||||
} __packed;
|
||||
|
||||
/* Format of the UPDATE_SCHEDULING_POLICIES H2G data packet */
|
||||
struct guc_update_scheduling_policy_header {
|
||||
u32 action;
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* Can't dynamically allocate memory for the scheduling policy KLV because
|
||||
* it will be sent from within the reset path. Need a fixed size lump on
|
||||
* the stack instead :(.
|
||||
*
|
||||
* Currently, there is only one KLV defined, which has 1 word of KL + 2 words of V.
|
||||
*/
|
||||
#define MAX_SCHEDULING_POLICY_SIZE 3
|
||||
|
||||
struct guc_update_scheduling_policy {
|
||||
struct guc_update_scheduling_policy_header header;
|
||||
u32 data[MAX_SCHEDULING_POLICY_SIZE];
|
||||
} __packed;
|
||||
|
||||
#define GUC_POWER_UNSPECIFIED 0
|
||||
#define GUC_POWER_D0 1
|
||||
#define GUC_POWER_D1 2
|
||||
@ -298,6 +317,9 @@ struct guc_update_context_policy {
|
||||
|
||||
/* Scheduling policy settings */
|
||||
|
||||
#define GLOBAL_SCHEDULE_POLICY_RC_YIELD_DURATION 100 /* in ms */
|
||||
#define GLOBAL_SCHEDULE_POLICY_RC_YIELD_RATIO 50 /* in percent */
|
||||
|
||||
#define GLOBAL_POLICY_MAX_NUM_WI 15
|
||||
|
||||
/* Don't reset an engine upon preemption failure */
|
||||
@ -305,6 +327,27 @@ struct guc_update_context_policy {
|
||||
|
||||
#define GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US 500000
|
||||
|
||||
/*
|
||||
* GuC converts the timeout to clock ticks internally. Different platforms have
|
||||
* different GuC clocks. Thus, the maximum value before overflow is platform
|
||||
* dependent. Current worst case scenario is about 110s. So, the spec says to
|
||||
* limit to 100s to be safe.
|
||||
*/
|
||||
#define GUC_POLICY_MAX_EXEC_QUANTUM_US (100 * 1000 * 1000UL)
|
||||
#define GUC_POLICY_MAX_PREEMPT_TIMEOUT_US (100 * 1000 * 1000UL)
|
||||
|
||||
static inline u32 guc_policy_max_exec_quantum_ms(void)
|
||||
{
|
||||
BUILD_BUG_ON(GUC_POLICY_MAX_EXEC_QUANTUM_US >= UINT_MAX);
|
||||
return GUC_POLICY_MAX_EXEC_QUANTUM_US / 1000;
|
||||
}
|
||||
|
||||
static inline u32 guc_policy_max_preempt_timeout_ms(void)
|
||||
{
|
||||
BUILD_BUG_ON(GUC_POLICY_MAX_PREEMPT_TIMEOUT_US >= UINT_MAX);
|
||||
return GUC_POLICY_MAX_PREEMPT_TIMEOUT_US / 1000;
|
||||
}
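/*
 * Back-of-envelope check of the 100 s cap (illustrative only; the 38.4 MHz
 * reference clock used below is an assumption, not stated in this diff):
 * a 32-bit tick counter at 38.4 MHz wraps after
 *     2^32 ticks / 38,400,000 Hz ~= 111.8 s
 * which lines up with the ~110 s worst case mentioned in the comment above,
 * so limiting the policy values to 100 s leaves headroom on every platform.
 */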
|
||||
|
||||
struct guc_policies {
|
||||
u32 submission_queue_depth[GUC_MAX_ENGINE_CLASSES];
|
||||
/* In micro seconds. How much time to allow before DPC processing is
|
||||
|
@ -16,15 +16,15 @@
|
||||
#if defined(CONFIG_DRM_I915_DEBUG_GUC)
|
||||
#define GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE SZ_2M
|
||||
#define GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE SZ_16M
|
||||
#define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE SZ_4M
|
||||
#define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE SZ_1M
|
||||
#elif defined(CONFIG_DRM_I915_DEBUG_GEM)
|
||||
#define GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE SZ_1M
|
||||
#define GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE SZ_2M
|
||||
#define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE SZ_4M
|
||||
#define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE SZ_1M
|
||||
#else
|
||||
#define GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE SZ_8K
|
||||
#define GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE SZ_64K
|
||||
#define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE SZ_2M
|
||||
#define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE SZ_1M
|
||||
#endif
|
||||
|
||||
static void guc_log_copy_debuglogs_for_relay(struct intel_guc_log *log);
|
||||
|
@ -137,6 +137,17 @@ static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value)
|
||||
return ret > 0 ? -EPROTO : ret;
|
||||
}
|
||||
|
||||
static int guc_action_slpc_unset_param(struct intel_guc *guc, u8 id)
|
||||
{
|
||||
u32 request[] = {
|
||||
GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
|
||||
SLPC_EVENT(SLPC_EVENT_PARAMETER_UNSET, 1),
|
||||
id,
|
||||
};
|
||||
|
||||
return intel_guc_send(guc, request, ARRAY_SIZE(request));
|
||||
}
|
||||
|
||||
static bool slpc_is_running(struct intel_guc_slpc *slpc)
|
||||
{
|
||||
return slpc_get_state(slpc) == SLPC_GLOBAL_STATE_RUNNING;
|
||||
@ -190,6 +201,15 @@ static int slpc_set_param(struct intel_guc_slpc *slpc, u8 id, u32 value)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int slpc_unset_param(struct intel_guc_slpc *slpc, u8 id)
|
||||
{
|
||||
struct intel_guc *guc = slpc_to_guc(slpc);
|
||||
|
||||
GEM_BUG_ON(id >= SLPC_MAX_PARAM);
|
||||
|
||||
return guc_action_slpc_unset_param(guc, id);
|
||||
}
|
||||
|
||||
static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq)
|
||||
{
|
||||
struct drm_i915_private *i915 = slpc_to_i915(slpc);
|
||||
@ -263,6 +283,7 @@ int intel_guc_slpc_init(struct intel_guc_slpc *slpc)
|
||||
|
||||
slpc->max_freq_softlimit = 0;
|
||||
slpc->min_freq_softlimit = 0;
|
||||
slpc->min_is_rpmax = false;
|
||||
|
||||
slpc->boost_freq = 0;
|
||||
atomic_set(&slpc->num_waiters, 0);
|
||||
@ -588,6 +609,39 @@ static int slpc_set_softlimits(struct intel_guc_slpc *slpc)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool is_slpc_min_freq_rpmax(struct intel_guc_slpc *slpc)
|
||||
{
|
||||
struct drm_i915_private *i915 = slpc_to_i915(slpc);
|
||||
int slpc_min_freq;
|
||||
int ret;
|
||||
|
||||
ret = intel_guc_slpc_get_min_freq(slpc, &slpc_min_freq);
|
||||
if (ret) {
|
||||
drm_err(&i915->drm,
|
||||
"Failed to get min freq: (%d)\n",
|
||||
ret);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (slpc_min_freq == SLPC_MAX_FREQ_MHZ)
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
static void update_server_min_softlimit(struct intel_guc_slpc *slpc)
|
||||
{
|
||||
/* For server parts, SLPC min will be at RPMax.
|
||||
* Use min softlimit to clamp it to RP0 instead.
|
||||
*/
|
||||
if (!slpc->min_freq_softlimit &&
|
||||
is_slpc_min_freq_rpmax(slpc)) {
|
||||
slpc->min_is_rpmax = true;
|
||||
slpc->min_freq_softlimit = slpc->rp0_freq;
|
||||
(slpc_to_gt(slpc))->defaults.min_freq = slpc->min_freq_softlimit;
|
||||
}
|
||||
}
|
||||
|
||||
static int slpc_use_fused_rp0(struct intel_guc_slpc *slpc)
|
||||
{
|
||||
/* Force SLPC to use platform rp0 */
|
||||
@ -610,6 +664,52 @@ static void slpc_get_rp_values(struct intel_guc_slpc *slpc)
|
||||
slpc->boost_freq = slpc->rp0_freq;
|
||||
}
|
||||
|
||||
/**
|
||||
* intel_guc_slpc_override_gucrc_mode() - override GUCRC mode
|
||||
* @slpc: pointer to intel_guc_slpc.
|
||||
* @mode: new value of the mode.
|
||||
*
|
||||
* This function will override the GUCRC mode.
|
||||
*
|
||||
* Return: 0 on success, non-zero error code on failure.
|
||||
*/
|
||||
int intel_guc_slpc_override_gucrc_mode(struct intel_guc_slpc *slpc, u32 mode)
|
||||
{
|
||||
int ret;
|
||||
struct drm_i915_private *i915 = slpc_to_i915(slpc);
|
||||
intel_wakeref_t wakeref;
|
||||
|
||||
if (mode >= SLPC_GUCRC_MODE_MAX)
|
||||
return -EINVAL;
|
||||
|
||||
with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
|
||||
ret = slpc_set_param(slpc, SLPC_PARAM_PWRGATE_RC_MODE, mode);
|
||||
if (ret)
|
||||
drm_err(&i915->drm,
|
||||
"Override gucrc mode %d failed %d\n",
|
||||
mode, ret);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int intel_guc_slpc_unset_gucrc_mode(struct intel_guc_slpc *slpc)
|
||||
{
|
||||
struct drm_i915_private *i915 = slpc_to_i915(slpc);
|
||||
intel_wakeref_t wakeref;
|
||||
int ret = 0;
|
||||
|
||||
with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
|
||||
ret = slpc_unset_param(slpc, SLPC_PARAM_PWRGATE_RC_MODE);
|
||||
if (ret)
|
||||
drm_err(&i915->drm,
|
||||
"Unsetting gucrc mode failed %d\n",
|
||||
ret);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* intel_guc_slpc_enable() - Start SLPC
|
||||
* @slpc: pointer to intel_guc_slpc.
|
||||
@ -647,6 +747,9 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc)
|
||||
|
||||
slpc_get_rp_values(slpc);
|
||||
|
||||
/* Handle the case where min=max=RPmax */
|
||||
update_server_min_softlimit(slpc);
|
||||
|
||||
/* Set SLPC max limit to RP0 */
|
||||
ret = slpc_use_fused_rp0(slpc);
|
||||
if (unlikely(ret)) {
|
||||
|
@ -9,6 +9,8 @@
|
||||
#include "intel_guc_submission.h"
|
||||
#include "intel_guc_slpc_types.h"
|
||||
|
||||
#define SLPC_MAX_FREQ_MHZ 4250
|
||||
|
||||
struct intel_gt;
|
||||
struct drm_printer;
|
||||
|
||||
@ -42,5 +44,7 @@ int intel_guc_slpc_set_media_ratio_mode(struct intel_guc_slpc *slpc, u32 val);
|
||||
void intel_guc_pm_intrmsk_enable(struct intel_gt *gt);
|
||||
void intel_guc_slpc_boost(struct intel_guc_slpc *slpc);
|
||||
void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc);
|
||||
int intel_guc_slpc_unset_gucrc_mode(struct intel_guc_slpc *slpc);
|
||||
int intel_guc_slpc_override_gucrc_mode(struct intel_guc_slpc *slpc, u32 mode);
|
||||
|
||||
#endif
|
||||
|
@ -19,6 +19,9 @@ struct intel_guc_slpc {
|
||||
bool supported;
|
||||
bool selected;
|
||||
|
||||
/* Indicates this is a server part */
|
||||
bool min_is_rpmax;
|
||||
|
||||
/* platform frequency limits */
|
||||
u32 min_freq;
|
||||
u32 rp0_freq;
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include <linux/circ_buf.h>
|
||||
|
||||
#include "gem/i915_gem_context.h"
|
||||
#include "gem/i915_gem_lmem.h"
|
||||
#include "gt/gen8_engine_cs.h"
|
||||
#include "gt/intel_breadcrumbs.h"
|
||||
#include "gt/intel_context.h"
|
||||
@ -65,7 +66,13 @@
|
||||
* corresponding G2H returns indicating the scheduling disable operation has
|
||||
* completed it is safe to unpin the context. While a disable is in flight it
|
||||
* isn't safe to resubmit the context so a fence is used to stall all future
|
||||
* requests of that context until the G2H is returned.
|
||||
* requests of that context until the G2H is returned. Because this interaction
|
||||
* with the GuC takes a non-zero amount of time we delay the disabling of
|
||||
* scheduling after the pin count goes to zero by a configurable period of time
|
||||
* (see SCHED_DISABLE_DELAY_MS). The thought is this gives the user a window of
|
||||
* time to resubmit something on the context before doing this costly operation.
|
||||
* This delay is only done if the context isn't closed and the guc_id usage is
|
||||
* less than a threshold (see NUM_SCHED_DISABLE_GUC_IDS_THRESHOLD).
|
||||
*
|
||||
* Context deregistration:
|
||||
* Before a context can be destroyed or if we steal its guc_id we must
|
||||
@ -163,7 +170,8 @@ guc_create_parallel(struct intel_engine_cs **engines,
|
||||
#define SCHED_STATE_PENDING_ENABLE BIT(5)
|
||||
#define SCHED_STATE_REGISTERED BIT(6)
|
||||
#define SCHED_STATE_POLICY_REQUIRED BIT(7)
|
||||
#define SCHED_STATE_BLOCKED_SHIFT 8
|
||||
#define SCHED_STATE_CLOSED BIT(8)
|
||||
#define SCHED_STATE_BLOCKED_SHIFT 9
|
||||
#define SCHED_STATE_BLOCKED BIT(SCHED_STATE_BLOCKED_SHIFT)
|
||||
#define SCHED_STATE_BLOCKED_MASK (0xfff << SCHED_STATE_BLOCKED_SHIFT)
|
||||
|
||||
@ -173,12 +181,20 @@ static inline void init_sched_state(struct intel_context *ce)
|
||||
ce->guc_state.sched_state &= SCHED_STATE_BLOCKED_MASK;
|
||||
}
|
||||
|
||||
/*
|
||||
* Kernel contexts can have SCHED_STATE_REGISTERED after suspend.
|
||||
* A context close can race with the submission path, so SCHED_STATE_CLOSED
|
||||
* can be set immediately before we try to register.
|
||||
*/
|
||||
#define SCHED_STATE_VALID_INIT \
|
||||
(SCHED_STATE_BLOCKED_MASK | \
|
||||
SCHED_STATE_CLOSED | \
|
||||
SCHED_STATE_REGISTERED)
|
||||
|
||||
__maybe_unused
|
||||
static bool sched_state_is_init(struct intel_context *ce)
|
||||
{
|
||||
/* Kernel contexts can have SCHED_STATE_REGISTERED after suspend. */
|
||||
return !(ce->guc_state.sched_state &
|
||||
~(SCHED_STATE_BLOCKED_MASK | SCHED_STATE_REGISTERED));
|
||||
return !(ce->guc_state.sched_state & ~SCHED_STATE_VALID_INIT);
|
||||
}
|
||||
|
||||
static inline bool
|
||||
@ -319,6 +335,17 @@ static inline void clr_context_policy_required(struct intel_context *ce)
|
||||
ce->guc_state.sched_state &= ~SCHED_STATE_POLICY_REQUIRED;
|
||||
}
|
||||
|
||||
static inline bool context_close_done(struct intel_context *ce)
|
||||
{
|
||||
return ce->guc_state.sched_state & SCHED_STATE_CLOSED;
|
||||
}
|
||||
|
||||
static inline void set_context_close_done(struct intel_context *ce)
|
||||
{
|
||||
lockdep_assert_held(&ce->guc_state.lock);
|
||||
ce->guc_state.sched_state |= SCHED_STATE_CLOSED;
|
||||
}
|
||||
|
||||
static inline u32 context_blocked(struct intel_context *ce)
|
||||
{
|
||||
return (ce->guc_state.sched_state & SCHED_STATE_BLOCKED_MASK) >>
|
||||
@ -343,25 +370,6 @@ static inline void decr_context_blocked(struct intel_context *ce)
|
||||
ce->guc_state.sched_state -= SCHED_STATE_BLOCKED;
|
||||
}
|
||||
|
||||
static inline bool context_has_committed_requests(struct intel_context *ce)
|
||||
{
|
||||
return !!ce->guc_state.number_committed_requests;
|
||||
}
|
||||
|
||||
static inline void incr_context_committed_requests(struct intel_context *ce)
|
||||
{
|
||||
lockdep_assert_held(&ce->guc_state.lock);
|
||||
++ce->guc_state.number_committed_requests;
|
||||
GEM_BUG_ON(ce->guc_state.number_committed_requests < 0);
|
||||
}
|
||||
|
||||
static inline void decr_context_committed_requests(struct intel_context *ce)
|
||||
{
|
||||
lockdep_assert_held(&ce->guc_state.lock);
|
||||
--ce->guc_state.number_committed_requests;
|
||||
GEM_BUG_ON(ce->guc_state.number_committed_requests < 0);
|
||||
}
|
||||
|
||||
static struct intel_context *
|
||||
request_to_scheduling_context(struct i915_request *rq)
|
||||
{
|
||||
@ -1067,6 +1075,12 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
|
||||
|
||||
xa_unlock(&guc->context_lookup);
|
||||
|
||||
if (test_bit(CONTEXT_GUC_INIT, &ce->flags) &&
|
||||
(cancel_delayed_work(&ce->guc_state.sched_disable_delay_work))) {
|
||||
/* successful cancel so jump straight to close it */
|
||||
intel_context_sched_disable_unpin(ce);
|
||||
}
|
||||
|
||||
spin_lock(&ce->guc_state.lock);
|
||||
|
||||
/*
|
||||
@ -1994,6 +2008,9 @@ static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
|
||||
if (unlikely(ret < 0))
|
||||
return ret;
|
||||
|
||||
if (!intel_context_is_parent(ce))
|
||||
++guc->submission_state.guc_ids_in_use;
|
||||
|
||||
ce->guc_id.id = ret;
|
||||
return 0;
|
||||
}
|
||||
@ -2003,14 +2020,16 @@ static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce)
|
||||
GEM_BUG_ON(intel_context_is_child(ce));
|
||||
|
||||
if (!context_guc_id_invalid(ce)) {
|
||||
if (intel_context_is_parent(ce))
|
||||
if (intel_context_is_parent(ce)) {
|
||||
bitmap_release_region(guc->submission_state.guc_ids_bitmap,
|
||||
ce->guc_id.id,
|
||||
order_base_2(ce->parallel.number_children
|
||||
+ 1));
|
||||
else
|
||||
} else {
|
||||
--guc->submission_state.guc_ids_in_use;
|
||||
ida_simple_remove(&guc->submission_state.guc_ids,
|
||||
ce->guc_id.id);
|
||||
}
|
||||
clr_ctx_id_mapping(guc, ce->guc_id.id);
|
||||
set_context_guc_id_invalid(ce);
|
||||
}
|
||||
@ -2429,6 +2448,10 @@ static int guc_context_policy_init_v70(struct intel_context *ce, bool loop)
|
||||
int ret;
|
||||
|
||||
/* NB: For both of these, zero means disabled. */
|
||||
GEM_BUG_ON(overflows_type(engine->props.timeslice_duration_ms * 1000,
|
||||
execution_quantum));
|
||||
GEM_BUG_ON(overflows_type(engine->props.preempt_timeout_ms * 1000,
|
||||
preemption_timeout));
|
||||
execution_quantum = engine->props.timeslice_duration_ms * 1000;
|
||||
preemption_timeout = engine->props.preempt_timeout_ms * 1000;
|
||||
|
||||
@ -2462,6 +2485,10 @@ static void guc_context_policy_init_v69(struct intel_engine_cs *engine,
|
||||
desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE_V69;
|
||||
|
||||
/* NB: For both of these, zero means disabled. */
|
||||
GEM_BUG_ON(overflows_type(engine->props.timeslice_duration_ms * 1000,
|
||||
desc->execution_quantum));
|
||||
GEM_BUG_ON(overflows_type(engine->props.preempt_timeout_ms * 1000,
|
||||
desc->preemption_timeout));
|
||||
desc->execution_quantum = engine->props.timeslice_duration_ms * 1000;
|
||||
desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000;
|
||||
}
|
||||
@ -2998,41 +3025,104 @@ guc_context_revoke(struct intel_context *ce, struct i915_request *rq,
|
||||
}
|
||||
}
|
||||
|
||||
static void guc_context_sched_disable(struct intel_context *ce)
|
||||
static void do_sched_disable(struct intel_guc *guc, struct intel_context *ce,
|
||||
unsigned long flags)
|
||||
__releases(ce->guc_state.lock)
|
||||
{
|
||||
struct intel_guc *guc = ce_to_guc(ce);
|
||||
unsigned long flags;
|
||||
struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm;
|
||||
intel_wakeref_t wakeref;
|
||||
u16 guc_id;
|
||||
|
||||
GEM_BUG_ON(intel_context_is_child(ce));
|
||||
|
||||
spin_lock_irqsave(&ce->guc_state.lock, flags);
|
||||
|
||||
/*
|
||||
* We have to check if the context has been disabled by another thread,
|
||||
* check if submission has been disabled to seal a race with reset and
|
||||
* finally check if any more requests have been committed to the
|
||||
* context ensuring that a request doesn't slip through the
|
||||
* 'context_pending_disable' fence.
|
||||
*/
|
||||
if (unlikely(!context_enabled(ce) || submission_disabled(guc) ||
|
||||
context_has_committed_requests(ce))) {
|
||||
clr_context_enabled(ce);
|
||||
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
|
||||
goto unpin;
|
||||
}
|
||||
lockdep_assert_held(&ce->guc_state.lock);
|
||||
guc_id = prep_context_pending_disable(ce);
|
||||
|
||||
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
|
||||
|
||||
with_intel_runtime_pm(runtime_pm, wakeref)
|
||||
__guc_context_sched_disable(guc, ce, guc_id);
|
||||
}
|
||||
|
||||
return;
|
||||
unpin:
|
||||
intel_context_sched_disable_unpin(ce);
|
||||
static bool bypass_sched_disable(struct intel_guc *guc,
|
||||
struct intel_context *ce)
|
||||
{
|
||||
lockdep_assert_held(&ce->guc_state.lock);
|
||||
GEM_BUG_ON(intel_context_is_child(ce));
|
||||
|
||||
if (submission_disabled(guc) || context_guc_id_invalid(ce) ||
|
||||
!ctx_id_mapped(guc, ce->guc_id.id)) {
|
||||
clr_context_enabled(ce);
|
||||
return true;
|
||||
}
|
||||
|
||||
return !context_enabled(ce);
|
||||
}
|
||||
|
||||
static void __delay_sched_disable(struct work_struct *wrk)
|
||||
{
|
||||
struct intel_context *ce =
|
||||
container_of(wrk, typeof(*ce), guc_state.sched_disable_delay_work.work);
|
||||
struct intel_guc *guc = ce_to_guc(ce);
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&ce->guc_state.lock, flags);
|
||||
|
||||
if (bypass_sched_disable(guc, ce)) {
|
||||
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
|
||||
intel_context_sched_disable_unpin(ce);
|
||||
} else {
|
||||
do_sched_disable(guc, ce, flags);
|
||||
}
|
||||
}
|
||||
|
||||
static bool guc_id_pressure(struct intel_guc *guc, struct intel_context *ce)
|
||||
{
|
||||
/*
|
||||
* parent contexts are perma-pinned, if we are unpinning do schedule
|
||||
* disable immediately.
|
||||
*/
|
||||
if (intel_context_is_parent(ce))
|
||||
return true;
|
||||
|
||||
/*
|
||||
* If we are beyond the threshold for avail guc_ids, do schedule disable immediately.
|
||||
*/
|
||||
return guc->submission_state.guc_ids_in_use >
|
||||
guc->submission_state.sched_disable_gucid_threshold;
|
||||
}
|
||||
|
||||
static void guc_context_sched_disable(struct intel_context *ce)
|
||||
{
|
||||
struct intel_guc *guc = ce_to_guc(ce);
|
||||
u64 delay = guc->submission_state.sched_disable_delay_ms;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&ce->guc_state.lock, flags);
|
||||
|
||||
if (bypass_sched_disable(guc, ce)) {
|
||||
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
|
||||
intel_context_sched_disable_unpin(ce);
|
||||
} else if (!intel_context_is_closed(ce) && !guc_id_pressure(guc, ce) &&
|
||||
delay) {
|
||||
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
|
||||
mod_delayed_work(system_unbound_wq,
|
||||
&ce->guc_state.sched_disable_delay_work,
|
||||
msecs_to_jiffies(delay));
|
||||
} else {
|
||||
do_sched_disable(guc, ce, flags);
|
||||
}
|
||||
}
|
||||
|
||||
static void guc_context_close(struct intel_context *ce)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
if (test_bit(CONTEXT_GUC_INIT, &ce->flags) &&
|
||||
cancel_delayed_work(&ce->guc_state.sched_disable_delay_work))
|
||||
__delay_sched_disable(&ce->guc_state.sched_disable_delay_work.work);
|
||||
|
||||
spin_lock_irqsave(&ce->guc_state.lock, flags);
|
||||
set_context_close_done(ce);
|
||||
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
|
||||
}
|
||||
|
||||
static inline void guc_lrc_desc_unpin(struct intel_context *ce)
|
||||
@ -3071,7 +3161,6 @@ static void __guc_context_destroy(struct intel_context *ce)
|
||||
ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_HIGH] ||
|
||||
ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] ||
|
||||
ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_NORMAL]);
|
||||
GEM_BUG_ON(ce->guc_state.number_committed_requests);
|
||||
|
||||
lrc_fini(ce);
|
||||
intel_context_fini(ce);
|
||||
@ -3340,8 +3429,6 @@ static void remove_from_context(struct i915_request *rq)
|
||||
|
||||
guc_prio_fini(rq, ce);
|
||||
|
||||
decr_context_committed_requests(ce);
|
||||
|
||||
spin_unlock_irq(&ce->guc_state.lock);
|
||||
|
||||
atomic_dec(&ce->guc_id.ref);
|
||||
@ -3351,6 +3438,8 @@ static void remove_from_context(struct i915_request *rq)
|
||||
static const struct intel_context_ops guc_context_ops = {
|
||||
.alloc = guc_context_alloc,
|
||||
|
||||
.close = guc_context_close,
|
||||
|
||||
.pre_pin = guc_context_pre_pin,
|
||||
.pin = guc_context_pin,
|
||||
.unpin = guc_context_unpin,
|
||||
@ -3433,6 +3522,10 @@ static void guc_context_init(struct intel_context *ce)
|
||||
rcu_read_unlock();
|
||||
|
||||
ce->guc_state.prio = map_i915_prio_to_guc_prio(prio);
|
||||
|
||||
INIT_DELAYED_WORK(&ce->guc_state.sched_disable_delay_work,
|
||||
__delay_sched_disable);
|
||||
|
||||
set_bit(CONTEXT_GUC_INIT, &ce->flags);
|
||||
}
|
||||
|
||||
@ -3470,6 +3563,26 @@ static int guc_request_alloc(struct i915_request *rq)
|
||||
if (unlikely(!test_bit(CONTEXT_GUC_INIT, &ce->flags)))
|
||||
guc_context_init(ce);
|
||||
|
||||
/*
|
||||
* If the context gets closed while the execbuf is ongoing, the context
|
||||
* close code will race with the below code to cancel the delayed work.
|
||||
* If the context close wins the race and cancels the work, it will
|
||||
* immediately call the sched disable (see guc_context_close), so there
|
||||
* is a chance we can get past this check while the sched_disable code
|
||||
* is being executed. To make sure that code completes before we check
|
||||
* the status further down, we wait for the close process to complete.
|
||||
* Else, this code path could send a request down thinking that the
|
||||
* context is still in a schedule-enable mode while the GuC ends up
|
||||
* dropping the request completely because the disable did go from the
|
||||
* context_close path right to GuC just prior. In the event the CT is
|
||||
* full, we could potentially need to wait up to 1.5 seconds.
|
||||
*/
|
||||
if (cancel_delayed_work_sync(&ce->guc_state.sched_disable_delay_work))
|
||||
intel_context_sched_disable_unpin(ce);
|
||||
else if (intel_context_is_closed(ce))
|
||||
if (wait_for(context_close_done(ce), 1500))
|
||||
drm_warn(&guc_to_gt(guc)->i915->drm,
|
||||
"timed out waiting on context sched close before realloc\n");
|
||||
/*
|
||||
* Call pin_guc_id here rather than in the pinning step as with
|
||||
* dma_resv, contexts can be repeatedly pinned / unpinned trashing the
|
||||
@ -3524,7 +3637,6 @@ out:
|
||||
|
||||
list_add_tail(&rq->guc_fence_link, &ce->guc_state.fences);
|
||||
}
|
||||
incr_context_committed_requests(ce);
|
||||
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
|
||||
|
||||
return 0;
|
||||
@ -3600,6 +3712,8 @@ static int guc_virtual_context_alloc(struct intel_context *ce)
|
||||
static const struct intel_context_ops virtual_guc_context_ops = {
|
||||
.alloc = guc_virtual_context_alloc,
|
||||
|
||||
.close = guc_context_close,
|
||||
|
||||
.pre_pin = guc_virtual_context_pre_pin,
|
||||
.pin = guc_virtual_context_pin,
|
||||
.unpin = guc_virtual_context_unpin,
|
||||
@ -3689,6 +3803,8 @@ static void guc_child_context_destroy(struct kref *kref)
|
||||
static const struct intel_context_ops virtual_parent_context_ops = {
|
||||
.alloc = guc_virtual_context_alloc,
|
||||
|
||||
.close = guc_context_close,
|
||||
|
||||
.pre_pin = guc_context_pre_pin,
|
||||
.pin = guc_parent_context_pin,
|
||||
.unpin = guc_parent_context_unpin,
|
||||
@ -4093,7 +4209,7 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine)
|
||||
|
||||
engine->emit_bb_start = gen8_emit_bb_start;
|
||||
if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
|
||||
engine->emit_bb_start = gen125_emit_bb_start;
|
||||
engine->emit_bb_start = xehp_emit_bb_start;
|
||||
}
|
||||
|
||||
static void rcs_submission_override(struct intel_engine_cs *engine)
|
||||
@ -4177,6 +4293,98 @@ int intel_guc_submission_setup(struct intel_engine_cs *engine)
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct scheduling_policy {
|
||||
/* internal data */
|
||||
u32 max_words, num_words;
|
||||
u32 count;
|
||||
/* API data */
|
||||
struct guc_update_scheduling_policy h2g;
|
||||
};
|
||||
|
||||
static u32 __guc_scheduling_policy_action_size(struct scheduling_policy *policy)
|
||||
{
|
||||
u32 *start = (void *)&policy->h2g;
|
||||
u32 *end = policy->h2g.data + policy->num_words;
|
||||
size_t delta = end - start;
|
||||
|
||||
return delta;
|
||||
}
|
||||
|
||||
static struct scheduling_policy *__guc_scheduling_policy_start_klv(struct scheduling_policy *policy)
|
||||
{
|
||||
policy->h2g.header.action = INTEL_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV;
|
||||
policy->max_words = ARRAY_SIZE(policy->h2g.data);
|
||||
policy->num_words = 0;
|
||||
policy->count = 0;
|
||||
|
||||
return policy;
|
||||
}
|
||||
|
||||
static void __guc_scheduling_policy_add_klv(struct scheduling_policy *policy,
|
||||
u32 action, u32 *data, u32 len)
|
||||
{
|
||||
u32 *klv_ptr = policy->h2g.data + policy->num_words;
|
||||
|
||||
GEM_BUG_ON((policy->num_words + 1 + len) > policy->max_words);
|
||||
*(klv_ptr++) = FIELD_PREP(GUC_KLV_0_KEY, action) |
|
||||
FIELD_PREP(GUC_KLV_0_LEN, len);
|
||||
memcpy(klv_ptr, data, sizeof(u32) * len);
|
||||
policy->num_words += 1 + len;
|
||||
policy->count++;
|
||||
}
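/*
 * Illustrative, standalone sketch (not part of the driver): the helper above
 * packs each scheduling-policy KLV as one header dword plus 'len' value
 * dwords. Assuming the usual split suggested by GUC_KLV_0_KEY/GUC_KLV_0_LEN
 * (key in bits 31:16, length in bits 15:0), the layout looks like this.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint32_t klv_pack(uint32_t *out, uint16_t key,
			 const uint32_t *val, uint16_t len)
{
	out[0] = ((uint32_t)key << 16) | len;	/* header dword */
	memcpy(&out[1], val, sizeof(*val) * len);
	return 1u + len;			/* dwords written */
}

int main(void)
{
	/* e.g. the render/compute yield KLV: key 0x1001 with two values */
	uint32_t buf[8];
	uint32_t yield[] = { 100, 50 };		/* duration in ms, ratio in % */
	uint32_t n = klv_pack(buf, 0x1001, yield, 2);

	for (uint32_t i = 0; i < n; i++)
		printf("dword[%u] = 0x%08x\n", i, buf[i]);
	return 0;
}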
|
||||
|
||||
static int __guc_action_set_scheduling_policies(struct intel_guc *guc,
|
||||
struct scheduling_policy *policy)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = intel_guc_send(guc, (u32 *)&policy->h2g,
|
||||
__guc_scheduling_policy_action_size(policy));
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
if (ret != policy->count) {
|
||||
drm_warn(&guc_to_gt(guc)->i915->drm, "GuC global scheduler policy processed %d of %d KLVs!",
|
||||
ret, policy->count);
|
||||
if (ret > policy->count)
|
||||
return -EPROTO;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int guc_init_global_schedule_policy(struct intel_guc *guc)
|
||||
{
|
||||
struct scheduling_policy policy;
|
||||
struct intel_gt *gt = guc_to_gt(guc);
|
||||
intel_wakeref_t wakeref;
|
||||
int ret = 0;
|
||||
|
||||
if (GET_UC_VER(guc) < MAKE_UC_VER(70, 3, 0))
|
||||
return 0;
|
||||
|
||||
__guc_scheduling_policy_start_klv(&policy);
|
||||
|
||||
with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref) {
|
||||
u32 yield[] = {
|
||||
GLOBAL_SCHEDULE_POLICY_RC_YIELD_DURATION,
|
||||
GLOBAL_SCHEDULE_POLICY_RC_YIELD_RATIO,
|
||||
};
|
||||
|
||||
__guc_scheduling_policy_add_klv(&policy,
|
||||
GUC_SCHEDULING_POLICIES_KLV_ID_RENDER_COMPUTE_YIELD,
|
||||
yield, ARRAY_SIZE(yield));
|
||||
|
||||
ret = __guc_action_set_scheduling_policies(guc, &policy);
|
||||
if (ret)
|
||||
i915_probe_error(gt->i915,
|
||||
"Failed to configure global scheduling policies: %pe!\n",
|
||||
ERR_PTR(ret));
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void intel_guc_submission_enable(struct intel_guc *guc)
|
||||
{
|
||||
struct intel_gt *gt = guc_to_gt(guc);
|
||||
@ -4189,6 +4397,7 @@ void intel_guc_submission_enable(struct intel_guc *guc)
|
||||
|
||||
guc_init_lrc_mapping(guc);
|
||||
guc_init_engine_stats(guc);
|
||||
guc_init_global_schedule_policy(guc);
|
||||
}
|
||||
|
||||
void intel_guc_submission_disable(struct intel_guc *guc)
|
||||
@ -4219,6 +4428,26 @@ static bool __guc_submission_selected(struct intel_guc *guc)
|
||||
return i915->params.enable_guc & ENABLE_GUC_SUBMISSION;
|
||||
}
|
||||
|
||||
int intel_guc_sched_disable_gucid_threshold_max(struct intel_guc *guc)
|
||||
{
|
||||
return guc->submission_state.num_guc_ids - NUMBER_MULTI_LRC_GUC_ID(guc);
|
||||
}
|
||||
|
||||
/*
|
||||
* This default value of 33 milliseconds (+1 millisecond rounded up) ensures that 30fps or
* higher workloads are able to enjoy the latency reduction when delaying the
* schedule-disable operation. This matches the 30fps game-render + encode (real-world)
* workload this knob was tested against.
|
||||
*/
|
||||
#define SCHED_DISABLE_DELAY_MS 34
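/*
 * Illustrative sketch only, not part of the patch: the 34 above is simply one
 * 30fps frame period truncated to whole milliseconds, plus one. The helper
 * name below is hypothetical.
 */
static inline unsigned int sched_disable_delay_from_fps(unsigned int fps)
{
	return 1000 / fps + 1; /* 1000 / 30 = 33 -> 34 ms */
}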
|
||||
|
||||
/*
|
||||
* A threshold of 75% is a reasonable starting point, considering that real-world apps
* generally don't get anywhere near this.
|
||||
*/
|
||||
#define NUM_SCHED_DISABLE_GUCIDS_DEFAULT_THRESHOLD(__guc) \
|
||||
(((intel_guc_sched_disable_gucid_threshold_max(guc)) * 3) / 4)
|
||||
|
||||
void intel_guc_submission_init_early(struct intel_guc *guc)
|
||||
{
|
||||
xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ);
|
||||
@ -4235,7 +4464,10 @@ void intel_guc_submission_init_early(struct intel_guc *guc)
|
||||
spin_lock_init(&guc->timestamp.lock);
|
||||
INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping);
|
||||
|
||||
guc->submission_state.sched_disable_delay_ms = SCHED_DISABLE_DELAY_MS;
|
||||
guc->submission_state.num_guc_ids = GUC_MAX_CONTEXT_ID;
|
||||
guc->submission_state.sched_disable_gucid_threshold =
|
||||
NUM_SCHED_DISABLE_GUCIDS_DEFAULT_THRESHOLD(guc);
|
||||
guc->submission_supported = __guc_submission_supported(guc);
|
||||
guc->submission_selected = __guc_submission_selected(guc);
|
||||
}
|
||||
|
@ -10,6 +10,9 @@
|
||||
#include "intel_huc.h"
|
||||
#include "i915_drv.h"
|
||||
|
||||
#include <linux/device/bus.h>
|
||||
#include <linux/mei_aux.h>
|
||||
|
||||
/**
|
||||
* DOC: HuC
|
||||
*
|
||||
@ -42,6 +45,172 @@
|
||||
* HuC-specific commands.
|
||||
*/
|
||||
|
||||
/*
|
||||
* MEI-GSC load is an async process. The probing of the exposed aux device
|
||||
* (see intel_gsc.c) usually happens a few seconds after i915 probe, depending
|
||||
* on when the kernel schedules it. Unless something goes terribly wrong, this is
* guaranteed to happen during boot, so the big timeout is a safety net that we
* never expect to need.
|
||||
* MEI-PXP + HuC load usually takes ~300ms, but if the GSC needs to be resumed
|
||||
* and/or reset, this can take longer. Note that the kernel might schedule
|
||||
* other work between the i915 init/resume and the MEI one, which can add to
|
||||
* the delay.
|
||||
*/
|
||||
#define GSC_INIT_TIMEOUT_MS 10000
|
||||
#define PXP_INIT_TIMEOUT_MS 5000
|
||||
|
||||
static int sw_fence_dummy_notify(struct i915_sw_fence *sf,
|
||||
enum i915_sw_fence_notify state)
|
||||
{
|
||||
return NOTIFY_DONE;
|
||||
}
|
||||
|
||||
static void __delayed_huc_load_complete(struct intel_huc *huc)
|
||||
{
|
||||
if (!i915_sw_fence_done(&huc->delayed_load.fence))
|
||||
i915_sw_fence_complete(&huc->delayed_load.fence);
|
||||
}
|
||||
|
||||
static void delayed_huc_load_complete(struct intel_huc *huc)
|
||||
{
|
||||
hrtimer_cancel(&huc->delayed_load.timer);
|
||||
__delayed_huc_load_complete(huc);
|
||||
}
|
||||
|
||||
static void __gsc_init_error(struct intel_huc *huc)
|
||||
{
|
||||
huc->delayed_load.status = INTEL_HUC_DELAYED_LOAD_ERROR;
|
||||
__delayed_huc_load_complete(huc);
|
||||
}
|
||||
|
||||
static void gsc_init_error(struct intel_huc *huc)
|
||||
{
|
||||
hrtimer_cancel(&huc->delayed_load.timer);
|
||||
__gsc_init_error(huc);
|
||||
}
|
||||
|
||||
static void gsc_init_done(struct intel_huc *huc)
|
||||
{
|
||||
hrtimer_cancel(&huc->delayed_load.timer);
|
||||
|
||||
/* MEI-GSC init is done, now we wait for MEI-PXP to bind */
|
||||
huc->delayed_load.status = INTEL_HUC_WAITING_ON_PXP;
|
||||
if (!i915_sw_fence_done(&huc->delayed_load.fence))
|
||||
hrtimer_start(&huc->delayed_load.timer,
|
||||
ms_to_ktime(PXP_INIT_TIMEOUT_MS),
|
||||
HRTIMER_MODE_REL);
|
||||
}
|
||||
|
||||
static enum hrtimer_restart huc_delayed_load_timer_callback(struct hrtimer *hrtimer)
|
||||
{
|
||||
struct intel_huc *huc = container_of(hrtimer, struct intel_huc, delayed_load.timer);
|
||||
|
||||
if (!intel_huc_is_authenticated(huc)) {
|
||||
if (huc->delayed_load.status == INTEL_HUC_WAITING_ON_GSC)
|
||||
drm_notice(&huc_to_gt(huc)->i915->drm,
|
||||
"timed out waiting for MEI GSC init to load HuC\n");
|
||||
else if (huc->delayed_load.status == INTEL_HUC_WAITING_ON_PXP)
|
||||
drm_notice(&huc_to_gt(huc)->i915->drm,
|
||||
"timed out waiting for MEI PXP init to load HuC\n");
|
||||
else
|
||||
MISSING_CASE(huc->delayed_load.status);
|
||||
|
||||
__gsc_init_error(huc);
|
||||
}
|
||||
|
||||
return HRTIMER_NORESTART;
|
||||
}
|
||||
|
||||
static void huc_delayed_load_start(struct intel_huc *huc)
|
||||
{
|
||||
ktime_t delay;
|
||||
|
||||
GEM_BUG_ON(intel_huc_is_authenticated(huc));
|
||||
|
||||
/*
|
||||
* On resume we don't have to wait for MEI-GSC to be re-probed, but we
|
||||
* do need to wait for MEI-PXP to reset & re-bind
|
||||
*/
|
||||
switch (huc->delayed_load.status) {
|
||||
case INTEL_HUC_WAITING_ON_GSC:
|
||||
delay = ms_to_ktime(GSC_INIT_TIMEOUT_MS);
|
||||
break;
|
||||
case INTEL_HUC_WAITING_ON_PXP:
|
||||
delay = ms_to_ktime(PXP_INIT_TIMEOUT_MS);
|
||||
break;
|
||||
default:
|
||||
gsc_init_error(huc);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* This fence is always complete unless we're waiting for the
|
||||
* GSC device to come up to load the HuC. We arm the fence here
|
||||
* and complete it when we confirm that the HuC is loaded from
|
||||
* the PXP bind callback.
|
||||
*/
|
||||
GEM_BUG_ON(!i915_sw_fence_done(&huc->delayed_load.fence));
|
||||
i915_sw_fence_fini(&huc->delayed_load.fence);
|
||||
i915_sw_fence_reinit(&huc->delayed_load.fence);
|
||||
i915_sw_fence_await(&huc->delayed_load.fence);
|
||||
i915_sw_fence_commit(&huc->delayed_load.fence);
|
||||
|
||||
hrtimer_start(&huc->delayed_load.timer, delay, HRTIMER_MODE_REL);
|
||||
}
|
||||
|
||||
static int gsc_notifier(struct notifier_block *nb, unsigned long action, void *data)
|
||||
{
|
||||
struct device *dev = data;
|
||||
struct intel_huc *huc = container_of(nb, struct intel_huc, delayed_load.nb);
|
||||
struct intel_gsc_intf *intf = &huc_to_gt(huc)->gsc.intf[0];
|
||||
|
||||
if (!intf->adev || &intf->adev->aux_dev.dev != dev)
|
||||
return 0;
|
||||
|
||||
switch (action) {
|
||||
case BUS_NOTIFY_BOUND_DRIVER: /* mei driver bound to aux device */
|
||||
gsc_init_done(huc);
|
||||
break;
|
||||
|
||||
case BUS_NOTIFY_DRIVER_NOT_BOUND: /* mei driver fails to be bound */
|
||||
case BUS_NOTIFY_UNBIND_DRIVER: /* mei driver about to be unbound */
|
||||
drm_info(&huc_to_gt(huc)->i915->drm,
|
||||
"mei driver not bound, disabling HuC load\n");
|
||||
gsc_init_error(huc);
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void intel_huc_register_gsc_notifier(struct intel_huc *huc, struct bus_type *bus)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (!intel_huc_is_loaded_by_gsc(huc))
|
||||
return;
|
||||
|
||||
huc->delayed_load.nb.notifier_call = gsc_notifier;
|
||||
ret = bus_register_notifier(bus, &huc->delayed_load.nb);
|
||||
if (ret) {
|
||||
drm_err(&huc_to_gt(huc)->i915->drm,
|
||||
"failed to register GSC notifier\n");
|
||||
huc->delayed_load.nb.notifier_call = NULL;
|
||||
gsc_init_error(huc);
|
||||
}
|
||||
}
|
||||
|
||||
void intel_huc_unregister_gsc_notifier(struct intel_huc *huc, struct bus_type *bus)
|
||||
{
|
||||
if (!huc->delayed_load.nb.notifier_call)
|
||||
return;
|
||||
|
||||
delayed_huc_load_complete(huc);
|
||||
|
||||
bus_unregister_notifier(bus, &huc->delayed_load.nb);
|
||||
huc->delayed_load.nb.notifier_call = NULL;
|
||||
}
|
||||
|
||||
void intel_huc_init_early(struct intel_huc *huc)
|
||||
{
|
||||
struct drm_i915_private *i915 = huc_to_gt(huc)->i915;
|
||||
@ -57,6 +226,17 @@ void intel_huc_init_early(struct intel_huc *huc)
|
||||
huc->status.mask = HUC_FW_VERIFIED;
|
||||
huc->status.value = HUC_FW_VERIFIED;
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialize fence to be complete as this is expected to be complete
|
||||
* unless there is a delayed HuC reload in progress.
|
||||
*/
|
||||
i915_sw_fence_init(&huc->delayed_load.fence,
|
||||
sw_fence_dummy_notify);
|
||||
i915_sw_fence_commit(&huc->delayed_load.fence);
|
||||
|
||||
hrtimer_init(&huc->delayed_load.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
|
||||
huc->delayed_load.timer.function = huc_delayed_load_timer_callback;
|
||||
}
|
||||
|
||||
#define HUC_LOAD_MODE_STRING(x) (x ? "GSC" : "legacy")
|
||||
@ -113,6 +293,7 @@ int intel_huc_init(struct intel_huc *huc)
|
||||
return 0;
|
||||
|
||||
out:
|
||||
intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_INIT_FAIL);
|
||||
drm_info(&i915->drm, "HuC init failed with %d\n", err);
|
||||
return err;
|
||||
}
|
||||
@ -122,9 +303,50 @@ void intel_huc_fini(struct intel_huc *huc)
|
||||
if (!intel_uc_fw_is_loadable(&huc->fw))
|
||||
return;
|
||||
|
||||
delayed_huc_load_complete(huc);
|
||||
|
||||
i915_sw_fence_fini(&huc->delayed_load.fence);
|
||||
intel_uc_fw_fini(&huc->fw);
|
||||
}
|
||||
|
||||
void intel_huc_suspend(struct intel_huc *huc)
|
||||
{
|
||||
if (!intel_uc_fw_is_loadable(&huc->fw))
|
||||
return;
|
||||
|
||||
/*
|
||||
* In the unlikely case that we're suspending before the GSC has
|
||||
* completed its loading sequence, just stop waiting. We'll restart
|
||||
* on resume.
|
||||
*/
|
||||
delayed_huc_load_complete(huc);
|
||||
}
|
||||
|
||||
int intel_huc_wait_for_auth_complete(struct intel_huc *huc)
|
||||
{
|
||||
struct intel_gt *gt = huc_to_gt(huc);
|
||||
int ret;
|
||||
|
||||
ret = __intel_wait_for_register(gt->uncore,
|
||||
huc->status.reg,
|
||||
huc->status.mask,
|
||||
huc->status.value,
|
||||
2, 50, NULL);
|
||||
|
||||
/* mark the load process as complete even if the wait failed */
|
||||
delayed_huc_load_complete(huc);
|
||||
|
||||
if (ret) {
|
||||
drm_err(>->i915->drm, "HuC: Firmware not verified %d\n", ret);
|
||||
intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_LOAD_FAIL);
|
||||
return ret;
|
||||
}
|
||||
|
||||
intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_RUNNING);
|
||||
drm_info(>->i915->drm, "HuC authenticated\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* intel_huc_auth() - Authenticate HuC uCode
|
||||
* @huc: intel_huc structure
|
||||
@ -161,27 +383,18 @@ int intel_huc_auth(struct intel_huc *huc)
|
||||
}
|
||||
|
||||
/* Check authentication status, it should be done by now */
|
||||
ret = __intel_wait_for_register(gt->uncore,
|
||||
huc->status.reg,
|
||||
huc->status.mask,
|
||||
huc->status.value,
|
||||
2, 50, NULL);
|
||||
if (ret) {
|
||||
DRM_ERROR("HuC: Firmware not verified %d\n", ret);
|
||||
ret = intel_huc_wait_for_auth_complete(huc);
|
||||
if (ret)
|
||||
goto fail;
|
||||
}
|
||||
|
||||
intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_RUNNING);
|
||||
drm_info(>->i915->drm, "HuC authenticated\n");
|
||||
return 0;
|
||||
|
||||
fail:
|
||||
i915_probe_error(gt->i915, "HuC: Authentication failed %d\n", ret);
|
||||
intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_LOAD_FAIL);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool huc_is_authenticated(struct intel_huc *huc)
|
||||
bool intel_huc_is_authenticated(struct intel_huc *huc)
|
||||
{
|
||||
struct intel_gt *gt = huc_to_gt(huc);
|
||||
intel_wakeref_t wakeref;
|
||||
@ -200,13 +413,8 @@ static bool huc_is_authenticated(struct intel_huc *huc)
|
||||
* This function reads status register to verify if HuC
|
||||
* firmware was successfully loaded.
|
||||
*
|
||||
* Returns:
|
||||
* * -ENODEV if HuC is not present on this platform,
|
||||
* * -EOPNOTSUPP if HuC firmware is disabled,
|
||||
* * -ENOPKG if HuC firmware was not installed,
|
||||
* * -ENOEXEC if HuC firmware is invalid or mismatched,
|
||||
* * 0 if HuC firmware is not running,
|
||||
* * 1 if HuC firmware is authenticated and running.
|
||||
* The return values match what is expected for the I915_PARAM_HUC_STATUS
|
||||
* getparam.
|
||||
*/
|
||||
int intel_huc_check_status(struct intel_huc *huc)
|
||||
{
|
||||
@ -219,11 +427,21 @@ int intel_huc_check_status(struct intel_huc *huc)
|
||||
return -ENOPKG;
|
||||
case INTEL_UC_FIRMWARE_ERROR:
|
||||
return -ENOEXEC;
|
||||
case INTEL_UC_FIRMWARE_INIT_FAIL:
|
||||
return -ENOMEM;
|
||||
case INTEL_UC_FIRMWARE_LOAD_FAIL:
|
||||
return -EIO;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return huc_is_authenticated(huc);
|
||||
return intel_huc_is_authenticated(huc);
|
||||
}
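/*
 * Userspace sketch (illustration only, not part of the patch): the values
 * documented above can be queried through the standard I915_PARAM_HUC_STATUS
 * getparam, e.g. with libdrm. The helper name is hypothetical.
 */
#include <errno.h>
#include <xf86drm.h>
#include <i915_drm.h>

static int huc_status(int drm_fd)
{
	int value = 0;
	struct drm_i915_getparam gp = {
		.param = I915_PARAM_HUC_STATUS,
		.value = &value,
	};

	if (drmIoctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &gp))
		return -errno; /* negative statuses surface as the ioctl errno */

	return value; /* 1: authenticated and running, 0: not running */
}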
|
||||
|
||||
static bool huc_has_delayed_load(struct intel_huc *huc)
|
||||
{
|
||||
return intel_huc_is_loaded_by_gsc(huc) &&
|
||||
(huc->delayed_load.status != INTEL_HUC_DELAYED_LOAD_ERROR);
|
||||
}
|
||||
|
||||
void intel_huc_update_auth_status(struct intel_huc *huc)
|
||||
@ -231,9 +449,11 @@ void intel_huc_update_auth_status(struct intel_huc *huc)
|
||||
if (!intel_uc_fw_is_loadable(&huc->fw))
|
||||
return;
|
||||
|
||||
if (huc_is_authenticated(huc))
|
||||
if (intel_huc_is_authenticated(huc))
|
||||
intel_uc_fw_change_status(&huc->fw,
|
||||
INTEL_UC_FIRMWARE_RUNNING);
|
||||
else if (huc_has_delayed_load(huc))
|
||||
huc_delayed_load_start(huc);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -7,9 +7,21 @@
|
||||
#define _INTEL_HUC_H_
|
||||
|
||||
#include "i915_reg_defs.h"
|
||||
#include "i915_sw_fence.h"
|
||||
#include "intel_uc_fw.h"
|
||||
#include "intel_huc_fw.h"
|
||||
|
||||
#include <linux/notifier.h>
|
||||
#include <linux/hrtimer.h>
|
||||
|
||||
struct bus_type;
|
||||
|
||||
enum intel_huc_delayed_load_status {
|
||||
INTEL_HUC_WAITING_ON_GSC = 0,
|
||||
INTEL_HUC_WAITING_ON_PXP,
|
||||
INTEL_HUC_DELAYED_LOAD_ERROR,
|
||||
};
|
||||
|
||||
struct intel_huc {
|
||||
/* Generic uC firmware management */
|
||||
struct intel_uc_fw fw;
|
||||
@ -20,14 +32,27 @@ struct intel_huc {
|
||||
u32 mask;
|
||||
u32 value;
|
||||
} status;
|
||||
|
||||
struct {
|
||||
struct i915_sw_fence fence;
|
||||
struct hrtimer timer;
|
||||
struct notifier_block nb;
|
||||
enum intel_huc_delayed_load_status status;
|
||||
} delayed_load;
|
||||
};
|
||||
|
||||
void intel_huc_init_early(struct intel_huc *huc);
|
||||
int intel_huc_init(struct intel_huc *huc);
|
||||
void intel_huc_fini(struct intel_huc *huc);
|
||||
void intel_huc_suspend(struct intel_huc *huc);
|
||||
int intel_huc_auth(struct intel_huc *huc);
|
||||
int intel_huc_wait_for_auth_complete(struct intel_huc *huc);
|
||||
int intel_huc_check_status(struct intel_huc *huc);
|
||||
void intel_huc_update_auth_status(struct intel_huc *huc);
|
||||
bool intel_huc_is_authenticated(struct intel_huc *huc);
|
||||
|
||||
void intel_huc_register_gsc_notifier(struct intel_huc *huc, struct bus_type *bus);
|
||||
void intel_huc_unregister_gsc_notifier(struct intel_huc *huc, struct bus_type *bus);
|
||||
|
||||
static inline int intel_huc_sanitize(struct intel_huc *huc)
|
||||
{
|
||||
@ -56,6 +81,12 @@ static inline bool intel_huc_is_loaded_by_gsc(const struct intel_huc *huc)
|
||||
return huc->fw.loaded_via_gsc;
|
||||
}
|
||||
|
||||
static inline bool intel_huc_wait_required(struct intel_huc *huc)
|
||||
{
|
||||
return intel_huc_is_used(huc) && intel_huc_is_loaded_by_gsc(huc) &&
|
||||
!intel_huc_is_authenticated(huc);
|
||||
}
|
||||
|
||||
void intel_huc_load_status(struct intel_huc *huc, struct drm_printer *p);
|
||||
|
||||
#endif
|
||||
|
@ -3,9 +3,43 @@
|
||||
* Copyright © 2014-2019 Intel Corporation
|
||||
*/
|
||||
|
||||
#include "gt/intel_gsc.h"
|
||||
#include "gt/intel_gt.h"
|
||||
#include "intel_huc.h"
|
||||
#include "intel_huc_fw.h"
|
||||
#include "i915_drv.h"
|
||||
#include "pxp/intel_pxp_huc.h"
|
||||
|
||||
int intel_huc_fw_load_and_auth_via_gsc(struct intel_huc *huc)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (!intel_huc_is_loaded_by_gsc(huc))
|
||||
return -ENODEV;
|
||||
|
||||
if (!intel_uc_fw_is_loadable(&huc->fw))
|
||||
return -ENOEXEC;
|
||||
|
||||
/*
|
||||
* If we abort a suspend, HuC might still be loaded when the mei
|
||||
* component gets re-bound and this function is called again. If so, just
|
||||
* mark the HuC as loaded.
|
||||
*/
|
||||
if (intel_huc_is_authenticated(huc)) {
|
||||
intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_RUNNING);
|
||||
return 0;
|
||||
}
|
||||
|
||||
GEM_WARN_ON(intel_uc_fw_is_loaded(&huc->fw));
|
||||
|
||||
ret = intel_pxp_huc_load_and_auth(&huc_to_gt(huc)->pxp);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_TRANSFERRED);
|
||||
|
||||
return intel_huc_wait_for_auth_complete(huc);
|
||||
}
|
||||
|
||||
/**
|
||||
* intel_huc_fw_upload() - load HuC uCode to device via DMA transfer
|
||||
|
@ -8,6 +8,7 @@
|
||||
|
||||
struct intel_huc;
|
||||
|
||||
int intel_huc_fw_load_and_auth_via_gsc(struct intel_huc *huc);
|
||||
int intel_huc_fw_upload(struct intel_huc *huc);
|
||||
|
||||
#endif
|
||||
|
@ -93,7 +93,8 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
|
||||
fw_def(BROXTON, 0, guc_mmp(bxt, 70, 1, 1)) \
|
||||
fw_def(SKYLAKE, 0, guc_mmp(skl, 70, 1, 1))
|
||||
|
||||
#define INTEL_HUC_FIRMWARE_DEFS(fw_def, huc_raw, huc_mmp) \
|
||||
#define INTEL_HUC_FIRMWARE_DEFS(fw_def, huc_raw, huc_mmp, huc_gsc) \
|
||||
fw_def(DG2, 0, huc_gsc(dg2)) \
|
||||
fw_def(ALDERLAKE_P, 0, huc_raw(tgl)) \
|
||||
fw_def(ALDERLAKE_P, 0, huc_mmp(tgl, 7, 9, 3)) \
|
||||
fw_def(ALDERLAKE_S, 0, huc_raw(tgl)) \
|
||||
@ -141,6 +142,9 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
|
||||
#define MAKE_HUC_FW_PATH_BLANK(prefix_) \
|
||||
__MAKE_UC_FW_PATH_BLANK(prefix_, "_huc")
|
||||
|
||||
#define MAKE_HUC_FW_PATH_GSC(prefix_) \
|
||||
__MAKE_UC_FW_PATH_BLANK(prefix_, "_huc_gsc")
|
||||
|
||||
#define MAKE_HUC_FW_PATH_MMP(prefix_, major_, minor_, patch_) \
|
||||
__MAKE_UC_FW_PATH_MMP(prefix_, "_huc_", major_, minor_, patch_)
|
||||
|
||||
@ -153,7 +157,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
|
||||
MODULE_FIRMWARE(uc_);
|
||||
|
||||
INTEL_GUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH_MAJOR, MAKE_GUC_FW_PATH_MMP)
|
||||
INTEL_HUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_HUC_FW_PATH_BLANK, MAKE_HUC_FW_PATH_MMP)
|
||||
INTEL_HUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_HUC_FW_PATH_BLANK, MAKE_HUC_FW_PATH_MMP, MAKE_HUC_FW_PATH_GSC)
|
||||
|
||||
/*
|
||||
* The next expansion of the table macros (in __uc_fw_auto_select below) provides
|
||||
@ -168,6 +172,7 @@ struct __packed uc_fw_blob {
|
||||
u8 major;
|
||||
u8 minor;
|
||||
u8 patch;
|
||||
bool loaded_via_gsc;
|
||||
};
|
||||
|
||||
#define UC_FW_BLOB_BASE(major_, minor_, patch_, path_) \
|
||||
@ -176,16 +181,16 @@ struct __packed uc_fw_blob {
|
||||
.patch = patch_, \
|
||||
.path = path_,
|
||||
|
||||
#define UC_FW_BLOB_NEW(major_, minor_, patch_, path_) \
|
||||
#define UC_FW_BLOB_NEW(major_, minor_, patch_, gsc_, path_) \
|
||||
{ UC_FW_BLOB_BASE(major_, minor_, patch_, path_) \
|
||||
.legacy = false }
|
||||
.legacy = false, .loaded_via_gsc = gsc_ }
|
||||
|
||||
#define UC_FW_BLOB_OLD(major_, minor_, patch_, path_) \
|
||||
{ UC_FW_BLOB_BASE(major_, minor_, patch_, path_) \
|
||||
.legacy = true }
|
||||
|
||||
#define GUC_FW_BLOB(prefix_, major_, minor_) \
|
||||
UC_FW_BLOB_NEW(major_, minor_, 0, \
|
||||
UC_FW_BLOB_NEW(major_, minor_, 0, false, \
|
||||
MAKE_GUC_FW_PATH_MAJOR(prefix_, major_, minor_))
|
||||
|
||||
#define GUC_FW_BLOB_MMP(prefix_, major_, minor_, patch_) \
|
||||
@ -193,12 +198,15 @@ struct __packed uc_fw_blob {
|
||||
MAKE_GUC_FW_PATH_MMP(prefix_, major_, minor_, patch_))
|
||||
|
||||
#define HUC_FW_BLOB(prefix_) \
|
||||
UC_FW_BLOB_NEW(0, 0, 0, MAKE_HUC_FW_PATH_BLANK(prefix_))
|
||||
UC_FW_BLOB_NEW(0, 0, 0, false, MAKE_HUC_FW_PATH_BLANK(prefix_))
|
||||
|
||||
#define HUC_FW_BLOB_MMP(prefix_, major_, minor_, patch_) \
|
||||
UC_FW_BLOB_OLD(major_, minor_, patch_, \
|
||||
MAKE_HUC_FW_PATH_MMP(prefix_, major_, minor_, patch_))
|
||||
|
||||
#define HUC_FW_BLOB_GSC(prefix_) \
|
||||
UC_FW_BLOB_NEW(0, 0, 0, true, MAKE_HUC_FW_PATH_GSC(prefix_))
|
||||
|
||||
struct __packed uc_fw_platform_requirement {
|
||||
enum intel_platform p;
|
||||
u8 rev; /* first platform rev using this FW */
|
||||
@ -224,7 +232,7 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw)
|
||||
INTEL_GUC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB, GUC_FW_BLOB_MMP)
|
||||
};
|
||||
static const struct uc_fw_platform_requirement blobs_huc[] = {
|
||||
INTEL_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB, HUC_FW_BLOB_MMP)
|
||||
INTEL_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB, HUC_FW_BLOB_MMP, HUC_FW_BLOB_GSC)
|
||||
};
|
||||
static const struct fw_blobs_by_type blobs_all[INTEL_UC_FW_NUM_TYPES] = {
|
||||
[INTEL_UC_FW_TYPE_GUC] = { blobs_guc, ARRAY_SIZE(blobs_guc) },
|
||||
@ -272,6 +280,7 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw)
|
||||
uc_fw->file_wanted.path = blob->path;
|
||||
uc_fw->file_wanted.major_ver = blob->major;
|
||||
uc_fw->file_wanted.minor_ver = blob->minor;
|
||||
uc_fw->loaded_via_gsc = blob->loaded_via_gsc;
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
@ -904,7 +913,6 @@ int intel_uc_fw_init(struct intel_uc_fw *uc_fw)
|
||||
out_unpin:
|
||||
i915_gem_object_unpin_pages(uc_fw->obj);
|
||||
out:
|
||||
intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_INIT_FAIL);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
@ -354,9 +354,9 @@ void intel_vgpu_init_cfg_space(struct intel_vgpu *vgpu,
|
||||
memset(vgpu_cfg_space(vgpu) + INTEL_GVT_PCI_OPREGION, 0, 4);
|
||||
|
||||
vgpu->cfg_space.bar[INTEL_GVT_PCI_BAR_GTTMMIO].size =
|
||||
pci_resource_len(pdev, GTTMMADR_BAR);
|
||||
pci_resource_len(pdev, GEN4_GTTMMADR_BAR);
|
||||
vgpu->cfg_space.bar[INTEL_GVT_PCI_BAR_APERTURE].size =
|
||||
pci_resource_len(pdev, GTT_APERTURE_BAR);
|
||||
pci_resource_len(pdev, GEN4_GMADR_BAR);
|
||||
|
||||
memset(vgpu_cfg_space(vgpu) + PCI_ROM_ADDRESS, 0, 4);
|
||||
|
||||
|
@ -734,7 +734,7 @@ static i915_reg_t force_nonpriv_white_list[] = {
|
||||
_MMIO(0x770c),
|
||||
_MMIO(0x83a8),
|
||||
_MMIO(0xb110),
|
||||
GEN8_L3SQCREG4,//_MMIO(0xb118)
|
||||
_MMIO(0xb118),
|
||||
_MMIO(0xe100),
|
||||
_MMIO(0xe18c),
|
||||
_MMIO(0xe48c),
|
||||
@ -2257,7 +2257,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
|
||||
MMIO_DFH(_MMIO(0x2438), D_ALL, F_CMD_ACCESS, NULL, NULL);
|
||||
MMIO_DFH(_MMIO(0x243c), D_ALL, F_CMD_ACCESS, NULL, NULL);
|
||||
MMIO_DFH(_MMIO(0x7018), D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
|
||||
MMIO_DFH(HALF_SLICE_CHICKEN3, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
|
||||
MMIO_DFH(HSW_HALF_SLICE_CHICKEN3, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
|
||||
MMIO_DFH(GEN7_HALF_SLICE_CHICKEN1, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
|
||||
|
||||
/* display */
|
||||
|
@ -106,15 +106,15 @@ static struct engine_mmio gen9_engine_mmio_list[] __cacheline_aligned = {
|
||||
{RCS0, GEN8_CS_CHICKEN1, 0xffff, true}, /* 0x2580 */
|
||||
{RCS0, COMMON_SLICE_CHICKEN2, 0xffff, true}, /* 0x7014 */
|
||||
{RCS0, GEN9_CS_DEBUG_MODE1, 0xffff, false}, /* 0x20ec */
|
||||
{RCS0, GEN8_L3SQCREG4, 0, false}, /* 0xb118 */
|
||||
{RCS0, GEN9_SCRATCH1, 0, false}, /* 0xb11c */
|
||||
{RCS0, _MMIO(0xb118), 0, false}, /* GEN8_L3SQCREG4 */
|
||||
{RCS0, _MMIO(0xb11c), 0, false}, /* GEN9_SCRATCH1 */
|
||||
{RCS0, GEN9_SCRATCH_LNCF1, 0, false}, /* 0xb008 */
|
||||
{RCS0, GEN7_HALF_SLICE_CHICKEN1, 0xffff, true}, /* 0xe100 */
|
||||
{RCS0, HALF_SLICE_CHICKEN2, 0xffff, true}, /* 0xe180 */
|
||||
{RCS0, HALF_SLICE_CHICKEN3, 0xffff, true}, /* 0xe184 */
|
||||
{RCS0, GEN9_HALF_SLICE_CHICKEN5, 0xffff, true}, /* 0xe188 */
|
||||
{RCS0, GEN9_HALF_SLICE_CHICKEN7, 0xffff, true}, /* 0xe194 */
|
||||
{RCS0, GEN8_ROW_CHICKEN, 0xffff, true}, /* 0xe4f0 */
|
||||
{RCS0, _MMIO(0xe180), 0xffff, true}, /* HALF_SLICE_CHICKEN2 */
|
||||
{RCS0, _MMIO(0xe184), 0xffff, true}, /* GEN8_HALF_SLICE_CHICKEN3 */
|
||||
{RCS0, _MMIO(0xe188), 0xffff, true}, /* GEN9_HALF_SLICE_CHICKEN5 */
|
||||
{RCS0, _MMIO(0xe194), 0xffff, true}, /* GEN9_HALF_SLICE_CHICKEN7 */
|
||||
{RCS0, _MMIO(0xe4f0), 0xffff, true}, /* GEN8_ROW_CHICKEN */
|
||||
{RCS0, TRVATTL3PTRDW(0), 0, true}, /* 0x4de0 */
|
||||
{RCS0, TRVATTL3PTRDW(1), 0, true}, /* 0x4de4 */
|
||||
{RCS0, TRNULLDETCT, 0, true}, /* 0x4de8 */
|
||||
|
@ -81,6 +81,7 @@
|
||||
#include "i915_drm_client.h"
|
||||
#include "i915_drv.h"
|
||||
#include "i915_getparam.h"
|
||||
#include "i915_hwmon.h"
|
||||
#include "i915_ioc32.h"
|
||||
#include "i915_ioctl.h"
|
||||
#include "i915_irq.h"
|
||||
@ -764,6 +765,8 @@ static void i915_driver_register(struct drm_i915_private *dev_priv)
|
||||
for_each_gt(gt, dev_priv, i)
|
||||
intel_gt_driver_register(gt);
|
||||
|
||||
i915_hwmon_register(dev_priv);
|
||||
|
||||
intel_display_driver_register(dev_priv);
|
||||
|
||||
intel_power_domains_enable(dev_priv);
|
||||
@ -796,6 +799,8 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv)
|
||||
for_each_gt(gt, dev_priv, i)
|
||||
intel_gt_driver_unregister(gt);
|
||||
|
||||
i915_hwmon_unregister(dev_priv);
|
||||
|
||||
i915_perf_unregister(dev_priv);
|
||||
i915_pmu_unregister(dev_priv);
|
||||
|
||||
@ -1656,7 +1661,8 @@ static int intel_runtime_suspend(struct device *kdev)
|
||||
|
||||
intel_runtime_pm_enable_interrupts(dev_priv);
|
||||
|
||||
intel_gt_runtime_resume(to_gt(dev_priv));
|
||||
for_each_gt(gt, dev_priv, i)
|
||||
intel_gt_runtime_resume(gt);
|
||||
|
||||
enable_rpm_wakeref_asserts(rpm);
|
||||
|
||||
|
@ -40,7 +40,6 @@
|
||||
#include "display/intel_display_core.h"
|
||||
|
||||
#include "gem/i915_gem_context_types.h"
|
||||
#include "gem/i915_gem_lmem.h"
|
||||
#include "gem/i915_gem_shrinker.h"
|
||||
#include "gem/i915_gem_stolen.h"
|
||||
|
||||
@ -350,6 +349,8 @@ struct drm_i915_private {
|
||||
|
||||
struct i915_perf perf;
|
||||
|
||||
struct i915_hwmon *hwmon;
|
||||
|
||||
/* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
|
||||
struct intel_gt gt0;
|
||||
|
||||
@ -898,19 +899,17 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
|
||||
#define HAS_RUNTIME_PM(dev_priv) (INTEL_INFO(dev_priv)->has_runtime_pm)
|
||||
#define HAS_64BIT_RELOC(dev_priv) (INTEL_INFO(dev_priv)->has_64bit_reloc)
|
||||
|
||||
#define HAS_OA_BPC_REPORTING(dev_priv) \
|
||||
(INTEL_INFO(dev_priv)->has_oa_bpc_reporting)
|
||||
#define HAS_OA_SLICE_CONTRIB_LIMITS(dev_priv) \
|
||||
(INTEL_INFO(dev_priv)->has_oa_slice_contrib_limits)
|
||||
|
||||
/*
|
||||
* Set this flag, when platform requires 64K GTT page sizes or larger for
|
||||
* device local memory access.
|
||||
*/
|
||||
#define HAS_64K_PAGES(dev_priv) (INTEL_INFO(dev_priv)->has_64k_pages)
|
||||
|
||||
/*
|
||||
* Set this flag when platform doesn't allow both 64k pages and 4k pages in
|
||||
* the same PT. this flag means we need to support compact PT layout for the
|
||||
* ppGTT when using the 64K GTT pages.
|
||||
*/
|
||||
#define NEEDS_COMPACT_PT(dev_priv) (INTEL_INFO(dev_priv)->needs_compact_pt)
|
||||
|
||||
#define HAS_IPC(dev_priv) (INTEL_INFO(dev_priv)->display.has_ipc)
|
||||
|
||||
#define HAS_REGION(i915, i) (RUNTIME_INFO(i915)->memory_regions & (i))
|
||||
@ -976,6 +975,9 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
|
||||
|
||||
#define HAS_ONE_EU_PER_FUSE_BIT(i915) (INTEL_INFO(i915)->has_one_eu_per_fuse_bit)
|
||||
|
||||
#define HAS_LMEMBAR_SMEM_STOLEN(i915) (!HAS_LMEM(i915) && \
|
||||
GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
|
||||
|
||||
/* intel_device_info.c */
|
||||
static inline struct intel_device_info *
|
||||
mkwrite_device_info(struct drm_i915_private *dev_priv)
|
||||
@ -983,16 +985,4 @@ mkwrite_device_info(struct drm_i915_private *dev_priv)
|
||||
return (struct intel_device_info *)INTEL_INFO(dev_priv);
|
||||
}
|
||||
|
||||
static inline enum i915_map_type
|
||||
i915_coherent_map_type(struct drm_i915_private *i915,
|
||||
struct drm_i915_gem_object *obj, bool always_coherent)
|
||||
{
|
||||
if (i915_gem_object_is_lmem(obj))
|
||||
return I915_MAP_WC;
|
||||
if (HAS_LLC(i915) || always_coherent)
|
||||
return I915_MAP_WB;
|
||||
else
|
||||
return I915_MAP_WC;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -843,7 +843,7 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915)
|
||||
__i915_gem_object_release_mmap_gtt(obj);
|
||||
|
||||
list_for_each_entry_safe(obj, on,
|
||||
&to_gt(i915)->lmem_userfault_list, userfault_link)
|
||||
&i915->runtime_pm.lmem_userfault_list, userfault_link)
|
||||
i915_gem_object_runtime_pm_release_mmap_offset(obj);
|
||||
|
||||
/*
|
||||
@ -1128,6 +1128,8 @@ void i915_gem_drain_workqueue(struct drm_i915_private *i915)
|
||||
|
||||
int i915_gem_init(struct drm_i915_private *dev_priv)
|
||||
{
|
||||
struct intel_gt *gt;
|
||||
unsigned int i;
|
||||
int ret;
|
||||
|
||||
/* We need to fallback to 4K pages if host doesn't support huge gtt. */
|
||||
@ -1158,9 +1160,11 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
|
||||
*/
|
||||
intel_init_clock_gating(dev_priv);
|
||||
|
||||
ret = intel_gt_init(to_gt(dev_priv));
|
||||
if (ret)
|
||||
goto err_unlock;
|
||||
for_each_gt(gt, dev_priv, i) {
|
||||
ret = intel_gt_init(gt);
|
||||
if (ret)
|
||||
goto err_unlock;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
@ -1173,8 +1177,13 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
|
||||
err_unlock:
|
||||
i915_gem_drain_workqueue(dev_priv);
|
||||
|
||||
if (ret != -EIO)
|
||||
intel_uc_cleanup_firmwares(&to_gt(dev_priv)->uc);
|
||||
if (ret != -EIO) {
|
||||
for_each_gt(gt, dev_priv, i) {
|
||||
intel_gt_driver_remove(gt);
|
||||
intel_gt_driver_release(gt);
|
||||
intel_uc_cleanup_firmwares(>->uc);
|
||||
}
|
||||
}
|
||||
|
||||
if (ret == -EIO) {
|
||||
/*
|
||||
@ -1182,10 +1191,12 @@ err_unlock:
|
||||
* as wedged. But we only want to do this when the GPU is angry,
|
||||
* for all other failure, such as an allocation failure, bail.
|
||||
*/
|
||||
if (!intel_gt_is_wedged(to_gt(dev_priv))) {
|
||||
i915_probe_error(dev_priv,
|
||||
"Failed to initialize GPU, declaring it wedged!\n");
|
||||
intel_gt_set_wedged(to_gt(dev_priv));
|
||||
for_each_gt(gt, dev_priv, i) {
|
||||
if (!intel_gt_is_wedged(gt)) {
|
||||
i915_probe_error(dev_priv,
|
||||
"Failed to initialize GPU, declaring it wedged!\n");
|
||||
intel_gt_set_wedged(gt);
|
||||
}
|
||||
}
|
||||
|
||||
/* Minimal basic recovery for KMS */
|
||||
@ -1213,23 +1224,27 @@ void i915_gem_driver_unregister(struct drm_i915_private *i915)
|
||||
|
||||
void i915_gem_driver_remove(struct drm_i915_private *dev_priv)
|
||||
{
|
||||
intel_wakeref_auto_fini(&to_gt(dev_priv)->userfault_wakeref);
|
||||
struct intel_gt *gt;
|
||||
unsigned int i;
|
||||
|
||||
i915_gem_suspend_late(dev_priv);
|
||||
intel_gt_driver_remove(to_gt(dev_priv));
|
||||
for_each_gt(gt, dev_priv, i)
|
||||
intel_gt_driver_remove(gt);
|
||||
dev_priv->uabi_engines = RB_ROOT;
|
||||
|
||||
/* Flush any outstanding unpin_work. */
|
||||
i915_gem_drain_workqueue(dev_priv);
|
||||
|
||||
i915_gem_drain_freed_objects(dev_priv);
|
||||
}
|
||||
|
||||
void i915_gem_driver_release(struct drm_i915_private *dev_priv)
|
||||
{
|
||||
intel_gt_driver_release(to_gt(dev_priv));
|
||||
struct intel_gt *gt;
|
||||
unsigned int i;
|
||||
|
||||
intel_uc_cleanup_firmwares(&to_gt(dev_priv)->uc);
|
||||
for_each_gt(gt, dev_priv, i) {
|
||||
intel_gt_driver_release(gt);
|
||||
intel_uc_cleanup_firmwares(>->uc);
|
||||
}
|
||||
|
||||
/* Flush any outstanding work, including i915_gem_context.release_work. */
|
||||
i915_gem_drain_workqueue(dev_priv);
|
||||
@ -1259,7 +1274,7 @@ void i915_gem_init_early(struct drm_i915_private *dev_priv)
|
||||
|
||||
void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
|
||||
{
|
||||
i915_gem_drain_freed_objects(dev_priv);
|
||||
i915_gem_drain_workqueue(dev_priv);
|
||||
GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list));
|
||||
GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));
|
||||
drm_WARN_ON(&dev_priv->drm, dev_priv->mm.shrink_count);
|
||||
|
@ -175,6 +175,9 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
|
||||
case I915_PARAM_PERF_REVISION:
|
||||
value = i915_perf_ioctl_version();
|
||||
break;
|
||||
case I915_PARAM_OA_TIMESTAMP_FREQUENCY:
|
||||
value = i915_perf_oa_timestamp_frequency(i915);
|
||||
break;
|
||||
default:
|
||||
DRM_DEBUG("Unknown parameter %d\n", param->param);
|
||||
return -EINVAL;
|
||||
|
@ -1221,7 +1221,10 @@ static void engine_record_registers(struct intel_engine_coredump *ee)
|
||||
if (GRAPHICS_VER(i915) >= 6) {
|
||||
ee->rc_psmi = ENGINE_READ(engine, RING_PSMI_CTL);
|
||||
|
||||
if (GRAPHICS_VER(i915) >= 12)
|
||||
if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
|
||||
ee->fault_reg = intel_gt_mcr_read_any(engine->gt,
|
||||
XEHP_RING_FAULT_REG);
|
||||
else if (GRAPHICS_VER(i915) >= 12)
|
||||
ee->fault_reg = intel_uncore_read(engine->uncore,
|
||||
GEN12_RING_FAULT_REG);
|
||||
else if (GRAPHICS_VER(i915) >= 8)
|
||||
@ -1820,7 +1823,12 @@ static void gt_record_global_regs(struct intel_gt_coredump *gt)
|
||||
if (GRAPHICS_VER(i915) == 7)
|
||||
gt->err_int = intel_uncore_read(uncore, GEN7_ERR_INT);
|
||||
|
||||
if (GRAPHICS_VER(i915) >= 12) {
|
||||
if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
|
||||
gt->fault_data0 = intel_gt_mcr_read_any((struct intel_gt *)gt->_gt,
|
||||
XEHP_FAULT_TLB_DATA0);
|
||||
gt->fault_data1 = intel_gt_mcr_read_any((struct intel_gt *)gt->_gt,
|
||||
XEHP_FAULT_TLB_DATA1);
|
||||
} else if (GRAPHICS_VER(i915) >= 12) {
|
||||
gt->fault_data0 = intel_uncore_read(uncore,
|
||||
GEN12_FAULT_TLB_DATA0);
|
||||
gt->fault_data1 = intel_uncore_read(uncore,
|
||||
|
732
drivers/gpu/drm/i915/i915_hwmon.c
Normal file
@ -0,0 +1,732 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
/*
|
||||
* Copyright © 2022 Intel Corporation
|
||||
*/
|
||||
|
||||
#include <linux/hwmon.h>
|
||||
#include <linux/hwmon-sysfs.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
#include "i915_drv.h"
|
||||
#include "i915_hwmon.h"
|
||||
#include "i915_reg.h"
|
||||
#include "intel_mchbar_regs.h"
|
||||
#include "intel_pcode.h"
|
||||
#include "gt/intel_gt.h"
|
||||
#include "gt/intel_gt_regs.h"
|
||||
|
||||
/*
|
||||
* SF_* - scale factors for particular quantities according to hwmon spec.
|
||||
* - voltage - millivolts
|
||||
* - power - microwatts
|
||||
* - curr - milliamperes
|
||||
* - energy - microjoules
|
||||
* - time - milliseconds
|
||||
*/
|
||||
#define SF_VOLTAGE 1000
|
||||
#define SF_POWER 1000000
|
||||
#define SF_CURR 1000
|
||||
#define SF_ENERGY 1000000
|
||||
#define SF_TIME 1000
|
||||
|
||||
struct hwm_reg {
|
||||
i915_reg_t gt_perf_status;
|
||||
i915_reg_t pkg_power_sku_unit;
|
||||
i915_reg_t pkg_power_sku;
|
||||
i915_reg_t pkg_rapl_limit;
|
||||
i915_reg_t energy_status_all;
|
||||
i915_reg_t energy_status_tile;
|
||||
};
|
||||
|
||||
struct hwm_energy_info {
|
||||
u32 reg_val_prev;
|
||||
long accum_energy; /* Accumulated energy for energy1_input */
|
||||
};
|
||||
|
||||
struct hwm_drvdata {
|
||||
struct i915_hwmon *hwmon;
|
||||
struct intel_uncore *uncore;
|
||||
struct device *hwmon_dev;
|
||||
struct hwm_energy_info ei; /* Energy info for energy1_input */
|
||||
char name[12];
|
||||
int gt_n;
|
||||
};
|
||||
|
||||
struct i915_hwmon {
|
||||
struct hwm_drvdata ddat;
|
||||
struct hwm_drvdata ddat_gt[I915_MAX_GT];
|
||||
struct mutex hwmon_lock; /* counter overflow logic and rmw */
|
||||
struct hwm_reg rg;
|
||||
int scl_shift_power;
|
||||
int scl_shift_energy;
|
||||
int scl_shift_time;
|
||||
};
|
||||
|
||||
static void
|
||||
hwm_locked_with_pm_intel_uncore_rmw(struct hwm_drvdata *ddat,
|
||||
i915_reg_t reg, u32 clear, u32 set)
|
||||
{
|
||||
struct i915_hwmon *hwmon = ddat->hwmon;
|
||||
struct intel_uncore *uncore = ddat->uncore;
|
||||
intel_wakeref_t wakeref;
|
||||
|
||||
mutex_lock(&hwmon->hwmon_lock);
|
||||
|
||||
with_intel_runtime_pm(uncore->rpm, wakeref)
|
||||
intel_uncore_rmw(uncore, reg, clear, set);
|
||||
|
||||
mutex_unlock(&hwmon->hwmon_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* This function's return type of u64 allows for the case where the scaling
|
||||
* of the field taken from the 32-bit register value might cause a result to
|
||||
* exceed 32 bits.
|
||||
*/
|
||||
static u64
|
||||
hwm_field_read_and_scale(struct hwm_drvdata *ddat, i915_reg_t rgadr,
|
||||
u32 field_msk, int nshift, u32 scale_factor)
|
||||
{
|
||||
struct intel_uncore *uncore = ddat->uncore;
|
||||
intel_wakeref_t wakeref;
|
||||
u32 reg_value;
|
||||
|
||||
with_intel_runtime_pm(uncore->rpm, wakeref)
|
||||
reg_value = intel_uncore_read(uncore, rgadr);
|
||||
|
||||
reg_value = REG_FIELD_GET(field_msk, reg_value);
|
||||
|
||||
return mul_u64_u32_shr(reg_value, scale_factor, nshift);
|
||||
}
|
||||
|
||||
static void
|
||||
hwm_field_scale_and_write(struct hwm_drvdata *ddat, i915_reg_t rgadr,
|
||||
int nshift, unsigned int scale_factor, long lval)
|
||||
{
|
||||
u32 nval;
|
||||
|
||||
/* Computation in 64-bits to avoid overflow. Round to nearest. */
|
||||
nval = DIV_ROUND_CLOSEST_ULL((u64)lval << nshift, scale_factor);
|
||||
|
||||
hwm_locked_with_pm_intel_uncore_rmw(ddat, rgadr,
|
||||
PKG_PWR_LIM_1,
|
||||
REG_FIELD_PREP(PKG_PWR_LIM_1, nval));
|
||||
}
|
||||
|
||||
/*
|
||||
* hwm_energy - Obtain energy value
|
||||
*
|
||||
* The underlying energy hardware register is 32-bits and is subject to
|
||||
* overflow. How long before overflow? For example, with an example
|
||||
* scaling bit shift of 14 bits (see register *PACKAGE_POWER_SKU_UNIT) and
|
||||
* a power draw of 1000 watts, the 32-bit counter will overflow in
|
||||
* approximately 4.36 minutes.
|
||||
*
|
||||
* Examples:
|
||||
* 1 watt: (2^32 >> 14) / 1 W / (60 * 60 * 24) secs/day -> 3 days
|
||||
* 1000 watts: (2^32 >> 14) / 1000 W / 60 secs/min -> 4.36 minutes
|
||||
*
|
||||
* The function significantly increases overflow duration (from 4.36
|
||||
* minutes) by accumulating the energy register into a 'long' as allowed by
|
||||
* the hwmon API. Using x86_64 128 bit arithmetic (see mul_u64_u32_shr()),
|
||||
* a 'long' of 63 bits, SF_ENERGY of 1e6 (~20 bits) and
|
||||
* hwmon->scl_shift_energy of 14 bits we have 57 (63 - 20 + 14) bits before
|
||||
* energy1_input overflows. This at 1000 W is an overflow duration of 278 years.
|
||||
*/
|
||||
static void
|
||||
hwm_energy(struct hwm_drvdata *ddat, long *energy)
|
||||
{
|
||||
struct intel_uncore *uncore = ddat->uncore;
|
||||
struct i915_hwmon *hwmon = ddat->hwmon;
|
||||
struct hwm_energy_info *ei = &ddat->ei;
|
||||
intel_wakeref_t wakeref;
|
||||
i915_reg_t rgaddr;
|
||||
u32 reg_val;
|
||||
|
||||
if (ddat->gt_n >= 0)
|
||||
rgaddr = hwmon->rg.energy_status_tile;
|
||||
else
|
||||
rgaddr = hwmon->rg.energy_status_all;
|
||||
|
||||
mutex_lock(&hwmon->hwmon_lock);
|
||||
|
||||
with_intel_runtime_pm(uncore->rpm, wakeref)
|
||||
reg_val = intel_uncore_read(uncore, rgaddr);
|
||||
|
||||
if (reg_val >= ei->reg_val_prev)
|
||||
ei->accum_energy += reg_val - ei->reg_val_prev;
|
||||
else
|
||||
ei->accum_energy += UINT_MAX - ei->reg_val_prev + reg_val;
|
||||
ei->reg_val_prev = reg_val;
|
||||
|
||||
*energy = mul_u64_u32_shr(ei->accum_energy, SF_ENERGY,
|
||||
hwmon->scl_shift_energy);
|
||||
mutex_unlock(&hwmon->hwmon_lock);
|
||||
}
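/*
 * Back-of-the-envelope check of the overflow numbers quoted above
 * (illustration only, not part of the patch), assuming the example scale
 * shift of 14 bits. The helper name is hypothetical.
 */
static u64 energy_wrap_seconds(u32 scl_shift_energy, u32 watts)
{
	/* The hardware counter ticks in units of 1/2^scl_shift_energy joules. */
	return div_u64((u64)U32_MAX >> scl_shift_energy, watts);
	/* energy_wrap_seconds(14, 1000) ~= 262 s ~= 4.36 minutes */
}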
|
||||
|
||||
static ssize_t
|
||||
hwm_power1_max_interval_show(struct device *dev, struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct hwm_drvdata *ddat = dev_get_drvdata(dev);
|
||||
struct i915_hwmon *hwmon = ddat->hwmon;
|
||||
intel_wakeref_t wakeref;
|
||||
u32 r, x, y, x_w = 2; /* 2 bits */
|
||||
u64 tau4, out;
|
||||
|
||||
with_intel_runtime_pm(ddat->uncore->rpm, wakeref)
|
||||
r = intel_uncore_read(ddat->uncore, hwmon->rg.pkg_rapl_limit);
|
||||
|
||||
x = REG_FIELD_GET(PKG_PWR_LIM_1_TIME_X, r);
|
||||
y = REG_FIELD_GET(PKG_PWR_LIM_1_TIME_Y, r);
|
||||
/*
|
||||
* tau = 1.x * power(2,y), x = bits(23:22), y = bits(21:17)
|
||||
* = (4 | x) << (y - 2)
|
||||
* where the (y - 2) shift accounts for the 1.x fixed-point representation.
* However, because y can be < 2, we compute
*     tau4 = (4 | x) << y
* and instead add 2 to the final right shift to account for the units.
|
||||
*/
|
||||
tau4 = ((1 << x_w) | x) << y;
|
||||
/* val in hwmon interface units (millisec) */
|
||||
out = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w);
|
||||
|
||||
return sysfs_emit(buf, "%llu\n", out);
|
||||
}
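/*
 * Worked example of the 1.x * 2^y decode above (illustrative numbers only,
 * not part of the patch): with x = 1, y = 5 and the default scl_shift_time
 * of 0xa, tau4 = (4 | 1) << 5 = 160 and the reported interval is
 * 160 * 1000 >> 12 = 39 ms.
 */
static u64 tau_ms_example(void)
{
	u32 x = 1, y = 5, x_w = 2, scl_shift_time = 0xa;
	u64 tau4 = ((1 << x_w) | x) << y;

	return mul_u64_u32_shr(tau4, SF_TIME, scl_shift_time + x_w); /* 39 */
}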
|
||||
|
||||
static ssize_t
|
||||
hwm_power1_max_interval_store(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
const char *buf, size_t count)
|
||||
{
|
||||
struct hwm_drvdata *ddat = dev_get_drvdata(dev);
|
||||
struct i915_hwmon *hwmon = ddat->hwmon;
|
||||
u32 x, y, rxy, x_w = 2; /* 2 bits */
|
||||
u64 tau4, r, max_win;
|
||||
unsigned long val;
|
||||
int ret;
|
||||
|
||||
ret = kstrtoul(buf, 0, &val);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* Max HW supported tau in '1.x * power(2,y)' format, x = 0, y = 0x12
|
||||
* The hwmon->scl_shift_time default of 0xa results in a max tau of 256 seconds
|
||||
*/
|
||||
#define PKG_MAX_WIN_DEFAULT 0x12ull
|
||||
|
||||
/*
|
||||
* val must be < max in hwmon interface units. The steps below are
|
||||
* explained in hwm_power1_max_interval_show()
|
||||
*/
|
||||
r = FIELD_PREP(PKG_MAX_WIN, PKG_MAX_WIN_DEFAULT);
|
||||
x = REG_FIELD_GET(PKG_MAX_WIN_X, r);
|
||||
y = REG_FIELD_GET(PKG_MAX_WIN_Y, r);
|
||||
tau4 = ((1 << x_w) | x) << y;
|
||||
max_win = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w);
|
||||
|
||||
if (val > max_win)
|
||||
return -EINVAL;
|
||||
|
||||
/* val in hw units */
|
||||
val = DIV_ROUND_CLOSEST_ULL((u64)val << hwmon->scl_shift_time, SF_TIME);
|
||||
/* Convert to 1.x * power(2,y) */
|
||||
if (!val)
|
||||
return -EINVAL;
|
||||
y = ilog2(val);
|
||||
/* x = (val - (1 << y)) >> (y - 2); */
|
||||
x = (val - (1ul << y)) << x_w >> y;
|
||||
|
||||
rxy = REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_X, x) | REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_Y, y);
|
||||
|
||||
hwm_locked_with_pm_intel_uncore_rmw(ddat, hwmon->rg.pkg_rapl_limit,
|
||||
PKG_PWR_LIM_1_TIME, rxy);
|
||||
return count;
|
||||
}
|
||||
|
||||
static SENSOR_DEVICE_ATTR(power1_max_interval, 0664,
|
||||
hwm_power1_max_interval_show,
|
||||
hwm_power1_max_interval_store, 0);
|
||||
|
||||
static struct attribute *hwm_attributes[] = {
|
||||
&sensor_dev_attr_power1_max_interval.dev_attr.attr,
|
||||
NULL
|
||||
};
|
||||
|
||||
static umode_t hwm_attributes_visible(struct kobject *kobj,
|
||||
struct attribute *attr, int index)
|
||||
{
|
||||
struct device *dev = kobj_to_dev(kobj);
|
||||
struct hwm_drvdata *ddat = dev_get_drvdata(dev);
|
||||
struct i915_hwmon *hwmon = ddat->hwmon;
|
||||
|
||||
if (attr == &sensor_dev_attr_power1_max_interval.dev_attr.attr)
|
||||
return i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit) ? attr->mode : 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct attribute_group hwm_attrgroup = {
|
||||
.attrs = hwm_attributes,
|
||||
.is_visible = hwm_attributes_visible,
|
||||
};
|
||||
|
||||
static const struct attribute_group *hwm_groups[] = {
|
||||
&hwm_attrgroup,
|
||||
NULL
|
||||
};
|
||||
|
||||
static const struct hwmon_channel_info *hwm_info[] = {
|
||||
HWMON_CHANNEL_INFO(in, HWMON_I_INPUT),
|
||||
HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_CRIT),
|
||||
HWMON_CHANNEL_INFO(energy, HWMON_E_INPUT),
|
||||
HWMON_CHANNEL_INFO(curr, HWMON_C_CRIT),
|
||||
NULL
|
||||
};
|
||||
|
||||
static const struct hwmon_channel_info *hwm_gt_info[] = {
|
||||
HWMON_CHANNEL_INFO(energy, HWMON_E_INPUT),
|
||||
NULL
|
||||
};
|
||||
|
||||
/* I1 is exposed as power_crit or as curr_crit depending on bit 31 */
|
||||
static int hwm_pcode_read_i1(struct drm_i915_private *i915, u32 *uval)
|
||||
{
|
||||
return snb_pcode_read_p(&i915->uncore, PCODE_POWER_SETUP,
|
||||
POWER_SETUP_SUBCOMMAND_READ_I1, 0, uval);
|
||||
}
|
||||
|
||||
static int hwm_pcode_write_i1(struct drm_i915_private *i915, u32 uval)
|
||||
{
|
||||
return snb_pcode_write_p(&i915->uncore, PCODE_POWER_SETUP,
|
||||
POWER_SETUP_SUBCOMMAND_WRITE_I1, 0, uval);
|
||||
}
|
||||
|
||||
static umode_t
|
||||
hwm_in_is_visible(const struct hwm_drvdata *ddat, u32 attr)
|
||||
{
|
||||
struct drm_i915_private *i915 = ddat->uncore->i915;
|
||||
|
||||
switch (attr) {
|
||||
case hwmon_in_input:
|
||||
return IS_DG1(i915) || IS_DG2(i915) ? 0444 : 0;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
hwm_in_read(struct hwm_drvdata *ddat, u32 attr, long *val)
|
||||
{
|
||||
struct i915_hwmon *hwmon = ddat->hwmon;
|
||||
intel_wakeref_t wakeref;
|
||||
u32 reg_value;
|
||||
|
||||
switch (attr) {
|
||||
case hwmon_in_input:
|
||||
with_intel_runtime_pm(ddat->uncore->rpm, wakeref)
|
||||
reg_value = intel_uncore_read(ddat->uncore, hwmon->rg.gt_perf_status);
|
||||
/* HW register value in units of 2.5 millivolt */
|
||||
*val = DIV_ROUND_CLOSEST(REG_FIELD_GET(GEN12_VOLTAGE_MASK, reg_value) * 25, 10);
|
||||
return 0;
|
||||
default:
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
}
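/*
 * Quick sanity check of the 2.5 mV scaling used above (made-up register
 * value, illustration only; helper name is hypothetical): a raw voltage
 * field of 610 decodes to 610 * 2.5 = 1525 millivolts.
 */
static long decode_voltage_mv(u32 field)
{
	return DIV_ROUND_CLOSEST(field * 25, 10); /* decode_voltage_mv(610) == 1525 */
}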
|
||||
|
||||
static umode_t
|
||||
hwm_power_is_visible(const struct hwm_drvdata *ddat, u32 attr, int chan)
|
||||
{
|
||||
struct drm_i915_private *i915 = ddat->uncore->i915;
|
||||
struct i915_hwmon *hwmon = ddat->hwmon;
|
||||
u32 uval;
|
||||
|
||||
switch (attr) {
|
||||
case hwmon_power_max:
|
||||
return i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit) ? 0664 : 0;
|
||||
case hwmon_power_rated_max:
|
||||
return i915_mmio_reg_valid(hwmon->rg.pkg_power_sku) ? 0444 : 0;
|
||||
case hwmon_power_crit:
|
||||
return (hwm_pcode_read_i1(i915, &uval) ||
|
||||
!(uval & POWER_SETUP_I1_WATTS)) ? 0 : 0644;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
hwm_power_read(struct hwm_drvdata *ddat, u32 attr, int chan, long *val)
|
||||
{
|
||||
struct i915_hwmon *hwmon = ddat->hwmon;
|
||||
int ret;
|
||||
u32 uval;
|
||||
|
||||
switch (attr) {
|
||||
case hwmon_power_max:
|
||||
*val = hwm_field_read_and_scale(ddat,
|
||||
hwmon->rg.pkg_rapl_limit,
|
||||
PKG_PWR_LIM_1,
|
||||
hwmon->scl_shift_power,
|
||||
SF_POWER);
|
||||
return 0;
|
||||
case hwmon_power_rated_max:
|
||||
*val = hwm_field_read_and_scale(ddat,
|
||||
hwmon->rg.pkg_power_sku,
|
||||
PKG_PKG_TDP,
|
||||
hwmon->scl_shift_power,
|
||||
SF_POWER);
|
||||
return 0;
|
||||
case hwmon_power_crit:
|
||||
ret = hwm_pcode_read_i1(ddat->uncore->i915, &uval);
|
||||
if (ret)
|
||||
return ret;
|
||||
if (!(uval & POWER_SETUP_I1_WATTS))
|
||||
return -ENODEV;
|
||||
*val = mul_u64_u32_shr(REG_FIELD_GET(POWER_SETUP_I1_DATA_MASK, uval),
|
||||
SF_POWER, POWER_SETUP_I1_SHIFT);
|
||||
return 0;
|
||||
default:
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
hwm_power_write(struct hwm_drvdata *ddat, u32 attr, int chan, long val)
|
||||
{
|
||||
struct i915_hwmon *hwmon = ddat->hwmon;
|
||||
u32 uval;
|
||||
|
||||
switch (attr) {
|
||||
case hwmon_power_max:
|
||||
hwm_field_scale_and_write(ddat,
|
||||
hwmon->rg.pkg_rapl_limit,
|
||||
hwmon->scl_shift_power,
|
||||
SF_POWER, val);
|
||||
return 0;
|
||||
case hwmon_power_crit:
|
||||
uval = DIV_ROUND_CLOSEST_ULL(val << POWER_SETUP_I1_SHIFT, SF_POWER);
|
||||
return hwm_pcode_write_i1(ddat->uncore->i915, uval);
|
||||
default:
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
}
|
||||
|
||||
static umode_t
|
||||
hwm_energy_is_visible(const struct hwm_drvdata *ddat, u32 attr)
|
||||
{
|
||||
struct i915_hwmon *hwmon = ddat->hwmon;
|
||||
i915_reg_t rgaddr;
|
||||
|
||||
switch (attr) {
|
||||
case hwmon_energy_input:
|
||||
if (ddat->gt_n >= 0)
|
||||
rgaddr = hwmon->rg.energy_status_tile;
|
||||
else
|
||||
rgaddr = hwmon->rg.energy_status_all;
|
||||
return i915_mmio_reg_valid(rgaddr) ? 0444 : 0;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
hwm_energy_read(struct hwm_drvdata *ddat, u32 attr, long *val)
|
||||
{
|
||||
switch (attr) {
|
||||
case hwmon_energy_input:
|
||||
hwm_energy(ddat, val);
|
||||
return 0;
|
||||
default:
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
}
|
||||
|
||||
static umode_t
|
||||
hwm_curr_is_visible(const struct hwm_drvdata *ddat, u32 attr)
|
||||
{
|
||||
struct drm_i915_private *i915 = ddat->uncore->i915;
|
||||
u32 uval;
|
||||
|
||||
switch (attr) {
|
||||
case hwmon_curr_crit:
|
||||
return (hwm_pcode_read_i1(i915, &uval) ||
|
||||
(uval & POWER_SETUP_I1_WATTS)) ? 0 : 0644;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
hwm_curr_read(struct hwm_drvdata *ddat, u32 attr, long *val)
|
||||
{
|
||||
int ret;
|
||||
u32 uval;
|
||||
|
||||
switch (attr) {
|
||||
case hwmon_curr_crit:
|
||||
ret = hwm_pcode_read_i1(ddat->uncore->i915, &uval);
|
||||
if (ret)
|
||||
return ret;
|
||||
if (uval & POWER_SETUP_I1_WATTS)
|
||||
return -ENODEV;
|
||||
*val = mul_u64_u32_shr(REG_FIELD_GET(POWER_SETUP_I1_DATA_MASK, uval),
|
||||
SF_CURR, POWER_SETUP_I1_SHIFT);
|
||||
return 0;
|
||||
default:
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
hwm_curr_write(struct hwm_drvdata *ddat, u32 attr, long val)
|
||||
{
|
||||
u32 uval;
|
||||
|
||||
switch (attr) {
|
||||
case hwmon_curr_crit:
|
||||
uval = DIV_ROUND_CLOSEST_ULL(val << POWER_SETUP_I1_SHIFT, SF_CURR);
|
||||
return hwm_pcode_write_i1(ddat->uncore->i915, uval);
|
||||
default:
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
}
|
||||
|
||||
static umode_t
|
||||
hwm_is_visible(const void *drvdata, enum hwmon_sensor_types type,
|
||||
u32 attr, int channel)
|
||||
{
|
||||
struct hwm_drvdata *ddat = (struct hwm_drvdata *)drvdata;
|
||||
|
||||
switch (type) {
|
||||
case hwmon_in:
|
||||
return hwm_in_is_visible(ddat, attr);
|
||||
case hwmon_power:
|
||||
return hwm_power_is_visible(ddat, attr, channel);
|
||||
case hwmon_energy:
|
||||
return hwm_energy_is_visible(ddat, attr);
|
||||
case hwmon_curr:
|
||||
return hwm_curr_is_visible(ddat, attr);
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
hwm_read(struct device *dev, enum hwmon_sensor_types type, u32 attr,
|
||||
int channel, long *val)
|
||||
{
|
||||
struct hwm_drvdata *ddat = dev_get_drvdata(dev);
|
||||
|
||||
switch (type) {
|
||||
case hwmon_in:
|
||||
return hwm_in_read(ddat, attr, val);
|
||||
case hwmon_power:
|
||||
return hwm_power_read(ddat, attr, channel, val);
|
||||
case hwmon_energy:
|
||||
return hwm_energy_read(ddat, attr, val);
|
||||
case hwmon_curr:
|
||||
return hwm_curr_read(ddat, attr, val);
|
||||
default:
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
hwm_write(struct device *dev, enum hwmon_sensor_types type, u32 attr,
|
||||
int channel, long val)
|
||||
{
|
||||
struct hwm_drvdata *ddat = dev_get_drvdata(dev);
|
||||
|
||||
switch (type) {
|
||||
case hwmon_power:
|
||||
return hwm_power_write(ddat, attr, channel, val);
|
||||
case hwmon_curr:
|
||||
return hwm_curr_write(ddat, attr, val);
|
||||
default:
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
}
|
||||
|
||||
static const struct hwmon_ops hwm_ops = {
|
||||
.is_visible = hwm_is_visible,
|
||||
.read = hwm_read,
|
||||
.write = hwm_write,
|
||||
};
|
||||
|
||||
static const struct hwmon_chip_info hwm_chip_info = {
|
||||
.ops = &hwm_ops,
|
||||
.info = hwm_info,
|
||||
};
|
||||
|
||||
static umode_t
|
||||
hwm_gt_is_visible(const void *drvdata, enum hwmon_sensor_types type,
|
||||
u32 attr, int channel)
|
||||
{
|
||||
struct hwm_drvdata *ddat = (struct hwm_drvdata *)drvdata;
|
||||
|
||||
switch (type) {
|
||||
case hwmon_energy:
|
||||
return hwm_energy_is_visible(ddat, attr);
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
hwm_gt_read(struct device *dev, enum hwmon_sensor_types type, u32 attr,
|
||||
int channel, long *val)
|
||||
{
|
||||
struct hwm_drvdata *ddat = dev_get_drvdata(dev);
|
||||
|
||||
switch (type) {
|
||||
case hwmon_energy:
|
||||
return hwm_energy_read(ddat, attr, val);
|
||||
default:
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
}
|
||||
|
||||
static const struct hwmon_ops hwm_gt_ops = {
|
||||
.is_visible = hwm_gt_is_visible,
|
||||
.read = hwm_gt_read,
|
||||
};
|
||||
|
||||
static const struct hwmon_chip_info hwm_gt_chip_info = {
|
||||
.ops = &hwm_gt_ops,
|
||||
.info = hwm_gt_info,
|
||||
};
|
||||
|
||||
static void
|
||||
hwm_get_preregistration_info(struct drm_i915_private *i915)
|
||||
{
|
||||
struct i915_hwmon *hwmon = i915->hwmon;
|
||||
struct intel_uncore *uncore = &i915->uncore;
|
||||
struct hwm_drvdata *ddat = &hwmon->ddat;
|
||||
intel_wakeref_t wakeref;
|
||||
u32 val_sku_unit = 0;
|
||||
struct intel_gt *gt;
|
||||
long energy;
|
||||
int i;
|
||||
|
||||
/* Available for all Gen12+/dGfx */
|
||||
hwmon->rg.gt_perf_status = GEN12_RPSTAT1;
|
||||
|
||||
if (IS_DG1(i915) || IS_DG2(i915)) {
|
||||
hwmon->rg.pkg_power_sku_unit = PCU_PACKAGE_POWER_SKU_UNIT;
|
||||
hwmon->rg.pkg_power_sku = PCU_PACKAGE_POWER_SKU;
|
||||
hwmon->rg.pkg_rapl_limit = PCU_PACKAGE_RAPL_LIMIT;
|
||||
hwmon->rg.energy_status_all = PCU_PACKAGE_ENERGY_STATUS;
|
||||
hwmon->rg.energy_status_tile = INVALID_MMIO_REG;
|
||||
} else if (IS_XEHPSDV(i915)) {
|
||||
hwmon->rg.pkg_power_sku_unit = GT0_PACKAGE_POWER_SKU_UNIT;
|
||||
hwmon->rg.pkg_power_sku = INVALID_MMIO_REG;
|
||||
hwmon->rg.pkg_rapl_limit = GT0_PACKAGE_RAPL_LIMIT;
|
||||
hwmon->rg.energy_status_all = GT0_PLATFORM_ENERGY_STATUS;
|
||||
hwmon->rg.energy_status_tile = GT0_PACKAGE_ENERGY_STATUS;
|
||||
} else {
|
||||
hwmon->rg.pkg_power_sku_unit = INVALID_MMIO_REG;
|
||||
hwmon->rg.pkg_power_sku = INVALID_MMIO_REG;
|
||||
hwmon->rg.pkg_rapl_limit = INVALID_MMIO_REG;
|
||||
hwmon->rg.energy_status_all = INVALID_MMIO_REG;
|
||||
hwmon->rg.energy_status_tile = INVALID_MMIO_REG;
|
||||
}
|
||||
|
||||
with_intel_runtime_pm(uncore->rpm, wakeref) {
|
||||
/*
|
||||
* The contents of register hwmon->rg.pkg_power_sku_unit do not change,
|
||||
* so read it once and store the shift values.
|
||||
*/
|
||||
if (i915_mmio_reg_valid(hwmon->rg.pkg_power_sku_unit))
|
||||
val_sku_unit = intel_uncore_read(uncore,
|
||||
hwmon->rg.pkg_power_sku_unit);
|
||||
}
|
||||
|
||||
hwmon->scl_shift_power = REG_FIELD_GET(PKG_PWR_UNIT, val_sku_unit);
|
||||
hwmon->scl_shift_energy = REG_FIELD_GET(PKG_ENERGY_UNIT, val_sku_unit);
|
||||
hwmon->scl_shift_time = REG_FIELD_GET(PKG_TIME_UNIT, val_sku_unit);
|
||||
|
||||
/*
|
||||
* Initialize 'struct hwm_energy_info', i.e. set fields to the
|
||||
* first value of the energy register read
|
||||
*/
|
||||
if (i915_mmio_reg_valid(hwmon->rg.energy_status_all))
|
||||
hwm_energy(ddat, &energy);
|
||||
if (i915_mmio_reg_valid(hwmon->rg.energy_status_tile)) {
|
||||
for_each_gt(gt, i915, i)
|
||||
hwm_energy(&hwmon->ddat_gt[i], &energy);
|
||||
}
|
||||
}
|
||||
|
||||
void i915_hwmon_register(struct drm_i915_private *i915)
|
||||
{
|
||||
struct device *dev = i915->drm.dev;
|
||||
struct i915_hwmon *hwmon;
|
||||
struct device *hwmon_dev;
|
||||
struct hwm_drvdata *ddat;
|
||||
struct hwm_drvdata *ddat_gt;
|
||||
struct intel_gt *gt;
|
||||
int i;
|
||||
|
||||
/* hwmon is available only for dGfx */
|
||||
if (!IS_DGFX(i915))
|
||||
return;
|
||||
|
||||
hwmon = devm_kzalloc(dev, sizeof(*hwmon), GFP_KERNEL);
|
||||
if (!hwmon)
|
||||
return;
|
||||
|
||||
i915->hwmon = hwmon;
|
||||
mutex_init(&hwmon->hwmon_lock);
|
||||
ddat = &hwmon->ddat;
|
||||
|
||||
ddat->hwmon = hwmon;
|
||||
ddat->uncore = &i915->uncore;
|
||||
snprintf(ddat->name, sizeof(ddat->name), "i915");
|
||||
ddat->gt_n = -1;
|
||||
|
||||
for_each_gt(gt, i915, i) {
|
||||
ddat_gt = hwmon->ddat_gt + i;
|
||||
|
||||
ddat_gt->hwmon = hwmon;
|
||||
ddat_gt->uncore = gt->uncore;
|
||||
snprintf(ddat_gt->name, sizeof(ddat_gt->name), "i915_gt%u", i);
|
||||
ddat_gt->gt_n = i;
|
||||
}
|
||||
|
||||
hwm_get_preregistration_info(i915);
|
||||
|
||||
/* hwmon_dev points to device hwmon<i> */
|
||||
hwmon_dev = devm_hwmon_device_register_with_info(dev, ddat->name,
|
||||
ddat,
|
||||
&hwm_chip_info,
|
||||
hwm_groups);
|
||||
if (IS_ERR(hwmon_dev)) {
|
||||
i915->hwmon = NULL;
|
||||
return;
|
||||
}
|
||||
|
||||
ddat->hwmon_dev = hwmon_dev;
|
||||
|
||||
for_each_gt(gt, i915, i) {
|
||||
ddat_gt = hwmon->ddat_gt + i;
|
||||
/*
|
||||
* Create per-gt directories only if a per-gt attribute is
|
||||
* visible. Currently this is only energy
|
||||
*/
|
||||
if (!hwm_gt_is_visible(ddat_gt, hwmon_energy, hwmon_energy_input, 0))
|
||||
continue;
|
||||
|
||||
hwmon_dev = devm_hwmon_device_register_with_info(dev, ddat_gt->name,
|
||||
ddat_gt,
|
||||
&hwm_gt_chip_info,
|
||||
NULL);
|
||||
if (!IS_ERR(hwmon_dev))
|
||||
ddat_gt->hwmon_dev = hwmon_dev;
|
||||
}
|
||||
}
|
||||
|
||||
void i915_hwmon_unregister(struct drm_i915_private *i915)
|
||||
{
|
||||
fetch_and_zero(&i915->hwmon);
|
||||
}
|
Some files were not shown because too many files have changed in this diff.