Merge tag 'drm-intel-next-2018-03-08' of git://anongit.freedesktop.org/drm/drm-intel into drm-next

UAPI Changes:

- Query uAPI interface (used for GPU topology information currently)
	* Mesa: https://patchwork.freedesktop.org/series/38795/
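
  The query interface above is exercised from userspace in two passes: first with a
  zero length to discover the buffer size, then again with an allocated buffer. A
  rough, hedged sketch (the function name is just for illustration; libdrm header
  path and error handling simplified; not part of this merge):

	#include <stdint.h>
	#include <stdlib.h>
	#include <sys/ioctl.h>
	#include <i915_drm.h>	/* from libdrm; adjust the include path as needed */

	static void *query_topology(int drm_fd, int *len_out)
	{
		struct drm_i915_query_item item = {
			.query_id = DRM_I915_QUERY_TOPOLOGY_INFO,
		};
		struct drm_i915_query query = {
			.num_items = 1,
			.items_ptr = (uintptr_t)&item,
		};
		void *buf;

		/* Pass 1: item.length == 0 asks the kernel for the required size. */
		if (ioctl(drm_fd, DRM_IOCTL_I915_QUERY, &query) || item.length <= 0)
			return NULL;

		buf = calloc(1, item.length);
		if (!buf)
			return NULL;
		item.data_ptr = (uintptr_t)buf;

		/* Pass 2: the kernel writes a struct drm_i915_query_topology_info. */
		if (ioctl(drm_fd, DRM_IOCTL_I915_QUERY, &query)) {
			free(buf);
			return NULL;
		}

		*len_out = item.length;
		return buf;
	}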

Driver Changes:

- Increase PSR2 size for CNL (DK)
- Avoid retraining LSPCON link unnecessarily (Ville)
- Decrease request signaling latency (Chris)
- GuC error capture fix (Daniele)

* tag 'drm-intel-next-2018-03-08' of git://anongit.freedesktop.org/drm/drm-intel: (127 commits)
  drm/i915: Update DRIVER_DATE to 20180308
  drm/i915: add schedule out notification of preempted but completed request
  drm/i915: expose rcs topology through query uAPI
  drm/i915: add query uAPI
  drm/i915: add rcs topology to error state
  drm/i915/debugfs: add rcs topology entry
  drm/i915/debugfs: reuse max slice/subslices already stored in sseu
  drm/i915: store all subslice masks
  drm/i915/guc: work around gcc-4.4.4 union initializer issue
  drm/i915/cnl: Add Wa_2201832410
  drm/i915/icl: Gen11 forcewake support
  drm/i915/icl: Add Indirect Context Offset for Gen11
  drm/i915/icl: Enhanced execution list support
  drm/i915/icl: new context descriptor support
  drm/i915/icl: Correctly initialize the Gen11 engines
  drm/i915: Assert that the request is indeed complete when signaled from irq
  drm/i915: Handle changing enable_fbc parameter at runtime better.
  drm/i915: Track whether the DP link is trained or not
  drm/i915: Nuke intel_dp->channel_eq_status
  drm/i915: Move SST DP link retraining into the ->post_hotplug() hook
  ...
Committed by: Dave Airlie
Date: 2018-03-14 14:53:01 +10:00
Commit: 963976cfe9
110 changed files with 5552 additions and 3662 deletions


@@ -450,5 +450,12 @@ See drivers/gpu/drm/amd/display/TODO for tasks.
 
 Contact: Harry Wentland, Alex Deucher
 
+i915
+----
+
+- Our early/late pm callbacks could be removed in favour of using
+  device_link_add to model the dependency between i915 and snd_had. See
+  https://dri.freedesktop.org/docs/drm/driver-api/device_link.html
+
 Outside DRM
 ===========
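
For context, the direction suggested in the new TODO entry maps onto the device-link
API roughly like this (a hedged sketch, not an actual patch; hda_dev and i915_dev are
hypothetical pointers to the consumer and supplier struct device):

	#include <linux/device.h>

	static int example_link_audio_to_i915(struct device *hda_dev,
					      struct device *i915_dev)
	{
		struct device_link *link;

		/* i915 is the supplier, the HDA controller the consumer. */
		link = device_link_add(hda_dev, i915_dev,
				       DL_FLAG_PM_RUNTIME | DL_FLAG_RPM_ACTIVE);
		return link ? 0 : -ENODEV;
	}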


@@ -1247,12 +1247,15 @@ void drm_crtc_vblank_on(struct drm_crtc *crtc)
 EXPORT_SYMBOL(drm_crtc_vblank_on);
 
 /**
- * drm_vblank_restore - estimated vblanks using timestamps and update it.
+ * drm_vblank_restore - estimate missed vblanks and update vblank count.
+ * @dev: DRM device
+ * @pipe: CRTC index
  *
  * Power manamement features can cause frame counter resets between vblank
- * disable and enable. Drivers can then use this function in their
- * &drm_crtc_funcs.enable_vblank implementation to estimate the vblanks since
- * the last &drm_crtc_funcs.disable_vblank.
+ * disable and enable. Drivers can use this function in their
+ * &drm_crtc_funcs.enable_vblank implementation to estimate missed vblanks since
+ * the last &drm_crtc_funcs.disable_vblank using timestamps and update the
+ * vblank counter.
  *
  * This function is the legacy version of drm_crtc_vblank_restore().
  */
@@ -1293,11 +1296,14 @@ void drm_vblank_restore(struct drm_device *dev, unsigned int pipe)
 EXPORT_SYMBOL(drm_vblank_restore);
 
 /**
- * drm_crtc_vblank_restore - estimate vblanks using timestamps and update it.
+ * drm_crtc_vblank_restore - estimate missed vblanks and update vblank count.
+ * @crtc: CRTC in question
+ *
  * Power manamement features can cause frame counter resets between vblank
- * disable and enable. Drivers can then use this function in their
- * &drm_crtc_funcs.enable_vblank implementation to estimate the vblanks since
- * the last &drm_crtc_funcs.disable_vblank.
+ * disable and enable. Drivers can use this function in their
+ * &drm_crtc_funcs.enable_vblank implementation to estimate missed vblanks since
+ * the last &drm_crtc_funcs.disable_vblank using timestamps and update the
+ * vblank counter.
  */
 void drm_crtc_vblank_restore(struct drm_crtc *crtc)
 {
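
As a hedged illustration of the documented usage (driver names are hypothetical and
not part of this diff), a driver's &drm_crtc_funcs.enable_vblank hook would call this
right after re-arming the interrupt:

	static int example_enable_vblank(struct drm_crtc *crtc)
	{
		/* Unmask the hardware vblank interrupt for this pipe first. */
		example_hw_unmask_vblank(crtc->dev, drm_crtc_index(crtc));

		/* Then estimate the vblanks missed while the counter was off. */
		drm_crtc_vblank_restore(crtc);
		return 0;
	}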


@@ -63,13 +63,14 @@ i915-y += i915_cmd_parser.o \
 	  i915_gem.o \
 	  i915_gem_object.o \
 	  i915_gem_render_state.o \
-	  i915_gem_request.o \
 	  i915_gem_shrinker.o \
 	  i915_gem_stolen.o \
 	  i915_gem_tiling.o \
 	  i915_gem_timeline.o \
 	  i915_gem_userptr.o \
 	  i915_gemfs.o \
+	  i915_query.o \
+	  i915_request.o \
 	  i915_trace_points.o \
 	  i915_vma.o \
 	  intel_breadcrumbs.o \
@@ -89,7 +90,8 @@ i915-y += intel_uc.o \
 	  intel_guc_fw.o \
 	  intel_guc_log.o \
 	  intel_guc_submission.o \
-	  intel_huc.o
+	  intel_huc.o \
+	  intel_huc_fw.o
 
 # autogenerated null render state
 i915-y += intel_renderstate_gen6.o \


@@ -3,7 +3,7 @@ GVT_DIR := gvt
 GVT_SOURCE := gvt.o aperture_gm.o handlers.o vgpu.o trace_points.o firmware.o \
 	interrupt.o gtt.o cfg_space.o opregion.o mmio.o display.o edid.o \
 	execlist.o scheduler.o sched_policy.o mmio_context.o cmd_parser.o debugfs.o \
-	fb_decoder.o dmabuf.o
+	fb_decoder.o dmabuf.o page_track.o
 
 ccflags-y += -I$(src) -I$(src)/$(GVT_DIR)
 i915-y += $(addprefix $(GVT_DIR)/, $(GVT_SOURCE))


@@ -459,7 +459,7 @@ int intel_vgpu_get_dmabuf(struct intel_vgpu *vgpu, unsigned int dmabuf_id)
 
 	obj = vgpu_create_gem(dev, dmabuf_obj->info);
 	if (obj == NULL) {
-		gvt_vgpu_err("create gvt gem obj failed:%d\n", vgpu->id);
+		gvt_vgpu_err("create gvt gem obj failed\n");
 		ret = -ENOMEM;
 		goto out;
 	}

File diff suppressed because it is too large.


@ -39,7 +39,6 @@
struct intel_vgpu_mm; struct intel_vgpu_mm;
#define INTEL_GVT_GTT_HASH_BITS 8
#define INTEL_GVT_INVALID_ADDR (~0UL) #define INTEL_GVT_INVALID_ADDR (~0UL)
struct intel_gvt_gtt_entry { struct intel_gvt_gtt_entry {
@ -84,17 +83,12 @@ struct intel_gvt_gtt {
void (*mm_free_page_table)(struct intel_vgpu_mm *mm); void (*mm_free_page_table)(struct intel_vgpu_mm *mm);
struct list_head oos_page_use_list_head; struct list_head oos_page_use_list_head;
struct list_head oos_page_free_list_head; struct list_head oos_page_free_list_head;
struct list_head mm_lru_list_head; struct list_head ppgtt_mm_lru_list_head;
struct page *scratch_page; struct page *scratch_page;
unsigned long scratch_mfn; unsigned long scratch_mfn;
}; };
enum {
INTEL_GVT_MM_GGTT = 0,
INTEL_GVT_MM_PPGTT,
};
typedef enum { typedef enum {
GTT_TYPE_INVALID = -1, GTT_TYPE_INVALID = -1,
@ -125,66 +119,60 @@ typedef enum {
GTT_TYPE_MAX, GTT_TYPE_MAX,
} intel_gvt_gtt_type_t; } intel_gvt_gtt_type_t;
struct intel_vgpu_mm { enum intel_gvt_mm_type {
int type; INTEL_GVT_MM_GGTT,
bool initialized; INTEL_GVT_MM_PPGTT,
bool shadowed;
int page_table_entry_type;
u32 page_table_entry_size;
u32 page_table_entry_cnt;
void *virtual_page_table;
void *shadow_page_table;
int page_table_level;
bool has_shadow_page_table;
u32 pde_base_index;
struct list_head list;
struct kref ref;
atomic_t pincount;
struct list_head lru_list;
struct intel_vgpu *vgpu;
}; };
extern int intel_vgpu_mm_get_entry( #define GVT_RING_CTX_NR_PDPS GEN8_3LVL_PDPES
struct intel_vgpu_mm *mm,
void *page_table, struct intel_gvt_gtt_entry *e,
unsigned long index);
extern int intel_vgpu_mm_set_entry( struct intel_vgpu_mm {
struct intel_vgpu_mm *mm, enum intel_gvt_mm_type type;
void *page_table, struct intel_gvt_gtt_entry *e, struct intel_vgpu *vgpu;
unsigned long index);
#define ggtt_get_guest_entry(mm, e, index) \ struct kref ref;
intel_vgpu_mm_get_entry(mm, mm->virtual_page_table, e, index) atomic_t pincount;
#define ggtt_set_guest_entry(mm, e, index) \ union {
intel_vgpu_mm_set_entry(mm, mm->virtual_page_table, e, index) struct {
intel_gvt_gtt_type_t root_entry_type;
/*
* The 4 PDPs in ring context. For 48bit addressing,
* only PDP0 is valid and point to PML4. For 32it
* addressing, all 4 are used as true PDPs.
*/
u64 guest_pdps[GVT_RING_CTX_NR_PDPS];
u64 shadow_pdps[GVT_RING_CTX_NR_PDPS];
bool shadowed;
#define ggtt_get_shadow_entry(mm, e, index) \ struct list_head list;
intel_vgpu_mm_get_entry(mm, mm->shadow_page_table, e, index) struct list_head lru_list;
} ppgtt_mm;
struct {
void *virtual_ggtt;
} ggtt_mm;
};
};
#define ggtt_set_shadow_entry(mm, e, index) \ struct intel_vgpu_mm *intel_vgpu_create_ppgtt_mm(struct intel_vgpu *vgpu,
intel_vgpu_mm_set_entry(mm, mm->shadow_page_table, e, index) intel_gvt_gtt_type_t root_entry_type, u64 pdps[]);
#define ppgtt_get_guest_root_entry(mm, e, index) \ static inline void intel_vgpu_mm_get(struct intel_vgpu_mm *mm)
intel_vgpu_mm_get_entry(mm, mm->virtual_page_table, e, index) {
kref_get(&mm->ref);
}
#define ppgtt_set_guest_root_entry(mm, e, index) \ void _intel_vgpu_mm_release(struct kref *mm_ref);
intel_vgpu_mm_set_entry(mm, mm->virtual_page_table, e, index)
#define ppgtt_get_shadow_root_entry(mm, e, index) \ static inline void intel_vgpu_mm_put(struct intel_vgpu_mm *mm)
intel_vgpu_mm_get_entry(mm, mm->shadow_page_table, e, index) {
kref_put(&mm->ref, _intel_vgpu_mm_release);
}
#define ppgtt_set_shadow_root_entry(mm, e, index) \ static inline void intel_vgpu_destroy_mm(struct intel_vgpu_mm *mm)
intel_vgpu_mm_set_entry(mm, mm->shadow_page_table, e, index) {
intel_vgpu_mm_put(mm);
extern struct intel_vgpu_mm *intel_vgpu_create_mm(struct intel_vgpu *vgpu, }
int mm_type, void *virtual_page_table, int page_table_level,
u32 pde_base_index);
extern void intel_vgpu_destroy_mm(struct kref *mm_ref);
struct intel_vgpu_guest_page; struct intel_vgpu_guest_page;
@ -196,10 +184,8 @@ struct intel_vgpu_scratch_pt {
struct intel_vgpu_gtt { struct intel_vgpu_gtt {
struct intel_vgpu_mm *ggtt_mm; struct intel_vgpu_mm *ggtt_mm;
unsigned long active_ppgtt_mm_bitmap; unsigned long active_ppgtt_mm_bitmap;
struct list_head mm_list_head; struct list_head ppgtt_mm_list_head;
DECLARE_HASHTABLE(shadow_page_hash_table, INTEL_GVT_GTT_HASH_BITS); struct radix_tree_root spt_tree;
DECLARE_HASHTABLE(tracked_guest_page_hash_table, INTEL_GVT_GTT_HASH_BITS);
atomic_t n_tracked_guest_page;
struct list_head oos_page_list_head; struct list_head oos_page_list_head;
struct list_head post_shadow_list_head; struct list_head post_shadow_list_head;
struct intel_vgpu_scratch_pt scratch_pt[GTT_TYPE_MAX]; struct intel_vgpu_scratch_pt scratch_pt[GTT_TYPE_MAX];
@ -216,32 +202,8 @@ extern void intel_gvt_clean_gtt(struct intel_gvt *gvt);
extern struct intel_vgpu_mm *intel_gvt_find_ppgtt_mm(struct intel_vgpu *vgpu, extern struct intel_vgpu_mm *intel_gvt_find_ppgtt_mm(struct intel_vgpu *vgpu,
int page_table_level, void *root_entry); int page_table_level, void *root_entry);
struct intel_vgpu_oos_page;
struct intel_vgpu_shadow_page {
void *vaddr;
struct page *page;
int type;
struct hlist_node node;
unsigned long mfn;
};
struct intel_vgpu_page_track {
struct hlist_node node;
bool tracked;
unsigned long gfn;
int (*handler)(void *, u64, void *, int);
void *data;
};
struct intel_vgpu_guest_page {
struct intel_vgpu_page_track track;
unsigned long write_cnt;
struct intel_vgpu_oos_page *oos_page;
};
struct intel_vgpu_oos_page { struct intel_vgpu_oos_page {
struct intel_vgpu_guest_page *guest_page; struct intel_vgpu_ppgtt_spt *spt;
struct list_head list; struct list_head list;
struct list_head vm_list; struct list_head vm_list;
int id; int id;
@ -250,42 +212,33 @@ struct intel_vgpu_oos_page {
#define GTT_ENTRY_NUM_IN_ONE_PAGE 512 #define GTT_ENTRY_NUM_IN_ONE_PAGE 512
/* Represent a vgpu shadow page table. */
struct intel_vgpu_ppgtt_spt { struct intel_vgpu_ppgtt_spt {
struct intel_vgpu_shadow_page shadow_page;
struct intel_vgpu_guest_page guest_page;
int guest_page_type;
atomic_t refcount; atomic_t refcount;
struct intel_vgpu *vgpu; struct intel_vgpu *vgpu;
struct {
intel_gvt_gtt_type_t type;
void *vaddr;
struct page *page;
unsigned long mfn;
} shadow_page;
struct {
intel_gvt_gtt_type_t type;
unsigned long gfn;
unsigned long write_cnt;
struct intel_vgpu_oos_page *oos_page;
} guest_page;
DECLARE_BITMAP(post_shadow_bitmap, GTT_ENTRY_NUM_IN_ONE_PAGE); DECLARE_BITMAP(post_shadow_bitmap, GTT_ENTRY_NUM_IN_ONE_PAGE);
struct list_head post_shadow_list; struct list_head post_shadow_list;
}; };
int intel_vgpu_init_page_track(struct intel_vgpu *vgpu,
struct intel_vgpu_page_track *t,
unsigned long gfn,
int (*handler)(void *gp, u64, void *, int),
void *data);
void intel_vgpu_clean_page_track(struct intel_vgpu *vgpu,
struct intel_vgpu_page_track *t);
struct intel_vgpu_page_track *intel_vgpu_find_tracked_page(
struct intel_vgpu *vgpu, unsigned long gfn);
int intel_vgpu_sync_oos_pages(struct intel_vgpu *vgpu); int intel_vgpu_sync_oos_pages(struct intel_vgpu *vgpu);
int intel_vgpu_flush_post_shadow(struct intel_vgpu *vgpu); int intel_vgpu_flush_post_shadow(struct intel_vgpu *vgpu);
static inline void intel_gvt_mm_reference(struct intel_vgpu_mm *mm)
{
kref_get(&mm->ref);
}
static inline void intel_gvt_mm_unreference(struct intel_vgpu_mm *mm)
{
kref_put(&mm->ref, intel_vgpu_destroy_mm);
}
int intel_vgpu_pin_mm(struct intel_vgpu_mm *mm); int intel_vgpu_pin_mm(struct intel_vgpu_mm *mm);
void intel_vgpu_unpin_mm(struct intel_vgpu_mm *mm); void intel_vgpu_unpin_mm(struct intel_vgpu_mm *mm);
@ -294,21 +247,17 @@ unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm,
unsigned long gma); unsigned long gma);
struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu, struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu,
int page_table_level, void *root_entry); u64 pdps[]);
int intel_vgpu_g2v_create_ppgtt_mm(struct intel_vgpu *vgpu, struct intel_vgpu_mm *intel_vgpu_get_ppgtt_mm(struct intel_vgpu *vgpu,
int page_table_level); intel_gvt_gtt_type_t root_entry_type, u64 pdps[]);
int intel_vgpu_g2v_destroy_ppgtt_mm(struct intel_vgpu *vgpu, int intel_vgpu_put_ppgtt_mm(struct intel_vgpu *vgpu, u64 pdps[]);
int page_table_level);
int intel_vgpu_emulate_gtt_mmio_read(struct intel_vgpu *vgpu, int intel_vgpu_emulate_ggtt_mmio_read(struct intel_vgpu *vgpu,
unsigned int off, void *p_data, unsigned int bytes); unsigned int off, void *p_data, unsigned int bytes);
int intel_vgpu_emulate_gtt_mmio_write(struct intel_vgpu *vgpu, int intel_vgpu_emulate_ggtt_mmio_write(struct intel_vgpu *vgpu,
unsigned int off, void *p_data, unsigned int bytes); unsigned int off, void *p_data, unsigned int bytes);
int intel_vgpu_write_protect_handler(struct intel_vgpu *vgpu, u64 pa,
void *p_data, unsigned int bytes);
#endif /* _GVT_GTT_H_ */ #endif /* _GVT_GTT_H_ */


@@ -183,7 +183,7 @@ static const struct intel_gvt_ops intel_gvt_ops = {
 	.get_gvt_attrs = intel_get_gvt_attrs,
 	.vgpu_query_plane = intel_vgpu_query_plane,
 	.vgpu_get_dmabuf = intel_vgpu_get_dmabuf,
-	.write_protect_handler = intel_vgpu_write_protect_handler,
+	.write_protect_handler = intel_vgpu_page_track_handler,
 };
 
 /**


@@ -48,6 +48,7 @@
 #include "cmd_parser.h"
 #include "fb_decoder.h"
 #include "dmabuf.h"
+#include "page_track.h"
 
 #define GVT_MAX_VGPU 8
 
@@ -131,11 +132,9 @@ struct intel_vgpu_opregion {
 
 #define vgpu_opregion(vgpu) (&(vgpu->opregion))
 
-#define INTEL_GVT_MAX_PORT 5
-
 struct intel_vgpu_display {
 	struct intel_vgpu_i2c_edid i2c_edid;
-	struct intel_vgpu_port ports[INTEL_GVT_MAX_PORT];
+	struct intel_vgpu_port ports[I915_MAX_PORTS];
 	struct intel_vgpu_sbi sbi;
 };
 
@@ -190,6 +189,7 @@ struct intel_vgpu {
 	struct intel_vgpu_opregion opregion;
 	struct intel_vgpu_display display;
 	struct intel_vgpu_submission submission;
+	struct radix_tree_root page_track_tree;
 	u32 hws_pga[I915_NUM_ENGINES];
 
 	struct dentry *debugfs;
@@ -201,8 +201,16 @@ struct intel_vgpu {
 		int num_regions;
 		struct eventfd_ctx *intx_trigger;
 		struct eventfd_ctx *msi_trigger;
-		struct rb_root cache;
+
+		/*
+		 * Two caches are used to avoid mapping duplicated pages (eg.
+		 * scratch pages). This help to reduce dma setup overhead.
+		 */
+		struct rb_root gfn_cache;
+		struct rb_root dma_addr_cache;
+		unsigned long nr_cache_entries;
 		struct mutex cache_lock;
+
 		struct notifier_block iommu_notifier;
 		struct notifier_block group_notifier;
 		struct kvm *kvm;
@@ -308,7 +316,10 @@ struct intel_gvt {
 	wait_queue_head_t service_thread_wq;
 	unsigned long service_request;
 
-	struct engine_mmio *engine_mmio_list;
+	struct {
+		struct engine_mmio *mmio;
+		int ctx_mmio_count[I915_NUM_ENGINES];
+	} engine_mmio_list;
 
 	struct dentry *debugfs_root;
 };


@ -188,7 +188,9 @@ void enter_failsafe_mode(struct intel_vgpu *vgpu, int reason)
static int sanitize_fence_mmio_access(struct intel_vgpu *vgpu, static int sanitize_fence_mmio_access(struct intel_vgpu *vgpu,
unsigned int fence_num, void *p_data, unsigned int bytes) unsigned int fence_num, void *p_data, unsigned int bytes)
{ {
if (fence_num >= vgpu_fence_sz(vgpu)) { unsigned int max_fence = vgpu_fence_sz(vgpu);
if (fence_num >= max_fence) {
/* When guest access oob fence regs without access /* When guest access oob fence regs without access
* pv_info first, we treat guest not supporting GVT, * pv_info first, we treat guest not supporting GVT,
@ -201,7 +203,7 @@ static int sanitize_fence_mmio_access(struct intel_vgpu *vgpu,
if (!vgpu->mmio.disable_warn_untrack) { if (!vgpu->mmio.disable_warn_untrack) {
gvt_vgpu_err("found oob fence register access\n"); gvt_vgpu_err("found oob fence register access\n");
gvt_vgpu_err("total fence %d, access fence %d\n", gvt_vgpu_err("total fence %d, access fence %d\n",
vgpu_fence_sz(vgpu), fence_num); max_fence, fence_num);
} }
memset(p_data, 0, bytes); memset(p_data, 0, bytes);
return -EINVAL; return -EINVAL;
@ -320,7 +322,7 @@ static int gdrst_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
intel_gvt_reset_vgpu_locked(vgpu, false, engine_mask); intel_gvt_reset_vgpu_locked(vgpu, false, engine_mask);
/* sw will wait for the device to ack the reset request */ /* sw will wait for the device to ack the reset request */
vgpu_vreg(vgpu, offset) = 0; vgpu_vreg(vgpu, offset) = 0;
return 0; return 0;
} }
@ -1139,21 +1141,21 @@ static int pvinfo_mmio_read(struct intel_vgpu *vgpu, unsigned int offset,
static int handle_g2v_notification(struct intel_vgpu *vgpu, int notification) static int handle_g2v_notification(struct intel_vgpu *vgpu, int notification)
{ {
int ret = 0; intel_gvt_gtt_type_t root_entry_type = GTT_TYPE_PPGTT_ROOT_L4_ENTRY;
struct intel_vgpu_mm *mm;
u64 *pdps;
pdps = (u64 *)&vgpu_vreg64_t(vgpu, vgtif_reg(pdp[0]));
switch (notification) { switch (notification) {
case VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE: case VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE:
ret = intel_vgpu_g2v_create_ppgtt_mm(vgpu, 3); root_entry_type = GTT_TYPE_PPGTT_ROOT_L3_ENTRY;
break;
case VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY:
ret = intel_vgpu_g2v_destroy_ppgtt_mm(vgpu, 3);
break;
case VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE: case VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE:
ret = intel_vgpu_g2v_create_ppgtt_mm(vgpu, 4); mm = intel_vgpu_get_ppgtt_mm(vgpu, root_entry_type, pdps);
break; return PTR_ERR_OR_ZERO(mm);
case VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY:
case VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY: case VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY:
ret = intel_vgpu_g2v_destroy_ppgtt_mm(vgpu, 4); return intel_vgpu_put_ppgtt_mm(vgpu, pdps);
break;
case VGT_G2V_EXECLIST_CONTEXT_CREATE: case VGT_G2V_EXECLIST_CONTEXT_CREATE:
case VGT_G2V_EXECLIST_CONTEXT_DESTROY: case VGT_G2V_EXECLIST_CONTEXT_DESTROY:
case 1: /* Remove this in guest driver. */ case 1: /* Remove this in guest driver. */
@ -1161,7 +1163,7 @@ static int handle_g2v_notification(struct intel_vgpu *vgpu, int notification)
default: default:
gvt_vgpu_err("Invalid PV notification %d\n", notification); gvt_vgpu_err("Invalid PV notification %d\n", notification);
} }
return ret; return 0;
} }
static int send_display_ready_uevent(struct intel_vgpu *vgpu, int ready) static int send_display_ready_uevent(struct intel_vgpu *vgpu, int ready)
@ -1389,8 +1391,8 @@ static int hws_pga_write(struct intel_vgpu *vgpu, unsigned int offset,
int ring_id = intel_gvt_render_mmio_to_ring_id(vgpu->gvt, offset); int ring_id = intel_gvt_render_mmio_to_ring_id(vgpu->gvt, offset);
if (!intel_gvt_ggtt_validate_range(vgpu, value, I915_GTT_PAGE_SIZE)) { if (!intel_gvt_ggtt_validate_range(vgpu, value, I915_GTT_PAGE_SIZE)) {
gvt_vgpu_err("VM(%d) write invalid HWSP address, reg:0x%x, value:0x%x\n", gvt_vgpu_err("write invalid HWSP address, reg:0x%x, value:0x%x\n",
vgpu->id, offset, value); offset, value);
return -EINVAL; return -EINVAL;
} }
/* /*
@ -1399,8 +1401,8 @@ static int hws_pga_write(struct intel_vgpu *vgpu, unsigned int offset,
* support BDW, SKL or other platforms with same HWSP registers. * support BDW, SKL or other platforms with same HWSP registers.
*/ */
if (unlikely(ring_id < 0 || ring_id >= I915_NUM_ENGINES)) { if (unlikely(ring_id < 0 || ring_id >= I915_NUM_ENGINES)) {
gvt_vgpu_err("VM(%d) access unknown hardware status page register:0x%x\n", gvt_vgpu_err("access unknown hardware status page register:0x%x\n",
vgpu->id, offset); offset);
return -EINVAL; return -EINVAL;
} }
vgpu->hws_pga[ring_id] = value; vgpu->hws_pga[ring_id] = value;


@@ -44,13 +44,18 @@ struct intel_gvt_mpt {
 	void (*detach_vgpu)(unsigned long handle);
 	int (*inject_msi)(unsigned long handle, u32 addr, u16 data);
 	unsigned long (*from_virt_to_mfn)(void *p);
-	int (*set_wp_page)(unsigned long handle, u64 gfn);
-	int (*unset_wp_page)(unsigned long handle, u64 gfn);
+	int (*enable_page_track)(unsigned long handle, u64 gfn);
+	int (*disable_page_track)(unsigned long handle, u64 gfn);
 	int (*read_gpa)(unsigned long handle, unsigned long gpa, void *buf,
 			unsigned long len);
 	int (*write_gpa)(unsigned long handle, unsigned long gpa, void *buf,
 			 unsigned long len);
 	unsigned long (*gfn_to_mfn)(unsigned long handle, unsigned long gfn);
+
+	int (*dma_map_guest_page)(unsigned long handle, unsigned long gfn,
+				  dma_addr_t *dma_addr);
+	void (*dma_unmap_guest_page)(unsigned long handle, dma_addr_t dma_addr);
+
 	int (*map_gfn_to_mfn)(unsigned long handle, unsigned long gfn,
 			      unsigned long mfn, unsigned int nr, bool map);
 	int (*set_trap_area)(unsigned long handle, u64 start, u64 end,


@ -41,6 +41,7 @@
#include <linux/kvm_host.h> #include <linux/kvm_host.h>
#include <linux/vfio.h> #include <linux/vfio.h>
#include <linux/mdev.h> #include <linux/mdev.h>
#include <linux/debugfs.h>
#include "i915_drv.h" #include "i915_drv.h"
#include "gvt.h" #include "gvt.h"
@ -84,12 +85,16 @@ struct kvmgt_guest_info {
#define NR_BKT (1 << 18) #define NR_BKT (1 << 18)
struct hlist_head ptable[NR_BKT]; struct hlist_head ptable[NR_BKT];
#undef NR_BKT #undef NR_BKT
struct dentry *debugfs_cache_entries;
}; };
struct gvt_dma { struct gvt_dma {
struct rb_node node; struct intel_vgpu *vgpu;
struct rb_node gfn_node;
struct rb_node dma_addr_node;
gfn_t gfn; gfn_t gfn;
unsigned long iova; dma_addr_t dma_addr;
struct kref ref;
}; };
static inline bool handle_valid(unsigned long handle) static inline bool handle_valid(unsigned long handle)
@ -101,165 +106,167 @@ static int kvmgt_guest_init(struct mdev_device *mdev);
static void intel_vgpu_release_work(struct work_struct *work); static void intel_vgpu_release_work(struct work_struct *work);
static bool kvmgt_guest_exit(struct kvmgt_guest_info *info); static bool kvmgt_guest_exit(struct kvmgt_guest_info *info);
static int gvt_dma_map_iova(struct intel_vgpu *vgpu, kvm_pfn_t pfn, static int gvt_dma_map_page(struct intel_vgpu *vgpu, unsigned long gfn,
unsigned long *iova) dma_addr_t *dma_addr)
{ {
struct page *page;
struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev; struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
dma_addr_t daddr; struct page *page;
unsigned long pfn;
int ret;
if (unlikely(!pfn_valid(pfn))) /* Pin the page first. */
return -EFAULT; ret = vfio_pin_pages(mdev_dev(vgpu->vdev.mdev), &gfn, 1,
IOMMU_READ | IOMMU_WRITE, &pfn);
if (ret != 1) {
gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx: %d\n",
gfn, ret);
return -EINVAL;
}
/* Setup DMA mapping. */
page = pfn_to_page(pfn); page = pfn_to_page(pfn);
daddr = dma_map_page(dev, page, 0, PAGE_SIZE, *dma_addr = dma_map_page(dev, page, 0, PAGE_SIZE,
PCI_DMA_BIDIRECTIONAL); PCI_DMA_BIDIRECTIONAL);
if (dma_mapping_error(dev, daddr)) if (dma_mapping_error(dev, *dma_addr)) {
gvt_vgpu_err("DMA mapping failed for gfn 0x%lx\n", gfn);
vfio_unpin_pages(mdev_dev(vgpu->vdev.mdev), &gfn, 1);
return -ENOMEM; return -ENOMEM;
}
*iova = (unsigned long)(daddr >> PAGE_SHIFT);
return 0; return 0;
} }
static void gvt_dma_unmap_iova(struct intel_vgpu *vgpu, unsigned long iova) static void gvt_dma_unmap_page(struct intel_vgpu *vgpu, unsigned long gfn,
dma_addr_t dma_addr)
{ {
struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev; struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
dma_addr_t daddr; int ret;
daddr = (dma_addr_t)(iova << PAGE_SHIFT); dma_unmap_page(dev, dma_addr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
dma_unmap_page(dev, daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); ret = vfio_unpin_pages(mdev_dev(vgpu->vdev.mdev), &gfn, 1);
WARN_ON(ret != 1);
} }
static struct gvt_dma *__gvt_cache_find(struct intel_vgpu *vgpu, gfn_t gfn) static struct gvt_dma *__gvt_cache_find_dma_addr(struct intel_vgpu *vgpu,
dma_addr_t dma_addr)
{ {
struct rb_node *node = vgpu->vdev.cache.rb_node; struct rb_node *node = vgpu->vdev.dma_addr_cache.rb_node;
struct gvt_dma *ret = NULL; struct gvt_dma *itr;
while (node) { while (node) {
struct gvt_dma *itr = rb_entry(node, struct gvt_dma, node); itr = rb_entry(node, struct gvt_dma, dma_addr_node);
if (dma_addr < itr->dma_addr)
node = node->rb_left;
else if (dma_addr > itr->dma_addr)
node = node->rb_right;
else
return itr;
}
return NULL;
}
static struct gvt_dma *__gvt_cache_find_gfn(struct intel_vgpu *vgpu, gfn_t gfn)
{
struct rb_node *node = vgpu->vdev.gfn_cache.rb_node;
struct gvt_dma *itr;
while (node) {
itr = rb_entry(node, struct gvt_dma, gfn_node);
if (gfn < itr->gfn) if (gfn < itr->gfn)
node = node->rb_left; node = node->rb_left;
else if (gfn > itr->gfn) else if (gfn > itr->gfn)
node = node->rb_right; node = node->rb_right;
else { else
ret = itr; return itr;
goto out;
}
} }
return NULL;
out:
return ret;
} }
static unsigned long gvt_cache_find(struct intel_vgpu *vgpu, gfn_t gfn) static void __gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn,
{ dma_addr_t dma_addr)
struct gvt_dma *entry;
unsigned long iova;
mutex_lock(&vgpu->vdev.cache_lock);
entry = __gvt_cache_find(vgpu, gfn);
iova = (entry == NULL) ? INTEL_GVT_INVALID_ADDR : entry->iova;
mutex_unlock(&vgpu->vdev.cache_lock);
return iova;
}
static void gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn,
unsigned long iova)
{ {
struct gvt_dma *new, *itr; struct gvt_dma *new, *itr;
struct rb_node **link = &vgpu->vdev.cache.rb_node, *parent = NULL; struct rb_node **link, *parent = NULL;
new = kzalloc(sizeof(struct gvt_dma), GFP_KERNEL); new = kzalloc(sizeof(struct gvt_dma), GFP_KERNEL);
if (!new) if (!new)
return; return;
new->vgpu = vgpu;
new->gfn = gfn; new->gfn = gfn;
new->iova = iova; new->dma_addr = dma_addr;
kref_init(&new->ref);
mutex_lock(&vgpu->vdev.cache_lock); /* gfn_cache maps gfn to struct gvt_dma. */
link = &vgpu->vdev.gfn_cache.rb_node;
while (*link) { while (*link) {
parent = *link; parent = *link;
itr = rb_entry(parent, struct gvt_dma, node); itr = rb_entry(parent, struct gvt_dma, gfn_node);
if (gfn == itr->gfn) if (gfn < itr->gfn)
goto out;
else if (gfn < itr->gfn)
link = &parent->rb_left; link = &parent->rb_left;
else else
link = &parent->rb_right; link = &parent->rb_right;
} }
rb_link_node(&new->gfn_node, parent, link);
rb_insert_color(&new->gfn_node, &vgpu->vdev.gfn_cache);
rb_link_node(&new->node, parent, link); /* dma_addr_cache maps dma addr to struct gvt_dma. */
rb_insert_color(&new->node, &vgpu->vdev.cache); parent = NULL;
mutex_unlock(&vgpu->vdev.cache_lock); link = &vgpu->vdev.dma_addr_cache.rb_node;
return; while (*link) {
parent = *link;
itr = rb_entry(parent, struct gvt_dma, dma_addr_node);
out: if (dma_addr < itr->dma_addr)
mutex_unlock(&vgpu->vdev.cache_lock); link = &parent->rb_left;
kfree(new); else
link = &parent->rb_right;
}
rb_link_node(&new->dma_addr_node, parent, link);
rb_insert_color(&new->dma_addr_node, &vgpu->vdev.dma_addr_cache);
vgpu->vdev.nr_cache_entries++;
} }
static void __gvt_cache_remove_entry(struct intel_vgpu *vgpu, static void __gvt_cache_remove_entry(struct intel_vgpu *vgpu,
struct gvt_dma *entry) struct gvt_dma *entry)
{ {
rb_erase(&entry->node, &vgpu->vdev.cache); rb_erase(&entry->gfn_node, &vgpu->vdev.gfn_cache);
rb_erase(&entry->dma_addr_node, &vgpu->vdev.dma_addr_cache);
kfree(entry); kfree(entry);
} vgpu->vdev.nr_cache_entries--;
static void gvt_cache_remove(struct intel_vgpu *vgpu, gfn_t gfn)
{
struct device *dev = mdev_dev(vgpu->vdev.mdev);
struct gvt_dma *this;
unsigned long g1;
int rc;
mutex_lock(&vgpu->vdev.cache_lock);
this = __gvt_cache_find(vgpu, gfn);
if (!this) {
mutex_unlock(&vgpu->vdev.cache_lock);
return;
}
g1 = gfn;
gvt_dma_unmap_iova(vgpu, this->iova);
rc = vfio_unpin_pages(dev, &g1, 1);
WARN_ON(rc != 1);
__gvt_cache_remove_entry(vgpu, this);
mutex_unlock(&vgpu->vdev.cache_lock);
}
static void gvt_cache_init(struct intel_vgpu *vgpu)
{
vgpu->vdev.cache = RB_ROOT;
mutex_init(&vgpu->vdev.cache_lock);
} }
static void gvt_cache_destroy(struct intel_vgpu *vgpu) static void gvt_cache_destroy(struct intel_vgpu *vgpu)
{ {
struct gvt_dma *dma; struct gvt_dma *dma;
struct rb_node *node = NULL; struct rb_node *node = NULL;
struct device *dev = mdev_dev(vgpu->vdev.mdev);
unsigned long gfn;
for (;;) { for (;;) {
mutex_lock(&vgpu->vdev.cache_lock); mutex_lock(&vgpu->vdev.cache_lock);
node = rb_first(&vgpu->vdev.cache); node = rb_first(&vgpu->vdev.gfn_cache);
if (!node) { if (!node) {
mutex_unlock(&vgpu->vdev.cache_lock); mutex_unlock(&vgpu->vdev.cache_lock);
break; break;
} }
dma = rb_entry(node, struct gvt_dma, node); dma = rb_entry(node, struct gvt_dma, gfn_node);
gvt_dma_unmap_iova(vgpu, dma->iova); gvt_dma_unmap_page(vgpu, dma->gfn, dma->dma_addr);
gfn = dma->gfn;
__gvt_cache_remove_entry(vgpu, dma); __gvt_cache_remove_entry(vgpu, dma);
mutex_unlock(&vgpu->vdev.cache_lock); mutex_unlock(&vgpu->vdev.cache_lock);
vfio_unpin_pages(dev, &gfn, 1);
} }
} }
static void gvt_cache_init(struct intel_vgpu *vgpu)
{
vgpu->vdev.gfn_cache = RB_ROOT;
vgpu->vdev.dma_addr_cache = RB_ROOT;
vgpu->vdev.nr_cache_entries = 0;
mutex_init(&vgpu->vdev.cache_lock);
}
static void kvmgt_protect_table_init(struct kvmgt_guest_info *info) static void kvmgt_protect_table_init(struct kvmgt_guest_info *info)
{ {
hash_init(info->ptable); hash_init(info->ptable);
@ -452,7 +459,7 @@ static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev)
vgpu = intel_gvt_ops->vgpu_create(gvt, type); vgpu = intel_gvt_ops->vgpu_create(gvt, type);
if (IS_ERR_OR_NULL(vgpu)) { if (IS_ERR_OR_NULL(vgpu)) {
ret = vgpu == NULL ? -EFAULT : PTR_ERR(vgpu); ret = vgpu == NULL ? -EFAULT : PTR_ERR(vgpu);
gvt_vgpu_err("failed to create intel vgpu: %d\n", ret); gvt_err("failed to create intel vgpu: %d\n", ret);
goto out; goto out;
} }
@ -489,13 +496,22 @@ static int intel_vgpu_iommu_notifier(struct notifier_block *nb,
if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) { if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
struct vfio_iommu_type1_dma_unmap *unmap = data; struct vfio_iommu_type1_dma_unmap *unmap = data;
unsigned long gfn, end_gfn; struct gvt_dma *entry;
unsigned long iov_pfn, end_iov_pfn;
gfn = unmap->iova >> PAGE_SHIFT; iov_pfn = unmap->iova >> PAGE_SHIFT;
end_gfn = gfn + unmap->size / PAGE_SIZE; end_iov_pfn = iov_pfn + unmap->size / PAGE_SIZE;
while (gfn < end_gfn) mutex_lock(&vgpu->vdev.cache_lock);
gvt_cache_remove(vgpu, gfn++); for (; iov_pfn < end_iov_pfn; iov_pfn++) {
entry = __gvt_cache_find_gfn(vgpu, iov_pfn);
if (!entry)
continue;
gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr);
__gvt_cache_remove_entry(vgpu, entry);
}
mutex_unlock(&vgpu->vdev.cache_lock);
} }
return NOTIFY_OK; return NOTIFY_OK;
@ -1321,7 +1337,7 @@ static void kvmgt_host_exit(struct device *dev, void *gvt)
mdev_unregister_device(dev); mdev_unregister_device(dev);
} }
static int kvmgt_write_protect_add(unsigned long handle, u64 gfn) static int kvmgt_page_track_add(unsigned long handle, u64 gfn)
{ {
struct kvmgt_guest_info *info; struct kvmgt_guest_info *info;
struct kvm *kvm; struct kvm *kvm;
@ -1355,7 +1371,7 @@ out:
return 0; return 0;
} }
static int kvmgt_write_protect_remove(unsigned long handle, u64 gfn) static int kvmgt_page_track_remove(unsigned long handle, u64 gfn)
{ {
struct kvmgt_guest_info *info; struct kvmgt_guest_info *info;
struct kvm *kvm; struct kvm *kvm;
@ -1483,11 +1499,20 @@ static int kvmgt_guest_init(struct mdev_device *mdev)
info->track_node.track_flush_slot = kvmgt_page_track_flush_slot; info->track_node.track_flush_slot = kvmgt_page_track_flush_slot;
kvm_page_track_register_notifier(kvm, &info->track_node); kvm_page_track_register_notifier(kvm, &info->track_node);
info->debugfs_cache_entries = debugfs_create_ulong(
"kvmgt_nr_cache_entries",
0444, vgpu->debugfs,
&vgpu->vdev.nr_cache_entries);
if (!info->debugfs_cache_entries)
gvt_vgpu_err("Cannot create kvmgt debugfs entry\n");
return 0; return 0;
} }
static bool kvmgt_guest_exit(struct kvmgt_guest_info *info) static bool kvmgt_guest_exit(struct kvmgt_guest_info *info)
{ {
debugfs_remove(info->debugfs_cache_entries);
kvm_page_track_unregister_notifier(info->kvm, &info->track_node); kvm_page_track_unregister_notifier(info->kvm, &info->track_node);
kvm_put_kvm(info->kvm); kvm_put_kvm(info->kvm);
kvmgt_protect_table_destroy(info); kvmgt_protect_table_destroy(info);
@ -1527,39 +1552,77 @@ static int kvmgt_inject_msi(unsigned long handle, u32 addr, u16 data)
static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn) static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn)
{ {
unsigned long iova, pfn;
struct kvmgt_guest_info *info; struct kvmgt_guest_info *info;
struct device *dev; kvm_pfn_t pfn;
struct intel_vgpu *vgpu;
int rc;
if (!handle_valid(handle)) if (!handle_valid(handle))
return INTEL_GVT_INVALID_ADDR; return INTEL_GVT_INVALID_ADDR;
info = (struct kvmgt_guest_info *)handle;
pfn = gfn_to_pfn(info->kvm, gfn);
if (is_error_noslot_pfn(pfn))
return INTEL_GVT_INVALID_ADDR;
return pfn;
}
int kvmgt_dma_map_guest_page(unsigned long handle, unsigned long gfn,
dma_addr_t *dma_addr)
{
struct kvmgt_guest_info *info;
struct intel_vgpu *vgpu;
struct gvt_dma *entry;
int ret;
if (!handle_valid(handle))
return -EINVAL;
info = (struct kvmgt_guest_info *)handle; info = (struct kvmgt_guest_info *)handle;
vgpu = info->vgpu; vgpu = info->vgpu;
iova = gvt_cache_find(info->vgpu, gfn);
if (iova != INTEL_GVT_INVALID_ADDR)
return iova;
pfn = INTEL_GVT_INVALID_ADDR; mutex_lock(&info->vgpu->vdev.cache_lock);
dev = mdev_dev(info->vgpu->vdev.mdev);
rc = vfio_pin_pages(dev, &gfn, 1, IOMMU_READ | IOMMU_WRITE, &pfn); entry = __gvt_cache_find_gfn(info->vgpu, gfn);
if (rc != 1) { if (!entry) {
gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx: %d\n", ret = gvt_dma_map_page(vgpu, gfn, dma_addr);
gfn, rc); if (ret) {
return INTEL_GVT_INVALID_ADDR; mutex_unlock(&info->vgpu->vdev.cache_lock);
} return ret;
/* transfer to host iova for GFX to use DMA */ }
rc = gvt_dma_map_iova(info->vgpu, pfn, &iova); __gvt_cache_add(info->vgpu, gfn, *dma_addr);
if (rc) { } else {
gvt_vgpu_err("gvt_dma_map_iova failed for gfn: 0x%lx\n", gfn); kref_get(&entry->ref);
vfio_unpin_pages(dev, &gfn, 1); *dma_addr = entry->dma_addr;
return INTEL_GVT_INVALID_ADDR;
} }
gvt_cache_add(info->vgpu, gfn, iova); mutex_unlock(&info->vgpu->vdev.cache_lock);
return iova; return 0;
}
static void __gvt_dma_release(struct kref *ref)
{
struct gvt_dma *entry = container_of(ref, typeof(*entry), ref);
gvt_dma_unmap_page(entry->vgpu, entry->gfn, entry->dma_addr);
__gvt_cache_remove_entry(entry->vgpu, entry);
}
void kvmgt_dma_unmap_guest_page(unsigned long handle, dma_addr_t dma_addr)
{
struct kvmgt_guest_info *info;
struct gvt_dma *entry;
if (!handle_valid(handle))
return;
info = (struct kvmgt_guest_info *)handle;
mutex_lock(&info->vgpu->vdev.cache_lock);
entry = __gvt_cache_find_dma_addr(info->vgpu, dma_addr);
if (entry)
kref_put(&entry->ref, __gvt_dma_release);
mutex_unlock(&info->vgpu->vdev.cache_lock);
} }
static int kvmgt_rw_gpa(unsigned long handle, unsigned long gpa, static int kvmgt_rw_gpa(unsigned long handle, unsigned long gpa,
@ -1629,11 +1692,13 @@ struct intel_gvt_mpt kvmgt_mpt = {
.detach_vgpu = kvmgt_detach_vgpu, .detach_vgpu = kvmgt_detach_vgpu,
.inject_msi = kvmgt_inject_msi, .inject_msi = kvmgt_inject_msi,
.from_virt_to_mfn = kvmgt_virt_to_pfn, .from_virt_to_mfn = kvmgt_virt_to_pfn,
.set_wp_page = kvmgt_write_protect_add, .enable_page_track = kvmgt_page_track_add,
.unset_wp_page = kvmgt_write_protect_remove, .disable_page_track = kvmgt_page_track_remove,
.read_gpa = kvmgt_read_gpa, .read_gpa = kvmgt_read_gpa,
.write_gpa = kvmgt_write_gpa, .write_gpa = kvmgt_write_gpa,
.gfn_to_mfn = kvmgt_gfn_to_pfn, .gfn_to_mfn = kvmgt_gfn_to_pfn,
.dma_map_guest_page = kvmgt_dma_map_guest_page,
.dma_unmap_guest_page = kvmgt_dma_unmap_guest_page,
.set_opregion = kvmgt_set_opregion, .set_opregion = kvmgt_set_opregion,
.get_vfio_device = kvmgt_get_vfio_device, .get_vfio_device = kvmgt_get_vfio_device,
.put_vfio_device = kvmgt_put_vfio_device, .put_vfio_device = kvmgt_put_vfio_device,


@@ -76,10 +76,9 @@ static void failsafe_emulate_mmio_rw(struct intel_vgpu *vgpu, uint64_t pa,
 		else
 			intel_vgpu_default_mmio_write(vgpu, offset, p_data,
 					bytes);
-	} else if (reg_is_gtt(gvt, offset) &&
-			vgpu->gtt.ggtt_mm->virtual_page_table) {
+	} else if (reg_is_gtt(gvt, offset)) {
 		offset -= gvt->device_info.gtt_start_offset;
-		pt = vgpu->gtt.ggtt_mm->virtual_page_table + offset;
+		pt = vgpu->gtt.ggtt_mm->ggtt_mm.virtual_ggtt + offset;
 		if (read)
 			memcpy(p_data, pt, bytes);
 		else
@@ -125,7 +124,7 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa,
 		if (WARN_ON(!reg_is_gtt(gvt, offset + bytes - 1)))
 			goto err;
 
-		ret = intel_vgpu_emulate_gtt_mmio_read(vgpu, offset,
+		ret = intel_vgpu_emulate_ggtt_mmio_read(vgpu, offset,
 			p_data, bytes);
 		if (ret)
 			goto err;
@@ -198,7 +197,7 @@ int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa,
 		if (WARN_ON(!reg_is_gtt(gvt, offset + bytes - 1)))
 			goto err;
 
-		ret = intel_vgpu_emulate_gtt_mmio_write(vgpu, offset,
+		ret = intel_vgpu_emulate_ggtt_mmio_write(vgpu, offset,
 			p_data, bytes);
 		if (ret)
 			goto err;


@ -50,6 +50,8 @@
#define RING_GFX_MODE(base) _MMIO((base) + 0x29c) #define RING_GFX_MODE(base) _MMIO((base) + 0x29c)
#define VF_GUARDBAND _MMIO(0x83a4) #define VF_GUARDBAND _MMIO(0x83a4)
#define GEN9_MOCS_SIZE 64
/* Raw offset is appened to each line for convenience. */ /* Raw offset is appened to each line for convenience. */
static struct engine_mmio gen8_engine_mmio_list[] __cacheline_aligned = { static struct engine_mmio gen8_engine_mmio_list[] __cacheline_aligned = {
{RCS, GFX_MODE_GEN7, 0xffff, false}, /* 0x229c */ {RCS, GFX_MODE_GEN7, 0xffff, false}, /* 0x229c */
@ -151,8 +153,8 @@ static struct engine_mmio gen9_engine_mmio_list[] __cacheline_aligned = {
static struct { static struct {
bool initialized; bool initialized;
u32 control_table[I915_NUM_ENGINES][64]; u32 control_table[I915_NUM_ENGINES][GEN9_MOCS_SIZE];
u32 l3cc_table[32]; u32 l3cc_table[GEN9_MOCS_SIZE / 2];
} gen9_render_mocs; } gen9_render_mocs;
static void load_render_mocs(struct drm_i915_private *dev_priv) static void load_render_mocs(struct drm_i915_private *dev_priv)
@ -169,7 +171,7 @@ static void load_render_mocs(struct drm_i915_private *dev_priv)
for (ring_id = 0; ring_id < ARRAY_SIZE(regs); ring_id++) { for (ring_id = 0; ring_id < ARRAY_SIZE(regs); ring_id++) {
offset.reg = regs[ring_id]; offset.reg = regs[ring_id];
for (i = 0; i < 64; i++) { for (i = 0; i < GEN9_MOCS_SIZE; i++) {
gen9_render_mocs.control_table[ring_id][i] = gen9_render_mocs.control_table[ring_id][i] =
I915_READ_FW(offset); I915_READ_FW(offset);
offset.reg += 4; offset.reg += 4;
@ -177,7 +179,7 @@ static void load_render_mocs(struct drm_i915_private *dev_priv)
} }
offset.reg = 0xb020; offset.reg = 0xb020;
for (i = 0; i < 32; i++) { for (i = 0; i < GEN9_MOCS_SIZE / 2; i++) {
gen9_render_mocs.l3cc_table[i] = gen9_render_mocs.l3cc_table[i] =
I915_READ_FW(offset); I915_READ_FW(offset);
offset.reg += 4; offset.reg += 4;
@ -185,6 +187,153 @@ static void load_render_mocs(struct drm_i915_private *dev_priv)
gen9_render_mocs.initialized = true; gen9_render_mocs.initialized = true;
} }
static int
restore_context_mmio_for_inhibit(struct intel_vgpu *vgpu,
struct i915_request *req)
{
u32 *cs;
int ret;
struct engine_mmio *mmio;
struct intel_gvt *gvt = vgpu->gvt;
int ring_id = req->engine->id;
int count = gvt->engine_mmio_list.ctx_mmio_count[ring_id];
if (count == 0)
return 0;
ret = req->engine->emit_flush(req, EMIT_BARRIER);
if (ret)
return ret;
cs = intel_ring_begin(req, count * 2 + 2);
if (IS_ERR(cs))
return PTR_ERR(cs);
*cs++ = MI_LOAD_REGISTER_IMM(count);
for (mmio = gvt->engine_mmio_list.mmio;
i915_mmio_reg_valid(mmio->reg); mmio++) {
if (mmio->ring_id != ring_id ||
!mmio->in_context)
continue;
*cs++ = i915_mmio_reg_offset(mmio->reg);
*cs++ = vgpu_vreg_t(vgpu, mmio->reg) |
(mmio->mask << 16);
gvt_dbg_core("add lri reg pair 0x%x:0x%x in inhibit ctx, vgpu:%d, rind_id:%d\n",
*(cs-2), *(cs-1), vgpu->id, ring_id);
}
*cs++ = MI_NOOP;
intel_ring_advance(req, cs);
ret = req->engine->emit_flush(req, EMIT_BARRIER);
if (ret)
return ret;
return 0;
}
static int
restore_render_mocs_control_for_inhibit(struct intel_vgpu *vgpu,
struct i915_request *req)
{
unsigned int index;
u32 *cs;
cs = intel_ring_begin(req, 2 * GEN9_MOCS_SIZE + 2);
if (IS_ERR(cs))
return PTR_ERR(cs);
*cs++ = MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE);
for (index = 0; index < GEN9_MOCS_SIZE; index++) {
*cs++ = i915_mmio_reg_offset(GEN9_GFX_MOCS(index));
*cs++ = vgpu_vreg_t(vgpu, GEN9_GFX_MOCS(index));
gvt_dbg_core("add lri reg pair 0x%x:0x%x in inhibit ctx, vgpu:%d, rind_id:%d\n",
*(cs-2), *(cs-1), vgpu->id, req->engine->id);
}
*cs++ = MI_NOOP;
intel_ring_advance(req, cs);
return 0;
}
static int
restore_render_mocs_l3cc_for_inhibit(struct intel_vgpu *vgpu,
struct i915_request *req)
{
unsigned int index;
u32 *cs;
cs = intel_ring_begin(req, 2 * GEN9_MOCS_SIZE / 2 + 2);
if (IS_ERR(cs))
return PTR_ERR(cs);
*cs++ = MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE / 2);
for (index = 0; index < GEN9_MOCS_SIZE / 2; index++) {
*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(index));
*cs++ = vgpu_vreg_t(vgpu, GEN9_LNCFCMOCS(index));
gvt_dbg_core("add lri reg pair 0x%x:0x%x in inhibit ctx, vgpu:%d, rind_id:%d\n",
*(cs-2), *(cs-1), vgpu->id, req->engine->id);
}
*cs++ = MI_NOOP;
intel_ring_advance(req, cs);
return 0;
}
/*
* Use lri command to initialize the mmio which is in context state image for
* inhibit context, it contains tracked engine mmio, render_mocs and
* render_mocs_l3cc.
*/
int intel_vgpu_restore_inhibit_context(struct intel_vgpu *vgpu,
struct i915_request *req)
{
int ret;
u32 *cs;
cs = intel_ring_begin(req, 2);
if (IS_ERR(cs))
return PTR_ERR(cs);
*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
*cs++ = MI_NOOP;
intel_ring_advance(req, cs);
ret = restore_context_mmio_for_inhibit(vgpu, req);
if (ret)
goto out;
/* no MOCS register in context except render engine */
if (req->engine->id != RCS)
goto out;
ret = restore_render_mocs_control_for_inhibit(vgpu, req);
if (ret)
goto out;
ret = restore_render_mocs_l3cc_for_inhibit(vgpu, req);
if (ret)
goto out;
out:
cs = intel_ring_begin(req, 2);
if (IS_ERR(cs))
return PTR_ERR(cs);
*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
*cs++ = MI_NOOP;
intel_ring_advance(req, cs);
return ret;
}
static void handle_tlb_pending_event(struct intel_vgpu *vgpu, int ring_id) static void handle_tlb_pending_event(struct intel_vgpu *vgpu, int ring_id)
{ {
struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
@ -251,11 +400,14 @@ static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next,
if (WARN_ON(ring_id >= ARRAY_SIZE(regs))) if (WARN_ON(ring_id >= ARRAY_SIZE(regs)))
return; return;
if (IS_KABYLAKE(dev_priv) && ring_id == RCS)
return;
if (!pre && !gen9_render_mocs.initialized) if (!pre && !gen9_render_mocs.initialized)
load_render_mocs(dev_priv); load_render_mocs(dev_priv);
offset.reg = regs[ring_id]; offset.reg = regs[ring_id];
for (i = 0; i < 64; i++) { for (i = 0; i < GEN9_MOCS_SIZE; i++) {
if (pre) if (pre)
old_v = vgpu_vreg_t(pre, offset); old_v = vgpu_vreg_t(pre, offset);
else else
@ -273,7 +425,7 @@ static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next,
if (ring_id == RCS) { if (ring_id == RCS) {
l3_offset.reg = 0xb020; l3_offset.reg = 0xb020;
for (i = 0; i < 32; i++) { for (i = 0; i < GEN9_MOCS_SIZE / 2; i++) {
if (pre) if (pre)
old_v = vgpu_vreg_t(pre, l3_offset); old_v = vgpu_vreg_t(pre, l3_offset);
else else
@ -293,6 +445,16 @@ static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next,
#define CTX_CONTEXT_CONTROL_VAL 0x03 #define CTX_CONTEXT_CONTROL_VAL 0x03
bool is_inhibit_context(struct i915_gem_context *ctx, int ring_id)
{
u32 *reg_state = ctx->engine[ring_id].lrc_reg_state;
u32 inhibit_mask =
_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
return inhibit_mask ==
(reg_state[CTX_CONTEXT_CONTROL_VAL] & inhibit_mask);
}
/* Switch ring mmio values (context). */ /* Switch ring mmio values (context). */
static void switch_mmio(struct intel_vgpu *pre, static void switch_mmio(struct intel_vgpu *pre,
struct intel_vgpu *next, struct intel_vgpu *next,
@ -300,9 +462,6 @@ static void switch_mmio(struct intel_vgpu *pre,
{ {
struct drm_i915_private *dev_priv; struct drm_i915_private *dev_priv;
struct intel_vgpu_submission *s; struct intel_vgpu_submission *s;
u32 *reg_state, ctx_ctrl;
u32 inhibit_mask =
_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
struct engine_mmio *mmio; struct engine_mmio *mmio;
u32 old_v, new_v; u32 old_v, new_v;
@ -310,10 +469,18 @@ static void switch_mmio(struct intel_vgpu *pre,
if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))
switch_mocs(pre, next, ring_id); switch_mocs(pre, next, ring_id);
for (mmio = dev_priv->gvt->engine_mmio_list; for (mmio = dev_priv->gvt->engine_mmio_list.mmio;
i915_mmio_reg_valid(mmio->reg); mmio++) { i915_mmio_reg_valid(mmio->reg); mmio++) {
if (mmio->ring_id != ring_id) if (mmio->ring_id != ring_id)
continue; continue;
/*
* No need to do save or restore of the mmio which is in context
* state image on kabylake, it's initialized by lri command and
* save or restore with context together.
*/
if (IS_KABYLAKE(dev_priv) && mmio->in_context)
continue;
// save // save
if (pre) { if (pre) {
vgpu_vreg_t(pre, mmio->reg) = I915_READ_FW(mmio->reg); vgpu_vreg_t(pre, mmio->reg) = I915_READ_FW(mmio->reg);
@ -327,16 +494,13 @@ static void switch_mmio(struct intel_vgpu *pre,
// restore // restore
if (next) { if (next) {
s = &next->submission; s = &next->submission;
reg_state =
s->shadow_ctx->engine[ring_id].lrc_reg_state;
ctx_ctrl = reg_state[CTX_CONTEXT_CONTROL_VAL];
/* /*
* if it is an inhibit context, load in_context mmio * No need to restore the mmio which is in context state
* into HW by mmio write. If it is not, skip this mmio * image if it's not inhibit context, it will restore
* write. * itself.
*/ */
if (mmio->in_context && if (mmio->in_context &&
(ctx_ctrl & inhibit_mask) != inhibit_mask) !is_inhibit_context(s->shadow_ctx, ring_id))
continue; continue;
if (mmio->mask) if (mmio->mask)
@ -405,8 +569,16 @@ void intel_gvt_switch_mmio(struct intel_vgpu *pre,
*/ */
void intel_gvt_init_engine_mmio_context(struct intel_gvt *gvt) void intel_gvt_init_engine_mmio_context(struct intel_gvt *gvt)
{ {
struct engine_mmio *mmio;
if (IS_SKYLAKE(gvt->dev_priv) || IS_KABYLAKE(gvt->dev_priv)) if (IS_SKYLAKE(gvt->dev_priv) || IS_KABYLAKE(gvt->dev_priv))
gvt->engine_mmio_list = gen9_engine_mmio_list; gvt->engine_mmio_list.mmio = gen9_engine_mmio_list;
else else
gvt->engine_mmio_list = gen8_engine_mmio_list; gvt->engine_mmio_list.mmio = gen8_engine_mmio_list;
for (mmio = gvt->engine_mmio_list.mmio;
i915_mmio_reg_valid(mmio->reg); mmio++) {
if (mmio->in_context)
gvt->engine_mmio_list.ctx_mmio_count[mmio->ring_id]++;
}
} }


@@ -49,4 +49,9 @@ void intel_gvt_switch_mmio(struct intel_vgpu *pre,
 
 void intel_gvt_init_engine_mmio_context(struct intel_gvt *gvt);
 
+bool is_inhibit_context(struct i915_gem_context *ctx, int ring_id);
+
+int intel_vgpu_restore_inhibit_context(struct intel_vgpu *vgpu,
+				       struct i915_request *req);
+
 #endif


@ -154,54 +154,31 @@ static inline unsigned long intel_gvt_hypervisor_virt_to_mfn(void *p)
} }
/** /**
* intel_gvt_hypervisor_enable - set a guest page to write-protected * intel_gvt_hypervisor_enable_page_track - track a guest page
* @vgpu: a vGPU * @vgpu: a vGPU
* @t: page track data structure * @gfn: the gfn of guest
* *
* Returns: * Returns:
* Zero on success, negative error code if failed. * Zero on success, negative error code if failed.
*/ */
static inline int intel_gvt_hypervisor_enable_page_track( static inline int intel_gvt_hypervisor_enable_page_track(
struct intel_vgpu *vgpu, struct intel_vgpu *vgpu, unsigned long gfn)
struct intel_vgpu_page_track *t)
{ {
int ret; return intel_gvt_host.mpt->enable_page_track(vgpu->handle, gfn);
if (t->tracked)
return 0;
ret = intel_gvt_host.mpt->set_wp_page(vgpu->handle, t->gfn);
if (ret)
return ret;
t->tracked = true;
atomic_inc(&vgpu->gtt.n_tracked_guest_page);
return 0;
} }
/** /**
* intel_gvt_hypervisor_disable_page_track - remove the write-protection of a * intel_gvt_hypervisor_disable_page_track - untrack a guest page
* guest page
* @vgpu: a vGPU * @vgpu: a vGPU
* @t: page track data structure * @gfn: the gfn of guest
* *
* Returns: * Returns:
* Zero on success, negative error code if failed. * Zero on success, negative error code if failed.
*/ */
static inline int intel_gvt_hypervisor_disable_page_track( static inline int intel_gvt_hypervisor_disable_page_track(
struct intel_vgpu *vgpu, struct intel_vgpu *vgpu, unsigned long gfn)
struct intel_vgpu_page_track *t)
{ {
int ret; return intel_gvt_host.mpt->disable_page_track(vgpu->handle, gfn);
if (!t->tracked)
return 0;
ret = intel_gvt_host.mpt->unset_wp_page(vgpu->handle, t->gfn);
if (ret)
return ret;
t->tracked = false;
atomic_dec(&vgpu->gtt.n_tracked_guest_page);
return 0;
} }
/** /**
@ -250,6 +227,34 @@ static inline unsigned long intel_gvt_hypervisor_gfn_to_mfn(
return intel_gvt_host.mpt->gfn_to_mfn(vgpu->handle, gfn); return intel_gvt_host.mpt->gfn_to_mfn(vgpu->handle, gfn);
} }
/**
* intel_gvt_hypervisor_dma_map_guest_page - setup dma map for guest page
* @vgpu: a vGPU
* @gpfn: guest pfn
* @dma_addr: retrieve allocated dma addr
*
* Returns:
* 0 on success, negative error code if failed.
*/
static inline int intel_gvt_hypervisor_dma_map_guest_page(
struct intel_vgpu *vgpu, unsigned long gfn,
dma_addr_t *dma_addr)
{
return intel_gvt_host.mpt->dma_map_guest_page(vgpu->handle, gfn,
dma_addr);
}
/**
* intel_gvt_hypervisor_dma_unmap_guest_page - cancel dma map for guest page
* @vgpu: a vGPU
* @dma_addr: the mapped dma addr
*/
static inline void intel_gvt_hypervisor_dma_unmap_guest_page(
struct intel_vgpu *vgpu, dma_addr_t dma_addr)
{
intel_gvt_host.mpt->dma_unmap_guest_page(vgpu->handle, dma_addr);
}
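
For orientation, a hedged sketch of how shadow page-table code might consume the two
wrappers above (the caller name and comments are illustrative only, not part of this
patch):

	static int example_shadow_one_entry(struct intel_vgpu *vgpu,
					    unsigned long gfn, dma_addr_t *out)
	{
		dma_addr_t dma_addr;
		int ret;

		ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn, &dma_addr);
		if (ret)
			return ret;

		/* ... program dma_addr into the shadow PTE ... */
		*out = dma_addr;
		return 0;
	}

	/* On teardown: intel_gvt_hypervisor_dma_unmap_guest_page(vgpu, dma_addr); */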
/** /**
* intel_gvt_hypervisor_map_gfn_to_mfn - map a GFN region to MFN * intel_gvt_hypervisor_map_gfn_to_mfn - map a GFN region to MFN
* @vgpu: a vGPU * @vgpu: a vGPU


@ -0,0 +1,184 @@
/*
* Copyright(c) 2011-2017 Intel Corporation. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "i915_drv.h"
#include "gvt.h"
/**
* intel_vgpu_find_page_track - find page track rcord of guest page
* @vgpu: a vGPU
* @gfn: the gfn of guest page
*
* Returns:
* A pointer to struct intel_vgpu_page_track if found, else NULL returned.
*/
struct intel_vgpu_page_track *intel_vgpu_find_page_track(
struct intel_vgpu *vgpu, unsigned long gfn)
{
return radix_tree_lookup(&vgpu->page_track_tree, gfn);
}
/**
* intel_vgpu_register_page_track - register a guest page to be tacked
* @vgpu: a vGPU
* @gfn: the gfn of guest page
*
* Returns:
* zero on success, negative error code if failed.
*/
int intel_vgpu_register_page_track(struct intel_vgpu *vgpu, unsigned long gfn,
gvt_page_track_handler_t handler, void *priv)
{
struct intel_vgpu_page_track *track;
int ret;
track = intel_vgpu_find_page_track(vgpu, gfn);
if (track)
return -EEXIST;
track = kzalloc(sizeof(*track), GFP_KERNEL);
if (!track)
return -ENOMEM;
track->handler = handler;
track->priv_data = priv;
ret = radix_tree_insert(&vgpu->page_track_tree, gfn, track);
if (ret) {
kfree(track);
return ret;
}
return 0;
}
/**
* intel_vgpu_unregister_page_track - unregister the tracked guest page
* @vgpu: a vGPU
* @gfn: the gfn of guest page
*
*/
void intel_vgpu_unregister_page_track(struct intel_vgpu *vgpu,
unsigned long gfn)
{
struct intel_vgpu_page_track *track;
track = radix_tree_delete(&vgpu->page_track_tree, gfn);
if (track) {
if (track->tracked)
intel_gvt_hypervisor_disable_page_track(vgpu, gfn);
kfree(track);
}
}
/**
* intel_vgpu_enable_page_track - set write-protection on guest page
* @vgpu: a vGPU
* @gfn: the gfn of guest page
*
* Returns:
* zero on success, negative error code if failed.
*/
int intel_vgpu_enable_page_track(struct intel_vgpu *vgpu, unsigned long gfn)
{
struct intel_vgpu_page_track *track;
int ret;
track = intel_vgpu_find_page_track(vgpu, gfn);
if (!track)
return -ENXIO;
if (track->tracked)
return 0;
ret = intel_gvt_hypervisor_enable_page_track(vgpu, gfn);
if (ret)
return ret;
track->tracked = true;
return 0;
}
/**
* intel_vgpu_enable_page_track - cancel write-protection on guest page
* @vgpu: a vGPU
* @gfn: the gfn of guest page
*
* Returns:
* zero on success, negative error code if failed.
*/
int intel_vgpu_disable_page_track(struct intel_vgpu *vgpu, unsigned long gfn)
{
struct intel_vgpu_page_track *track;
int ret;
track = intel_vgpu_find_page_track(vgpu, gfn);
if (!track)
return -ENXIO;
if (!track->tracked)
return 0;
ret = intel_gvt_hypervisor_disable_page_track(vgpu, gfn);
if (ret)
return ret;
track->tracked = false;
return 0;
}
/**
* intel_vgpu_page_track_handler - called when a write-protected page is written
* @vgpu: a vGPU
* @gpa: the gpa of this write
* @data: the written data
* @bytes: the length of this write
*
* Returns:
* zero on success, negative error code if failed.
*/
int intel_vgpu_page_track_handler(struct intel_vgpu *vgpu, u64 gpa,
void *data, unsigned int bytes)
{
struct intel_gvt *gvt = vgpu->gvt;
struct intel_vgpu_page_track *page_track;
int ret = 0;
mutex_lock(&gvt->lock);
page_track = intel_vgpu_find_page_track(vgpu, gpa >> PAGE_SHIFT);
if (!page_track) {
ret = -ENXIO;
goto out;
}
if (unlikely(vgpu->failsafe)) {
/* Remove write protection to prevent future traps. */
intel_vgpu_disable_page_track(vgpu, gpa >> PAGE_SHIFT);
} else {
ret = page_track->handler(page_track, gpa, data, bytes);
if (ret)
gvt_err("guest page write error, gpa %llx\n", gpa);
}
out:
mutex_unlock(&gvt->lock);
return ret;
}

View File

@ -0,0 +1,56 @@
/*
* Copyright(c) 2011-2017 Intel Corporation. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
#ifndef _GVT_PAGE_TRACK_H_
#define _GVT_PAGE_TRACK_H_
struct intel_vgpu_page_track;
typedef int (*gvt_page_track_handler_t)(
struct intel_vgpu_page_track *page_track,
u64 gpa, void *data, int bytes);
/* Track record for a write-protected guest page. */
struct intel_vgpu_page_track {
gvt_page_track_handler_t handler;
bool tracked;
void *priv_data;
};
struct intel_vgpu_page_track *intel_vgpu_find_page_track(
struct intel_vgpu *vgpu, unsigned long gfn);
int intel_vgpu_register_page_track(struct intel_vgpu *vgpu,
unsigned long gfn, gvt_page_track_handler_t handler,
void *priv);
void intel_vgpu_unregister_page_track(struct intel_vgpu *vgpu,
unsigned long gfn);
int intel_vgpu_enable_page_track(struct intel_vgpu *vgpu, unsigned long gfn);
int intel_vgpu_disable_page_track(struct intel_vgpu *vgpu, unsigned long gfn);
int intel_vgpu_page_track_handler(struct intel_vgpu *vgpu, u64 gpa,
void *data, unsigned int bytes);
#endif
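
To make the intended use of this interface concrete, here is a rough caller-side sketch (not part of the patch): a handler is registered for a gfn and the write-protection is then armed. The names my_shadow_page, sample_write_handler, sample_track_guest_page and last_write_offset are invented for illustration; only the intel_vgpu_* calls come from the header above.

#include "i915_drv.h"
#include "gvt.h"

/* Invented example state; a real caller would use its own shadow page type. */
struct my_shadow_page {
	struct intel_vgpu *vgpu;
	unsigned long gfn;
	unsigned long last_write_offset;
};

static int sample_write_handler(struct intel_vgpu_page_track *page_track,
				u64 gpa, void *data, int bytes)
{
	struct my_shadow_page *spt = page_track->priv_data;

	/*
	 * A real handler would decode the trapped write and update the
	 * shadow page table; here we only record where the guest wrote.
	 */
	spt->last_write_offset = offset_in_page(gpa);
	return 0;
}

static int sample_track_guest_page(struct my_shadow_page *spt)
{
	int ret;

	ret = intel_vgpu_register_page_track(spt->vgpu, spt->gfn,
					     sample_write_handler, spt);
	if (ret)
		return ret;

	/* Ask the hypervisor backend to actually write-protect the gfn. */
	return intel_vgpu_enable_page_track(spt->vgpu, spt->gfn);
}
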

View File

@ -103,9 +103,8 @@ static void gvt_balance_timeslice(struct gvt_sched_data *sched_data)
list_for_each(pos, &sched_data->lru_runq_head) { list_for_each(pos, &sched_data->lru_runq_head) {
vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list); vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list);
fair_timeslice = ms_to_ktime(GVT_TS_BALANCE_PERIOD_MS) * fair_timeslice = ktime_divns(ms_to_ktime(GVT_TS_BALANCE_PERIOD_MS),
vgpu_data->sched_ctl.weight / total_weight) * vgpu_data->sched_ctl.weight;
total_weight;
vgpu_data->allocated_ts = fair_timeslice; vgpu_data->allocated_ts = fair_timeslice;
vgpu_data->left_ts = vgpu_data->allocated_ts; vgpu_data->left_ts = vgpu_data->allocated_ts;
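
For reference, the new right-hand expression above computes the per-vGPU slice by first dividing the balance period by the total weight (ktime_divns() operates in nanoseconds) and then scaling by this vGPU's weight. A fragment with made-up numbers, purely to illustrate the arithmetic:

/* Hypothetical values: 100 ms balance period, total weight 16, vGPU weight 4. */
ktime_t period = ms_to_ktime(100);
s64 total_weight = 16, weight = 4;
ktime_t fair_timeslice = ktime_divns(period, total_weight) * weight;
/* fair_timeslice == 25 * NSEC_PER_MSEC, i.e. a 25 ms slice. */
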

View File

@ -113,7 +113,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
#undef COPY_REG #undef COPY_REG
set_context_pdp_root_pointer(shadow_ring_context, set_context_pdp_root_pointer(shadow_ring_context,
workload->shadow_mm->shadow_page_table); (void *)workload->shadow_mm->ppgtt_mm.shadow_pdps);
intel_gvt_hypervisor_read_gpa(vgpu, intel_gvt_hypervisor_read_gpa(vgpu,
workload->ring_context_gpa + workload->ring_context_gpa +
@ -126,7 +126,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
return 0; return 0;
} }
static inline bool is_gvt_request(struct drm_i915_gem_request *req) static inline bool is_gvt_request(struct i915_request *req)
{ {
return i915_gem_context_force_single_submission(req->ctx); return i915_gem_context_force_single_submission(req->ctx);
} }
@ -148,7 +148,7 @@ static void save_ring_hw_state(struct intel_vgpu *vgpu, int ring_id)
static int shadow_context_status_change(struct notifier_block *nb, static int shadow_context_status_change(struct notifier_block *nb,
unsigned long action, void *data) unsigned long action, void *data)
{ {
struct drm_i915_gem_request *req = (struct drm_i915_gem_request *)data; struct i915_request *req = data;
struct intel_gvt *gvt = container_of(nb, struct intel_gvt, struct intel_gvt *gvt = container_of(nb, struct intel_gvt,
shadow_ctx_notifier_block[req->engine->id]); shadow_ctx_notifier_block[req->engine->id]);
struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
@ -225,6 +225,11 @@ static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload)
struct intel_vgpu *vgpu = workload->vgpu; struct intel_vgpu *vgpu = workload->vgpu;
void *shadow_ring_buffer_va; void *shadow_ring_buffer_va;
u32 *cs; u32 *cs;
struct i915_request *req = workload->req;
if (IS_KABYLAKE(req->i915) &&
is_inhibit_context(req->ctx, req->engine->id))
intel_vgpu_restore_inhibit_context(vgpu, req);
/* allocate shadow ring buffer */ /* allocate shadow ring buffer */
cs = intel_ring_begin(workload->req, workload->rb_len / sizeof(u32)); cs = intel_ring_begin(workload->req, workload->rb_len / sizeof(u32));
@ -333,13 +338,13 @@ static int intel_gvt_generate_request(struct intel_vgpu_workload *workload)
int ring_id = workload->ring_id; int ring_id = workload->ring_id;
struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv; struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv;
struct intel_engine_cs *engine = dev_priv->engine[ring_id]; struct intel_engine_cs *engine = dev_priv->engine[ring_id];
struct drm_i915_gem_request *rq; struct i915_request *rq;
struct intel_vgpu *vgpu = workload->vgpu; struct intel_vgpu *vgpu = workload->vgpu;
struct intel_vgpu_submission *s = &vgpu->submission; struct intel_vgpu_submission *s = &vgpu->submission;
struct i915_gem_context *shadow_ctx = s->shadow_ctx; struct i915_gem_context *shadow_ctx = s->shadow_ctx;
int ret; int ret;
rq = i915_gem_request_alloc(dev_priv->engine[ring_id], shadow_ctx); rq = i915_request_alloc(dev_priv->engine[ring_id], shadow_ctx);
if (IS_ERR(rq)) { if (IS_ERR(rq)) {
gvt_vgpu_err("fail to allocate gem request\n"); gvt_vgpu_err("fail to allocate gem request\n");
ret = PTR_ERR(rq); ret = PTR_ERR(rq);
@ -348,7 +353,7 @@ static int intel_gvt_generate_request(struct intel_vgpu_workload *workload)
gvt_dbg_sched("ring id %d get i915 gem request %p\n", ring_id, rq); gvt_dbg_sched("ring id %d get i915 gem request %p\n", ring_id, rq);
workload->req = i915_gem_request_get(rq); workload->req = i915_request_get(rq);
ret = copy_workload_to_ring_buffer(workload); ret = copy_workload_to_ring_buffer(workload);
if (ret) if (ret)
goto err_unpin; goto err_unpin;
@ -582,7 +587,7 @@ out:
if (!IS_ERR_OR_NULL(workload->req)) { if (!IS_ERR_OR_NULL(workload->req)) {
gvt_dbg_sched("ring id %d submit workload to i915 %p\n", gvt_dbg_sched("ring id %d submit workload to i915 %p\n",
ring_id, workload->req); ring_id, workload->req);
i915_add_request(workload->req); i915_request_add(workload->req);
workload->dispatched = true; workload->dispatched = true;
} }
@ -769,7 +774,7 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id)
workload->status = 0; workload->status = 0;
} }
i915_gem_request_put(fetch_and_zero(&workload->req)); i915_request_put(fetch_and_zero(&workload->req));
if (!workload->status && !(vgpu->resetting_eng & if (!workload->status && !(vgpu->resetting_eng &
ENGINE_MASK(ring_id))) { ENGINE_MASK(ring_id))) {
@ -886,7 +891,7 @@ static int workload_thread(void *priv)
gvt_dbg_sched("ring id %d wait workload %p\n", gvt_dbg_sched("ring id %d wait workload %p\n",
workload->ring_id, workload); workload->ring_id, workload);
i915_wait_request(workload->req, 0, MAX_SCHEDULE_TIMEOUT); i915_request_wait(workload->req, 0, MAX_SCHEDULE_TIMEOUT);
complete: complete:
gvt_dbg_sched("will complete workload %p, status: %d\n", gvt_dbg_sched("will complete workload %p, status: %d\n",
@ -1132,7 +1137,7 @@ void intel_vgpu_destroy_workload(struct intel_vgpu_workload *workload)
struct intel_vgpu_submission *s = &workload->vgpu->submission; struct intel_vgpu_submission *s = &workload->vgpu->submission;
if (workload->shadow_mm) if (workload->shadow_mm)
intel_gvt_mm_unreference(workload->shadow_mm); intel_vgpu_mm_put(workload->shadow_mm);
kmem_cache_free(s->workloads, workload); kmem_cache_free(s->workloads, workload);
} }
@ -1181,32 +1186,27 @@ static int prepare_mm(struct intel_vgpu_workload *workload)
struct execlist_ctx_descriptor_format *desc = &workload->ctx_desc; struct execlist_ctx_descriptor_format *desc = &workload->ctx_desc;
struct intel_vgpu_mm *mm; struct intel_vgpu_mm *mm;
struct intel_vgpu *vgpu = workload->vgpu; struct intel_vgpu *vgpu = workload->vgpu;
int page_table_level; intel_gvt_gtt_type_t root_entry_type;
u32 pdp[8]; u64 pdps[GVT_RING_CTX_NR_PDPS];
if (desc->addressing_mode == 1) { /* legacy 32-bit */ switch (desc->addressing_mode) {
page_table_level = 3; case 1: /* legacy 32-bit */
} else if (desc->addressing_mode == 3) { /* legacy 64 bit */ root_entry_type = GTT_TYPE_PPGTT_ROOT_L3_ENTRY;
page_table_level = 4; break;
} else { case 3: /* legacy 64-bit */
root_entry_type = GTT_TYPE_PPGTT_ROOT_L4_ENTRY;
break;
default:
gvt_vgpu_err("Advanced Context mode(SVM) is not supported!\n"); gvt_vgpu_err("Advanced Context mode(SVM) is not supported!\n");
return -EINVAL; return -EINVAL;
} }
read_guest_pdps(workload->vgpu, workload->ring_context_gpa, pdp); read_guest_pdps(workload->vgpu, workload->ring_context_gpa, (void *)pdps);
mm = intel_vgpu_find_ppgtt_mm(workload->vgpu, page_table_level, pdp); mm = intel_vgpu_get_ppgtt_mm(workload->vgpu, root_entry_type, pdps);
if (mm) { if (IS_ERR(mm))
intel_gvt_mm_reference(mm); return PTR_ERR(mm);
} else {
mm = intel_vgpu_create_mm(workload->vgpu, INTEL_GVT_MM_PPGTT,
pdp, page_table_level, 0);
if (IS_ERR(mm)) {
gvt_vgpu_err("fail to create mm object.\n");
return PTR_ERR(mm);
}
}
workload->shadow_mm = mm; workload->shadow_mm = mm;
return 0; return 0;
} }

View File

@ -80,7 +80,7 @@ struct intel_shadow_wa_ctx {
struct intel_vgpu_workload { struct intel_vgpu_workload {
struct intel_vgpu *vgpu; struct intel_vgpu *vgpu;
int ring_id; int ring_id;
struct drm_i915_gem_request *req; struct i915_request *req;
/* if this workload has been dispatched to i915? */ /* if this workload has been dispatched to i915? */
bool dispatched; bool dispatched;
bool shadowed; bool shadowed;

View File

@ -113,10 +113,10 @@ TRACE_EVENT(gma_index,
); );
TRACE_EVENT(gma_translate, TRACE_EVENT(gma_translate,
TP_PROTO(int id, char *type, int ring_id, int pt_level, TP_PROTO(int id, char *type, int ring_id, int root_entry_type,
unsigned long gma, unsigned long gpa), unsigned long gma, unsigned long gpa),
TP_ARGS(id, type, ring_id, pt_level, gma, gpa), TP_ARGS(id, type, ring_id, root_entry_type, gma, gpa),
TP_STRUCT__entry( TP_STRUCT__entry(
__array(char, buf, MAX_BUF_LEN) __array(char, buf, MAX_BUF_LEN)
@ -124,8 +124,8 @@ TRACE_EVENT(gma_translate,
TP_fast_assign( TP_fast_assign(
snprintf(__entry->buf, MAX_BUF_LEN, snprintf(__entry->buf, MAX_BUF_LEN,
"VM%d %s ring %d pt_level %d gma 0x%lx -> gpa 0x%lx\n", "VM%d %s ring %d root_entry_type %d gma 0x%lx -> gpa 0x%lx\n",
id, type, ring_id, pt_level, gma, gpa); id, type, ring_id, root_entry_type, gma, gpa);
), ),
TP_printk("%s", __entry->buf) TP_printk("%s", __entry->buf)
@ -168,7 +168,7 @@ TRACE_EVENT(spt_change,
TP_printk("%s", __entry->buf) TP_printk("%s", __entry->buf)
); );
TRACE_EVENT(gpt_change, TRACE_EVENT(spt_guest_change,
TP_PROTO(int id, const char *tag, void *spt, int type, u64 v, TP_PROTO(int id, const char *tag, void *spt, int type, u64 v,
unsigned long index), unsigned long index),

View File

@ -354,6 +354,7 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt,
vgpu->gvt = gvt; vgpu->gvt = gvt;
vgpu->sched_ctl.weight = param->weight; vgpu->sched_ctl.weight = param->weight;
INIT_LIST_HEAD(&vgpu->dmabuf_obj_list_head); INIT_LIST_HEAD(&vgpu->dmabuf_obj_list_head);
INIT_RADIX_TREE(&vgpu->page_track_tree, GFP_KERNEL);
idr_init(&vgpu->object_idr); idr_init(&vgpu->object_idr);
intel_vgpu_init_cfg_space(vgpu, param->primary); intel_vgpu_init_cfg_space(vgpu, param->primary);

View File

@ -519,7 +519,7 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
list_for_each_entry_reverse(file, &dev->filelist, lhead) { list_for_each_entry_reverse(file, &dev->filelist, lhead) {
struct file_stats stats; struct file_stats stats;
struct drm_i915_file_private *file_priv = file->driver_priv; struct drm_i915_file_private *file_priv = file->driver_priv;
struct drm_i915_gem_request *request; struct i915_request *request;
struct task_struct *task; struct task_struct *task;
mutex_lock(&dev->struct_mutex); mutex_lock(&dev->struct_mutex);
@ -536,7 +536,7 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
* Therefore, we need to protect this ->comm access using RCU. * Therefore, we need to protect this ->comm access using RCU.
*/ */
request = list_first_entry_or_null(&file_priv->mm.request_list, request = list_first_entry_or_null(&file_priv->mm.request_list,
struct drm_i915_gem_request, struct i915_request,
client_link); client_link);
rcu_read_lock(); rcu_read_lock();
task = pid_task(request && request->ctx->pid ? task = pid_task(request && request->ctx->pid ?
@ -646,6 +646,56 @@ static int i915_gem_batch_pool_info(struct seq_file *m, void *data)
return 0; return 0;
} }
static void gen8_display_interrupt_info(struct seq_file *m)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
int pipe;
for_each_pipe(dev_priv, pipe) {
enum intel_display_power_domain power_domain;
power_domain = POWER_DOMAIN_PIPE(pipe);
if (!intel_display_power_get_if_enabled(dev_priv,
power_domain)) {
seq_printf(m, "Pipe %c power disabled\n",
pipe_name(pipe));
continue;
}
seq_printf(m, "Pipe %c IMR:\t%08x\n",
pipe_name(pipe),
I915_READ(GEN8_DE_PIPE_IMR(pipe)));
seq_printf(m, "Pipe %c IIR:\t%08x\n",
pipe_name(pipe),
I915_READ(GEN8_DE_PIPE_IIR(pipe)));
seq_printf(m, "Pipe %c IER:\t%08x\n",
pipe_name(pipe),
I915_READ(GEN8_DE_PIPE_IER(pipe)));
intel_display_power_put(dev_priv, power_domain);
}
seq_printf(m, "Display Engine port interrupt mask:\t%08x\n",
I915_READ(GEN8_DE_PORT_IMR));
seq_printf(m, "Display Engine port interrupt identity:\t%08x\n",
I915_READ(GEN8_DE_PORT_IIR));
seq_printf(m, "Display Engine port interrupt enable:\t%08x\n",
I915_READ(GEN8_DE_PORT_IER));
seq_printf(m, "Display Engine misc interrupt mask:\t%08x\n",
I915_READ(GEN8_DE_MISC_IMR));
seq_printf(m, "Display Engine misc interrupt identity:\t%08x\n",
I915_READ(GEN8_DE_MISC_IIR));
seq_printf(m, "Display Engine misc interrupt enable:\t%08x\n",
I915_READ(GEN8_DE_MISC_IER));
seq_printf(m, "PCU interrupt mask:\t%08x\n",
I915_READ(GEN8_PCU_IMR));
seq_printf(m, "PCU interrupt identity:\t%08x\n",
I915_READ(GEN8_PCU_IIR));
seq_printf(m, "PCU interrupt enable:\t%08x\n",
I915_READ(GEN8_PCU_IER));
}
static int i915_interrupt_info(struct seq_file *m, void *data) static int i915_interrupt_info(struct seq_file *m, void *data)
{ {
struct drm_i915_private *dev_priv = node_to_i915(m->private); struct drm_i915_private *dev_priv = node_to_i915(m->private);
@ -709,6 +759,27 @@ static int i915_interrupt_info(struct seq_file *m, void *data)
I915_READ(GEN8_PCU_IIR)); I915_READ(GEN8_PCU_IIR));
seq_printf(m, "PCU interrupt enable:\t%08x\n", seq_printf(m, "PCU interrupt enable:\t%08x\n",
I915_READ(GEN8_PCU_IER)); I915_READ(GEN8_PCU_IER));
} else if (INTEL_GEN(dev_priv) >= 11) {
seq_printf(m, "Master Interrupt Control: %08x\n",
I915_READ(GEN11_GFX_MSTR_IRQ));
seq_printf(m, "Render/Copy Intr Enable: %08x\n",
I915_READ(GEN11_RENDER_COPY_INTR_ENABLE));
seq_printf(m, "VCS/VECS Intr Enable: %08x\n",
I915_READ(GEN11_VCS_VECS_INTR_ENABLE));
seq_printf(m, "GUC/SG Intr Enable:\t %08x\n",
I915_READ(GEN11_GUC_SG_INTR_ENABLE));
seq_printf(m, "GPM/WGBOXPERF Intr Enable: %08x\n",
I915_READ(GEN11_GPM_WGBOXPERF_INTR_ENABLE));
seq_printf(m, "Crypto Intr Enable:\t %08x\n",
I915_READ(GEN11_CRYPTO_RSVD_INTR_ENABLE));
seq_printf(m, "GUnit/CSME Intr Enable:\t %08x\n",
I915_READ(GEN11_GUNIT_CSME_INTR_ENABLE));
seq_printf(m, "Display Interrupt Control:\t%08x\n",
I915_READ(GEN11_DISPLAY_INT_CTL));
gen8_display_interrupt_info(m);
} else if (INTEL_GEN(dev_priv) >= 8) { } else if (INTEL_GEN(dev_priv) >= 8) {
seq_printf(m, "Master Interrupt Control:\t%08x\n", seq_printf(m, "Master Interrupt Control:\t%08x\n",
I915_READ(GEN8_MASTER_IRQ)); I915_READ(GEN8_MASTER_IRQ));
@ -722,49 +793,7 @@ static int i915_interrupt_info(struct seq_file *m, void *data)
i, I915_READ(GEN8_GT_IER(i))); i, I915_READ(GEN8_GT_IER(i)));
} }
for_each_pipe(dev_priv, pipe) { gen8_display_interrupt_info(m);
enum intel_display_power_domain power_domain;
power_domain = POWER_DOMAIN_PIPE(pipe);
if (!intel_display_power_get_if_enabled(dev_priv,
power_domain)) {
seq_printf(m, "Pipe %c power disabled\n",
pipe_name(pipe));
continue;
}
seq_printf(m, "Pipe %c IMR:\t%08x\n",
pipe_name(pipe),
I915_READ(GEN8_DE_PIPE_IMR(pipe)));
seq_printf(m, "Pipe %c IIR:\t%08x\n",
pipe_name(pipe),
I915_READ(GEN8_DE_PIPE_IIR(pipe)));
seq_printf(m, "Pipe %c IER:\t%08x\n",
pipe_name(pipe),
I915_READ(GEN8_DE_PIPE_IER(pipe)));
intel_display_power_put(dev_priv, power_domain);
}
seq_printf(m, "Display Engine port interrupt mask:\t%08x\n",
I915_READ(GEN8_DE_PORT_IMR));
seq_printf(m, "Display Engine port interrupt identity:\t%08x\n",
I915_READ(GEN8_DE_PORT_IIR));
seq_printf(m, "Display Engine port interrupt enable:\t%08x\n",
I915_READ(GEN8_DE_PORT_IER));
seq_printf(m, "Display Engine misc interrupt mask:\t%08x\n",
I915_READ(GEN8_DE_MISC_IMR));
seq_printf(m, "Display Engine misc interrupt identity:\t%08x\n",
I915_READ(GEN8_DE_MISC_IIR));
seq_printf(m, "Display Engine misc interrupt enable:\t%08x\n",
I915_READ(GEN8_DE_MISC_IER));
seq_printf(m, "PCU interrupt mask:\t%08x\n",
I915_READ(GEN8_PCU_IMR));
seq_printf(m, "PCU interrupt identity:\t%08x\n",
I915_READ(GEN8_PCU_IIR));
seq_printf(m, "PCU interrupt enable:\t%08x\n",
I915_READ(GEN8_PCU_IER));
} else if (IS_VALLEYVIEW(dev_priv)) { } else if (IS_VALLEYVIEW(dev_priv)) {
seq_printf(m, "Display IER:\t%08x\n", seq_printf(m, "Display IER:\t%08x\n",
I915_READ(VLV_IER)); I915_READ(VLV_IER));
@ -846,13 +875,35 @@ static int i915_interrupt_info(struct seq_file *m, void *data)
seq_printf(m, "Graphics Interrupt mask: %08x\n", seq_printf(m, "Graphics Interrupt mask: %08x\n",
I915_READ(GTIMR)); I915_READ(GTIMR));
} }
if (INTEL_GEN(dev_priv) >= 6) {
if (INTEL_GEN(dev_priv) >= 11) {
seq_printf(m, "RCS Intr Mask:\t %08x\n",
I915_READ(GEN11_RCS0_RSVD_INTR_MASK));
seq_printf(m, "BCS Intr Mask:\t %08x\n",
I915_READ(GEN11_BCS_RSVD_INTR_MASK));
seq_printf(m, "VCS0/VCS1 Intr Mask:\t %08x\n",
I915_READ(GEN11_VCS0_VCS1_INTR_MASK));
seq_printf(m, "VCS2/VCS3 Intr Mask:\t %08x\n",
I915_READ(GEN11_VCS2_VCS3_INTR_MASK));
seq_printf(m, "VECS0/VECS1 Intr Mask:\t %08x\n",
I915_READ(GEN11_VECS0_VECS1_INTR_MASK));
seq_printf(m, "GUC/SG Intr Mask:\t %08x\n",
I915_READ(GEN11_GUC_SG_INTR_MASK));
seq_printf(m, "GPM/WGBOXPERF Intr Mask: %08x\n",
I915_READ(GEN11_GPM_WGBOXPERF_INTR_MASK));
seq_printf(m, "Crypto Intr Mask:\t %08x\n",
I915_READ(GEN11_CRYPTO_RSVD_INTR_MASK));
seq_printf(m, "Gunit/CSME Intr Mask:\t %08x\n",
I915_READ(GEN11_GUNIT_CSME_INTR_MASK));
} else if (INTEL_GEN(dev_priv) >= 6) {
for_each_engine(engine, dev_priv, id) { for_each_engine(engine, dev_priv, id) {
seq_printf(m, seq_printf(m,
"Graphics Interrupt mask (%s): %08x\n", "Graphics Interrupt mask (%s): %08x\n",
engine->name, I915_READ_IMR(engine)); engine->name, I915_READ_IMR(engine));
} }
} }
intel_runtime_pm_put(dev_priv); intel_runtime_pm_put(dev_priv);
return 0; return 0;
@ -3150,6 +3201,16 @@ static int i915_engine_info(struct seq_file *m, void *unused)
return 0; return 0;
} }
static int i915_rcs_topology(struct seq_file *m, void *unused)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
struct drm_printer p = drm_seq_file_printer(m);
intel_device_info_dump_topology(&INTEL_INFO(dev_priv)->sseu, &p);
return 0;
}
static int i915_shrinker_info(struct seq_file *m, void *unused) static int i915_shrinker_info(struct seq_file *m, void *unused)
{ {
struct drm_i915_private *i915 = node_to_i915(m->private); struct drm_i915_private *i915 = node_to_i915(m->private);
@ -3926,7 +3987,8 @@ i915_wedged_set(void *data, u64 val)
engine->hangcheck.stalled = true; engine->hangcheck.stalled = true;
} }
i915_handle_error(i915, val, "Manually setting wedged to %llu", val); i915_handle_error(i915, val, "Manually set wedged engine mask = %llx",
val);
wait_on_bit(&i915->gpu_error.flags, wait_on_bit(&i915->gpu_error.flags,
I915_RESET_HANDOFF, I915_RESET_HANDOFF,
@ -4060,7 +4122,7 @@ i915_drop_caches_set(void *data, u64 val)
I915_WAIT_LOCKED); I915_WAIT_LOCKED);
if (val & DROP_RETIRE) if (val & DROP_RETIRE)
i915_gem_retire_requests(dev_priv); i915_retire_requests(dev_priv);
mutex_unlock(&dev->struct_mutex); mutex_unlock(&dev->struct_mutex);
} }
@ -4271,7 +4333,7 @@ static void cherryview_sseu_device_status(struct drm_i915_private *dev_priv,
continue; continue;
sseu->slice_mask = BIT(0); sseu->slice_mask = BIT(0);
sseu->subslice_mask |= BIT(ss); sseu->subslice_mask[0] |= BIT(ss);
eu_cnt = ((sig1[ss] & CHV_EU08_PG_ENABLE) ? 0 : 2) + eu_cnt = ((sig1[ss] & CHV_EU08_PG_ENABLE) ? 0 : 2) +
((sig1[ss] & CHV_EU19_PG_ENABLE) ? 0 : 2) + ((sig1[ss] & CHV_EU19_PG_ENABLE) ? 0 : 2) +
((sig1[ss] & CHV_EU210_PG_ENABLE) ? 0 : 2) + ((sig1[ss] & CHV_EU210_PG_ENABLE) ? 0 : 2) +
@ -4286,11 +4348,11 @@ static void gen10_sseu_device_status(struct drm_i915_private *dev_priv,
struct sseu_dev_info *sseu) struct sseu_dev_info *sseu)
{ {
const struct intel_device_info *info = INTEL_INFO(dev_priv); const struct intel_device_info *info = INTEL_INFO(dev_priv);
int s_max = 6, ss_max = 4;
int s, ss; int s, ss;
u32 s_reg[s_max], eu_reg[2 * s_max], eu_mask[2]; u32 s_reg[info->sseu.max_slices];
u32 eu_reg[2 * info->sseu.max_subslices], eu_mask[2];
for (s = 0; s < s_max; s++) { for (s = 0; s < info->sseu.max_slices; s++) {
/* /*
* FIXME: Valid SS Mask respects the spec and read * FIXME: Valid SS Mask respects the spec and read
* only valid bits for those registers, excluding reserved * only valid bits for those registers, excluding reserved
@ -4312,15 +4374,15 @@ static void gen10_sseu_device_status(struct drm_i915_private *dev_priv,
GEN9_PGCTL_SSB_EU210_ACK | GEN9_PGCTL_SSB_EU210_ACK |
GEN9_PGCTL_SSB_EU311_ACK; GEN9_PGCTL_SSB_EU311_ACK;
for (s = 0; s < s_max; s++) { for (s = 0; s < info->sseu.max_slices; s++) {
if ((s_reg[s] & GEN9_PGCTL_SLICE_ACK) == 0) if ((s_reg[s] & GEN9_PGCTL_SLICE_ACK) == 0)
/* skip disabled slice */ /* skip disabled slice */
continue; continue;
sseu->slice_mask |= BIT(s); sseu->slice_mask |= BIT(s);
sseu->subslice_mask = info->sseu.subslice_mask; sseu->subslice_mask[s] = info->sseu.subslice_mask[s];
for (ss = 0; ss < ss_max; ss++) { for (ss = 0; ss < info->sseu.max_subslices; ss++) {
unsigned int eu_cnt; unsigned int eu_cnt;
if (!(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss)))) if (!(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss))))
@ -4340,17 +4402,12 @@ static void gen10_sseu_device_status(struct drm_i915_private *dev_priv,
static void gen9_sseu_device_status(struct drm_i915_private *dev_priv, static void gen9_sseu_device_status(struct drm_i915_private *dev_priv,
struct sseu_dev_info *sseu) struct sseu_dev_info *sseu)
{ {
int s_max = 3, ss_max = 4; const struct intel_device_info *info = INTEL_INFO(dev_priv);
int s, ss; int s, ss;
u32 s_reg[s_max], eu_reg[2*s_max], eu_mask[2]; u32 s_reg[info->sseu.max_slices];
u32 eu_reg[2 * info->sseu.max_subslices], eu_mask[2];
/* BXT has a single slice and at most 3 subslices. */ for (s = 0; s < info->sseu.max_slices; s++) {
if (IS_GEN9_LP(dev_priv)) {
s_max = 1;
ss_max = 3;
}
for (s = 0; s < s_max; s++) {
s_reg[s] = I915_READ(GEN9_SLICE_PGCTL_ACK(s)); s_reg[s] = I915_READ(GEN9_SLICE_PGCTL_ACK(s));
eu_reg[2*s] = I915_READ(GEN9_SS01_EU_PGCTL_ACK(s)); eu_reg[2*s] = I915_READ(GEN9_SS01_EU_PGCTL_ACK(s));
eu_reg[2*s + 1] = I915_READ(GEN9_SS23_EU_PGCTL_ACK(s)); eu_reg[2*s + 1] = I915_READ(GEN9_SS23_EU_PGCTL_ACK(s));
@ -4365,7 +4422,7 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv,
GEN9_PGCTL_SSB_EU210_ACK | GEN9_PGCTL_SSB_EU210_ACK |
GEN9_PGCTL_SSB_EU311_ACK; GEN9_PGCTL_SSB_EU311_ACK;
for (s = 0; s < s_max; s++) { for (s = 0; s < info->sseu.max_slices; s++) {
if ((s_reg[s] & GEN9_PGCTL_SLICE_ACK) == 0) if ((s_reg[s] & GEN9_PGCTL_SLICE_ACK) == 0)
/* skip disabled slice */ /* skip disabled slice */
continue; continue;
@ -4373,10 +4430,10 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv,
sseu->slice_mask |= BIT(s); sseu->slice_mask |= BIT(s);
if (IS_GEN9_BC(dev_priv)) if (IS_GEN9_BC(dev_priv))
sseu->subslice_mask = sseu->subslice_mask[s] =
INTEL_INFO(dev_priv)->sseu.subslice_mask; INTEL_INFO(dev_priv)->sseu.subslice_mask[s];
for (ss = 0; ss < ss_max; ss++) { for (ss = 0; ss < info->sseu.max_subslices; ss++) {
unsigned int eu_cnt; unsigned int eu_cnt;
if (IS_GEN9_LP(dev_priv)) { if (IS_GEN9_LP(dev_priv)) {
@ -4384,7 +4441,7 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv,
/* skip disabled subslice */ /* skip disabled subslice */
continue; continue;
sseu->subslice_mask |= BIT(ss); sseu->subslice_mask[s] |= BIT(ss);
} }
eu_cnt = 2 * hweight32(eu_reg[2*s + ss/2] & eu_cnt = 2 * hweight32(eu_reg[2*s + ss/2] &
@ -4406,9 +4463,12 @@ static void broadwell_sseu_device_status(struct drm_i915_private *dev_priv,
sseu->slice_mask = slice_info & GEN8_LSLICESTAT_MASK; sseu->slice_mask = slice_info & GEN8_LSLICESTAT_MASK;
if (sseu->slice_mask) { if (sseu->slice_mask) {
sseu->subslice_mask = INTEL_INFO(dev_priv)->sseu.subslice_mask;
sseu->eu_per_subslice = sseu->eu_per_subslice =
INTEL_INFO(dev_priv)->sseu.eu_per_subslice; INTEL_INFO(dev_priv)->sseu.eu_per_subslice;
for (s = 0; s < fls(sseu->slice_mask); s++) {
sseu->subslice_mask[s] =
INTEL_INFO(dev_priv)->sseu.subslice_mask[s];
}
sseu->eu_total = sseu->eu_per_subslice * sseu->eu_total = sseu->eu_per_subslice *
sseu_subslice_total(sseu); sseu_subslice_total(sseu);
@ -4427,6 +4487,7 @@ static void i915_print_sseu_info(struct seq_file *m, bool is_available_info,
{ {
struct drm_i915_private *dev_priv = node_to_i915(m->private); struct drm_i915_private *dev_priv = node_to_i915(m->private);
const char *type = is_available_info ? "Available" : "Enabled"; const char *type = is_available_info ? "Available" : "Enabled";
int s;
seq_printf(m, " %s Slice Mask: %04x\n", type, seq_printf(m, " %s Slice Mask: %04x\n", type,
sseu->slice_mask); sseu->slice_mask);
@ -4434,10 +4495,10 @@ static void i915_print_sseu_info(struct seq_file *m, bool is_available_info,
hweight8(sseu->slice_mask)); hweight8(sseu->slice_mask));
seq_printf(m, " %s Subslice Total: %u\n", type, seq_printf(m, " %s Subslice Total: %u\n", type,
sseu_subslice_total(sseu)); sseu_subslice_total(sseu));
seq_printf(m, " %s Subslice Mask: %04x\n", type, for (s = 0; s < fls(sseu->slice_mask); s++) {
sseu->subslice_mask); seq_printf(m, " %s Slice%i subslices: %u\n", type,
seq_printf(m, " %s Subslice Per Slice: %u\n", type, s, hweight8(sseu->subslice_mask[s]));
hweight8(sseu->subslice_mask)); }
seq_printf(m, " %s EU Total: %u\n", type, seq_printf(m, " %s EU Total: %u\n", type,
sseu->eu_total); sseu->eu_total);
seq_printf(m, " %s EU Per Subslice: %u\n", type, seq_printf(m, " %s EU Per Subslice: %u\n", type,
@ -4471,6 +4532,10 @@ static int i915_sseu_status(struct seq_file *m, void *unused)
seq_puts(m, "SSEU Device Status\n"); seq_puts(m, "SSEU Device Status\n");
memset(&sseu, 0, sizeof(sseu)); memset(&sseu, 0, sizeof(sseu));
sseu.max_slices = INTEL_INFO(dev_priv)->sseu.max_slices;
sseu.max_subslices = INTEL_INFO(dev_priv)->sseu.max_subslices;
sseu.max_eus_per_subslice =
INTEL_INFO(dev_priv)->sseu.max_eus_per_subslice;
intel_runtime_pm_get(dev_priv); intel_runtime_pm_get(dev_priv);
@ -4678,6 +4743,7 @@ static const struct drm_info_list i915_debugfs_list[] = {
{"i915_dmc_info", i915_dmc_info, 0}, {"i915_dmc_info", i915_dmc_info, 0},
{"i915_display_info", i915_display_info, 0}, {"i915_display_info", i915_display_info, 0},
{"i915_engine_info", i915_engine_info, 0}, {"i915_engine_info", i915_engine_info, 0},
{"i915_rcs_topology", i915_rcs_topology, 0},
{"i915_shrinker_info", i915_shrinker_info, 0}, {"i915_shrinker_info", i915_shrinker_info, 0},
{"i915_shared_dplls_info", i915_shared_dplls_info, 0}, {"i915_shared_dplls_info", i915_shared_dplls_info, 0},
{"i915_dp_mst_info", i915_dp_mst_info, 0}, {"i915_dp_mst_info", i915_dp_mst_info, 0},

View File

@ -49,6 +49,7 @@
#include "i915_drv.h" #include "i915_drv.h"
#include "i915_trace.h" #include "i915_trace.h"
#include "i915_pmu.h" #include "i915_pmu.h"
#include "i915_query.h"
#include "i915_vgpu.h" #include "i915_vgpu.h"
#include "intel_drv.h" #include "intel_drv.h"
#include "intel_uc.h" #include "intel_uc.h"
@ -428,7 +429,7 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data,
return -ENODEV; return -ENODEV;
break; break;
case I915_PARAM_SUBSLICE_MASK: case I915_PARAM_SUBSLICE_MASK:
value = INTEL_INFO(dev_priv)->sseu.subslice_mask; value = INTEL_INFO(dev_priv)->sseu.subslice_mask[0];
if (!value) if (!value)
return -ENODEV; return -ENODEV;
break; break;
@ -808,7 +809,7 @@ static int i915_workqueues_init(struct drm_i915_private *dev_priv)
/* /*
* The i915 workqueue is primarily used for batched retirement of * The i915 workqueue is primarily used for batched retirement of
* requests (and thus managing bo) once the task has been completed * requests (and thus managing bo) once the task has been completed
* by the GPU. i915_gem_retire_requests() is called directly when we * by the GPU. i915_retire_requests() is called directly when we
* need high-priority retirement, such as waiting for an explicit * need high-priority retirement, such as waiting for an explicit
* bo. * bo.
* *
@ -1992,7 +1993,7 @@ taint:
add_taint(TAINT_WARN, LOCKDEP_STILL_OK); add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
error: error:
i915_gem_set_wedged(i915); i915_gem_set_wedged(i915);
i915_gem_retire_requests(i915); i915_retire_requests(i915);
intel_gpu_reset(i915, ALL_ENGINES); intel_gpu_reset(i915, ALL_ENGINES);
goto finish; goto finish;
} }
@ -2019,7 +2020,7 @@ static inline int intel_gt_reset_engine(struct drm_i915_private *dev_priv,
int i915_reset_engine(struct intel_engine_cs *engine, unsigned int flags) int i915_reset_engine(struct intel_engine_cs *engine, unsigned int flags)
{ {
struct i915_gpu_error *error = &engine->i915->gpu_error; struct i915_gpu_error *error = &engine->i915->gpu_error;
struct drm_i915_gem_request *active_request; struct i915_request *active_request;
int ret; int ret;
GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags)); GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags));
@ -2575,7 +2576,7 @@ static int intel_runtime_suspend(struct device *kdev)
*/ */
i915_gem_runtime_suspend(dev_priv); i915_gem_runtime_suspend(dev_priv);
intel_guc_suspend(dev_priv); intel_uc_suspend(dev_priv);
intel_runtime_pm_disable_interrupts(dev_priv); intel_runtime_pm_disable_interrupts(dev_priv);
@ -2597,7 +2598,7 @@ static int intel_runtime_suspend(struct device *kdev)
intel_runtime_pm_enable_interrupts(dev_priv); intel_runtime_pm_enable_interrupts(dev_priv);
intel_guc_resume(dev_priv); intel_uc_resume(dev_priv);
i915_gem_init_swizzling(dev_priv); i915_gem_init_swizzling(dev_priv);
i915_gem_restore_fences(dev_priv); i915_gem_restore_fences(dev_priv);
@ -2683,7 +2684,7 @@ static int intel_runtime_resume(struct device *kdev)
intel_runtime_pm_enable_interrupts(dev_priv); intel_runtime_pm_enable_interrupts(dev_priv);
intel_guc_resume(dev_priv); intel_uc_resume(dev_priv);
/* /*
* No point of rolling back things in case of an error, as the best * No point of rolling back things in case of an error, as the best
@ -2832,6 +2833,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
DRM_IOCTL_DEF_DRV(I915_PERF_OPEN, i915_perf_open_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_PERF_OPEN, i915_perf_open_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_PERF_ADD_CONFIG, i915_perf_add_config_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_PERF_ADD_CONFIG, i915_perf_add_config_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_PERF_REMOVE_CONFIG, i915_perf_remove_config_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_PERF_REMOVE_CONFIG, i915_perf_remove_config_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_QUERY, i915_query_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
}; };
static struct drm_driver driver = { static struct drm_driver driver = {
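
The hunk above wires up DRM_IOCTL_I915_QUERY. As a rough userspace sketch (not part of the patch, error handling elided, assuming the uapi structures added by this series): the first call with data_ptr left at zero reports the required length in item.length, the second call fills the buffer with the topology blob.

#include <stdint.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static void *query_rcs_topology(int drm_fd, int32_t *len)
{
	struct drm_i915_query_item item = {
		.query_id = DRM_I915_QUERY_TOPOLOGY_INFO,
	};
	struct drm_i915_query query = {
		.num_items = 1,
		.items_ptr = (uintptr_t)&item,
	};
	void *buf;

	ioctl(drm_fd, DRM_IOCTL_I915_QUERY, &query);	/* size probe */
	buf = malloc(item.length);
	item.data_ptr = (uintptr_t)buf;
	ioctl(drm_fd, DRM_IOCTL_I915_QUERY, &query);	/* fetch the blob */

	*len = item.length;
	return buf;	/* caller interprets it as drm_i915_query_topology_info */
}
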

View File

@ -71,9 +71,9 @@
#include "i915_gem_fence_reg.h" #include "i915_gem_fence_reg.h"
#include "i915_gem_object.h" #include "i915_gem_object.h"
#include "i915_gem_gtt.h" #include "i915_gem_gtt.h"
#include "i915_gem_request.h"
#include "i915_gem_timeline.h" #include "i915_gem_timeline.h"
#include "i915_request.h"
#include "i915_vma.h" #include "i915_vma.h"
#include "intel_gvt.h" #include "intel_gvt.h"
@ -83,8 +83,8 @@
#define DRIVER_NAME "i915" #define DRIVER_NAME "i915"
#define DRIVER_DESC "Intel Graphics" #define DRIVER_DESC "Intel Graphics"
#define DRIVER_DATE "20180221" #define DRIVER_DATE "20180308"
#define DRIVER_TIMESTAMP 1519219289 #define DRIVER_TIMESTAMP 1520513379
/* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and
* WARN_ON()) for hw state sanity checks to check for unexpected conditions * WARN_ON()) for hw state sanity checks to check for unexpected conditions
@ -1231,7 +1231,7 @@ struct i915_gpu_error {
* *
* #I915_WEDGED - If reset fails and we can no longer use the GPU, * #I915_WEDGED - If reset fails and we can no longer use the GPU,
* we set the #I915_WEDGED bit. Prior to command submission, e.g. * we set the #I915_WEDGED bit. Prior to command submission, e.g.
* i915_gem_request_alloc(), this bit is checked and the sequence * i915_request_alloc(), this bit is checked and the sequence
* aborted (with -EIO reported to userspace) if set. * aborted (with -EIO reported to userspace) if set.
*/ */
unsigned long flags; unsigned long flags;
@ -2103,6 +2103,7 @@ struct drm_i915_private {
*/ */
struct ida hw_ida; struct ida hw_ida;
#define MAX_CONTEXT_HW_ID (1<<21) /* exclusive */ #define MAX_CONTEXT_HW_ID (1<<21) /* exclusive */
#define GEN11_MAX_CONTEXT_HW_ID (1<<11) /* exclusive */
} contexts; } contexts;
u32 fdi_rx_config; u32 fdi_rx_config;
@ -2746,6 +2747,9 @@ intel_info(const struct drm_i915_private *dev_priv)
#define BLT_RING ENGINE_MASK(BCS) #define BLT_RING ENGINE_MASK(BCS)
#define VEBOX_RING ENGINE_MASK(VECS) #define VEBOX_RING ENGINE_MASK(VECS)
#define BSD2_RING ENGINE_MASK(VCS2) #define BSD2_RING ENGINE_MASK(VCS2)
#define BSD3_RING ENGINE_MASK(VCS3)
#define BSD4_RING ENGINE_MASK(VCS4)
#define VEBOX2_RING ENGINE_MASK(VECS2)
#define ALL_ENGINES (~0) #define ALL_ENGINES (~0)
#define HAS_ENGINE(dev_priv, id) \ #define HAS_ENGINE(dev_priv, id) \
@ -2768,6 +2772,8 @@ intel_info(const struct drm_i915_private *dev_priv)
#define HAS_LOGICAL_RING_CONTEXTS(dev_priv) \ #define HAS_LOGICAL_RING_CONTEXTS(dev_priv) \
((dev_priv)->info.has_logical_ring_contexts) ((dev_priv)->info.has_logical_ring_contexts)
#define HAS_LOGICAL_RING_ELSQ(dev_priv) \
((dev_priv)->info.has_logical_ring_elsq)
#define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \ #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
((dev_priv)->info.has_logical_ring_preemption) ((dev_priv)->info.has_logical_ring_preemption)
@ -2788,9 +2794,10 @@ intel_info(const struct drm_i915_private *dev_priv)
/* Early gen2 have a totally busted CS tlb and require pinned batches. */ /* Early gen2 have a totally busted CS tlb and require pinned batches. */
#define HAS_BROKEN_CS_TLB(dev_priv) (IS_I830(dev_priv) || IS_I845G(dev_priv)) #define HAS_BROKEN_CS_TLB(dev_priv) (IS_I830(dev_priv) || IS_I845G(dev_priv))
/* WaRsDisableCoarsePowerGating:skl,bxt */ /* WaRsDisableCoarsePowerGating:skl,cnl */
#define NEEDS_WaRsDisableCoarsePowerGating(dev_priv) \ #define NEEDS_WaRsDisableCoarsePowerGating(dev_priv) \
(IS_SKL_GT3(dev_priv) || IS_SKL_GT4(dev_priv)) (IS_CANNONLAKE(dev_priv) || \
IS_SKL_GT3(dev_priv) || IS_SKL_GT4(dev_priv))
/* /*
* dp aux and gmbus irq on gen4 seems to be able to generate legacy interrupts * dp aux and gmbus irq on gen4 seems to be able to generate legacy interrupts
@ -3329,7 +3336,7 @@ i915_gem_obj_finish_shmem_access(struct drm_i915_gem_object *obj)
int __must_check i915_mutex_lock_interruptible(struct drm_device *dev); int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
void i915_vma_move_to_active(struct i915_vma *vma, void i915_vma_move_to_active(struct i915_vma *vma,
struct drm_i915_gem_request *req, struct i915_request *rq,
unsigned int flags); unsigned int flags);
int i915_gem_dumb_create(struct drm_file *file_priv, int i915_gem_dumb_create(struct drm_file *file_priv,
struct drm_device *dev, struct drm_device *dev,
@ -3344,11 +3351,9 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
int __must_check i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno); int __must_check i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno);
struct drm_i915_gem_request * struct i915_request *
i915_gem_find_active_request(struct intel_engine_cs *engine); i915_gem_find_active_request(struct intel_engine_cs *engine);
void i915_gem_retire_requests(struct drm_i915_private *dev_priv);
static inline bool i915_reset_backoff(struct i915_gpu_error *error) static inline bool i915_reset_backoff(struct i915_gpu_error *error)
{ {
return unlikely(test_bit(I915_RESET_BACKOFF, &error->flags)); return unlikely(test_bit(I915_RESET_BACKOFF, &error->flags));
@ -3380,7 +3385,7 @@ static inline u32 i915_reset_engine_count(struct i915_gpu_error *error,
return READ_ONCE(error->reset_engine_count[engine->id]); return READ_ONCE(error->reset_engine_count[engine->id]);
} }
struct drm_i915_gem_request * struct i915_request *
i915_gem_reset_prepare_engine(struct intel_engine_cs *engine); i915_gem_reset_prepare_engine(struct intel_engine_cs *engine);
int i915_gem_reset_prepare(struct drm_i915_private *dev_priv); int i915_gem_reset_prepare(struct drm_i915_private *dev_priv);
void i915_gem_reset(struct drm_i915_private *dev_priv); void i915_gem_reset(struct drm_i915_private *dev_priv);
@ -3389,7 +3394,7 @@ void i915_gem_reset_finish(struct drm_i915_private *dev_priv);
void i915_gem_set_wedged(struct drm_i915_private *dev_priv); void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
bool i915_gem_unset_wedged(struct drm_i915_private *dev_priv); bool i915_gem_unset_wedged(struct drm_i915_private *dev_priv);
void i915_gem_reset_engine(struct intel_engine_cs *engine, void i915_gem_reset_engine(struct intel_engine_cs *engine,
struct drm_i915_gem_request *request); struct i915_request *request);
void i915_gem_init_mmio(struct drm_i915_private *i915); void i915_gem_init_mmio(struct drm_i915_private *i915);
int __must_check i915_gem_init(struct drm_i915_private *dev_priv); int __must_check i915_gem_init(struct drm_i915_private *dev_priv);
@ -4008,9 +4013,9 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms)
} }
static inline bool static inline bool
__i915_request_irq_complete(const struct drm_i915_gem_request *req) __i915_request_irq_complete(const struct i915_request *rq)
{ {
struct intel_engine_cs *engine = req->engine; struct intel_engine_cs *engine = rq->engine;
u32 seqno; u32 seqno;
/* Note that the engine may have wrapped around the seqno, and /* Note that the engine may have wrapped around the seqno, and
@ -4019,7 +4024,7 @@ __i915_request_irq_complete(const struct drm_i915_gem_request *req)
* this by kicking all the waiters before resetting the seqno * this by kicking all the waiters before resetting the seqno
* in hardware, and also signal the fence. * in hardware, and also signal the fence.
*/ */
if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &req->fence.flags)) if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
return true; return true;
/* The request was dequeued before we were awoken. We check after /* The request was dequeued before we were awoken. We check after
@ -4028,14 +4033,14 @@ __i915_request_irq_complete(const struct drm_i915_gem_request *req)
* the request execution are sufficient to ensure that a check * the request execution are sufficient to ensure that a check
* after reading the value from hw matches this request. * after reading the value from hw matches this request.
*/ */
seqno = i915_gem_request_global_seqno(req); seqno = i915_request_global_seqno(rq);
if (!seqno) if (!seqno)
return false; return false;
/* Before we do the heavier coherent read of the seqno, /* Before we do the heavier coherent read of the seqno,
* check the value (hopefully) in the CPU cacheline. * check the value (hopefully) in the CPU cacheline.
*/ */
if (__i915_gem_request_completed(req, seqno)) if (__i915_request_completed(rq, seqno))
return true; return true;
/* Ensure our read of the seqno is coherent so that we /* Ensure our read of the seqno is coherent so that we
@ -4084,7 +4089,7 @@ __i915_request_irq_complete(const struct drm_i915_gem_request *req)
wake_up_process(b->irq_wait->tsk); wake_up_process(b->irq_wait->tsk);
spin_unlock_irq(&b->irq_lock); spin_unlock_irq(&b->irq_lock);
if (__i915_gem_request_completed(req, seqno)) if (__i915_request_completed(rq, seqno))
return true; return true;
} }

View File

@ -353,7 +353,7 @@ i915_gem_object_wait_fence(struct dma_fence *fence,
long timeout, long timeout,
struct intel_rps_client *rps_client) struct intel_rps_client *rps_client)
{ {
struct drm_i915_gem_request *rq; struct i915_request *rq;
BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1); BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1);
@ -366,7 +366,7 @@ i915_gem_object_wait_fence(struct dma_fence *fence,
timeout); timeout);
rq = to_request(fence); rq = to_request(fence);
if (i915_gem_request_completed(rq)) if (i915_request_completed(rq))
goto out; goto out;
/* /*
@ -385,16 +385,16 @@ i915_gem_object_wait_fence(struct dma_fence *fence,
* forcing the clocks too high for the whole system, we only allow * forcing the clocks too high for the whole system, we only allow
* each client to waitboost once in a busy period. * each client to waitboost once in a busy period.
*/ */
if (rps_client && !i915_gem_request_started(rq)) { if (rps_client && !i915_request_started(rq)) {
if (INTEL_GEN(rq->i915) >= 6) if (INTEL_GEN(rq->i915) >= 6)
gen6_rps_boost(rq, rps_client); gen6_rps_boost(rq, rps_client);
} }
timeout = i915_wait_request(rq, flags, timeout); timeout = i915_request_wait(rq, flags, timeout);
out: out:
if (flags & I915_WAIT_LOCKED && i915_gem_request_completed(rq)) if (flags & I915_WAIT_LOCKED && i915_request_completed(rq))
i915_gem_request_retire_upto(rq); i915_request_retire_upto(rq);
return timeout; return timeout;
} }
@ -463,7 +463,7 @@ i915_gem_object_wait_reservation(struct reservation_object *resv,
static void __fence_set_priority(struct dma_fence *fence, int prio) static void __fence_set_priority(struct dma_fence *fence, int prio)
{ {
struct drm_i915_gem_request *rq; struct i915_request *rq;
struct intel_engine_cs *engine; struct intel_engine_cs *engine;
if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence)) if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence))
@ -2856,10 +2856,10 @@ static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx)
atomic_inc(&ctx->active_count); atomic_inc(&ctx->active_count);
} }
struct drm_i915_gem_request * struct i915_request *
i915_gem_find_active_request(struct intel_engine_cs *engine) i915_gem_find_active_request(struct intel_engine_cs *engine)
{ {
struct drm_i915_gem_request *request, *active = NULL; struct i915_request *request, *active = NULL;
unsigned long flags; unsigned long flags;
/* We are called by the error capture and reset at a random /* We are called by the error capture and reset at a random
@ -2872,8 +2872,7 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)
*/ */
spin_lock_irqsave(&engine->timeline->lock, flags); spin_lock_irqsave(&engine->timeline->lock, flags);
list_for_each_entry(request, &engine->timeline->requests, link) { list_for_each_entry(request, &engine->timeline->requests, link) {
if (__i915_gem_request_completed(request, if (__i915_request_completed(request, request->global_seqno))
request->global_seqno))
continue; continue;
GEM_BUG_ON(request->engine != engine); GEM_BUG_ON(request->engine != engine);
@ -2906,10 +2905,10 @@ static bool engine_stalled(struct intel_engine_cs *engine)
* Ensure irq handler finishes, and not run again. * Ensure irq handler finishes, and not run again.
* Also return the active request so that we only search for it once. * Also return the active request so that we only search for it once.
*/ */
struct drm_i915_gem_request * struct i915_request *
i915_gem_reset_prepare_engine(struct intel_engine_cs *engine) i915_gem_reset_prepare_engine(struct intel_engine_cs *engine)
{ {
struct drm_i915_gem_request *request = NULL; struct i915_request *request = NULL;
/* /*
* During the reset sequence, we must prevent the engine from * During the reset sequence, we must prevent the engine from
@ -2967,7 +2966,7 @@ i915_gem_reset_prepare_engine(struct intel_engine_cs *engine)
int i915_gem_reset_prepare(struct drm_i915_private *dev_priv) int i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
{ {
struct intel_engine_cs *engine; struct intel_engine_cs *engine;
struct drm_i915_gem_request *request; struct i915_request *request;
enum intel_engine_id id; enum intel_engine_id id;
int err = 0; int err = 0;
@ -2986,7 +2985,7 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
return err; return err;
} }
static void skip_request(struct drm_i915_gem_request *request) static void skip_request(struct i915_request *request)
{ {
void *vaddr = request->ring->vaddr; void *vaddr = request->ring->vaddr;
u32 head; u32 head;
@ -3005,7 +3004,7 @@ static void skip_request(struct drm_i915_gem_request *request)
dma_fence_set_error(&request->fence, -EIO); dma_fence_set_error(&request->fence, -EIO);
} }
static void engine_skip_context(struct drm_i915_gem_request *request) static void engine_skip_context(struct i915_request *request)
{ {
struct intel_engine_cs *engine = request->engine; struct intel_engine_cs *engine = request->engine;
struct i915_gem_context *hung_ctx = request->ctx; struct i915_gem_context *hung_ctx = request->ctx;
@ -3029,9 +3028,9 @@ static void engine_skip_context(struct drm_i915_gem_request *request)
} }
/* Returns the request if it was guilty of the hang */ /* Returns the request if it was guilty of the hang */
static struct drm_i915_gem_request * static struct i915_request *
i915_gem_reset_request(struct intel_engine_cs *engine, i915_gem_reset_request(struct intel_engine_cs *engine,
struct drm_i915_gem_request *request) struct i915_request *request)
{ {
/* The guilty request will get skipped on a hung engine. /* The guilty request will get skipped on a hung engine.
* *
@ -3085,7 +3084,7 @@ i915_gem_reset_request(struct intel_engine_cs *engine,
} }
void i915_gem_reset_engine(struct intel_engine_cs *engine, void i915_gem_reset_engine(struct intel_engine_cs *engine,
struct drm_i915_gem_request *request) struct i915_request *request)
{ {
/* /*
* Make sure this write is visible before we re-enable the interrupt * Make sure this write is visible before we re-enable the interrupt
@ -3113,7 +3112,7 @@ void i915_gem_reset(struct drm_i915_private *dev_priv)
lockdep_assert_held(&dev_priv->drm.struct_mutex); lockdep_assert_held(&dev_priv->drm.struct_mutex);
i915_gem_retire_requests(dev_priv); i915_retire_requests(dev_priv);
for_each_engine(engine, dev_priv, id) { for_each_engine(engine, dev_priv, id) {
struct i915_gem_context *ctx; struct i915_gem_context *ctx;
@ -3134,12 +3133,12 @@ void i915_gem_reset(struct drm_i915_private *dev_priv)
* empty request appears sufficient to paper over the glitch. * empty request appears sufficient to paper over the glitch.
*/ */
if (intel_engine_is_idle(engine)) { if (intel_engine_is_idle(engine)) {
struct drm_i915_gem_request *rq; struct i915_request *rq;
rq = i915_gem_request_alloc(engine, rq = i915_request_alloc(engine,
dev_priv->kernel_context); dev_priv->kernel_context);
if (!IS_ERR(rq)) if (!IS_ERR(rq))
__i915_add_request(rq, false); __i915_request_add(rq, false);
} }
} }
@ -3174,21 +3173,21 @@ void i915_gem_reset_finish(struct drm_i915_private *dev_priv)
} }
} }
static void nop_submit_request(struct drm_i915_gem_request *request) static void nop_submit_request(struct i915_request *request)
{ {
dma_fence_set_error(&request->fence, -EIO); dma_fence_set_error(&request->fence, -EIO);
i915_gem_request_submit(request); i915_request_submit(request);
} }
static void nop_complete_submit_request(struct drm_i915_gem_request *request) static void nop_complete_submit_request(struct i915_request *request)
{ {
unsigned long flags; unsigned long flags;
dma_fence_set_error(&request->fence, -EIO); dma_fence_set_error(&request->fence, -EIO);
spin_lock_irqsave(&request->engine->timeline->lock, flags); spin_lock_irqsave(&request->engine->timeline->lock, flags);
__i915_gem_request_submit(request); __i915_request_submit(request);
intel_engine_init_global_seqno(request->engine, request->global_seqno); intel_engine_init_global_seqno(request->engine, request->global_seqno);
spin_unlock_irqrestore(&request->engine->timeline->lock, flags); spin_unlock_irqrestore(&request->engine->timeline->lock, flags);
} }
@ -3213,8 +3212,10 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
* rolling the global seqno forward (since this would complete requests * rolling the global seqno forward (since this would complete requests
* for which we haven't set the fence error to EIO yet). * for which we haven't set the fence error to EIO yet).
*/ */
for_each_engine(engine, i915, id) for_each_engine(engine, i915, id) {
i915_gem_reset_prepare_engine(engine);
engine->submit_request = nop_submit_request; engine->submit_request = nop_submit_request;
}
/* /*
* Make sure no one is running the old callback before we proceed with * Make sure no one is running the old callback before we proceed with
@ -3256,6 +3257,8 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
intel_engine_init_global_seqno(engine, intel_engine_init_global_seqno(engine,
intel_engine_last_submit(engine)); intel_engine_last_submit(engine));
spin_unlock_irqrestore(&engine->timeline->lock, flags); spin_unlock_irqrestore(&engine->timeline->lock, flags);
i915_gem_reset_finish_engine(engine);
} }
wake_up_all(&i915->gpu_error.reset_queue); wake_up_all(&i915->gpu_error.reset_queue);
@ -3281,7 +3284,7 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
*/ */
list_for_each_entry(tl, &i915->gt.timelines, link) { list_for_each_entry(tl, &i915->gt.timelines, link) {
for (i = 0; i < ARRAY_SIZE(tl->engine); i++) { for (i = 0; i < ARRAY_SIZE(tl->engine); i++) {
struct drm_i915_gem_request *rq; struct i915_request *rq;
rq = i915_gem_active_peek(&tl->engine[i].last_request, rq = i915_gem_active_peek(&tl->engine[i].last_request,
&i915->drm.struct_mutex); &i915->drm.struct_mutex);
@ -3330,7 +3333,7 @@ i915_gem_retire_work_handler(struct work_struct *work)
/* Come back later if the device is busy... */ /* Come back later if the device is busy... */
if (mutex_trylock(&dev->struct_mutex)) { if (mutex_trylock(&dev->struct_mutex)) {
i915_gem_retire_requests(dev_priv); i915_retire_requests(dev_priv);
mutex_unlock(&dev->struct_mutex); mutex_unlock(&dev->struct_mutex);
} }
@ -3418,25 +3421,22 @@ i915_gem_idle_work_handler(struct work_struct *work)
container_of(work, typeof(*dev_priv), gt.idle_work.work); container_of(work, typeof(*dev_priv), gt.idle_work.work);
unsigned int epoch = I915_EPOCH_INVALID; unsigned int epoch = I915_EPOCH_INVALID;
bool rearm_hangcheck; bool rearm_hangcheck;
ktime_t end;
if (!READ_ONCE(dev_priv->gt.awake)) if (!READ_ONCE(dev_priv->gt.awake))
return; return;
/* /*
* Wait for last execlists context complete, but bail out in case a * Wait for last execlists context complete, but bail out in case a
* new request is submitted. * new request is submitted. As we don't trust the hardware, we
* continue on if the wait times out. This is necessary to allow
* the machine to suspend even if the hardware dies, and we will
* try to recover in resume (after depriving the hardware of power,
* it may be in a better mode).
*/ */
end = ktime_add_ms(ktime_get(), I915_IDLE_ENGINES_TIMEOUT); __wait_for(if (new_requests_since_last_retire(dev_priv)) return,
do { intel_engines_are_idle(dev_priv),
if (new_requests_since_last_retire(dev_priv)) I915_IDLE_ENGINES_TIMEOUT * 1000,
return; 10, 500);
if (intel_engines_are_idle(dev_priv))
break;
usleep_range(100, 500);
} while (ktime_before(ktime_get(), end));
rearm_hangcheck = rearm_hangcheck =
cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
@ -3684,7 +3684,7 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags)
if (ret) if (ret)
return ret; return ret;
} }
i915_gem_retire_requests(i915); i915_retire_requests(i915);
ret = wait_for_engines(i915); ret = wait_for_engines(i915);
} else { } else {
@ -4224,7 +4224,7 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_private *dev_priv = to_i915(dev);
struct drm_i915_file_private *file_priv = file->driver_priv; struct drm_i915_file_private *file_priv = file->driver_priv;
unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES; unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
struct drm_i915_gem_request *request, *target = NULL; struct i915_request *request, *target = NULL;
long ret; long ret;
/* ABI: return -EIO if already wedged */ /* ABI: return -EIO if already wedged */
@ -4244,16 +4244,16 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
target = request; target = request;
} }
if (target) if (target)
i915_gem_request_get(target); i915_request_get(target);
spin_unlock(&file_priv->mm.lock); spin_unlock(&file_priv->mm.lock);
if (target == NULL) if (target == NULL)
return 0; return 0;
ret = i915_wait_request(target, ret = i915_request_wait(target,
I915_WAIT_INTERRUPTIBLE, I915_WAIT_INTERRUPTIBLE,
MAX_SCHEDULE_TIMEOUT); MAX_SCHEDULE_TIMEOUT);
i915_gem_request_put(target); i915_request_put(target);
return ret < 0 ? ret : 0; return ret < 0 ? ret : 0;
} }
@ -4367,7 +4367,7 @@ static __always_inline unsigned int
__busy_set_if_active(const struct dma_fence *fence, __busy_set_if_active(const struct dma_fence *fence,
unsigned int (*flag)(unsigned int id)) unsigned int (*flag)(unsigned int id))
{ {
struct drm_i915_gem_request *rq; struct i915_request *rq;
/* We have to check the current hw status of the fence as the uABI /* We have to check the current hw status of the fence as the uABI
* guarantees forward progress. We could rely on the idle worker * guarantees forward progress. We could rely on the idle worker
@ -4380,8 +4380,8 @@ __busy_set_if_active(const struct dma_fence *fence,
return 0; return 0;
/* opencode to_request() in order to avoid const warnings */ /* opencode to_request() in order to avoid const warnings */
rq = container_of(fence, struct drm_i915_gem_request, fence); rq = container_of(fence, struct i915_request, fence);
if (i915_gem_request_completed(rq)) if (i915_request_completed(rq))
return 0; return 0;
return flag(rq->engine->uabi_id); return flag(rq->engine->uabi_id);
@ -4526,8 +4526,7 @@ out:
} }
static void static void
frontbuffer_retire(struct i915_gem_active *active, frontbuffer_retire(struct i915_gem_active *active, struct i915_request *request)
struct drm_i915_gem_request *request)
{ {
struct drm_i915_gem_object *obj = struct drm_i915_gem_object *obj =
container_of(active, typeof(*obj), frontbuffer_write); container_of(active, typeof(*obj), frontbuffer_write);
@ -4921,7 +4920,7 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv)
i915_gem_contexts_lost(dev_priv); i915_gem_contexts_lost(dev_priv);
mutex_unlock(&dev->struct_mutex); mutex_unlock(&dev->struct_mutex);
intel_guc_suspend(dev_priv); intel_uc_suspend(dev_priv);
cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
cancel_delayed_work_sync(&dev_priv->gt.retire_work); cancel_delayed_work_sync(&dev_priv->gt.retire_work);
@ -4988,7 +4987,7 @@ void i915_gem_resume(struct drm_i915_private *i915)
if (i915_gem_init_hw(i915)) if (i915_gem_init_hw(i915))
goto err_wedged; goto err_wedged;
intel_guc_resume(i915); intel_uc_resume(i915);
/* Always reload a context for powersaving. */ /* Always reload a context for powersaving. */
if (i915_gem_switch_to_kernel_context(i915)) if (i915_gem_switch_to_kernel_context(i915))
@ -5161,9 +5160,9 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
return PTR_ERR(ctx); return PTR_ERR(ctx);
for_each_engine(engine, i915, id) { for_each_engine(engine, i915, id) {
struct drm_i915_gem_request *rq; struct i915_request *rq;
rq = i915_gem_request_alloc(engine, ctx); rq = i915_request_alloc(engine, ctx);
if (IS_ERR(rq)) { if (IS_ERR(rq)) {
err = PTR_ERR(rq); err = PTR_ERR(rq);
goto out_ctx; goto out_ctx;
@ -5173,7 +5172,7 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
if (engine->init_context) if (engine->init_context)
err = engine->init_context(rq); err = engine->init_context(rq);
__i915_add_request(rq, true); __i915_request_add(rq, true);
if (err) if (err)
goto err_active; goto err_active;
} }
@ -5479,7 +5478,7 @@ i915_gem_load_init(struct drm_i915_private *dev_priv)
if (!dev_priv->luts) if (!dev_priv->luts)
goto err_vmas; goto err_vmas;
dev_priv->requests = KMEM_CACHE(drm_i915_gem_request, dev_priv->requests = KMEM_CACHE(i915_request,
SLAB_HWCACHE_ALIGN | SLAB_HWCACHE_ALIGN |
SLAB_RECLAIM_ACCOUNT | SLAB_RECLAIM_ACCOUNT |
SLAB_TYPESAFE_BY_RCU); SLAB_TYPESAFE_BY_RCU);
@ -5612,7 +5611,7 @@ int i915_gem_freeze_late(struct drm_i915_private *dev_priv)
void i915_gem_release(struct drm_device *dev, struct drm_file *file) void i915_gem_release(struct drm_device *dev, struct drm_file *file)
{ {
struct drm_i915_file_private *file_priv = file->driver_priv; struct drm_i915_file_private *file_priv = file->driver_priv;
struct drm_i915_gem_request *request; struct i915_request *request;
/* Clean up our request list when the client is going away, so that /* Clean up our request list when the client is going away, so that
* later retire_requests won't dereference our soon-to-be-gone * later retire_requests won't dereference our soon-to-be-gone

View File

@ -29,7 +29,10 @@
#ifdef CONFIG_DRM_I915_DEBUG_GEM #ifdef CONFIG_DRM_I915_DEBUG_GEM
#define GEM_BUG_ON(condition) do { if (unlikely((condition))) { \ #define GEM_BUG_ON(condition) do { if (unlikely((condition))) { \
printk(KERN_ERR "GEM_BUG_ON(%s)\n", __stringify(condition)); \ pr_err("%s:%d GEM_BUG_ON(%s)\n", \
__func__, __LINE__, __stringify(condition)); \
GEM_TRACE("%s:%d GEM_BUG_ON(%s)\n", \
__func__, __LINE__, __stringify(condition)); \
BUG(); \ BUG(); \
} \ } \
} while(0) } while(0)
@ -54,6 +57,6 @@
#define GEM_TRACE(...) do { } while (0) #define GEM_TRACE(...) do { } while (0)
#endif #endif
#define I915_NUM_ENGINES 5 #define I915_NUM_ENGINES 8
#endif /* __I915_GEM_H__ */ #endif /* __I915_GEM_H__ */

View File

@ -119,7 +119,7 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
if (!reservation_object_test_signaled_rcu(resv, true)) if (!reservation_object_test_signaled_rcu(resv, true))
break; break;
i915_gem_retire_requests(pool->engine->i915); i915_retire_requests(pool->engine->i915);
GEM_BUG_ON(i915_gem_object_is_active(obj)); GEM_BUG_ON(i915_gem_object_is_active(obj));
/* /*

View File

@ -211,17 +211,23 @@ static void context_close(struct i915_gem_context *ctx)
static int assign_hw_id(struct drm_i915_private *dev_priv, unsigned *out) static int assign_hw_id(struct drm_i915_private *dev_priv, unsigned *out)
{ {
int ret; int ret;
unsigned int max;
if (INTEL_GEN(dev_priv) >= 11)
max = GEN11_MAX_CONTEXT_HW_ID;
else
max = MAX_CONTEXT_HW_ID;
ret = ida_simple_get(&dev_priv->contexts.hw_ida, ret = ida_simple_get(&dev_priv->contexts.hw_ida,
0, MAX_CONTEXT_HW_ID, GFP_KERNEL); 0, max, GFP_KERNEL);
if (ret < 0) { if (ret < 0) {
/* Contexts are only released when no longer active. /* Contexts are only released when no longer active.
* Flush any pending retires to hopefully release some * Flush any pending retires to hopefully release some
* stale contexts and try again. * stale contexts and try again.
*/ */
i915_gem_retire_requests(dev_priv); i915_retire_requests(dev_priv);
ret = ida_simple_get(&dev_priv->contexts.hw_ida, ret = ida_simple_get(&dev_priv->contexts.hw_ida,
0, MAX_CONTEXT_HW_ID, GFP_KERNEL); 0, max, GFP_KERNEL);
if (ret < 0) if (ret < 0)
return ret; return ret;
} }
@ -463,6 +469,7 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
/* Using the simple ida interface, the max is limited by sizeof(int) */ /* Using the simple ida interface, the max is limited by sizeof(int) */
BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX); BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX);
BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > INT_MAX);
ida_init(&dev_priv->contexts.hw_ida); ida_init(&dev_priv->contexts.hw_ida);
/* lowest priority; idle task */ /* lowest priority; idle task */
@ -590,28 +597,28 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv)
lockdep_assert_held(&dev_priv->drm.struct_mutex); lockdep_assert_held(&dev_priv->drm.struct_mutex);
i915_gem_retire_requests(dev_priv); i915_retire_requests(dev_priv);
for_each_engine(engine, dev_priv, id) { for_each_engine(engine, dev_priv, id) {
struct drm_i915_gem_request *req; struct i915_request *rq;
if (engine_has_idle_kernel_context(engine)) if (engine_has_idle_kernel_context(engine))
continue; continue;
req = i915_gem_request_alloc(engine, dev_priv->kernel_context); rq = i915_request_alloc(engine, dev_priv->kernel_context);
if (IS_ERR(req)) if (IS_ERR(rq))
return PTR_ERR(req); return PTR_ERR(rq);
/* Queue this switch after all other activity */ /* Queue this switch after all other activity */
list_for_each_entry(timeline, &dev_priv->gt.timelines, link) { list_for_each_entry(timeline, &dev_priv->gt.timelines, link) {
struct drm_i915_gem_request *prev; struct i915_request *prev;
struct intel_timeline *tl; struct intel_timeline *tl;
tl = &timeline->engine[engine->id]; tl = &timeline->engine[engine->id];
prev = i915_gem_active_raw(&tl->last_request, prev = i915_gem_active_raw(&tl->last_request,
&dev_priv->drm.struct_mutex); &dev_priv->drm.struct_mutex);
if (prev) if (prev)
i915_sw_fence_await_sw_fence_gfp(&req->submit, i915_sw_fence_await_sw_fence_gfp(&rq->submit,
&prev->submit, &prev->submit,
I915_FENCE_GFP); I915_FENCE_GFP);
} }
@ -623,7 +630,7 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv)
* but an extra layer of paranoia before we declare the system * but an extra layer of paranoia before we declare the system
* idle (on suspend etc) is advisable! * idle (on suspend etc) is advisable!
*/ */
__i915_add_request(req, true); __i915_request_add(rq, true);
} }
return 0; return 0;

View File

@ -38,8 +38,8 @@ struct drm_file;
struct drm_i915_private; struct drm_i915_private;
struct drm_i915_file_private; struct drm_i915_file_private;
struct drm_i915_gem_request;
struct i915_hw_ppgtt; struct i915_hw_ppgtt;
struct i915_request;
struct i915_vma; struct i915_vma;
struct intel_ring; struct intel_ring;
@ -276,7 +276,7 @@ int i915_gem_context_open(struct drm_i915_private *i915,
struct drm_file *file); struct drm_file *file);
void i915_gem_context_close(struct drm_file *file); void i915_gem_context_close(struct drm_file *file);
int i915_switch_context(struct drm_i915_gem_request *req); int i915_switch_context(struct i915_request *rq);
int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv); int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv);
void i915_gem_context_release(struct kref *ctx_ref); void i915_gem_context_release(struct kref *ctx_ref);

View File

@ -168,7 +168,7 @@ i915_gem_evict_something(struct i915_address_space *vm,
* retiring. * retiring.
*/ */
if (!(flags & PIN_NONBLOCK)) if (!(flags & PIN_NONBLOCK))
i915_gem_retire_requests(dev_priv); i915_retire_requests(dev_priv);
else else
phases[1] = NULL; phases[1] = NULL;
@ -293,7 +293,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
* retiring. * retiring.
*/ */
if (!(flags & PIN_NONBLOCK)) if (!(flags & PIN_NONBLOCK))
i915_gem_retire_requests(vm->i915); i915_retire_requests(vm->i915);
check_color = vm->mm.color_adjust; check_color = vm->mm.color_adjust;
if (check_color) { if (check_color) {

View File

@ -200,7 +200,7 @@ struct i915_execbuffer {
struct i915_gem_context *ctx; /** context for building the request */ struct i915_gem_context *ctx; /** context for building the request */
struct i915_address_space *vm; /** GTT and vma for the request */ struct i915_address_space *vm; /** GTT and vma for the request */
struct drm_i915_gem_request *request; /** our request to build */ struct i915_request *request; /** our request to build */
struct i915_vma *batch; /** identity of the batch obj/vma */ struct i915_vma *batch; /** identity of the batch obj/vma */
/** actual size of execobj[] as we may extend it for the cmdparser */ /** actual size of execobj[] as we may extend it for the cmdparser */
@ -227,7 +227,7 @@ struct i915_execbuffer {
bool has_fence : 1; bool has_fence : 1;
bool needs_unfenced : 1; bool needs_unfenced : 1;
struct drm_i915_gem_request *rq; struct i915_request *rq;
u32 *rq_cmd; u32 *rq_cmd;
unsigned int rq_size; unsigned int rq_size;
} reloc_cache; } reloc_cache;
@ -886,7 +886,7 @@ static void reloc_gpu_flush(struct reloc_cache *cache)
i915_gem_object_unpin_map(cache->rq->batch->obj); i915_gem_object_unpin_map(cache->rq->batch->obj);
i915_gem_chipset_flush(cache->rq->i915); i915_gem_chipset_flush(cache->rq->i915);
__i915_add_request(cache->rq, true); __i915_request_add(cache->rq, true);
cache->rq = NULL; cache->rq = NULL;
} }
@ -1070,7 +1070,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
{ {
struct reloc_cache *cache = &eb->reloc_cache; struct reloc_cache *cache = &eb->reloc_cache;
struct drm_i915_gem_object *obj; struct drm_i915_gem_object *obj;
struct drm_i915_gem_request *rq; struct i915_request *rq;
struct i915_vma *batch; struct i915_vma *batch;
u32 *cmd; u32 *cmd;
int err; int err;
@ -1103,13 +1103,13 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
if (err) if (err)
goto err_unmap; goto err_unmap;
rq = i915_gem_request_alloc(eb->engine, eb->ctx); rq = i915_request_alloc(eb->engine, eb->ctx);
if (IS_ERR(rq)) { if (IS_ERR(rq)) {
err = PTR_ERR(rq); err = PTR_ERR(rq);
goto err_unpin; goto err_unpin;
} }
err = i915_gem_request_await_object(rq, vma->obj, true); err = i915_request_await_object(rq, vma->obj, true);
if (err) if (err)
goto err_request; goto err_request;
@ -1141,7 +1141,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
return 0; return 0;
err_request: err_request:
i915_add_request(rq); i915_request_add(rq);
err_unpin: err_unpin:
i915_vma_unpin(batch); i915_vma_unpin(batch);
err_unmap: err_unmap:
@ -1727,7 +1727,7 @@ slow:
} }
static void eb_export_fence(struct i915_vma *vma, static void eb_export_fence(struct i915_vma *vma,
struct drm_i915_gem_request *req, struct i915_request *rq,
unsigned int flags) unsigned int flags)
{ {
struct reservation_object *resv = vma->resv; struct reservation_object *resv = vma->resv;
@ -1739,9 +1739,9 @@ static void eb_export_fence(struct i915_vma *vma,
*/ */
reservation_object_lock(resv, NULL); reservation_object_lock(resv, NULL);
if (flags & EXEC_OBJECT_WRITE) if (flags & EXEC_OBJECT_WRITE)
reservation_object_add_excl_fence(resv, &req->fence); reservation_object_add_excl_fence(resv, &rq->fence);
else if (reservation_object_reserve_shared(resv) == 0) else if (reservation_object_reserve_shared(resv) == 0)
reservation_object_add_shared_fence(resv, &req->fence); reservation_object_add_shared_fence(resv, &rq->fence);
reservation_object_unlock(resv); reservation_object_unlock(resv);
} }
@ -1757,7 +1757,7 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
struct drm_i915_gem_object *obj = vma->obj; struct drm_i915_gem_object *obj = vma->obj;
if (flags & EXEC_OBJECT_CAPTURE) { if (flags & EXEC_OBJECT_CAPTURE) {
struct i915_gem_capture_list *capture; struct i915_capture_list *capture;
capture = kmalloc(sizeof(*capture), GFP_KERNEL); capture = kmalloc(sizeof(*capture), GFP_KERNEL);
if (unlikely(!capture)) if (unlikely(!capture))
@ -1788,7 +1788,7 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
if (flags & EXEC_OBJECT_ASYNC) if (flags & EXEC_OBJECT_ASYNC)
continue; continue;
err = i915_gem_request_await_object err = i915_request_await_object
(eb->request, obj, flags & EXEC_OBJECT_WRITE); (eb->request, obj, flags & EXEC_OBJECT_WRITE);
if (err) if (err)
return err; return err;
@ -1840,13 +1840,13 @@ static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
} }
void i915_vma_move_to_active(struct i915_vma *vma, void i915_vma_move_to_active(struct i915_vma *vma,
struct drm_i915_gem_request *req, struct i915_request *rq,
unsigned int flags) unsigned int flags)
{ {
struct drm_i915_gem_object *obj = vma->obj; struct drm_i915_gem_object *obj = vma->obj;
const unsigned int idx = req->engine->id; const unsigned int idx = rq->engine->id;
lockdep_assert_held(&req->i915->drm.struct_mutex); lockdep_assert_held(&rq->i915->drm.struct_mutex);
GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
/* /*
@ -1860,7 +1860,7 @@ void i915_vma_move_to_active(struct i915_vma *vma,
if (!i915_vma_is_active(vma)) if (!i915_vma_is_active(vma))
obj->active_count++; obj->active_count++;
i915_vma_set_active(vma, idx); i915_vma_set_active(vma, idx);
i915_gem_active_set(&vma->last_read[idx], req); i915_gem_active_set(&vma->last_read[idx], rq);
list_move_tail(&vma->vm_link, &vma->vm->active_list); list_move_tail(&vma->vm_link, &vma->vm->active_list);
obj->write_domain = 0; obj->write_domain = 0;
@ -1868,27 +1868,27 @@ void i915_vma_move_to_active(struct i915_vma *vma,
obj->write_domain = I915_GEM_DOMAIN_RENDER; obj->write_domain = I915_GEM_DOMAIN_RENDER;
if (intel_fb_obj_invalidate(obj, ORIGIN_CS)) if (intel_fb_obj_invalidate(obj, ORIGIN_CS))
i915_gem_active_set(&obj->frontbuffer_write, req); i915_gem_active_set(&obj->frontbuffer_write, rq);
obj->read_domains = 0; obj->read_domains = 0;
} }
obj->read_domains |= I915_GEM_GPU_DOMAINS; obj->read_domains |= I915_GEM_GPU_DOMAINS;
if (flags & EXEC_OBJECT_NEEDS_FENCE) if (flags & EXEC_OBJECT_NEEDS_FENCE)
i915_gem_active_set(&vma->last_fence, req); i915_gem_active_set(&vma->last_fence, rq);
} }
static int i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req) static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
{ {
u32 *cs; u32 *cs;
int i; int i;
if (!IS_GEN7(req->i915) || req->engine->id != RCS) { if (!IS_GEN7(rq->i915) || rq->engine->id != RCS) {
DRM_DEBUG("sol reset is gen7/rcs only\n"); DRM_DEBUG("sol reset is gen7/rcs only\n");
return -EINVAL; return -EINVAL;
} }
cs = intel_ring_begin(req, 4 * 2 + 2); cs = intel_ring_begin(rq, 4 * 2 + 2);
if (IS_ERR(cs)) if (IS_ERR(cs))
return PTR_ERR(cs); return PTR_ERR(cs);
@ -1898,7 +1898,7 @@ static int i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req)
*cs++ = 0; *cs++ = 0;
} }
*cs++ = MI_NOOP; *cs++ = MI_NOOP;
intel_ring_advance(req, cs); intel_ring_advance(rq, cs);
return 0; return 0;
} }
@ -1944,10 +1944,10 @@ out:
} }
static void static void
add_to_client(struct drm_i915_gem_request *req, struct drm_file *file) add_to_client(struct i915_request *rq, struct drm_file *file)
{ {
req->file_priv = file->driver_priv; rq->file_priv = file->driver_priv;
list_add_tail(&req->client_link, &req->file_priv->mm.request_list); list_add_tail(&rq->client_link, &rq->file_priv->mm.request_list);
} }
static int eb_submit(struct i915_execbuffer *eb) static int eb_submit(struct i915_execbuffer *eb)
@ -2151,7 +2151,7 @@ await_fence_array(struct i915_execbuffer *eb,
if (!fence) if (!fence)
return -EINVAL; return -EINVAL;
err = i915_gem_request_await_dma_fence(eb->request, fence); err = i915_request_await_dma_fence(eb->request, fence);
dma_fence_put(fence); dma_fence_put(fence);
if (err < 0) if (err < 0)
return err; return err;
@ -2365,14 +2365,14 @@ i915_gem_do_execbuffer(struct drm_device *dev,
GEM_BUG_ON(eb.reloc_cache.rq); GEM_BUG_ON(eb.reloc_cache.rq);
/* Allocate a request for this batch buffer nice and early. */ /* Allocate a request for this batch buffer nice and early. */
eb.request = i915_gem_request_alloc(eb.engine, eb.ctx); eb.request = i915_request_alloc(eb.engine, eb.ctx);
if (IS_ERR(eb.request)) { if (IS_ERR(eb.request)) {
err = PTR_ERR(eb.request); err = PTR_ERR(eb.request);
goto err_batch_unpin; goto err_batch_unpin;
} }
if (in_fence) { if (in_fence) {
err = i915_gem_request_await_dma_fence(eb.request, in_fence); err = i915_request_await_dma_fence(eb.request, in_fence);
if (err < 0) if (err < 0)
goto err_request; goto err_request;
} }
@ -2400,10 +2400,10 @@ i915_gem_do_execbuffer(struct drm_device *dev,
*/ */
eb.request->batch = eb.batch; eb.request->batch = eb.batch;
trace_i915_gem_request_queue(eb.request, eb.batch_flags); trace_i915_request_queue(eb.request, eb.batch_flags);
err = eb_submit(&eb); err = eb_submit(&eb);
err_request: err_request:
__i915_add_request(eb.request, err == 0); __i915_request_add(eb.request, err == 0);
add_to_client(eb.request, file); add_to_client(eb.request, file);
if (fences) if (fences)

View File

@ -765,16 +765,16 @@ static void gen8_initialize_pml4(struct i915_address_space *vm,
} }
/* Broadwell Page Directory Pointer Descriptors */ /* Broadwell Page Directory Pointer Descriptors */
static int gen8_write_pdp(struct drm_i915_gem_request *req, static int gen8_write_pdp(struct i915_request *rq,
unsigned entry, unsigned entry,
dma_addr_t addr) dma_addr_t addr)
{ {
struct intel_engine_cs *engine = req->engine; struct intel_engine_cs *engine = rq->engine;
u32 *cs; u32 *cs;
BUG_ON(entry >= 4); BUG_ON(entry >= 4);
cs = intel_ring_begin(req, 6); cs = intel_ring_begin(rq, 6);
if (IS_ERR(cs)) if (IS_ERR(cs))
return PTR_ERR(cs); return PTR_ERR(cs);
@ -784,20 +784,20 @@ static int gen8_write_pdp(struct drm_i915_gem_request *req,
*cs++ = MI_LOAD_REGISTER_IMM(1); *cs++ = MI_LOAD_REGISTER_IMM(1);
*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, entry)); *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, entry));
*cs++ = lower_32_bits(addr); *cs++ = lower_32_bits(addr);
intel_ring_advance(req, cs); intel_ring_advance(rq, cs);
return 0; return 0;
} }
static int gen8_mm_switch_3lvl(struct i915_hw_ppgtt *ppgtt, static int gen8_mm_switch_3lvl(struct i915_hw_ppgtt *ppgtt,
struct drm_i915_gem_request *req) struct i915_request *rq)
{ {
int i, ret; int i, ret;
for (i = GEN8_3LVL_PDPES - 1; i >= 0; i--) { for (i = GEN8_3LVL_PDPES - 1; i >= 0; i--) {
const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
ret = gen8_write_pdp(req, i, pd_daddr); ret = gen8_write_pdp(rq, i, pd_daddr);
if (ret) if (ret)
return ret; return ret;
} }
@ -806,9 +806,9 @@ static int gen8_mm_switch_3lvl(struct i915_hw_ppgtt *ppgtt,
} }
static int gen8_mm_switch_4lvl(struct i915_hw_ppgtt *ppgtt, static int gen8_mm_switch_4lvl(struct i915_hw_ppgtt *ppgtt,
struct drm_i915_gem_request *req) struct i915_request *rq)
{ {
return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4)); return gen8_write_pdp(rq, 0, px_dma(&ppgtt->pml4));
} }
/* PDE TLBs are a pain to invalidate on GEN8+. When we modify /* PDE TLBs are a pain to invalidate on GEN8+. When we modify
@ -1732,13 +1732,13 @@ static inline u32 get_pd_offset(struct i915_hw_ppgtt *ppgtt)
} }
static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
struct drm_i915_gem_request *req) struct i915_request *rq)
{ {
struct intel_engine_cs *engine = req->engine; struct intel_engine_cs *engine = rq->engine;
u32 *cs; u32 *cs;
/* NB: TLBs must be flushed and invalidated before a switch */ /* NB: TLBs must be flushed and invalidated before a switch */
cs = intel_ring_begin(req, 6); cs = intel_ring_begin(rq, 6);
if (IS_ERR(cs)) if (IS_ERR(cs))
return PTR_ERR(cs); return PTR_ERR(cs);
@ -1748,19 +1748,19 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine)); *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine));
*cs++ = get_pd_offset(ppgtt); *cs++ = get_pd_offset(ppgtt);
*cs++ = MI_NOOP; *cs++ = MI_NOOP;
intel_ring_advance(req, cs); intel_ring_advance(rq, cs);
return 0; return 0;
} }
static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
struct drm_i915_gem_request *req) struct i915_request *rq)
{ {
struct intel_engine_cs *engine = req->engine; struct intel_engine_cs *engine = rq->engine;
u32 *cs; u32 *cs;
/* NB: TLBs must be flushed and invalidated before a switch */ /* NB: TLBs must be flushed and invalidated before a switch */
cs = intel_ring_begin(req, 6); cs = intel_ring_begin(rq, 6);
if (IS_ERR(cs)) if (IS_ERR(cs))
return PTR_ERR(cs); return PTR_ERR(cs);
@ -1770,16 +1770,16 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine)); *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine));
*cs++ = get_pd_offset(ppgtt); *cs++ = get_pd_offset(ppgtt);
*cs++ = MI_NOOP; *cs++ = MI_NOOP;
intel_ring_advance(req, cs); intel_ring_advance(rq, cs);
return 0; return 0;
} }
static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt, static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
struct drm_i915_gem_request *req) struct i915_request *rq)
{ {
struct intel_engine_cs *engine = req->engine; struct intel_engine_cs *engine = rq->engine;
struct drm_i915_private *dev_priv = req->i915; struct drm_i915_private *dev_priv = rq->i915;
I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G); I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G);
I915_WRITE(RING_PP_DIR_BASE(engine), get_pd_offset(ppgtt)); I915_WRITE(RING_PP_DIR_BASE(engine), get_pd_offset(ppgtt));

View File

@ -39,7 +39,8 @@
#include <linux/pagevec.h> #include <linux/pagevec.h>
#include "i915_gem_timeline.h" #include "i915_gem_timeline.h"
#include "i915_gem_request.h"
#include "i915_request.h"
#include "i915_selftest.h" #include "i915_selftest.h"
#define I915_GTT_PAGE_SIZE_4K BIT(12) #define I915_GTT_PAGE_SIZE_4K BIT(12)
@ -398,7 +399,7 @@ struct i915_hw_ppgtt {
gen6_pte_t __iomem *pd_addr; gen6_pte_t __iomem *pd_addr;
int (*switch_mm)(struct i915_hw_ppgtt *ppgtt, int (*switch_mm)(struct i915_hw_ppgtt *ppgtt,
struct drm_i915_gem_request *req); struct i915_request *rq);
void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file *m); void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file *m);
}; };

View File

@ -33,7 +33,7 @@
#include <drm/i915_drm.h> #include <drm/i915_drm.h>
#include "i915_gem_request.h" #include "i915_request.h"
#include "i915_selftest.h" #include "i915_selftest.h"
struct drm_i915_gem_object; struct drm_i915_gem_object;

View File

@ -177,7 +177,7 @@ err:
#undef OUT_BATCH #undef OUT_BATCH
int i915_gem_render_state_emit(struct drm_i915_gem_request *rq) int i915_gem_render_state_emit(struct i915_request *rq)
{ {
struct intel_engine_cs *engine = rq->engine; struct intel_engine_cs *engine = rq->engine;
struct intel_render_state so = {}; /* keep the compiler happy */ struct intel_render_state so = {}; /* keep the compiler happy */

View File

@ -24,8 +24,8 @@
#ifndef _I915_GEM_RENDER_STATE_H_ #ifndef _I915_GEM_RENDER_STATE_H_
#define _I915_GEM_RENDER_STATE_H_ #define _I915_GEM_RENDER_STATE_H_
struct drm_i915_gem_request; struct i915_request;
int i915_gem_render_state_emit(struct drm_i915_gem_request *rq); int i915_gem_render_state_emit(struct i915_request *rq);
#endif /* _I915_GEM_RENDER_STATE_H_ */ #endif /* _I915_GEM_RENDER_STATE_H_ */

View File

@ -175,7 +175,7 @@ i915_gem_shrink(struct drm_i915_private *i915,
i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED); i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED);
trace_i915_gem_shrink(i915, target, flags); trace_i915_gem_shrink(i915, target, flags);
i915_gem_retire_requests(i915); i915_retire_requests(i915);
/* /*
* Unbinding of objects will require HW access; Let us not wake the * Unbinding of objects will require HW access; Let us not wake the
@ -267,7 +267,7 @@ i915_gem_shrink(struct drm_i915_private *i915,
if (flags & I915_SHRINK_BOUND) if (flags & I915_SHRINK_BOUND)
intel_runtime_pm_put(i915); intel_runtime_pm_put(i915);
i915_gem_retire_requests(i915); i915_retire_requests(i915);
shrinker_unlock(i915, unlock); shrinker_unlock(i915, unlock);

View File

@ -27,9 +27,9 @@
#include <linux/list.h> #include <linux/list.h>
#include "i915_utils.h" #include "i915_request.h"
#include "i915_gem_request.h"
#include "i915_syncmap.h" #include "i915_syncmap.h"
#include "i915_utils.h"
struct i915_gem_timeline; struct i915_gem_timeline;

View File

@ -586,6 +586,7 @@ static void err_print_capabilities(struct drm_i915_error_state_buf *m,
intel_device_info_dump_flags(info, &p); intel_device_info_dump_flags(info, &p);
intel_driver_caps_print(caps, &p); intel_driver_caps_print(caps, &p);
intel_device_info_dump_topology(&info->sseu, &p);
} }
static void err_print_params(struct drm_i915_error_state_buf *m, static void err_print_params(struct drm_i915_error_state_buf *m,
@ -991,7 +992,7 @@ out:
static inline uint32_t static inline uint32_t
__active_get_seqno(struct i915_gem_active *active) __active_get_seqno(struct i915_gem_active *active)
{ {
struct drm_i915_gem_request *request; struct i915_request *request;
request = __i915_gem_active_peek(active); request = __i915_gem_active_peek(active);
return request ? request->global_seqno : 0; return request ? request->global_seqno : 0;
@ -1000,7 +1001,7 @@ __active_get_seqno(struct i915_gem_active *active)
static inline int static inline int
__active_get_engine_id(struct i915_gem_active *active) __active_get_engine_id(struct i915_gem_active *active)
{ {
struct drm_i915_gem_request *request; struct i915_request *request;
request = __i915_gem_active_peek(active); request = __i915_gem_active_peek(active);
return request ? request->engine->id : -1; return request ? request->engine->id : -1;
@ -1084,9 +1085,9 @@ static uint32_t i915_error_generate_code(struct drm_i915_private *dev_priv,
return error_code; return error_code;
} }
static void i915_gem_record_fences(struct drm_i915_private *dev_priv, static void gem_record_fences(struct i915_gpu_state *error)
struct i915_gpu_state *error)
{ {
struct drm_i915_private *dev_priv = error->i915;
int i; int i;
if (INTEL_GEN(dev_priv) >= 6) { if (INTEL_GEN(dev_priv) >= 6) {
@ -1102,27 +1103,6 @@ static void i915_gem_record_fences(struct drm_i915_private *dev_priv,
error->nfence = i; error->nfence = i;
} }
static inline u32
gen8_engine_sync_index(struct intel_engine_cs *engine,
struct intel_engine_cs *other)
{
int idx;
/*
* rcs -> 0 = vcs, 1 = bcs, 2 = vecs, 3 = vcs2;
* vcs -> 0 = bcs, 1 = vecs, 2 = vcs2, 3 = rcs;
* bcs -> 0 = vecs, 1 = vcs2. 2 = rcs, 3 = vcs;
* vecs -> 0 = vcs2, 1 = rcs, 2 = vcs, 3 = bcs;
* vcs2 -> 0 = rcs, 1 = vcs, 2 = bcs, 3 = vecs;
*/
idx = (other - engine) - 1;
if (idx < 0)
idx += I915_NUM_ENGINES;
return idx;
}
static void gen6_record_semaphore_state(struct intel_engine_cs *engine, static void gen6_record_semaphore_state(struct intel_engine_cs *engine,
struct drm_i915_error_engine *ee) struct drm_i915_error_engine *ee)
{ {
@ -1293,7 +1273,7 @@ static void error_record_engine_registers(struct i915_gpu_state *error,
} }
} }
static void record_request(struct drm_i915_gem_request *request, static void record_request(struct i915_request *request,
struct drm_i915_error_request *erq) struct drm_i915_error_request *erq)
{ {
erq->context = request->ctx->hw_id; erq->context = request->ctx->hw_id;
@ -1310,10 +1290,10 @@ static void record_request(struct drm_i915_gem_request *request,
} }
static void engine_record_requests(struct intel_engine_cs *engine, static void engine_record_requests(struct intel_engine_cs *engine,
struct drm_i915_gem_request *first, struct i915_request *first,
struct drm_i915_error_engine *ee) struct drm_i915_error_engine *ee)
{ {
struct drm_i915_gem_request *request; struct i915_request *request;
int count; int count;
count = 0; count = 0;
@ -1363,7 +1343,7 @@ static void error_record_engine_execlists(struct intel_engine_cs *engine,
unsigned int n; unsigned int n;
for (n = 0; n < execlists_num_ports(execlists); n++) { for (n = 0; n < execlists_num_ports(execlists); n++) {
struct drm_i915_gem_request *rq = port_request(&execlists->port[n]); struct i915_request *rq = port_request(&execlists->port[n]);
if (!rq) if (!rq)
break; break;
@ -1398,10 +1378,10 @@ static void record_context(struct drm_i915_error_context *e,
e->active = atomic_read(&ctx->active_count); e->active = atomic_read(&ctx->active_count);
} }
static void request_record_user_bo(struct drm_i915_gem_request *request, static void request_record_user_bo(struct i915_request *request,
struct drm_i915_error_engine *ee) struct drm_i915_error_engine *ee)
{ {
struct i915_gem_capture_list *c; struct i915_capture_list *c;
struct drm_i915_error_object **bo; struct drm_i915_error_object **bo;
long count; long count;
@ -1445,16 +1425,16 @@ capture_object(struct drm_i915_private *dev_priv,
} }
} }
static void i915_gem_record_rings(struct drm_i915_private *dev_priv, static void gem_record_rings(struct i915_gpu_state *error)
struct i915_gpu_state *error)
{ {
struct i915_ggtt *ggtt = &dev_priv->ggtt; struct drm_i915_private *i915 = error->i915;
struct i915_ggtt *ggtt = &i915->ggtt;
int i; int i;
for (i = 0; i < I915_NUM_ENGINES; i++) { for (i = 0; i < I915_NUM_ENGINES; i++) {
struct intel_engine_cs *engine = dev_priv->engine[i]; struct intel_engine_cs *engine = i915->engine[i];
struct drm_i915_error_engine *ee = &error->engine[i]; struct drm_i915_error_engine *ee = &error->engine[i];
struct drm_i915_gem_request *request; struct i915_request *request;
ee->engine_id = -1; ee->engine_id = -1;
@ -1481,17 +1461,16 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
* by userspace. * by userspace.
*/ */
ee->batchbuffer = ee->batchbuffer =
i915_error_object_create(dev_priv, i915_error_object_create(i915, request->batch);
request->batch);
if (HAS_BROKEN_CS_TLB(dev_priv)) if (HAS_BROKEN_CS_TLB(i915))
ee->wa_batchbuffer = ee->wa_batchbuffer =
i915_error_object_create(dev_priv, i915_error_object_create(i915,
engine->scratch); engine->scratch);
request_record_user_bo(request, ee); request_record_user_bo(request, ee);
ee->ctx = ee->ctx =
i915_error_object_create(dev_priv, i915_error_object_create(i915,
request->ctx->engine[i].state); request->ctx->engine[i].state);
error->simulated |= error->simulated |=
@ -1505,27 +1484,24 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
ee->cpu_ring_head = ring->head; ee->cpu_ring_head = ring->head;
ee->cpu_ring_tail = ring->tail; ee->cpu_ring_tail = ring->tail;
ee->ringbuffer = ee->ringbuffer =
i915_error_object_create(dev_priv, ring->vma); i915_error_object_create(i915, ring->vma);
engine_record_requests(engine, request, ee); engine_record_requests(engine, request, ee);
} }
ee->hws_page = ee->hws_page =
i915_error_object_create(dev_priv, i915_error_object_create(i915,
engine->status_page.vma); engine->status_page.vma);
ee->wa_ctx = ee->wa_ctx = i915_error_object_create(i915, engine->wa_ctx.vma);
i915_error_object_create(dev_priv, engine->wa_ctx.vma);
ee->default_state = ee->default_state = capture_object(i915, engine->default_state);
capture_object(dev_priv, engine->default_state);
} }
} }
static void i915_gem_capture_vm(struct drm_i915_private *dev_priv, static void gem_capture_vm(struct i915_gpu_state *error,
struct i915_gpu_state *error, struct i915_address_space *vm,
struct i915_address_space *vm, int idx)
int idx)
{ {
struct drm_i915_error_buffer *active_bo; struct drm_i915_error_buffer *active_bo;
struct i915_vma *vma; struct i915_vma *vma;
@ -1548,8 +1524,7 @@ static void i915_gem_capture_vm(struct drm_i915_private *dev_priv,
error->active_bo_count[idx] = count; error->active_bo_count[idx] = count;
} }
static void i915_capture_active_buffers(struct drm_i915_private *dev_priv, static void capture_active_buffers(struct i915_gpu_state *error)
struct i915_gpu_state *error)
{ {
int cnt = 0, i, j; int cnt = 0, i, j;
@ -1569,14 +1544,13 @@ static void i915_capture_active_buffers(struct drm_i915_private *dev_priv,
for (j = 0; j < i && !found; j++) for (j = 0; j < i && !found; j++)
found = error->engine[j].vm == ee->vm; found = error->engine[j].vm == ee->vm;
if (!found) if (!found)
i915_gem_capture_vm(dev_priv, error, ee->vm, cnt++); gem_capture_vm(error, ee->vm, cnt++);
} }
} }
static void i915_capture_pinned_buffers(struct drm_i915_private *dev_priv, static void capture_pinned_buffers(struct i915_gpu_state *error)
struct i915_gpu_state *error)
{ {
struct i915_address_space *vm = &dev_priv->ggtt.base; struct i915_address_space *vm = &error->i915->ggtt.base;
struct drm_i915_error_buffer *bo; struct drm_i915_error_buffer *bo;
struct i915_vma *vma; struct i915_vma *vma;
int count_inactive, count_active; int count_inactive, count_active;
@ -1626,9 +1600,9 @@ static void capture_uc_state(struct i915_gpu_state *error)
} }
/* Capture all registers which don't fit into another category. */ /* Capture all registers which don't fit into another category. */
static void i915_capture_reg_state(struct drm_i915_private *dev_priv, static void capture_reg_state(struct i915_gpu_state *error)
struct i915_gpu_state *error)
{ {
struct drm_i915_private *dev_priv = error->i915;
int i; int i;
/* General organization /* General organization
@ -1725,24 +1699,25 @@ static void i915_error_capture_msg(struct drm_i915_private *dev_priv,
engine_mask ? "reset" : "continue"); engine_mask ? "reset" : "continue");
} }
static void i915_capture_gen_state(struct drm_i915_private *dev_priv, static void capture_gen_state(struct i915_gpu_state *error)
struct i915_gpu_state *error)
{ {
error->awake = dev_priv->gt.awake; struct drm_i915_private *i915 = error->i915;
error->wakelock = atomic_read(&dev_priv->runtime_pm.wakeref_count);
error->suspended = dev_priv->runtime_pm.suspended; error->awake = i915->gt.awake;
error->wakelock = atomic_read(&i915->runtime_pm.wakeref_count);
error->suspended = i915->runtime_pm.suspended;
error->iommu = -1; error->iommu = -1;
#ifdef CONFIG_INTEL_IOMMU #ifdef CONFIG_INTEL_IOMMU
error->iommu = intel_iommu_gfx_mapped; error->iommu = intel_iommu_gfx_mapped;
#endif #endif
error->reset_count = i915_reset_count(&dev_priv->gpu_error); error->reset_count = i915_reset_count(&i915->gpu_error);
error->suspend_count = dev_priv->suspend_count; error->suspend_count = i915->suspend_count;
memcpy(&error->device_info, memcpy(&error->device_info,
INTEL_INFO(dev_priv), INTEL_INFO(i915),
sizeof(error->device_info)); sizeof(error->device_info));
error->driver_caps = dev_priv->caps; error->driver_caps = i915->caps;
} }
static __always_inline void dup_param(const char *type, void *x) static __always_inline void dup_param(const char *type, void *x)
@ -1769,14 +1744,13 @@ static int capture(void *data)
error->i915->gt.last_init_time); error->i915->gt.last_init_time);
capture_params(error); capture_params(error);
capture_gen_state(error);
capture_uc_state(error); capture_uc_state(error);
capture_reg_state(error);
i915_capture_gen_state(error->i915, error); gem_record_fences(error);
i915_capture_reg_state(error->i915, error); gem_record_rings(error);
i915_gem_record_fences(error->i915, error); capture_active_buffers(error);
i915_gem_record_rings(error->i915, error); capture_pinned_buffers(error);
i915_capture_active_buffers(error->i915, error);
i915_capture_pinned_buffers(error->i915, error);
error->overlay = intel_overlay_capture_error_state(error->i915); error->overlay = intel_overlay_capture_error_state(error->i915);
error->display = intel_display_capture_error_state(error->i915); error->display = intel_display_capture_error_state(error->i915);

View File

@ -415,6 +415,9 @@ void gen6_enable_rps_interrupts(struct drm_i915_private *dev_priv)
if (READ_ONCE(rps->interrupts_enabled)) if (READ_ONCE(rps->interrupts_enabled))
return; return;
if (WARN_ON_ONCE(IS_GEN11(dev_priv)))
return;
spin_lock_irq(&dev_priv->irq_lock); spin_lock_irq(&dev_priv->irq_lock);
WARN_ON_ONCE(rps->pm_iir); WARN_ON_ONCE(rps->pm_iir);
WARN_ON_ONCE(I915_READ(gen6_pm_iir(dev_priv)) & dev_priv->pm_rps_events); WARN_ON_ONCE(I915_READ(gen6_pm_iir(dev_priv)) & dev_priv->pm_rps_events);
@ -431,6 +434,9 @@ void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv)
if (!READ_ONCE(rps->interrupts_enabled)) if (!READ_ONCE(rps->interrupts_enabled))
return; return;
if (WARN_ON_ONCE(IS_GEN11(dev_priv)))
return;
spin_lock_irq(&dev_priv->irq_lock); spin_lock_irq(&dev_priv->irq_lock);
rps->interrupts_enabled = false; rps->interrupts_enabled = false;
@ -1071,7 +1077,7 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv)
static void notify_ring(struct intel_engine_cs *engine) static void notify_ring(struct intel_engine_cs *engine)
{ {
struct drm_i915_gem_request *rq = NULL; struct i915_request *rq = NULL;
struct intel_wait *wait; struct intel_wait *wait;
if (!engine->breadcrumbs.irq_armed) if (!engine->breadcrumbs.irq_armed)
@ -1098,13 +1104,13 @@ static void notify_ring(struct intel_engine_cs *engine)
*/ */
if (i915_seqno_passed(intel_engine_get_seqno(engine), if (i915_seqno_passed(intel_engine_get_seqno(engine),
wait->seqno)) { wait->seqno)) {
struct drm_i915_gem_request *waiter = wait->request; struct i915_request *waiter = wait->request;
wakeup = true; wakeup = true;
if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
&waiter->fence.flags) && &waiter->fence.flags) &&
intel_wait_check_request(wait, waiter)) intel_wait_check_request(wait, waiter))
rq = i915_gem_request_get(waiter); rq = i915_request_get(waiter);
} }
if (wakeup) if (wakeup)
@ -1117,7 +1123,8 @@ static void notify_ring(struct intel_engine_cs *engine)
if (rq) { if (rq) {
dma_fence_signal(&rq->fence); dma_fence_signal(&rq->fence);
i915_gem_request_put(rq); GEM_BUG_ON(!i915_request_completed(rq));
i915_request_put(rq);
} }
trace_intel_engine_notify(engine, wait); trace_intel_engine_notify(engine, wait);
@ -2755,6 +2762,156 @@ static void __fini_wedge(struct wedge_me *w)
(W)->i915; \ (W)->i915; \
__fini_wedge((W))) __fini_wedge((W)))
static __always_inline void
gen11_cs_irq_handler(struct intel_engine_cs * const engine, const u32 iir)
{
gen8_cs_irq_handler(engine, iir, 0);
}
static void
gen11_gt_engine_irq_handler(struct drm_i915_private * const i915,
const unsigned int bank,
const unsigned int engine_n,
const u16 iir)
{
struct intel_engine_cs ** const engine = i915->engine;
switch (bank) {
case 0:
switch (engine_n) {
case GEN11_RCS0:
return gen11_cs_irq_handler(engine[RCS], iir);
case GEN11_BCS:
return gen11_cs_irq_handler(engine[BCS], iir);
}
case 1:
switch (engine_n) {
case GEN11_VCS(0):
return gen11_cs_irq_handler(engine[_VCS(0)], iir);
case GEN11_VCS(1):
return gen11_cs_irq_handler(engine[_VCS(1)], iir);
case GEN11_VCS(2):
return gen11_cs_irq_handler(engine[_VCS(2)], iir);
case GEN11_VCS(3):
return gen11_cs_irq_handler(engine[_VCS(3)], iir);
case GEN11_VECS(0):
return gen11_cs_irq_handler(engine[_VECS(0)], iir);
case GEN11_VECS(1):
return gen11_cs_irq_handler(engine[_VECS(1)], iir);
}
}
}
static u32
gen11_gt_engine_intr(struct drm_i915_private * const i915,
const unsigned int bank, const unsigned int bit)
{
void __iomem * const regs = i915->regs;
u32 timeout_ts;
u32 ident;
raw_reg_write(regs, GEN11_IIR_REG_SELECTOR(bank), BIT(bit));
/*
* NB: Specs do not specify how long to spin wait,
* so we do ~100us as an educated guess.
*/
timeout_ts = (local_clock() >> 10) + 100;
do {
ident = raw_reg_read(regs, GEN11_INTR_IDENTITY_REG(bank));
} while (!(ident & GEN11_INTR_DATA_VALID) &&
!time_after32(local_clock() >> 10, timeout_ts));
if (unlikely(!(ident & GEN11_INTR_DATA_VALID))) {
DRM_ERROR("INTR_IDENTITY_REG%u:%u 0x%08x not valid!\n",
bank, bit, ident);
return 0;
}
raw_reg_write(regs, GEN11_INTR_IDENTITY_REG(bank),
GEN11_INTR_DATA_VALID);
return ident & GEN11_INTR_ENGINE_MASK;
}
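Note on the timeout arithmetic in gen11_gt_engine_intr() above: local_clock() returns nanoseconds, and the right shift by 10 is a cheap approximate conversion to microseconds (divide by 1024 instead of 1000), so timeout_ts ends up roughly 100us in the future, matching the comment. Illustration only:

u64 now_ns = local_clock();    /* scheduler clock, in nanoseconds */
u32 now_us = now_ns >> 10;     /* ns / 1024, i.e. approximately microseconds */
u32 timeout_ts = now_us + 100; /* ~100 us from now, compared via time_after32() */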
static void
gen11_gt_irq_handler(struct drm_i915_private * const i915,
const u32 master_ctl)
{
void __iomem * const regs = i915->regs;
unsigned int bank;
for (bank = 0; bank < 2; bank++) {
unsigned long intr_dw;
unsigned int bit;
if (!(master_ctl & GEN11_GT_DW_IRQ(bank)))
continue;
intr_dw = raw_reg_read(regs, GEN11_GT_INTR_DW(bank));
if (unlikely(!intr_dw)) {
DRM_ERROR("GT_INTR_DW%u blank!\n", bank);
continue;
}
for_each_set_bit(bit, &intr_dw, 32) {
const u16 iir = gen11_gt_engine_intr(i915, bank, bit);
if (unlikely(!iir))
continue;
gen11_gt_engine_irq_handler(i915, bank, bit, iir);
}
/* Clear must be after shared has been served for engine */
raw_reg_write(regs, GEN11_GT_INTR_DW(bank), intr_dw);
}
}
static irqreturn_t gen11_irq_handler(int irq, void *arg)
{
struct drm_i915_private * const i915 = to_i915(arg);
void __iomem * const regs = i915->regs;
u32 master_ctl;
if (!intel_irqs_enabled(i915))
return IRQ_NONE;
master_ctl = raw_reg_read(regs, GEN11_GFX_MSTR_IRQ);
master_ctl &= ~GEN11_MASTER_IRQ;
if (!master_ctl)
return IRQ_NONE;
/* Disable interrupts. */
raw_reg_write(regs, GEN11_GFX_MSTR_IRQ, 0);
/* Find, clear, then process each source of interrupt. */
gen11_gt_irq_handler(i915, master_ctl);
/* IRQs are synced during runtime_suspend, we don't require a wakeref */
if (master_ctl & GEN11_DISPLAY_IRQ) {
const u32 disp_ctl = raw_reg_read(regs, GEN11_DISPLAY_INT_CTL);
disable_rpm_wakeref_asserts(i915);
/*
* GEN11_DISPLAY_INT_CTL has same format as GEN8_MASTER_IRQ
* for the display related bits.
*/
gen8_de_irq_handler(i915, disp_ctl);
enable_rpm_wakeref_asserts(i915);
}
/* Acknowledge and enable interrupts. */
raw_reg_write(regs, GEN11_GFX_MSTR_IRQ, GEN11_MASTER_IRQ | master_ctl);
return IRQ_HANDLED;
}
/** /**
* i915_reset_device - do process context error handling work * i915_reset_device - do process context error handling work
* @dev_priv: i915 device private * @dev_priv: i915 device private
@ -3180,6 +3337,42 @@ static void gen8_irq_reset(struct drm_device *dev)
ibx_irq_reset(dev_priv); ibx_irq_reset(dev_priv);
} }
static void gen11_gt_irq_reset(struct drm_i915_private *dev_priv)
{
/* Disable RCS, BCS, VCS and VECS class engines. */
I915_WRITE(GEN11_RENDER_COPY_INTR_ENABLE, 0);
I915_WRITE(GEN11_VCS_VECS_INTR_ENABLE, 0);
/* Restore masks irqs on RCS, BCS, VCS and VECS engines. */
I915_WRITE(GEN11_RCS0_RSVD_INTR_MASK, ~0);
I915_WRITE(GEN11_BCS_RSVD_INTR_MASK, ~0);
I915_WRITE(GEN11_VCS0_VCS1_INTR_MASK, ~0);
I915_WRITE(GEN11_VCS2_VCS3_INTR_MASK, ~0);
I915_WRITE(GEN11_VECS0_VECS1_INTR_MASK, ~0);
}
static void gen11_irq_reset(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
int pipe;
I915_WRITE(GEN11_GFX_MSTR_IRQ, 0);
POSTING_READ(GEN11_GFX_MSTR_IRQ);
gen11_gt_irq_reset(dev_priv);
I915_WRITE(GEN11_DISPLAY_INT_CTL, 0);
for_each_pipe(dev_priv, pipe)
if (intel_display_power_is_enabled(dev_priv,
POWER_DOMAIN_PIPE(pipe)))
GEN8_IRQ_RESET_NDX(DE_PIPE, pipe);
GEN3_IRQ_RESET(GEN8_DE_PORT_);
GEN3_IRQ_RESET(GEN8_DE_MISC_);
GEN3_IRQ_RESET(GEN8_PCU_);
}
void gen8_irq_power_well_post_enable(struct drm_i915_private *dev_priv, void gen8_irq_power_well_post_enable(struct drm_i915_private *dev_priv,
u8 pipe_mask) u8 pipe_mask)
{ {
@ -3677,6 +3870,41 @@ static int gen8_irq_postinstall(struct drm_device *dev)
return 0; return 0;
} }
static void gen11_gt_irq_postinstall(struct drm_i915_private *dev_priv)
{
const u32 irqs = GT_RENDER_USER_INTERRUPT | GT_CONTEXT_SWITCH_INTERRUPT;
BUILD_BUG_ON(irqs & 0xffff0000);
/* Enable RCS, BCS, VCS and VECS class interrupts. */
I915_WRITE(GEN11_RENDER_COPY_INTR_ENABLE, irqs << 16 | irqs);
I915_WRITE(GEN11_VCS_VECS_INTR_ENABLE, irqs << 16 | irqs);
/* Unmask irqs on RCS, BCS, VCS and VECS engines. */
I915_WRITE(GEN11_RCS0_RSVD_INTR_MASK, ~(irqs << 16));
I915_WRITE(GEN11_BCS_RSVD_INTR_MASK, ~(irqs << 16));
I915_WRITE(GEN11_VCS0_VCS1_INTR_MASK, ~(irqs | irqs << 16));
I915_WRITE(GEN11_VCS2_VCS3_INTR_MASK, ~(irqs | irqs << 16));
I915_WRITE(GEN11_VECS0_VECS1_INTR_MASK, ~(irqs | irqs << 16));
dev_priv->pm_imr = 0xffffffff; /* TODO */
}
static int gen11_irq_postinstall(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
gen11_gt_irq_postinstall(dev_priv);
gen8_de_irq_postinstall(dev_priv);
I915_WRITE(GEN11_DISPLAY_INT_CTL, GEN11_DISPLAY_IRQ_ENABLE);
I915_WRITE(GEN11_GFX_MSTR_IRQ, GEN11_MASTER_IRQ);
POSTING_READ(GEN11_GFX_MSTR_IRQ);
return 0;
}
static int cherryview_irq_postinstall(struct drm_device *dev) static int cherryview_irq_postinstall(struct drm_device *dev)
{ {
struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_private *dev_priv = to_i915(dev);
@ -4125,6 +4353,14 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
dev->driver->enable_vblank = i965_enable_vblank; dev->driver->enable_vblank = i965_enable_vblank;
dev->driver->disable_vblank = i965_disable_vblank; dev->driver->disable_vblank = i965_disable_vblank;
dev_priv->display.hpd_irq_setup = i915_hpd_irq_setup; dev_priv->display.hpd_irq_setup = i915_hpd_irq_setup;
} else if (INTEL_GEN(dev_priv) >= 11) {
dev->driver->irq_handler = gen11_irq_handler;
dev->driver->irq_preinstall = gen11_irq_reset;
dev->driver->irq_postinstall = gen11_irq_postinstall;
dev->driver->irq_uninstall = gen11_irq_reset;
dev->driver->enable_vblank = gen8_enable_vblank;
dev->driver->disable_vblank = gen8_disable_vblank;
dev_priv->display.hpd_irq_setup = spt_hpd_irq_setup;
} else if (INTEL_GEN(dev_priv) >= 8) { } else if (INTEL_GEN(dev_priv) >= 8) {
dev->driver->irq_handler = gen8_irq_handler; dev->driver->irq_handler = gen8_irq_handler;
dev->driver->irq_preinstall = gen8_irq_reset; dev->driver->irq_preinstall = gen8_irq_reset;

View File

@ -594,7 +594,8 @@ static const struct intel_device_info intel_cannonlake_info = {
GEN10_FEATURES, \ GEN10_FEATURES, \
GEN(11), \ GEN(11), \
.ddb_size = 2048, \ .ddb_size = 2048, \
.has_csr = 0 .has_csr = 0, \
.has_logical_ring_elsq = 1
static const struct intel_device_info intel_icelake_11_info = { static const struct intel_device_info intel_icelake_11_info = {
GEN11_FEATURES, GEN11_FEATURES,
@ -664,6 +665,7 @@ static const struct pci_device_id pciidlist[] = {
INTEL_CFL_U_GT2_IDS(&intel_coffeelake_gt2_info), INTEL_CFL_U_GT2_IDS(&intel_coffeelake_gt2_info),
INTEL_CFL_U_GT3_IDS(&intel_coffeelake_gt3_info), INTEL_CFL_U_GT3_IDS(&intel_coffeelake_gt3_info),
INTEL_CNL_IDS(&intel_cannonlake_info), INTEL_CNL_IDS(&intel_cannonlake_info),
INTEL_ICL_11_IDS(&intel_icelake_11_info),
{0, 0, 0} {0, 0, 0}
}; };
MODULE_DEVICE_TABLE(pci, pciidlist); MODULE_DEVICE_TABLE(pci, pciidlist);

View File

@ -1303,9 +1303,8 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
*/ */
mutex_lock(&dev_priv->drm.struct_mutex); mutex_lock(&dev_priv->drm.struct_mutex);
dev_priv->perf.oa.exclusive_stream = NULL; dev_priv->perf.oa.exclusive_stream = NULL;
mutex_unlock(&dev_priv->drm.struct_mutex);
dev_priv->perf.oa.ops.disable_metric_set(dev_priv); dev_priv->perf.oa.ops.disable_metric_set(dev_priv);
mutex_unlock(&dev_priv->drm.struct_mutex);
free_oa_buffer(dev_priv); free_oa_buffer(dev_priv);
@ -1630,10 +1629,10 @@ static void gen8_update_reg_state_unlocked(struct i915_gem_context *ctx,
* Same as gen8_update_reg_state_unlocked only through the batchbuffer. This * Same as gen8_update_reg_state_unlocked only through the batchbuffer. This
* is only used by the kernel context. * is only used by the kernel context.
*/ */
static int gen8_emit_oa_config(struct drm_i915_gem_request *req, static int gen8_emit_oa_config(struct i915_request *rq,
const struct i915_oa_config *oa_config) const struct i915_oa_config *oa_config)
{ {
struct drm_i915_private *dev_priv = req->i915; struct drm_i915_private *dev_priv = rq->i915;
/* The MMIO offsets for Flex EU registers aren't contiguous */ /* The MMIO offsets for Flex EU registers aren't contiguous */
u32 flex_mmio[] = { u32 flex_mmio[] = {
i915_mmio_reg_offset(EU_PERF_CNTL0), i915_mmio_reg_offset(EU_PERF_CNTL0),
@ -1647,7 +1646,7 @@ static int gen8_emit_oa_config(struct drm_i915_gem_request *req,
u32 *cs; u32 *cs;
int i; int i;
cs = intel_ring_begin(req, ARRAY_SIZE(flex_mmio) * 2 + 4); cs = intel_ring_begin(rq, ARRAY_SIZE(flex_mmio) * 2 + 4);
if (IS_ERR(cs)) if (IS_ERR(cs))
return PTR_ERR(cs); return PTR_ERR(cs);
@ -1685,7 +1684,7 @@ static int gen8_emit_oa_config(struct drm_i915_gem_request *req,
} }
*cs++ = MI_NOOP; *cs++ = MI_NOOP;
intel_ring_advance(req, cs); intel_ring_advance(rq, cs);
return 0; return 0;
} }
@ -1695,38 +1694,38 @@ static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_pr
{ {
struct intel_engine_cs *engine = dev_priv->engine[RCS]; struct intel_engine_cs *engine = dev_priv->engine[RCS];
struct i915_gem_timeline *timeline; struct i915_gem_timeline *timeline;
struct drm_i915_gem_request *req; struct i915_request *rq;
int ret; int ret;
lockdep_assert_held(&dev_priv->drm.struct_mutex); lockdep_assert_held(&dev_priv->drm.struct_mutex);
i915_gem_retire_requests(dev_priv); i915_retire_requests(dev_priv);
req = i915_gem_request_alloc(engine, dev_priv->kernel_context); rq = i915_request_alloc(engine, dev_priv->kernel_context);
if (IS_ERR(req)) if (IS_ERR(rq))
return PTR_ERR(req); return PTR_ERR(rq);
ret = gen8_emit_oa_config(req, oa_config); ret = gen8_emit_oa_config(rq, oa_config);
if (ret) { if (ret) {
i915_add_request(req); i915_request_add(rq);
return ret; return ret;
} }
/* Queue this switch after all other activity */ /* Queue this switch after all other activity */
list_for_each_entry(timeline, &dev_priv->gt.timelines, link) { list_for_each_entry(timeline, &dev_priv->gt.timelines, link) {
struct drm_i915_gem_request *prev; struct i915_request *prev;
struct intel_timeline *tl; struct intel_timeline *tl;
tl = &timeline->engine[engine->id]; tl = &timeline->engine[engine->id];
prev = i915_gem_active_raw(&tl->last_request, prev = i915_gem_active_raw(&tl->last_request,
&dev_priv->drm.struct_mutex); &dev_priv->drm.struct_mutex);
if (prev) if (prev)
i915_sw_fence_await_sw_fence_gfp(&req->submit, i915_sw_fence_await_sw_fence_gfp(&rq->submit,
&prev->submit, &prev->submit,
GFP_KERNEL); GFP_KERNEL);
} }
i915_add_request(req); i915_request_add(rq);
return 0; return 0;
} }
@ -1756,22 +1755,13 @@ static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_pr
* Note: it's only the RCS/Render context that has any OA state. * Note: it's only the RCS/Render context that has any OA state.
*/ */
static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
const struct i915_oa_config *oa_config, const struct i915_oa_config *oa_config)
bool interruptible)
{ {
struct i915_gem_context *ctx; struct i915_gem_context *ctx;
int ret; int ret;
unsigned int wait_flags = I915_WAIT_LOCKED; unsigned int wait_flags = I915_WAIT_LOCKED;
if (interruptible) { lockdep_assert_held(&dev_priv->drm.struct_mutex);
ret = i915_mutex_lock_interruptible(&dev_priv->drm);
if (ret)
return ret;
wait_flags |= I915_WAIT_INTERRUPTIBLE;
} else {
mutex_lock(&dev_priv->drm.struct_mutex);
}
/* Switch away from any user context. */ /* Switch away from any user context. */
ret = gen8_switch_to_updated_kernel_context(dev_priv, oa_config); ret = gen8_switch_to_updated_kernel_context(dev_priv, oa_config);
@ -1819,8 +1809,6 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
} }
out: out:
mutex_unlock(&dev_priv->drm.struct_mutex);
return ret; return ret;
} }
@ -1863,7 +1851,7 @@ static int gen8_enable_metric_set(struct drm_i915_private *dev_priv,
* to make sure all slices/subslices are ON before writing to NOA * to make sure all slices/subslices are ON before writing to NOA
* registers. * registers.
*/ */
ret = gen8_configure_all_contexts(dev_priv, oa_config, true); ret = gen8_configure_all_contexts(dev_priv, oa_config);
if (ret) if (ret)
return ret; return ret;
@ -1878,7 +1866,7 @@ static int gen8_enable_metric_set(struct drm_i915_private *dev_priv,
static void gen8_disable_metric_set(struct drm_i915_private *dev_priv) static void gen8_disable_metric_set(struct drm_i915_private *dev_priv)
{ {
/* Reset all contexts' slices/subslices configurations. */ /* Reset all contexts' slices/subslices configurations. */
gen8_configure_all_contexts(dev_priv, NULL, false); gen8_configure_all_contexts(dev_priv, NULL);
I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) & I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) &
~GT_NOA_ENABLE)); ~GT_NOA_ENABLE));
@ -1888,7 +1876,7 @@ static void gen8_disable_metric_set(struct drm_i915_private *dev_priv)
static void gen10_disable_metric_set(struct drm_i915_private *dev_priv) static void gen10_disable_metric_set(struct drm_i915_private *dev_priv)
{ {
/* Reset all contexts' slices/subslices configurations. */ /* Reset all contexts' slices/subslices configurations. */
gen8_configure_all_contexts(dev_priv, NULL, false); gen8_configure_all_contexts(dev_priv, NULL);
/* Make sure we disable noa to save power. */ /* Make sure we disable noa to save power. */
I915_WRITE(RPM_CONFIG1, I915_WRITE(RPM_CONFIG1,
@ -2138,6 +2126,10 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
if (ret) if (ret)
goto err_oa_buf_alloc; goto err_oa_buf_alloc;
ret = i915_mutex_lock_interruptible(&dev_priv->drm);
if (ret)
goto err_lock;
ret = dev_priv->perf.oa.ops.enable_metric_set(dev_priv, ret = dev_priv->perf.oa.ops.enable_metric_set(dev_priv,
stream->oa_config); stream->oa_config);
if (ret) if (ret)
@ -2145,23 +2137,17 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
stream->ops = &i915_oa_stream_ops; stream->ops = &i915_oa_stream_ops;
/* Lock device for exclusive_stream access late because
* enable_metric_set() might lock as well on gen8+.
*/
ret = i915_mutex_lock_interruptible(&dev_priv->drm);
if (ret)
goto err_lock;
dev_priv->perf.oa.exclusive_stream = stream; dev_priv->perf.oa.exclusive_stream = stream;
mutex_unlock(&dev_priv->drm.struct_mutex); mutex_unlock(&dev_priv->drm.struct_mutex);
return 0; return 0;
err_lock:
dev_priv->perf.oa.ops.disable_metric_set(dev_priv);
err_enable: err_enable:
dev_priv->perf.oa.ops.disable_metric_set(dev_priv);
mutex_unlock(&dev_priv->drm.struct_mutex);
err_lock:
free_oa_buffer(dev_priv); free_oa_buffer(dev_priv);
err_oa_buf_alloc: err_oa_buf_alloc:

View File

@ -0,0 +1,125 @@
/*
* SPDX-License-Identifier: MIT
*
* Copyright © 2018 Intel Corporation
*/
#include "i915_drv.h"
#include "i915_query.h"
#include <uapi/drm/i915_drm.h>
static int query_topology_info(struct drm_i915_private *dev_priv,
struct drm_i915_query_item *query_item)
{
const struct sseu_dev_info *sseu = &INTEL_INFO(dev_priv)->sseu;
struct drm_i915_query_topology_info topo;
u32 slice_length, subslice_length, eu_length, total_length;
if (query_item->flags != 0)
return -EINVAL;
if (sseu->max_slices == 0)
return -ENODEV;
BUILD_BUG_ON(sizeof(u8) != sizeof(sseu->slice_mask));
slice_length = sizeof(sseu->slice_mask);
subslice_length = sseu->max_slices *
DIV_ROUND_UP(sseu->max_subslices,
sizeof(sseu->subslice_mask[0]) * BITS_PER_BYTE);
eu_length = sseu->max_slices * sseu->max_subslices *
DIV_ROUND_UP(sseu->max_eus_per_subslice, BITS_PER_BYTE);
total_length = sizeof(topo) + slice_length + subslice_length + eu_length;
if (query_item->length == 0)
return total_length;
if (query_item->length < total_length)
return -EINVAL;
if (copy_from_user(&topo, u64_to_user_ptr(query_item->data_ptr),
sizeof(topo)))
return -EFAULT;
if (topo.flags != 0)
return -EINVAL;
if (!access_ok(VERIFY_WRITE, u64_to_user_ptr(query_item->data_ptr),
total_length))
return -EFAULT;
memset(&topo, 0, sizeof(topo));
topo.max_slices = sseu->max_slices;
topo.max_subslices = sseu->max_subslices;
topo.max_eus_per_subslice = sseu->max_eus_per_subslice;
topo.subslice_offset = slice_length;
topo.subslice_stride = DIV_ROUND_UP(sseu->max_subslices, BITS_PER_BYTE);
topo.eu_offset = slice_length + subslice_length;
topo.eu_stride =
DIV_ROUND_UP(sseu->max_eus_per_subslice, BITS_PER_BYTE);
if (__copy_to_user(u64_to_user_ptr(query_item->data_ptr),
&topo, sizeof(topo)))
return -EFAULT;
if (__copy_to_user(u64_to_user_ptr(query_item->data_ptr + sizeof(topo)),
&sseu->slice_mask, slice_length))
return -EFAULT;
if (__copy_to_user(u64_to_user_ptr(query_item->data_ptr +
sizeof(topo) + slice_length),
sseu->subslice_mask, subslice_length))
return -EFAULT;
if (__copy_to_user(u64_to_user_ptr(query_item->data_ptr +
sizeof(topo) +
slice_length + subslice_length),
sseu->eu_mask, eu_length))
return -EFAULT;
return total_length;
}
static int (* const i915_query_funcs[])(struct drm_i915_private *dev_priv,
struct drm_i915_query_item *query_item) = {
query_topology_info,
};
int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
struct drm_i915_private *dev_priv = to_i915(dev);
struct drm_i915_query *args = data;
struct drm_i915_query_item __user *user_item_ptr =
u64_to_user_ptr(args->items_ptr);
u32 i;
if (args->flags != 0)
return -EINVAL;
for (i = 0; i < args->num_items; i++, user_item_ptr++) {
struct drm_i915_query_item item;
u64 func_idx;
int ret;
if (copy_from_user(&item, user_item_ptr, sizeof(item)))
return -EFAULT;
if (item.query_id == 0)
return -EINVAL;
func_idx = item.query_id - 1;
if (func_idx < ARRAY_SIZE(i915_query_funcs))
ret = i915_query_funcs[func_idx](dev_priv, &item);
else
ret = -EINVAL;
/* Only write the length back to userspace if they differ. */
if (ret != item.length && put_user(ret, &user_item_ptr->length))
return -EFAULT;
}
return 0;
}
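A minimal userspace sketch of how this query interface is meant to be exercised: probe the size with item.length == 0, allocate, then repeat the call so the kernel fills the header and the packed slice/subslice/EU masks. It assumes the uAPI definitions added by this series in include/uapi/drm/i915_drm.h (struct drm_i915_query, struct drm_i915_query_item, struct drm_i915_query_topology_info with its trailing data[] array, DRM_I915_QUERY_TOPOLOGY_INFO, DRM_IOCTL_I915_QUERY); the include path, drm_fd and subslice_enabled() are illustrative, not taken from the patch.

#include <stdint.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <i915_drm.h>	/* libdrm header; exact path is an assumption of this sketch */

static struct drm_i915_query_topology_info *query_topology(int drm_fd)
{
	struct drm_i915_query_item item = {
		.query_id = DRM_I915_QUERY_TOPOLOGY_INFO,
	};
	struct drm_i915_query query = {
		.num_items = 1,
		.items_ptr = (uintptr_t)&item,
	};
	struct drm_i915_query_topology_info *topo;

	/* Pass 1: length == 0 asks the kernel for the total size needed. */
	if (ioctl(drm_fd, DRM_IOCTL_I915_QUERY, &query) || item.length <= 0)
		return NULL;

	topo = calloc(1, item.length);
	if (!topo)
		return NULL;

	/* Pass 2: the kernel fills the header plus the three packed masks. */
	item.data_ptr = (uintptr_t)topo;
	if (ioctl(drm_fd, DRM_IOCTL_I915_QUERY, &query) || item.length <= 0) {
		free(topo);
		return NULL;
	}
	return topo;
}

/* Test one subslice bit using the offsets/strides reported by the kernel. */
static int subslice_enabled(const struct drm_i915_query_topology_info *topo,
			    int slice, int subslice)
{
	const uint8_t *mask = topo->data + topo->subslice_offset +
			      slice * topo->subslice_stride;

	return (mask[subslice / 8] >> (subslice % 8)) & 1;
}

The subslice_offset/subslice_stride fields (and the analogous eu_* fields) mirror the layout computed in query_topology_info() above, so userspace never has to hardcode the mask packing.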

View File

@ -0,0 +1,15 @@
/*
* SPDX-License-Identifier: MIT
*
* Copyright © 2018 Intel Corporation
*/
#ifndef _I915_QUERY_H_
#define _I915_QUERY_H_
struct drm_device;
struct drm_file;
int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
#endif

View File

@ -178,6 +178,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
#define BCS_HW 2 #define BCS_HW 2
#define VECS_HW 3 #define VECS_HW 3
#define VCS2_HW 4 #define VCS2_HW 4
#define VCS3_HW 6
#define VCS4_HW 7
#define VECS2_HW 12
/* Engine class */ /* Engine class */
@ -188,7 +191,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
#define OTHER_CLASS 4 #define OTHER_CLASS 4
#define MAX_ENGINE_CLASS 4 #define MAX_ENGINE_CLASS 4
#define MAX_ENGINE_INSTANCE 1 #define MAX_ENGINE_INSTANCE 3
/* PCI config space */ /* PCI config space */
@ -2342,7 +2345,13 @@ enum i915_power_well_id {
#define BSD_RING_BASE 0x04000 #define BSD_RING_BASE 0x04000
#define GEN6_BSD_RING_BASE 0x12000 #define GEN6_BSD_RING_BASE 0x12000
#define GEN8_BSD2_RING_BASE 0x1c000 #define GEN8_BSD2_RING_BASE 0x1c000
#define GEN11_BSD_RING_BASE 0x1c0000
#define GEN11_BSD2_RING_BASE 0x1c4000
#define GEN11_BSD3_RING_BASE 0x1d0000
#define GEN11_BSD4_RING_BASE 0x1d4000
#define VEBOX_RING_BASE 0x1a000 #define VEBOX_RING_BASE 0x1a000
#define GEN11_VEBOX_RING_BASE 0x1c8000
#define GEN11_VEBOX2_RING_BASE 0x1d8000
#define BLT_RING_BASE 0x22000 #define BLT_RING_BASE 0x22000
#define RING_TAIL(base) _MMIO((base)+0x30) #define RING_TAIL(base) _MMIO((base)+0x30)
#define RING_HEAD(base) _MMIO((base)+0x34) #define RING_HEAD(base) _MMIO((base)+0x34)
@ -2807,6 +2816,13 @@ enum i915_power_well_id {
#define GEN9_RCS_FE_FSM2 _MMIO(0x22a4) #define GEN9_RCS_FE_FSM2 _MMIO(0x22a4)
/* Fuse readout registers for GT */ /* Fuse readout registers for GT */
#define HSW_PAVP_FUSE1 _MMIO(0x911C)
#define HSW_F1_EU_DIS_SHIFT 16
#define HSW_F1_EU_DIS_MASK (0x3 << HSW_F1_EU_DIS_SHIFT)
#define HSW_F1_EU_DIS_10EUS 0
#define HSW_F1_EU_DIS_8EUS 1
#define HSW_F1_EU_DIS_6EUS 2
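The Haswell fuse bits above encode how many execution units remain per subslice; a decode using only these new definitions would look roughly like the helper below (the function name and the default case are assumptions of this sketch, not the exact driver code).

/* Sketch: map the HSW_PAVP_FUSE1 EU-disable field onto an EU count. */
static unsigned int hsw_eus_per_subslice(u32 pavp_fuse1)
{
	switch ((pavp_fuse1 & HSW_F1_EU_DIS_MASK) >> HSW_F1_EU_DIS_SHIFT) {
	case HSW_F1_EU_DIS_10EUS:
		return 10;
	case HSW_F1_EU_DIS_8EUS:
		return 8;
	case HSW_F1_EU_DIS_6EUS:
	default:
		return 6;
	}
}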
#define CHV_FUSE_GT _MMIO(VLV_DISPLAY_BASE + 0x2168) #define CHV_FUSE_GT _MMIO(VLV_DISPLAY_BASE + 0x2168)
#define CHV_FGT_DISABLE_SS0 (1 << 10) #define CHV_FGT_DISABLE_SS0 (1 << 10)
#define CHV_FGT_DISABLE_SS1 (1 << 11) #define CHV_FGT_DISABLE_SS1 (1 << 11)
@ -3896,6 +3912,12 @@ enum {
#define GEN8_CTX_ID_SHIFT 32 #define GEN8_CTX_ID_SHIFT 32
#define GEN8_CTX_ID_WIDTH 21 #define GEN8_CTX_ID_WIDTH 21
#define GEN11_SW_CTX_ID_SHIFT 37
#define GEN11_SW_CTX_ID_WIDTH 11
#define GEN11_ENGINE_CLASS_SHIFT 61
#define GEN11_ENGINE_CLASS_WIDTH 3
#define GEN11_ENGINE_INSTANCE_SHIFT 48
#define GEN11_ENGINE_INSTANCE_WIDTH 6
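For reference, these widths describe the Gen11 context descriptor layout: software context ID in bits 47:37, engine instance in bits 53:48 and engine class in bits 63:61. The execlists code is expected to pack them roughly as sketched below (ctx_id, engine_class and engine_instance are placeholders; the actual assembly lives in the lrc code, not in this header).

	/* Sketch: assemble the Gen11-specific upper descriptor bits. */
	u64 desc = 0;

	desc |= (u64)(ctx_id & GENMASK(GEN11_SW_CTX_ID_WIDTH - 1, 0)) <<
		GEN11_SW_CTX_ID_SHIFT;				/* bits 47:37 */
	desc |= (u64)engine_instance << GEN11_ENGINE_INSTANCE_SHIFT; /* bits 53:48 */
	desc |= (u64)engine_class << GEN11_ENGINE_CLASS_SHIFT;	      /* bits 63:61 */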
#define CHV_CLK_CTL1 _MMIO(0x101100) #define CHV_CLK_CTL1 _MMIO(0x101100)
#define VLV_CLK_CTL2 _MMIO(0x101104) #define VLV_CLK_CTL2 _MMIO(0x101104)
@ -3943,6 +3965,9 @@ enum {
#define SARBUNIT_CLKGATE_DIS (1 << 5) #define SARBUNIT_CLKGATE_DIS (1 << 5)
#define RCCUNIT_CLKGATE_DIS (1 << 7) #define RCCUNIT_CLKGATE_DIS (1 << 7)
#define SUBSLICE_UNIT_LEVEL_CLKGATE _MMIO(0x9524)
#define GWUNIT_CLKGATE_DIS (1 << 16)
#define UNSLICE_UNIT_LEVEL_CLKGATE _MMIO(0x9434) #define UNSLICE_UNIT_LEVEL_CLKGATE _MMIO(0x9434)
#define VFUNIT_CLKGATE_DIS (1 << 20) #define VFUNIT_CLKGATE_DIS (1 << 20)
@ -5347,8 +5372,8 @@ enum {
#define _DPF_AUX_CH_DATA4 (dev_priv->info.display_mmio_offset + 0x64520) #define _DPF_AUX_CH_DATA4 (dev_priv->info.display_mmio_offset + 0x64520)
#define _DPF_AUX_CH_DATA5 (dev_priv->info.display_mmio_offset + 0x64524) #define _DPF_AUX_CH_DATA5 (dev_priv->info.display_mmio_offset + 0x64524)
#define DP_AUX_CH_CTL(port) _MMIO_PORT(port, _DPA_AUX_CH_CTL, _DPB_AUX_CH_CTL) #define DP_AUX_CH_CTL(aux_ch) _MMIO_PORT(aux_ch, _DPA_AUX_CH_CTL, _DPB_AUX_CH_CTL)
#define DP_AUX_CH_DATA(port, i) _MMIO(_PORT(port, _DPA_AUX_CH_DATA1, _DPB_AUX_CH_DATA1) + (i) * 4) /* 5 registers */ #define DP_AUX_CH_DATA(aux_ch, i) _MMIO(_PORT(aux_ch, _DPA_AUX_CH_DATA1, _DPB_AUX_CH_DATA1) + (i) * 4) /* 5 registers */
#define DP_AUX_CH_CTL_SEND_BUSY (1 << 31) #define DP_AUX_CH_CTL_SEND_BUSY (1 << 31)
#define DP_AUX_CH_CTL_DONE (1 << 30) #define DP_AUX_CH_CTL_DONE (1 << 30)
@ -7897,8 +7922,8 @@ enum {
#define _PCH_DPD_AUX_CH_DATA4 0xe4320 #define _PCH_DPD_AUX_CH_DATA4 0xe4320
#define _PCH_DPD_AUX_CH_DATA5 0xe4324 #define _PCH_DPD_AUX_CH_DATA5 0xe4324
#define PCH_DP_AUX_CH_CTL(port) _MMIO_PORT((port) - PORT_B, _PCH_DPB_AUX_CH_CTL, _PCH_DPC_AUX_CH_CTL) #define PCH_DP_AUX_CH_CTL(aux_ch) _MMIO_PORT((aux_ch) - AUX_CH_B, _PCH_DPB_AUX_CH_CTL, _PCH_DPC_AUX_CH_CTL)
#define PCH_DP_AUX_CH_DATA(port, i) _MMIO(_PORT((port) - PORT_B, _PCH_DPB_AUX_CH_DATA1, _PCH_DPC_AUX_CH_DATA1) + (i) * 4) /* 5 registers */ #define PCH_DP_AUX_CH_DATA(aux_ch, i) _MMIO(_PORT((aux_ch) - AUX_CH_B, _PCH_DPB_AUX_CH_DATA1, _PCH_DPC_AUX_CH_DATA1) + (i) * 4) /* 5 registers */
/* CPT */ /* CPT */
#define PORT_TRANS_A_SEL_CPT 0 #define PORT_TRANS_A_SEL_CPT 0
@ -7998,9 +8023,13 @@ enum {
#define VLV_GTLC_PW_RENDER_STATUS_MASK (1 << 7) #define VLV_GTLC_PW_RENDER_STATUS_MASK (1 << 7)
#define FORCEWAKE_MT _MMIO(0xa188) /* multi-threaded */ #define FORCEWAKE_MT _MMIO(0xa188) /* multi-threaded */
#define FORCEWAKE_MEDIA_GEN9 _MMIO(0xa270) #define FORCEWAKE_MEDIA_GEN9 _MMIO(0xa270)
#define FORCEWAKE_MEDIA_VDBOX_GEN11(n) _MMIO(0xa540 + (n) * 4)
#define FORCEWAKE_MEDIA_VEBOX_GEN11(n) _MMIO(0xa560 + (n) * 4)
#define FORCEWAKE_RENDER_GEN9 _MMIO(0xa278) #define FORCEWAKE_RENDER_GEN9 _MMIO(0xa278)
#define FORCEWAKE_BLITTER_GEN9 _MMIO(0xa188) #define FORCEWAKE_BLITTER_GEN9 _MMIO(0xa188)
#define FORCEWAKE_ACK_MEDIA_GEN9 _MMIO(0x0D88) #define FORCEWAKE_ACK_MEDIA_GEN9 _MMIO(0x0D88)
#define FORCEWAKE_ACK_MEDIA_VDBOX_GEN11(n) _MMIO(0x0D50 + (n) * 4)
#define FORCEWAKE_ACK_MEDIA_VEBOX_GEN11(n) _MMIO(0x0D70 + (n) * 4)
#define FORCEWAKE_ACK_RENDER_GEN9 _MMIO(0x0D84) #define FORCEWAKE_ACK_RENDER_GEN9 _MMIO(0x0D84)
#define FORCEWAKE_ACK_BLITTER_GEN9 _MMIO(0x130044) #define FORCEWAKE_ACK_BLITTER_GEN9 _MMIO(0x130044)
#define FORCEWAKE_KERNEL BIT(0) #define FORCEWAKE_KERNEL BIT(0)

View File

@ -37,7 +37,8 @@ static const char *i915_fence_get_driver_name(struct dma_fence *fence)
static const char *i915_fence_get_timeline_name(struct dma_fence *fence) static const char *i915_fence_get_timeline_name(struct dma_fence *fence)
{ {
/* The timeline struct (as part of the ppgtt underneath a context) /*
* The timeline struct (as part of the ppgtt underneath a context)
* may be freed when the request is no longer in use by the GPU. * may be freed when the request is no longer in use by the GPU.
* We could extend the life of a context to beyond that of all * We could extend the life of a context to beyond that of all
* fences, possibly keeping the hw resource around indefinitely, * fences, possibly keeping the hw resource around indefinitely,
@ -53,7 +54,7 @@ static const char *i915_fence_get_timeline_name(struct dma_fence *fence)
static bool i915_fence_signaled(struct dma_fence *fence) static bool i915_fence_signaled(struct dma_fence *fence)
{ {
return i915_gem_request_completed(to_request(fence)); return i915_request_completed(to_request(fence));
} }
static bool i915_fence_enable_signaling(struct dma_fence *fence) static bool i915_fence_enable_signaling(struct dma_fence *fence)
@ -69,22 +70,23 @@ static signed long i915_fence_wait(struct dma_fence *fence,
bool interruptible, bool interruptible,
signed long timeout) signed long timeout)
{ {
return i915_wait_request(to_request(fence), interruptible, timeout); return i915_request_wait(to_request(fence), interruptible, timeout);
} }
static void i915_fence_release(struct dma_fence *fence) static void i915_fence_release(struct dma_fence *fence)
{ {
struct drm_i915_gem_request *req = to_request(fence); struct i915_request *rq = to_request(fence);
/* The request is put onto a RCU freelist (i.e. the address /*
* The request is put onto a RCU freelist (i.e. the address
* is immediately reused), mark the fences as being freed now. * is immediately reused), mark the fences as being freed now.
* Otherwise the debugobjects for the fences are only marked as * Otherwise the debugobjects for the fences are only marked as
* freed when the slab cache itself is freed, and so we would get * freed when the slab cache itself is freed, and so we would get
* caught trying to reuse dead objects. * caught trying to reuse dead objects.
*/ */
i915_sw_fence_fini(&req->submit); i915_sw_fence_fini(&rq->submit);
kmem_cache_free(req->i915->requests, req); kmem_cache_free(rq->i915->requests, rq);
} }
const struct dma_fence_ops i915_fence_ops = { const struct dma_fence_ops i915_fence_ops = {
@ -97,7 +99,7 @@ const struct dma_fence_ops i915_fence_ops = {
}; };
static inline void static inline void
i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) i915_request_remove_from_client(struct i915_request *request)
{ {
struct drm_i915_file_private *file_priv; struct drm_i915_file_private *file_priv;
@ -215,9 +217,9 @@ static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno)
struct intel_timeline *tl = engine->timeline; struct intel_timeline *tl = engine->timeline;
if (!i915_seqno_passed(seqno, tl->seqno)) { if (!i915_seqno_passed(seqno, tl->seqno)) {
/* spin until threads are complete */ /* Flush any waiters before we reuse the seqno */
while (intel_breadcrumbs_busy(engine)) intel_engine_disarm_breadcrumbs(engine);
cond_resched(); GEM_BUG_ON(!list_empty(&engine->breadcrumbs.signals));
} }
/* Check we are idle before we fiddle with hw state! */ /* Check we are idle before we fiddle with hw state! */
@ -238,17 +240,15 @@ static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno)
int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno) int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno)
{ {
struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_private *i915 = to_i915(dev);
lockdep_assert_held(&dev_priv->drm.struct_mutex); lockdep_assert_held(&i915->drm.struct_mutex);
if (seqno == 0) if (seqno == 0)
return -EINVAL; return -EINVAL;
/* HWS page needs to be set less than what we /* HWS page needs to be set less than what we will inject to ring */
* will inject to ring return reset_all_global_seqno(i915, seqno - 1);
*/
return reset_all_global_seqno(dev_priv, seqno - 1);
} }
static void mark_busy(struct drm_i915_private *i915) static void mark_busy(struct drm_i915_private *i915)
@ -331,16 +331,17 @@ static void unreserve_engine(struct intel_engine_cs *engine)
} }
void i915_gem_retire_noop(struct i915_gem_active *active, void i915_gem_retire_noop(struct i915_gem_active *active,
struct drm_i915_gem_request *request) struct i915_request *request)
{ {
/* Space left intentionally blank */ /* Space left intentionally blank */
} }
static void advance_ring(struct drm_i915_gem_request *request) static void advance_ring(struct i915_request *request)
{ {
unsigned int tail; unsigned int tail;
/* We know the GPU must have read the request to have /*
* We know the GPU must have read the request to have
* sent us the seqno + interrupt, so use the position * sent us the seqno + interrupt, so use the position
* of tail of the request to update the last known position * of tail of the request to update the last known position
* of the GPU head. * of the GPU head.
@ -349,7 +350,8 @@ static void advance_ring(struct drm_i915_gem_request *request)
* completion order. * completion order.
*/ */
if (list_is_last(&request->ring_link, &request->ring->request_list)) { if (list_is_last(&request->ring_link, &request->ring->request_list)) {
/* We may race here with execlists resubmitting this request /*
* We may race here with execlists resubmitting this request
* as we retire it. The resubmission will move the ring->tail * as we retire it. The resubmission will move the ring->tail
* forwards (to request->wa_tail). We either read the * forwards (to request->wa_tail). We either read the
* current value that was written to hw, or the value that * current value that was written to hw, or the value that
@ -365,30 +367,30 @@ static void advance_ring(struct drm_i915_gem_request *request)
request->ring->head = tail; request->ring->head = tail;
} }
static void free_capture_list(struct drm_i915_gem_request *request) static void free_capture_list(struct i915_request *request)
{ {
struct i915_gem_capture_list *capture; struct i915_capture_list *capture;
capture = request->capture_list; capture = request->capture_list;
while (capture) { while (capture) {
struct i915_gem_capture_list *next = capture->next; struct i915_capture_list *next = capture->next;
kfree(capture); kfree(capture);
capture = next; capture = next;
} }
} }
static void i915_gem_request_retire(struct drm_i915_gem_request *request) static void i915_request_retire(struct i915_request *request)
{ {
struct intel_engine_cs *engine = request->engine; struct intel_engine_cs *engine = request->engine;
struct i915_gem_active *active, *next; struct i915_gem_active *active, *next;
lockdep_assert_held(&request->i915->drm.struct_mutex); lockdep_assert_held(&request->i915->drm.struct_mutex);
GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit)); GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit));
GEM_BUG_ON(!i915_gem_request_completed(request)); GEM_BUG_ON(!i915_request_completed(request));
GEM_BUG_ON(!request->i915->gt.active_requests); GEM_BUG_ON(!request->i915->gt.active_requests);
trace_i915_gem_request_retire(request); trace_i915_request_retire(request);
spin_lock_irq(&engine->timeline->lock); spin_lock_irq(&engine->timeline->lock);
list_del_init(&request->link); list_del_init(&request->link);
@ -399,7 +401,8 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
free_capture_list(request); free_capture_list(request);
/* Walk through the active list, calling retire on each. This allows /*
* Walk through the active list, calling retire on each. This allows
* objects to track their GPU activity and mark themselves as idle * objects to track their GPU activity and mark themselves as idle
* when their *last* active request is completed (updating state * when their *last* active request is completed (updating state
* tracking lists for eviction, active references for GEM, etc). * tracking lists for eviction, active references for GEM, etc).
@ -409,7 +412,8 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
* the node after the callback). * the node after the callback).
*/ */
list_for_each_entry_safe(active, next, &request->active_list, link) { list_for_each_entry_safe(active, next, &request->active_list, link) {
/* In microbenchmarks or focusing upon time inside the kernel, /*
* In microbenchmarks or focusing upon time inside the kernel,
* we may spend an inordinate amount of time simply handling * we may spend an inordinate amount of time simply handling
* the retirement of requests and processing their callbacks. * the retirement of requests and processing their callbacks.
* Of which, this loop itself is particularly hot due to the * Of which, this loop itself is particularly hot due to the
@ -426,15 +430,16 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
active->retire(active, request); active->retire(active, request);
} }
i915_gem_request_remove_from_client(request); i915_request_remove_from_client(request);
/* Retirement decays the ban score as it is a sign of ctx progress */ /* Retirement decays the ban score as it is a sign of ctx progress */
atomic_dec_if_positive(&request->ctx->ban_score); atomic_dec_if_positive(&request->ctx->ban_score);
/* The backing object for the context is done after switching to the /*
* The backing object for the context is done after switching to the
* *next* context. Therefore we cannot retire the previous context until * *next* context. Therefore we cannot retire the previous context until
* the next context has already started running. However, since we * the next context has already started running. However, since we
* cannot take the required locks at i915_gem_request_submit() we * cannot take the required locks at i915_request_submit() we
* defer the unpinning of the active context to now, retirement of * defer the unpinning of the active context to now, retirement of
* the subsequent request. * the subsequent request.
*/ */
@ -454,26 +459,26 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
spin_unlock_irq(&request->lock); spin_unlock_irq(&request->lock);
i915_priotree_fini(request->i915, &request->priotree); i915_priotree_fini(request->i915, &request->priotree);
i915_gem_request_put(request); i915_request_put(request);
} }
void i915_gem_request_retire_upto(struct drm_i915_gem_request *req) void i915_request_retire_upto(struct i915_request *rq)
{ {
struct intel_engine_cs *engine = req->engine; struct intel_engine_cs *engine = rq->engine;
struct drm_i915_gem_request *tmp; struct i915_request *tmp;
lockdep_assert_held(&req->i915->drm.struct_mutex); lockdep_assert_held(&rq->i915->drm.struct_mutex);
GEM_BUG_ON(!i915_gem_request_completed(req)); GEM_BUG_ON(!i915_request_completed(rq));
if (list_empty(&req->link)) if (list_empty(&rq->link))
return; return;
do { do {
tmp = list_first_entry(&engine->timeline->requests, tmp = list_first_entry(&engine->timeline->requests,
typeof(*tmp), link); typeof(*tmp), link);
i915_gem_request_retire(tmp); i915_request_retire(tmp);
} while (tmp != req); } while (tmp != rq);
} }
static u32 timeline_get_seqno(struct intel_timeline *tl) static u32 timeline_get_seqno(struct intel_timeline *tl)
@ -481,7 +486,7 @@ static u32 timeline_get_seqno(struct intel_timeline *tl)
return ++tl->seqno; return ++tl->seqno;
} }
void __i915_gem_request_submit(struct drm_i915_gem_request *request) void __i915_request_submit(struct i915_request *request)
{ {
struct intel_engine_cs *engine = request->engine; struct intel_engine_cs *engine = request->engine;
struct intel_timeline *timeline; struct intel_timeline *timeline;
@ -490,8 +495,6 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request)
GEM_BUG_ON(!irqs_disabled()); GEM_BUG_ON(!irqs_disabled());
lockdep_assert_held(&engine->timeline->lock); lockdep_assert_held(&engine->timeline->lock);
trace_i915_gem_request_execute(request);
/* Transfer from per-context onto the global per-engine timeline */ /* Transfer from per-context onto the global per-engine timeline */
timeline = engine->timeline; timeline = engine->timeline;
GEM_BUG_ON(timeline == request->timeline); GEM_BUG_ON(timeline == request->timeline);
@ -515,10 +518,12 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request)
list_move_tail(&request->link, &timeline->requests); list_move_tail(&request->link, &timeline->requests);
spin_unlock(&request->timeline->lock); spin_unlock(&request->timeline->lock);
trace_i915_request_execute(request);
wake_up_all(&request->execute); wake_up_all(&request->execute);
} }
void i915_gem_request_submit(struct drm_i915_gem_request *request) void i915_request_submit(struct i915_request *request)
{ {
struct intel_engine_cs *engine = request->engine; struct intel_engine_cs *engine = request->engine;
unsigned long flags; unsigned long flags;
@ -526,12 +531,12 @@ void i915_gem_request_submit(struct drm_i915_gem_request *request)
/* Will be called from irq-context when using foreign fences. */ /* Will be called from irq-context when using foreign fences. */
spin_lock_irqsave(&engine->timeline->lock, flags); spin_lock_irqsave(&engine->timeline->lock, flags);
__i915_gem_request_submit(request); __i915_request_submit(request);
spin_unlock_irqrestore(&engine->timeline->lock, flags); spin_unlock_irqrestore(&engine->timeline->lock, flags);
} }
void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request) void __i915_request_unsubmit(struct i915_request *request)
{ {
struct intel_engine_cs *engine = request->engine; struct intel_engine_cs *engine = request->engine;
struct intel_timeline *timeline; struct intel_timeline *timeline;
@ -539,7 +544,8 @@ void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request)
GEM_BUG_ON(!irqs_disabled()); GEM_BUG_ON(!irqs_disabled());
lockdep_assert_held(&engine->timeline->lock); lockdep_assert_held(&engine->timeline->lock);
/* Only unwind in reverse order, required so that the per-context list /*
* Only unwind in reverse order, required so that the per-context list
* is kept in seqno/ring order. * is kept in seqno/ring order.
*/ */
GEM_BUG_ON(!request->global_seqno); GEM_BUG_ON(!request->global_seqno);
@ -563,15 +569,16 @@ void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request)
list_move(&request->link, &timeline->requests); list_move(&request->link, &timeline->requests);
spin_unlock(&timeline->lock); spin_unlock(&timeline->lock);
/* We don't need to wake_up any waiters on request->execute, they /*
* We don't need to wake_up any waiters on request->execute, they
* will get woken by any other event or us re-adding this request * will get woken by any other event or us re-adding this request
* to the engine timeline (__i915_gem_request_submit()). The waiters * to the engine timeline (__i915_request_submit()). The waiters
* should be quite adept at finding that the request now has a new * should be quite adept at finding that the request now has a new
* global_seqno to the one they went to sleep on. * global_seqno to the one they went to sleep on.
*/ */
} }
void i915_gem_request_unsubmit(struct drm_i915_gem_request *request) void i915_request_unsubmit(struct i915_request *request)
{ {
struct intel_engine_cs *engine = request->engine; struct intel_engine_cs *engine = request->engine;
unsigned long flags; unsigned long flags;
@ -579,7 +586,7 @@ void i915_gem_request_unsubmit(struct drm_i915_gem_request *request)
/* Will be called from irq-context when using foreign fences. */ /* Will be called from irq-context when using foreign fences. */
spin_lock_irqsave(&engine->timeline->lock, flags); spin_lock_irqsave(&engine->timeline->lock, flags);
__i915_gem_request_unsubmit(request); __i915_request_unsubmit(request);
spin_unlock_irqrestore(&engine->timeline->lock, flags); spin_unlock_irqrestore(&engine->timeline->lock, flags);
} }
@ -587,18 +594,19 @@ void i915_gem_request_unsubmit(struct drm_i915_gem_request *request)
static int __i915_sw_fence_call static int __i915_sw_fence_call
submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
{ {
struct drm_i915_gem_request *request = struct i915_request *request =
container_of(fence, typeof(*request), submit); container_of(fence, typeof(*request), submit);
switch (state) { switch (state) {
case FENCE_COMPLETE: case FENCE_COMPLETE:
trace_i915_gem_request_submit(request); trace_i915_request_submit(request);
/* /*
* We need to serialize use of the submit_request() callback with its * We need to serialize use of the submit_request() callback
* hotplugging performed during an emergency i915_gem_set_wedged(). * with its hotplugging performed during an emergency
* We use the RCU mechanism to mark the critical section in order to * i915_gem_set_wedged(). We use the RCU mechanism to mark the
* force i915_gem_set_wedged() to wait until the submit_request() is * critical section in order to force i915_gem_set_wedged() to
* completed before proceeding. * wait until the submit_request() is completed before
* proceeding.
*/ */
rcu_read_lock(); rcu_read_lock();
request->engine->submit_request(request); request->engine->submit_request(request);
@ -606,7 +614,7 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
break; break;
case FENCE_FREE: case FENCE_FREE:
i915_gem_request_put(request); i915_request_put(request);
break; break;
} }
@ -614,7 +622,7 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
} }
/** /**
* i915_gem_request_alloc - allocate a request structure * i915_request_alloc - allocate a request structure
* *
* @engine: engine that we wish to issue the request on. * @engine: engine that we wish to issue the request on.
* @ctx: context that the request will be associated with. * @ctx: context that the request will be associated with.
@ -622,31 +630,32 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
* Returns a pointer to the allocated request if successful, * Returns a pointer to the allocated request if successful,
* or an error code if not. * or an error code if not.
*/ */
struct drm_i915_gem_request * struct i915_request *
i915_gem_request_alloc(struct intel_engine_cs *engine, i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
struct i915_gem_context *ctx)
{ {
struct drm_i915_private *dev_priv = engine->i915; struct drm_i915_private *i915 = engine->i915;
struct drm_i915_gem_request *req; struct i915_request *rq;
struct intel_ring *ring; struct intel_ring *ring;
int ret; int ret;
lockdep_assert_held(&dev_priv->drm.struct_mutex); lockdep_assert_held(&i915->drm.struct_mutex);
/* /*
* Preempt contexts are reserved for exclusive use to inject a * Preempt contexts are reserved for exclusive use to inject a
* preemption context switch. They are never to be used for any trivial * preemption context switch. They are never to be used for any trivial
* request! * request!
*/ */
GEM_BUG_ON(ctx == dev_priv->preempt_context); GEM_BUG_ON(ctx == i915->preempt_context);
/* ABI: Before userspace accesses the GPU (e.g. execbuffer), report /*
* ABI: Before userspace accesses the GPU (e.g. execbuffer), report
* EIO if the GPU is already wedged. * EIO if the GPU is already wedged.
*/ */
if (i915_terminally_wedged(&dev_priv->gpu_error)) if (i915_terminally_wedged(&i915->gpu_error))
return ERR_PTR(-EIO); return ERR_PTR(-EIO);
/* Pinning the contexts may generate requests in order to acquire /*
* Pinning the contexts may generate requests in order to acquire
* GGTT space, so do this first before we reserve a seqno for * GGTT space, so do this first before we reserve a seqno for
* ourselves. * ourselves.
*/ */
@ -664,12 +673,13 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
goto err_unreserve; goto err_unreserve;
/* Move the oldest request to the slab-cache (if not in use!) */ /* Move the oldest request to the slab-cache (if not in use!) */
req = list_first_entry_or_null(&engine->timeline->requests, rq = list_first_entry_or_null(&engine->timeline->requests,
typeof(*req), link); typeof(*rq), link);
if (req && i915_gem_request_completed(req)) if (rq && i915_request_completed(rq))
i915_gem_request_retire(req); i915_request_retire(rq);
/* Beware: Dragons be flying overhead. /*
* Beware: Dragons be flying overhead.
* *
* We use RCU to look up requests in flight. The lookups may * We use RCU to look up requests in flight. The lookups may
* race with the request being allocated from the slab freelist. * race with the request being allocated from the slab freelist.
@ -697,11 +707,11 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
* *
* Do not use kmem_cache_zalloc() here! * Do not use kmem_cache_zalloc() here!
*/ */
req = kmem_cache_alloc(dev_priv->requests, rq = kmem_cache_alloc(i915->requests,
GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
if (unlikely(!req)) { if (unlikely(!rq)) {
/* Ratelimit ourselves to prevent oom from malicious clients */ /* Ratelimit ourselves to prevent oom from malicious clients */
ret = i915_gem_wait_for_idle(dev_priv, ret = i915_gem_wait_for_idle(i915,
I915_WAIT_LOCKED | I915_WAIT_LOCKED |
I915_WAIT_INTERRUPTIBLE); I915_WAIT_INTERRUPTIBLE);
if (ret) if (ret)
@ -715,55 +725,55 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
* Having already penalized the client to stall, we spend * Having already penalized the client to stall, we spend
* a little extra time to re-optimise page allocation. * a little extra time to re-optimise page allocation.
*/ */
kmem_cache_shrink(dev_priv->requests); kmem_cache_shrink(i915->requests);
rcu_barrier(); /* Recover the TYPESAFE_BY_RCU pages */ rcu_barrier(); /* Recover the TYPESAFE_BY_RCU pages */
req = kmem_cache_alloc(dev_priv->requests, GFP_KERNEL); rq = kmem_cache_alloc(i915->requests, GFP_KERNEL);
if (!req) { if (!rq) {
ret = -ENOMEM; ret = -ENOMEM;
goto err_unreserve; goto err_unreserve;
} }
} }
req->timeline = i915_gem_context_lookup_timeline(ctx, engine); rq->timeline = i915_gem_context_lookup_timeline(ctx, engine);
GEM_BUG_ON(req->timeline == engine->timeline); GEM_BUG_ON(rq->timeline == engine->timeline);
spin_lock_init(&req->lock); spin_lock_init(&rq->lock);
dma_fence_init(&req->fence, dma_fence_init(&rq->fence,
&i915_fence_ops, &i915_fence_ops,
&req->lock, &rq->lock,
req->timeline->fence_context, rq->timeline->fence_context,
timeline_get_seqno(req->timeline)); timeline_get_seqno(rq->timeline));
/* We bump the ref for the fence chain */ /* We bump the ref for the fence chain */
i915_sw_fence_init(&i915_gem_request_get(req)->submit, submit_notify); i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify);
init_waitqueue_head(&req->execute); init_waitqueue_head(&rq->execute);
i915_priotree_init(&req->priotree); i915_priotree_init(&rq->priotree);
INIT_LIST_HEAD(&req->active_list); INIT_LIST_HEAD(&rq->active_list);
req->i915 = dev_priv; rq->i915 = i915;
req->engine = engine; rq->engine = engine;
req->ctx = ctx; rq->ctx = ctx;
req->ring = ring; rq->ring = ring;
/* No zalloc, must clear what we need by hand */ /* No zalloc, must clear what we need by hand */
req->global_seqno = 0; rq->global_seqno = 0;
req->signaling.wait.seqno = 0; rq->signaling.wait.seqno = 0;
req->file_priv = NULL; rq->file_priv = NULL;
req->batch = NULL; rq->batch = NULL;
req->capture_list = NULL; rq->capture_list = NULL;
req->waitboost = false; rq->waitboost = false;
/* /*
* Reserve space in the ring buffer for all the commands required to * Reserve space in the ring buffer for all the commands required to
* eventually emit this request. This is to guarantee that the * eventually emit this request. This is to guarantee that the
* i915_add_request() call can't fail. Note that the reserve may need * i915_request_add() call can't fail. Note that the reserve may need
* to be redone if the request is not actually submitted straight * to be redone if the request is not actually submitted straight
* away, e.g. because a GPU scheduler has deferred it. * away, e.g. because a GPU scheduler has deferred it.
*/ */
req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST; rq->reserved_space = MIN_SPACE_FOR_ADD_REQUEST;
GEM_BUG_ON(req->reserved_space < engine->emit_breadcrumb_sz); GEM_BUG_ON(rq->reserved_space < engine->emit_breadcrumb_sz);
/* /*
* Record the position of the start of the request so that * Record the position of the start of the request so that
@ -771,30 +781,30 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
* GPU processing the request, we never over-estimate the * GPU processing the request, we never over-estimate the
* position of the head. * position of the head.
*/ */
req->head = req->ring->emit; rq->head = rq->ring->emit;
/* Unconditionally invalidate GPU caches and TLBs. */ /* Unconditionally invalidate GPU caches and TLBs. */
ret = engine->emit_flush(req, EMIT_INVALIDATE); ret = engine->emit_flush(rq, EMIT_INVALIDATE);
if (ret) if (ret)
goto err_unwind; goto err_unwind;
ret = engine->request_alloc(req); ret = engine->request_alloc(rq);
if (ret) if (ret)
goto err_unwind; goto err_unwind;
/* Check that we didn't interrupt ourselves with a new request */ /* Check that we didn't interrupt ourselves with a new request */
GEM_BUG_ON(req->timeline->seqno != req->fence.seqno); GEM_BUG_ON(rq->timeline->seqno != rq->fence.seqno);
return req; return rq;
err_unwind: err_unwind:
req->ring->emit = req->head; rq->ring->emit = rq->head;
/* Make sure we didn't add ourselves to external state before freeing */ /* Make sure we didn't add ourselves to external state before freeing */
GEM_BUG_ON(!list_empty(&req->active_list)); GEM_BUG_ON(!list_empty(&rq->active_list));
GEM_BUG_ON(!list_empty(&req->priotree.signalers_list)); GEM_BUG_ON(!list_empty(&rq->priotree.signalers_list));
GEM_BUG_ON(!list_empty(&req->priotree.waiters_list)); GEM_BUG_ON(!list_empty(&rq->priotree.waiters_list));
kmem_cache_free(dev_priv->requests, req); kmem_cache_free(i915->requests, rq);
err_unreserve: err_unreserve:
unreserve_engine(engine); unreserve_engine(engine);
err_unpin: err_unpin:
@ -803,15 +813,14 @@ err_unpin:
} }
static int static int
i915_gem_request_await_request(struct drm_i915_gem_request *to, i915_request_await_request(struct i915_request *to, struct i915_request *from)
struct drm_i915_gem_request *from)
{ {
int ret; int ret;
GEM_BUG_ON(to == from); GEM_BUG_ON(to == from);
GEM_BUG_ON(to->timeline == from->timeline); GEM_BUG_ON(to->timeline == from->timeline);
if (i915_gem_request_completed(from)) if (i915_request_completed(from))
return 0; return 0;
if (to->engine->schedule) { if (to->engine->schedule) {
@ -834,7 +843,7 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
GEM_BUG_ON(!from->engine->semaphore.signal); GEM_BUG_ON(!from->engine->semaphore.signal);
seqno = i915_gem_request_global_seqno(from); seqno = i915_request_global_seqno(from);
if (!seqno) if (!seqno)
goto await_dma_fence; goto await_dma_fence;
@ -858,14 +867,14 @@ await_dma_fence:
} }
int int
i915_gem_request_await_dma_fence(struct drm_i915_gem_request *req, i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
struct dma_fence *fence)
{ {
struct dma_fence **child = &fence; struct dma_fence **child = &fence;
unsigned int nchild = 1; unsigned int nchild = 1;
int ret; int ret;
/* Note that if the fence-array was created in signal-on-any mode, /*
* Note that if the fence-array was created in signal-on-any mode,
* we should *not* decompose it into its individual fences. However, * we should *not* decompose it into its individual fences. However,
* we don't currently store which mode the fence-array is operating * we don't currently store which mode the fence-array is operating
* in. Fortunately, the only user of signal-on-any is private to * in. Fortunately, the only user of signal-on-any is private to
@ -887,37 +896,36 @@ i915_gem_request_await_dma_fence(struct drm_i915_gem_request *req,
/* /*
* Requests on the same timeline are explicitly ordered, along * Requests on the same timeline are explicitly ordered, along
* with their dependencies, by i915_add_request() which ensures * with their dependencies, by i915_request_add() which ensures
* that requests are submitted in-order through each ring. * that requests are submitted in-order through each ring.
*/ */
if (fence->context == req->fence.context) if (fence->context == rq->fence.context)
continue; continue;
/* Squash repeated waits to the same timelines */ /* Squash repeated waits to the same timelines */
if (fence->context != req->i915->mm.unordered_timeline && if (fence->context != rq->i915->mm.unordered_timeline &&
intel_timeline_sync_is_later(req->timeline, fence)) intel_timeline_sync_is_later(rq->timeline, fence))
continue; continue;
if (dma_fence_is_i915(fence)) if (dma_fence_is_i915(fence))
ret = i915_gem_request_await_request(req, ret = i915_request_await_request(rq, to_request(fence));
to_request(fence));
else else
ret = i915_sw_fence_await_dma_fence(&req->submit, fence, ret = i915_sw_fence_await_dma_fence(&rq->submit, fence,
I915_FENCE_TIMEOUT, I915_FENCE_TIMEOUT,
I915_FENCE_GFP); I915_FENCE_GFP);
if (ret < 0) if (ret < 0)
return ret; return ret;
/* Record the latest fence used against each timeline */ /* Record the latest fence used against each timeline */
if (fence->context != req->i915->mm.unordered_timeline) if (fence->context != rq->i915->mm.unordered_timeline)
intel_timeline_sync_set(req->timeline, fence); intel_timeline_sync_set(rq->timeline, fence);
} while (--nchild); } while (--nchild);
return 0; return 0;
} }
/** /**
* i915_gem_request_await_object - set this request to (async) wait upon a bo * i915_request_await_object - set this request to (async) wait upon a bo
* @to: request we are wishing to use * @to: request we are wishing to use
* @obj: object which may be in use on another ring. * @obj: object which may be in use on another ring.
* @write: whether the wait is on behalf of a writer * @write: whether the wait is on behalf of a writer
@ -937,9 +945,9 @@ i915_gem_request_await_dma_fence(struct drm_i915_gem_request *req,
* Returns 0 if successful, else propagates up the lower layer error. * Returns 0 if successful, else propagates up the lower layer error.
*/ */
int int
i915_gem_request_await_object(struct drm_i915_gem_request *to, i915_request_await_object(struct i915_request *to,
struct drm_i915_gem_object *obj, struct drm_i915_gem_object *obj,
bool write) bool write)
{ {
struct dma_fence *excl; struct dma_fence *excl;
int ret = 0; int ret = 0;
@ -954,7 +962,7 @@ i915_gem_request_await_object(struct drm_i915_gem_request *to,
return ret; return ret;
for (i = 0; i < count; i++) { for (i = 0; i < count; i++) {
ret = i915_gem_request_await_dma_fence(to, shared[i]); ret = i915_request_await_dma_fence(to, shared[i]);
if (ret) if (ret)
break; break;
@ -970,7 +978,7 @@ i915_gem_request_await_object(struct drm_i915_gem_request *to,
if (excl) { if (excl) {
if (ret == 0) if (ret == 0)
ret = i915_gem_request_await_dma_fence(to, excl); ret = i915_request_await_dma_fence(to, excl);
dma_fence_put(excl); dma_fence_put(excl);
} }
@ -983,21 +991,21 @@ i915_gem_request_await_object(struct drm_i915_gem_request *to,
* request is not being tracked for completion but the work itself is * request is not being tracked for completion but the work itself is
* going to happen on the hardware. This would be a Bad Thing(tm). * going to happen on the hardware. This would be a Bad Thing(tm).
*/ */
void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) void __i915_request_add(struct i915_request *request, bool flush_caches)
{ {
struct intel_engine_cs *engine = request->engine; struct intel_engine_cs *engine = request->engine;
struct intel_ring *ring = request->ring; struct intel_ring *ring = request->ring;
struct intel_timeline *timeline = request->timeline; struct intel_timeline *timeline = request->timeline;
struct drm_i915_gem_request *prev; struct i915_request *prev;
u32 *cs; u32 *cs;
int err; int err;
lockdep_assert_held(&request->i915->drm.struct_mutex); lockdep_assert_held(&request->i915->drm.struct_mutex);
trace_i915_gem_request_add(request); trace_i915_request_add(request);
/* /*
* Make sure that no request gazumped us - if it was allocated after * Make sure that no request gazumped us - if it was allocated after
* our i915_gem_request_alloc() and called __i915_add_request() before * our i915_request_alloc() and called __i915_request_add() before
* us, the timeline will hold its seqno which is later than ours. * us, the timeline will hold its seqno which is later than ours.
*/ */
GEM_BUG_ON(timeline->seqno != request->fence.seqno); GEM_BUG_ON(timeline->seqno != request->fence.seqno);
@ -1042,7 +1050,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
prev = i915_gem_active_raw(&timeline->last_request, prev = i915_gem_active_raw(&timeline->last_request,
&request->i915->drm.struct_mutex); &request->i915->drm.struct_mutex);
if (prev && !i915_gem_request_completed(prev)) { if (prev && !i915_request_completed(prev)) {
i915_sw_fence_await_sw_fence(&request->submit, &prev->submit, i915_sw_fence_await_sw_fence(&request->submit, &prev->submit,
&request->submitq); &request->submitq);
if (engine->schedule) if (engine->schedule)
@ -1097,15 +1105,16 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
* work on behalf of others -- but instead we should benefit from * work on behalf of others -- but instead we should benefit from
* improved resource management. (Well, that's the theory at least.) * improved resource management. (Well, that's the theory at least.)
*/ */
if (prev && i915_gem_request_completed(prev)) if (prev && i915_request_completed(prev))
i915_gem_request_retire_upto(prev); i915_request_retire_upto(prev);
} }
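Pulled together, the renamed entry points keep the familiar request lifecycle. A condensed caller sketch, using only the functions shown in this diff with locking, command emission and error propagation elided or assumed, would be:

	/* Sketch: allocate, fill and queue a request, then block on it. */
	struct i915_request *rq;
	long timeout;

	rq = i915_request_alloc(engine, ctx);	/* struct_mutex held */
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	/* ... emit commands into rq->ring via the engine's emit hooks ... */

	__i915_request_add(rq, true);		/* queue it; cannot fail */

	timeout = i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE,
				    MAX_SCHEDULE_TIMEOUT);
	if (timeout < 0)
		return timeout;			/* e.g. -EINTR on a signal */

On success i915_request_wait() returns the remaining jiffies, matching the kernel-doc later in this file.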
static unsigned long local_clock_us(unsigned int *cpu) static unsigned long local_clock_us(unsigned int *cpu)
{ {
unsigned long t; unsigned long t;
/* Cheaply and approximately convert from nanoseconds to microseconds. /*
* Cheaply and approximately convert from nanoseconds to microseconds.
* The result and subsequent calculations are also defined in the same * The result and subsequent calculations are also defined in the same
* approximate microseconds units. The principal source of timing * approximate microseconds units. The principal source of timing
* error here is from the simple truncation. * error here is from the simple truncation.
@ -1133,10 +1142,10 @@ static bool busywait_stop(unsigned long timeout, unsigned int cpu)
return this_cpu != cpu; return this_cpu != cpu;
} }
static bool __i915_spin_request(const struct drm_i915_gem_request *req, static bool __i915_spin_request(const struct i915_request *rq,
u32 seqno, int state, unsigned long timeout_us) u32 seqno, int state, unsigned long timeout_us)
{ {
struct intel_engine_cs *engine = req->engine; struct intel_engine_cs *engine = rq->engine;
unsigned int irq, cpu; unsigned int irq, cpu;
GEM_BUG_ON(!seqno); GEM_BUG_ON(!seqno);
@ -1155,7 +1164,8 @@ static bool __i915_spin_request(const struct drm_i915_gem_request *req,
if (!i915_seqno_passed(intel_engine_get_seqno(engine), seqno - 1)) if (!i915_seqno_passed(intel_engine_get_seqno(engine), seqno - 1))
return false; return false;
/* When waiting for high frequency requests, e.g. during synchronous /*
* When waiting for high frequency requests, e.g. during synchronous
* rendering split between the CPU and GPU, the finite amount of time * rendering split between the CPU and GPU, the finite amount of time
* required to set up the irq and wait upon it limits the response * required to set up the irq and wait upon it limits the response
* rate. By busywaiting on the request completion for a short while we * rate. By busywaiting on the request completion for a short while we
@ -1169,9 +1179,10 @@ static bool __i915_spin_request(const struct drm_i915_gem_request *req,
timeout_us += local_clock_us(&cpu); timeout_us += local_clock_us(&cpu);
do { do {
if (i915_seqno_passed(intel_engine_get_seqno(engine), seqno)) if (i915_seqno_passed(intel_engine_get_seqno(engine), seqno))
return seqno == i915_gem_request_global_seqno(req); return seqno == i915_request_global_seqno(rq);
/* Seqno are meant to be ordered *before* the interrupt. If /*
* Seqno are meant to be ordered *before* the interrupt. If
* we see an interrupt without a corresponding seqno advance, * we see an interrupt without a corresponding seqno advance,
* assume we won't see one in the near future but require * assume we won't see one in the near future but require
* the engine->seqno_barrier() to fixup coherency. * the engine->seqno_barrier() to fixup coherency.
@ -1191,7 +1202,7 @@ static bool __i915_spin_request(const struct drm_i915_gem_request *req,
return false; return false;
} }
static bool __i915_wait_request_check_and_reset(struct drm_i915_gem_request *request) static bool __i915_wait_request_check_and_reset(struct i915_request *request)
{ {
if (likely(!i915_reset_handoff(&request->i915->gpu_error))) if (likely(!i915_reset_handoff(&request->i915->gpu_error)))
return false; return false;
@ -1202,12 +1213,12 @@ static bool __i915_wait_request_check_and_reset(struct drm_i915_gem_request *req
} }
/** /**
* i915_wait_request - wait until execution of request has finished * i915_request_wait - wait until execution of request has finished
* @req: the request to wait upon * @rq: the request to wait upon
* @flags: how to wait * @flags: how to wait
* @timeout: how long to wait in jiffies * @timeout: how long to wait in jiffies
* *
* i915_wait_request() waits for the request to be completed, for a * i915_request_wait() waits for the request to be completed, for a
* maximum of @timeout jiffies (with MAX_SCHEDULE_TIMEOUT implying an * maximum of @timeout jiffies (with MAX_SCHEDULE_TIMEOUT implying an
* unbounded wait). * unbounded wait).
* *
@ -1220,13 +1231,13 @@ static bool __i915_wait_request_check_and_reset(struct drm_i915_gem_request *req
* May return -EINTR if called with I915_WAIT_INTERRUPTIBLE and a signal is * May return -EINTR if called with I915_WAIT_INTERRUPTIBLE and a signal is
* pending before the request completes. * pending before the request completes.
*/ */
long i915_wait_request(struct drm_i915_gem_request *req, long i915_request_wait(struct i915_request *rq,
unsigned int flags, unsigned int flags,
long timeout) long timeout)
{ {
const int state = flags & I915_WAIT_INTERRUPTIBLE ? const int state = flags & I915_WAIT_INTERRUPTIBLE ?
TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
wait_queue_head_t *errq = &req->i915->gpu_error.wait_queue; wait_queue_head_t *errq = &rq->i915->gpu_error.wait_queue;
DEFINE_WAIT_FUNC(reset, default_wake_function); DEFINE_WAIT_FUNC(reset, default_wake_function);
DEFINE_WAIT_FUNC(exec, default_wake_function); DEFINE_WAIT_FUNC(exec, default_wake_function);
struct intel_wait wait; struct intel_wait wait;
@ -1234,33 +1245,33 @@ long i915_wait_request(struct drm_i915_gem_request *req,
might_sleep(); might_sleep();
#if IS_ENABLED(CONFIG_LOCKDEP) #if IS_ENABLED(CONFIG_LOCKDEP)
GEM_BUG_ON(debug_locks && GEM_BUG_ON(debug_locks &&
!!lockdep_is_held(&req->i915->drm.struct_mutex) != !!lockdep_is_held(&rq->i915->drm.struct_mutex) !=
!!(flags & I915_WAIT_LOCKED)); !!(flags & I915_WAIT_LOCKED));
#endif #endif
GEM_BUG_ON(timeout < 0); GEM_BUG_ON(timeout < 0);
if (i915_gem_request_completed(req)) if (i915_request_completed(rq))
return timeout; return timeout;
if (!timeout) if (!timeout)
return -ETIME; return -ETIME;
trace_i915_gem_request_wait_begin(req, flags); trace_i915_request_wait_begin(rq, flags);
add_wait_queue(&req->execute, &exec); add_wait_queue(&rq->execute, &exec);
if (flags & I915_WAIT_LOCKED) if (flags & I915_WAIT_LOCKED)
add_wait_queue(errq, &reset); add_wait_queue(errq, &reset);
intel_wait_init(&wait, req); intel_wait_init(&wait, rq);
restart: restart:
do { do {
set_current_state(state); set_current_state(state);
if (intel_wait_update_request(&wait, req)) if (intel_wait_update_request(&wait, rq))
break; break;
if (flags & I915_WAIT_LOCKED && if (flags & I915_WAIT_LOCKED &&
__i915_wait_request_check_and_reset(req)) __i915_wait_request_check_and_reset(rq))
continue; continue;
if (signal_pending_state(state, current)) { if (signal_pending_state(state, current)) {
@ -1277,22 +1288,23 @@ restart:
} while (1); } while (1);
GEM_BUG_ON(!intel_wait_has_seqno(&wait)); GEM_BUG_ON(!intel_wait_has_seqno(&wait));
GEM_BUG_ON(!i915_sw_fence_signaled(&req->submit)); GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
/* Optimistic short spin before touching IRQs */ /* Optimistic short spin before touching IRQs */
if (__i915_spin_request(req, wait.seqno, state, 5)) if (__i915_spin_request(rq, wait.seqno, state, 5))
goto complete; goto complete;
set_current_state(state); set_current_state(state);
if (intel_engine_add_wait(req->engine, &wait)) if (intel_engine_add_wait(rq->engine, &wait))
/* In order to check that we haven't missed the interrupt /*
* In order to check that we haven't missed the interrupt
* as we enabled it, we need to kick ourselves to do a * as we enabled it, we need to kick ourselves to do a
* coherent check on the seqno before we sleep. * coherent check on the seqno before we sleep.
*/ */
goto wakeup; goto wakeup;
if (flags & I915_WAIT_LOCKED) if (flags & I915_WAIT_LOCKED)
__i915_wait_request_check_and_reset(req); __i915_wait_request_check_and_reset(rq);
for (;;) { for (;;) {
if (signal_pending_state(state, current)) { if (signal_pending_state(state, current)) {
@ -1308,21 +1320,23 @@ restart:
timeout = io_schedule_timeout(timeout); timeout = io_schedule_timeout(timeout);
if (intel_wait_complete(&wait) && if (intel_wait_complete(&wait) &&
intel_wait_check_request(&wait, req)) intel_wait_check_request(&wait, rq))
break; break;
set_current_state(state); set_current_state(state);
wakeup: wakeup:
/* Carefully check if the request is complete, giving time /*
* Carefully check if the request is complete, giving time
* for the seqno to be visible following the interrupt. * for the seqno to be visible following the interrupt.
* We also have to check in case we are kicked by the GPU * We also have to check in case we are kicked by the GPU
* reset in order to drop the struct_mutex. * reset in order to drop the struct_mutex.
*/ */
if (__i915_request_irq_complete(req)) if (__i915_request_irq_complete(rq))
break; break;
/* If the GPU is hung, and we hold the lock, reset the GPU /*
* If the GPU is hung, and we hold the lock, reset the GPU
* and then check for completion. On a full reset, the engine's * and then check for completion. On a full reset, the engine's
* HW seqno will be advanced past us and we are complete. * HW seqno will be advanced past us and we are complete.
* If we do a partial reset, we have to wait for the GPU to * If we do a partial reset, we have to wait for the GPU to
@ -1333,33 +1347,33 @@ wakeup:
* itself, or indirectly by recovering the GPU). * itself, or indirectly by recovering the GPU).
*/ */
if (flags & I915_WAIT_LOCKED && if (flags & I915_WAIT_LOCKED &&
__i915_wait_request_check_and_reset(req)) __i915_wait_request_check_and_reset(rq))
continue; continue;
/* Only spin if we know the GPU is processing this request */ /* Only spin if we know the GPU is processing this request */
if (__i915_spin_request(req, wait.seqno, state, 2)) if (__i915_spin_request(rq, wait.seqno, state, 2))
break; break;
if (!intel_wait_check_request(&wait, req)) { if (!intel_wait_check_request(&wait, rq)) {
intel_engine_remove_wait(req->engine, &wait); intel_engine_remove_wait(rq->engine, &wait);
goto restart; goto restart;
} }
} }
intel_engine_remove_wait(req->engine, &wait); intel_engine_remove_wait(rq->engine, &wait);
complete: complete:
__set_current_state(TASK_RUNNING); __set_current_state(TASK_RUNNING);
if (flags & I915_WAIT_LOCKED) if (flags & I915_WAIT_LOCKED)
remove_wait_queue(errq, &reset); remove_wait_queue(errq, &reset);
remove_wait_queue(&req->execute, &exec); remove_wait_queue(&rq->execute, &exec);
trace_i915_gem_request_wait_end(req); trace_i915_request_wait_end(rq);
return timeout; return timeout;
} }
static void engine_retire_requests(struct intel_engine_cs *engine) static void engine_retire_requests(struct intel_engine_cs *engine)
{ {
struct drm_i915_gem_request *request, *next; struct i915_request *request, *next;
u32 seqno = intel_engine_get_seqno(engine); u32 seqno = intel_engine_get_seqno(engine);
LIST_HEAD(retire); LIST_HEAD(retire);
@ -1374,24 +1388,24 @@ static void engine_retire_requests(struct intel_engine_cs *engine)
spin_unlock_irq(&engine->timeline->lock); spin_unlock_irq(&engine->timeline->lock);
list_for_each_entry_safe(request, next, &retire, link) list_for_each_entry_safe(request, next, &retire, link)
i915_gem_request_retire(request); i915_request_retire(request);
} }
void i915_gem_retire_requests(struct drm_i915_private *dev_priv) void i915_retire_requests(struct drm_i915_private *i915)
{ {
struct intel_engine_cs *engine; struct intel_engine_cs *engine;
enum intel_engine_id id; enum intel_engine_id id;
lockdep_assert_held(&dev_priv->drm.struct_mutex); lockdep_assert_held(&i915->drm.struct_mutex);
if (!dev_priv->gt.active_requests) if (!i915->gt.active_requests)
return; return;
for_each_engine(engine, dev_priv, id) for_each_engine(engine, i915, id)
engine_retire_requests(engine); engine_retire_requests(engine);
} }
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_request.c" #include "selftests/mock_request.c"
#include "selftests/i915_gem_request.c" #include "selftests/i915_request.c"
#endif #endif

View File

@ -1,5 +1,5 @@
/* /*
* Copyright © 2008-2015 Intel Corporation * Copyright © 2008-2018 Intel Corporation
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"), * copy of this software and associated documentation files (the "Software"),
@ -22,8 +22,8 @@
* *
*/ */
#ifndef I915_GEM_REQUEST_H #ifndef I915_REQUEST_H
#define I915_GEM_REQUEST_H #define I915_REQUEST_H
#include <linux/dma-fence.h> #include <linux/dma-fence.h>
@ -34,18 +34,18 @@
struct drm_file; struct drm_file;
struct drm_i915_gem_object; struct drm_i915_gem_object;
struct drm_i915_gem_request; struct i915_request;
struct intel_wait { struct intel_wait {
struct rb_node node; struct rb_node node;
struct task_struct *tsk; struct task_struct *tsk;
struct drm_i915_gem_request *request; struct i915_request *request;
u32 seqno; u32 seqno;
}; };
struct intel_signal_node { struct intel_signal_node {
struct rb_node node;
struct intel_wait wait; struct intel_wait wait;
struct list_head link;
}; };
struct i915_dependency { struct i915_dependency {
@ -57,7 +57,12 @@ struct i915_dependency {
#define I915_DEPENDENCY_ALLOC BIT(0) #define I915_DEPENDENCY_ALLOC BIT(0)
}; };
/* Requests exist in a complex web of interdependencies. Each request /*
* "People assume that time is a strict progression of cause to effect, but
* actually, from a nonlinear, non-subjective viewpoint, it's more like a big
* ball of wibbly-wobbly, timey-wimey ... stuff." -The Doctor, 2015
*
* Requests exist in a complex web of interdependencies. Each request
* has to wait for some other request to complete before it is ready to be run * has to wait for some other request to complete before it is ready to be run
* (e.g. we have to wait until the pixels have been rendered into a texture * (e.g. we have to wait until the pixels have been rendered into a texture
* before we can copy from it). We track the readiness of a request in terms * before we can copy from it). We track the readiness of a request in terms
@ -81,8 +86,8 @@ enum {
I915_PRIORITY_INVALID = INT_MIN I915_PRIORITY_INVALID = INT_MIN
}; };
struct i915_gem_capture_list { struct i915_capture_list {
struct i915_gem_capture_list *next; struct i915_capture_list *next;
struct i915_vma *vma; struct i915_vma *vma;
}; };
@ -106,7 +111,7 @@ struct i915_gem_capture_list {
* *
* The requests are reference counted. * The requests are reference counted.
*/ */
struct drm_i915_gem_request { struct i915_request {
struct dma_fence fence; struct dma_fence fence;
spinlock_t lock; spinlock_t lock;
@ -120,7 +125,7 @@ struct drm_i915_gem_request {
* it persists while any request is linked to it. Requests themselves * it persists while any request is linked to it. Requests themselves
* are also refcounted, so the request will only be freed when the last * are also refcounted, so the request will only be freed when the last
* reference to it is dismissed, and the code in * reference to it is dismissed, and the code in
* i915_gem_request_free() will then decrement the refcount on the * i915_request_free() will then decrement the refcount on the
* context. * context.
*/ */
struct i915_gem_context *ctx; struct i915_gem_context *ctx;
@ -129,7 +134,8 @@ struct drm_i915_gem_request {
struct intel_timeline *timeline; struct intel_timeline *timeline;
struct intel_signal_node signaling; struct intel_signal_node signaling;
/* Fences for the various phases in the request's lifetime. /*
* Fences for the various phases in the request's lifetime.
* *
* The submit fence is used to await upon all of the request's * The submit fence is used to await upon all of the request's
* dependencies. When it is signaled, the request is ready to run. * dependencies. When it is signaled, the request is ready to run.
@ -139,7 +145,8 @@ struct drm_i915_gem_request {
wait_queue_entry_t submitq; wait_queue_entry_t submitq;
wait_queue_head_t execute; wait_queue_head_t execute;
/* A list of everyone we wait upon, and everyone who waits upon us. /*
* A list of everyone we wait upon, and everyone who waits upon us.
* Even though we will not be submitted to the hardware before the * Even though we will not be submitted to the hardware before the
* submit fence is signaled (it waits for all external events as well * submit fence is signaled (it waits for all external events as well
* as our own requests), the scheduler still needs to know the * as our own requests), the scheduler still needs to know the
@ -150,7 +157,8 @@ struct drm_i915_gem_request {
struct i915_priotree priotree; struct i915_priotree priotree;
struct i915_dependency dep; struct i915_dependency dep;
/** GEM sequence number associated with this request on the /**
* GEM sequence number associated with this request on the
* global execution timeline. It is zero when the request is not * global execution timeline. It is zero when the request is not
* on the HW queue (i.e. not on the engine timeline list). * on the HW queue (i.e. not on the engine timeline list).
* Its value is guarded by the timeline spinlock. * Its value is guarded by the timeline spinlock.
@ -180,12 +188,13 @@ struct drm_i915_gem_request {
* error state dump only). * error state dump only).
*/ */
struct i915_vma *batch; struct i915_vma *batch;
/** Additional buffers requested by userspace to be captured upon /**
* Additional buffers requested by userspace to be captured upon
* a GPU hang. The vma/obj on this list are protected by their * a GPU hang. The vma/obj on this list are protected by their
* active reference - all objects on this list must also be * active reference - all objects on this list must also be
* on the active_list (of their final request). * on the active_list (of their final request).
*/ */
struct i915_gem_capture_list *capture_list; struct i915_capture_list *capture_list;
struct list_head active_list; struct list_head active_list;
/** Time at which this request was emitted, in jiffies. */ /** Time at which this request was emitted, in jiffies. */
@ -213,40 +222,40 @@ static inline bool dma_fence_is_i915(const struct dma_fence *fence)
return fence->ops == &i915_fence_ops; return fence->ops == &i915_fence_ops;
} }
struct drm_i915_gem_request * __must_check struct i915_request * __must_check
i915_gem_request_alloc(struct intel_engine_cs *engine, i915_request_alloc(struct intel_engine_cs *engine,
struct i915_gem_context *ctx); struct i915_gem_context *ctx);
void i915_gem_request_retire_upto(struct drm_i915_gem_request *req); void i915_request_retire_upto(struct i915_request *rq);
static inline struct drm_i915_gem_request * static inline struct i915_request *
to_request(struct dma_fence *fence) to_request(struct dma_fence *fence)
{ {
/* We assume that NULL fence/request are interoperable */ /* We assume that NULL fence/request are interoperable */
BUILD_BUG_ON(offsetof(struct drm_i915_gem_request, fence) != 0); BUILD_BUG_ON(offsetof(struct i915_request, fence) != 0);
GEM_BUG_ON(fence && !dma_fence_is_i915(fence)); GEM_BUG_ON(fence && !dma_fence_is_i915(fence));
return container_of(fence, struct drm_i915_gem_request, fence); return container_of(fence, struct i915_request, fence);
} }
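
The BUILD_BUG_ON above relies on the dma_fence being the very first member of struct i915_request, so a fence pointer can be converted back to its request with container_of() at zero cost. A minimal standalone illustration of that embedding trick (generic names, not i915 code):

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct fence { unsigned int seqno; };

struct request {
        struct fence fence;     /* must stay the first member */
        int priority;
};

static struct request *to_request(struct fence *f)
{
        /* mirrors BUILD_BUG_ON(offsetof(struct i915_request, fence) != 0):
         * with the member at offset 0 this is effectively a plain cast */
        return container_of(f, struct request, fence);
}

int main(void)
{
        struct request rq = { .fence = { .seqno = 1 }, .priority = 42 };

        printf("%d\n", to_request(&rq.fence)->priority); /* prints 42 */
        return 0;
}
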
static inline struct drm_i915_gem_request * static inline struct i915_request *
i915_gem_request_get(struct drm_i915_gem_request *req) i915_request_get(struct i915_request *rq)
{ {
return to_request(dma_fence_get(&req->fence)); return to_request(dma_fence_get(&rq->fence));
} }
static inline struct drm_i915_gem_request * static inline struct i915_request *
i915_gem_request_get_rcu(struct drm_i915_gem_request *req) i915_request_get_rcu(struct i915_request *rq)
{ {
return to_request(dma_fence_get_rcu(&req->fence)); return to_request(dma_fence_get_rcu(&rq->fence));
} }
static inline void static inline void
i915_gem_request_put(struct drm_i915_gem_request *req) i915_request_put(struct i915_request *rq)
{ {
dma_fence_put(&req->fence); dma_fence_put(&rq->fence);
} }
/** /**
* i915_gem_request_global_seqno - report the current global seqno * i915_request_global_seqno - report the current global seqno
* @request - the request * @request - the request
* *
* A request is assigned a global seqno only when it is on the hardware * A request is assigned a global seqno only when it is on the hardware
@ -264,34 +273,28 @@ i915_gem_request_put(struct drm_i915_gem_request *req)
* after the read, it is indeed complete). * after the read, it is indeed complete).
*/ */
static u32 static u32
i915_gem_request_global_seqno(const struct drm_i915_gem_request *request) i915_request_global_seqno(const struct i915_request *request)
{ {
return READ_ONCE(request->global_seqno); return READ_ONCE(request->global_seqno);
} }
int int i915_request_await_object(struct i915_request *to,
i915_gem_request_await_object(struct drm_i915_gem_request *to,
struct drm_i915_gem_object *obj, struct drm_i915_gem_object *obj,
bool write); bool write);
int i915_gem_request_await_dma_fence(struct drm_i915_gem_request *req, int i915_request_await_dma_fence(struct i915_request *rq,
struct dma_fence *fence); struct dma_fence *fence);
void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches); void __i915_request_add(struct i915_request *rq, bool flush_caches);
#define i915_add_request(req) \ #define i915_request_add(rq) \
__i915_add_request(req, false) __i915_request_add(rq, false)
void __i915_gem_request_submit(struct drm_i915_gem_request *request); void __i915_request_submit(struct i915_request *request);
void i915_gem_request_submit(struct drm_i915_gem_request *request); void i915_request_submit(struct i915_request *request);
void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request); void __i915_request_unsubmit(struct i915_request *request);
void i915_gem_request_unsubmit(struct drm_i915_gem_request *request); void i915_request_unsubmit(struct i915_request *request);
struct intel_rps_client; long i915_request_wait(struct i915_request *rq,
#define NO_WAITBOOST ERR_PTR(-1)
#define IS_RPS_CLIENT(p) (!IS_ERR(p))
#define IS_RPS_USER(p) (!IS_ERR_OR_NULL(p))
long i915_wait_request(struct drm_i915_gem_request *req,
unsigned int flags, unsigned int flags,
long timeout) long timeout)
__attribute__((nonnull(1))); __attribute__((nonnull(1)));
@ -310,47 +313,48 @@ static inline bool i915_seqno_passed(u32 seq1, u32 seq2)
} }
static inline bool static inline bool
__i915_gem_request_completed(const struct drm_i915_gem_request *req, u32 seqno) __i915_request_completed(const struct i915_request *rq, u32 seqno)
{ {
GEM_BUG_ON(!seqno); GEM_BUG_ON(!seqno);
return i915_seqno_passed(intel_engine_get_seqno(req->engine), seqno) && return i915_seqno_passed(intel_engine_get_seqno(rq->engine), seqno) &&
seqno == i915_gem_request_global_seqno(req); seqno == i915_request_global_seqno(rq);
} }
static inline bool static inline bool i915_request_completed(const struct i915_request *rq)
i915_gem_request_completed(const struct drm_i915_gem_request *req)
{ {
u32 seqno; u32 seqno;
seqno = i915_gem_request_global_seqno(req); seqno = i915_request_global_seqno(rq);
if (!seqno) if (!seqno)
return false; return false;
return __i915_gem_request_completed(req, seqno); return __i915_request_completed(rq, seqno);
} }
static inline bool static inline bool i915_request_started(const struct i915_request *rq)
i915_gem_request_started(const struct drm_i915_gem_request *req)
{ {
u32 seqno; u32 seqno;
seqno = i915_gem_request_global_seqno(req); seqno = i915_request_global_seqno(rq);
if (!seqno) if (!seqno)
return false; return false;
return i915_seqno_passed(intel_engine_get_seqno(req->engine), return i915_seqno_passed(intel_engine_get_seqno(rq->engine),
seqno - 1); seqno - 1);
} }
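
Both helpers above lean on i915_seqno_passed(), which compares 32-bit seqnos in a wrap-around-safe way; a request only counts as complete when the engine's breadcrumb has passed its seqno and the global seqno is still assigned. A small standalone sketch of the wrap-safe comparison (illustrative, written from the semantics above rather than copied from the driver):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* true if seq1 is at or after seq2, tolerating 32-bit wrap-around */
static bool seqno_passed(uint32_t seq1, uint32_t seq2)
{
        return (int32_t)(seq1 - seq2) >= 0;
}

int main(void)
{
        printf("%d\n", seqno_passed(5, 3));             /* 1 */
        printf("%d\n", seqno_passed(3, 5));             /* 0 */
        printf("%d\n", seqno_passed(2, 0xfffffffe));    /* 1: counter wrapped */
        return 0;
}
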
static inline bool i915_priotree_signaled(const struct i915_priotree *pt) static inline bool i915_priotree_signaled(const struct i915_priotree *pt)
{ {
const struct drm_i915_gem_request *rq = const struct i915_request *rq =
container_of(pt, const struct drm_i915_gem_request, priotree); container_of(pt, const struct i915_request, priotree);
return i915_gem_request_completed(rq); return i915_request_completed(rq);
} }
/* We treat requests as fences. This is not to be confused with our void i915_retire_requests(struct drm_i915_private *i915);
/*
* We treat requests as fences. This is not to be confused with our
* "fence registers" but pipeline synchronisation objects ala GL_ARB_sync. * "fence registers" but pipeline synchronisation objects ala GL_ARB_sync.
* We use the fences to synchronize access from the CPU with activity on the * We use the fences to synchronize access from the CPU with activity on the
* GPU, for example, we should not rewrite an object's PTE whilst the GPU * GPU, for example, we should not rewrite an object's PTE whilst the GPU
@ -380,16 +384,16 @@ static inline bool i915_priotree_signaled(const struct i915_priotree *pt)
struct i915_gem_active; struct i915_gem_active;
typedef void (*i915_gem_retire_fn)(struct i915_gem_active *, typedef void (*i915_gem_retire_fn)(struct i915_gem_active *,
struct drm_i915_gem_request *); struct i915_request *);
struct i915_gem_active { struct i915_gem_active {
struct drm_i915_gem_request __rcu *request; struct i915_request __rcu *request;
struct list_head link; struct list_head link;
i915_gem_retire_fn retire; i915_gem_retire_fn retire;
}; };
void i915_gem_retire_noop(struct i915_gem_active *, void i915_gem_retire_noop(struct i915_gem_active *,
struct drm_i915_gem_request *request); struct i915_request *request);
/** /**
* init_request_active - prepares the activity tracker for use * init_request_active - prepares the activity tracker for use
@ -421,7 +425,7 @@ init_request_active(struct i915_gem_active *active,
*/ */
static inline void static inline void
i915_gem_active_set(struct i915_gem_active *active, i915_gem_active_set(struct i915_gem_active *active,
struct drm_i915_gem_request *request) struct i915_request *request)
{ {
list_move(&active->link, &request->active_list); list_move(&active->link, &request->active_list);
rcu_assign_pointer(active->request, request); rcu_assign_pointer(active->request, request);
@ -446,10 +450,11 @@ i915_gem_active_set_retire_fn(struct i915_gem_active *active,
active->retire = fn ?: i915_gem_retire_noop; active->retire = fn ?: i915_gem_retire_noop;
} }
static inline struct drm_i915_gem_request * static inline struct i915_request *
__i915_gem_active_peek(const struct i915_gem_active *active) __i915_gem_active_peek(const struct i915_gem_active *active)
{ {
/* Inside the error capture (running with the driver in an unknown /*
* Inside the error capture (running with the driver in an unknown
* state), we want to bend the rules slightly (a lot). * state), we want to bend the rules slightly (a lot).
* *
* Work is in progress to make it safer, in the meantime this keeps * Work is in progress to make it safer, in the meantime this keeps
@ -466,7 +471,7 @@ __i915_gem_active_peek(const struct i915_gem_active *active)
* It does not obtain a reference on the request for the caller, so the caller * It does not obtain a reference on the request for the caller, so the caller
* must hold struct_mutex. * must hold struct_mutex.
*/ */
static inline struct drm_i915_gem_request * static inline struct i915_request *
i915_gem_active_raw(const struct i915_gem_active *active, struct mutex *mutex) i915_gem_active_raw(const struct i915_gem_active *active, struct mutex *mutex)
{ {
return rcu_dereference_protected(active->request, return rcu_dereference_protected(active->request,
@ -481,13 +486,13 @@ i915_gem_active_raw(const struct i915_gem_active *active, struct mutex *mutex)
* still active, or NULL. It does not obtain a reference on the request * still active, or NULL. It does not obtain a reference on the request
* for the caller, so the caller must hold struct_mutex. * for the caller, so the caller must hold struct_mutex.
*/ */
static inline struct drm_i915_gem_request * static inline struct i915_request *
i915_gem_active_peek(const struct i915_gem_active *active, struct mutex *mutex) i915_gem_active_peek(const struct i915_gem_active *active, struct mutex *mutex)
{ {
struct drm_i915_gem_request *request; struct i915_request *request;
request = i915_gem_active_raw(active, mutex); request = i915_gem_active_raw(active, mutex);
if (!request || i915_gem_request_completed(request)) if (!request || i915_request_completed(request))
return NULL; return NULL;
return request; return request;
@ -500,10 +505,10 @@ i915_gem_active_peek(const struct i915_gem_active *active, struct mutex *mutex)
* i915_gem_active_get() returns a reference to the active request, or NULL * i915_gem_active_get() returns a reference to the active request, or NULL
* if the active tracker is idle. The caller must hold struct_mutex. * if the active tracker is idle. The caller must hold struct_mutex.
*/ */
static inline struct drm_i915_gem_request * static inline struct i915_request *
i915_gem_active_get(const struct i915_gem_active *active, struct mutex *mutex) i915_gem_active_get(const struct i915_gem_active *active, struct mutex *mutex)
{ {
return i915_gem_request_get(i915_gem_active_peek(active, mutex)); return i915_request_get(i915_gem_active_peek(active, mutex));
} }
/** /**
@ -514,10 +519,11 @@ i915_gem_active_get(const struct i915_gem_active *active, struct mutex *mutex)
* if the active tracker is idle. The caller must hold the RCU read lock, but * if the active tracker is idle. The caller must hold the RCU read lock, but
* the returned pointer is safe to use outside of RCU. * the returned pointer is safe to use outside of RCU.
*/ */
static inline struct drm_i915_gem_request * static inline struct i915_request *
__i915_gem_active_get_rcu(const struct i915_gem_active *active) __i915_gem_active_get_rcu(const struct i915_gem_active *active)
{ {
/* Performing a lockless retrieval of the active request is super /*
* Performing a lockless retrieval of the active request is super
* tricky. SLAB_TYPESAFE_BY_RCU merely guarantees that the backing * tricky. SLAB_TYPESAFE_BY_RCU merely guarantees that the backing
* slab of request objects will not be freed whilst we hold the * slab of request objects will not be freed whilst we hold the
* RCU read lock. It does not guarantee that the request itself * RCU read lock. It does not guarantee that the request itself
@ -525,13 +531,13 @@ __i915_gem_active_get_rcu(const struct i915_gem_active *active)
* *
* Thread A Thread B * Thread A Thread B
* *
* req = active.request * rq = active.request
* retire(req) -> free(req); * retire(rq) -> free(rq);
* (req is now first on the slab freelist) * (rq is now first on the slab freelist)
* active.request = NULL * active.request = NULL
* *
* req = new submission on a new object * rq = new submission on a new object
* ref(req) * ref(rq)
* *
* To prevent the request from being reused whilst the caller * To prevent the request from being reused whilst the caller
* uses it, we take a reference like normal. Whilst acquiring * uses it, we take a reference like normal. Whilst acquiring
@ -560,32 +566,34 @@ __i915_gem_active_get_rcu(const struct i915_gem_active *active)
* *
* It is then imperative that we do not zero the request on * It is then imperative that we do not zero the request on
* reallocation, so that we can chase the dangling pointers! * reallocation, so that we can chase the dangling pointers!
* See i915_gem_request_alloc(). * See i915_request_alloc().
*/ */
do { do {
struct drm_i915_gem_request *request; struct i915_request *request;
request = rcu_dereference(active->request); request = rcu_dereference(active->request);
if (!request || i915_gem_request_completed(request)) if (!request || i915_request_completed(request))
return NULL; return NULL;
/* An especially silly compiler could decide to recompute the /*
* result of i915_gem_request_completed, more specifically * An especially silly compiler could decide to recompute the
* result of i915_request_completed, more specifically
* re-emit the load for request->fence.seqno. A race would catch * re-emit the load for request->fence.seqno. A race would catch
* a later seqno value, which could flip the result from true to * a later seqno value, which could flip the result from true to
* false. Which means part of the instructions below might not * false. Which means part of the instructions below might not
* be executed, while later on instructions are executed. Due to * be executed, while later on instructions are executed. Due to
* barriers within the refcounting the inconsistency can't reach * barriers within the refcounting the inconsistency can't reach
* past the call to i915_gem_request_get_rcu, but not executing * past the call to i915_request_get_rcu, but not executing
* that while still executing i915_gem_request_put() creates * that while still executing i915_request_put() creates
* havoc enough. Prevent this with a compiler barrier. * havoc enough. Prevent this with a compiler barrier.
*/ */
barrier(); barrier();
request = i915_gem_request_get_rcu(request); request = i915_request_get_rcu(request);
/* What stops the following rcu_access_pointer() from occurring /*
* before the above i915_gem_request_get_rcu()? If we were * What stops the following rcu_access_pointer() from occurring
* before the above i915_request_get_rcu()? If we were
* to read the value before pausing to get the reference to * to read the value before pausing to get the reference to
* the request, we may not notice a change in the active * the request, we may not notice a change in the active
* tracker. * tracker.
@ -599,9 +607,9 @@ __i915_gem_active_get_rcu(const struct i915_gem_active *active)
* compiler. * compiler.
* *
* The atomic operation at the heart of * The atomic operation at the heart of
* i915_gem_request_get_rcu(), see dma_fence_get_rcu(), is * i915_request_get_rcu(), see dma_fence_get_rcu(), is
* atomic_inc_not_zero() which is only a full memory barrier * atomic_inc_not_zero() which is only a full memory barrier
* when successful. That is, if i915_gem_request_get_rcu() * when successful. That is, if i915_request_get_rcu()
* returns the request (and so with the reference counted * returns the request (and so with the reference counted
* incremented) then the following read for rcu_access_pointer() * incremented) then the following read for rcu_access_pointer()
* must occur after the atomic operation and so confirm * must occur after the atomic operation and so confirm
@ -613,7 +621,7 @@ __i915_gem_active_get_rcu(const struct i915_gem_active *active)
if (!request || request == rcu_access_pointer(active->request)) if (!request || request == rcu_access_pointer(active->request))
return rcu_pointer_handoff(request); return rcu_pointer_handoff(request);
i915_gem_request_put(request); i915_request_put(request);
} while (1); } while (1);
} }
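
The retry loop above is the usual shape of a lockless lookup against a SLAB_TYPESAFE_BY_RCU cache: dereference under RCU, try to take a reference, then re-check that the slot still points at the object before handing the reference to the caller. A schematic sketch of that pattern for a hypothetical refcounted object (not a compilable unit on its own; 'foo' and foo_release() are illustrative, and the caller is assumed to hold rcu_read_lock()):

struct foo {
        struct kref ref;
        /* ... */
};

static struct foo *foo_get_rcu(struct foo __rcu **slot)
{
        do {
                struct foo *obj;

                obj = rcu_dereference(*slot);
                if (!obj)
                        return NULL;

                /* fails if the object is already being freed; like the
                 * tracker above, treat a dying object as "nothing there" */
                if (!kref_get_unless_zero(&obj->ref))
                        return NULL;

                /*
                 * The slab may have recycled the object for a new user in
                 * the meantime; only hand over the reference if the slot
                 * still points at the object we pinned, otherwise drop it
                 * and retry.
                 */
                if (obj == rcu_access_pointer(*slot))
                        return rcu_pointer_handoff(obj);

                kref_put(&obj->ref, foo_release);
        } while (1);
}
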
@ -625,12 +633,12 @@ __i915_gem_active_get_rcu(const struct i915_gem_active *active)
* or NULL if the active tracker is idle. The reference is obtained under RCU, * or NULL if the active tracker is idle. The reference is obtained under RCU,
* so no locking is required by the caller. * so no locking is required by the caller.
* *
* The reference should be freed with i915_gem_request_put(). * The reference should be freed with i915_request_put().
*/ */
static inline struct drm_i915_gem_request * static inline struct i915_request *
i915_gem_active_get_unlocked(const struct i915_gem_active *active) i915_gem_active_get_unlocked(const struct i915_gem_active *active)
{ {
struct drm_i915_gem_request *request; struct i915_request *request;
rcu_read_lock(); rcu_read_lock();
request = __i915_gem_active_get_rcu(active); request = __i915_gem_active_get_rcu(active);
@ -670,7 +678,7 @@ i915_gem_active_isset(const struct i915_gem_active *active)
* can then wait upon the request, and afterwards release our reference, * can then wait upon the request, and afterwards release our reference,
* free of any locking. * free of any locking.
* *
* This function wraps i915_wait_request(), see it for the full details on * This function wraps i915_request_wait(), see it for the full details on
* the arguments. * the arguments.
* *
* Returns 0 if successful, or a negative error code. * Returns 0 if successful, or a negative error code.
@ -678,13 +686,13 @@ i915_gem_active_isset(const struct i915_gem_active *active)
static inline int static inline int
i915_gem_active_wait(const struct i915_gem_active *active, unsigned int flags) i915_gem_active_wait(const struct i915_gem_active *active, unsigned int flags)
{ {
struct drm_i915_gem_request *request; struct i915_request *request;
long ret = 0; long ret = 0;
request = i915_gem_active_get_unlocked(active); request = i915_gem_active_get_unlocked(active);
if (request) { if (request) {
ret = i915_wait_request(request, flags, MAX_SCHEDULE_TIMEOUT); ret = i915_request_wait(request, flags, MAX_SCHEDULE_TIMEOUT);
i915_gem_request_put(request); i915_request_put(request);
} }
return ret < 0 ? ret : 0; return ret < 0 ? ret : 0;
@ -703,14 +711,14 @@ static inline int __must_check
i915_gem_active_retire(struct i915_gem_active *active, i915_gem_active_retire(struct i915_gem_active *active,
struct mutex *mutex) struct mutex *mutex)
{ {
struct drm_i915_gem_request *request; struct i915_request *request;
long ret; long ret;
request = i915_gem_active_raw(active, mutex); request = i915_gem_active_raw(active, mutex);
if (!request) if (!request)
return 0; return 0;
ret = i915_wait_request(request, ret = i915_request_wait(request,
I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED, I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
MAX_SCHEDULE_TIMEOUT); MAX_SCHEDULE_TIMEOUT);
if (ret < 0) if (ret < 0)
@ -727,4 +735,4 @@ i915_gem_active_retire(struct i915_gem_active *active,
#define for_each_active(mask, idx) \ #define for_each_active(mask, idx) \
for (; mask ? idx = ffs(mask) - 1, 1 : 0; mask &= ~BIT(idx)) for (; mask ? idx = ffs(mask) - 1, 1 : 0; mask &= ~BIT(idx))
#endif /* I915_GEM_REQUEST_H */ #endif /* I915_REQUEST_H */


@ -586,8 +586,7 @@ TRACE_EVENT(i915_gem_evict_vm,
); );
TRACE_EVENT(i915_gem_ring_sync_to, TRACE_EVENT(i915_gem_ring_sync_to,
TP_PROTO(struct drm_i915_gem_request *to, TP_PROTO(struct i915_request *to, struct i915_request *from),
struct drm_i915_gem_request *from),
TP_ARGS(to, from), TP_ARGS(to, from),
TP_STRUCT__entry( TP_STRUCT__entry(
@ -610,9 +609,9 @@ TRACE_EVENT(i915_gem_ring_sync_to,
__entry->seqno) __entry->seqno)
); );
TRACE_EVENT(i915_gem_request_queue, TRACE_EVENT(i915_request_queue,
TP_PROTO(struct drm_i915_gem_request *req, u32 flags), TP_PROTO(struct i915_request *rq, u32 flags),
TP_ARGS(req, flags), TP_ARGS(rq, flags),
TP_STRUCT__entry( TP_STRUCT__entry(
__field(u32, dev) __field(u32, dev)
@ -624,11 +623,11 @@ TRACE_EVENT(i915_gem_request_queue,
), ),
TP_fast_assign( TP_fast_assign(
__entry->dev = req->i915->drm.primary->index; __entry->dev = rq->i915->drm.primary->index;
__entry->hw_id = req->ctx->hw_id; __entry->hw_id = rq->ctx->hw_id;
__entry->ring = req->engine->id; __entry->ring = rq->engine->id;
__entry->ctx = req->fence.context; __entry->ctx = rq->fence.context;
__entry->seqno = req->fence.seqno; __entry->seqno = rq->fence.seqno;
__entry->flags = flags; __entry->flags = flags;
), ),
@ -637,9 +636,9 @@ TRACE_EVENT(i915_gem_request_queue,
__entry->seqno, __entry->flags) __entry->seqno, __entry->flags)
); );
DECLARE_EVENT_CLASS(i915_gem_request, DECLARE_EVENT_CLASS(i915_request,
TP_PROTO(struct drm_i915_gem_request *req), TP_PROTO(struct i915_request *rq),
TP_ARGS(req), TP_ARGS(rq),
TP_STRUCT__entry( TP_STRUCT__entry(
__field(u32, dev) __field(u32, dev)
@ -651,12 +650,12 @@ DECLARE_EVENT_CLASS(i915_gem_request,
), ),
TP_fast_assign( TP_fast_assign(
__entry->dev = req->i915->drm.primary->index; __entry->dev = rq->i915->drm.primary->index;
__entry->hw_id = req->ctx->hw_id; __entry->hw_id = rq->ctx->hw_id;
__entry->ring = req->engine->id; __entry->ring = rq->engine->id;
__entry->ctx = req->fence.context; __entry->ctx = rq->fence.context;
__entry->seqno = req->fence.seqno; __entry->seqno = rq->fence.seqno;
__entry->global = req->global_seqno; __entry->global = rq->global_seqno;
), ),
TP_printk("dev=%u, hw_id=%u, ring=%u, ctx=%u, seqno=%u, global=%u", TP_printk("dev=%u, hw_id=%u, ring=%u, ctx=%u, seqno=%u, global=%u",
@ -664,26 +663,25 @@ DECLARE_EVENT_CLASS(i915_gem_request,
__entry->seqno, __entry->global) __entry->seqno, __entry->global)
); );
DEFINE_EVENT(i915_gem_request, i915_gem_request_add, DEFINE_EVENT(i915_request, i915_request_add,
TP_PROTO(struct drm_i915_gem_request *req), TP_PROTO(struct i915_request *rq),
TP_ARGS(req) TP_ARGS(rq)
); );
#if defined(CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS) #if defined(CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS)
DEFINE_EVENT(i915_gem_request, i915_gem_request_submit, DEFINE_EVENT(i915_request, i915_request_submit,
TP_PROTO(struct drm_i915_gem_request *req), TP_PROTO(struct i915_request *rq),
TP_ARGS(req) TP_ARGS(rq)
); );
DEFINE_EVENT(i915_gem_request, i915_gem_request_execute, DEFINE_EVENT(i915_request, i915_request_execute,
TP_PROTO(struct drm_i915_gem_request *req), TP_PROTO(struct i915_request *rq),
TP_ARGS(req) TP_ARGS(rq)
); );
DECLARE_EVENT_CLASS(i915_gem_request_hw, DECLARE_EVENT_CLASS(i915_request_hw,
TP_PROTO(struct drm_i915_gem_request *req, TP_PROTO(struct i915_request *rq, unsigned int port),
unsigned int port), TP_ARGS(rq, port),
TP_ARGS(req, port),
TP_STRUCT__entry( TP_STRUCT__entry(
__field(u32, dev) __field(u32, dev)
@ -696,14 +694,14 @@ DECLARE_EVENT_CLASS(i915_gem_request_hw,
), ),
TP_fast_assign( TP_fast_assign(
__entry->dev = req->i915->drm.primary->index; __entry->dev = rq->i915->drm.primary->index;
__entry->hw_id = req->ctx->hw_id; __entry->hw_id = rq->ctx->hw_id;
__entry->ring = req->engine->id; __entry->ring = rq->engine->id;
__entry->ctx = req->fence.context; __entry->ctx = rq->fence.context;
__entry->seqno = req->fence.seqno; __entry->seqno = rq->fence.seqno;
__entry->global_seqno = req->global_seqno; __entry->global_seqno = rq->global_seqno;
__entry->port = port; __entry->port = port;
), ),
TP_printk("dev=%u, hw_id=%u, ring=%u, ctx=%u, seqno=%u, global=%u, port=%u", TP_printk("dev=%u, hw_id=%u, ring=%u, ctx=%u, seqno=%u, global=%u, port=%u",
__entry->dev, __entry->hw_id, __entry->ring, __entry->dev, __entry->hw_id, __entry->ring,
@ -711,34 +709,34 @@ DECLARE_EVENT_CLASS(i915_gem_request_hw,
__entry->global_seqno, __entry->port) __entry->global_seqno, __entry->port)
); );
DEFINE_EVENT(i915_gem_request_hw, i915_gem_request_in, DEFINE_EVENT(i915_request_hw, i915_request_in,
TP_PROTO(struct drm_i915_gem_request *req, unsigned int port), TP_PROTO(struct i915_request *rq, unsigned int port),
TP_ARGS(req, port) TP_ARGS(rq, port)
); );
DEFINE_EVENT(i915_gem_request, i915_gem_request_out, DEFINE_EVENT(i915_request, i915_request_out,
TP_PROTO(struct drm_i915_gem_request *req), TP_PROTO(struct i915_request *rq),
TP_ARGS(req) TP_ARGS(rq)
); );
#else #else
#if !defined(TRACE_HEADER_MULTI_READ) #if !defined(TRACE_HEADER_MULTI_READ)
static inline void static inline void
trace_i915_gem_request_submit(struct drm_i915_gem_request *req) trace_i915_request_submit(struct i915_request *rq)
{ {
} }
static inline void static inline void
trace_i915_gem_request_execute(struct drm_i915_gem_request *req) trace_i915_request_execute(struct i915_request *rq)
{ {
} }
static inline void static inline void
trace_i915_gem_request_in(struct drm_i915_gem_request *req, unsigned int port) trace_i915_request_in(struct i915_request *rq, unsigned int port)
{ {
} }
static inline void static inline void
trace_i915_gem_request_out(struct drm_i915_gem_request *req) trace_i915_request_out(struct i915_request *rq)
{ {
} }
#endif #endif
@ -767,14 +765,14 @@ TRACE_EVENT(intel_engine_notify,
__entry->waiters) __entry->waiters)
); );
DEFINE_EVENT(i915_gem_request, i915_gem_request_retire, DEFINE_EVENT(i915_request, i915_request_retire,
TP_PROTO(struct drm_i915_gem_request *req), TP_PROTO(struct i915_request *rq),
TP_ARGS(req) TP_ARGS(rq)
); );
TRACE_EVENT(i915_gem_request_wait_begin, TRACE_EVENT(i915_request_wait_begin,
TP_PROTO(struct drm_i915_gem_request *req, unsigned int flags), TP_PROTO(struct i915_request *rq, unsigned int flags),
TP_ARGS(req, flags), TP_ARGS(rq, flags),
TP_STRUCT__entry( TP_STRUCT__entry(
__field(u32, dev) __field(u32, dev)
@ -793,12 +791,12 @@ TRACE_EVENT(i915_gem_request_wait_begin,
* less desirable. * less desirable.
*/ */
TP_fast_assign( TP_fast_assign(
__entry->dev = req->i915->drm.primary->index; __entry->dev = rq->i915->drm.primary->index;
__entry->hw_id = req->ctx->hw_id; __entry->hw_id = rq->ctx->hw_id;
__entry->ring = req->engine->id; __entry->ring = rq->engine->id;
__entry->ctx = req->fence.context; __entry->ctx = rq->fence.context;
__entry->seqno = req->fence.seqno; __entry->seqno = rq->fence.seqno;
__entry->global = req->global_seqno; __entry->global = rq->global_seqno;
__entry->flags = flags; __entry->flags = flags;
), ),
@ -808,9 +806,9 @@ TRACE_EVENT(i915_gem_request_wait_begin,
!!(__entry->flags & I915_WAIT_LOCKED), __entry->flags) !!(__entry->flags & I915_WAIT_LOCKED), __entry->flags)
); );
DEFINE_EVENT(i915_gem_request, i915_gem_request_wait_end, DEFINE_EVENT(i915_request, i915_request_wait_end,
TP_PROTO(struct drm_i915_gem_request *req), TP_PROTO(struct i915_request *rq),
TP_ARGS(req) TP_ARGS(rq)
); );
TRACE_EVENT(i915_flip_request, TRACE_EVENT(i915_flip_request,

View File

@ -31,8 +31,7 @@
#include <drm/drm_gem.h> #include <drm/drm_gem.h>
static void static void
i915_vma_retire(struct i915_gem_active *active, i915_vma_retire(struct i915_gem_active *active, struct i915_request *rq)
struct drm_i915_gem_request *rq)
{ {
const unsigned int idx = rq->engine->id; const unsigned int idx = rq->engine->id;
struct i915_vma *vma = struct i915_vma *vma =


@ -32,8 +32,8 @@
#include "i915_gem_gtt.h" #include "i915_gem_gtt.h"
#include "i915_gem_fence_reg.h" #include "i915_gem_fence_reg.h"
#include "i915_gem_object.h" #include "i915_gem_object.h"
#include "i915_gem_request.h"
#include "i915_request.h"
enum i915_cache_level; enum i915_cache_level;


@ -168,17 +168,21 @@ static void irq_enable(struct intel_engine_cs *engine)
set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
/* Caller disables interrupts */ /* Caller disables interrupts */
spin_lock(&engine->i915->irq_lock); if (engine->irq_enable) {
engine->irq_enable(engine); spin_lock(&engine->i915->irq_lock);
spin_unlock(&engine->i915->irq_lock); engine->irq_enable(engine);
spin_unlock(&engine->i915->irq_lock);
}
} }
static void irq_disable(struct intel_engine_cs *engine) static void irq_disable(struct intel_engine_cs *engine)
{ {
/* Caller disables interrupts */ /* Caller disables interrupts */
spin_lock(&engine->i915->irq_lock); if (engine->irq_disable) {
engine->irq_disable(engine); spin_lock(&engine->i915->irq_lock);
spin_unlock(&engine->i915->irq_lock); engine->irq_disable(engine);
spin_unlock(&engine->i915->irq_lock);
}
} }
void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
@ -243,6 +247,8 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
spin_unlock(&b->irq_lock); spin_unlock(&b->irq_lock);
rbtree_postorder_for_each_entry_safe(wait, n, &b->waiters, node) { rbtree_postorder_for_each_entry_safe(wait, n, &b->waiters, node) {
GEM_BUG_ON(!i915_seqno_passed(intel_engine_get_seqno(engine),
wait->seqno));
RB_CLEAR_NODE(&wait->node); RB_CLEAR_NODE(&wait->node);
wake_up_process(wait->tsk); wake_up_process(wait->tsk);
} }
@ -336,7 +342,8 @@ static inline void __intel_breadcrumbs_finish(struct intel_breadcrumbs *b,
lockdep_assert_held(&b->rb_lock); lockdep_assert_held(&b->rb_lock);
GEM_BUG_ON(b->irq_wait == wait); GEM_BUG_ON(b->irq_wait == wait);
/* This request is completed, so remove it from the tree, mark it as /*
* This request is completed, so remove it from the tree, mark it as
* complete, and *then* wake up the associated task. N.B. when the * complete, and *then* wake up the associated task. N.B. when the
* task wakes up, it will find the empty rb_node, discern that it * task wakes up, it will find the empty rb_node, discern that it
* has already been removed from the tree and skip the serialisation * has already been removed from the tree and skip the serialisation
@ -347,7 +354,8 @@ static inline void __intel_breadcrumbs_finish(struct intel_breadcrumbs *b,
rb_erase(&wait->node, &b->waiters); rb_erase(&wait->node, &b->waiters);
RB_CLEAR_NODE(&wait->node); RB_CLEAR_NODE(&wait->node);
wake_up_process(wait->tsk); /* implicit smp_wmb() */ if (wait->tsk->state != TASK_RUNNING)
wake_up_process(wait->tsk); /* implicit smp_wmb() */
} }
static inline void __intel_breadcrumbs_next(struct intel_engine_cs *engine, static inline void __intel_breadcrumbs_next(struct intel_engine_cs *engine,
@ -588,23 +596,6 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine,
spin_unlock_irq(&b->rb_lock); spin_unlock_irq(&b->rb_lock);
} }
static bool signal_complete(const struct drm_i915_gem_request *request)
{
if (!request)
return false;
/*
* Carefully check if the request is complete, giving time for the
* seqno to be visible or if the GPU hung.
*/
return __i915_request_irq_complete(request);
}
static struct drm_i915_gem_request *to_signaler(struct rb_node *rb)
{
return rb_entry(rb, struct drm_i915_gem_request, signaling.node);
}
static void signaler_set_rtpriority(void) static void signaler_set_rtpriority(void)
{ {
struct sched_param param = { .sched_priority = 1 }; struct sched_param param = { .sched_priority = 1 };
@ -612,78 +603,26 @@ static void signaler_set_rtpriority(void)
sched_setscheduler_nocheck(current, SCHED_FIFO, &param); sched_setscheduler_nocheck(current, SCHED_FIFO, &param);
} }
static void __intel_engine_remove_signal(struct intel_engine_cs *engine,
struct drm_i915_gem_request *request)
{
struct intel_breadcrumbs *b = &engine->breadcrumbs;
lockdep_assert_held(&b->rb_lock);
/*
* Wake up all other completed waiters and select the
* next bottom-half for the next user interrupt.
*/
__intel_engine_remove_wait(engine, &request->signaling.wait);
/*
* Find the next oldest signal. Note that as we have
* not been holding the lock, another client may
* have installed an even older signal than the one
* we just completed - so double check we are still
* the oldest before picking the next one.
*/
if (request->signaling.wait.seqno) {
if (request == rcu_access_pointer(b->first_signal)) {
struct rb_node *rb = rb_next(&request->signaling.node);
rcu_assign_pointer(b->first_signal,
rb ? to_signaler(rb) : NULL);
}
rb_erase(&request->signaling.node, &b->signals);
request->signaling.wait.seqno = 0;
}
}
static struct drm_i915_gem_request *
get_first_signal_rcu(struct intel_breadcrumbs *b)
{
/*
* See the big warnings for i915_gem_active_get_rcu() and similarly
* for dma_fence_get_rcu_safe() that explain the intricacies involved
* here with defeating CPU/compiler speculation and enforcing
* the required memory barriers.
*/
do {
struct drm_i915_gem_request *request;
request = rcu_dereference(b->first_signal);
if (request)
request = i915_gem_request_get_rcu(request);
barrier();
if (!request || request == rcu_access_pointer(b->first_signal))
return rcu_pointer_handoff(request);
i915_gem_request_put(request);
} while (1);
}
static int intel_breadcrumbs_signaler(void *arg) static int intel_breadcrumbs_signaler(void *arg)
{ {
struct intel_engine_cs *engine = arg; struct intel_engine_cs *engine = arg;
struct intel_breadcrumbs *b = &engine->breadcrumbs; struct intel_breadcrumbs *b = &engine->breadcrumbs;
struct drm_i915_gem_request *request; struct i915_request *rq, *n;
/* Install ourselves with high priority to reduce signalling latency */ /* Install ourselves with high priority to reduce signalling latency */
signaler_set_rtpriority(); signaler_set_rtpriority();
do { do {
bool do_schedule = true; bool do_schedule = true;
LIST_HEAD(list);
u32 seqno;
set_current_state(TASK_INTERRUPTIBLE); set_current_state(TASK_INTERRUPTIBLE);
if (list_empty(&b->signals))
goto sleep;
/* We are either woken up by the interrupt bottom-half, /*
* We are either woken up by the interrupt bottom-half,
* or by a client adding a new signaller. In both cases, * or by a client adding a new signaller. In both cases,
* the GPU seqno may have advanced beyond our oldest signal. * the GPU seqno may have advanced beyond our oldest signal.
* If it has, propagate the signal, remove the waiter and * If it has, propagate the signal, remove the waiter and
@ -691,25 +630,45 @@ static int intel_breadcrumbs_signaler(void *arg)
* need to wait for a new interrupt from the GPU or for * need to wait for a new interrupt from the GPU or for
* a new client. * a new client.
*/ */
rcu_read_lock(); seqno = intel_engine_get_seqno(engine);
request = get_first_signal_rcu(b);
rcu_read_unlock();
if (signal_complete(request)) {
if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
&request->fence.flags)) {
local_bh_disable();
dma_fence_signal(&request->fence);
GEM_BUG_ON(!i915_gem_request_completed(request));
local_bh_enable(); /* kick start the tasklets */
}
if (READ_ONCE(request->signaling.wait.seqno)) { spin_lock_irq(&b->rb_lock);
spin_lock_irq(&b->rb_lock); list_for_each_entry_safe(rq, n, &b->signals, signaling.link) {
__intel_engine_remove_signal(engine, request); u32 this = rq->signaling.wait.seqno;
spin_unlock_irq(&b->rb_lock);
}
/* If the engine is saturated we may be continually GEM_BUG_ON(!rq->signaling.wait.seqno);
if (!i915_seqno_passed(seqno, this))
break;
if (likely(this == i915_request_global_seqno(rq))) {
__intel_engine_remove_wait(engine,
&rq->signaling.wait);
rq->signaling.wait.seqno = 0;
__list_del_entry(&rq->signaling.link);
if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
&rq->fence.flags)) {
list_add_tail(&rq->signaling.link,
&list);
i915_request_get(rq);
}
}
}
spin_unlock_irq(&b->rb_lock);
if (!list_empty(&list)) {
local_bh_disable();
list_for_each_entry_safe(rq, n, &list, signaling.link) {
dma_fence_signal(&rq->fence);
GEM_BUG_ON(!i915_request_completed(rq));
i915_request_put(rq);
}
local_bh_enable(); /* kick start the tasklets */
/*
* If the engine is saturated we may be continually
* processing completed requests. This angers the * processing completed requests. This angers the
* NMI watchdog if we never let anything else * NMI watchdog if we never let anything else
* have access to the CPU. Let's pretend to be nice * have access to the CPU. Let's pretend to be nice
@ -718,9 +677,19 @@ static int intel_breadcrumbs_signaler(void *arg)
*/ */
do_schedule = need_resched(); do_schedule = need_resched();
} }
i915_gem_request_put(request);
if (unlikely(do_schedule)) { if (unlikely(do_schedule)) {
/* Before we sleep, check for a missed seqno */
if (current->state & TASK_NORMAL &&
!list_empty(&b->signals) &&
engine->irq_seqno_barrier &&
test_and_clear_bit(ENGINE_IRQ_BREADCRUMB,
&engine->irq_posted)) {
engine->irq_seqno_barrier(engine);
intel_engine_wakeup(engine);
}
sleep:
if (kthread_should_park()) if (kthread_should_park())
kthread_parkme(); kthread_parkme();
@ -735,14 +704,40 @@ static int intel_breadcrumbs_signaler(void *arg)
return 0; return 0;
} }
void intel_engine_enable_signaling(struct drm_i915_gem_request *request, static void insert_signal(struct intel_breadcrumbs *b,
bool wakeup) struct i915_request *request,
const u32 seqno)
{
struct i915_request *iter;
lockdep_assert_held(&b->rb_lock);
/*
* A reasonable assumption is that we are called to add signals
* in sequence, as the requests are submitted for execution and
* assigned a global_seqno. This will be the case for the majority
* of internally generated signals (inter-engine signaling).
*
* Out of order waiters triggering random signaling enabling will
* be more problematic, but hopefully rare enough and the list
* small enough that the O(N) insertion sort is not an issue.
*/
list_for_each_entry_reverse(iter, &b->signals, signaling.link)
if (i915_seqno_passed(seqno, iter->signaling.wait.seqno))
break;
list_add(&request->signaling.link, &iter->signaling.link);
}
void intel_engine_enable_signaling(struct i915_request *request, bool wakeup)
{ {
struct intel_engine_cs *engine = request->engine; struct intel_engine_cs *engine = request->engine;
struct intel_breadcrumbs *b = &engine->breadcrumbs; struct intel_breadcrumbs *b = &engine->breadcrumbs;
u32 seqno; u32 seqno;
/* Note that we may be called from an interrupt handler on another /*
* Note that we may be called from an interrupt handler on another
* device (e.g. nouveau signaling a fence completion causing us * device (e.g. nouveau signaling a fence completion causing us
* to submit a request, and so enable signaling). As such, * to submit a request, and so enable signaling). As such,
* we need to make sure that all other users of b->rb_lock protect * we need to make sure that all other users of b->rb_lock protect
@ -753,18 +748,17 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
GEM_BUG_ON(!irqs_disabled()); GEM_BUG_ON(!irqs_disabled());
lockdep_assert_held(&request->lock); lockdep_assert_held(&request->lock);
seqno = i915_gem_request_global_seqno(request); seqno = i915_request_global_seqno(request);
if (!seqno) if (!seqno) /* will be enabled later upon execution */
return; return;
spin_lock(&b->rb_lock);
GEM_BUG_ON(request->signaling.wait.seqno); GEM_BUG_ON(request->signaling.wait.seqno);
request->signaling.wait.tsk = b->signaler; request->signaling.wait.tsk = b->signaler;
request->signaling.wait.request = request; request->signaling.wait.request = request;
request->signaling.wait.seqno = seqno; request->signaling.wait.seqno = seqno;
/* First add ourselves into the list of waiters, but register our /*
* Add ourselves into the list of waiters, but registering our
* bottom-half as the signaller thread. As per usual, only the oldest * bottom-half as the signaller thread. As per usual, only the oldest
* waiter (not just signaller) is tasked as the bottom-half waking * waiter (not just signaller) is tasked as the bottom-half waking
* up all completed waiters after the user interrupt. * up all completed waiters after the user interrupt.
@ -772,58 +766,31 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
* If we are the oldest waiter, enable the irq (after which we * If we are the oldest waiter, enable the irq (after which we
* must double check that the seqno did not complete). * must double check that the seqno did not complete).
*/ */
spin_lock(&b->rb_lock);
insert_signal(b, request, seqno);
wakeup &= __intel_engine_add_wait(engine, &request->signaling.wait); wakeup &= __intel_engine_add_wait(engine, &request->signaling.wait);
if (!__i915_gem_request_completed(request, seqno)) {
struct rb_node *parent, **p;
bool first;
/* Now insert ourselves into the retirement ordered list of
* signals on this engine. We track the oldest seqno as that
* will be the first signal to complete.
*/
parent = NULL;
first = true;
p = &b->signals.rb_node;
while (*p) {
parent = *p;
if (i915_seqno_passed(seqno,
to_signaler(parent)->signaling.wait.seqno)) {
p = &parent->rb_right;
first = false;
} else {
p = &parent->rb_left;
}
}
rb_link_node(&request->signaling.node, parent, p);
rb_insert_color(&request->signaling.node, &b->signals);
if (first)
rcu_assign_pointer(b->first_signal, request);
} else {
__intel_engine_remove_wait(engine, &request->signaling.wait);
request->signaling.wait.seqno = 0;
wakeup = false;
}
spin_unlock(&b->rb_lock); spin_unlock(&b->rb_lock);
if (wakeup) if (wakeup)
wake_up_process(b->signaler); wake_up_process(b->signaler);
} }
void intel_engine_cancel_signaling(struct drm_i915_gem_request *request) void intel_engine_cancel_signaling(struct i915_request *request)
{ {
struct intel_engine_cs *engine = request->engine;
struct intel_breadcrumbs *b = &engine->breadcrumbs;
GEM_BUG_ON(!irqs_disabled()); GEM_BUG_ON(!irqs_disabled());
lockdep_assert_held(&request->lock); lockdep_assert_held(&request->lock);
if (READ_ONCE(request->signaling.wait.seqno)) { if (!READ_ONCE(request->signaling.wait.seqno))
struct intel_engine_cs *engine = request->engine; return;
struct intel_breadcrumbs *b = &engine->breadcrumbs;
spin_lock(&b->rb_lock); spin_lock(&b->rb_lock);
__intel_engine_remove_signal(engine, request); __intel_engine_remove_wait(engine, &request->signaling.wait);
spin_unlock(&b->rb_lock); if (fetch_and_zero(&request->signaling.wait.seqno))
} __list_del_entry(&request->signaling.link);
spin_unlock(&b->rb_lock);
} }
int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine) int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
@ -837,6 +804,8 @@ int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
timer_setup(&b->fake_irq, intel_breadcrumbs_fake_irq, 0); timer_setup(&b->fake_irq, intel_breadcrumbs_fake_irq, 0);
timer_setup(&b->hangcheck, intel_breadcrumbs_hangcheck, 0); timer_setup(&b->hangcheck, intel_breadcrumbs_hangcheck, 0);
INIT_LIST_HEAD(&b->signals);
/* Spawn a thread to provide a common bottom-half for all signals. /* Spawn a thread to provide a common bottom-half for all signals.
* As this is an asynchronous interface we cannot steal the current * As this is an asynchronous interface we cannot steal the current
* task for handling the bottom-half to the user interrupt, therefore * task for handling the bottom-half to the user interrupt, therefore
@ -896,8 +865,7 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
/* The engines should be idle and all requests accounted for! */ /* The engines should be idle and all requests accounted for! */
WARN_ON(READ_ONCE(b->irq_wait)); WARN_ON(READ_ONCE(b->irq_wait));
WARN_ON(!RB_EMPTY_ROOT(&b->waiters)); WARN_ON(!RB_EMPTY_ROOT(&b->waiters));
WARN_ON(rcu_access_pointer(b->first_signal)); WARN_ON(!list_empty(&b->signals));
WARN_ON(!RB_EMPTY_ROOT(&b->signals));
if (!IS_ERR_OR_NULL(b->signaler)) if (!IS_ERR_OR_NULL(b->signaler))
kthread_stop(b->signaler); kthread_stop(b->signaler);
@ -905,28 +873,6 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
cancel_fake_irq(engine); cancel_fake_irq(engine);
} }
bool intel_breadcrumbs_busy(struct intel_engine_cs *engine)
{
struct intel_breadcrumbs *b = &engine->breadcrumbs;
bool busy = false;
spin_lock_irq(&b->rb_lock);
if (b->irq_wait) {
wake_up_process(b->irq_wait->tsk);
busy = true;
}
if (rcu_access_pointer(b->first_signal)) {
wake_up_process(b->signaler);
busy = true;
}
spin_unlock_irq(&b->rb_lock);
return busy;
}
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/intel_breadcrumbs.c" #include "selftests/intel_breadcrumbs.c"
#endif #endif


@ -66,13 +66,13 @@
* of the CTM coefficient and we write the value from bit 3. We also round the * of the CTM coefficient and we write the value from bit 3. We also round the
* value. * value.
*/ */
#define I9XX_CSC_COEFF_FP(coeff, fbits) \ #define ILK_CSC_COEFF_FP(coeff, fbits) \
(clamp_val(((coeff) >> (32 - (fbits) - 3)) + 4, 0, 0xfff) & 0xff8) (clamp_val(((coeff) >> (32 - (fbits) - 3)) + 4, 0, 0xfff) & 0xff8)
#define I9XX_CSC_COEFF_LIMITED_RANGE \ #define ILK_CSC_COEFF_LIMITED_RANGE \
I9XX_CSC_COEFF_FP(CTM_COEFF_LIMITED_RANGE, 9) ILK_CSC_COEFF_FP(CTM_COEFF_LIMITED_RANGE, 9)
#define I9XX_CSC_COEFF_1_0 \ #define ILK_CSC_COEFF_1_0 \
((7 << 12) | I9XX_CSC_COEFF_FP(CTM_COEFF_1_0, 8)) ((7 << 12) | ILK_CSC_COEFF_FP(CTM_COEFF_1_0, 8))
static bool crtc_state_is_legacy_gamma(struct drm_crtc_state *state) static bool crtc_state_is_legacy_gamma(struct drm_crtc_state *state)
{ {
@ -84,30 +84,31 @@ static bool crtc_state_is_legacy_gamma(struct drm_crtc_state *state)
/* /*
* When using limited range, multiply the matrix given by userspace by * When using limited range, multiply the matrix given by userspace by
* the matrix that we would use for the limited range. We do the * the matrix that we would use for the limited range.
* multiplication in U2.30 format.
*/ */
static void ctm_mult_by_limited(uint64_t *result, int64_t *input) static u64 *ctm_mult_by_limited(u64 *result, const u64 *input)
{ {
int i; int i;
for (i = 0; i < 9; i++) for (i = 0; i < 9; i++) {
result[i] = 0; u64 user_coeff = input[i];
u32 limited_coeff = CTM_COEFF_LIMITED_RANGE;
u32 abs_coeff = clamp_val(CTM_COEFF_ABS(user_coeff), 0,
CTM_COEFF_4_0 - 1) >> 2;
for (i = 0; i < 3; i++) { /*
int64_t user_coeff = input[i * 3 + i]; * By scaling every co-efficient with limited range (16-235)
uint64_t limited_coeff = CTM_COEFF_LIMITED_RANGE >> 2; * vs full range (0-255) the final o/p will be scaled down to
uint64_t abs_coeff = clamp_val(CTM_COEFF_ABS(user_coeff), * fit in the limited range supported by the panel.
0, */
CTM_COEFF_4_0 - 1) >> 2; result[i] = mul_u32_u32(limited_coeff, abs_coeff) >> 30;
result[i] |= user_coeff & CTM_COEFF_SIGN;
result[i * 3 + i] = (limited_coeff * abs_coeff) >> 27;
if (CTM_COEFF_NEGATIVE(user_coeff))
result[i * 3 + i] |= CTM_COEFF_SIGN;
} }
return result;
} }
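
The scaling above takes each S31.32 user coefficient (clamped below 4.0 and shifted down to U2.30), multiplies it by the limited-range factor (235 - 16) / 255 held in .32 fixed point, and shifts back so the result is again an S31.32 magnitude. A standalone arithmetic sketch of that conversion (the limited-range constant here is an assumption reconstructed from the comment, not copied from the driver):

#include <stdint.h>
#include <stdio.h>

#define CTM_COEFF_1_0           (1ULL << 32)    /* 1.0 in S31.32 */
/* assumed limited-range factor, (235 - 16) / 255, kept in .32 fixed point */
#define COEFF_LIMITED_RANGE     (((235ULL - 16) << 32) / 255)

int main(void)
{
        uint64_t user = CTM_COEFF_1_0;                  /* identity coefficient */
        uint32_t limited = (uint32_t)COEFF_LIMITED_RANGE;
        /* the driver clamps |coeff| below 4.0 first, so >> 2 yields U2.30 */
        uint32_t abs_c = (uint32_t)(user >> 2);
        /* (0.32 fixed) * (2.30 fixed) >> 30 lands back in S31.32 magnitude */
        uint64_t scaled = ((uint64_t)limited * abs_c) >> 30;

        printf("%f\n", scaled / (double)CTM_COEFF_1_0); /* ~0.858824 */
        return 0;
}
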
static void i9xx_load_ycbcr_conversion_matrix(struct intel_crtc *intel_crtc) static void ilk_load_ycbcr_conversion_matrix(struct intel_crtc *intel_crtc)
{ {
int pipe = intel_crtc->pipe; int pipe = intel_crtc->pipe;
struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev); struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev);
@ -131,8 +132,7 @@ static void i9xx_load_ycbcr_conversion_matrix(struct intel_crtc *intel_crtc)
I915_WRITE(PIPE_CSC_MODE(pipe), 0); I915_WRITE(PIPE_CSC_MODE(pipe), 0);
} }
/* Set up the pipe CSC unit. */ static void ilk_load_csc_matrix(struct drm_crtc_state *crtc_state)
static void i9xx_load_csc_matrix(struct drm_crtc_state *crtc_state)
{ {
struct drm_crtc *crtc = crtc_state->crtc; struct drm_crtc *crtc = crtc_state->crtc;
struct drm_i915_private *dev_priv = to_i915(crtc->dev); struct drm_i915_private *dev_priv = to_i915(crtc->dev);
@ -140,21 +140,28 @@ static void i9xx_load_csc_matrix(struct drm_crtc_state *crtc_state)
int i, pipe = intel_crtc->pipe; int i, pipe = intel_crtc->pipe;
uint16_t coeffs[9] = { 0, }; uint16_t coeffs[9] = { 0, };
struct intel_crtc_state *intel_crtc_state = to_intel_crtc_state(crtc_state); struct intel_crtc_state *intel_crtc_state = to_intel_crtc_state(crtc_state);
bool limited_color_range = false;
/*
* FIXME if there's a gamma LUT after the CSC, we should
* do the range compression using the gamma LUT instead.
*/
if (INTEL_GEN(dev_priv) >= 8 || IS_HASWELL(dev_priv))
limited_color_range = intel_crtc_state->limited_color_range;
if (intel_crtc_state->ycbcr420) { if (intel_crtc_state->ycbcr420) {
i9xx_load_ycbcr_conversion_matrix(intel_crtc); ilk_load_ycbcr_conversion_matrix(intel_crtc);
return; return;
} else if (crtc_state->ctm) { } else if (crtc_state->ctm) {
struct drm_color_ctm *ctm = struct drm_color_ctm *ctm =
(struct drm_color_ctm *)crtc_state->ctm->data; (struct drm_color_ctm *)crtc_state->ctm->data;
uint64_t input[9] = { 0, }; const u64 *input;
u64 temp[9];
if (intel_crtc_state->limited_color_range) { if (limited_color_range)
ctm_mult_by_limited(input, ctm->matrix); input = ctm_mult_by_limited(temp, ctm->matrix);
} else { else
for (i = 0; i < ARRAY_SIZE(input); i++) input = ctm->matrix;
input[i] = ctm->matrix[i];
}
/* /*
* Convert fixed point S31.32 input to format supported by the * Convert fixed point S31.32 input to format supported by the
@ -175,21 +182,21 @@ static void i9xx_load_csc_matrix(struct drm_crtc_state *crtc_state)
if (abs_coeff < CTM_COEFF_0_125) if (abs_coeff < CTM_COEFF_0_125)
coeffs[i] |= (3 << 12) | coeffs[i] |= (3 << 12) |
I9XX_CSC_COEFF_FP(abs_coeff, 12); ILK_CSC_COEFF_FP(abs_coeff, 12);
else if (abs_coeff < CTM_COEFF_0_25) else if (abs_coeff < CTM_COEFF_0_25)
coeffs[i] |= (2 << 12) | coeffs[i] |= (2 << 12) |
I9XX_CSC_COEFF_FP(abs_coeff, 11); ILK_CSC_COEFF_FP(abs_coeff, 11);
else if (abs_coeff < CTM_COEFF_0_5) else if (abs_coeff < CTM_COEFF_0_5)
coeffs[i] |= (1 << 12) | coeffs[i] |= (1 << 12) |
I9XX_CSC_COEFF_FP(abs_coeff, 10); ILK_CSC_COEFF_FP(abs_coeff, 10);
else if (abs_coeff < CTM_COEFF_1_0) else if (abs_coeff < CTM_COEFF_1_0)
coeffs[i] |= I9XX_CSC_COEFF_FP(abs_coeff, 9); coeffs[i] |= ILK_CSC_COEFF_FP(abs_coeff, 9);
else if (abs_coeff < CTM_COEFF_2_0) else if (abs_coeff < CTM_COEFF_2_0)
coeffs[i] |= (7 << 12) | coeffs[i] |= (7 << 12) |
I9XX_CSC_COEFF_FP(abs_coeff, 8); ILK_CSC_COEFF_FP(abs_coeff, 8);
else else
coeffs[i] |= (6 << 12) | coeffs[i] |= (6 << 12) |
I9XX_CSC_COEFF_FP(abs_coeff, 7); ILK_CSC_COEFF_FP(abs_coeff, 7);
} }
} else { } else {
/* /*
@ -201,11 +208,11 @@ static void i9xx_load_csc_matrix(struct drm_crtc_state *crtc_state)
* into consideration. * into consideration.
*/ */
for (i = 0; i < 3; i++) { for (i = 0; i < 3; i++) {
if (intel_crtc_state->limited_color_range) if (limited_color_range)
coeffs[i * 3 + i] = coeffs[i * 3 + i] =
I9XX_CSC_COEFF_LIMITED_RANGE; ILK_CSC_COEFF_LIMITED_RANGE;
else else
coeffs[i * 3 + i] = I9XX_CSC_COEFF_1_0; coeffs[i * 3 + i] = ILK_CSC_COEFF_1_0;
} }
} }
@ -225,7 +232,7 @@ static void i9xx_load_csc_matrix(struct drm_crtc_state *crtc_state)
if (INTEL_GEN(dev_priv) > 6) { if (INTEL_GEN(dev_priv) > 6) {
uint16_t postoff = 0; uint16_t postoff = 0;
if (intel_crtc_state->limited_color_range) if (limited_color_range)
postoff = (16 * (1 << 12) / 255) & 0x1fff; postoff = (16 * (1 << 12) / 255) & 0x1fff;
I915_WRITE(PIPE_CSC_POSTOFF_HI(pipe), postoff); I915_WRITE(PIPE_CSC_POSTOFF_HI(pipe), postoff);
@ -236,7 +243,7 @@ static void i9xx_load_csc_matrix(struct drm_crtc_state *crtc_state)
} else { } else {
uint32_t mode = CSC_MODE_YUV_TO_RGB; uint32_t mode = CSC_MODE_YUV_TO_RGB;
if (intel_crtc_state->limited_color_range) if (limited_color_range)
mode |= CSC_BLACK_SCREEN_OFFSET; mode |= CSC_BLACK_SCREEN_OFFSET;
I915_WRITE(PIPE_CSC_MODE(pipe), mode); I915_WRITE(PIPE_CSC_MODE(pipe), mode);
@ -651,14 +658,14 @@ void intel_color_init(struct drm_crtc *crtc)
dev_priv->display.load_csc_matrix = cherryview_load_csc_matrix; dev_priv->display.load_csc_matrix = cherryview_load_csc_matrix;
dev_priv->display.load_luts = cherryview_load_luts; dev_priv->display.load_luts = cherryview_load_luts;
} else if (IS_HASWELL(dev_priv)) { } else if (IS_HASWELL(dev_priv)) {
dev_priv->display.load_csc_matrix = i9xx_load_csc_matrix; dev_priv->display.load_csc_matrix = ilk_load_csc_matrix;
dev_priv->display.load_luts = haswell_load_luts; dev_priv->display.load_luts = haswell_load_luts;
} else if (IS_BROADWELL(dev_priv) || IS_GEN9_BC(dev_priv) || } else if (IS_BROADWELL(dev_priv) || IS_GEN9_BC(dev_priv) ||
IS_BROXTON(dev_priv)) { IS_BROXTON(dev_priv)) {
dev_priv->display.load_csc_matrix = i9xx_load_csc_matrix; dev_priv->display.load_csc_matrix = ilk_load_csc_matrix;
dev_priv->display.load_luts = broadwell_load_luts; dev_priv->display.load_luts = broadwell_load_luts;
} else if (IS_GEMINILAKE(dev_priv) || IS_CANNONLAKE(dev_priv)) { } else if (IS_GEMINILAKE(dev_priv) || IS_CANNONLAKE(dev_priv)) {
dev_priv->display.load_csc_matrix = i9xx_load_csc_matrix; dev_priv->display.load_csc_matrix = ilk_load_csc_matrix;
dev_priv->display.load_luts = glk_load_luts; dev_priv->display.load_luts = glk_load_luts;
} else { } else {
dev_priv->display.load_luts = i9xx_load_luts; dev_priv->display.load_luts = i9xx_load_luts;


@ -956,8 +956,10 @@ void intel_crt_init(struct drm_i915_private *dev_priv)
crt->base.power_domain = POWER_DOMAIN_PORT_CRT; crt->base.power_domain = POWER_DOMAIN_PORT_CRT;
if (I915_HAS_HOTPLUG(dev_priv) && if (I915_HAS_HOTPLUG(dev_priv) &&
!dmi_check_system(intel_spurious_crt_detect)) !dmi_check_system(intel_spurious_crt_detect)) {
crt->base.hpd_pin = HPD_CRT; crt->base.hpd_pin = HPD_CRT;
crt->base.hotplug = intel_encoder_hotplug;
}
if (HAS_DDI(dev_priv)) { if (HAS_DDI(dev_priv)) {
crt->base.port = PORT_E; crt->base.port = PORT_E;


@ -25,6 +25,7 @@
* *
*/ */
#include <drm/drm_scdc_helper.h>
#include "i915_drv.h" #include "i915_drv.h"
#include "intel_drv.h" #include "intel_drv.h"
@ -2507,6 +2508,8 @@ static void intel_disable_ddi_dp(struct intel_encoder *encoder,
{ {
struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
intel_dp->link_trained = false;
if (old_crtc_state->has_audio) if (old_crtc_state->has_audio)
intel_audio_codec_disable(encoder, intel_audio_codec_disable(encoder,
old_crtc_state, old_conn_state); old_crtc_state, old_conn_state);
@ -2798,6 +2801,150 @@ intel_ddi_init_dp_connector(struct intel_digital_port *intel_dig_port)
return connector; return connector;
} }
static int modeset_pipe(struct drm_crtc *crtc,
struct drm_modeset_acquire_ctx *ctx)
{
struct drm_atomic_state *state;
struct drm_crtc_state *crtc_state;
int ret;
state = drm_atomic_state_alloc(crtc->dev);
if (!state)
return -ENOMEM;
state->acquire_ctx = ctx;
crtc_state = drm_atomic_get_crtc_state(state, crtc);
if (IS_ERR(crtc_state)) {
ret = PTR_ERR(crtc_state);
goto out;
}
crtc_state->mode_changed = true;
ret = drm_atomic_add_affected_connectors(state, crtc);
if (ret)
goto out;
ret = drm_atomic_add_affected_planes(state, crtc);
if (ret)
goto out;
ret = drm_atomic_commit(state);
if (ret)
goto out;
return 0;
out:
drm_atomic_state_put(state);
return ret;
}
static int intel_hdmi_reset_link(struct intel_encoder *encoder,
struct drm_modeset_acquire_ctx *ctx)
{
struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
struct intel_hdmi *hdmi = enc_to_intel_hdmi(&encoder->base);
struct intel_connector *connector = hdmi->attached_connector;
struct i2c_adapter *adapter =
intel_gmbus_get_adapter(dev_priv, hdmi->ddc_bus);
struct drm_connector_state *conn_state;
struct intel_crtc_state *crtc_state;
struct intel_crtc *crtc;
u8 config;
int ret;
if (!connector || connector->base.status != connector_status_connected)
return 0;
ret = drm_modeset_lock(&dev_priv->drm.mode_config.connection_mutex,
ctx);
if (ret)
return ret;
conn_state = connector->base.state;
crtc = to_intel_crtc(conn_state->crtc);
if (!crtc)
return 0;
ret = drm_modeset_lock(&crtc->base.mutex, ctx);
if (ret)
return ret;
crtc_state = to_intel_crtc_state(crtc->base.state);
WARN_ON(!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI));
if (!crtc_state->base.active)
return 0;
if (!crtc_state->hdmi_high_tmds_clock_ratio &&
!crtc_state->hdmi_scrambling)
return 0;
if (conn_state->commit &&
!try_wait_for_completion(&conn_state->commit->hw_done))
return 0;
ret = drm_scdc_readb(adapter, SCDC_TMDS_CONFIG, &config);
if (ret < 0) {
DRM_ERROR("Failed to read TMDS config: %d\n", ret);
return 0;
}
if (!!(config & SCDC_TMDS_BIT_CLOCK_RATIO_BY_40) ==
crtc_state->hdmi_high_tmds_clock_ratio &&
!!(config & SCDC_SCRAMBLING_ENABLE) ==
crtc_state->hdmi_scrambling)
return 0;
/*
* HDMI 2.0 says that one should not send scrambled data
* prior to configuring the sink scrambling, and that
* TMDS clock/data transmission should be suspended when
* changing the TMDS clock rate in the sink. So let's
* just do a full modeset here, even though some sinks
* would be perfectly happy if we were to just reconfigure
* the SCDC settings on the fly.
*/
return modeset_pipe(&crtc->base, ctx);
}
static bool intel_ddi_hotplug(struct intel_encoder *encoder,
struct intel_connector *connector)
{
struct drm_modeset_acquire_ctx ctx;
bool changed;
int ret;
changed = intel_encoder_hotplug(encoder, connector);
drm_modeset_acquire_init(&ctx, 0);
for (;;) {
if (connector->base.connector_type == DRM_MODE_CONNECTOR_HDMIA)
ret = intel_hdmi_reset_link(encoder, &ctx);
else
ret = intel_dp_retrain_link(encoder, &ctx);
if (ret == -EDEADLK) {
drm_modeset_backoff(&ctx);
continue;
}
break;
}
drm_modeset_drop_locks(&ctx);
drm_modeset_acquire_fini(&ctx);
WARN(ret, "Acquiring modeset locks failed with %i\n", ret);
return changed;
}
static struct intel_connector * static struct intel_connector *
intel_ddi_init_hdmi_connector(struct intel_digital_port *intel_dig_port) intel_ddi_init_hdmi_connector(struct intel_digital_port *intel_dig_port)
{ {
@ -2842,39 +2989,45 @@ static bool intel_ddi_a_force_4_lanes(struct intel_digital_port *dport)
return false; return false;
} }
static int
intel_ddi_max_lanes(struct intel_digital_port *intel_dport)
{
struct drm_i915_private *dev_priv = to_i915(intel_dport->base.base.dev);
enum port port = intel_dport->base.port;
int max_lanes = 4;
if (INTEL_GEN(dev_priv) >= 11)
return max_lanes;
if (port == PORT_A || port == PORT_E) {
if (I915_READ(DDI_BUF_CTL(PORT_A)) & DDI_A_4_LANES)
max_lanes = port == PORT_A ? 4 : 0;
else
/* Both A and E share 2 lanes */
max_lanes = 2;
}
/*
* Some BIOS might fail to set this bit on port A if eDP
* wasn't lit up at boot. Force this bit set when needed
* so we use the proper lane count for our calculations.
*/
if (intel_ddi_a_force_4_lanes(intel_dport)) {
DRM_DEBUG_KMS("Forcing DDI_A_4_LANES for port A\n");
intel_dport->saved_port_bits |= DDI_A_4_LANES;
max_lanes = 4;
}
return max_lanes;
}
void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port)
{ {
struct intel_digital_port *intel_dig_port; struct intel_digital_port *intel_dig_port;
struct intel_encoder *intel_encoder; struct intel_encoder *intel_encoder;
struct drm_encoder *encoder; struct drm_encoder *encoder;
bool init_hdmi, init_dp, init_lspcon = false; bool init_hdmi, init_dp, init_lspcon = false;
int max_lanes;
if (I915_READ(DDI_BUF_CTL(PORT_A)) & DDI_A_4_LANES) {
switch (port) {
case PORT_A:
max_lanes = 4;
break;
case PORT_E:
max_lanes = 0;
break;
default:
max_lanes = 4;
break;
}
} else {
switch (port) {
case PORT_A:
max_lanes = 2;
break;
case PORT_E:
max_lanes = 2;
break;
default:
max_lanes = 4;
break;
}
}
init_hdmi = (dev_priv->vbt.ddi_port_info[port].supports_dvi || init_hdmi = (dev_priv->vbt.ddi_port_info[port].supports_dvi ||
dev_priv->vbt.ddi_port_info[port].supports_hdmi); dev_priv->vbt.ddi_port_info[port].supports_hdmi);
@ -2908,6 +3061,7 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port)
drm_encoder_init(&dev_priv->drm, encoder, &intel_ddi_funcs, drm_encoder_init(&dev_priv->drm, encoder, &intel_ddi_funcs,
DRM_MODE_ENCODER_TMDS, "DDI %c", port_name(port)); DRM_MODE_ENCODER_TMDS, "DDI %c", port_name(port));
intel_encoder->hotplug = intel_ddi_hotplug;
intel_encoder->compute_output_type = intel_ddi_compute_output_type; intel_encoder->compute_output_type = intel_ddi_compute_output_type;
intel_encoder->compute_config = intel_ddi_compute_config; intel_encoder->compute_config = intel_ddi_compute_config;
intel_encoder->enable = intel_enable_ddi; intel_encoder->enable = intel_enable_ddi;
@ -2920,10 +3074,17 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port)
intel_encoder->get_config = intel_ddi_get_config; intel_encoder->get_config = intel_ddi_get_config;
intel_encoder->suspend = intel_dp_encoder_suspend; intel_encoder->suspend = intel_dp_encoder_suspend;
intel_encoder->get_power_domains = intel_ddi_get_power_domains; intel_encoder->get_power_domains = intel_ddi_get_power_domains;
intel_encoder->type = INTEL_OUTPUT_DDI;
intel_encoder->power_domain = intel_port_to_power_domain(port);
intel_encoder->port = port;
intel_encoder->crtc_mask = (1 << 0) | (1 << 1) | (1 << 2);
intel_encoder->cloneable = 0;
intel_dig_port->saved_port_bits = I915_READ(DDI_BUF_CTL(port)) & intel_dig_port->saved_port_bits = I915_READ(DDI_BUF_CTL(port)) &
(DDI_BUF_PORT_REVERSAL | (DDI_BUF_PORT_REVERSAL |
DDI_A_4_LANES); DDI_A_4_LANES);
intel_dig_port->dp.output_reg = INVALID_MMIO_REG;
intel_dig_port->max_lanes = intel_ddi_max_lanes(intel_dig_port);
switch (port) { switch (port) {
case PORT_A: case PORT_A:
@ -2954,26 +3115,6 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port)
MISSING_CASE(port); MISSING_CASE(port);
} }
/*
* Some BIOS might fail to set this bit on port A if eDP
* wasn't lit up at boot. Force this bit set when needed
* so we use the proper lane count for our calculations.
*/
if (intel_ddi_a_force_4_lanes(intel_dig_port)) {
DRM_DEBUG_KMS("Forcing DDI_A_4_LANES for port A\n");
intel_dig_port->saved_port_bits |= DDI_A_4_LANES;
max_lanes = 4;
}
intel_dig_port->dp.output_reg = INVALID_MMIO_REG;
intel_dig_port->max_lanes = max_lanes;
intel_encoder->type = INTEL_OUTPUT_DDI;
intel_encoder->power_domain = intel_port_to_power_domain(port);
intel_encoder->port = port;
intel_encoder->crtc_mask = (1 << 0) | (1 << 1) | (1 << 2);
intel_encoder->cloneable = 0;
intel_infoframe_init(intel_dig_port); intel_infoframe_init(intel_dig_port);
if (init_dp) { if (init_dp) {
@ -81,12 +81,16 @@ void intel_device_info_dump_flags(const struct intel_device_info *info,
static void sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p) static void sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p)
{ {
int s;
drm_printf(p, "slice mask: %04x\n", sseu->slice_mask); drm_printf(p, "slice mask: %04x\n", sseu->slice_mask);
drm_printf(p, "slice total: %u\n", hweight8(sseu->slice_mask)); drm_printf(p, "slice total: %u\n", hweight8(sseu->slice_mask));
drm_printf(p, "subslice total: %u\n", sseu_subslice_total(sseu)); drm_printf(p, "subslice total: %u\n", sseu_subslice_total(sseu));
drm_printf(p, "subslice mask %04x\n", sseu->subslice_mask); for (s = 0; s < ARRAY_SIZE(sseu->subslice_mask); s++) {
drm_printf(p, "subslice per slice: %u\n", drm_printf(p, "slice%d %u subslices mask=%04x\n",
hweight8(sseu->subslice_mask)); s, hweight8(sseu->subslice_mask[s]),
sseu->subslice_mask[s]);
}
drm_printf(p, "EU total: %u\n", sseu->eu_total); drm_printf(p, "EU total: %u\n", sseu->eu_total);
drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice); drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice);
drm_printf(p, "has slice power gating: %s\n", drm_printf(p, "has slice power gating: %s\n",
@ -120,22 +124,100 @@ void intel_device_info_dump(const struct intel_device_info *info,
intel_device_info_dump_flags(info, p); intel_device_info_dump_flags(info, p);
} }
void intel_device_info_dump_topology(const struct sseu_dev_info *sseu,
struct drm_printer *p)
{
int s, ss;
if (sseu->max_slices == 0) {
drm_printf(p, "Unavailable\n");
return;
}
for (s = 0; s < sseu->max_slices; s++) {
drm_printf(p, "slice%d: %u subslice(s) (0x%hhx):\n",
s, hweight8(sseu->subslice_mask[s]),
sseu->subslice_mask[s]);
for (ss = 0; ss < sseu->max_subslices; ss++) {
u16 enabled_eus = sseu_get_eus(sseu, s, ss);
drm_printf(p, "\tsubslice%d: %u EUs (0x%hx)\n",
ss, hweight16(enabled_eus), enabled_eus);
}
}
}
static u16 compute_eu_total(const struct sseu_dev_info *sseu)
{
u16 i, total = 0;
for (i = 0; i < ARRAY_SIZE(sseu->eu_mask); i++)
total += hweight8(sseu->eu_mask[i]);
return total;
}
static void gen10_sseu_info_init(struct drm_i915_private *dev_priv) static void gen10_sseu_info_init(struct drm_i915_private *dev_priv)
{ {
struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu; struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
const u32 fuse2 = I915_READ(GEN8_FUSE2); const u32 fuse2 = I915_READ(GEN8_FUSE2);
int s, ss;
const int eu_mask = 0xff;
u32 subslice_mask, eu_en;
sseu->slice_mask = (fuse2 & GEN10_F2_S_ENA_MASK) >> sseu->slice_mask = (fuse2 & GEN10_F2_S_ENA_MASK) >>
GEN10_F2_S_ENA_SHIFT; GEN10_F2_S_ENA_SHIFT;
sseu->subslice_mask = (1 << 4) - 1; sseu->max_slices = 6;
sseu->subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >> sseu->max_subslices = 4;
GEN10_F2_SS_DIS_SHIFT); sseu->max_eus_per_subslice = 8;
sseu->eu_total = hweight32(~I915_READ(GEN8_EU_DISABLE0)); subslice_mask = (1 << 4) - 1;
sseu->eu_total += hweight32(~I915_READ(GEN8_EU_DISABLE1)); subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >>
sseu->eu_total += hweight32(~I915_READ(GEN8_EU_DISABLE2)); GEN10_F2_SS_DIS_SHIFT);
sseu->eu_total += hweight8(~(I915_READ(GEN10_EU_DISABLE3) &
GEN10_EU_DIS_SS_MASK)); /*
* Slice0 can have up to 3 subslices, but there are only 2 in
* slice1/2.
*/
sseu->subslice_mask[0] = subslice_mask;
for (s = 1; s < sseu->max_slices; s++)
sseu->subslice_mask[s] = subslice_mask & 0x3;
/* Slice0 */
eu_en = ~I915_READ(GEN8_EU_DISABLE0);
for (ss = 0; ss < sseu->max_subslices; ss++)
sseu_set_eus(sseu, 0, ss, (eu_en >> (8 * ss)) & eu_mask);
/* Slice1 */
sseu_set_eus(sseu, 1, 0, (eu_en >> 24) & eu_mask);
eu_en = ~I915_READ(GEN8_EU_DISABLE1);
sseu_set_eus(sseu, 1, 1, eu_en & eu_mask);
/* Slice2 */
sseu_set_eus(sseu, 2, 0, (eu_en >> 8) & eu_mask);
sseu_set_eus(sseu, 2, 1, (eu_en >> 16) & eu_mask);
/* Slice3 */
sseu_set_eus(sseu, 3, 0, (eu_en >> 24) & eu_mask);
eu_en = ~I915_READ(GEN8_EU_DISABLE2);
sseu_set_eus(sseu, 3, 1, eu_en & eu_mask);
/* Slice4 */
sseu_set_eus(sseu, 4, 0, (eu_en >> 8) & eu_mask);
sseu_set_eus(sseu, 4, 1, (eu_en >> 16) & eu_mask);
/* Slice5 */
sseu_set_eus(sseu, 5, 0, (eu_en >> 24) & eu_mask);
eu_en = ~I915_READ(GEN10_EU_DISABLE3);
sseu_set_eus(sseu, 5, 1, eu_en & eu_mask);
/* Do a second pass where we mark the subslices disabled if all their
* eus are off.
*/
for (s = 0; s < sseu->max_slices; s++) {
for (ss = 0; ss < sseu->max_subslices; ss++) {
if (sseu_get_eus(sseu, s, ss) == 0)
sseu->subslice_mask[s] &= ~BIT(ss);
}
}
sseu->eu_total = compute_eu_total(sseu);
/* /*
* CNL is expected to always have a uniform distribution * CNL is expected to always have a uniform distribution
@ -156,26 +238,39 @@ static void gen10_sseu_info_init(struct drm_i915_private *dev_priv)
static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv) static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv)
{ {
struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu; struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
u32 fuse, eu_dis; u32 fuse;
fuse = I915_READ(CHV_FUSE_GT); fuse = I915_READ(CHV_FUSE_GT);
sseu->slice_mask = BIT(0); sseu->slice_mask = BIT(0);
sseu->max_slices = 1;
sseu->max_subslices = 2;
sseu->max_eus_per_subslice = 8;
if (!(fuse & CHV_FGT_DISABLE_SS0)) { if (!(fuse & CHV_FGT_DISABLE_SS0)) {
sseu->subslice_mask |= BIT(0); u8 disabled_mask =
eu_dis = fuse & (CHV_FGT_EU_DIS_SS0_R0_MASK | ((fuse & CHV_FGT_EU_DIS_SS0_R0_MASK) >>
CHV_FGT_EU_DIS_SS0_R1_MASK); CHV_FGT_EU_DIS_SS0_R0_SHIFT) |
sseu->eu_total += 8 - hweight32(eu_dis); (((fuse & CHV_FGT_EU_DIS_SS0_R1_MASK) >>
CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4);
sseu->subslice_mask[0] |= BIT(0);
sseu_set_eus(sseu, 0, 0, ~disabled_mask);
} }
if (!(fuse & CHV_FGT_DISABLE_SS1)) { if (!(fuse & CHV_FGT_DISABLE_SS1)) {
sseu->subslice_mask |= BIT(1); u8 disabled_mask =
eu_dis = fuse & (CHV_FGT_EU_DIS_SS1_R0_MASK | ((fuse & CHV_FGT_EU_DIS_SS1_R0_MASK) >>
CHV_FGT_EU_DIS_SS1_R1_MASK); CHV_FGT_EU_DIS_SS1_R0_SHIFT) |
sseu->eu_total += 8 - hweight32(eu_dis); (((fuse & CHV_FGT_EU_DIS_SS1_R1_MASK) >>
CHV_FGT_EU_DIS_SS1_R1_SHIFT) << 4);
sseu->subslice_mask[0] |= BIT(1);
sseu_set_eus(sseu, 0, 1, ~disabled_mask);
} }
sseu->eu_total = compute_eu_total(sseu);
/* /*
* CHV expected to always have a uniform distribution of EU * CHV expected to always have a uniform distribution of EU
* across subslices. * across subslices.
@ -197,41 +292,52 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
{ {
struct intel_device_info *info = mkwrite_device_info(dev_priv); struct intel_device_info *info = mkwrite_device_info(dev_priv);
struct sseu_dev_info *sseu = &info->sseu; struct sseu_dev_info *sseu = &info->sseu;
int s_max = 3, ss_max = 4, eu_max = 8;
int s, ss; int s, ss;
u32 fuse2, eu_disable; u32 fuse2, eu_disable, subslice_mask;
u8 eu_mask = 0xff; const u8 eu_mask = 0xff;
fuse2 = I915_READ(GEN8_FUSE2); fuse2 = I915_READ(GEN8_FUSE2);
sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT; sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;
/* BXT has a single slice and at most 3 subslices. */
sseu->max_slices = IS_GEN9_LP(dev_priv) ? 1 : 3;
sseu->max_subslices = IS_GEN9_LP(dev_priv) ? 3 : 4;
sseu->max_eus_per_subslice = 8;
/* /*
* The subslice disable field is global, i.e. it applies * The subslice disable field is global, i.e. it applies
* to each of the enabled slices. * to each of the enabled slices.
*/ */
sseu->subslice_mask = (1 << ss_max) - 1; subslice_mask = (1 << sseu->max_subslices) - 1;
sseu->subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >> subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >>
GEN9_F2_SS_DIS_SHIFT); GEN9_F2_SS_DIS_SHIFT);
/* /*
* Iterate through enabled slices and subslices to * Iterate through enabled slices and subslices to
* count the total enabled EU. * count the total enabled EU.
*/ */
for (s = 0; s < s_max; s++) { for (s = 0; s < sseu->max_slices; s++) {
if (!(sseu->slice_mask & BIT(s))) if (!(sseu->slice_mask & BIT(s)))
/* skip disabled slice */ /* skip disabled slice */
continue; continue;
eu_disable = I915_READ(GEN9_EU_DISABLE(s)); sseu->subslice_mask[s] = subslice_mask;
for (ss = 0; ss < ss_max; ss++) {
int eu_per_ss;
if (!(sseu->subslice_mask & BIT(ss))) eu_disable = I915_READ(GEN9_EU_DISABLE(s));
for (ss = 0; ss < sseu->max_subslices; ss++) {
int eu_per_ss;
u8 eu_disabled_mask;
if (!(sseu->subslice_mask[s] & BIT(ss)))
/* skip disabled subslice */ /* skip disabled subslice */
continue; continue;
eu_per_ss = eu_max - hweight8((eu_disable >> (ss*8)) & eu_disabled_mask = (eu_disable >> (ss * 8)) & eu_mask;
eu_mask);
sseu_set_eus(sseu, s, ss, ~eu_disabled_mask);
eu_per_ss = sseu->max_eus_per_subslice -
hweight8(eu_disabled_mask);
/* /*
* Record which subslice(s) has(have) 7 EUs. we * Record which subslice(s) has(have) 7 EUs. we
@ -240,11 +346,11 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
*/ */
if (eu_per_ss == 7) if (eu_per_ss == 7)
sseu->subslice_7eu[s] |= BIT(ss); sseu->subslice_7eu[s] |= BIT(ss);
sseu->eu_total += eu_per_ss;
} }
} }
sseu->eu_total = compute_eu_total(sseu);
/* /*
* SKL is expected to always have a uniform distribution * SKL is expected to always have a uniform distribution
* of EU across subslices with the exception that any one * of EU across subslices with the exception that any one
@ -270,8 +376,8 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
sseu->has_eu_pg = sseu->eu_per_subslice > 2; sseu->has_eu_pg = sseu->eu_per_subslice > 2;
if (IS_GEN9_LP(dev_priv)) { if (IS_GEN9_LP(dev_priv)) {
#define IS_SS_DISABLED(ss) (!(sseu->subslice_mask & BIT(ss))) #define IS_SS_DISABLED(ss) (!(sseu->subslice_mask[0] & BIT(ss)))
info->has_pooled_eu = hweight8(sseu->subslice_mask) == 3; info->has_pooled_eu = hweight8(sseu->subslice_mask[0]) == 3;
sseu->min_eu_in_pool = 0; sseu->min_eu_in_pool = 0;
if (info->has_pooled_eu) { if (info->has_pooled_eu) {
@ -289,19 +395,22 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv) static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv)
{ {
struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu; struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
const int s_max = 3, ss_max = 3, eu_max = 8;
int s, ss; int s, ss;
u32 fuse2, eu_disable[3]; /* s_max */ u32 fuse2, subslice_mask, eu_disable[3]; /* s_max */
fuse2 = I915_READ(GEN8_FUSE2); fuse2 = I915_READ(GEN8_FUSE2);
sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT; sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;
sseu->max_slices = 3;
sseu->max_subslices = 3;
sseu->max_eus_per_subslice = 8;
/* /*
* The subslice disable field is global, i.e. it applies * The subslice disable field is global, i.e. it applies
* to each of the enabled slices. * to each of the enabled slices.
*/ */
sseu->subslice_mask = GENMASK(ss_max - 1, 0); subslice_mask = GENMASK(sseu->max_subslices - 1, 0);
sseu->subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >> subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >>
GEN8_F2_SS_DIS_SHIFT); GEN8_F2_SS_DIS_SHIFT);
eu_disable[0] = I915_READ(GEN8_EU_DISABLE0) & GEN8_EU_DIS0_S0_MASK; eu_disable[0] = I915_READ(GEN8_EU_DISABLE0) & GEN8_EU_DIS0_S0_MASK;
eu_disable[1] = (I915_READ(GEN8_EU_DISABLE0) >> GEN8_EU_DIS0_S1_SHIFT) | eu_disable[1] = (I915_READ(GEN8_EU_DISABLE0) >> GEN8_EU_DIS0_S1_SHIFT) |
@ -315,30 +424,38 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv)
* Iterate through enabled slices and subslices to * Iterate through enabled slices and subslices to
* count the total enabled EU. * count the total enabled EU.
*/ */
for (s = 0; s < s_max; s++) { for (s = 0; s < sseu->max_slices; s++) {
if (!(sseu->slice_mask & BIT(s))) if (!(sseu->slice_mask & BIT(s)))
/* skip disabled slice */ /* skip disabled slice */
continue; continue;
for (ss = 0; ss < ss_max; ss++) { sseu->subslice_mask[s] = subslice_mask;
for (ss = 0; ss < sseu->max_subslices; ss++) {
u8 eu_disabled_mask;
u32 n_disabled; u32 n_disabled;
if (!(sseu->subslice_mask & BIT(ss))) if (!(sseu->subslice_mask[ss] & BIT(ss)))
/* skip disabled subslice */ /* skip disabled subslice */
continue; continue;
n_disabled = hweight8(eu_disable[s] >> (ss * eu_max)); eu_disabled_mask =
eu_disable[s] >> (ss * sseu->max_eus_per_subslice);
sseu_set_eus(sseu, s, ss, ~eu_disabled_mask);
n_disabled = hweight8(eu_disabled_mask);
/* /*
* Record which subslices have 7 EUs. * Record which subslices have 7 EUs.
*/ */
if (eu_max - n_disabled == 7) if (sseu->max_eus_per_subslice - n_disabled == 7)
sseu->subslice_7eu[s] |= 1 << ss; sseu->subslice_7eu[s] |= 1 << ss;
sseu->eu_total += eu_max - n_disabled;
} }
} }
sseu->eu_total = compute_eu_total(sseu);
/* /*
* BDW is expected to always have a uniform distribution of EU across * BDW is expected to always have a uniform distribution of EU across
* subslices with the exception that any one EU in any one subslice may * subslices with the exception that any one EU in any one subslice may
@ -357,6 +474,72 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv)
sseu->has_eu_pg = 0; sseu->has_eu_pg = 0;
} }
static void haswell_sseu_info_init(struct drm_i915_private *dev_priv)
{
struct intel_device_info *info = mkwrite_device_info(dev_priv);
struct sseu_dev_info *sseu = &info->sseu;
u32 fuse1;
int s, ss;
/*
* There isn't a register to tell us how many slices/subslices. We
* work off the PCI-ids here.
*/
switch (info->gt) {
default:
MISSING_CASE(info->gt);
/* fall through */
case 1:
sseu->slice_mask = BIT(0);
sseu->subslice_mask[0] = BIT(0);
break;
case 2:
sseu->slice_mask = BIT(0);
sseu->subslice_mask[0] = BIT(0) | BIT(1);
break;
case 3:
sseu->slice_mask = BIT(0) | BIT(1);
sseu->subslice_mask[0] = BIT(0) | BIT(1);
sseu->subslice_mask[1] = BIT(0) | BIT(1);
break;
}
sseu->max_slices = hweight8(sseu->slice_mask);
sseu->max_subslices = hweight8(sseu->subslice_mask[0]);
fuse1 = I915_READ(HSW_PAVP_FUSE1);
switch ((fuse1 & HSW_F1_EU_DIS_MASK) >> HSW_F1_EU_DIS_SHIFT) {
default:
MISSING_CASE((fuse1 & HSW_F1_EU_DIS_MASK) >>
HSW_F1_EU_DIS_SHIFT);
/* fall through */
case HSW_F1_EU_DIS_10EUS:
sseu->eu_per_subslice = 10;
break;
case HSW_F1_EU_DIS_8EUS:
sseu->eu_per_subslice = 8;
break;
case HSW_F1_EU_DIS_6EUS:
sseu->eu_per_subslice = 6;
break;
}
sseu->max_eus_per_subslice = sseu->eu_per_subslice;
for (s = 0; s < sseu->max_slices; s++) {
for (ss = 0; ss < sseu->max_subslices; ss++) {
sseu_set_eus(sseu, s, ss,
(1UL << sseu->eu_per_subslice) - 1);
}
}
sseu->eu_total = compute_eu_total(sseu);
/* No powergating for you. */
sseu->has_slice_pg = 0;
sseu->has_subslice_pg = 0;
sseu->has_eu_pg = 0;
}
static u32 read_reference_ts_freq(struct drm_i915_private *dev_priv) static u32 read_reference_ts_freq(struct drm_i915_private *dev_priv)
{ {
u32 ts_override = I915_READ(GEN9_TIMESTAMP_OVERRIDE); u32 ts_override = I915_READ(GEN9_TIMESTAMP_OVERRIDE);
@ -489,6 +672,9 @@ void intel_device_info_runtime_init(struct intel_device_info *info)
info->num_scalers[PIPE_C] = 1; info->num_scalers[PIPE_C] = 1;
} }
BUILD_BUG_ON(I915_NUM_ENGINES >
sizeof(intel_ring_mask_t) * BITS_PER_BYTE);
/* /*
* Skylake and Broxton currently don't expose the topmost plane as its * Skylake and Broxton currently don't expose the topmost plane as its
* use is exclusive with the legacy cursor and we only want to expose * use is exclusive with the legacy cursor and we only want to expose
@ -574,7 +760,9 @@ void intel_device_info_runtime_init(struct intel_device_info *info)
} }
/* Initialize slice/subslice/EU info */ /* Initialize slice/subslice/EU info */
if (IS_CHERRYVIEW(dev_priv)) if (IS_HASWELL(dev_priv))
haswell_sseu_info_init(dev_priv);
else if (IS_CHERRYVIEW(dev_priv))
cherryview_sseu_info_init(dev_priv); cherryview_sseu_info_init(dev_priv);
else if (IS_BROADWELL(dev_priv)) else if (IS_BROADWELL(dev_priv))
broadwell_sseu_info_init(dev_priv); broadwell_sseu_info_init(dev_priv);
@ -96,6 +96,7 @@ enum intel_platform {
func(has_l3_dpf); \ func(has_l3_dpf); \
func(has_llc); \ func(has_llc); \
func(has_logical_ring_contexts); \ func(has_logical_ring_contexts); \
func(has_logical_ring_elsq); \
func(has_logical_ring_preemption); \ func(has_logical_ring_preemption); \
func(has_overlay); \ func(has_overlay); \
func(has_pooled_eu); \ func(has_pooled_eu); \
@ -112,10 +113,13 @@ enum intel_platform {
func(supports_tv); \ func(supports_tv); \
func(has_ipc); func(has_ipc);
#define GEN_MAX_SLICES (6) /* CNL upper bound */
#define GEN_MAX_SUBSLICES (7)
struct sseu_dev_info { struct sseu_dev_info {
u8 slice_mask; u8 slice_mask;
u8 subslice_mask; u8 subslice_mask[GEN_MAX_SUBSLICES];
u8 eu_total; u16 eu_total;
u8 eu_per_subslice; u8 eu_per_subslice;
u8 min_eu_in_pool; u8 min_eu_in_pool;
/* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? */ /* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? */
@ -123,8 +127,21 @@ struct sseu_dev_info {
u8 has_slice_pg:1; u8 has_slice_pg:1;
u8 has_subslice_pg:1; u8 has_subslice_pg:1;
u8 has_eu_pg:1; u8 has_eu_pg:1;
/* Topology fields */
u8 max_slices;
u8 max_subslices;
u8 max_eus_per_subslice;
/* We don't have more than 8 eus per subslice at the moment and as we
* store eus enabled using bits, no need to multiply by eus per
* subslice.
*/
u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES];
}; };
typedef u8 intel_ring_mask_t;
struct intel_device_info { struct intel_device_info {
u16 device_id; u16 device_id;
u16 gen_mask; u16 gen_mask;
@ -132,19 +149,19 @@ struct intel_device_info {
u8 gen; u8 gen;
u8 gt; /* GT number, 0 if undefined */ u8 gt; /* GT number, 0 if undefined */
u8 num_rings; u8 num_rings;
u8 ring_mask; /* Rings supported by the HW */ intel_ring_mask_t ring_mask; /* Rings supported by the HW */
enum intel_platform platform; enum intel_platform platform;
u32 platform_mask; u32 platform_mask;
unsigned int page_sizes; /* page sizes supported by the HW */
u32 display_mmio_offset; u32 display_mmio_offset;
u8 num_pipes; u8 num_pipes;
u8 num_sprites[I915_MAX_PIPES]; u8 num_sprites[I915_MAX_PIPES];
u8 num_scalers[I915_MAX_PIPES]; u8 num_scalers[I915_MAX_PIPES];
unsigned int page_sizes; /* page sizes supported by the HW */
#define DEFINE_FLAG(name) u8 name:1 #define DEFINE_FLAG(name) u8 name:1
DEV_INFO_FOR_EACH_FLAG(DEFINE_FLAG); DEV_INFO_FOR_EACH_FLAG(DEFINE_FLAG);
#undef DEFINE_FLAG #undef DEFINE_FLAG
@ -173,7 +190,49 @@ struct intel_driver_caps {
static inline unsigned int sseu_subslice_total(const struct sseu_dev_info *sseu) static inline unsigned int sseu_subslice_total(const struct sseu_dev_info *sseu)
{ {
return hweight8(sseu->slice_mask) * hweight8(sseu->subslice_mask); unsigned int i, total = 0;
for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask); i++)
total += hweight8(sseu->subslice_mask[i]);
return total;
}
static inline int sseu_eu_idx(const struct sseu_dev_info *sseu,
int slice, int subslice)
{
int subslice_stride = DIV_ROUND_UP(sseu->max_eus_per_subslice,
BITS_PER_BYTE);
int slice_stride = sseu->max_subslices * subslice_stride;
return slice * slice_stride + subslice * subslice_stride;
}
static inline u16 sseu_get_eus(const struct sseu_dev_info *sseu,
int slice, int subslice)
{
int i, offset = sseu_eu_idx(sseu, slice, subslice);
u16 eu_mask = 0;
for (i = 0;
i < DIV_ROUND_UP(sseu->max_eus_per_subslice, BITS_PER_BYTE); i++) {
eu_mask |= ((u16) sseu->eu_mask[offset + i]) <<
(i * BITS_PER_BYTE);
}
return eu_mask;
}
static inline void sseu_set_eus(struct sseu_dev_info *sseu,
int slice, int subslice, u16 eu_mask)
{
int i, offset = sseu_eu_idx(sseu, slice, subslice);
for (i = 0;
i < DIV_ROUND_UP(sseu->max_eus_per_subslice, BITS_PER_BYTE); i++) {
sseu->eu_mask[offset + i] =
(eu_mask >> (BITS_PER_BYTE * i)) & 0xff;
}
} }
const char *intel_platform_name(enum intel_platform platform); const char *intel_platform_name(enum intel_platform platform);
@ -185,6 +244,8 @@ void intel_device_info_dump_flags(const struct intel_device_info *info,
struct drm_printer *p); struct drm_printer *p);
void intel_device_info_dump_runtime(const struct intel_device_info *info, void intel_device_info_dump_runtime(const struct intel_device_info *info,
struct drm_printer *p); struct drm_printer *p);
void intel_device_info_dump_topology(const struct sseu_dev_info *sseu,
struct drm_printer *p);
void intel_driver_caps_print(const struct intel_driver_caps *caps, void intel_driver_caps_print(const struct intel_driver_caps *caps,
struct drm_printer *p); struct drm_printer *p);
@ -2067,9 +2067,18 @@ static unsigned int intel_surf_alignment(const struct drm_framebuffer *fb,
} }
} }
static bool intel_plane_uses_fence(const struct intel_plane_state *plane_state)
{
struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
return INTEL_GEN(dev_priv) < 4 || plane->has_fbc;
}
struct i915_vma * struct i915_vma *
intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
unsigned int rotation, unsigned int rotation,
bool uses_fence,
unsigned long *out_flags) unsigned long *out_flags)
{ {
struct drm_device *dev = fb->dev; struct drm_device *dev = fb->dev;
@ -2122,7 +2131,9 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
if (IS_ERR(vma)) if (IS_ERR(vma))
goto err; goto err;
if (i915_vma_is_map_and_fenceable(vma)) { if (uses_fence && i915_vma_is_map_and_fenceable(vma)) {
int ret;
/* Install a fence for tiled scan-out. Pre-i965 always needs a /* Install a fence for tiled scan-out. Pre-i965 always needs a
* fence, whereas 965+ only requires a fence if using * fence, whereas 965+ only requires a fence if using
* framebuffer compression. For simplicity, we always, when * framebuffer compression. For simplicity, we always, when
@ -2139,7 +2150,14 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
* something and try to run the system in a "less than optimal" * something and try to run the system in a "less than optimal"
* mode that matches the user configuration. * mode that matches the user configuration.
*/ */
if (i915_vma_pin_fence(vma) == 0 && vma->fence) ret = i915_vma_pin_fence(vma);
if (ret != 0 && INTEL_GEN(dev_priv) < 4) {
i915_gem_object_unpin_from_display_plane(vma);
vma = ERR_PTR(ret);
goto err;
}
if (ret == 0 && vma->fence)
*out_flags |= PLANE_HAS_FENCE; *out_flags |= PLANE_HAS_FENCE;
} }
@ -2828,6 +2846,7 @@ valid_fb:
intel_state->vma = intel_state->vma =
intel_pin_and_fence_fb_obj(fb, intel_pin_and_fence_fb_obj(fb,
primary->state->rotation, primary->state->rotation,
intel_plane_uses_fence(intel_state),
&intel_state->flags); &intel_state->flags);
mutex_unlock(&dev->struct_mutex); mutex_unlock(&dev->struct_mutex);
if (IS_ERR(intel_state->vma)) { if (IS_ERR(intel_state->vma)) {
@ -12034,6 +12053,14 @@ static int intel_atomic_check(struct drm_device *dev,
int ret, i; int ret, i;
bool any_ms = false; bool any_ms = false;
/* Catch I915_MODE_FLAG_INHERITED */
for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state,
crtc_state, i) {
if (crtc_state->mode.private_flags !=
old_crtc_state->mode.private_flags)
crtc_state->mode_changed = true;
}
ret = drm_atomic_helper_check_modeset(dev, state); ret = drm_atomic_helper_check_modeset(dev, state);
if (ret) if (ret)
return ret; return ret;
@ -12042,10 +12069,6 @@ static int intel_atomic_check(struct drm_device *dev,
struct intel_crtc_state *pipe_config = struct intel_crtc_state *pipe_config =
to_intel_crtc_state(crtc_state); to_intel_crtc_state(crtc_state);
/* Catch I915_MODE_FLAG_INHERITED */
if (crtc_state->mode.private_flags != old_crtc_state->mode.private_flags)
crtc_state->mode_changed = true;
if (!needs_modeset(crtc_state)) if (!needs_modeset(crtc_state))
continue; continue;
@ -12054,13 +12077,6 @@ static int intel_atomic_check(struct drm_device *dev,
continue; continue;
} }
/* FIXME: For only active_changed we shouldn't need to do any
* state recomputation at all. */
ret = drm_atomic_add_affected_connectors(state, crtc);
if (ret)
return ret;
ret = intel_modeset_pipe_config(crtc, pipe_config); ret = intel_modeset_pipe_config(crtc, pipe_config);
if (ret) { if (ret) {
intel_dump_pipe_config(to_intel_crtc(crtc), intel_dump_pipe_config(to_intel_crtc(crtc),
@ -12079,10 +12095,6 @@ static int intel_atomic_check(struct drm_device *dev,
if (needs_modeset(crtc_state)) if (needs_modeset(crtc_state))
any_ms = true; any_ms = true;
ret = drm_atomic_add_affected_planes(state, crtc);
if (ret)
return ret;
intel_dump_pipe_config(to_intel_crtc(crtc), pipe_config, intel_dump_pipe_config(to_intel_crtc(crtc), pipe_config,
needs_modeset(crtc_state) ? needs_modeset(crtc_state) ?
"[modeset]" : "[fastset]"); "[modeset]" : "[fastset]");
@ -12600,23 +12612,23 @@ struct wait_rps_boost {
struct wait_queue_entry wait; struct wait_queue_entry wait;
struct drm_crtc *crtc; struct drm_crtc *crtc;
struct drm_i915_gem_request *request; struct i915_request *request;
}; };
static int do_rps_boost(struct wait_queue_entry *_wait, static int do_rps_boost(struct wait_queue_entry *_wait,
unsigned mode, int sync, void *key) unsigned mode, int sync, void *key)
{ {
struct wait_rps_boost *wait = container_of(_wait, typeof(*wait), wait); struct wait_rps_boost *wait = container_of(_wait, typeof(*wait), wait);
struct drm_i915_gem_request *rq = wait->request; struct i915_request *rq = wait->request;
/* /*
* If we missed the vblank, but the request is already running it * If we missed the vblank, but the request is already running it
* is reasonable to assume that it will complete before the next * is reasonable to assume that it will complete before the next
* vblank without our intervention, so leave RPS alone. * vblank without our intervention, so leave RPS alone.
*/ */
if (!i915_gem_request_started(rq)) if (!i915_request_started(rq))
gen6_rps_boost(rq, NULL); gen6_rps_boost(rq, NULL);
i915_gem_request_put(rq); i915_request_put(rq);
drm_crtc_vblank_put(wait->crtc); drm_crtc_vblank_put(wait->crtc);
@ -12654,6 +12666,42 @@ static void add_rps_boost_after_vblank(struct drm_crtc *crtc,
add_wait_queue(drm_crtc_vblank_waitqueue(crtc), &wait->wait); add_wait_queue(drm_crtc_vblank_waitqueue(crtc), &wait->wait);
} }
static int intel_plane_pin_fb(struct intel_plane_state *plane_state)
{
struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
struct drm_framebuffer *fb = plane_state->base.fb;
struct i915_vma *vma;
if (plane->id == PLANE_CURSOR &&
INTEL_INFO(dev_priv)->cursor_needs_physical) {
struct drm_i915_gem_object *obj = intel_fb_obj(fb);
const int align = intel_cursor_alignment(dev_priv);
return i915_gem_object_attach_phys(obj, align);
}
vma = intel_pin_and_fence_fb_obj(fb,
plane_state->base.rotation,
intel_plane_uses_fence(plane_state),
&plane_state->flags);
if (IS_ERR(vma))
return PTR_ERR(vma);
plane_state->vma = vma;
return 0;
}
static void intel_plane_unpin_fb(struct intel_plane_state *old_plane_state)
{
struct i915_vma *vma;
vma = fetch_and_zero(&old_plane_state->vma);
if (vma)
intel_unpin_fb_vma(vma, old_plane_state->flags);
}
/** /**
* intel_prepare_plane_fb - Prepare fb for usage on plane * intel_prepare_plane_fb - Prepare fb for usage on plane
* @plane: drm plane to prepare for * @plane: drm plane to prepare for
@ -12728,22 +12776,7 @@ intel_prepare_plane_fb(struct drm_plane *plane,
return ret; return ret;
} }
if (plane->type == DRM_PLANE_TYPE_CURSOR && ret = intel_plane_pin_fb(to_intel_plane_state(new_state));
INTEL_INFO(dev_priv)->cursor_needs_physical) {
const int align = intel_cursor_alignment(dev_priv);
ret = i915_gem_object_attach_phys(obj, align);
} else {
struct i915_vma *vma;
vma = intel_pin_and_fence_fb_obj(fb,
new_state->rotation,
&to_intel_plane_state(new_state)->flags);
if (!IS_ERR(vma))
to_intel_plane_state(new_state)->vma = vma;
else
ret = PTR_ERR(vma);
}
i915_gem_object_wait_priority(obj, 0, I915_PRIORITY_DISPLAY); i915_gem_object_wait_priority(obj, 0, I915_PRIORITY_DISPLAY);
@ -12787,15 +12820,12 @@ void
intel_cleanup_plane_fb(struct drm_plane *plane, intel_cleanup_plane_fb(struct drm_plane *plane,
struct drm_plane_state *old_state) struct drm_plane_state *old_state)
{ {
struct i915_vma *vma; struct drm_i915_private *dev_priv = to_i915(plane->dev);
/* Should only be called after a successful intel_prepare_plane_fb()! */ /* Should only be called after a successful intel_prepare_plane_fb()! */
vma = fetch_and_zero(&to_intel_plane_state(old_state)->vma); mutex_lock(&dev_priv->drm.struct_mutex);
if (vma) { intel_plane_unpin_fb(to_intel_plane_state(old_state));
mutex_lock(&plane->dev->struct_mutex); mutex_unlock(&dev_priv->drm.struct_mutex);
intel_unpin_fb_vma(vma, to_intel_plane_state(old_state)->flags);
mutex_unlock(&plane->dev->struct_mutex);
}
} }
int int
@ -13080,7 +13110,6 @@ intel_legacy_cursor_update(struct drm_plane *plane,
struct intel_plane *intel_plane = to_intel_plane(plane); struct intel_plane *intel_plane = to_intel_plane(plane);
struct drm_framebuffer *old_fb; struct drm_framebuffer *old_fb;
struct drm_crtc_state *crtc_state = crtc->state; struct drm_crtc_state *crtc_state = crtc->state;
struct i915_vma *old_vma, *vma;
/* /*
* When crtc is inactive or there is a modeset pending, * When crtc is inactive or there is a modeset pending,
@ -13139,27 +13168,9 @@ intel_legacy_cursor_update(struct drm_plane *plane,
if (ret) if (ret)
goto out_free; goto out_free;
if (INTEL_INFO(dev_priv)->cursor_needs_physical) { ret = intel_plane_pin_fb(to_intel_plane_state(new_plane_state));
int align = intel_cursor_alignment(dev_priv); if (ret)
goto out_unlock;
ret = i915_gem_object_attach_phys(intel_fb_obj(fb), align);
if (ret) {
DRM_DEBUG_KMS("failed to attach phys object\n");
goto out_unlock;
}
} else {
vma = intel_pin_and_fence_fb_obj(fb,
new_plane_state->rotation,
&to_intel_plane_state(new_plane_state)->flags);
if (IS_ERR(vma)) {
DRM_DEBUG_KMS("failed to pin object\n");
ret = PTR_ERR(vma);
goto out_unlock;
}
to_intel_plane_state(new_plane_state)->vma = vma;
}
old_fb = old_plane_state->fb; old_fb = old_plane_state->fb;
@ -13179,10 +13190,7 @@ intel_legacy_cursor_update(struct drm_plane *plane,
intel_plane->disable_plane(intel_plane, to_intel_crtc(crtc)); intel_plane->disable_plane(intel_plane, to_intel_crtc(crtc));
} }
old_vma = fetch_and_zero(&to_intel_plane_state(old_plane_state)->vma); intel_plane_unpin_fb(to_intel_plane_state(old_plane_state));
if (old_vma)
intel_unpin_fb_vma(old_vma,
to_intel_plane_state(old_plane_state)->flags);
out_unlock: out_unlock:
mutex_unlock(&dev_priv->drm.struct_mutex); mutex_unlock(&dev_priv->drm.struct_mutex);
@ -13210,6 +13218,32 @@ static const struct drm_plane_funcs intel_cursor_plane_funcs = {
.format_mod_supported = intel_cursor_plane_format_mod_supported, .format_mod_supported = intel_cursor_plane_format_mod_supported,
}; };
static bool i9xx_plane_has_fbc(struct drm_i915_private *dev_priv,
enum i9xx_plane_id i9xx_plane)
{
if (!HAS_FBC(dev_priv))
return false;
if (IS_BROADWELL(dev_priv) || IS_HASWELL(dev_priv))
return i9xx_plane == PLANE_A; /* tied to pipe A */
else if (IS_IVYBRIDGE(dev_priv))
return i9xx_plane == PLANE_A || i9xx_plane == PLANE_B ||
i9xx_plane == PLANE_C;
else if (INTEL_GEN(dev_priv) >= 4)
return i9xx_plane == PLANE_A || i9xx_plane == PLANE_B;
else
return i9xx_plane == PLANE_A;
}
static bool skl_plane_has_fbc(struct drm_i915_private *dev_priv,
enum pipe pipe, enum plane_id plane_id)
{
if (!HAS_FBC(dev_priv))
return false;
return pipe == PIPE_A && plane_id == PLANE_PRIMARY;
}
static struct intel_plane * static struct intel_plane *
intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe)
{ {
@ -13252,6 +13286,21 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe)
primary->i9xx_plane = (enum i9xx_plane_id) pipe; primary->i9xx_plane = (enum i9xx_plane_id) pipe;
primary->id = PLANE_PRIMARY; primary->id = PLANE_PRIMARY;
primary->frontbuffer_bit = INTEL_FRONTBUFFER(pipe, primary->id); primary->frontbuffer_bit = INTEL_FRONTBUFFER(pipe, primary->id);
if (INTEL_GEN(dev_priv) >= 9)
primary->has_fbc = skl_plane_has_fbc(dev_priv,
primary->pipe,
primary->id);
else
primary->has_fbc = i9xx_plane_has_fbc(dev_priv,
primary->i9xx_plane);
if (primary->has_fbc) {
struct intel_fbc *fbc = &dev_priv->fbc;
fbc->possible_framebuffer_bits |= primary->frontbuffer_bit;
}
primary->check_plane = intel_check_primary_plane; primary->check_plane = intel_check_primary_plane;
if (INTEL_GEN(dev_priv) >= 9) { if (INTEL_GEN(dev_priv) >= 9) {
@ -139,6 +139,17 @@ enum dpio_phy {
#define I915_NUM_PHYS_VLV 2 #define I915_NUM_PHYS_VLV 2
enum aux_ch {
AUX_CH_A,
AUX_CH_B,
AUX_CH_C,
AUX_CH_D,
_AUX_CH_E, /* does not exist */
AUX_CH_F,
};
#define aux_ch_name(a) ((a) + 'A')
enum intel_display_power_domain { enum intel_display_power_domain {
POWER_DOMAIN_PIPE_A, POWER_DOMAIN_PIPE_A,
POWER_DOMAIN_PIPE_B, POWER_DOMAIN_PIPE_B,
@ -175,6 +186,7 @@ enum intel_display_power_domain {
POWER_DOMAIN_AUX_C, POWER_DOMAIN_AUX_C,
POWER_DOMAIN_AUX_D, POWER_DOMAIN_AUX_D,
POWER_DOMAIN_AUX_F, POWER_DOMAIN_AUX_F,
POWER_DOMAIN_AUX_IO_A,
POWER_DOMAIN_GMBUS, POWER_DOMAIN_GMBUS,
POWER_DOMAIN_MODESET, POWER_DOMAIN_MODESET,
POWER_DOMAIN_GT_IRQ, POWER_DOMAIN_GT_IRQ,
@ -248,6 +248,7 @@ intel_dp_link_training_channel_equalization(struct intel_dp *intel_dp)
int tries; int tries;
u32 training_pattern; u32 training_pattern;
uint8_t link_status[DP_LINK_STATUS_SIZE]; uint8_t link_status[DP_LINK_STATUS_SIZE];
bool channel_eq = false;
training_pattern = intel_dp_training_pattern(intel_dp); training_pattern = intel_dp_training_pattern(intel_dp);
@ -259,7 +260,6 @@ intel_dp_link_training_channel_equalization(struct intel_dp *intel_dp)
return false; return false;
} }
intel_dp->channel_eq_status = false;
for (tries = 0; tries < 5; tries++) { for (tries = 0; tries < 5; tries++) {
drm_dp_link_train_channel_eq_delay(intel_dp->dpcd); drm_dp_link_train_channel_eq_delay(intel_dp->dpcd);
@ -279,7 +279,7 @@ intel_dp_link_training_channel_equalization(struct intel_dp *intel_dp)
if (drm_dp_channel_eq_ok(link_status, if (drm_dp_channel_eq_ok(link_status,
intel_dp->lane_count)) { intel_dp->lane_count)) {
intel_dp->channel_eq_status = true; channel_eq = true;
DRM_DEBUG_KMS("Channel EQ done. DP Training " DRM_DEBUG_KMS("Channel EQ done. DP Training "
"successful\n"); "successful\n");
break; break;
@ -301,12 +301,14 @@ intel_dp_link_training_channel_equalization(struct intel_dp *intel_dp)
intel_dp_set_idle_link_train(intel_dp); intel_dp_set_idle_link_train(intel_dp);
return intel_dp->channel_eq_status; return channel_eq;
} }
void intel_dp_stop_link_train(struct intel_dp *intel_dp) void intel_dp_stop_link_train(struct intel_dp *intel_dp)
{ {
intel_dp->link_trained = true;
intel_dp_set_link_train(intel_dp, intel_dp_set_link_train(intel_dp,
DP_TRAINING_PATTERN_DISABLE); DP_TRAINING_PATTERN_DISABLE);
} }
@ -215,7 +215,8 @@ struct intel_encoder {
enum intel_output_type type; enum intel_output_type type;
enum port port; enum port port;
unsigned int cloneable; unsigned int cloneable;
void (*hot_plug)(struct intel_encoder *); bool (*hotplug)(struct intel_encoder *encoder,
struct intel_connector *connector);
enum intel_output_type (*compute_output_type)(struct intel_encoder *, enum intel_output_type (*compute_output_type)(struct intel_encoder *,
struct intel_crtc_state *, struct intel_crtc_state *,
struct drm_connector_state *); struct drm_connector_state *);
@ -935,6 +936,7 @@ struct intel_plane {
enum plane_id id; enum plane_id id;
enum pipe pipe; enum pipe pipe;
bool can_scale; bool can_scale;
bool has_fbc;
int max_downscale; int max_downscale;
uint32_t frontbuffer_bit; uint32_t frontbuffer_bit;
@ -1041,17 +1043,16 @@ struct intel_dp_compliance {
struct intel_dp { struct intel_dp {
i915_reg_t output_reg; i915_reg_t output_reg;
i915_reg_t aux_ch_ctl_reg;
i915_reg_t aux_ch_data_reg[5];
uint32_t DP; uint32_t DP;
int link_rate; int link_rate;
uint8_t lane_count; uint8_t lane_count;
uint8_t sink_count; uint8_t sink_count;
bool link_mst; bool link_mst;
bool link_trained;
bool has_audio; bool has_audio;
bool detect_done; bool detect_done;
bool channel_eq_status;
bool reset_link_params; bool reset_link_params;
enum aux_ch aux_ch;
uint8_t dpcd[DP_RECEIVER_CAP_SIZE]; uint8_t dpcd[DP_RECEIVER_CAP_SIZE];
uint8_t psr_dpcd[EDP_PSR_RECEIVER_CAP_SIZE]; uint8_t psr_dpcd[EDP_PSR_RECEIVER_CAP_SIZE];
uint8_t downstream_ports[DP_MAX_DOWNSTREAM_PORTS]; uint8_t downstream_ports[DP_MAX_DOWNSTREAM_PORTS];
@ -1126,6 +1127,9 @@ struct intel_dp {
int send_bytes, int send_bytes,
uint32_t aux_clock_divider); uint32_t aux_clock_divider);
i915_reg_t (*aux_ch_ctl_reg)(struct intel_dp *dp);
i915_reg_t (*aux_ch_data_reg)(struct intel_dp *dp, int index);
/* This is called before a link training is started */ /* This is called before a link training is started */
void (*prepare_link_retrain)(struct intel_dp *intel_dp); void (*prepare_link_retrain)(struct intel_dp *intel_dp);
@ -1508,6 +1512,7 @@ void intel_release_load_detect_pipe(struct drm_connector *connector,
struct i915_vma * struct i915_vma *
intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
unsigned int rotation, unsigned int rotation,
bool uses_fence,
unsigned long *out_flags); unsigned long *out_flags);
void intel_unpin_fb_vma(struct i915_vma *vma, unsigned long flags); void intel_unpin_fb_vma(struct i915_vma *vma, unsigned long flags);
struct drm_framebuffer * struct drm_framebuffer *
@ -1622,6 +1627,8 @@ int intel_dp_get_link_train_fallback_values(struct intel_dp *intel_dp,
int link_rate, uint8_t lane_count); int link_rate, uint8_t lane_count);
void intel_dp_start_link_train(struct intel_dp *intel_dp); void intel_dp_start_link_train(struct intel_dp *intel_dp);
void intel_dp_stop_link_train(struct intel_dp *intel_dp); void intel_dp_stop_link_train(struct intel_dp *intel_dp);
int intel_dp_retrain_link(struct intel_encoder *encoder,
struct drm_modeset_acquire_ctx *ctx);
void intel_dp_sink_dpms(struct intel_dp *intel_dp, int mode); void intel_dp_sink_dpms(struct intel_dp *intel_dp, int mode);
void intel_dp_encoder_reset(struct drm_encoder *encoder); void intel_dp_encoder_reset(struct drm_encoder *encoder);
void intel_dp_encoder_suspend(struct intel_encoder *intel_encoder); void intel_dp_encoder_suspend(struct intel_encoder *intel_encoder);
@ -1701,7 +1708,8 @@ int intel_dsi_dcs_init_backlight_funcs(struct intel_connector *intel_connector);
void intel_dvo_init(struct drm_i915_private *dev_priv); void intel_dvo_init(struct drm_i915_private *dev_priv);
/* intel_hotplug.c */ /* intel_hotplug.c */
void intel_hpd_poll_init(struct drm_i915_private *dev_priv); void intel_hpd_poll_init(struct drm_i915_private *dev_priv);
bool intel_encoder_hotplug(struct intel_encoder *encoder,
struct intel_connector *connector);
/* legacy fbdev emulation in intel_fbdev.c */ /* legacy fbdev emulation in intel_fbdev.c */
#ifdef CONFIG_DRM_FBDEV_EMULATION #ifdef CONFIG_DRM_FBDEV_EMULATION
@ -1863,6 +1871,7 @@ bool is_hdcp_supported(struct drm_i915_private *dev_priv, enum port port);
/* intel_psr.c */ /* intel_psr.c */
#define CAN_PSR(dev_priv) (HAS_PSR(dev_priv) && dev_priv->psr.sink_support) #define CAN_PSR(dev_priv) (HAS_PSR(dev_priv) && dev_priv->psr.sink_support)
void intel_psr_init_dpcd(struct intel_dp *intel_dp);
void intel_psr_enable(struct intel_dp *intel_dp, void intel_psr_enable(struct intel_dp *intel_dp,
const struct intel_crtc_state *crtc_state); const struct intel_crtc_state *crtc_state);
void intel_psr_disable(struct intel_dp *intel_dp, void intel_psr_disable(struct intel_dp *intel_dp,
@ -1989,8 +1998,7 @@ void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv);
void gen6_rps_busy(struct drm_i915_private *dev_priv); void gen6_rps_busy(struct drm_i915_private *dev_priv);
void gen6_rps_reset_ei(struct drm_i915_private *dev_priv); void gen6_rps_reset_ei(struct drm_i915_private *dev_priv);
void gen6_rps_idle(struct drm_i915_private *dev_priv); void gen6_rps_idle(struct drm_i915_private *dev_priv);
void gen6_rps_boost(struct drm_i915_gem_request *rq, void gen6_rps_boost(struct i915_request *rq, struct intel_rps_client *rps);
struct intel_rps_client *rps);
void g4x_wm_get_hw_state(struct drm_device *dev); void g4x_wm_get_hw_state(struct drm_device *dev);
void vlv_wm_get_hw_state(struct drm_device *dev); void vlv_wm_get_hw_state(struct drm_device *dev);
void ilk_wm_get_hw_state(struct drm_device *dev); void ilk_wm_get_hw_state(struct drm_device *dev);
@ -123,6 +123,22 @@ static const struct engine_info intel_engines[] = {
.mmio_base = GEN8_BSD2_RING_BASE, .mmio_base = GEN8_BSD2_RING_BASE,
.irq_shift = GEN8_VCS2_IRQ_SHIFT, .irq_shift = GEN8_VCS2_IRQ_SHIFT,
}, },
[VCS3] = {
.hw_id = VCS3_HW,
.uabi_id = I915_EXEC_BSD,
.class = VIDEO_DECODE_CLASS,
.instance = 2,
.mmio_base = GEN11_BSD3_RING_BASE,
.irq_shift = 0, /* not used */
},
[VCS4] = {
.hw_id = VCS4_HW,
.uabi_id = I915_EXEC_BSD,
.class = VIDEO_DECODE_CLASS,
.instance = 3,
.mmio_base = GEN11_BSD4_RING_BASE,
.irq_shift = 0, /* not used */
},
[VECS] = { [VECS] = {
.hw_id = VECS_HW, .hw_id = VECS_HW,
.uabi_id = I915_EXEC_VEBOX, .uabi_id = I915_EXEC_VEBOX,
@ -131,6 +147,14 @@ static const struct engine_info intel_engines[] = {
.mmio_base = VEBOX_RING_BASE, .mmio_base = VEBOX_RING_BASE,
.irq_shift = GEN8_VECS_IRQ_SHIFT, .irq_shift = GEN8_VECS_IRQ_SHIFT,
}, },
[VECS2] = {
.hw_id = VECS2_HW,
.uabi_id = I915_EXEC_VEBOX,
.class = VIDEO_ENHANCEMENT_CLASS,
.instance = 1,
.mmio_base = GEN11_VEBOX2_RING_BASE,
.irq_shift = 0, /* not used */
},
}; };
/** /**
@ -210,6 +234,9 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
GEM_BUG_ON(info->class >= ARRAY_SIZE(intel_engine_classes)); GEM_BUG_ON(info->class >= ARRAY_SIZE(intel_engine_classes));
class_info = &intel_engine_classes[info->class]; class_info = &intel_engine_classes[info->class];
BUILD_BUG_ON(MAX_ENGINE_CLASS >= BIT(GEN11_ENGINE_CLASS_WIDTH));
BUILD_BUG_ON(MAX_ENGINE_INSTANCE >= BIT(GEN11_ENGINE_INSTANCE_WIDTH));
if (GEM_WARN_ON(info->class > MAX_ENGINE_CLASS)) if (GEM_WARN_ON(info->class > MAX_ENGINE_CLASS))
return -EINVAL; return -EINVAL;
@ -230,7 +257,25 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
class_info->name, info->instance) >= class_info->name, info->instance) >=
sizeof(engine->name)); sizeof(engine->name));
engine->hw_id = engine->guc_id = info->hw_id; engine->hw_id = engine->guc_id = info->hw_id;
engine->mmio_base = info->mmio_base; if (INTEL_GEN(dev_priv) >= 11) {
switch (engine->id) {
case VCS:
engine->mmio_base = GEN11_BSD_RING_BASE;
break;
case VCS2:
engine->mmio_base = GEN11_BSD2_RING_BASE;
break;
case VECS:
engine->mmio_base = GEN11_VEBOX_RING_BASE;
break;
default:
/* take the original value for all other engines */
engine->mmio_base = info->mmio_base;
break;
}
} else {
engine->mmio_base = info->mmio_base;
}
engine->irq_shift = info->irq_shift; engine->irq_shift = info->irq_shift;
engine->class = info->class; engine->class = info->class;
engine->instance = info->instance; engine->instance = info->instance;
@ -423,6 +468,7 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine)
BUILD_BUG_ON_NOT_POWER_OF_2(execlists_num_ports(execlists)); BUILD_BUG_ON_NOT_POWER_OF_2(execlists_num_ports(execlists));
GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS); GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
execlists->queue_priority = INT_MIN;
execlists->queue = RB_ROOT; execlists->queue = RB_ROOT;
execlists->first = NULL; execlists->first = NULL;
} }
@ -1426,20 +1472,20 @@ int init_workarounds_ring(struct intel_engine_cs *engine)
return 0; return 0;
} }
int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) int intel_ring_workarounds_emit(struct i915_request *rq)
{ {
struct i915_workarounds *w = &req->i915->workarounds; struct i915_workarounds *w = &rq->i915->workarounds;
u32 *cs; u32 *cs;
int ret, i; int ret, i;
if (w->count == 0) if (w->count == 0)
return 0; return 0;
ret = req->engine->emit_flush(req, EMIT_BARRIER); ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
if (ret) if (ret)
return ret; return ret;
cs = intel_ring_begin(req, (w->count * 2 + 2)); cs = intel_ring_begin(rq, w->count * 2 + 2);
if (IS_ERR(cs)) if (IS_ERR(cs))
return PTR_ERR(cs); return PTR_ERR(cs);
@ -1450,9 +1496,9 @@ int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
} }
*cs++ = MI_NOOP; *cs++ = MI_NOOP;
intel_ring_advance(req, cs); intel_ring_advance(rq, cs);
ret = req->engine->emit_flush(req, EMIT_BARRIER); ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
if (ret) if (ret)
return ret; return ret;
@ -1552,7 +1598,7 @@ bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine)
{ {
const struct i915_gem_context * const kernel_context = const struct i915_gem_context * const kernel_context =
engine->i915->kernel_context; engine->i915->kernel_context;
struct drm_i915_gem_request *rq; struct i915_request *rq;
lockdep_assert_held(&engine->i915->drm.struct_mutex); lockdep_assert_held(&engine->i915->drm.struct_mutex);
@ -1664,13 +1710,13 @@ unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915)
} }
static void print_request(struct drm_printer *m, static void print_request(struct drm_printer *m,
struct drm_i915_gem_request *rq, struct i915_request *rq,
const char *prefix) const char *prefix)
{ {
drm_printf(m, "%s%x%s [%x:%x] prio=%d @ %dms: %s\n", prefix, drm_printf(m, "%s%x%s [%llx:%x] prio=%d @ %dms: %s\n", prefix,
rq->global_seqno, rq->global_seqno,
i915_gem_request_completed(rq) ? "!" : "", i915_request_completed(rq) ? "!" : "",
rq->ctx->hw_id, rq->fence.seqno, rq->fence.context, rq->fence.seqno,
rq->priotree.priority, rq->priotree.priority,
jiffies_to_msecs(jiffies - rq->emitted_jiffies), jiffies_to_msecs(jiffies - rq->emitted_jiffies),
rq->timeline->common->name); rq->timeline->common->name);
@ -1803,7 +1849,7 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,
rcu_read_lock(); rcu_read_lock();
for (idx = 0; idx < execlists_num_ports(execlists); idx++) { for (idx = 0; idx < execlists_num_ports(execlists); idx++) {
struct drm_i915_gem_request *rq; struct i915_request *rq;
unsigned int count; unsigned int count;
rq = port_unpack(&execlists->port[idx], &count); rq = port_unpack(&execlists->port[idx], &count);
@ -1837,7 +1883,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
struct intel_breadcrumbs * const b = &engine->breadcrumbs; struct intel_breadcrumbs * const b = &engine->breadcrumbs;
const struct intel_engine_execlists * const execlists = &engine->execlists; const struct intel_engine_execlists * const execlists = &engine->execlists;
struct i915_gpu_error * const error = &engine->i915->gpu_error; struct i915_gpu_error * const error = &engine->i915->gpu_error;
struct drm_i915_gem_request *rq; struct i915_request *rq;
struct rb_node *rb; struct rb_node *rb;
if (header) { if (header) {
@ -1866,12 +1912,12 @@ void intel_engine_dump(struct intel_engine_cs *engine,
drm_printf(m, "\tRequests:\n"); drm_printf(m, "\tRequests:\n");
rq = list_first_entry(&engine->timeline->requests, rq = list_first_entry(&engine->timeline->requests,
struct drm_i915_gem_request, link); struct i915_request, link);
if (&rq->link != &engine->timeline->requests) if (&rq->link != &engine->timeline->requests)
print_request(m, rq, "\t\tfirst "); print_request(m, rq, "\t\tfirst ");
rq = list_last_entry(&engine->timeline->requests, rq = list_last_entry(&engine->timeline->requests,
struct drm_i915_gem_request, link); struct i915_request, link);
if (&rq->link != &engine->timeline->requests) if (&rq->link != &engine->timeline->requests)
print_request(m, rq, "\t\tlast "); print_request(m, rq, "\t\tlast ");
@ -1903,6 +1949,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
spin_lock_irq(&engine->timeline->lock); spin_lock_irq(&engine->timeline->lock);
list_for_each_entry(rq, &engine->timeline->requests, link) list_for_each_entry(rq, &engine->timeline->requests, link)
print_request(m, rq, "\t\tE "); print_request(m, rq, "\t\tE ");
drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority);
for (rb = execlists->first; rb; rb = rb_next(rb)) { for (rb = execlists->first; rb; rb = rb_next(rb)) {
struct i915_priolist *p = struct i915_priolist *p =
rb_entry(rb, typeof(*p), node); rb_entry(rb, typeof(*p), node);
@ -46,16 +46,6 @@ static inline bool fbc_supported(struct drm_i915_private *dev_priv)
return HAS_FBC(dev_priv); return HAS_FBC(dev_priv);
} }
static inline bool fbc_on_pipe_a_only(struct drm_i915_private *dev_priv)
{
return IS_HASWELL(dev_priv) || INTEL_GEN(dev_priv) >= 8;
}
static inline bool fbc_on_plane_a_only(struct drm_i915_private *dev_priv)
{
return INTEL_GEN(dev_priv) < 4;
}
static inline bool no_fbc_on_multiple_pipes(struct drm_i915_private *dev_priv) static inline bool no_fbc_on_multiple_pipes(struct drm_i915_private *dev_priv)
{ {
return INTEL_GEN(dev_priv) <= 3; return INTEL_GEN(dev_priv) <= 3;
@ -819,6 +809,12 @@ static bool intel_fbc_can_activate(struct intel_crtc *crtc)
* Note that it is possible for a tiled surface to be unmappable (and * so have no fence associated with it) due to aperture constraints
* so have no fence associated with it) due to aperture constaints * so have no fence associated with it) due to aperture constaints
* at the time of pinning. * at the time of pinning.
*
* FIXME with 90/270 degree rotation we should use the fence on
* the normal GTT view (the rotated view doesn't even have a
* fence). Would need changes to the FBC fence Y offset as well.
* For now this will effectively disable FBC with 90/270 degree
* rotation.
*/ */
if (!(cache->flags & PLANE_HAS_FENCE)) { if (!(cache->flags & PLANE_HAS_FENCE)) {
fbc->no_fbc_reason = "framebuffer not tiled or fenced"; fbc->no_fbc_reason = "framebuffer not tiled or fenced";
@ -863,6 +859,17 @@ static bool intel_fbc_can_activate(struct intel_crtc *crtc)
return false; return false;
} }
/*
* Work around a problem on GEN9+ HW, where enabling FBC on a plane
* having a Y offset that isn't divisible by 4 causes FIFO underrun
* and screen flicker.
*/
if (IS_GEN(dev_priv, 9, 10) &&
(fbc->state_cache.plane.adjusted_y & 3)) {
fbc->no_fbc_reason = "plane Y offset is misaligned";
return false;
}
return true; return true;
} }
@ -953,6 +960,30 @@ unlock:
mutex_unlock(&fbc->lock); mutex_unlock(&fbc->lock);
} }
/**
* __intel_fbc_disable - disable FBC
* @dev_priv: i915 device instance
*
* This is the low level function that actually disables FBC. Callers should
* grab the FBC lock.
*/
static void __intel_fbc_disable(struct drm_i915_private *dev_priv)
{
struct intel_fbc *fbc = &dev_priv->fbc;
struct intel_crtc *crtc = fbc->crtc;
WARN_ON(!mutex_is_locked(&fbc->lock));
WARN_ON(!fbc->enabled);
WARN_ON(fbc->active);
DRM_DEBUG_KMS("Disabling FBC on pipe %c\n", pipe_name(crtc->pipe));
__intel_fbc_cleanup_cfb(dev_priv);
fbc->enabled = false;
fbc->crtc = NULL;
}
static void __intel_fbc_post_update(struct intel_crtc *crtc) static void __intel_fbc_post_update(struct intel_crtc *crtc)
{ {
struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
@ -964,6 +995,13 @@ static void __intel_fbc_post_update(struct intel_crtc *crtc)
if (!fbc->enabled || fbc->crtc != crtc) if (!fbc->enabled || fbc->crtc != crtc)
return; return;
if (!i915_modparams.enable_fbc) {
intel_fbc_deactivate(dev_priv, "disabled at runtime per module param");
__intel_fbc_disable(dev_priv);
return;
}
if (!intel_fbc_can_activate(crtc)) { if (!intel_fbc_can_activate(crtc)) {
WARN_ON(fbc->active); WARN_ON(fbc->active);
return; return;
@ -1094,15 +1132,12 @@ void intel_fbc_choose_crtc(struct drm_i915_private *dev_priv,
struct intel_crtc_state *crtc_state; struct intel_crtc_state *crtc_state;
struct intel_crtc *crtc = to_intel_crtc(plane_state->base.crtc); struct intel_crtc *crtc = to_intel_crtc(plane_state->base.crtc);
if (!plane->has_fbc)
continue;
if (!plane_state->base.visible) if (!plane_state->base.visible)
continue; continue;
if (fbc_on_pipe_a_only(dev_priv) && crtc->pipe != PIPE_A)
continue;
if (fbc_on_plane_a_only(dev_priv) && plane->i9xx_plane != PLANE_A)
continue;
crtc_state = intel_atomic_get_new_crtc_state(state, crtc); crtc_state = intel_atomic_get_new_crtc_state(state, crtc);
crtc_state->enable_fbc = true; crtc_state->enable_fbc = true;
@ -1170,31 +1205,6 @@ out:
mutex_unlock(&fbc->lock); mutex_unlock(&fbc->lock);
} }
/**
* __intel_fbc_disable - disable FBC
* @dev_priv: i915 device instance
*
* This is the low level function that actually disables FBC. Callers should
* grab the FBC lock.
*/
static void __intel_fbc_disable(struct drm_i915_private *dev_priv)
{
struct intel_fbc *fbc = &dev_priv->fbc;
struct intel_crtc *crtc = fbc->crtc;
WARN_ON(!mutex_is_locked(&fbc->lock));
WARN_ON(!fbc->enabled);
WARN_ON(fbc->active);
WARN_ON(crtc->active);
DRM_DEBUG_KMS("Disabling FBC on pipe %c\n", pipe_name(crtc->pipe));
__intel_fbc_cleanup_cfb(dev_priv);
fbc->enabled = false;
fbc->crtc = NULL;
}
/** /**
* intel_fbc_disable - disable FBC if it's associated with crtc * intel_fbc_disable - disable FBC if it's associated with crtc
* @crtc: the CRTC * @crtc: the CRTC
@ -1209,6 +1219,8 @@ void intel_fbc_disable(struct intel_crtc *crtc)
if (!fbc_supported(dev_priv)) if (!fbc_supported(dev_priv))
return; return;
WARN_ON(crtc->active);
mutex_lock(&fbc->lock); mutex_lock(&fbc->lock);
if (fbc->crtc == crtc) if (fbc->crtc == crtc)
__intel_fbc_disable(dev_priv); __intel_fbc_disable(dev_priv);
@ -1231,8 +1243,10 @@ void intel_fbc_global_disable(struct drm_i915_private *dev_priv)
return; return;
mutex_lock(&fbc->lock); mutex_lock(&fbc->lock);
if (fbc->enabled) if (fbc->enabled) {
WARN_ON(fbc->crtc->active);
__intel_fbc_disable(dev_priv); __intel_fbc_disable(dev_priv);
}
mutex_unlock(&fbc->lock); mutex_unlock(&fbc->lock);
cancel_work_sync(&fbc->work.work); cancel_work_sync(&fbc->work.work);
@ -1357,7 +1371,6 @@ static bool need_fbc_vtd_wa(struct drm_i915_private *dev_priv)
void intel_fbc_init(struct drm_i915_private *dev_priv) void intel_fbc_init(struct drm_i915_private *dev_priv)
{ {
struct intel_fbc *fbc = &dev_priv->fbc; struct intel_fbc *fbc = &dev_priv->fbc;
enum pipe pipe;
INIT_WORK(&fbc->work.work, intel_fbc_work_fn); INIT_WORK(&fbc->work.work, intel_fbc_work_fn);
INIT_WORK(&fbc->underrun_work, intel_fbc_underrun_work_fn); INIT_WORK(&fbc->underrun_work, intel_fbc_underrun_work_fn);
@ -1378,14 +1391,6 @@ void intel_fbc_init(struct drm_i915_private *dev_priv)
return; return;
} }
for_each_pipe(dev_priv, pipe) {
fbc->possible_framebuffer_bits |=
INTEL_FRONTBUFFER(pipe, PLANE_PRIMARY);
if (fbc_on_pipe_a_only(dev_priv))
break;
}
/* This value was pulled out of someone's hat */ /* This value was pulled out of someone's hat */
if (INTEL_GEN(dev_priv) <= 4 && !IS_GM45(dev_priv)) if (INTEL_GEN(dev_priv) <= 4 && !IS_GM45(dev_priv))
I915_WRITE(FBC_CONTROL, 500 << FBC_CTL_INTERVAL_SHIFT); I915_WRITE(FBC_CONTROL, 500 << FBC_CTL_INTERVAL_SHIFT);

View File

@ -215,7 +215,7 @@ static int intelfb_create(struct drm_fb_helper *helper,
*/ */
vma = intel_pin_and_fence_fb_obj(&ifbdev->fb->base, vma = intel_pin_and_fence_fb_obj(&ifbdev->fb->base,
DRM_MODE_ROTATE_0, DRM_MODE_ROTATE_0,
&flags); false, &flags);
if (IS_ERR(vma)) { if (IS_ERR(vma)) {
ret = PTR_ERR(vma); ret = PTR_ERR(vma);
goto out_unlock; goto out_unlock;

View File

@ -79,6 +79,7 @@ void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
spin_unlock(&dev_priv->fb_tracking.lock); spin_unlock(&dev_priv->fb_tracking.lock);
} }
might_sleep();
intel_psr_invalidate(dev_priv, frontbuffer_bits); intel_psr_invalidate(dev_priv, frontbuffer_bits);
intel_edp_drrs_invalidate(dev_priv, frontbuffer_bits); intel_edp_drrs_invalidate(dev_priv, frontbuffer_bits);
intel_fbc_invalidate(dev_priv, frontbuffer_bits, origin); intel_fbc_invalidate(dev_priv, frontbuffer_bits, origin);
@ -108,6 +109,7 @@ static void intel_frontbuffer_flush(struct drm_i915_private *dev_priv,
if (!frontbuffer_bits) if (!frontbuffer_bits)
return; return;
might_sleep();
intel_edp_drrs_flush(dev_priv, frontbuffer_bits); intel_edp_drrs_flush(dev_priv, frontbuffer_bits);
intel_psr_flush(dev_priv, frontbuffer_bits, origin); intel_psr_flush(dev_priv, frontbuffer_bits, origin);
intel_fbc_flush(dev_priv, frontbuffer_bits, origin); intel_fbc_flush(dev_priv, frontbuffer_bits, origin);

View File

@ -370,7 +370,7 @@ int intel_guc_sample_forcewake(struct intel_guc *guc)
u32 action[2]; u32 action[2];
action[0] = INTEL_GUC_ACTION_SAMPLE_FORCEWAKE; action[0] = INTEL_GUC_ACTION_SAMPLE_FORCEWAKE;
/* WaRsDisableCoarsePowerGating:skl,bxt */ /* WaRsDisableCoarsePowerGating:skl,cnl */
if (!HAS_RC6(dev_priv) || NEEDS_WaRsDisableCoarsePowerGating(dev_priv)) if (!HAS_RC6(dev_priv) || NEEDS_WaRsDisableCoarsePowerGating(dev_priv))
action[1] = 0; action[1] = 0;
else else
@ -403,22 +403,15 @@ int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset)
/** /**
* intel_guc_suspend() - notify GuC entering suspend state * intel_guc_suspend() - notify GuC entering suspend state
* @dev_priv: i915 device private * @guc: the guc
*/ */
int intel_guc_suspend(struct drm_i915_private *dev_priv) int intel_guc_suspend(struct intel_guc *guc)
{ {
struct intel_guc *guc = &dev_priv->guc; u32 data[] = {
u32 data[3]; INTEL_GUC_ACTION_ENTER_S_STATE,
GUC_POWER_D1, /* any value greater than GUC_POWER_D0 */
if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS) guc_ggtt_offset(guc->shared_data)
return 0; };
gen9_disable_guc_interrupts(dev_priv);
data[0] = INTEL_GUC_ACTION_ENTER_S_STATE;
/* any value greater than GUC_POWER_D0 */
data[1] = GUC_POWER_D1;
data[2] = guc_ggtt_offset(guc->shared_data);
return intel_guc_send(guc, data, ARRAY_SIZE(data)); return intel_guc_send(guc, data, ARRAY_SIZE(data));
} }
@ -448,22 +441,15 @@ int intel_guc_reset_engine(struct intel_guc *guc,
/** /**
* intel_guc_resume() - notify GuC resuming from suspend state * intel_guc_resume() - notify GuC resuming from suspend state
* @dev_priv: i915 device private * @guc: the guc
*/ */
int intel_guc_resume(struct drm_i915_private *dev_priv) int intel_guc_resume(struct intel_guc *guc)
{ {
struct intel_guc *guc = &dev_priv->guc; u32 data[] = {
u32 data[3]; INTEL_GUC_ACTION_EXIT_S_STATE,
GUC_POWER_D0,
if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS) guc_ggtt_offset(guc->shared_data)
return 0; };
if (i915_modparams.guc_log_level)
gen9_enable_guc_interrupts(dev_priv);
data[0] = INTEL_GUC_ACTION_EXIT_S_STATE;
data[1] = GUC_POWER_D0;
data[2] = guc_ggtt_offset(guc->shared_data);
return intel_guc_send(guc, data, ARRAY_SIZE(data)); return intel_guc_send(guc, data, ARRAY_SIZE(data));
} }
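Both hunks above collapse to the same shape: build a three-dword action message and hand it to the generic send helper. A condensed sketch of that shared pattern (guc_set_power_state() is a hypothetical name used here only for illustration; the constants and helpers are the ones visible in the diff):

/*
 * Illustrative only: the common shape of the new intel_guc_suspend() and
 * intel_guc_resume() bodies.
 */
static int guc_set_power_state(struct intel_guc *guc, u32 action, u32 state)
{
	u32 data[] = {
		action,                            /* ENTER_S_STATE or EXIT_S_STATE */
		state,                             /* GUC_POWER_D1 or GUC_POWER_D0 */
		guc_ggtt_offset(guc->shared_data), /* GGTT offset of the shared page */
	};

	return intel_guc_send(guc, data, ARRAY_SIZE(data));
}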

View File

@ -127,8 +127,8 @@ int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len);
int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len); int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len);
int intel_guc_sample_forcewake(struct intel_guc *guc); int intel_guc_sample_forcewake(struct intel_guc *guc);
int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset); int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset);
int intel_guc_suspend(struct drm_i915_private *dev_priv); int intel_guc_suspend(struct intel_guc *guc);
int intel_guc_resume(struct drm_i915_private *dev_priv); int intel_guc_resume(struct intel_guc *guc);
struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size); struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size);
u32 intel_guc_wopcm_size(struct drm_i915_private *dev_priv); u32 intel_guc_wopcm_size(struct drm_i915_private *dev_priv);

View File

@ -269,15 +269,15 @@ static int guc_fw_xfer(struct intel_uc_fw *guc_fw, struct i915_vma *vma)
} }
/** /**
* intel_guc_fw_upload() - finish preparing the GuC for activity * intel_guc_fw_upload() - load GuC uCode to device
* @guc: intel_guc structure * @guc: intel_guc structure
* *
* Called during driver loading and also after a GPU reset. * Called from intel_uc_init_hw() during driver load, resume from sleep and
* after a GPU reset.
* *
* The main action required here it to load the GuC uCode into the device.
* The firmware image should have already been fetched into memory by the * The firmware image should have already been fetched into memory by the
* earlier call to intel_guc_init(), so here we need only check that * earlier call to intel_uc_init_fw(), so here we need to only check that
* worked, and then transfer the image to the h/w. * fetch succeeded, and then transfer the image to the h/w.
* *
* Return: non-zero code on error * Return: non-zero code on error
*/ */

View File

@ -61,8 +61,10 @@ static int guc_log_flush(struct intel_guc *guc)
static int guc_log_control(struct intel_guc *guc, bool enable, u32 verbosity) static int guc_log_control(struct intel_guc *guc, bool enable, u32 verbosity)
{ {
union guc_log_control control_val = { union guc_log_control control_val = {
.logging_enabled = enable, {
.verbosity = verbosity, .logging_enabled = enable,
.verbosity = verbosity,
},
}; };
u32 action[] = { u32 action[] = {
INTEL_GUC_ACTION_UK_LOG_ENABLE_LOGGING, INTEL_GUC_ACTION_UK_LOG_ENABLE_LOGGING,
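This hunk only adds an inner set of braces; per the series changelog ("work around gcc-4.4.4 union initializer issue"), old gcc releases reject designated initializers that name fields of an anonymous struct inside a union unless the inner aggregate is braced explicitly. A minimal stand-alone sketch of the pattern (hypothetical field layout, not the real guc_log_control):

union log_control_example {
	struct {
		u32 logging_enabled : 1;
		u32 verbosity : 4;
	};
	u32 value;
};

/* Rejected by gcc-4.4.4: braceless designators into the anonymous struct. */
/* union log_control_example c = { .logging_enabled = 1, .verbosity = 3 }; */

/* Accepted by old and new compilers alike. */
union log_control_example c = {
	{
		.logging_enabled = 1,
		.verbosity = 3,
	},
};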

View File

@ -26,8 +26,14 @@
#include <trace/events/dma_fence.h> #include <trace/events/dma_fence.h>
#include "intel_guc_submission.h" #include "intel_guc_submission.h"
#include "intel_lrc_reg.h"
#include "i915_drv.h" #include "i915_drv.h"
#define GUC_PREEMPT_FINISHED 0x1
#define GUC_PREEMPT_BREADCRUMB_DWORDS 0x8
#define GUC_PREEMPT_BREADCRUMB_BYTES \
(sizeof(u32) * GUC_PREEMPT_BREADCRUMB_DWORDS)
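For reference, the arithmetic these two constants encode, since guc_wq_item_append() later in this file consumes the size as a work-queue tail expressed in qwords:

/*
 * GUC_PREEMPT_BREADCRUMB_DWORDS = 0x8              ->  8 dwords
 * GUC_PREEMPT_BREADCRUMB_BYTES  = sizeof(u32) * 8  -> 32 bytes
 * tail passed to guc_wq_item_append()              = 32 / sizeof(u64) = 4 qwords
 */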
/** /**
* DOC: GuC-based command submission * DOC: GuC-based command submission
* *
@ -75,6 +81,11 @@
* *
*/ */
static inline struct i915_priolist *to_priolist(struct rb_node *rb)
{
return rb_entry(rb, struct i915_priolist, node);
}
static inline bool is_high_priority(struct intel_guc_client *client) static inline bool is_high_priority(struct intel_guc_client *client)
{ {
return (client->priority == GUC_CLIENT_PRIORITY_KMD_HIGH || return (client->priority == GUC_CLIENT_PRIORITY_KMD_HIGH ||
@ -496,8 +507,7 @@ static void guc_ring_doorbell(struct intel_guc_client *client)
GEM_BUG_ON(db->db_status != GUC_DOORBELL_ENABLED); GEM_BUG_ON(db->db_status != GUC_DOORBELL_ENABLED);
} }
static void guc_add_request(struct intel_guc *guc, static void guc_add_request(struct intel_guc *guc, struct i915_request *rq)
struct drm_i915_gem_request *rq)
{ {
struct intel_guc_client *client = guc->execbuf_client; struct intel_guc_client *client = guc->execbuf_client;
struct intel_engine_cs *engine = rq->engine; struct intel_engine_cs *engine = rq->engine;
@ -531,8 +541,6 @@ static void flush_ggtt_writes(struct i915_vma *vma)
POSTING_READ_FW(GUC_STATUS); POSTING_READ_FW(GUC_STATUS);
} }
#define GUC_PREEMPT_FINISHED 0x1
#define GUC_PREEMPT_BREADCRUMB_DWORDS 0x8
static void inject_preempt_context(struct work_struct *work) static void inject_preempt_context(struct work_struct *work)
{ {
struct guc_preempt_work *preempt_work = struct guc_preempt_work *preempt_work =
@ -542,37 +550,17 @@ static void inject_preempt_context(struct work_struct *work)
preempt_work[engine->id]); preempt_work[engine->id]);
struct intel_guc_client *client = guc->preempt_client; struct intel_guc_client *client = guc->preempt_client;
struct guc_stage_desc *stage_desc = __get_stage_desc(client); struct guc_stage_desc *stage_desc = __get_stage_desc(client);
struct intel_ring *ring = client->owner->engine[engine->id].ring;
u32 ctx_desc = lower_32_bits(intel_lr_context_descriptor(client->owner, u32 ctx_desc = lower_32_bits(intel_lr_context_descriptor(client->owner,
engine)); engine));
u32 *cs = ring->vaddr + ring->tail;
u32 data[7]; u32 data[7];
if (engine->id == RCS) { /*
cs = gen8_emit_ggtt_write_rcs(cs, GUC_PREEMPT_FINISHED, * The ring contains commands to write GUC_PREEMPT_FINISHED into HWSP.
intel_hws_preempt_done_address(engine)); * See guc_fill_preempt_context().
} else { */
cs = gen8_emit_ggtt_write(cs, GUC_PREEMPT_FINISHED,
intel_hws_preempt_done_address(engine));
*cs++ = MI_NOOP;
*cs++ = MI_NOOP;
}
*cs++ = MI_USER_INTERRUPT;
*cs++ = MI_NOOP;
GEM_BUG_ON(!IS_ALIGNED(ring->size,
GUC_PREEMPT_BREADCRUMB_DWORDS * sizeof(u32)));
GEM_BUG_ON((void *)cs - (ring->vaddr + ring->tail) !=
GUC_PREEMPT_BREADCRUMB_DWORDS * sizeof(u32));
ring->tail += GUC_PREEMPT_BREADCRUMB_DWORDS * sizeof(u32);
ring->tail &= (ring->size - 1);
flush_ggtt_writes(ring->vma);
spin_lock_irq(&client->wq_lock); spin_lock_irq(&client->wq_lock);
guc_wq_item_append(client, engine->guc_id, ctx_desc, guc_wq_item_append(client, engine->guc_id, ctx_desc,
ring->tail / sizeof(u64), 0); GUC_PREEMPT_BREADCRUMB_BYTES / sizeof(u64), 0);
spin_unlock_irq(&client->wq_lock); spin_unlock_irq(&client->wq_lock);
/* /*
@ -648,7 +636,7 @@ static void guc_submit(struct intel_engine_cs *engine)
unsigned int n; unsigned int n;
for (n = 0; n < execlists_num_ports(execlists); n++) { for (n = 0; n < execlists_num_ports(execlists); n++) {
struct drm_i915_gem_request *rq; struct i915_request *rq;
unsigned int count; unsigned int count;
rq = port_unpack(&port[n], &count); rq = port_unpack(&port[n], &count);
@ -662,19 +650,18 @@ static void guc_submit(struct intel_engine_cs *engine)
} }
} }
static void port_assign(struct execlist_port *port, static void port_assign(struct execlist_port *port, struct i915_request *rq)
struct drm_i915_gem_request *rq)
{ {
GEM_BUG_ON(port_isset(port)); GEM_BUG_ON(port_isset(port));
port_set(port, i915_gem_request_get(rq)); port_set(port, i915_request_get(rq));
} }
static void guc_dequeue(struct intel_engine_cs *engine) static void guc_dequeue(struct intel_engine_cs *engine)
{ {
struct intel_engine_execlists * const execlists = &engine->execlists; struct intel_engine_execlists * const execlists = &engine->execlists;
struct execlist_port *port = execlists->port; struct execlist_port *port = execlists->port;
struct drm_i915_gem_request *last = NULL; struct i915_request *last = NULL;
const struct execlist_port * const last_port = const struct execlist_port * const last_port =
&execlists->port[execlists->port_mask]; &execlists->port[execlists->port_mask];
bool submit = false; bool submit = false;
@ -684,15 +671,12 @@ static void guc_dequeue(struct intel_engine_cs *engine)
rb = execlists->first; rb = execlists->first;
GEM_BUG_ON(rb_first(&execlists->queue) != rb); GEM_BUG_ON(rb_first(&execlists->queue) != rb);
if (!rb)
goto unlock;
if (port_isset(port)) { if (port_isset(port)) {
if (engine->i915->preempt_context) { if (engine->i915->preempt_context) {
struct guc_preempt_work *preempt_work = struct guc_preempt_work *preempt_work =
&engine->i915->guc.preempt_work[engine->id]; &engine->i915->guc.preempt_work[engine->id];
if (rb_entry(rb, struct i915_priolist, node)->priority > if (execlists->queue_priority >
max(port_request(port)->priotree.priority, 0)) { max(port_request(port)->priotree.priority, 0)) {
execlists_set_active(execlists, execlists_set_active(execlists,
EXECLISTS_ACTIVE_PREEMPT); EXECLISTS_ACTIVE_PREEMPT);
@ -708,9 +692,9 @@ static void guc_dequeue(struct intel_engine_cs *engine)
} }
GEM_BUG_ON(port_isset(port)); GEM_BUG_ON(port_isset(port));
do { while (rb) {
struct i915_priolist *p = rb_entry(rb, typeof(*p), node); struct i915_priolist *p = to_priolist(rb);
struct drm_i915_gem_request *rq, *rn; struct i915_request *rq, *rn;
list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) { list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
if (last && rq->ctx != last->ctx) { if (last && rq->ctx != last->ctx) {
@ -727,9 +711,8 @@ static void guc_dequeue(struct intel_engine_cs *engine)
INIT_LIST_HEAD(&rq->priotree.link); INIT_LIST_HEAD(&rq->priotree.link);
__i915_gem_request_submit(rq); __i915_request_submit(rq);
trace_i915_gem_request_in(rq, trace_i915_request_in(rq, port_index(port, execlists));
port_index(port, execlists));
last = rq; last = rq;
submit = true; submit = true;
} }
@ -739,8 +722,9 @@ static void guc_dequeue(struct intel_engine_cs *engine)
INIT_LIST_HEAD(&p->requests); INIT_LIST_HEAD(&p->requests);
if (p->priority != I915_PRIORITY_NORMAL) if (p->priority != I915_PRIORITY_NORMAL)
kmem_cache_free(engine->i915->priorities, p); kmem_cache_free(engine->i915->priorities, p);
} while (rb); }
done: done:
execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN;
execlists->first = rb; execlists->first = rb;
if (submit) { if (submit) {
port_assign(port, last); port_assign(port, last);
@ -762,12 +746,12 @@ static void guc_submission_tasklet(unsigned long data)
struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
struct intel_engine_execlists * const execlists = &engine->execlists; struct intel_engine_execlists * const execlists = &engine->execlists;
struct execlist_port *port = execlists->port; struct execlist_port *port = execlists->port;
struct drm_i915_gem_request *rq; struct i915_request *rq;
rq = port_request(&port[0]); rq = port_request(&port[0]);
while (rq && i915_gem_request_completed(rq)) { while (rq && i915_request_completed(rq)) {
trace_i915_gem_request_out(rq); trace_i915_request_out(rq);
i915_gem_request_put(rq); i915_request_put(rq);
execlists_port_complete(execlists, port); execlists_port_complete(execlists, port);
@ -972,6 +956,62 @@ static void guc_client_free(struct intel_guc_client *client)
kfree(client); kfree(client);
} }
static inline bool ctx_save_restore_disabled(struct intel_context *ce)
{
u32 sr = ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1];
#define SR_DISABLED \
_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | \
CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)
return (sr & SR_DISABLED) == SR_DISABLED;
#undef SR_DISABLED
}
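The check above relies on the i915 masked-register convention; a short sketch of the assumption it rests on:

/*
 * Assumption, not taken from this diff: with
 *   _MASKED_BIT_ENABLE(x) == ((x) << 16) | (x)
 * (value in the low 16 bits, write-enable mask in the high 16 bits),
 * SR_DISABLED carries both inhibit bits and their mask bits, so the
 * equality test only passes when the saved CTX_CONTEXT_CONTROL image has
 * context save *and* restore inhibited.
 */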
static void guc_fill_preempt_context(struct intel_guc *guc)
{
struct drm_i915_private *dev_priv = guc_to_i915(guc);
struct intel_guc_client *client = guc->preempt_client;
struct intel_engine_cs *engine;
enum intel_engine_id id;
for_each_engine(engine, dev_priv, id) {
struct intel_context *ce = &client->owner->engine[id];
u32 addr = intel_hws_preempt_done_address(engine);
u32 *cs;
GEM_BUG_ON(!ce->pin_count);
/*
* We rely on this context image *not* being saved after
* preemption. This ensures that the RING_HEAD / RING_TAIL
* remain pointing at initial values forever.
*/
GEM_BUG_ON(!ctx_save_restore_disabled(ce));
cs = ce->ring->vaddr;
if (id == RCS) {
cs = gen8_emit_ggtt_write_rcs(cs,
GUC_PREEMPT_FINISHED,
addr);
} else {
cs = gen8_emit_ggtt_write(cs,
GUC_PREEMPT_FINISHED,
addr);
*cs++ = MI_NOOP;
*cs++ = MI_NOOP;
}
*cs++ = MI_USER_INTERRUPT;
*cs++ = MI_NOOP;
GEM_BUG_ON((void *)cs - ce->ring->vaddr !=
GUC_PREEMPT_BREADCRUMB_BYTES);
flush_ggtt_writes(ce->ring->vma);
}
}
static int guc_clients_create(struct intel_guc *guc) static int guc_clients_create(struct intel_guc *guc)
{ {
struct drm_i915_private *dev_priv = guc_to_i915(guc); struct drm_i915_private *dev_priv = guc_to_i915(guc);
@ -1002,6 +1042,8 @@ static int guc_clients_create(struct intel_guc *guc)
return PTR_ERR(client); return PTR_ERR(client);
} }
guc->preempt_client = client; guc->preempt_client = client;
guc_fill_preempt_context(guc);
} }
return 0; return 0;

View File

@ -2383,6 +2383,7 @@ void intel_hdmi_init(struct drm_i915_private *dev_priv,
&intel_hdmi_enc_funcs, DRM_MODE_ENCODER_TMDS, &intel_hdmi_enc_funcs, DRM_MODE_ENCODER_TMDS,
"HDMI %c", port_name(port)); "HDMI %c", port_name(port));
intel_encoder->hotplug = intel_encoder_hotplug;
intel_encoder->compute_config = intel_hdmi_compute_config; intel_encoder->compute_config = intel_hdmi_compute_config;
if (HAS_PCH_SPLIT(dev_priv)) { if (HAS_PCH_SPLIT(dev_priv)) {
intel_encoder->disable = pch_disable_hdmi; intel_encoder->disable = pch_disable_hdmi;

View File

@ -274,24 +274,26 @@ static void intel_hpd_irq_storm_reenable_work(struct work_struct *work)
intel_runtime_pm_put(dev_priv); intel_runtime_pm_put(dev_priv);
} }
static bool intel_hpd_irq_event(struct drm_device *dev, bool intel_encoder_hotplug(struct intel_encoder *encoder,
struct drm_connector *connector) struct intel_connector *connector)
{ {
struct drm_device *dev = connector->base.dev;
enum drm_connector_status old_status; enum drm_connector_status old_status;
WARN_ON(!mutex_is_locked(&dev->mode_config.mutex)); WARN_ON(!mutex_is_locked(&dev->mode_config.mutex));
old_status = connector->status; old_status = connector->base.status;
connector->status = drm_helper_probe_detect(connector, NULL, false); connector->base.status =
drm_helper_probe_detect(&connector->base, NULL, false);
if (old_status == connector->status) if (old_status == connector->base.status)
return false; return false;
DRM_DEBUG_KMS("[CONNECTOR:%d:%s] status updated from %s to %s\n", DRM_DEBUG_KMS("[CONNECTOR:%d:%s] status updated from %s to %s\n",
connector->base.id, connector->base.base.id,
connector->name, connector->base.name,
drm_get_connector_status_name(old_status), drm_get_connector_status_name(old_status),
drm_get_connector_status_name(connector->status)); drm_get_connector_status_name(connector->base.status));
return true; return true;
} }
@ -381,10 +383,9 @@ static void i915_hotplug_work_func(struct work_struct *work)
if (hpd_event_bits & (1 << intel_encoder->hpd_pin)) { if (hpd_event_bits & (1 << intel_encoder->hpd_pin)) {
DRM_DEBUG_KMS("Connector %s (pin %i) received hotplug event.\n", DRM_DEBUG_KMS("Connector %s (pin %i) received hotplug event.\n",
connector->name, intel_encoder->hpd_pin); connector->name, intel_encoder->hpd_pin);
if (intel_encoder->hot_plug)
intel_encoder->hot_plug(intel_encoder); changed |= intel_encoder->hotplug(intel_encoder,
if (intel_hpd_irq_event(dev, connector)) intel_connector);
changed = true;
} }
} }
drm_connector_list_iter_end(&conn_iter); drm_connector_list_iter_end(&conn_iter);

View File

@ -27,161 +27,9 @@
#include "intel_huc.h" #include "intel_huc.h"
#include "i915_drv.h" #include "i915_drv.h"
/**
* DOC: HuC Firmware
*
* Motivation:
* GEN9 introduces a new dedicated firmware for usage in media HEVC (High
* Efficiency Video Coding) operations. Userspace can use the firmware
* capabilities by adding HuC specific commands to batch buffers.
*
* Implementation:
* The same firmware loader is used as the GuC. However, the actual
* loading to HW is deferred until GEM initialization is done.
*
* Note that HuC firmware loading must be done before GuC loading.
*/
#define BXT_HUC_FW_MAJOR 01
#define BXT_HUC_FW_MINOR 07
#define BXT_BLD_NUM 1398
#define SKL_HUC_FW_MAJOR 01
#define SKL_HUC_FW_MINOR 07
#define SKL_BLD_NUM 1398
#define KBL_HUC_FW_MAJOR 02
#define KBL_HUC_FW_MINOR 00
#define KBL_BLD_NUM 1810
#define HUC_FW_PATH(platform, major, minor, bld_num) \
"i915/" __stringify(platform) "_huc_ver" __stringify(major) "_" \
__stringify(minor) "_" __stringify(bld_num) ".bin"
#define I915_SKL_HUC_UCODE HUC_FW_PATH(skl, SKL_HUC_FW_MAJOR, \
SKL_HUC_FW_MINOR, SKL_BLD_NUM)
MODULE_FIRMWARE(I915_SKL_HUC_UCODE);
#define I915_BXT_HUC_UCODE HUC_FW_PATH(bxt, BXT_HUC_FW_MAJOR, \
BXT_HUC_FW_MINOR, BXT_BLD_NUM)
MODULE_FIRMWARE(I915_BXT_HUC_UCODE);
#define I915_KBL_HUC_UCODE HUC_FW_PATH(kbl, KBL_HUC_FW_MAJOR, \
KBL_HUC_FW_MINOR, KBL_BLD_NUM)
MODULE_FIRMWARE(I915_KBL_HUC_UCODE);
static void huc_fw_select(struct intel_uc_fw *huc_fw)
{
struct intel_huc *huc = container_of(huc_fw, struct intel_huc, fw);
struct drm_i915_private *dev_priv = huc_to_i915(huc);
GEM_BUG_ON(huc_fw->type != INTEL_UC_FW_TYPE_HUC);
if (!HAS_HUC(dev_priv))
return;
if (i915_modparams.huc_firmware_path) {
huc_fw->path = i915_modparams.huc_firmware_path;
huc_fw->major_ver_wanted = 0;
huc_fw->minor_ver_wanted = 0;
} else if (IS_SKYLAKE(dev_priv)) {
huc_fw->path = I915_SKL_HUC_UCODE;
huc_fw->major_ver_wanted = SKL_HUC_FW_MAJOR;
huc_fw->minor_ver_wanted = SKL_HUC_FW_MINOR;
} else if (IS_BROXTON(dev_priv)) {
huc_fw->path = I915_BXT_HUC_UCODE;
huc_fw->major_ver_wanted = BXT_HUC_FW_MAJOR;
huc_fw->minor_ver_wanted = BXT_HUC_FW_MINOR;
} else if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) {
huc_fw->path = I915_KBL_HUC_UCODE;
huc_fw->major_ver_wanted = KBL_HUC_FW_MAJOR;
huc_fw->minor_ver_wanted = KBL_HUC_FW_MINOR;
} else {
DRM_WARN("%s: No firmware known for this platform!\n",
intel_uc_fw_type_repr(huc_fw->type));
}
}
/**
* intel_huc_init_early() - initializes HuC struct
* @huc: intel_huc struct
*
* On platforms with HuC selects firmware for uploading
*/
void intel_huc_init_early(struct intel_huc *huc) void intel_huc_init_early(struct intel_huc *huc)
{ {
struct intel_uc_fw *huc_fw = &huc->fw; intel_huc_fw_init_early(huc);
intel_uc_fw_init(huc_fw, INTEL_UC_FW_TYPE_HUC);
huc_fw_select(huc_fw);
}
/**
* huc_ucode_xfer() - DMA's the firmware
* @huc_fw: the firmware descriptor
* @vma: the firmware image (bound into the GGTT)
*
* Transfer the firmware image to RAM for execution by the microcontroller.
*
* Return: 0 on success, non-zero on failure
*/
static int huc_ucode_xfer(struct intel_uc_fw *huc_fw, struct i915_vma *vma)
{
struct intel_huc *huc = container_of(huc_fw, struct intel_huc, fw);
struct drm_i915_private *dev_priv = huc_to_i915(huc);
unsigned long offset = 0;
u32 size;
int ret;
GEM_BUG_ON(huc_fw->type != INTEL_UC_FW_TYPE_HUC);
intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
/* Set the source address for the uCode */
offset = guc_ggtt_offset(vma) + huc_fw->header_offset;
I915_WRITE(DMA_ADDR_0_LOW, lower_32_bits(offset));
I915_WRITE(DMA_ADDR_0_HIGH, upper_32_bits(offset) & 0xFFFF);
/* Hardware doesn't look at destination address for HuC. Set it to 0,
* but still program the correct address space.
*/
I915_WRITE(DMA_ADDR_1_LOW, 0);
I915_WRITE(DMA_ADDR_1_HIGH, DMA_ADDRESS_SPACE_WOPCM);
size = huc_fw->header_size + huc_fw->ucode_size;
I915_WRITE(DMA_COPY_SIZE, size);
/* Start the DMA */
I915_WRITE(DMA_CTRL, _MASKED_BIT_ENABLE(HUC_UKERNEL | START_DMA));
/* Wait for DMA to finish */
ret = intel_wait_for_register_fw(dev_priv, DMA_CTRL, START_DMA, 0, 100);
DRM_DEBUG_DRIVER("HuC DMA transfer wait over with ret %d\n", ret);
/* Disable the bits once DMA is over */
I915_WRITE(DMA_CTRL, _MASKED_BIT_DISABLE(HUC_UKERNEL));
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
return ret;
}
/**
* intel_huc_init_hw() - load HuC uCode to device
* @huc: intel_huc structure
*
* Called from intel_uc_init_hw() during driver loading and also after a GPU
* reset. Be note that HuC loading must be done before GuC loading.
*
* The firmware image should have already been fetched into memory by the
* earlier call to intel_uc_init_fw(), so here we need only check that
* is succeeded, and then transfer the image to the h/w.
*
*/
int intel_huc_init_hw(struct intel_huc *huc)
{
return intel_uc_fw_upload(&huc->fw, huc_ucode_xfer);
} }
/** /**
@ -200,6 +48,7 @@ int intel_huc_auth(struct intel_huc *huc)
struct drm_i915_private *i915 = huc_to_i915(huc); struct drm_i915_private *i915 = huc_to_i915(huc);
struct intel_guc *guc = &i915->guc; struct intel_guc *guc = &i915->guc;
struct i915_vma *vma; struct i915_vma *vma;
u32 status;
int ret; int ret;
if (huc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS) if (huc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS)
@ -210,28 +59,35 @@ int intel_huc_auth(struct intel_huc *huc)
if (IS_ERR(vma)) { if (IS_ERR(vma)) {
ret = PTR_ERR(vma); ret = PTR_ERR(vma);
DRM_ERROR("HuC: Failed to pin huc fw object %d\n", ret); DRM_ERROR("HuC: Failed to pin huc fw object %d\n", ret);
return ret; goto fail;
} }
ret = intel_guc_auth_huc(guc, ret = intel_guc_auth_huc(guc,
guc_ggtt_offset(vma) + huc->fw.rsa_offset); guc_ggtt_offset(vma) + huc->fw.rsa_offset);
if (ret) { if (ret) {
DRM_ERROR("HuC: GuC did not ack Auth request %d\n", ret); DRM_ERROR("HuC: GuC did not ack Auth request %d\n", ret);
goto out; goto fail_unpin;
} }
/* Check authentication status, it should be done by now */ /* Check authentication status, it should be done by now */
ret = intel_wait_for_register(i915, ret = __intel_wait_for_register(i915,
HUC_STATUS2, HUC_STATUS2,
HUC_FW_VERIFIED, HUC_FW_VERIFIED,
HUC_FW_VERIFIED, HUC_FW_VERIFIED,
50); 2, 50, &status);
if (ret) { if (ret) {
DRM_ERROR("HuC: Authentication failed %d\n", ret); DRM_ERROR("HuC: Firmware not verified %#x\n", status);
goto out; goto fail_unpin;
} }
out:
i915_vma_unpin(vma); i915_vma_unpin(vma);
return 0;
fail_unpin:
i915_vma_unpin(vma);
fail:
huc->fw.load_status = INTEL_UC_FIRMWARE_FAIL;
DRM_ERROR("HuC: Authentication failed %d\n", ret);
return ret; return ret;
} }

View File

@ -26,6 +26,7 @@
#define _INTEL_HUC_H_ #define _INTEL_HUC_H_
#include "intel_uc_fw.h" #include "intel_uc_fw.h"
#include "intel_huc_fw.h"
struct intel_huc { struct intel_huc {
/* Generic uC firmware management */ /* Generic uC firmware management */
@ -35,7 +36,6 @@ struct intel_huc {
}; };
void intel_huc_init_early(struct intel_huc *huc); void intel_huc_init_early(struct intel_huc *huc);
int intel_huc_init_hw(struct intel_huc *huc);
int intel_huc_auth(struct intel_huc *huc); int intel_huc_auth(struct intel_huc *huc);
#endif #endif

View File

@ -0,0 +1,166 @@
/*
* SPDX-License-Identifier: MIT
*
* Copyright © 2014-2018 Intel Corporation
*/
#include "intel_huc_fw.h"
#include "i915_drv.h"
/**
* DOC: HuC Firmware
*
* Motivation:
* GEN9 introduces a new dedicated firmware for usage in media HEVC (High
* Efficiency Video Coding) operations. Userspace can use the firmware
* capabilities by adding HuC specific commands to batch buffers.
*
* Implementation:
* The same firmware loader is used as the GuC. However, the actual
* loading to HW is deferred until GEM initialization is done.
*
* Note that HuC firmware loading must be done before GuC loading.
*/
#define BXT_HUC_FW_MAJOR 01
#define BXT_HUC_FW_MINOR 07
#define BXT_BLD_NUM 1398
#define SKL_HUC_FW_MAJOR 01
#define SKL_HUC_FW_MINOR 07
#define SKL_BLD_NUM 1398
#define KBL_HUC_FW_MAJOR 02
#define KBL_HUC_FW_MINOR 00
#define KBL_BLD_NUM 1810
#define HUC_FW_PATH(platform, major, minor, bld_num) \
"i915/" __stringify(platform) "_huc_ver" __stringify(major) "_" \
__stringify(minor) "_" __stringify(bld_num) ".bin"
#define I915_SKL_HUC_UCODE HUC_FW_PATH(skl, SKL_HUC_FW_MAJOR, \
SKL_HUC_FW_MINOR, SKL_BLD_NUM)
MODULE_FIRMWARE(I915_SKL_HUC_UCODE);
#define I915_BXT_HUC_UCODE HUC_FW_PATH(bxt, BXT_HUC_FW_MAJOR, \
BXT_HUC_FW_MINOR, BXT_BLD_NUM)
MODULE_FIRMWARE(I915_BXT_HUC_UCODE);
#define I915_KBL_HUC_UCODE HUC_FW_PATH(kbl, KBL_HUC_FW_MAJOR, \
KBL_HUC_FW_MINOR, KBL_BLD_NUM)
MODULE_FIRMWARE(I915_KBL_HUC_UCODE);
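For reference, HUC_FW_PATH() stringifies its arguments, so the definitions above expand to the following firmware file names:

/*
 * Expansions of the macros above (derived mechanically, listed only for
 * reference):
 *
 *   I915_SKL_HUC_UCODE -> "i915/skl_huc_ver01_07_1398.bin"
 *   I915_BXT_HUC_UCODE -> "i915/bxt_huc_ver01_07_1398.bin"
 *   I915_KBL_HUC_UCODE -> "i915/kbl_huc_ver02_00_1810.bin"
 */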
static void huc_fw_select(struct intel_uc_fw *huc_fw)
{
struct intel_huc *huc = container_of(huc_fw, struct intel_huc, fw);
struct drm_i915_private *dev_priv = huc_to_i915(huc);
GEM_BUG_ON(huc_fw->type != INTEL_UC_FW_TYPE_HUC);
if (!HAS_HUC(dev_priv))
return;
if (i915_modparams.huc_firmware_path) {
huc_fw->path = i915_modparams.huc_firmware_path;
huc_fw->major_ver_wanted = 0;
huc_fw->minor_ver_wanted = 0;
} else if (IS_SKYLAKE(dev_priv)) {
huc_fw->path = I915_SKL_HUC_UCODE;
huc_fw->major_ver_wanted = SKL_HUC_FW_MAJOR;
huc_fw->minor_ver_wanted = SKL_HUC_FW_MINOR;
} else if (IS_BROXTON(dev_priv)) {
huc_fw->path = I915_BXT_HUC_UCODE;
huc_fw->major_ver_wanted = BXT_HUC_FW_MAJOR;
huc_fw->minor_ver_wanted = BXT_HUC_FW_MINOR;
} else if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) {
huc_fw->path = I915_KBL_HUC_UCODE;
huc_fw->major_ver_wanted = KBL_HUC_FW_MAJOR;
huc_fw->minor_ver_wanted = KBL_HUC_FW_MINOR;
} else {
DRM_WARN("%s: No firmware known for this platform!\n",
intel_uc_fw_type_repr(huc_fw->type));
}
}
/**
* intel_huc_fw_init_early() - initializes HuC firmware struct
* @huc: intel_huc struct
*
 * On platforms with HuC, this selects the firmware to be uploaded
*/
void intel_huc_fw_init_early(struct intel_huc *huc)
{
struct intel_uc_fw *huc_fw = &huc->fw;
intel_uc_fw_init(huc_fw, INTEL_UC_FW_TYPE_HUC);
huc_fw_select(huc_fw);
}
/**
* huc_fw_xfer() - DMA's the firmware
* @huc_fw: the firmware descriptor
* @vma: the firmware image (bound into the GGTT)
*
* Transfer the firmware image to RAM for execution by the microcontroller.
*
* Return: 0 on success, non-zero on failure
*/
static int huc_fw_xfer(struct intel_uc_fw *huc_fw, struct i915_vma *vma)
{
struct intel_huc *huc = container_of(huc_fw, struct intel_huc, fw);
struct drm_i915_private *dev_priv = huc_to_i915(huc);
unsigned long offset = 0;
u32 size;
int ret;
GEM_BUG_ON(huc_fw->type != INTEL_UC_FW_TYPE_HUC);
intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
/* Set the source address for the uCode */
offset = guc_ggtt_offset(vma) + huc_fw->header_offset;
I915_WRITE(DMA_ADDR_0_LOW, lower_32_bits(offset));
I915_WRITE(DMA_ADDR_0_HIGH, upper_32_bits(offset) & 0xFFFF);
/* Hardware doesn't look at destination address for HuC. Set it to 0,
* but still program the correct address space.
*/
I915_WRITE(DMA_ADDR_1_LOW, 0);
I915_WRITE(DMA_ADDR_1_HIGH, DMA_ADDRESS_SPACE_WOPCM);
size = huc_fw->header_size + huc_fw->ucode_size;
I915_WRITE(DMA_COPY_SIZE, size);
/* Start the DMA */
I915_WRITE(DMA_CTRL, _MASKED_BIT_ENABLE(HUC_UKERNEL | START_DMA));
/* Wait for DMA to finish */
ret = intel_wait_for_register_fw(dev_priv, DMA_CTRL, START_DMA, 0, 100);
DRM_DEBUG_DRIVER("HuC DMA transfer wait over with ret %d\n", ret);
/* Disable the bits once DMA is over */
I915_WRITE(DMA_CTRL, _MASKED_BIT_DISABLE(HUC_UKERNEL));
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
return ret;
}
/**
* intel_huc_fw_upload() - load HuC uCode to device
* @huc: intel_huc structure
*
* Called from intel_uc_init_hw() during driver load, resume from sleep and
* after a GPU reset. Note that HuC must be loaded before GuC.
*
* The firmware image should have already been fetched into memory by the
* earlier call to intel_uc_init_fw(), so here we need to only check that
* fetch succeeded, and then transfer the image to the h/w.
*
* Return: non-zero code on error
*/
int intel_huc_fw_upload(struct intel_huc *huc)
{
return intel_uc_fw_upload(&huc->fw, huc_fw_xfer);
}

View File

@ -0,0 +1,15 @@
/*
* SPDX-License-Identifier: MIT
*
* Copyright © 2014-2018 Intel Corporation
*/
#ifndef _INTEL_HUC_FW_H_
#define _INTEL_HUC_FW_H_
struct intel_huc;
void intel_huc_fw_init_early(struct intel_huc *huc);
int intel_huc_fw_upload(struct intel_huc *huc);
#endif

View File

@ -169,6 +169,23 @@ static void execlists_init_reg_state(u32 *reg_state,
struct intel_engine_cs *engine, struct intel_engine_cs *engine,
struct intel_ring *ring); struct intel_ring *ring);
static inline struct i915_priolist *to_priolist(struct rb_node *rb)
{
return rb_entry(rb, struct i915_priolist, node);
}
static inline int rq_prio(const struct i915_request *rq)
{
return rq->priotree.priority;
}
static inline bool need_preempt(const struct intel_engine_cs *engine,
const struct i915_request *last,
int prio)
{
return engine->i915->preempt_context && prio > max(rq_prio(last), 0);
}
/** /**
* intel_lr_context_descriptor_update() - calculate & cache the descriptor * intel_lr_context_descriptor_update() - calculate & cache the descriptor
* descriptor for a pinned context * descriptor for a pinned context
@ -187,6 +204,18 @@ static void execlists_init_reg_state(u32 *reg_state,
* bits 32-52: ctx ID, a globally unique tag * bits 32-52: ctx ID, a globally unique tag
* bits 53-54: mbz, reserved for use by hardware * bits 53-54: mbz, reserved for use by hardware
* bits 55-63: group ID, currently unused and set to 0 * bits 55-63: group ID, currently unused and set to 0
*
* Starting from Gen11, the upper dword of the descriptor has a new format:
*
* bits 32-36: reserved
* bits 37-47: SW context ID
 * bits 48-53: engine instance
* bit 54: mbz, reserved for use by hardware
* bits 55-60: SW counter
* bits 61-63: engine class
*
* engine info, SW context ID and SW counter need to form a unique number
* (Context ID) per lrc.
*/ */
static void static void
intel_lr_context_descriptor_update(struct i915_gem_context *ctx, intel_lr_context_descriptor_update(struct i915_gem_context *ctx,
@ -195,12 +224,32 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx,
struct intel_context *ce = &ctx->engine[engine->id]; struct intel_context *ce = &ctx->engine[engine->id];
u64 desc; u64 desc;
BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (1<<GEN8_CTX_ID_WIDTH)); BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH)));
BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > (BIT(GEN11_SW_CTX_ID_WIDTH)));
desc = ctx->desc_template; /* bits 0-11 */ desc = ctx->desc_template; /* bits 0-11 */
GEM_BUG_ON(desc & GENMASK_ULL(63, 12));
desc |= i915_ggtt_offset(ce->state) + LRC_HEADER_PAGES * PAGE_SIZE; desc |= i915_ggtt_offset(ce->state) + LRC_HEADER_PAGES * PAGE_SIZE;
/* bits 12-31 */ /* bits 12-31 */
desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT; /* bits 32-52 */ GEM_BUG_ON(desc & GENMASK_ULL(63, 32));
if (INTEL_GEN(ctx->i915) >= 11) {
GEM_BUG_ON(ctx->hw_id >= BIT(GEN11_SW_CTX_ID_WIDTH));
desc |= (u64)ctx->hw_id << GEN11_SW_CTX_ID_SHIFT;
/* bits 37-47 */
desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT;
/* bits 48-53 */
/* TODO: decide what to do with SW counter (bits 55-60) */
desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT;
/* bits 61-63 */
} else {
GEM_BUG_ON(ctx->hw_id >= BIT(GEN8_CTX_ID_WIDTH));
desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT; /* bits 32-52 */
}
ce->lrc_desc = desc; ce->lrc_desc = desc;
} }
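A stand-alone sketch of the Gen11 upper-dword packing implemented above, with the shift values taken directly from the bit-layout comment (37, 48 and 61 for SW context ID, engine instance and engine class); gen11_ctx_desc_hi_example() is illustrative only and not part of the patch:

static u64 gen11_ctx_desc_hi_example(u32 hw_id, u8 instance, u8 class)
{
	u64 desc = 0;

	desc |= (u64)hw_id    << 37;  /* bits 37-47: SW context ID */
	desc |= (u64)instance << 48;  /* bits 48-53: engine instance */
	desc |= (u64)class    << 61;  /* bits 61-63: engine class */

	return desc;
}

Bit 54 (mbz) and the SW counter in bits 55-60 are left at zero here, matching the TODO in the hunk.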
@ -224,7 +273,7 @@ find_priolist:
parent = &execlists->queue.rb_node; parent = &execlists->queue.rb_node;
while (*parent) { while (*parent) {
rb = *parent; rb = *parent;
p = rb_entry(rb, typeof(*p), node); p = to_priolist(rb);
if (prio > p->priority) { if (prio > p->priority) {
parent = &rb->rb_left; parent = &rb->rb_left;
} else if (prio < p->priority) { } else if (prio < p->priority) {
@ -264,10 +313,10 @@ find_priolist:
if (first) if (first)
execlists->first = &p->node; execlists->first = &p->node;
return ptr_pack_bits(p, first, 1); return p;
} }
static void unwind_wa_tail(struct drm_i915_gem_request *rq) static void unwind_wa_tail(struct i915_request *rq)
{ {
rq->tail = intel_ring_wrap(rq->ring, rq->wa_tail - WA_TAIL_BYTES); rq->tail = intel_ring_wrap(rq->ring, rq->wa_tail - WA_TAIL_BYTES);
assert_ring_tail_valid(rq->ring, rq->tail); assert_ring_tail_valid(rq->ring, rq->tail);
@ -275,7 +324,7 @@ static void unwind_wa_tail(struct drm_i915_gem_request *rq)
static void __unwind_incomplete_requests(struct intel_engine_cs *engine) static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
{ {
struct drm_i915_gem_request *rq, *rn; struct i915_request *rq, *rn;
struct i915_priolist *uninitialized_var(p); struct i915_priolist *uninitialized_var(p);
int last_prio = I915_PRIORITY_INVALID; int last_prio = I915_PRIORITY_INVALID;
@ -284,20 +333,16 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
list_for_each_entry_safe_reverse(rq, rn, list_for_each_entry_safe_reverse(rq, rn,
&engine->timeline->requests, &engine->timeline->requests,
link) { link) {
if (i915_gem_request_completed(rq)) if (i915_request_completed(rq))
return; return;
__i915_gem_request_unsubmit(rq); __i915_request_unsubmit(rq);
unwind_wa_tail(rq); unwind_wa_tail(rq);
GEM_BUG_ON(rq->priotree.priority == I915_PRIORITY_INVALID); GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
if (rq->priotree.priority != last_prio) { if (rq_prio(rq) != last_prio) {
p = lookup_priolist(engine, last_prio = rq_prio(rq);
&rq->priotree, p = lookup_priolist(engine, &rq->priotree, last_prio);
rq->priotree.priority);
p = ptr_mask_bits(p, 1);
last_prio = rq->priotree.priority;
} }
list_add(&rq->priotree.link, &p->requests); list_add(&rq->priotree.link, &p->requests);
@ -316,8 +361,7 @@ execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
} }
static inline void static inline void
execlists_context_status_change(struct drm_i915_gem_request *rq, execlists_context_status_change(struct i915_request *rq, unsigned long status)
unsigned long status)
{ {
/* /*
* Only used when GVT-g is enabled now. When GVT-g is disabled, * Only used when GVT-g is enabled now. When GVT-g is disabled,
@ -331,14 +375,14 @@ execlists_context_status_change(struct drm_i915_gem_request *rq,
} }
static inline void static inline void
execlists_context_schedule_in(struct drm_i915_gem_request *rq) execlists_context_schedule_in(struct i915_request *rq)
{ {
execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
intel_engine_context_in(rq->engine); intel_engine_context_in(rq->engine);
} }
static inline void static inline void
execlists_context_schedule_out(struct drm_i915_gem_request *rq) execlists_context_schedule_out(struct i915_request *rq)
{ {
intel_engine_context_out(rq->engine); intel_engine_context_out(rq->engine);
execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
@ -353,7 +397,7 @@ execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state)
ASSIGN_CTX_PDP(ppgtt, reg_state, 0); ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
} }
static u64 execlists_update_context(struct drm_i915_gem_request *rq) static u64 execlists_update_context(struct i915_request *rq)
{ {
struct intel_context *ce = &rq->ctx->engine[rq->engine->id]; struct intel_context *ce = &rq->ctx->engine[rq->engine->id];
struct i915_hw_ppgtt *ppgtt = struct i915_hw_ppgtt *ppgtt =
@ -373,19 +417,31 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq)
return ce->lrc_desc; return ce->lrc_desc;
} }
static inline void elsp_write(u64 desc, u32 __iomem *elsp) static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port)
{ {
writel(upper_32_bits(desc), elsp); if (execlists->ctrl_reg) {
writel(lower_32_bits(desc), elsp); writel(lower_32_bits(desc), execlists->submit_reg + port * 2);
writel(upper_32_bits(desc), execlists->submit_reg + port * 2 + 1);
} else {
writel(upper_32_bits(desc), execlists->submit_reg);
writel(lower_32_bits(desc), execlists->submit_reg);
}
} }
static void execlists_submit_ports(struct intel_engine_cs *engine) static void execlists_submit_ports(struct intel_engine_cs *engine)
{ {
struct execlist_port *port = engine->execlists.port; struct intel_engine_execlists *execlists = &engine->execlists;
struct execlist_port *port = execlists->port;
unsigned int n; unsigned int n;
for (n = execlists_num_ports(&engine->execlists); n--; ) { /*
struct drm_i915_gem_request *rq; * ELSQ note: the submit queue is not cleared after being submitted
* to the HW so we need to make sure we always clean it up. This is
* currently ensured by the fact that we always write the same number
* of elsq entries, keep this in mind before changing the loop below.
*/
for (n = execlists_num_ports(execlists); n--; ) {
struct i915_request *rq;
unsigned int count; unsigned int count;
u64 desc; u64 desc;
@ -398,18 +454,24 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
desc = execlists_update_context(rq); desc = execlists_update_context(rq);
GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc)); GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
GEM_TRACE("%s in[%d]: ctx=%d.%d, seqno=%x\n", GEM_TRACE("%s in[%d]: ctx=%d.%d, seqno=%x, prio=%d\n",
engine->name, n, engine->name, n,
port[n].context_id, count, port[n].context_id, count,
rq->global_seqno); rq->global_seqno,
rq_prio(rq));
} else { } else {
GEM_BUG_ON(!n); GEM_BUG_ON(!n);
desc = 0; desc = 0;
} }
elsp_write(desc, engine->execlists.elsp); write_desc(execlists, desc, n);
} }
execlists_clear_active(&engine->execlists, EXECLISTS_ACTIVE_HWACK);
/* we need to manually load the submit queue */
if (execlists->ctrl_reg)
writel(EL_CTRL_LOAD, execlists->ctrl_reg);
execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
} }
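In summary, write_desc() above folds two submission mechanisms into one helper; a condensed view (sketch only, based on the code in this hunk):

/*
 * Legacy path (no ctrl_reg): each descriptor is written upper dword then
 * lower dword to the single ELSP register, highest port first, so the last
 * write is port 0's lower dword.
 *
 * Enhanced path (ctrl_reg present, the Gen11 support added in this series):
 * lower/upper dwords land in per-port submit-queue slots, and nothing is
 * submitted until the caller writes EL_CTRL_LOAD to the control register.
 */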
static bool ctx_single_port_submission(const struct i915_gem_context *ctx) static bool ctx_single_port_submission(const struct i915_gem_context *ctx)
@ -430,44 +492,47 @@ static bool can_merge_ctx(const struct i915_gem_context *prev,
return true; return true;
} }
static void port_assign(struct execlist_port *port, static void port_assign(struct execlist_port *port, struct i915_request *rq)
struct drm_i915_gem_request *rq)
{ {
GEM_BUG_ON(rq == port_request(port)); GEM_BUG_ON(rq == port_request(port));
if (port_isset(port)) if (port_isset(port))
i915_gem_request_put(port_request(port)); i915_request_put(port_request(port));
port_set(port, port_pack(i915_gem_request_get(rq), port_count(port))); port_set(port, port_pack(i915_request_get(rq), port_count(port)));
} }
static void inject_preempt_context(struct intel_engine_cs *engine) static void inject_preempt_context(struct intel_engine_cs *engine)
{ {
struct intel_engine_execlists *execlists = &engine->execlists;
struct intel_context *ce = struct intel_context *ce =
&engine->i915->preempt_context->engine[engine->id]; &engine->i915->preempt_context->engine[engine->id];
unsigned int n; unsigned int n;
GEM_BUG_ON(engine->execlists.preempt_complete_status != GEM_BUG_ON(execlists->preempt_complete_status !=
upper_32_bits(ce->lrc_desc)); upper_32_bits(ce->lrc_desc));
GEM_BUG_ON(!IS_ALIGNED(ce->ring->size, WA_TAIL_BYTES));
memset(ce->ring->vaddr + ce->ring->tail, 0, WA_TAIL_BYTES);
ce->ring->tail += WA_TAIL_BYTES;
ce->ring->tail &= (ce->ring->size - 1);
ce->lrc_reg_state[CTX_RING_TAIL+1] = ce->ring->tail;
GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] & GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) != CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)); CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
/*
* Switch to our empty preempt context so
* the state of the GPU is known (idle).
*/
GEM_TRACE("%s\n", engine->name); GEM_TRACE("%s\n", engine->name);
for (n = execlists_num_ports(&engine->execlists); --n; ) for (n = execlists_num_ports(execlists); --n; )
elsp_write(0, engine->execlists.elsp); write_desc(execlists, 0, n);
write_desc(execlists, ce->lrc_desc, n);
/* we need to manually load the submit queue */
if (execlists->ctrl_reg)
writel(EL_CTRL_LOAD, execlists->ctrl_reg);
elsp_write(ce->lrc_desc, engine->execlists.elsp);
execlists_clear_active(&engine->execlists, EXECLISTS_ACTIVE_HWACK); execlists_clear_active(&engine->execlists, EXECLISTS_ACTIVE_HWACK);
execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
} }
static void execlists_dequeue(struct intel_engine_cs *engine) static void execlists_dequeue(struct intel_engine_cs *engine)
@ -476,7 +541,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
struct execlist_port *port = execlists->port; struct execlist_port *port = execlists->port;
const struct execlist_port * const last_port = const struct execlist_port * const last_port =
&execlists->port[execlists->port_mask]; &execlists->port[execlists->port_mask];
struct drm_i915_gem_request *last = port_request(port); struct i915_request *last = port_request(port);
struct rb_node *rb; struct rb_node *rb;
bool submit = false; bool submit = false;
@ -504,8 +569,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
spin_lock_irq(&engine->timeline->lock); spin_lock_irq(&engine->timeline->lock);
rb = execlists->first; rb = execlists->first;
GEM_BUG_ON(rb_first(&execlists->queue) != rb); GEM_BUG_ON(rb_first(&execlists->queue) != rb);
if (!rb)
goto unlock;
if (last) { if (last) {
/* /*
@ -528,55 +591,49 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK)) if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
goto unlock; goto unlock;
if (engine->i915->preempt_context && if (need_preempt(engine, last, execlists->queue_priority)) {
rb_entry(rb, struct i915_priolist, node)->priority >
max(last->priotree.priority, 0)) {
/*
* Switch to our empty preempt context so
* the state of the GPU is known (idle).
*/
inject_preempt_context(engine); inject_preempt_context(engine);
execlists_set_active(execlists,
EXECLISTS_ACTIVE_PREEMPT);
goto unlock; goto unlock;
} else {
/*
* In theory, we could coalesce more requests onto
* the second port (the first port is active, with
* no preemptions pending). However, that means we
* then have to deal with the possible lite-restore
* of the second port (as we submit the ELSP, there
* may be a context-switch) but also we may complete
* the resubmission before the context-switch. Ergo,
* coalescing onto the second port will cause a
* preemption event, but we cannot predict whether
* that will affect port[0] or port[1].
*
* If the second port is already active, we can wait
* until the next context-switch before contemplating
* new requests. The GPU will be busy and we should be
* able to resubmit the new ELSP before it idles,
* avoiding pipeline bubbles (momentary pauses where
* the driver is unable to keep up the supply of new
* work).
*/
if (port_count(&port[1]))
goto unlock;
/* WaIdleLiteRestore:bdw,skl
* Apply the wa NOOPs to prevent
* ring:HEAD == req:TAIL as we resubmit the
* request. See gen8_emit_breadcrumb() for
* where we prepare the padding after the
* end of the request.
*/
last->tail = last->wa_tail;
} }
/*
* In theory, we could coalesce more requests onto
* the second port (the first port is active, with
* no preemptions pending). However, that means we
* then have to deal with the possible lite-restore
* of the second port (as we submit the ELSP, there
* may be a context-switch) but also we may complete
* the resubmission before the context-switch. Ergo,
* coalescing onto the second port will cause a
* preemption event, but we cannot predict whether
* that will affect port[0] or port[1].
*
* If the second port is already active, we can wait
* until the next context-switch before contemplating
* new requests. The GPU will be busy and we should be
* able to resubmit the new ELSP before it idles,
* avoiding pipeline bubbles (momentary pauses where
* the driver is unable to keep up the supply of new
* work). However, we have to double check that the
 * priorities of the ports haven't been switched.
*/
if (port_count(&port[1]))
goto unlock;
/*
* WaIdleLiteRestore:bdw,skl
* Apply the wa NOOPs to prevent
* ring:HEAD == rq:TAIL as we resubmit the
* request. See gen8_emit_breadcrumb() for
* where we prepare the padding after the
* end of the request.
*/
last->tail = last->wa_tail;
} }
do { while (rb) {
struct i915_priolist *p = rb_entry(rb, typeof(*p), node); struct i915_priolist *p = to_priolist(rb);
struct drm_i915_gem_request *rq, *rn; struct i915_request *rq, *rn;
list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) { list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
/* /*
@ -626,8 +683,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
} }
INIT_LIST_HEAD(&rq->priotree.link); INIT_LIST_HEAD(&rq->priotree.link);
__i915_gem_request_submit(rq); __i915_request_submit(rq);
trace_i915_gem_request_in(rq, port_index(port, execlists)); trace_i915_request_in(rq, port_index(port, execlists));
last = rq; last = rq;
submit = true; submit = true;
} }
@ -637,8 +694,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
INIT_LIST_HEAD(&p->requests); INIT_LIST_HEAD(&p->requests);
if (p->priority != I915_PRIORITY_NORMAL) if (p->priority != I915_PRIORITY_NORMAL)
kmem_cache_free(engine->i915->priorities, p); kmem_cache_free(engine->i915->priorities, p);
} while (rb); }
done: done:
execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN;
execlists->first = rb; execlists->first = rb;
if (submit) if (submit)
port_assign(port, last); port_assign(port, last);
@ -665,12 +723,17 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
unsigned int num_ports = execlists_num_ports(execlists); unsigned int num_ports = execlists_num_ports(execlists);
while (num_ports-- && port_isset(port)) { while (num_ports-- && port_isset(port)) {
struct drm_i915_gem_request *rq = port_request(port); struct i915_request *rq = port_request(port);
GEM_BUG_ON(!execlists->active); GEM_BUG_ON(!execlists->active);
intel_engine_context_out(rq->engine); intel_engine_context_out(rq->engine);
execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_PREEMPTED);
i915_gem_request_put(rq); execlists_context_status_change(rq,
i915_request_completed(rq) ?
INTEL_CONTEXT_SCHEDULE_OUT :
INTEL_CONTEXT_SCHEDULE_PREEMPTED);
i915_request_put(rq);
memset(port, 0, sizeof(*port)); memset(port, 0, sizeof(*port));
port++; port++;
@ -680,32 +743,50 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
static void execlists_cancel_requests(struct intel_engine_cs *engine) static void execlists_cancel_requests(struct intel_engine_cs *engine)
{ {
struct intel_engine_execlists * const execlists = &engine->execlists; struct intel_engine_execlists * const execlists = &engine->execlists;
struct drm_i915_gem_request *rq, *rn; struct i915_request *rq, *rn;
struct rb_node *rb; struct rb_node *rb;
unsigned long flags; unsigned long flags;
spin_lock_irqsave(&engine->timeline->lock, flags); GEM_TRACE("%s\n", engine->name);
/*
* Before we call engine->cancel_requests(), we should have exclusive
* access to the submission state. This is arranged for us by the
* caller disabling the interrupt generation, the tasklet and other
* threads that may then access the same state, giving us a free hand
* to reset state. However, we still need to let lockdep be aware that
* we know this state may be accessed in hardirq context, so we
* disable the irq around this manipulation and we want to keep
* the spinlock focused on its duties and not accidentally conflate
* coverage to the submission's irq state. (Similarly, although we
* shouldn't need to disable irq around the manipulation of the
* submission's irq state, we also wish to remind ourselves that
* it is irq state.)
*/
local_irq_save(flags);
/* Cancel the requests on the HW and clear the ELSP tracker. */ /* Cancel the requests on the HW and clear the ELSP tracker. */
execlists_cancel_port_requests(execlists); execlists_cancel_port_requests(execlists);
spin_lock(&engine->timeline->lock);
/* Mark all executing requests as skipped. */ /* Mark all executing requests as skipped. */
list_for_each_entry(rq, &engine->timeline->requests, link) { list_for_each_entry(rq, &engine->timeline->requests, link) {
GEM_BUG_ON(!rq->global_seqno); GEM_BUG_ON(!rq->global_seqno);
if (!i915_gem_request_completed(rq)) if (!i915_request_completed(rq))
dma_fence_set_error(&rq->fence, -EIO); dma_fence_set_error(&rq->fence, -EIO);
} }
/* Flush the queued requests to the timeline list (for retiring). */ /* Flush the queued requests to the timeline list (for retiring). */
rb = execlists->first; rb = execlists->first;
while (rb) { while (rb) {
struct i915_priolist *p = rb_entry(rb, typeof(*p), node); struct i915_priolist *p = to_priolist(rb);
list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) { list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
INIT_LIST_HEAD(&rq->priotree.link); INIT_LIST_HEAD(&rq->priotree.link);
dma_fence_set_error(&rq->fence, -EIO); dma_fence_set_error(&rq->fence, -EIO);
__i915_gem_request_submit(rq); __i915_request_submit(rq);
} }
rb = rb_next(rb); rb = rb_next(rb);
@ -717,11 +798,13 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
/* Remaining _unready_ requests will be nop'ed when submitted */ /* Remaining _unready_ requests will be nop'ed when submitted */
execlists->queue_priority = INT_MIN;
execlists->queue = RB_ROOT; execlists->queue = RB_ROOT;
execlists->first = NULL; execlists->first = NULL;
GEM_BUG_ON(port_isset(execlists->port)); GEM_BUG_ON(port_isset(execlists->port));
spin_unlock(&engine->timeline->lock);
/* /*
* The port is checked prior to scheduling a tasklet, but * The port is checked prior to scheduling a tasklet, but
* just in case we have suspended the tasklet to do the * just in case we have suspended the tasklet to do the
@ -730,7 +813,10 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
*/ */
clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
spin_unlock_irqrestore(&engine->timeline->lock, flags); /* Mark all CS interrupts as complete */
execlists->active = 0;
local_irq_restore(flags);
} }
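The long comment above reduces to a simple locking shape: local_irq_save()/local_irq_restore() bracket the whole cancellation, the port cancellation and the execlists->active reset happen outside the timeline lock, and the plain spin_lock()/spin_unlock() pair covers only the request-list manipulation. reset_common_ring() later in this file is reworked to the same split, which is why its spin_lock_irqsave() disappears in this diff.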
/* /*
@ -806,7 +892,7 @@ static void execlists_submission_tasklet(unsigned long data)
tail, GEN8_CSB_WRITE_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?"); tail, GEN8_CSB_WRITE_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
while (head != tail) { while (head != tail) {
struct drm_i915_gem_request *rq; struct i915_request *rq;
unsigned int status; unsigned int status;
unsigned int count; unsigned int count;
@ -872,23 +958,28 @@ static void execlists_submission_tasklet(unsigned long data)
GEM_BUG_ON(!execlists_is_active(execlists, GEM_BUG_ON(!execlists_is_active(execlists,
EXECLISTS_ACTIVE_USER)); EXECLISTS_ACTIVE_USER));
rq = port_unpack(port, &count);
GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x, prio=%d\n",
engine->name,
port->context_id, count,
rq ? rq->global_seqno : 0,
rq ? rq_prio(rq) : 0);
/* Check the context/desc id for this event matches */ /* Check the context/desc id for this event matches */
GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id); GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
rq = port_unpack(port, &count);
GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x\n",
engine->name,
port->context_id, count,
rq ? rq->global_seqno : 0);
GEM_BUG_ON(count == 0); GEM_BUG_ON(count == 0);
if (--count == 0) { if (--count == 0) {
GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED); GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
GEM_BUG_ON(port_isset(&port[1]) && GEM_BUG_ON(port_isset(&port[1]) &&
!(status & GEN8_CTX_STATUS_ELEMENT_SWITCH)); !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH));
GEM_BUG_ON(!i915_gem_request_completed(rq)); GEM_BUG_ON(!i915_request_completed(rq));
execlists_context_schedule_out(rq); execlists_context_schedule_out(rq);
trace_i915_gem_request_out(rq); trace_i915_request_out(rq);
i915_gem_request_put(rq); i915_request_put(rq);
GEM_TRACE("%s completed ctx=%d\n",
engine->name, port->context_id);
execlists_port_complete(execlists, port); execlists_port_complete(execlists, port);
} else { } else {
@ -917,18 +1008,22 @@ static void execlists_submission_tasklet(unsigned long data)
intel_uncore_forcewake_put(dev_priv, execlists->fw_domains); intel_uncore_forcewake_put(dev_priv, execlists->fw_domains);
} }
static void insert_request(struct intel_engine_cs *engine, static void queue_request(struct intel_engine_cs *engine,
struct i915_priotree *pt, struct i915_priotree *pt,
int prio) int prio)
{ {
struct i915_priolist *p = lookup_priolist(engine, pt, prio); list_add_tail(&pt->link, &lookup_priolist(engine, pt, prio)->requests);
list_add_tail(&pt->link, &ptr_mask_bits(p, 1)->requests);
if (ptr_unmask_bits(p, 1))
tasklet_hi_schedule(&engine->execlists.tasklet);
} }
static void execlists_submit_request(struct drm_i915_gem_request *request) static void submit_queue(struct intel_engine_cs *engine, int prio)
{
if (prio > engine->execlists.queue_priority) {
engine->execlists.queue_priority = prio;
tasklet_hi_schedule(&engine->execlists.tasklet);
}
}
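To make the new queue_priority gate concrete, here is a stand-alone illustrative model, not driver code (engine_model, model_submit_queue and tasklet_kicked are all invented for the example): the cached value tracks, roughly, the highest priority still waiting in the queue, and the tasklet is only kicked when a newcomer beats it.

#include <limits.h>
#include <stdbool.h>
#include <stdio.h>

struct engine_model {
        int queue_priority;     /* highest priority still waiting in the queue */
        bool tasklet_kicked;    /* stands in for tasklet_hi_schedule() */
};

static void model_submit_queue(struct engine_model *e, int prio)
{
        /* Only poke the submission tasklet if this request outranks
         * everything already queued; otherwise the dequeue that is
         * already due will pick it up in priority order anyway. */
        if (prio > e->queue_priority) {
                e->queue_priority = prio;
                e->tasklet_kicked = true;
        }
}

int main(void)
{
        struct engine_model e = { .queue_priority = INT_MIN, .tasklet_kicked = false };

        model_submit_queue(&e, 0);      /* empty queue: INT_MIN always loses, kick */
        printf("prio 0    -> kicked=%d\n", e.tasklet_kicked);

        e.tasklet_kicked = false;
        model_submit_queue(&e, 0);      /* equal priority: no extra kick */
        printf("prio 0    -> kicked=%d\n", e.tasklet_kicked);

        model_submit_queue(&e, 1024);   /* higher priority: kick again */
        printf("prio 1024 -> kicked=%d\n", e.tasklet_kicked);
        return 0;
}

Because execlists_dequeue() resets queue_priority to the head of the remaining queue (or INT_MIN when it drains, as execlists_cancel_requests() also does above), the first submission after the engine idles always schedules the tasklet.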
static void execlists_submit_request(struct i915_request *request)
{ {
struct intel_engine_cs *engine = request->engine; struct intel_engine_cs *engine = request->engine;
unsigned long flags; unsigned long flags;
@ -936,7 +1031,8 @@ static void execlists_submit_request(struct drm_i915_gem_request *request)
/* Will be called from irq-context when using foreign fences. */ /* Will be called from irq-context when using foreign fences. */
spin_lock_irqsave(&engine->timeline->lock, flags); spin_lock_irqsave(&engine->timeline->lock, flags);
insert_request(engine, &request->priotree, request->priotree.priority); queue_request(engine, &request->priotree, rq_prio(request));
submit_queue(engine, rq_prio(request));
GEM_BUG_ON(!engine->execlists.first); GEM_BUG_ON(!engine->execlists.first);
GEM_BUG_ON(list_empty(&request->priotree.link)); GEM_BUG_ON(list_empty(&request->priotree.link));
@ -944,9 +1040,9 @@ static void execlists_submit_request(struct drm_i915_gem_request *request)
spin_unlock_irqrestore(&engine->timeline->lock, flags); spin_unlock_irqrestore(&engine->timeline->lock, flags);
} }
static struct drm_i915_gem_request *pt_to_request(struct i915_priotree *pt) static struct i915_request *pt_to_request(struct i915_priotree *pt)
{ {
return container_of(pt, struct drm_i915_gem_request, priotree); return container_of(pt, struct i915_request, priotree);
} }
static struct intel_engine_cs * static struct intel_engine_cs *
@ -964,7 +1060,7 @@ pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked)
return engine; return engine;
} }
static void execlists_schedule(struct drm_i915_gem_request *request, int prio) static void execlists_schedule(struct i915_request *request, int prio)
{ {
struct intel_engine_cs *engine; struct intel_engine_cs *engine;
struct i915_dependency *dep, *p; struct i915_dependency *dep, *p;
@ -973,7 +1069,7 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
GEM_BUG_ON(prio == I915_PRIORITY_INVALID); GEM_BUG_ON(prio == I915_PRIORITY_INVALID);
if (i915_gem_request_completed(request)) if (i915_request_completed(request))
return; return;
if (prio <= READ_ONCE(request->priotree.priority)) if (prio <= READ_ONCE(request->priotree.priority))
@ -992,7 +1088,7 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
* static void update_priorities(struct i915_priotree *pt, prio) { * static void update_priorities(struct i915_priotree *pt, prio) {
* list_for_each_entry(dep, &pt->signalers_list, signal_link) * list_for_each_entry(dep, &pt->signalers_list, signal_link)
* update_priorities(dep->signal, prio) * update_priorities(dep->signal, prio)
* insert_request(pt); * queue_request(pt);
* } * }
* but that may have unlimited recursion depth and so runs a very * but that may have unlimited recursion depth and so runs a very
* real risk of overrunning the kernel stack. Instead, we build * real risk of overrunning the kernel stack. Instead, we build
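The comment above first sketches the naive recursive form and then rejects it because the dependency chain can be arbitrarily deep. The standard replacement, and as far as I can tell the shape the real execlists_schedule() takes with its list of i915_dependency nodes, is an explicit worklist that grows at the tail while it is being walked. A minimal stand-alone illustration follows; every name in it (node, bump_priority, and so on) is invented for the example.

#include <stdio.h>

#define MAX_NODES 8

struct node {
        int prio;
        int visited;                    /* stands in for "already on the list" */
        int nr_signalers;
        struct node *signalers[MAX_NODES];
};

/* Raise the priority of 'start' and everything it transitively depends
 * on, without recursion: the worklist is appended to while we walk it. */
static void bump_priority(struct node *start, int prio)
{
        struct node *list[MAX_NODES];
        int head = 0, tail = 0, i;

        list[tail++] = start;
        start->visited = 1;

        while (head < tail) {
                struct node *n = list[head++];

                for (i = 0; i < n->nr_signalers; i++) {
                        struct node *s = n->signalers[i];

                        /* Only queue dependencies that still need the bump. */
                        if (!s->visited && s->prio < prio) {
                                s->visited = 1;
                                list[tail++] = s;
                        }
                }
        }

        for (i = 0; i < tail; i++)
                if (list[i]->prio < prio)
                        list[i]->prio = prio;
}

int main(void)
{
        struct node a = { .prio = 0 }, b = { .prio = 0 }, c = { .prio = 0 };

        /* c waits on b, b waits on a: bumping c must also bump b and a. */
        c.signalers[c.nr_signalers++] = &b;
        b.signalers[b.nr_signalers++] = &a;

        bump_priority(&c, 1024);
        printf("a=%d b=%d c=%d\n", a.prio, b.prio, c.prio);    /* 1024 1024 1024 */
        return 0;
}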
@ -1055,8 +1151,9 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
pt->priority = prio; pt->priority = prio;
if (!list_empty(&pt->link)) { if (!list_empty(&pt->link)) {
__list_del_entry(&pt->link); __list_del_entry(&pt->link);
insert_request(engine, pt, prio); queue_request(engine, pt, prio);
} }
submit_queue(engine, prio);
} }
spin_unlock_irq(&engine->timeline->lock); spin_unlock_irq(&engine->timeline->lock);
@ -1158,7 +1255,7 @@ static void execlists_context_unpin(struct intel_engine_cs *engine,
i915_gem_context_put(ctx); i915_gem_context_put(ctx);
} }
static int execlists_request_alloc(struct drm_i915_gem_request *request) static int execlists_request_alloc(struct i915_request *request)
{ {
struct intel_engine_cs *engine = request->engine; struct intel_engine_cs *engine = request->engine;
struct intel_context *ce = &request->ctx->engine[engine->id]; struct intel_context *ce = &request->ctx->engine[engine->id];
@ -1590,7 +1687,7 @@ static void reset_irq(struct intel_engine_cs *engine)
} }
static void reset_common_ring(struct intel_engine_cs *engine, static void reset_common_ring(struct intel_engine_cs *engine,
struct drm_i915_gem_request *request) struct i915_request *request)
{ {
struct intel_engine_execlists * const execlists = &engine->execlists; struct intel_engine_execlists * const execlists = &engine->execlists;
struct intel_context *ce; struct intel_context *ce;
@ -1599,9 +1696,10 @@ static void reset_common_ring(struct intel_engine_cs *engine,
GEM_TRACE("%s seqno=%x\n", GEM_TRACE("%s seqno=%x\n",
engine->name, request ? request->global_seqno : 0); engine->name, request ? request->global_seqno : 0);
reset_irq(engine); /* See execlists_cancel_requests() for the irq/spinlock split. */
local_irq_save(flags);
spin_lock_irqsave(&engine->timeline->lock, flags); reset_irq(engine);
/* /*
* Catch up with any missed context-switch interrupts. * Catch up with any missed context-switch interrupts.
@ -1615,14 +1713,17 @@ static void reset_common_ring(struct intel_engine_cs *engine,
execlists_cancel_port_requests(execlists); execlists_cancel_port_requests(execlists);
/* Push back any incomplete requests for replay after the reset. */ /* Push back any incomplete requests for replay after the reset. */
spin_lock(&engine->timeline->lock);
__unwind_incomplete_requests(engine); __unwind_incomplete_requests(engine);
spin_unlock(&engine->timeline->lock);
spin_unlock_irqrestore(&engine->timeline->lock, flags);
/* Mark all CS interrupts as complete */ /* Mark all CS interrupts as complete */
execlists->active = 0; execlists->active = 0;
/* If the request was innocent, we leave the request in the ELSP local_irq_restore(flags);
/*
* If the request was innocent, we leave the request in the ELSP
* and will try to replay it on restarting. The context image may * and will try to replay it on restarting. The context image may
* have been corrupted by the reset, in which case we may have * have been corrupted by the reset, in which case we may have
* to service a new GPU hang, but more likely we can continue on * to service a new GPU hang, but more likely we can continue on
@ -1635,7 +1736,8 @@ static void reset_common_ring(struct intel_engine_cs *engine,
if (!request || request->fence.error != -EIO) if (!request || request->fence.error != -EIO)
return; return;
/* We want a simple context + ring to execute the breadcrumb update. /*
* We want a simple context + ring to execute the breadcrumb update.
* We cannot rely on the context being intact across the GPU hang, * We cannot rely on the context being intact across the GPU hang,
* so clear it and rebuild just what we need for the breadcrumb. * so clear it and rebuild just what we need for the breadcrumb.
* All pending requests for this context will be zapped, and any * All pending requests for this context will be zapped, and any
@ -1658,15 +1760,15 @@ static void reset_common_ring(struct intel_engine_cs *engine,
unwind_wa_tail(request); unwind_wa_tail(request);
} }
static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) static int intel_logical_ring_emit_pdps(struct i915_request *rq)
{ {
struct i915_hw_ppgtt *ppgtt = req->ctx->ppgtt; struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt;
struct intel_engine_cs *engine = req->engine; struct intel_engine_cs *engine = rq->engine;
const int num_lri_cmds = GEN8_3LVL_PDPES * 2; const int num_lri_cmds = GEN8_3LVL_PDPES * 2;
u32 *cs; u32 *cs;
int i; int i;
cs = intel_ring_begin(req, num_lri_cmds * 2 + 2); cs = intel_ring_begin(rq, num_lri_cmds * 2 + 2);
if (IS_ERR(cs)) if (IS_ERR(cs))
return PTR_ERR(cs); return PTR_ERR(cs);
@ -1681,12 +1783,12 @@ static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req)
} }
*cs++ = MI_NOOP; *cs++ = MI_NOOP;
intel_ring_advance(req, cs); intel_ring_advance(rq, cs);
return 0; return 0;
} }
static int gen8_emit_bb_start(struct drm_i915_gem_request *req, static int gen8_emit_bb_start(struct i915_request *rq,
u64 offset, u32 len, u64 offset, u32 len,
const unsigned int flags) const unsigned int flags)
{ {
@ -1699,18 +1801,18 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req,
* it is unsafe in case of lite-restore (because the ctx is * it is unsafe in case of lite-restore (because the ctx is
* not idle). PML4 is allocated during ppgtt init so this is * not idle). PML4 is allocated during ppgtt init so this is
* not needed in 48-bit.*/ * not needed in 48-bit.*/
if (req->ctx->ppgtt && if (rq->ctx->ppgtt &&
(intel_engine_flag(req->engine) & req->ctx->ppgtt->pd_dirty_rings) && (intel_engine_flag(rq->engine) & rq->ctx->ppgtt->pd_dirty_rings) &&
!i915_vm_is_48bit(&req->ctx->ppgtt->base) && !i915_vm_is_48bit(&rq->ctx->ppgtt->base) &&
!intel_vgpu_active(req->i915)) { !intel_vgpu_active(rq->i915)) {
ret = intel_logical_ring_emit_pdps(req); ret = intel_logical_ring_emit_pdps(rq);
if (ret) if (ret)
return ret; return ret;
req->ctx->ppgtt->pd_dirty_rings &= ~intel_engine_flag(req->engine); rq->ctx->ppgtt->pd_dirty_rings &= ~intel_engine_flag(rq->engine);
} }
cs = intel_ring_begin(req, 4); cs = intel_ring_begin(rq, 4);
if (IS_ERR(cs)) if (IS_ERR(cs))
return PTR_ERR(cs); return PTR_ERR(cs);
@ -1739,7 +1841,7 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req,
(flags & I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0); (flags & I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0);
*cs++ = lower_32_bits(offset); *cs++ = lower_32_bits(offset);
*cs++ = upper_32_bits(offset); *cs++ = upper_32_bits(offset);
intel_ring_advance(req, cs); intel_ring_advance(rq, cs);
return 0; return 0;
} }
@ -1758,7 +1860,7 @@ static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine)
I915_WRITE_IMR(engine, ~engine->irq_keep_mask); I915_WRITE_IMR(engine, ~engine->irq_keep_mask);
} }
static int gen8_emit_flush(struct drm_i915_gem_request *request, u32 mode) static int gen8_emit_flush(struct i915_request *request, u32 mode)
{ {
u32 cmd, *cs; u32 cmd, *cs;
@ -1790,7 +1892,7 @@ static int gen8_emit_flush(struct drm_i915_gem_request *request, u32 mode)
return 0; return 0;
} }
static int gen8_emit_flush_render(struct drm_i915_gem_request *request, static int gen8_emit_flush_render(struct i915_request *request,
u32 mode) u32 mode)
{ {
struct intel_engine_cs *engine = request->engine; struct intel_engine_cs *engine = request->engine;
@ -1865,7 +1967,7 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request,
* used as a workaround for not being allowed to do lite * used as a workaround for not being allowed to do lite
* restore with HEAD==TAIL (WaIdleLiteRestore). * restore with HEAD==TAIL (WaIdleLiteRestore).
*/ */
static void gen8_emit_wa_tail(struct drm_i915_gem_request *request, u32 *cs) static void gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
{ {
/* Ensure there's always at least one preemption point per-request. */ /* Ensure there's always at least one preemption point per-request. */
*cs++ = MI_ARB_CHECK; *cs++ = MI_ARB_CHECK;
@ -1873,7 +1975,7 @@ static void gen8_emit_wa_tail(struct drm_i915_gem_request *request, u32 *cs)
request->wa_tail = intel_ring_offset(request, cs); request->wa_tail = intel_ring_offset(request, cs);
} }
static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, u32 *cs) static void gen8_emit_breadcrumb(struct i915_request *request, u32 *cs)
{ {
/* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */ /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5)); BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5));
@ -1889,8 +1991,7 @@ static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, u32 *cs)
} }
static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS; static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS;
static void gen8_emit_breadcrumb_rcs(struct drm_i915_gem_request *request, static void gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs)
u32 *cs)
{ {
/* We're using qword write, seqno should be aligned to 8 bytes. */ /* We're using qword write, seqno should be aligned to 8 bytes. */
BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1); BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1);
@ -1906,15 +2007,15 @@ static void gen8_emit_breadcrumb_rcs(struct drm_i915_gem_request *request,
} }
static const int gen8_emit_breadcrumb_rcs_sz = 8 + WA_TAIL_DWORDS; static const int gen8_emit_breadcrumb_rcs_sz = 8 + WA_TAIL_DWORDS;
static int gen8_init_rcs_context(struct drm_i915_gem_request *req) static int gen8_init_rcs_context(struct i915_request *rq)
{ {
int ret; int ret;
ret = intel_ring_workarounds_emit(req); ret = intel_ring_workarounds_emit(rq);
if (ret) if (ret)
return ret; return ret;
ret = intel_rcs_context_init_mocs(req); ret = intel_rcs_context_init_mocs(rq);
/* /*
* Failing to program the MOCS is non-fatal.The system will not * Failing to program the MOCS is non-fatal.The system will not
* run at peak performance. So generate an error and carry on. * run at peak performance. So generate an error and carry on.
@ -1922,7 +2023,7 @@ static int gen8_init_rcs_context(struct drm_i915_gem_request *req)
if (ret) if (ret)
DRM_ERROR("MOCS failed to program: expect performance issues.\n"); DRM_ERROR("MOCS failed to program: expect performance issues.\n");
return i915_gem_render_state_emit(req); return i915_gem_render_state_emit(rq);
} }
/** /**
@ -1996,8 +2097,17 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
engine->set_default_submission = execlists_set_default_submission; engine->set_default_submission = execlists_set_default_submission;
engine->irq_enable = gen8_logical_ring_enable_irq; if (INTEL_GEN(engine->i915) < 11) {
engine->irq_disable = gen8_logical_ring_disable_irq; engine->irq_enable = gen8_logical_ring_enable_irq;
engine->irq_disable = gen8_logical_ring_disable_irq;
} else {
/*
* TODO: On Gen11 interrupt masks need to be clear
* to allow C6 entry. Keep interrupts enabled at
* all times and take the hit of generating extra interrupts
* until a more refined solution exists.
*/
}
engine->emit_bb_start = gen8_emit_bb_start; engine->emit_bb_start = gen8_emit_bb_start;
} }
@ -2049,8 +2159,15 @@ static int logical_ring_init(struct intel_engine_cs *engine)
if (ret) if (ret)
goto error; goto error;
engine->execlists.elsp = if (HAS_LOGICAL_RING_ELSQ(engine->i915)) {
engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine)); engine->execlists.submit_reg = engine->i915->regs +
i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(engine));
engine->execlists.ctrl_reg = engine->i915->regs +
i915_mmio_reg_offset(RING_EXECLIST_CONTROL(engine));
} else {
engine->execlists.submit_reg = engine->i915->regs +
i915_mmio_reg_offset(RING_ELSP(engine));
}
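The branch above implies two submission flavours. Pre-Gen11, as I understand it, both ports are submitted by pushing their 64-bit context descriptors dword by dword through the single ELSP register, with the final dword write acting as the trigger; the Gen11 path instead appears to stage per-port descriptors in the EXECLIST_SQ_CONTENTS registers and latch them with the EL_CTRL_LOAD bit of EXECLIST_CONTROL (see the new register definitions further down). The following is only a stand-alone sketch of that distinction; the fake register offsets, helper names and two-port layout are invented for the illustration and are not the driver's code.

#include <stdint.h>
#include <stdio.h>

/* A fake MMIO window standing in for the engine's register file. */
#define FAKE_ELSP         0x230   /* legacy Execlist Submit Port */
#define FAKE_ELSQ_BASE    0x510   /* SQ contents, two dwords per port in this model */
#define FAKE_EL_CTRL      0x550   /* control register */
#define FAKE_EL_CTRL_LOAD (1u << 0)

static uint32_t fake_mmio[0x1000 / 4];

static void fake_writel(uint32_t val, uint32_t offset)
{
        fake_mmio[offset / 4] = val;
        printf("write 0x%08x -> reg 0x%03x\n", (unsigned int)val, (unsigned int)offset);
}

/* Legacy flavour: each descriptor goes through the single ELSP register,
 * high dword then low dword; the last dword written commits the ports. */
static void submit_elsp(const uint64_t desc[2])
{
        int port;

        for (port = 1; port >= 0; port--) {
                fake_writel((uint32_t)(desc[port] >> 32), FAKE_ELSP);
                fake_writel((uint32_t)desc[port], FAKE_ELSP);
        }
}

/* Gen11-style flavour: descriptors are staged per port in the SQ contents
 * registers and only take effect once the load bit is set. */
static void submit_elsq(const uint64_t desc[2])
{
        int port;

        for (port = 0; port < 2; port++) {
                fake_writel((uint32_t)desc[port], FAKE_ELSQ_BASE + 8 * port);
                fake_writel((uint32_t)(desc[port] >> 32), FAKE_ELSQ_BASE + 8 * port + 4);
        }
        fake_writel(FAKE_EL_CTRL_LOAD, FAKE_EL_CTRL);
}

int main(void)
{
        const uint64_t desc[2] = { 0x1111222233334444ull, 0x5555666677778888ull };

        puts("legacy ELSP:");
        submit_elsp(desc);
        puts("ELSQ + control:");
        submit_elsq(desc);
        return 0;
}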
engine->execlists.preempt_complete_status = ~0u; engine->execlists.preempt_complete_status = ~0u;
if (engine->i915->preempt_context) if (engine->i915->preempt_context)
@ -2136,7 +2253,7 @@ make_rpcs(struct drm_i915_private *dev_priv)
if (INTEL_INFO(dev_priv)->sseu.has_subslice_pg) { if (INTEL_INFO(dev_priv)->sseu.has_subslice_pg) {
rpcs |= GEN8_RPCS_SS_CNT_ENABLE; rpcs |= GEN8_RPCS_SS_CNT_ENABLE;
rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask) << rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask[0]) <<
GEN8_RPCS_SS_CNT_SHIFT; GEN8_RPCS_SS_CNT_SHIFT;
rpcs |= GEN8_RPCS_ENABLE; rpcs |= GEN8_RPCS_ENABLE;
} }
@ -2160,6 +2277,10 @@ static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine)
default: default:
MISSING_CASE(INTEL_GEN(engine->i915)); MISSING_CASE(INTEL_GEN(engine->i915));
/* fall through */ /* fall through */
case 11:
indirect_ctx_offset =
GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
break;
case 10: case 10:
indirect_ctx_offset = indirect_ctx_offset =
GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
@ -2319,7 +2440,7 @@ populate_lr_context(struct i915_gem_context *ctx,
if (!engine->default_state) if (!engine->default_state)
regs[CTX_CONTEXT_CONTROL + 1] |= regs[CTX_CONTEXT_CONTROL + 1] |=
_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
if (ctx == ctx->i915->preempt_context) if (ctx == ctx->i915->preempt_context && INTEL_GEN(engine->i915) < 11)
regs[CTX_CONTEXT_CONTROL + 1] |= regs[CTX_CONTEXT_CONTROL + 1] |=
_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT); CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT);


@ -42,6 +42,9 @@
#define RING_CONTEXT_STATUS_BUF_LO(engine, i) _MMIO((engine)->mmio_base + 0x370 + (i) * 8) #define RING_CONTEXT_STATUS_BUF_LO(engine, i) _MMIO((engine)->mmio_base + 0x370 + (i) * 8)
#define RING_CONTEXT_STATUS_BUF_HI(engine, i) _MMIO((engine)->mmio_base + 0x370 + (i) * 8 + 4) #define RING_CONTEXT_STATUS_BUF_HI(engine, i) _MMIO((engine)->mmio_base + 0x370 + (i) * 8 + 4)
#define RING_CONTEXT_STATUS_PTR(engine) _MMIO((engine)->mmio_base + 0x3a0) #define RING_CONTEXT_STATUS_PTR(engine) _MMIO((engine)->mmio_base + 0x3a0)
#define RING_EXECLIST_SQ_CONTENTS(engine) _MMIO((engine)->mmio_base + 0x510)
#define RING_EXECLIST_CONTROL(engine) _MMIO((engine)->mmio_base + 0x550)
#define EL_CTRL_LOAD (1 << 0)
/* The docs specify that the write pointer wraps around after 5h, "After status /* The docs specify that the write pointer wraps around after 5h, "After status
* is written out to the last available status QW at offset 5h, this pointer * is written out to the last available status QW at offset 5h, this pointer


@ -63,5 +63,6 @@
#define GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x17 #define GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x17
#define GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x26 #define GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x26
#define GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x19 #define GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x19
#define GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x1A
#endif /* _INTEL_LRC_REG_H_ */ #endif /* _INTEL_LRC_REG_H_ */


@ -265,7 +265,7 @@ int intel_mocs_init_engine(struct intel_engine_cs *engine)
/** /**
* emit_mocs_control_table() - emit the mocs control table * emit_mocs_control_table() - emit the mocs control table
* @req: Request to set up the MOCS table for. * @rq: Request to set up the MOCS table for.
* @table: The values to program into the control regs. * @table: The values to program into the control regs.
* *
* This function simply emits a MI_LOAD_REGISTER_IMM command for the * This function simply emits a MI_LOAD_REGISTER_IMM command for the
@ -273,17 +273,17 @@ int intel_mocs_init_engine(struct intel_engine_cs *engine)
* *
* Return: 0 on success, otherwise the error status. * Return: 0 on success, otherwise the error status.
*/ */
static int emit_mocs_control_table(struct drm_i915_gem_request *req, static int emit_mocs_control_table(struct i915_request *rq,
const struct drm_i915_mocs_table *table) const struct drm_i915_mocs_table *table)
{ {
enum intel_engine_id engine = req->engine->id; enum intel_engine_id engine = rq->engine->id;
unsigned int index; unsigned int index;
u32 *cs; u32 *cs;
if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES)) if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES))
return -ENODEV; return -ENODEV;
cs = intel_ring_begin(req, 2 + 2 * GEN9_NUM_MOCS_ENTRIES); cs = intel_ring_begin(rq, 2 + 2 * GEN9_NUM_MOCS_ENTRIES);
if (IS_ERR(cs)) if (IS_ERR(cs))
return PTR_ERR(cs); return PTR_ERR(cs);
@ -308,7 +308,7 @@ static int emit_mocs_control_table(struct drm_i915_gem_request *req,
} }
*cs++ = MI_NOOP; *cs++ = MI_NOOP;
intel_ring_advance(req, cs); intel_ring_advance(rq, cs);
return 0; return 0;
} }
@ -323,7 +323,7 @@ static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table,
/** /**
* emit_mocs_l3cc_table() - emit the mocs control table * emit_mocs_l3cc_table() - emit the mocs control table
* @req: Request to set up the MOCS table for. * @rq: Request to set up the MOCS table for.
* @table: The values to program into the control regs. * @table: The values to program into the control regs.
* *
* This function simply emits a MI_LOAD_REGISTER_IMM command for the * This function simply emits a MI_LOAD_REGISTER_IMM command for the
@ -332,7 +332,7 @@ static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table,
* *
* Return: 0 on success, otherwise the error status. * Return: 0 on success, otherwise the error status.
*/ */
static int emit_mocs_l3cc_table(struct drm_i915_gem_request *req, static int emit_mocs_l3cc_table(struct i915_request *rq,
const struct drm_i915_mocs_table *table) const struct drm_i915_mocs_table *table)
{ {
unsigned int i; unsigned int i;
@ -341,7 +341,7 @@ static int emit_mocs_l3cc_table(struct drm_i915_gem_request *req,
if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES)) if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES))
return -ENODEV; return -ENODEV;
cs = intel_ring_begin(req, 2 + GEN9_NUM_MOCS_ENTRIES); cs = intel_ring_begin(rq, 2 + GEN9_NUM_MOCS_ENTRIES);
if (IS_ERR(cs)) if (IS_ERR(cs))
return PTR_ERR(cs); return PTR_ERR(cs);
@ -370,7 +370,7 @@ static int emit_mocs_l3cc_table(struct drm_i915_gem_request *req,
} }
*cs++ = MI_NOOP; *cs++ = MI_NOOP;
intel_ring_advance(req, cs); intel_ring_advance(rq, cs);
return 0; return 0;
} }
@ -417,7 +417,7 @@ void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv)
/** /**
* intel_rcs_context_init_mocs() - program the MOCS register. * intel_rcs_context_init_mocs() - program the MOCS register.
* @req: Request to set up the MOCS tables for. * @rq: Request to set up the MOCS tables for.
* *
* This function will emit a batch buffer with the values required for * This function will emit a batch buffer with the values required for
* programming the MOCS register values for all the currently supported * programming the MOCS register values for all the currently supported
@ -431,19 +431,19 @@ void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv)
* *
* Return: 0 on success, otherwise the error status. * Return: 0 on success, otherwise the error status.
*/ */
int intel_rcs_context_init_mocs(struct drm_i915_gem_request *req) int intel_rcs_context_init_mocs(struct i915_request *rq)
{ {
struct drm_i915_mocs_table t; struct drm_i915_mocs_table t;
int ret; int ret;
if (get_mocs_settings(req->i915, &t)) { if (get_mocs_settings(rq->i915, &t)) {
/* Program the RCS control registers */ /* Program the RCS control registers */
ret = emit_mocs_control_table(req, &t); ret = emit_mocs_control_table(rq, &t);
if (ret) if (ret)
return ret; return ret;
/* Now program the l3cc registers */ /* Now program the l3cc registers */
ret = emit_mocs_l3cc_table(req, &t); ret = emit_mocs_l3cc_table(rq, &t);
if (ret) if (ret)
return ret; return ret;
} }


@ -52,7 +52,7 @@
#include <drm/drmP.h> #include <drm/drmP.h>
#include "i915_drv.h" #include "i915_drv.h"
int intel_rcs_context_init_mocs(struct drm_i915_gem_request *req); int intel_rcs_context_init_mocs(struct i915_request *rq);
void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv); void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv);
int intel_mocs_init_engine(struct intel_engine_cs *engine); int intel_mocs_init_engine(struct intel_engine_cs *engine);


@ -234,50 +234,50 @@ static void intel_overlay_unmap_regs(struct intel_overlay *overlay,
} }
static void intel_overlay_submit_request(struct intel_overlay *overlay, static void intel_overlay_submit_request(struct intel_overlay *overlay,
struct drm_i915_gem_request *req, struct i915_request *rq,
i915_gem_retire_fn retire) i915_gem_retire_fn retire)
{ {
GEM_BUG_ON(i915_gem_active_peek(&overlay->last_flip, GEM_BUG_ON(i915_gem_active_peek(&overlay->last_flip,
&overlay->i915->drm.struct_mutex)); &overlay->i915->drm.struct_mutex));
i915_gem_active_set_retire_fn(&overlay->last_flip, retire, i915_gem_active_set_retire_fn(&overlay->last_flip, retire,
&overlay->i915->drm.struct_mutex); &overlay->i915->drm.struct_mutex);
i915_gem_active_set(&overlay->last_flip, req); i915_gem_active_set(&overlay->last_flip, rq);
i915_add_request(req); i915_request_add(rq);
} }
static int intel_overlay_do_wait_request(struct intel_overlay *overlay, static int intel_overlay_do_wait_request(struct intel_overlay *overlay,
struct drm_i915_gem_request *req, struct i915_request *rq,
i915_gem_retire_fn retire) i915_gem_retire_fn retire)
{ {
intel_overlay_submit_request(overlay, req, retire); intel_overlay_submit_request(overlay, rq, retire);
return i915_gem_active_retire(&overlay->last_flip, return i915_gem_active_retire(&overlay->last_flip,
&overlay->i915->drm.struct_mutex); &overlay->i915->drm.struct_mutex);
} }
static struct drm_i915_gem_request *alloc_request(struct intel_overlay *overlay) static struct i915_request *alloc_request(struct intel_overlay *overlay)
{ {
struct drm_i915_private *dev_priv = overlay->i915; struct drm_i915_private *dev_priv = overlay->i915;
struct intel_engine_cs *engine = dev_priv->engine[RCS]; struct intel_engine_cs *engine = dev_priv->engine[RCS];
return i915_gem_request_alloc(engine, dev_priv->kernel_context); return i915_request_alloc(engine, dev_priv->kernel_context);
} }
/* overlay needs to be disable in OCMD reg */ /* overlay needs to be disable in OCMD reg */
static int intel_overlay_on(struct intel_overlay *overlay) static int intel_overlay_on(struct intel_overlay *overlay)
{ {
struct drm_i915_private *dev_priv = overlay->i915; struct drm_i915_private *dev_priv = overlay->i915;
struct drm_i915_gem_request *req; struct i915_request *rq;
u32 *cs; u32 *cs;
WARN_ON(overlay->active); WARN_ON(overlay->active);
req = alloc_request(overlay); rq = alloc_request(overlay);
if (IS_ERR(req)) if (IS_ERR(rq))
return PTR_ERR(req); return PTR_ERR(rq);
cs = intel_ring_begin(req, 4); cs = intel_ring_begin(rq, 4);
if (IS_ERR(cs)) { if (IS_ERR(cs)) {
i915_add_request(req); i915_request_add(rq);
return PTR_ERR(cs); return PTR_ERR(cs);
} }
@ -290,9 +290,9 @@ static int intel_overlay_on(struct intel_overlay *overlay)
*cs++ = overlay->flip_addr | OFC_UPDATE; *cs++ = overlay->flip_addr | OFC_UPDATE;
*cs++ = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP; *cs++ = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP;
*cs++ = MI_NOOP; *cs++ = MI_NOOP;
intel_ring_advance(req, cs); intel_ring_advance(rq, cs);
return intel_overlay_do_wait_request(overlay, req, NULL); return intel_overlay_do_wait_request(overlay, rq, NULL);
} }
static void intel_overlay_flip_prepare(struct intel_overlay *overlay, static void intel_overlay_flip_prepare(struct intel_overlay *overlay,
@ -322,7 +322,7 @@ static int intel_overlay_continue(struct intel_overlay *overlay,
bool load_polyphase_filter) bool load_polyphase_filter)
{ {
struct drm_i915_private *dev_priv = overlay->i915; struct drm_i915_private *dev_priv = overlay->i915;
struct drm_i915_gem_request *req; struct i915_request *rq;
u32 flip_addr = overlay->flip_addr; u32 flip_addr = overlay->flip_addr;
u32 tmp, *cs; u32 tmp, *cs;
@ -336,23 +336,23 @@ static int intel_overlay_continue(struct intel_overlay *overlay,
if (tmp & (1 << 17)) if (tmp & (1 << 17))
DRM_DEBUG("overlay underrun, DOVSTA: %x\n", tmp); DRM_DEBUG("overlay underrun, DOVSTA: %x\n", tmp);
req = alloc_request(overlay); rq = alloc_request(overlay);
if (IS_ERR(req)) if (IS_ERR(rq))
return PTR_ERR(req); return PTR_ERR(rq);
cs = intel_ring_begin(req, 2); cs = intel_ring_begin(rq, 2);
if (IS_ERR(cs)) { if (IS_ERR(cs)) {
i915_add_request(req); i915_request_add(rq);
return PTR_ERR(cs); return PTR_ERR(cs);
} }
*cs++ = MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE; *cs++ = MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE;
*cs++ = flip_addr; *cs++ = flip_addr;
intel_ring_advance(req, cs); intel_ring_advance(rq, cs);
intel_overlay_flip_prepare(overlay, vma); intel_overlay_flip_prepare(overlay, vma);
intel_overlay_submit_request(overlay, req, NULL); intel_overlay_submit_request(overlay, rq, NULL);
return 0; return 0;
} }
@ -373,7 +373,7 @@ static void intel_overlay_release_old_vma(struct intel_overlay *overlay)
} }
static void intel_overlay_release_old_vid_tail(struct i915_gem_active *active, static void intel_overlay_release_old_vid_tail(struct i915_gem_active *active,
struct drm_i915_gem_request *req) struct i915_request *rq)
{ {
struct intel_overlay *overlay = struct intel_overlay *overlay =
container_of(active, typeof(*overlay), last_flip); container_of(active, typeof(*overlay), last_flip);
@ -382,7 +382,7 @@ static void intel_overlay_release_old_vid_tail(struct i915_gem_active *active,
} }
static void intel_overlay_off_tail(struct i915_gem_active *active, static void intel_overlay_off_tail(struct i915_gem_active *active,
struct drm_i915_gem_request *req) struct i915_request *rq)
{ {
struct intel_overlay *overlay = struct intel_overlay *overlay =
container_of(active, typeof(*overlay), last_flip); container_of(active, typeof(*overlay), last_flip);
@ -401,7 +401,7 @@ static void intel_overlay_off_tail(struct i915_gem_active *active,
/* overlay needs to be disabled in OCMD reg */ /* overlay needs to be disabled in OCMD reg */
static int intel_overlay_off(struct intel_overlay *overlay) static int intel_overlay_off(struct intel_overlay *overlay)
{ {
struct drm_i915_gem_request *req; struct i915_request *rq;
u32 *cs, flip_addr = overlay->flip_addr; u32 *cs, flip_addr = overlay->flip_addr;
WARN_ON(!overlay->active); WARN_ON(!overlay->active);
@ -412,13 +412,13 @@ static int intel_overlay_off(struct intel_overlay *overlay)
* of the hw. Do it in both cases */ * of the hw. Do it in both cases */
flip_addr |= OFC_UPDATE; flip_addr |= OFC_UPDATE;
req = alloc_request(overlay); rq = alloc_request(overlay);
if (IS_ERR(req)) if (IS_ERR(rq))
return PTR_ERR(req); return PTR_ERR(rq);
cs = intel_ring_begin(req, 6); cs = intel_ring_begin(rq, 6);
if (IS_ERR(cs)) { if (IS_ERR(cs)) {
i915_add_request(req); i915_request_add(rq);
return PTR_ERR(cs); return PTR_ERR(cs);
} }
@ -432,11 +432,11 @@ static int intel_overlay_off(struct intel_overlay *overlay)
*cs++ = flip_addr; *cs++ = flip_addr;
*cs++ = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP; *cs++ = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP;
intel_ring_advance(req, cs); intel_ring_advance(rq, cs);
intel_overlay_flip_prepare(overlay, NULL); intel_overlay_flip_prepare(overlay, NULL);
return intel_overlay_do_wait_request(overlay, req, return intel_overlay_do_wait_request(overlay, rq,
intel_overlay_off_tail); intel_overlay_off_tail);
} }
@ -468,23 +468,23 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
if (I915_READ(ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT) { if (I915_READ(ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT) {
/* synchronous slowpath */ /* synchronous slowpath */
struct drm_i915_gem_request *req; struct i915_request *rq;
req = alloc_request(overlay); rq = alloc_request(overlay);
if (IS_ERR(req)) if (IS_ERR(rq))
return PTR_ERR(req); return PTR_ERR(rq);
cs = intel_ring_begin(req, 2); cs = intel_ring_begin(rq, 2);
if (IS_ERR(cs)) { if (IS_ERR(cs)) {
i915_add_request(req); i915_request_add(rq);
return PTR_ERR(cs); return PTR_ERR(cs);
} }
*cs++ = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP; *cs++ = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP;
*cs++ = MI_NOOP; *cs++ = MI_NOOP;
intel_ring_advance(req, cs); intel_ring_advance(rq, cs);
ret = intel_overlay_do_wait_request(overlay, req, ret = intel_overlay_do_wait_request(overlay, rq,
intel_overlay_release_old_vid_tail); intel_overlay_release_old_vid_tail);
if (ret) if (ret)
return ret; return ret;


@ -6360,7 +6360,7 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv)
mutex_unlock(&dev_priv->pcu_lock); mutex_unlock(&dev_priv->pcu_lock);
} }
void gen6_rps_boost(struct drm_i915_gem_request *rq, void gen6_rps_boost(struct i915_request *rq,
struct intel_rps_client *rps_client) struct intel_rps_client *rps_client)
{ {
struct intel_rps *rps = &rq->i915->gt_pm.rps; struct intel_rps *rps = &rq->i915->gt_pm.rps;
@ -6376,7 +6376,7 @@ void gen6_rps_boost(struct drm_i915_gem_request *rq,
if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags)) if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
return; return;
/* Serializes with i915_gem_request_retire() */ /* Serializes with i915_request_retire() */
boost = false; boost = false;
spin_lock_irqsave(&rq->lock, flags); spin_lock_irqsave(&rq->lock, flags);
if (!rq->waitboost && !dma_fence_is_signaled_locked(&rq->fence)) { if (!rq->waitboost && !dma_fence_is_signaled_locked(&rq->fence)) {
@ -6715,7 +6715,7 @@ static void gen9_enable_rc6(struct drm_i915_private *dev_priv)
/* /*
* 3b: Enable Coarse Power Gating only when RC6 is enabled. * 3b: Enable Coarse Power Gating only when RC6 is enabled.
* WaRsDisableCoarsePowerGating:skl,bxt - Render/Media PG need to be disabled with RC6. * WaRsDisableCoarsePowerGating:skl,cnl - Render/Media PG need to be disabled with RC6.
*/ */
if (NEEDS_WaRsDisableCoarsePowerGating(dev_priv)) if (NEEDS_WaRsDisableCoarsePowerGating(dev_priv))
I915_WRITE(GEN9_PG_ENABLE, 0); I915_WRITE(GEN9_PG_ENABLE, 0);
@ -8026,7 +8026,10 @@ void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv)
dev_priv->gt_pm.rc6.enabled = true; /* force RC6 disabling */ dev_priv->gt_pm.rc6.enabled = true; /* force RC6 disabling */
intel_disable_gt_powersave(dev_priv); intel_disable_gt_powersave(dev_priv);
gen6_reset_rps_interrupts(dev_priv); if (INTEL_GEN(dev_priv) < 11)
gen6_reset_rps_interrupts(dev_priv);
else
WARN_ON_ONCE(1);
} }
static inline void intel_disable_llc_pstate(struct drm_i915_private *i915) static inline void intel_disable_llc_pstate(struct drm_i915_private *i915)
@ -8139,6 +8142,8 @@ static void intel_enable_rps(struct drm_i915_private *dev_priv)
cherryview_enable_rps(dev_priv); cherryview_enable_rps(dev_priv);
} else if (IS_VALLEYVIEW(dev_priv)) { } else if (IS_VALLEYVIEW(dev_priv)) {
valleyview_enable_rps(dev_priv); valleyview_enable_rps(dev_priv);
} else if (WARN_ON_ONCE(INTEL_GEN(dev_priv) >= 11)) {
/* TODO */
} else if (INTEL_GEN(dev_priv) >= 9) { } else if (INTEL_GEN(dev_priv) >= 9) {
gen9_enable_rps(dev_priv); gen9_enable_rps(dev_priv);
} else if (IS_BROADWELL(dev_priv)) { } else if (IS_BROADWELL(dev_priv)) {
@ -8487,7 +8492,7 @@ static void cnp_init_clock_gating(struct drm_i915_private *dev_priv)
if (!HAS_PCH_CNP(dev_priv)) if (!HAS_PCH_CNP(dev_priv))
return; return;
/* Display WA #1181: cnp */ /* Display WA #1181 WaSouthDisplayDisablePWMCGEGating: cnp */
I915_WRITE(SOUTH_DSPCLK_GATE_D, I915_READ(SOUTH_DSPCLK_GATE_D) | I915_WRITE(SOUTH_DSPCLK_GATE_D, I915_READ(SOUTH_DSPCLK_GATE_D) |
CNP_PWM_CGE_GATING_DISABLE); CNP_PWM_CGE_GATING_DISABLE);
} }
@ -8517,7 +8522,13 @@ static void cnl_init_clock_gating(struct drm_i915_private *dev_priv)
val |= SARBUNIT_CLKGATE_DIS; val |= SARBUNIT_CLKGATE_DIS;
I915_WRITE(SLICE_UNIT_LEVEL_CLKGATE, val); I915_WRITE(SLICE_UNIT_LEVEL_CLKGATE, val);
/* Wa_2201832410:cnl */
val = I915_READ(SUBSLICE_UNIT_LEVEL_CLKGATE);
val |= GWUNIT_CLKGATE_DIS;
I915_WRITE(SUBSLICE_UNIT_LEVEL_CLKGATE, val);
/* WaDisableVFclkgate:cnl */ /* WaDisableVFclkgate:cnl */
/* WaVFUnitClockGatingDisable:cnl */
val = I915_READ(UNSLICE_UNIT_LEVEL_CLKGATE); val = I915_READ(UNSLICE_UNIT_LEVEL_CLKGATE);
val |= VFUNIT_CLKGATE_DIS; val |= VFUNIT_CLKGATE_DIS;
I915_WRITE(UNSLICE_UNIT_LEVEL_CLKGATE, val); I915_WRITE(UNSLICE_UNIT_LEVEL_CLKGATE, val);


@ -56,6 +56,111 @@
#include "intel_drv.h" #include "intel_drv.h"
#include "i915_drv.h" #include "i915_drv.h"
static inline enum intel_display_power_domain
psr_aux_domain(struct intel_dp *intel_dp)
{
/* CNL HW requires corresponding AUX IOs to be powered up for PSR.
* However, for non-A AUX ports the corresponding non-EDP transcoders
* would have already enabled power well 2 and DC_OFF. This means we can
* acquire a wider POWER_DOMAIN_AUX_{B,C,D,F} reference instead of a
* specific AUX_IO reference without powering up any extra wells.
* Note that PSR is enabled only on Port A even though this function
* returns the correct domain for other ports too.
*/
return intel_dp->aux_ch == AUX_CH_A ? POWER_DOMAIN_AUX_IO_A :
intel_dp->aux_power_domain;
}
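Put concretely: PSR is only enabled on port A, so this normally hands back the dedicated AUX_IO_A well; asked about any other port it falls back to the ordinary POWER_DOMAIN_AUX_{B,C,D,F} reference, which the comment argues is already effectively powered whenever a non-eDP transcoder is driving that port.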
static void psr_aux_io_power_get(struct intel_dp *intel_dp)
{
struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev);
if (INTEL_GEN(dev_priv) < 10)
return;
intel_display_power_get(dev_priv, psr_aux_domain(intel_dp));
}
static void psr_aux_io_power_put(struct intel_dp *intel_dp)
{
struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev);
if (INTEL_GEN(dev_priv) < 10)
return;
intel_display_power_put(dev_priv, psr_aux_domain(intel_dp));
}
static bool intel_dp_get_y_cord_status(struct intel_dp *intel_dp)
{
uint8_t psr_caps = 0;
if (drm_dp_dpcd_readb(&intel_dp->aux, DP_PSR_CAPS, &psr_caps) != 1)
return false;
return psr_caps & DP_PSR2_SU_Y_COORDINATE_REQUIRED;
}
static bool intel_dp_get_colorimetry_status(struct intel_dp *intel_dp)
{
uint8_t dprx = 0;
if (drm_dp_dpcd_readb(&intel_dp->aux, DP_DPRX_FEATURE_ENUMERATION_LIST,
&dprx) != 1)
return false;
return dprx & DP_VSC_SDP_EXT_FOR_COLORIMETRY_SUPPORTED;
}
static bool intel_dp_get_alpm_status(struct intel_dp *intel_dp)
{
uint8_t alpm_caps = 0;
if (drm_dp_dpcd_readb(&intel_dp->aux, DP_RECEIVER_ALPM_CAP,
&alpm_caps) != 1)
return false;
return alpm_caps & DP_ALPM_CAP;
}
void intel_psr_init_dpcd(struct intel_dp *intel_dp)
{
struct drm_i915_private *dev_priv =
to_i915(dp_to_dig_port(intel_dp)->base.base.dev);
drm_dp_dpcd_read(&intel_dp->aux, DP_PSR_SUPPORT, intel_dp->psr_dpcd,
sizeof(intel_dp->psr_dpcd));
if (intel_dp->psr_dpcd[0] & DP_PSR_IS_SUPPORTED) {
dev_priv->psr.sink_support = true;
DRM_DEBUG_KMS("Detected EDP PSR Panel.\n");
}
if (INTEL_GEN(dev_priv) >= 9 &&
(intel_dp->psr_dpcd[0] & DP_PSR2_IS_SUPPORTED)) {
uint8_t frame_sync_cap;
dev_priv->psr.sink_support = true;
if (drm_dp_dpcd_readb(&intel_dp->aux,
DP_SINK_DEVICE_AUX_FRAME_SYNC_CAP,
&frame_sync_cap) != 1)
frame_sync_cap = 0;
dev_priv->psr.aux_frame_sync = frame_sync_cap & DP_AUX_FRAME_SYNC_CAP;
/* PSR2 needs frame sync as well */
dev_priv->psr.psr2_support = dev_priv->psr.aux_frame_sync;
DRM_DEBUG_KMS("PSR2 %s on sink",
dev_priv->psr.psr2_support ? "supported" : "not supported");
if (dev_priv->psr.psr2_support) {
dev_priv->psr.y_cord_support =
intel_dp_get_y_cord_status(intel_dp);
dev_priv->psr.colorimetry_support =
intel_dp_get_colorimetry_status(intel_dp);
dev_priv->psr.alpm =
intel_dp_get_alpm_status(intel_dp);
}
}
}
static bool vlv_is_psr_active_on_pipe(struct drm_device *dev, int pipe) static bool vlv_is_psr_active_on_pipe(struct drm_device *dev, int pipe)
{ {
struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_private *dev_priv = to_i915(dev);
@ -341,6 +446,50 @@ static void hsw_psr_activate(struct intel_dp *intel_dp)
hsw_activate_psr1(intel_dp); hsw_activate_psr1(intel_dp);
} }
static bool intel_psr2_config_valid(struct intel_dp *intel_dp,
struct intel_crtc_state *crtc_state)
{
struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev);
int crtc_hdisplay = crtc_state->base.adjusted_mode.crtc_hdisplay;
int crtc_vdisplay = crtc_state->base.adjusted_mode.crtc_vdisplay;
int psr_max_h = 0, psr_max_v = 0;
/*
* FIXME psr2_support is messed up. It's both computed
* dynamically during PSR enable, and extracted from sink
* caps during eDP detection.
*/
if (!dev_priv->psr.psr2_support)
return false;
if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) {
psr_max_h = 4096;
psr_max_v = 2304;
} else if (IS_GEN9(dev_priv)) {
psr_max_h = 3640;
psr_max_v = 2304;
}
if (crtc_hdisplay > psr_max_h || crtc_vdisplay > psr_max_v) {
DRM_DEBUG_KMS("PSR2 not enabled, resolution %dx%d > max supported %dx%d\n",
crtc_hdisplay, crtc_vdisplay,
psr_max_h, psr_max_v);
return false;
}
/*
* FIXME: enable psr2 only for y-coordinate psr2 panels
* After gtc implementation, remove this restriction.
*/
if (!dev_priv->psr.y_cord_support) {
DRM_DEBUG_KMS("PSR2 not enabled, panel does not support Y coordinate\n");
return false;
}
return true;
}
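By these limits, a 3200x2000 eDP panel qualifies for PSR2 on every platform that reaches this check, a 3840x2160 panel is rejected on GEN9 (3840 > 3640) but accepted on GEN10+ and Geminilake (3840 <= 4096 and 2160 <= 2304), and anything that matches neither branch leaves psr_max_h/psr_max_v at 0 so the check always fails.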
void intel_psr_compute_config(struct intel_dp *intel_dp, void intel_psr_compute_config(struct intel_dp *intel_dp,
struct intel_crtc_state *crtc_state) struct intel_crtc_state *crtc_state)
{ {
@ -403,34 +552,14 @@ void intel_psr_compute_config(struct intel_dp *intel_dp,
return; return;
} }
/* if (!(intel_dp->edp_dpcd[1] & DP_EDP_SET_POWER_CAP)) {
* FIXME psr2_support is messed up. It's both computed DRM_DEBUG_KMS("PSR condition failed: panel lacks power state control\n");
* dynamically during PSR enable, and extracted from sink
* caps during eDP detection.
*/
if (!dev_priv->psr.psr2_support) {
crtc_state->has_psr = true;
return;
}
/* PSR2 is restricted to work with panel resolutions upto 3200x2000 */
if (adjusted_mode->crtc_hdisplay > 3200 ||
adjusted_mode->crtc_vdisplay > 2000) {
DRM_DEBUG_KMS("PSR2 disabled, panel resolution too big\n");
return;
}
/*
* FIXME:enable psr2 only for y-cordinate psr2 panels
* After gtc implementation , remove this restriction.
*/
if (!dev_priv->psr.y_cord_support) {
DRM_DEBUG_KMS("PSR2 disabled, panel does not support Y coordinate\n");
return; return;
} }
crtc_state->has_psr = true; crtc_state->has_psr = true;
crtc_state->has_psr2 = true; crtc_state->has_psr2 = intel_psr2_config_valid(intel_dp, crtc_state);
DRM_DEBUG_KMS("Enabling PSR%s\n", crtc_state->has_psr2 ? "2" : "");
} }
static void intel_psr_activate(struct intel_dp *intel_dp) static void intel_psr_activate(struct intel_dp *intel_dp)
@ -459,6 +588,8 @@ static void hsw_psr_enable_source(struct intel_dp *intel_dp,
enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; enum transcoder cpu_transcoder = crtc_state->cpu_transcoder;
u32 chicken; u32 chicken;
psr_aux_io_power_get(intel_dp);
if (dev_priv->psr.psr2_support) { if (dev_priv->psr.psr2_support) {
chicken = PSR2_VSC_ENABLE_PROG_HEADER; chicken = PSR2_VSC_ENABLE_PROG_HEADER;
if (dev_priv->psr.y_cord_support) if (dev_priv->psr.y_cord_support)
@ -617,6 +748,8 @@ static void hsw_psr_disable(struct intel_dp *intel_dp,
else else
WARN_ON(I915_READ(EDP_PSR_CTL) & EDP_PSR_ENABLE); WARN_ON(I915_READ(EDP_PSR_CTL) & EDP_PSR_ENABLE);
} }
psr_aux_io_power_put(intel_dp);
} }
/** /**


@ -66,7 +66,7 @@ unsigned int intel_ring_update_space(struct intel_ring *ring)
} }
static int static int
gen2_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) gen2_render_ring_flush(struct i915_request *rq, u32 mode)
{ {
u32 cmd, *cs; u32 cmd, *cs;
@ -75,19 +75,19 @@ gen2_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
if (mode & EMIT_INVALIDATE) if (mode & EMIT_INVALIDATE)
cmd |= MI_READ_FLUSH; cmd |= MI_READ_FLUSH;
cs = intel_ring_begin(req, 2); cs = intel_ring_begin(rq, 2);
if (IS_ERR(cs)) if (IS_ERR(cs))
return PTR_ERR(cs); return PTR_ERR(cs);
*cs++ = cmd; *cs++ = cmd;
*cs++ = MI_NOOP; *cs++ = MI_NOOP;
intel_ring_advance(req, cs); intel_ring_advance(rq, cs);
return 0; return 0;
} }
static int static int
gen4_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) gen4_render_ring_flush(struct i915_request *rq, u32 mode)
{ {
u32 cmd, *cs; u32 cmd, *cs;
@ -122,17 +122,17 @@ gen4_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
cmd = MI_FLUSH; cmd = MI_FLUSH;
if (mode & EMIT_INVALIDATE) { if (mode & EMIT_INVALIDATE) {
cmd |= MI_EXE_FLUSH; cmd |= MI_EXE_FLUSH;
if (IS_G4X(req->i915) || IS_GEN5(req->i915)) if (IS_G4X(rq->i915) || IS_GEN5(rq->i915))
cmd |= MI_INVALIDATE_ISP; cmd |= MI_INVALIDATE_ISP;
} }
cs = intel_ring_begin(req, 2); cs = intel_ring_begin(rq, 2);
if (IS_ERR(cs)) if (IS_ERR(cs))
return PTR_ERR(cs); return PTR_ERR(cs);
*cs++ = cmd; *cs++ = cmd;
*cs++ = MI_NOOP; *cs++ = MI_NOOP;
intel_ring_advance(req, cs); intel_ring_advance(rq, cs);
return 0; return 0;
} }
@ -175,13 +175,13 @@ gen4_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
* really our business. That leaves only stall at scoreboard. * really our business. That leaves only stall at scoreboard.
*/ */
static int static int
intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req) intel_emit_post_sync_nonzero_flush(struct i915_request *rq)
{ {
u32 scratch_addr = u32 scratch_addr =
i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES; i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES;
u32 *cs; u32 *cs;
cs = intel_ring_begin(req, 6); cs = intel_ring_begin(rq, 6);
if (IS_ERR(cs)) if (IS_ERR(cs))
return PTR_ERR(cs); return PTR_ERR(cs);
@ -191,9 +191,9 @@ intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req)
*cs++ = 0; /* low dword */ *cs++ = 0; /* low dword */
*cs++ = 0; /* high dword */ *cs++ = 0; /* high dword */
*cs++ = MI_NOOP; *cs++ = MI_NOOP;
intel_ring_advance(req, cs); intel_ring_advance(rq, cs);
cs = intel_ring_begin(req, 6); cs = intel_ring_begin(rq, 6);
if (IS_ERR(cs)) if (IS_ERR(cs))
return PTR_ERR(cs); return PTR_ERR(cs);
@ -203,21 +203,21 @@ intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req)
*cs++ = 0; *cs++ = 0;
*cs++ = 0; *cs++ = 0;
*cs++ = MI_NOOP; *cs++ = MI_NOOP;
intel_ring_advance(req, cs); intel_ring_advance(rq, cs);
return 0; return 0;
} }
static int static int
gen6_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) gen6_render_ring_flush(struct i915_request *rq, u32 mode)
{ {
u32 scratch_addr = u32 scratch_addr =
i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES; i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES;
u32 *cs, flags = 0; u32 *cs, flags = 0;
int ret; int ret;
/* Force SNB workarounds for PIPE_CONTROL flushes */ /* Force SNB workarounds for PIPE_CONTROL flushes */
ret = intel_emit_post_sync_nonzero_flush(req); ret = intel_emit_post_sync_nonzero_flush(rq);
if (ret) if (ret)
return ret; return ret;
@ -247,7 +247,7 @@ gen6_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL; flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
} }
cs = intel_ring_begin(req, 4); cs = intel_ring_begin(rq, 4);
if (IS_ERR(cs)) if (IS_ERR(cs))
return PTR_ERR(cs); return PTR_ERR(cs);
@ -255,17 +255,17 @@ gen6_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
*cs++ = flags; *cs++ = flags;
*cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT; *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT;
*cs++ = 0; *cs++ = 0;
intel_ring_advance(req, cs); intel_ring_advance(rq, cs);
return 0; return 0;
} }
static int static int
gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req) gen7_render_ring_cs_stall_wa(struct i915_request *rq)
{ {
u32 *cs; u32 *cs;
cs = intel_ring_begin(req, 4); cs = intel_ring_begin(rq, 4);
if (IS_ERR(cs)) if (IS_ERR(cs))
return PTR_ERR(cs); return PTR_ERR(cs);
@ -273,16 +273,16 @@ gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req)
*cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD; *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD;
*cs++ = 0; *cs++ = 0;
*cs++ = 0; *cs++ = 0;
intel_ring_advance(req, cs); intel_ring_advance(rq, cs);
return 0; return 0;
} }
static int static int
gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) gen7_render_ring_flush(struct i915_request *rq, u32 mode)
{ {
u32 scratch_addr = u32 scratch_addr =
i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES; i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES;
u32 *cs, flags = 0; u32 *cs, flags = 0;
/* /*
@ -324,10 +324,10 @@ gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
/* Workaround: we must issue a pipe_control with CS-stall bit /* Workaround: we must issue a pipe_control with CS-stall bit
* set before a pipe_control command that has the state cache * set before a pipe_control command that has the state cache
* invalidate bit set. */ * invalidate bit set. */
gen7_render_ring_cs_stall_wa(req); gen7_render_ring_cs_stall_wa(rq);
} }
cs = intel_ring_begin(req, 4); cs = intel_ring_begin(rq, 4);
if (IS_ERR(cs)) if (IS_ERR(cs))
return PTR_ERR(cs); return PTR_ERR(cs);
@ -335,7 +335,7 @@ gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
*cs++ = flags; *cs++ = flags;
*cs++ = scratch_addr; *cs++ = scratch_addr;
*cs++ = 0; *cs++ = 0;
intel_ring_advance(req, cs); intel_ring_advance(rq, cs);
return 0; return 0;
} }
@ -531,7 +531,7 @@ out:
} }
static void reset_ring_common(struct intel_engine_cs *engine, static void reset_ring_common(struct intel_engine_cs *engine,
struct drm_i915_gem_request *request) struct i915_request *request)
{ {
/* /*
* RC6 must be prevented until the reset is complete and the engine * RC6 must be prevented until the reset is complete and the engine
@ -595,15 +595,15 @@ static void reset_ring_common(struct intel_engine_cs *engine,
} }
} }
static int intel_rcs_ctx_init(struct drm_i915_gem_request *req) static int intel_rcs_ctx_init(struct i915_request *rq)
{ {
int ret; int ret;
ret = intel_ring_workarounds_emit(req); ret = intel_ring_workarounds_emit(rq);
if (ret != 0) if (ret != 0)
return ret; return ret;
ret = i915_gem_render_state_emit(req); ret = i915_gem_render_state_emit(rq);
if (ret) if (ret)
return ret; return ret;
@ -661,9 +661,9 @@ static int init_render_ring(struct intel_engine_cs *engine)
return init_workarounds_ring(engine); return init_workarounds_ring(engine);
} }
static u32 *gen6_signal(struct drm_i915_gem_request *req, u32 *cs) static u32 *gen6_signal(struct i915_request *rq, u32 *cs)
{ {
struct drm_i915_private *dev_priv = req->i915; struct drm_i915_private *dev_priv = rq->i915;
struct intel_engine_cs *engine; struct intel_engine_cs *engine;
enum intel_engine_id id; enum intel_engine_id id;
int num_rings = 0; int num_rings = 0;
@ -674,11 +674,11 @@ static u32 *gen6_signal(struct drm_i915_gem_request *req, u32 *cs)
if (!(BIT(engine->hw_id) & GEN6_SEMAPHORES_MASK)) if (!(BIT(engine->hw_id) & GEN6_SEMAPHORES_MASK))
continue; continue;
mbox_reg = req->engine->semaphore.mbox.signal[engine->hw_id]; mbox_reg = rq->engine->semaphore.mbox.signal[engine->hw_id];
if (i915_mmio_reg_valid(mbox_reg)) { if (i915_mmio_reg_valid(mbox_reg)) {
*cs++ = MI_LOAD_REGISTER_IMM(1); *cs++ = MI_LOAD_REGISTER_IMM(1);
*cs++ = i915_mmio_reg_offset(mbox_reg); *cs++ = i915_mmio_reg_offset(mbox_reg);
*cs++ = req->global_seqno; *cs++ = rq->global_seqno;
num_rings++; num_rings++;
} }
} }
@ -690,7 +690,7 @@ static u32 *gen6_signal(struct drm_i915_gem_request *req, u32 *cs)
static void cancel_requests(struct intel_engine_cs *engine) static void cancel_requests(struct intel_engine_cs *engine)
{ {
struct drm_i915_gem_request *request; struct i915_request *request;
unsigned long flags; unsigned long flags;
spin_lock_irqsave(&engine->timeline->lock, flags); spin_lock_irqsave(&engine->timeline->lock, flags);
@ -698,7 +698,7 @@ static void cancel_requests(struct intel_engine_cs *engine)
/* Mark all submitted requests as skipped. */ /* Mark all submitted requests as skipped. */
list_for_each_entry(request, &engine->timeline->requests, link) { list_for_each_entry(request, &engine->timeline->requests, link) {
GEM_BUG_ON(!request->global_seqno); GEM_BUG_ON(!request->global_seqno);
if (!i915_gem_request_completed(request)) if (!i915_request_completed(request))
dma_fence_set_error(&request->fence, -EIO); dma_fence_set_error(&request->fence, -EIO);
} }
/* Remaining _unready_ requests will be nop'ed when submitted */ /* Remaining _unready_ requests will be nop'ed when submitted */
@ -706,48 +706,46 @@ static void cancel_requests(struct intel_engine_cs *engine)
spin_unlock_irqrestore(&engine->timeline->lock, flags); spin_unlock_irqrestore(&engine->timeline->lock, flags);
} }
static void i9xx_submit_request(struct drm_i915_gem_request *request) static void i9xx_submit_request(struct i915_request *request)
{ {
struct drm_i915_private *dev_priv = request->i915; struct drm_i915_private *dev_priv = request->i915;
i915_gem_request_submit(request); i915_request_submit(request);
I915_WRITE_TAIL(request->engine, I915_WRITE_TAIL(request->engine,
intel_ring_set_tail(request->ring, request->tail)); intel_ring_set_tail(request->ring, request->tail));
} }
static void i9xx_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs) static void i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{ {
*cs++ = MI_STORE_DWORD_INDEX; *cs++ = MI_STORE_DWORD_INDEX;
*cs++ = I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT; *cs++ = I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT;
*cs++ = req->global_seqno; *cs++ = rq->global_seqno;
*cs++ = MI_USER_INTERRUPT; *cs++ = MI_USER_INTERRUPT;
req->tail = intel_ring_offset(req, cs); rq->tail = intel_ring_offset(rq, cs);
assert_ring_tail_valid(req->ring, req->tail); assert_ring_tail_valid(rq->ring, rq->tail);
} }
static const int i9xx_emit_breadcrumb_sz = 4; static const int i9xx_emit_breadcrumb_sz = 4;
static void gen6_sema_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs) static void gen6_sema_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{ {
return i9xx_emit_breadcrumb(req, return i9xx_emit_breadcrumb(rq, rq->engine->semaphore.signal(rq, cs));
req->engine->semaphore.signal(req, cs));
} }
static int static int
gen6_ring_sync_to(struct drm_i915_gem_request *req, gen6_ring_sync_to(struct i915_request *rq, struct i915_request *signal)
struct drm_i915_gem_request *signal)
{ {
u32 dw1 = MI_SEMAPHORE_MBOX | u32 dw1 = MI_SEMAPHORE_MBOX |
MI_SEMAPHORE_COMPARE | MI_SEMAPHORE_COMPARE |
MI_SEMAPHORE_REGISTER; MI_SEMAPHORE_REGISTER;
u32 wait_mbox = signal->engine->semaphore.mbox.wait[req->engine->hw_id]; u32 wait_mbox = signal->engine->semaphore.mbox.wait[rq->engine->hw_id];
u32 *cs; u32 *cs;
WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID); WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID);
cs = intel_ring_begin(req, 4); cs = intel_ring_begin(rq, 4);
if (IS_ERR(cs)) if (IS_ERR(cs))
return PTR_ERR(cs); return PTR_ERR(cs);
@ -759,7 +757,7 @@ gen6_ring_sync_to(struct drm_i915_gem_request *req,
*cs++ = signal->global_seqno - 1; *cs++ = signal->global_seqno - 1;
*cs++ = 0; *cs++ = 0;
*cs++ = MI_NOOP; *cs++ = MI_NOOP;
intel_ring_advance(req, cs); intel_ring_advance(rq, cs);
return 0; return 0;
} }
@ -858,17 +856,17 @@ i8xx_irq_disable(struct intel_engine_cs *engine)
} }
static int static int
bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode) bsd_ring_flush(struct i915_request *rq, u32 mode)
{ {
u32 *cs; u32 *cs;
cs = intel_ring_begin(req, 2); cs = intel_ring_begin(rq, 2);
if (IS_ERR(cs)) if (IS_ERR(cs))
return PTR_ERR(cs); return PTR_ERR(cs);
*cs++ = MI_FLUSH; *cs++ = MI_FLUSH;
*cs++ = MI_NOOP; *cs++ = MI_NOOP;
intel_ring_advance(req, cs); intel_ring_advance(rq, cs);
return 0; return 0;
} }
@ -911,20 +909,20 @@ hsw_vebox_irq_disable(struct intel_engine_cs *engine)
} }
static int static int
i965_emit_bb_start(struct drm_i915_gem_request *req, i965_emit_bb_start(struct i915_request *rq,
u64 offset, u32 length, u64 offset, u32 length,
unsigned int dispatch_flags) unsigned int dispatch_flags)
{ {
u32 *cs; u32 *cs;
cs = intel_ring_begin(req, 2); cs = intel_ring_begin(rq, 2);
if (IS_ERR(cs)) if (IS_ERR(cs))
return PTR_ERR(cs); return PTR_ERR(cs);
*cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT | (dispatch_flags & *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT | (dispatch_flags &
I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965); I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965);
*cs++ = offset; *cs++ = offset;
intel_ring_advance(req, cs); intel_ring_advance(rq, cs);
return 0; return 0;
} }
@ -934,13 +932,13 @@ i965_emit_bb_start(struct drm_i915_gem_request *req,
#define I830_TLB_ENTRIES (2) #define I830_TLB_ENTRIES (2)
#define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT) #define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT)
static int static int
i830_emit_bb_start(struct drm_i915_gem_request *req, i830_emit_bb_start(struct i915_request *rq,
u64 offset, u32 len, u64 offset, u32 len,
unsigned int dispatch_flags) unsigned int dispatch_flags)
{ {
u32 *cs, cs_offset = i915_ggtt_offset(req->engine->scratch); u32 *cs, cs_offset = i915_ggtt_offset(rq->engine->scratch);
cs = intel_ring_begin(req, 6); cs = intel_ring_begin(rq, 6);
if (IS_ERR(cs)) if (IS_ERR(cs))
return PTR_ERR(cs); return PTR_ERR(cs);
@ -951,13 +949,13 @@ i830_emit_bb_start(struct drm_i915_gem_request *req,
*cs++ = cs_offset; *cs++ = cs_offset;
*cs++ = 0xdeadbeef; *cs++ = 0xdeadbeef;
*cs++ = MI_NOOP; *cs++ = MI_NOOP;
intel_ring_advance(req, cs); intel_ring_advance(rq, cs);
if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) { if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) {
if (len > I830_BATCH_LIMIT) if (len > I830_BATCH_LIMIT)
return -ENOSPC; return -ENOSPC;
cs = intel_ring_begin(req, 6 + 2); cs = intel_ring_begin(rq, 6 + 2);
if (IS_ERR(cs)) if (IS_ERR(cs))
return PTR_ERR(cs); return PTR_ERR(cs);
@ -974,39 +972,39 @@ i830_emit_bb_start(struct drm_i915_gem_request *req,
*cs++ = MI_FLUSH; *cs++ = MI_FLUSH;
*cs++ = MI_NOOP; *cs++ = MI_NOOP;
intel_ring_advance(req, cs); intel_ring_advance(rq, cs);
/* ... and execute it. */ /* ... and execute it. */
offset = cs_offset; offset = cs_offset;
} }
cs = intel_ring_begin(req, 2); cs = intel_ring_begin(rq, 2);
if (IS_ERR(cs)) if (IS_ERR(cs))
return PTR_ERR(cs); return PTR_ERR(cs);
*cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
*cs++ = offset | (dispatch_flags & I915_DISPATCH_SECURE ? 0 : *cs++ = offset | (dispatch_flags & I915_DISPATCH_SECURE ? 0 :
MI_BATCH_NON_SECURE); MI_BATCH_NON_SECURE);
intel_ring_advance(req, cs); intel_ring_advance(rq, cs);
return 0; return 0;
} }
static int static int
i915_emit_bb_start(struct drm_i915_gem_request *req, i915_emit_bb_start(struct i915_request *rq,
u64 offset, u32 len, u64 offset, u32 len,
unsigned int dispatch_flags) unsigned int dispatch_flags)
{ {
u32 *cs; u32 *cs;
cs = intel_ring_begin(req, 2); cs = intel_ring_begin(rq, 2);
if (IS_ERR(cs)) if (IS_ERR(cs))
return PTR_ERR(cs); return PTR_ERR(cs);
*cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
*cs++ = offset | (dispatch_flags & I915_DISPATCH_SECURE ? 0 : *cs++ = offset | (dispatch_flags & I915_DISPATCH_SECURE ? 0 :
MI_BATCH_NON_SECURE); MI_BATCH_NON_SECURE);
intel_ring_advance(req, cs); intel_ring_advance(rq, cs);
return 0; return 0;
} }
@ -1377,7 +1375,7 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv)
intel_ring_reset(engine->buffer, 0); intel_ring_reset(engine->buffer, 0);
} }
static inline int mi_set_context(struct drm_i915_gem_request *rq, u32 flags) static inline int mi_set_context(struct i915_request *rq, u32 flags)
{ {
struct drm_i915_private *i915 = rq->i915; struct drm_i915_private *i915 = rq->i915;
struct intel_engine_cs *engine = rq->engine; struct intel_engine_cs *engine = rq->engine;
@ -1463,7 +1461,7 @@ static inline int mi_set_context(struct drm_i915_gem_request *rq, u32 flags)
return 0; return 0;
} }
static int remap_l3(struct drm_i915_gem_request *rq, int slice) static int remap_l3(struct i915_request *rq, int slice)
{ {
u32 *cs, *remap_info = rq->i915->l3_parity.remap_info[slice]; u32 *cs, *remap_info = rq->i915->l3_parity.remap_info[slice];
int i; int i;
@ -1491,7 +1489,7 @@ static int remap_l3(struct drm_i915_gem_request *rq, int slice)
return 0; return 0;
} }
static int switch_context(struct drm_i915_gem_request *rq) static int switch_context(struct i915_request *rq)
{ {
struct intel_engine_cs *engine = rq->engine; struct intel_engine_cs *engine = rq->engine;
struct i915_gem_context *to_ctx = rq->ctx; struct i915_gem_context *to_ctx = rq->ctx;
@ -1561,7 +1559,7 @@ err:
return ret; return ret;
} }
static int ring_request_alloc(struct drm_i915_gem_request *request) static int ring_request_alloc(struct i915_request *request)
{ {
int ret; int ret;
@ -1587,7 +1585,7 @@ static int ring_request_alloc(struct drm_i915_gem_request *request)
static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes) static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes)
{ {
struct drm_i915_gem_request *target; struct i915_request *target;
long timeout; long timeout;
lockdep_assert_held(&ring->vma->vm->i915->drm.struct_mutex); lockdep_assert_held(&ring->vma->vm->i915->drm.struct_mutex);
@ -1605,13 +1603,13 @@ static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes)
if (WARN_ON(&target->ring_link == &ring->request_list)) if (WARN_ON(&target->ring_link == &ring->request_list))
return -ENOSPC; return -ENOSPC;
timeout = i915_wait_request(target, timeout = i915_request_wait(target,
I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED, I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
MAX_SCHEDULE_TIMEOUT); MAX_SCHEDULE_TIMEOUT);
if (timeout < 0) if (timeout < 0)
return timeout; return timeout;
i915_gem_request_retire_upto(target); i915_request_retire_upto(target);
intel_ring_update_space(ring); intel_ring_update_space(ring);
GEM_BUG_ON(ring->space < bytes); GEM_BUG_ON(ring->space < bytes);
@ -1634,10 +1632,9 @@ int intel_ring_wait_for_space(struct intel_ring *ring, unsigned int bytes)
return 0; return 0;
} }
u32 *intel_ring_begin(struct drm_i915_gem_request *req, u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords)
unsigned int num_dwords)
{ {
struct intel_ring *ring = req->ring; struct intel_ring *ring = rq->ring;
const unsigned int remain_usable = ring->effective_size - ring->emit; const unsigned int remain_usable = ring->effective_size - ring->emit;
const unsigned int bytes = num_dwords * sizeof(u32); const unsigned int bytes = num_dwords * sizeof(u32);
unsigned int need_wrap = 0; unsigned int need_wrap = 0;
@ -1647,7 +1644,7 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req,
/* Packets must be qword aligned. */ /* Packets must be qword aligned. */
GEM_BUG_ON(num_dwords & 1); GEM_BUG_ON(num_dwords & 1);
total_bytes = bytes + req->reserved_space; total_bytes = bytes + rq->reserved_space;
GEM_BUG_ON(total_bytes > ring->effective_size); GEM_BUG_ON(total_bytes > ring->effective_size);
if (unlikely(total_bytes > remain_usable)) { if (unlikely(total_bytes > remain_usable)) {
@ -1668,7 +1665,7 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req,
* wrap and only need to effectively wait for the * wrap and only need to effectively wait for the
* reserved size from the start of ringbuffer. * reserved size from the start of ringbuffer.
*/ */
total_bytes = req->reserved_space + remain_actual; total_bytes = rq->reserved_space + remain_actual;
} }
} }
@ -1682,9 +1679,9 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req,
* overallocation and the assumption is that then we never need * overallocation and the assumption is that then we never need
* to wait (which has the risk of failing with EINTR). * to wait (which has the risk of failing with EINTR).
* *
* See also i915_gem_request_alloc() and i915_add_request(). * See also i915_request_alloc() and i915_request_add().
*/ */
GEM_BUG_ON(!req->reserved_space); GEM_BUG_ON(!rq->reserved_space);
ret = wait_for_space(ring, total_bytes); ret = wait_for_space(ring, total_bytes);
if (unlikely(ret)) if (unlikely(ret))
@ -1713,29 +1710,28 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req,
} }
/* Align the ring tail to a cacheline boundary */ /* Align the ring tail to a cacheline boundary */
int intel_ring_cacheline_align(struct drm_i915_gem_request *req) int intel_ring_cacheline_align(struct i915_request *rq)
{ {
int num_dwords = int num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32);
(req->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
u32 *cs; u32 *cs;
if (num_dwords == 0) if (num_dwords == 0)
return 0; return 0;
num_dwords = CACHELINE_BYTES / sizeof(uint32_t) - num_dwords; num_dwords = CACHELINE_BYTES / sizeof(u32) - num_dwords;
cs = intel_ring_begin(req, num_dwords); cs = intel_ring_begin(rq, num_dwords);
if (IS_ERR(cs)) if (IS_ERR(cs))
return PTR_ERR(cs); return PTR_ERR(cs);
while (num_dwords--) while (num_dwords--)
*cs++ = MI_NOOP; *cs++ = MI_NOOP;
intel_ring_advance(req, cs); intel_ring_advance(rq, cs);
return 0; return 0;
} }
static void gen6_bsd_submit_request(struct drm_i915_gem_request *request) static void gen6_bsd_submit_request(struct i915_request *request)
{ {
struct drm_i915_private *dev_priv = request->i915; struct drm_i915_private *dev_priv = request->i915;
@ -1772,11 +1768,11 @@ static void gen6_bsd_submit_request(struct drm_i915_gem_request *request)
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
} }
static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode) static int gen6_bsd_ring_flush(struct i915_request *rq, u32 mode)
{ {
u32 cmd, *cs; u32 cmd, *cs;
cs = intel_ring_begin(req, 4); cs = intel_ring_begin(rq, 4);
if (IS_ERR(cs)) if (IS_ERR(cs))
return PTR_ERR(cs); return PTR_ERR(cs);
@ -1802,18 +1798,18 @@ static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode)
*cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT; *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
*cs++ = 0; *cs++ = 0;
*cs++ = MI_NOOP; *cs++ = MI_NOOP;
intel_ring_advance(req, cs); intel_ring_advance(rq, cs);
return 0; return 0;
} }
static int static int
hsw_emit_bb_start(struct drm_i915_gem_request *req, hsw_emit_bb_start(struct i915_request *rq,
u64 offset, u32 len, u64 offset, u32 len,
unsigned int dispatch_flags) unsigned int dispatch_flags)
{ {
u32 *cs; u32 *cs;
cs = intel_ring_begin(req, 2); cs = intel_ring_begin(rq, 2);
if (IS_ERR(cs)) if (IS_ERR(cs))
return PTR_ERR(cs); return PTR_ERR(cs);
@ -1823,19 +1819,19 @@ hsw_emit_bb_start(struct drm_i915_gem_request *req,
MI_BATCH_RESOURCE_STREAMER : 0); MI_BATCH_RESOURCE_STREAMER : 0);
/* bit0-7 is the length on GEN6+ */ /* bit0-7 is the length on GEN6+ */
*cs++ = offset; *cs++ = offset;
intel_ring_advance(req, cs); intel_ring_advance(rq, cs);
return 0; return 0;
} }
static int static int
gen6_emit_bb_start(struct drm_i915_gem_request *req, gen6_emit_bb_start(struct i915_request *rq,
u64 offset, u32 len, u64 offset, u32 len,
unsigned int dispatch_flags) unsigned int dispatch_flags)
{ {
u32 *cs; u32 *cs;
cs = intel_ring_begin(req, 2); cs = intel_ring_begin(rq, 2);
if (IS_ERR(cs)) if (IS_ERR(cs))
return PTR_ERR(cs); return PTR_ERR(cs);
@ -1843,18 +1839,18 @@ gen6_emit_bb_start(struct drm_i915_gem_request *req,
0 : MI_BATCH_NON_SECURE_I965); 0 : MI_BATCH_NON_SECURE_I965);
/* bit0-7 is the length on GEN6+ */ /* bit0-7 is the length on GEN6+ */
*cs++ = offset; *cs++ = offset;
intel_ring_advance(req, cs); intel_ring_advance(rq, cs);
return 0; return 0;
} }
/* Blitter support (SandyBridge+) */ /* Blitter support (SandyBridge+) */
static int gen6_ring_flush(struct drm_i915_gem_request *req, u32 mode) static int gen6_ring_flush(struct i915_request *rq, u32 mode)
{ {
u32 cmd, *cs; u32 cmd, *cs;
cs = intel_ring_begin(req, 4); cs = intel_ring_begin(rq, 4);
if (IS_ERR(cs)) if (IS_ERR(cs))
return PTR_ERR(cs); return PTR_ERR(cs);
@ -1879,7 +1875,7 @@ static int gen6_ring_flush(struct drm_i915_gem_request *req, u32 mode)
*cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT; *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
*cs++ = 0; *cs++ = 0;
*cs++ = MI_NOOP; *cs++ = MI_NOOP;
intel_ring_advance(req, cs); intel_ring_advance(rq, cs);
return 0; return 0;
} }
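Every emitter converted above follows the same shape: reserve space with intel_ring_begin(), write the dwords, close with intel_ring_advance(). A minimal sketch of that pattern with the new i915_request parameter (illustrative only; foo_emit_noops() is a hypothetical helper, not part of this series):

static int foo_emit_noops(struct i915_request *rq)
{
	u32 *cs;

	/* Reserve an even number of dwords; packets must be qword aligned. */
	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_NOOP;
	*cs++ = MI_NOOP;

	/* Checks that exactly the reserved dwords were written. */
	intel_ring_advance(rq, cs);

	return 0;
}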

View File

@ -3,10 +3,12 @@
#define _INTEL_RINGBUFFER_H_ #define _INTEL_RINGBUFFER_H_
#include <linux/hashtable.h> #include <linux/hashtable.h>
#include "i915_gem_batch_pool.h" #include "i915_gem_batch_pool.h"
#include "i915_gem_request.h"
#include "i915_gem_timeline.h" #include "i915_gem_timeline.h"
#include "i915_pmu.h" #include "i915_pmu.h"
#include "i915_request.h"
#include "i915_selftest.h" #include "i915_selftest.h"
struct drm_printer; struct drm_printer;
@ -90,7 +92,7 @@ hangcheck_action_to_str(const enum intel_engine_hangcheck_action a)
#define instdone_subslice_mask(dev_priv__) \ #define instdone_subslice_mask(dev_priv__) \
(INTEL_GEN(dev_priv__) == 7 ? \ (INTEL_GEN(dev_priv__) == 7 ? \
1 : INTEL_INFO(dev_priv__)->sseu.subslice_mask) 1 : INTEL_INFO(dev_priv__)->sseu.subslice_mask[0])
#define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \ #define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \
for ((slice__) = 0, (subslice__) = 0; \ for ((slice__) = 0, (subslice__) = 0; \
@ -115,7 +117,7 @@ struct intel_engine_hangcheck {
unsigned long action_timestamp; unsigned long action_timestamp;
int deadlock; int deadlock;
struct intel_instdone instdone; struct intel_instdone instdone;
struct drm_i915_gem_request *active_request; struct i915_request *active_request;
bool stalled; bool stalled;
}; };
@ -156,7 +158,10 @@ struct i915_ctx_workarounds {
struct i915_vma *vma; struct i915_vma *vma;
}; };
struct drm_i915_gem_request; struct i915_request;
#define I915_MAX_VCS 4
#define I915_MAX_VECS 2
/* /*
* Engine IDs definitions. * Engine IDs definitions.
@ -167,8 +172,12 @@ enum intel_engine_id {
BCS, BCS,
VCS, VCS,
VCS2, VCS2,
VCS3,
VCS4,
#define _VCS(n) (VCS + (n)) #define _VCS(n) (VCS + (n))
VECS VECS,
VECS2
#define _VECS(n) (VECS + (n))
}; };
struct i915_priolist { struct i915_priolist {
@ -200,9 +209,17 @@ struct intel_engine_execlists {
bool no_priolist; bool no_priolist;
/** /**
* @elsp: the ExecList Submission Port register * @submit_reg: gen-specific execlist submission register
* set to the ExecList Submission Port (elsp) register pre-Gen11 and to
* the ExecList Submission Queue Contents register array for Gen11+
*/ */
u32 __iomem *elsp; u32 __iomem *submit_reg;
/**
* @ctrl_reg: the enhanced execlists control register, used to load the
* submit queue on the HW and to request preemptions to idle
*/
u32 __iomem *ctrl_reg;
/** /**
* @port: execlist port states * @port: execlist port states
@ -218,7 +235,7 @@ struct intel_engine_execlists {
/** /**
* @request_count: combined request and submission count * @request_count: combined request and submission count
*/ */
struct drm_i915_gem_request *request_count; struct i915_request *request_count;
#define EXECLIST_COUNT_BITS 2 #define EXECLIST_COUNT_BITS 2
#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS) #define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS)
#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS) #define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS)
@ -255,6 +272,16 @@ struct intel_engine_execlists {
*/ */
unsigned int port_mask; unsigned int port_mask;
/**
* @queue_priority: Highest pending priority.
*
* When we add requests into the queue, or adjust the priority of
* executing requests, we compute the maximum priority of those
* pending requests. We can then use this value to determine if
* we need to preempt the executing requests to service the queue.
*/
int queue_priority;
/** /**
* @queue: queue of requests, in priority lists * @queue: queue of requests, in priority lists
*/ */
@ -337,9 +364,9 @@ struct intel_engine_cs {
spinlock_t rb_lock; /* protects the rb and wraps irq_lock */ spinlock_t rb_lock; /* protects the rb and wraps irq_lock */
struct rb_root waiters; /* sorted by retirement, priority */ struct rb_root waiters; /* sorted by retirement, priority */
struct rb_root signals; /* sorted by retirement */ struct list_head signals; /* sorted by retirement */
struct task_struct *signaler; /* used for fence signalling */ struct task_struct *signaler; /* used for fence signalling */
struct drm_i915_gem_request __rcu *first_signal;
struct timer_list fake_irq; /* used after a missed interrupt */ struct timer_list fake_irq; /* used after a missed interrupt */
struct timer_list hangcheck; /* detect missed interrupts */ struct timer_list hangcheck; /* detect missed interrupts */
@ -391,7 +418,7 @@ struct intel_engine_cs {
int (*init_hw)(struct intel_engine_cs *engine); int (*init_hw)(struct intel_engine_cs *engine);
void (*reset_hw)(struct intel_engine_cs *engine, void (*reset_hw)(struct intel_engine_cs *engine,
struct drm_i915_gem_request *req); struct i915_request *rq);
void (*park)(struct intel_engine_cs *engine); void (*park)(struct intel_engine_cs *engine);
void (*unpark)(struct intel_engine_cs *engine); void (*unpark)(struct intel_engine_cs *engine);
@ -402,22 +429,20 @@ struct intel_engine_cs {
struct i915_gem_context *ctx); struct i915_gem_context *ctx);
void (*context_unpin)(struct intel_engine_cs *engine, void (*context_unpin)(struct intel_engine_cs *engine,
struct i915_gem_context *ctx); struct i915_gem_context *ctx);
int (*request_alloc)(struct drm_i915_gem_request *req); int (*request_alloc)(struct i915_request *rq);
int (*init_context)(struct drm_i915_gem_request *req); int (*init_context)(struct i915_request *rq);
int (*emit_flush)(struct drm_i915_gem_request *request, int (*emit_flush)(struct i915_request *request, u32 mode);
u32 mode);
#define EMIT_INVALIDATE BIT(0) #define EMIT_INVALIDATE BIT(0)
#define EMIT_FLUSH BIT(1) #define EMIT_FLUSH BIT(1)
#define EMIT_BARRIER (EMIT_INVALIDATE | EMIT_FLUSH) #define EMIT_BARRIER (EMIT_INVALIDATE | EMIT_FLUSH)
int (*emit_bb_start)(struct drm_i915_gem_request *req, int (*emit_bb_start)(struct i915_request *rq,
u64 offset, u32 length, u64 offset, u32 length,
unsigned int dispatch_flags); unsigned int dispatch_flags);
#define I915_DISPATCH_SECURE BIT(0) #define I915_DISPATCH_SECURE BIT(0)
#define I915_DISPATCH_PINNED BIT(1) #define I915_DISPATCH_PINNED BIT(1)
#define I915_DISPATCH_RS BIT(2) #define I915_DISPATCH_RS BIT(2)
void (*emit_breadcrumb)(struct drm_i915_gem_request *req, void (*emit_breadcrumb)(struct i915_request *rq, u32 *cs);
u32 *cs);
int emit_breadcrumb_sz; int emit_breadcrumb_sz;
/* Pass the request to the hardware queue (e.g. directly into /* Pass the request to the hardware queue (e.g. directly into
@ -426,7 +451,7 @@ struct intel_engine_cs {
* This is called from an atomic context with irqs disabled; must * This is called from an atomic context with irqs disabled; must
* be irq safe. * be irq safe.
*/ */
void (*submit_request)(struct drm_i915_gem_request *req); void (*submit_request)(struct i915_request *rq);
/* Call when the priority on a request has changed and it and its /* Call when the priority on a request has changed and it and its
* dependencies may need rescheduling. Note the request itself may * dependencies may need rescheduling. Note the request itself may
@ -434,8 +459,7 @@ struct intel_engine_cs {
* *
* Called under the struct_mutex. * Called under the struct_mutex.
*/ */
void (*schedule)(struct drm_i915_gem_request *request, void (*schedule)(struct i915_request *request, int priority);
int priority);
/* /*
* Cancel all requests on the hardware, or queued for execution. * Cancel all requests on the hardware, or queued for execution.
@ -503,9 +527,9 @@ struct intel_engine_cs {
} mbox; } mbox;
/* AKA wait() */ /* AKA wait() */
int (*sync_to)(struct drm_i915_gem_request *req, int (*sync_to)(struct i915_request *rq,
struct drm_i915_gem_request *signal); struct i915_request *signal);
u32 *(*signal)(struct drm_i915_gem_request *req, u32 *cs); u32 *(*signal)(struct i915_request *rq, u32 *cs);
} semaphore; } semaphore;
struct intel_engine_execlists execlists; struct intel_engine_execlists execlists;
@ -726,14 +750,12 @@ void intel_engine_cleanup(struct intel_engine_cs *engine);
void intel_legacy_submission_resume(struct drm_i915_private *dev_priv); void intel_legacy_submission_resume(struct drm_i915_private *dev_priv);
int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req); int __must_check intel_ring_cacheline_align(struct i915_request *rq);
int intel_ring_wait_for_space(struct intel_ring *ring, unsigned int bytes); int intel_ring_wait_for_space(struct intel_ring *ring, unsigned int bytes);
u32 __must_check *intel_ring_begin(struct drm_i915_gem_request *req, u32 __must_check *intel_ring_begin(struct i915_request *rq, unsigned int n);
unsigned int n);
static inline void static inline void intel_ring_advance(struct i915_request *rq, u32 *cs)
intel_ring_advance(struct drm_i915_gem_request *req, u32 *cs)
{ {
/* Dummy function. /* Dummy function.
* *
@ -743,22 +765,20 @@ intel_ring_advance(struct drm_i915_gem_request *req, u32 *cs)
* reserved for the command packet (i.e. the value passed to * reserved for the command packet (i.e. the value passed to
* intel_ring_begin()). * intel_ring_begin()).
*/ */
GEM_BUG_ON((req->ring->vaddr + req->ring->emit) != cs); GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs);
} }
static inline u32 static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos)
intel_ring_wrap(const struct intel_ring *ring, u32 pos)
{ {
return pos & (ring->size - 1); return pos & (ring->size - 1);
} }
static inline u32 static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr)
intel_ring_offset(const struct drm_i915_gem_request *req, void *addr)
{ {
/* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */ /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
u32 offset = addr - req->ring->vaddr; u32 offset = addr - rq->ring->vaddr;
GEM_BUG_ON(offset > req->ring->size); GEM_BUG_ON(offset > rq->ring->size);
return intel_ring_wrap(req->ring, offset); return intel_ring_wrap(rq->ring, offset);
} }
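intel_ring_wrap() assumes the ring size is a power of two, so wrapping reduces to a single mask. A standalone illustration with a hypothetical 4 KiB ring (example values, not driver constants):

/* Illustration only: wrap a byte offset into a power-of-two ring. */
static inline u32 example_ring_wrap(u32 pos)
{
	const u32 ring_size = 4096;	/* hypothetical ring size */

	return pos & (ring_size - 1);	/* e.g. 4100 wraps to 4 */
}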
static inline void static inline void
@ -796,7 +816,7 @@ intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
{ {
/* Whilst writes to the tail are strictly ordered, there is no /* Whilst writes to the tail are strictly ordered, there is no
* serialisation between readers and the writers. The tail may be * serialisation between readers and the writers. The tail may be
* read by i915_gem_request_retire() just as it is being updated * read by i915_request_retire() just as it is being updated
* by execlists, as although the breadcrumb is complete, the context * by execlists, as although the breadcrumb is complete, the context
* switch hasn't been seen. * switch hasn't been seen.
*/ */
@ -838,7 +858,7 @@ static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine)
} }
int init_workarounds_ring(struct intel_engine_cs *engine); int init_workarounds_ring(struct intel_engine_cs *engine);
int intel_ring_workarounds_emit(struct drm_i915_gem_request *req); int intel_ring_workarounds_emit(struct i915_request *rq);
void intel_engine_get_instdone(struct intel_engine_cs *engine, void intel_engine_get_instdone(struct intel_engine_cs *engine,
struct intel_instdone *instdone); struct intel_instdone *instdone);
@ -866,7 +886,7 @@ static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine)
int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
static inline void intel_wait_init(struct intel_wait *wait, static inline void intel_wait_init(struct intel_wait *wait,
struct drm_i915_gem_request *rq) struct i915_request *rq)
{ {
wait->tsk = current; wait->tsk = current;
wait->request = rq; wait->request = rq;
@ -892,9 +912,9 @@ intel_wait_update_seqno(struct intel_wait *wait, u32 seqno)
static inline bool static inline bool
intel_wait_update_request(struct intel_wait *wait, intel_wait_update_request(struct intel_wait *wait,
const struct drm_i915_gem_request *rq) const struct i915_request *rq)
{ {
return intel_wait_update_seqno(wait, i915_gem_request_global_seqno(rq)); return intel_wait_update_seqno(wait, i915_request_global_seqno(rq));
} }
static inline bool static inline bool
@ -905,9 +925,9 @@ intel_wait_check_seqno(const struct intel_wait *wait, u32 seqno)
static inline bool static inline bool
intel_wait_check_request(const struct intel_wait *wait, intel_wait_check_request(const struct intel_wait *wait,
const struct drm_i915_gem_request *rq) const struct i915_request *rq)
{ {
return intel_wait_check_seqno(wait, i915_gem_request_global_seqno(rq)); return intel_wait_check_seqno(wait, i915_request_global_seqno(rq));
} }
static inline bool intel_wait_complete(const struct intel_wait *wait) static inline bool intel_wait_complete(const struct intel_wait *wait)
@ -919,9 +939,8 @@ bool intel_engine_add_wait(struct intel_engine_cs *engine,
struct intel_wait *wait); struct intel_wait *wait);
void intel_engine_remove_wait(struct intel_engine_cs *engine, void intel_engine_remove_wait(struct intel_engine_cs *engine,
struct intel_wait *wait); struct intel_wait *wait);
void intel_engine_enable_signaling(struct drm_i915_gem_request *request, void intel_engine_enable_signaling(struct i915_request *request, bool wakeup);
bool wakeup); void intel_engine_cancel_signaling(struct i915_request *request);
void intel_engine_cancel_signaling(struct drm_i915_gem_request *request);
static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine) static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine)
{ {
@ -940,7 +959,6 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
bool intel_breadcrumbs_busy(struct intel_engine_cs *engine);
static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset) static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
{ {

View File

@ -130,6 +130,8 @@ intel_display_power_domain_str(enum intel_display_power_domain domain)
return "AUX_D"; return "AUX_D";
case POWER_DOMAIN_AUX_F: case POWER_DOMAIN_AUX_F:
return "AUX_F"; return "AUX_F";
case POWER_DOMAIN_AUX_IO_A:
return "AUX_IO_A";
case POWER_DOMAIN_GMBUS: case POWER_DOMAIN_GMBUS:
return "GMBUS"; return "GMBUS";
case POWER_DOMAIN_INIT: case POWER_DOMAIN_INIT:
@ -1853,6 +1855,7 @@ void intel_display_power_put(struct drm_i915_private *dev_priv,
BIT_ULL(POWER_DOMAIN_INIT)) BIT_ULL(POWER_DOMAIN_INIT))
#define CNL_DISPLAY_AUX_A_POWER_DOMAINS ( \ #define CNL_DISPLAY_AUX_A_POWER_DOMAINS ( \
BIT_ULL(POWER_DOMAIN_AUX_A) | \ BIT_ULL(POWER_DOMAIN_AUX_A) | \
BIT_ULL(POWER_DOMAIN_AUX_IO_A) | \
BIT_ULL(POWER_DOMAIN_INIT)) BIT_ULL(POWER_DOMAIN_INIT))
#define CNL_DISPLAY_AUX_B_POWER_DOMAINS ( \ #define CNL_DISPLAY_AUX_B_POWER_DOMAINS ( \
BIT_ULL(POWER_DOMAIN_AUX_B) | \ BIT_ULL(POWER_DOMAIN_AUX_B) | \

View File

@ -1705,7 +1705,15 @@ static void intel_sdvo_enable_hotplug(struct intel_encoder *encoder)
struct intel_sdvo *intel_sdvo = to_sdvo(encoder); struct intel_sdvo *intel_sdvo = to_sdvo(encoder);
intel_sdvo_write_cmd(intel_sdvo, SDVO_CMD_SET_ACTIVE_HOT_PLUG, intel_sdvo_write_cmd(intel_sdvo, SDVO_CMD_SET_ACTIVE_HOT_PLUG,
&intel_sdvo->hotplug_active, 2); &intel_sdvo->hotplug_active, 2);
}
static bool intel_sdvo_hotplug(struct intel_encoder *encoder,
struct intel_connector *connector)
{
intel_sdvo_enable_hotplug(encoder);
return intel_encoder_hotplug(encoder, connector);
} }
static bool static bool
@ -2516,7 +2524,7 @@ intel_sdvo_dvi_init(struct intel_sdvo *intel_sdvo, int device)
* Some SDVO devices have one-shot hotplug interrupts. * Some SDVO devices have one-shot hotplug interrupts.
* Ensure that they get re-enabled when an interrupt happens. * Ensure that they get re-enabled when an interrupt happens.
*/ */
intel_encoder->hot_plug = intel_sdvo_enable_hotplug; intel_encoder->hotplug = intel_sdvo_hotplug;
intel_sdvo_enable_hotplug(intel_encoder); intel_sdvo_enable_hotplug(intel_encoder);
} else { } else {
intel_connector->polled = DRM_CONNECTOR_POLL_CONNECT | DRM_CONNECTOR_POLL_DISCONNECT; intel_connector->polled = DRM_CONNECTOR_POLL_CONNECT | DRM_CONNECTOR_POLL_DISCONNECT;

View File

@ -361,7 +361,7 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv)
goto err_out; goto err_out;
if (USES_HUC(dev_priv)) { if (USES_HUC(dev_priv)) {
ret = intel_huc_init_hw(huc); ret = intel_huc_fw_upload(huc);
if (ret) if (ret)
goto err_out; goto err_out;
} }
@ -445,3 +445,48 @@ void intel_uc_fini_hw(struct drm_i915_private *dev_priv)
if (USES_GUC_SUBMISSION(dev_priv)) if (USES_GUC_SUBMISSION(dev_priv))
gen9_disable_guc_interrupts(dev_priv); gen9_disable_guc_interrupts(dev_priv);
} }
int intel_uc_suspend(struct drm_i915_private *i915)
{
struct intel_guc *guc = &i915->guc;
int err;
if (!USES_GUC(i915))
return 0;
if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS)
return 0;
err = intel_guc_suspend(guc);
if (err) {
DRM_DEBUG_DRIVER("Failed to suspend GuC, err=%d", err);
return err;
}
gen9_disable_guc_interrupts(i915);
return 0;
}
int intel_uc_resume(struct drm_i915_private *i915)
{
struct intel_guc *guc = &i915->guc;
int err;
if (!USES_GUC(i915))
return 0;
if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS)
return 0;
if (i915_modparams.guc_log_level)
gen9_enable_guc_interrupts(i915);
err = intel_guc_resume(guc);
if (err) {
DRM_DEBUG_DRIVER("Failed to resume GuC, err=%d", err);
return err;
}
return 0;
}
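Both new helpers bail out early when GuC is not in use or its firmware never loaded, so a power-management path can call them unconditionally. A hypothetical caller sketch (example_suspend() is not part of this series):

static int example_suspend(struct drm_i915_private *i915)
{
	int err;

	/* Returns 0 immediately when GuC is disabled or not loaded. */
	err = intel_uc_suspend(i915);
	if (err)
		return err;

	/* ... remainder of the suspend sequence ... */
	return 0;
}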

View File

@ -39,6 +39,8 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv);
void intel_uc_fini_hw(struct drm_i915_private *dev_priv); void intel_uc_fini_hw(struct drm_i915_private *dev_priv);
int intel_uc_init(struct drm_i915_private *dev_priv); int intel_uc_init(struct drm_i915_private *dev_priv);
void intel_uc_fini(struct drm_i915_private *dev_priv); void intel_uc_fini(struct drm_i915_private *dev_priv);
int intel_uc_suspend(struct drm_i915_private *dev_priv);
int intel_uc_resume(struct drm_i915_private *dev_priv);
static inline bool intel_uc_is_using_guc(void) static inline bool intel_uc_is_using_guc(void)
{ {

View File

@ -37,6 +37,12 @@ static const char * const forcewake_domain_names[] = {
"render", "render",
"blitter", "blitter",
"media", "media",
"vdbox0",
"vdbox1",
"vdbox2",
"vdbox3",
"vebox0",
"vebox1",
}; };
const char * const char *
@ -774,6 +780,9 @@ void assert_forcewakes_active(struct drm_i915_private *dev_priv,
/* We give fast paths for the really cool registers */ /* We give fast paths for the really cool registers */
#define NEEDS_FORCE_WAKE(reg) ((reg) < 0x40000) #define NEEDS_FORCE_WAKE(reg) ((reg) < 0x40000)
#define GEN11_NEEDS_FORCE_WAKE(reg) \
((reg) < 0x40000 || ((reg) >= 0x1c0000 && (reg) < 0x1dc000))
#define __gen6_reg_read_fw_domains(offset) \ #define __gen6_reg_read_fw_domains(offset) \
({ \ ({ \
enum forcewake_domains __fwd; \ enum forcewake_domains __fwd; \
@ -826,6 +835,14 @@ find_fw_domain(struct drm_i915_private *dev_priv, u32 offset)
if (!entry) if (!entry)
return 0; return 0;
/*
* The list of FW domains depends on the SKU in gen11+ so we
* can't determine it statically. We use FORCEWAKE_ALL and
* translate it here to the list of available domains.
*/
if (entry->domains == FORCEWAKE_ALL)
return dev_priv->uncore.fw_domains;
WARN(entry->domains & ~dev_priv->uncore.fw_domains, WARN(entry->domains & ~dev_priv->uncore.fw_domains,
"Uninitialized forcewake domain(s) 0x%x accessed at 0x%x\n", "Uninitialized forcewake domain(s) 0x%x accessed at 0x%x\n",
entry->domains & ~dev_priv->uncore.fw_domains, offset); entry->domains & ~dev_priv->uncore.fw_domains, offset);
@ -860,6 +877,14 @@ static const struct intel_forcewake_range __vlv_fw_ranges[] = {
__fwd; \ __fwd; \
}) })
#define __gen11_fwtable_reg_read_fw_domains(offset) \
({ \
enum forcewake_domains __fwd = 0; \
if (GEN11_NEEDS_FORCE_WAKE((offset))) \
__fwd = find_fw_domain(dev_priv, offset); \
__fwd; \
})
/* *Must* be sorted by offset! See intel_shadow_table_check(). */ /* *Must* be sorted by offset! See intel_shadow_table_check(). */
static const i915_reg_t gen8_shadowed_regs[] = { static const i915_reg_t gen8_shadowed_regs[] = {
RING_TAIL(RENDER_RING_BASE), /* 0x2000 (base) */ RING_TAIL(RENDER_RING_BASE), /* 0x2000 (base) */
@ -871,6 +896,20 @@ static const i915_reg_t gen8_shadowed_regs[] = {
/* TODO: Other registers are not yet used */ /* TODO: Other registers are not yet used */
}; };
static const i915_reg_t gen11_shadowed_regs[] = {
RING_TAIL(RENDER_RING_BASE), /* 0x2000 (base) */
GEN6_RPNSWREQ, /* 0xA008 */
GEN6_RC_VIDEO_FREQ, /* 0xA00C */
RING_TAIL(BLT_RING_BASE), /* 0x22000 (base) */
RING_TAIL(GEN11_BSD_RING_BASE), /* 0x1C0000 (base) */
RING_TAIL(GEN11_BSD2_RING_BASE), /* 0x1C4000 (base) */
RING_TAIL(GEN11_VEBOX_RING_BASE), /* 0x1C8000 (base) */
RING_TAIL(GEN11_BSD3_RING_BASE), /* 0x1D0000 (base) */
RING_TAIL(GEN11_BSD4_RING_BASE), /* 0x1D4000 (base) */
RING_TAIL(GEN11_VEBOX2_RING_BASE), /* 0x1D8000 (base) */
/* TODO: Other registers are not yet used */
};
static int mmio_reg_cmp(u32 key, const i915_reg_t *reg) static int mmio_reg_cmp(u32 key, const i915_reg_t *reg)
{ {
u32 offset = i915_mmio_reg_offset(*reg); u32 offset = i915_mmio_reg_offset(*reg);
@ -883,14 +922,17 @@ static int mmio_reg_cmp(u32 key, const i915_reg_t *reg)
return 0; return 0;
} }
static bool is_gen8_shadowed(u32 offset) #define __is_genX_shadowed(x) \
{ static bool is_gen##x##_shadowed(u32 offset) \
const i915_reg_t *regs = gen8_shadowed_regs; { \
const i915_reg_t *regs = gen##x##_shadowed_regs; \
return BSEARCH(offset, regs, ARRAY_SIZE(gen8_shadowed_regs), return BSEARCH(offset, regs, ARRAY_SIZE(gen##x##_shadowed_regs), \
mmio_reg_cmp); mmio_reg_cmp); \
} }
__is_genX_shadowed(8)
__is_genX_shadowed(11)
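Expanded for gen11, the macro above generates a bsearch-backed predicate equivalent to the following (a sketch of the expansion, not literal preprocessor output):

static bool is_gen11_shadowed(u32 offset)
{
	const i915_reg_t *regs = gen11_shadowed_regs;

	/* gen11_shadowed_regs is sorted by offset, so a binary search works. */
	return BSEARCH(offset, regs, ARRAY_SIZE(gen11_shadowed_regs),
		       mmio_reg_cmp);
}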
#define __gen8_reg_write_fw_domains(offset) \ #define __gen8_reg_write_fw_domains(offset) \
({ \ ({ \
enum forcewake_domains __fwd; \ enum forcewake_domains __fwd; \
@ -929,6 +971,14 @@ static const struct intel_forcewake_range __chv_fw_ranges[] = {
__fwd; \ __fwd; \
}) })
#define __gen11_fwtable_reg_write_fw_domains(offset) \
({ \
enum forcewake_domains __fwd = 0; \
if (GEN11_NEEDS_FORCE_WAKE((offset)) && !is_gen11_shadowed(offset)) \
__fwd = find_fw_domain(dev_priv, offset); \
__fwd; \
})
/* *Must* be sorted by offset ranges! See intel_fw_table_check(). */ /* *Must* be sorted by offset ranges! See intel_fw_table_check(). */
static const struct intel_forcewake_range __gen9_fw_ranges[] = { static const struct intel_forcewake_range __gen9_fw_ranges[] = {
GEN_FW_RANGE(0x0, 0xaff, FORCEWAKE_BLITTER), GEN_FW_RANGE(0x0, 0xaff, FORCEWAKE_BLITTER),
@ -965,6 +1015,40 @@ static const struct intel_forcewake_range __gen9_fw_ranges[] = {
GEN_FW_RANGE(0x30000, 0x3ffff, FORCEWAKE_MEDIA), GEN_FW_RANGE(0x30000, 0x3ffff, FORCEWAKE_MEDIA),
}; };
/* *Must* be sorted by offset ranges! See intel_fw_table_check(). */
static const struct intel_forcewake_range __gen11_fw_ranges[] = {
GEN_FW_RANGE(0x0, 0xaff, FORCEWAKE_BLITTER),
GEN_FW_RANGE(0xb00, 0x1fff, 0), /* uncore range */
GEN_FW_RANGE(0x2000, 0x26ff, FORCEWAKE_RENDER),
GEN_FW_RANGE(0x2700, 0x2fff, FORCEWAKE_BLITTER),
GEN_FW_RANGE(0x3000, 0x3fff, FORCEWAKE_RENDER),
GEN_FW_RANGE(0x4000, 0x51ff, FORCEWAKE_BLITTER),
GEN_FW_RANGE(0x5200, 0x7fff, FORCEWAKE_RENDER),
GEN_FW_RANGE(0x8000, 0x813f, FORCEWAKE_BLITTER),
GEN_FW_RANGE(0x8140, 0x815f, FORCEWAKE_RENDER),
GEN_FW_RANGE(0x8160, 0x82ff, FORCEWAKE_BLITTER),
GEN_FW_RANGE(0x8300, 0x84ff, FORCEWAKE_RENDER),
GEN_FW_RANGE(0x8500, 0x8bff, FORCEWAKE_BLITTER),
GEN_FW_RANGE(0x8c00, 0x8cff, FORCEWAKE_RENDER),
GEN_FW_RANGE(0x8d00, 0x93ff, FORCEWAKE_BLITTER),
GEN_FW_RANGE(0x9400, 0x97ff, FORCEWAKE_ALL),
GEN_FW_RANGE(0x9800, 0xafff, FORCEWAKE_BLITTER),
GEN_FW_RANGE(0xb000, 0xb47f, FORCEWAKE_RENDER),
GEN_FW_RANGE(0xb480, 0xdfff, FORCEWAKE_BLITTER),
GEN_FW_RANGE(0xe000, 0xe8ff, FORCEWAKE_RENDER),
GEN_FW_RANGE(0xe900, 0x243ff, FORCEWAKE_BLITTER),
GEN_FW_RANGE(0x24400, 0x247ff, FORCEWAKE_RENDER),
GEN_FW_RANGE(0x24800, 0x3ffff, FORCEWAKE_BLITTER),
GEN_FW_RANGE(0x40000, 0x1bffff, 0),
GEN_FW_RANGE(0x1c0000, 0x1c3fff, FORCEWAKE_MEDIA_VDBOX0),
GEN_FW_RANGE(0x1c4000, 0x1c7fff, FORCEWAKE_MEDIA_VDBOX1),
GEN_FW_RANGE(0x1c8000, 0x1cbfff, FORCEWAKE_MEDIA_VEBOX0),
GEN_FW_RANGE(0x1cc000, 0x1cffff, FORCEWAKE_BLITTER),
GEN_FW_RANGE(0x1d0000, 0x1d3fff, FORCEWAKE_MEDIA_VDBOX2),
GEN_FW_RANGE(0x1d4000, 0x1d7fff, FORCEWAKE_MEDIA_VDBOX3),
GEN_FW_RANGE(0x1d8000, 0x1dbfff, FORCEWAKE_MEDIA_VEBOX1)
};
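The "sorted by offset ranges" requirement exists because find_fw_domain() binary-searches the table. A self-contained sketch of that kind of lookup (simplified types; fw_range and fw_range_lookup() are illustrative, not the driver's):

struct fw_range {
	u32 start, end;
	unsigned int domains;
};

/* Ranges must be sorted by start offset and non-overlapping. */
static unsigned int fw_range_lookup(const struct fw_range *ranges,
				    unsigned int count, u32 offset)
{
	unsigned int lo = 0, hi = count;

	while (lo < hi) {
		unsigned int mid = (lo + hi) / 2;

		if (offset < ranges[mid].start)
			hi = mid;
		else if (offset > ranges[mid].end)
			lo = mid + 1;
		else
			return ranges[mid].domains;	/* hit */
	}

	return 0;	/* no forcewake needed for this offset */
}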
static void static void
ilk_dummy_write(struct drm_i915_private *dev_priv) ilk_dummy_write(struct drm_i915_private *dev_priv)
{ {
@ -1095,7 +1179,12 @@ func##_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) {
} }
#define __gen6_read(x) __gen_read(gen6, x) #define __gen6_read(x) __gen_read(gen6, x)
#define __fwtable_read(x) __gen_read(fwtable, x) #define __fwtable_read(x) __gen_read(fwtable, x)
#define __gen11_fwtable_read(x) __gen_read(gen11_fwtable, x)
__gen11_fwtable_read(8)
__gen11_fwtable_read(16)
__gen11_fwtable_read(32)
__gen11_fwtable_read(64)
__fwtable_read(8) __fwtable_read(8)
__fwtable_read(16) __fwtable_read(16)
__fwtable_read(32) __fwtable_read(32)
@ -1105,6 +1194,7 @@ __gen6_read(16)
__gen6_read(32) __gen6_read(32)
__gen6_read(64) __gen6_read(64)
#undef __gen11_fwtable_read
#undef __fwtable_read #undef __fwtable_read
#undef __gen6_read #undef __gen6_read
#undef GEN6_READ_FOOTER #undef GEN6_READ_FOOTER
@ -1181,7 +1271,11 @@ func##_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, boo
} }
#define __gen8_write(x) __gen_write(gen8, x) #define __gen8_write(x) __gen_write(gen8, x)
#define __fwtable_write(x) __gen_write(fwtable, x) #define __fwtable_write(x) __gen_write(fwtable, x)
#define __gen11_fwtable_write(x) __gen_write(gen11_fwtable, x)
__gen11_fwtable_write(8)
__gen11_fwtable_write(16)
__gen11_fwtable_write(32)
__fwtable_write(8) __fwtable_write(8)
__fwtable_write(16) __fwtable_write(16)
__fwtable_write(32) __fwtable_write(32)
@ -1192,6 +1286,7 @@ __gen6_write(8)
__gen6_write(16) __gen6_write(16)
__gen6_write(32) __gen6_write(32)
#undef __gen11_fwtable_write
#undef __fwtable_write #undef __fwtable_write
#undef __gen8_write #undef __gen8_write
#undef __gen6_write #undef __gen6_write
@ -1240,6 +1335,13 @@ static void fw_domain_init(struct drm_i915_private *dev_priv,
BUILD_BUG_ON(FORCEWAKE_RENDER != (1 << FW_DOMAIN_ID_RENDER)); BUILD_BUG_ON(FORCEWAKE_RENDER != (1 << FW_DOMAIN_ID_RENDER));
BUILD_BUG_ON(FORCEWAKE_BLITTER != (1 << FW_DOMAIN_ID_BLITTER)); BUILD_BUG_ON(FORCEWAKE_BLITTER != (1 << FW_DOMAIN_ID_BLITTER));
BUILD_BUG_ON(FORCEWAKE_MEDIA != (1 << FW_DOMAIN_ID_MEDIA)); BUILD_BUG_ON(FORCEWAKE_MEDIA != (1 << FW_DOMAIN_ID_MEDIA));
BUILD_BUG_ON(FORCEWAKE_MEDIA_VDBOX0 != (1 << FW_DOMAIN_ID_MEDIA_VDBOX0));
BUILD_BUG_ON(FORCEWAKE_MEDIA_VDBOX1 != (1 << FW_DOMAIN_ID_MEDIA_VDBOX1));
BUILD_BUG_ON(FORCEWAKE_MEDIA_VDBOX2 != (1 << FW_DOMAIN_ID_MEDIA_VDBOX2));
BUILD_BUG_ON(FORCEWAKE_MEDIA_VDBOX3 != (1 << FW_DOMAIN_ID_MEDIA_VDBOX3));
BUILD_BUG_ON(FORCEWAKE_MEDIA_VEBOX0 != (1 << FW_DOMAIN_ID_MEDIA_VEBOX0));
BUILD_BUG_ON(FORCEWAKE_MEDIA_VEBOX1 != (1 << FW_DOMAIN_ID_MEDIA_VEBOX1));
d->mask = BIT(domain_id); d->mask = BIT(domain_id);
@ -1267,7 +1369,34 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv)
dev_priv->uncore.fw_clear = _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL); dev_priv->uncore.fw_clear = _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL);
} }
if (INTEL_GEN(dev_priv) >= 9) { if (INTEL_GEN(dev_priv) >= 11) {
int i;
dev_priv->uncore.funcs.force_wake_get = fw_domains_get;
dev_priv->uncore.funcs.force_wake_put = fw_domains_put;
fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER,
FORCEWAKE_RENDER_GEN9,
FORCEWAKE_ACK_RENDER_GEN9);
fw_domain_init(dev_priv, FW_DOMAIN_ID_BLITTER,
FORCEWAKE_BLITTER_GEN9,
FORCEWAKE_ACK_BLITTER_GEN9);
for (i = 0; i < I915_MAX_VCS; i++) {
if (!HAS_ENGINE(dev_priv, _VCS(i)))
continue;
fw_domain_init(dev_priv, FW_DOMAIN_ID_MEDIA_VDBOX0 + i,
FORCEWAKE_MEDIA_VDBOX_GEN11(i),
FORCEWAKE_ACK_MEDIA_VDBOX_GEN11(i));
}
for (i = 0; i < I915_MAX_VECS; i++) {
if (!HAS_ENGINE(dev_priv, _VECS(i)))
continue;
fw_domain_init(dev_priv, FW_DOMAIN_ID_MEDIA_VEBOX0 + i,
FORCEWAKE_MEDIA_VEBOX_GEN11(i),
FORCEWAKE_ACK_MEDIA_VEBOX_GEN11(i));
}
} else if (IS_GEN9(dev_priv) || IS_GEN10(dev_priv)) {
dev_priv->uncore.funcs.force_wake_get = dev_priv->uncore.funcs.force_wake_get =
fw_domains_get_with_fallback; fw_domains_get_with_fallback;
dev_priv->uncore.funcs.force_wake_put = fw_domains_put; dev_priv->uncore.funcs.force_wake_put = fw_domains_put;
@ -1422,10 +1551,14 @@ void intel_uncore_init(struct drm_i915_private *dev_priv)
ASSIGN_WRITE_MMIO_VFUNCS(dev_priv, gen8); ASSIGN_WRITE_MMIO_VFUNCS(dev_priv, gen8);
ASSIGN_READ_MMIO_VFUNCS(dev_priv, gen6); ASSIGN_READ_MMIO_VFUNCS(dev_priv, gen6);
} }
} else { } else if (IS_GEN(dev_priv, 9, 10)) {
ASSIGN_FW_DOMAINS_TABLE(__gen9_fw_ranges); ASSIGN_FW_DOMAINS_TABLE(__gen9_fw_ranges);
ASSIGN_WRITE_MMIO_VFUNCS(dev_priv, fwtable); ASSIGN_WRITE_MMIO_VFUNCS(dev_priv, fwtable);
ASSIGN_READ_MMIO_VFUNCS(dev_priv, fwtable); ASSIGN_READ_MMIO_VFUNCS(dev_priv, fwtable);
} else {
ASSIGN_FW_DOMAINS_TABLE(__gen11_fw_ranges);
ASSIGN_WRITE_MMIO_VFUNCS(dev_priv, gen11_fwtable);
ASSIGN_READ_MMIO_VFUNCS(dev_priv, gen11_fwtable);
} }
iosf_mbi_register_pmic_bus_access_notifier( iosf_mbi_register_pmic_bus_access_notifier(
@ -1994,7 +2127,9 @@ intel_uncore_forcewake_for_read(struct drm_i915_private *dev_priv,
u32 offset = i915_mmio_reg_offset(reg); u32 offset = i915_mmio_reg_offset(reg);
enum forcewake_domains fw_domains; enum forcewake_domains fw_domains;
if (HAS_FWTABLE(dev_priv)) { if (INTEL_GEN(dev_priv) >= 11) {
fw_domains = __gen11_fwtable_reg_read_fw_domains(offset);
} else if (HAS_FWTABLE(dev_priv)) {
fw_domains = __fwtable_reg_read_fw_domains(offset); fw_domains = __fwtable_reg_read_fw_domains(offset);
} else if (INTEL_GEN(dev_priv) >= 6) { } else if (INTEL_GEN(dev_priv) >= 6) {
fw_domains = __gen6_reg_read_fw_domains(offset); fw_domains = __gen6_reg_read_fw_domains(offset);
@ -2015,7 +2150,9 @@ intel_uncore_forcewake_for_write(struct drm_i915_private *dev_priv,
u32 offset = i915_mmio_reg_offset(reg); u32 offset = i915_mmio_reg_offset(reg);
enum forcewake_domains fw_domains; enum forcewake_domains fw_domains;
if (HAS_FWTABLE(dev_priv) && !IS_VALLEYVIEW(dev_priv)) { if (INTEL_GEN(dev_priv) >= 11) {
fw_domains = __gen11_fwtable_reg_write_fw_domains(offset);
} else if (HAS_FWTABLE(dev_priv) && !IS_VALLEYVIEW(dev_priv)) {
fw_domains = __fwtable_reg_write_fw_domains(offset); fw_domains = __fwtable_reg_write_fw_domains(offset);
} else if (IS_GEN8(dev_priv)) { } else if (IS_GEN8(dev_priv)) {
fw_domains = __gen8_reg_write_fw_domains(offset); fw_domains = __gen8_reg_write_fw_domains(offset);

View File

@ -37,17 +37,28 @@ enum forcewake_domain_id {
FW_DOMAIN_ID_RENDER = 0, FW_DOMAIN_ID_RENDER = 0,
FW_DOMAIN_ID_BLITTER, FW_DOMAIN_ID_BLITTER,
FW_DOMAIN_ID_MEDIA, FW_DOMAIN_ID_MEDIA,
FW_DOMAIN_ID_MEDIA_VDBOX0,
FW_DOMAIN_ID_MEDIA_VDBOX1,
FW_DOMAIN_ID_MEDIA_VDBOX2,
FW_DOMAIN_ID_MEDIA_VDBOX3,
FW_DOMAIN_ID_MEDIA_VEBOX0,
FW_DOMAIN_ID_MEDIA_VEBOX1,
FW_DOMAIN_ID_COUNT FW_DOMAIN_ID_COUNT
}; };
enum forcewake_domains { enum forcewake_domains {
FORCEWAKE_RENDER = BIT(FW_DOMAIN_ID_RENDER), FORCEWAKE_RENDER = BIT(FW_DOMAIN_ID_RENDER),
FORCEWAKE_BLITTER = BIT(FW_DOMAIN_ID_BLITTER), FORCEWAKE_BLITTER = BIT(FW_DOMAIN_ID_BLITTER),
FORCEWAKE_MEDIA = BIT(FW_DOMAIN_ID_MEDIA), FORCEWAKE_MEDIA = BIT(FW_DOMAIN_ID_MEDIA),
FORCEWAKE_ALL = (FORCEWAKE_RENDER | FORCEWAKE_MEDIA_VDBOX0 = BIT(FW_DOMAIN_ID_MEDIA_VDBOX0),
FORCEWAKE_BLITTER | FORCEWAKE_MEDIA_VDBOX1 = BIT(FW_DOMAIN_ID_MEDIA_VDBOX1),
FORCEWAKE_MEDIA) FORCEWAKE_MEDIA_VDBOX2 = BIT(FW_DOMAIN_ID_MEDIA_VDBOX2),
FORCEWAKE_MEDIA_VDBOX3 = BIT(FW_DOMAIN_ID_MEDIA_VDBOX3),
FORCEWAKE_MEDIA_VEBOX0 = BIT(FW_DOMAIN_ID_MEDIA_VEBOX0),
FORCEWAKE_MEDIA_VEBOX1 = BIT(FW_DOMAIN_ID_MEDIA_VEBOX1),
FORCEWAKE_ALL = BIT(FW_DOMAIN_ID_COUNT) - 1
}; };
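With the nine domain IDs now defined before FW_DOMAIN_ID_COUNT, BIT(FW_DOMAIN_ID_COUNT) - 1 evaluates to 0x1ff, i.e. every defined domain bit, so FORCEWAKE_ALL tracks the enum automatically as domains are added. An illustrative compile-time check (not part of this series; BUILD_BUG_ON() needs function scope, e.g. somewhere in an init path):

/* Hypothetical sanity check: FORCEWAKE_ALL covers exactly the defined IDs. */
BUILD_BUG_ON(FORCEWAKE_ALL != GENMASK(FW_DOMAIN_ID_COUNT - 1, 0));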
struct intel_uncore_funcs { struct intel_uncore_funcs {

View File

@ -964,7 +964,7 @@ static int gpu_write(struct i915_vma *vma,
u32 dword, u32 dword,
u32 value) u32 value)
{ {
struct drm_i915_gem_request *rq; struct i915_request *rq;
struct i915_vma *batch; struct i915_vma *batch;
int flags = 0; int flags = 0;
int err; int err;
@ -975,7 +975,7 @@ static int gpu_write(struct i915_vma *vma,
if (err) if (err)
return err; return err;
rq = i915_gem_request_alloc(engine, ctx); rq = i915_request_alloc(engine, ctx);
if (IS_ERR(rq)) if (IS_ERR(rq))
return PTR_ERR(rq); return PTR_ERR(rq);
@ -1003,7 +1003,7 @@ static int gpu_write(struct i915_vma *vma,
reservation_object_unlock(vma->resv); reservation_object_unlock(vma->resv);
err_request: err_request:
__i915_add_request(rq, err == 0); __i915_request_add(rq, err == 0);
return err; return err;
} }

View File

@ -178,7 +178,7 @@ static int gpu_set(struct drm_i915_gem_object *obj,
u32 v) u32 v)
{ {
struct drm_i915_private *i915 = to_i915(obj->base.dev); struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct drm_i915_gem_request *rq; struct i915_request *rq;
struct i915_vma *vma; struct i915_vma *vma;
u32 *cs; u32 *cs;
int err; int err;
@ -191,7 +191,7 @@ static int gpu_set(struct drm_i915_gem_object *obj,
if (IS_ERR(vma)) if (IS_ERR(vma))
return PTR_ERR(vma); return PTR_ERR(vma);
rq = i915_gem_request_alloc(i915->engine[RCS], i915->kernel_context); rq = i915_request_alloc(i915->engine[RCS], i915->kernel_context);
if (IS_ERR(rq)) { if (IS_ERR(rq)) {
i915_vma_unpin(vma); i915_vma_unpin(vma);
return PTR_ERR(rq); return PTR_ERR(rq);
@ -199,7 +199,7 @@ static int gpu_set(struct drm_i915_gem_object *obj,
cs = intel_ring_begin(rq, 4); cs = intel_ring_begin(rq, 4);
if (IS_ERR(cs)) { if (IS_ERR(cs)) {
__i915_add_request(rq, false); __i915_request_add(rq, false);
i915_vma_unpin(vma); i915_vma_unpin(vma);
return PTR_ERR(cs); return PTR_ERR(cs);
} }
@ -229,7 +229,7 @@ static int gpu_set(struct drm_i915_gem_object *obj,
reservation_object_add_excl_fence(obj->resv, &rq->fence); reservation_object_add_excl_fence(obj->resv, &rq->fence);
reservation_object_unlock(obj->resv); reservation_object_unlock(obj->resv);
__i915_add_request(rq, true); __i915_request_add(rq, true);
return 0; return 0;
} }

View File

@ -114,7 +114,7 @@ static int gpu_fill(struct drm_i915_gem_object *obj,
struct drm_i915_private *i915 = to_i915(obj->base.dev); struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct i915_address_space *vm = struct i915_address_space *vm =
ctx->ppgtt ? &ctx->ppgtt->base : &i915->ggtt.base; ctx->ppgtt ? &ctx->ppgtt->base : &i915->ggtt.base;
struct drm_i915_gem_request *rq; struct i915_request *rq;
struct i915_vma *vma; struct i915_vma *vma;
struct i915_vma *batch; struct i915_vma *batch;
unsigned int flags; unsigned int flags;
@ -152,7 +152,7 @@ static int gpu_fill(struct drm_i915_gem_object *obj,
goto err_vma; goto err_vma;
} }
rq = i915_gem_request_alloc(engine, ctx); rq = i915_request_alloc(engine, ctx);
if (IS_ERR(rq)) { if (IS_ERR(rq)) {
err = PTR_ERR(rq); err = PTR_ERR(rq);
goto err_batch; goto err_batch;
@ -180,12 +180,12 @@ static int gpu_fill(struct drm_i915_gem_object *obj,
reservation_object_add_excl_fence(obj->resv, &rq->fence); reservation_object_add_excl_fence(obj->resv, &rq->fence);
reservation_object_unlock(obj->resv); reservation_object_unlock(obj->resv);
__i915_add_request(rq, true); __i915_request_add(rq, true);
return 0; return 0;
err_request: err_request:
__i915_add_request(rq, false); __i915_request_add(rq, false);
err_batch: err_batch:
i915_vma_unpin(batch); i915_vma_unpin(batch);
err_vma: err_vma:

View File

@ -407,7 +407,7 @@ static int igt_evict_contexts(void *arg)
mutex_lock(&i915->drm.struct_mutex); mutex_lock(&i915->drm.struct_mutex);
onstack_fence_init(&fence); onstack_fence_init(&fence);
do { do {
struct drm_i915_gem_request *rq; struct i915_request *rq;
struct i915_gem_context *ctx; struct i915_gem_context *ctx;
ctx = live_context(i915, file); ctx = live_context(i915, file);
@ -416,7 +416,7 @@ static int igt_evict_contexts(void *arg)
/* We will need some GGTT space for the rq's context */ /* We will need some GGTT space for the rq's context */
igt_evict_ctl.fail_if_busy = true; igt_evict_ctl.fail_if_busy = true;
rq = i915_gem_request_alloc(engine, ctx); rq = i915_request_alloc(engine, ctx);
igt_evict_ctl.fail_if_busy = false; igt_evict_ctl.fail_if_busy = false;
if (IS_ERR(rq)) { if (IS_ERR(rq)) {
@ -437,7 +437,7 @@ static int igt_evict_contexts(void *arg)
if (err < 0) if (err < 0)
break; break;
i915_add_request(rq); i915_request_add(rq);
count++; count++;
err = 0; err = 0;
} while(1); } while(1);

View File

@ -436,7 +436,7 @@ out:
static int make_obj_busy(struct drm_i915_gem_object *obj) static int make_obj_busy(struct drm_i915_gem_object *obj)
{ {
struct drm_i915_private *i915 = to_i915(obj->base.dev); struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct drm_i915_gem_request *rq; struct i915_request *rq;
struct i915_vma *vma; struct i915_vma *vma;
int err; int err;
@ -448,14 +448,14 @@ static int make_obj_busy(struct drm_i915_gem_object *obj)
if (err) if (err)
return err; return err;
rq = i915_gem_request_alloc(i915->engine[RCS], i915->kernel_context); rq = i915_request_alloc(i915->engine[RCS], i915->kernel_context);
if (IS_ERR(rq)) { if (IS_ERR(rq)) {
i915_vma_unpin(vma); i915_vma_unpin(vma);
return PTR_ERR(rq); return PTR_ERR(rq);
} }
i915_vma_move_to_active(vma, rq, 0); i915_vma_move_to_active(vma, rq, 0);
i915_add_request(rq); i915_request_add(rq);
i915_gem_object_set_active_reference(obj); i915_gem_object_set_active_reference(obj);
i915_vma_unpin(vma); i915_vma_unpin(vma);

View File

@ -11,7 +11,7 @@
*/ */
selftest(sanitycheck, i915_live_sanitycheck) /* keep first (igt selfcheck) */ selftest(sanitycheck, i915_live_sanitycheck) /* keep first (igt selfcheck) */
selftest(uncore, intel_uncore_live_selftests) selftest(uncore, intel_uncore_live_selftests)
selftest(requests, i915_gem_request_live_selftests) selftest(requests, i915_request_live_selftests)
selftest(objects, i915_gem_object_live_selftests) selftest(objects, i915_gem_object_live_selftests)
selftest(dmabuf, i915_gem_dmabuf_live_selftests) selftest(dmabuf, i915_gem_dmabuf_live_selftests)
selftest(coherency, i915_gem_coherency_live_selftests) selftest(coherency, i915_gem_coherency_live_selftests)

Some files were not shown because too many files have changed in this diff.