drm fixes for v5.7-rc6

i915 (two weeks):
 - Handle idling during i915_gem_evict_something busy loops (Chris)
 - Mark current submissions with a weak-dependency (Chris)
 - Propagate error from completed fences (Chris)
 - Fixes on execlist to avoid GPU hang situation (Chris)
 - Fixes couple deadlocks (Chris)
 - Timeslice preemption fixes (Chris)
 - Fix Display Port interrupt handling on Tiger Lake (Imre)
 - Reduce debug noise around Frame Buffer Compression (Peter)
 - Fix logic around IPC W/a for Coffee Lake and Kaby Lake (Sultan)
 - Avoid dereferencing a dead context (Chris)
 
 tegra:
 - tegra120/4 smmu fixes
 
  amdgpu:
 - Clockgating fixes
 - Fix fbdev with scatter/gather display
 - S4 fix for navi
 - Soft recovery for gfx10
 - Freesync fixes
 - Atomic check cursor fix
 - Add a gfxoff quirk
 - MST fix
 
 amdkfd:
 - Fix GEM reference counting
 
 meson:
 - error code propogation fix
 -----BEGIN PGP SIGNATURE-----
 
 iQIcBAABAgAGBQJevjILAAoJEAx081l5xIa+eGMQAJ13DoMXrJUdaH8XCsh77kLn
 cQzV/pHFJP8yjHaU1BFebbzcOMTIrNFXn2kncPgCzaPwh8mBoWeYxjMT3eWeJNvy
 CDEbBvPTL/j13Yq0H9jzp3LxAMI/atXyL/ujUI3AAEtDoMaMbJ5ai//4rHxoXeQ5
 UDXNk9VarbTGKXovJK2/Wq0xNMrMspLQi85+uejF6FA6uztgfyCk+Me5WvuRk2vx
 A5OFa3Gl+PhVqjwQ1pVwO8eii33YVosQ4TfnYMrDV1YnjB6Od8oOeUCUIio9lh2x
 3hIlhAR1CELdx66U2BmGPd0ZXiE8CBXZ8Eh8JBoFIms3S88kIKypeZglnJnIV9cA
 ELC4eXiXNViyHwhgq8+h6le3JXdiYJ+PvUVvKXtKw2N9w67pUP8q571jwA4PGnTv
 9iAiXc55K9cBTbIiL4BE9zU/Ap7eMkMMFiQsQeOtXZb8PrhvlfVChDpfcESjh6uR
 9qIg1JyRYiOLZv3UOiR1MJVvjOssX0YzUsO4riw+5W1uaMNCcBFG9d4YphrRJjBK
 ReBu0YbI9ZwhMGldj8iXANdxqV7B2VELBquDg65ev6epFOw40skAqsIxAfAGRXO9
 fI7OWX25TPHqbLWZNBoSyYfMbsbMwfwX+5j7Sg0cF2T/CYXlCd1rJgYye55ifE3b
 wYC4wlYNKj9K0LQs8Rik
 =b/ek
 -----END PGP SIGNATURE-----

Merge tag 'drm-fixes-2020-05-15' of git://anongit.freedesktop.org/drm/drm

Pull drm fixes from Dave Airlie:
 "As mentioned last week an i915 PR came in late, but I left it, so the
  i915 bits of this cover 2 weeks, which is why it's likely a bit larger
  than usual.

  Otherwise it's mostly amdgpu fixes, one tegra fix, one meson fix.

  i915:
   - Handle idling during i915_gem_evict_something busy loops (Chris)
   - Mark current submissions with a weak-dependency (Chris)
   - Propagate error from completed fences (Chris)
   - Fixes on execlist to avoid GPU hang situation (Chris)
   - Fixes couple deadlocks (Chris)
   - Timeslice preemption fixes (Chris)
   - Fix Display Port interrupt handling on Tiger Lake (Imre)
   - Reduce debug noise around Frame Buffer Compression (Peter)
   - Fix logic around IPC W/a for Coffee Lake and Kaby Lake (Sultan)
   - Avoid dereferencing a dead context (Chris)

  tegra:
   - tegra120/4 smmu fixes

  amdgpu:
   - Clockgating fixes
   - Fix fbdev with scatter/gather display
   - S4 fix for navi
   - Soft recovery for gfx10
   - Freesync fixes
   - Atomic check cursor fix
   - Add a gfxoff quirk
   - MST fix

  amdkfd:
   - Fix GEM reference counting

  meson:
   - error code propogation fix"

* tag 'drm-fixes-2020-05-15' of git://anongit.freedesktop.org/drm/drm: (29 commits)
  drm/i915: Handle idling during i915_gem_evict_something busy loops
  drm/meson: pm resume add return errno branch
  drm/amd/amdgpu: Update update_config() logic
  drm/amd/amdgpu: add raven1 part to the gfxoff quirk list
  drm/i915: Mark concurrent submissions with a weak-dependency
  drm/i915: Propagate error from completed fences
  drm/i915/gvt: Fix kernel oops for 3-level ppgtt guest
  drm/i915/gvt: Init DPLL/DDI vreg for virtual display instead of inheritance.
  drm/amd/display: add basic atomic check for cursor plane
  drm/amd/display: Fix vblank and pageflip event handling for FreeSync
  drm/amdgpu: implement soft_recovery for gfx10
  drm/amdgpu: enable hibernate support on Navi1X
  drm/amdgpu: Use GEM obj reference for KFD BOs
  drm/amdgpu: force fbdev into vram
  drm/amd/powerplay: perform PG ungate prior to CG ungate
  drm/amdgpu: drop unnecessary cancel_delayed_work_sync on PG ungate
  drm/amdgpu: disable MGCG/MGLS also on gfx CG ungate
  drm/i915/execlists: Track inflight CCID
  drm/i915/execlists: Avoid reusing the same logical CCID
  drm/i915/gem: Remove object_is_locked assertion from unpin_from_display_plane
  ...
This commit is contained in:
Linus Torvalds 2020-05-15 09:59:49 -07:00
commit e7cea79058
38 changed files with 439 additions and 274 deletions

View File

@ -945,6 +945,7 @@ struct amdgpu_device {
/* s3/s4 mask */
bool in_suspend;
bool in_hibernate;
/* record last mm index being written through WREG32*/
unsigned long last_mm_index;

View File

@ -1343,7 +1343,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
}
/* Free the BO*/
amdgpu_bo_unref(&mem->bo);
drm_gem_object_put_unlocked(&mem->bo->tbo.base);
mutex_destroy(&mem->lock);
kfree(mem);
@ -1688,7 +1688,8 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
| KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE
| KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
(*mem)->bo = amdgpu_bo_ref(bo);
drm_gem_object_get(&bo->tbo.base);
(*mem)->bo = bo;
(*mem)->va = va;
(*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;

View File

@ -1181,7 +1181,9 @@ static int amdgpu_pmops_freeze(struct device *dev)
struct amdgpu_device *adev = drm_dev->dev_private;
int r;
adev->in_hibernate = true;
r = amdgpu_device_suspend(drm_dev, true);
adev->in_hibernate = false;
if (r)
return r;
return amdgpu_asic_reset(adev);

View File

@ -133,8 +133,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
u32 cpp;
u64 flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
AMDGPU_GEM_CREATE_VRAM_CLEARED |
AMDGPU_GEM_CREATE_CPU_GTT_USWC;
AMDGPU_GEM_CREATE_VRAM_CLEARED;
info = drm_get_format_info(adev->ddev, mode_cmd);
cpp = info->cpp[0];

View File

@ -4273,7 +4273,7 @@ static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev,
/* === CGCG /CGLS for GFX 3D Only === */
gfx_v10_0_update_3d_clock_gating(adev, enable);
/* === MGCG + MGLS === */
/* gfx_v10_0_update_medium_grain_clock_gating(adev, enable); */
gfx_v10_0_update_medium_grain_clock_gating(adev, enable);
}
if (adev->cg_flags &
@ -4353,11 +4353,7 @@ static int gfx_v10_0_set_powergating_state(void *handle,
switch (adev->asic_type) {
case CHIP_NAVI10:
case CHIP_NAVI14:
if (!enable) {
amdgpu_gfx_off_ctrl(adev, false);
cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
} else
amdgpu_gfx_off_ctrl(adev, true);
amdgpu_gfx_off_ctrl(adev, enable);
break;
default:
break;
@ -4918,6 +4914,19 @@ static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
ref, mask);
}
static void gfx_v10_0_ring_soft_recovery(struct amdgpu_ring *ring,
unsigned vmid)
{
struct amdgpu_device *adev = ring->adev;
uint32_t value = 0;
value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
WREG32_SOC15(GC, 0, mmSQ_CMD, value);
}
static void
gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
uint32_t me, uint32_t pipe,
@ -5309,6 +5318,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
.emit_wreg = gfx_v10_0_ring_emit_wreg,
.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
.soft_recovery = gfx_v10_0_ring_soft_recovery,
};
static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {

View File

@ -1236,6 +1236,8 @@ static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
/* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
{ 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
/* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
{ 0, 0, 0, 0, 0 },
};
@ -5025,10 +5027,9 @@ static int gfx_v9_0_set_powergating_state(void *handle,
switch (adev->asic_type) {
case CHIP_RAVEN:
case CHIP_RENOIR:
if (!enable) {
if (!enable)
amdgpu_gfx_off_ctrl(adev, false);
cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
}
if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
@ -5052,12 +5053,7 @@ static int gfx_v9_0_set_powergating_state(void *handle,
amdgpu_gfx_off_ctrl(adev, true);
break;
case CHIP_VEGA12:
if (!enable) {
amdgpu_gfx_off_ctrl(adev, false);
cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
} else {
amdgpu_gfx_off_ctrl(adev, true);
}
amdgpu_gfx_off_ctrl(adev, enable);
break;
default:
break;

View File

@ -441,7 +441,7 @@ static void dm_vupdate_high_irq(void *interrupt_params)
/**
* dm_crtc_high_irq() - Handles CRTC interrupt
* @interrupt_params: ignored
* @interrupt_params: used for determining the CRTC instance
*
* Handles the CRTC/VSYNC interrupt by notfying DRM's VBLANK
* event handler.
@ -455,70 +455,6 @@ static void dm_crtc_high_irq(void *interrupt_params)
unsigned long flags;
acrtc = get_crtc_by_otg_inst(adev, irq_params->irq_src - IRQ_TYPE_VBLANK);
if (acrtc) {
acrtc_state = to_dm_crtc_state(acrtc->base.state);
DRM_DEBUG_VBL("crtc:%d, vupdate-vrr:%d\n",
acrtc->crtc_id,
amdgpu_dm_vrr_active(acrtc_state));
/* Core vblank handling at start of front-porch is only possible
* in non-vrr mode, as only there vblank timestamping will give
* valid results while done in front-porch. Otherwise defer it
* to dm_vupdate_high_irq after end of front-porch.
*/
if (!amdgpu_dm_vrr_active(acrtc_state))
drm_crtc_handle_vblank(&acrtc->base);
/* Following stuff must happen at start of vblank, for crc
* computation and below-the-range btr support in vrr mode.
*/
amdgpu_dm_crtc_handle_crc_irq(&acrtc->base);
if (acrtc_state->stream && adev->family >= AMDGPU_FAMILY_AI &&
acrtc_state->vrr_params.supported &&
acrtc_state->freesync_config.state == VRR_STATE_ACTIVE_VARIABLE) {
spin_lock_irqsave(&adev->ddev->event_lock, flags);
mod_freesync_handle_v_update(
adev->dm.freesync_module,
acrtc_state->stream,
&acrtc_state->vrr_params);
dc_stream_adjust_vmin_vmax(
adev->dm.dc,
acrtc_state->stream,
&acrtc_state->vrr_params.adjust);
spin_unlock_irqrestore(&adev->ddev->event_lock, flags);
}
}
}
#if defined(CONFIG_DRM_AMD_DC_DCN)
/**
* dm_dcn_crtc_high_irq() - Handles VStartup interrupt for DCN generation ASICs
* @interrupt params - interrupt parameters
*
* Notify DRM's vblank event handler at VSTARTUP
*
* Unlike DCE hardware, we trigger the handler at VSTARTUP. at which:
* * We are close enough to VUPDATE - the point of no return for hw
* * We are in the fixed portion of variable front porch when vrr is enabled
* * We are before VUPDATE, where double-buffered vrr registers are swapped
*
* It is therefore the correct place to signal vblank, send user flip events,
* and update VRR.
*/
static void dm_dcn_crtc_high_irq(void *interrupt_params)
{
struct common_irq_params *irq_params = interrupt_params;
struct amdgpu_device *adev = irq_params->adev;
struct amdgpu_crtc *acrtc;
struct dm_crtc_state *acrtc_state;
unsigned long flags;
acrtc = get_crtc_by_otg_inst(adev, irq_params->irq_src - IRQ_TYPE_VBLANK);
if (!acrtc)
return;
@ -528,22 +464,35 @@ static void dm_dcn_crtc_high_irq(void *interrupt_params)
amdgpu_dm_vrr_active(acrtc_state),
acrtc_state->active_planes);
/**
* Core vblank handling at start of front-porch is only possible
* in non-vrr mode, as only there vblank timestamping will give
* valid results while done in front-porch. Otherwise defer it
* to dm_vupdate_high_irq after end of front-porch.
*/
if (!amdgpu_dm_vrr_active(acrtc_state))
drm_crtc_handle_vblank(&acrtc->base);
/**
* Following stuff must happen at start of vblank, for crc
* computation and below-the-range btr support in vrr mode.
*/
amdgpu_dm_crtc_handle_crc_irq(&acrtc->base);
drm_crtc_handle_vblank(&acrtc->base);
/* BTR updates need to happen before VUPDATE on Vega and above. */
if (adev->family < AMDGPU_FAMILY_AI)
return;
spin_lock_irqsave(&adev->ddev->event_lock, flags);
if (acrtc_state->vrr_params.supported &&
if (acrtc_state->stream && acrtc_state->vrr_params.supported &&
acrtc_state->freesync_config.state == VRR_STATE_ACTIVE_VARIABLE) {
mod_freesync_handle_v_update(
adev->dm.freesync_module,
acrtc_state->stream,
&acrtc_state->vrr_params);
mod_freesync_handle_v_update(adev->dm.freesync_module,
acrtc_state->stream,
&acrtc_state->vrr_params);
dc_stream_adjust_vmin_vmax(
adev->dm.dc,
acrtc_state->stream,
&acrtc_state->vrr_params.adjust);
dc_stream_adjust_vmin_vmax(adev->dm.dc, acrtc_state->stream,
&acrtc_state->vrr_params.adjust);
}
/*
@ -556,7 +505,8 @@ static void dm_dcn_crtc_high_irq(void *interrupt_params)
* avoid race conditions between flip programming and completion,
* which could cause too early flip completion events.
*/
if (acrtc->pflip_status == AMDGPU_FLIP_SUBMITTED &&
if (adev->family >= AMDGPU_FAMILY_RV &&
acrtc->pflip_status == AMDGPU_FLIP_SUBMITTED &&
acrtc_state->active_planes == 0) {
if (acrtc->event) {
drm_crtc_send_vblank_event(&acrtc->base, acrtc->event);
@ -568,7 +518,6 @@ static void dm_dcn_crtc_high_irq(void *interrupt_params)
spin_unlock_irqrestore(&adev->ddev->event_lock, flags);
}
#endif
static int dm_set_clockgating_state(void *handle,
enum amd_clockgating_state state)
@ -2445,8 +2394,36 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev)
c_irq_params->adev = adev;
c_irq_params->irq_src = int_params.irq_source;
amdgpu_dm_irq_register_interrupt(
adev, &int_params, dm_crtc_high_irq, c_irq_params);
}
/* Use VUPDATE_NO_LOCK interrupt on DCN, which seems to correspond to
* the regular VUPDATE interrupt on DCE. We want DC_IRQ_SOURCE_VUPDATEx
* to trigger at end of each vblank, regardless of state of the lock,
* matching DCE behaviour.
*/
for (i = DCN_1_0__SRCID__OTG0_IHC_V_UPDATE_NO_LOCK_INTERRUPT;
i <= DCN_1_0__SRCID__OTG0_IHC_V_UPDATE_NO_LOCK_INTERRUPT + adev->mode_info.num_crtc - 1;
i++) {
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DCE, i, &adev->vupdate_irq);
if (r) {
DRM_ERROR("Failed to add vupdate irq id!\n");
return r;
}
int_params.int_context = INTERRUPT_HIGH_IRQ_CONTEXT;
int_params.irq_source =
dc_interrupt_to_irq_source(dc, i, 0);
c_irq_params = &adev->dm.vupdate_params[int_params.irq_source - DC_IRQ_SOURCE_VUPDATE1];
c_irq_params->adev = adev;
c_irq_params->irq_src = int_params.irq_source;
amdgpu_dm_irq_register_interrupt(adev, &int_params,
dm_dcn_crtc_high_irq, c_irq_params);
dm_vupdate_high_irq, c_irq_params);
}
/* Use GRPH_PFLIP interrupt */
@ -4453,10 +4430,6 @@ static inline int dm_set_vupdate_irq(struct drm_crtc *crtc, bool enable)
struct amdgpu_device *adev = crtc->dev->dev_private;
int rc;
/* Do not set vupdate for DCN hardware */
if (adev->family > AMDGPU_FAMILY_AI)
return 0;
irq_source = IRQ_TYPE_VUPDATE + acrtc->otg_inst;
rc = dc_interrupt_set(adev->dm.dc, irq_source, enable) ? 0 : -EBUSY;
@ -7882,6 +7855,7 @@ static int dm_update_plane_state(struct dc *dc,
struct drm_crtc_state *old_crtc_state, *new_crtc_state;
struct dm_crtc_state *dm_new_crtc_state, *dm_old_crtc_state;
struct dm_plane_state *dm_new_plane_state, *dm_old_plane_state;
struct amdgpu_crtc *new_acrtc;
bool needs_reset;
int ret = 0;
@ -7891,9 +7865,30 @@ static int dm_update_plane_state(struct dc *dc,
dm_new_plane_state = to_dm_plane_state(new_plane_state);
dm_old_plane_state = to_dm_plane_state(old_plane_state);
/*TODO Implement atomic check for cursor plane */
if (plane->type == DRM_PLANE_TYPE_CURSOR)
/*TODO Implement better atomic check for cursor plane */
if (plane->type == DRM_PLANE_TYPE_CURSOR) {
if (!enable || !new_plane_crtc ||
drm_atomic_plane_disabling(plane->state, new_plane_state))
return 0;
new_acrtc = to_amdgpu_crtc(new_plane_crtc);
if ((new_plane_state->crtc_w > new_acrtc->max_cursor_width) ||
(new_plane_state->crtc_h > new_acrtc->max_cursor_height)) {
DRM_DEBUG_ATOMIC("Bad cursor size %d x %d\n",
new_plane_state->crtc_w, new_plane_state->crtc_h);
return -EINVAL;
}
if (new_plane_state->crtc_x <= -new_acrtc->max_cursor_width ||
new_plane_state->crtc_y <= -new_acrtc->max_cursor_height) {
DRM_DEBUG_ATOMIC("Bad cursor position %d, %d\n",
new_plane_state->crtc_x, new_plane_state->crtc_y);
return -EINVAL;
}
return 0;
}
needs_reset = should_reset_plane(state, plane, old_plane_state,
new_plane_state);

View File

@ -398,15 +398,15 @@ static void update_config(void *handle, struct cp_psp_stream_config *config)
struct mod_hdcp_display *display = &hdcp_work[link_index].display;
struct mod_hdcp_link *link = &hdcp_work[link_index].link;
memset(display, 0, sizeof(*display));
memset(link, 0, sizeof(*link));
display->index = aconnector->base.index;
if (config->dpms_off) {
hdcp_remove_display(hdcp_work, link_index, aconnector);
return;
}
memset(display, 0, sizeof(*display));
memset(link, 0, sizeof(*link));
display->index = aconnector->base.index;
display->state = MOD_HDCP_DISPLAY_ACTIVE;
if (aconnector->dc_sink != NULL)

View File

@ -319,12 +319,12 @@ static void pp_dpm_en_umd_pstate(struct pp_hwmgr *hwmgr,
if (*level & profile_mode_mask) {
hwmgr->saved_dpm_level = hwmgr->dpm_level;
hwmgr->en_umd_pstate = true;
amdgpu_device_ip_set_clockgating_state(hwmgr->adev,
AMD_IP_BLOCK_TYPE_GFX,
AMD_CG_STATE_UNGATE);
amdgpu_device_ip_set_powergating_state(hwmgr->adev,
AMD_IP_BLOCK_TYPE_GFX,
AMD_PG_STATE_UNGATE);
amdgpu_device_ip_set_clockgating_state(hwmgr->adev,
AMD_IP_BLOCK_TYPE_GFX,
AMD_CG_STATE_UNGATE);
}
} else {
/* exit umd pstate, restore level, enable gfx cg*/

View File

@ -1476,7 +1476,7 @@ static int smu_disable_dpm(struct smu_context *smu)
bool use_baco = !smu->is_apu &&
((adev->in_gpu_reset &&
(amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO)) ||
(adev->in_runpm && amdgpu_asic_supports_baco(adev)));
((adev->in_runpm || adev->in_hibernate) && amdgpu_asic_supports_baco(adev)));
ret = smu_get_smc_version(smu, NULL, &smu_version);
if (ret) {
@ -1744,12 +1744,12 @@ static int smu_enable_umd_pstate(void *handle,
if (*level & profile_mode_mask) {
smu_dpm_ctx->saved_dpm_level = smu_dpm_ctx->dpm_level;
smu_dpm_ctx->enable_umd_pstate = true;
amdgpu_device_ip_set_clockgating_state(smu->adev,
AMD_IP_BLOCK_TYPE_GFX,
AMD_CG_STATE_UNGATE);
amdgpu_device_ip_set_powergating_state(smu->adev,
AMD_IP_BLOCK_TYPE_GFX,
AMD_PG_STATE_UNGATE);
amdgpu_device_ip_set_clockgating_state(smu->adev,
AMD_IP_BLOCK_TYPE_GFX,
AMD_CG_STATE_UNGATE);
}
} else {
/* exit umd pstate, restore level, enable gfx cg*/

View File

@ -485,8 +485,7 @@ static int intel_fbc_alloc_cfb(struct drm_i915_private *dev_priv,
if (!ret)
goto err_llb;
else if (ret > 1) {
DRM_INFO("Reducing the compressed framebuffer size. This may lead to less power savings than a non-reduced-size. Try to increase stolen memory size if available in BIOS.\n");
DRM_INFO_ONCE("Reducing the compressed framebuffer size. This may lead to less power savings than a non-reduced-size. Try to increase stolen memory size if available in BIOS.\n");
}
fbc->threshold = ret;

View File

@ -368,7 +368,6 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct i915_vma *vma;
GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
if (!atomic_read(&obj->bind_count))
return;
@ -400,12 +399,8 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
void
i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
{
struct drm_i915_gem_object *obj = vma->obj;
assert_object_held(obj);
/* Bump the LRU to try and avoid premature eviction whilst flipping */
i915_gem_object_bump_inactive_ggtt(obj);
i915_gem_object_bump_inactive_ggtt(vma->obj);
i915_vma_unpin(vma);
}

View File

@ -69,7 +69,13 @@ struct intel_context {
#define CONTEXT_NOPREEMPT 7
u32 *lrc_reg_state;
u64 lrc_desc;
union {
struct {
u32 lrca;
u32 ccid;
};
u64 desc;
} lrc;
u32 tag; /* cookie passed to HW to track this context on submission */
/* Time on GPU as tracked by the hw. */

View File

@ -333,13 +333,4 @@ intel_engine_has_preempt_reset(const struct intel_engine_cs *engine)
return intel_engine_has_preemption(engine);
}
static inline bool
intel_engine_has_timeslices(const struct intel_engine_cs *engine)
{
if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
return false;
return intel_engine_has_semaphores(engine);
}
#endif /* _INTEL_RINGBUFFER_H_ */

View File

@ -1295,6 +1295,12 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
if (engine->id == RENDER_CLASS && IS_GEN_RANGE(dev_priv, 4, 7))
drm_printf(m, "\tCCID: 0x%08x\n", ENGINE_READ(engine, CCID));
if (HAS_EXECLISTS(dev_priv)) {
drm_printf(m, "\tEL_STAT_HI: 0x%08x\n",
ENGINE_READ(engine, RING_EXECLIST_STATUS_HI));
drm_printf(m, "\tEL_STAT_LO: 0x%08x\n",
ENGINE_READ(engine, RING_EXECLIST_STATUS_LO));
}
drm_printf(m, "\tRING_START: 0x%08x\n",
ENGINE_READ(engine, RING_START));
drm_printf(m, "\tRING_HEAD: 0x%08x\n",

View File

@ -156,6 +156,20 @@ struct intel_engine_execlists {
*/
struct i915_priolist default_priolist;
/**
* @ccid: identifier for contexts submitted to this engine
*/
u32 ccid;
/**
* @yield: CCID at the time of the last semaphore-wait interrupt.
*
* Instead of leaving a semaphore busy-spinning on an engine, we would
* like to switch to another ready context, i.e. yielding the semaphore
* timeslice.
*/
u32 yield;
/**
* @error_interrupt: CS Master EIR
*
@ -295,8 +309,7 @@ struct intel_engine_cs {
u32 context_size;
u32 mmio_base;
unsigned int context_tag;
#define NUM_CONTEXT_TAG roundup_pow_of_two(2 * EXECLIST_MAX_PORTS)
unsigned long context_tag;
struct rb_node uabi_node;
@ -483,10 +496,11 @@ struct intel_engine_cs {
#define I915_ENGINE_SUPPORTS_STATS BIT(1)
#define I915_ENGINE_HAS_PREEMPTION BIT(2)
#define I915_ENGINE_HAS_SEMAPHORES BIT(3)
#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4)
#define I915_ENGINE_IS_VIRTUAL BIT(5)
#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6)
#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7)
#define I915_ENGINE_HAS_TIMESLICES BIT(4)
#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(5)
#define I915_ENGINE_IS_VIRTUAL BIT(6)
#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(7)
#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(8)
unsigned int flags;
/*
@ -584,6 +598,15 @@ intel_engine_has_semaphores(const struct intel_engine_cs *engine)
return engine->flags & I915_ENGINE_HAS_SEMAPHORES;
}
static inline bool
intel_engine_has_timeslices(const struct intel_engine_cs *engine)
{
if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
return false;
return engine->flags & I915_ENGINE_HAS_TIMESLICES;
}
static inline bool
intel_engine_needs_breadcrumb_tasklet(const struct intel_engine_cs *engine)
{

View File

@ -39,6 +39,15 @@ cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
}
}
if (iir & GT_WAIT_SEMAPHORE_INTERRUPT) {
WRITE_ONCE(engine->execlists.yield,
ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI));
ENGINE_TRACE(engine, "semaphore yield: %08x\n",
engine->execlists.yield);
if (del_timer(&engine->execlists.timer))
tasklet = true;
}
if (iir & GT_CONTEXT_SWITCH_INTERRUPT)
tasklet = true;
@ -228,7 +237,8 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt)
const u32 irqs =
GT_CS_MASTER_ERROR_INTERRUPT |
GT_RENDER_USER_INTERRUPT |
GT_CONTEXT_SWITCH_INTERRUPT;
GT_CONTEXT_SWITCH_INTERRUPT |
GT_WAIT_SEMAPHORE_INTERRUPT;
struct intel_uncore *uncore = gt->uncore;
const u32 dmask = irqs << 16 | irqs;
const u32 smask = irqs << 16;
@ -366,7 +376,8 @@ void gen8_gt_irq_postinstall(struct intel_gt *gt)
const u32 irqs =
GT_CS_MASTER_ERROR_INTERRUPT |
GT_RENDER_USER_INTERRUPT |
GT_CONTEXT_SWITCH_INTERRUPT;
GT_CONTEXT_SWITCH_INTERRUPT |
GT_WAIT_SEMAPHORE_INTERRUPT;
const u32 gt_interrupts[] = {
irqs << GEN8_RCS_IRQ_SHIFT | irqs << GEN8_BCS_IRQ_SHIFT,
irqs << GEN8_VCS0_IRQ_SHIFT | irqs << GEN8_VCS1_IRQ_SHIFT,

View File

@ -456,10 +456,10 @@ assert_priority_queue(const struct i915_request *prev,
* engine info, SW context ID and SW counter need to form a unique number
* (Context ID) per lrc.
*/
static u64
static u32
lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
{
u64 desc;
u32 desc;
desc = INTEL_LEGACY_32B_CONTEXT;
if (i915_vm_is_4lvl(ce->vm))
@ -470,21 +470,7 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
if (IS_GEN(engine->i915, 8))
desc |= GEN8_CTX_L3LLC_COHERENT;
desc |= i915_ggtt_offset(ce->state); /* bits 12-31 */
/*
* The following 32bits are copied into the OA reports (dword 2).
* Consider updating oa_get_render_ctx_id in i915_perf.c when changing
* anything below.
*/
if (INTEL_GEN(engine->i915) >= 11) {
desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT;
/* bits 48-53 */
desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT;
/* bits 61-63 */
}
return desc;
return i915_ggtt_offset(ce->state) | desc;
}
static inline unsigned int dword_in_page(void *addr)
@ -1192,7 +1178,7 @@ static void reset_active(struct i915_request *rq,
__execlists_update_reg_state(ce, engine, head);
/* We've switched away, so this should be a no-op, but intent matters */
ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
ce->lrc.desc |= CTX_DESC_FORCE_RESTORE;
}
static u32 intel_context_get_runtime(const struct intel_context *ce)
@ -1251,18 +1237,23 @@ __execlists_schedule_in(struct i915_request *rq)
if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
execlists_check_context(ce, engine);
ce->lrc_desc &= ~GENMASK_ULL(47, 37);
if (ce->tag) {
/* Use a fixed tag for OA and friends */
ce->lrc_desc |= (u64)ce->tag << 32;
GEM_BUG_ON(ce->tag <= BITS_PER_LONG);
ce->lrc.ccid = ce->tag;
} else {
/* We don't need a strict matching tag, just different values */
ce->lrc_desc |=
(u64)(++engine->context_tag % NUM_CONTEXT_TAG) <<
GEN11_SW_CTX_ID_SHIFT;
BUILD_BUG_ON(NUM_CONTEXT_TAG > GEN12_MAX_CONTEXT_HW_ID);
unsigned int tag = ffs(engine->context_tag);
GEM_BUG_ON(tag == 0 || tag >= BITS_PER_LONG);
clear_bit(tag - 1, &engine->context_tag);
ce->lrc.ccid = tag << (GEN11_SW_CTX_ID_SHIFT - 32);
BUILD_BUG_ON(BITS_PER_LONG > GEN12_MAX_CONTEXT_HW_ID);
}
ce->lrc.ccid |= engine->execlists.ccid;
__intel_gt_pm_get(engine->gt);
execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
intel_engine_context_in(engine);
@ -1302,7 +1293,8 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
static inline void
__execlists_schedule_out(struct i915_request *rq,
struct intel_engine_cs * const engine)
struct intel_engine_cs * const engine,
unsigned int ccid)
{
struct intel_context * const ce = rq->context;
@ -1320,6 +1312,14 @@ __execlists_schedule_out(struct i915_request *rq,
i915_request_completed(rq))
intel_engine_add_retire(engine, ce->timeline);
ccid >>= GEN11_SW_CTX_ID_SHIFT - 32;
ccid &= GEN12_MAX_CONTEXT_HW_ID;
if (ccid < BITS_PER_LONG) {
GEM_BUG_ON(ccid == 0);
GEM_BUG_ON(test_bit(ccid - 1, &engine->context_tag));
set_bit(ccid - 1, &engine->context_tag);
}
intel_context_update_runtime(ce);
intel_engine_context_out(engine);
execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
@ -1345,15 +1345,17 @@ execlists_schedule_out(struct i915_request *rq)
{
struct intel_context * const ce = rq->context;
struct intel_engine_cs *cur, *old;
u32 ccid;
trace_i915_request_out(rq);
ccid = rq->context->lrc.ccid;
old = READ_ONCE(ce->inflight);
do
cur = ptr_unmask_bits(old, 2) ? ptr_dec(old) : NULL;
while (!try_cmpxchg(&ce->inflight, &old, cur));
if (!cur)
__execlists_schedule_out(rq, old);
__execlists_schedule_out(rq, old, ccid);
i915_request_put(rq);
}
@ -1361,7 +1363,7 @@ execlists_schedule_out(struct i915_request *rq)
static u64 execlists_update_context(struct i915_request *rq)
{
struct intel_context *ce = rq->context;
u64 desc = ce->lrc_desc;
u64 desc = ce->lrc.desc;
u32 tail, prev;
/*
@ -1400,7 +1402,7 @@ static u64 execlists_update_context(struct i915_request *rq)
*/
wmb();
ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE;
ce->lrc.desc &= ~CTX_DESC_FORCE_RESTORE;
return desc;
}
@ -1719,6 +1721,9 @@ static void defer_request(struct i915_request *rq, struct list_head * const pl)
struct i915_request *w =
container_of(p->waiter, typeof(*w), sched);
if (p->flags & I915_DEPENDENCY_WEAK)
continue;
/* Leave semaphores spinning on the other engines */
if (w->engine != rq->engine)
continue;
@ -1754,7 +1759,8 @@ static void defer_active(struct intel_engine_cs *engine)
}
static bool
need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
need_timeslice(const struct intel_engine_cs *engine,
const struct i915_request *rq)
{
int hint;
@ -1768,6 +1774,32 @@ need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
return hint >= effective_prio(rq);
}
static bool
timeslice_yield(const struct intel_engine_execlists *el,
const struct i915_request *rq)
{
/*
* Once bitten, forever smitten!
*
* If the active context ever busy-waited on a semaphore,
* it will be treated as a hog until the end of its timeslice (i.e.
* until it is scheduled out and replaced by a new submission,
* possibly even its own lite-restore). The HW only sends an interrupt
* on the first miss, and we do know if that semaphore has been
* signaled, or even if it is now stuck on another semaphore. Play
* safe, yield if it might be stuck -- it will be given a fresh
* timeslice in the near future.
*/
return rq->context->lrc.ccid == READ_ONCE(el->yield);
}
static bool
timeslice_expired(const struct intel_engine_execlists *el,
const struct i915_request *rq)
{
return timer_expired(&el->timer) || timeslice_yield(el, rq);
}
static int
switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq)
{
@ -1783,8 +1815,7 @@ timeslice(const struct intel_engine_cs *engine)
return READ_ONCE(engine->props.timeslice_duration_ms);
}
static unsigned long
active_timeslice(const struct intel_engine_cs *engine)
static unsigned long active_timeslice(const struct intel_engine_cs *engine)
{
const struct intel_engine_execlists *execlists = &engine->execlists;
const struct i915_request *rq = *execlists->active;
@ -1946,13 +1977,14 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
last = NULL;
} else if (need_timeslice(engine, last) &&
timer_expired(&engine->execlists.timer)) {
timeslice_expired(execlists, last)) {
ENGINE_TRACE(engine,
"expired last=%llx:%lld, prio=%d, hint=%d\n",
"expired last=%llx:%lld, prio=%d, hint=%d, yield?=%s\n",
last->fence.context,
last->fence.seqno,
last->sched.attr.priority,
execlists->queue_priority_hint);
execlists->queue_priority_hint,
yesno(timeslice_yield(execlists, last)));
ring_set_paused(engine, 1);
defer_active(engine);
@ -2213,6 +2245,7 @@ done:
}
clear_ports(port + 1, last_port - port);
WRITE_ONCE(execlists->yield, -1);
execlists_submit_ports(engine);
set_preempt_timeout(engine, *active);
} else {
@ -3043,7 +3076,7 @@ __execlists_context_pin(struct intel_context *ce,
if (IS_ERR(vaddr))
return PTR_ERR(vaddr);
ce->lrc_desc = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
ce->lrc.lrca = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
__execlists_update_reg_state(ce, engine, ce->ring->tail);
@ -3072,7 +3105,7 @@ static void execlists_context_reset(struct intel_context *ce)
ce, ce->engine, ce->ring, true);
__execlists_update_reg_state(ce, ce->engine, ce->ring->tail);
ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
ce->lrc.desc |= CTX_DESC_FORCE_RESTORE;
}
static const struct intel_context_ops execlists_context_ops = {
@ -3541,7 +3574,7 @@ static void enable_execlists(struct intel_engine_cs *engine)
enable_error_interrupt(engine);
engine->context_tag = 0;
engine->context_tag = GENMASK(BITS_PER_LONG - 2, 0);
}
static bool unexpected_starting_state(struct intel_engine_cs *engine)
@ -3753,7 +3786,7 @@ out_replay:
head, ce->ring->tail);
__execlists_reset_reg_state(ce, engine);
__execlists_update_reg_state(ce, engine, head);
ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */
ce->lrc.desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */
unwind:
/* Push back any incomplete requests for replay after the reset. */
@ -4369,8 +4402,11 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
engine->flags |= I915_ENGINE_SUPPORTS_STATS;
if (!intel_vgpu_active(engine->i915)) {
engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
if (HAS_LOGICAL_RING_PREEMPTION(engine->i915))
if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) {
engine->flags |= I915_ENGINE_HAS_PREEMPTION;
if (IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
engine->flags |= I915_ENGINE_HAS_TIMESLICES;
}
}
if (INTEL_GEN(engine->i915) >= 12)
@ -4449,6 +4485,7 @@ logical_ring_default_irqs(struct intel_engine_cs *engine)
engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
engine->irq_keep_mask |= GT_CS_MASTER_ERROR_INTERRUPT << shift;
engine->irq_keep_mask |= GT_WAIT_SEMAPHORE_INTERRUPT << shift;
}
static void rcs_submission_override(struct intel_engine_cs *engine)
@ -4516,6 +4553,11 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
else
execlists->csb_size = GEN11_CSB_ENTRIES;
if (INTEL_GEN(engine->i915) >= 11) {
execlists->ccid |= engine->instance << (GEN11_ENGINE_INSTANCE_SHIFT - 32);
execlists->ccid |= engine->class << (GEN11_ENGINE_CLASS_SHIFT - 32);
}
reset_csb_pointers(engine);
/* Finally, take ownership and responsibility for cleanup! */

View File

@ -929,7 +929,7 @@ create_rewinder(struct intel_context *ce,
goto err;
}
cs = intel_ring_begin(rq, 10);
cs = intel_ring_begin(rq, 14);
if (IS_ERR(cs)) {
err = PTR_ERR(cs);
goto err;
@ -941,8 +941,8 @@ create_rewinder(struct intel_context *ce,
*cs++ = MI_SEMAPHORE_WAIT |
MI_SEMAPHORE_GLOBAL_GTT |
MI_SEMAPHORE_POLL |
MI_SEMAPHORE_SAD_NEQ_SDD;
*cs++ = 0;
MI_SEMAPHORE_SAD_GTE_SDD;
*cs++ = idx;
*cs++ = offset;
*cs++ = 0;
@ -951,6 +951,11 @@ create_rewinder(struct intel_context *ce,
*cs++ = offset + idx * sizeof(u32);
*cs++ = 0;
*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
*cs++ = offset;
*cs++ = 0;
*cs++ = idx + 1;
intel_ring_advance(rq, cs);
rq->sched.attr.priority = I915_PRIORITY_MASK;
@ -984,7 +989,7 @@ static int live_timeslice_rewind(void *arg)
for_each_engine(engine, gt, id) {
enum { A1, A2, B1 };
enum { X = 1, Y, Z };
enum { X = 1, Z, Y };
struct i915_request *rq[3] = {};
struct intel_context *ce;
unsigned long heartbeat;
@ -1017,13 +1022,13 @@ static int live_timeslice_rewind(void *arg)
goto err;
}
rq[0] = create_rewinder(ce, NULL, slot, 1);
rq[0] = create_rewinder(ce, NULL, slot, X);
if (IS_ERR(rq[0])) {
intel_context_put(ce);
goto err;
}
rq[1] = create_rewinder(ce, NULL, slot, 2);
rq[1] = create_rewinder(ce, NULL, slot, Y);
intel_context_put(ce);
if (IS_ERR(rq[1]))
goto err;
@ -1041,7 +1046,7 @@ static int live_timeslice_rewind(void *arg)
goto err;
}
rq[2] = create_rewinder(ce, rq[0], slot, 3);
rq[2] = create_rewinder(ce, rq[0], slot, Z);
intel_context_put(ce);
if (IS_ERR(rq[2]))
goto err;
@ -1055,15 +1060,12 @@ static int live_timeslice_rewind(void *arg)
GEM_BUG_ON(!timer_pending(&engine->execlists.timer));
/* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
GEM_BUG_ON(!i915_request_is_active(rq[A1]));
GEM_BUG_ON(!i915_request_is_active(rq[A2]));
GEM_BUG_ON(!i915_request_is_active(rq[B1]));
/* Wait for the timeslice to kick in */
del_timer(&engine->execlists.timer);
tasklet_hi_schedule(&engine->execlists.tasklet);
intel_engine_flush_submission(engine);
if (i915_request_is_active(rq[A2])) { /* semaphore yielded! */
/* Wait for the timeslice to kick in */
del_timer(&engine->execlists.timer);
tasklet_hi_schedule(&engine->execlists.tasklet);
intel_engine_flush_submission(engine);
}
/* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
GEM_BUG_ON(!i915_request_is_active(rq[A1]));
GEM_BUG_ON(!i915_request_is_active(rq[B1]));

View File

@ -217,7 +217,7 @@ static void guc_wq_item_append(struct intel_guc *guc,
static void guc_add_request(struct intel_guc *guc, struct i915_request *rq)
{
struct intel_engine_cs *engine = rq->engine;
u32 ctx_desc = lower_32_bits(rq->context->lrc_desc);
u32 ctx_desc = rq->context->lrc.ccid;
u32 ring_tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64);
guc_wq_item_append(guc, engine->guc_id, ctx_desc,

View File

@ -208,14 +208,41 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu)
SKL_FUSE_PG_DIST_STATUS(SKL_PG0) |
SKL_FUSE_PG_DIST_STATUS(SKL_PG1) |
SKL_FUSE_PG_DIST_STATUS(SKL_PG2);
vgpu_vreg_t(vgpu, LCPLL1_CTL) |=
LCPLL_PLL_ENABLE |
LCPLL_PLL_LOCK;
vgpu_vreg_t(vgpu, LCPLL2_CTL) |= LCPLL_PLL_ENABLE;
/*
* Only 1 PIPE enabled in current vGPU display and PIPE_A is
* tied to TRANSCODER_A in HW, so it's safe to assume PIPE_A,
* TRANSCODER_A can be enabled. PORT_x depends on the input of
* setup_virtual_dp_monitor, we can bind DPLL0 to any PORT_x
* so we fixed to DPLL0 here.
* Setup DPLL0: DP link clk 1620 MHz, non SSC, DP Mode
*/
vgpu_vreg_t(vgpu, DPLL_CTRL1) =
DPLL_CTRL1_OVERRIDE(DPLL_ID_SKL_DPLL0);
vgpu_vreg_t(vgpu, DPLL_CTRL1) |=
DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1620, DPLL_ID_SKL_DPLL0);
vgpu_vreg_t(vgpu, LCPLL1_CTL) =
LCPLL_PLL_ENABLE | LCPLL_PLL_LOCK;
vgpu_vreg_t(vgpu, DPLL_STATUS) = DPLL_LOCK(DPLL_ID_SKL_DPLL0);
/*
* Golden M/N are calculated based on:
* 24 bpp, 4 lanes, 154000 pixel clk (from virtual EDID),
* DP link clk 1620 MHz and non-constant_n.
* TODO: calculate DP link symbol clk and stream clk m/n.
*/
vgpu_vreg_t(vgpu, PIPE_DATA_M1(TRANSCODER_A)) = 63 << TU_SIZE_SHIFT;
vgpu_vreg_t(vgpu, PIPE_DATA_M1(TRANSCODER_A)) |= 0x5b425e;
vgpu_vreg_t(vgpu, PIPE_DATA_N1(TRANSCODER_A)) = 0x800000;
vgpu_vreg_t(vgpu, PIPE_LINK_M1(TRANSCODER_A)) = 0x3cd6e;
vgpu_vreg_t(vgpu, PIPE_LINK_N1(TRANSCODER_A)) = 0x80000;
}
if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B)) {
vgpu_vreg_t(vgpu, DPLL_CTRL2) &=
~DPLL_CTRL2_DDI_CLK_OFF(PORT_B);
vgpu_vreg_t(vgpu, DPLL_CTRL2) |=
DPLL_CTRL2_DDI_CLK_SEL(DPLL_ID_SKL_DPLL0, PORT_B);
vgpu_vreg_t(vgpu, DPLL_CTRL2) |=
DPLL_CTRL2_DDI_SEL_OVERRIDE(PORT_B);
vgpu_vreg_t(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDIB_DETECTED;
vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &=
~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK |
@ -236,6 +263,12 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu)
}
if (intel_vgpu_has_monitor_on_port(vgpu, PORT_C)) {
vgpu_vreg_t(vgpu, DPLL_CTRL2) &=
~DPLL_CTRL2_DDI_CLK_OFF(PORT_C);
vgpu_vreg_t(vgpu, DPLL_CTRL2) |=
DPLL_CTRL2_DDI_CLK_SEL(DPLL_ID_SKL_DPLL0, PORT_C);
vgpu_vreg_t(vgpu, DPLL_CTRL2) |=
DPLL_CTRL2_DDI_SEL_OVERRIDE(PORT_C);
vgpu_vreg_t(vgpu, SDEISR) |= SDE_PORTC_HOTPLUG_CPT;
vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &=
~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK |
@ -256,6 +289,12 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu)
}
if (intel_vgpu_has_monitor_on_port(vgpu, PORT_D)) {
vgpu_vreg_t(vgpu, DPLL_CTRL2) &=
~DPLL_CTRL2_DDI_CLK_OFF(PORT_D);
vgpu_vreg_t(vgpu, DPLL_CTRL2) |=
DPLL_CTRL2_DDI_CLK_SEL(DPLL_ID_SKL_DPLL0, PORT_D);
vgpu_vreg_t(vgpu, DPLL_CTRL2) |=
DPLL_CTRL2_DDI_SEL_OVERRIDE(PORT_D);
vgpu_vreg_t(vgpu, SDEISR) |= SDE_PORTD_HOTPLUG_CPT;
vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &=
~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK |

View File

@ -290,7 +290,7 @@ static void
shadow_context_descriptor_update(struct intel_context *ce,
struct intel_vgpu_workload *workload)
{
u64 desc = ce->lrc_desc;
u64 desc = ce->lrc.desc;
/*
* Update bits 0-11 of the context descriptor which includes flags
@ -300,7 +300,7 @@ shadow_context_descriptor_update(struct intel_context *ce,
desc |= (u64)workload->ctx_desc.addressing_mode <<
GEN8_CTX_ADDRESSING_MODE_SHIFT;
ce->lrc_desc = desc;
ce->lrc.desc = desc;
}
static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload)
@ -379,7 +379,11 @@ static void set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload,
for (i = 0; i < GVT_RING_CTX_NR_PDPS; i++) {
struct i915_page_directory * const pd =
i915_pd_entry(ppgtt->pd, i);
/* skip now as current i915 ppgtt alloc won't allocate
top level pdp for non 4-level table, won't impact
shadow ppgtt. */
if (!pd)
break;
px_dma(pd) = mm->ppgtt_mm.shadow_pdps[i];
}
}

View File

@ -128,6 +128,13 @@ search_again:
active = NULL;
INIT_LIST_HEAD(&eviction_list);
list_for_each_entry_safe(vma, next, &vm->bound_list, vm_link) {
if (vma == active) { /* now seen this vma twice */
if (flags & PIN_NONBLOCK)
break;
active = ERR_PTR(-EAGAIN);
}
/*
* We keep this list in a rough least-recently scanned order
* of active elements (inactive elements are cheap to reap).
@ -143,21 +150,12 @@ search_again:
* To notice when we complete one full cycle, we record the
* first active element seen, before moving it to the tail.
*/
if (i915_vma_is_active(vma)) {
if (vma == active) {
if (flags & PIN_NONBLOCK)
break;
if (active != ERR_PTR(-EAGAIN) && i915_vma_is_active(vma)) {
if (!active)
active = vma;
active = ERR_PTR(-EAGAIN);
}
if (active != ERR_PTR(-EAGAIN)) {
if (!active)
active = vma;
list_move_tail(&vma->vm_link, &vm->bound_list);
continue;
}
list_move_tail(&vma->vm_link, &vm->bound_list);
continue;
}
if (mark_free(&scan, vma, flags, &eviction_list))

View File

@ -1207,8 +1207,6 @@ static void engine_record_registers(struct intel_engine_coredump *ee)
static void record_request(const struct i915_request *request,
struct i915_request_coredump *erq)
{
const struct i915_gem_context *ctx;
erq->flags = request->fence.flags;
erq->context = request->fence.context;
erq->seqno = request->fence.seqno;
@ -1219,9 +1217,13 @@ static void record_request(const struct i915_request *request,
erq->pid = 0;
rcu_read_lock();
ctx = rcu_dereference(request->context->gem_context);
if (ctx)
erq->pid = pid_nr(ctx->pid);
if (!intel_context_is_closed(request->context)) {
const struct i915_gem_context *ctx;
ctx = rcu_dereference(request->context->gem_context);
if (ctx)
erq->pid = pid_nr(ctx->pid);
}
rcu_read_unlock();
}

View File

@ -3361,7 +3361,7 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv)
u32 de_pipe_masked = gen8_de_pipe_fault_mask(dev_priv) |
GEN8_PIPE_CDCLK_CRC_DONE;
u32 de_pipe_enables;
u32 de_port_masked = GEN8_AUX_CHANNEL_A;
u32 de_port_masked = gen8_de_port_aux_mask(dev_priv);
u32 de_port_enables;
u32 de_misc_masked = GEN8_DE_EDP_PSR;
enum pipe pipe;
@ -3369,18 +3369,8 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv)
if (INTEL_GEN(dev_priv) <= 10)
de_misc_masked |= GEN8_DE_MISC_GSE;
if (INTEL_GEN(dev_priv) >= 9) {
de_port_masked |= GEN9_AUX_CHANNEL_B | GEN9_AUX_CHANNEL_C |
GEN9_AUX_CHANNEL_D;
if (IS_GEN9_LP(dev_priv))
de_port_masked |= BXT_DE_PORT_GMBUS;
}
if (INTEL_GEN(dev_priv) >= 11)
de_port_masked |= ICL_AUX_CHANNEL_E;
if (IS_CNL_WITH_PORT_F(dev_priv) || INTEL_GEN(dev_priv) >= 11)
de_port_masked |= CNL_AUX_CHANNEL_F;
if (IS_GEN9_LP(dev_priv))
de_port_masked |= BXT_DE_PORT_GMBUS;
de_pipe_enables = de_pipe_masked | GEN8_PIPE_VBLANK |
GEN8_PIPE_FIFO_UNDERRUN;

View File

@ -1310,8 +1310,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
* dropped by GuC. They won't be part of the context
* ID in the OA reports, so squash those lower bits.
*/
stream->specific_ctx_id =
lower_32_bits(ce->lrc_desc) >> 12;
stream->specific_ctx_id = ce->lrc.lrca >> 12;
/*
* GuC uses the top bit to signal proxy submission, so
@ -1328,11 +1327,10 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);
/*
* Pick an unused context id
* 0 - (NUM_CONTEXT_TAG - 1) are used by other contexts
* 0 - BITS_PER_LONG are used by other contexts
* GEN12_MAX_CONTEXT_HW_ID (0x7ff) is used by idle context
*/
stream->specific_ctx_id = (GEN12_MAX_CONTEXT_HW_ID - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);
BUILD_BUG_ON((GEN12_MAX_CONTEXT_HW_ID - 1) < NUM_CONTEXT_TAG);
break;
}

View File

@ -3094,6 +3094,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
#define GT_BSD_CS_ERROR_INTERRUPT (1 << 15)
#define GT_BSD_USER_INTERRUPT (1 << 12)
#define GT_RENDER_L3_PARITY_ERROR_INTERRUPT_S1 (1 << 11) /* hsw+; rsvd on snb, ivb, vlv */
#define GT_WAIT_SEMAPHORE_INTERRUPT REG_BIT(11) /* bdw+ */
#define GT_CONTEXT_SWITCH_INTERRUPT (1 << 8)
#define GT_RENDER_L3_PARITY_ERROR_INTERRUPT (1 << 5) /* !snb */
#define GT_RENDER_PIPECTL_NOTIFY_INTERRUPT (1 << 4)

View File

@ -1017,11 +1017,15 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from)
GEM_BUG_ON(to == from);
GEM_BUG_ON(to->timeline == from->timeline);
if (i915_request_completed(from))
if (i915_request_completed(from)) {
i915_sw_fence_set_error_once(&to->submit, from->fence.error);
return 0;
}
if (to->engine->schedule) {
ret = i915_sched_node_add_dependency(&to->sched, &from->sched);
ret = i915_sched_node_add_dependency(&to->sched,
&from->sched,
I915_DEPENDENCY_EXTERNAL);
if (ret < 0)
return ret;
}
@ -1183,7 +1187,9 @@ __i915_request_await_execution(struct i915_request *to,
/* Couple the dependency tree for PI on this exposed to->fence */
if (to->engine->schedule) {
err = i915_sched_node_add_dependency(&to->sched, &from->sched);
err = i915_sched_node_add_dependency(&to->sched,
&from->sched,
I915_DEPENDENCY_WEAK);
if (err < 0)
return err;
}

View File

@ -456,7 +456,8 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
}
int i915_sched_node_add_dependency(struct i915_sched_node *node,
struct i915_sched_node *signal)
struct i915_sched_node *signal,
unsigned long flags)
{
struct i915_dependency *dep;
@ -465,8 +466,7 @@ int i915_sched_node_add_dependency(struct i915_sched_node *node,
return -ENOMEM;
if (!__i915_sched_node_add_dependency(node, signal, dep,
I915_DEPENDENCY_EXTERNAL |
I915_DEPENDENCY_ALLOC))
flags | I915_DEPENDENCY_ALLOC))
i915_dependency_free(dep);
return 0;

View File

@ -34,7 +34,8 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
unsigned long flags);
int i915_sched_node_add_dependency(struct i915_sched_node *node,
struct i915_sched_node *signal);
struct i915_sched_node *signal,
unsigned long flags);
void i915_sched_node_fini(struct i915_sched_node *node);

View File

@ -78,6 +78,7 @@ struct i915_dependency {
unsigned long flags;
#define I915_DEPENDENCY_ALLOC BIT(0)
#define I915_DEPENDENCY_EXTERNAL BIT(1)
#define I915_DEPENDENCY_WEAK BIT(2)
};
#endif /* _I915_SCHEDULER_TYPES_H_ */

View File

@ -1228,18 +1228,6 @@ int __i915_vma_unbind(struct i915_vma *vma)
lockdep_assert_held(&vma->vm->mutex);
/*
* First wait upon any activity as retiring the request may
* have side-effects such as unpinning or even unbinding this vma.
*
* XXX Actually waiting under the vm->mutex is a hinderance and
* should be pipelined wherever possible. In cases where that is
* unavoidable, we should lift the wait to before the mutex.
*/
ret = i915_vma_sync(vma);
if (ret)
return ret;
if (i915_vma_is_pinned(vma)) {
vma_print_allocator(vma, "is pinned");
return -EAGAIN;
@ -1313,15 +1301,20 @@ int i915_vma_unbind(struct i915_vma *vma)
if (!drm_mm_node_allocated(&vma->node))
return 0;
if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
/* XXX not always required: nop_clear_range */
wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm);
/* Optimistic wait before taking the mutex */
err = i915_vma_sync(vma);
if (err)
goto out_rpm;
if (i915_vma_is_pinned(vma)) {
vma_print_allocator(vma, "is pinned");
return -EAGAIN;
}
if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
/* XXX not always required: nop_clear_range */
wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm);
err = mutex_lock_interruptible(&vm->mutex);
if (err)
goto out_rpm;

View File

@ -4992,7 +4992,7 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *crtc_state,
* WaIncreaseLatencyIPCEnabled: kbl,cfl
* Display WA #1141: kbl,cfl
*/
if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) ||
if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) &&
dev_priv->ipc_enabled)
latency += 4;

View File

@ -173,7 +173,7 @@ static int igt_vma_create(void *arg)
}
nc = 0;
for_each_prime_number(num_ctx, 2 * NUM_CONTEXT_TAG) {
for_each_prime_number(num_ctx, 2 * BITS_PER_LONG) {
for (; nc < num_ctx; nc++) {
ctx = mock_context(i915, "mock");
if (!ctx)

View File

@ -412,9 +412,7 @@ static int __maybe_unused meson_drv_pm_resume(struct device *dev)
if (priv->afbcd.ops)
priv->afbcd.ops->init(priv);
drm_mode_config_helper_resume(priv->drm);
return 0;
return drm_mode_config_helper_resume(priv->drm);
}
static int compare_of(struct device *dev, void *data)

View File

@ -1039,6 +1039,7 @@ void tegra_drm_free(struct tegra_drm *tegra, size_t size, void *virt,
static bool host1x_drm_wants_iommu(struct host1x_device *dev)
{
struct host1x *host1x = dev_get_drvdata(dev->dev.parent);
struct iommu_domain *domain;
/*
@ -1076,7 +1077,7 @@ static bool host1x_drm_wants_iommu(struct host1x_device *dev)
* sufficient and whether or not the host1x is attached to an IOMMU
* doesn't matter.
*/
if (!domain && dma_get_mask(dev->dev.parent) <= DMA_BIT_MASK(32))
if (!domain && host1x_get_dma_mask(host1x) <= DMA_BIT_MASK(32))
return true;
return domain != NULL;

View File

@ -192,17 +192,55 @@ static void host1x_setup_sid_table(struct host1x *host)
}
}
static bool host1x_wants_iommu(struct host1x *host1x)
{
/*
* If we support addressing a maximum of 32 bits of physical memory
* and if the host1x firewall is enabled, there's no need to enable
* IOMMU support. This can happen for example on Tegra20, Tegra30
* and Tegra114.
*
* Tegra124 and later can address up to 34 bits of physical memory and
* many platforms come equipped with more than 2 GiB of system memory,
* which requires crossing the 4 GiB boundary. But there's a catch: on
* SoCs before Tegra186 (i.e. Tegra124 and Tegra210), the host1x can
* only address up to 32 bits of memory in GATHER opcodes, which means
* that command buffers need to either be in the first 2 GiB of system
* memory (which could quickly lead to memory exhaustion), or command
* buffers need to be treated differently from other buffers (which is
* not possible with the current ABI).
*
* A third option is to use the IOMMU in these cases to make sure all
* buffers will be mapped into a 32-bit IOVA space that host1x can
* address. This allows all of the system memory to be used and works
* within the limitations of the host1x on these SoCs.
*
* In summary, default to enable IOMMU on Tegra124 and later. For any
* of the earlier SoCs, only use the IOMMU for additional safety when
* the host1x firewall is disabled.
*/
if (host1x->info->dma_mask <= DMA_BIT_MASK(32)) {
if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
return false;
}
return true;
}
static struct iommu_domain *host1x_iommu_attach(struct host1x *host)
{
struct iommu_domain *domain = iommu_get_domain_for_dev(host->dev);
int err;
/*
* If the host1x firewall is enabled, there's no need to enable IOMMU
* support. Similarly, if host1x is already attached to an IOMMU (via
* the DMA API), don't try to attach again.
* We may not always want to enable IOMMU support (for example if the
* host1x firewall is already enabled and we don't support addressing
* more than 32 bits of physical memory), so check for that first.
*
* Similarly, if host1x is already attached to an IOMMU (via the DMA
* API), don't try to attach again.
*/
if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) || domain)
if (!host1x_wants_iommu(host) || domain)
return domain;
host->group = iommu_group_get(host->dev);
@ -502,6 +540,19 @@ static void __exit tegra_host1x_exit(void)
}
module_exit(tegra_host1x_exit);
/**
* host1x_get_dma_mask() - query the supported DMA mask for host1x
* @host1x: host1x instance
*
* Note that this returns the supported DMA mask for host1x, which can be
* different from the applicable DMA mask under certain circumstances.
*/
u64 host1x_get_dma_mask(struct host1x *host1x)
{
return host1x->info->dma_mask;
}
EXPORT_SYMBOL(host1x_get_dma_mask);
MODULE_AUTHOR("Thierry Reding <thierry.reding@avionic-design.de>");
MODULE_AUTHOR("Terje Bergstrom <tbergstrom@nvidia.com>");
MODULE_DESCRIPTION("Host1x driver for Tegra products");

View File

@ -17,9 +17,12 @@ enum host1x_class {
HOST1X_CLASS_GR3D = 0x60,
};
struct host1x;
struct host1x_client;
struct iommu_group;
u64 host1x_get_dma_mask(struct host1x *host1x);
/**
* struct host1x_client_ops - host1x client operations
* @init: host1x client initialization code