From 1dab4561a341afdbaafe0ce6091106d0c63c79e0 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Wed, 29 Jun 2022 21:39:59 -0700
Subject: [PATCH 01/37] drm/i915/reset: Handle reset timeouts under unrelated
 kernel hangs

When resuming after hibernate sometimes we see hangs in unrelated kernel
subsystems. These hangs often result in the following i915 trace:

i915 0000:00:02.0: [drm] *ERROR* \
	intel_gt_reset_global timed out, cancelling all in-flight rendering

implying our reset task has been starved by the hanging kernel subsystem,
causing us to inappropiately declare the system as wedged beyond recovery.

The trace would be caused by our synchronize_srcu_expedited() taking more
than the allowed 5s due to the unrelated kernel hang. But we neither need
to perform that synchronisation inside the reset watchdog, nor do we need
such a short timeout before declaring the device as unrecoverable.

v2: Restore watchdog timeout to the previous 5 seconds (Ashutosh)

Bug: https://gitlab.freedesktop.org/drm/intel/-/issues/3575
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Reviewed-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220630043959.5708-1-ashutosh.dixit@intel.com
---
 drivers/gpu/drm/i915/gt/intel_reset.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index c68d36fb5bbd..1211774e1d91 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -1281,9 +1281,6 @@ static void intel_gt_reset_global(struct intel_gt *gt,
 	intel_wedge_on_timeout(&w, gt, 5 * HZ) {
 		intel_display_prepare_reset(gt->i915);
 
-		/* Flush everyone using a resource about to be clobbered */
-		synchronize_srcu_expedited(&gt->reset.backoff_srcu);
-
 		intel_gt_reset(gt, engine_mask, reason);
 
 		intel_display_finish_reset(gt->i915);
@@ -1392,6 +1389,9 @@ void intel_gt_handle_error(struct intel_gt *gt,
 		}
 	}
 
+	/* Flush everyone using a resource about to be clobbered */
+	synchronize_srcu_expedited(&gt->reset.backoff_srcu);
+
 	intel_gt_reset_global(gt, engine_mask, msg);
 
 	if (!intel_uc_uses_guc_submission(&gt->uc)) {

From 5dca122f8388695b1048a48a69d3cf9210036d9a Mon Sep 17 00:00:00 2001
From: Ashutosh Dixit <ashutosh.dixit@intel.com>
Date: Mon, 18 Jul 2022 18:07:07 -0700
Subject: [PATCH 02/37] drm/i915/gt: Create gt/gtN/.defaults/ for per gt sysfs
 defaults

Create a gt/gtN/.defaults/ directory (similar to
engine/<engine-name>/.defaults/) to expose default parameter values for
each gt in sysfs. This allows userspace to restore default parameter values
after they have changed. The empty 'struct gt_defaults' will be populated
by subsequent patches.

v2: Changed 'struct intel_rps_defaults rps_defaults' to
    'struct gt_defaults defaults' (Andi)

Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: Andi Shyti <andi.shyti@linux.intel.com>
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/be7c30d0ae58be9d8d5b8242ba00a1b2825e63ad.1658192398.git.ashutosh.dixit@intel.com
---
 drivers/gpu/drm/i915/gt/intel_gt_sysfs.c | 5 +++++
 drivers/gpu/drm/i915/gt/intel_gt_types.h | 7 +++++++
 2 files changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
index 9e4ebf53379b..6447c0376dbd 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
@@ -101,6 +101,10 @@ void intel_gt_sysfs_register(struct intel_gt *gt)
 				 gt->i915->sysfs_gt, "gt%d", gt->info.id))
 		goto exit_fail;
 
+	gt->sysfs_defaults = kobject_create_and_add(".defaults", &gt->sysfs_gt);
+	if (!gt->sysfs_defaults)
+		goto exit_fail;
+
 	intel_gt_sysfs_pm_init(gt, &gt->sysfs_gt);
 
 	return;
@@ -113,5 +117,6 @@ exit_fail:
 
 void intel_gt_sysfs_unregister(struct intel_gt *gt)
 {
+	kobject_put(gt->sysfs_defaults);
 	kobject_put(&gt->sysfs_gt);
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index df708802889d..92839b9adb12 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -75,6 +75,9 @@ enum intel_submission_method {
 	INTEL_SUBMISSION_GUC,
 };
 
+struct gt_defaults {
+};
+
 struct intel_gt {
 	struct drm_i915_private *i915;
 	struct intel_uncore *uncore;
@@ -235,6 +238,10 @@ struct intel_gt {
 
 	/* gt/gtN sysfs */
 	struct kobject sysfs_gt;
+
+	/* sysfs defaults per gt */
+	struct gt_defaults defaults;
+	struct kobject *sysfs_defaults;
 };
 
 enum intel_gt_scratch_field {

From fdff0a852ec8b8ddba6fc193f187d5d85717ab2e Mon Sep 17 00:00:00 2001
From: Ashutosh Dixit <ashutosh.dixit@intel.com>
Date: Mon, 18 Jul 2022 18:07:08 -0700
Subject: [PATCH 03/37] drm/i915/gt: Expose per-gt RPS defaults in sysfs

Add the following sysfs files to gt/gtN/.defaults/:
* rps_min_freq_mhz
* rps_max_freq_mhz

v2: Correct gt/gtN/.defaults/* file names in commit message
v3: Remove rps_boost_freq_mhz since it is not consumed by userspace

Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: Andi Shyti <andi.shyti@linux.intel.com>
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/cf6e483bf79f871c2c8c74af6005bf6a83a3a1ce.1658192398.git.ashutosh.dixit@intel.com
---
 drivers/gpu/drm/i915/gt/intel_gt_sysfs.c    |  5 ---
 drivers/gpu/drm/i915/gt/intel_gt_sysfs.h    |  6 ++++
 drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c | 34 +++++++++++++++++++++
 drivers/gpu/drm/i915/gt/intel_gt_types.h    |  2 ++
 drivers/gpu/drm/i915/gt/intel_rps.c         |  2 ++
 drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 12 +++++---
 6 files changed, 52 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
index 6447c0376dbd..d651ccd0ab20 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
@@ -22,11 +22,6 @@ bool is_object_gt(struct kobject *kobj)
 	return !strncmp(kobj->name, "gt", 2);
 }
 
-static struct intel_gt *kobj_to_gt(struct kobject *kobj)
-{
-	return container_of(kobj, struct intel_gt, sysfs_gt);
-}
-
 struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev,
 					    const char *name)
 {
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h
index a99aa7e8b01a..6232923a420d 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h
@@ -10,6 +10,7 @@
 #include <linux/kobject.h>
 
 #include "i915_gem.h" /* GEM_BUG_ON() */
+#include "intel_gt_types.h"
 
 struct intel_gt;
 
@@ -22,6 +23,11 @@ intel_gt_create_kobj(struct intel_gt *gt,
 		     struct kobject *dir,
 		     const char *name);
 
+static inline struct intel_gt *kobj_to_gt(struct kobject *kobj)
+{
+	return container_of(kobj, struct intel_gt, sysfs_gt);
+}
+
 void intel_gt_sysfs_register(struct intel_gt *gt);
 void intel_gt_sysfs_unregister(struct intel_gt *gt);
 struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev,
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
index 73a8b46e0234..e066cc33d9f2 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
@@ -727,6 +727,34 @@ static const struct attribute *media_perf_power_attrs[] = {
 	NULL
 };
 
+static ssize_t
+default_min_freq_mhz_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	struct intel_gt *gt = kobj_to_gt(kobj->parent);
+
+	return sysfs_emit(buf, "%u\n", gt->defaults.min_freq);
+}
+
+static struct kobj_attribute default_min_freq_mhz =
+__ATTR(rps_min_freq_mhz, 0444, default_min_freq_mhz_show, NULL);
+
+static ssize_t
+default_max_freq_mhz_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	struct intel_gt *gt = kobj_to_gt(kobj->parent);
+
+	return sysfs_emit(buf, "%u\n", gt->defaults.max_freq);
+}
+
+static struct kobj_attribute default_max_freq_mhz =
+__ATTR(rps_max_freq_mhz, 0444, default_max_freq_mhz_show, NULL);
+
+static const struct attribute * const rps_defaults_attrs[] = {
+	&default_min_freq_mhz.attr,
+	&default_max_freq_mhz.attr,
+	NULL
+};
+
 static int intel_sysfs_rps_init(struct intel_gt *gt, struct kobject *kobj,
 				const struct attribute * const *attrs)
 {
@@ -776,4 +804,10 @@ void intel_gt_sysfs_pm_init(struct intel_gt *gt, struct kobject *kobj)
 				 "failed to create gt%u media_perf_power_attrs sysfs (%pe)\n",
 				 gt->info.id, ERR_PTR(ret));
 	}
+
+	ret = sysfs_create_files(gt->sysfs_defaults, rps_defaults_attrs);
+	if (ret)
+		drm_warn(&gt->i915->drm,
+			 "failed to add gt%u rps defaults (%pe)\n",
+			 gt->info.id, ERR_PTR(ret));
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index 92839b9adb12..80f52532290e 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -76,6 +76,8 @@ enum intel_submission_method {
 };
 
 struct gt_defaults {
+	u32 min_freq;
+	u32 max_freq;
 };
 
 struct intel_gt {
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
index fb3f57ee450b..c7d381ad90cf 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -1979,7 +1979,9 @@ void intel_rps_init(struct intel_rps *rps)
 
 	/* Derive initial user preferences/limits from the hardware limits */
 	rps->max_freq_softlimit = rps->max_freq;
+	rps_to_gt(rps)->defaults.max_freq = rps->max_freq_softlimit;
 	rps->min_freq_softlimit = rps->min_freq;
+	rps_to_gt(rps)->defaults.min_freq = rps->min_freq_softlimit;
 
 	/* After setting max-softlimit, find the overclock max freq */
 	if (GRAPHICS_VER(i915) == 6 || IS_IVYBRIDGE(i915) || IS_HASWELL(i915)) {
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index ec9c4ca0f615..e1fa1f32f29e 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -575,20 +575,24 @@ static int slpc_set_softlimits(struct intel_guc_slpc *slpc)
 	 * unless they have deviated from defaults, in which case,
 	 * we retain the values and set min/max accordingly.
 	 */
-	if (!slpc->max_freq_softlimit)
+	if (!slpc->max_freq_softlimit) {
 		slpc->max_freq_softlimit = slpc->rp0_freq;
-	else if (slpc->max_freq_softlimit != slpc->rp0_freq)
+		slpc_to_gt(slpc)->defaults.max_freq = slpc->max_freq_softlimit;
+	} else if (slpc->max_freq_softlimit != slpc->rp0_freq) {
 		ret = intel_guc_slpc_set_max_freq(slpc,
 						  slpc->max_freq_softlimit);
+	}
 
 	if (unlikely(ret))
 		return ret;
 
-	if (!slpc->min_freq_softlimit)
+	if (!slpc->min_freq_softlimit) {
 		slpc->min_freq_softlimit = slpc->min_freq;
-	else if (slpc->min_freq_softlimit != slpc->min_freq)
+		slpc_to_gt(slpc)->defaults.min_freq = slpc->min_freq_softlimit;
+	} else if (slpc->min_freq_softlimit != slpc->min_freq) {
 		return intel_guc_slpc_set_min_freq(slpc,
 						   slpc->min_freq_softlimit);
+	}
 
 	return 0;
 }

From 774ce1510e6ccb9c0752d4aa7a9ff3624b3db3f3 Mon Sep 17 00:00:00 2001
From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Date: Mon, 18 Jul 2022 16:07:32 -0700
Subject: [PATCH 04/37] drm/i915/guc: support v69 in parallel to v70

This patch re-introduces support for GuC v69 in parallel to v70. As this
is a quick fix, v69 has been re-introduced as the single "fallback" guc
version in case v70 is not available on disk and only for platforms that
are out of force_probe and require the GuC by default. All v69 specific
code has been labeled as such for easy identification, and the same was
done for all v70 functions for which there is a separate v69 version,
to avoid accidentally calling the wrong version via the unlabeled name.

When the fallback mode kicks in, a drm_notice message is printed in
dmesg to inform the user of the required update. The existing
logging of the fetch function has also been updated so that we no
longer complain immediately if we can't find a fw and we only throw an
error if the fetch of both the base and fallback blobs fails.

The plan is to follow this up with a more complex rework to allow for
multiple different GuC versions to be supported at the same time.

v2: reduce the fallback to platform that require it, switch to
firmware_request_nowarn(), improve logs.

Fixes: 2584b3549f4c ("drm/i915/guc: Update to GuC version 70.1.1")
Link: https://lists.freedesktop.org/archives/intel-gfx/2022-July/301640.html
Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: John Harrison <John.C.Harrison@Intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: Dave Airlie <airlied@gmail.com>
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
Acked-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: John Harrison <John.C.Harrison@Intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220718230732.1409641-1-daniele.ceraolospurio@intel.com
---
 drivers/gpu/drm/i915/gt/intel_context_types.h |  11 +-
 .../gpu/drm/i915/gt/uc/abi/guc_actions_abi.h  |   3 +
 drivers/gpu/drm/i915/gt/uc/intel_guc.h        |   5 +
 drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h   |  45 +++
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 354 +++++++++++++++---
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c      |  56 ++-
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h      |   7 +
 7 files changed, 418 insertions(+), 63 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index d2d75d9c0c8d..04eacae1aca5 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -275,10 +275,17 @@ struct intel_context {
 		u8 child_index;
 		/** @guc: GuC specific members for parallel submission */
 		struct {
-			/** @wqi_head: head pointer in work queue */
+			/** @wqi_head: cached head pointer in work queue */
 			u16 wqi_head;
-			/** @wqi_tail: tail pointer in work queue */
+			/** @wqi_tail: cached tail pointer in work queue */
 			u16 wqi_tail;
+			/** @wq_head: pointer to the actual head in work queue */
+			u32 *wq_head;
+			/** @wq_tail: pointer to the actual head in work queue */
+			u32 *wq_tail;
+			/** @wq_status: pointer to the status in work queue */
+			u32 *wq_status;
+
 			/**
 			 * @parent_page: page in context state (ce->state) used
 			 * by parent for work queue, process descriptor
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
index 4ef9990ed7f8..29ef8afc8c2e 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
@@ -122,6 +122,9 @@ enum intel_guc_action {
 	INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_DONE = 0x1002,
 	INTEL_GUC_ACTION_SCHED_ENGINE_MODE_SET = 0x1003,
 	INTEL_GUC_ACTION_SCHED_ENGINE_MODE_DONE = 0x1004,
+	INTEL_GUC_ACTION_V69_SET_CONTEXT_PRIORITY = 0x1005,
+	INTEL_GUC_ACTION_V69_SET_CONTEXT_EXECUTION_QUANTUM = 0x1006,
+	INTEL_GUC_ACTION_V69_SET_CONTEXT_PREEMPTION_TIMEOUT = 0x1007,
 	INTEL_GUC_ACTION_CONTEXT_RESET_NOTIFICATION = 0x1008,
 	INTEL_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION = 0x1009,
 	INTEL_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES = 0x100B,
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index d0d99f178f2d..a7acffbf15d1 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -170,6 +170,11 @@ struct intel_guc {
 	/** @ads_engine_usage_size: size of engine usage in the ADS */
 	u32 ads_engine_usage_size;
 
+	/** @lrc_desc_pool_v69: object allocated to hold the GuC LRC descriptor pool */
+	struct i915_vma *lrc_desc_pool_v69;
+	/** @lrc_desc_pool_vaddr_v69: contents of the GuC LRC descriptor pool */
+	void *lrc_desc_pool_vaddr_v69;
+
 	/**
 	 * @context_lookup: used to resolve intel_context from guc_id, if a
 	 * context is present in this structure it is registered with the GuC
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
index b3c9a9327f76..323b055e5db9 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
@@ -204,6 +204,20 @@ struct guc_wq_item {
 	u32 fence_id;
 } __packed;
 
+struct guc_process_desc_v69 {
+	u32 stage_id;
+	u64 db_base_addr;
+	u32 head;
+	u32 tail;
+	u32 error_offset;
+	u64 wq_base_addr;
+	u32 wq_size_bytes;
+	u32 wq_status;
+	u32 engine_presence;
+	u32 priority;
+	u32 reserved[36];
+} __packed;
+
 struct guc_sched_wq_desc {
 	u32 head;
 	u32 tail;
@@ -228,6 +242,37 @@ struct guc_ctxt_registration_info {
 };
 #define CONTEXT_REGISTRATION_FLAG_KMD	BIT(0)
 
+/* Preempt to idle on quantum expiry */
+#define CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE_V69	BIT(0)
+
+/*
+ * GuC Context registration descriptor.
+ * FIXME: This is only required to exist during context registration.
+ * The current 1:1 between guc_lrc_desc and LRCs for the lifetime of the LRC
+ * is not required.
+ */
+struct guc_lrc_desc_v69 {
+	u32 hw_context_desc;
+	u32 slpm_perf_mode_hint;	/* SPLC v1 only */
+	u32 slpm_freq_hint;
+	u32 engine_submit_mask;		/* In logical space */
+	u8 engine_class;
+	u8 reserved0[3];
+	u32 priority;
+	u32 process_desc;
+	u32 wq_addr;
+	u32 wq_size;
+	u32 context_flags;		/* CONTEXT_REGISTRATION_* */
+	/* Time for one workload to execute. (in micro seconds) */
+	u32 execution_quantum;
+	/* Time to wait for a preemption request to complete before issuing a
+	 * reset. (in micro seconds).
+	 */
+	u32 preemption_timeout;
+	u32 policy_flags;		/* CONTEXT_POLICY_* */
+	u32 reserved1[19];
+} __packed;
+
 /* 32-bit KLV structure as used by policy updates and others */
 struct guc_klv_generic_dw_t {
 	u32 kl;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 40f726c61e95..76916aed897a 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -414,12 +414,15 @@ struct sync_semaphore {
 };
 
 struct parent_scratch {
-	struct guc_sched_wq_desc wq_desc;
+	union guc_descs {
+		struct guc_sched_wq_desc wq_desc;
+		struct guc_process_desc_v69 pdesc;
+	} descs;
 
 	struct sync_semaphore go;
 	struct sync_semaphore join[MAX_ENGINE_INSTANCE + 1];
 
-	u8 unused[WQ_OFFSET - sizeof(struct guc_sched_wq_desc) -
+	u8 unused[WQ_OFFSET - sizeof(union guc_descs) -
 		sizeof(struct sync_semaphore) * (MAX_ENGINE_INSTANCE + 2)];
 
 	u32 wq[WQ_SIZE / sizeof(u32)];
@@ -456,17 +459,23 @@ __get_parent_scratch(struct intel_context *ce)
 		   LRC_STATE_OFFSET) / sizeof(u32)));
 }
 
-static struct guc_sched_wq_desc *
-__get_wq_desc(struct intel_context *ce)
+static struct guc_process_desc_v69 *
+__get_process_desc_v69(struct intel_context *ce)
 {
 	struct parent_scratch *ps = __get_parent_scratch(ce);
 
-	return &ps->wq_desc;
+	return &ps->descs.pdesc;
 }
 
-static u32 *get_wq_pointer(struct guc_sched_wq_desc *wq_desc,
-			   struct intel_context *ce,
-			   u32 wqi_size)
+static struct guc_sched_wq_desc *
+__get_wq_desc_v70(struct intel_context *ce)
+{
+	struct parent_scratch *ps = __get_parent_scratch(ce);
+
+	return &ps->descs.wq_desc;
+}
+
+static u32 *get_wq_pointer(struct intel_context *ce, u32 wqi_size)
 {
 	/*
 	 * Check for space in work queue. Caching a value of head pointer in
@@ -476,7 +485,7 @@ static u32 *get_wq_pointer(struct guc_sched_wq_desc *wq_desc,
 #define AVAILABLE_SPACE	\
 	CIRC_SPACE(ce->parallel.guc.wqi_tail, ce->parallel.guc.wqi_head, WQ_SIZE)
 	if (wqi_size > AVAILABLE_SPACE) {
-		ce->parallel.guc.wqi_head = READ_ONCE(wq_desc->head);
+		ce->parallel.guc.wqi_head = READ_ONCE(*ce->parallel.guc.wq_head);
 
 		if (wqi_size > AVAILABLE_SPACE)
 			return NULL;
@@ -495,11 +504,55 @@ static inline struct intel_context *__get_context(struct intel_guc *guc, u32 id)
 	return ce;
 }
 
+static struct guc_lrc_desc_v69 *__get_lrc_desc_v69(struct intel_guc *guc, u32 index)
+{
+	struct guc_lrc_desc_v69 *base = guc->lrc_desc_pool_vaddr_v69;
+
+	if (!base)
+		return NULL;
+
+	GEM_BUG_ON(index >= GUC_MAX_CONTEXT_ID);
+
+	return &base[index];
+}
+
+static int guc_lrc_desc_pool_create_v69(struct intel_guc *guc)
+{
+	u32 size;
+	int ret;
+
+	size = PAGE_ALIGN(sizeof(struct guc_lrc_desc_v69) *
+			  GUC_MAX_CONTEXT_ID);
+	ret = intel_guc_allocate_and_map_vma(guc, size, &guc->lrc_desc_pool_v69,
+					     (void **)&guc->lrc_desc_pool_vaddr_v69);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static void guc_lrc_desc_pool_destroy_v69(struct intel_guc *guc)
+{
+	if (!guc->lrc_desc_pool_vaddr_v69)
+		return;
+
+	guc->lrc_desc_pool_vaddr_v69 = NULL;
+	i915_vma_unpin_and_release(&guc->lrc_desc_pool_v69, I915_VMA_RELEASE_MAP);
+}
+
 static inline bool guc_submission_initialized(struct intel_guc *guc)
 {
 	return guc->submission_initialized;
 }
 
+static inline void _reset_lrc_desc_v69(struct intel_guc *guc, u32 id)
+{
+	struct guc_lrc_desc_v69 *desc = __get_lrc_desc_v69(guc, id);
+
+	if (desc)
+		memset(desc, 0, sizeof(*desc));
+}
+
 static inline bool ctx_id_mapped(struct intel_guc *guc, u32 id)
 {
 	return __get_context(guc, id);
@@ -526,6 +579,8 @@ static inline void clr_ctx_id_mapping(struct intel_guc *guc, u32 id)
 	if (unlikely(!guc_submission_initialized(guc)))
 		return;
 
+	_reset_lrc_desc_v69(guc, id);
+
 	/*
 	 * xarray API doesn't have xa_erase_irqsave wrapper, so calling
 	 * the lower level functions directly.
@@ -611,7 +666,7 @@ int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout)
 					      true, timeout);
 }
 
-static int guc_context_policy_init(struct intel_context *ce, bool loop);
+static int guc_context_policy_init_v70(struct intel_context *ce, bool loop);
 static int try_context_registration(struct intel_context *ce, bool loop);
 
 static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq)
@@ -639,7 +694,7 @@ static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq)
 	GEM_BUG_ON(context_guc_id_invalid(ce));
 
 	if (context_policy_required(ce)) {
-		err = guc_context_policy_init(ce, false);
+		err = guc_context_policy_init_v70(ce, false);
 		if (err)
 			return err;
 	}
@@ -737,9 +792,7 @@ static u32 wq_space_until_wrap(struct intel_context *ce)
 	return (WQ_SIZE - ce->parallel.guc.wqi_tail);
 }
 
-static void write_wqi(struct guc_sched_wq_desc *wq_desc,
-		      struct intel_context *ce,
-		      u32 wqi_size)
+static void write_wqi(struct intel_context *ce, u32 wqi_size)
 {
 	BUILD_BUG_ON(!is_power_of_2(WQ_SIZE));
 
@@ -750,13 +803,12 @@ static void write_wqi(struct guc_sched_wq_desc *wq_desc,
 
 	ce->parallel.guc.wqi_tail = (ce->parallel.guc.wqi_tail + wqi_size) &
 		(WQ_SIZE - 1);
-	WRITE_ONCE(wq_desc->tail, ce->parallel.guc.wqi_tail);
+	WRITE_ONCE(*ce->parallel.guc.wq_tail, ce->parallel.guc.wqi_tail);
 }
 
 static int guc_wq_noop_append(struct intel_context *ce)
 {
-	struct guc_sched_wq_desc *wq_desc = __get_wq_desc(ce);
-	u32 *wqi = get_wq_pointer(wq_desc, ce, wq_space_until_wrap(ce));
+	u32 *wqi = get_wq_pointer(ce, wq_space_until_wrap(ce));
 	u32 len_dw = wq_space_until_wrap(ce) / sizeof(u32) - 1;
 
 	if (!wqi)
@@ -775,7 +827,6 @@ static int __guc_wq_item_append(struct i915_request *rq)
 {
 	struct intel_context *ce = request_to_scheduling_context(rq);
 	struct intel_context *child;
-	struct guc_sched_wq_desc *wq_desc = __get_wq_desc(ce);
 	unsigned int wqi_size = (ce->parallel.number_children + 4) *
 		sizeof(u32);
 	u32 *wqi;
@@ -795,7 +846,7 @@ static int __guc_wq_item_append(struct i915_request *rq)
 			return ret;
 	}
 
-	wqi = get_wq_pointer(wq_desc, ce, wqi_size);
+	wqi = get_wq_pointer(ce, wqi_size);
 	if (!wqi)
 		return -EBUSY;
 
@@ -810,7 +861,7 @@ static int __guc_wq_item_append(struct i915_request *rq)
 	for_each_child(ce, child)
 		*wqi++ = child->ring->tail / sizeof(u64);
 
-	write_wqi(wq_desc, ce, wqi_size);
+	write_wqi(ce, wqi_size);
 
 	return 0;
 }
@@ -1812,20 +1863,34 @@ static void reset_fail_worker_func(struct work_struct *w);
 int intel_guc_submission_init(struct intel_guc *guc)
 {
 	struct intel_gt *gt = guc_to_gt(guc);
+	int ret;
 
 	if (guc->submission_initialized)
 		return 0;
 
+	if (guc->fw.major_ver_found < 70) {
+		ret = guc_lrc_desc_pool_create_v69(guc);
+		if (ret)
+			return ret;
+	}
+
 	guc->submission_state.guc_ids_bitmap =
 		bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
-	if (!guc->submission_state.guc_ids_bitmap)
-		return -ENOMEM;
+	if (!guc->submission_state.guc_ids_bitmap) {
+		ret = -ENOMEM;
+		goto destroy_pool;
+	}
 
 	guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ;
 	guc->timestamp.shift = gpm_timestamp_shift(gt);
 	guc->submission_initialized = true;
 
 	return 0;
+
+destroy_pool:
+	guc_lrc_desc_pool_destroy_v69(guc);
+
+	return ret;
 }
 
 void intel_guc_submission_fini(struct intel_guc *guc)
@@ -1834,6 +1899,7 @@ void intel_guc_submission_fini(struct intel_guc *guc)
 		return;
 
 	guc_flush_destroyed_contexts(guc);
+	guc_lrc_desc_pool_destroy_v69(guc);
 	i915_sched_engine_put(guc->sched_engine);
 	bitmap_free(guc->submission_state.guc_ids_bitmap);
 	guc->submission_initialized = false;
@@ -2091,10 +2157,34 @@ static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce)
 	spin_unlock_irqrestore(&guc->submission_state.lock, flags);
 }
 
-static int __guc_action_register_multi_lrc(struct intel_guc *guc,
-					   struct intel_context *ce,
-					   struct guc_ctxt_registration_info *info,
-					   bool loop)
+static int __guc_action_register_multi_lrc_v69(struct intel_guc *guc,
+					       struct intel_context *ce,
+					       u32 guc_id,
+					       u32 offset,
+					       bool loop)
+{
+	struct intel_context *child;
+	u32 action[4 + MAX_ENGINE_INSTANCE];
+	int len = 0;
+
+	GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE);
+
+	action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
+	action[len++] = guc_id;
+	action[len++] = ce->parallel.number_children + 1;
+	action[len++] = offset;
+	for_each_child(ce, child) {
+		offset += sizeof(struct guc_lrc_desc_v69);
+		action[len++] = offset;
+	}
+
+	return guc_submission_send_busy_loop(guc, action, len, 0, loop);
+}
+
+static int __guc_action_register_multi_lrc_v70(struct intel_guc *guc,
+					       struct intel_context *ce,
+					       struct guc_ctxt_registration_info *info,
+					       bool loop)
 {
 	struct intel_context *child;
 	u32 action[13 + (MAX_ENGINE_INSTANCE * 2)];
@@ -2134,9 +2224,24 @@ static int __guc_action_register_multi_lrc(struct intel_guc *guc,
 	return guc_submission_send_busy_loop(guc, action, len, 0, loop);
 }
 
-static int __guc_action_register_context(struct intel_guc *guc,
-					 struct guc_ctxt_registration_info *info,
-					 bool loop)
+static int __guc_action_register_context_v69(struct intel_guc *guc,
+					     u32 guc_id,
+					     u32 offset,
+					     bool loop)
+{
+	u32 action[] = {
+		INTEL_GUC_ACTION_REGISTER_CONTEXT,
+		guc_id,
+		offset,
+	};
+
+	return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
+					     0, loop);
+}
+
+static int __guc_action_register_context_v70(struct intel_guc *guc,
+					     struct guc_ctxt_registration_info *info,
+					     bool loop)
 {
 	u32 action[] = {
 		INTEL_GUC_ACTION_REGISTER_CONTEXT,
@@ -2157,24 +2262,52 @@ static int __guc_action_register_context(struct intel_guc *guc,
 					     0, loop);
 }
 
-static void prepare_context_registration_info(struct intel_context *ce,
-					      struct guc_ctxt_registration_info *info);
+static void prepare_context_registration_info_v69(struct intel_context *ce);
+static void prepare_context_registration_info_v70(struct intel_context *ce,
+						  struct guc_ctxt_registration_info *info);
+
+static int
+register_context_v69(struct intel_guc *guc, struct intel_context *ce, bool loop)
+{
+	u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool_v69) +
+		ce->guc_id.id * sizeof(struct guc_lrc_desc_v69);
+
+	prepare_context_registration_info_v69(ce);
+
+	if (intel_context_is_parent(ce))
+		return __guc_action_register_multi_lrc_v69(guc, ce, ce->guc_id.id,
+							   offset, loop);
+	else
+		return __guc_action_register_context_v69(guc, ce->guc_id.id,
+							 offset, loop);
+}
+
+static int
+register_context_v70(struct intel_guc *guc, struct intel_context *ce, bool loop)
+{
+	struct guc_ctxt_registration_info info;
+
+	prepare_context_registration_info_v70(ce, &info);
+
+	if (intel_context_is_parent(ce))
+		return __guc_action_register_multi_lrc_v70(guc, ce, &info, loop);
+	else
+		return __guc_action_register_context_v70(guc, &info, loop);
+}
 
 static int register_context(struct intel_context *ce, bool loop)
 {
-	struct guc_ctxt_registration_info info;
 	struct intel_guc *guc = ce_to_guc(ce);
 	int ret;
 
 	GEM_BUG_ON(intel_context_is_child(ce));
 	trace_intel_context_register(ce);
 
-	prepare_context_registration_info(ce, &info);
-
-	if (intel_context_is_parent(ce))
-		ret = __guc_action_register_multi_lrc(guc, ce, &info, loop);
+	if (guc->fw.major_ver_found >= 70)
+		ret = register_context_v70(guc, ce, loop);
 	else
-		ret = __guc_action_register_context(guc, &info, loop);
+		ret = register_context_v69(guc, ce, loop);
+
 	if (likely(!ret)) {
 		unsigned long flags;
 
@@ -2182,7 +2315,8 @@ static int register_context(struct intel_context *ce, bool loop)
 		set_context_registered(ce);
 		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
 
-		guc_context_policy_init(ce, loop);
+		if (guc->fw.major_ver_found >= 70)
+			guc_context_policy_init_v70(ce, loop);
 	}
 
 	return ret;
@@ -2279,7 +2413,7 @@ static int __guc_context_set_context_policies(struct intel_guc *guc,
 					0, loop);
 }
 
-static int guc_context_policy_init(struct intel_context *ce, bool loop)
+static int guc_context_policy_init_v70(struct intel_context *ce, bool loop)
 {
 	struct intel_engine_cs *engine = ce->engine;
 	struct intel_guc *guc = &engine->gt->uc.guc;
@@ -2338,6 +2472,19 @@ static int guc_context_policy_init(struct intel_context *ce, bool loop)
 	return ret;
 }
 
+static void guc_context_policy_init_v69(struct intel_engine_cs *engine,
+					struct guc_lrc_desc_v69 *desc)
+{
+	desc->policy_flags = 0;
+
+	if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
+		desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE_V69;
+
+	/* NB: For both of these, zero means disabled. */
+	desc->execution_quantum = engine->props.timeslice_duration_ms * 1000;
+	desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000;
+}
+
 static u32 map_guc_prio_to_lrc_desc_prio(u8 prio)
 {
 	/*
@@ -2358,8 +2505,75 @@ static u32 map_guc_prio_to_lrc_desc_prio(u8 prio)
 	}
 }
 
-static void prepare_context_registration_info(struct intel_context *ce,
-					      struct guc_ctxt_registration_info *info)
+static void prepare_context_registration_info_v69(struct intel_context *ce)
+{
+	struct intel_engine_cs *engine = ce->engine;
+	struct intel_guc *guc = &engine->gt->uc.guc;
+	u32 ctx_id = ce->guc_id.id;
+	struct guc_lrc_desc_v69 *desc;
+	struct intel_context *child;
+
+	GEM_BUG_ON(!engine->mask);
+
+	/*
+	 * Ensure LRC + CT vmas are is same region as write barrier is done
+	 * based on CT vma region.
+	 */
+	GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) !=
+		   i915_gem_object_is_lmem(ce->ring->vma->obj));
+
+	desc = __get_lrc_desc_v69(guc, ctx_id);
+	desc->engine_class = engine_class_to_guc_class(engine->class);
+	desc->engine_submit_mask = engine->logical_mask;
+	desc->hw_context_desc = ce->lrc.lrca;
+	desc->priority = ce->guc_state.prio;
+	desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
+	guc_context_policy_init_v69(engine, desc);
+
+	/*
+	 * If context is a parent, we need to register a process descriptor
+	 * describing a work queue and register all child contexts.
+	 */
+	if (intel_context_is_parent(ce)) {
+		struct guc_process_desc_v69 *pdesc;
+
+		ce->parallel.guc.wqi_tail = 0;
+		ce->parallel.guc.wqi_head = 0;
+
+		desc->process_desc = i915_ggtt_offset(ce->state) +
+			__get_parent_scratch_offset(ce);
+		desc->wq_addr = i915_ggtt_offset(ce->state) +
+			__get_wq_offset(ce);
+		desc->wq_size = WQ_SIZE;
+
+		pdesc = __get_process_desc_v69(ce);
+		memset(pdesc, 0, sizeof(*(pdesc)));
+		pdesc->stage_id = ce->guc_id.id;
+		pdesc->wq_base_addr = desc->wq_addr;
+		pdesc->wq_size_bytes = desc->wq_size;
+		pdesc->wq_status = WQ_STATUS_ACTIVE;
+
+		ce->parallel.guc.wq_head = &pdesc->head;
+		ce->parallel.guc.wq_tail = &pdesc->tail;
+		ce->parallel.guc.wq_status = &pdesc->wq_status;
+
+		for_each_child(ce, child) {
+			desc = __get_lrc_desc_v69(guc, child->guc_id.id);
+
+			desc->engine_class =
+				engine_class_to_guc_class(engine->class);
+			desc->hw_context_desc = child->lrc.lrca;
+			desc->priority = ce->guc_state.prio;
+			desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
+			guc_context_policy_init_v69(engine, desc);
+		}
+
+		clear_children_join_go_memory(ce);
+	}
+}
+
+static void prepare_context_registration_info_v70(struct intel_context *ce,
+						  struct guc_ctxt_registration_info *info)
 {
 	struct intel_engine_cs *engine = ce->engine;
 	struct intel_guc *guc = &engine->gt->uc.guc;
@@ -2409,10 +2623,14 @@ static void prepare_context_registration_info(struct intel_context *ce,
 		info->wq_base_hi = upper_32_bits(wq_base_offset);
 		info->wq_size = WQ_SIZE;
 
-		wq_desc = __get_wq_desc(ce);
+		wq_desc = __get_wq_desc_v70(ce);
 		memset(wq_desc, 0, sizeof(*wq_desc));
 		wq_desc->wq_status = WQ_STATUS_ACTIVE;
 
+		ce->parallel.guc.wq_head = &wq_desc->head;
+		ce->parallel.guc.wq_tail = &wq_desc->tail;
+		ce->parallel.guc.wq_status = &wq_desc->wq_status;
+
 		clear_children_join_go_memory(ce);
 	}
 }
@@ -2727,11 +2945,21 @@ static void __guc_context_set_preemption_timeout(struct intel_guc *guc,
 						 u16 guc_id,
 						 u32 preemption_timeout)
 {
-	struct context_policy policy;
+	if (guc->fw.major_ver_found >= 70) {
+		struct context_policy policy;
 
-	__guc_context_policy_start_klv(&policy, guc_id);
-	__guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);
-	__guc_context_set_context_policies(guc, &policy, true);
+		__guc_context_policy_start_klv(&policy, guc_id);
+		__guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);
+		__guc_context_set_context_policies(guc, &policy, true);
+	} else {
+		u32 action[] = {
+			INTEL_GUC_ACTION_V69_SET_CONTEXT_PREEMPTION_TIMEOUT,
+			guc_id,
+			preemption_timeout
+		};
+
+		intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true);
+	}
 }
 
 static void
@@ -2982,11 +3210,21 @@ static int guc_context_alloc(struct intel_context *ce)
 static void __guc_context_set_prio(struct intel_guc *guc,
 				   struct intel_context *ce)
 {
-	struct context_policy policy;
+	if (guc->fw.major_ver_found >= 70) {
+		struct context_policy policy;
 
-	__guc_context_policy_start_klv(&policy, ce->guc_id.id);
-	__guc_context_policy_add_priority(&policy, ce->guc_state.prio);
-	__guc_context_set_context_policies(guc, &policy, true);
+		__guc_context_policy_start_klv(&policy, ce->guc_id.id);
+		__guc_context_policy_add_priority(&policy, ce->guc_state.prio);
+		__guc_context_set_context_policies(guc, &policy, true);
+	} else {
+		u32 action[] = {
+			INTEL_GUC_ACTION_V69_SET_CONTEXT_PRIORITY,
+			ce->guc_id.id,
+			ce->guc_state.prio,
+		};
+
+		guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true);
+	}
 }
 
 static void guc_context_set_prio(struct intel_guc *guc,
@@ -4496,17 +4734,19 @@ void intel_guc_submission_print_context_info(struct intel_guc *guc,
 		guc_log_context_priority(p, ce);
 
 		if (intel_context_is_parent(ce)) {
-			struct guc_sched_wq_desc *wq_desc = __get_wq_desc(ce);
 			struct intel_context *child;
 
 			drm_printf(p, "\t\tNumber children: %u\n",
 				   ce->parallel.number_children);
-			drm_printf(p, "\t\tWQI Head: %u\n",
-				   READ_ONCE(wq_desc->head));
-			drm_printf(p, "\t\tWQI Tail: %u\n",
-				   READ_ONCE(wq_desc->tail));
-			drm_printf(p, "\t\tWQI Status: %u\n\n",
-				   READ_ONCE(wq_desc->wq_status));
+
+			if (ce->parallel.guc.wq_status) {
+				drm_printf(p, "\t\tWQI Head: %u\n",
+					   READ_ONCE(*ce->parallel.guc.wq_head));
+				drm_printf(p, "\t\tWQI Tail: %u\n",
+					   READ_ONCE(*ce->parallel.guc.wq_tail));
+				drm_printf(p, "\t\tWQI Status: %u\n\n",
+					   READ_ONCE(*ce->parallel.guc.wq_status));
+			}
 
 			if (ce->engine->emit_bb_start ==
 			    emit_bb_start_parent_no_preempt_mid_batch) {
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index 27363091e1af..56a0d80f88ba 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -70,6 +70,10 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
 	fw_def(BROXTON,      0, guc_def(bxt,  70, 1, 1)) \
 	fw_def(SKYLAKE,      0, guc_def(skl,  70, 1, 1))
 
+#define INTEL_GUC_FIRMWARE_DEFS_FALLBACK(fw_def, guc_def) \
+	fw_def(ALDERLAKE_P,  0, guc_def(adlp, 69, 0, 3)) \
+	fw_def(ALDERLAKE_S,  0, guc_def(tgl,  69, 0, 3))
+
 #define INTEL_HUC_FIRMWARE_DEFS(fw_def, huc_def) \
 	fw_def(ALDERLAKE_P,  0, huc_def(tgl,  7, 9, 3)) \
 	fw_def(ALDERLAKE_S,  0, huc_def(tgl,  7, 9, 3)) \
@@ -105,6 +109,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
 	MODULE_FIRMWARE(uc_);
 
 INTEL_GUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH)
+INTEL_GUC_FIRMWARE_DEFS_FALLBACK(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH)
 INTEL_HUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_HUC_FW_PATH)
 
 /* The below structs and macros are used to iterate across the list of blobs */
@@ -149,6 +154,9 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw)
 	static const struct uc_fw_platform_requirement blobs_guc[] = {
 		INTEL_GUC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB)
 	};
+	static const struct uc_fw_platform_requirement blobs_guc_fallback[] = {
+		INTEL_GUC_FIRMWARE_DEFS_FALLBACK(MAKE_FW_LIST, GUC_FW_BLOB)
+	};
 	static const struct uc_fw_platform_requirement blobs_huc[] = {
 		INTEL_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB)
 	};
@@ -179,12 +187,29 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw)
 		if (p == fw_blobs[i].p && rev >= fw_blobs[i].rev) {
 			const struct uc_fw_blob *blob = &fw_blobs[i].blob;
 			uc_fw->path = blob->path;
+			uc_fw->wanted_path = blob->path;
 			uc_fw->major_ver_wanted = blob->major;
 			uc_fw->minor_ver_wanted = blob->minor;
 			break;
 		}
 	}
 
+	if (uc_fw->type == INTEL_UC_FW_TYPE_GUC) {
+		const struct uc_fw_platform_requirement *blobs = blobs_guc_fallback;
+		u32 count = ARRAY_SIZE(blobs_guc_fallback);
+
+		for (i = 0; i < count && p <= blobs[i].p; i++) {
+			if (p == blobs[i].p && rev >= blobs[i].rev) {
+				const struct uc_fw_blob *blob = &blobs[i].blob;
+
+				uc_fw->fallback.path = blob->path;
+				uc_fw->fallback.major_ver = blob->major;
+				uc_fw->fallback.minor_ver = blob->minor;
+				break;
+			}
+		}
+	}
+
 	/* make sure the list is ordered as expected */
 	if (IS_ENABLED(CONFIG_DRM_I915_SELFTEST)) {
 		for (i = 1; i < fw_count; i++) {
@@ -412,7 +437,24 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
 	__force_fw_fetch_failures(uc_fw, -EINVAL);
 	__force_fw_fetch_failures(uc_fw, -ESTALE);
 
-	err = request_firmware(&fw, uc_fw->path, dev);
+	err = firmware_request_nowarn(&fw, uc_fw->path, dev);
+	if (err && !intel_uc_fw_is_overridden(uc_fw) && uc_fw->fallback.path) {
+		err = firmware_request_nowarn(&fw, uc_fw->fallback.path, dev);
+		if (!err) {
+			drm_notice(&i915->drm,
+				   "%s firmware %s is recommended, but only %s was found\n",
+				   intel_uc_fw_type_repr(uc_fw->type),
+				   uc_fw->wanted_path,
+				   uc_fw->fallback.path);
+			drm_info(&i915->drm,
+				 "Consider updating your linux-firmware pkg or downloading from %s\n",
+				 INTEL_UC_FIRMWARE_URL);
+
+			uc_fw->path = uc_fw->fallback.path;
+			uc_fw->major_ver_wanted = uc_fw->fallback.major_ver;
+			uc_fw->minor_ver_wanted = uc_fw->fallback.minor_ver;
+		}
+	}
 	if (err)
 		goto fail;
 
@@ -460,8 +502,8 @@ fail:
 				  INTEL_UC_FIRMWARE_MISSING :
 				  INTEL_UC_FIRMWARE_ERROR);
 
-	drm_notice(&i915->drm, "%s firmware %s: fetch failed with error %d\n",
-		   intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, err);
+	i915_probe_error(i915, "%s firmware %s: fetch failed with error %d\n",
+			 intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, err);
 	drm_info(&i915->drm, "%s firmware(s) can be downloaded from %s\n",
 		 intel_uc_fw_type_repr(uc_fw->type), INTEL_UC_FIRMWARE_URL);
 
@@ -822,7 +864,13 @@ size_t intel_uc_fw_copy_rsa(struct intel_uc_fw *uc_fw, void *dst, u32 max_len)
 void intel_uc_fw_dump(const struct intel_uc_fw *uc_fw, struct drm_printer *p)
 {
 	drm_printf(p, "%s firmware: %s\n",
-		   intel_uc_fw_type_repr(uc_fw->type), uc_fw->path);
+		   intel_uc_fw_type_repr(uc_fw->type), uc_fw->wanted_path);
+	if (uc_fw->fallback.path) {
+		drm_printf(p, "%s firmware fallback: %s\n",
+			   intel_uc_fw_type_repr(uc_fw->type), uc_fw->fallback.path);
+		drm_printf(p, "fallback selected: %s\n",
+			   str_yes_no(uc_fw->path == uc_fw->fallback.path));
+	}
 	drm_printf(p, "\tstatus: %s\n",
 		   intel_uc_fw_status_repr(uc_fw->status));
 	drm_printf(p, "\tversion: wanted %u.%u, found %u.%u\n",
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
index 4f169035f504..7aa2644400b9 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
@@ -74,6 +74,7 @@ struct intel_uc_fw {
 		const enum intel_uc_fw_status status;
 		enum intel_uc_fw_status __status; /* no accidental overwrites */
 	};
+	const char *wanted_path;
 	const char *path;
 	bool user_overridden;
 	size_t size;
@@ -98,6 +99,12 @@ struct intel_uc_fw {
 	u16 major_ver_found;
 	u16 minor_ver_found;
 
+	struct {
+		const char *path;
+		u16 major_ver;
+		u16 minor_ver;
+	} fallback;
+
 	u32 rsa_size;
 	u32 ucode_size;
 

From a4a4307003307220321f946892829a5364433809 Mon Sep 17 00:00:00 2001
From: John Harrison <John.C.Harrison@Intel.com>
Date: Thu, 14 Jul 2022 17:40:28 -0700
Subject: [PATCH 05/37] drm/i915/guc: Don't use pr_err when not necessary

A bunch of code was copy/pasted using pr_err as the default way to
report errors. However, drm_err is significantly more useful in
identifying where the error came from. So update the code to use that
instead.

Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Reviewed-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220715004028.2126239-1-John.C.Harrison@Intel.com
---
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c      | 10 ++---
 drivers/gpu/drm/i915/gt/uc/selftest_guc.c     | 37 +++++++++----------
 .../drm/i915/gt/uc/selftest_guc_multi_lrc.c   | 10 ++---
 3 files changed, 28 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index 56a0d80f88ba..d5fca1f68eff 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -220,11 +220,11 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw)
 			    fw_blobs[i].rev < fw_blobs[i - 1].rev)
 				continue;
 
-			pr_err("invalid FW blob order: %s r%u comes before %s r%u\n",
-			       intel_platform_name(fw_blobs[i - 1].p),
-			       fw_blobs[i - 1].rev,
-			       intel_platform_name(fw_blobs[i].p),
-			       fw_blobs[i].rev);
+			drm_err(&i915->drm, "Invalid FW blob order: %s r%u comes before %s r%u\n",
+				intel_platform_name(fw_blobs[i - 1].p),
+				fw_blobs[i - 1].rev,
+				intel_platform_name(fw_blobs[i].p),
+				fw_blobs[i].rev);
 
 			uc_fw->path = NULL;
 		}
diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
index 1df71d0796ae..20e0c39259fb 100644
--- a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
@@ -62,7 +62,7 @@ static int intel_guc_scrub_ctbs(void *arg)
 		ce = intel_context_create(engine);
 		if (IS_ERR(ce)) {
 			ret = PTR_ERR(ce);
-			pr_err("Failed to create context, %d: %d\n", i, ret);
+			drm_err(&gt->i915->drm, "Failed to create context, %d: %d\n", i, ret);
 			goto err;
 		}
 
@@ -83,7 +83,7 @@ static int intel_guc_scrub_ctbs(void *arg)
 
 		if (IS_ERR(rq)) {
 			ret = PTR_ERR(rq);
-			pr_err("Failed to create request, %d: %d\n", i, ret);
+			drm_err(&gt->i915->drm, "Failed to create request, %d: %d\n", i, ret);
 			goto err;
 		}
 
@@ -93,7 +93,7 @@ static int intel_guc_scrub_ctbs(void *arg)
 	for (i = 0; i < 3; ++i) {
 		ret = i915_request_wait(last[i], 0, HZ);
 		if (ret < 0) {
-			pr_err("Last request failed to complete: %d\n", ret);
+			drm_err(&gt->i915->drm, "Last request failed to complete: %d\n", ret);
 			goto err;
 		}
 		i915_request_put(last[i]);
@@ -110,7 +110,7 @@ static int intel_guc_scrub_ctbs(void *arg)
 	/* GT will not idle if G2H are lost */
 	ret = intel_gt_wait_for_idle(gt, HZ);
 	if (ret < 0) {
-		pr_err("GT failed to idle: %d\n", ret);
+		drm_err(&gt->i915->drm, "GT failed to idle: %d\n", ret);
 		goto err;
 	}
 
@@ -150,7 +150,7 @@ static int intel_guc_steal_guc_ids(void *arg)
 
 	ce = kcalloc(GUC_MAX_CONTEXT_ID, sizeof(*ce), GFP_KERNEL);
 	if (!ce) {
-		pr_err("Context array allocation failed\n");
+		drm_err(&gt->i915->drm, "Context array allocation failed\n");
 		return -ENOMEM;
 	}
 
@@ -164,24 +164,24 @@ static int intel_guc_steal_guc_ids(void *arg)
 	if (IS_ERR(ce[context_index])) {
 		ret = PTR_ERR(ce[context_index]);
 		ce[context_index] = NULL;
-		pr_err("Failed to create context: %d\n", ret);
+		drm_err(&gt->i915->drm, "Failed to create context: %d\n", ret);
 		goto err_wakeref;
 	}
 	ret = igt_spinner_init(&spin, engine->gt);
 	if (ret) {
-		pr_err("Failed to create spinner: %d\n", ret);
+		drm_err(&gt->i915->drm, "Failed to create spinner: %d\n", ret);
 		goto err_contexts;
 	}
 	spin_rq = igt_spinner_create_request(&spin, ce[context_index],
 					     MI_ARB_CHECK);
 	if (IS_ERR(spin_rq)) {
 		ret = PTR_ERR(spin_rq);
-		pr_err("Failed to create spinner request: %d\n", ret);
+		drm_err(&gt->i915->drm, "Failed to create spinner request: %d\n", ret);
 		goto err_contexts;
 	}
 	ret = request_add_spin(spin_rq, &spin);
 	if (ret) {
-		pr_err("Failed to add Spinner request: %d\n", ret);
+		drm_err(&gt->i915->drm, "Failed to add Spinner request: %d\n", ret);
 		goto err_spin_rq;
 	}
 
@@ -191,7 +191,7 @@ static int intel_guc_steal_guc_ids(void *arg)
 		if (IS_ERR(ce[context_index])) {
 			ret = PTR_ERR(ce[context_index--]);
 			ce[context_index] = NULL;
-			pr_err("Failed to create context: %d\n", ret);
+			drm_err(&gt->i915->drm, "Failed to create context: %d\n", ret);
 			goto err_spin_rq;
 		}
 
@@ -200,8 +200,8 @@ static int intel_guc_steal_guc_ids(void *arg)
 			ret = PTR_ERR(rq);
 			rq = NULL;
 			if (ret != -EAGAIN) {
-				pr_err("Failed to create request, %d: %d\n",
-				       context_index, ret);
+				drm_err(&gt->i915->drm, "Failed to create request, %d: %d\n",
+					context_index, ret);
 				goto err_spin_rq;
 			}
 		} else {
@@ -215,7 +215,7 @@ static int intel_guc_steal_guc_ids(void *arg)
 	igt_spinner_end(&spin);
 	ret = intel_selftest_wait_for_rq(spin_rq);
 	if (ret) {
-		pr_err("Spin request failed to complete: %d\n", ret);
+		drm_err(&gt->i915->drm, "Spin request failed to complete: %d\n", ret);
 		i915_request_put(last);
 		goto err_spin_rq;
 	}
@@ -227,7 +227,7 @@ static int intel_guc_steal_guc_ids(void *arg)
 	ret = i915_request_wait(last, 0, HZ * 30);
 	i915_request_put(last);
 	if (ret < 0) {
-		pr_err("Last request failed to complete: %d\n", ret);
+		drm_err(&gt->i915->drm, "Last request failed to complete: %d\n", ret);
 		goto err_spin_rq;
 	}
 
@@ -235,7 +235,7 @@ static int intel_guc_steal_guc_ids(void *arg)
 	rq = nop_user_request(ce[context_index], NULL);
 	if (IS_ERR(rq)) {
 		ret = PTR_ERR(rq);
-		pr_err("Failed to steal guc_id, %d: %d\n", context_index, ret);
+		drm_err(&gt->i915->drm, "Failed to steal guc_id, %d: %d\n", context_index, ret);
 		goto err_spin_rq;
 	}
 
@@ -243,21 +243,20 @@ static int intel_guc_steal_guc_ids(void *arg)
 	ret = i915_request_wait(rq, 0, HZ);
 	i915_request_put(rq);
 	if (ret < 0) {
-		pr_err("Request with stolen guc_id failed to complete: %d\n",
-		       ret);
+		drm_err(&gt->i915->drm, "Request with stolen guc_id failed to complete: %d\n", ret);
 		goto err_spin_rq;
 	}
 
 	/* Wait for idle */
 	ret = intel_gt_wait_for_idle(gt, HZ * 30);
 	if (ret < 0) {
-		pr_err("GT failed to idle: %d\n", ret);
+		drm_err(&gt->i915->drm, "GT failed to idle: %d\n", ret);
 		goto err_spin_rq;
 	}
 
 	/* Verify a guc_id was stolen */
 	if (guc->number_guc_id_stolen == number_guc_id_stolen) {
-		pr_err("No guc_id was stolen");
+		drm_err(&gt->i915->drm, "No guc_id was stolen");
 		ret = -EINVAL;
 	} else {
 		ret = 0;
diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c
index 812220a43df8..d17982c36d25 100644
--- a/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c
+++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c
@@ -115,30 +115,30 @@ static int __intel_guc_multi_lrc_basic(struct intel_gt *gt, unsigned int class)
 
 	parent = multi_lrc_create_parent(gt, class, 0);
 	if (IS_ERR(parent)) {
-		pr_err("Failed creating contexts: %ld", PTR_ERR(parent));
+		drm_err(&gt->i915->drm, "Failed creating contexts: %ld", PTR_ERR(parent));
 		return PTR_ERR(parent);
 	} else if (!parent) {
-		pr_debug("Not enough engines in class: %d", class);
+		drm_dbg(&gt->i915->drm, "Not enough engines in class: %d", class);
 		return 0;
 	}
 
 	rq = multi_lrc_nop_request(parent);
 	if (IS_ERR(rq)) {
 		ret = PTR_ERR(rq);
-		pr_err("Failed creating requests: %d", ret);
+		drm_err(&gt->i915->drm, "Failed creating requests: %d", ret);
 		goto out;
 	}
 
 	ret = intel_selftest_wait_for_rq(rq);
 	if (ret)
-		pr_err("Failed waiting on request: %d", ret);
+		drm_err(&gt->i915->drm, "Failed waiting on request: %d", ret);
 
 	i915_request_put(rq);
 
 	if (ret >= 0) {
 		ret = intel_gt_wait_for_idle(gt, HZ * 5);
 		if (ret < 0)
-			pr_err("GT failed to idle: %d\n", ret);
+			drm_err(&gt->i915->drm, "GT failed to idle: %d\n", ret);
 	}
 
 out:

From 2be1959ecefd9f655e6262144a1b970bc2f34054 Mon Sep 17 00:00:00 2001
From: Jason Wang <wangborong@cdjrlc.com>
Date: Sun, 17 Jul 2022 02:44:39 +0800
Subject: [PATCH 06/37] drm/i915/gt: Remove unneeded semicolon

The semicolon after the `}' is unneeded.

Signed-off-by: Jason Wang <wangborong@cdjrlc.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
[Removed line mention when pushing]
Link: https://patchwork.freedesktop.org/patch/msgid/20220716184439.72056-1-wangborong@cdjrlc.com
---
 drivers/gpu/drm/i915/gt/intel_migrate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c
index 2c35324b5f68..a69b244f14d0 100644
--- a/drivers/gpu/drm/i915/gt/intel_migrate.c
+++ b/drivers/gpu/drm/i915/gt/intel_migrate.c
@@ -645,7 +645,7 @@ static int scatter_list_length(struct scatterlist *sg)
 	while (sg && sg_dma_len(sg)) {
 		len += sg_dma_len(sg);
 		sg = sg_next(sg);
-	};
+	}
 
 	return len;
 }

From b25c377a5b26f107ea831af05919fc3782aabebc Mon Sep 17 00:00:00 2001
From: Jason Wang <wangborong@cdjrlc.com>
Date: Sat, 16 Jul 2022 12:05:20 +0800
Subject: [PATCH 07/37] drm/i915/selftests: Fix comment typo

Fix the double `wait' typo in comment.

Signed-off-by: Jason Wang <wangborong@cdjrlc.com>
Reviewed-by: Andrzej Hajda <andrzej.hajda@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220716040520.31676-1-wangborong@cdjrlc.com
---
 drivers/gpu/drm/i915/selftests/i915_request.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index c56a0c2cd2f7..ec05f578a698 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -971,7 +971,7 @@ static struct i915_vma *empty_batch(struct drm_i915_private *i915)
 	if (err)
 		goto err;
 
-	/* Force the wait wait now to avoid including it in the benchmark */
+	/* Force the wait now to avoid including it in the benchmark */
 	err = i915_vma_sync(vma);
 	if (err)
 		goto err_pin;

From a8c18bec83a82a90d5e35a080bc465581459313e Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Wed, 27 Jul 2022 19:40:23 +0200
Subject: [PATCH 08/37] drm/i915: Suppress oom warning for shmemfs object
 allocation failure

We report object allocation failures to userspace with ENOMEM, yet we
still show the memory warning after failing to shrink device allocated
pages. While this warning is similar to other system page allocation
failures, it is superfluous to the ENOMEM provided directly to
userspace.

v2: Add NOWARN in few more places from where we might return
    ENOMEM to userspace.

Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/4936
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Co-developed-by: Nirmoy Das <nirmoy.das@intel.com>
Signed-off-by: Nirmoy Das <nirmoy.das@intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220727174023.16766-1-nirmoy.das@intel.com
---
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 955844f19193..6a80ad8097bb 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -75,7 +75,7 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
 	if (size > resource_size(&mr->region))
 		return -ENOMEM;
 
-	if (sg_alloc_table(st, page_count, GFP_KERNEL))
+	if (sg_alloc_table(st, page_count, GFP_KERNEL | __GFP_NOWARN))
 		return -ENOMEM;
 
 	/*
@@ -137,7 +137,7 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
 				 * trigger the out-of-memory killer and for
 				 * this we want __GFP_RETRY_MAYFAIL.
 				 */
-				gfp |= __GFP_RETRY_MAYFAIL;
+				gfp |= __GFP_RETRY_MAYFAIL | __GFP_NOWARN;
 			}
 		} while (1);
 
@@ -209,7 +209,7 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
 	GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
 
 rebuild_st:
-	st = kmalloc(sizeof(*st), GFP_KERNEL);
+	st = kmalloc(sizeof(*st), GFP_KERNEL | __GFP_NOWARN);
 	if (!st)
 		return -ENOMEM;
 

From f5dfbfc0ae00c2c2c0518da9e1f9a8cca50ae544 Mon Sep 17 00:00:00 2001
From: Nirmoy Das <nirmoy.das@intel.com>
Date: Wed, 27 Jul 2022 19:33:06 +0200
Subject: [PATCH 09/37] drm/i915: disable pci resize on 32-bit machine

PCI bar resize only works with 64 bit BAR so disable
this on 32-bit machine and resolve below compilation error:

drivers/gpu/drm/i915/gt/intel_region_lmem.c:94:23: error: result of
comparison of constant 4294967296 with expression of type
'resource_size_t' (aka 'unsigned int') is always false
[-Werror,-Wtautological-constant-out-of-range-compare]
                    root_res->start > 0x100000000ull)

Fixes: a91d1a17cd341 ("drm/i915: Add support for LMEM PCIe resizable bar")
Reported-by: Linux Kernel Functional Testing <lkft@linaro.org>
Tested-by: Linux Kernel Functional Testing <lkft@linaro.org>
Acked-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Nirmoy Das <nirmoy.das@intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220727173306.16247-1-nirmoy.das@intel.com
---
 drivers/gpu/drm/i915/gt/intel_region_lmem.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c
index 6e90032e12e9..aa6aed837194 100644
--- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c
+++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c
@@ -15,6 +15,7 @@
 #include "gt/intel_gt_mcr.h"
 #include "gt/intel_gt_regs.h"
 
+#ifdef CONFIG_64BIT
 static void _release_bars(struct pci_dev *pdev)
 {
 	int resno;
@@ -111,6 +112,9 @@ static void i915_resize_lmem_bar(struct drm_i915_private *i915, resource_size_t
 	pci_assign_unassigned_bus_resources(pdev->bus);
 	pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd);
 }
+#else
+static void i915_resize_lmem_bar(struct drm_i915_private *i915, resource_size_t lmem_size) {}
+#endif
 
 static int
 region_lmem_release(struct intel_memory_region *mem)

From 353819d85f87be46aeb9c1dd929d445a006fc6ec Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Wed, 27 Jul 2022 17:43:46 +0100
Subject: [PATCH 10/37] drm/i915/ttm: don't leak the ccs state
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The kernel only manages the ccs state with lmem-only objects, however
the kernel should still take care not to leak the CCS state from the
previous user.

Fixes: 48760ffe923a ("drm/i915/gt: Clear compress metadata for Flat-ccs objects")
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Ramalingam C <ramalingam.c@intel.com>
Reviewed-by: Ramalingam C <ramalingam.c@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220727164346.282407-1-matthew.auld@intel.com
---
 drivers/gpu/drm/i915/gt/intel_migrate.c | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c
index a69b244f14d0..9a0814422ba4 100644
--- a/drivers/gpu/drm/i915/gt/intel_migrate.c
+++ b/drivers/gpu/drm/i915/gt/intel_migrate.c
@@ -708,7 +708,7 @@ intel_context_migrate_copy(struct intel_context *ce,
 	u8 src_access, dst_access;
 	struct i915_request *rq;
 	int src_sz, dst_sz;
-	bool ccs_is_src;
+	bool ccs_is_src, overwrite_ccs;
 	int err;
 
 	GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm);
@@ -749,6 +749,8 @@ intel_context_migrate_copy(struct intel_context *ce,
 			get_ccs_sg_sgt(&it_ccs, bytes_to_cpy);
 	}
 
+	overwrite_ccs = HAS_FLAT_CCS(i915) && !ccs_bytes_to_cpy && dst_is_lmem;
+
 	src_offset = 0;
 	dst_offset = CHUNK_SZ;
 	if (HAS_64K_PAGES(ce->engine->i915)) {
@@ -852,6 +854,25 @@ intel_context_migrate_copy(struct intel_context *ce,
 			if (err)
 				goto out_rq;
 			ccs_bytes_to_cpy -= ccs_sz;
+		} else if (overwrite_ccs) {
+			err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
+			if (err)
+				goto out_rq;
+
+			/*
+			 * While we can't always restore/manage the CCS state,
+			 * we still need to ensure we don't leak the CCS state
+			 * from the previous user, so make sure we overwrite it
+			 * with something.
+			 */
+			err = emit_copy_ccs(rq, dst_offset, INDIRECT_ACCESS,
+					    dst_offset, DIRECT_ACCESS, len);
+			if (err)
+				goto out_rq;
+
+			err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
+			if (err)
+				goto out_rq;
 		}
 
 		/* Arbitration is re-enabled between requests. */

From 4bedceaed1ae1172cfe72d3ff752b3a1d32fe4d9 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris.p.wilson@intel.com>
Date: Wed, 27 Jul 2022 14:29:51 +0200
Subject: [PATCH 11/37] drm/i915/gt: Ignore TLB invalidations on idle engines
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Check if the device is powered down prior to any engine activity,
as, on such cases, all the TLBs were already invalidated, so an
explicit TLB invalidation is not needed, thus reducing the
performance regression impact due to it.

This becomes more significant with GuC, as it can only do so when
the connection to the GuC is awake.

Cc: stable@vger.kernel.org
Fixes: 7938d61591d3 ("drm/i915: Flush TLBs before releasing backing store")
Signed-off-by: Chris Wilson <chris.p.wilson@intel.com>
Cc: Fei Yang <fei.yang@intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Acked-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Acked-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/278a57a672edac75683f0818b292e95da583a5fe.1658924372.git.mchehab@kernel.org
---
 drivers/gpu/drm/i915/gem/i915_gem_pages.c | 10 ++++++----
 drivers/gpu/drm/i915/gt/intel_gt.c        | 17 ++++++++++-------
 drivers/gpu/drm/i915/gt/intel_gt_pm.h     |  3 +++
 3 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index 97c820eee115..6835279943df 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -6,14 +6,15 @@
 
 #include <drm/drm_cache.h>
 
+#include "gt/intel_gt.h"
+#include "gt/intel_gt_pm.h"
+
 #include "i915_drv.h"
 #include "i915_gem_object.h"
 #include "i915_scatterlist.h"
 #include "i915_gem_lmem.h"
 #include "i915_gem_mman.h"
 
-#include "gt/intel_gt.h"
-
 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
 				 struct sg_table *pages,
 				 unsigned int sg_page_sizes)
@@ -217,10 +218,11 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
 
 	if (test_and_clear_bit(I915_BO_WAS_BOUND_BIT, &obj->flags)) {
 		struct drm_i915_private *i915 = to_i915(obj->base.dev);
+		struct intel_gt *gt = to_gt(i915);
 		intel_wakeref_t wakeref;
 
-		with_intel_runtime_pm_if_active(&i915->runtime_pm, wakeref)
-			intel_gt_invalidate_tlbs(to_gt(i915));
+		with_intel_gt_pm_if_awake(gt, wakeref)
+			intel_gt_invalidate_tlbs(gt);
 	}
 
 	return pages;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 68c2b0d8f187..c4d43da84d8e 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -12,6 +12,7 @@
 
 #include "i915_drv.h"
 #include "intel_context.h"
+#include "intel_engine_pm.h"
 #include "intel_engine_regs.h"
 #include "intel_ggtt_gmch.h"
 #include "intel_gt.h"
@@ -924,6 +925,7 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
 	struct drm_i915_private *i915 = gt->i915;
 	struct intel_uncore *uncore = gt->uncore;
 	struct intel_engine_cs *engine;
+	intel_engine_mask_t awake, tmp;
 	enum intel_engine_id id;
 	const i915_reg_t *regs;
 	unsigned int num = 0;
@@ -947,26 +949,31 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
 
 	GEM_TRACE("\n");
 
-	assert_rpm_wakelock_held(&i915->runtime_pm);
-
 	mutex_lock(&gt->tlb_invalidate_lock);
 	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
 
 	spin_lock_irq(&uncore->lock); /* serialise invalidate with GT reset */
 
+	awake = 0;
 	for_each_engine(engine, gt, id) {
 		struct reg_and_bit rb;
 
+		if (!intel_engine_pm_is_awake(engine))
+			continue;
+
 		rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
 		if (!i915_mmio_reg_offset(rb.reg))
 			continue;
 
 		intel_uncore_write_fw(uncore, rb.reg, rb.bit);
+		awake |= engine->mask;
 	}
 
 	spin_unlock_irq(&uncore->lock);
 
-	for_each_engine(engine, gt, id) {
+	for_each_engine_masked(engine, gt, awake, tmp) {
+		struct reg_and_bit rb;
+
 		/*
 		 * HW architecture suggest typical invalidation time at 40us,
 		 * with pessimistic cases up to 100us and a recommendation to
@@ -974,12 +981,8 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
 		 */
 		const unsigned int timeout_us = 100;
 		const unsigned int timeout_ms = 4;
-		struct reg_and_bit rb;
 
 		rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
-		if (!i915_mmio_reg_offset(rb.reg))
-			continue;
-
 		if (__intel_wait_for_register_fw(uncore,
 						 rb.reg, rb.bit, 0,
 						 timeout_us, timeout_ms,
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
index bc898df7a48c..a334787a4939 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
@@ -55,6 +55,9 @@ static inline void intel_gt_pm_might_put(struct intel_gt *gt)
 	for (tmp = 1, intel_gt_pm_get(gt); tmp; \
 	     intel_gt_pm_put(gt), tmp = 0)
 
+#define with_intel_gt_pm_if_awake(gt, wf) \
+	for (wf = intel_gt_pm_get_if_awake(gt); wf; intel_gt_pm_put_async(gt), wf = 0)
+
 static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt)
 {
 	return intel_wakeref_wait_for_idle(&gt->wakeref);

From 4d87d36247e7fb9419b25331051c0746d09e57bb Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@kernel.org>
Date: Wed, 27 Jul 2022 14:29:52 +0200
Subject: [PATCH 12/37] drm/i915/gt: document with_intel_gt_pm_if_awake()

Add a kernel-doc markup to document this new macro.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/b974905bd0f6b5308b91561cc85eeecd94f1452a.1658924372.git.mchehab@kernel.org
---
 drivers/gpu/drm/i915/gt/intel_gt_pm.h | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
index a334787a4939..6c9a46452364 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
@@ -55,6 +55,14 @@ static inline void intel_gt_pm_might_put(struct intel_gt *gt)
 	for (tmp = 1, intel_gt_pm_get(gt); tmp; \
 	     intel_gt_pm_put(gt), tmp = 0)
 
+/**
+ * with_intel_gt_pm_if_awake - if GT is PM awake, get a reference to prevent
+ *	it to sleep, run some code and then asynchrously put the reference
+ *	away.
+ *
+ * @gt: pointer to the gt
+ * @wf: pointer to a temporary wakeref.
+ */
 #define with_intel_gt_pm_if_awake(gt, wf) \
 	for (wf = intel_gt_pm_get_if_awake(gt); wf; intel_gt_pm_put_async(gt), wf = 0)
 

From dfc83de118ff7930acc9a4c8dfdba7c153aa44d6 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris.p.wilson@intel.com>
Date: Wed, 27 Jul 2022 14:29:53 +0200
Subject: [PATCH 13/37] drm/i915/gt: Invalidate TLB of the OA unit at TLB
 invalidations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Ensure that the TLB of the OA unit is also invalidated
on gen12 HW, as just invalidating the TLB of an engine is not
enough.

Cc: stable@vger.kernel.org
Fixes: 7938d61591d3 ("drm/i915: Flush TLBs before releasing backing store")
Signed-off-by: Chris Wilson <chris.p.wilson@intel.com>
Cc: Fei Yang <fei.yang@intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Acked-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Acked-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/59724d9f5cf1e93b1620d01b8332ac991555283d.1658924372.git.mchehab@kernel.org
---
 drivers/gpu/drm/i915/gt/intel_gt.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index c4d43da84d8e..1d84418e8676 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -11,6 +11,7 @@
 #include "pxp/intel_pxp.h"
 
 #include "i915_drv.h"
+#include "i915_perf_oa_regs.h"
 #include "intel_context.h"
 #include "intel_engine_pm.h"
 #include "intel_engine_regs.h"
@@ -969,6 +970,15 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
 		awake |= engine->mask;
 	}
 
+	/* Wa_2207587034:tgl,dg1,rkl,adl-s,adl-p */
+	if (awake &&
+	    (IS_TIGERLAKE(i915) ||
+	     IS_DG1(i915) ||
+	     IS_ROCKETLAKE(i915) ||
+	     IS_ALDERLAKE_S(i915) ||
+	     IS_ALDERLAKE_P(i915)))
+		intel_uncore_write_fw(uncore, GEN12_OA_TLB_INV_CR, 1);
+
 	spin_unlock_irq(&uncore->lock);
 
 	for_each_engine_masked(engine, gt, awake, tmp) {

From be0366f168033374a93e4c43fdaa1a90ab905184 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris.p.wilson@intel.com>
Date: Wed, 27 Jul 2022 14:29:54 +0200
Subject: [PATCH 14/37] drm/i915/gt: Skip TLB invalidations once wedged
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Skip all further TLB invalidations once the device is wedged and
had been reset, as, on such cases, it can no longer process instructions
on the GPU and the user no longer has access to the TLB's in each engine.

So, an attempt to do a TLB cache invalidation will produce a timeout.

That helps to reduce the performance regression introduced by TLB
invalidate logic.

Cc: stable@vger.kernel.org
Fixes: 7938d61591d3 ("drm/i915: Flush TLBs before releasing backing store")
Signed-off-by: Chris Wilson <chris.p.wilson@intel.com>
Cc: Fei Yang <fei.yang@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Acked-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/5aa86564b9ec5fe7fe605c1dd7de76855401ed73.1658924372.git.mchehab@kernel.org
---
 drivers/gpu/drm/i915/gt/intel_gt.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 1d84418e8676..5c55a90672f4 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -934,6 +934,9 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
 	if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
 		return;
 
+	if (intel_gt_is_wedged(gt))
+		return;
+
 	if (GRAPHICS_VER(i915) == 12) {
 		regs = gen12_regs;
 		num = ARRAY_SIZE(gen12_regs);

From 5d36acb7198b0e5eb88e6b701f9ad7b9448f8df9 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris.p.wilson@intel.com>
Date: Wed, 27 Jul 2022 14:29:55 +0200
Subject: [PATCH 15/37] drm/i915/gt: Batch TLB invalidations

Invalidate TLB in batches, in order to reduce performance regressions.

Currently, every caller performs a full barrier around a TLB
invalidation, ignoring all other invalidations that may have already
removed their PTEs from the cache. As this is a synchronous operation
and can be quite slow, we cause multiple threads to contend on the TLB
invalidate mutex blocking userspace.

We only need to invalidate the TLB once after replacing our PTE to
ensure that there is no possible continued access to the physical
address before releasing our pages. By tracking a seqno for each full
TLB invalidate we can quickly determine if one has been performed since
rewriting the PTE, and only if necessary trigger one for ourselves.

That helps to reduce the performance regression introduced by TLB
invalidate logic.

[mchehab: rebased to not require moving the code to a separate file]

Cc: stable@vger.kernel.org
Fixes: 7938d61591d3 ("drm/i915: Flush TLBs before releasing backing store")
Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Signed-off-by: Chris Wilson <chris.p.wilson@intel.com>
Cc: Fei Yang <fei.yang@intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
Acked-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/4e97ef5deb6739cadaaf40aa45620547e9c4ec06.1658924372.git.mchehab@kernel.org
---
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |  3 +-
 drivers/gpu/drm/i915/gem/i915_gem_pages.c     | 21 +++++---
 drivers/gpu/drm/i915/gt/intel_gt.c            | 53 ++++++++++++++-----
 drivers/gpu/drm/i915/gt/intel_gt.h            | 12 ++++-
 drivers/gpu/drm/i915/gt/intel_gt_types.h      | 18 ++++++-
 drivers/gpu/drm/i915/gt/intel_ppgtt.c         |  8 ++-
 drivers/gpu/drm/i915/i915_vma.c               | 33 +++++++++---
 drivers/gpu/drm/i915/i915_vma.h               |  1 +
 drivers/gpu/drm/i915/i915_vma_resource.c      |  5 +-
 drivers/gpu/drm/i915/i915_vma_resource.h      |  6 ++-
 10 files changed, 125 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 5cf36a130061..9f6b14ec189a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -335,7 +335,6 @@ struct drm_i915_gem_object {
 #define I915_BO_READONLY          BIT(7)
 #define I915_TILING_QUIRK_BIT     8 /* unknown swizzling; do not release! */
 #define I915_BO_PROTECTED         BIT(9)
-#define I915_BO_WAS_BOUND_BIT     10
 	/**
 	 * @mem_flags - Mutable placement-related flags
 	 *
@@ -616,6 +615,8 @@ struct drm_i915_gem_object {
 		 * pages were last acquired.
 		 */
 		bool dirty:1;
+
+		u32 tlb;
 	} mm;
 
 	struct {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index 6835279943df..8357dbdcab5c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -191,6 +191,18 @@ static void unmap_object(struct drm_i915_gem_object *obj, void *ptr)
 		vunmap(ptr);
 }
 
+static void flush_tlb_invalidate(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	struct intel_gt *gt = to_gt(i915);
+
+	if (!obj->mm.tlb)
+		return;
+
+	intel_gt_invalidate_tlb(gt, obj->mm.tlb);
+	obj->mm.tlb = 0;
+}
+
 struct sg_table *
 __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
 {
@@ -216,14 +228,7 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
 	__i915_gem_object_reset_page_iter(obj);
 	obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;
 
-	if (test_and_clear_bit(I915_BO_WAS_BOUND_BIT, &obj->flags)) {
-		struct drm_i915_private *i915 = to_i915(obj->base.dev);
-		struct intel_gt *gt = to_gt(i915);
-		intel_wakeref_t wakeref;
-
-		with_intel_gt_pm_if_awake(gt, wakeref)
-			intel_gt_invalidate_tlbs(gt);
-	}
+	flush_tlb_invalidate(obj);
 
 	return pages;
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 5c55a90672f4..f435e06125aa 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -38,8 +38,6 @@ static void __intel_gt_init_early(struct intel_gt *gt)
 {
 	spin_lock_init(&gt->irq_lock);
 
-	mutex_init(&gt->tlb_invalidate_lock);
-
 	INIT_LIST_HEAD(&gt->closed_vma);
 	spin_lock_init(&gt->closed_lock);
 
@@ -50,6 +48,8 @@ static void __intel_gt_init_early(struct intel_gt *gt)
 	intel_gt_init_reset(gt);
 	intel_gt_init_requests(gt);
 	intel_gt_init_timelines(gt);
+	mutex_init(&gt->tlb.invalidate_lock);
+	seqcount_mutex_init(&gt->tlb.seqno, &gt->tlb.invalidate_lock);
 	intel_gt_pm_init_early(gt);
 
 	intel_uc_init_early(&gt->uc);
@@ -770,6 +770,7 @@ void intel_gt_driver_late_release_all(struct drm_i915_private *i915)
 		intel_gt_fini_requests(gt);
 		intel_gt_fini_reset(gt);
 		intel_gt_fini_timelines(gt);
+		mutex_destroy(&gt->tlb.invalidate_lock);
 		intel_engines_free(gt);
 	}
 }
@@ -908,7 +909,7 @@ get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8,
 	return rb;
 }
 
-void intel_gt_invalidate_tlbs(struct intel_gt *gt)
+static void mmio_invalidate_full(struct intel_gt *gt)
 {
 	static const i915_reg_t gen8_regs[] = {
 		[RENDER_CLASS]			= GEN8_RTCR,
@@ -931,12 +932,6 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
 	const i915_reg_t *regs;
 	unsigned int num = 0;
 
-	if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
-		return;
-
-	if (intel_gt_is_wedged(gt))
-		return;
-
 	if (GRAPHICS_VER(i915) == 12) {
 		regs = gen12_regs;
 		num = ARRAY_SIZE(gen12_regs);
@@ -951,9 +946,6 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
 			  "Platform does not implement TLB invalidation!"))
 		return;
 
-	GEM_TRACE("\n");
-
-	mutex_lock(&gt->tlb_invalidate_lock);
 	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
 
 	spin_lock_irq(&uncore->lock); /* serialise invalidate with GT reset */
@@ -973,6 +965,8 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
 		awake |= engine->mask;
 	}
 
+	GT_TRACE(gt, "invalidated engines %08x\n", awake);
+
 	/* Wa_2207587034:tgl,dg1,rkl,adl-s,adl-p */
 	if (awake &&
 	    (IS_TIGERLAKE(i915) ||
@@ -1012,5 +1006,38 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
 	 * transitions.
 	 */
 	intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL);
-	mutex_unlock(&gt->tlb_invalidate_lock);
+}
+
+static bool tlb_seqno_passed(const struct intel_gt *gt, u32 seqno)
+{
+	u32 cur = intel_gt_tlb_seqno(gt);
+
+	/* Only skip if a *full* TLB invalidate barrier has passed */
+	return (s32)(cur - ALIGN(seqno, 2)) > 0;
+}
+
+void intel_gt_invalidate_tlb(struct intel_gt *gt, u32 seqno)
+{
+	intel_wakeref_t wakeref;
+
+	if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
+		return;
+
+	if (intel_gt_is_wedged(gt))
+		return;
+
+	if (tlb_seqno_passed(gt, seqno))
+		return;
+
+	with_intel_gt_pm_if_awake(gt, wakeref) {
+		mutex_lock(&gt->tlb.invalidate_lock);
+		if (tlb_seqno_passed(gt, seqno))
+			goto unlock;
+
+		mmio_invalidate_full(gt);
+
+		write_seqcount_invalidate(&gt->tlb.seqno);
+unlock:
+		mutex_unlock(&gt->tlb.invalidate_lock);
+	}
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h
index 82d6f248d876..40b06adf509a 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -101,6 +101,16 @@ void intel_gt_info_print(const struct intel_gt_info *info,
 
 void intel_gt_watchdog_work(struct work_struct *work);
 
-void intel_gt_invalidate_tlbs(struct intel_gt *gt);
+static inline u32 intel_gt_tlb_seqno(const struct intel_gt *gt)
+{
+	return seqprop_sequence(&gt->tlb.seqno);
+}
+
+static inline u32 intel_gt_next_invalidate_tlb_full(const struct intel_gt *gt)
+{
+	return intel_gt_tlb_seqno(gt) | 1;
+}
+
+void intel_gt_invalidate_tlb(struct intel_gt *gt, u32 seqno);
 
 #endif /* __INTEL_GT_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index 80f52532290e..4d56f7d5a3be 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -11,6 +11,7 @@
 #include <linux/llist.h>
 #include <linux/mutex.h>
 #include <linux/notifier.h>
+#include <linux/seqlock.h>
 #include <linux/spinlock.h>
 #include <linux/types.h>
 #include <linux/workqueue.h>
@@ -88,7 +89,22 @@ struct intel_gt {
 	struct intel_uc uc;
 	struct intel_gsc gsc;
 
-	struct mutex tlb_invalidate_lock;
+	struct {
+		/* Serialize global tlb invalidations */
+		struct mutex invalidate_lock;
+
+		/*
+		 * Batch TLB invalidations
+		 *
+		 * After unbinding the PTE, we need to ensure the TLB
+		 * are invalidated prior to releasing the physical pages.
+		 * But we only need one such invalidation for all unbinds,
+		 * so we track how many TLB invalidations have been
+		 * performed since unbind the PTE and only emit an extra
+		 * invalidate if no full barrier has been passed.
+		 */
+		seqcount_mutex_t seqno;
+	} tlb;
 
 	struct i915_wa_list wa_list;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
index d8b94d638559..2da6c82a8bd2 100644
--- a/drivers/gpu/drm/i915/gt/intel_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
@@ -206,8 +206,12 @@ void ppgtt_bind_vma(struct i915_address_space *vm,
 void ppgtt_unbind_vma(struct i915_address_space *vm,
 		      struct i915_vma_resource *vma_res)
 {
-	if (vma_res->allocated)
-		vm->clear_range(vm, vma_res->start, vma_res->vma_size);
+	if (!vma_res->allocated)
+		return;
+
+	vm->clear_range(vm, vma_res->start, vma_res->vma_size);
+	if (vma_res->tlb)
+		vma_invalidate_tlb(vm, *vma_res->tlb);
 }
 
 static unsigned long pd_count(u64 size, int shift)
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index ef3b04c7e153..84a9ccbc5fc5 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -538,8 +538,6 @@ int i915_vma_bind(struct i915_vma *vma,
 				   bind_flags);
 	}
 
-	set_bit(I915_BO_WAS_BOUND_BIT, &vma->obj->flags);
-
 	atomic_or(bind_flags, &vma->flags);
 	return 0;
 }
@@ -1310,6 +1308,19 @@ err_unpin:
 	return err;
 }
 
+void vma_invalidate_tlb(struct i915_address_space *vm, u32 tlb)
+{
+	/*
+	 * Before we release the pages that were bound by this vma, we
+	 * must invalidate all the TLBs that may still have a reference
+	 * back to our physical address. It only needs to be done once,
+	 * so after updating the PTE to point away from the pages, record
+	 * the most recent TLB invalidation seqno, and if we have not yet
+	 * flushed the TLBs upon release, perform a full invalidation.
+	 */
+	WRITE_ONCE(tlb, intel_gt_next_invalidate_tlb_full(vm->gt));
+}
+
 static void __vma_put_pages(struct i915_vma *vma, unsigned int count)
 {
 	/* We allocate under vma_get_pages, so beware the shrinker */
@@ -1941,7 +1952,12 @@ struct dma_fence *__i915_vma_evict(struct i915_vma *vma, bool async)
 		vma->vm->skip_pte_rewrite;
 	trace_i915_vma_unbind(vma);
 
-	unbind_fence = i915_vma_resource_unbind(vma_res);
+	if (async)
+		unbind_fence = i915_vma_resource_unbind(vma_res,
+							&vma->obj->mm.tlb);
+	else
+		unbind_fence = i915_vma_resource_unbind(vma_res, NULL);
+
 	vma->resource = NULL;
 
 	atomic_and(~(I915_VMA_BIND_MASK | I915_VMA_ERROR | I915_VMA_GGTT_WRITE),
@@ -1949,10 +1965,13 @@ struct dma_fence *__i915_vma_evict(struct i915_vma *vma, bool async)
 
 	i915_vma_detach(vma);
 
-	if (!async && unbind_fence) {
-		dma_fence_wait(unbind_fence, false);
-		dma_fence_put(unbind_fence);
-		unbind_fence = NULL;
+	if (!async) {
+		if (unbind_fence) {
+			dma_fence_wait(unbind_fence, false);
+			dma_fence_put(unbind_fence);
+			unbind_fence = NULL;
+		}
+		vma_invalidate_tlb(vma->vm, vma->obj->mm.tlb);
 	}
 
 	/*
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 88ca0bd9c900..5048eed536da 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -213,6 +213,7 @@ bool i915_vma_misplaced(const struct i915_vma *vma,
 			u64 size, u64 alignment, u64 flags);
 void __i915_vma_set_map_and_fenceable(struct i915_vma *vma);
 void i915_vma_revoke_mmap(struct i915_vma *vma);
+void vma_invalidate_tlb(struct i915_address_space *vm, u32 tlb);
 struct dma_fence *__i915_vma_evict(struct i915_vma *vma, bool async);
 int __i915_vma_unbind(struct i915_vma *vma);
 int __must_check i915_vma_unbind(struct i915_vma *vma);
diff --git a/drivers/gpu/drm/i915/i915_vma_resource.c b/drivers/gpu/drm/i915/i915_vma_resource.c
index 27c55027387a..5a67995ea5fe 100644
--- a/drivers/gpu/drm/i915/i915_vma_resource.c
+++ b/drivers/gpu/drm/i915/i915_vma_resource.c
@@ -223,10 +223,13 @@ i915_vma_resource_fence_notify(struct i915_sw_fence *fence,
  * Return: A refcounted pointer to a dma-fence that signals when unbinding is
  * complete.
  */
-struct dma_fence *i915_vma_resource_unbind(struct i915_vma_resource *vma_res)
+struct dma_fence *i915_vma_resource_unbind(struct i915_vma_resource *vma_res,
+					   u32 *tlb)
 {
 	struct i915_address_space *vm = vma_res->vm;
 
+	vma_res->tlb = tlb;
+
 	/* Reference for the sw fence */
 	i915_vma_resource_get(vma_res);
 
diff --git a/drivers/gpu/drm/i915/i915_vma_resource.h b/drivers/gpu/drm/i915/i915_vma_resource.h
index 5d8427caa2ba..06923d1816e7 100644
--- a/drivers/gpu/drm/i915/i915_vma_resource.h
+++ b/drivers/gpu/drm/i915/i915_vma_resource.h
@@ -67,6 +67,7 @@ struct i915_page_sizes {
  * taken when the unbind is scheduled.
  * @skip_pte_rewrite: During ggtt suspend and vm takedown pte rewriting
  * needs to be skipped for unbind.
+ * @tlb: pointer for obj->mm.tlb, if async unbind. Otherwise, NULL
  *
  * The lifetime of a struct i915_vma_resource is from a binding request to
  * the actual possible asynchronous unbind has completed.
@@ -119,6 +120,8 @@ struct i915_vma_resource {
 	bool immediate_unbind:1;
 	bool needs_wakeref:1;
 	bool skip_pte_rewrite:1;
+
+	u32 *tlb;
 };
 
 bool i915_vma_resource_hold(struct i915_vma_resource *vma_res,
@@ -131,7 +134,8 @@ struct i915_vma_resource *i915_vma_resource_alloc(void);
 
 void i915_vma_resource_free(struct i915_vma_resource *vma_res);
 
-struct dma_fence *i915_vma_resource_unbind(struct i915_vma_resource *vma_res);
+struct dma_fence *i915_vma_resource_unbind(struct i915_vma_resource *vma_res,
+					   u32 *tlb);
 
 void __i915_vma_resource_init(struct i915_vma_resource *vma_res);
 

From d1051db85f6496ed9c825802bef3709f2e7d816f Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@kernel.org>
Date: Wed, 27 Jul 2022 14:29:56 +0200
Subject: [PATCH 16/37] drm/i915/gt: describe the new tlb parameter at
 i915_vma_resource

TLB cache invalidation can happen on two different situations:

1. synchronously, at __vma_put_pages();
2. asynchronously.

On the first case, TLB cache invalidation happens inside
__vma_put_pages(). So, no need to do it later on.

However, on the second case, the pages will keep in memory
until __i915_vma_evict() is called.

So, we need to store the TLB data at struct i915_vma_resource,
in order to do a TLB cache invalidation before allowing
userspace to re-use the same memory.

So, i915_vma_resource_unbind() has gained a new parameter
in order to store the TLB data at the second case.

Document it.

Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/aa55eef7e63b8f3d0f69b525db2dd2eb87e9db6b.1658924372.git.mchehab@kernel.org
---
 drivers/gpu/drm/i915/i915_vma_resource.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_vma_resource.c b/drivers/gpu/drm/i915/i915_vma_resource.c
index 5a67995ea5fe..de1342dbfa12 100644
--- a/drivers/gpu/drm/i915/i915_vma_resource.c
+++ b/drivers/gpu/drm/i915/i915_vma_resource.c
@@ -216,6 +216,10 @@ i915_vma_resource_fence_notify(struct i915_sw_fence *fence,
 /**
  * i915_vma_resource_unbind - Unbind a vma resource
  * @vma_res: The vma resource to unbind.
+ * @tlb: pointer to vma->obj->mm.tlb associated with the resource
+ *	 to be stored at vma_res->tlb. When not-NULL, it will be used
+ *	 to do TLB cache invalidation before freeing a VMA resource.
+ *	 Used only for async unbind.
  *
  * At this point this function does little more than publish a fence that
  * signals immediately unless signaling is held back.

From 22645976ae4c0e7eebc8cee7942692280457551e Mon Sep 17 00:00:00 2001
From: Zhanjun Dong <zhanjun.dong@intel.com>
Date: Fri, 15 Jul 2022 14:13:13 -0700
Subject: [PATCH 17/37] drm/i915/guc: Check for ct enabled while waiting for
 response

We are seeing error message of "No response for request". Some cases
happened while waiting for response and reset/suspend action was triggered.
In this case, no response is not an error, active requests will be
cancelled.

This patch will handle this condition and change the error message into
debug message.

Signed-off-by: Zhanjun Dong <zhanjun.dong@intel.com>
Reviewed-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220715211313.143645-1-zhanjun.dong@intel.com
---
 drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 27 +++++++++++++++++------
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index f01325cd1b62..74194c11234d 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -455,6 +455,7 @@ corrupted:
 
 /**
  * wait_for_ct_request_update - Wait for CT request state update.
+ * @ct:		pointer to CT
  * @req:	pointer to pending request
  * @status:	placeholder for status
  *
@@ -467,9 +468,10 @@ corrupted:
  * *	0 response received (status is valid)
  * *	-ETIMEDOUT no response within hardcoded timeout
  */
-static int wait_for_ct_request_update(struct ct_request *req, u32 *status)
+static int wait_for_ct_request_update(struct intel_guc_ct *ct, struct ct_request *req, u32 *status)
 {
 	int err;
+	bool ct_enabled;
 
 	/*
 	 * Fast commands should complete in less than 10us, so sample quickly
@@ -481,12 +483,15 @@ static int wait_for_ct_request_update(struct ct_request *req, u32 *status)
 #define GUC_CTB_RESPONSE_TIMEOUT_SHORT_MS 10
 #define GUC_CTB_RESPONSE_TIMEOUT_LONG_MS 1000
 #define done \
-	(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, READ_ONCE(req->status)) == \
+	(!(ct_enabled = intel_guc_ct_enabled(ct)) || \
+	 FIELD_GET(GUC_HXG_MSG_0_ORIGIN, READ_ONCE(req->status)) == \
 	 GUC_HXG_ORIGIN_GUC)
 	err = wait_for_us(done, GUC_CTB_RESPONSE_TIMEOUT_SHORT_MS);
 	if (err)
 		err = wait_for(done, GUC_CTB_RESPONSE_TIMEOUT_LONG_MS);
 #undef done
+	if (!ct_enabled)
+		err = -ENODEV;
 
 	*status = req->status;
 	return err;
@@ -703,11 +708,18 @@ retry:
 
 	intel_guc_notify(ct_to_guc(ct));
 
-	err = wait_for_ct_request_update(&request, status);
+	err = wait_for_ct_request_update(ct, &request, status);
 	g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN);
 	if (unlikely(err)) {
-		CT_ERROR(ct, "No response for request %#x (fence %u)\n",
-			 action[0], request.fence);
+		if (err == -ENODEV)
+			/* wait_for_ct_request_update returns -ENODEV on reset/suspend in progress.
+			 * In this case, output is debug rather than error info
+			 */
+			CT_DEBUG(ct, "Request %#x (fence %u) cancelled as CTB is disabled\n",
+				 action[0], request.fence);
+		else
+			CT_ERROR(ct, "No response for request %#x (fence %u)\n",
+				 action[0], request.fence);
 		goto unlink;
 	}
 
@@ -771,8 +783,9 @@ int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len,
 
 	ret = ct_send(ct, action, len, response_buf, response_buf_size, &status);
 	if (unlikely(ret < 0)) {
-		CT_ERROR(ct, "Sending action %#x failed (%pe) status=%#X\n",
-			 action[0], ERR_PTR(ret), status);
+		if (ret != -ENODEV)
+			CT_ERROR(ct, "Sending action %#x failed (%pe) status=%#X\n",
+				 action[0], ERR_PTR(ret), status);
 	} else if (unlikely(ret)) {
 		CT_DEBUG(ct, "send action %#x returned %d (%#x)\n",
 			 action[0], ret, ret);

From 9fb3473732908ff0f8d8f8763318c407489ae955 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Winiarski?= <michal.winiarski@intel.com>
Date: Wed, 27 Jul 2022 19:42:20 -0700
Subject: [PATCH 18/37] drm/i915/guc: Route semaphores to GuC for Gen12+
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In GuC submission mode, there is an option to use auto-switch out
semaphores and have GuC auto-switch in a waiting context. This
requires routing the semaphore interrupt to GuC.

Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220728024225.2363663-2-John.C.Harrison@Intel.com
---
 drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h        |  4 ++++
 drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 14 ++++++++++++++
 2 files changed, 18 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h
index 8dc063f087eb..a7092f711e9c 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h
@@ -102,6 +102,10 @@
 #define   GUC_SEND_TRIGGER		  (1<<0)
 #define GEN11_GUC_HOST_INTERRUPT	_MMIO(0x1901f0)
 
+#define GEN12_GUC_SEM_INTR_ENABLES	_MMIO(0xc71c)
+#define   GUC_SEM_INTR_ROUTE_TO_GUC	BIT(31)
+#define   GUC_SEM_INTR_ENABLE_ALL	(0xff)
+
 #define GUC_NUM_DOORBELLS		256
 
 /* format of the HW-monitored doorbell cacheline */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 76916aed897a..0b8c6450fa34 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -4191,13 +4191,27 @@ int intel_guc_submission_setup(struct intel_engine_cs *engine)
 
 void intel_guc_submission_enable(struct intel_guc *guc)
 {
+	struct intel_gt *gt = guc_to_gt(guc);
+
+	/* Enable and route to GuC */
+	if (GRAPHICS_VER(gt->i915) >= 12)
+		intel_uncore_write(gt->uncore, GEN12_GUC_SEM_INTR_ENABLES,
+				   GUC_SEM_INTR_ROUTE_TO_GUC |
+				   GUC_SEM_INTR_ENABLE_ALL);
+
 	guc_init_lrc_mapping(guc);
 	guc_init_engine_stats(guc);
 }
 
 void intel_guc_submission_disable(struct intel_guc *guc)
 {
+	struct intel_gt *gt = guc_to_gt(guc);
+
 	/* Note: By the time we're here, GuC may have already been reset */
+
+	/* Disable and route to host */
+	if (GRAPHICS_VER(gt->i915) >= 12)
+		intel_uncore_write(gt->uncore, GEN12_GUC_SEM_INTR_ENABLES, 0x0);
 }
 
 static bool __guc_submission_supported(struct intel_guc *guc)

From 15c5401dee76e03a1b4e30b7ba8fcec972e5bebd Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Wed, 27 Jul 2022 19:42:21 -0700
Subject: [PATCH 19/37] drm/i915/guc: Fix issues with live_preempt_cancel

Having semaphores results in different behavior when a dependent request
is cancelled. In the case of semaphores the request could be on the HW
and complete successfully while without the request is held in the
driver and the error from the dependent request is propagated. Fix
live_preempt_cancel to take this behavior into account.

Also update live_preempt_cancel to use new function intel_context_ban
rather than intel_context_set_banned.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Reviewed-by: John Harrison <John.C.Harrison@Intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220728024225.2363663-3-John.C.Harrison@Intel.com
---
 drivers/gpu/drm/i915/gt/selftest_execlists.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c
index 09f8cd2d0e2c..1e08b2473b99 100644
--- a/drivers/gpu/drm/i915/gt/selftest_execlists.c
+++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c
@@ -2077,7 +2077,7 @@ static int __cancel_active0(struct live_preempt_cancel *arg)
 		goto out;
 	}
 
-	intel_context_set_banned(rq->context);
+	intel_context_ban(rq->context, rq);
 	err = intel_engine_pulse(arg->engine);
 	if (err)
 		goto out;
@@ -2136,7 +2136,7 @@ static int __cancel_active1(struct live_preempt_cancel *arg)
 	if (err)
 		goto out;
 
-	intel_context_set_banned(rq[1]->context);
+	intel_context_ban(rq[1]->context, rq[1]);
 	err = intel_engine_pulse(arg->engine);
 	if (err)
 		goto out;
@@ -2219,7 +2219,7 @@ static int __cancel_queued(struct live_preempt_cancel *arg)
 	if (err)
 		goto out;
 
-	intel_context_set_banned(rq[2]->context);
+	intel_context_ban(rq[2]->context, rq[2]);
 	err = intel_engine_pulse(arg->engine);
 	if (err)
 		goto out;
@@ -2234,7 +2234,13 @@ static int __cancel_queued(struct live_preempt_cancel *arg)
 		goto out;
 	}
 
-	if (rq[1]->fence.error != 0) {
+	/*
+	 * The behavior between having semaphores and not is different. With
+	 * semaphores the subsequent request is on the hardware and not cancelled
+	 * while without the request is held in the driver and cancelled.
+	 */
+	if (intel_engine_has_semaphores(rq[1]->engine) &&
+	    rq[1]->fence.error != 0) {
 		pr_err("Normal inflight1 request did not complete\n");
 		err = -EINVAL;
 		goto out;
@@ -2282,7 +2288,7 @@ static int __cancel_hostile(struct live_preempt_cancel *arg)
 		goto out;
 	}
 
-	intel_context_set_banned(rq->context);
+	intel_context_ban(rq->context, rq);
 	err = intel_engine_pulse(arg->engine); /* force reset */
 	if (err)
 		goto out;

From 69142c0a5f9a0afa6ec31d5f9a4ebf293d3b5029 Mon Sep 17 00:00:00 2001
From: Rahul Kumar Singh <rahul.kumar.singh@intel.com>
Date: Thu, 28 Jul 2022 11:26:16 -0700
Subject: [PATCH 20/37] drm/i915/guc: Add selftest for a hung GuC

Add a test to check that the hangcheck will recover from a submission
hang in the GuC.

Signed-off-by: Rahul Kumar Singh <rahul.kumar.singh@intel.com>
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Reviewed-by: John Harrison <John.C.Harrison@Intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220728182616.2417491-1-John.C.Harrison@Intel.com
---
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c |   1 +
 .../drm/i915/gt/uc/selftest_guc_hangcheck.c   | 159 ++++++++++++++++++
 .../drm/i915/selftests/i915_live_selftests.h  |   1 +
 3 files changed, 161 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 0b8c6450fa34..ff205c412585 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -5177,4 +5177,5 @@ bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve)
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftest_guc.c"
 #include "selftest_guc_multi_lrc.c"
+#include "selftest_guc_hangcheck.c"
 #endif
diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c
new file mode 100644
index 000000000000..01f8cd3c3134
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "selftests/igt_spinner.h"
+#include "selftests/igt_reset.h"
+#include "selftests/intel_scheduler_helpers.h"
+#include "gt/intel_engine_heartbeat.h"
+#include "gem/selftests/mock_context.h"
+
+#define BEAT_INTERVAL	100
+
+static struct i915_request *nop_request(struct intel_engine_cs *engine)
+{
+	struct i915_request *rq;
+
+	rq = intel_engine_create_kernel_request(engine);
+	if (IS_ERR(rq))
+		return rq;
+
+	i915_request_get(rq);
+	i915_request_add(rq);
+
+	return rq;
+}
+
+static int intel_hang_guc(void *arg)
+{
+	struct intel_gt *gt = arg;
+	int ret = 0;
+	struct i915_gem_context *ctx;
+	struct intel_context *ce;
+	struct igt_spinner spin;
+	struct i915_request *rq;
+	intel_wakeref_t wakeref;
+	struct i915_gpu_error *global = &gt->i915->gpu_error;
+	struct intel_engine_cs *engine;
+	unsigned int reset_count;
+	u32 guc_status;
+	u32 old_beat;
+
+	ctx = kernel_context(gt->i915, NULL);
+	if (IS_ERR(ctx)) {
+		drm_err(&gt->i915->drm, "Failed get kernel context: %ld\n", PTR_ERR(ctx));
+		return PTR_ERR(ctx);
+	}
+
+	wakeref = intel_runtime_pm_get(gt->uncore->rpm);
+
+	ce = intel_context_create(gt->engine[BCS0]);
+	if (IS_ERR(ce)) {
+		ret = PTR_ERR(ce);
+		drm_err(&gt->i915->drm, "Failed to create spinner request: %d\n", ret);
+		goto err;
+	}
+
+	engine = ce->engine;
+	reset_count = i915_reset_count(global);
+
+	old_beat = engine->props.heartbeat_interval_ms;
+	ret = intel_engine_set_heartbeat(engine, BEAT_INTERVAL);
+	if (ret) {
+		drm_err(&gt->i915->drm, "Failed to boost heatbeat interval: %d\n", ret);
+		goto err;
+	}
+
+	ret = igt_spinner_init(&spin, engine->gt);
+	if (ret) {
+		drm_err(&gt->i915->drm, "Failed to create spinner: %d\n", ret);
+		goto err;
+	}
+
+	rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
+	intel_context_put(ce);
+	if (IS_ERR(rq)) {
+		ret = PTR_ERR(rq);
+		drm_err(&gt->i915->drm, "Failed to create spinner request: %d\n", ret);
+		goto err_spin;
+	}
+
+	ret = request_add_spin(rq, &spin);
+	if (ret) {
+		i915_request_put(rq);
+		drm_err(&gt->i915->drm, "Failed to add Spinner request: %d\n", ret);
+		goto err_spin;
+	}
+
+	ret = intel_reset_guc(gt);
+	if (ret) {
+		i915_request_put(rq);
+		drm_err(&gt->i915->drm, "Failed to reset GuC, ret = %d\n", ret);
+		goto err_spin;
+	}
+
+	guc_status = intel_uncore_read(gt->uncore, GUC_STATUS);
+	if (!(guc_status & GS_MIA_IN_RESET)) {
+		i915_request_put(rq);
+		drm_err(&gt->i915->drm, "GuC failed to reset: status = 0x%08X\n", guc_status);
+		ret = -EIO;
+		goto err_spin;
+	}
+
+	/* Wait for the heartbeat to cause a reset */
+	ret = intel_selftest_wait_for_rq(rq);
+	i915_request_put(rq);
+	if (ret) {
+		drm_err(&gt->i915->drm, "Request failed to complete: %d\n", ret);
+		goto err_spin;
+	}
+
+	if (i915_reset_count(global) == reset_count) {
+		drm_err(&gt->i915->drm, "Failed to record a GPU reset\n");
+		ret = -EINVAL;
+		goto err_spin;
+	}
+
+err_spin:
+	igt_spinner_end(&spin);
+	igt_spinner_fini(&spin);
+	intel_engine_set_heartbeat(engine, old_beat);
+
+	if (ret == 0) {
+		rq = nop_request(engine);
+		if (IS_ERR(rq)) {
+			ret = PTR_ERR(rq);
+			goto err;
+		}
+
+		ret = intel_selftest_wait_for_rq(rq);
+		i915_request_put(rq);
+		if (ret) {
+			drm_err(&gt->i915->drm, "No-op failed to complete: %d\n", ret);
+			goto err;
+		}
+	}
+
+err:
+	intel_runtime_pm_put(gt->uncore->rpm, wakeref);
+	kernel_context_close(ctx);
+
+	return ret;
+}
+
+int intel_guc_hang_check(struct drm_i915_private *i915)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(intel_hang_guc),
+	};
+	struct intel_gt *gt = to_gt(i915);
+
+	if (intel_gt_is_wedged(gt))
+		return 0;
+
+	if (!intel_uc_uses_guc_submission(&gt->uc))
+		return 0;
+
+	return intel_gt_live_subtests(tests, gt);
+}
diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
index bdd290f2bf3c..aaf8a380e5c7 100644
--- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
@@ -49,5 +49,6 @@ selftest(perf, i915_perf_live_selftests)
 selftest(slpc, intel_slpc_live_selftests)
 selftest(guc, intel_guc_live_selftests)
 selftest(guc_multi_lrc, intel_guc_multi_lrc_live_selftests)
+selftest(guc_hang, intel_guc_hang_check)
 /* Here be dragons: keep last to run last! */
 selftest(late_gt_pm, intel_gt_pm_late_selftests)

From a96d8f052205d92c2fa934f72dd39229c8b75d79 Mon Sep 17 00:00:00 2001
From: John Harrison <John.C.Harrison@Intel.com>
Date: Wed, 27 Jul 2022 19:42:23 -0700
Subject: [PATCH 21/37] drm/i915/selftest: Cope with not having an RCS engine

It is no longer guaranteed that there will always be an RCS engine.
So, use the helper function for finding the first available engine that
can be used for general purpose selftets.

Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220728024225.2363663-5-John.C.Harrison@Intel.com
---
 drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index 6493265d5f64..7f3bb1d34dfb 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -1302,13 +1302,15 @@ static int igt_reset_wait(void *arg)
 {
 	struct intel_gt *gt = arg;
 	struct i915_gpu_error *global = &gt->i915->gpu_error;
-	struct intel_engine_cs *engine = gt->engine[RCS0];
+	struct intel_engine_cs *engine;
 	struct i915_request *rq;
 	unsigned int reset_count;
 	struct hang h;
 	long timeout;
 	int err;
 
+	engine = intel_selftest_find_any_engine(gt);
+
 	if (!engine || !intel_engine_can_store_dword(engine))
 		return 0;
 
@@ -1432,7 +1434,7 @@ static int __igt_reset_evict_vma(struct intel_gt *gt,
 				 int (*fn)(void *),
 				 unsigned int flags)
 {
-	struct intel_engine_cs *engine = gt->engine[RCS0];
+	struct intel_engine_cs *engine;
 	struct drm_i915_gem_object *obj;
 	struct task_struct *tsk = NULL;
 	struct i915_request *rq;
@@ -1444,6 +1446,8 @@ static int __igt_reset_evict_vma(struct intel_gt *gt,
 	if (!gt->ggtt->num_fences && flags & EXEC_OBJECT_NEEDS_FENCE)
 		return 0;
 
+	engine = intel_selftest_find_any_engine(gt);
+
 	if (!engine || !intel_engine_can_store_dword(engine))
 		return 0;
 
@@ -1819,12 +1823,14 @@ static int igt_handle_error(void *arg)
 {
 	struct intel_gt *gt = arg;
 	struct i915_gpu_error *global = &gt->i915->gpu_error;
-	struct intel_engine_cs *engine = gt->engine[RCS0];
+	struct intel_engine_cs *engine;
 	struct hang h;
 	struct i915_request *rq;
 	struct i915_gpu_coredump *error;
 	int err;
 
+	engine = intel_selftest_find_any_engine(gt);
+
 	/* Check that we can issue a global GPU and engine reset */
 
 	if (!intel_has_reset_engine(gt))

From 52d4cfdc2460cb7f0528e27cdc9fb808f299d325 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Wed, 27 Jul 2022 19:42:24 -0700
Subject: [PATCH 22/37] drm/i915/guc: Support larger contexts on newer hardware

The GuC needs a copy of a golden context for implementing watchdog
resets (aka media resets). This context is larger on newer platforms.
So adjust the size being allocated/copied accordingly.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Reviewed-by: John Harrison <John.C.Harrison@Intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220728024225.2363663-6-John.C.Harrison@Intel.com
---
 drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
index ba7541f3ca61..74cbe8eaf531 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
@@ -464,7 +464,11 @@ static void fill_engine_enable_masks(struct intel_gt *gt,
 }
 
 #define LR_HW_CONTEXT_SIZE (80 * sizeof(u32))
-#define LRC_SKIP_SIZE (LRC_PPHWSP_SZ * PAGE_SIZE + LR_HW_CONTEXT_SIZE)
+#define XEHP_LR_HW_CONTEXT_SIZE (96 * sizeof(u32))
+#define LR_HW_CONTEXT_SZ(i915) (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50) ? \
+				    XEHP_LR_HW_CONTEXT_SIZE : \
+				    LR_HW_CONTEXT_SIZE)
+#define LRC_SKIP_SIZE(i915) (LRC_PPHWSP_SZ * PAGE_SIZE + LR_HW_CONTEXT_SZ(i915))
 static int guc_prep_golden_context(struct intel_guc *guc)
 {
 	struct intel_gt *gt = guc_to_gt(guc);
@@ -525,7 +529,7 @@ static int guc_prep_golden_context(struct intel_guc *guc)
 		 * on all engines).
 		 */
 		ads_blob_write(guc, ads.eng_state_size[guc_class],
-			       real_size - LRC_SKIP_SIZE);
+			       real_size - LRC_SKIP_SIZE(gt->i915));
 		ads_blob_write(guc, ads.golden_context_lrca[guc_class],
 			       addr_ggtt);
 
@@ -599,7 +603,7 @@ static void guc_init_golden_context(struct intel_guc *guc)
 		}
 
 		GEM_BUG_ON(ads_blob_read(guc, ads.eng_state_size[guc_class]) !=
-			   real_size - LRC_SKIP_SIZE);
+			   real_size - LRC_SKIP_SIZE(gt->i915));
 		GEM_BUG_ON(ads_blob_read(guc, ads.golden_context_lrca[guc_class]) != addr_ggtt);
 
 		addr_ggtt += alloc_size;

From dd9d3cbe9efcdb2a0d88c80d0e492ccae54a5b63 Mon Sep 17 00:00:00 2001
From: John Harrison <John.C.Harrison@Intel.com>
Date: Wed, 27 Jul 2022 19:42:25 -0700
Subject: [PATCH 23/37] drm/i915/guc: Don't abort on CTB_UNUSED status

When the KMD sends a CLIENT_RESET request to GuC (as part of the
suspend sequence), GuC will mark the CTB buffer as 'UNUSED'. If the
KMD then checked the CTB queue, it would see a non-zero status value
and report the buffer as corrupted.

Technically, no G2H messages should be received once the CLIENT_RESET
has been sent. However, if a context was outstanding on an engine then
it would get reset and a reset notification would be sent. So, don't
actually treat UNUSED as a catastrophic error. Just flag it up as
unexpected and keep going.

Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220728024225.2363663-7-John.C.Harrison@Intel.com
---
 .../i915/gt/uc/abi/guc_communication_ctb_abi.h |  8 +++++---
 drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c      | 18 ++++++++++++++++--
 2 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h
index df83c1cc7c7a..28b8387f97b7 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h
@@ -37,6 +37,7 @@
  *  |   |       |   - _`GUC_CTB_STATUS_OVERFLOW` = 1 (head/tail too large)     |
  *  |   |       |   - _`GUC_CTB_STATUS_UNDERFLOW` = 2 (truncated message)      |
  *  |   |       |   - _`GUC_CTB_STATUS_MISMATCH` = 4 (head/tail modified)      |
+ *  |   |       |   - _`GUC_CTB_STATUS_UNUSED` = 8 (CTB is not in use)         |
  *  +---+-------+--------------------------------------------------------------+
  *  |...|       | RESERVED = MBZ                                               |
  *  +---+-------+--------------------------------------------------------------+
@@ -49,9 +50,10 @@ struct guc_ct_buffer_desc {
 	u32 tail;
 	u32 status;
 #define GUC_CTB_STATUS_NO_ERROR				0
-#define GUC_CTB_STATUS_OVERFLOW				(1 << 0)
-#define GUC_CTB_STATUS_UNDERFLOW			(1 << 1)
-#define GUC_CTB_STATUS_MISMATCH				(1 << 2)
+#define GUC_CTB_STATUS_OVERFLOW				BIT(0)
+#define GUC_CTB_STATUS_UNDERFLOW			BIT(1)
+#define GUC_CTB_STATUS_MISMATCH				BIT(2)
+#define GUC_CTB_STATUS_UNUSED				BIT(3)
 	u32 reserved[13];
 } __packed;
 static_assert(sizeof(struct guc_ct_buffer_desc) == 64);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index 74194c11234d..2b22065e87bf 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -829,8 +829,22 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg)
 	if (unlikely(ctb->broken))
 		return -EPIPE;
 
-	if (unlikely(desc->status))
-		goto corrupted;
+	if (unlikely(desc->status)) {
+		u32 status = desc->status;
+
+		if (status & GUC_CTB_STATUS_UNUSED) {
+			/*
+			 * Potentially valid if a CLIENT_RESET request resulted in
+			 * contexts/engines being reset. But should never happen as
+			 * no contexts should be active when CLIENT_RESET is sent.
+			 */
+			CT_ERROR(ct, "Unexpected G2H after GuC has stopped!\n");
+			status &= ~GUC_CTB_STATUS_UNUSED;
+		}
+
+		if (status)
+			goto corrupted;
+	}
 
 	GEM_BUG_ON(head > size);
 

From 6c82c75230b87d986283189a58e8cb18ea0c519c Mon Sep 17 00:00:00 2001
From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Date: Wed, 27 Jul 2022 17:33:39 -0700
Subject: [PATCH 24/37] drm/i915/guc: Don't send policy update for child
 contexts.

The GuC FW applies the parent context policy to all the children,
so individual updates to the children are not supported and we
should not send them.

Note that sending the message did not have any functional consequences,
because the GuC just drops it and logs an error; since we were trying
to set the child policy to match the parent anyway the message being
dropped was not a problem.

Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: John Harrison <john.c.harrison@intel.com>
Reviewed-by: John Harrison <John.C.Harrison@Intel.com>
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220728003339.2361010-1-daniele.ceraolospurio@intel.com
---
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 26 +------------------
 1 file changed, 1 insertion(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index ff205c412585..0d17da77e787 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -2420,7 +2420,6 @@ static int guc_context_policy_init_v70(struct intel_context *ce, bool loop)
 	struct context_policy policy;
 	u32 execution_quantum;
 	u32 preemption_timeout;
-	bool missing = false;
 	unsigned long flags;
 	int ret;
 
@@ -2438,32 +2437,9 @@ static int guc_context_policy_init_v70(struct intel_context *ce, bool loop)
 		__guc_context_policy_add_preempt_to_idle(&policy, 1);
 
 	ret = __guc_context_set_context_policies(guc, &policy, loop);
-	missing = ret != 0;
-
-	if (!missing && intel_context_is_parent(ce)) {
-		struct intel_context *child;
-
-		for_each_child(ce, child) {
-			__guc_context_policy_start_klv(&policy, child->guc_id.id);
-
-			if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
-				__guc_context_policy_add_preempt_to_idle(&policy, 1);
-
-			child->guc_state.prio = ce->guc_state.prio;
-			__guc_context_policy_add_priority(&policy, ce->guc_state.prio);
-			__guc_context_policy_add_execution_quantum(&policy, execution_quantum);
-			__guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);
-
-			ret = __guc_context_set_context_policies(guc, &policy, loop);
-			if (ret) {
-				missing = true;
-				break;
-			}
-		}
-	}
 
 	spin_lock_irqsave(&ce->guc_state.lock, flags);
-	if (missing)
+	if (ret != 0)
 		set_context_policy_required(ce);
 	else
 		clr_context_policy_required(ce);

From 2775e2016179331f1b8dcc24bb63e898d64d49e3 Mon Sep 17 00:00:00 2001
From: John Harrison <John.C.Harrison@Intel.com>
Date: Thu, 28 Jul 2022 16:07:22 -0700
Subject: [PATCH 25/37] drm/i915/dg2: Update DG2 to GuC v70.4.1

New release of GuC with a bunch of fixes specific to DG2. Some of
these require follow up i915 changes to enable.

Note also that it is not necessary to maintain backwards compatibility
with 70.1.2 for DG2 because DG2 is still under force probe protection.

Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220728230722.2749701-2-John.C.Harrison@Intel.com
---
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index d5fca1f68eff..58547292efa0 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -53,7 +53,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
  * firmware as TGL.
  */
 #define INTEL_GUC_FIRMWARE_DEFS(fw_def, guc_def) \
-	fw_def(DG2,          0, guc_def(dg2,  70, 1, 2)) \
+	fw_def(DG2,          0, guc_def(dg2,  70, 4, 1)) \
 	fw_def(ALDERLAKE_P,  0, guc_def(adlp, 70, 1, 1)) \
 	fw_def(ALDERLAKE_S,  0, guc_def(tgl,  70, 1, 1)) \
 	fw_def(DG1,          0, guc_def(dg1,  70, 1, 1)) \

From ae5a3d2c75a82c65564d32c42152ef7623ccf673 Mon Sep 17 00:00:00 2001
From: Harish Chegondi <harish.chegondi@intel.com>
Date: Mon, 1 Aug 2022 14:38:39 -0700
Subject: [PATCH 26/37] drm/i915/dg2: Add Wa_1509727124

Bspec: 46052
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Harish Chegondi <harish.chegondi@intel.com>
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220801213839.8549-1-harish.chegondi@intel.com
---
 drivers/gpu/drm/i915/gt/intel_gt_regs.h     | 1 +
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 7 +++++++
 2 files changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 60d6eb5f245b..b3b49f6d6d1c 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -1078,6 +1078,7 @@
 
 #define GEN10_SAMPLER_MODE			_MMIO(0xe18c)
 #define   ENABLE_SMALLPL			REG_BIT(15)
+#define   SC_DISABLE_POWER_OPTIMIZATION_EBB	REG_BIT(9)
 #define   GEN11_SAMPLER_ENABLE_HEADLESS_MSG	REG_BIT(5)
 
 #define GEN9_HALF_SLICE_CHICKEN7		_MMIO(0xe194)
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index e8111fce56d0..59cf28baa472 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -2119,6 +2119,13 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 		wa_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8);
 	}
 
+	if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) ||
+	    IS_DG2_G11(i915) || IS_DG2_G12(i915)) {
+		/* Wa_1509727124:dg2 */
+		wa_masked_en(wal, GEN10_SAMPLER_MODE,
+			     SC_DISABLE_POWER_OPTIMIZATION_EBB);
+	}
+
 	if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0) ||
 	    IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) {
 		/* Wa_14012419201:dg2 */

From 7dd5c56531eb03696acdb17774721de5ef481c0b Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue, 26 Jul 2022 16:48:44 +0200
Subject: [PATCH 27/37] drm/i915/gem: Remove shared locking on freeing objects

The obj->base.resv may be shared across many objects, some of which may
still be live and locked, preventing objects from being freed
indefintely. We could individualise the lock during the free, or rely on
a freed object having no contention and being able to immediately free
the pages it owns.

References: https://gitlab.freedesktop.org/drm/intel/-/issues/6469
Fixes: be7612fd6665 ("drm/i915: Require object lock when freeing pages during destruction")
Fixes: 6cb12fbda1c2 ("drm/i915: Use trylock instead of blocking lock for __i915_gem_free_objects.")
Cc: <stable@vger.kernel.org> # v5.17+
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Tested-by: Nirmoy Das <nirmoy.das@intel.com>
Acked-by: Nirmoy Das <nirmoy.das@intel.com>
Signed-off-by: Nirmoy Das <nirmoy.das@intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220726144844.18429-1-nirmoy.das@intel.com
---
 drivers/gpu/drm/i915/gem/i915_gem_object.c | 16 ++++------------
 drivers/gpu/drm/i915/i915_drv.h            |  4 ++--
 2 files changed, 6 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index ccec4055fde3..389e9f157ca5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -268,7 +268,7 @@ static void __i915_gem_object_free_mmaps(struct drm_i915_gem_object *obj)
  */
 void __i915_gem_object_pages_fini(struct drm_i915_gem_object *obj)
 {
-	assert_object_held(obj);
+	assert_object_held_shared(obj);
 
 	if (!list_empty(&obj->vma.list)) {
 		struct i915_vma *vma;
@@ -331,15 +331,7 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
 			continue;
 		}
 
-		if (!i915_gem_object_trylock(obj, NULL)) {
-			/* busy, toss it back to the pile */
-			if (llist_add(&obj->freed, &i915->mm.free_list))
-				queue_delayed_work(i915->wq, &i915->mm.free_work, msecs_to_jiffies(10));
-			continue;
-		}
-
 		__i915_gem_object_pages_fini(obj);
-		i915_gem_object_unlock(obj);
 		__i915_gem_free_object(obj);
 
 		/* But keep the pointer alive for RCU-protected lookups */
@@ -359,7 +351,7 @@ void i915_gem_flush_free_objects(struct drm_i915_private *i915)
 static void __i915_gem_free_work(struct work_struct *work)
 {
 	struct drm_i915_private *i915 =
-		container_of(work, struct drm_i915_private, mm.free_work.work);
+		container_of(work, struct drm_i915_private, mm.free_work);
 
 	i915_gem_flush_free_objects(i915);
 }
@@ -391,7 +383,7 @@ static void i915_gem_free_object(struct drm_gem_object *gem_obj)
 	 */
 
 	if (llist_add(&obj->freed, &i915->mm.free_list))
-		queue_delayed_work(i915->wq, &i915->mm.free_work, 0);
+		queue_work(i915->wq, &i915->mm.free_work);
 }
 
 void __i915_gem_object_flush_frontbuffer(struct drm_i915_gem_object *obj,
@@ -745,7 +737,7 @@ bool i915_gem_object_needs_ccs_pages(struct drm_i915_gem_object *obj)
 
 void i915_gem_init__objects(struct drm_i915_private *i915)
 {
-	INIT_DELAYED_WORK(&i915->mm.free_work, __i915_gem_free_work);
+	INIT_WORK(&i915->mm.free_work, __i915_gem_free_work);
 }
 
 void i915_objects_module_exit(void)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 3364a6e5169b..8feb4b2ace27 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -254,7 +254,7 @@ struct i915_gem_mm {
 	 * List of objects which are pending destruction.
 	 */
 	struct llist_head free_list;
-	struct delayed_work free_work;
+	struct work_struct free_work;
 	/**
 	 * Count of objects pending destructions. Used to skip needlessly
 	 * waiting on an RCU barrier if no objects are waiting to be freed.
@@ -1440,7 +1440,7 @@ static inline void i915_gem_drain_freed_objects(struct drm_i915_private *i915)
 	 * armed the work again.
 	 */
 	while (atomic_read(&i915->mm.free_count)) {
-		flush_delayed_work(&i915->mm.free_work);
+		flush_work(&i915->mm.free_work);
 		flush_delayed_work(&i915->bdev.wq);
 		rcu_barrier();
 	}

From 3d037d99e61a1e7a3ae3d214146d88db349dd19f Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@kernel.org>
Date: Thu, 4 Aug 2022 09:37:22 +0200
Subject: [PATCH 28/37] drm/i915: pass a pointer for tlb seqno at
 vma_invalidate_tlb()

WRITE_ONCE() should happen at the original var, not on a local
copy of it.

Cc: stable@vger.kernel.org
Fixes: 5d36acb7198b ("drm/i915/gt: Batch TLB invalidations")
Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
[added cc-stable while merging it]
Link: https://patchwork.freedesktop.org/patch/msgid/f9550e6bacea10131ff40dd8981b69eb9251cdcd.1659598090.git.mchehab@kernel.org
---
 drivers/gpu/drm/i915/gt/intel_ppgtt.c | 2 +-
 drivers/gpu/drm/i915/i915_vma.c       | 6 +++---
 drivers/gpu/drm/i915/i915_vma.h       | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
index 2da6c82a8bd2..6ee8d1127016 100644
--- a/drivers/gpu/drm/i915/gt/intel_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
@@ -211,7 +211,7 @@ void ppgtt_unbind_vma(struct i915_address_space *vm,
 
 	vm->clear_range(vm, vma_res->start, vma_res->vma_size);
 	if (vma_res->tlb)
-		vma_invalidate_tlb(vm, *vma_res->tlb);
+		vma_invalidate_tlb(vm, vma_res->tlb);
 }
 
 static unsigned long pd_count(u64 size, int shift)
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 84a9ccbc5fc5..260371716490 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -1308,7 +1308,7 @@ err_unpin:
 	return err;
 }
 
-void vma_invalidate_tlb(struct i915_address_space *vm, u32 tlb)
+void vma_invalidate_tlb(struct i915_address_space *vm, u32 *tlb)
 {
 	/*
 	 * Before we release the pages that were bound by this vma, we
@@ -1318,7 +1318,7 @@ void vma_invalidate_tlb(struct i915_address_space *vm, u32 tlb)
 	 * the most recent TLB invalidation seqno, and if we have not yet
 	 * flushed the TLBs upon release, perform a full invalidation.
 	 */
-	WRITE_ONCE(tlb, intel_gt_next_invalidate_tlb_full(vm->gt));
+	WRITE_ONCE(*tlb, intel_gt_next_invalidate_tlb_full(vm->gt));
 }
 
 static void __vma_put_pages(struct i915_vma *vma, unsigned int count)
@@ -1971,7 +1971,7 @@ struct dma_fence *__i915_vma_evict(struct i915_vma *vma, bool async)
 			dma_fence_put(unbind_fence);
 			unbind_fence = NULL;
 		}
-		vma_invalidate_tlb(vma->vm, vma->obj->mm.tlb);
+		vma_invalidate_tlb(vma->vm, &vma->obj->mm.tlb);
 	}
 
 	/*
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 5048eed536da..33a58f605d75 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -213,7 +213,7 @@ bool i915_vma_misplaced(const struct i915_vma *vma,
 			u64 size, u64 alignment, u64 flags);
 void __i915_vma_set_map_and_fenceable(struct i915_vma *vma);
 void i915_vma_revoke_mmap(struct i915_vma *vma);
-void vma_invalidate_tlb(struct i915_address_space *vm, u32 tlb);
+void vma_invalidate_tlb(struct i915_address_space *vm, u32 *tlb);
 struct dma_fence *__i915_vma_evict(struct i915_vma *vma, bool async);
 int __i915_vma_unbind(struct i915_vma *vma);
 int __must_check i915_vma_unbind(struct i915_vma *vma);

From dba4d442be8c4d41d3e1ee4f72a2cd8fa524b8cb Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Fri, 5 Aug 2022 14:22:39 +0100
Subject: [PATCH 29/37] drm/i915/ttm: remove calc_ctrl_surf_instr_size
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We only ever need to emit one ccs block copy command.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Ramalingam C <ramalingam.c@intel.com>
Reviewed-by: Ramalingam C<ramalingam.c@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220805132240.442747-1-matthew.auld@intel.com
---
 drivers/gpu/drm/i915/gt/intel_migrate.c | 35 +++----------------------
 1 file changed, 3 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c
index 9a0814422ba4..1bbed7aa436a 100644
--- a/drivers/gpu/drm/i915/gt/intel_migrate.c
+++ b/drivers/gpu/drm/i915/gt/intel_migrate.c
@@ -511,44 +511,16 @@ static inline u32 *i915_flush_dw(u32 *cmd, u32 flags)
 	return cmd;
 }
 
-static u32 calc_ctrl_surf_instr_size(struct drm_i915_private *i915, int size)
-{
-	u32 num_cmds, num_blks, total_size;
-
-	if (!GET_CCS_BYTES(i915, size))
-		return 0;
-
-	/*
-	 * XY_CTRL_SURF_COPY_BLT transfers CCS in 256 byte
-	 * blocks. one XY_CTRL_SURF_COPY_BLT command can
-	 * transfer upto 1024 blocks.
-	 */
-	num_blks = DIV_ROUND_UP(GET_CCS_BYTES(i915, size),
-				NUM_CCS_BYTES_PER_BLOCK);
-	num_cmds = DIV_ROUND_UP(num_blks, NUM_CCS_BLKS_PER_XFER);
-	total_size = XY_CTRL_SURF_INSTR_SIZE * num_cmds;
-
-	/*
-	 * Adding a flush before and after XY_CTRL_SURF_COPY_BLT
-	 */
-	total_size += 2 * MI_FLUSH_DW_SIZE;
-
-	return total_size;
-}
-
 static int emit_copy_ccs(struct i915_request *rq,
 			 u32 dst_offset, u8 dst_access,
 			 u32 src_offset, u8 src_access, int size)
 {
 	struct drm_i915_private *i915 = rq->engine->i915;
 	int mocs = rq->engine->gt->mocs.uc_index << 1;
-	u32 num_ccs_blks, ccs_ring_size;
+	u32 num_ccs_blks;
 	u32 *cs;
 
-	ccs_ring_size = calc_ctrl_surf_instr_size(i915, size);
-	WARN_ON(!ccs_ring_size);
-
-	cs = intel_ring_begin(rq, round_up(ccs_ring_size, 2));
+	cs = intel_ring_begin(rq, 12);
 	if (IS_ERR(cs))
 		return PTR_ERR(cs);
 
@@ -583,8 +555,7 @@ static int emit_copy_ccs(struct i915_request *rq,
 		FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, mocs);
 
 	cs = i915_flush_dw(cs, MI_FLUSH_DW_LLC | MI_FLUSH_DW_CCS);
-	if (ccs_ring_size & 1)
-		*cs++ = MI_NOOP;
+	*cs++ = MI_NOOP;
 
 	intel_ring_advance(rq, cs);
 

From 8676145eb2f53a9940ff70910caf0125bd8a4bc2 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Fri, 5 Aug 2022 14:22:40 +0100
Subject: [PATCH 30/37] drm/i915/ttm: fix CCS handling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Crucible + recent Mesa seems to sometimes hit:

GEM_BUG_ON(num_ccs_blks > NUM_CCS_BLKS_PER_XFER)

And it looks like we can also trigger this with gem_lmem_swapping, if we
modify the test to use slightly larger object sizes.

Looking closer it looks like we have the following issues in
migrate_copy():

  - We are using plain integer in various places, which we can easily
    overflow with a large object.

  - We pass the entire object size (when the src is lmem) into
    emit_pte() and then try to copy it, which doesn't work, since we
    only have a few fixed sized windows in which to map the pages and
    perform the copy. With an object > 8M we therefore aren't properly
    copying the pages. And then with an object > 64M we trigger the
    GEM_BUG_ON(num_ccs_blks > NUM_CCS_BLKS_PER_XFER).

So it looks like our copy handling for any object > 8M (which is our
CHUNK_SZ) is currently broken on DG2.

Fixes: da0595ae91da ("drm/i915/migrate: Evict and restore the flatccs capable lmem obj")
Testcase: igt@gem_lmem_swapping
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Ramalingam C <ramalingam.c@intel.com>
Reviewed-by: Ramalingam C<ramalingam.c@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220805132240.442747-2-matthew.auld@intel.com
---
 drivers/gpu/drm/i915/gt/intel_migrate.c | 44 ++++++++++++-------------
 1 file changed, 21 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c
index 1bbed7aa436a..aaaf1906026c 100644
--- a/drivers/gpu/drm/i915/gt/intel_migrate.c
+++ b/drivers/gpu/drm/i915/gt/intel_migrate.c
@@ -609,9 +609,9 @@ static int emit_copy(struct i915_request *rq,
 	return 0;
 }
 
-static int scatter_list_length(struct scatterlist *sg)
+static u64 scatter_list_length(struct scatterlist *sg)
 {
-	int len = 0;
+	u64 len = 0;
 
 	while (sg && sg_dma_len(sg)) {
 		len += sg_dma_len(sg);
@@ -621,28 +621,26 @@ static int scatter_list_length(struct scatterlist *sg)
 	return len;
 }
 
-static void
+static int
 calculate_chunk_sz(struct drm_i915_private *i915, bool src_is_lmem,
-		   int *src_sz, u32 bytes_to_cpy, u32 ccs_bytes_to_cpy)
+		   u64 bytes_to_cpy, u64 ccs_bytes_to_cpy)
 {
-	if (ccs_bytes_to_cpy) {
-		if (!src_is_lmem)
-			/*
-			 * When CHUNK_SZ is passed all the pages upto CHUNK_SZ
-			 * will be taken for the blt. in Flat-ccs supported
-			 * platform Smem obj will have more pages than required
-			 * for main meory hence limit it to the required size
-			 * for main memory
-			 */
-			*src_sz = min_t(int, bytes_to_cpy, CHUNK_SZ);
-	} else { /* ccs handling is not required */
-		*src_sz = CHUNK_SZ;
-	}
+	if (ccs_bytes_to_cpy && !src_is_lmem)
+		/*
+		 * When CHUNK_SZ is passed all the pages upto CHUNK_SZ
+		 * will be taken for the blt. in Flat-ccs supported
+		 * platform Smem obj will have more pages than required
+		 * for main meory hence limit it to the required size
+		 * for main memory
+		 */
+		return min_t(u64, bytes_to_cpy, CHUNK_SZ);
+	else
+		return CHUNK_SZ;
 }
 
-static void get_ccs_sg_sgt(struct sgt_dma *it, u32 bytes_to_cpy)
+static void get_ccs_sg_sgt(struct sgt_dma *it, u64 bytes_to_cpy)
 {
-	u32 len;
+	u64 len;
 
 	do {
 		GEM_BUG_ON(!it->sg || !sg_dma_len(it->sg));
@@ -673,12 +671,12 @@ intel_context_migrate_copy(struct intel_context *ce,
 {
 	struct sgt_dma it_src = sg_sgt(src), it_dst = sg_sgt(dst), it_ccs;
 	struct drm_i915_private *i915 = ce->engine->i915;
-	u32 ccs_bytes_to_cpy = 0, bytes_to_cpy;
+	u64 ccs_bytes_to_cpy = 0, bytes_to_cpy;
 	enum i915_cache_level ccs_cache_level;
 	u32 src_offset, dst_offset;
 	u8 src_access, dst_access;
 	struct i915_request *rq;
-	int src_sz, dst_sz;
+	u64 src_sz, dst_sz;
 	bool ccs_is_src, overwrite_ccs;
 	int err;
 
@@ -761,8 +759,8 @@ intel_context_migrate_copy(struct intel_context *ce,
 		if (err)
 			goto out_rq;
 
-		calculate_chunk_sz(i915, src_is_lmem, &src_sz,
-				   bytes_to_cpy, ccs_bytes_to_cpy);
+		src_sz = calculate_chunk_sz(i915, src_is_lmem,
+					    bytes_to_cpy, ccs_bytes_to_cpy);
 
 		len = emit_pte(rq, &it_src, src_cache_level, src_is_lmem,
 			       src_offset, src_sz);

From 7e55536c9c488febfeedacebae0831c070e72c36 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Tue, 16 Aug 2022 14:06:00 -0700
Subject: [PATCH 31/37] drm/i915/gt: Add dedicated function for non-ctx
 register tuning settings

The bspec performance tuning section gives recommended settings that the
driver should program for various MMIO registers.  Although these
settings aren't "workarounds" we use the workaround infrastructure to do
this programming to make sure it is handled at the appropriate places
and doesn't conflict with any real workarounds.

Since more of these are starting to show up on recent platforms, it's a
good time to create a dedicated function to hold them so that there's
less ambiguity about how/where to implement new ones.

Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220816210601.2041572-1-matthew.d.roper@intel.com
---
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 42 ++++++++++++++-------
 1 file changed, 28 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 59cf28baa472..a68d279b01f0 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -2102,13 +2102,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 		/* Wa_1509235366:dg2 */
 		wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS |
 			    GLOBAL_INVALIDATION_MODE);
-
-		/*
-		 * The following are not actually "workarounds" but rather
-		 * recommended tuning settings documented in the bspec's
-		 * performance guide section.
-		 */
-		wa_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
 	}
 
 	if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) {
@@ -2676,6 +2669,32 @@ ccs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 	}
 }
 
+/*
+ * The bspec performance guide has recommended MMIO tuning settings.  These
+ * aren't truly "workarounds" but we want to program them with the same
+ * workaround infrastructure to ensure that they're automatically added to
+ * the GuC save/restore lists, re-applied at the right times, and checked for
+ * any conflicting programming requested by real workarounds.
+ *
+ * Programming settings should be added here only if their registers are not
+ * part of an engine's register state context.  If a register is part of a
+ * context, then any tuning settings should be programmed in an appropriate
+ * function invoked by __intel_engine_init_ctx_wa().
+ */
+static void
+add_render_compute_tuning_settings(struct drm_i915_private *i915,
+				   struct i915_wa_list *wal)
+{
+	if (IS_PONTEVECCHIO(i915)) {
+		wa_write(wal, XEHPC_L3SCRUB,
+			 SCRUB_CL_DWNGRADE_SHARED | SCRUB_RATE_4B_PER_CLK);
+	}
+
+	if (IS_DG2(i915)) {
+		wa_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
+	}
+}
+
 /*
  * The workarounds in this function apply to shared registers in
  * the general render reset domain that aren't tied to a
@@ -2690,14 +2709,9 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
 {
 	struct drm_i915_private *i915 = engine->i915;
 
-	if (IS_PONTEVECCHIO(i915)) {
-		/*
-		 * The following is not actually a "workaround" but rather
-		 * a recommended tuning setting documented in the bspec's
-		 * performance guide section.
-		 */
-		wa_write(wal, XEHPC_L3SCRUB, SCRUB_CL_DWNGRADE_SHARED | SCRUB_RATE_4B_PER_CLK);
+	add_render_compute_tuning_settings(i915, wal);
 
+	if (IS_PONTEVECCHIO(i915)) {
 		/* Wa_16016694945 */
 		wa_masked_en(wal, XEHPC_LNCFMISCCFGREG0, XEHPC_OVRLSCCC);
 	}

From 6dc85721df748385cfb011326ead52c01840b461 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Tue, 16 Aug 2022 14:06:01 -0700
Subject: [PATCH 32/37] drm/i915/dg2: Add additional tuning settings

Some additional MMIO tuning settings have appeared in the bspec's
performance tuning guide section.

One of the tuning settings here is also documented as formal workaround
Wa_22012654132 for some steppings of DG2.  However the tuning setting
applies to all DG2 variants and steppings, making it a superset of the
workaround.

v2:
 - Move DRAW_WATERMARK to engine workaround section.  It only moves into
   the engine context on future platforms.  (Lucas)
 - CHICKEN_RASTER_2 needs to be handled as a masked register.  (Lucas)

Bspec: 68331
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220816210601.2041572-2-matthew.d.roper@intel.com
---
 drivers/gpu/drm/i915/gt/intel_gt_regs.h     |  8 ++++++
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 27 ++++++++++++++-------
 2 files changed, 26 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index b3b49f6d6d1c..f64fafe28f72 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -259,6 +259,9 @@
 #define   GEN9_PREEMPT_GPGPU_COMMAND_LEVEL	GEN9_PREEMPT_GPGPU_LEVEL(1, 0)
 #define   GEN9_PREEMPT_GPGPU_LEVEL_MASK		GEN9_PREEMPT_GPGPU_LEVEL(1, 1)
 
+#define DRAW_WATERMARK				_MMIO(0x26c0)
+#define   VERT_WM_VAL				REG_GENMASK(9, 0)
+
 #define GEN12_GLOBAL_MOCS(i)			_MMIO(0x4000 + (i) * 4) /* Global MOCS regs */
 
 #define RENDER_HWS_PGA_GEN7			_MMIO(0x4080)
@@ -374,6 +377,9 @@
 #define CHICKEN_RASTER_1			_MMIO(0x6204)
 #define   DIS_SF_ROUND_NEAREST_EVEN		REG_BIT(8)
 
+#define CHICKEN_RASTER_2			_MMIO(0x6208)
+#define   TBIMR_FAST_CLIP			REG_BIT(5)
+
 #define VFLSKPD					_MMIO(0x62a8)
 #define   DIS_OVER_FETCH_CACHE			REG_BIT(1)
 #define   DIS_MULT_MISS_RD_SQUASH		REG_BIT(0)
@@ -1124,6 +1130,8 @@
 
 #define RT_CTRL					_MMIO(0xe530)
 #define   DIS_NULL_QUERY			REG_BIT(10)
+#define   STACKID_CTRL				REG_GENMASK(6, 5)
+#define   STACKID_CTRL_512			REG_FIELD_PREP(STACKID_CTRL, 0x2)
 
 #define EU_PERF_CNTL1				_MMIO(0xe558)
 #define EU_PERF_CNTL5				_MMIO(0xe55c)
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index a68d279b01f0..31e129329fb0 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -568,6 +568,7 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
 static void dg2_ctx_gt_tuning_init(struct intel_engine_cs *engine,
 				   struct i915_wa_list *wal)
 {
+	wa_masked_en(wal, CHICKEN_RASTER_2, TBIMR_FAST_CLIP);
 	wa_write_clr_set(wal, GEN11_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
 			 REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f));
 	wa_add(wal,
@@ -2195,15 +2196,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 		wa_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE);
 	}
 
-	if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_C0) ||
-	    IS_DG2_G11(i915)) {
-		/* Wa_22012654132:dg2 */
-		wa_add(wal, GEN10_CACHE_MODE_SS, 0,
-		       _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC),
-		       0 /* write-only, so skip validation */,
-		       true);
-	}
-
 	/* Wa_14013202645:dg2 */
 	if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) ||
 	    IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0))
@@ -2692,6 +2684,23 @@ add_render_compute_tuning_settings(struct drm_i915_private *i915,
 
 	if (IS_DG2(i915)) {
 		wa_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
+		wa_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512);
+		wa_write_clr_set(wal, DRAW_WATERMARK, VERT_WM_VAL,
+				 REG_FIELD_PREP(VERT_WM_VAL, 0x3FF));
+
+		/*
+		 * This is also listed as Wa_22012654132 for certain DG2
+		 * steppings, but the tuning setting programming is a superset
+		 * since it applies to all DG2 variants and steppings.
+		 *
+		 * Note that register 0xE420 is write-only and cannot be read
+		 * back for verification on DG2 (due to Wa_14012342262), so
+		 * we need to explicitly skip the readback.
+		 */
+		wa_add(wal, GEN10_CACHE_MODE_SS, 0,
+		       _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC),
+		       0 /* write-only, so skip validation */,
+		       true);
 	}
 }
 

From 5ce27d6210018e972197ff7e5da6309f919fd61b Mon Sep 17 00:00:00 2001
From: Alan Previn <alan.previn.teres.alexis@intel.com>
Date: Wed, 27 Jul 2022 19:20:22 -0700
Subject: [PATCH 33/37] drm/i915/guc: Add a helper for log buffer size

Add a helper to get GuC log buffer size.

Signed-off-by: Alan Previn <alan.previn.teres.alexis@intel.com>
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Alan Previn <alan.previn.teres.alexis@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220728022028.2190627-2-John.C.Harrison@Intel.com
---
 drivers/gpu/drm/i915/gt/uc/intel_guc_log.c | 49 ++++++++++++----------
 1 file changed, 27 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
index 7f1bc8af6b82..e52ac17ad402 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
@@ -15,6 +15,32 @@
 
 static void guc_log_copy_debuglogs_for_relay(struct intel_guc_log *log);
 
+static u32 intel_guc_log_size(struct intel_guc_log *log)
+{
+	/*
+	 *  GuC Log buffer Layout:
+	 *
+	 *  NB: Ordering must follow "enum guc_log_buffer_type".
+	 *
+	 *  +===============================+ 00B
+	 *  |      Debug state header       |
+	 *  +-------------------------------+ 32B
+	 *  |    Crash dump state header    |
+	 *  +-------------------------------+ 64B
+	 *  |     Capture state header      |
+	 *  +-------------------------------+ 96B
+	 *  |                               |
+	 *  +===============================+ PAGE_SIZE (4KB)
+	 *  |          Debug logs           |
+	 *  +===============================+ + DEBUG_SIZE
+	 *  |        Crash Dump logs        |
+	 *  +===============================+ + CRASH_SIZE
+	 *  |         Capture logs          |
+	 *  +===============================+ + CAPTURE_SIZE
+	 */
+	return PAGE_SIZE + CRASH_BUFFER_SIZE + DEBUG_BUFFER_SIZE + CAPTURE_BUFFER_SIZE;
+}
+
 /**
  * DOC: GuC firmware log
  *
@@ -461,32 +487,11 @@ int intel_guc_log_create(struct intel_guc_log *log)
 
 	GEM_BUG_ON(log->vma);
 
-	/*
-	 *  GuC Log buffer Layout
-	 * (this ordering must follow "enum guc_log_buffer_type" definition)
-	 *
-	 *  +===============================+ 00B
-	 *  |      Debug state header       |
-	 *  +-------------------------------+ 32B
-	 *  |    Crash dump state header    |
-	 *  +-------------------------------+ 64B
-	 *  |     Capture state header      |
-	 *  +-------------------------------+ 96B
-	 *  |                               |
-	 *  +===============================+ PAGE_SIZE (4KB)
-	 *  |          Debug logs           |
-	 *  +===============================+ + DEBUG_SIZE
-	 *  |        Crash Dump logs        |
-	 *  +===============================+ + CRASH_SIZE
-	 *  |         Capture logs          |
-	 *  +===============================+ + CAPTURE_SIZE
-	 */
 	if (intel_guc_capture_output_min_size_est(guc) > CAPTURE_BUFFER_SIZE)
 		DRM_WARN("GuC log buffer for state_capture maybe too small. %d < %d\n",
 			 CAPTURE_BUFFER_SIZE, intel_guc_capture_output_min_size_est(guc));
 
-	guc_log_size = PAGE_SIZE + CRASH_BUFFER_SIZE + DEBUG_BUFFER_SIZE +
-		       CAPTURE_BUFFER_SIZE;
+	guc_log_size = intel_guc_log_size(log);
 
 	vma = intel_guc_allocate_vma(guc, guc_log_size);
 	if (IS_ERR(vma)) {

From 56c7f0e2026328e56106b54cb0e2fe3a7e70ce4f Mon Sep 17 00:00:00 2001
From: John Harrison <John.C.Harrison@Intel.com>
Date: Wed, 27 Jul 2022 19:20:23 -0700
Subject: [PATCH 34/37] drm/i915/guc: Fix capture size warning and bump the
 size

There was a size check to warn if the GuC error state capture buffer
allocation would be too small to fit a reasonable amount of capture
data for the current platform. Unfortunately, the test was done too
early in the boot sequence and was actually testing 'if(-ENODEV >
size)'.

Move the check to be later. The check is only used to print a warning
message, so it doesn't really matter how early or late it is done.
Note that it is not possible to dynamically size the buffer because
the allocation needs to be done before the engine information is
available (at least, it would be in the intended two-phase GuC init
process).

Now that the check works, it is reporting size too small for newer
platforms. The check includes a 3x oversample multiplier to allow for
multiple error captures to be bufferd by GuC before i915 has a chance
to read them out. This is less important than simply being big enough
to fit the first capture.

So a) bump the default size to be large enough for one capture minimum
and b) make the warning only if one capture won't fit, instead use a
notice for the 3x size.

Note that the size estimate is a worst case scenario. Actual captures
will likely be smaller.

Lastly, use drm_warn istead of DRM_WARN as the former provides more
infmration and the latter is deprecated.

Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Reviewed-by: Alan Previn <alan.previn.teres.alexis@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220728022028.2190627-3-John.C.Harrison@Intel.com
---
 .../gpu/drm/i915/gt/uc/intel_guc_capture.c    | 40 ++++++++++++++-----
 .../gpu/drm/i915/gt/uc/intel_guc_capture.h    |  1 -
 drivers/gpu/drm/i915/gt/uc/intel_guc_log.c    |  4 --
 drivers/gpu/drm/i915/gt/uc/intel_guc_log.h    |  4 +-
 4 files changed, 31 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
index 75257bd20ff0..b54b7883320b 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
@@ -600,10 +600,8 @@ intel_guc_capture_getnullheader(struct intel_guc *guc,
 	return 0;
 }
 
-#define GUC_CAPTURE_OVERBUFFER_MULTIPLIER 3
-
-int
-intel_guc_capture_output_min_size_est(struct intel_guc *guc)
+static int
+guc_capture_output_min_size_est(struct intel_guc *guc)
 {
 	struct intel_gt *gt = guc_to_gt(guc);
 	struct intel_engine_cs *engine;
@@ -623,13 +621,8 @@ intel_guc_capture_output_min_size_est(struct intel_guc *guc)
 	 * For each engine instance, there would be 1 x guc_state_capture_group_t output
 	 * followed by 3 x guc_state_capture_t lists. The latter is how the register
 	 * dumps are split across different register types (where the '3' are global vs class
-	 * vs instance). Finally, let's multiply the whole thing by 3x (just so we are
-	 * not limited to just 1 round of data in a worst case full register dump log)
-	 *
-	 * NOTE: intel_guc_log that allocates the log buffer would round this size up to
-	 * a power of two.
+	 * vs instance).
 	 */
-
 	for_each_engine(engine, gt, id) {
 		worst_min_size += sizeof(struct guc_state_capture_group_header_t) +
 					 (3 * sizeof(struct guc_state_capture_header_t));
@@ -649,7 +642,30 @@ intel_guc_capture_output_min_size_est(struct intel_guc *guc)
 
 	worst_min_size += (num_regs * sizeof(struct guc_mmio_reg));
 
-	return (worst_min_size * GUC_CAPTURE_OVERBUFFER_MULTIPLIER);
+	return worst_min_size;
+}
+
+/*
+ * Add on a 3x multiplier to allow for multiple back-to-back captures occurring
+ * before the i915 can read the data out and process it
+ */
+#define GUC_CAPTURE_OVERBUFFER_MULTIPLIER 3
+
+static void check_guc_capture_size(struct intel_guc *guc)
+{
+	struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+	int min_size = guc_capture_output_min_size_est(guc);
+	int spare_size = min_size * GUC_CAPTURE_OVERBUFFER_MULTIPLIER;
+
+	if (min_size < 0)
+		drm_warn(&i915->drm, "Failed to calculate GuC error state capture buffer minimum size: %d!\n",
+			 min_size);
+	else if (min_size > CAPTURE_BUFFER_SIZE)
+		drm_warn(&i915->drm, "GuC error state capture buffer is too small: %d < %d\n",
+			 CAPTURE_BUFFER_SIZE, min_size);
+	else if (spare_size > CAPTURE_BUFFER_SIZE)
+		drm_notice(&i915->drm, "GuC error state capture buffer maybe too small: %d < %d (min = %d)\n",
+			   CAPTURE_BUFFER_SIZE, spare_size, min_size);
 }
 
 /*
@@ -1580,5 +1596,7 @@ int intel_guc_capture_init(struct intel_guc *guc)
 	INIT_LIST_HEAD(&guc->capture->outlist);
 	INIT_LIST_HEAD(&guc->capture->cachelist);
 
+	check_guc_capture_size(guc);
+
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h
index d3d7bd0b6db6..fbd3713c7832 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h
@@ -21,7 +21,6 @@ int intel_guc_capture_print_engine_node(struct drm_i915_error_state_buf *m,
 void intel_guc_capture_get_matching_node(struct intel_gt *gt, struct intel_engine_coredump *ee,
 					 struct intel_context *ce);
 void intel_guc_capture_process(struct intel_guc *guc);
-int intel_guc_capture_output_min_size_est(struct intel_guc *guc);
 int intel_guc_capture_getlist(struct intel_guc *guc, u32 owner, u32 type, u32 classid,
 			      void **outptr);
 int intel_guc_capture_getlistsize(struct intel_guc *guc, u32 owner, u32 type, u32 classid,
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
index e52ac17ad402..cc8f3d153d8f 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
@@ -487,10 +487,6 @@ int intel_guc_log_create(struct intel_guc_log *log)
 
 	GEM_BUG_ON(log->vma);
 
-	if (intel_guc_capture_output_min_size_est(guc) > CAPTURE_BUFFER_SIZE)
-		DRM_WARN("GuC log buffer for state_capture maybe too small. %d < %d\n",
-			 CAPTURE_BUFFER_SIZE, intel_guc_capture_output_min_size_est(guc));
-
 	guc_log_size = intel_guc_log_size(log);
 
 	vma = intel_guc_allocate_vma(guc, guc_log_size);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h
index 18007e639be9..dc9715411d62 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h
@@ -22,11 +22,11 @@ struct intel_guc;
 #elif defined(CONFIG_DRM_I915_DEBUG_GEM)
 #define CRASH_BUFFER_SIZE	SZ_1M
 #define DEBUG_BUFFER_SIZE	SZ_2M
-#define CAPTURE_BUFFER_SIZE	SZ_1M
+#define CAPTURE_BUFFER_SIZE	SZ_4M
 #else
 #define CRASH_BUFFER_SIZE	SZ_8K
 #define DEBUG_BUFFER_SIZE	SZ_64K
-#define CAPTURE_BUFFER_SIZE	SZ_16K
+#define CAPTURE_BUFFER_SIZE	SZ_2M
 #endif
 
 /*

From 368d179adbac212a5adf5fa492397a593d52547a Mon Sep 17 00:00:00 2001
From: John Harrison <John.C.Harrison@Intel.com>
Date: Wed, 27 Jul 2022 19:20:24 -0700
Subject: [PATCH 35/37] drm/i915/guc: Add GuC <-> kernel time stamp translation
 information

It is useful to be able to match GuC events to kernel events when
looking at the GuC log. That requires being able to convert GuC
timestamps to kernel time. So, when dumping error captures and/or GuC
logs, include a stamp in both time zones plus the clock frequency.

Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Reviewed-by: Alan Previn <alan.previn.teres.alexis@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220728022028.2190627-4-John.C.Harrison@Intel.com
---
 drivers/gpu/drm/i915/gt/intel_gt_regs.h    |  2 ++
 drivers/gpu/drm/i915/gt/uc/intel_guc.c     | 19 +++++++++++++++++++
 drivers/gpu/drm/i915/gt/uc/intel_guc.h     |  2 ++
 drivers/gpu/drm/i915/gt/uc/intel_guc_log.c |  2 ++
 drivers/gpu/drm/i915/i915_gpu_error.c      | 12 ++++++++++++
 drivers/gpu/drm/i915/i915_gpu_error.h      |  3 +++
 6 files changed, 40 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index f64fafe28f72..94f9ddcfb3a5 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -1013,6 +1013,8 @@
 #define   GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC	(1 << 9)
 #define   GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC	(1 << 7)
 
+#define GUCPMTIMESTAMP				_MMIO(0xc3e8)
+
 #define __GEN9_RCS0_MOCS0			0xc800
 #define GEN9_GFX_MOCS(i)			_MMIO(__GEN9_RCS0_MOCS0 + (i) * 4)
 #define __GEN9_VCS0_MOCS0			0xc900
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index 2706a8c65090..ab4aacc516aa 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -389,6 +389,25 @@ void intel_guc_write_params(struct intel_guc *guc)
 	intel_uncore_forcewake_put(uncore, FORCEWAKE_GT);
 }
 
+void intel_guc_dump_time_info(struct intel_guc *guc, struct drm_printer *p)
+{
+	struct intel_gt *gt = guc_to_gt(guc);
+	intel_wakeref_t wakeref;
+	u32 stamp = 0;
+	u64 ktime;
+
+	intel_device_info_print_runtime(RUNTIME_INFO(gt->i915), p);
+
+	with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref)
+		stamp = intel_uncore_read(gt->uncore, GUCPMTIMESTAMP);
+	ktime = ktime_get_boottime_ns();
+
+	drm_printf(p, "Kernel timestamp: 0x%08llX [%llu]\n", ktime, ktime);
+	drm_printf(p, "GuC timestamp: 0x%08X [%u]\n", stamp, stamp);
+	drm_printf(p, "CS timestamp frequency: %u Hz, %u ns\n",
+		   gt->clock_frequency, gt->clock_period_ns);
+}
+
 int intel_guc_init(struct intel_guc *guc)
 {
 	struct intel_gt *gt = guc_to_gt(guc);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index a7acffbf15d1..804133df1ac9 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -464,4 +464,6 @@ void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p);
 
 void intel_guc_write_barrier(struct intel_guc *guc);
 
+void intel_guc_dump_time_info(struct intel_guc *guc, struct drm_printer *p);
+
 #endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
index cc8f3d153d8f..5654feb6f908 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
@@ -764,6 +764,8 @@ int intel_guc_log_dump(struct intel_guc_log *log, struct drm_printer *p,
 	if (!obj)
 		return 0;
 
+	intel_guc_dump_time_info(guc, p);
+
 	map = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
 	if (IS_ERR(map)) {
 		DRM_DEBUG("Failed to pin object\n");
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 32e92651ef7c..addba7525234 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -678,6 +678,7 @@ static void err_print_uc(struct drm_i915_error_state_buf *m,
 
 	intel_uc_fw_dump(&error_uc->guc_fw, &p);
 	intel_uc_fw_dump(&error_uc->huc_fw, &p);
+	err_printf(m, "GuC timestamp: 0x%08x\n", error_uc->timestamp);
 	intel_gpu_error_print_vma(m, NULL, error_uc->guc_log);
 }
 
@@ -720,6 +721,8 @@ static void err_print_gt_global_nonguc(struct drm_i915_error_state_buf *m,
 	int i;
 
 	err_printf(m, "GT awake: %s\n", str_yes_no(gt->awake));
+	err_printf(m, "CS timestamp frequency: %u Hz, %d ns\n",
+		   gt->clock_frequency, gt->clock_period_ns);
 	err_printf(m, "EIR: 0x%08x\n", gt->eir);
 	err_printf(m, "PGTBL_ER: 0x%08x\n", gt->pgtbl_er);
 
@@ -1675,6 +1678,13 @@ gt_record_uc(struct intel_gt_coredump *gt,
 	 */
 	error_uc->guc_fw.path = kstrdup(uc->guc.fw.path, ALLOW_FAIL);
 	error_uc->huc_fw.path = kstrdup(uc->huc.fw.path, ALLOW_FAIL);
+
+	/*
+	 * Save the GuC log and include a timestamp reference for converting the
+	 * log times to system times (in conjunction with the error->boottime and
+	 * gt->clock_frequency fields saved elsewhere).
+	 */
+	error_uc->timestamp = intel_uncore_read(gt->_gt->uncore, GUCPMTIMESTAMP);
 	error_uc->guc_log = create_vma_coredump(gt->_gt, uc->guc.log.vma,
 						"GuC log buffer", compress);
 
@@ -1833,6 +1843,8 @@ static void gt_record_global_regs(struct intel_gt_coredump *gt)
 static void gt_record_info(struct intel_gt_coredump *gt)
 {
 	memcpy(&gt->info, &gt->_gt->info, sizeof(struct intel_gt_info));
+	gt->clock_frequency = gt->_gt->clock_frequency;
+	gt->clock_period_ns = gt->_gt->clock_period_ns;
 }
 
 /*
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index 55a143b92d10..d8a8b3d529e0 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -150,6 +150,8 @@ struct intel_gt_coredump {
 	u32 gtt_cache;
 	u32 aux_err; /* gen12 */
 	u32 gam_done; /* gen12 */
+	u32 clock_frequency;
+	u32 clock_period_ns;
 
 	/* Display related */
 	u32 derrmr;
@@ -164,6 +166,7 @@ struct intel_gt_coredump {
 		struct intel_uc_fw guc_fw;
 		struct intel_uc_fw huc_fw;
 		struct i915_vma_coredump *guc_log;
+		u32 timestamp;
 		bool is_guc_capture;
 	} *uc;
 

From c5de70f6e14ce02c3e604972f8fd46ffcdc184d9 Mon Sep 17 00:00:00 2001
From: John Harrison <John.C.Harrison@Intel.com>
Date: Wed, 27 Jul 2022 19:20:25 -0700
Subject: [PATCH 36/37] drm/i915/guc: Record CTB info in error logs

When debugging GuC communication issues, it is useful to have the CTB
info available. So add the state and buffer contents to the error
capture log.

Also, add a sub-structure for the GuC specific error capture info as
it is now becoming numerous.

Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Reviewed-by: Alan Previn <alan.previn.teres.alexis@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220728022028.2190627-5-John.C.Harrison@Intel.com
---
 drivers/gpu/drm/i915/i915_gpu_error.c | 59 +++++++++++++++++++++++----
 drivers/gpu/drm/i915/i915_gpu_error.h | 20 +++++++--
 2 files changed, 67 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index addba7525234..543ba63f958e 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -671,6 +671,18 @@ static void err_print_pciid(struct drm_i915_error_state_buf *m,
 		   pdev->subsystem_device);
 }
 
+static void err_print_guc_ctb(struct drm_i915_error_state_buf *m,
+			      const char *name,
+			      const struct intel_ctb_coredump *ctb)
+{
+	if (!ctb->size)
+		return;
+
+	err_printf(m, "GuC %s CTB: raw: 0x%08X, 0x%08X/%08X, cached: 0x%08X/%08X, desc = 0x%08X, buf = 0x%08X x 0x%08X\n",
+		   name, ctb->raw_status, ctb->raw_head, ctb->raw_tail,
+		   ctb->head, ctb->tail, ctb->desc_offset, ctb->cmds_offset, ctb->size);
+}
+
 static void err_print_uc(struct drm_i915_error_state_buf *m,
 			 const struct intel_uc_coredump *error_uc)
 {
@@ -678,8 +690,12 @@ static void err_print_uc(struct drm_i915_error_state_buf *m,
 
 	intel_uc_fw_dump(&error_uc->guc_fw, &p);
 	intel_uc_fw_dump(&error_uc->huc_fw, &p);
-	err_printf(m, "GuC timestamp: 0x%08x\n", error_uc->timestamp);
-	intel_gpu_error_print_vma(m, NULL, error_uc->guc_log);
+	err_printf(m, "GuC timestamp: 0x%08x\n", error_uc->guc.timestamp);
+	intel_gpu_error_print_vma(m, NULL, error_uc->guc.vma_log);
+	err_printf(m, "GuC CTB fence: %d\n", error_uc->guc.last_fence);
+	err_print_guc_ctb(m, "Send", error_uc->guc.ctb + 0);
+	err_print_guc_ctb(m, "Recv", error_uc->guc.ctb + 1);
+	intel_gpu_error_print_vma(m, NULL, error_uc->guc.vma_ctb);
 }
 
 static void err_free_sgl(struct scatterlist *sgl)
@@ -854,7 +870,7 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
 	if (error->gt) {
 		bool print_guc_capture = false;
 
-		if (error->gt->uc && error->gt->uc->is_guc_capture)
+		if (error->gt->uc && error->gt->uc->guc.is_guc_capture)
 			print_guc_capture = true;
 
 		err_print_gt_display(m, error->gt);
@@ -1009,7 +1025,8 @@ static void cleanup_uc(struct intel_uc_coredump *uc)
 {
 	kfree(uc->guc_fw.path);
 	kfree(uc->huc_fw.path);
-	i915_vma_coredump_free(uc->guc_log);
+	i915_vma_coredump_free(uc->guc.vma_log);
+	i915_vma_coredump_free(uc->guc.vma_ctb);
 
 	kfree(uc);
 }
@@ -1658,6 +1675,23 @@ gt_record_engines(struct intel_gt_coredump *gt,
 	}
 }
 
+static void gt_record_guc_ctb(struct intel_ctb_coredump *saved,
+			      const struct intel_guc_ct_buffer *ctb,
+			      const void *blob_ptr, struct intel_guc *guc)
+{
+	if (!ctb || !ctb->desc)
+		return;
+
+	saved->raw_status = ctb->desc->status;
+	saved->raw_head = ctb->desc->head;
+	saved->raw_tail = ctb->desc->tail;
+	saved->head = ctb->head;
+	saved->tail = ctb->tail;
+	saved->size = ctb->size;
+	saved->desc_offset = ((void *)ctb->desc) - blob_ptr;
+	saved->cmds_offset = ((void *)ctb->cmds) - blob_ptr;
+}
+
 static struct intel_uc_coredump *
 gt_record_uc(struct intel_gt_coredump *gt,
 	     struct i915_vma_compress *compress)
@@ -1684,9 +1718,16 @@ gt_record_uc(struct intel_gt_coredump *gt,
 	 * log times to system times (in conjunction with the error->boottime and
 	 * gt->clock_frequency fields saved elsewhere).
 	 */
-	error_uc->timestamp = intel_uncore_read(gt->_gt->uncore, GUCPMTIMESTAMP);
-	error_uc->guc_log = create_vma_coredump(gt->_gt, uc->guc.log.vma,
-						"GuC log buffer", compress);
+	error_uc->guc.timestamp = intel_uncore_read(gt->_gt->uncore, GUCPMTIMESTAMP);
+	error_uc->guc.vma_log = create_vma_coredump(gt->_gt, uc->guc.log.vma,
+						    "GuC log buffer", compress);
+	error_uc->guc.vma_ctb = create_vma_coredump(gt->_gt, uc->guc.ct.vma,
+						    "GuC CT buffer", compress);
+	error_uc->guc.last_fence = uc->guc.ct.requests.last_fence;
+	gt_record_guc_ctb(error_uc->guc.ctb + 0, &uc->guc.ct.ctbs.send,
+			  uc->guc.ct.ctbs.send.desc, (struct intel_guc *)&uc->guc);
+	gt_record_guc_ctb(error_uc->guc.ctb + 1, &uc->guc.ct.ctbs.recv,
+			  uc->guc.ct.ctbs.send.desc, (struct intel_guc *)&uc->guc);
 
 	return error_uc;
 }
@@ -2039,9 +2080,9 @@ __i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask, u32 du
 			error->gt->uc = gt_record_uc(error->gt, compress);
 			if (error->gt->uc) {
 				if (dump_flags & CORE_DUMP_FLAG_IS_GUC_CAPTURE)
-					error->gt->uc->is_guc_capture = true;
+					error->gt->uc->guc.is_guc_capture = true;
 				else
-					GEM_BUG_ON(error->gt->uc->is_guc_capture);
+					GEM_BUG_ON(error->gt->uc->guc.is_guc_capture);
 			}
 		}
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index d8a8b3d529e0..efc75cc2ffdb 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -125,6 +125,15 @@ struct intel_engine_coredump {
 	struct intel_engine_coredump *next;
 };
 
+struct intel_ctb_coredump {
+	u32 raw_head, head;
+	u32 raw_tail, tail;
+	u32 raw_status;
+	u32 desc_offset;
+	u32 cmds_offset;
+	u32 size;
+};
+
 struct intel_gt_coredump {
 	const struct intel_gt *_gt;
 	bool awake;
@@ -165,9 +174,14 @@ struct intel_gt_coredump {
 	struct intel_uc_coredump {
 		struct intel_uc_fw guc_fw;
 		struct intel_uc_fw huc_fw;
-		struct i915_vma_coredump *guc_log;
-		u32 timestamp;
-		bool is_guc_capture;
+		struct guc_info {
+			struct intel_ctb_coredump ctb[2];
+			struct i915_vma_coredump *vma_ctb;
+			struct i915_vma_coredump *vma_log;
+			u32 timestamp;
+			u16 last_fence;
+			bool is_guc_capture;
+		} guc;
 	} *uc;
 
 	struct intel_gt_coredump *next;

From 5ece208ab05e4042c80ed1e6fe6d7ce236eee89b Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris.p.wilson@intel.com>
Date: Wed, 27 Jul 2022 19:20:26 -0700
Subject: [PATCH 37/37] drm/i915/guc: Use streaming loads to speed up dumping
 the guc log

Use a temporary page and mempy_from_wc to reduce the time it takes to
dump the guc log to debugfs.

Signed-off-by: Chris Wilson <chris.p.wilson@intel.com>
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Reviewed-by: John Harrison <John.C.Harrison@Intel.com>
Reviewed-by: Alan Previn <alan.previn.teres.alexis@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220728022028.2190627-6-John.C.Harrison@Intel.com
---
 drivers/gpu/drm/i915/gt/uc/intel_guc_log.c | 24 ++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
index 5654feb6f908..44e6e6853b38 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
@@ -750,8 +750,9 @@ int intel_guc_log_dump(struct intel_guc_log *log, struct drm_printer *p,
 	struct intel_guc *guc = log_to_guc(log);
 	struct intel_uc *uc = container_of(guc, struct intel_uc, guc);
 	struct drm_i915_gem_object *obj = NULL;
-	u32 *map;
-	int i = 0;
+	void *map;
+	u32 *page;
+	int i, j;
 
 	if (!intel_guc_is_supported(guc))
 		return -ENODEV;
@@ -764,23 +765,34 @@ int intel_guc_log_dump(struct intel_guc_log *log, struct drm_printer *p,
 	if (!obj)
 		return 0;
 
+	page = (u32 *)__get_free_page(GFP_KERNEL);
+	if (!page)
+		return -ENOMEM;
+
 	intel_guc_dump_time_info(guc, p);
 
 	map = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
 	if (IS_ERR(map)) {
 		DRM_DEBUG("Failed to pin object\n");
 		drm_puts(p, "(log data unaccessible)\n");
+		free_page((unsigned long)page);
 		return PTR_ERR(map);
 	}
 
-	for (i = 0; i < obj->base.size / sizeof(u32); i += 4)
-		drm_printf(p, "0x%08x 0x%08x 0x%08x 0x%08x\n",
-			   *(map + i), *(map + i + 1),
-			   *(map + i + 2), *(map + i + 3));
+	for (i = 0; i < obj->base.size; i += PAGE_SIZE) {
+		if (!i915_memcpy_from_wc(page, map + i, PAGE_SIZE))
+			memcpy(page, map + i, PAGE_SIZE);
+
+		for (j = 0; j < PAGE_SIZE / sizeof(u32); j += 4)
+			drm_printf(p, "0x%08x 0x%08x 0x%08x 0x%08x\n",
+				   *(page + j + 0), *(page + j + 1),
+				   *(page + j + 2), *(page + j + 3));
+	}
 
 	drm_puts(p, "\n");
 
 	i915_gem_object_unpin_map(obj);
+	free_page((unsigned long)page);
 
 	return 0;
 }