Merge tag 'amd-drm-next-6.11-2024-07-03' of https://gitlab.freedesktop.org/agd5f/linux into drm-next

amd-drm-next-6.11-2024-07-03:

amdgpu:
- Use vmalloc for dc_state
- Replay fixes
- Freesync fixes
- DCN 4.0.1 fixes
- DML fixes
- DCC updates
- Misc code cleanups and bug fixes
- 8K display fixes
- DCN 3.5 fixes
- Restructure DIO code
- DML1 fixes
- DML2 fixes
- GFX11 fix
- GFX12 updates
- GFX12 modifiers fixes
- RAS fixes
- IP dump fixes
- Add some updated IP version checks
- Silence UBSAN warning

radeon:
- GPUVM fix

Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240703211314.2041893-1-alexander.deucher@amd.com
Committed by Daniel Vetter on 2024-07-05 12:02:11 +02:00 as commit 6be146cf57.
94 changed files with 622 additions and 316 deletions.

@@ -1808,6 +1808,7 @@ static int amdgpu_discovery_set_common_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(11, 0, 4):
case IP_VERSION(11, 5, 0):
case IP_VERSION(11, 5, 1):
case IP_VERSION(11, 5, 2):
amdgpu_device_ip_block_add(adev, &soc21_common_ip_block);
break;
case IP_VERSION(12, 0, 0):
@@ -1861,6 +1862,7 @@ static int amdgpu_discovery_set_gmc_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(11, 0, 4):
case IP_VERSION(11, 5, 0):
case IP_VERSION(11, 5, 1):
case IP_VERSION(11, 5, 2):
amdgpu_device_ip_block_add(adev, &gmc_v11_0_ip_block);
break;
case IP_VERSION(12, 0, 0):
@@ -1962,6 +1964,7 @@ static int amdgpu_discovery_set_psp_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(13, 0, 14):
case IP_VERSION(14, 0, 0):
case IP_VERSION(14, 0, 1):
case IP_VERSION(14, 0, 4):
amdgpu_device_ip_block_add(adev, &psp_v13_0_ip_block);
break;
case IP_VERSION(13, 0, 4):
@@ -2025,6 +2028,7 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(14, 0, 1):
case IP_VERSION(14, 0, 2):
case IP_VERSION(14, 0, 3):
case IP_VERSION(14, 0, 4):
amdgpu_device_ip_block_add(adev, &smu_v14_0_ip_block);
break;
default:
@@ -2152,6 +2156,7 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(11, 0, 4):
case IP_VERSION(11, 5, 0):
case IP_VERSION(11, 5, 1):
case IP_VERSION(11, 5, 2):
amdgpu_device_ip_block_add(adev, &gfx_v11_0_ip_block);
break;
case IP_VERSION(12, 0, 0):
@@ -2207,6 +2212,7 @@ static int amdgpu_discovery_set_sdma_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(6, 0, 3):
case IP_VERSION(6, 1, 0):
case IP_VERSION(6, 1, 1):
case IP_VERSION(6, 1, 2):
amdgpu_device_ip_block_add(adev, &sdma_v6_0_ip_block);
break;
case IP_VERSION(7, 0, 0):
@@ -2325,6 +2331,7 @@ static int amdgpu_discovery_set_mes_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(11, 0, 4):
case IP_VERSION(11, 5, 0):
case IP_VERSION(11, 5, 1):
case IP_VERSION(11, 5, 2):
amdgpu_device_ip_block_add(adev, &mes_v11_0_ip_block);
adev->enable_mes = true;
adev->enable_mes_kiq = true;
@@ -2360,6 +2367,7 @@ static int amdgpu_discovery_set_vpe_ip_blocks(struct amdgpu_device *adev)
switch (amdgpu_ip_version(adev, VPE_HWIP, 0)) {
case IP_VERSION(6, 1, 0):
case IP_VERSION(6, 1, 1):
case IP_VERSION(6, 1, 3):
amdgpu_device_ip_block_add(adev, &vpe_v6_1_ip_block);
break;
default:
@@ -2634,6 +2642,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
break;
case IP_VERSION(11, 5, 0):
case IP_VERSION(11, 5, 1):
case IP_VERSION(11, 5, 2):
adev->family = AMDGPU_FAMILY_GC_11_5_0;
break;
case IP_VERSION(12, 0, 0):
@@ -2658,6 +2667,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(11, 0, 4):
case IP_VERSION(11, 5, 0):
case IP_VERSION(11, 5, 1):
case IP_VERSION(11, 5, 2):
adev->flags |= AMD_IS_APU;
break;
default:
@@ -2696,6 +2706,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
break;
case IP_VERSION(7, 11, 0):
case IP_VERSION(7, 11, 1):
case IP_VERSION(7, 11, 3):
adev->nbio.funcs = &nbio_v7_11_funcs;
adev->nbio.hdp_flush_reg = &nbio_v7_11_hdp_flush_reg;
break;

@@ -654,6 +654,10 @@ amdgpu_lookup_format_info(u32 format, uint64_t modifier)
if (!IS_AMD_FMT_MOD(modifier))
return NULL;
if (AMD_FMT_MOD_GET(TILE_VERSION, modifier) < AMD_FMT_MOD_TILE_VER_GFX9 ||
AMD_FMT_MOD_GET(TILE_VERSION, modifier) >= AMD_FMT_MOD_TILE_VER_GFX12)
return NULL;
if (AMD_FMT_MOD_GET(DCC_RETILE, modifier))
return lookup_format_info(dcc_retile_formats,
ARRAY_SIZE(dcc_retile_formats),
@@ -720,32 +724,25 @@ extract_render_dcc_offset(struct amdgpu_device *adev,
static int convert_tiling_flags_to_modifier_gfx12(struct amdgpu_framebuffer *afb)
{
struct amdgpu_device *adev = drm_to_adev(afb->base.dev);
const struct drm_format_info *format_info;
u64 modifier = 0;
int tile = 0;
int swizzle = 0;
int swizzle_mode = AMDGPU_TILING_GET(afb->tiling_flags, GFX12_SWIZZLE_MODE);
if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 0, 0)) {
tile = AMD_FMT_MOD_TILE_VER_GFX12;
swizzle = AMDGPU_TILING_GET(afb->tiling_flags, GFX12_SWIZZLE_MODE);
if (!swizzle_mode) {
modifier = DRM_FORMAT_MOD_LINEAR;
} else {
int max_comp_block =
AMDGPU_TILING_GET(afb->tiling_flags, GFX12_DCC_MAX_COMPRESSED_BLOCK);
modifier =
AMD_FMT_MOD |
AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX12) |
AMD_FMT_MOD_SET(TILE, swizzle_mode) |
AMD_FMT_MOD_SET(DCC, afb->gfx12_dcc) |
AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, max_comp_block);
}
modifier =
AMD_FMT_MOD |
AMD_FMT_MOD_SET(TILE, swizzle) |
AMD_FMT_MOD_SET(TILE_VERSION, tile) |
AMD_FMT_MOD_SET(DCC, 0) |
AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, 0);
format_info = amdgpu_lookup_format_info(afb->base.format->format,
modifier);
if (!format_info)
return -EINVAL;
afb->base.modifier = modifier;
afb->base.flags |= DRM_MODE_FB_MODIFIERS;
return 0;
}
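The rewritten convert_tiling_flags_to_modifier_gfx12() above folds the buffer's swizzle mode and DCC state into a single DRM format modifier. A rough sketch of that packing, with invented shift values standing in for the real AMD_FMT_MOD_* layout in drm_fourcc.h:

#include <stdint.h>

/* Field positions here are hypothetical; the kernel's AMD_FMT_MOD_SET()
 * and AMD_FMT_MOD_GET() macros do the same shifting against the real layout. */
enum { TILE_SHIFT = 0, TILE_VER_SHIFT = 8, DCC_SHIFT = 13 };

static uint64_t gfx12_modifier(unsigned int swizzle_mode, int dcc)
{
	if (swizzle_mode == 0)
		return 0;			/* DRM_FORMAT_MOD_LINEAR */
	return ((uint64_t)swizzle_mode << TILE_SHIFT) |
	       ((uint64_t)12 << TILE_VER_SHIFT) |	/* stand-in for GFX12 */
	       ((uint64_t)!!dcc << DCC_SHIFT);
}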
@@ -773,12 +770,6 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb)
int pipes = ilog2(num_pipes);
uint32_t dcc_offset = AMDGPU_TILING_GET(afb->tiling_flags, DCC_OFFSET_256B);
if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 0, 0)) {
convert_tiling_flags_to_modifier_gfx12(afb);
return 0;
}
switch (swizzle >> 2) {
case 0: /* 256B */
block_size_bits = 8;
@@ -954,8 +945,7 @@ static int check_tiling_flags_gfx6(struct amdgpu_framebuffer *afb)
{
u64 micro_tile_mode;
/* Zero swizzle mode means linear */
if (AMDGPU_TILING_GET(afb->tiling_flags, SWIZZLE_MODE) == 0)
if (AMDGPU_TILING_GET(afb->tiling_flags, ARRAY_MODE) == 1) /* LINEAR_ALIGNED */
return 0;
micro_tile_mode = AMDGPU_TILING_GET(afb->tiling_flags, MICRO_TILE_MODE);
@@ -1079,6 +1069,30 @@ static int amdgpu_display_verify_sizes(struct amdgpu_framebuffer *rfb)
block_width = 256 / format_info->cpp[i];
block_height = 1;
block_size_log2 = 8;
} else if (AMD_FMT_MOD_GET(TILE_VERSION, modifier) >= AMD_FMT_MOD_TILE_VER_GFX12) {
int swizzle = AMD_FMT_MOD_GET(TILE, modifier);
switch (swizzle) {
case AMD_FMT_MOD_TILE_GFX12_256B_2D:
block_size_log2 = 8;
break;
case AMD_FMT_MOD_TILE_GFX12_4K_2D:
block_size_log2 = 12;
break;
case AMD_FMT_MOD_TILE_GFX12_64K_2D:
block_size_log2 = 16;
break;
case AMD_FMT_MOD_TILE_GFX12_256K_2D:
block_size_log2 = 18;
break;
default:
drm_dbg_kms(rfb->base.dev,
"Gfx12 swizzle mode with unknown block size: %d\n", swizzle);
return -EINVAL;
}
get_block_dimensions(block_size_log2, format_info->cpp[i],
&block_width, &block_height);
} else {
int swizzle = AMD_FMT_MOD_GET(TILE, modifier);
@@ -1114,7 +1128,8 @@ static int amdgpu_display_verify_sizes(struct amdgpu_framebuffer *rfb)
return ret;
}
if (AMD_FMT_MOD_GET(DCC, modifier)) {
if (AMD_FMT_MOD_GET(TILE_VERSION, modifier) <= AMD_FMT_MOD_TILE_VER_GFX11 &&
AMD_FMT_MOD_GET(DCC, modifier)) {
if (AMD_FMT_MOD_GET(DCC_RETILE, modifier)) {
block_size_log2 = get_dcc_block_size(modifier, false, false);
get_block_dimensions(block_size_log2 + 8, format_info->cpp[0],
@@ -1144,7 +1159,8 @@ static int amdgpu_display_verify_sizes(struct amdgpu_framebuffer *rfb)
}
static int amdgpu_display_get_fb_info(const struct amdgpu_framebuffer *amdgpu_fb,
uint64_t *tiling_flags, bool *tmz_surface)
uint64_t *tiling_flags, bool *tmz_surface,
bool *gfx12_dcc)
{
struct amdgpu_bo *rbo;
int r;
@@ -1152,6 +1168,7 @@ static int amdgpu_display_get_fb_info(const struct amdgpu_framebuffer *amdgpu_fb
if (!amdgpu_fb) {
*tiling_flags = 0;
*tmz_surface = false;
*gfx12_dcc = false;
return 0;
}
@@ -1165,11 +1182,9 @@ static int amdgpu_display_get_fb_info(const struct amdgpu_framebuffer *amdgpu_fb
return r;
}
if (tiling_flags)
amdgpu_bo_get_tiling_flags(rbo, tiling_flags);
if (tmz_surface)
*tmz_surface = amdgpu_bo_encrypted(rbo);
amdgpu_bo_get_tiling_flags(rbo, tiling_flags);
*tmz_surface = amdgpu_bo_encrypted(rbo);
*gfx12_dcc = rbo->flags & AMDGPU_GEM_CREATE_GFX12_DCC;
amdgpu_bo_unreserve(rbo);
@@ -1238,7 +1253,8 @@ static int amdgpu_display_framebuffer_init(struct drm_device *dev,
}
}
ret = amdgpu_display_get_fb_info(rfb, &rfb->tiling_flags, &rfb->tmz_surface);
ret = amdgpu_display_get_fb_info(rfb, &rfb->tiling_flags, &rfb->tmz_surface,
&rfb->gfx12_dcc);
if (ret)
return ret;
@@ -1252,7 +1268,11 @@ static int amdgpu_display_framebuffer_init(struct drm_device *dev,
if (!dev->mode_config.fb_modifiers_not_supported &&
!(rfb->base.flags & DRM_MODE_FB_MODIFIERS)) {
ret = convert_tiling_flags_to_modifier(rfb);
if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 0, 0))
ret = convert_tiling_flags_to_modifier_gfx12(rfb);
else
ret = convert_tiling_flags_to_modifier(rfb);
if (ret) {
drm_dbg_kms(dev, "Failed to convert tiling flags 0x%llX to a modifier",
rfb->tiling_flags);
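The new GFX12 branch in amdgpu_display_verify_sizes() above maps each 2D swizzle mode to a log2 byte size (256B -> 8, 4K -> 12, 64K -> 16, 256K -> 18) and then derives pixel dimensions from it. A worked sketch of that conversion, mirroring the split-the-exponent behaviour of get_block_dimensions() (cpp assumed to be a power of two):

/* Example: a 64K block (log2 = 16) at 4 bytes per pixel holds
 * 2^14 pixels, split as 128 wide x 128 high. */
static void block_dims(unsigned int block_size_log2, unsigned int cpp,
		       unsigned int *width, unsigned int *height)
{
	unsigned int px_log2 = block_size_log2 - __builtin_ctz(cpp);

	*height = 1u << (px_log2 / 2);
	*width = 1u << (px_log2 - px_log2 / 2);	/* width gets the odd bit */
}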

@@ -848,6 +848,7 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev)
case IP_VERSION(11, 0, 4):
case IP_VERSION(11, 5, 0):
case IP_VERSION(11, 5, 1):
case IP_VERSION(11, 5, 2):
/* Don't enable it by default yet.
*/
if (amdgpu_tmz < 1) {

@@ -300,6 +300,7 @@ struct amdgpu_framebuffer {
uint64_t tiling_flags;
bool tmz_surface;
bool gfx12_dcc;
/* caching for later use */
uint64_t address;

@@ -219,6 +219,7 @@ static int psp_early_init(void *handle)
case IP_VERSION(13, 0, 11):
case IP_VERSION(14, 0, 0):
case IP_VERSION(14, 0, 1):
case IP_VERSION(14, 0, 4):
psp_v13_0_set_psp_funcs(psp);
psp->boot_time_tmr = false;
break;

@@ -4565,7 +4565,7 @@ static void amdgpu_ras_boot_time_error_reporting(struct amdgpu_device *adev,
socket_id = AMDGPU_RAS_GPU_ERR_SOCKET_ID(boot_error);
aid_id = AMDGPU_RAS_GPU_ERR_AID_ID(boot_error);
hbm_id = AMDGPU_RAS_GPU_ERR_HBM_ID(boot_error);
hbm_id = ((1 == AMDGPU_RAS_GPU_ERR_HBM_ID(boot_error)) ? 0 : 1);
if (AMDGPU_RAS_GPU_ERR_MEM_TRAINING(boot_error))
dev_info(adev->dev,

@@ -302,6 +302,7 @@ static int vpe_early_init(void *handle)
switch (amdgpu_ip_version(adev, VPE_HWIP, 0)) {
case IP_VERSION(6, 1, 0):
case IP_VERSION(6, 1, 3):
vpe_v6_1_set_funcs(vpe);
break;
case IP_VERSION(6, 1, 1):

@@ -9287,6 +9287,7 @@ static void gfx_v10_ip_print(void *handle, struct drm_printer *p)
if (!adev->gfx.ip_dump_gfx_queues)
return;
index = 0;
reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_10);
drm_printf(p, "\nnum_me: %d num_pipe: %d num_queue: %d\n",
adev->gfx.me.num_me,
@@ -9352,6 +9353,7 @@ static void gfx_v10_ip_dump(void *handle)
if (!adev->gfx.ip_dump_gfx_queues)
return;
index = 0;
reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_10);
amdgpu_gfx_off_ctrl(adev, false);
mutex_lock(&adev->srbm_mutex);
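Both hunks in this file are the same one-line "IP dump" fix: gfx_v10_ip_print() and gfx_v10_ip_dump() reuse one reg_count variable across several register lists, so it has to be reloaded before the gfx-queue loop or the bound of the previous list leaks in. A compilable toy version of the bug class:

#include <stdio.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

int main(void)
{
	int compute_regs[12], gfx_queue_regs[5];	/* stand-in lists */
	unsigned int reg_count = ARRAY_SIZE(compute_regs);

	printf("compute loop bound: %u\n", reg_count);
	reg_count = ARRAY_SIZE(gfx_queue_regs);		/* the added reset */
	printf("gfx queue loop bound: %u\n", reg_count);
	return 0;
}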

@@ -93,6 +93,10 @@ MODULE_FIRMWARE("amdgpu/gc_11_5_1_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_1_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_1_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_1_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_2_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_2_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_2_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_2_rlc.bin");
static const struct amdgpu_hwip_reg_entry gc_reg_list_11_0[] = {
SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS),
@@ -1054,6 +1058,7 @@ static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev)
case IP_VERSION(11, 0, 4):
case IP_VERSION(11, 5, 0):
case IP_VERSION(11, 5, 1):
case IP_VERSION(11, 5, 2):
adev->gfx.config.max_hw_contexts = 8;
adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
@@ -1534,6 +1539,7 @@ static int gfx_v11_0_sw_init(void *handle)
case IP_VERSION(11, 0, 4):
case IP_VERSION(11, 5, 0):
case IP_VERSION(11, 5, 1):
case IP_VERSION(11, 5, 2):
adev->gfx.me.num_me = 1;
adev->gfx.me.num_pipe_per_me = 1;
adev->gfx.me.num_queue_per_pipe = 1;
@@ -2782,7 +2788,8 @@ static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev)
amdgpu_ip_version(adev, GC_HWIP, 0) ==
IP_VERSION(11, 0, 4) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 0) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 1))
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 1) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 2))
bootload_status = RREG32_SOC15(GC, 0,
regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1);
else
@@ -5296,6 +5303,7 @@ static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable)
case IP_VERSION(11, 0, 4):
case IP_VERSION(11, 5, 0):
case IP_VERSION(11, 5, 1):
case IP_VERSION(11, 5, 2):
WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1);
break;
default:
@@ -5332,6 +5340,7 @@ static int gfx_v11_0_set_powergating_state(void *handle,
case IP_VERSION(11, 0, 4):
case IP_VERSION(11, 5, 0):
case IP_VERSION(11, 5, 1):
case IP_VERSION(11, 5, 2):
if (!enable)
amdgpu_gfx_off_ctrl(adev, false);
@@ -5364,6 +5373,7 @@ static int gfx_v11_0_set_clockgating_state(void *handle,
case IP_VERSION(11, 0, 4):
case IP_VERSION(11, 5, 0):
case IP_VERSION(11, 5, 1):
case IP_VERSION(11, 5, 2):
gfx_v11_0_update_gfx_clock_gating(adev,
state == AMD_CG_STATE_GATE);
break;
@@ -5604,11 +5614,7 @@ static void gfx_v11_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ |
PACKET3_RELEASE_MEM_GCR_GL2_WB |
PACKET3_RELEASE_MEM_GCR_GL2_INV |
PACKET3_RELEASE_MEM_GCR_GL2_US |
PACKET3_RELEASE_MEM_GCR_GL1_INV |
PACKET3_RELEASE_MEM_GCR_GLV_INV |
PACKET3_RELEASE_MEM_GCR_GLM_INV |
PACKET3_RELEASE_MEM_GCR_GLM_INV | /* must be set with GLM_WB */
PACKET3_RELEASE_MEM_GCR_GLM_WB |
PACKET3_RELEASE_MEM_CACHE_POLICY(3) |
PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
@@ -6405,6 +6411,7 @@ static void gfx_v11_ip_print(void *handle, struct drm_printer *p)
if (!adev->gfx.ip_dump_gfx_queues)
return;
index = 0;
reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11);
drm_printf(p, "\nnum_me: %d num_pipe: %d num_queue: %d\n",
adev->gfx.me.num_me,
@@ -6469,6 +6476,7 @@ static void gfx_v11_ip_dump(void *handle)
if (!adev->gfx.ip_dump_gfx_queues)
return;
index = 0;
reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11);
amdgpu_gfx_off_ctrl(adev, false);
mutex_lock(&adev->srbm_mutex);

@@ -4108,21 +4108,6 @@ static void gfx_v12_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
/* inherit vmid from mqd */
control |= 0x40000000;
/* Currently, there is a high possibility to get wave ID mismatch
* between ME and GDS, leading to a hw deadlock, because ME generates
* different wave IDs than the GDS expects. This situation happens
* randomly when at least 5 compute pipes use GDS ordered append.
* The wave IDs generated by ME are also wrong after suspend/resume.
* Those are probably bugs somewhere else in the kernel driver.
*
* Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
* GDS to 0 for this ring (me/pipe).
*/
if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
amdgpu_ring_write(ring, regGDS_COMPUTE_MAX_WAVE_ID);
}
amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
amdgpu_ring_write(ring,
@@ -4144,12 +4129,6 @@ static void gfx_v12_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ |
PACKET3_RELEASE_MEM_GCR_GL2_WB |
PACKET3_RELEASE_MEM_GCR_GL2_INV |
PACKET3_RELEASE_MEM_GCR_GL2_US |
PACKET3_RELEASE_MEM_GCR_GL1_INV |
PACKET3_RELEASE_MEM_GCR_GLV_INV |
PACKET3_RELEASE_MEM_GCR_GLM_INV |
PACKET3_RELEASE_MEM_GCR_GLM_WB |
PACKET3_RELEASE_MEM_CACHE_POLICY(3) |
PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
PACKET3_RELEASE_MEM_EVENT_INDEX(5)));
@@ -4727,7 +4706,6 @@ static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_gfx = {
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
2 + /* VM_FLUSH */
8 + /* FENCE for VM_FLUSH */
20 + /* GDS switch */
5 + /* COND_EXEC */
7 + /* HDP_flush */
4 + /* VGT_flush */

@@ -592,6 +592,7 @@ static void gmc_v11_0_set_gfxhub_funcs(struct amdgpu_device *adev)
break;
case IP_VERSION(11, 5, 0):
case IP_VERSION(11, 5, 1):
case IP_VERSION(11, 5, 2):
adev->gfxhub.funcs = &gfxhub_v11_5_0_funcs;
break;
default:
@@ -754,6 +755,7 @@ static int gmc_v11_0_sw_init(void *handle)
case IP_VERSION(11, 0, 4):
case IP_VERSION(11, 5, 0):
case IP_VERSION(11, 5, 1):
case IP_VERSION(11, 5, 2):
set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask);
set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask);
/*

@@ -38,6 +38,7 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_3_imu.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_imu.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_0_imu.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_1_imu.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_2_imu.bin");
static int imu_v11_0_init_microcode(struct amdgpu_device *adev)
{

@@ -51,6 +51,8 @@ MODULE_FIRMWARE("amdgpu/gc_11_5_0_mes_2.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_0_mes1.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_1_mes_2.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_1_mes1.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_2_mes_2.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_2_mes1.bin");
static int mes_v11_0_hw_init(void *handle);
static int mes_v11_0_hw_fini(void *handle);

@@ -569,7 +569,7 @@ static bool mmhub_v1_8_query_utcl2_poison_status(struct amdgpu_device *adev,
if (!amdgpu_sriov_vf(adev)) {
/* clear page fault status and address */
WREG32_P(SOC15_REG_OFFSET(MMHUB, hub_inst,
regVM_L2_PROTECTION_FAULT_STATUS), 1, ~1);
regVM_L2_PROTECTION_FAULT_CNTL), 1, ~1);
}
return fed;

@@ -57,6 +57,8 @@ MODULE_FIRMWARE("amdgpu/psp_14_0_0_toc.bin");
MODULE_FIRMWARE("amdgpu/psp_14_0_0_ta.bin");
MODULE_FIRMWARE("amdgpu/psp_14_0_1_toc.bin");
MODULE_FIRMWARE("amdgpu/psp_14_0_1_ta.bin");
MODULE_FIRMWARE("amdgpu/psp_14_0_4_toc.bin");
MODULE_FIRMWARE("amdgpu/psp_14_0_4_ta.bin");
/* For large FW files the time to complete can be very long */
#define USBC_PD_POLLING_LIMIT_S 240
@@ -106,6 +108,7 @@ static int psp_v13_0_init_microcode(struct psp_context *psp)
case IP_VERSION(13, 0, 11):
case IP_VERSION(14, 0, 0):
case IP_VERSION(14, 0, 1):
case IP_VERSION(14, 0, 4):
err = psp_init_toc_microcode(psp, ucode_prefix);
if (err)
return err;

@@ -50,6 +50,7 @@ MODULE_FIRMWARE("amdgpu/sdma_6_0_2.bin");
MODULE_FIRMWARE("amdgpu/sdma_6_0_3.bin");
MODULE_FIRMWARE("amdgpu/sdma_6_1_0.bin");
MODULE_FIRMWARE("amdgpu/sdma_6_1_1.bin");
MODULE_FIRMWARE("amdgpu/sdma_6_1_2.bin");
#define SDMA1_REG_OFFSET 0x600
#define SDMA0_HYP_DEC_REG_START 0x5880

@@ -389,6 +389,7 @@ soc21_asic_reset_method(struct amdgpu_device *adev)
case IP_VERSION(13, 0, 11):
case IP_VERSION(14, 0, 0):
case IP_VERSION(14, 0, 1):
case IP_VERSION(14, 0, 4):
return AMD_RESET_METHOD_MODE2;
default:
if (amdgpu_dpm_is_baco_supported(adev))
@@ -752,6 +753,32 @@ static int soc21_common_early_init(void *handle)
AMD_PG_SUPPORT_JPEG;
adev->external_rev_id = adev->rev_id + 0xc1;
break;
case IP_VERSION(11, 5, 2):
adev->cg_flags = AMD_CG_SUPPORT_VCN_MGCG |
AMD_CG_SUPPORT_JPEG_MGCG |
AMD_CG_SUPPORT_GFX_CGCG |
AMD_CG_SUPPORT_GFX_CGLS |
AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_GFX_FGCG |
AMD_CG_SUPPORT_REPEATER_FGCG |
AMD_CG_SUPPORT_GFX_PERF_CLK |
AMD_CG_SUPPORT_GFX_3D_CGCG |
AMD_CG_SUPPORT_GFX_3D_CGLS |
AMD_CG_SUPPORT_MC_MGCG |
AMD_CG_SUPPORT_MC_LS |
AMD_CG_SUPPORT_HDP_LS |
AMD_CG_SUPPORT_HDP_DS |
AMD_CG_SUPPORT_HDP_SD |
AMD_CG_SUPPORT_ATHUB_MGCG |
AMD_CG_SUPPORT_ATHUB_LS |
AMD_CG_SUPPORT_IH_CG |
AMD_CG_SUPPORT_BIF_MGCG |
AMD_CG_SUPPORT_BIF_LS;
adev->pg_flags = AMD_PG_SUPPORT_VCN |
AMD_PG_SUPPORT_JPEG |
AMD_PG_SUPPORT_GFX_PG;
adev->external_rev_id = adev->rev_id + 0x40;
break;
default:
/* FIXME: not supported yet */
return -EINVAL;
@@ -927,6 +954,7 @@ static int soc21_common_set_clockgating_state(void *handle,
case IP_VERSION(7, 7, 1):
case IP_VERSION(7, 11, 0):
case IP_VERSION(7, 11, 1):
case IP_VERSION(7, 11, 3):
adev->nbio.funcs->update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE);
adev->nbio.funcs->update_medium_grain_light_sleep(adev,

@@ -34,6 +34,7 @@
MODULE_FIRMWARE("amdgpu/vpe_6_1_0.bin");
MODULE_FIRMWARE("amdgpu/vpe_6_1_1.bin");
MODULE_FIRMWARE("amdgpu/vpe_6_1_3.bin");
#define VPE_THREAD1_UCODE_OFFSET 0x8000

@@ -1678,6 +1678,7 @@ int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pc
case IP_VERSION(11, 0, 4):
case IP_VERSION(11, 5, 0):
case IP_VERSION(11, 5, 1):
case IP_VERSION(11, 5, 2):
case IP_VERSION(12, 0, 0):
case IP_VERSION(12, 0, 1):
num_of_cache_types =

@@ -99,6 +99,7 @@ static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd)
case IP_VERSION(6, 0, 3):
case IP_VERSION(6, 1, 0):
case IP_VERSION(6, 1, 1):
case IP_VERSION(6, 1, 2):
case IP_VERSION(7, 0, 0):
case IP_VERSION(7, 0, 1):
kfd->device_info.num_sdma_queues_per_engine = 8;
@@ -119,6 +120,7 @@ static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd)
case IP_VERSION(6, 0, 3):
case IP_VERSION(6, 1, 0):
case IP_VERSION(6, 1, 1):
case IP_VERSION(6, 1, 2):
case IP_VERSION(7, 0, 0):
case IP_VERSION(7, 0, 1):
/* Reserve 1 for paging and 1 for gfx */
@@ -175,6 +177,7 @@ static void kfd_device_info_set_event_interrupt_class(struct kfd_dev *kfd)
case IP_VERSION(11, 0, 4):
case IP_VERSION(11, 5, 0):
case IP_VERSION(11, 5, 1):
case IP_VERSION(11, 5, 2):
kfd->device_info.event_interrupt_class = &event_interrupt_class_v11;
break;
case IP_VERSION(12, 0, 0):
@@ -438,6 +441,10 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
gfx_target_version = 110501;
f2g = &gfx_v11_kfd2kgd;
break;
case IP_VERSION(11, 5, 2):
gfx_target_version = 110502;
f2g = &gfx_v11_kfd2kgd;
break;
case IP_VERSION(12, 0, 0):
gfx_target_version = 120000;
f2g = &gfx_v12_kfd2kgd;
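The new kgd2kfd_probe() entry keeps to the decimal packing visible in the neighbouring cases, where GC 11.5.2 becomes gfx_target_version 110502. As arithmetic (an observation from the surrounding values, not a documented formula):

/* 11.5.2 -> 11 * 10000 + 5 * 100 + 2 = 110502 */
static unsigned int gfx_target_version(unsigned int major, unsigned int minor,
				       unsigned int step)
{
	return major * 10000 + minor * 100 + step;
}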

@@ -380,7 +380,8 @@ int kfd_init_apertures(struct kfd_process *process)
pdd = kfd_create_process_device_data(dev, process);
if (!pdd) {
pr_err("Failed to create process device data\n");
dev_err(dev->adev->dev,
"Failed to create process device data\n");
return -ENOMEM;
}
/*

@@ -69,7 +69,7 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_node *dev,
kq->mqd_mgr = dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ];
break;
default:
pr_err("Invalid queue type %d\n", type);
dev_err(dev->adev->dev, "Invalid queue type %d\n", type);
return false;
}
@@ -79,13 +79,14 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_node *dev,
prop.doorbell_ptr = kfd_get_kernel_doorbell(dev->kfd, &prop.doorbell_off);
if (!prop.doorbell_ptr) {
pr_err("Failed to initialize doorbell");
dev_err(dev->adev->dev, "Failed to initialize doorbell");
goto err_get_kernel_doorbell;
}
retval = kfd_gtt_sa_allocate(dev, queue_size, &kq->pq);
if (retval != 0) {
pr_err("Failed to init pq queues size %d\n", queue_size);
dev_err(dev->adev->dev, "Failed to init pq queues size %d\n",
queue_size);
goto err_pq_allocate_vidmem;
}
@@ -341,7 +342,7 @@ struct kernel_queue *kernel_queue_init(struct kfd_node *dev,
if (kq_initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE))
return kq;
pr_err("Failed to init kernel queue\n");
dev_err(dev->adev->dev, "Failed to init kernel queue\n");
kfree(kq);
return NULL;
@@ -360,26 +361,26 @@ static __attribute__((unused)) void test_kq(struct kfd_node *dev)
uint32_t *buffer, i;
int retval;
pr_err("Starting kernel queue test\n");
dev_err(dev->adev->dev, "Starting kernel queue test\n");
kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_HIQ);
if (unlikely(!kq)) {
pr_err(" Failed to initialize HIQ\n");
pr_err("Kernel queue test failed\n");
dev_err(dev->adev->dev, " Failed to initialize HIQ\n");
dev_err(dev->adev->dev, "Kernel queue test failed\n");
return;
}
retval = kq_acquire_packet_buffer(kq, 5, &buffer);
if (unlikely(retval != 0)) {
pr_err(" Failed to acquire packet buffer\n");
pr_err("Kernel queue test failed\n");
dev_err(dev->adev->dev, " Failed to acquire packet buffer\n");
dev_err(dev->adev->dev, "Kernel queue test failed\n");
return;
}
for (i = 0; i < 5; i++)
buffer[i] = kq->nop_packet;
kq_submit_packet(kq);
pr_err("Ending kernel queue test\n");
dev_err(dev->adev->dev, "Ending kernel queue test\n");
}
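The pr_err()-to-dev_err() conversions in this and the following KFD files are about attribution: dev_err() prefixes each message with the originating device, which matters once several GPUs log failures concurrently. A kernel-context fragment sketching the difference (the prefix shown is illustrative; the driver core composes the real one):

/* pr_err():  "Failed to init kernel queue"
 * dev_err(): "amdgpu 0000:0a:00.0: Failed to init kernel queue" */
static void report_failure(struct kfd_node *dev)
{
	dev_err(dev->adev->dev, "Failed to init kernel queue\n");
}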

@@ -118,12 +118,14 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
* attention grabbing.
*/
if (gfx_info->max_shader_engines > KFD_MAX_NUM_SE) {
pr_err("Exceeded KFD_MAX_NUM_SE, chip reports %d\n",
gfx_info->max_shader_engines);
dev_err(mm->dev->adev->dev,
"Exceeded KFD_MAX_NUM_SE, chip reports %d\n",
gfx_info->max_shader_engines);
return;
}
if (gfx_info->max_sh_per_se > KFD_MAX_NUM_SH_PER_SE) {
pr_err("Exceeded KFD_MAX_NUM_SH, chip reports %d\n",
dev_err(mm->dev->adev->dev,
"Exceeded KFD_MAX_NUM_SH, chip reports %d\n",
gfx_info->max_sh_per_se * gfx_info->max_shader_engines);
return;
}

@@ -45,7 +45,8 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
unsigned int process_count, queue_count, compute_queue_count, gws_queue_count;
unsigned int map_queue_size;
unsigned int max_proc_per_quantum = 1;
struct kfd_node *dev = pm->dqm->dev;
struct kfd_node *node = pm->dqm->dev;
struct device *dev = node->adev->dev;
process_count = pm->dqm->processes_count;
queue_count = pm->dqm->active_queue_count;
@@ -59,14 +60,14 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
*/
*over_subscription = false;
if (dev->max_proc_per_quantum > 1)
max_proc_per_quantum = dev->max_proc_per_quantum;
if (node->max_proc_per_quantum > 1)
max_proc_per_quantum = node->max_proc_per_quantum;
if ((process_count > max_proc_per_quantum) ||
compute_queue_count > get_cp_queues_num(pm->dqm) ||
gws_queue_count > 1) {
*over_subscription = true;
pr_debug("Over subscribed runlist\n");
dev_dbg(dev, "Over subscribed runlist\n");
}
map_queue_size = pm->pmf->map_queues_size;
@@ -81,7 +82,7 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
if (*over_subscription)
*rlib_size += pm->pmf->runlist_size;
pr_debug("runlist ib size %d\n", *rlib_size);
dev_dbg(dev, "runlist ib size %d\n", *rlib_size);
}
static int pm_allocate_runlist_ib(struct packet_manager *pm,
@@ -90,6 +91,8 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm,
unsigned int *rl_buffer_size,
bool *is_over_subscription)
{
struct kfd_node *node = pm->dqm->dev;
struct device *dev = node->adev->dev;
int retval;
if (WARN_ON(pm->allocated))
@@ -99,11 +102,10 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm,
mutex_lock(&pm->lock);
retval = kfd_gtt_sa_allocate(pm->dqm->dev, *rl_buffer_size,
&pm->ib_buffer_obj);
retval = kfd_gtt_sa_allocate(node, *rl_buffer_size, &pm->ib_buffer_obj);
if (retval) {
pr_err("Failed to allocate runlist IB\n");
dev_err(dev, "Failed to allocate runlist IB\n");
goto out;
}
@@ -125,6 +127,8 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
{
unsigned int alloc_size_bytes;
unsigned int *rl_buffer, rl_wptr, i;
struct kfd_node *node = pm->dqm->dev;
struct device *dev = node->adev->dev;
int retval, processes_mapped;
struct device_process_node *cur;
struct qcm_process_device *qpd;
@@ -142,7 +146,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
*rl_size_bytes = alloc_size_bytes;
pm->ib_size_bytes = alloc_size_bytes;
pr_debug("Building runlist ib process count: %d queues count %d\n",
dev_dbg(dev, "Building runlist ib process count: %d queues count %d\n",
pm->dqm->processes_count, pm->dqm->active_queue_count);
/* build the run list ib packet */
@@ -150,7 +154,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
qpd = cur->qpd;
/* build map process packet */
if (processes_mapped >= pm->dqm->processes_count) {
pr_debug("Not enough space left in runlist IB\n");
dev_dbg(dev, "Not enough space left in runlist IB\n");
pm_release_ib(pm);
return -ENOMEM;
}
@@ -167,7 +171,8 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
if (!kq->queue->properties.is_active)
continue;
pr_debug("static_queue, mapping kernel q %d, is debug status %d\n",
dev_dbg(dev,
"static_queue, mapping kernel q %d, is debug status %d\n",
kq->queue->queue, qpd->is_debug);
retval = pm->pmf->map_queues(pm,
@@ -186,7 +191,8 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
if (!q->properties.is_active)
continue;
pr_debug("static_queue, mapping user queue %d, is debug status %d\n",
dev_dbg(dev,
"static_queue, mapping user queue %d, is debug status %d\n",
q->queue, qpd->is_debug);
retval = pm->pmf->map_queues(pm,
@@ -203,11 +209,13 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
}
}
pr_debug("Finished map process and queues to runlist\n");
dev_dbg(dev, "Finished map process and queues to runlist\n");
if (is_over_subscription) {
if (!pm->is_over_subscription)
pr_warn("Runlist is getting oversubscribed. Expect reduced ROCm performance.\n");
dev_warn(
dev,
"Runlist is getting oversubscribed. Expect reduced ROCm performance.\n");
retval = pm->pmf->runlist(pm, &rl_buffer[rl_wptr],
*rl_gpu_addr,
alloc_size_bytes / sizeof(uint32_t),
@@ -273,6 +281,8 @@ void pm_uninit(struct packet_manager *pm)
int pm_send_set_resources(struct packet_manager *pm,
struct scheduling_resources *res)
{
struct kfd_node *node = pm->dqm->dev;
struct device *dev = node->adev->dev;
uint32_t *buffer, size;
int retval = 0;
@@ -282,7 +292,7 @@ int pm_send_set_resources(struct packet_manager *pm,
size / sizeof(uint32_t),
(unsigned int **)&buffer);
if (!buffer) {
pr_err("Failed to allocate buffer on kernel queue\n");
dev_err(dev, "Failed to allocate buffer on kernel queue\n");
retval = -ENOMEM;
goto out;
}
@@ -344,6 +354,8 @@ fail_create_runlist_ib:
int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
uint64_t fence_value)
{
struct kfd_node *node = pm->dqm->dev;
struct device *dev = node->adev->dev;
uint32_t *buffer, size;
int retval = 0;
@@ -355,7 +367,7 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
kq_acquire_packet_buffer(pm->priv_queue,
size / sizeof(uint32_t), (unsigned int **)&buffer);
if (!buffer) {
pr_err("Failed to allocate buffer on kernel queue\n");
dev_err(dev, "Failed to allocate buffer on kernel queue\n");
retval = -ENOMEM;
goto out;
}
@@ -373,6 +385,8 @@ out:
int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period)
{
struct kfd_node *node = pm->dqm->dev;
struct device *dev = node->adev->dev;
int retval = 0;
uint32_t *buffer, size;
@@ -386,7 +400,8 @@ int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period)
(unsigned int **)&buffer);
if (!buffer) {
pr_err("Failed to allocate buffer on kernel queue\n");
dev_err(dev,
"Failed to allocate buffer on kernel queue\n");
retval = -ENOMEM;
goto out;
}
@@ -407,6 +422,8 @@ int pm_send_unmap_queue(struct packet_manager *pm,
enum kfd_unmap_queues_filter filter,
uint32_t filter_param, bool reset)
{
struct kfd_node *node = pm->dqm->dev;
struct device *dev = node->adev->dev;
uint32_t *buffer, size;
int retval = 0;
@@ -415,7 +432,7 @@ int pm_send_unmap_queue(struct packet_manager *pm,
kq_acquire_packet_buffer(pm->priv_queue,
size / sizeof(uint32_t), (unsigned int **)&buffer);
if (!buffer) {
pr_err("Failed to allocate buffer on kernel queue\n");
dev_err(dev, "Failed to allocate buffer on kernel queue\n");
retval = -ENOMEM;
goto out;
}
@@ -464,6 +481,8 @@ out:
int pm_debugfs_hang_hws(struct packet_manager *pm)
{
struct kfd_node *node = pm->dqm->dev;
struct device *dev = node->adev->dev;
uint32_t *buffer, size;
int r = 0;
@@ -475,16 +494,16 @@ int pm_debugfs_hang_hws(struct packet_manager *pm)
kq_acquire_packet_buffer(pm->priv_queue,
size / sizeof(uint32_t), (unsigned int **)&buffer);
if (!buffer) {
pr_err("Failed to allocate buffer on kernel queue\n");
dev_err(dev, "Failed to allocate buffer on kernel queue\n");
r = -ENOMEM;
goto out;
}
memset(buffer, 0x55, size);
kq_submit_packet(pm->priv_queue);
pr_info("Submitting %x %x %x %x %x %x %x to HIQ to hang the HWS.",
buffer[0], buffer[1], buffer[2], buffer[3],
buffer[4], buffer[5], buffer[6]);
dev_info(dev, "Submitting %x %x %x %x %x %x %x to HIQ to hang the HWS.",
buffer[0], buffer[1], buffer[2], buffer[3], buffer[4],
buffer[5], buffer[6]);
out:
mutex_unlock(&pm->lock);
return r;

@@ -1311,7 +1311,8 @@ int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
if (IS_ERR_VALUE(qpd->tba_addr)) {
int err = qpd->tba_addr;
pr_err("Failure to set tba address. error %d.\n", err);
dev_err(dev->adev->dev,
"Failure to set tba address. error %d.\n", err);
qpd->tba_addr = 0;
qpd->cwsr_kaddr = NULL;
return err;
@@ -1611,7 +1612,8 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
&pdd->proc_ctx_cpu_ptr,
false);
if (retval) {
pr_err("failed to allocate process context bo\n");
dev_err(dev->adev->dev,
"failed to allocate process context bo\n");
goto err_free_pdd;
}
memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
@@ -1676,7 +1678,7 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
&p->kgd_process_info,
&ef);
if (ret) {
pr_err("Failed to create process VM object\n");
dev_err(dev->adev->dev, "Failed to create process VM object\n");
return ret;
}
RCU_INIT_POINTER(p->ef, ef);
@@ -1723,7 +1725,7 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_node *dev,
pdd = kfd_get_process_device_data(dev, p);
if (!pdd) {
pr_err("Process device data doesn't exist\n");
dev_err(dev->adev->dev, "Process device data doesn't exist\n");
return ERR_PTR(-ENOMEM);
}
@@ -1833,6 +1835,7 @@ int kfd_process_evict_queues(struct kfd_process *p, uint32_t trigger)
for (i = 0; i < p->n_pdds; i++) {
struct kfd_process_device *pdd = p->pdds[i];
struct device *dev = pdd->dev->adev->dev;
kfd_smi_event_queue_eviction(pdd->dev, p->lead_thread->pid,
trigger);
@@ -1844,7 +1847,7 @@ int kfd_process_evict_queues(struct kfd_process *p, uint32_t trigger)
* them been add back since they actually not be saved right now.
*/
if (r && r != -EIO) {
pr_err("Failed to evict process queues\n");
dev_err(dev, "Failed to evict process queues\n");
goto fail;
}
n_evicted++;
@@ -1866,7 +1869,8 @@ fail:
if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
&pdd->qpd))
pr_err("Failed to restore queues\n");
dev_err(pdd->dev->adev->dev,
"Failed to restore queues\n");
n_evicted--;
}
@@ -1882,13 +1886,14 @@ int kfd_process_restore_queues(struct kfd_process *p)
for (i = 0; i < p->n_pdds; i++) {
struct kfd_process_device *pdd = p->pdds[i];
struct device *dev = pdd->dev->adev->dev;
kfd_smi_event_queue_restore(pdd->dev, p->lead_thread->pid);
r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
&pdd->qpd);
if (r) {
pr_err("Failed to restore process queues\n");
dev_err(dev, "Failed to restore process queues\n");
if (!ret)
ret = r;
}
@@ -2065,7 +2070,7 @@ int kfd_reserved_mem_mmap(struct kfd_node *dev, struct kfd_process *process,
struct qcm_process_device *qpd;
if ((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) {
pr_err("Incorrect CWSR mapping size.\n");
dev_err(dev->adev->dev, "Incorrect CWSR mapping size.\n");
return -EINVAL;
}
@@ -2077,7 +2082,8 @@ int kfd_reserved_mem_mmap(struct kfd_node *dev, struct kfd_process *process,
qpd->cwsr_kaddr = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
get_order(KFD_CWSR_TBA_TMA_SIZE));
if (!qpd->cwsr_kaddr) {
pr_err("Error allocating per process CWSR buffer.\n");
dev_err(dev->adev->dev,
"Error allocating per process CWSR buffer.\n");
return -ENOMEM;
}

@@ -7561,7 +7561,7 @@ static int dm_update_mst_vcpi_slots_for_dsc(struct drm_atomic_state *state,
}
}
if (j == dc_state->stream_count)
if (j == dc_state->stream_count || pbn_div == 0)
continue;
slot_num = DIV_ROUND_UP(pbn, pbn_div);
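The added pbn_div == 0 test guards the DIV_ROUND_UP() right below it, which divides by pbn_div. Assuming the standard kernel definition, the hazard is plain:

/* Round-up integer division: DIV_ROUND_UP(7, 3) == 3.
 * A zero divisor faults, hence the new early continue. */
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))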
@@ -10462,6 +10462,7 @@ skip_modeset:
}
/* Update Freesync settings. */
reset_freesync_config_for_crtc(dm_new_crtc_state);
get_freesync_config_for_crtc(dm_new_crtc_state,
dm_new_conn_state);
@@ -10666,12 +10667,14 @@ static int dm_check_cursor_fb(struct amdgpu_crtc *new_acrtc,
* check tiling flags when the FB doesn't have a modifier.
*/
if (!(fb->flags & DRM_MODE_FB_MODIFIERS)) {
if (adev->family < AMDGPU_FAMILY_AI) {
if (adev->family >= AMDGPU_FAMILY_GC_12_0_0) {
linear = AMDGPU_TILING_GET(afb->tiling_flags, GFX12_SWIZZLE_MODE) == 0;
} else if (adev->family >= AMDGPU_FAMILY_AI) {
linear = AMDGPU_TILING_GET(afb->tiling_flags, SWIZZLE_MODE) == 0;
} else {
linear = AMDGPU_TILING_GET(afb->tiling_flags, ARRAY_MODE) != DC_ARRAY_2D_TILED_THIN1 &&
AMDGPU_TILING_GET(afb->tiling_flags, ARRAY_MODE) != DC_ARRAY_1D_TILED_THIN1 &&
AMDGPU_TILING_GET(afb->tiling_flags, MICRO_TILE_MODE) == 0;
} else {
linear = AMDGPU_TILING_GET(afb->tiling_flags, SWIZZLE_MODE) == 0;
}
if (!linear) {
DRM_DEBUG_ATOMIC("Cursor FB not linear");
@@ -11863,6 +11866,49 @@ static bool parse_edid_cea(struct amdgpu_dm_connector *aconnector,
return ret;
}
static void parse_edid_displayid_vrr(struct drm_connector *connector,
struct edid *edid)
{
u8 *edid_ext = NULL;
int i;
int j = 0;
u16 min_vfreq;
u16 max_vfreq;
if (edid == NULL || edid->extensions == 0)
return;
/* Find DisplayID extension */
for (i = 0; i < edid->extensions; i++) {
edid_ext = (void *)(edid + (i + 1));
if (edid_ext[0] == DISPLAYID_EXT)
break;
}
if (edid_ext == NULL)
return;
while (j < EDID_LENGTH) {
/* Get dynamic video timing range from DisplayID if available */
if (EDID_LENGTH - j > 13 && edid_ext[j] == 0x25 &&
(edid_ext[j+1] & 0xFE) == 0 && (edid_ext[j+2] == 9)) {
min_vfreq = edid_ext[j+9];
if (edid_ext[j+1] & 7)
max_vfreq = edid_ext[j+10] + ((edid_ext[j+11] & 3) << 8);
else
max_vfreq = edid_ext[j+10];
if (max_vfreq && min_vfreq) {
connector->display_info.monitor_range.max_vfreq = max_vfreq;
connector->display_info.monitor_range.min_vfreq = min_vfreq;
return;
}
}
j++;
}
}
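parse_edid_displayid_vrr() above is one of the Freesync fixes: some eDP panels publish their refresh range only in a DisplayID video-timing-range block (tag 0x25) rather than in the base EDID. A condensed sketch of the byte decode it performs, run on an invented payload:

#include <stdint.h>

/* b points at the start of the 0x25 block, so b[0] == 0x25. */
static void decode_vrr(const uint8_t *b, uint16_t *min_hz, uint16_t *max_hz)
{
	*min_hz = b[9];
	if (b[1] & 7)		/* later revision: two extra max bits in b[11] */
		*max_hz = b[10] + ((b[11] & 3) << 8);
	else
		*max_hz = b[10];
}
/* example: b[1] = 0, b[9] = 48, b[10] = 144  ->  range 48..144 Hz */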
static int parse_amd_vsdb(struct amdgpu_dm_connector *aconnector,
struct edid *edid, struct amdgpu_hdmi_vsdb_info *vsdb_info)
{
@@ -11983,6 +12029,11 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector,
if (!adev->dm.freesync_module)
goto update;
/* Some eDP panels only have the refresh rate range info in DisplayID */
if ((connector->display_info.monitor_range.min_vfreq == 0 ||
connector->display_info.monitor_range.max_vfreq == 0))
parse_edid_displayid_vrr(connector, edid);
if (edid && (sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT ||
sink->sink_signal == SIGNAL_TYPE_EDP)) {
bool edid_check_required = false;
@@ -11990,9 +12041,11 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector,
if (is_dp_capable_without_timing_msa(adev->dm.dc,
amdgpu_dm_connector)) {
if (edid->features & DRM_EDID_FEATURE_CONTINUOUS_FREQ) {
freesync_capable = true;
amdgpu_dm_connector->min_vfreq = connector->display_info.monitor_range.min_vfreq;
amdgpu_dm_connector->max_vfreq = connector->display_info.monitor_range.max_vfreq;
if (amdgpu_dm_connector->max_vfreq -
amdgpu_dm_connector->min_vfreq > 10)
freesync_capable = true;
} else {
edid_check_required = edid->version > 1 ||
(edid->version == 1 &&

@@ -575,10 +575,8 @@ bool dm_helpers_dp_write_dpcd(
{
struct amdgpu_dm_connector *aconnector = link->priv;
if (!aconnector) {
DRM_ERROR("Failed to find connector for link!");
if (!aconnector)
return false;
}
return drm_dp_dpcd_write(&aconnector->dm_dp_aux.aux,
address, (uint8_t *)data, size) > 0;

@@ -23,6 +23,7 @@
*
*/
#include <linux/vmalloc.h>
#include <drm/display/drm_dp_helper.h>
#include <drm/display/drm_dp_mst_helper.h>
#include <drm/drm_atomic.h>
@@ -1491,9 +1492,10 @@ int pre_validate_dsc(struct drm_atomic_state *state,
* from dm_state->context.
*/
local_dc_state = kmemdup(dm_state->context, sizeof(struct dc_state), GFP_KERNEL);
local_dc_state = vmalloc(sizeof(struct dc_state));
if (!local_dc_state)
return -ENOMEM;
memcpy(local_dc_state, dm_state->context, sizeof(struct dc_state));
for (i = 0; i < local_dc_state->stream_count; i++) {
struct dc_stream_state *stream = dm_state->context->streams[i];
@@ -1563,7 +1565,7 @@ clean_exit:
dc_stream_release(local_dc_state->streams[i]);
}
kfree(local_dc_state);
vfree(local_dc_state);
return ret;
}
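This hunk is the "use vmalloc for dc_state" item: pre_validate_dsc() used to kmemdup() an entire struct dc_state, and an allocation that large is better served by vmalloc's page-granular backing than by a high-order contiguous kmalloc. A helper-shaped sketch of the pattern, in kernel context (the kvmalloc/kvfree family is the more general alternative):

/* Duplicate a large object out of vmalloc space; release with vfree(). */
static void *vdup(const void *src, size_t len)
{
	void *p = vmalloc(len);

	if (p)
		memcpy(p, src, len);
	return p;
}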

@@ -352,6 +352,46 @@ static int amdgpu_dm_plane_fill_gfx9_plane_attributes_from_modifiers(struct amdg
return ret;
}
static int amdgpu_dm_plane_fill_gfx12_plane_attributes_from_modifiers(struct amdgpu_device *adev,
const struct amdgpu_framebuffer *afb,
const enum surface_pixel_format format,
const enum dc_rotation_angle rotation,
const struct plane_size *plane_size,
union dc_tiling_info *tiling_info,
struct dc_plane_dcc_param *dcc,
struct dc_plane_address *address,
const bool force_disable_dcc)
{
const uint64_t modifier = afb->base.modifier;
int ret = 0;
/* TODO: Most of this function shouldn't be needed on GFX12. */
amdgpu_dm_plane_fill_gfx9_tiling_info_from_device(adev, tiling_info);
tiling_info->gfx9.swizzle = amdgpu_dm_plane_modifier_gfx9_swizzle_mode(modifier);
if (amdgpu_dm_plane_modifier_has_dcc(modifier) && !force_disable_dcc) {
int max_compressed_block = AMD_FMT_MOD_GET(DCC_MAX_COMPRESSED_BLOCK, modifier);
dcc->enable = 1;
dcc->independent_64b_blks = max_compressed_block == 0;
if (max_compressed_block == 0)
dcc->dcc_ind_blk = hubp_ind_block_64b;
else if (max_compressed_block == 1)
dcc->dcc_ind_blk = hubp_ind_block_128b;
else
dcc->dcc_ind_blk = hubp_ind_block_unconstrained;
}
/* TODO: This seems wrong because there is no DCC plane on GFX12. */
ret = amdgpu_dm_plane_validate_dcc(adev, format, rotation, tiling_info, dcc, address, plane_size);
if (ret)
drm_dbg_kms(adev_to_drm(adev), "amdgpu_dm_plane_validate_dcc: returned error: %d\n", ret);
return ret;
}
static void amdgpu_dm_plane_add_gfx10_1_modifiers(const struct amdgpu_device *adev,
uint64_t **mods,
uint64_t *size,
@@ -648,12 +688,13 @@ static void amdgpu_dm_plane_add_gfx11_modifiers(struct amdgpu_device *adev,
static void amdgpu_dm_plane_add_gfx12_modifiers(struct amdgpu_device *adev,
uint64_t **mods, uint64_t *size, uint64_t *capacity)
{
uint64_t mod_64K_2D = AMD_FMT_MOD |
AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX12) |
AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX12_64K_2D);
uint64_t ver = AMD_FMT_MOD | AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX12);
/* 64K without DCC */
amdgpu_dm_plane_add_modifier(mods, size, capacity, mod_64K_2D);
/* Without DCC: */
amdgpu_dm_plane_add_modifier(mods, size, capacity, ver | AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX12_256K_2D));
amdgpu_dm_plane_add_modifier(mods, size, capacity, ver | AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX12_64K_2D));
amdgpu_dm_plane_add_modifier(mods, size, capacity, ver | AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX12_4K_2D));
amdgpu_dm_plane_add_modifier(mods, size, capacity, ver | AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX12_256B_2D));
amdgpu_dm_plane_add_modifier(mods, size, capacity, DRM_FORMAT_MOD_LINEAR);
}
@@ -835,7 +876,15 @@ int amdgpu_dm_plane_fill_plane_buffer_attributes(struct amdgpu_device *adev,
upper_32_bits(chroma_addr);
}
if (adev->family >= AMDGPU_FAMILY_AI) {
if (adev->family >= AMDGPU_FAMILY_GC_12_0_0) {
ret = amdgpu_dm_plane_fill_gfx12_plane_attributes_from_modifiers(adev, afb, format,
rotation, plane_size,
tiling_info, dcc,
address,
force_disable_dcc);
if (ret)
return ret;
} else if (adev->family >= AMDGPU_FAMILY_AI) {
ret = amdgpu_dm_plane_fill_gfx9_plane_attributes_from_modifiers(adev, afb, format,
rotation, plane_size,
tiling_info, dcc,
@@ -1419,8 +1468,6 @@ static bool amdgpu_dm_plane_format_mod_supported(struct drm_plane *plane,
const struct drm_format_info *info = drm_format_info(format);
int i;
enum dm_micro_swizzle microtile = amdgpu_dm_plane_modifier_gfx9_swizzle_mode(modifier) & 3;
if (!info)
return false;
@@ -1442,29 +1489,34 @@ static bool amdgpu_dm_plane_format_mod_supported(struct drm_plane *plane,
if (i == plane->modifier_count)
return false;
/*
* For D swizzle the canonical modifier depends on the bpp, so check
* it here.
*/
if (AMD_FMT_MOD_GET(TILE_VERSION, modifier) == AMD_FMT_MOD_TILE_VER_GFX9 &&
adev->family >= AMDGPU_FAMILY_NV) {
if (microtile == MICRO_SWIZZLE_D && info->cpp[0] == 4)
return false;
}
/* GFX12 doesn't have these limitations. */
if (AMD_FMT_MOD_GET(TILE_VERSION, modifier) <= AMD_FMT_MOD_TILE_VER_GFX11) {
enum dm_micro_swizzle microtile = amdgpu_dm_plane_modifier_gfx9_swizzle_mode(modifier) & 3;
if (adev->family >= AMDGPU_FAMILY_RV && microtile == MICRO_SWIZZLE_D &&
info->cpp[0] < 8)
return false;
if (amdgpu_dm_plane_modifier_has_dcc(modifier)) {
/* Per radeonsi comments 16/64 bpp are more complicated. */
if (info->cpp[0] != 4)
return false;
/* We support multi-planar formats, but not when combined with
* additional DCC metadata planes.
/*
* For D swizzle the canonical modifier depends on the bpp, so check
* it here.
*/
if (info->num_planes > 1)
if (AMD_FMT_MOD_GET(TILE_VERSION, modifier) == AMD_FMT_MOD_TILE_VER_GFX9 &&
adev->family >= AMDGPU_FAMILY_NV) {
if (microtile == MICRO_SWIZZLE_D && info->cpp[0] == 4)
return false;
}
if (adev->family >= AMDGPU_FAMILY_RV && microtile == MICRO_SWIZZLE_D &&
info->cpp[0] < 8)
return false;
if (amdgpu_dm_plane_modifier_has_dcc(modifier)) {
/* Per radeonsi comments 16/64 bpp are more complicated. */
if (info->cpp[0] != 4)
return false;
/* We support multi-planar formats, but not when combined with
* additional DCC metadata planes.
*/
if (info->num_planes > 1)
return false;
}
}
return true;

@@ -158,7 +158,7 @@ void amdgpu_dm_psr_enable(struct dc_stream_state *stream)
DRM_DEBUG_DRIVER("Enabling psr...\n");
vsync_rate_hz = div64_u64(div64_u64((
stream->timing.pix_clk_100hz * 100),
stream->timing.pix_clk_100hz * (uint64_t)100),
stream->timing.v_total),
stream->timing.h_total);
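The (uint64_t) cast here, like the matching casts in the bandwidth, DPM-test, SubVP, audio, PLL, backlight, and flicker hunks further down, fixes one recurring C pitfall: a product of 32-bit operands is evaluated in 32 bits and can wrap before it is widened for the 64-bit division. A minimal sketch:

#include <stdint.h>

uint64_t wraps(uint32_t pix_clk_100hz)
{
	return pix_clk_100hz * 100;		/* 32-bit multiply; may wrap */
}

uint64_t promoted(uint32_t pix_clk_100hz)
{
	return pix_clk_100hz * (uint64_t)100;	/* multiply done in 64 bits */
}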

@@ -1853,7 +1853,7 @@ static void calculate_bandwidth(
/*compute total time to request one chunk from each active display pipe*/
for (i = 0; i <= maximum_number_of_surfaces - 1; i++) {
if (data->enable[i]) {
data->chunk_request_time = bw_add(data->chunk_request_time, (bw_div((bw_div(bw_int_to_fixed(pixels_per_chunk * data->bytes_per_pixel[i]), data->useful_bytes_per_request[i])), bw_min2(sclk[data->sclk_level], bw_div(data->dispclk, bw_int_to_fixed(2))))));
data->chunk_request_time = bw_add(data->chunk_request_time, (bw_div((bw_div(bw_int_to_fixed(pixels_per_chunk * (int64_t)data->bytes_per_pixel[i]), data->useful_bytes_per_request[i])), bw_min2(sclk[data->sclk_level], bw_div(data->dispclk, bw_int_to_fixed(2))))));
}
}
/*compute total time to request cursor data*/

@@ -131,7 +131,7 @@ int dce_get_dp_ref_freq_khz(struct clk_mgr *clk_mgr_base)
struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
int dprefclk_wdivider;
int dprefclk_src_sel;
int dp_ref_clk_khz = 600000;
int dp_ref_clk_khz;
int target_div;
/* ASSERT DP Reference Clock source is from DFS*/

@@ -576,7 +576,7 @@ static void dcn32_auto_dpm_test_log(
p_state_list[i] = curr_pipe_ctx->p_state_type;
refresh_rate = (curr_pipe_ctx->stream->timing.pix_clk_100hz * (uint64_t)100 +
curr_pipe_ctx->stream->timing.v_total * curr_pipe_ctx->stream->timing.h_total - (uint64_t)1);
curr_pipe_ctx->stream->timing.v_total * (uint64_t)curr_pipe_ctx->stream->timing.h_total - (uint64_t)1);
refresh_rate = div_u64(refresh_rate, curr_pipe_ctx->stream->timing.v_total);
refresh_rate = div_u64(refresh_rate, curr_pipe_ctx->stream->timing.h_total);
disp_src_refresh_list[i] = refresh_rate;

@@ -137,8 +137,8 @@ static void dcn35_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *
if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal) ||
!pipe->stream->link_enc)) {
if (disable) {
if (pipe->stream_res.tg && pipe->stream_res.tg->funcs->immediate_disable_crtc)
pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg);
if (pipe->stream_res.tg && pipe->stream_res.tg->funcs->disable_crtc)
pipe->stream_res.tg->funcs->disable_crtc(pipe->stream_res.tg);
reset_sync_context_for_pipe(dc, context, i);
} else {

@@ -306,27 +306,29 @@ static void dcn401_update_clocks_update_dtb_dto(struct clk_mgr_internal *clk_mgr
struct dc_state *context,
int ref_dtbclk_khz)
{
struct dccg *dccg = clk_mgr->dccg;
uint32_t tg_mask = 0;
int i;
struct dccg *dccg = clk_mgr->dccg;
struct pipe_ctx *otg_master;
bool use_hpo_encoder;
for (i = 0; i < clk_mgr->base.ctx->dc->res_pool->pipe_count; i++) {
struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
/* use mask to program DTO once per tg */
if (pipe_ctx->stream_res.tg &&
!(tg_mask & (1 << pipe_ctx->stream_res.tg->inst))) {
tg_mask |= (1 << pipe_ctx->stream_res.tg->inst);
for (i = 0; i < context->stream_count; i++) {
otg_master = resource_get_otg_master_for_stream(
&context->res_ctx, context->streams[i]);
ASSERT(otg_master);
ASSERT(otg_master->clock_source);
ASSERT(otg_master->clock_source->funcs->program_pix_clk);
ASSERT(otg_master->stream_res.pix_clk_params.controller_id >= CONTROLLER_ID_D0);
if (dccg->ctx->dc->link_srv->dp_is_128b_132b_signal(pipe_ctx)) {
pipe_ctx->clock_source->funcs->program_pix_clk(
pipe_ctx->clock_source,
&pipe_ctx->stream_res.pix_clk_params,
dccg->ctx->dc->link_srv->dp_get_encoding_format(&pipe_ctx->link_config.dp_link_settings),
&pipe_ctx->pll_settings);
}
use_hpo_encoder = dccg->ctx->dc->link_srv->dp_is_128b_132b_signal(otg_master);
if (!use_hpo_encoder)
continue;
}
otg_master->clock_source->funcs->program_pix_clk(
otg_master->clock_source,
&otg_master->stream_res.pix_clk_params,
dccg->ctx->dc->link_srv->dp_get_encoding_format(&otg_master->link_config.dp_link_settings),
&otg_master->pll_settings);
}
}

@@ -888,21 +888,21 @@ static struct rect calculate_plane_rec_in_timing_active(
struct rect rec_out = {0};
struct fixed31_32 temp;
temp = dc_fixpt_from_fraction(rec_in->x * stream->dst.width,
temp = dc_fixpt_from_fraction(rec_in->x * (long long)stream->dst.width,
stream->src.width);
rec_out.x = stream->dst.x + dc_fixpt_round(temp);
temp = dc_fixpt_from_fraction(
(rec_in->x + rec_in->width) * stream->dst.width,
(rec_in->x + rec_in->width) * (long long)stream->dst.width,
stream->src.width);
rec_out.width = stream->dst.x + dc_fixpt_round(temp) - rec_out.x;
temp = dc_fixpt_from_fraction(rec_in->y * stream->dst.height,
temp = dc_fixpt_from_fraction(rec_in->y * (long long)stream->dst.height,
stream->src.height);
rec_out.y = stream->dst.y + dc_fixpt_round(temp);
temp = dc_fixpt_from_fraction(
(rec_in->y + rec_in->height) * stream->dst.height,
(rec_in->y + rec_in->height) * (long long)stream->dst.height,
stream->src.height);
rec_out.height = stream->dst.y + dc_fixpt_round(temp) - rec_out.y;

@@ -975,7 +975,7 @@ static int dc_stream_calculate_flickerless_refresh_rate(struct dc_stream_state *
}
if (search_for_max_increase)
return (int)div64_s64((long long)stream->timing.pix_clk_100hz*100, stream->timing.v_total*stream->timing.h_total);
return (int)div64_s64((long long)stream->timing.pix_clk_100hz*100, stream->timing.v_total*(long long)stream->timing.h_total);
else
return stream->lumin_data.refresh_rate_hz[0];
}
@@ -1024,7 +1024,7 @@ static unsigned int dc_stream_get_max_flickerless_instant_vtotal_delta(struct dc
if (stream->timing.v_total * stream->timing.h_total == 0)
return 0;
int current_refresh_hz = (int)div64_s64((long long)stream->timing.pix_clk_100hz*100, stream->timing.v_total*stream->timing.h_total);
int current_refresh_hz = (int)div64_s64((long long)stream->timing.pix_clk_100hz*100, stream->timing.v_total*(long long)stream->timing.h_total);
int safe_refresh_hz = dc_stream_calculate_flickerless_refresh_rate(stream,
dc_stream_get_brightness_millinits_from_refresh(stream, current_refresh_hz),
@@ -1032,7 +1032,7 @@ static unsigned int dc_stream_get_max_flickerless_instant_vtotal_delta(struct dc
is_gaming,
increase);
int safe_refresh_v_total = (int)div64_s64((long long)stream->timing.pix_clk_100hz*100, safe_refresh_hz*stream->timing.h_total);
int safe_refresh_v_total = (int)div64_s64((long long)stream->timing.pix_clk_100hz*100, safe_refresh_hz*(long long)stream->timing.h_total);
if (increase)
return (((int) stream->timing.v_total - safe_refresh_v_total) >= 0) ? (stream->timing.v_total - safe_refresh_v_total) : 0;

@@ -55,7 +55,7 @@ struct aux_payload;
struct set_config_cmd_payload;
struct dmub_notification;
#define DC_VER "3.2.290"
#define DC_VER "3.2.291"
#define MAX_SURFACES 3
#define MAX_PLANES 6
@@ -293,6 +293,8 @@ struct dc_caps {
bool cursor_not_scaled;
bool dcmode_power_limits_present;
bool sequential_ono;
/* Conservative limit for DCC cases which require ODM4:1 to support*/
uint32_t dcc_plane_width_limit;
};
struct dc_bug_wa {

@@ -353,7 +353,7 @@ static uint32_t calculate_required_audio_bw_in_symbols(
/* DP spec recommends between 1.05 to 1.1 safety margin to prevent sample under-run */
struct fixed31_32 audio_sdp_margin = dc_fixpt_from_fraction(110, 100);
struct fixed31_32 horizontal_line_freq_khz = dc_fixpt_from_fraction(
crtc_info->requested_pixel_clock_100Hz, crtc_info->h_total * 10);
crtc_info->requested_pixel_clock_100Hz, (long long)crtc_info->h_total * 10);
struct fixed31_32 samples_per_line;
struct fixed31_32 layouts_per_line;
struct fixed31_32 symbols_per_sdp_max_layout;
@@ -455,7 +455,8 @@ static uint32_t calculate_available_hblank_bw_in_symbols(
available_hblank_bw -= crtc_info->dsc_num_slices * 4; /* EOC overhead */
if (available_hblank_bw < dp_link_info->hblank_min_symbol_width)
available_hblank_bw = dp_link_info->hblank_min_symbol_width;
/* Each symbol takes 4 frames */
available_hblank_bw = 4 * dp_link_info->hblank_min_symbol_width;
if (available_hblank_bw < 12)
available_hblank_bw = 0;

@@ -217,7 +217,7 @@ static bool calc_fb_divider_checking_tolerance(
actual_calc_clk_100hz = (uint64_t)feedback_divider *
calc_pll_cs->fract_fb_divider_factor +
fract_feedback_divider;
actual_calc_clk_100hz *= calc_pll_cs->ref_freq_khz * 10;
actual_calc_clk_100hz *= (uint64_t)calc_pll_cs->ref_freq_khz * 10;
actual_calc_clk_100hz =
div_u64(actual_calc_clk_100hz,
ref_divider * post_divider *
@@ -680,7 +680,7 @@ static bool calculate_ss(
* so have to divided by 100 * 100*/
ss_amount = dc_fixpt_mul(
fb_div, dc_fixpt_from_fraction(ss_data->percentage,
100 * ss_data->percentage_divider));
100 * (long long)ss_data->percentage_divider));
ds_data->feedback_amount = dc_fixpt_floor(ss_amount);
ss_nslip_amount = dc_fixpt_sub(ss_amount,
@@ -695,8 +695,8 @@
/* compute SS_STEP_SIZE_DSFRAC */
modulation_time = dc_fixpt_from_fraction(
pll_settings->reference_freq * 1000,
pll_settings->reference_divider * ss_data->modulation_freq_hz);
pll_settings->reference_freq * (uint64_t)1000,
pll_settings->reference_divider * (uint64_t)ss_data->modulation_freq_hz);
if (ss_data->flags.CENTER_SPREAD)
modulation_time = dc_fixpt_div_int(modulation_time, 4);

@@ -218,7 +218,7 @@ static void dce_driver_set_backlight(struct panel_cntl *panel_cntl,
* contain integer component, lower 16 bits contain fractional component
* of active duty cycle e.g. 0x21BDC0 = 0xEFF0 * 0x24
*/
active_duty_cycle = backlight_pwm_u16_16 * masked_pwm_period;
active_duty_cycle = backlight_pwm_u16_16 * (uint64_t)masked_pwm_period;
/* 1.3 Calculate 16 bit active duty cycle from integer and fractional
* components shift by bitCount then mask 16 bits and add rounding bit

@@ -27,7 +27,6 @@ DCN10 = dcn10_ipp.o \
dcn10_opp.o \
dcn10_mpc.o \
dcn10_cm_common.o \
dcn10_stream_encoder.o dcn10_link_encoder.o
AMD_DAL_DCN10 = $(addprefix $(AMDDALPATH)/dc/dcn10/,$(DCN10))

@@ -2,7 +2,6 @@
# Copyright © 2019-2024 Advanced Micro Devices, Inc. All rights reserved.
DCN20 = dcn20_mpc.o dcn20_opp.o dcn20_mmhubbub.o \
dcn20_stream_encoder.o dcn20_link_encoder.o \
dcn20_vmid.o dcn20_dwb.o dcn20_dwb_scl.o
AMD_DAL_DCN20 = $(addprefix $(AMDDALPATH)/dc/dcn20/,$(DCN20))

@@ -25,12 +25,10 @@
DCN30 := dcn30_mpc.o dcn30_vpg.o \
dcn30_afmt.o \
dcn30_dio_stream_encoder.o \
dcn30_dwb.o \
dcn30_dwb_cm.o \
dcn30_cm_common.o \
dcn30_mmhubbub.o \
dcn30_dio_link_encoder.o

@@ -4,7 +4,7 @@
#
# Makefile for dcn31.
DCN31 = dcn31_dio_link_encoder.o dcn31_panel_cntl.o \
DCN31 = dcn31_panel_cntl.o \
dcn31_apg.o dcn31_hpo_dp_stream_encoder.o dcn31_hpo_dp_link_encoder.o \
dcn31_afmt.o dcn31_vpg.o


@ -24,6 +24,42 @@
#
ifdef CONFIG_DRM_AMD_DC_FP
###############################################################################
# DCN10
###############################################################################
DIO_DCN10 = dcn10_link_encoder.o dcn10_stream_encoder.o
AMD_DAL_DIO_DCN10 = $(addprefix $(AMDDALPATH)/dc/dio/dcn10/,$(DIO_DCN10))
AMD_DISPLAY_FILES += $(AMD_DAL_DIO_DCN10)
###############################################################################
# DCN20
###############################################################################
DIO_DCN20 = dcn20_link_encoder.o dcn20_stream_encoder.o
AMD_DAL_DIO_DCN20 = $(addprefix $(AMDDALPATH)/dc/dio/dcn20/,$(DIO_DCN20))
AMD_DISPLAY_FILES += $(AMD_DAL_DIO_DCN20)
###############################################################################
# DCN30
###############################################################################
DIO_DCN30 = dcn30_dio_link_encoder.o dcn30_dio_stream_encoder.o
AMD_DAL_DIO_DCN30 = $(addprefix $(AMDDALPATH)/dc/dio/dcn30/,$(DIO_DCN30))
AMD_DISPLAY_FILES += $(AMD_DAL_DIO_DCN30)
###############################################################################
# DCN31
###############################################################################
DIO_DCN31 = dcn31_dio_link_encoder.o
AMD_DAL_DIO_DCN31 = $(addprefix $(AMDDALPATH)/dc/dio/dcn31/,$(DIO_DCN31))
AMD_DISPLAY_FILES += $(AMD_DAL_DIO_DCN31)
###############################################################################
# DCN32
###############################################################################


@ -997,7 +997,7 @@ static bool subvp_subvp_admissable(struct dc *dc,
if (pipe->plane_state && !pipe->top_pipe &&
dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) {
refresh_rate = (pipe->stream->timing.pix_clk_100hz * (uint64_t)100 +
pipe->stream->timing.v_total * pipe->stream->timing.h_total - (uint64_t)1);
pipe->stream->timing.v_total * (uint64_t)pipe->stream->timing.h_total - (uint64_t)1);
refresh_rate = div_u64(refresh_rate, pipe->stream->timing.v_total);
refresh_rate = div_u64(refresh_rate, pipe->stream->timing.h_total);
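
The expression above computes a ceiling: adding v_total * h_total - 1 before the two div_u64 calls rounds the refresh rate up, and since floor(floor(x / v) / h) == floor(x / (v * h)) for non-negative integers, dividing by v_total and then h_total is equivalent to dividing by their product. A hedged sketch of the same computation (timing values are illustrative):

#include <stdint.h>
#include <stdio.h>

/* Round-up division: (a + b - 1) / b, with the product widened to 64 bits. */
static uint64_t refresh_hz_ceil(uint32_t pix_clk_100hz,
				uint32_t v_total, uint32_t h_total)
{
	uint64_t pixels_per_frame = (uint64_t)v_total * h_total;

	return ((uint64_t)pix_clk_100hz * 100 + pixels_per_frame - 1) /
	       pixels_per_frame;
}

int main(void)
{
	/* 1080p-class timing: 148.5 MHz / (1125 * 2200) == 60 Hz exactly. */
	printf("%llu Hz\n",
	       (unsigned long long)refresh_hz_ceil(1485000, 1125, 2200));
	return 0;
}
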


@ -3364,6 +3364,9 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
&mode_lib->vba.UrgentBurstFactorLumaPre[k],
&mode_lib->vba.UrgentBurstFactorChromaPre[k],
&mode_lib->vba.NotUrgentLatencyHidingPre[k]);
v->cursor_bw_pre[k] = mode_lib->vba.NumberOfCursors[k] * mode_lib->vba.CursorWidth[k][0] * mode_lib->vba.CursorBPP[k][0] /
8.0 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * v->VRatioPreY[i][j][k];
}
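
The added cursor_bw_pre term charges cursor prefetch bandwidth as bytes per line divided by the line time, scaled by the prefetch vertical ratio. A rough standalone rendering of that arithmetic, with all inputs invented for illustration:

#include <stdio.h>

int main(void)
{
	/* Illustrative inputs mirroring the shape of the DML expression. */
	double num_cursors = 1.0;
	double cursor_width = 256.0;	/* pixels */
	double cursor_bpp = 32.0;	/* bits per pixel */
	double h_total = 2200.0;	/* pixels per line */
	double pixel_clock = 594.0;	/* MHz, so line time below is in us */
	double v_ratio_pre = 1.0;

	/* bytes per line / line time in us == MB/s */
	double cursor_bw_pre = num_cursors * cursor_width * cursor_bpp / 8.0 /
			       (h_total / pixel_clock) * v_ratio_pre;

	printf("cursor prefetch bw: %.1f MB/s\n", cursor_bw_pre);	/* ~276.5 */
	return 0;
}
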
{


@ -197,9 +197,14 @@ static bool dml21_mode_check_and_programming(const struct dc *in_dc, struct dc_s
memset(&dml_ctx->v21.dml_to_dc_pipe_mapping, 0, sizeof(struct dml2_dml_to_dc_pipe_mapping));
memset(&dml_ctx->v21.mode_programming.dml2_instance->scratch.build_mode_programming_locals.mode_programming_params, 0, sizeof(struct dml2_core_mode_programming_in_out));
if (!context || context->stream_count == 0)
if (!context)
return true;
if (context->stream_count == 0) {
dml21_build_fams2_programming(in_dc, context, dml_ctx);
return true;
}
/* scrub phantom's from current dc_state */
dml_ctx->config.svp_pstate.callbacks.remove_phantom_streams_and_planes(in_dc, context);
dml_ctx->config.svp_pstate.callbacks.release_phantom_streams_and_planes(in_dc, context);
@ -280,7 +285,8 @@ bool dml21_validate(const struct dc *in_dc, struct dc_state *context, struct dml
void dml21_prepare_mcache_programming(struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx)
{
unsigned int num_pipes, dml_prog_idx, dml_phantom_prog_idx, dc_pipe_index;
unsigned int dml_prog_idx, dml_phantom_prog_idx, dc_pipe_index;
int num_pipes;
struct pipe_ctx *dc_main_pipes[__DML2_WRAPPER_MAX_STREAMS_PLANES__];
struct pipe_ctx *dc_phantom_pipes[__DML2_WRAPPER_MAX_STREAMS_PLANES__] = {0};
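
Moving num_pipes from unsigned int to int is what makes the num_pipes <= 0 guards below meaningful: a non-positive value signals failure, and on an unsigned variable that comparison silently degenerates to == 0. A tiny sketch of the pitfall, with a hypothetical helper standing in:

#include <stdio.h>

static int find_pipes(void)
{
	return -1;	/* hypothetical error return */
}

int main(void)
{
	unsigned int u = find_pipes();	/* -1 converts to 4294967295 */
	int s = find_pipes();

	if (u <= 0)	/* never true for an error: only matches == 0 */
		printf("unsigned guard fired\n");
	if (s <= 0)	/* correctly catches the negative return */
		printf("signed guard fired\n");
	return 0;
}
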
@ -314,10 +320,8 @@ void dml21_prepare_mcache_programming(struct dc *in_dc, struct dc_state *context
}
num_pipes = dml21_find_dc_pipes_for_plane(in_dc, context, dml_ctx, dc_main_pipes, dc_phantom_pipes, dml_prog_idx);
if (num_pipes <= 0 ||
dc_main_pipes[0]->stream == NULL ||
dc_main_pipes[0]->plane_state == NULL)
if (num_pipes <= 0 || dc_main_pipes[0]->stream == NULL ||
dc_main_pipes[0]->plane_state == NULL)
continue;
/* get config for each pipe */
@ -356,10 +360,8 @@ void dml21_prepare_mcache_programming(struct dc *in_dc, struct dc_state *context
pln_prog = &dml_ctx->v21.mode_programming.programming->plane_programming[dml_prog_idx];
num_pipes = dml21_find_dc_pipes_for_plane(in_dc, context, dml_ctx, dc_main_pipes, dc_phantom_pipes, dml_prog_idx);
if (num_pipes <= 0 ||
dc_main_pipes[0]->stream == NULL ||
dc_main_pipes[0]->plane_state == NULL)
if (num_pipes <= 0 || dc_main_pipes[0]->stream == NULL ||
dc_main_pipes[0]->plane_state == NULL)
continue;
/* get config for each pipe */


@ -11,7 +11,7 @@ bool dml2_core_create(enum dml2_project_id project_id, struct dml2_core_instance
{
bool result = false;
if (out == 0)
if (!out)
return false;
memset(out, 0, sizeof(struct dml2_core_instance));


@ -21,7 +21,7 @@ bool dml2_dpmm_create(enum dml2_project_id project_id, struct dml2_dpmm_instance
{
bool result = false;
if (out == 0)
if (!out)
return false;
memset(out, 0, sizeof(struct dml2_dpmm_instance));


@ -16,7 +16,7 @@ bool dml2_mcg_create(enum dml2_project_id project_id, struct dml2_mcg_instance *
{
bool result = false;
if (out == 0)
if (!out)
return false;
memset(out, 0, sizeof(struct dml2_mcg_instance));


@ -28,7 +28,7 @@ bool dml2_pmo_create(enum dml2_project_id project_id, struct dml2_pmo_instance *
{
bool result = false;
if (out == 0)
if (!out)
return false;
memset(out, 0, sizeof(struct dml2_pmo_instance));


@ -296,6 +296,7 @@ void dml2_init_socbb_params(struct dml2_context *dml2, const struct dc *in_dc, s
out->round_trip_ping_latency_dcfclk_cycles = 106;
out->smn_latency_us = 2;
out->dispclk_dppclk_vco_speed_mhz = 3600;
out->pct_ideal_dram_bw_after_urgent_pixel_only = 65.0;
break;
case dml_project_dcn401:


@ -288,13 +288,12 @@ void dml2_calculate_rq_and_dlg_params(const struct dc *dc, struct dc_state *cont
{
unsigned int dc_pipe_ctx_index, dml_pipe_idx, plane_id;
enum mall_stream_type pipe_mall_type;
bool unbounded_req_enabled = false;
struct dml2_calculate_rq_and_dlg_params_scratch *s = &in_ctx->v20.scratch.calculate_rq_and_dlg_params_scratch;
context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = (unsigned int)in_ctx->v20.dml_core_ctx.mp.DCFCLKDeepSleep * 1000;
context->bw_ctx.bw.dcn.clk.dppclk_khz = 0;
if (in_ctx->v20.dml_core_ctx.ms.support.FCLKChangeSupport[in_ctx->v20.scratch.mode_support_params.out_lowest_state_idx] == dml_fclock_change_unsupported)
if (in_ctx->v20.dml_core_ctx.ms.support.FCLKChangeSupport[0] == dml_fclock_change_unsupported)
context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = false;
else
context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = true;
@ -302,14 +301,6 @@ void dml2_calculate_rq_and_dlg_params(const struct dc *dc, struct dc_state *cont
if (context->bw_ctx.bw.dcn.clk.dispclk_khz < dc->debug.min_disp_clk_khz)
context->bw_ctx.bw.dcn.clk.dispclk_khz = dc->debug.min_disp_clk_khz;
unbounded_req_enabled = in_ctx->v20.dml_core_ctx.ms.UnboundedRequestEnabledThisState;
if (unbounded_req_enabled && pipe_cnt > 1) {
// Unbounded requesting should not ever be used when more than 1 pipe is enabled.
//ASSERT(false);
unbounded_req_enabled = false;
}
context->bw_ctx.bw.dcn.compbuf_size_kb = in_ctx->v20.dml_core_ctx.ip.config_return_buffer_size_in_kbytes;
for (dc_pipe_ctx_index = 0; dc_pipe_ctx_index < pipe_cnt; dc_pipe_ctx_index++) {
@ -344,7 +335,8 @@ void dml2_calculate_rq_and_dlg_params(const struct dc *dc, struct dc_state *cont
context->res_ctx.pipe_ctx[dc_pipe_ctx_index].unbounded_req = false;
} else {
context->res_ctx.pipe_ctx[dc_pipe_ctx_index].det_buffer_size_kb = dml_get_det_buffer_size_kbytes(&context->bw_ctx.dml2->v20.dml_core_ctx, dml_pipe_idx);
context->res_ctx.pipe_ctx[dc_pipe_ctx_index].unbounded_req = unbounded_req_enabled;
// Unbounded requesting should not ever be used when more than 1 pipe is enabled.
context->res_ctx.pipe_ctx[dc_pipe_ctx_index].unbounded_req = in_ctx->v20.dml_core_ctx.ms.UnboundedRequestEnabledThisState;
}
context->bw_ctx.bw.dcn.compbuf_size_kb -= context->res_ctx.pipe_ctx[dc_pipe_ctx_index].det_buffer_size_kb;


@ -626,21 +626,6 @@ static bool dml2_validate_and_build_resource(const struct dc *in_dc, struct dc_s
if (result) {
unsigned int lowest_state_idx = s->mode_support_params.out_lowest_state_idx;
double min_fclk_mhz_for_urgent_workaround = (double)dml2->config.min_fclk_for_urgent_workaround_khz / 1000.0;
double max_frac_urgent = (double)dml2->config.max_frac_urgent_for_min_fclk_x1000 / 1000.0;
if (min_fclk_mhz_for_urgent_workaround > 0.0 && max_frac_urgent > 0.0 &&
(dml2->v20.dml_core_ctx.mp.FractionOfUrgentBandwidth > max_frac_urgent ||
dml2->v20.dml_core_ctx.mp.FractionOfUrgentBandwidthImmediateFlip > max_frac_urgent)) {
unsigned int forced_lowest_state_idx = lowest_state_idx;
while (forced_lowest_state_idx < dml2->v20.dml_core_ctx.states.num_states &&
dml2->v20.dml_core_ctx.states.state_array[forced_lowest_state_idx].fabricclk_mhz <= min_fclk_mhz_for_urgent_workaround) {
forced_lowest_state_idx += 1;
}
lowest_state_idx = forced_lowest_state_idx;
}
out_clks.dispclk_khz = (unsigned int)dml2->v20.dml_core_ctx.mp.Dispclk_calculated * 1000;
out_clks.p_state_supported = s->mode_support_info.DRAMClockChangeSupport[0] != dml_dram_clock_change_unsupported;
if (in_dc->config.use_default_clock_table &&


@ -237,8 +237,6 @@ struct dml2_configuration_options {
bool use_clock_dc_limits;
bool gpuvm_enable;
struct dml2_soc_bb *bb_from_dmub;
int max_frac_urgent_for_min_fclk_x1000;
int min_fclk_for_urgent_workaround_khz;
};
/*


@ -829,6 +829,7 @@ bool hubbub401_get_dcc_compression_cap(struct hubbub *hubbub,
struct dc_surface_dcc_cap *output)
{
struct dc *dc = hubbub->ctx->dc;
const unsigned int max_dcc_plane_width = dc->caps.dcc_plane_width_limit;
/* DCN4_Programming_Guide_DCHUB.docx, Section 5.11.2.2 */
enum dcc_control dcc_control;
unsigned int plane0_bpe, plane1_bpe;
@ -843,6 +844,11 @@ bool hubbub401_get_dcc_compression_cap(struct hubbub *hubbub,
if (dc->debug.disable_dcc == DCC_DISABLE)
return false;
/* Conservatively disable DCC for cases where ODM4:1 may be required. */
if (max_dcc_plane_width != 0 &&
(input->surface_size.width > max_dcc_plane_width || input->plane1_size.width > max_dcc_plane_width))
return false;
switch (input->format) {
default:
is_dual_plane = false;


@ -677,12 +677,23 @@ void hubp401_cursor_set_position(
int rec_x_offset = x_pos - pos->x_hotspot;
int rec_y_offset = y_pos - pos->y_hotspot;
int dst_x_offset;
int x_pos_viewport = x_pos * param->viewport.width / param->recout.width;
int x_hot_viewport = pos->x_hotspot * param->viewport.width / param->recout.width;
int x_pos_viewport = 0;
int x_hot_viewport = 0;
uint32_t cur_en = pos->enable ? 1 : 0;
hubp->curs_pos = *pos;
/* Recout is zero for pipes if the entire dst_rect is contained
* within preceding ODM slices.
*/
if (param->recout.width) {
x_pos_viewport = x_pos * param->viewport.width / param->recout.width;
x_hot_viewport = pos->x_hotspot * param->viewport.width / param->recout.width;
} else {
ASSERT(!cur_en || x_pos == 0);
ASSERT(!cur_en || pos->x_hotspot == 0);
}
/*
* Guard against cursor_set_position() being called with invalid
* attributes


@ -1305,13 +1305,65 @@ static void populate_audio_dp_link_info(
dp_link_info->link_bandwidth_kbps = dc_fixpt_floor(link_bw_kbps);
/* HW minimum for 128b/132b HBlank is 4 frame symbols.
* TODO: Plumb the actual programmed HBlank min symbol width to here.
/* Calculates hblank_min_symbol_width for 128b/132b
* Corresponding HBLANK_MIN_SYMBOL_WIDTH register is calculated as:
* floor(h_blank * bits_per_pixel / 128)
*/
if (dp_link_info->encoding == DP_128b_132b_ENCODING)
dp_link_info->hblank_min_symbol_width = 4;
else
if (dp_link_info->encoding == DP_128b_132b_ENCODING) {
struct dc_crtc_timing *crtc_timing = &pipe_ctx->stream->timing;
uint32_t h_active = crtc_timing->h_addressable + crtc_timing->h_border_left
+ crtc_timing->h_border_right;
uint32_t h_blank = crtc_timing->h_total - h_active;
uint32_t bpp;
if (crtc_timing->flags.DSC) {
bpp = crtc_timing->dsc_cfg.bits_per_pixel;
} else {
/* When the timing is using DSC, dsc_cfg.bits_per_pixel is in 16th bits.
* The bpp in this path is scaled to 16th bits so the final calculation
* is correct for both cases.
*/
bpp = 16;
switch (crtc_timing->display_color_depth) {
case COLOR_DEPTH_666:
bpp *= 18;
break;
case COLOR_DEPTH_888:
bpp *= 24;
break;
case COLOR_DEPTH_101010:
bpp *= 30;
break;
case COLOR_DEPTH_121212:
bpp *= 36;
break;
default:
bpp = 0;
break;
}
switch (crtc_timing->pixel_encoding) {
case PIXEL_ENCODING_YCBCR422:
bpp = bpp * 2 / 3;
break;
case PIXEL_ENCODING_YCBCR420:
bpp /= 2;
break;
default:
break;
}
}
/* Min symbol width = floor(h_blank * (bpp/16) / 128) */
dp_link_info->hblank_min_symbol_width = dc_fixpt_floor(
dc_fixpt_div(dc_fixpt_from_int(h_blank * bpp),
dc_fixpt_from_int(128 / 16)));
} else {
dp_link_info->hblank_min_symbol_width = 0;
}
}
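
In plain integer terms the comment's formula is floor(h_blank * bits_per_pixel / 128), with bpp carried in 16ths of a bit so the DSC and non-DSC paths share one calculation. A hedged integer-only sketch of that commented formula on made-up timing (the kernel routine itself goes through dc_fixpt_div):

#include <stdio.h>

int main(void)
{
	/* Hypothetical timing: h_total 4400, h_active + borders 3840. */
	unsigned int h_blank = 4400 - 3840;
	unsigned int bpp_16ths = 16 * 24;	/* 8bpc RGB, in 16ths of a bit */

	/* floor(h_blank * bits_per_pixel / 128), with bpp pre-scaled by 16 */
	unsigned int min_symbol_width = h_blank * bpp_16ths / 16 / 128;

	printf("HBLANK_MIN_SYMBOL_WIDTH = %u\n", min_symbol_width);	/* 105 */
	return 0;
}
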
static void build_audio_output(


@ -2208,7 +2208,7 @@ static int dcn10_align_pixel_clocks(struct dc *dc, int group_size,
grouped_pipes[i]->stream->signal)) {
embedded = i;
master = i;
phase[i] = embedded_pix_clk_100hz*100;
phase[i] = embedded_pix_clk_100hz*(uint64_t)100;
modulo[i] = dp_ref_clk_100hz*100;
} else {


@ -183,6 +183,12 @@ void dcn21_set_abm_immediate_disable(struct pipe_ctx *pipe_ctx)
struct panel_cntl *panel_cntl = pipe_ctx->stream->link->panel_cntl;
struct dmcu *dmcu = pipe_ctx->stream->ctx->dc->res_pool->dmcu;
// Short-term workaround for an issue where backlight ramping unexpectedly pauses partway through;
// backlight will be decoupled from ABM and the DMUB interface redefined, after which this w/a can be removed
if (pipe_ctx->stream->abm_level == 0 || pipe_ctx->stream->abm_level == ABM_LEVEL_IMMEDIATE_DISABLE) {
return;
}
if (dmcu) {
dce110_set_abm_immediate_disable(pipe_ctx);
return;


@ -1088,6 +1088,17 @@ static bool dcn401_can_pipe_disable_cursor(struct pipe_ctx *pipe_ctx)
return false;
}
void adjust_hotspot_between_slices_for_2x_magnify(uint32_t cursor_width, struct dc_cursor_position *pos_cpy)
{
if (cursor_width <= 128) {
pos_cpy->x_hotspot /= 2;
pos_cpy->x_hotspot += 1;
} else {
pos_cpy->x_hotspot /= 2;
pos_cpy->x_hotspot += 2;
}
}
void dcn401_set_cursor_position(struct pipe_ctx *pipe_ctx)
{
struct dc_cursor_position pos_cpy = pipe_ctx->stream->cursor_position;
@ -1109,12 +1120,21 @@ void dcn401_set_cursor_position(struct pipe_ctx *pipe_ctx)
int prev_odm_width = 0;
int prev_odm_offset = 0;
struct pipe_ctx *prev_odm_pipe = NULL;
bool mpc_combine_on = false;
int bottom_pipe_x_pos = 0;
int x_pos = pos_cpy.x;
int y_pos = pos_cpy.y;
int recout_x_pos = 0;
int recout_y_pos = 0;
if ((pipe_ctx->top_pipe != NULL) || (pipe_ctx->bottom_pipe != NULL)) {
if ((pipe_ctx->plane_state->src_rect.width != pipe_ctx->plane_res.scl_data.viewport.width) ||
(pipe_ctx->plane_state->src_rect.height != pipe_ctx->plane_res.scl_data.viewport.height)) {
mpc_combine_on = true;
}
}
/* DCN4 moved cursor composition after Scaler, so in HW it is in
* recout space and for HW Cursor position programming need to
* translate to recout space.
@ -1177,15 +1197,8 @@ void dcn401_set_cursor_position(struct pipe_ctx *pipe_ctx)
if (x_pos < 0) {
pos_cpy.x_hotspot -= x_pos;
if ((odm_combine_on) && (hubp->curs_attr.attribute_flags.bits.ENABLE_MAGNIFICATION)) {
if (hubp->curs_attr.width <= 128) {
pos_cpy.x_hotspot /= 2;
pos_cpy.x_hotspot += 1;
} else {
pos_cpy.x_hotspot /= 2;
pos_cpy.x_hotspot += 2;
}
}
if (hubp->curs_attr.attribute_flags.bits.ENABLE_MAGNIFICATION)
adjust_hotspot_between_slices_for_2x_magnify(hubp->curs_attr.width, &pos_cpy);
x_pos = 0;
}
@ -1194,6 +1207,22 @@ void dcn401_set_cursor_position(struct pipe_ctx *pipe_ctx)
y_pos = 0;
}
/* If the position on bottom MPC pipe is negative then we need to add to the hotspot and
* adjust x_pos on bottom pipe to make cursor visible when crossing between MPC slices.
*/
if (mpc_combine_on &&
pipe_ctx->top_pipe &&
(pipe_ctx == pipe_ctx->top_pipe->bottom_pipe)) {
bottom_pipe_x_pos = x_pos - pipe_ctx->plane_res.scl_data.recout.x;
if (bottom_pipe_x_pos < 0) {
x_pos = pipe_ctx->plane_res.scl_data.recout.x;
pos_cpy.x_hotspot -= bottom_pipe_x_pos;
if (hubp->curs_attr.attribute_flags.bits.ENABLE_MAGNIFICATION)
adjust_hotspot_between_slices_for_2x_magnify(hubp->curs_attr.width, &pos_cpy);
}
}
pos_cpy.x = (uint32_t)x_pos;
pos_cpy.y = (uint32_t)y_pos;


@ -80,4 +80,5 @@ void dcn401_unblank_stream(struct pipe_ctx *pipe_ctx, struct dc_link_settings *l
void dcn401_hardware_release(struct dc *dc);
void dcn401_update_odm(struct dc *dc, struct dc_state *context,
struct pipe_ctx *otg_master);
void adjust_hotspot_between_slices_for_2x_magnify(uint32_t cursor_width, struct dc_cursor_position *pos_cpy);
#endif /* __DC_HWSS_DCN401_H__ */


@ -763,7 +763,7 @@ bool edp_setup_psr(struct dc_link *link,
psr_context->crtcTimingVerticalTotal = stream->timing.v_total;
psr_context->vsync_rate_hz = div64_u64(div64_u64((stream->
timing.pix_clk_100hz * 100),
timing.pix_clk_100hz * (u64)100),
stream->timing.v_total),
stream->timing.h_total);


@ -1695,25 +1695,6 @@ static void dcn314_get_panel_config_defaults(struct dc_panel_config *panel_confi
*panel_config = panel_config_defaults;
}
static bool filter_modes_for_single_channel_workaround(struct dc *dc,
struct dc_state *context)
{
// Filter 2K@240Hz+8K@24fps above combination timing if memory only has single dimm LPDDR
if (dc->clk_mgr->bw_params->vram_type == 34 &&
dc->clk_mgr->bw_params->num_channels < 2 &&
context->stream_count > 1) {
int total_phy_pix_clk = 0;
for (int i = 0; i < context->stream_count; i++)
if (context->res_ctx.pipe_ctx[i].stream)
total_phy_pix_clk += context->res_ctx.pipe_ctx[i].stream->phy_pix_clk;
if (total_phy_pix_clk >= (1148928+826260)) //2K@240Hz+8K@24fps
return true;
}
return false;
}
bool dcn314_validate_bandwidth(struct dc *dc,
struct dc_state *context,
bool fast_validate)
@ -1732,9 +1713,6 @@ bool dcn314_validate_bandwidth(struct dc *dc,
if (!pipes)
goto validate_fail;
if (filter_modes_for_single_channel_workaround(dc, context))
goto validate_fail;
DC_FP_START();
// do not support self refresh only
out = dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, fast_validate, false);


@ -1760,7 +1760,7 @@ static int dcn315_populate_dml_pipes_from_context(
bool split_required = pipe->stream->timing.pix_clk_100hz >= dcn_get_max_non_odm_pix_rate_100hz(&dc->dml.soc)
|| (pipe->plane_state && pipe->plane_state->src_rect.width > 5120);
if (remaining_det_segs > MIN_RESERVED_DET_SEGS)
if (remaining_det_segs > MIN_RESERVED_DET_SEGS && crb_pipes != 0)
pipes[pipe_cnt].pipe.src.det_size_override += (remaining_det_segs - MIN_RESERVED_DET_SEGS) / crb_pipes +
(crb_idx < (remaining_det_segs - MIN_RESERVED_DET_SEGS) % crb_pipes ? 1 : 0);
if (pipes[pipe_cnt].pipe.src.det_size_override > 2 * DCN3_15_MAX_DET_SEGS) {
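
Like the recout.width check in the cursor code above, this adds a divisor guard: the leftover DET segments are only distributed when crb_pipes is nonzero. A minimal sketch of the pattern (names and numbers illustrative):

#include <stdio.h>

static unsigned int distribute(unsigned int remaining, unsigned int reserved,
			       unsigned int pipes, unsigned int idx)
{
	/* Guard: the per-pipe share only makes sense with a nonzero divisor. */
	if (remaining <= reserved || pipes == 0)
		return 0;

	return (remaining - reserved) / pipes +
	       (idx < (remaining - reserved) % pipes ? 1u : 0u);
}

int main(void)
{
	printf("%u\n", distribute(20, 4, 3, 0));	/* 16 over 3 pipes => 6 for idx 0 */
	printf("%u\n", distribute(20, 4, 0, 0));	/* zero pipes => no division */
	return 0;
}
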


@ -2153,8 +2153,6 @@ static bool dcn35_resource_construct(
dc->dml2_options.max_segments_per_hubp = 24;
dc->dml2_options.det_segment_size = DCN3_2_DET_SEG_SIZE;/*todo*/
dc->dml2_options.max_frac_urgent_for_min_fclk_x1000 = 900;
dc->dml2_options.min_fclk_for_urgent_workaround_khz = 400 * 1000;
if (dc->config.sdpif_request_limit_words_per_umc == 0)
dc->config.sdpif_request_limit_words_per_umc = 16;/*todo*/


@ -1822,6 +1822,9 @@ static bool dcn401_resource_construct(
dc->caps.extended_aux_timeout_support = true;
dc->caps.dmcub_support = true;
if (ASICREV_IS_GC_12_0_1_A0(dc->ctx->asic_id.hw_internal_rev))
dc->caps.dcc_plane_width_limit = 7680;
/* Color pipeline capabilities */
dc->caps.color.dpp.dcn_arch = 1;
dc->caps.color.dpp.input_lut_shared = 0;


@ -110,21 +110,21 @@ static struct spl_rect calculate_plane_rec_in_timing_active(
struct fixed31_32 temp;
temp = dc_fixpt_from_fraction(rec_in->x * stream_dst->width,
temp = dc_fixpt_from_fraction(rec_in->x * (long long)stream_dst->width,
stream_src->width);
rec_out.x = stream_dst->x + dc_fixpt_round(temp);
temp = dc_fixpt_from_fraction(
(rec_in->x + rec_in->width) * stream_dst->width,
(rec_in->x + rec_in->width) * (long long)stream_dst->width,
stream_src->width);
rec_out.width = stream_dst->x + dc_fixpt_round(temp) - rec_out.x;
temp = dc_fixpt_from_fraction(rec_in->y * stream_dst->height,
temp = dc_fixpt_from_fraction(rec_in->y * (long long)stream_dst->height,
stream_src->height);
rec_out.y = stream_dst->y + dc_fixpt_round(temp);
temp = dc_fixpt_from_fraction(
(rec_in->y + rec_in->height) * stream_dst->height,
(rec_in->y + rec_in->height) * (long long)stream_dst->height,
stream_src->height);
rec_out.height = stream_dst->y + dc_fixpt_round(temp) - rec_out.y;


@ -299,6 +299,7 @@ struct dmub_srv_hw_params {
enum dmub_memory_access_type mem_access_type;
enum dmub_ips_disable_type disable_ips;
bool disallow_phy_access;
bool disable_sldo_opt;
};
/**


@ -684,7 +684,8 @@ union dmub_fw_boot_options {
uint32_t ips_pg_disable: 1; /* 1 to disable ONO domains power gating*/
uint32_t ips_disable: 3; /* options to disable ips support*/
uint32_t ips_sequential_ono: 1; /**< 1 to enable sequential ONO IPS sequence */
uint32_t reserved : 8; /**< reserved */
uint32_t disable_sldo_opt: 1; /**< 1 to disable SLDO optimizations */
uint32_t reserved : 7; /**< reserved */
} bits; /**< boot bits */
uint32_t all; /**< 32-bit access to bits */
};
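
The new flag is carved out of the reserved field so the union stays exactly 32 bits wide: each bit added to the struct half must be matched by shrinking reserved. A reduced sketch of the pattern (field widths here are invented; the real union carries many more flags):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

union boot_options {
	struct {
		uint32_t ips_disable        : 3;
		uint32_t ips_sequential_ono : 1;
		uint32_t disable_sldo_opt   : 1;	/* new flag takes one bit... */
		uint32_t reserved           : 27;	/* ...so reserved gives one up */
	} bits;
	uint32_t all;	/* 32-bit register access */
};

int main(void)
{
	union boot_options opt = { .all = 0 };

	opt.bits.disable_sldo_opt = 1;
	assert(sizeof(opt) == sizeof(uint32_t));	/* still a single dword */
	printf("all = 0x%08x\n", opt.all);
	return 0;
}
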
@ -3326,6 +3327,7 @@ enum replay_state {
REPLAY_STATE_1 = 0x10,
REPLAY_STATE_1A = 0x11,
REPLAY_STATE_2 = 0x20,
REPLAY_STATE_2A = 0x21,
REPLAY_STATE_3 = 0x30,
REPLAY_STATE_3INIT = 0x31,
REPLAY_STATE_4 = 0x40,
@ -3333,6 +3335,7 @@ enum replay_state {
REPLAY_STATE_4B = 0x42,
REPLAY_STATE_4C = 0x43,
REPLAY_STATE_4D = 0x44,
REPLAY_STATE_4E = 0x45,
REPLAY_STATE_4B_LOCKED = 0x4A,
REPLAY_STATE_4C_UNLOCKED = 0x4B,
REPLAY_STATE_5 = 0x50,


@ -424,6 +424,7 @@ void dmub_dcn35_enable_dmub_boot_options(struct dmub_srv *dmub, const struct dmu
boot_options.bits.disable_clk_gate = params->disable_clock_gate;
boot_options.bits.ips_disable = params->disable_ips;
boot_options.bits.ips_sequential_ono = params->ips_sequential_ono;
boot_options.bits.disable_sldo_opt = params->disable_sldo_opt;
REG_WRITE(DMCUB_SCRATCH14, boot_options.all);
}


@ -1002,7 +1002,7 @@ void mod_freesync_build_vrr_params(struct mod_freesync *mod_freesync,
if (stream->ctx->dc->caps.max_v_total != 0 && stream->timing.h_total != 0) {
min_hardware_refresh_in_uhz = div64_u64((stream->timing.pix_clk_100hz * 100000000ULL),
(stream->timing.h_total * stream->ctx->dc->caps.max_v_total));
(stream->timing.h_total * (long long)stream->ctx->dc->caps.max_v_total));
}
/* Limit minimum refresh rate to what can be supported by hardware */
min_refresh_in_uhz = min_hardware_refresh_in_uhz > in_config->min_refresh_in_uhz ?


@ -734,7 +734,7 @@ struct atom_gpio_pin_lut_v2_1
{
struct atom_common_table_header table_header;
/* the real number of entries included in the structure is calculated using (whole structure size - the header size)/size of atom_gpio_pin_lut */
struct atom_gpio_pin_assignment gpio_pin[8];
struct atom_gpio_pin_assignment gpio_pin[];
};
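
Shrinking gpio_pin[8] to a C99 flexible array member matches what the comment already says: the real entry count comes from the table size, not from a hardcoded bound of 8. A minimal sketch of deriving the count (struct layouts abbreviated and hypothetical):

#include <stdint.h>
#include <stdio.h>

struct header { uint16_t structuresize; uint8_t rev_major, rev_minor; };
struct pin    { uint32_t reg; uint8_t shift, id, pad[2]; };

struct pin_lut {
	struct header table_header;
	struct pin gpio_pin[];	/* flexible array member, count known at runtime */
};

int main(void)
{
	/* Count = (whole structure size - header size) / entry size. */
	uint16_t structuresize = sizeof(struct header) + 12 * sizeof(struct pin);
	size_t count = (structuresize - sizeof(struct header)) / sizeof(struct pin);

	printf("gpio pin entries: %zu\n", count);	/* 12 */
	return 0;
}
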


@ -727,6 +727,7 @@ static int smu_set_funcs(struct amdgpu_device *adev)
break;
case IP_VERSION(14, 0, 0):
case IP_VERSION(14, 0, 1):
case IP_VERSION(14, 0, 4):
smu_v14_0_0_set_ppt_funcs(smu);
break;
case IP_VERSION(14, 0, 2):


@ -136,8 +136,7 @@ int smu_v14_0_load_microcode(struct smu_context *smu)
1 & ~MP1_SMN_PUB_CTRL__LX3_RESET_MASK);
for (i = 0; i < adev->usec_timeout; i++) {
if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0) ||
amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1))
if (smu->is_apu)
mp1_fw_flags = RREG32_PCIE(MP1_Public |
(smnMP1_FIRMWARE_FLAGS_14_0_0 & 0xffffffff));
else
@ -210,8 +209,7 @@ int smu_v14_0_check_fw_status(struct smu_context *smu)
struct amdgpu_device *adev = smu->adev;
uint32_t mp1_fw_flags;
if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0) ||
amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1))
if (smu->is_apu)
mp1_fw_flags = RREG32_PCIE(MP1_Public |
(smnMP1_FIRMWARE_FLAGS_14_0_0 & 0xffffffff));
else
@ -245,6 +243,7 @@ int smu_v14_0_check_fw_version(struct smu_context *smu)
switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
case IP_VERSION(14, 0, 0):
case IP_VERSION(14, 0, 4):
smu->smc_driver_if_version = SMU14_DRIVER_IF_VERSION_SMU_V14_0_0;
break;
case IP_VERSION(14, 0, 1):
@ -759,6 +758,7 @@ int smu_v14_0_gfx_off_control(struct smu_context *smu, bool enable)
case IP_VERSION(14, 0, 1):
case IP_VERSION(14, 0, 2):
case IP_VERSION(14, 0, 3):
case IP_VERSION(14, 0, 4):
if (!(adev->pm.pp_feature & PP_GFXOFF_MASK))
return 0;
if (enable)
@ -866,8 +866,7 @@ static int smu_v14_0_set_irq_state(struct amdgpu_device *adev,
WREG32_SOC15(THM, 0, regTHM_THERMAL_INT_ENA, 0);
/* For MP1 SW irqs */
if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0) ||
amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1)) {
if (smu->is_apu) {
val = RREG32_SOC15(MP1, 0, regMP1_SMN_IH_SW_INT_CTRL_mp1_14_0_0);
val = REG_SET_FIELD(val, MP1_SMN_IH_SW_INT_CTRL, INT_MASK, 1);
WREG32_SOC15(MP1, 0, regMP1_SMN_IH_SW_INT_CTRL_mp1_14_0_0, val);
@ -900,8 +899,7 @@ static int smu_v14_0_set_irq_state(struct amdgpu_device *adev,
WREG32_SOC15(THM, 0, regTHM_THERMAL_INT_ENA, val);
/* For MP1 SW irqs */
if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0) ||
amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1)) {
if (smu->is_apu) {
val = RREG32_SOC15(MP1, 0, regMP1_SMN_IH_SW_INT_mp1_14_0_0);
val = REG_SET_FIELD(val, MP1_SMN_IH_SW_INT, ID, 0xFE);
val = REG_SET_FIELD(val, MP1_SMN_IH_SW_INT, VALID, 0);
@ -1494,8 +1492,7 @@ int smu_v14_0_set_vcn_enable(struct smu_context *smu,
if (adev->vcn.harvest_config & (1 << i))
continue;
if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0) ||
amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1)) {
if (smu->is_apu) {
if (i == 0)
ret = smu_cmn_send_smc_msg_with_param(smu, enable ?
SMU_MSG_PowerUpVcn0 : SMU_MSG_PowerDownVcn0,
@ -1527,8 +1524,7 @@ int smu_v14_0_set_jpeg_enable(struct smu_context *smu,
if (adev->jpeg.harvest_config & (1 << i))
continue;
if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0) ||
amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1)) {
if (smu->is_apu) {
if (i == 0)
ret = smu_cmn_send_smc_msg_with_param(smu, enable ?
SMU_MSG_PowerUpJpeg0 : SMU_MSG_PowerDownJpeg0,
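
All of the smu_v14_0 hunks above replace a growing list of MP1 IP-version comparisons with the smu->is_apu flag that is already set at init, so new APU parts such as 14.0.4 take the right path without another clause at every call site. A reduced sketch of the shape of the change (types and version encoding are hypothetical):

#include <stdbool.h>
#include <stdio.h>

struct smu_context { bool is_apu; unsigned int mp1_ver; };

/* Before: every new APU needed another comparison in each function. */
static bool is_apu_by_version(const struct smu_context *smu)
{
	return smu->mp1_ver == 0x0e0000 ||	/* 14.0.0 */
	       smu->mp1_ver == 0x0e0001;	/* 14.0.1 -- 14.0.4 would be missed */
}

/* After: one capability flag, set once, covers future parts too. */
static bool is_apu(const struct smu_context *smu)
{
	return smu->is_apu;
}

int main(void)
{
	struct smu_context smu = { .is_apu = true, .mp1_ver = 0x0e0004 };

	printf("by version list: %d, by flag: %d\n",
	       is_apu_by_version(&smu), is_apu(&smu));
	return 0;
}
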


@ -723,10 +723,10 @@ static int smu_v14_0_common_get_dpm_freq_by_index(struct smu_context *smu,
uint32_t dpm_level,
uint32_t *freq)
{
if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0))
smu_v14_0_0_get_dpm_freq_by_index(smu, clk_type, dpm_level, freq);
else if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1))
if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1))
smu_v14_0_1_get_dpm_freq_by_index(smu, clk_type, dpm_level, freq);
else
smu_v14_0_0_get_dpm_freq_by_index(smu, clk_type, dpm_level, freq);
return 0;
}
@ -999,10 +999,10 @@ static int smu_v14_0_common_get_dpm_ultimate_freq(struct smu_context *smu,
uint32_t *min,
uint32_t *max)
{
if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0))
smu_v14_0_0_get_dpm_ultimate_freq(smu, clk_type, min, max);
else if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1))
if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1))
smu_v14_0_1_get_dpm_ultimate_freq(smu, clk_type, min, max);
else
smu_v14_0_0_get_dpm_ultimate_freq(smu, clk_type, min, max);
return 0;
}
@ -1104,10 +1104,10 @@ static int smu_v14_0_common_get_dpm_level_count(struct smu_context *smu,
enum smu_clk_type clk_type,
uint32_t *count)
{
if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0))
smu_v14_0_0_get_dpm_level_count(smu, clk_type, count);
else if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1))
if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1))
smu_v14_0_1_get_dpm_level_count(smu, clk_type, count);
else
smu_v14_0_0_get_dpm_level_count(smu, clk_type, count);
return 0;
}
@ -1372,10 +1372,10 @@ static int smu_v14_0_0_set_fine_grain_gfx_freq_parameters(struct smu_context *sm
static int smu_v14_0_common_set_fine_grain_gfx_freq_parameters(struct smu_context *smu)
{
if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0))
smu_v14_0_0_set_fine_grain_gfx_freq_parameters(smu);
else if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1))
if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1))
smu_v14_0_1_set_fine_grain_gfx_freq_parameters(smu);
else
smu_v14_0_0_set_fine_grain_gfx_freq_parameters(smu);
return 0;
}
@ -1436,10 +1436,10 @@ static int smu_14_0_0_get_dpm_table(struct smu_context *smu, struct dpm_clocks *
static int smu_v14_0_common_get_dpm_table(struct smu_context *smu, struct dpm_clocks *clock_table)
{
if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0))
smu_14_0_0_get_dpm_table(smu, clock_table);
else if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1))
if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1))
smu_14_0_1_get_dpm_table(smu, clock_table);
else
smu_14_0_0_get_dpm_table(smu, clock_table);
return 0;
}


@ -642,7 +642,7 @@ static void radeon_gem_va_update_vm(struct radeon_device *rdev,
if (r)
goto error_unlock;
if (bo_va->it.start)
if (bo_va->it.start && bo_va->bo)
r = radeon_vm_bo_update(rdev, bo_va, bo_va->bo->tbo.resource);
error_unlock:


@ -1506,6 +1506,8 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier)
* 6 - 64KB_3D
* 7 - 256KB_3D
*/
#define AMD_FMT_MOD_TILE_GFX12_256B_2D 1
#define AMD_FMT_MOD_TILE_GFX12_4K_2D 2
#define AMD_FMT_MOD_TILE_GFX12_64K_2D 3
#define AMD_FMT_MOD_TILE_GFX12_256K_2D 4
@ -1540,9 +1542,6 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier)
#define AMD_FMT_MOD_DCC_MAX_COMPRESSED_BLOCK_SHIFT 18
#define AMD_FMT_MOD_DCC_MAX_COMPRESSED_BLOCK_MASK 0x3
#define AMD_FMT_MOD_GFX12_DCC_MAX_COMPRESSED_BLOCK_SHIFT 3
#define AMD_FMT_MOD_GFX12_DCC_MAX_COMPRESSED_BLOCK_MASK 0x3 /* 0:64B, 1:128B, 2:256B */
/*
* DCC supports embedding some clear colors directly in the DCC surface.
* However, on older GPUs the rendering HW ignores the embedded clear color