Merge tag 'amd-drm-next-6.9-2024-02-09' of https://gitlab.freedesktop.org/agd5f/linux into drm-next

amd-drm-next-6.9-2024-02-09:

amdgpu:
- Validate DMABuf imports in compute VMs
- Add RAS ACA framework
- PSP 13 fixes
- Misc code cleanups
- Replay fixes
- Atom interpreter PS, WS bounds checking
- DML2 fixes
- Audio fixes
- DCN 3.5 Z state fixes
- Remove deprecated ida_simple usage
- UBSAN fixes
- RAS fixes
- Enable seq64 infrastructure
- DC color block enablement
- Documentation updates
- DC documentation updates
- DMCUB updates
- S3 fixes
- VCN 4.0.5 fixes
- DP MST fixes
- SR-IOV fixes

amdkfd:
- Validate DMABuf imports in compute VMs
- SVM fixes
- Trap handler updates

radeon:
- Atom interpreter PS, WS bounds checking
- Misc code cleanups

UAPI:
- Bump the KFD version so UMDs know that the fixes enabling management of VA
  mappings in compute VMs via the GEM_VA ioctl for DMABufs exported from KFD
  are present
- Add an INFO query for input power. This matches the existing INFO query for
  average power. Used in gaming HUDs, etc.
  Example userspace: https://github.com/Umio-Yasuno/libdrm-amdgpu-sys-rs/tree/input_power

From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240209221459.5453-1-alexander.deucher@amd.com
This commit is contained in: b344e64fbd
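
A minimal userspace sketch of the new input-power query, for orientation only:
amdgpu_query_sensor_info() and AMDGPU_INFO_SENSOR_GPU_AVG_POWER are existing
libdrm_amdgpu/uapi interfaces; AMDGPU_INFO_SENSOR_GPU_INPUT_POWER is assumed
here to be the enum name added by this pull, so check the merged amdgpu_drm.h
before relying on it.

/* Build against libdrm_amdgpu, e.g. -I/usr/include/libdrm -ldrm_amdgpu -ldrm */
#include <fcntl.h>
#include <stdio.h>
#include <stdint.h>
#include <amdgpu.h>
#include <amdgpu_drm.h>

int main(void)
{
	amdgpu_device_handle dev;
	uint32_t major, minor, avg_w = 0, input_w = 0;
	int fd = open("/dev/dri/renderD128", O_RDWR);

	if (fd < 0 || amdgpu_device_initialize(fd, &major, &minor, &dev))
		return 1;

	/* Smoothed/average board power, already available to HUDs today. */
	amdgpu_query_sensor_info(dev, AMDGPU_INFO_SENSOR_GPU_AVG_POWER,
				 sizeof(avg_w), &avg_w);
	/* Instantaneous input power added by this pull (enum name assumed). */
	amdgpu_query_sensor_info(dev, AMDGPU_INFO_SENSOR_GPU_INPUT_POWER,
				 sizeof(input_w), &input_w);

	printf("avg: %u W, input: %u W\n", avg_w, input_w);
	amdgpu_device_deinitialize(dev);
	return 0;
}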
@ -16,6 +16,7 @@ Radeon (RX|TM) (PRO|WX) Vega /MI25 /V320 /V340L /8200 /9100 /SSG MxGPU, VEGA10,
AMD Radeon (Pro) VII /MI50 /MI60, VEGA20, DCE 12, 9.4.0, VCE 4.1.0 / UVD 7.2.0, 4.2.0
MI100, ARCTURUS, *, 9.4.1, VCN 2.5.0, 4.2.2
MI200, ALDEBARAN, *, 9.4.2, VCN 2.6.0, 4.4.0
MI300, AQUA_VANJARAM, *, 9.4.3, VCN 4.0.3, 4.4.2
AMD Radeon (RX|Pro) 5600(M|XT) /5700 (M|XT|XTB) /W5700, NAVI10, DCN 2.0.0, 10.1.10, VCN 2.0.0, 5.0.0
AMD Radeon (Pro) 5300 /5500XTB/5500(XT|M) /W5500M /W5500, NAVI14, DCN 2.0.0, 10.1.1, VCN 2.0.2, 5.0.2
AMD Radeon RX 6800(XT) /6900(XT) /W6800, SIENNA_CICHLID, DCN 3.0.0, 10.3.0, VCN 3.0.0, 5.2.0
@ -23,4 +24,5 @@ AMD Radeon RX 6700 XT / 6800M / 6700M, NAVY_FLOUNDER, DCN 3.0.0, 10.3.2, VCN 3.0
AMD Radeon RX 6600(XT) /6600M /W6600 /W6600M, DIMGREY_CAVEFISH, DCN 3.0.2, 10.3.4, VCN 3.0.16, 5.2.4
AMD Radeon RX 6500M /6300M /W6500M /W6300M, BEIGE_GOBY, DCN 3.0.3, 10.3.5, VCN 3.0.33, 5.2.5
AMD Radeon RX 7900 XT /XTX, , DCN 3.2.0, 11.0.0, VCN 4.0.0, 6.0.0
AMD Radeon RX 7800 XT, , DCN 3.2.0, 11.0.3, VCN 4.0.0, 6.0.3
AMD Radeon RX 7600M (XT) /7700S /7600S, , DCN 3.2.1, 11.0.2, VCN 4.0.4, 6.0.2
Documentation/gpu/amdgpu/display/dcn-blocks.rst (new file, 78 lines)
@ -0,0 +1,78 @@
==========
DCN Blocks
==========

In this section, you will find some extra details about some of the DCN blocks
and the code documentation where it is automatically generated.

DCHUBBUB
--------

.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h
   :doc: overview

.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h
   :export:

.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h
   :internal:

HUBP
----

.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h
   :doc: overview

.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h
   :export:

.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h
   :internal:

DPP
---

.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h
   :doc: overview

.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h
   :export:

.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h
   :internal:

MPC
---

.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h
   :doc: overview

.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h
   :export:

.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h
   :internal:

OPP
---

.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/opp.h
   :doc: overview

.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/opp.h
   :export:

.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/opp.h
   :internal:

DIO
---

.. kernel-doc:: drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.h
   :doc: overview

.. kernel-doc:: drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.h
   :export:

.. kernel-doc:: drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.h
   :internal:
Documentation/gpu/amdgpu/display/display-contributing.rst (new file, 168 lines)
@ -0,0 +1,168 @@
.. _display_todos:

==============================
AMDGPU - Display Contributions
==============================

First of all, if you are here, you probably want to make a technical
contribution to the display code, and for that, we say thank you :)

This page summarizes some of the issues you can help with; keep in mind that
this is a static page, and it is always a good idea to try to reach developers
on the amd-gfx mailing list or some of the maintainers. Finally, this page
follows the DRM way of creating a TODO list; for more information, check
'Documentation/gpu/todo.rst'.

Gitlab issues
=============

Users can report issues associated with AMD GPUs at:

- https://gitlab.freedesktop.org/drm/amd

Usually, we try to add a proper label to all new tickets to make it easy to
filter issues. If you can reproduce any problem, you could help by adding more
information or fixing the issue.

Level: diverse

IGT
===

`IGT`_ provides many integration tests that can be run on your GPU. We always
want to pass a large set of tests to increase the test coverage in our CI. If
you wish to contribute to the display code but are unsure where a good place
to start is, we recommend you run all IGT tests and try to fix any failure you
see on your hardware. Keep in mind that a failure can be an IGT problem or a
kernel issue; it needs to be analyzed case by case.

Level: diverse

.. _IGT: https://gitlab.freedesktop.org/drm/igt-gpu-tools

Compilation
===========

Fix compilation warnings
------------------------

Enable the W=1 or W=2 warning level in the kernel compilation and try to fix
the issues on the display side.

Level: Starter

Fix compilation issues when using um architecture
-------------------------------------------------

Linux has a User-mode Linux (UML) feature, and the kernel can be compiled for
the **um** architecture. Compiling for **um** can bring multiple advantages
from the test perspective. We currently have some compilation issues in this
area that we need to fix.

Level: Intermediate

Code Refactor
=============

Add prefix to DC functions to improve debugging with ftrace
------------------------------------------------------------

The ftrace debug feature (check 'Documentation/trace/ftrace.rst') is a
fantastic way to check the code path when developers try to make sense of a
bug. Ftrace provides a filter mechanism that can be useful when the developer
has a hunch about which part of the code might cause the issue; for this
reason, if a set of functions has a proper prefix, it becomes easy to create a
good filter. Additionally, prefixes can improve stack trace readability.

The DC code does not follow some prefix rules, which makes the ftrace filter
more complicated and reduces the readability of the stack trace. If you want
something simple to start contributing to the display code, you can make
patches that add prefixes to DC functions. To create those prefixes, use part
of the file name as a prefix for all functions in the target file. Check
`amdgpu_dm_crtc.c` and `amdgpu_dm_plane.c` for some references. However, we
strongly advise against sending huge patches changing these prefixes;
otherwise, they will be hard to review and test, which can give maintainers
second thoughts. Try small steps; in case of doubt, you can ask before you put
in the effort. We recommend first looking at folders like dceXYZ, dcnXYZ,
basics, bios, core, clk_mgr, hwss, resource, and irq. A minimal illustration
of the renaming pattern is shown below.
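
The function names below only illustrate the pattern (they follow a rename that
happened in amdgpu_dm, but treat them as an example rather than a reference)::

   /* Before: a static helper in amdgpu_dm_plane.c with a generic name. */
   static void handle_cursor_update(struct drm_plane *plane,
                                    struct drm_plane_state *old_plane_state);

   /* After: prefixed with the file name, so a single ftrace glob such as
    * "amdgpu_dm_plane_*" written to set_ftrace_filter traces every function
    * in this file at once, and stack traces read unambiguously.
    */
   static void amdgpu_dm_plane_handle_cursor_update(struct drm_plane *plane,
                                                    struct drm_plane_state *old_plane_state);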

Level: Starter

Reduce code duplication
-----------------------

AMD has an extensive portfolio with various dGPUs and APUs that amdgpu
supports. To maintain the new hardware release cadence, DCE/DCN was built in a
modular way, making the bring-up of new hardware fast. Over the years, amdgpu
accumulated some technical debt in the code duplication area. For this task,
it would be a good idea to find a tool that can discover code duplication
(including patterns) and use it as guidance to reduce duplication.

Level: Intermediate

Make atomic_commit_[check|tail] more readable
---------------------------------------------

The functions responsible for atomic commit and tail are intricate and
extensive. In particular, `amdgpu_dm_atomic_commit_tail` is a long function
and could benefit from being split into smaller helpers. Improvements in this
area are more than welcome, but keep in mind that changes here will affect all
ASICs, meaning that refactoring requires comprehensive verification; in other
words, this effort can take some time to validate.

Level: Advanced

Documentation
=============

Expand kernel-doc
-----------------

Many DC functions do not have proper kernel-doc; understanding a function and
adding documentation is a great way to learn more about the amdgpu driver and
leave an outstanding contribution to the entire community.

Level: Starter

Beyond AMDGPU
=============

AMDGPU provides features that are not yet enabled in userspace. This section
highlights some of the coolest display features, which could be enabled with
help from userspace developers.

Enable underlay
---------------

The AMD display hardware has a feature called underlay (which you can read
more about in 'Documentation/gpu/amdgpu/display/mpo-overview.rst') that is
intended to save power when playing a video. The basic idea is to put the
video on the underlay plane at the bottom and the desktop on the plane above
it, with a hole in the video area. This feature is enabled in ChromeOS, and
from our measurements it can save power.

Level: Unknown

Adaptive Backlight Modulation (ABM)
-----------------------------------

ABM is a feature that adjusts the display panel's backlight level and pixel
values depending on the displayed image. This power-saving feature can be very
useful when the system starts to run off battery; since it impacts display
output fidelity, it would be good if users could turn this option on or off.

Level: Unknown

HDR & Color management & VRR
----------------------------

HDR, color management, and VRR are huge topics, and it is hard to turn them
into concise TODOs. If you are interested in this area, we recommend checking
some blog posts from community developers to better understand some of the
specific challenges and the people working on the subject. If anyone wants to
work on some particular part, we can try to help with some basic guidance.
Finally, keep in mind that we already have some kernel-doc in place for those
areas.

Level: Unknown
@ -131,9 +131,6 @@ The DRM blend mode and its elements are then mapped by AMDGPU display manager
(DM) to program the blending configuration of the Multiple Pipe/Plane Combined
(MPC), as follows:

.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h
   :doc: mpc-overview

.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h
   :functions: mpcc_blnd_cfg
@ -7,18 +7,80 @@ drm/amd/display - Display Core (DC)

AMD display engine is partially shared with other operating systems; for this
reason, our Display Core Driver is divided into two pieces:

1. **Display Core (DC)** contains the OS-agnostic components. Things like
#. **Display Core (DC)** contains the OS-agnostic components. Things like
   hardware programming and resource management are handled here.
2. **Display Manager (DM)** contains the OS-dependent components. Hooks to the
   amdgpu base driver and DRM are implemented here.
#. **Display Manager (DM)** contains the OS-dependent components. Hooks to the
   amdgpu base driver and DRM are implemented here. For example, you can check
   the display/amdgpu_dm/ folder.

------------------
DC Code validation
------------------

Maintaining the same code base across multiple OSes requires a lot of
synchronization effort between repositories and exhaustive validation. In the
DC case, we maintain a tree to centralize code from different parts. The shared
repository has integration tests with our internal Linux CI farm, and we run a
comprehensive set of IGT tests on various AMD GPUs/APUs (mostly recent dGPUs
and APUs). Our CI also checks ARM64/32, PPC64/32, and x86_64/32 compilation
with DCN enabled and disabled.

When we upstream a new feature or some patches, we pack them in a patchset with
the prefix **DC Patches for <DATE>**, which is created based on the latest
`amd-staging-drm-next <https://gitlab.freedesktop.org/agd5f/linux>`_. All of
those patches are under a DC version tested as follows:

* Ensure that every patch compiles and the entire series passes our set of IGT
  tests on different hardware.
* Prepare a branch with those patches for our validation team. If there is an
  error, a developer will debug as fast as possible; usually, a simple bisect
  in the series is enough to point to a bad change, and two possible actions
  emerge: fix the issue or drop the patch. If it is not an easy fix, the bad
  patch is dropped.
* Finally, developers wait a few days for community feedback before we merge
  the series.

It is good to stress that the test phase is something that we take extremely
seriously, and we never merge anything that fails our validation. An overview
of our test set follows:

#. Manual test

   * Multiple hotplugs with DP and HDMI.
   * Stress test with multiple display configuration changes via the user interface.
   * Validate VRR behaviour.
   * Check PSR.
   * Validate MPO when playing video.
   * Test more than two displays connected at the same time.
   * Check suspend/resume.
   * Validate FPO.
   * Check MST.

#. Automated test

   * IGT tests in a farm with GPUs and APUs that support DCN and DCE.
   * Compilation validation with the latest GCC and Clang from an LTS distro.
   * Cross-compilation for PowerPC 64/32, ARM 64/32, and x86 32.

In terms of test setup for CI and manual tests, we usually use:

#. The latest Ubuntu LTS.
#. In terms of userspace, we only use fully updated open-source components
   provided by the distribution's official package manager.
#. Regarding IGT, we use the latest code from upstream.
#. Most of the manual tests are conducted on GNOME, but we also use KDE.

Note that someone from our test team will always reply to the cover letter
with the test report.

--------------
DC Information
--------------

The display pipe is responsible for "scanning out" a rendered frame from the
GPU memory (also called VRAM, FrameBuffer, etc.) to a display. In other words,
it would:

1. Read frame information from memory;
2. Perform required transformation;
3. Send pixel data to sink devices.
#. Read frame information from memory;
#. Perform required transformation;
#. Send pixel data to sink devices.

If you want to learn more about our driver details, take a look at the below
table of content:
@ -26,7 +88,9 @@ table of content:

.. toctree::

   display-manager.rst
   dc-debug.rst
   dcn-overview.rst
   dcn-blocks.rst
   mpo-overview.rst
   dc-debug.rst
   display-contributing.rst
   dc-glossary.rst
@ -80,7 +80,7 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \
	amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
	amdgpu_fw_attestation.o amdgpu_securedisplay.o \
	amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
	amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o
	amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o

amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
@ -107,6 +107,7 @@
#include "amdgpu_smuio.h"
#include "amdgpu_fdinfo.h"
#include "amdgpu_mca.h"
#include "amdgpu_aca.h"
#include "amdgpu_ras.h"
#include "amdgpu_xcp.h"
#include "amdgpu_seq64.h"
@ -114,14 +115,12 @@

#define MAX_GPU_INSTANCE	64

struct amdgpu_gpu_instance
{
struct amdgpu_gpu_instance {
	struct amdgpu_device	*adev;
	int			mgpu_fan_enabled;
};

struct amdgpu_mgpu_info
{
struct amdgpu_mgpu_info {
	struct amdgpu_gpu_instance	gpu_ins[MAX_GPU_INSTANCE];
	struct mutex			mutex;
	uint32_t			num_gpu;
@ -140,8 +139,7 @@ enum amdgpu_ss {
	AMDGPU_SS_DRV_UNLOAD
};

struct amdgpu_watchdog_timer
{
struct amdgpu_watchdog_timer {
	bool timeout_fatal_disable;
	uint32_t period; /* maxCycles = (1 << period), the number of cycles before a timeout */
};
@ -1045,6 +1043,9 @@ struct amdgpu_device {
	/* MCA */
	struct amdgpu_mca	mca;

	/* ACA */
	struct amdgpu_aca	aca;

	struct amdgpu_ip_block	ip_blocks[AMDGPU_MAX_IP_NUM];
	uint32_t		harvest_ip_mask;
	int			num_ip_blocks;
@ -1078,6 +1079,8 @@ struct amdgpu_device {
	bool			in_s3;
	bool			in_s4;
	bool			in_s0ix;
	/* indicate amdgpu suspension status */
	bool			suspend_complete;

	enum pp_mp1_state	mp1_state;
	struct amdgpu_doorbell_index doorbell_index;
@ -1329,6 +1332,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
#define WREG32_FIELD_OFFSET(reg, offset, field, val)	\
	WREG32(mm##reg + offset, (RREG32(mm##reg + offset) & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field))

#define AMDGPU_GET_REG_FIELD(x, h, l) (((x) & GENMASK_ULL(h, l)) >> (l))
/*
 * BIOS helpers.
 */
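
The new AMDGPU_GET_REG_FIELD() helper above, like the ACA_REG_FIELD() macros
introduced later in this series, extracts bits l..h from a 64-bit register
value with GENMASK_ULL(). A minimal sketch, reusing the 43:32 HARDWAREID field
layout documented in amdgpu_aca.h (the sample helper name is made up):

#include <linux/bits.h>
#include <linux/types.h>

/* Same shape as AMDGPU_GET_REG_FIELD(x, h, l): mask bits l..h, shift down. */
#define GET_FIELD(x, h, l)	(((x) & GENMASK_ULL(h, l)) >> (l))

static inline u32 example_hardware_id(u64 ipid)
{
	/* ACA_REG__IPID__HARDWAREID() reads bits 43:32 of the IPID register. */
	return GET_FIELD(ipid, 43, 32);
}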
drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c (new file, 879 lines)
@ -0,0 +1,879 @@
|
||||
/*
|
||||
* Copyright 2023 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/list.h>
|
||||
#include "amdgpu.h"
|
||||
#include "amdgpu_aca.h"
|
||||
#include "amdgpu_ras.h"
|
||||
|
||||
#define ACA_BANK_HWID(type, hwid, mcatype) [ACA_HWIP_TYPE_##type] = {hwid, mcatype}
|
||||
|
||||
typedef int bank_handler_t(struct aca_handle *handle, struct aca_bank *bank, enum aca_error_type type, void *data);
|
||||
|
||||
struct aca_banks {
|
||||
int nr_banks;
|
||||
struct list_head list;
|
||||
};
|
||||
|
||||
struct aca_hwip {
|
||||
int hwid;
|
||||
int mcatype;
|
||||
};
|
||||
|
||||
static struct aca_hwip aca_hwid_mcatypes[ACA_HWIP_TYPE_COUNT] = {
|
||||
ACA_BANK_HWID(SMU, 0x01, 0x01),
|
||||
ACA_BANK_HWID(PCS_XGMI, 0x50, 0x00),
|
||||
ACA_BANK_HWID(UMC, 0x96, 0x00),
|
||||
};
|
||||
|
||||
static void aca_banks_init(struct aca_banks *banks)
|
||||
{
|
||||
if (!banks)
|
||||
return;
|
||||
|
||||
memset(banks, 0, sizeof(*banks));
|
||||
INIT_LIST_HEAD(&banks->list);
|
||||
}
|
||||
|
||||
static int aca_banks_add_bank(struct aca_banks *banks, struct aca_bank *bank)
|
||||
{
|
||||
struct aca_bank_node *node;
|
||||
|
||||
if (!bank)
|
||||
return -EINVAL;
|
||||
|
||||
node = kvzalloc(sizeof(*node), GFP_KERNEL);
|
||||
if (!node)
|
||||
return -ENOMEM;
|
||||
|
||||
memcpy(&node->bank, bank, sizeof(*bank));
|
||||
|
||||
INIT_LIST_HEAD(&node->node);
|
||||
list_add_tail(&node->node, &banks->list);
|
||||
|
||||
banks->nr_banks++;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void aca_banks_release(struct aca_banks *banks)
|
||||
{
|
||||
struct aca_bank_node *node, *tmp;
|
||||
|
||||
list_for_each_entry_safe(node, tmp, &banks->list, node) {
|
||||
list_del(&node->node);
|
||||
kvfree(node);
|
||||
}
|
||||
}
|
||||
|
||||
static int aca_smu_get_valid_aca_count(struct amdgpu_device *adev, enum aca_error_type type, u32 *count)
|
||||
{
|
||||
struct amdgpu_aca *aca = &adev->aca;
|
||||
const struct aca_smu_funcs *smu_funcs = aca->smu_funcs;
|
||||
|
||||
if (!count)
|
||||
return -EINVAL;
|
||||
|
||||
if (!smu_funcs || !smu_funcs->get_valid_aca_count)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
return smu_funcs->get_valid_aca_count(adev, type, count);
|
||||
}
|
||||
|
||||
static struct aca_regs_dump {
|
||||
const char *name;
|
||||
int reg_idx;
|
||||
} aca_regs[] = {
|
||||
{"CONTROL", ACA_REG_IDX_CTL},
|
||||
{"STATUS", ACA_REG_IDX_STATUS},
|
||||
{"ADDR", ACA_REG_IDX_ADDR},
|
||||
{"MISC", ACA_REG_IDX_MISC0},
|
||||
{"CONFIG", ACA_REG_IDX_CONFG},
|
||||
{"IPID", ACA_REG_IDX_IPID},
|
||||
{"SYND", ACA_REG_IDX_SYND},
|
||||
{"DESTAT", ACA_REG_IDX_DESTAT},
|
||||
{"DEADDR", ACA_REG_IDX_DEADDR},
|
||||
{"CONTROL_MASK", ACA_REG_IDX_CTL_MASK},
|
||||
};
|
||||
|
||||
static void aca_smu_bank_dump(struct amdgpu_device *adev, int idx, int total, struct aca_bank *bank)
|
||||
{
|
||||
int i;
|
||||
|
||||
dev_info(adev->dev, HW_ERR "Accelerator Check Architecture events logged\n");
|
||||
/* plus 1 for output format, e.g: ACA[08/08]: xxxx */
|
||||
for (i = 0; i < ARRAY_SIZE(aca_regs); i++)
|
||||
dev_info(adev->dev, HW_ERR "ACA[%02d/%02d].%s=0x%016llx\n",
|
||||
idx + 1, total, aca_regs[i].name, bank->regs[aca_regs[i].reg_idx]);
|
||||
}
|
||||
|
||||
static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_error_type type,
|
||||
int start, int count,
|
||||
struct aca_banks *banks)
|
||||
{
|
||||
struct amdgpu_aca *aca = &adev->aca;
|
||||
const struct aca_smu_funcs *smu_funcs = aca->smu_funcs;
|
||||
struct aca_bank bank;
|
||||
int i, max_count, ret;
|
||||
|
||||
if (!count)
|
||||
return 0;
|
||||
|
||||
if (!smu_funcs || !smu_funcs->get_valid_aca_bank)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
switch (type) {
|
||||
case ACA_ERROR_TYPE_UE:
|
||||
max_count = smu_funcs->max_ue_bank_count;
|
||||
break;
|
||||
case ACA_ERROR_TYPE_CE:
|
||||
max_count = smu_funcs->max_ce_bank_count;
|
||||
break;
|
||||
case ACA_ERROR_TYPE_DEFERRED:
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (start + count >= max_count)
|
||||
return -EINVAL;
|
||||
|
||||
count = min_t(int, count, max_count);
|
||||
for (i = 0; i < count; i++) {
|
||||
memset(&bank, 0, sizeof(bank));
|
||||
ret = smu_funcs->get_valid_aca_bank(adev, type, start + i, &bank);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
aca_smu_bank_dump(adev, i, count, &bank);
|
||||
|
||||
ret = aca_banks_add_bank(banks, &bank);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool aca_bank_hwip_is_matched(struct aca_bank *bank, enum aca_hwip_type type)
|
||||
{
|
||||
|
||||
struct aca_hwip *hwip;
|
||||
int hwid, mcatype;
|
||||
u64 ipid;
|
||||
|
||||
if (!bank || type == ACA_HWIP_TYPE_UNKNOW)
|
||||
return false;
|
||||
|
||||
hwip = &aca_hwid_mcatypes[type];
|
||||
if (!hwip->hwid)
|
||||
return false;
|
||||
|
||||
ipid = bank->regs[ACA_REG_IDX_IPID];
|
||||
hwid = ACA_REG__IPID__HARDWAREID(ipid);
|
||||
mcatype = ACA_REG__IPID__MCATYPE(ipid);
|
||||
|
||||
return hwip->hwid == hwid && hwip->mcatype == mcatype;
|
||||
}
|
||||
|
||||
static bool aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, enum aca_error_type type)
|
||||
{
|
||||
const struct aca_bank_ops *bank_ops = handle->bank_ops;
|
||||
|
||||
if (!aca_bank_hwip_is_matched(bank, handle->hwip))
|
||||
return false;
|
||||
|
||||
if (!bank_ops->aca_bank_is_valid)
|
||||
return true;
|
||||
|
||||
return bank_ops->aca_bank_is_valid(handle, bank, type, handle->data);
|
||||
}
|
||||
|
||||
static struct aca_bank_error *new_bank_error(struct aca_error *aerr, struct aca_bank_info *info)
|
||||
{
|
||||
struct aca_bank_error *bank_error;
|
||||
|
||||
bank_error = kvzalloc(sizeof(*bank_error), GFP_KERNEL);
|
||||
if (!bank_error)
|
||||
return NULL;
|
||||
|
||||
INIT_LIST_HEAD(&bank_error->node);
|
||||
memcpy(&bank_error->info, info, sizeof(*info));
|
||||
|
||||
mutex_lock(&aerr->lock);
|
||||
list_add_tail(&bank_error->node, &aerr->list);
|
||||
mutex_unlock(&aerr->lock);
|
||||
|
||||
return bank_error;
|
||||
}
|
||||
|
||||
static struct aca_bank_error *find_bank_error(struct aca_error *aerr, struct aca_bank_info *info)
|
||||
{
|
||||
struct aca_bank_error *bank_error = NULL;
|
||||
struct aca_bank_info *tmp_info;
|
||||
bool found = false;
|
||||
|
||||
mutex_lock(&aerr->lock);
|
||||
list_for_each_entry(bank_error, &aerr->list, node) {
|
||||
tmp_info = &bank_error->info;
|
||||
if (tmp_info->socket_id == info->socket_id &&
|
||||
tmp_info->die_id == info->die_id) {
|
||||
found = true;
|
||||
goto out_unlock;
|
||||
}
|
||||
}
|
||||
|
||||
out_unlock:
|
||||
mutex_unlock(&aerr->lock);
|
||||
|
||||
return found ? bank_error : NULL;
|
||||
}
|
||||
|
||||
static void aca_bank_error_remove(struct aca_error *aerr, struct aca_bank_error *bank_error)
|
||||
{
|
||||
if (!aerr || !bank_error)
|
||||
return;
|
||||
|
||||
list_del(&bank_error->node);
|
||||
aerr->nr_errors--;
|
||||
|
||||
kvfree(bank_error);
|
||||
}
|
||||
|
||||
static struct aca_bank_error *get_bank_error(struct aca_error *aerr, struct aca_bank_info *info)
|
||||
{
|
||||
struct aca_bank_error *bank_error;
|
||||
|
||||
if (!aerr || !info)
|
||||
return NULL;
|
||||
|
||||
bank_error = find_bank_error(aerr, info);
|
||||
if (bank_error)
|
||||
return bank_error;
|
||||
|
||||
return new_bank_error(aerr, info);
|
||||
}
|
||||
|
||||
static int aca_log_errors(struct aca_handle *handle, enum aca_error_type type,
|
||||
struct aca_bank_report *report)
|
||||
{
|
||||
struct aca_error_cache *error_cache = &handle->error_cache;
|
||||
struct aca_bank_error *bank_error;
|
||||
struct aca_error *aerr;
|
||||
|
||||
if (!handle || !report)
|
||||
return -EINVAL;
|
||||
|
||||
if (!report->count[type])
|
||||
return 0;
|
||||
|
||||
aerr = &error_cache->errors[type];
|
||||
bank_error = get_bank_error(aerr, &report->info);
|
||||
if (!bank_error)
|
||||
return -ENOMEM;
|
||||
|
||||
bank_error->count[type] += report->count[type];
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int aca_generate_bank_report(struct aca_handle *handle, struct aca_bank *bank,
|
||||
enum aca_error_type type, struct aca_bank_report *report)
|
||||
{
|
||||
const struct aca_bank_ops *bank_ops = handle->bank_ops;
|
||||
|
||||
if (!bank || !report)
|
||||
return -EINVAL;
|
||||
|
||||
if (!bank_ops->aca_bank_generate_report)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
memset(report, 0, sizeof(*report));
|
||||
return bank_ops->aca_bank_generate_report(handle, bank, type,
|
||||
report, handle->data);
|
||||
}
|
||||
|
||||
static int handler_aca_log_bank_error(struct aca_handle *handle, struct aca_bank *bank,
|
||||
enum aca_error_type type, void *data)
|
||||
{
|
||||
struct aca_bank_report report;
|
||||
int ret;
|
||||
|
||||
ret = aca_generate_bank_report(handle, bank, type, &report);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (!report.count[type])
|
||||
return 0;
|
||||
|
||||
ret = aca_log_errors(handle, type, &report);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int aca_dispatch_bank(struct aca_handle_manager *mgr, struct aca_bank *bank,
|
||||
enum aca_error_type type, bank_handler_t handler, void *data)
|
||||
{
|
||||
struct aca_handle *handle;
|
||||
int ret;
|
||||
|
||||
if (list_empty(&mgr->list))
|
||||
return 0;
|
||||
|
||||
list_for_each_entry(handle, &mgr->list, node) {
|
||||
if (!aca_bank_is_valid(handle, bank, type))
|
||||
continue;
|
||||
|
||||
ret = handler(handle, bank, type, data);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int aca_dispatch_banks(struct aca_handle_manager *mgr, struct aca_banks *banks,
|
||||
enum aca_error_type type, bank_handler_t handler, void *data)
|
||||
{
|
||||
struct aca_bank_node *node;
|
||||
struct aca_bank *bank;
|
||||
int ret;
|
||||
|
||||
if (!mgr || !banks)
|
||||
return -EINVAL;
|
||||
|
||||
/* pre check to avoid unnecessary operations */
|
||||
if (list_empty(&mgr->list) || list_empty(&banks->list))
|
||||
return 0;
|
||||
|
||||
list_for_each_entry(node, &banks->list, node) {
|
||||
bank = &node->bank;
|
||||
|
||||
ret = aca_dispatch_bank(mgr, bank, type, handler, data);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int aca_banks_update(struct amdgpu_device *adev, enum aca_error_type type,
|
||||
bank_handler_t handler, void *data)
|
||||
{
|
||||
struct amdgpu_aca *aca = &adev->aca;
|
||||
struct aca_banks banks;
|
||||
u32 count = 0;
|
||||
int ret;
|
||||
|
||||
if (list_empty(&aca->mgr.list))
|
||||
return 0;
|
||||
|
||||
/* NOTE: pmfw only supports UE and CE */
|
||||
if (type == ACA_ERROR_TYPE_DEFERRED)
|
||||
type = ACA_ERROR_TYPE_CE;
|
||||
|
||||
ret = aca_smu_get_valid_aca_count(adev, type, &count);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (!count)
|
||||
return 0;
|
||||
|
||||
aca_banks_init(&banks);
|
||||
|
||||
ret = aca_smu_get_valid_aca_banks(adev, type, 0, count, &banks);
|
||||
if (ret)
|
||||
goto err_release_banks;
|
||||
|
||||
if (list_empty(&banks.list)) {
|
||||
ret = 0;
|
||||
goto err_release_banks;
|
||||
}
|
||||
|
||||
ret = aca_dispatch_banks(&aca->mgr, &banks, type,
|
||||
handler, data);
|
||||
if (ret)
|
||||
goto err_release_banks;
|
||||
|
||||
err_release_banks:
|
||||
aca_banks_release(&banks);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int aca_log_aca_error_data(struct aca_bank_error *bank_error, enum aca_error_type type, struct ras_err_data *err_data)
|
||||
{
|
||||
struct aca_bank_info *info;
|
||||
struct amdgpu_smuio_mcm_config_info mcm_info;
|
||||
u64 count;
|
||||
|
||||
if (type >= ACA_ERROR_TYPE_COUNT)
|
||||
return -EINVAL;
|
||||
|
||||
count = bank_error->count[type];
|
||||
if (!count)
|
||||
return 0;
|
||||
|
||||
info = &bank_error->info;
|
||||
mcm_info.die_id = info->die_id;
|
||||
mcm_info.socket_id = info->socket_id;
|
||||
|
||||
switch (type) {
|
||||
case ACA_ERROR_TYPE_UE:
|
||||
amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, NULL, count);
|
||||
break;
|
||||
case ACA_ERROR_TYPE_CE:
|
||||
amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, NULL, count);
|
||||
break;
|
||||
case ACA_ERROR_TYPE_DEFERRED:
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int aca_log_aca_error(struct aca_handle *handle, enum aca_error_type type, struct ras_err_data *err_data)
|
||||
{
|
||||
struct aca_error_cache *error_cache = &handle->error_cache;
|
||||
struct aca_error *aerr = &error_cache->errors[type];
|
||||
struct aca_bank_error *bank_error, *tmp;
|
||||
|
||||
mutex_lock(&aerr->lock);
|
||||
|
||||
if (list_empty(&aerr->list))
|
||||
goto out_unlock;
|
||||
|
||||
list_for_each_entry_safe(bank_error, tmp, &aerr->list, node) {
|
||||
aca_log_aca_error_data(bank_error, type, err_data);
|
||||
aca_bank_error_remove(aerr, bank_error);
|
||||
}
|
||||
|
||||
out_unlock:
|
||||
mutex_unlock(&aerr->lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle, enum aca_error_type type,
|
||||
struct ras_err_data *err_data)
|
||||
{
|
||||
int ret;
|
||||
|
||||
/* update aca bank to aca source error_cache first */
|
||||
ret = aca_banks_update(adev, type, handler_aca_log_bank_error, NULL);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return aca_log_aca_error(handle, type, err_data);
|
||||
}
|
||||
|
||||
static bool aca_handle_is_valid(struct aca_handle *handle)
|
||||
{
|
||||
if (!handle->mask || !list_empty(&handle->node))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
int amdgpu_aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle,
|
||||
enum aca_error_type type, void *data)
|
||||
{
|
||||
struct ras_err_data *err_data = (struct ras_err_data *)data;
|
||||
|
||||
if (!handle || !err_data)
|
||||
return -EINVAL;
|
||||
|
||||
if (aca_handle_is_valid(handle))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (!(BIT(type) & handle->mask))
|
||||
return 0;
|
||||
|
||||
return __aca_get_error_data(adev, handle, type, err_data);
|
||||
}
|
||||
|
||||
static void aca_error_init(struct aca_error *aerr, enum aca_error_type type)
|
||||
{
|
||||
mutex_init(&aerr->lock);
|
||||
INIT_LIST_HEAD(&aerr->list);
|
||||
aerr->type = type;
|
||||
aerr->nr_errors = 0;
|
||||
}
|
||||
|
||||
static void aca_init_error_cache(struct aca_handle *handle)
|
||||
{
|
||||
struct aca_error_cache *error_cache = &handle->error_cache;
|
||||
int type;
|
||||
|
||||
for (type = ACA_ERROR_TYPE_UE; type < ACA_ERROR_TYPE_COUNT; type++)
|
||||
aca_error_init(&error_cache->errors[type], type);
|
||||
}
|
||||
|
||||
static void aca_error_fini(struct aca_error *aerr)
|
||||
{
|
||||
struct aca_bank_error *bank_error, *tmp;
|
||||
|
||||
mutex_lock(&aerr->lock);
|
||||
list_for_each_entry_safe(bank_error, tmp, &aerr->list, node)
|
||||
aca_bank_error_remove(aerr, bank_error);
|
||||
|
||||
mutex_destroy(&aerr->lock);
|
||||
}
|
||||
|
||||
static void aca_fini_error_cache(struct aca_handle *handle)
|
||||
{
|
||||
struct aca_error_cache *error_cache = &handle->error_cache;
|
||||
int type;
|
||||
|
||||
for (type = ACA_ERROR_TYPE_UE; type < ACA_ERROR_TYPE_COUNT; type++)
|
||||
aca_error_fini(&error_cache->errors[type]);
|
||||
}
|
||||
|
||||
static int add_aca_handle(struct amdgpu_device *adev, struct aca_handle_manager *mgr, struct aca_handle *handle,
|
||||
const char *name, const struct aca_info *ras_info, void *data)
|
||||
{
|
||||
memset(handle, 0, sizeof(*handle));
|
||||
|
||||
handle->adev = adev;
|
||||
handle->mgr = mgr;
|
||||
handle->name = name;
|
||||
handle->hwip = ras_info->hwip;
|
||||
handle->mask = ras_info->mask;
|
||||
handle->bank_ops = ras_info->bank_ops;
|
||||
handle->data = data;
|
||||
aca_init_error_cache(handle);
|
||||
|
||||
INIT_LIST_HEAD(&handle->node);
|
||||
list_add_tail(&handle->node, &mgr->list);
|
||||
mgr->nr_handles++;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static ssize_t aca_sysfs_read(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct aca_handle *handle = container_of(attr, struct aca_handle, aca_attr);
|
||||
|
||||
/* NOTE: the aca cache will be auto cleared once read,
|
||||
* so the driver should unify the query entry point and forward requests to the RAS query interface directly */
|
||||
return amdgpu_ras_aca_sysfs_read(dev, attr, handle, buf, handle->data);
|
||||
}
|
||||
|
||||
static int add_aca_sysfs(struct amdgpu_device *adev, struct aca_handle *handle)
|
||||
{
|
||||
struct device_attribute *aca_attr = &handle->aca_attr;
|
||||
|
||||
snprintf(handle->attr_name, sizeof(handle->attr_name) - 1, "aca_%s", handle->name);
|
||||
aca_attr->show = aca_sysfs_read;
|
||||
aca_attr->attr.name = handle->attr_name;
|
||||
aca_attr->attr.mode = S_IRUGO;
|
||||
sysfs_attr_init(&aca_attr->attr);
|
||||
|
||||
return sysfs_add_file_to_group(&adev->dev->kobj,
|
||||
&aca_attr->attr,
|
||||
"ras");
|
||||
}
|
||||
|
||||
int amdgpu_aca_add_handle(struct amdgpu_device *adev, struct aca_handle *handle,
|
||||
const char *name, const struct aca_info *ras_info, void *data)
|
||||
{
|
||||
struct amdgpu_aca *aca = &adev->aca;
|
||||
int ret;
|
||||
|
||||
if (!amdgpu_aca_is_enabled(adev))
|
||||
return 0;
|
||||
|
||||
ret = add_aca_handle(adev, &aca->mgr, handle, name, ras_info, data);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return add_aca_sysfs(adev, handle);
|
||||
}
|
||||
|
||||
static void remove_aca_handle(struct aca_handle *handle)
|
||||
{
|
||||
struct aca_handle_manager *mgr = handle->mgr;
|
||||
|
||||
aca_fini_error_cache(handle);
|
||||
list_del(&handle->node);
|
||||
mgr->nr_handles--;
|
||||
}
|
||||
|
||||
static void remove_aca_sysfs(struct aca_handle *handle)
|
||||
{
|
||||
struct amdgpu_device *adev = handle->adev;
|
||||
struct device_attribute *aca_attr = &handle->aca_attr;
|
||||
|
||||
if (adev->dev->kobj.sd)
|
||||
sysfs_remove_file_from_group(&adev->dev->kobj,
|
||||
&aca_attr->attr,
|
||||
"ras");
|
||||
}
|
||||
|
||||
void amdgpu_aca_remove_handle(struct aca_handle *handle)
|
||||
{
|
||||
if (!handle || list_empty(&handle->node))
|
||||
return;
|
||||
|
||||
remove_aca_sysfs(handle);
|
||||
remove_aca_handle(handle);
|
||||
}
|
||||
|
||||
static int aca_manager_init(struct aca_handle_manager *mgr)
|
||||
{
|
||||
INIT_LIST_HEAD(&mgr->list);
|
||||
mgr->nr_handles = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void aca_manager_fini(struct aca_handle_manager *mgr)
|
||||
{
|
||||
struct aca_handle *handle, *tmp;
|
||||
|
||||
list_for_each_entry_safe(handle, tmp, &mgr->list, node)
|
||||
amdgpu_aca_remove_handle(handle);
|
||||
}
|
||||
|
||||
bool amdgpu_aca_is_enabled(struct amdgpu_device *adev)
|
||||
{
|
||||
return adev->aca.is_enabled;
|
||||
}
|
||||
|
||||
int amdgpu_aca_init(struct amdgpu_device *adev)
|
||||
{
|
||||
struct amdgpu_aca *aca = &adev->aca;
|
||||
int ret;
|
||||
|
||||
ret = aca_manager_init(&aca->mgr);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void amdgpu_aca_fini(struct amdgpu_device *adev)
|
||||
{
|
||||
struct amdgpu_aca *aca = &adev->aca;
|
||||
|
||||
aca_manager_fini(&aca->mgr);
|
||||
}
|
||||
|
||||
int amdgpu_aca_reset(struct amdgpu_device *adev)
|
||||
{
|
||||
amdgpu_aca_fini(adev);
|
||||
|
||||
return amdgpu_aca_init(adev);
|
||||
}
|
||||
|
||||
void amdgpu_aca_set_smu_funcs(struct amdgpu_device *adev, const struct aca_smu_funcs *smu_funcs)
|
||||
{
|
||||
struct amdgpu_aca *aca = &adev->aca;
|
||||
|
||||
WARN_ON(aca->smu_funcs);
|
||||
aca->smu_funcs = smu_funcs;
|
||||
}
|
||||
|
||||
int aca_bank_info_decode(struct aca_bank *bank, struct aca_bank_info *info)
|
||||
{
|
||||
u64 ipid;
|
||||
u32 instidhi, instidlo;
|
||||
|
||||
if (!bank || !info)
|
||||
return -EINVAL;
|
||||
|
||||
ipid = bank->regs[ACA_REG_IDX_IPID];
|
||||
info->hwid = ACA_REG__IPID__HARDWAREID(ipid);
|
||||
info->mcatype = ACA_REG__IPID__MCATYPE(ipid);
|
||||
/*
|
||||
* Unified DieID Format: SAASS. A:AID, S:Socket.
|
||||
* Unified DieID[4:4] = InstanceId[0:0]
|
||||
* Unified DieID[0:3] = InstanceIdHi[0:3]
|
||||
*/
|
||||
instidhi = ACA_REG__IPID__INSTANCEIDHI(ipid);
|
||||
instidlo = ACA_REG__IPID__INSTANCEIDLO(ipid);
|
||||
info->die_id = ((instidhi >> 2) & 0x03);
|
||||
info->socket_id = ((instidlo & 0x1) << 2) | (instidhi & 0x03);
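	/* Worked example with illustrative register values: for an IPID with
	 * InstanceIdHi = 0xD (0b1101) and InstanceIdLo bit 0 = 1:
	 *   die_id    = (0xD >> 2) & 0x3              = 0x3
	 *   socket_id = ((1 & 0x1) << 2) | (0xD & 0x3) = 0x4 | 0x1 = 0x5
	 */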
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int aca_bank_get_error_code(struct amdgpu_device *adev, struct aca_bank *bank)
|
||||
{
|
||||
int error_code;
|
||||
|
||||
switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
|
||||
case IP_VERSION(13, 0, 6):
|
||||
if (!(adev->flags & AMD_IS_APU) && adev->pm.fw_version >= 0x00555600) {
|
||||
error_code = ACA_REG__SYND__ERRORINFORMATION(bank->regs[ACA_REG_IDX_SYND]);
|
||||
return error_code & 0xff;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
/* NOTE: the true error code is encoded in status.errorcode[0:7] */
|
||||
error_code = ACA_REG__STATUS__ERRORCODE(bank->regs[ACA_REG_IDX_STATUS]);
|
||||
|
||||
return error_code & 0xff;
|
||||
}
|
||||
|
||||
int aca_bank_check_error_codes(struct amdgpu_device *adev, struct aca_bank *bank, int *err_codes, int size)
|
||||
{
|
||||
int i, error_code;
|
||||
|
||||
if (!bank || !err_codes)
|
||||
return -EINVAL;
|
||||
|
||||
error_code = aca_bank_get_error_code(adev, bank);
|
||||
for (i = 0; i < size; i++) {
|
||||
if (err_codes[i] == error_code)
|
||||
return 0;
|
||||
}
|
||||
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
int amdgpu_aca_smu_set_debug_mode(struct amdgpu_device *adev, bool en)
|
||||
{
|
||||
struct amdgpu_aca *aca = &adev->aca;
|
||||
const struct aca_smu_funcs *smu_funcs = aca->smu_funcs;
|
||||
|
||||
if (!smu_funcs || !smu_funcs->set_debug_mode)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
return smu_funcs->set_debug_mode(adev, en);
|
||||
}
|
||||
|
||||
#if defined(CONFIG_DEBUG_FS)
|
||||
static int amdgpu_aca_smu_debug_mode_set(void *data, u64 val)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)data;
|
||||
int ret;
|
||||
|
||||
ret = amdgpu_ras_set_aca_debug_mode(adev, val ? true : false);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
dev_info(adev->dev, "amdgpu set smu aca debug mode %s success\n", val ? "on" : "off");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void aca_dump_entry(struct seq_file *m, struct aca_bank *bank, enum aca_error_type type, int idx)
|
||||
{
|
||||
struct aca_bank_info info;
|
||||
int i, ret;
|
||||
|
||||
ret = aca_bank_info_decode(bank, &info);
|
||||
if (ret)
|
||||
return;
|
||||
|
||||
seq_printf(m, "aca entry[%d].type: %s\n", idx, type == ACA_ERROR_TYPE_UE ? "UE" : "CE");
|
||||
seq_printf(m, "aca entry[%d].info: socketid:%d aid:%d hwid:0x%03x mcatype:0x%04x\n",
|
||||
idx, info.socket_id, info.die_id, info.hwid, info.mcatype);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(aca_regs); i++)
|
||||
seq_printf(m, "aca entry[%d].regs[%d]: 0x%016llx\n", idx, aca_regs[i].reg_idx, bank->regs[aca_regs[i].reg_idx]);
|
||||
}
|
||||
|
||||
struct aca_dump_context {
|
||||
struct seq_file *m;
|
||||
int idx;
|
||||
};
|
||||
|
||||
static int handler_aca_bank_dump(struct aca_handle *handle, struct aca_bank *bank,
|
||||
enum aca_error_type type, void *data)
|
||||
{
|
||||
struct aca_dump_context *ctx = (struct aca_dump_context *)data;
|
||||
|
||||
aca_dump_entry(ctx->m, bank, type, ctx->idx++);
|
||||
|
||||
return handler_aca_log_bank_error(handle, bank, type, NULL);
|
||||
}
|
||||
|
||||
static int aca_dump_show(struct seq_file *m, enum aca_error_type type)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
|
||||
struct aca_dump_context context = {
|
||||
.m = m,
|
||||
.idx = 0,
|
||||
};
|
||||
|
||||
return aca_banks_update(adev, type, handler_aca_bank_dump, (void *)&context);
|
||||
}
|
||||
|
||||
static int aca_dump_ce_show(struct seq_file *m, void *unused)
|
||||
{
|
||||
return aca_dump_show(m, ACA_ERROR_TYPE_CE);
|
||||
}
|
||||
|
||||
static int aca_dump_ce_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
return single_open(file, aca_dump_ce_show, inode->i_private);
|
||||
}
|
||||
|
||||
static const struct file_operations aca_ce_dump_debug_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.open = aca_dump_ce_open,
|
||||
.read = seq_read,
|
||||
.llseek = seq_lseek,
|
||||
.release = single_release,
|
||||
};
|
||||
|
||||
static int aca_dump_ue_show(struct seq_file *m, void *unused)
|
||||
{
|
||||
return aca_dump_show(m, ACA_ERROR_TYPE_UE);
|
||||
}
|
||||
|
||||
static int aca_dump_ue_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
return single_open(file, aca_dump_ue_show, inode->i_private);
|
||||
}
|
||||
|
||||
static const struct file_operations aca_ue_dump_debug_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.open = aca_dump_ue_open,
|
||||
.read = seq_read,
|
||||
.llseek = seq_lseek,
|
||||
.release = single_release,
|
||||
};
|
||||
|
||||
DEFINE_DEBUGFS_ATTRIBUTE(aca_debug_mode_fops, NULL, amdgpu_aca_smu_debug_mode_set, "%llu\n");
|
||||
#endif
|
||||
|
||||
void amdgpu_aca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root)
|
||||
{
|
||||
#if defined(CONFIG_DEBUG_FS)
|
||||
if (!root || adev->ip_versions[MP1_HWIP][0] != IP_VERSION(13, 0, 6))
|
||||
return;
|
||||
|
||||
debugfs_create_file("aca_debug_mode", 0200, root, adev, &aca_debug_mode_fops);
|
||||
debugfs_create_file("aca_ue_dump", 0400, root, adev, &aca_ue_dump_debug_fops);
|
||||
debugfs_create_file("aca_ce_dump", 0400, root, adev, &aca_ce_dump_debug_fops);
|
||||
#endif
|
||||
}
|
drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h (new file, 202 lines)
@ -0,0 +1,202 @@
|
||||
/*
|
||||
* Copyright 2023 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef __AMDGPU_ACA_H__
|
||||
#define __AMDGPU_ACA_H__
|
||||
|
||||
#include <linux/list.h>
|
||||
|
||||
#define ACA_MAX_REGS_COUNT (16)
|
||||
|
||||
#define ACA_REG_FIELD(x, h, l) (((x) & GENMASK_ULL(h, l)) >> l)
|
||||
#define ACA_REG__STATUS__VAL(x) ACA_REG_FIELD(x, 63, 63)
|
||||
#define ACA_REG__STATUS__OVERFLOW(x) ACA_REG_FIELD(x, 62, 62)
|
||||
#define ACA_REG__STATUS__UC(x) ACA_REG_FIELD(x, 61, 61)
|
||||
#define ACA_REG__STATUS__EN(x) ACA_REG_FIELD(x, 60, 60)
|
||||
#define ACA_REG__STATUS__MISCV(x) ACA_REG_FIELD(x, 59, 59)
|
||||
#define ACA_REG__STATUS__ADDRV(x) ACA_REG_FIELD(x, 58, 58)
|
||||
#define ACA_REG__STATUS__PCC(x) ACA_REG_FIELD(x, 57, 57)
|
||||
#define ACA_REG__STATUS__ERRCOREIDVAL(x) ACA_REG_FIELD(x, 56, 56)
|
||||
#define ACA_REG__STATUS__TCC(x) ACA_REG_FIELD(x, 55, 55)
|
||||
#define ACA_REG__STATUS__SYNDV(x) ACA_REG_FIELD(x, 53, 53)
|
||||
#define ACA_REG__STATUS__CECC(x) ACA_REG_FIELD(x, 46, 46)
|
||||
#define ACA_REG__STATUS__UECC(x) ACA_REG_FIELD(x, 45, 45)
|
||||
#define ACA_REG__STATUS__DEFERRED(x) ACA_REG_FIELD(x, 44, 44)
|
||||
#define ACA_REG__STATUS__POISON(x) ACA_REG_FIELD(x, 43, 43)
|
||||
#define ACA_REG__STATUS__SCRUB(x) ACA_REG_FIELD(x, 40, 40)
|
||||
#define ACA_REG__STATUS__ERRCOREID(x) ACA_REG_FIELD(x, 37, 32)
|
||||
#define ACA_REG__STATUS__ADDRLSB(x) ACA_REG_FIELD(x, 29, 24)
|
||||
#define ACA_REG__STATUS__ERRORCODEEXT(x) ACA_REG_FIELD(x, 21, 16)
|
||||
#define ACA_REG__STATUS__ERRORCODE(x) ACA_REG_FIELD(x, 15, 0)
|
||||
|
||||
#define ACA_REG__IPID__MCATYPE(x) ACA_REG_FIELD(x, 63, 48)
|
||||
#define ACA_REG__IPID__INSTANCEIDHI(x) ACA_REG_FIELD(x, 47, 44)
|
||||
#define ACA_REG__IPID__HARDWAREID(x) ACA_REG_FIELD(x, 43, 32)
|
||||
#define ACA_REG__IPID__INSTANCEIDLO(x) ACA_REG_FIELD(x, 31, 0)
|
||||
|
||||
#define ACA_REG__MISC0__VALID(x) ACA_REG_FIELD(x, 63, 63)
|
||||
#define ACA_REG__MISC0__OVRFLW(x) ACA_REG_FIELD(x, 48, 48)
|
||||
#define ACA_REG__MISC0__ERRCNT(x) ACA_REG_FIELD(x, 43, 32)
|
||||
|
||||
#define ACA_REG__SYND__ERRORINFORMATION(x) ACA_REG_FIELD(x, 17, 0)
|
||||
|
||||
/* NOTE: The following error codes refer to the SMU header file */
|
||||
#define ACA_EXTERROR_CODE_CE 0x3a
|
||||
#define ACA_EXTERROR_CODE_FAULT 0x3b
|
||||
|
||||
#define ACA_ERROR_UE_MASK BIT_MASK(ACA_ERROR_TYPE_UE)
|
||||
#define ACA_ERROR_CE_MASK BIT_MASK(ACA_ERROR_TYPE_CE)
|
||||
#define ACA_ERROR_DEFERRED_MASK BIT_MASK(ACA_ERROR_TYPE_DEFERRED)
|
||||
|
||||
enum aca_reg_idx {
|
||||
ACA_REG_IDX_CTL = 0,
|
||||
ACA_REG_IDX_STATUS = 1,
|
||||
ACA_REG_IDX_ADDR = 2,
|
||||
ACA_REG_IDX_MISC0 = 3,
|
||||
ACA_REG_IDX_CONFG = 4,
|
||||
ACA_REG_IDX_IPID = 5,
|
||||
ACA_REG_IDX_SYND = 6,
|
||||
ACA_REG_IDX_DESTAT = 8,
|
||||
ACA_REG_IDX_DEADDR = 9,
|
||||
ACA_REG_IDX_CTL_MASK = 10,
|
||||
ACA_REG_IDX_COUNT = 16,
|
||||
};
|
||||
|
||||
enum aca_hwip_type {
|
||||
ACA_HWIP_TYPE_UNKNOW = -1,
|
||||
ACA_HWIP_TYPE_PSP = 0,
|
||||
ACA_HWIP_TYPE_UMC,
|
||||
ACA_HWIP_TYPE_SMU,
|
||||
ACA_HWIP_TYPE_PCS_XGMI,
|
||||
ACA_HWIP_TYPE_COUNT,
|
||||
};
|
||||
|
||||
enum aca_error_type {
|
||||
ACA_ERROR_TYPE_INVALID = -1,
|
||||
ACA_ERROR_TYPE_UE = 0,
|
||||
ACA_ERROR_TYPE_CE,
|
||||
ACA_ERROR_TYPE_DEFERRED,
|
||||
ACA_ERROR_TYPE_COUNT
|
||||
};
|
||||
|
||||
struct aca_bank {
|
||||
u64 regs[ACA_MAX_REGS_COUNT];
|
||||
};
|
||||
|
||||
struct aca_bank_node {
|
||||
struct aca_bank bank;
|
||||
struct list_head node;
|
||||
};
|
||||
|
||||
struct aca_bank_info {
|
||||
int die_id;
|
||||
int socket_id;
|
||||
int hwid;
|
||||
int mcatype;
|
||||
};
|
||||
|
||||
struct aca_bank_report {
|
||||
struct aca_bank_info info;
|
||||
u64 count[ACA_ERROR_TYPE_COUNT];
|
||||
};
|
||||
|
||||
struct aca_bank_error {
|
||||
struct list_head node;
|
||||
struct aca_bank_info info;
|
||||
u64 count[ACA_ERROR_TYPE_COUNT];
|
||||
};
|
||||
|
||||
struct aca_error {
|
||||
struct list_head list;
|
||||
struct mutex lock;
|
||||
enum aca_error_type type;
|
||||
int nr_errors;
|
||||
};
|
||||
|
||||
struct aca_handle_manager {
|
||||
struct list_head list;
|
||||
int nr_handles;
|
||||
};
|
||||
|
||||
struct aca_error_cache {
|
||||
struct aca_error errors[ACA_ERROR_TYPE_COUNT];
|
||||
};
|
||||
|
||||
struct aca_handle {
|
||||
struct list_head node;
|
||||
enum aca_hwip_type hwip;
|
||||
struct amdgpu_device *adev;
|
||||
struct aca_handle_manager *mgr;
|
||||
struct aca_error_cache error_cache;
|
||||
const struct aca_bank_ops *bank_ops;
|
||||
struct device_attribute aca_attr;
|
||||
char attr_name[64];
|
||||
const char *name;
|
||||
u32 mask;
|
||||
void *data;
|
||||
};
|
||||
|
||||
struct aca_bank_ops {
|
||||
int (*aca_bank_generate_report)(struct aca_handle *handle, struct aca_bank *bank, enum aca_error_type type,
|
||||
struct aca_bank_report *report, void *data);
|
||||
bool (*aca_bank_is_valid)(struct aca_handle *handle, struct aca_bank *bank, enum aca_error_type type,
|
||||
void *data);
|
||||
};
|
||||
|
||||
struct aca_smu_funcs {
|
||||
int max_ue_bank_count;
|
||||
int max_ce_bank_count;
|
||||
int (*set_debug_mode)(struct amdgpu_device *adev, bool enable);
|
||||
int (*get_valid_aca_count)(struct amdgpu_device *adev, enum aca_error_type type, u32 *count);
|
||||
int (*get_valid_aca_bank)(struct amdgpu_device *adev, enum aca_error_type type, int idx, struct aca_bank *bank);
|
||||
};
|
||||
|
||||
struct amdgpu_aca {
|
||||
struct aca_handle_manager mgr;
|
||||
const struct aca_smu_funcs *smu_funcs;
|
||||
bool is_enabled;
|
||||
};
|
||||
|
||||
struct aca_info {
|
||||
enum aca_hwip_type hwip;
|
||||
const struct aca_bank_ops *bank_ops;
|
||||
u32 mask;
|
||||
};
|
||||
|
||||
int amdgpu_aca_init(struct amdgpu_device *adev);
|
||||
void amdgpu_aca_fini(struct amdgpu_device *adev);
|
||||
int amdgpu_aca_reset(struct amdgpu_device *adev);
|
||||
void amdgpu_aca_set_smu_funcs(struct amdgpu_device *adev, const struct aca_smu_funcs *smu_funcs);
|
||||
bool amdgpu_aca_is_enabled(struct amdgpu_device *adev);
|
||||
|
||||
int aca_bank_info_decode(struct aca_bank *bank, struct aca_bank_info *info);
|
||||
int aca_bank_check_error_codes(struct amdgpu_device *adev, struct aca_bank *bank, int *err_codes, int size);
|
||||
|
||||
int amdgpu_aca_add_handle(struct amdgpu_device *adev, struct aca_handle *handle,
|
||||
const char *name, const struct aca_info *aca_info, void *data);
|
||||
void amdgpu_aca_remove_handle(struct aca_handle *handle);
|
||||
int amdgpu_aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle,
|
||||
enum aca_error_type type, void *data);
|
||||
int amdgpu_aca_smu_set_debug_mode(struct amdgpu_device *adev, bool en);
|
||||
void amdgpu_aca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root);
|
||||
#endif
|
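
For orientation, an illustrative-only sketch of how a RAS block might register
with the new ACA framework, based solely on the declarations in amdgpu_aca.h
above. The callback bodies and the "my_block" naming are hypothetical; the UMC
and XGMI users added elsewhere in this series carry the authoritative logic.

static bool my_block_aca_bank_is_valid(struct aca_handle *handle,
				       struct aca_bank *bank,
				       enum aca_error_type type, void *data)
{
	/* Claim only banks whose STATUS register reports a valid error. */
	return ACA_REG__STATUS__VAL(bank->regs[ACA_REG_IDX_STATUS]);
}

static int my_block_aca_bank_generate_report(struct aca_handle *handle,
					     struct aca_bank *bank,
					     enum aca_error_type type,
					     struct aca_bank_report *report,
					     void *data)
{
	int ret;

	/* Fill socket/die/hwid/mcatype from the bank's IPID register. */
	ret = aca_bank_info_decode(bank, &report->info);
	if (ret)
		return ret;

	/* Count one error of the requested type for this bank. */
	report->count[type] = 1;
	return 0;
}

static const struct aca_bank_ops my_block_aca_bank_ops = {
	.aca_bank_is_valid = my_block_aca_bank_is_valid,
	.aca_bank_generate_report = my_block_aca_bank_generate_report,
};

static const struct aca_info my_block_aca_info = {
	.hwip = ACA_HWIP_TYPE_UMC,
	.bank_ops = &my_block_aca_bank_ops,
	.mask = ACA_ERROR_UE_MASK | ACA_ERROR_CE_MASK,
};

/* Registration, e.g. from the block's late init; cached errors are later
 * pulled through amdgpu_aca_get_error_data() on the RAS query path.
 */
static int my_block_aca_init(struct amdgpu_device *adev, struct aca_handle *handle)
{
	return amdgpu_aca_add_handle(adev, handle, "my_block",
				     &my_block_aca_info, NULL);
}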
@@ -742,9 +742,10 @@ void amdgpu_amdkfd_debug_mem_fence(struct amdgpu_device *adev)
	amdgpu_device_flush_hdp(adev, NULL);
}

void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev, bool reset)
void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev,
			enum amdgpu_ras_block block, bool reset)
{
	amdgpu_umc_poison_handler(adev, reset);
	amdgpu_umc_poison_handler(adev, block, reset);
}

int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev,
@@ -193,6 +193,9 @@ struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo);
int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni,
				unsigned long cur_seq, struct kgd_mem *mem);
int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo,
					uint32_t domain,
					struct dma_fence *fence);
#else
static inline
bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
@@ -218,6 +221,13 @@ int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni,
{
	return 0;
}
static inline
int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo,
					uint32_t domain,
					struct dma_fence *fence)
{
	return 0;
}
#endif
/* Shared API */
int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size,
@@ -326,7 +336,7 @@ void amdgpu_amdkfd_debug_mem_fence(struct amdgpu_device *adev);
int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev,
				  struct tile_config *config);
void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev,
			bool reset);
			enum amdgpu_ras_block block, bool reset);
bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem *mem);
void amdgpu_amdkfd_block_mmu_notifications(void *p);
int amdgpu_amdkfd_criu_resume(void *p);
@@ -426,9 +426,9 @@ validate_fail:
	return ret;
}

static int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo,
					       uint32_t domain,
					       struct dma_fence *fence)
int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo,
					uint32_t domain,
					struct dma_fence *fence)
{
	int ret = amdgpu_bo_reserve(bo, false);

@@ -464,13 +464,15 @@ static int amdgpu_amdkfd_validate_vm_bo(void *_unused, struct amdgpu_bo *bo)
 * again. Page directories are only updated after updating page
 * tables.
 */
static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm,
				 struct ww_acquire_ctx *ticket)
{
	struct amdgpu_bo *pd = vm->root.bo;
	struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
	int ret;

	ret = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_amdkfd_validate_vm_bo, NULL);
	ret = amdgpu_vm_validate(adev, vm, ticket,
				 amdgpu_amdkfd_validate_vm_bo, NULL);
	if (ret) {
		pr_err("failed to validate PT BOs\n");
		return ret;
@@ -1310,14 +1312,15 @@ update_gpuvm_pte_failed:
	return ret;
}

static int process_validate_vms(struct amdkfd_process_info *process_info)
static int process_validate_vms(struct amdkfd_process_info *process_info,
				struct ww_acquire_ctx *ticket)
{
	struct amdgpu_vm *peer_vm;
	int ret;

	list_for_each_entry(peer_vm, &process_info->vm_list_head,
			    vm_list_node) {
		ret = vm_validate_pt_pd_bos(peer_vm);
		ret = vm_validate_pt_pd_bos(peer_vm, ticket);
		if (ret)
			return ret;
	}
@@ -1402,7 +1405,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
	ret = amdgpu_bo_reserve(vm->root.bo, true);
	if (ret)
		goto reserve_pd_fail;
	ret = vm_validate_pt_pd_bos(vm);
	ret = vm_validate_pt_pd_bos(vm, NULL);
	if (ret) {
		pr_err("validate_pt_pd_bos() failed\n");
		goto validate_pd_fail;
@@ -2043,7 +2046,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
	    bo->tbo.resource->mem_type == TTM_PL_SYSTEM)
		is_invalid_userptr = true;

	ret = vm_validate_pt_pd_bos(avm);
	ret = vm_validate_pt_pd_bos(avm, NULL);
	if (unlikely(ret))
		goto out_unreserve;

@@ -2136,7 +2139,7 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
		goto unreserve_out;
	}

	ret = vm_validate_pt_pd_bos(avm);
	ret = vm_validate_pt_pd_bos(avm, NULL);
	if (unlikely(ret))
		goto unreserve_out;

@@ -2634,7 +2637,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
		}
	}

	ret = process_validate_vms(process_info);
	ret = process_validate_vms(process_info, NULL);
	if (ret)
		goto unreserve_out;

@@ -2894,11 +2897,6 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu *

	amdgpu_sync_create(&sync_obj);

	/* Validate PDs and PTs */
	ret = process_validate_vms(process_info);
	if (ret)
		goto validate_map_fail;

	/* Validate BOs and map them to GPUVM (update VM page tables). */
	list_for_each_entry(mem, &process_info->kfd_bo_list,
			    validate_list) {
@@ -2949,6 +2947,13 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu *
	if (failed_size)
		pr_debug("0x%lx/0x%lx in system\n", failed_size, total_size);

	/* Validate PDs, PTs and evicted DMABuf imports last. Otherwise BO
	 * validations above would invalidate DMABuf imports again.
	 */
	ret = process_validate_vms(process_info, &exec.ticket);
	if (ret)
		goto validate_map_fail;

	/* Update mappings not managed by KFD */
	list_for_each_entry(peer_vm, &process_info->vm_list_head,
			    vm_list_node) {
@@ -3020,7 +3025,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu *
				   &process_info->eviction_fence->base,
				   DMA_RESV_USAGE_BOOKKEEP);
	}
	/* Attach eviction fence to PD / PT BOs */
	/* Attach eviction fence to PD / PT BOs and DMABuf imports */
	list_for_each_entry(peer_vm, &process_info->vm_list_head,
			    vm_list_node) {
		struct amdgpu_bo *bo = peer_vm->root.bo;
@@ -1018,7 +1018,8 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev,
		if (clock_type == COMPUTE_ENGINE_PLL_PARAM) {
			args.v3.ulClockParams = cpu_to_le32((clock_type << 24) | clock);

			amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
			amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args,
						  sizeof(args));

			dividers->post_div = args.v3.ucPostDiv;
			dividers->enable_post_div = (args.v3.ucCntlFlag &
@@ -1038,7 +1039,8 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev,
			if (strobe_mode)
				args.v5.ucInputFlag = ATOM_PLL_INPUT_FLAG_PLL_STROBE_MODE_EN;

			amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
			amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args,
						  sizeof(args));

			dividers->post_div = args.v5.ucPostDiv;
			dividers->enable_post_div = (args.v5.ucCntlFlag &
@@ -1056,7 +1058,8 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev,
		/* fusion */
		args.v4.ulClock = cpu_to_le32(clock);	/* 10 khz */

		amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
		amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args,
					  sizeof(args));

		dividers->post_divider = dividers->post_div = args.v4.ucPostDiv;
		dividers->real_clock = le32_to_cpu(args.v4.ulClock);
@@ -1067,7 +1070,8 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev,
		args.v6_in.ulClock.ulComputeClockFlag = clock_type;
		args.v6_in.ulClock.ulClockFreq = cpu_to_le32(clock);	/* 10 khz */

		amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
		amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args,
					  sizeof(args));

		dividers->whole_fb_div = le16_to_cpu(args.v6_out.ulFbDiv.usFbDiv);
		dividers->frac_fb_div = le16_to_cpu(args.v6_out.ulFbDiv.usFbDivFrac);
@@ -1109,7 +1113,8 @@ int amdgpu_atombios_get_memory_pll_dividers(struct amdgpu_device *adev,
	if (strobe_mode)
		args.ucInputFlag |= MPLL_INPUT_FLAG_STROBE_MODE_EN;

	amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
	amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args,
				  sizeof(args));

	mpll_param->clkfrac = le16_to_cpu(args.ulFbDiv.usFbDivFrac);
	mpll_param->clkf = le16_to_cpu(args.ulFbDiv.usFbDiv);
@@ -1151,7 +1156,8 @@ void amdgpu_atombios_set_engine_dram_timings(struct amdgpu_device *adev,
	if (mem_clock)
		args.sReserved.ulClock = cpu_to_le32(mem_clock & SET_CLOCK_FREQ_MASK);

	amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
	amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args,
				  sizeof(args));
}

void amdgpu_atombios_get_default_voltages(struct amdgpu_device *adev,
@@ -1205,7 +1211,8 @@ int amdgpu_atombios_get_max_vddc(struct amdgpu_device *adev, u8 voltage_type,
		args.v2.ucVoltageMode = 0;
		args.v2.usVoltageLevel = 0;

		amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
		amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args,
					  sizeof(args));

		*voltage = le16_to_cpu(args.v2.usVoltageLevel);
		break;
@@ -1214,7 +1221,8 @@ int amdgpu_atombios_get_max_vddc(struct amdgpu_device *adev, u8 voltage_type,
		args.v3.ucVoltageMode = ATOM_GET_VOLTAGE_LEVEL;
		args.v3.usVoltageLevel = cpu_to_le16(voltage_id);

		amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
		amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args,
					  sizeof(args));

		*voltage = le16_to_cpu(args.v3.usVoltageLevel);
		break;
@@ -941,5 +941,6 @@ int amdgpu_atomfirmware_asic_init(struct amdgpu_device *adev, bool fb_reset)
		return -EINVAL;
	}

	return amdgpu_atom_execute_table(ctx, ATOM_CMD_INIT, (uint32_t *)&asic_init_ps_v2_1);
	return amdgpu_atom_execute_table(ctx, ATOM_CMD_INIT, (uint32_t *)&asic_init_ps_v2_1,
					 sizeof(asic_init_ps_v2_1));
}
@@ -36,7 +36,7 @@ int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev);
int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev);
bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev);
bool amdgpu_atomfirmware_sram_ecc_supported(struct amdgpu_device *adev);
bool amdgpu_atomfirmware_ras_rom_addr(struct amdgpu_device *adev, uint8_t* i2c_address);
bool amdgpu_atomfirmware_ras_rom_addr(struct amdgpu_device *adev, uint8_t *i2c_address);
bool amdgpu_atomfirmware_mem_training_supported(struct amdgpu_device *adev);
bool amdgpu_atomfirmware_dynamic_boot_config_supported(struct amdgpu_device *adev);
int amdgpu_atomfirmware_get_fw_reserved_fb_size(struct amdgpu_device *adev);
@@ -952,10 +952,10 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
	p->bytes_moved = 0;
	p->bytes_moved_vis = 0;

	r = amdgpu_vm_validate_pt_bos(p->adev, &fpriv->vm,
				      amdgpu_cs_bo_validate, p);
	r = amdgpu_vm_validate(p->adev, &fpriv->vm, NULL,
			       amdgpu_cs_bo_validate, p);
	if (r) {
		DRM_ERROR("amdgpu_vm_validate_pt_bos() failed.\n");
		DRM_ERROR("amdgpu_vm_validate() failed.\n");
		goto out_free_user_pages;
	}

@@ -30,7 +30,7 @@ uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev)
{
	uint64_t addr = adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT;

	addr -= AMDGPU_VA_RESERVED_SIZE;
	addr -= AMDGPU_VA_RESERVED_CSA_SIZE;
	addr = amdgpu_gmc_sign_extend(addr);

	return addr;
@@ -96,6 +96,9 @@ MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
#define AMDGPU_RESUME_MS		2000
#define AMDGPU_MAX_RETRY_LIMIT		2
#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
#define AMDGPU_PCIE_INDEX_FALLBACK (0x38 >> 2)
#define AMDGPU_PCIE_INDEX_HI_FALLBACK (0x44 >> 2)
#define AMDGPU_PCIE_DATA_FALLBACK (0x3C >> 2)

static const struct drm_driver amdgpu_kms_driver;

@@ -781,12 +784,22 @@ u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
	void __iomem *pcie_index_hi_offset;
	void __iomem *pcie_data_offset;

	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
	if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
		pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
	else
	if (unlikely(!adev->nbio.funcs)) {
		pcie_index = AMDGPU_PCIE_INDEX_FALLBACK;
		pcie_data = AMDGPU_PCIE_DATA_FALLBACK;
	} else {
		pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
		pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
	}

	if (reg_addr >> 32) {
		if (unlikely(!adev->nbio.funcs))
			pcie_index_hi = AMDGPU_PCIE_INDEX_HI_FALLBACK;
		else
			pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
	} else {
		pcie_index_hi = 0;
	}

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
@@ -1218,8 +1231,6 @@ static int amdgpu_device_asic_init(struct amdgpu_device *adev)
	    amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
		amdgpu_psp_wait_for_bootloader(adev);
		ret = amdgpu_atomfirmware_asic_init(adev, true);
		/* TODO: check the return val and stop device initialization if boot fails */
		amdgpu_psp_query_boot_status(adev);
		return ret;
	} else {
		return amdgpu_atom_asic_init(adev->mode_info.atom_context);
@@ -1442,6 +1453,10 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
	if (amdgpu_sriov_vf(adev))
		return 0;

	/* PCI_EXT_CAP_ID_VNDR extended capability is located at 0x100 */
	if (!pci_find_ext_capability(adev->pdev, PCI_EXT_CAP_ID_VNDR))
		DRM_WARN("System can't access extended configuration space,please check!!\n");

	/* skip if the bios has already enabled large BAR */
	if (adev->gmc.real_vram_size &&
	    (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
@@ -5680,6 +5695,7 @@ retry:	/* Rest of adevs pre asic reset from XGMI hive. */
		/* Aldebaran and gfx_11_0_3 support ras in SRIOV, so need resume ras during reset */
		if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
			    IP_VERSION(9, 4, 2) ||
		    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
		    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3))
			amdgpu_ras_resume(adev);
	} else {
@@ -6101,6 +6117,20 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
	struct amdgpu_reset_context reset_context;
	u32 memsize;
	struct list_head device_list;
	struct amdgpu_hive_info *hive;
	int hive_ras_recovery = 0;
	struct amdgpu_ras *ras;

	/* PCI error slot reset should be skipped During RAS recovery */
	hive = amdgpu_get_xgmi_hive(adev);
	if (hive) {
		hive_ras_recovery = atomic_read(&hive->ras_recovery);
		amdgpu_put_xgmi_hive(hive);
	}
	ras = amdgpu_ras_get_context(adev);
	if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) &&
	    ras && (atomic_read(&ras->in_recovery) || hive_ras_recovery))
		return PCI_ERS_RESULT_RECOVERED;

	DRM_INFO("PCI error: slot reset callback!!\n");

@@ -27,6 +27,7 @@
#include "amdgpu_discovery.h"
#include "soc15_hw_ip.h"
#include "discovery.h"
#include "amdgpu_ras.h"

#include "soc15.h"
#include "gfx_v9_0.h"
@@ -98,6 +99,7 @@
#define FIRMWARE_IP_DISCOVERY "amdgpu/ip_discovery.bin"
MODULE_FIRMWARE(FIRMWARE_IP_DISCOVERY);

#define mmIP_DISCOVERY_VERSION  0x16A00
#define mmRCC_CONFIG_MEMSIZE	0xde3
#define mmMP0_SMN_C2PMSG_33	0x16061
#define mmMM_INDEX		0x0
@@ -518,7 +520,9 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev)
out:
	kfree(adev->mman.discovery_bin);
	adev->mman.discovery_bin = NULL;

	if ((amdgpu_discovery != 2) &&
	    (RREG32(mmIP_DISCOVERY_VERSION) == 4))
		amdgpu_ras_query_boot_status(adev, 4);
	return r;
}

@@ -1278,11 +1282,10 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
				 * 0b10 : encode is disabled
				 * 0b01 : decode is disabled
				 */
				adev->vcn.vcn_config[adev->vcn.num_vcn_inst] =
					ip->revision & 0xc0;
				ip->revision &= ~0xc0;
				if (adev->vcn.num_vcn_inst <
				    AMDGPU_MAX_VCN_INSTANCES) {
					adev->vcn.vcn_config[adev->vcn.num_vcn_inst] =
						ip->revision & 0xc0;
					adev->vcn.num_vcn_inst++;
					adev->vcn.inst_mask |=
						(1U << ip->instance_number);
@@ -1293,6 +1296,7 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
						adev->vcn.num_vcn_inst + 1,
						AMDGPU_MAX_VCN_INSTANCES);
				}
				ip->revision &= ~0xc0;
			}
			if (le16_to_cpu(ip->hw_id) == SDMA0_HWID ||
			    le16_to_cpu(ip->hw_id) == SDMA1_HWID ||
@@ -377,6 +377,10 @@ amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach)
	struct amdgpu_vm_bo_base *bo_base;
	int r;

	/* FIXME: This should be after the "if", but needs a fix to make sure
	 * DMABuf imports are initialized in the right VM list.
	 */
	amdgpu_vm_bo_invalidate(adev, bo, false);
	if (!bo->tbo.resource || bo->tbo.resource->mem_type == TTM_PL_SYSTEM)
		return;

@@ -366,7 +366,7 @@ module_param_named(aspm, amdgpu_aspm, int, 0444);
 * Setting the value to 0 disables this functionality.
 * Setting the value to -2 is auto enabled with power down when displays are attached.
 */
MODULE_PARM_DESC(runpm, "PX runtime pm (2 = force enable with BAMACO, 1 = force enable with BACO, 0 = disable, -1 = auto, -2 = autowith displays)");
MODULE_PARM_DESC(runpm, "PX runtime pm (2 = force enable with BAMACO, 1 = force enable with BACO, 0 = disable, -1 = auto, -2 = auto with displays)");
module_param_named(runpm, amdgpu_runtime_pm, int, 0444);

/**
@@ -593,7 +593,7 @@ module_param_named(timeout_period, amdgpu_watchdog_timer.period, uint, 0644);
#ifdef CONFIG_DRM_AMDGPU_SI

#if IS_ENABLED(CONFIG_DRM_RADEON) || IS_ENABLED(CONFIG_DRM_RADEON_MODULE)
int amdgpu_si_support = 0;
int amdgpu_si_support;
MODULE_PARM_DESC(si_support, "SI support (1 = enabled, 0 = disabled (default))");
#else
int amdgpu_si_support = 1;
@@ -612,7 +612,7 @@ module_param_named(si_support, amdgpu_si_support, int, 0444);
#ifdef CONFIG_DRM_AMDGPU_CIK

#if IS_ENABLED(CONFIG_DRM_RADEON) || IS_ENABLED(CONFIG_DRM_RADEON_MODULE)
int amdgpu_cik_support = 0;
int amdgpu_cik_support;
MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled, 0 = disabled (default))");
#else
int amdgpu_cik_support = 1;
@@ -2476,6 +2476,7 @@ static int amdgpu_pmops_suspend(struct device *dev)
	struct drm_device *drm_dev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(drm_dev);

	adev->suspend_complete = false;
	if (amdgpu_acpi_is_s0ix_active(adev))
		adev->in_s0ix = true;
	else if (amdgpu_acpi_is_s3_active(adev))
@@ -2490,6 +2491,7 @@ static int amdgpu_pmops_suspend_noirq(struct device *dev)
	struct drm_device *drm_dev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(drm_dev);

	adev->suspend_complete = true;
	if (amdgpu_acpi_should_gpu_reset(adev))
		return amdgpu_asic_reset(adev);

@@ -187,7 +187,34 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj,
	else
		++bo_va->ref_count;
	amdgpu_bo_unreserve(abo);
	return 0;

	/* Validate and add eviction fence to DMABuf imports with dynamic
	 * attachment in compute VMs. Re-validation will be done by
	 * amdgpu_vm_validate. Fences are on the reservation shared with the
	 * export, which is currently required to be validated and fenced
	 * already by amdgpu_amdkfd_gpuvm_restore_process_bos.
	 *
	 * Nested locking below for the case that a GEM object is opened in
	 * kfd_mem_export_dmabuf. Since the lock below is only taken for imports,
	 * but not for export, this is a different lock class that cannot lead to
	 * circular lock dependencies.
	 */
	if (!vm->is_compute_context || !vm->process_info)
		return 0;
	if (!obj->import_attach ||
	    !dma_buf_is_dynamic(obj->import_attach->dmabuf))
		return 0;
	mutex_lock_nested(&vm->process_info->lock, 1);
	if (!WARN_ON(!vm->process_info->eviction_fence)) {
		r = amdgpu_amdkfd_bo_validate_and_fence(abo, AMDGPU_GEM_DOMAIN_GTT,
							&vm->process_info->eviction_fence->base);
		if (r)
			dev_warn(adev->dev, "%d: validate_and_fence failed: %d\n",
				 vm->task_info.pid, r);
	}
	mutex_unlock(&vm->process_info->lock);

	return r;
}

static void amdgpu_gem_object_close(struct drm_gem_object *obj,
@@ -682,10 +709,10 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
	uint64_t vm_size;
	int r = 0;

	if (args->va_address < AMDGPU_VA_RESERVED_SIZE) {
	if (args->va_address < AMDGPU_VA_RESERVED_BOTTOM) {
		dev_dbg(dev->dev,
			"va_address 0x%llx is in reserved area 0x%llx\n",
			args->va_address, AMDGPU_VA_RESERVED_SIZE);
			args->va_address, AMDGPU_VA_RESERVED_BOTTOM);
		return -EINVAL;
	}

@@ -701,7 +728,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
	args->va_address &= AMDGPU_GMC_HOLE_MASK;

	vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE;
	vm_size -= AMDGPU_VA_RESERVED_SIZE;
	vm_size -= AMDGPU_VA_RESERVED_TOP;
	if (args->va_address + args->map_size > vm_size) {
		dev_dbg(dev->dev,
			"va_address 0x%llx is in top reserved area 0x%llx\n",
@@ -643,8 +643,8 @@ int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id)
	kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		j = i + xcc_id * adev->gfx.num_compute_rings;
			kiq->pmf->kiq_map_queues(kiq_ring,
						 &adev->gfx.compute_ring[j]);
		kiq->pmf->kiq_map_queues(kiq_ring,
					 &adev->gfx.compute_ring[j]);
	}

	r = amdgpu_ring_test_helper(kiq_ring);
@@ -52,7 +52,7 @@ int amdgpu_gmc_pdb0_alloc(struct amdgpu_device *adev)
	struct amdgpu_bo_param bp;
	u64 vram_size = adev->gmc.xgmi.node_segment_size * adev->gmc.xgmi.num_physical_nodes;
	uint32_t pde0_page_shift = adev->gmc.vmid0_page_table_block_size + 21;
	uint32_t npdes = (vram_size + (1ULL << pde0_page_shift) -1) >> pde0_page_shift;
	uint32_t npdes = (vram_size + (1ULL << pde0_page_shift) - 1) >> pde0_page_shift;

	memset(&bp, 0, sizeof(bp));
	bp.size = PAGE_ALIGN((npdes + 1) * 8);
@@ -746,6 +746,59 @@ error_unlock_reset:
	return r;
}

void amdgpu_gmc_fw_reg_write_reg_wait(struct amdgpu_device *adev,
				      uint32_t reg0, uint32_t reg1,
				      uint32_t ref, uint32_t mask,
				      uint32_t xcc_inst)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_inst];
	struct amdgpu_ring *ring = &kiq->ring;
	signed long r, cnt = 0;
	unsigned long flags;
	uint32_t seq;

	if (adev->mes.ring.sched.ready) {
		amdgpu_mes_reg_write_reg_wait(adev, reg0, reg1,
					      ref, mask);
		return;
	}

	spin_lock_irqsave(&kiq->ring_lock, flags);
	amdgpu_ring_alloc(ring, 32);
	amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1,
					    ref, mask);
	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
	if (r)
		goto failed_undo;

	amdgpu_ring_commit(ring);
	spin_unlock_irqrestore(&kiq->ring_lock, flags);

	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);

	/* don't wait anymore for IRQ context */
	if (r < 1 && in_interrupt())
		goto failed_kiq;

	might_sleep();
	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {

		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
	}

	if (cnt > MAX_KIQ_REG_TRY)
		goto failed_kiq;

	return;

failed_undo:
	amdgpu_ring_undo(ring);
	spin_unlock_irqrestore(&kiq->ring_lock, flags);
failed_kiq:
	dev_err(adev->dev, "failed to write reg %x wait reg %x\n", reg0, reg1);
}

/**
 * amdgpu_gmc_tmz_set -- check and set if a device supports TMZ
 * @adev: amdgpu_device pointer
@@ -417,6 +417,10 @@ void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid,
				   uint32_t flush_type, bool all_hub,
				   uint32_t inst);
void amdgpu_gmc_fw_reg_write_reg_wait(struct amdgpu_device *adev,
				      uint32_t reg0, uint32_t reg1,
				      uint32_t ref, uint32_t mask,
				      uint32_t xcc_inst);

extern void amdgpu_gmc_tmz_set(struct amdgpu_device *adev);
extern void amdgpu_gmc_noretry_set(struct amdgpu_device *adev);
@ -62,9 +62,8 @@ int amdgpu_pasid_alloc(unsigned int bits)
|
||||
int pasid = -EINVAL;
|
||||
|
||||
for (bits = min(bits, 31U); bits > 0; bits--) {
|
||||
pasid = ida_simple_get(&amdgpu_pasid_ida,
|
||||
1U << (bits - 1), 1U << bits,
|
||||
GFP_KERNEL);
|
||||
pasid = ida_alloc_range(&amdgpu_pasid_ida, 1U << (bits - 1),
|
||||
(1U << bits) - 1, GFP_KERNEL);
|
||||
if (pasid != -ENOSPC)
|
||||
break;
|
||||
}
|
||||
@ -82,7 +81,7 @@ int amdgpu_pasid_alloc(unsigned int bits)
|
||||
void amdgpu_pasid_free(u32 pasid)
|
||||
{
|
||||
trace_amdgpu_pasid_freed(pasid);
|
||||
ida_simple_remove(&amdgpu_pasid_ida, pasid);
|
||||
ida_free(&amdgpu_pasid_ida, pasid);
|
||||
}
|
||||
|
||||
static void amdgpu_pasid_free_cb(struct dma_fence *fence,
|
||||
|
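One of the misc cleanups in this series replaces the deprecated ida_simple_*() helpers in amdgpu_ids.c with ida_alloc_range()/ida_free(). The range semantics change: ida_simple_get() takes an exclusive upper bound, while ida_alloc_range() takes an inclusive maximum, hence the "(1U << bits) - 1" in the pasid hunk nearby. A minimal stand-alone sketch (the example_* names are hypothetical, not driver code):

#include <linux/idr.h>

static DEFINE_IDA(example_ida);

static int example_alloc_id(unsigned int bits)
{
	/* old: ida_simple_get(&example_ida, 1U << (bits - 1), 1U << bits, GFP_KERNEL);
	 * the end argument of ida_simple_get() was exclusive.
	 */
	return ida_alloc_range(&example_ida, 1U << (bits - 1),
			       (1U << bits) - 1, GFP_KERNEL); /* max is inclusive */
}

static void example_free_id(int id)
{
	/* old: ida_simple_remove(&example_ida, id); */
	ida_free(&example_ida, id);
}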
@ -894,14 +894,14 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
|
||||
dev_info->ids_flags |= AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD;
|
||||
|
||||
vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE;
|
||||
vm_size -= AMDGPU_VA_RESERVED_SIZE;
|
||||
vm_size -= AMDGPU_VA_RESERVED_TOP;
|
||||
|
||||
/* Older VCE FW versions are buggy and can handle only 40bits */
|
||||
if (adev->vce.fw_version &&
|
||||
adev->vce.fw_version < AMDGPU_VCE_FW_53_45)
|
||||
vm_size = min(vm_size, 1ULL << 40);
|
||||
|
||||
dev_info->virtual_address_offset = AMDGPU_VA_RESERVED_SIZE;
|
||||
dev_info->virtual_address_offset = AMDGPU_VA_RESERVED_BOTTOM;
|
||||
dev_info->virtual_address_max =
|
||||
min(vm_size, AMDGPU_GMC_HOLE_START);
|
||||
|
||||
@ -1114,6 +1114,15 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
|
||||
}
|
||||
ui32 >>= 8;
|
||||
break;
|
||||
case AMDGPU_INFO_SENSOR_GPU_INPUT_POWER:
|
||||
/* get input GPU power */
|
||||
if (amdgpu_dpm_read_sensor(adev,
|
||||
AMDGPU_PP_SENSOR_GPU_INPUT_POWER,
|
||||
(void *)&ui32, &ui32_size)) {
|
||||
return -EINVAL;
|
||||
}
|
||||
ui32 >>= 8;
|
||||
break;
|
||||
case AMDGPU_INFO_SENSOR_VDDNB:
|
||||
/* get VDDNB in millivolts */
|
||||
if (amdgpu_dpm_read_sensor(adev,
|
||||
@ -1370,6 +1379,10 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
|
||||
goto error_vm;
|
||||
}
|
||||
|
||||
r = amdgpu_seq64_map(adev, &fpriv->vm, &fpriv->seq64_va);
|
||||
if (r)
|
||||
goto error_vm;
|
||||
|
||||
mutex_init(&fpriv->bo_list_lock);
|
||||
idr_init_base(&fpriv->bo_list_handles, 1);
|
||||
|
||||
|
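The amdgpu_info_ioctl() change nearby wires up the new AMDGPU_INFO_SENSOR_GPU_INPUT_POWER query alongside the existing average-power sensor. A hedged userspace sketch using libdrm's amdgpu_query_sensor_info(); it assumes a libdrm built against UAPI headers that already carry the new sensor constant, and the value is reported the same way as the average-power query:

#include <stdio.h>
#include <stdint.h>
#include <amdgpu.h>
#include <amdgpu_drm.h>

/* Print the input GPU power for an already-initialised device handle. */
static int print_input_power(amdgpu_device_handle dev)
{
	uint32_t power = 0;
	int r = amdgpu_query_sensor_info(dev, AMDGPU_INFO_SENSOR_GPU_INPUT_POWER,
					 sizeof(power), &power);
	if (r)
		return r; /* kernels without this series reject the query */

	printf("input power: %u W\n", power);
	return 0;
}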
@ -27,6 +27,16 @@
|
||||
#include "umc/umc_6_7_0_offset.h"
|
||||
#include "umc/umc_6_7_0_sh_mask.h"
|
||||
|
||||
static bool amdgpu_mca_is_deferred_error(struct amdgpu_device *adev,
|
||||
uint64_t mc_status)
|
||||
{
|
||||
if (adev->umc.ras->check_ecc_err_status)
|
||||
return adev->umc.ras->check_ecc_err_status(adev,
|
||||
AMDGPU_MCA_ERROR_TYPE_DE, &mc_status);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void amdgpu_mca_query_correctable_error_count(struct amdgpu_device *adev,
|
||||
uint64_t mc_status_addr,
|
||||
unsigned long *error_count)
|
||||
@ -202,16 +212,16 @@ int amdgpu_mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable)
|
||||
|
||||
static void amdgpu_mca_smu_mca_bank_dump(struct amdgpu_device *adev, int idx, struct mca_bank_entry *entry)
|
||||
{
|
||||
dev_info(adev->dev, "[Hardware error] Accelerator Check Architecture events logged\n");
|
||||
dev_info(adev->dev, "[Hardware error] aca entry[%02d].STATUS=0x%016llx\n",
|
||||
dev_info(adev->dev, HW_ERR "Accelerator Check Architecture events logged\n");
|
||||
dev_info(adev->dev, HW_ERR "aca entry[%02d].STATUS=0x%016llx\n",
|
||||
idx, entry->regs[MCA_REG_IDX_STATUS]);
|
||||
dev_info(adev->dev, "[Hardware error] aca entry[%02d].ADDR=0x%016llx\n",
|
||||
dev_info(adev->dev, HW_ERR "aca entry[%02d].ADDR=0x%016llx\n",
|
||||
idx, entry->regs[MCA_REG_IDX_ADDR]);
|
||||
dev_info(adev->dev, "[Hardware error] aca entry[%02d].MISC0=0x%016llx\n",
|
||||
dev_info(adev->dev, HW_ERR "aca entry[%02d].MISC0=0x%016llx\n",
|
||||
idx, entry->regs[MCA_REG_IDX_MISC0]);
|
||||
dev_info(adev->dev, "[Hardware error] aca entry[%02d].IPID=0x%016llx\n",
|
||||
dev_info(adev->dev, HW_ERR "aca entry[%02d].IPID=0x%016llx\n",
|
||||
idx, entry->regs[MCA_REG_IDX_IPID]);
|
||||
dev_info(adev->dev, "[Hardware error] aca entry[%02d].SYND=0x%016llx\n",
|
||||
dev_info(adev->dev, HW_ERR "aca entry[%02d].SYND=0x%016llx\n",
|
||||
idx, entry->regs[MCA_REG_IDX_SYND]);
|
||||
}
|
||||
|
||||
@ -256,9 +266,14 @@ int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_blo
|
||||
if (type == AMDGPU_MCA_ERROR_TYPE_UE)
|
||||
amdgpu_ras_error_statistic_ue_count(err_data,
|
||||
&mcm_info, &err_addr, (uint64_t)count);
|
||||
else
|
||||
amdgpu_ras_error_statistic_ce_count(err_data,
|
||||
&mcm_info, &err_addr, (uint64_t)count);
|
||||
else {
|
||||
if (amdgpu_mca_is_deferred_error(adev, entry->regs[MCA_REG_IDX_STATUS]))
|
||||
amdgpu_ras_error_statistic_de_count(err_data,
|
||||
&mcm_info, &err_addr, (uint64_t)count);
|
||||
else
|
||||
amdgpu_ras_error_statistic_ce_count(err_data,
|
||||
&mcm_info, &err_addr, (uint64_t)count);
|
||||
}
|
||||
}
|
||||
|
||||
out_mca_release:
|
||||
|
@ -65,6 +65,7 @@ enum amdgpu_mca_ip {
|
||||
enum amdgpu_mca_error_type {
|
||||
AMDGPU_MCA_ERROR_TYPE_UE = 0,
|
||||
AMDGPU_MCA_ERROR_TYPE_CE,
|
||||
AMDGPU_MCA_ERROR_TYPE_DE,
|
||||
};
|
||||
|
||||
struct amdgpu_mca_ras_block {
|
||||
|
@ -1398,7 +1398,7 @@ int amdgpu_mes_self_test(struct amdgpu_device *adev)
|
||||
goto error_fini;
|
||||
}
|
||||
|
||||
ctx_data.meta_data_gpu_addr = AMDGPU_VA_RESERVED_SIZE;
|
||||
ctx_data.meta_data_gpu_addr = AMDGPU_VA_RESERVED_BOTTOM;
|
||||
r = amdgpu_mes_ctx_map_meta_data(adev, vm, &ctx_data);
|
||||
if (r) {
|
||||
DRM_ERROR("failed to map ctx meta data\n");
|
||||
@ -1565,9 +1565,9 @@ void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev)
|
||||
#if defined(CONFIG_DEBUG_FS)
|
||||
struct drm_minor *minor = adev_to_drm(adev)->primary;
|
||||
struct dentry *root = minor->debugfs_root;
|
||||
|
||||
debugfs_create_file("amdgpu_mes_event_log", 0444, root,
|
||||
adev, &amdgpu_debugfs_mes_event_log_fops);
|
||||
if (adev->enable_mes)
|
||||
debugfs_create_file("amdgpu_mes_event_log", 0444, root,
|
||||
adev, &amdgpu_debugfs_mes_event_log_fops);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
@ -291,21 +291,22 @@ static int psp_memory_training_init(struct psp_context *psp)
|
||||
struct psp_memory_training_context *ctx = &psp->mem_train_ctx;
|
||||
|
||||
if (ctx->init != PSP_MEM_TRAIN_RESERVE_SUCCESS) {
|
||||
DRM_DEBUG("memory training is not supported!\n");
|
||||
dev_dbg(psp->adev->dev, "memory training is not supported!\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
ctx->sys_cache = kzalloc(ctx->train_data_size, GFP_KERNEL);
|
||||
if (ctx->sys_cache == NULL) {
|
||||
DRM_ERROR("alloc mem_train_ctx.sys_cache failed!\n");
|
||||
dev_err(psp->adev->dev, "alloc mem_train_ctx.sys_cache failed!\n");
|
||||
ret = -ENOMEM;
|
||||
goto Err_out;
|
||||
}
|
||||
|
||||
DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n",
|
||||
ctx->train_data_size,
|
||||
ctx->p2c_train_data_offset,
|
||||
ctx->c2p_train_data_offset);
|
||||
dev_dbg(psp->adev->dev,
|
||||
"train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n",
|
||||
ctx->train_data_size,
|
||||
ctx->p2c_train_data_offset,
|
||||
ctx->c2p_train_data_offset);
|
||||
ctx->init = PSP_MEM_TRAIN_INIT_SUCCESS;
|
||||
return 0;
|
||||
|
||||
@ -407,7 +408,7 @@ static int psp_sw_init(void *handle)
|
||||
|
||||
psp->cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
|
||||
if (!psp->cmd) {
|
||||
DRM_ERROR("Failed to allocate memory to command buffer!\n");
|
||||
dev_err(adev->dev, "Failed to allocate memory to command buffer!\n");
|
||||
ret = -ENOMEM;
|
||||
}
|
||||
|
||||
@ -454,13 +455,13 @@ static int psp_sw_init(void *handle)
|
||||
if (mem_training_ctx->enable_mem_training) {
|
||||
ret = psp_memory_training_init(psp);
|
||||
if (ret) {
|
||||
DRM_ERROR("Failed to initialize memory training!\n");
|
||||
dev_err(adev->dev, "Failed to initialize memory training!\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = psp_mem_training(psp, PSP_MEM_TRAIN_COLD_BOOT);
|
||||
if (ret) {
|
||||
DRM_ERROR("Failed to process memory training!\n");
|
||||
dev_err(adev->dev, "Failed to process memory training!\n");
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
@ -675,9 +676,11 @@ psp_cmd_submit_buf(struct psp_context *psp,
|
||||
*/
|
||||
if (!skip_unsupport && (psp->cmd_buf_mem->resp.status || !timeout) && !ras_intr) {
|
||||
if (ucode)
|
||||
DRM_WARN("failed to load ucode %s(0x%X) ",
|
||||
amdgpu_ucode_name(ucode->ucode_id), ucode->ucode_id);
|
||||
DRM_WARN("psp gfx command %s(0x%X) failed and response status is (0x%X)\n",
|
||||
dev_warn(psp->adev->dev,
|
||||
"failed to load ucode %s(0x%X) ",
|
||||
amdgpu_ucode_name(ucode->ucode_id), ucode->ucode_id);
|
||||
dev_warn(psp->adev->dev,
|
||||
"psp gfx command %s(0x%X) failed and response status is (0x%X)\n",
|
||||
psp_gfx_cmd_name(psp->cmd_buf_mem->cmd_id), psp->cmd_buf_mem->cmd_id,
|
||||
psp->cmd_buf_mem->resp.status);
|
||||
/* If any firmware (including CAP) load fails under SRIOV, it should
|
||||
@ -807,7 +810,7 @@ static int psp_tmr_init(struct psp_context *psp)
|
||||
psp->fw_pri_buf) {
|
||||
ret = psp_load_toc(psp, &tmr_size);
|
||||
if (ret) {
|
||||
DRM_ERROR("Failed to load toc\n");
|
||||
dev_err(psp->adev->dev, "Failed to load toc\n");
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
@ -855,7 +858,7 @@ static int psp_tmr_load(struct psp_context *psp)
|
||||
|
||||
psp_prep_tmr_cmd_buf(psp, cmd, psp->tmr_mc_addr, psp->tmr_bo);
|
||||
if (psp->tmr_bo)
|
||||
DRM_INFO("reserve 0x%lx from 0x%llx for PSP TMR\n",
|
||||
dev_info(psp->adev->dev, "reserve 0x%lx from 0x%llx for PSP TMR\n",
|
||||
amdgpu_bo_size(psp->tmr_bo), psp->tmr_mc_addr);
|
||||
|
||||
ret = psp_cmd_submit_buf(psp, NULL, cmd,
|
||||
@ -1113,7 +1116,7 @@ int psp_reg_program(struct psp_context *psp, enum psp_reg_prog_id reg,
|
||||
psp_prep_reg_prog_cmd_buf(cmd, reg, value);
|
||||
ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
|
||||
if (ret)
|
||||
DRM_ERROR("PSP failed to program reg id %d", reg);
|
||||
dev_err(psp->adev->dev, "PSP failed to program reg id %d\n", reg);
|
||||
|
||||
release_psp_cmd_buf(psp);
|
||||
|
||||
@ -1526,22 +1529,22 @@ static void psp_ras_ta_check_status(struct psp_context *psp)
|
||||
switch (ras_cmd->ras_status) {
|
||||
case TA_RAS_STATUS__ERROR_UNSUPPORTED_IP:
|
||||
dev_warn(psp->adev->dev,
|
||||
"RAS WARNING: cmd failed due to unsupported ip\n");
|
||||
"RAS WARNING: cmd failed due to unsupported ip\n");
|
||||
break;
|
||||
case TA_RAS_STATUS__ERROR_UNSUPPORTED_ERROR_INJ:
|
||||
dev_warn(psp->adev->dev,
|
||||
"RAS WARNING: cmd failed due to unsupported error injection\n");
|
||||
"RAS WARNING: cmd failed due to unsupported error injection\n");
|
||||
break;
|
||||
case TA_RAS_STATUS__SUCCESS:
|
||||
break;
|
||||
case TA_RAS_STATUS__TEE_ERROR_ACCESS_DENIED:
|
||||
if (ras_cmd->cmd_id == TA_RAS_COMMAND__TRIGGER_ERROR)
|
||||
dev_warn(psp->adev->dev,
|
||||
"RAS WARNING: Inject error to critical region is not allowed\n");
|
||||
"RAS WARNING: Inject error to critical region is not allowed\n");
|
||||
break;
|
||||
default:
|
||||
dev_warn(psp->adev->dev,
|
||||
"RAS WARNING: ras status = 0x%X\n", ras_cmd->ras_status);
|
||||
"RAS WARNING: ras status = 0x%X\n", ras_cmd->ras_status);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -1565,7 +1568,7 @@ int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
|
||||
return ret;
|
||||
|
||||
if (ras_cmd->if_version > RAS_TA_HOST_IF_VER) {
|
||||
DRM_WARN("RAS: Unsupported Interface");
|
||||
dev_warn(psp->adev->dev, "RAS: Unsupported Interface\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
@ -1715,7 +1718,7 @@ int psp_ras_initialize(struct psp_context *psp)
|
||||
psp->ras_context.context.initialized = true;
|
||||
else {
|
||||
if (ras_cmd->ras_status)
|
||||
dev_warn(psp->adev->dev, "RAS Init Status: 0x%X\n", ras_cmd->ras_status);
|
||||
dev_warn(adev->dev, "RAS Init Status: 0x%X\n", ras_cmd->ras_status);
|
||||
|
||||
/* fail to load RAS TA */
|
||||
psp->ras_context.context.initialized = false;
|
||||
@ -1779,6 +1782,31 @@ int psp_ras_trigger_error(struct psp_context *psp,
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int psp_ras_query_address(struct psp_context *psp,
|
||||
struct ta_ras_query_address_input *addr_in,
|
||||
struct ta_ras_query_address_output *addr_out)
|
||||
{
|
||||
struct ta_ras_shared_memory *ras_cmd;
|
||||
int ret;
|
||||
|
||||
if (!psp->ras_context.context.initialized)
|
||||
return -EINVAL;
|
||||
|
||||
ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf;
|
||||
memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory));
|
||||
|
||||
ras_cmd->cmd_id = TA_RAS_COMMAND__QUERY_ADDRESS;
|
||||
ras_cmd->ras_in_message.address = *addr_in;
|
||||
|
||||
ret = psp_ras_invoke(psp, ras_cmd->cmd_id);
|
||||
if (ret || ras_cmd->ras_status || psp->cmd_buf_mem->resp.status)
|
||||
return -EINVAL;
|
||||
|
||||
*addr_out = ras_cmd->ras_out_message.address;
|
||||
|
||||
return 0;
|
||||
}
|
||||
// ras end
|
||||
|
||||
// HDCP start
|
||||
@ -2125,19 +2153,14 @@ int amdgpu_psp_wait_for_bootloader(struct amdgpu_device *adev)
|
||||
return ret;
|
||||
}
|
||||
|
||||
int amdgpu_psp_query_boot_status(struct amdgpu_device *adev)
|
||||
bool amdgpu_psp_get_ras_capability(struct psp_context *psp)
|
||||
{
|
||||
struct psp_context *psp = &adev->psp;
|
||||
int ret = 0;
|
||||
|
||||
if (amdgpu_sriov_vf(adev) || (adev->flags & AMD_IS_APU))
|
||||
return 0;
|
||||
|
||||
if (psp->funcs &&
|
||||
psp->funcs->query_boot_status)
|
||||
ret = psp->funcs->query_boot_status(psp);
|
||||
|
||||
return ret;
|
||||
psp->funcs->get_ras_capability) {
|
||||
return psp->funcs->get_ras_capability(psp);
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static int psp_hw_start(struct psp_context *psp)
|
||||
@ -2150,7 +2173,7 @@ static int psp_hw_start(struct psp_context *psp)
|
||||
(psp->funcs->bootloader_load_kdb != NULL)) {
|
||||
ret = psp_bootloader_load_kdb(psp);
|
||||
if (ret) {
|
||||
DRM_ERROR("PSP load kdb failed!\n");
|
||||
dev_err(adev->dev, "PSP load kdb failed!\n");
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
@ -2159,7 +2182,7 @@ static int psp_hw_start(struct psp_context *psp)
|
||||
(psp->funcs->bootloader_load_spl != NULL)) {
|
||||
ret = psp_bootloader_load_spl(psp);
|
||||
if (ret) {
|
||||
DRM_ERROR("PSP load spl failed!\n");
|
||||
dev_err(adev->dev, "PSP load spl failed!\n");
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
@ -2168,7 +2191,7 @@ static int psp_hw_start(struct psp_context *psp)
|
||||
(psp->funcs->bootloader_load_sysdrv != NULL)) {
|
||||
ret = psp_bootloader_load_sysdrv(psp);
|
||||
if (ret) {
|
||||
DRM_ERROR("PSP load sys drv failed!\n");
|
||||
dev_err(adev->dev, "PSP load sys drv failed!\n");
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
@ -2177,7 +2200,7 @@ static int psp_hw_start(struct psp_context *psp)
|
||||
(psp->funcs->bootloader_load_soc_drv != NULL)) {
|
||||
ret = psp_bootloader_load_soc_drv(psp);
|
||||
if (ret) {
|
||||
DRM_ERROR("PSP load soc drv failed!\n");
|
||||
dev_err(adev->dev, "PSP load soc drv failed!\n");
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
@ -2186,7 +2209,7 @@ static int psp_hw_start(struct psp_context *psp)
|
||||
(psp->funcs->bootloader_load_intf_drv != NULL)) {
|
||||
ret = psp_bootloader_load_intf_drv(psp);
|
||||
if (ret) {
|
||||
DRM_ERROR("PSP load intf drv failed!\n");
|
||||
dev_err(adev->dev, "PSP load intf drv failed!\n");
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
@ -2195,7 +2218,7 @@ static int psp_hw_start(struct psp_context *psp)
|
||||
(psp->funcs->bootloader_load_dbg_drv != NULL)) {
|
||||
ret = psp_bootloader_load_dbg_drv(psp);
|
||||
if (ret) {
|
||||
DRM_ERROR("PSP load dbg drv failed!\n");
|
||||
dev_err(adev->dev, "PSP load dbg drv failed!\n");
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
@ -2204,7 +2227,7 @@ static int psp_hw_start(struct psp_context *psp)
|
||||
(psp->funcs->bootloader_load_ras_drv != NULL)) {
|
||||
ret = psp_bootloader_load_ras_drv(psp);
|
||||
if (ret) {
|
||||
DRM_ERROR("PSP load ras_drv failed!\n");
|
||||
dev_err(adev->dev, "PSP load ras_drv failed!\n");
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
@ -2213,7 +2236,7 @@ static int psp_hw_start(struct psp_context *psp)
|
||||
(psp->funcs->bootloader_load_sos != NULL)) {
|
||||
ret = psp_bootloader_load_sos(psp);
|
||||
if (ret) {
|
||||
DRM_ERROR("PSP load sos failed!\n");
|
||||
dev_err(adev->dev, "PSP load sos failed!\n");
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
@ -2221,7 +2244,7 @@ static int psp_hw_start(struct psp_context *psp)
|
||||
|
||||
ret = psp_ring_create(psp, PSP_RING_TYPE__KM);
|
||||
if (ret) {
|
||||
DRM_ERROR("PSP create ring failed!\n");
|
||||
dev_err(adev->dev, "PSP create ring failed!\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -2231,7 +2254,7 @@ static int psp_hw_start(struct psp_context *psp)
|
||||
if (!psp_boottime_tmr(psp)) {
|
||||
ret = psp_tmr_init(psp);
|
||||
if (ret) {
|
||||
DRM_ERROR("PSP tmr init failed!\n");
|
||||
dev_err(adev->dev, "PSP tmr init failed!\n");
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
@ -2250,7 +2273,7 @@ skip_pin_bo:
|
||||
|
||||
ret = psp_tmr_load(psp);
|
||||
if (ret) {
|
||||
DRM_ERROR("PSP load tmr failed!\n");
|
||||
dev_err(adev->dev, "PSP load tmr failed!\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -2518,7 +2541,8 @@ static void psp_print_fw_hdr(struct psp_context *psp,
|
||||
}
|
||||
}
|
||||
|
||||
static int psp_prep_load_ip_fw_cmd_buf(struct amdgpu_firmware_info *ucode,
|
||||
static int psp_prep_load_ip_fw_cmd_buf(struct psp_context *psp,
|
||||
struct amdgpu_firmware_info *ucode,
|
||||
struct psp_gfx_cmd_resp *cmd)
|
||||
{
|
||||
int ret;
|
||||
@ -2531,7 +2555,7 @@ static int psp_prep_load_ip_fw_cmd_buf(struct amdgpu_firmware_info *ucode,
|
||||
|
||||
ret = psp_get_fw_type(ucode, &cmd->cmd.cmd_load_ip_fw.fw_type);
|
||||
if (ret)
|
||||
DRM_ERROR("Unknown firmware type\n");
|
||||
dev_err(psp->adev->dev, "Unknown firmware type\n");
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -2542,7 +2566,7 @@ int psp_execute_ip_fw_load(struct psp_context *psp,
|
||||
int ret = 0;
|
||||
struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp);
|
||||
|
||||
ret = psp_prep_load_ip_fw_cmd_buf(ucode, cmd);
|
||||
ret = psp_prep_load_ip_fw_cmd_buf(psp, ucode, cmd);
|
||||
if (!ret) {
|
||||
ret = psp_cmd_submit_buf(psp, ucode, cmd,
|
||||
psp->fence_buf_mc_addr);
|
||||
@ -2601,13 +2625,13 @@ static int psp_load_smu_fw(struct psp_context *psp)
|
||||
amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(11, 0, 2)))) {
|
||||
ret = amdgpu_dpm_set_mp1_state(adev, PP_MP1_STATE_UNLOAD);
|
||||
if (ret)
|
||||
DRM_WARN("Failed to set MP1 state prepare for reload\n");
|
||||
dev_err(adev->dev, "Failed to set MP1 state prepare for reload\n");
|
||||
}
|
||||
|
||||
ret = psp_execute_ip_fw_load(psp, ucode);
|
||||
|
||||
if (ret)
|
||||
DRM_ERROR("PSP load smu failed!\n");
|
||||
dev_err(adev->dev, "PSP load smu failed!\n");
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -2712,7 +2736,7 @@ static int psp_load_non_psp_fw(struct psp_context *psp)
|
||||
adev->virt.autoload_ucode_id : AMDGPU_UCODE_ID_RLC_G)) {
|
||||
ret = psp_rlc_autoload_start(psp);
|
||||
if (ret) {
|
||||
DRM_ERROR("Failed to start rlc autoload\n");
|
||||
dev_err(adev->dev, "Failed to start rlc autoload\n");
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
@ -2734,7 +2758,7 @@ static int psp_load_fw(struct amdgpu_device *adev)
|
||||
|
||||
ret = psp_ring_init(psp, PSP_RING_TYPE__KM);
|
||||
if (ret) {
|
||||
DRM_ERROR("PSP ring init failed!\n");
|
||||
dev_err(adev->dev, "PSP ring init failed!\n");
|
||||
goto failed;
|
||||
}
|
||||
}
|
||||
@ -2749,13 +2773,13 @@ static int psp_load_fw(struct amdgpu_device *adev)
|
||||
|
||||
ret = psp_asd_initialize(psp);
|
||||
if (ret) {
|
||||
DRM_ERROR("PSP load asd failed!\n");
|
||||
dev_err(adev->dev, "PSP load asd failed!\n");
|
||||
goto failed1;
|
||||
}
|
||||
|
||||
ret = psp_rl_load(adev);
|
||||
if (ret) {
|
||||
DRM_ERROR("PSP load RL failed!\n");
|
||||
dev_err(adev->dev, "PSP load RL failed!\n");
|
||||
goto failed1;
|
||||
}
|
||||
|
||||
@ -2775,7 +2799,7 @@ static int psp_load_fw(struct amdgpu_device *adev)
|
||||
ret = psp_ras_initialize(psp);
|
||||
if (ret)
|
||||
dev_err(psp->adev->dev,
|
||||
"RAS: Failed to initialize RAS\n");
|
||||
"RAS: Failed to initialize RAS\n");
|
||||
|
||||
ret = psp_hdcp_initialize(psp);
|
||||
if (ret)
|
||||
@ -2828,7 +2852,7 @@ static int psp_hw_init(void *handle)
|
||||
|
||||
ret = psp_load_fw(adev);
|
||||
if (ret) {
|
||||
DRM_ERROR("PSP firmware loading failed\n");
|
||||
dev_err(adev->dev, "PSP firmware loading failed\n");
|
||||
goto failed;
|
||||
}
|
||||
|
||||
@ -2875,7 +2899,7 @@ static int psp_suspend(void *handle)
|
||||
psp->xgmi_context.context.initialized) {
|
||||
ret = psp_xgmi_terminate(psp);
|
||||
if (ret) {
|
||||
DRM_ERROR("Failed to terminate xgmi ta\n");
|
||||
dev_err(adev->dev, "Failed to terminate xgmi ta\n");
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
@ -2883,46 +2907,46 @@ static int psp_suspend(void *handle)
|
||||
if (psp->ta_fw) {
|
||||
ret = psp_ras_terminate(psp);
|
||||
if (ret) {
|
||||
DRM_ERROR("Failed to terminate ras ta\n");
|
||||
dev_err(adev->dev, "Failed to terminate ras ta\n");
|
||||
goto out;
|
||||
}
|
||||
ret = psp_hdcp_terminate(psp);
|
||||
if (ret) {
|
||||
DRM_ERROR("Failed to terminate hdcp ta\n");
|
||||
dev_err(adev->dev, "Failed to terminate hdcp ta\n");
|
||||
goto out;
|
||||
}
|
||||
ret = psp_dtm_terminate(psp);
|
||||
if (ret) {
|
||||
DRM_ERROR("Failed to terminate dtm ta\n");
|
||||
dev_err(adev->dev, "Failed to terminate dtm ta\n");
|
||||
goto out;
|
||||
}
|
||||
ret = psp_rap_terminate(psp);
|
||||
if (ret) {
|
||||
DRM_ERROR("Failed to terminate rap ta\n");
|
||||
dev_err(adev->dev, "Failed to terminate rap ta\n");
|
||||
goto out;
|
||||
}
|
||||
ret = psp_securedisplay_terminate(psp);
|
||||
if (ret) {
|
||||
DRM_ERROR("Failed to terminate securedisplay ta\n");
|
||||
dev_err(adev->dev, "Failed to terminate securedisplay ta\n");
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
ret = psp_asd_terminate(psp);
|
||||
if (ret) {
|
||||
DRM_ERROR("Failed to terminate asd\n");
|
||||
dev_err(adev->dev, "Failed to terminate asd\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = psp_tmr_terminate(psp);
|
||||
if (ret) {
|
||||
DRM_ERROR("Failed to terminate tmr\n");
|
||||
dev_err(adev->dev, "Failed to terminate tmr\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = psp_ring_stop(psp, PSP_RING_TYPE__KM);
|
||||
if (ret)
|
||||
DRM_ERROR("PSP ring stop failed\n");
|
||||
dev_err(adev->dev, "PSP ring stop failed\n");
|
||||
|
||||
out:
|
||||
return ret;
|
||||
@ -2934,12 +2958,12 @@ static int psp_resume(void *handle)
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
struct psp_context *psp = &adev->psp;
|
||||
|
||||
DRM_INFO("PSP is resuming...\n");
|
||||
dev_info(adev->dev, "PSP is resuming...\n");
|
||||
|
||||
if (psp->mem_train_ctx.enable_mem_training) {
|
||||
ret = psp_mem_training(psp, PSP_MEM_TRAIN_RESUME);
|
||||
if (ret) {
|
||||
DRM_ERROR("Failed to process memory training!\n");
|
||||
dev_err(adev->dev, "Failed to process memory training!\n");
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
@ -2956,7 +2980,7 @@ static int psp_resume(void *handle)
|
||||
|
||||
ret = psp_asd_initialize(psp);
|
||||
if (ret) {
|
||||
DRM_ERROR("PSP load asd failed!\n");
|
||||
dev_err(adev->dev, "PSP load asd failed!\n");
|
||||
goto failed;
|
||||
}
|
||||
|
||||
@ -2980,7 +3004,7 @@ static int psp_resume(void *handle)
|
||||
ret = psp_ras_initialize(psp);
|
||||
if (ret)
|
||||
dev_err(psp->adev->dev,
|
||||
"RAS: Failed to initialize RAS\n");
|
||||
"RAS: Failed to initialize RAS\n");
|
||||
|
||||
ret = psp_hdcp_initialize(psp);
|
||||
if (ret)
|
||||
@ -3008,7 +3032,7 @@ static int psp_resume(void *handle)
|
||||
return 0;
|
||||
|
||||
failed:
|
||||
DRM_ERROR("PSP resume failed\n");
|
||||
dev_err(adev->dev, "PSP resume failed\n");
|
||||
mutex_unlock(&adev->firmware.mutex);
|
||||
return ret;
|
||||
}
|
||||
@ -3069,9 +3093,11 @@ int psp_ring_cmd_submit(struct psp_context *psp,
|
||||
write_frame = ring_buffer_start + (psp_write_ptr_reg / rb_frame_size_dw);
|
||||
/* Check invalid write_frame ptr address */
|
||||
if ((write_frame < ring_buffer_start) || (ring_buffer_end < write_frame)) {
|
||||
DRM_ERROR("ring_buffer_start = %p; ring_buffer_end = %p; write_frame = %p\n",
|
||||
ring_buffer_start, ring_buffer_end, write_frame);
|
||||
DRM_ERROR("write_frame is pointing to address out of bounds\n");
|
||||
dev_err(adev->dev,
|
||||
"ring_buffer_start = %p; ring_buffer_end = %p; write_frame = %p\n",
|
||||
ring_buffer_start, ring_buffer_end, write_frame);
|
||||
dev_err(adev->dev,
|
||||
"write_frame is pointing to address out of bounds\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
@ -3597,7 +3623,7 @@ static ssize_t psp_usbc_pd_fw_sysfs_read(struct device *dev,
|
||||
int ret;
|
||||
|
||||
if (!adev->ip_blocks[AMD_IP_BLOCK_TYPE_PSP].status.late_initialized) {
|
||||
DRM_INFO("PSP block is not ready yet.");
|
||||
dev_info(adev->dev, "PSP block is not ready yet\n.");
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
@ -3606,7 +3632,7 @@ static ssize_t psp_usbc_pd_fw_sysfs_read(struct device *dev,
|
||||
mutex_unlock(&adev->psp.mutex);
|
||||
|
||||
if (ret) {
|
||||
DRM_ERROR("Failed to read USBC PD FW, err = %d", ret);
|
||||
dev_err(adev->dev, "Failed to read USBC PD FW, err = %d\n", ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -3628,7 +3654,7 @@ static ssize_t psp_usbc_pd_fw_sysfs_write(struct device *dev,
|
||||
void *fw_pri_cpu_addr;
|
||||
|
||||
if (!adev->ip_blocks[AMD_IP_BLOCK_TYPE_PSP].status.late_initialized) {
|
||||
DRM_INFO("PSP block is not ready yet.");
|
||||
dev_err(adev->dev, "PSP block is not ready yet.");
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
@ -3661,7 +3687,7 @@ rel_buf:
|
||||
release_firmware(usbc_pd_fw);
|
||||
fail:
|
||||
if (ret) {
|
||||
DRM_ERROR("Failed to load USBC PD FW, err = %d", ret);
|
||||
dev_err(adev->dev, "Failed to load USBC PD FW, err = %d", ret);
|
||||
count = ret;
|
||||
}
|
||||
|
||||
@ -3708,7 +3734,7 @@ static ssize_t amdgpu_psp_vbflash_write(struct file *filp, struct kobject *kobj,
|
||||
|
||||
/* Safeguard against memory drain */
|
||||
if (adev->psp.vbflash_image_size > AMD_VBIOS_FILE_MAX_SIZE_B) {
|
||||
dev_err(adev->dev, "File size cannot exceed %u", AMD_VBIOS_FILE_MAX_SIZE_B);
|
||||
dev_err(adev->dev, "File size cannot exceed %u\n", AMD_VBIOS_FILE_MAX_SIZE_B);
|
||||
kvfree(adev->psp.vbflash_tmp_buf);
|
||||
adev->psp.vbflash_tmp_buf = NULL;
|
||||
adev->psp.vbflash_image_size = 0;
|
||||
@ -3727,7 +3753,7 @@ static ssize_t amdgpu_psp_vbflash_write(struct file *filp, struct kobject *kobj,
|
||||
adev->psp.vbflash_image_size += count;
|
||||
mutex_unlock(&adev->psp.mutex);
|
||||
|
||||
dev_dbg(adev->dev, "IFWI staged for update");
|
||||
dev_dbg(adev->dev, "IFWI staged for update\n");
|
||||
|
||||
return count;
|
||||
}
|
||||
@ -3747,7 +3773,7 @@ static ssize_t amdgpu_psp_vbflash_read(struct file *filp, struct kobject *kobj,
|
||||
if (adev->psp.vbflash_image_size == 0)
|
||||
return -EINVAL;
|
||||
|
||||
dev_dbg(adev->dev, "PSP IFWI flash process initiated");
|
||||
dev_dbg(adev->dev, "PSP IFWI flash process initiated\n");
|
||||
|
||||
ret = amdgpu_bo_create_kernel(adev, adev->psp.vbflash_image_size,
|
||||
AMDGPU_GPU_PAGE_SIZE,
|
||||
@ -3772,11 +3798,11 @@ rel_buf:
|
||||
adev->psp.vbflash_image_size = 0;
|
||||
|
||||
if (ret) {
|
||||
dev_err(adev->dev, "Failed to load IFWI, err = %d", ret);
|
||||
dev_err(adev->dev, "Failed to load IFWI, err = %d\n", ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
dev_dbg(adev->dev, "PSP IFWI flash process done");
|
||||
dev_dbg(adev->dev, "PSP IFWI flash process done\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -134,7 +134,7 @@ struct psp_funcs {
|
||||
int (*update_spirom)(struct psp_context *psp, uint64_t fw_pri_mc_addr);
|
||||
int (*vbflash_stat)(struct psp_context *psp);
|
||||
int (*fatal_error_recovery_quirk)(struct psp_context *psp);
|
||||
int (*query_boot_status)(struct psp_context *psp);
|
||||
bool (*get_ras_capability)(struct psp_context *psp);
|
||||
};
|
||||
|
||||
struct ta_funcs {
|
||||
@ -502,6 +502,9 @@ int psp_ras_enable_features(struct psp_context *psp,
|
||||
int psp_ras_trigger_error(struct psp_context *psp,
|
||||
struct ta_ras_trigger_error_input *info, uint32_t instance_mask);
|
||||
int psp_ras_terminate(struct psp_context *psp);
|
||||
int psp_ras_query_address(struct psp_context *psp,
|
||||
struct ta_ras_query_address_input *addr_in,
|
||||
struct ta_ras_query_address_output *addr_out);
|
||||
|
||||
int psp_hdcp_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
|
||||
int psp_dtm_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
|
||||
@ -538,7 +541,5 @@ int psp_spatial_partition(struct psp_context *psp, int mode);
|
||||
int is_psp_fw_valid(struct psp_bin_desc bin);
|
||||
|
||||
int amdgpu_psp_wait_for_bootloader(struct amdgpu_device *adev);
|
||||
|
||||
int amdgpu_psp_query_boot_status(struct amdgpu_device *adev);
|
||||
|
||||
bool amdgpu_psp_get_ras_capability(struct psp_context *psp);
|
||||
#endif
|
||||
|
@ -362,7 +362,7 @@ static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size
|
||||
}
|
||||
}
|
||||
|
||||
if (copy_to_user((char *)buf, context->mem_context.shared_buf, shared_buf_len))
|
||||
if (copy_to_user((char *)&buf[copy_pos], context->mem_context.shared_buf, shared_buf_len))
|
||||
ret = -EFAULT;
|
||||
|
||||
err_free_shared_buf:
|
||||
|
@@ -39,6 +39,7 @@
#include "nbio_v7_9.h"
#include "atom.h"
#include "amdgpu_reset.h"
#include "amdgpu_psp.h"

#ifdef CONFIG_X86_MCE_AMD
#include <asm/mce.h>
@@ -73,6 +74,8 @@ const char *ras_block_string[] = {
"mca",
"vcn",
"jpeg",
"ih",
"mpio",
};

const char *ras_mca_block_string[] = {
@@ -94,7 +97,8 @@ const char *get_ras_block_str(struct ras_common_if *ras_block)
if (!ras_block)
return "NULL";

if (ras_block->block >= AMDGPU_RAS_BLOCK_COUNT)
if (ras_block->block >= AMDGPU_RAS_BLOCK_COUNT ||
ras_block->block >= ARRAY_SIZE(ras_block_string))
return "OUT OF RANGE";

if (ras_block->block == AMDGPU_RAS_BLOCK__MCA)
@@ -116,6 +120,8 @@ const char *get_ras_block_str(struct ras_common_if *ras_block)
/* typical ECC bad page rate is 1 bad page per 100MB VRAM */
#define RAS_BAD_PAGE_COVER (100 * 1024 * 1024ULL)

#define MAX_UMC_POISON_POLLING_TIME_ASYNC 100 //ms

enum amdgpu_ras_retire_page_reservation {
AMDGPU_RAS_RETIRE_PAGE_RESERVED,
AMDGPU_RAS_RETIRE_PAGE_PENDING,
@@ -628,8 +634,12 @@ static ssize_t amdgpu_ras_sysfs_read(struct device *dev,
dev_warn(obj->adev->dev, "Failed to reset error counter and error status");
}

return sysfs_emit(buf, "%s: %lu\n%s: %lu\n", "ue", info.ue_count,
"ce", info.ce_count);
if (info.head.block == AMDGPU_RAS_BLOCK__UMC)
return sysfs_emit(buf, "%s: %lu\n%s: %lu\n%s: %lu\n", "ue", info.ue_count,
"ce", info.ce_count, "de", info.de_count);
else
return sysfs_emit(buf, "%s: %lu\n%s: %lu\n", "ue", info.ue_count,
"ce", info.ce_count);
}

/* obj begin */
@@ -1036,7 +1046,8 @@ static void amdgpu_ras_error_print_error_data(struct amdgpu_device *adev,
struct ras_manager *ras_mgr,
struct ras_err_data *err_data,
const char *blk_name,
bool is_ue)
bool is_ue,
bool is_de)
{
struct amdgpu_smuio_mcm_config_info *mcm_info;
struct ras_err_node *err_node;
@@ -1065,25 +1076,50 @@ static void amdgpu_ras_error_print_error_data(struct amdgpu_device *adev,
}

} else {
for_each_ras_error(err_node, err_data) {
err_info = &err_node->err_info;
mcm_info = &err_info->mcm_info;
if (err_info->ce_count) {
dev_info(adev->dev, "socket: %d, die: %d, "
"%lld new correctable hardware errors detected in %s block\n",
mcm_info->socket_id,
mcm_info->die_id,
err_info->ce_count,
blk_name);
if (is_de) {
for_each_ras_error(err_node, err_data) {
err_info = &err_node->err_info;
mcm_info = &err_info->mcm_info;
if (err_info->de_count) {
dev_info(adev->dev, "socket: %d, die: %d, "
"%lld new deferred hardware errors detected in %s block\n",
mcm_info->socket_id,
mcm_info->die_id,
err_info->de_count,
blk_name);
}
}
}

for_each_ras_error(err_node, &ras_mgr->err_data) {
err_info = &err_node->err_info;
mcm_info = &err_info->mcm_info;
dev_info(adev->dev, "socket: %d, die: %d, "
"%lld correctable hardware errors detected in total in %s block\n",
mcm_info->socket_id, mcm_info->die_id, err_info->ce_count, blk_name);
for_each_ras_error(err_node, &ras_mgr->err_data) {
err_info = &err_node->err_info;
mcm_info = &err_info->mcm_info;
dev_info(adev->dev, "socket: %d, die: %d, "
"%lld deferred hardware errors detected in total in %s block\n",
mcm_info->socket_id, mcm_info->die_id,
err_info->de_count, blk_name);
}
} else {
for_each_ras_error(err_node, err_data) {
err_info = &err_node->err_info;
mcm_info = &err_info->mcm_info;
if (err_info->ce_count) {
dev_info(adev->dev, "socket: %d, die: %d, "
"%lld new correctable hardware errors detected in %s block\n",
mcm_info->socket_id,
mcm_info->die_id,
err_info->ce_count,
blk_name);
}
}

for_each_ras_error(err_node, &ras_mgr->err_data) {
err_info = &err_node->err_info;
mcm_info = &err_info->mcm_info;
dev_info(adev->dev, "socket: %d, die: %d, "
"%lld correctable hardware errors detected in total in %s block\n",
mcm_info->socket_id, mcm_info->die_id,
err_info->ce_count, blk_name);
}
}
}
}
@@ -1102,7 +1138,8 @@ static void amdgpu_ras_error_generate_report(struct amdgpu_device *adev,

if (err_data->ce_count) {
if (err_data_has_source_info(err_data)) {
amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data, blk_name, false);
amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data,
blk_name, false, false);
} else if (!adev->aid_mask &&
adev->smuio.funcs &&
adev->smuio.funcs->get_socket_id &&
@@ -1124,7 +1161,8 @@ static void amdgpu_ras_error_generate_report(struct amdgpu_device *adev,

if (err_data->ue_count) {
if (err_data_has_source_info(err_data)) {
amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data, blk_name, true);
amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data,
blk_name, true, false);
} else if (!adev->aid_mask &&
adev->smuio.funcs &&
adev->smuio.funcs->get_socket_id &&
@@ -1144,6 +1182,28 @@ static void amdgpu_ras_error_generate_report(struct amdgpu_device *adev,
}
}

if (err_data->de_count) {
if (err_data_has_source_info(err_data)) {
amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data,
blk_name, false, true);
} else if (!adev->aid_mask &&
adev->smuio.funcs &&
adev->smuio.funcs->get_socket_id &&
adev->smuio.funcs->get_die_id) {
dev_info(adev->dev, "socket: %d, die: %d "
"%ld deferred hardware errors "
"detected in %s block\n",
adev->smuio.funcs->get_socket_id(adev),
adev->smuio.funcs->get_die_id(adev),
ras_mgr->err_data.de_count,
blk_name);
} else {
dev_info(adev->dev, "%ld deferred hardware errors "
"detected in %s block\n",
ras_mgr->err_data.de_count,
blk_name);
}
}
}

static void amdgpu_rasmgr_error_data_statistic_update(struct ras_manager *obj, struct ras_err_data *err_data)
@@ -1154,7 +1214,8 @@ static void amdgpu_rasmgr_error_data_statistic_update(struct ras_manager *obj, s
if (err_data_has_source_info(err_data)) {
for_each_ras_error(err_node, err_data) {
err_info = &err_node->err_info;

amdgpu_ras_error_statistic_de_count(&obj->err_data,
&err_info->mcm_info, NULL, err_info->de_count);
amdgpu_ras_error_statistic_ce_count(&obj->err_data,
&err_info->mcm_info, NULL, err_info->ce_count);
amdgpu_ras_error_statistic_ue_count(&obj->err_data,
@@ -1164,9 +1225,72 @@ static void amdgpu_rasmgr_error_data_statistic_update(struct ras_manager *obj, s
/* for legacy asic path which doesn't has error source info */
obj->err_data.ue_count += err_data->ue_count;
obj->err_data.ce_count += err_data->ce_count;
obj->err_data.de_count += err_data->de_count;
}
}

static struct ras_manager *get_ras_manager(struct amdgpu_device *adev, enum amdgpu_ras_block blk)
{
struct ras_common_if head;

memset(&head, 0, sizeof(head));
head.block = blk;

return amdgpu_ras_find_obj(adev, &head);
}

int amdgpu_ras_bind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk,
const struct aca_info *aca_info, void *data)
{
struct ras_manager *obj;

obj = get_ras_manager(adev, blk);
if (!obj)
return -EINVAL;

return amdgpu_aca_add_handle(adev, &obj->aca_handle, ras_block_str(blk), aca_info, data);
}

int amdgpu_ras_unbind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk)
{
struct ras_manager *obj;

obj = get_ras_manager(adev, blk);
if (!obj)
return -EINVAL;

amdgpu_aca_remove_handle(&obj->aca_handle);

return 0;
}

static int amdgpu_aca_log_ras_error_data(struct amdgpu_device *adev, enum amdgpu_ras_block blk,
enum aca_error_type type, struct ras_err_data *err_data)
{
struct ras_manager *obj;

obj = get_ras_manager(adev, blk);
if (!obj)
return -EINVAL;

return amdgpu_aca_get_error_data(adev, &obj->aca_handle, type, err_data);
}

ssize_t amdgpu_ras_aca_sysfs_read(struct device *dev, struct device_attribute *attr,
struct aca_handle *handle, char *buf, void *data)
{
struct ras_manager *obj = container_of(handle, struct ras_manager, aca_handle);
struct ras_query_if info = {
.head = obj->head,
};

if (amdgpu_ras_query_error_status(obj->adev, &info))
return -EINVAL;

return sysfs_emit(buf, "%s: %lu\n%s: %lu\n", "ue", info.ue_count,
"ce", info.ce_count);
}

static int amdgpu_ras_query_error_status_helper(struct amdgpu_device *adev,
struct ras_query_if *info,
struct ras_err_data *err_data,
@@ -1174,6 +1298,7 @@ static int amdgpu_ras_query_error_status_helper(struct amdgpu_device *adev,
{
enum amdgpu_ras_block blk = info ? info->head.block : AMDGPU_RAS_BLOCK_COUNT;
struct amdgpu_ras_block_object *block_obj = NULL;
int ret;

if (blk == AMDGPU_RAS_BLOCK_COUNT)
return -EINVAL;
@@ -1203,9 +1328,19 @@ static int amdgpu_ras_query_error_status_helper(struct amdgpu_device *adev,
}
}
} else {
/* FIXME: add code to check return value later */
amdgpu_mca_smu_log_ras_error(adev, blk, AMDGPU_MCA_ERROR_TYPE_UE, err_data);
amdgpu_mca_smu_log_ras_error(adev, blk, AMDGPU_MCA_ERROR_TYPE_CE, err_data);
if (amdgpu_aca_is_enabled(adev)) {
ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_UE, err_data);
if (ret)
return ret;

ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_CE, err_data);
if (ret)
return ret;
} else {
/* FIXME: add code to check return value later */
amdgpu_mca_smu_log_ras_error(adev, blk, AMDGPU_MCA_ERROR_TYPE_UE, err_data);
amdgpu_mca_smu_log_ras_error(adev, blk, AMDGPU_MCA_ERROR_TYPE_CE, err_data);
}
}

return 0;
@@ -1239,6 +1374,7 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, struct ras_query_i

info->ue_count = obj->err_data.ue_count;
info->ce_count = obj->err_data.ce_count;
info->de_count = obj->err_data.de_count;

amdgpu_ras_error_generate_report(adev, info, &err_data);

@@ -1254,6 +1390,7 @@ int amdgpu_ras_reset_error_count(struct amdgpu_device *adev,
struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev, block, 0);
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;
const struct aca_smu_funcs *smu_funcs = adev->aca.smu_funcs;
struct amdgpu_hive_info *hive;
int hive_ras_recovery = 0;

@@ -1264,7 +1401,7 @@ int amdgpu_ras_reset_error_count(struct amdgpu_device *adev,
}

if (!amdgpu_ras_is_supported(adev, block) ||
!amdgpu_ras_get_mca_debug_mode(adev))
!amdgpu_ras_get_aca_debug_mode(adev))
return -EOPNOTSUPP;

hive = amdgpu_get_xgmi_hive(adev);
@@ -1276,7 +1413,8 @@ int amdgpu_ras_reset_error_count(struct amdgpu_device *adev,
/* skip ras error reset in gpu reset */
if ((amdgpu_in_reset(adev) || atomic_read(&ras->in_recovery) ||
hive_ras_recovery) &&
mca_funcs && mca_funcs->mca_set_debug_mode)
((smu_funcs && smu_funcs->set_debug_mode) ||
(mca_funcs && mca_funcs->mca_set_debug_mode)))
return -EOPNOTSUPP;

if (block_obj->hw_ops->reset_ras_error_count)
@@ -1772,7 +1910,10 @@ void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev)
}
}

amdgpu_mca_smu_debugfs_init(adev, dir);
if (amdgpu_aca_is_enabled(adev))
amdgpu_aca_smu_debugfs_init(adev, dir);
else
amdgpu_mca_smu_debugfs_init(adev, dir);
}

/* debugfs end */
@@ -1900,7 +2041,7 @@ static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager *
}
}

amdgpu_umc_poison_handler(adev, false);
amdgpu_umc_poison_handler(adev, obj->head.block, false);

if (block_obj->hw_ops && block_obj->hw_ops->handle_poison_consumption)
poison_stat = block_obj->hw_ops->handle_poison_consumption(adev);
@@ -1951,6 +2092,7 @@ static void amdgpu_ras_interrupt_umc_handler(struct ras_manager *obj,
*/
obj->err_data.ue_count += err_data.ue_count;
obj->err_data.ce_count += err_data.ce_count;
obj->err_data.de_count += err_data.de_count;
}

amdgpu_ras_error_data_fini(&err_data);
@@ -2520,6 +2662,32 @@ static void amdgpu_ras_validate_threshold(struct amdgpu_device *adev,
}
}

static int amdgpu_ras_page_retirement_thread(void *param)
{
struct amdgpu_device *adev = (struct amdgpu_device *)param;
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);

while (!kthread_should_stop()) {

wait_event_interruptible(con->page_retirement_wq,
kthread_should_stop() ||
atomic_read(&con->page_retirement_req_cnt));

if (kthread_should_stop())
break;

dev_info(adev->dev, "Start processing page retirement. request:%d\n",
atomic_read(&con->page_retirement_req_cnt));

atomic_dec(&con->page_retirement_req_cnt);

amdgpu_umc_bad_page_polling_timeout(adev,
false, MAX_UMC_POISON_POLLING_TIME_ASYNC);
}

return 0;
}

int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
@@ -2583,6 +2751,16 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
}
}

mutex_init(&con->page_retirement_lock);
init_waitqueue_head(&con->page_retirement_wq);
atomic_set(&con->page_retirement_req_cnt, 0);
con->page_retirement_thread =
kthread_run(amdgpu_ras_page_retirement_thread, adev, "umc_page_retirement");
if (IS_ERR(con->page_retirement_thread)) {
con->page_retirement_thread = NULL;
dev_warn(adev->dev, "Failed to create umc_page_retirement thread!!!\n");
}

#ifdef CONFIG_X86_MCE_AMD
if ((adev->asic_type == CHIP_ALDEBARAN) &&
(adev->gmc.xgmi.connected_to_cpu))
@@ -2618,6 +2796,11 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev)
if (!data)
return 0;

if (con->page_retirement_thread)
kthread_stop(con->page_retirement_thread);

atomic_set(&con->page_retirement_req_cnt, 0);

cancel_work_sync(&con->recovery_work);

mutex_lock(&con->recovery_lock);
@ -2679,107 +2862,54 @@ static void amdgpu_ras_get_quirks(struct amdgpu_device *adev)
|
||||
adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__GFX);
|
||||
}
|
||||
|
||||
/*
|
||||
* check hardware's ras ability which will be saved in hw_supported.
|
||||
* if hardware does not support ras, we can skip some ras initializtion and
|
||||
* forbid some ras operations from IP.
|
||||
* if software itself, say boot parameter, limit the ras ability. We still
|
||||
* need allow IP do some limited operations, like disable. In such case,
|
||||
* we have to initialize ras as normal. but need check if operation is
|
||||
* allowed or not in each function.
|
||||
*/
|
||||
static void amdgpu_ras_check_supported(struct amdgpu_device *adev)
|
||||
/* Query ras capablity via atomfirmware interface */
|
||||
static void amdgpu_ras_query_ras_capablity_from_vbios(struct amdgpu_device *adev)
|
||||
{
|
||||
adev->ras_hw_enabled = adev->ras_enabled = 0;
|
||||
|
||||
if (!amdgpu_ras_asic_supported(adev))
|
||||
return;
|
||||
|
||||
if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
|
||||
if (amdgpu_atomfirmware_mem_ecc_supported(adev)) {
|
||||
dev_info(adev->dev, "MEM ECC is active.\n");
|
||||
adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__UMC |
|
||||
1 << AMDGPU_RAS_BLOCK__DF);
|
||||
} else {
|
||||
dev_info(adev->dev, "MEM ECC is not presented.\n");
|
||||
}
|
||||
|
||||
if (amdgpu_atomfirmware_sram_ecc_supported(adev)) {
|
||||
dev_info(adev->dev, "SRAM ECC is active.\n");
|
||||
if (!amdgpu_sriov_vf(adev))
|
||||
adev->ras_hw_enabled |= ~(1 << AMDGPU_RAS_BLOCK__UMC |
|
||||
1 << AMDGPU_RAS_BLOCK__DF);
|
||||
else
|
||||
adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__PCIE_BIF |
|
||||
1 << AMDGPU_RAS_BLOCK__SDMA |
|
||||
1 << AMDGPU_RAS_BLOCK__GFX);
|
||||
|
||||
/* VCN/JPEG RAS can be supported on both bare metal and
|
||||
* SRIOV environment
|
||||
*/
|
||||
if (amdgpu_ip_version(adev, VCN_HWIP, 0) ==
|
||||
IP_VERSION(2, 6, 0) ||
|
||||
amdgpu_ip_version(adev, VCN_HWIP, 0) ==
|
||||
IP_VERSION(4, 0, 0) ||
|
||||
amdgpu_ip_version(adev, VCN_HWIP, 0) ==
|
||||
IP_VERSION(4, 0, 3))
|
||||
adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__VCN |
|
||||
1 << AMDGPU_RAS_BLOCK__JPEG);
|
||||
else
|
||||
adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__VCN |
|
||||
1 << AMDGPU_RAS_BLOCK__JPEG);
|
||||
|
||||
/*
|
||||
* XGMI RAS is not supported if xgmi num physical nodes
|
||||
* is zero
|
||||
*/
|
||||
if (!adev->gmc.xgmi.num_physical_nodes)
|
||||
adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__XGMI_WAFL);
|
||||
} else {
|
||||
dev_info(adev->dev, "SRAM ECC is not presented.\n");
|
||||
}
|
||||
/* mem_ecc cap */
|
||||
if (amdgpu_atomfirmware_mem_ecc_supported(adev)) {
|
||||
dev_info(adev->dev, "MEM ECC is active.\n");
|
||||
adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__UMC |
|
||||
1 << AMDGPU_RAS_BLOCK__DF);
|
||||
} else {
|
||||
/* driver only manages a few IP blocks RAS feature
|
||||
* when GPU is connected cpu through XGMI */
|
||||
adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__GFX |
|
||||
1 << AMDGPU_RAS_BLOCK__SDMA |
|
||||
1 << AMDGPU_RAS_BLOCK__MMHUB);
|
||||
dev_info(adev->dev, "MEM ECC is not presented.\n");
|
||||
}
|
||||
|
||||
amdgpu_ras_get_quirks(adev);
|
||||
/* sram_ecc cap */
|
||||
if (amdgpu_atomfirmware_sram_ecc_supported(adev)) {
|
||||
dev_info(adev->dev, "SRAM ECC is active.\n");
|
||||
if (!amdgpu_sriov_vf(adev))
|
||||
adev->ras_hw_enabled |= ~(1 << AMDGPU_RAS_BLOCK__UMC |
|
||||
1 << AMDGPU_RAS_BLOCK__DF);
|
||||
else
|
||||
adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__PCIE_BIF |
|
||||
1 << AMDGPU_RAS_BLOCK__SDMA |
|
||||
1 << AMDGPU_RAS_BLOCK__GFX);
|
||||
|
||||
/* hw_supported needs to be aligned with RAS block mask. */
|
||||
adev->ras_hw_enabled &= AMDGPU_RAS_BLOCK_MASK;
|
||||
/*
|
||||
* VCN/JPEG RAS can be supported on both bare metal and
|
||||
* SRIOV environment
|
||||
*/
|
||||
if (amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(2, 6, 0) ||
|
||||
amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 0) ||
|
||||
amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 3))
|
||||
adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__VCN |
|
||||
1 << AMDGPU_RAS_BLOCK__JPEG);
|
||||
else
|
||||
adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__VCN |
|
||||
1 << AMDGPU_RAS_BLOCK__JPEG);
|
||||
|
||||
adev->ras_enabled = amdgpu_ras_enable == 0 ? 0 :
|
||||
adev->ras_hw_enabled & amdgpu_ras_mask;
|
||||
}
|
||||
|
||||
static void amdgpu_ras_counte_dw(struct work_struct *work)
|
||||
{
|
||||
struct amdgpu_ras *con = container_of(work, struct amdgpu_ras,
|
||||
ras_counte_delay_work.work);
|
||||
struct amdgpu_device *adev = con->adev;
|
||||
struct drm_device *dev = adev_to_drm(adev);
|
||||
unsigned long ce_count, ue_count;
|
||||
int res;
|
||||
|
||||
res = pm_runtime_get_sync(dev->dev);
|
||||
if (res < 0)
|
||||
goto Out;
|
||||
|
||||
/* Cache new values.
|
||||
*/
|
||||
if (amdgpu_ras_query_error_count(adev, &ce_count, &ue_count, NULL) == 0) {
|
||||
atomic_set(&con->ras_ce_count, ce_count);
|
||||
atomic_set(&con->ras_ue_count, ue_count);
|
||||
/*
|
||||
* XGMI RAS is not supported if xgmi num physical nodes
|
||||
* is zero
|
||||
*/
|
||||
if (!adev->gmc.xgmi.num_physical_nodes)
|
||||
adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__XGMI_WAFL);
|
||||
} else {
|
||||
dev_info(adev->dev, "SRAM ECC is not presented.\n");
|
||||
}
|
||||
|
||||
pm_runtime_mark_last_busy(dev->dev);
|
||||
Out:
|
||||
pm_runtime_put_autosuspend(dev->dev);
|
||||
}
|
||||
|
||||
/* Query poison mode from umc/df IP callbacks */
|
||||
static void amdgpu_ras_query_poison_mode(struct amdgpu_device *adev)
|
||||
{
|
||||
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
|
||||
@ -2813,6 +2943,79 @@ static void amdgpu_ras_query_poison_mode(struct amdgpu_device *adev)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* check hardware's ras ability which will be saved in hw_supported.
|
||||
* if hardware does not support ras, we can skip some ras initializtion and
|
||||
* forbid some ras operations from IP.
|
||||
* if software itself, say boot parameter, limit the ras ability. We still
|
||||
* need allow IP do some limited operations, like disable. In such case,
|
||||
* we have to initialize ras as normal. but need check if operation is
|
||||
* allowed or not in each function.
|
||||
*/
|
||||
static void amdgpu_ras_check_supported(struct amdgpu_device *adev)
|
||||
{
|
||||
adev->ras_hw_enabled = adev->ras_enabled = 0;
|
||||
|
||||
if (!amdgpu_ras_asic_supported(adev))
|
||||
return;
|
||||
|
||||
/* query ras capability from psp */
|
||||
if (amdgpu_psp_get_ras_capability(&adev->psp))
|
||||
goto init_ras_enabled_flag;
|
||||
|
||||
/* query ras capablity from bios */
|
||||
if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
|
||||
amdgpu_ras_query_ras_capablity_from_vbios(adev);
|
||||
} else {
|
||||
/* driver only manages a few IP blocks RAS feature
|
||||
* when GPU is connected cpu through XGMI */
|
||||
adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__GFX |
|
||||
1 << AMDGPU_RAS_BLOCK__SDMA |
|
||||
1 << AMDGPU_RAS_BLOCK__MMHUB);
|
||||
}
|
||||
|
||||
/* apply asic specific settings (vega20 only for now) */
|
||||
amdgpu_ras_get_quirks(adev);
|
||||
|
||||
/* query poison mode from umc/df ip callback */
|
||||
amdgpu_ras_query_poison_mode(adev);
|
||||
|
||||
init_ras_enabled_flag:
|
||||
/* hw_supported needs to be aligned with RAS block mask. */
|
||||
adev->ras_hw_enabled &= AMDGPU_RAS_BLOCK_MASK;
|
||||
|
||||
adev->ras_enabled = amdgpu_ras_enable == 0 ? 0 :
|
||||
adev->ras_hw_enabled & amdgpu_ras_mask;
|
||||
|
||||
/* aca is disabled by default */
|
||||
adev->aca.is_enabled = false;
|
||||
}
|
||||
|
||||
static void amdgpu_ras_counte_dw(struct work_struct *work)
|
||||
{
|
||||
struct amdgpu_ras *con = container_of(work, struct amdgpu_ras,
|
||||
ras_counte_delay_work.work);
|
||||
struct amdgpu_device *adev = con->adev;
|
||||
struct drm_device *dev = adev_to_drm(adev);
|
||||
unsigned long ce_count, ue_count;
|
||||
int res;
|
||||
|
||||
res = pm_runtime_get_sync(dev->dev);
|
||||
if (res < 0)
|
||||
goto Out;
|
||||
|
||||
/* Cache new values.
|
||||
*/
|
||||
if (amdgpu_ras_query_error_count(adev, &ce_count, &ue_count, NULL) == 0) {
|
||||
atomic_set(&con->ras_ce_count, ce_count);
|
||||
atomic_set(&con->ras_ue_count, ue_count);
|
||||
}
|
||||
|
||||
pm_runtime_mark_last_busy(dev->dev);
|
||||
Out:
|
||||
pm_runtime_put_autosuspend(dev->dev);
|
||||
}
|
||||
|
||||
static int amdgpu_get_ras_schema(struct amdgpu_device *adev)
|
||||
{
|
||||
return amdgpu_ras_is_poison_mode_supported(adev) ? AMDGPU_RAS_ERROR__POISON : 0 |
|
||||
@ -2917,12 +3120,11 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
|
||||
goto release_con;
|
||||
}
|
||||
|
||||
amdgpu_ras_query_poison_mode(adev);
|
||||
|
||||
/* Packed socket_id to ras feature mask bits[31:29] */
|
||||
if (adev->smuio.funcs &&
|
||||
adev->smuio.funcs->get_socket_id)
|
||||
con->features |= ((adev->smuio.funcs->get_socket_id(adev)) << 29);
|
||||
con->features |= ((adev->smuio.funcs->get_socket_id(adev)) <<
|
||||
AMDGPU_RAS_FEATURES_SOCKETID_SHIFT);
|
||||
|
||||
/* Get RAS schema for particular SOC */
|
||||
con->schema = amdgpu_get_ras_schema(adev);
|
||||
@ -3128,7 +3330,7 @@ void amdgpu_ras_suspend(struct amdgpu_device *adev)
|
||||
|
||||
amdgpu_ras_disable_all_features(adev, 0);
|
||||
/* Make sure all ras objects are disabled. */
|
||||
if (con->features)
|
||||
if (AMDGPU_RAS_GET_FEATURES(con->features))
|
||||
amdgpu_ras_disable_all_features(adev, 1);
|
||||
}
|
||||
|
||||
@ -3142,15 +3344,29 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev)
|
||||
if (amdgpu_sriov_vf(adev))
|
||||
return 0;
|
||||
|
||||
amdgpu_ras_set_mca_debug_mode(adev, false);
|
||||
if (amdgpu_aca_is_enabled(adev)) {
|
||||
if (amdgpu_in_reset(adev))
|
||||
r = amdgpu_aca_reset(adev);
|
||||
else
|
||||
r = amdgpu_aca_init(adev);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
amdgpu_ras_set_aca_debug_mode(adev, false);
|
||||
} else {
|
||||
amdgpu_ras_set_mca_debug_mode(adev, false);
|
||||
}
|
||||
|
||||
list_for_each_entry_safe(node, tmp, &adev->ras_list, node) {
|
||||
if (!node->ras_obj) {
|
||||
obj = node->ras_obj;
|
||||
if (!obj) {
|
||||
dev_warn(adev->dev, "Warning: abnormal ras list node.\n");
|
||||
continue;
|
||||
}
|
||||
|
||||
obj = node->ras_obj;
|
||||
if (!amdgpu_ras_is_supported(adev, obj->ras_comm.block))
|
||||
continue;
|
||||
|
||||
if (obj->ras_late_init) {
|
||||
r = obj->ras_late_init(adev, &obj->ras_comm);
|
||||
if (r) {
|
||||
@ -3175,7 +3391,7 @@ int amdgpu_ras_pre_fini(struct amdgpu_device *adev)
|
||||
|
||||
|
||||
/* Need disable ras on all IPs here before ip [hw/sw]fini */
|
||||
if (con->features)
|
||||
if (AMDGPU_RAS_GET_FEATURES(con->features))
|
||||
amdgpu_ras_disable_all_features(adev, 0);
|
||||
amdgpu_ras_recovery_fini(adev);
|
||||
return 0;
|
||||
@ -3208,10 +3424,13 @@ int amdgpu_ras_fini(struct amdgpu_device *adev)
|
||||
amdgpu_ras_fs_fini(adev);
|
||||
amdgpu_ras_interrupt_remove_all(adev);
|
||||
|
||||
WARN(con->features, "Feature mask is not cleared");
|
||||
if (amdgpu_aca_is_enabled(adev))
|
||||
amdgpu_aca_fini(adev);
|
||||
|
||||
if (con->features)
|
||||
amdgpu_ras_disable_all_features(adev, 1);
|
||||
WARN(AMDGPU_RAS_GET_FEATURES(con->features), "Feature mask is not cleared");
|
||||
|
||||
if (AMDGPU_RAS_GET_FEATURES(con->features))
|
||||
amdgpu_ras_disable_all_features(adev, 0);
|
||||
|
||||
cancel_delayed_work_sync(&con->ras_counte_delay_work);
|
||||
|
||||
@ -3425,22 +3644,41 @@ int amdgpu_ras_set_mca_debug_mode(struct amdgpu_device *adev, bool enable)
|
||||
if (con) {
|
||||
ret = amdgpu_mca_smu_set_debug_mode(adev, enable);
|
||||
if (!ret)
|
||||
con->is_mca_debug_mode = enable;
|
||||
con->is_aca_debug_mode = enable;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool amdgpu_ras_get_mca_debug_mode(struct amdgpu_device *adev)
|
||||
int amdgpu_ras_set_aca_debug_mode(struct amdgpu_device *adev, bool enable)
|
||||
{
|
||||
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
|
||||
int ret = 0;
|
||||
|
||||
if (con) {
|
||||
if (amdgpu_aca_is_enabled(adev))
|
||||
ret = amdgpu_aca_smu_set_debug_mode(adev, enable);
|
||||
else
|
||||
ret = amdgpu_mca_smu_set_debug_mode(adev, enable);
|
||||
if (!ret)
|
||||
con->is_aca_debug_mode = enable;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool amdgpu_ras_get_aca_debug_mode(struct amdgpu_device *adev)
|
||||
{
|
||||
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
|
||||
const struct aca_smu_funcs *smu_funcs = adev->aca.smu_funcs;
|
||||
const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;
|
||||
|
||||
if (!con)
|
||||
return false;
|
||||
|
||||
if (mca_funcs && mca_funcs->mca_set_debug_mode)
|
||||
return con->is_mca_debug_mode;
|
||||
if ((amdgpu_aca_is_enabled(adev) && smu_funcs && smu_funcs->set_debug_mode) ||
|
||||
(!amdgpu_aca_is_enabled(adev) && mca_funcs && mca_funcs->mca_set_debug_mode))
|
||||
return con->is_aca_debug_mode;
|
||||
else
|
||||
return true;
|
||||
}
|
||||
@ -3450,15 +3688,16 @@ bool amdgpu_ras_get_error_query_mode(struct amdgpu_device *adev,
|
||||
{
|
||||
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
|
||||
const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;
|
||||
const struct aca_smu_funcs *smu_funcs = adev->aca.smu_funcs;
|
||||
|
||||
if (!con) {
|
||||
*error_query_mode = AMDGPU_RAS_INVALID_ERROR_QUERY;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (mca_funcs && mca_funcs->mca_set_debug_mode)
|
||||
if ((smu_funcs && smu_funcs->set_debug_mode) || (mca_funcs && mca_funcs->mca_set_debug_mode))
|
||||
*error_query_mode =
|
||||
(con->is_mca_debug_mode) ? AMDGPU_RAS_DIRECT_ERROR_QUERY : AMDGPU_RAS_FIRMWARE_ERROR_QUERY;
|
||||
(con->is_aca_debug_mode) ? AMDGPU_RAS_DIRECT_ERROR_QUERY : AMDGPU_RAS_FIRMWARE_ERROR_QUERY;
|
||||
else
|
||||
*error_query_mode = AMDGPU_RAS_DIRECT_ERROR_QUERY;
|
||||
|
||||
@ -3699,8 +3938,7 @@ static int ras_err_info_cmp(void *priv, const struct list_head *a, const struct
|
||||
}
|
||||
|
||||
static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_data,
|
||||
struct amdgpu_smuio_mcm_config_info *mcm_info,
|
||||
struct ras_err_addr *err_addr)
|
||||
struct amdgpu_smuio_mcm_config_info *mcm_info)
|
||||
{
|
||||
struct ras_err_node *err_node;
|
||||
|
||||
@ -3712,10 +3950,9 @@ static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_d
|
||||
if (!err_node)
|
||||
return NULL;
|
||||
|
||||
memcpy(&err_node->err_info.mcm_info, mcm_info, sizeof(*mcm_info));
|
||||
INIT_LIST_HEAD(&err_node->err_info.err_addr_list);
|
||||
|
||||
if (err_addr)
|
||||
memcpy(&err_node->err_info.err_addr, err_addr, sizeof(*err_addr));
|
||||
memcpy(&err_node->err_info.mcm_info, mcm_info, sizeof(*mcm_info));
|
||||
|
||||
err_data->err_list_count++;
|
||||
list_add_tail(&err_node->node, &err_data->err_node_list);
|
||||
@ -3724,6 +3961,29 @@ static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_d
|
||||
return &err_node->err_info;
|
||||
}
|
||||
|
||||
void amdgpu_ras_add_mca_err_addr(struct ras_err_info *err_info, struct ras_err_addr *err_addr)
|
||||
{
|
||||
struct ras_err_addr *mca_err_addr;
|
||||
|
||||
mca_err_addr = kzalloc(sizeof(*mca_err_addr), GFP_KERNEL);
|
||||
if (!mca_err_addr)
|
||||
return;
|
||||
|
||||
INIT_LIST_HEAD(&mca_err_addr->node);
|
||||
|
||||
mca_err_addr->err_status = err_addr->err_status;
|
||||
mca_err_addr->err_ipid = err_addr->err_ipid;
|
||||
mca_err_addr->err_addr = err_addr->err_addr;
|
||||
|
||||
list_add_tail(&mca_err_addr->node, &err_info->err_addr_list);
|
||||
}
|
||||
|
||||
void amdgpu_ras_del_mca_err_addr(struct ras_err_info *err_info, struct ras_err_addr *mca_err_addr)
|
||||
{
|
||||
list_del(&mca_err_addr->node);
|
||||
kfree(mca_err_addr);
|
||||
}
|
||||
|
||||
int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data,
|
||||
struct amdgpu_smuio_mcm_config_info *mcm_info,
|
||||
struct ras_err_addr *err_addr, u64 count)
|
||||
@ -3736,10 +3996,13 @@ int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data,
|
||||
if (!count)
|
||||
return 0;
|
||||
|
||||
err_info = amdgpu_ras_error_get_info(err_data, mcm_info, err_addr);
|
||||
err_info = amdgpu_ras_error_get_info(err_data, mcm_info);
|
||||
if (!err_info)
|
||||
return -EINVAL;
|
||||
|
||||
if (err_addr && err_addr->err_status)
|
||||
amdgpu_ras_add_mca_err_addr(err_info, err_addr);
|
||||
|
||||
err_info->ue_count += count;
|
||||
err_data->ue_count += count;
|
||||
|
||||
@ -3758,7 +4021,7 @@ int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data,
|
||||
if (!count)
|
||||
return 0;
|
||||
|
||||
err_info = amdgpu_ras_error_get_info(err_data, mcm_info, err_addr);
|
||||
err_info = amdgpu_ras_error_get_info(err_data, mcm_info);
|
||||
if (!err_info)
|
||||
return -EINVAL;
|
||||
|
||||
@ -3767,3 +4030,135 @@ int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data,
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int amdgpu_ras_error_statistic_de_count(struct ras_err_data *err_data,
|
||||
struct amdgpu_smuio_mcm_config_info *mcm_info,
|
||||
struct ras_err_addr *err_addr, u64 count)
|
||||
{
|
||||
struct ras_err_info *err_info;
|
||||
|
||||
if (!err_data || !mcm_info)
|
||||
return -EINVAL;
|
||||
|
||||
if (!count)
|
||||
return 0;
|
||||
|
||||
err_info = amdgpu_ras_error_get_info(err_data, mcm_info);
|
||||
if (!err_info)
|
||||
return -EINVAL;
|
||||
|
||||
if (err_addr && err_addr->err_status)
|
||||
amdgpu_ras_add_mca_err_addr(err_info, err_addr);
|
||||
|
||||
err_info->de_count += count;
|
||||
err_data->de_count += count;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define mmMP0_SMN_C2PMSG_92 0x1609C
|
||||
#define mmMP0_SMN_C2PMSG_126 0x160BE
|
||||
static void amdgpu_ras_boot_time_error_reporting(struct amdgpu_device *adev,
|
||||
u32 instance, u32 boot_error)
|
||||
{
|
||||
u32 socket_id, aid_id, hbm_id;
|
||||
u32 reg_data;
|
||||
u64 reg_addr;
|
||||
|
||||
socket_id = AMDGPU_RAS_GPU_ERR_SOCKET_ID(boot_error);
|
||||
aid_id = AMDGPU_RAS_GPU_ERR_AID_ID(boot_error);
|
||||
hbm_id = AMDGPU_RAS_GPU_ERR_HBM_ID(boot_error);
|
||||
|
||||
/* The pattern for smn addressing in other SOC could be different from
|
||||
* the one for aqua_vanjaram. We should revisit the code if the pattern
|
||||
* is changed. In such case, replace the aqua_vanjaram implementation
|
||||
* with more common helper */
|
||||
reg_addr = (mmMP0_SMN_C2PMSG_92 << 2) +
|
||||
aqua_vanjaram_encode_ext_smn_addressing(instance);
|
||||
|
||||
reg_data = amdgpu_device_indirect_rreg_ext(adev, reg_addr);
|
||||
dev_err(adev->dev, "socket: %d, aid: %d, firmware boot failed, fw status is 0x%x\n",
|
||||
socket_id, aid_id, reg_data);
|
||||
|
||||
if (AMDGPU_RAS_GPU_ERR_MEM_TRAINING(boot_error))
|
||||
dev_info(adev->dev, "socket: %d, aid: %d, hbm: %d, memory training failed\n",
|
||||
socket_id, aid_id, hbm_id);
|
||||
|
||||
if (AMDGPU_RAS_GPU_ERR_FW_LOAD(boot_error))
|
||||
dev_info(adev->dev, "socket: %d, aid: %d, firmware load failed at boot time\n",
|
||||
socket_id, aid_id);
|
||||
|
||||
if (AMDGPU_RAS_GPU_ERR_WAFL_LINK_TRAINING(boot_error))
|
||||
dev_info(adev->dev, "socket: %d, aid: %d, wafl link training failed\n",
|
||||
socket_id, aid_id);
|
||||
|
||||
if (AMDGPU_RAS_GPU_ERR_XGMI_LINK_TRAINING(boot_error))
|
||||
dev_info(adev->dev, "socket: %d, aid: %d, xgmi link training failed\n",
|
||||
socket_id, aid_id);
|
||||
|
||||
if (AMDGPU_RAS_GPU_ERR_USR_CP_LINK_TRAINING(boot_error))
|
||||
dev_info(adev->dev, "socket: %d, aid: %d, usr cp link training failed\n",
|
||||
socket_id, aid_id);
|
||||
|
||||
if (AMDGPU_RAS_GPU_ERR_USR_DP_LINK_TRAINING(boot_error))
|
||||
dev_info(adev->dev, "socket: %d, aid: %d, usr dp link training failed\n",
|
||||
socket_id, aid_id);
|
||||
|
||||
if (AMDGPU_RAS_GPU_ERR_HBM_MEM_TEST(boot_error))
|
||||
dev_info(adev->dev, "socket: %d, aid: %d, hbm: %d, hbm memory test failed\n",
|
||||
socket_id, aid_id, hbm_id);
|
||||
|
||||
if (AMDGPU_RAS_GPU_ERR_HBM_BIST_TEST(boot_error))
|
||||
dev_info(adev->dev, "socket: %d, aid: %d, hbm: %d, hbm bist test failed\n",
|
||||
socket_id, aid_id, hbm_id);
|
||||
}
|
||||
|
||||
static int amdgpu_ras_wait_for_boot_complete(struct amdgpu_device *adev,
|
||||
u32 instance, u32 *boot_error)
|
||||
{
|
||||
u32 reg_addr;
|
||||
u32 reg_data;
|
||||
int retry_loop;
|
||||
|
||||
reg_addr = (mmMP0_SMN_C2PMSG_92 << 2) +
|
||||
aqua_vanjaram_encode_ext_smn_addressing(instance);
|
||||
|
||||
for (retry_loop = 0; retry_loop < AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT; retry_loop++) {
|
||||
reg_data = amdgpu_device_indirect_rreg_ext(adev, reg_addr);
|
||||
if ((reg_data & AMDGPU_RAS_BOOT_STATUS_MASK) == AMDGPU_RAS_BOOT_STEADY_STATUS) {
|
||||
*boot_error = AMDGPU_RAS_BOOT_SUCEESS;
|
||||
return 0;
|
||||
}
|
||||
msleep(1);
|
||||
}
|
||||
|
||||
/* The pattern for smn addressing in other SOC could be different from
|
||||
* the one for aqua_vanjaram. We should revisit the code if the pattern
|
||||
* is changed. In such case, replace the aqua_vanjaram implementation
|
||||
* with more common helper */
|
||||
reg_addr = (mmMP0_SMN_C2PMSG_126 << 2) +
|
||||
aqua_vanjaram_encode_ext_smn_addressing(instance);
|
||||
|
||||
for (retry_loop = 0; retry_loop < AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT; retry_loop++) {
|
||||
reg_data = amdgpu_device_indirect_rreg_ext(adev, reg_addr);
|
||||
if (AMDGPU_RAS_GPU_ERR_BOOT_STATUS(reg_data)) {
|
||||
*boot_error = reg_data;
|
||||
return 0;
|
||||
}
|
||||
msleep(1);
|
||||
}
|
||||
|
||||
*boot_error = reg_data;
|
||||
return -ETIME;
|
||||
}
|
||||
|
||||
void amdgpu_ras_query_boot_status(struct amdgpu_device *adev, u32 num_instances)
|
||||
{
|
||||
u32 boot_error = 0;
|
||||
u32 i;
|
||||
|
||||
for (i = 0; i < num_instances; i++) {
|
||||
if (amdgpu_ras_wait_for_boot_complete(adev, i, &boot_error))
|
||||
amdgpu_ras_boot_time_error_reporting(adev, i, boot_error);
|
||||
}
|
||||
}
|
||||
|
@ -29,9 +29,28 @@
|
||||
#include "ta_ras_if.h"
|
||||
#include "amdgpu_ras_eeprom.h"
|
||||
#include "amdgpu_smuio.h"
|
||||
#include "amdgpu_aca.h"
|
||||
|
||||
struct amdgpu_iv_entry;
|
||||
|
||||
#define AMDGPU_RAS_GPU_ERR_MEM_TRAINING(x) AMDGPU_GET_REG_FIELD(x, 0, 0)
|
||||
#define AMDGPU_RAS_GPU_ERR_FW_LOAD(x) AMDGPU_GET_REG_FIELD(x, 1, 1)
|
||||
#define AMDGPU_RAS_GPU_ERR_WAFL_LINK_TRAINING(x) AMDGPU_GET_REG_FIELD(x, 2, 2)
|
||||
#define AMDGPU_RAS_GPU_ERR_XGMI_LINK_TRAINING(x) AMDGPU_GET_REG_FIELD(x, 3, 3)
|
||||
#define AMDGPU_RAS_GPU_ERR_USR_CP_LINK_TRAINING(x) AMDGPU_GET_REG_FIELD(x, 4, 4)
|
||||
#define AMDGPU_RAS_GPU_ERR_USR_DP_LINK_TRAINING(x) AMDGPU_GET_REG_FIELD(x, 5, 5)
|
||||
#define AMDGPU_RAS_GPU_ERR_HBM_MEM_TEST(x) AMDGPU_GET_REG_FIELD(x, 6, 6)
|
||||
#define AMDGPU_RAS_GPU_ERR_HBM_BIST_TEST(x) AMDGPU_GET_REG_FIELD(x, 7, 7)
|
||||
#define AMDGPU_RAS_GPU_ERR_SOCKET_ID(x) AMDGPU_GET_REG_FIELD(x, 10, 8)
|
||||
#define AMDGPU_RAS_GPU_ERR_AID_ID(x) AMDGPU_GET_REG_FIELD(x, 12, 11)
|
||||
#define AMDGPU_RAS_GPU_ERR_HBM_ID(x) AMDGPU_GET_REG_FIELD(x, 13, 13)
|
||||
#define AMDGPU_RAS_GPU_ERR_BOOT_STATUS(x) AMDGPU_GET_REG_FIELD(x, 31, 31)
|
||||
|
||||
#define AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT 1000
|
||||
#define AMDGPU_RAS_BOOT_STEADY_STATUS 0xBA
|
||||
#define AMDGPU_RAS_BOOT_STATUS_MASK 0xFF
|
||||
#define AMDGPU_RAS_BOOT_SUCEESS 0x80000000
|
||||
|
||||
#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS (0x1 << 0)
|
||||
/* position of instance value in sub_block_index of
|
||||
* ta_ras_trigger_error_input, the sub block uses lower 12 bits
|
||||
@ -39,6 +58,12 @@ struct amdgpu_iv_entry;
|
||||
#define AMDGPU_RAS_INST_MASK 0xfffff000
|
||||
#define AMDGPU_RAS_INST_SHIFT 0xc
|
||||
|
||||
#define AMDGPU_RAS_FEATURES_SOCKETID_SHIFT 29
|
||||
#define AMDGPU_RAS_FEATURES_SOCKETID_MASK 0xe0000000
|
||||
|
||||
/* The high three bits indicates socketid */
|
||||
#define AMDGPU_RAS_GET_FEATURES(val) ((val) & ~AMDGPU_RAS_FEATURES_SOCKETID_MASK)
|
||||
|
||||
enum amdgpu_ras_block {
|
||||
AMDGPU_RAS_BLOCK__UMC = 0,
|
||||
AMDGPU_RAS_BLOCK__SDMA,
|
||||
@ -57,6 +82,8 @@ enum amdgpu_ras_block {
|
||||
AMDGPU_RAS_BLOCK__MCA,
|
||||
AMDGPU_RAS_BLOCK__VCN,
|
||||
AMDGPU_RAS_BLOCK__JPEG,
|
||||
AMDGPU_RAS_BLOCK__IH,
|
||||
AMDGPU_RAS_BLOCK__MPIO,
|
||||
|
||||
AMDGPU_RAS_BLOCK__LAST
|
||||
};
|
||||
@ -441,10 +468,15 @@ struct amdgpu_ras {
|
||||
/* Indicates smu whether need update bad channel info */
|
||||
bool update_channel_flag;
|
||||
/* Record status of smu mca debug mode */
|
||||
bool is_mca_debug_mode;
|
||||
bool is_aca_debug_mode;
|
||||
|
||||
/* Record special requirements of gpu reset caller */
|
||||
uint32_t gpu_reset_flags;
|
||||
|
||||
struct task_struct *page_retirement_thread;
|
||||
wait_queue_head_t page_retirement_wq;
|
||||
struct mutex page_retirement_lock;
|
||||
atomic_t page_retirement_req_cnt;
|
||||
};
|
||||
|
||||
struct ras_fs_data {
|
||||
@ -453,6 +485,7 @@ struct ras_fs_data {
|
||||
};
|
||||
|
||||
struct ras_err_addr {
|
||||
struct list_head node;
|
||||
uint64_t err_status;
|
||||
uint64_t err_ipid;
|
||||
uint64_t err_addr;
|
||||
@ -462,7 +495,8 @@ struct ras_err_info {
|
||||
struct amdgpu_smuio_mcm_config_info mcm_info;
|
||||
u64 ce_count;
|
||||
u64 ue_count;
|
||||
struct ras_err_addr err_addr;
|
||||
u64 de_count;
|
||||
struct list_head err_addr_list;
|
||||
};
|
||||
|
||||
struct ras_err_node {
|
||||
@ -473,6 +507,7 @@ struct ras_err_node {
|
||||
struct ras_err_data {
|
||||
unsigned long ue_count;
|
||||
unsigned long ce_count;
|
||||
unsigned long de_count;
|
||||
unsigned long err_addr_cnt;
|
||||
struct eeprom_table_record *err_addr;
|
||||
u32 err_list_count;
|
||||
@ -529,6 +564,8 @@ struct ras_manager {
|
||||
struct ras_ih_data ih_data;
|
||||
|
||||
struct ras_err_data err_data;
|
||||
|
||||
struct aca_handle aca_handle;
|
||||
};
|
||||
|
||||
struct ras_badpage {
|
||||
@ -548,6 +585,7 @@ struct ras_query_if {
|
||||
struct ras_common_if head;
|
||||
unsigned long ue_count;
|
||||
unsigned long ce_count;
|
||||
unsigned long de_count;
|
||||
};
|
||||
|
||||
struct ras_inject_if {
|
||||
@ -781,7 +819,8 @@ struct amdgpu_ras* amdgpu_ras_get_context(struct amdgpu_device *adev);
|
||||
int amdgpu_ras_set_context(struct amdgpu_device *adev, struct amdgpu_ras *ras_con);
|
||||
|
||||
int amdgpu_ras_set_mca_debug_mode(struct amdgpu_device *adev, bool enable);
|
||||
bool amdgpu_ras_get_mca_debug_mode(struct amdgpu_device *adev);
|
||||
int amdgpu_ras_set_aca_debug_mode(struct amdgpu_device *adev, bool enable);
|
||||
bool amdgpu_ras_get_aca_debug_mode(struct amdgpu_device *adev);
|
||||
bool amdgpu_ras_get_error_query_mode(struct amdgpu_device *adev,
|
||||
unsigned int *mode);
|
||||
|
||||
@ -818,5 +857,20 @@ int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data,
|
||||
int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data,
|
||||
struct amdgpu_smuio_mcm_config_info *mcm_info,
|
||||
struct ras_err_addr *err_addr, u64 count);
|
||||
int amdgpu_ras_error_statistic_de_count(struct ras_err_data *err_data,
|
||||
struct amdgpu_smuio_mcm_config_info *mcm_info,
|
||||
struct ras_err_addr *err_addr, u64 count);
|
||||
void amdgpu_ras_query_boot_status(struct amdgpu_device *adev, u32 num_instances);
|
||||
int amdgpu_ras_bind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk,
|
||||
const struct aca_info *aca_info, void *data);
|
||||
int amdgpu_ras_unbind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk);
|
||||
|
||||
ssize_t amdgpu_ras_aca_sysfs_read(struct device *dev, struct device_attribute *attr,
|
||||
struct aca_handle *handle, char *buf, void *data);
|
||||
|
||||
void amdgpu_ras_add_mca_err_addr(struct ras_err_info *err_info,
|
||||
struct ras_err_addr *err_addr);
|
||||
|
||||
void amdgpu_ras_del_mca_err_addr(struct ras_err_info *err_info,
|
||||
struct ras_err_addr *mca_err_addr);
|
||||
#endif
|
||||
|
@@ -241,7 +241,7 @@ void amdgpu_gfx_rlc_setup_cp_table(struct amdgpu_device *adev)
table_size = le32_to_cpu(hdr->jt_size);
}

for (i = 0; i < table_size; i ++) {
for (i = 0; i < table_size; i++) {
dst_ptr[bo_offset + i] =
cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
}

@@ -169,7 +169,7 @@ struct amdgpu_rlc_funcs {
void (*stop)(struct amdgpu_device *adev);
void (*reset)(struct amdgpu_device *adev);
void (*start)(struct amdgpu_device *adev);
void (*update_spm_vmid)(struct amdgpu_device *adev, unsigned vmid);
void (*update_spm_vmid)(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid);
bool (*is_rlcg_access_range)(struct amdgpu_device *adev, uint32_t reg);
};

@ -35,14 +35,29 @@
|
||||
* counters and VM updates. It has maximum count of 32768 64 bit slots.
|
||||
*/
|
||||
|
||||
/**
|
||||
* amdgpu_seq64_get_va_base - Get the seq64 va base address
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
*
|
||||
* Returns:
|
||||
* va base address on success
|
||||
*/
|
||||
static inline u64 amdgpu_seq64_get_va_base(struct amdgpu_device *adev)
|
||||
{
|
||||
u64 addr = adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT;
|
||||
|
||||
addr -= AMDGPU_VA_RESERVED_TOP;
|
||||
|
||||
return addr;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_seq64_map - Map the seq64 memory to VM
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
* @vm: vm pointer
|
||||
* @bo_va: bo_va pointer
|
||||
* @seq64_addr: seq64 vaddr start address
|
||||
* @size: seq64 pool size
|
||||
*
|
||||
* Map the seq64 memory to the given VM.
|
||||
*
|
||||
@ -50,11 +65,11 @@
|
||||
* 0 on success or a negative error code on failure
|
||||
*/
|
||||
int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm,
|
||||
struct amdgpu_bo_va **bo_va, u64 seq64_addr,
|
||||
uint32_t size)
|
||||
struct amdgpu_bo_va **bo_va)
|
||||
{
|
||||
struct amdgpu_bo *bo;
|
||||
struct drm_exec exec;
|
||||
u64 seq64_addr;
|
||||
int r;
|
||||
|
||||
bo = adev->seq64.sbo;
|
||||
@ -77,9 +92,9 @@ int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm,
|
||||
goto error;
|
||||
}
|
||||
|
||||
r = amdgpu_vm_bo_map(adev, *bo_va, seq64_addr, 0, size,
|
||||
AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
|
||||
AMDGPU_PTE_EXECUTABLE);
|
||||
seq64_addr = amdgpu_seq64_get_va_base(adev);
|
||||
r = amdgpu_vm_bo_map(adev, *bo_va, seq64_addr, 0, AMDGPU_VA_RESERVED_SEQ64_SIZE,
|
||||
AMDGPU_PTE_READABLE);
|
||||
if (r) {
|
||||
DRM_ERROR("failed to do bo_map on userq sem, err=%d\n", r);
|
||||
amdgpu_vm_bo_del(adev, *bo_va);
|
||||
@ -144,31 +159,25 @@ error:
|
||||
* amdgpu_seq64_alloc - Allocate a 64 bit memory
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
* @gpu_addr: allocated gpu VA start address
|
||||
* @cpu_addr: allocated cpu VA start address
|
||||
* @va: VA to access the seq in process address space
|
||||
* @cpu_addr: CPU address to access the seq
|
||||
*
|
||||
* Alloc a 64 bit memory from seq64 pool.
|
||||
*
|
||||
* Returns:
|
||||
* 0 on success or a negative error code on failure
|
||||
*/
|
||||
int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *gpu_addr,
|
||||
u64 **cpu_addr)
|
||||
int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *va, u64 **cpu_addr)
|
||||
{
|
||||
unsigned long bit_pos;
|
||||
u32 offset;
|
||||
|
||||
bit_pos = find_first_zero_bit(adev->seq64.used, adev->seq64.num_sem);
|
||||
if (bit_pos >= adev->seq64.num_sem)
|
||||
return -ENOSPC;
|
||||
|
||||
if (bit_pos < adev->seq64.num_sem) {
|
||||
__set_bit(bit_pos, adev->seq64.used);
|
||||
offset = bit_pos << 6; /* convert to qw offset */
|
||||
} else {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
*gpu_addr = offset + AMDGPU_SEQ64_VADDR_START;
|
||||
*cpu_addr = offset + adev->seq64.cpu_base_addr;
|
||||
__set_bit(bit_pos, adev->seq64.used);
|
||||
*va = bit_pos * sizeof(u64) + amdgpu_seq64_get_va_base(adev);
|
||||
*cpu_addr = bit_pos + adev->seq64.cpu_base_addr;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -177,20 +186,17 @@ int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *gpu_addr,
|
||||
* amdgpu_seq64_free - Free the given 64 bit memory
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
* @gpu_addr: gpu start address to be freed
|
||||
* @va: gpu start address to be freed
|
||||
*
|
||||
* Free the given 64 bit memory from seq64 pool.
|
||||
*
|
||||
*/
|
||||
void amdgpu_seq64_free(struct amdgpu_device *adev, u64 gpu_addr)
|
||||
void amdgpu_seq64_free(struct amdgpu_device *adev, u64 va)
|
||||
{
|
||||
u32 offset;
|
||||
unsigned long bit_pos;
|
||||
|
||||
offset = gpu_addr - AMDGPU_SEQ64_VADDR_START;
|
||||
|
||||
offset >>= 6;
|
||||
if (offset < adev->seq64.num_sem)
|
||||
__clear_bit(offset, adev->seq64.used);
|
||||
bit_pos = (va - amdgpu_seq64_get_va_base(adev)) / sizeof(u64);
|
||||
if (bit_pos < adev->seq64.num_sem)
|
||||
__clear_bit(bit_pos, adev->seq64.used);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -229,7 +235,7 @@ int amdgpu_seq64_init(struct amdgpu_device *adev)
|
||||
* AMDGPU_MAX_SEQ64_SLOTS * sizeof(u64) * 8 = AMDGPU_MAX_SEQ64_SLOTS
|
||||
* 64bit slots
|
||||
*/
|
||||
r = amdgpu_bo_create_kernel(adev, AMDGPU_SEQ64_SIZE,
|
||||
r = amdgpu_bo_create_kernel(adev, AMDGPU_VA_RESERVED_SEQ64_SIZE,
|
||||
PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
|
||||
&adev->seq64.sbo, NULL,
|
||||
(void **)&adev->seq64.cpu_base_addr);
|
||||
@ -238,7 +244,7 @@ int amdgpu_seq64_init(struct amdgpu_device *adev)
|
||||
return r;
|
||||
}
|
||||
|
||||
memset(adev->seq64.cpu_base_addr, 0, AMDGPU_SEQ64_SIZE);
|
||||
memset(adev->seq64.cpu_base_addr, 0, AMDGPU_VA_RESERVED_SEQ64_SIZE);
|
||||
|
||||
adev->seq64.num_sem = AMDGPU_MAX_SEQ64_SLOTS;
|
||||
memset(&adev->seq64.used, 0, sizeof(adev->seq64.used));
|
||||
|
@@ -25,10 +25,9 @@
#ifndef __AMDGPU_SEQ64_H__
#define __AMDGPU_SEQ64_H__

#define AMDGPU_SEQ64_SIZE (2ULL << 20)
#define AMDGPU_MAX_SEQ64_SLOTS (AMDGPU_SEQ64_SIZE / (sizeof(u64) * 8))
#define AMDGPU_SEQ64_VADDR_OFFSET 0x50000
#define AMDGPU_SEQ64_VADDR_START (AMDGPU_VA_RESERVED_SIZE + AMDGPU_SEQ64_VADDR_OFFSET)
#include "amdgpu_vm.h"

#define AMDGPU_MAX_SEQ64_SLOTS (AMDGPU_VA_RESERVED_SEQ64_SIZE / sizeof(u64))

struct amdgpu_seq64 {
struct amdgpu_bo *sbo;
@@ -42,7 +41,7 @@ int amdgpu_seq64_init(struct amdgpu_device *adev);
int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *gpu_addr, u64 **cpu_addr);
void amdgpu_seq64_free(struct amdgpu_device *adev, u64 gpu_addr);
int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct amdgpu_bo_va **bo_va, u64 seq64_addr, uint32_t size);
struct amdgpu_bo_va **bo_va);
void amdgpu_seq64_unmap(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv);

#endif

@ -23,6 +23,7 @@
|
||||
|
||||
#include "amdgpu.h"
|
||||
#include "umc_v6_7.h"
|
||||
#define MAX_UMC_POISON_POLLING_TIME_SYNC 20 //ms
|
||||
|
||||
static int amdgpu_umc_convert_error_address(struct amdgpu_device *adev,
|
||||
struct ras_err_data *err_data, uint64_t err_addr,
|
||||
@ -85,18 +86,21 @@ out_fini_err_data:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev,
|
||||
void *ras_error_status,
|
||||
struct amdgpu_iv_entry *entry,
|
||||
bool reset)
|
||||
static void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev,
|
||||
void *ras_error_status)
|
||||
{
|
||||
struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
|
||||
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
|
||||
unsigned int error_query_mode;
|
||||
int ret = 0;
|
||||
unsigned long err_count;
|
||||
|
||||
kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
|
||||
amdgpu_ras_get_error_query_mode(adev, &error_query_mode);
|
||||
|
||||
mutex_lock(&con->page_retirement_lock);
|
||||
ret = amdgpu_dpm_get_ecc_info(adev, (void *)&(con->umc_ecc));
|
||||
if (ret == -EOPNOTSUPP) {
|
||||
if (ret == -EOPNOTSUPP &&
|
||||
error_query_mode == AMDGPU_RAS_DIRECT_ERROR_QUERY) {
|
||||
if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops &&
|
||||
adev->umc.ras->ras_block.hw_ops->query_ras_error_count)
|
||||
adev->umc.ras->ras_block.hw_ops->query_ras_error_count(adev, ras_error_status);
|
||||
@ -120,7 +124,8 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev,
|
||||
*/
|
||||
adev->umc.ras->ras_block.hw_ops->query_ras_error_address(adev, ras_error_status);
|
||||
}
|
||||
} else if (!ret) {
|
||||
} else if (error_query_mode == AMDGPU_RAS_FIRMWARE_ERROR_QUERY ||
|
||||
(!ret && error_query_mode == AMDGPU_RAS_DIRECT_ERROR_QUERY)) {
|
||||
if (adev->umc.ras &&
|
||||
adev->umc.ras->ecc_info_query_ras_error_count)
|
||||
adev->umc.ras->ecc_info_query_ras_error_count(adev, ras_error_status);
|
||||
@ -147,16 +152,13 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev,
|
||||
}
|
||||
|
||||
/* only uncorrectable error needs gpu reset */
|
||||
if (err_data->ue_count) {
|
||||
dev_info(adev->dev, "%ld uncorrectable hardware errors "
|
||||
"detected in UMC block\n",
|
||||
err_data->ue_count);
|
||||
|
||||
if (err_data->ue_count || err_data->de_count) {
|
||||
err_count = err_data->ue_count + err_data->de_count;
|
||||
if ((amdgpu_bad_page_threshold != 0) &&
|
||||
err_data->err_addr_cnt) {
|
||||
amdgpu_ras_add_bad_pages(adev, err_data->err_addr,
|
||||
err_data->err_addr_cnt);
|
||||
amdgpu_ras_save_bad_pages(adev, &(err_data->ue_count));
|
||||
amdgpu_ras_save_bad_pages(adev, &err_count);
|
||||
|
||||
amdgpu_dpm_send_hbm_bad_pages_num(adev, con->eeprom_control.ras_num_recs);
|
||||
|
||||
@ -165,20 +167,87 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev,
|
||||
con->update_channel_flag = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (reset) {
|
||||
/* use mode-2 reset for poison consumption */
|
||||
if (!entry)
|
||||
con->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE2_RESET;
|
||||
amdgpu_ras_reset_gpu(adev);
|
||||
}
|
||||
}
|
||||
|
||||
kfree(err_data->err_addr);
|
||||
|
||||
mutex_unlock(&con->page_retirement_lock);
|
||||
}
|
||||
|
||||
static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev,
|
||||
void *ras_error_status,
|
||||
struct amdgpu_iv_entry *entry,
|
||||
bool reset)
|
||||
{
|
||||
struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
|
||||
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
|
||||
|
||||
kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
|
||||
amdgpu_umc_handle_bad_pages(adev, ras_error_status);
|
||||
|
||||
if (err_data->ue_count && reset) {
|
||||
/* use mode-2 reset for poison consumption */
|
||||
if (!entry)
|
||||
con->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE2_RESET;
|
||||
amdgpu_ras_reset_gpu(adev);
|
||||
}
|
||||
|
||||
return AMDGPU_RAS_SUCCESS;
|
||||
}
|
||||
|
||||
int amdgpu_umc_poison_handler(struct amdgpu_device *adev, bool reset)
|
||||
int amdgpu_umc_bad_page_polling_timeout(struct amdgpu_device *adev,
|
||||
bool reset, uint32_t timeout_ms)
|
||||
{
|
||||
struct ras_err_data err_data;
|
||||
struct ras_common_if head = {
|
||||
.block = AMDGPU_RAS_BLOCK__UMC,
|
||||
};
|
||||
struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head);
|
||||
uint32_t timeout = timeout_ms;
|
||||
|
||||
memset(&err_data, 0, sizeof(err_data));
|
||||
amdgpu_ras_error_data_init(&err_data);
|
||||
|
||||
do {
|
||||
|
||||
amdgpu_umc_handle_bad_pages(adev, &err_data);
|
||||
|
||||
if (timeout && !err_data.de_count) {
|
||||
msleep(1);
|
||||
timeout--;
|
||||
}
|
||||
|
||||
} while (timeout && !err_data.de_count);
|
||||
|
||||
if (!timeout)
|
||||
dev_warn(adev->dev, "Can't find bad pages\n");
|
||||
|
||||
if (err_data.de_count)
|
||||
dev_info(adev->dev, "%ld new deferred hardware errors detected\n", err_data.de_count);
|
||||
|
||||
if (obj) {
|
||||
obj->err_data.ue_count += err_data.ue_count;
|
||||
obj->err_data.ce_count += err_data.ce_count;
|
||||
obj->err_data.de_count += err_data.de_count;
|
||||
}
|
||||
|
||||
amdgpu_ras_error_data_fini(&err_data);
|
||||
|
||||
kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
|
||||
|
||||
if (reset) {
|
||||
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
|
||||
|
||||
/* use mode-2 reset for poison consumption */
|
||||
con->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE2_RESET;
|
||||
amdgpu_ras_reset_gpu(adev);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
enum amdgpu_ras_block block, bool reset)
{
int ret = AMDGPU_RAS_SUCCESS;

@@ -195,27 +264,41 @@ int amdgpu_umc_poison_handler(struct amdgpu_device *adev, bool reset)
}

if (!amdgpu_sriov_vf(adev)) {
struct ras_err_data err_data;
struct ras_common_if head = {
.block = AMDGPU_RAS_BLOCK__UMC,
};
struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head);
if (amdgpu_ip_version(adev, UMC_HWIP, 0) < IP_VERSION(12, 0, 0)) {
struct ras_err_data err_data;
struct ras_common_if head = {
.block = AMDGPU_RAS_BLOCK__UMC,
};
struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head);

ret = amdgpu_ras_error_data_init(&err_data);
if (ret)
return ret;
ret = amdgpu_ras_error_data_init(&err_data);
if (ret)
return ret;

ret = amdgpu_umc_do_page_retirement(adev, &err_data, NULL, reset);
ret = amdgpu_umc_do_page_retirement(adev, &err_data, NULL, reset);

if (ret == AMDGPU_RAS_SUCCESS && obj) {
obj->err_data.ue_count += err_data.ue_count;
obj->err_data.ce_count += err_data.ce_count;
if (ret == AMDGPU_RAS_SUCCESS && obj) {
obj->err_data.ue_count += err_data.ue_count;
obj->err_data.ce_count += err_data.ce_count;
obj->err_data.de_count += err_data.de_count;
}

amdgpu_ras_error_data_fini(&err_data);
} else {
if (reset) {
amdgpu_umc_bad_page_polling_timeout(adev,
reset, MAX_UMC_POISON_POLLING_TIME_SYNC);
} else {
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);

atomic_inc(&con->page_retirement_req_cnt);

wake_up(&con->page_retirement_wq);
}
}

amdgpu_ras_error_data_fini(&err_data);
} else {
if (adev->virt.ops && adev->virt.ops->ras_poison_handler)
adev->virt.ops->ras_poison_handler(adev);
adev->virt.ops->ras_poison_handler(adev, block);
else
dev_warn(adev->dev,
"No ras_poison_handler interface in SRIOV!\n");
@@ -21,7 +21,7 @@
#ifndef __AMDGPU_UMC_H__
#define __AMDGPU_UMC_H__
#include "amdgpu_ras.h"

#include "amdgpu_mca.h"
/*
* (addr / 256) * 4096, the higher 26 bits in ErrorAddr
* is the index of 4KB block
@@ -64,6 +64,8 @@ struct amdgpu_umc_ras {
void *ras_error_status);
void (*ecc_info_query_ras_error_address)(struct amdgpu_device *adev,
void *ras_error_status);
bool (*check_ecc_err_status)(struct amdgpu_device *adev,
enum amdgpu_mca_error_type type, void *ras_error_status);
/* support different eeprom table version for different asic */
void (*set_eeprom_table_version)(struct amdgpu_ras_eeprom_table_header *hdr);
};
@@ -100,7 +102,8 @@ struct amdgpu_umc {

int amdgpu_umc_ras_sw_init(struct amdgpu_device *adev);
int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block);
int amdgpu_umc_poison_handler(struct amdgpu_device *adev, bool reset);
int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
enum amdgpu_ras_block block, bool reset);
int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry);
@@ -118,4 +121,7 @@ int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev,

int amdgpu_umc_loop_channels(struct amdgpu_device *adev,
umc_func func, void *data);

int amdgpu_umc_bad_page_polling_timeout(struct amdgpu_device *adev,
bool reset, uint32_t timeout_ms);
#endif
@@ -69,12 +69,12 @@ struct amdgpu_debugfs_gprwave_data {
};

enum AMDGPU_DEBUGFS_REGS2_CMDS {
AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE=0,
AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE = 0,
AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE_V2,
};

enum AMDGPU_DEBUGFS_GPRWAVE_CMDS {
AMDGPU_DEBUGFS_GPRWAVE_CMD_SET_STATE=0,
AMDGPU_DEBUGFS_GPRWAVE_CMD_SET_STATE = 0,
};

//reg2 interface
@@ -358,7 +358,7 @@ static int setup_umsch_mm_test(struct amdgpu_device *adev,

memset(test->ring_data_cpu_addr, 0, sizeof(struct umsch_mm_test_ring_data));

test->ring_data_gpu_addr = AMDGPU_VA_RESERVED_SIZE;
test->ring_data_gpu_addr = AMDGPU_VA_RESERVED_BOTTOM;
r = map_ring_data(adev, test->vm, test->ring_data_obj, &test->bo_va,
test->ring_data_gpu_addr, sizeof(struct umsch_mm_test_ring_data));
if (r)
@@ -1189,7 +1189,7 @@ int amdgpu_vcn_process_poison_irq(struct amdgpu_device *adev,
amdgpu_ras_interrupt_dispatch(adev, &ih_data);
} else {
if (adev->virt.ops && adev->virt.ops->ras_poison_handler)
adev->virt.ops->ras_poison_handler(adev);
adev->virt.ops->ras_poison_handler(adev, ras_if->block);
else
dev_warn(adev->dev,
"No ras_poison_handler interface in SRIOV for VCN!\n");
@@ -71,59 +71,6 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev)
|
||||
amdgpu_num_kcq = 2;
|
||||
}
|
||||
|
||||
void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
|
||||
uint32_t reg0, uint32_t reg1,
|
||||
uint32_t ref, uint32_t mask,
|
||||
uint32_t xcc_inst)
|
||||
{
|
||||
struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_inst];
|
||||
struct amdgpu_ring *ring = &kiq->ring;
|
||||
signed long r, cnt = 0;
|
||||
unsigned long flags;
|
||||
uint32_t seq;
|
||||
|
||||
if (adev->mes.ring.sched.ready) {
|
||||
amdgpu_mes_reg_write_reg_wait(adev, reg0, reg1,
|
||||
ref, mask);
|
||||
return;
|
||||
}
|
||||
|
||||
spin_lock_irqsave(&kiq->ring_lock, flags);
|
||||
amdgpu_ring_alloc(ring, 32);
|
||||
amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1,
|
||||
ref, mask);
|
||||
r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
|
||||
if (r)
|
||||
goto failed_undo;
|
||||
|
||||
amdgpu_ring_commit(ring);
|
||||
spin_unlock_irqrestore(&kiq->ring_lock, flags);
|
||||
|
||||
r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
|
||||
|
||||
/* don't wait anymore for IRQ context */
|
||||
if (r < 1 && in_interrupt())
|
||||
goto failed_kiq;
|
||||
|
||||
might_sleep();
|
||||
while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
|
||||
|
||||
msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
|
||||
r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
|
||||
}
|
||||
|
||||
if (cnt > MAX_KIQ_REG_TRY)
|
||||
goto failed_kiq;
|
||||
|
||||
return;
|
||||
|
||||
failed_undo:
|
||||
amdgpu_ring_undo(ring);
|
||||
spin_unlock_irqrestore(&kiq->ring_lock, flags);
|
||||
failed_kiq:
|
||||
dev_err(adev->dev, "failed to write reg %x wait reg %x\n", reg0, reg1);
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_virt_request_full_gpu() - request full gpu access
|
||||
* @adev: amdgpu device.
|
||||
@@ -303,11 +250,11 @@ static int amdgpu_virt_init_ras_err_handler_data(struct amdgpu_device *adev)
if (!*data)
goto data_failure;

bps = kmalloc_array(align_space, sizeof((*data)->bps), GFP_KERNEL);
bps = kmalloc_array(align_space, sizeof(*(*data)->bps), GFP_KERNEL);
if (!bps)
goto bps_failure;

bps_bo = kmalloc_array(align_space, sizeof((*data)->bps_bo), GFP_KERNEL);
bps_bo = kmalloc_array(align_space, sizeof(*(*data)->bps_bo), GFP_KERNEL);
if (!bps_bo)
goto bps_bo_failure;

@@ -340,8 +287,10 @@ static void amdgpu_virt_ras_release_bp(struct amdgpu_device *adev)

for (i = data->last_reserved - 1; i >= 0; i--) {
bo = data->bps_bo[i];
amdgpu_bo_free_kernel(&bo, NULL, NULL);
data->bps_bo[i] = bo;
if (bo) {
amdgpu_bo_free_kernel(&bo, NULL, NULL);
data->bps_bo[i] = bo;
}
data->last_reserved = i;
}
}
@ -381,6 +330,8 @@ static void amdgpu_virt_ras_reserve_bps(struct amdgpu_device *adev)
|
||||
{
|
||||
struct amdgpu_virt *virt = &adev->virt;
|
||||
struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data;
|
||||
struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
|
||||
struct ttm_resource_manager *man = &mgr->manager;
|
||||
struct amdgpu_bo *bo = NULL;
|
||||
uint64_t bp;
|
||||
int i;
|
||||
@ -396,12 +347,18 @@ static void amdgpu_virt_ras_reserve_bps(struct amdgpu_device *adev)
|
||||
* 2) a ras bad page has been reserved (duplicate error injection
|
||||
* for one page);
|
||||
*/
|
||||
if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT,
|
||||
AMDGPU_GPU_PAGE_SIZE,
|
||||
&bo, NULL))
|
||||
DRM_DEBUG("RAS WARN: reserve vram for retired page %llx fail\n", bp);
|
||||
|
||||
data->bps_bo[i] = bo;
|
||||
if (ttm_resource_manager_used(man)) {
|
||||
amdgpu_vram_mgr_reserve_range(&adev->mman.vram_mgr,
|
||||
bp << AMDGPU_GPU_PAGE_SHIFT,
|
||||
AMDGPU_GPU_PAGE_SIZE);
|
||||
data->bps_bo[i] = NULL;
|
||||
} else {
|
||||
if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT,
|
||||
AMDGPU_GPU_PAGE_SIZE,
|
||||
&bo, NULL))
|
||||
DRM_DEBUG("RAS WARN: reserve vram for retired page %llx fail\n", bp);
|
||||
data->bps_bo[i] = bo;
|
||||
}
|
||||
data->last_reserved = i + 1;
|
||||
bo = NULL;
|
||||
}
|
@ -88,7 +88,8 @@ struct amdgpu_virt_ops {
|
||||
int (*wait_reset)(struct amdgpu_device *adev);
|
||||
void (*trans_msg)(struct amdgpu_device *adev, enum idh_request req,
|
||||
u32 data1, u32 data2, u32 data3);
|
||||
void (*ras_poison_handler)(struct amdgpu_device *adev);
|
||||
void (*ras_poison_handler)(struct amdgpu_device *adev,
|
||||
enum amdgpu_ras_block block);
|
||||
};
|
||||
|
||||
/*
|
||||
@ -332,10 +333,6 @@ static inline bool is_virtual_machine(void)
|
||||
((adev)->virt.gim_feature & AMDGIM_FEATURE_VCN_RB_DECOUPLE)
|
||||
bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);
|
||||
void amdgpu_virt_init_setting(struct amdgpu_device *adev);
|
||||
void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
|
||||
uint32_t reg0, uint32_t rreg1,
|
||||
uint32_t ref, uint32_t mask,
|
||||
uint32_t xcc_inst);
|
||||
int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init);
|
||||
int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init);
|
||||
int amdgpu_virt_reset_gpu(struct amdgpu_device *adev);
|
||||
|
@@ -660,8 +660,7 @@ static const struct amd_ip_funcs amdgpu_vkms_ip_funcs = {
.set_powergating_state = amdgpu_vkms_set_powergating_state,
};

const struct amdgpu_ip_block_version amdgpu_vkms_ip_block =
{
const struct amdgpu_ip_block_version amdgpu_vkms_ip_block = {
.type = AMD_IP_BLOCK_TYPE_DCE,
.major = 1,
.minor = 0,
@@ -233,6 +233,22 @@ static void amdgpu_vm_bo_invalidated(struct amdgpu_vm_bo_base *vm_bo)
spin_unlock(&vm_bo->vm->status_lock);
}

/**
* amdgpu_vm_bo_evicted_user - vm_bo is evicted
*
* @vm_bo: vm_bo which is evicted
*
* State for BOs used by user mode queues which are not at the location they
* should be.
*/
static void amdgpu_vm_bo_evicted_user(struct amdgpu_vm_bo_base *vm_bo)
{
vm_bo->moved = true;
spin_lock(&vm_bo->vm->status_lock);
list_move(&vm_bo->vm_status, &vm_bo->vm->evicted_user);
spin_unlock(&vm_bo->vm->status_lock);
}

/**
* amdgpu_vm_bo_relocated - vm_bo is reloacted
*
@@ -427,21 +443,25 @@ uint64_t amdgpu_vm_generation(struct amdgpu_device *adev, struct amdgpu_vm *vm)
}

/**
* amdgpu_vm_validate_pt_bos - validate the page table BOs
* amdgpu_vm_validate - validate evicted BOs tracked in the VM
*
* @adev: amdgpu device pointer
* @vm: vm providing the BOs
* @ticket: optional reservation ticket used to reserve the VM
* @validate: callback to do the validation
* @param: parameter for the validation callback
*
* Validate the page table BOs on command submission if neccessary.
* Validate the page table BOs and per-VM BOs on command submission if
* necessary. If a ticket is given, also try to validate evicted user queue
* BOs. They must already be reserved with the given ticket.
*
* Returns:
* Validation result.
*/
int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
int (*validate)(void *p, struct amdgpu_bo *bo),
void *param)
int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct ww_acquire_ctx *ticket,
int (*validate)(void *p, struct amdgpu_bo *bo),
void *param)
{
struct amdgpu_vm_bo_base *bo_base;
struct amdgpu_bo *shadow;
@@ -484,6 +504,28 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
}
spin_lock(&vm->status_lock);
}
while (ticket && !list_empty(&vm->evicted_user)) {
bo_base = list_first_entry(&vm->evicted_user,
struct amdgpu_vm_bo_base,
vm_status);
spin_unlock(&vm->status_lock);

bo = bo_base->bo;

if (dma_resv_locking_ctx(bo->tbo.base.resv) != ticket) {
pr_warn_ratelimited("Evicted user BO is not reserved in pid %d\n",
vm->task_info.pid);
return -EINVAL;
}

r = validate(param, bo);
if (r)
return r;

amdgpu_vm_bo_invalidated(bo_base);

spin_lock(&vm->status_lock);
}
spin_unlock(&vm->status_lock);

amdgpu_vm_eviction_lock(vm);
@@ -651,7 +693,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid);

if (spm_update_needed && adev->gfx.rlc.funcs->update_spm_vmid)
adev->gfx.rlc.funcs->update_spm_vmid(adev, job->vmid);
adev->gfx.rlc.funcs->update_spm_vmid(adev, ring, job->vmid);

if (!ring->is_mes_queue && ring->funcs->emit_gds_switch &&
gds_switch_needed) {
@@ -1426,11 +1468,21 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
}

r = amdgpu_vm_bo_update(adev, bo_va, clear);
if (r)
return r;

if (unlock)
dma_resv_unlock(resv);
if (r)
return r;

/* Remember evicted DMABuf imports in compute VMs for later
* validation
*/
if (vm->is_compute_context &&
bo_va->base.bo->tbo.base.import_attach &&
(!bo_va->base.bo->tbo.resource ||
bo_va->base.bo->tbo.resource->mem_type == TTM_PL_SYSTEM))
amdgpu_vm_bo_evicted_user(&bo_va->base);

spin_lock(&vm->status_lock);
}
spin_unlock(&vm->status_lock);
@@ -2196,6 +2248,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
vm->reserved_vmid[i] = NULL;
INIT_LIST_HEAD(&vm->evicted);
INIT_LIST_HEAD(&vm->evicted_user);
INIT_LIST_HEAD(&vm->relocated);
INIT_LIST_HEAD(&vm->moved);
INIT_LIST_HEAD(&vm->idle);
@@ -136,7 +136,11 @@ struct amdgpu_mem_stats;
#define AMDGPU_IS_MMHUB1(x) ((x) >= AMDGPU_MMHUB1_START && (x) < AMDGPU_MAX_VMHUBS)

/* Reserve 2MB at top/bottom of address space for kernel use */
#define AMDGPU_VA_RESERVED_SIZE (2ULL << 20)
#define AMDGPU_VA_RESERVED_CSA_SIZE (2ULL << 20)
#define AMDGPU_VA_RESERVED_SEQ64_SIZE (2ULL << 20)
#define AMDGPU_VA_RESERVED_BOTTOM (2ULL << 20)
#define AMDGPU_VA_RESERVED_TOP (AMDGPU_VA_RESERVED_SEQ64_SIZE + \
AMDGPU_VA_RESERVED_CSA_SIZE)

/* See vm_update_mode */
#define AMDGPU_VM_USE_CPU_FOR_GFX (1 << 0)
@ -288,9 +292,12 @@ struct amdgpu_vm {
|
||||
/* Lock to protect vm_bo add/del/move on all lists of vm */
|
||||
spinlock_t status_lock;
|
||||
|
||||
/* BOs who needs a validation */
|
||||
/* Per-VM and PT BOs who needs a validation */
|
||||
struct list_head evicted;
|
||||
|
||||
/* BOs for user mode queues that need a validation */
|
||||
struct list_head evicted_user;
|
||||
|
||||
/* PT BOs which relocated and their parent need an update */
|
||||
struct list_head relocated;
|
||||
|
||||
@ -434,9 +441,10 @@ int amdgpu_vm_lock_pd(struct amdgpu_vm *vm, struct drm_exec *exec,
|
||||
unsigned int num_fences);
|
||||
bool amdgpu_vm_ready(struct amdgpu_vm *vm);
|
||||
uint64_t amdgpu_vm_generation(struct amdgpu_device *adev, struct amdgpu_vm *vm);
|
||||
int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
|
||||
int (*callback)(void *p, struct amdgpu_bo *bo),
|
||||
void *param);
|
||||
int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm,
|
||||
struct ww_acquire_ctx *ticket,
|
||||
int (*callback)(void *p, struct amdgpu_bo *bo),
|
||||
void *param);
|
||||
int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync);
|
||||
int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
|
||||
struct amdgpu_vm *vm, bool immediate);
|
||||
|
@ -1035,15 +1035,74 @@ int amdgpu_xgmi_remove_device(struct amdgpu_device *adev)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int xgmi_v6_4_0_aca_bank_generate_report(struct aca_handle *handle, struct aca_bank *bank, enum aca_error_type type,
|
||||
struct aca_bank_report *report, void *data)
|
||||
{
|
||||
struct amdgpu_device *adev = handle->adev;
|
||||
const char *error_str;
|
||||
u64 status;
|
||||
int ret, ext_error_code;
|
||||
|
||||
ret = aca_bank_info_decode(bank, &report->info);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
status = bank->regs[ACA_REG_IDX_STATUS];
|
||||
ext_error_code = ACA_REG__STATUS__ERRORCODEEXT(status);
|
||||
|
||||
error_str = ext_error_code < ARRAY_SIZE(xgmi_v6_4_0_ras_error_code_ext) ?
|
||||
xgmi_v6_4_0_ras_error_code_ext[ext_error_code] : NULL;
|
||||
if (error_str)
|
||||
dev_info(adev->dev, "%s detected\n", error_str);
|
||||
|
||||
if ((type == ACA_ERROR_TYPE_UE && ext_error_code == 0) ||
|
||||
(type == ACA_ERROR_TYPE_CE && ext_error_code == 6))
|
||||
report->count[type] = ACA_REG__MISC0__ERRCNT(bank->regs[ACA_REG_IDX_MISC0]);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct aca_bank_ops xgmi_v6_4_0_aca_bank_ops = {
|
||||
.aca_bank_generate_report = xgmi_v6_4_0_aca_bank_generate_report,
|
||||
};
|
||||
|
||||
static const struct aca_info xgmi_v6_4_0_aca_info = {
|
||||
.hwip = ACA_HWIP_TYPE_PCS_XGMI,
|
||||
.mask = ACA_ERROR_UE_MASK | ACA_ERROR_CE_MASK,
|
||||
.bank_ops = &xgmi_v6_4_0_aca_bank_ops,
|
||||
};
|
||||
|
||||
static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
|
||||
{
|
||||
int r;
|
||||
|
||||
if (!adev->gmc.xgmi.supported ||
|
||||
adev->gmc.xgmi.num_physical_nodes == 0)
|
||||
return 0;
|
||||
|
||||
amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL);
|
||||
|
||||
return amdgpu_ras_block_late_init(adev, ras_block);
|
||||
r = amdgpu_ras_block_late_init(adev, ras_block);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) {
|
||||
case IP_VERSION(6, 4, 0):
|
||||
r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL,
|
||||
&xgmi_v6_4_0_aca_info, NULL);
|
||||
if (r)
|
||||
goto late_fini;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
late_fini:
|
||||
amdgpu_ras_block_late_fini(adev, ras_block);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
uint64_t amdgpu_xgmi_get_relative_phy_addr(struct amdgpu_device *adev,
|
||||
@ -1099,7 +1158,7 @@ static void amdgpu_xgmi_legacy_reset_ras_error_count(struct amdgpu_device *adev)
|
||||
|
||||
static void __xgmi_v6_4_0_reset_error_count(struct amdgpu_device *adev, int xgmi_inst, u64 mca_base)
|
||||
{
|
||||
WREG64_MCA(xgmi_inst, mca_base, MCA_REG_IDX_STATUS, 0ULL);
|
||||
WREG64_MCA(xgmi_inst, mca_base, ACA_REG_IDX_STATUS, 0ULL);
|
||||
}
|
||||
|
||||
static void xgmi_v6_4_0_reset_error_count(struct amdgpu_device *adev, int xgmi_inst)
|
||||
@ -1277,12 +1336,12 @@ static void amdgpu_xgmi_legacy_query_ras_error_count(struct amdgpu_device *adev,
|
||||
err_data->ce_count += ce_cnt;
|
||||
}
|
||||
|
||||
static enum amdgpu_mca_error_type xgmi_v6_4_0_pcs_mca_get_error_type(struct amdgpu_device *adev, u64 status)
|
||||
static enum aca_error_type xgmi_v6_4_0_pcs_mca_get_error_type(struct amdgpu_device *adev, u64 status)
|
||||
{
|
||||
const char *error_str;
|
||||
int ext_error_code;
|
||||
|
||||
ext_error_code = MCA_REG__STATUS__ERRORCODEEXT(status);
|
||||
ext_error_code = ACA_REG__STATUS__ERRORCODEEXT(status);
|
||||
|
||||
error_str = ext_error_code < ARRAY_SIZE(xgmi_v6_4_0_ras_error_code_ext) ?
|
||||
xgmi_v6_4_0_ras_error_code_ext[ext_error_code] : NULL;
|
||||
@ -1291,9 +1350,9 @@ static enum amdgpu_mca_error_type xgmi_v6_4_0_pcs_mca_get_error_type(struct amdg
|
||||
|
||||
switch (ext_error_code) {
|
||||
case 0:
|
||||
return AMDGPU_MCA_ERROR_TYPE_UE;
|
||||
return ACA_ERROR_TYPE_UE;
|
||||
case 6:
|
||||
return AMDGPU_MCA_ERROR_TYPE_CE;
|
||||
return ACA_ERROR_TYPE_CE;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
@ -1307,22 +1366,22 @@ static void __xgmi_v6_4_0_query_error_count(struct amdgpu_device *adev, struct a
|
||||
int xgmi_inst = mcm_info->die_id;
|
||||
u64 status = 0;
|
||||
|
||||
status = RREG64_MCA(xgmi_inst, mca_base, MCA_REG_IDX_STATUS);
|
||||
if (!MCA_REG__STATUS__VAL(status))
|
||||
status = RREG64_MCA(xgmi_inst, mca_base, ACA_REG_IDX_STATUS);
|
||||
if (!ACA_REG__STATUS__VAL(status))
|
||||
return;
|
||||
|
||||
switch (xgmi_v6_4_0_pcs_mca_get_error_type(adev, status)) {
|
||||
case AMDGPU_MCA_ERROR_TYPE_UE:
|
||||
case ACA_ERROR_TYPE_UE:
|
||||
amdgpu_ras_error_statistic_ue_count(err_data, mcm_info, NULL, 1ULL);
|
||||
break;
|
||||
case AMDGPU_MCA_ERROR_TYPE_CE:
|
||||
case ACA_ERROR_TYPE_CE:
|
||||
amdgpu_ras_error_statistic_ce_count(err_data, mcm_info, NULL, 1ULL);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
WREG64_MCA(xgmi_inst, mca_base, MCA_REG_IDX_STATUS, 0ULL);
|
||||
WREG64_MCA(xgmi_inst, mca_base, ACA_REG_IDX_STATUS, 0ULL);
|
||||
}
|
||||
|
||||
static void xgmi_v6_4_0_query_error_count(struct amdgpu_device *adev, int xgmi_inst, struct ras_err_data *err_data)
|
||||
|
@ -62,6 +62,7 @@
|
||||
typedef struct {
|
||||
struct atom_context *ctx;
|
||||
uint32_t *ps, *ws;
|
||||
int ps_size, ws_size;
|
||||
int ps_shift;
|
||||
uint16_t start;
|
||||
unsigned last_jump;
|
||||
@ -70,8 +71,8 @@ typedef struct {
|
||||
} atom_exec_context;
|
||||
|
||||
int amdgpu_atom_debug;
|
||||
static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index, uint32_t *params);
|
||||
int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *params);
|
||||
static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index, uint32_t *params, int params_size);
|
||||
int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *params, int params_size);
|
||||
|
||||
static uint32_t atom_arg_mask[8] =
|
||||
{ 0xFFFFFFFF, 0xFFFF, 0xFFFF00, 0xFFFF0000, 0xFF, 0xFF00, 0xFF0000,
|
||||
@ -223,7 +224,10 @@ static uint32_t atom_get_src_int(atom_exec_context *ctx, uint8_t attr,
|
||||
(*ptr)++;
|
||||
/* get_unaligned_le32 avoids unaligned accesses from atombios
|
||||
* tables, noticed on a DEC Alpha. */
|
||||
val = get_unaligned_le32((u32 *)&ctx->ps[idx]);
|
||||
if (idx < ctx->ps_size)
|
||||
val = get_unaligned_le32((u32 *)&ctx->ps[idx]);
|
||||
else
|
||||
pr_info("PS index out of range: %i > %i\n", idx, ctx->ps_size);
|
||||
if (print)
|
||||
DEBUG("PS[0x%02X,0x%04X]", idx, val);
|
||||
break;
|
||||
@ -261,7 +265,10 @@ static uint32_t atom_get_src_int(atom_exec_context *ctx, uint8_t attr,
|
||||
val = gctx->reg_block;
|
||||
break;
|
||||
default:
|
||||
val = ctx->ws[idx];
|
||||
if (idx < ctx->ws_size)
|
||||
val = ctx->ws[idx];
|
||||
else
|
||||
pr_info("WS index out of range: %i > %i\n", idx, ctx->ws_size);
|
||||
}
|
||||
break;
|
||||
case ATOM_ARG_ID:
|
||||
@ -495,6 +502,10 @@ static void atom_put_dst(atom_exec_context *ctx, int arg, uint8_t attr,
|
||||
idx = U8(*ptr);
|
||||
(*ptr)++;
|
||||
DEBUG("PS[0x%02X]", idx);
|
||||
if (idx >= ctx->ps_size) {
|
||||
pr_info("PS index out of range: %i > %i\n", idx, ctx->ps_size);
|
||||
return;
|
||||
}
|
||||
ctx->ps[idx] = cpu_to_le32(val);
|
||||
break;
|
||||
case ATOM_ARG_WS:
|
||||
@ -527,6 +538,10 @@ static void atom_put_dst(atom_exec_context *ctx, int arg, uint8_t attr,
|
||||
gctx->reg_block = val;
|
||||
break;
|
||||
default:
|
||||
if (idx >= ctx->ws_size) {
|
||||
pr_info("WS index out of range: %i > %i\n", idx, ctx->ws_size);
|
||||
return;
|
||||
}
|
||||
ctx->ws[idx] = val;
|
||||
}
|
||||
break;
|
||||
@ -624,7 +639,7 @@ static void atom_op_calltable(atom_exec_context *ctx, int *ptr, int arg)
|
||||
else
|
||||
SDEBUG(" table: %d\n", idx);
|
||||
if (U16(ctx->ctx->cmd_table + 4 + 2 * idx))
|
||||
r = amdgpu_atom_execute_table_locked(ctx->ctx, idx, ctx->ps + ctx->ps_shift);
|
||||
r = amdgpu_atom_execute_table_locked(ctx->ctx, idx, ctx->ps + ctx->ps_shift, ctx->ps_size - ctx->ps_shift);
|
||||
if (r) {
|
||||
ctx->abort = true;
|
||||
}
|
||||
@ -1203,7 +1218,7 @@ static struct {
|
||||
atom_op_div32, ATOM_ARG_WS},
|
||||
};
|
||||
|
||||
static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index, uint32_t *params)
|
||||
static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index, uint32_t *params, int params_size)
|
||||
{
|
||||
int base = CU16(ctx->cmd_table + 4 + 2 * index);
|
||||
int len, ws, ps, ptr;
|
||||
@ -1225,12 +1240,16 @@ static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index,
|
||||
ectx.ps_shift = ps / 4;
|
||||
ectx.start = base;
|
||||
ectx.ps = params;
|
||||
ectx.ps_size = params_size;
|
||||
ectx.abort = false;
|
||||
ectx.last_jump = 0;
|
||||
if (ws)
|
||||
if (ws) {
|
||||
ectx.ws = kcalloc(4, ws, GFP_KERNEL);
|
||||
else
|
||||
ectx.ws_size = ws;
|
||||
} else {
|
||||
ectx.ws = NULL;
|
||||
ectx.ws_size = 0;
|
||||
}
|
||||
|
||||
debug_depth++;
|
||||
while (1) {
|
||||
@ -1264,7 +1283,7 @@ free:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *params)
|
||||
int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *params, int params_size)
|
||||
{
|
||||
int r;
|
||||
|
||||
@ -1280,7 +1299,7 @@ int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *par
|
||||
/* reset divmul */
|
||||
ctx->divmul[0] = 0;
|
||||
ctx->divmul[1] = 0;
|
||||
r = amdgpu_atom_execute_table_locked(ctx, index, params);
|
||||
r = amdgpu_atom_execute_table_locked(ctx, index, params, params_size);
|
||||
mutex_unlock(&ctx->mutex);
|
||||
return r;
|
||||
}
|
||||
@ -1552,7 +1571,7 @@ int amdgpu_atom_asic_init(struct atom_context *ctx)
|
||||
|
||||
if (!CU16(ctx->cmd_table + 4 + 2 * ATOM_CMD_INIT))
|
||||
return 1;
|
||||
ret = amdgpu_atom_execute_table(ctx, ATOM_CMD_INIT, ps);
|
||||
ret = amdgpu_atom_execute_table(ctx, ATOM_CMD_INIT, ps, 16);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
|
@ -156,7 +156,7 @@ struct atom_context {
|
||||
extern int amdgpu_atom_debug;
|
||||
|
||||
struct atom_context *amdgpu_atom_parse(struct card_info *card, void *bios);
|
||||
int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *params);
|
||||
int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *params, int params_size);
|
||||
int amdgpu_atom_asic_init(struct atom_context *ctx);
|
||||
void amdgpu_atom_destroy(struct atom_context *ctx);
|
||||
bool amdgpu_atom_parse_data_header(struct atom_context *ctx, int index, uint16_t *size,
|
||||
|
@ -77,7 +77,7 @@ void amdgpu_atombios_crtc_overscan_setup(struct drm_crtc *crtc,
|
||||
args.usOverscanTop = cpu_to_le16(amdgpu_crtc->v_border);
|
||||
break;
|
||||
}
|
||||
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
|
||||
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
|
||||
}
|
||||
|
||||
void amdgpu_atombios_crtc_scaler_setup(struct drm_crtc *crtc)
|
||||
@ -106,7 +106,7 @@ void amdgpu_atombios_crtc_scaler_setup(struct drm_crtc *crtc)
|
||||
args.ucEnable = ATOM_SCALER_DISABLE;
|
||||
break;
|
||||
}
|
||||
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
|
||||
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
|
||||
}
|
||||
|
||||
void amdgpu_atombios_crtc_lock(struct drm_crtc *crtc, int lock)
|
||||
@ -123,7 +123,7 @@ void amdgpu_atombios_crtc_lock(struct drm_crtc *crtc, int lock)
|
||||
args.ucCRTC = amdgpu_crtc->crtc_id;
|
||||
args.ucEnable = lock;
|
||||
|
||||
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
|
||||
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
|
||||
}
|
||||
|
||||
void amdgpu_atombios_crtc_enable(struct drm_crtc *crtc, int state)
|
||||
@ -139,7 +139,7 @@ void amdgpu_atombios_crtc_enable(struct drm_crtc *crtc, int state)
|
||||
args.ucCRTC = amdgpu_crtc->crtc_id;
|
||||
args.ucEnable = state;
|
||||
|
||||
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
|
||||
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
|
||||
}
|
||||
|
||||
void amdgpu_atombios_crtc_blank(struct drm_crtc *crtc, int state)
|
||||
@ -155,7 +155,7 @@ void amdgpu_atombios_crtc_blank(struct drm_crtc *crtc, int state)
|
||||
args.ucCRTC = amdgpu_crtc->crtc_id;
|
||||
args.ucBlanking = state;
|
||||
|
||||
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
|
||||
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
|
||||
}
|
||||
|
||||
void amdgpu_atombios_crtc_powergate(struct drm_crtc *crtc, int state)
|
||||
@ -171,7 +171,7 @@ void amdgpu_atombios_crtc_powergate(struct drm_crtc *crtc, int state)
|
||||
args.ucDispPipeId = amdgpu_crtc->crtc_id;
|
||||
args.ucEnable = state;
|
||||
|
||||
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
|
||||
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
|
||||
}
|
||||
|
||||
void amdgpu_atombios_crtc_powergate_init(struct amdgpu_device *adev)
|
||||
@ -183,7 +183,7 @@ void amdgpu_atombios_crtc_powergate_init(struct amdgpu_device *adev)
|
||||
|
||||
args.ucEnable = ATOM_INIT;
|
||||
|
||||
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
|
||||
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
|
||||
}
|
||||
|
||||
void amdgpu_atombios_crtc_set_dtd_timing(struct drm_crtc *crtc,
|
||||
@ -228,7 +228,7 @@ void amdgpu_atombios_crtc_set_dtd_timing(struct drm_crtc *crtc,
|
||||
args.susModeMiscInfo.usAccess = cpu_to_le16(misc);
|
||||
args.ucCRTC = amdgpu_crtc->crtc_id;
|
||||
|
||||
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
|
||||
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
|
||||
}
|
||||
|
||||
union atom_enable_ss {
|
||||
@ -293,7 +293,7 @@ static void amdgpu_atombios_crtc_program_ss(struct amdgpu_device *adev,
|
||||
args.v3.usSpreadSpectrumStep = cpu_to_le16(ss->step);
|
||||
args.v3.ucEnable = enable;
|
||||
|
||||
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
|
||||
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
|
||||
}
|
||||
|
||||
union adjust_pixel_clock {
|
||||
@ -395,7 +395,7 @@ static u32 amdgpu_atombios_crtc_adjust_pll(struct drm_crtc *crtc,
|
||||
ADJUST_DISPLAY_CONFIG_SS_ENABLE;
|
||||
|
||||
amdgpu_atom_execute_table(adev->mode_info.atom_context,
|
||||
index, (uint32_t *)&args);
|
||||
index, (uint32_t *)&args, sizeof(args));
|
||||
adjusted_clock = le16_to_cpu(args.v1.usPixelClock) * 10;
|
||||
break;
|
||||
case 3:
|
||||
@ -428,7 +428,7 @@ static u32 amdgpu_atombios_crtc_adjust_pll(struct drm_crtc *crtc,
|
||||
args.v3.sInput.ucExtTransmitterID = 0;
|
||||
|
||||
amdgpu_atom_execute_table(adev->mode_info.atom_context,
|
||||
index, (uint32_t *)&args);
|
||||
index, (uint32_t *)&args, sizeof(args));
|
||||
adjusted_clock = le32_to_cpu(args.v3.sOutput.ulDispPllFreq) * 10;
|
||||
if (args.v3.sOutput.ucRefDiv) {
|
||||
amdgpu_crtc->pll_flags |= AMDGPU_PLL_USE_FRAC_FB_DIV;
|
||||
@ -514,7 +514,7 @@ void amdgpu_atombios_crtc_set_disp_eng_pll(struct amdgpu_device *adev,
|
||||
DRM_ERROR("Unknown table version %d %d\n", frev, crev);
|
||||
return;
|
||||
}
|
||||
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
|
||||
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
|
||||
}
|
||||
|
||||
union set_dce_clock {
|
||||
@ -544,7 +544,7 @@ u32 amdgpu_atombios_crtc_set_dce_clock(struct amdgpu_device *adev,
|
||||
args.v2_1.asParam.ulDCEClkFreq = cpu_to_le32(freq); /* 10kHz units */
|
||||
args.v2_1.asParam.ucDCEClkType = clk_type;
|
||||
args.v2_1.asParam.ucDCEClkSrc = clk_src;
|
||||
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
|
||||
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
|
||||
ret_freq = le32_to_cpu(args.v2_1.asParam.ulDCEClkFreq) * 10;
|
||||
break;
|
||||
default:
|
||||
@ -740,7 +740,7 @@ void amdgpu_atombios_crtc_program_pll(struct drm_crtc *crtc,
|
||||
return;
|
||||
}
|
||||
|
||||
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
|
||||
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
|
||||
}
|
||||
|
||||
int amdgpu_atombios_crtc_prepare_pll(struct drm_crtc *crtc,
|
||||
|
@ -83,7 +83,7 @@ static int amdgpu_atombios_dp_process_aux_ch(struct amdgpu_i2c_chan *chan,
|
||||
args.v2.ucDelay = delay / 10;
|
||||
args.v2.ucHPD_ID = chan->rec.hpd;
|
||||
|
||||
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
|
||||
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
|
||||
|
||||
*ack = args.v2.ucReplyStatus;
|
||||
|
||||
@ -301,7 +301,7 @@ static u8 amdgpu_atombios_dp_encoder_service(struct amdgpu_device *adev,
|
||||
args.ucLaneNum = lane_num;
|
||||
args.ucStatus = 0;
|
||||
|
||||
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
|
||||
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
|
||||
return args.ucStatus;
|
||||
}
|
||||
|
||||
|
@ -335,7 +335,7 @@ amdgpu_atombios_encoder_setup_dac(struct drm_encoder *encoder, int action)
|
||||
args.ucDacStandard = ATOM_DAC1_PS2;
|
||||
args.usPixelClock = cpu_to_le16(amdgpu_encoder->pixel_clock / 10);
|
||||
|
||||
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
|
||||
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
|
||||
|
||||
}
|
||||
|
||||
@ -432,7 +432,7 @@ amdgpu_atombios_encoder_setup_dvo(struct drm_encoder *encoder, int action)
|
||||
break;
|
||||
}
|
||||
|
||||
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
|
||||
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
|
||||
}
|
||||
|
||||
int amdgpu_atombios_encoder_get_encoder_mode(struct drm_encoder *encoder)
|
||||
@ -732,7 +732,7 @@ amdgpu_atombios_encoder_setup_dig_encoder(struct drm_encoder *encoder,
|
||||
break;
|
||||
}
|
||||
|
||||
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
|
||||
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
|
||||
|
||||
}
|
||||
|
||||
@ -1136,7 +1136,7 @@ amdgpu_atombios_encoder_setup_dig_transmitter(struct drm_encoder *encoder, int a
|
||||
break;
|
||||
}
|
||||
|
||||
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
|
||||
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
|
||||
}
|
||||
|
||||
bool
|
||||
@ -1164,7 +1164,7 @@ amdgpu_atombios_encoder_set_edp_panel_power(struct drm_connector *connector,
|
||||
|
||||
args.v1.ucAction = action;
|
||||
|
||||
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
|
||||
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
|
||||
|
||||
/* wait for the panel to power up */
|
||||
if (action == ATOM_TRANSMITTER_ACTION_POWER_ON) {
|
||||
@ -1288,7 +1288,7 @@ amdgpu_atombios_encoder_setup_external_encoder(struct drm_encoder *encoder,
|
||||
DRM_ERROR("Unknown table version: %d, %d\n", frev, crev);
|
||||
return;
|
||||
}
|
||||
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
|
||||
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
|
||||
}
|
||||
|
||||
static void
|
||||
@ -1633,7 +1633,7 @@ amdgpu_atombios_encoder_set_crtc_source(struct drm_encoder *encoder)
|
||||
return;
|
||||
}
|
||||
|
||||
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
|
||||
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
|
||||
}
|
||||
|
||||
/* This only needs to be called once at startup */
|
||||
@ -1706,7 +1706,7 @@ amdgpu_atombios_encoder_dac_load_detect(struct drm_encoder *encoder,
|
||||
args.sDacload.ucMisc = DAC_LOAD_MISC_YPrPb;
|
||||
}
|
||||
|
||||
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
|
||||
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
|
||||
|
||||
return true;
|
||||
} else
|
||||
|
@ -86,7 +86,7 @@ static int amdgpu_atombios_i2c_process_i2c_ch(struct amdgpu_i2c_chan *chan,
|
||||
args.ucSlaveAddr = slave_addr << 1;
|
||||
args.ucLineNumber = chan->rec.i2c_id;
|
||||
|
||||
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
|
||||
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
|
||||
|
||||
/* error */
|
||||
if (args.ucStatus != HW_ASSISTED_I2C_STATUS_SUCCESS) {
|
||||
@ -172,5 +172,5 @@ void amdgpu_atombios_i2c_channel_trans(struct amdgpu_device *adev, u8 slave_addr
|
||||
args.ucSlaveAddr = slave_addr;
|
||||
args.ucLineNumber = line_number;
|
||||
|
||||
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
|
||||
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
|
||||
}
|
||||
|
@ -21,8 +21,7 @@
|
||||
*
|
||||
*/
|
||||
|
||||
static const unsigned int gfx9_SECT_CONTEXT_def_1[] =
|
||||
{
|
||||
static const unsigned int gfx9_SECT_CONTEXT_def_1[] = {
|
||||
0x00000000, // DB_RENDER_CONTROL
|
||||
0x00000000, // DB_COUNT_CONTROL
|
||||
0x00000000, // DB_DEPTH_VIEW
|
||||
@ -236,8 +235,7 @@ static const unsigned int gfx9_SECT_CONTEXT_def_1[] =
|
||||
0x00000000, // PA_SC_VPORT_ZMIN_15
|
||||
0x3f800000, // PA_SC_VPORT_ZMAX_15
|
||||
};
|
||||
static const unsigned int gfx9_SECT_CONTEXT_def_2[] =
|
||||
{
|
||||
static const unsigned int gfx9_SECT_CONTEXT_def_2[] = {
|
||||
0x00000000, // PA_SC_SCREEN_EXTENT_CONTROL
|
||||
0x00000000, // PA_SC_TILE_STEERING_OVERRIDE
|
||||
0x00000000, // CP_PERFMON_CNTX_CNTL
|
||||
@ -521,15 +519,13 @@ static const unsigned int gfx9_SECT_CONTEXT_def_2[] =
|
||||
0x00000000, // CB_MRT6_EPITCH
|
||||
0x00000000, // CB_MRT7_EPITCH
|
||||
};
|
||||
static const unsigned int gfx9_SECT_CONTEXT_def_3[] =
|
||||
{
|
||||
static const unsigned int gfx9_SECT_CONTEXT_def_3[] = {
|
||||
0x00000000, // PA_CL_POINT_X_RAD
|
||||
0x00000000, // PA_CL_POINT_Y_RAD
|
||||
0x00000000, // PA_CL_POINT_SIZE
|
||||
0x00000000, // PA_CL_POINT_CULL_RAD
|
||||
};
|
||||
static const unsigned int gfx9_SECT_CONTEXT_def_4[] =
|
||||
{
|
||||
static const unsigned int gfx9_SECT_CONTEXT_def_4[] = {
|
||||
0x00000000, // DB_DEPTH_CONTROL
|
||||
0x00000000, // DB_EQAA
|
||||
0x00000000, // CB_COLOR_CONTROL
|
||||
@ -688,17 +684,14 @@ static const unsigned int gfx9_SECT_CONTEXT_def_4[] =
|
||||
0x00000000, // VGT_GS_OUT_PRIM_TYPE
|
||||
0x00000000, // IA_ENHANCE
|
||||
};
|
||||
static const unsigned int gfx9_SECT_CONTEXT_def_5[] =
|
||||
{
|
||||
static const unsigned int gfx9_SECT_CONTEXT_def_5[] = {
|
||||
0x00000000, // WD_ENHANCE
|
||||
0x00000000, // VGT_PRIMITIVEID_EN
|
||||
};
|
||||
static const unsigned int gfx9_SECT_CONTEXT_def_6[] =
|
||||
{
|
||||
static const unsigned int gfx9_SECT_CONTEXT_def_6[] = {
|
||||
0x00000000, // VGT_PRIMITIVEID_RESET
|
||||
};
|
||||
static const unsigned int gfx9_SECT_CONTEXT_def_7[] =
|
||||
{
|
||||
static const unsigned int gfx9_SECT_CONTEXT_def_7[] = {
|
||||
0x00000000, // VGT_GS_MAX_PRIMS_PER_SUBGROUP
|
||||
0x00000000, // VGT_DRAW_PAYLOAD_CNTL
|
||||
0, // HOLE
|
||||
@ -766,8 +759,7 @@ static const unsigned int gfx9_SECT_CONTEXT_def_7[] =
|
||||
0x00000000, // VGT_STRMOUT_CONFIG
|
||||
0x00000000, // VGT_STRMOUT_BUFFER_CONFIG
|
||||
};
|
||||
static const unsigned int gfx9_SECT_CONTEXT_def_8[] =
|
||||
{
|
||||
static const unsigned int gfx9_SECT_CONTEXT_def_8[] = {
|
||||
0x00000000, // PA_SC_CENTROID_PRIORITY_0
|
||||
0x00000000, // PA_SC_CENTROID_PRIORITY_1
|
||||
0x00001000, // PA_SC_LINE_CNTL
|
||||
@ -924,8 +916,7 @@ static const unsigned int gfx9_SECT_CONTEXT_def_8[] =
|
||||
0x00000000, // CB_COLOR7_DCC_BASE
|
||||
0x00000000, // CB_COLOR7_DCC_BASE_EXT
|
||||
};
|
||||
static const struct cs_extent_def gfx9_SECT_CONTEXT_defs[] =
|
||||
{
|
||||
static const struct cs_extent_def gfx9_SECT_CONTEXT_defs[] = {
|
||||
{gfx9_SECT_CONTEXT_def_1, 0x0000a000, 212 },
|
||||
{gfx9_SECT_CONTEXT_def_2, 0x0000a0d6, 282 },
|
||||
{gfx9_SECT_CONTEXT_def_3, 0x0000a1f5, 4 },
|
||||
|
@ -21,8 +21,7 @@
|
||||
*
|
||||
*/
|
||||
|
||||
static const u32 si_SECT_CONTEXT_def_1[] =
|
||||
{
|
||||
static const u32 si_SECT_CONTEXT_def_1[] = {
|
||||
0x00000000, // DB_RENDER_CONTROL
|
||||
0x00000000, // DB_COUNT_CONTROL
|
||||
0x00000000, // DB_DEPTH_VIEW
|
||||
@ -236,8 +235,7 @@ static const u32 si_SECT_CONTEXT_def_1[] =
|
||||
0x00000000, // PA_SC_VPORT_ZMIN_15
|
||||
0x3f800000, // PA_SC_VPORT_ZMAX_15
|
||||
};
|
||||
static const u32 si_SECT_CONTEXT_def_2[] =
|
||||
{
|
||||
static const u32 si_SECT_CONTEXT_def_2[] = {
|
||||
0x00000000, // CP_PERFMON_CNTX_CNTL
|
||||
0x00000000, // CP_RINGID
|
||||
0x00000000, // CP_VMID
|
||||
@ -511,8 +509,7 @@ static const u32 si_SECT_CONTEXT_def_2[] =
|
||||
0x00000000, // CB_BLEND6_CONTROL
|
||||
0x00000000, // CB_BLEND7_CONTROL
|
||||
};
|
||||
static const u32 si_SECT_CONTEXT_def_3[] =
|
||||
{
|
||||
static const u32 si_SECT_CONTEXT_def_3[] = {
|
||||
0x00000000, // PA_CL_POINT_X_RAD
|
||||
0x00000000, // PA_CL_POINT_Y_RAD
|
||||
0x00000000, // PA_CL_POINT_SIZE
|
||||
@ -520,8 +517,7 @@ static const u32 si_SECT_CONTEXT_def_3[] =
|
||||
0x00000000, // VGT_DMA_BASE_HI
|
||||
0x00000000, // VGT_DMA_BASE
|
||||
};
|
||||
static const u32 si_SECT_CONTEXT_def_4[] =
|
||||
{
|
||||
static const u32 si_SECT_CONTEXT_def_4[] = {
|
||||
0x00000000, // DB_DEPTH_CONTROL
|
||||
0x00000000, // DB_EQAA
|
||||
0x00000000, // CB_COLOR_CONTROL
|
||||
@ -680,16 +676,13 @@ static const u32 si_SECT_CONTEXT_def_4[] =
|
||||
0x00000000, // VGT_GS_OUT_PRIM_TYPE
|
||||
0x00000000, // IA_ENHANCE
|
||||
};
|
||||
static const u32 si_SECT_CONTEXT_def_5[] =
|
||||
{
|
||||
static const u32 si_SECT_CONTEXT_def_5[] = {
|
||||
0x00000000, // VGT_PRIMITIVEID_EN
|
||||
};
|
||||
static const u32 si_SECT_CONTEXT_def_6[] =
|
||||
{
|
||||
static const u32 si_SECT_CONTEXT_def_6[] = {
|
||||
0x00000000, // VGT_PRIMITIVEID_RESET
|
||||
};
|
||||
static const u32 si_SECT_CONTEXT_def_7[] =
|
||||
{
|
||||
static const u32 si_SECT_CONTEXT_def_7[] = {
|
||||
0x00000000, // VGT_MULTI_PRIM_IB_RESET_EN
|
||||
0, // HOLE
|
||||
0, // HOLE
|
||||
@ -924,8 +917,7 @@ static const u32 si_SECT_CONTEXT_def_7[] =
|
||||
0x00000000, // CB_COLOR7_CLEAR_WORD0
|
||||
0x00000000, // CB_COLOR7_CLEAR_WORD1
|
||||
};
|
||||
static const struct cs_extent_def si_SECT_CONTEXT_defs[] =
|
||||
{
|
||||
static const struct cs_extent_def si_SECT_CONTEXT_defs[] = {
|
||||
{si_SECT_CONTEXT_def_1, 0x0000a000, 212 },
|
||||
{si_SECT_CONTEXT_def_2, 0x0000a0d8, 272 },
|
||||
{si_SECT_CONTEXT_def_3, 0x0000a1f5, 6 },
|
||||
|
@ -52,6 +52,7 @@
|
||||
|
||||
static void dce_v10_0_set_display_funcs(struct amdgpu_device *adev);
|
||||
static void dce_v10_0_set_irq_funcs(struct amdgpu_device *adev);
|
||||
static void dce_v10_0_hpd_int_ack(struct amdgpu_device *adev, int hpd);
|
||||
|
||||
static const u32 crtc_offsets[] = {
|
||||
CRTC0_REGISTER_OFFSET,
|
||||
@ -364,6 +365,7 @@ static void dce_v10_0_hpd_init(struct amdgpu_device *adev)
|
||||
AMDGPU_HPD_DISCONNECT_INT_DELAY_IN_MS);
|
||||
WREG32(mmDC_HPD_TOGGLE_FILT_CNTL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
|
||||
|
||||
dce_v10_0_hpd_int_ack(adev, amdgpu_connector->hpd.hpd);
|
||||
dce_v10_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd);
|
||||
amdgpu_irq_get(adev, &adev->hpd_irq,
|
||||
amdgpu_connector->hpd.hpd);
|
||||
|
@ -52,6 +52,7 @@
|
||||
|
||||
static void dce_v11_0_set_display_funcs(struct amdgpu_device *adev);
|
||||
static void dce_v11_0_set_irq_funcs(struct amdgpu_device *adev);
|
||||
static void dce_v11_0_hpd_int_ack(struct amdgpu_device *adev, int hpd);
|
||||
|
||||
static const u32 crtc_offsets[] =
|
||||
{
|
||||
@ -388,6 +389,7 @@ static void dce_v11_0_hpd_init(struct amdgpu_device *adev)
|
||||
AMDGPU_HPD_DISCONNECT_INT_DELAY_IN_MS);
|
||||
WREG32(mmDC_HPD_TOGGLE_FILT_CNTL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
|
||||
|
||||
dce_v11_0_hpd_int_ack(adev, amdgpu_connector->hpd.hpd);
|
||||
dce_v11_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd);
|
||||
amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
|
||||
}
|
||||
|
@ -273,6 +273,21 @@ static void dce_v6_0_hpd_set_polarity(struct amdgpu_device *adev,
|
||||
WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp);
|
||||
}
|
||||
|
||||
static void dce_v6_0_hpd_int_ack(struct amdgpu_device *adev,
|
||||
int hpd)
|
||||
{
|
||||
u32 tmp;
|
||||
|
||||
if (hpd >= adev->mode_info.num_hpd) {
|
||||
DRM_DEBUG("invalid hdp %d\n", hpd);
|
||||
return;
|
||||
}
|
||||
|
||||
tmp = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]);
|
||||
tmp |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_ACK_MASK;
|
||||
WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp);
|
||||
}
|
||||
|
||||
/**
|
||||
* dce_v6_0_hpd_init - hpd setup callback.
|
||||
*
|
||||
@ -312,6 +327,7 @@ static void dce_v6_0_hpd_init(struct amdgpu_device *adev)
|
||||
continue;
|
||||
}
|
||||
|
||||
dce_v6_0_hpd_int_ack(adev, amdgpu_connector->hpd.hpd);
|
||||
dce_v6_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd);
|
||||
amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
|
||||
}
|
||||
@ -3089,7 +3105,7 @@ static int dce_v6_0_hpd_irq(struct amdgpu_device *adev,
|
||||
struct amdgpu_irq_src *source,
|
||||
struct amdgpu_iv_entry *entry)
|
||||
{
|
||||
uint32_t disp_int, mask, tmp;
|
||||
uint32_t disp_int, mask;
|
||||
unsigned hpd;
|
||||
|
||||
if (entry->src_data[0] >= adev->mode_info.num_hpd) {
|
||||
@ -3102,9 +3118,7 @@ static int dce_v6_0_hpd_irq(struct amdgpu_device *adev,
|
||||
mask = interrupt_status_offsets[hpd].hpd;
|
||||
|
||||
if (disp_int & mask) {
|
||||
tmp = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]);
|
||||
tmp |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_ACK_MASK;
|
||||
WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp);
|
||||
dce_v6_0_hpd_int_ack(adev, hpd);
|
||||
schedule_delayed_work(&adev->hotplug_work, 0);
|
||||
DRM_DEBUG("IH: HPD%d\n", hpd + 1);
|
||||
}
|
||||
|
@ -265,6 +265,21 @@ static void dce_v8_0_hpd_set_polarity(struct amdgpu_device *adev,
|
||||
WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp);
|
||||
}
|
||||
|
||||
static void dce_v8_0_hpd_int_ack(struct amdgpu_device *adev,
|
||||
int hpd)
|
||||
{
|
||||
u32 tmp;
|
||||
|
||||
if (hpd >= adev->mode_info.num_hpd) {
|
||||
DRM_DEBUG("invalid hdp %d\n", hpd);
|
||||
return;
|
||||
}
|
||||
|
||||
tmp = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]);
|
||||
tmp |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_ACK_MASK;
|
||||
WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp);
|
||||
}
|
||||
|
||||
/**
|
||||
* dce_v8_0_hpd_init - hpd setup callback.
|
||||
*
|
||||
@ -304,6 +319,7 @@ static void dce_v8_0_hpd_init(struct amdgpu_device *adev)
|
||||
continue;
|
||||
}
|
||||
|
||||
dce_v8_0_hpd_int_ack(adev, amdgpu_connector->hpd.hpd);
|
||||
dce_v8_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd);
|
||||
amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
|
||||
}
|
||||
@ -3177,7 +3193,7 @@ static int dce_v8_0_hpd_irq(struct amdgpu_device *adev,
|
||||
struct amdgpu_irq_src *source,
|
||||
struct amdgpu_iv_entry *entry)
|
||||
{
|
||||
uint32_t disp_int, mask, tmp;
|
||||
uint32_t disp_int, mask;
|
||||
unsigned hpd;
|
||||
|
||||
if (entry->src_data[0] >= adev->mode_info.num_hpd) {
|
||||
@ -3190,9 +3206,7 @@ static int dce_v8_0_hpd_irq(struct amdgpu_device *adev,
|
||||
mask = interrupt_status_offsets[hpd].hpd;
|
||||
|
||||
if (disp_int & mask) {
|
||||
tmp = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]);
|
||||
tmp |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_ACK_MASK;
|
||||
WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp);
|
||||
dce_v8_0_hpd_int_ack(adev, hpd);
|
||||
schedule_delayed_work(&adev->hotplug_work, 0);
|
||||
DRM_DEBUG("IH: HPD%d\n", hpd + 1);
|
||||
}
|
||||
|
@ -7947,7 +7947,7 @@ static void gfx_v10_0_update_spm_vmid_internal(struct amdgpu_device *adev,
|
||||
WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
|
||||
}
|
||||
|
||||
static void gfx_v10_0_update_spm_vmid(struct amdgpu_device *adev, unsigned int vmid)
|
||||
static void gfx_v10_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned int vmid)
|
||||
{
|
||||
amdgpu_gfx_off_ctrl(adev, false);
|
||||
|
||||
|
@ -727,7 +727,7 @@ static int gfx_v11_0_rlc_init(struct amdgpu_device *adev)
|
||||
|
||||
/* init spm vmid with 0xf */
|
||||
if (adev->gfx.rlc.funcs->update_spm_vmid)
|
||||
adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
|
||||
adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -5027,7 +5027,7 @@ static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
|
||||
static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid)
|
||||
{
|
||||
u32 data;
|
||||
|
||||
@ -5041,6 +5041,14 @@ static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
|
||||
WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data);
|
||||
|
||||
amdgpu_gfx_off_ctrl(adev, true);
|
||||
|
||||
if (ring
|
||||
&& amdgpu_sriov_is_pp_one_vf(adev)
|
||||
&& ((ring->funcs->type == AMDGPU_RING_TYPE_GFX)
|
||||
|| (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE))) {
|
||||
uint32_t reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL);
|
||||
amdgpu_ring_emit_wreg(ring, reg, data);
|
||||
}
|
||||
}
|
||||
|
||||
static const struct amdgpu_rlc_funcs gfx_v11_0_rlc_funcs = {
|
||||
@ -6104,7 +6112,8 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
|
||||
.get_rptr = gfx_v11_0_ring_get_rptr_gfx,
|
||||
.get_wptr = gfx_v11_0_ring_get_wptr_gfx,
|
||||
.set_wptr = gfx_v11_0_ring_set_wptr_gfx,
|
||||
.emit_frame_size = /* totally 242 maximum if 16 IBs */
|
||||
.emit_frame_size = /* totally 247 maximum if 16 IBs */
|
||||
5 + /* update_spm_vmid */
|
||||
5 + /* COND_EXEC */
|
||||
9 + /* SET_Q_PREEMPTION_MODE */
|
||||
7 + /* PIPELINE_SYNC */
|
||||
@ -6154,6 +6163,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = {
|
||||
.get_wptr = gfx_v11_0_ring_get_wptr_compute,
|
||||
.set_wptr = gfx_v11_0_ring_set_wptr_compute,
|
||||
.emit_frame_size =
|
||||
5 + /* update_spm_vmid */
|
||||
20 + /* gfx_v11_0_ring_emit_gds_switch */
|
||||
7 + /* gfx_v11_0_ring_emit_hdp_flush */
|
||||
5 + /* hdp invalidate */
|
||||
|
@ -69,7 +69,7 @@ static int gfx_v11_0_3_rlc_gc_fed_irq(struct amdgpu_device *adev,
|
||||
amdgpu_ras_interrupt_dispatch(adev, &ih_data);
|
||||
} else {
|
||||
if (adev->virt.ops && adev->virt.ops->ras_poison_handler)
|
||||
adev->virt.ops->ras_poison_handler(adev);
|
||||
adev->virt.ops->ras_poison_handler(adev, ras_if->block);
|
||||
else
|
||||
dev_warn(adev->dev,
|
||||
"No ras_poison_handler interface in SRIOV for %s!\n", ras_if->name);
|
||||
|
@ -3274,7 +3274,7 @@ static int gfx_v7_0_rlc_init(struct amdgpu_device *adev)
|
||||
|
||||
/* init spm vmid with 0xf */
|
||||
if (adev->gfx.rlc.funcs->update_spm_vmid)
|
||||
adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
|
||||
adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -3500,7 +3500,7 @@ static int gfx_v7_0_rlc_resume(struct amdgpu_device *adev)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void gfx_v7_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
|
||||
static void gfx_v7_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid)
|
||||
{
|
||||
u32 data;
|
||||
|
||||
|
@ -1288,7 +1288,7 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
|
||||
|
||||
/* init spm vmid with 0xf */
|
||||
if (adev->gfx.rlc.funcs->update_spm_vmid)
|
||||
adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
|
||||
adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -5579,7 +5579,7 @@ static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
|
||||
}
|
||||
}
|
||||
|
||||
static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
|
||||
static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid)
|
||||
{
|
||||
u32 data;
|
||||
|
||||
|
@ -3034,6 +3034,14 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
|
||||
|
||||
gfx_v9_0_cp_gfx_enable(adev, true);
|
||||
|
||||
/* Now only limit the quirk on the APU gfx9 series and already
|
||||
* confirmed that the APU gfx10/gfx11 needn't such update.
|
||||
*/
|
||||
if (adev->flags & AMD_IS_APU &&
|
||||
adev->in_s3 && !adev->suspend_complete) {
|
||||
DRM_INFO(" Will skip the CSB packet resubmit\n");
|
||||
return 0;
|
||||
}
|
||||
r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
|
||||
@ -4894,7 +4902,7 @@ static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev,
|
||||
WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
|
||||
}
|
||||
|
||||
static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned int vmid)
|
||||
static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned int vmid)
|
||||
{
|
||||
amdgpu_gfx_off_ctrl(adev, false);
|
||||
|
||||
|
@ -970,8 +970,9 @@ static void gfx_v9_4_reset_ras_error_count(struct amdgpu_device *adev)
|
||||
WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_INDEX, 255);
|
||||
}
|
||||
|
||||
static const struct soc15_reg_entry gfx_v9_4_ea_err_status_regs =
|
||||
{ SOC15_REG_ENTRY(GC, 0, mmGCEA_ERR_STATUS), 0, 1, 32 };
|
||||
static const struct soc15_reg_entry gfx_v9_4_ea_err_status_regs = {
|
||||
SOC15_REG_ENTRY(GC, 0, mmGCEA_ERR_STATUS), 0, 1, 32
|
||||
};
|
||||
|
||||
static void gfx_v9_4_query_ras_error_status(struct amdgpu_device *adev)
|
||||
{
|
||||
|
@ -38,6 +38,7 @@
#include "gfx_v9_4_3.h"
#include "amdgpu_xcp.h"
#include "amdgpu_aca.h"
MODULE_FIRMWARE("amdgpu/gc_9_4_3_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_9_4_3_rlc.bin");

@ -48,6 +49,10 @@ MODULE_FIRMWARE("amdgpu/gc_9_4_3_rlc.bin");
#define GOLDEN_GB_ADDR_CONFIG 0x2a114042
#define CP_HQD_PERSISTENT_STATE_DEFAULT 0xbe05301
#define mmSMNAID_XCD0_MCA_SMU 0x36430400 /* SMN AID XCD0 */
#define mmSMNAID_XCD1_MCA_SMU 0x38430400 /* SMN AID XCD1 */
#define mmSMNXCD_XCD0_MCA_SMU 0x40430400 /* SMN XCD XCD0 */
struct amdgpu_gfx_ras gfx_v9_4_3_ras;
static void gfx_v9_4_3_set_ring_funcs(struct amdgpu_device *adev);

@ -675,6 +680,66 @@ static const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = {
.ih_node_to_logical_xcc = &gfx_v9_4_3_ih_to_xcc_inst,
};
static int gfx_v9_4_3_aca_bank_generate_report(struct aca_handle *handle,
struct aca_bank *bank, enum aca_error_type type,
struct aca_bank_report *report, void *data)
{
u64 status, misc0;
u32 instlo;
int ret;
status = bank->regs[ACA_REG_IDX_STATUS];
if ((type == ACA_ERROR_TYPE_UE &&
ACA_REG__STATUS__ERRORCODEEXT(status) == ACA_EXTERROR_CODE_FAULT) ||
(type == ACA_ERROR_TYPE_CE &&
ACA_REG__STATUS__ERRORCODEEXT(status) == ACA_EXTERROR_CODE_CE)) {
ret = aca_bank_info_decode(bank, &report->info);
if (ret)
return ret;
/* NOTE: overwrite info.die_id with xcd id for gfx */
instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]);
instlo &= GENMASK(31, 1);
report->info.die_id = instlo == mmSMNAID_XCD0_MCA_SMU ? 0 : 1;
misc0 = bank->regs[ACA_REG_IDX_MISC0];
report->count[type] = ACA_REG__MISC0__ERRCNT(misc0);
}
return 0;
}
static bool gfx_v9_4_3_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank,
enum aca_error_type type, void *data)
{
u32 instlo;
instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]);
instlo &= GENMASK(31, 1);
switch (instlo) {
case mmSMNAID_XCD0_MCA_SMU:
case mmSMNAID_XCD1_MCA_SMU:
case mmSMNXCD_XCD0_MCA_SMU:
return true;
default:
break;
}
return false;
}
static const struct aca_bank_ops gfx_v9_4_3_aca_bank_ops = {
.aca_bank_generate_report = gfx_v9_4_3_aca_bank_generate_report,
.aca_bank_is_valid = gfx_v9_4_3_aca_bank_is_valid,
};
static const struct aca_info gfx_v9_4_3_aca_info = {
.hwip = ACA_HWIP_TYPE_SMU,
.mask = ACA_ERROR_UE_MASK | ACA_ERROR_CE_MASK,
.bank_ops = &gfx_v9_4_3_aca_bank_ops,
};
static int gfx_v9_4_3_gpu_early_init(struct amdgpu_device *adev)
{
u32 gb_addr_config;

@ -1109,7 +1174,7 @@ static int gfx_v9_4_3_rlc_init(struct amdgpu_device *adev)
{
/* init spm vmid with 0xf */
if (adev->gfx.rlc.funcs->update_spm_vmid)
adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf);
return 0;
}

@ -1320,7 +1385,7 @@ static int gfx_v9_4_3_rlc_resume(struct amdgpu_device *adev)
return 0;
}
static void gfx_v9_4_3_update_spm_vmid(struct amdgpu_device *adev,
static void gfx_v9_4_3_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring,
unsigned vmid)
{
u32 reg, data;

@ -4242,9 +4307,32 @@ struct amdgpu_ras_block_hw_ops gfx_v9_4_3_ras_ops = {
.reset_ras_error_count = &gfx_v9_4_3_reset_ras_error_count,
};
static int gfx_v9_4_3_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
{
int r;
r = amdgpu_ras_block_late_init(adev, ras_block);
if (r)
return r;
r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__GFX,
&gfx_v9_4_3_aca_info,
NULL);
if (r)
goto late_fini;
return 0;
late_fini:
amdgpu_ras_block_late_fini(adev, ras_block);
return r;
}
struct amdgpu_gfx_ras gfx_v9_4_3_ras = {
.ras_block = {
.hw_ops = &gfx_v9_4_3_ras_ops,
.ras_late_init = &gfx_v9_4_3_ras_late_init,
},
.enable_watchdog_timer = &gfx_v9_4_3_enable_watchdog_timer,
};
@ -262,16 +262,17 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
/* flush hdp cache */
adev->hdp.funcs->flush_hdp(adev, NULL);
/* For SRIOV run time, driver shouldn't access the register through MMIO
* Directly use kiq to do the vm invalidation instead
/* This is necessary for SRIOV as well as for GFXOFF to function
* properly under bare metal
*/
if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes &&
(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
1 << vmid, GET_INST(GC, 0));
amdgpu_gmc_fw_reg_write_reg_wait(adev, req, ack, inv_req,
1 << vmid, GET_INST(GC, 0));
return;
}
/* This path is needed before KIQ/MES/GFXOFF are set up */
hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ? GC_HWIP : MMHUB_HWIP;
spin_lock(&adev->gmc.invalidate_lock);
@ -223,16 +223,17 @@ static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
/* flush hdp cache */
adev->hdp.funcs->flush_hdp(adev, NULL);
/* For SRIOV run time, driver shouldn't access the register through MMIO
* Directly use kiq to do the vm invalidation instead
/* This is necessary for SRIOV as well as for GFXOFF to function
* properly under bare metal
*/
if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring.sched.ready) &&
(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
1 << vmid, GET_INST(GC, 0));
amdgpu_gmc_fw_reg_write_reg_wait(adev, req, ack, inv_req,
1 << vmid, GET_INST(GC, 0));
return;
}
/* This path is needed before KIQ/MES/GFXOFF are set up */
hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ? GC_HWIP : MMHUB_HWIP;
spin_lock(&adev->gmc.invalidate_lock);
@ -435,9 +435,10 @@ static void gmc_v6_0_set_prt(struct amdgpu_device *adev, bool enable)
WREG32(mmVM_PRT_CNTL, tmp);
if (enable) {
uint32_t low = AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT;
uint32_t low = AMDGPU_VA_RESERVED_BOTTOM >>
AMDGPU_GPU_PAGE_SHIFT;
uint32_t high = adev->vm_manager.max_pfn -
(AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT);
(AMDGPU_VA_RESERVED_TOP >> AMDGPU_GPU_PAGE_SHIFT);
WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, low);
WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, low);
@ -563,9 +563,10 @@ static void gmc_v7_0_set_prt(struct amdgpu_device *adev, bool enable)
WREG32(mmVM_PRT_CNTL, tmp);
if (enable) {
uint32_t low = AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT;
uint32_t low = AMDGPU_VA_RESERVED_BOTTOM >>
AMDGPU_GPU_PAGE_SHIFT;
uint32_t high = adev->vm_manager.max_pfn -
(AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT);
(AMDGPU_VA_RESERVED_TOP >> AMDGPU_GPU_PAGE_SHIFT);
WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, low);
WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, low);
@ -777,9 +777,10 @@ static void gmc_v8_0_set_prt(struct amdgpu_device *adev, bool enable)
WREG32(mmVM_PRT_CNTL, tmp);
if (enable) {
uint32_t low = AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT;
uint32_t low = AMDGPU_VA_RESERVED_BOTTOM >>
AMDGPU_GPU_PAGE_SHIFT;
uint32_t high = adev->vm_manager.max_pfn -
(AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT);
(AMDGPU_VA_RESERVED_TOP >> AMDGPU_GPU_PAGE_SHIFT);
WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, low);
WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, low);
@ -829,23 +829,25 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
/* This is necessary for a HW workaround under SRIOV as well
* as GFXOFF under bare metal
*/
if (vmhub >= AMDGPU_MMHUB0(0))
inst = GET_INST(GC, 0);
else
inst = vmhub;
/* This is necessary for SRIOV as well as for GFXOFF to function
* properly under bare metal
*/
if (adev->gfx.kiq[inst].ring.sched.ready &&
(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
uint32_t req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
uint32_t ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
1 << vmid, inst);
amdgpu_gmc_fw_reg_write_reg_wait(adev, req, ack, inv_req,
1 << vmid, inst);
return;
}
/* This path is needed before KIQ/MES/GFXOFF are set up */
spin_lock(&adev->gmc.invalidate_lock);
/*

@ -1947,14 +1949,6 @@ static int gmc_v9_0_init_mem_ranges(struct amdgpu_device *adev)
static void gmc_v9_4_3_init_vram_info(struct amdgpu_device *adev)
{
static const u32 regBIF_BIOS_SCRATCH_4 = 0x50;
u32 vram_info;
/* Only for dGPU, vendor informaton is reliable */
if (!amdgpu_sriov_vf(adev) && !(adev->flags & AMD_IS_APU)) {
vram_info = RREG32(regBIF_BIOS_SCRATCH_4);
adev->gmc.vram_vendor = vram_info & 0xF;
}
adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM;
adev->gmc.vram_width = 128 * 64;
}
@ -551,7 +551,7 @@ static int jpeg_v2_5_set_powergating_state(void *handle,
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int ret;
if(state == adev->jpeg.cur_state)
if (state == adev->jpeg.cur_state)
return 0;
if (state == AMD_PG_STATE_GATE)

@ -559,7 +559,7 @@ static int jpeg_v2_5_set_powergating_state(void *handle,
else
ret = jpeg_v2_5_start(adev);
if(!ret)
if (!ret)
adev->jpeg.cur_state = state;
return ret;

@ -754,8 +754,7 @@ static void jpeg_v2_5_set_irq_funcs(struct amdgpu_device *adev)
}
}
const struct amdgpu_ip_block_version jpeg_v2_5_ip_block =
{
const struct amdgpu_ip_block_version jpeg_v2_5_ip_block = {
.type = AMD_IP_BLOCK_TYPE_JPEG,
.major = 2,
.minor = 5,

@ -763,8 +762,7 @@ const struct amdgpu_ip_block_version jpeg_v2_5_ip_block =
.funcs = &jpeg_v2_5_ip_funcs,
};
const struct amdgpu_ip_block_version jpeg_v2_6_ip_block =
{
const struct amdgpu_ip_block_version jpeg_v2_6_ip_block = {
.type = AMD_IP_BLOCK_TYPE_JPEG,
.major = 2,
.minor = 6,
@ -674,14 +674,6 @@ static int jpeg_v4_0_set_powergating_state(void *handle,
return ret;
}
static int jpeg_v4_0_set_interrupt_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
unsigned type,
enum amdgpu_interrupt_state state)
{
return 0;
}
static int jpeg_v4_0_set_ras_interrupt_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
unsigned int type,

@ -765,7 +757,6 @@ static void jpeg_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev)
}
static const struct amdgpu_irq_src_funcs jpeg_v4_0_irq_funcs = {
.set = jpeg_v4_0_set_interrupt_state,
.process = jpeg_v4_0_process_interrupt,
};
@ -181,7 +181,6 @@ static int jpeg_v4_0_5_hw_fini(void *handle)
RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS))
jpeg_v4_0_5_set_powergating_state(adev, AMD_PG_STATE_GATE);
}
amdgpu_irq_put(adev, &adev->jpeg.inst->irq, 0);
return 0;
}

@ -516,14 +515,6 @@ static int jpeg_v4_0_5_set_powergating_state(void *handle,
return ret;
}
static int jpeg_v4_0_5_set_interrupt_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
unsigned type,
enum amdgpu_interrupt_state state)
{
return 0;
}
static int jpeg_v4_0_5_process_interrupt(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)

@ -603,7 +594,6 @@ static void jpeg_v4_0_5_set_dec_ring_funcs(struct amdgpu_device *adev)
}
static const struct amdgpu_irq_src_funcs jpeg_v4_0_5_irq_funcs = {
.set = jpeg_v4_0_5_set_interrupt_state,
.process = jpeg_v4_0_5_process_interrupt,
};
@ -33,6 +33,7 @@
#define regVM_L2_CNTL3_DEFAULT 0x80100007
#define regVM_L2_CNTL4_DEFAULT 0x000000c1
#define mmSMNAID_AID0_MCA_SMU 0x03b30400
static u64 mmhub_v1_8_get_fb_location(struct amdgpu_device *adev)
{

@ -705,8 +706,94 @@ static const struct amdgpu_ras_block_hw_ops mmhub_v1_8_ras_hw_ops = {
.reset_ras_error_count = mmhub_v1_8_reset_ras_error_count,
};
static int mmhub_v1_8_aca_bank_generate_report(struct aca_handle *handle,
struct aca_bank *bank, enum aca_error_type type,
struct aca_bank_report *report, void *data)
{
u64 status, misc0;
int ret;
status = bank->regs[ACA_REG_IDX_STATUS];
if ((type == ACA_ERROR_TYPE_UE &&
ACA_REG__STATUS__ERRORCODEEXT(status) == ACA_EXTERROR_CODE_FAULT) ||
(type == ACA_ERROR_TYPE_CE &&
ACA_REG__STATUS__ERRORCODEEXT(status) == ACA_EXTERROR_CODE_CE)) {
ret = aca_bank_info_decode(bank, &report->info);
if (ret)
return ret;
misc0 = bank->regs[ACA_REG_IDX_MISC0];
report->count[type] = ACA_REG__MISC0__ERRCNT(misc0);
}
return 0;
}
/* reference to smu driver if header file */
static int mmhub_v1_8_err_codes[] = {
0, 1, 2, 3, 4, /* CODE_DAGB0 - 4 */
5, 6, 7, 8, 9, /* CODE_EA0 - 4 */
10, /* CODE_UTCL2_ROUTER */
11, /* CODE_VML2 */
12, /* CODE_VML2_WALKER */
13, /* CODE_MMCANE */
};
static bool mmhub_v1_8_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank,
enum aca_error_type type, void *data)
{
u32 instlo;
instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]);
instlo &= GENMASK(31, 1);
if (instlo != mmSMNAID_AID0_MCA_SMU)
return false;
if (aca_bank_check_error_codes(handle->adev, bank,
mmhub_v1_8_err_codes,
ARRAY_SIZE(mmhub_v1_8_err_codes)))
return false;
return true;
}
static const struct aca_bank_ops mmhub_v1_8_aca_bank_ops = {
.aca_bank_generate_report = mmhub_v1_8_aca_bank_generate_report,
.aca_bank_is_valid = mmhub_v1_8_aca_bank_is_valid,
};
static const struct aca_info mmhub_v1_8_aca_info = {
.hwip = ACA_HWIP_TYPE_SMU,
.mask = ACA_ERROR_UE_MASK,
.bank_ops = &mmhub_v1_8_aca_bank_ops,
};
static int mmhub_v1_8_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
{
int r;
r = amdgpu_ras_block_late_init(adev, ras_block);
if (r)
return r;
r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__MMHUB,
&mmhub_v1_8_aca_info, NULL);
if (r)
goto late_fini;
return 0;
late_fini:
amdgpu_ras_block_late_fini(adev, ras_block);
return r;
}
struct amdgpu_mmhub_ras mmhub_v1_8_ras = {
.ras_block = {
.hw_ops = &mmhub_v1_8_ras_hw_ops,
.ras_late_init = mmhub_v1_8_ras_late_init,
},
};
@ -404,7 +404,8 @@ static int xgpu_ai_request_init_data(struct amdgpu_device *adev)
return xgpu_ai_send_access_requests(adev, IDH_REQ_GPU_INIT_DATA);
}
static void xgpu_ai_ras_poison_handler(struct amdgpu_device *adev)
static void xgpu_ai_ras_poison_handler(struct amdgpu_device *adev,
enum amdgpu_ras_block block)
{
xgpu_ai_send_access_requests(adev, IDH_RAS_POISON);
}
@ -152,14 +152,14 @@ static void xgpu_nv_mailbox_trans_msg (struct amdgpu_device *adev,
xgpu_nv_mailbox_set_valid(adev, false);
}
static int xgpu_nv_send_access_requests(struct amdgpu_device *adev,
enum idh_request req)
static int xgpu_nv_send_access_requests_with_param(struct amdgpu_device *adev,
enum idh_request req, u32 data1, u32 data2, u32 data3)
{
int r, retry = 1;
enum idh_event event = -1;
send_request:
xgpu_nv_mailbox_trans_msg(adev, req, 0, 0, 0);
xgpu_nv_mailbox_trans_msg(adev, req, data1, data2, data3);
switch (req) {
case IDH_REQ_GPU_INIT_ACCESS:

@ -170,6 +170,10 @@ send_request:
case IDH_REQ_GPU_INIT_DATA:
event = IDH_REQ_GPU_INIT_DATA_READY;
break;
case IDH_RAS_POISON:
if (data1 != 0)
event = IDH_RAS_POISON_READY;
break;
default:
break;
}

@ -206,6 +210,13 @@ send_request:
return 0;
}
static int xgpu_nv_send_access_requests(struct amdgpu_device *adev,
enum idh_request req)
{
return xgpu_nv_send_access_requests_with_param(adev,
req, 0, 0, 0);
}
static int xgpu_nv_request_reset(struct amdgpu_device *adev)
{
int ret, i = 0;

@ -424,9 +435,17 @@ void xgpu_nv_mailbox_put_irq(struct amdgpu_device *adev)
amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
}
static void xgpu_nv_ras_poison_handler(struct amdgpu_device *adev)
static void xgpu_nv_ras_poison_handler(struct amdgpu_device *adev,
enum amdgpu_ras_block block)
{
xgpu_nv_send_access_requests(adev, IDH_RAS_POISON);
if (amdgpu_ip_version(adev, UMC_HWIP, 0) < IP_VERSION(12, 0, 0)) {
xgpu_nv_send_access_requests(adev, IDH_RAS_POISON);
} else {
amdgpu_virt_fini_data_exchange(adev);
xgpu_nv_send_access_requests_with_param(adev,
IDH_RAS_POISON, block, 0, 0);
amdgpu_virt_init_data_exchange(adev);
}
}
const struct amdgpu_virt_ops xgpu_nv_virt_ops = {
@ -51,6 +51,7 @@ enum idh_event {
IDH_FAIL,
IDH_QUERY_ALIVE,
IDH_REQ_GPU_INIT_DATA_READY,
IDH_RAS_POISON_READY,
IDH_TEXT_MESSAGE = 255,
};
@ -728,8 +728,7 @@ static void navi10_ih_set_interrupt_funcs(struct amdgpu_device *adev)
adev->irq.ih_funcs = &navi10_ih_funcs;
}
const struct amdgpu_ip_block_version navi10_ih_ip_block =
{
const struct amdgpu_ip_block_version navi10_ih_ip_block = {
.type = AMD_IP_BLOCK_TYPE_IH,
.major = 5,
.minor = 0,
@ -431,6 +431,12 @@ static void nbio_v7_9_init_registers(struct amdgpu_device *adev)
u32 inst_mask;
int i;
if (amdgpu_sriov_vf(adev))
adev->rmmio_remap.reg_offset =
SOC15_REG_OFFSET(
NBIO, 0,
regBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL)
<< 2;
WREG32_SOC15(NBIO, 0, regXCC_DOORBELL_FENCE,
0xff & ~(adev->gfx.xcc_mask));
@ -27,6 +27,7 @@
#include "amdgpu_ucode.h"
#include "soc15_common.h"
#include "psp_v13_0.h"
#include "amdgpu_ras.h"
#include "mp/mp_13_0_2_offset.h"
#include "mp/mp_13_0_2_sh_mask.h"

@ -187,11 +188,18 @@ static int psp_v13_0_wait_for_bootloader(struct psp_context *psp)
static int psp_v13_0_wait_for_bootloader_steady_state(struct psp_context *psp)
{
struct amdgpu_device *adev = psp->adev;
int ret;
if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6)) {
psp_v13_0_wait_for_vmbx_ready(psp);
ret = psp_v13_0_wait_for_vmbx_ready(psp);
if (ret)
amdgpu_ras_query_boot_status(adev, 4);
return psp_v13_0_wait_for_bootloader(psp);
ret = psp_v13_0_wait_for_bootloader(psp);
if (ret)
amdgpu_ras_query_boot_status(adev, 4);
return ret;
}
return 0;

@ -763,81 +771,28 @@ static int psp_v13_0_fatal_error_recovery_quirk(struct psp_context *psp)
return 0;
}
static void psp_v13_0_boot_error_reporting(struct amdgpu_device *adev,
uint32_t inst,
uint32_t boot_error)
{
uint32_t socket_id;
uint32_t aid_id;
uint32_t hbm_id;
uint32_t reg_data;
socket_id = REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, SOCKET_ID);
aid_id = REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, AID_ID);
hbm_id = REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, HBM_ID);
reg_data = RREG32_SOC15(MP0, inst, regMP0_SMN_C2PMSG_109);
dev_info(adev->dev, "socket: %d, aid: %d, firmware boot failed, fw status is 0x%x\n",
socket_id, aid_id, reg_data);
if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_MEM_TRAINING))
dev_info(adev->dev, "socket: %d, aid: %d, hbm: %d, memory training failed\n",
socket_id, aid_id, hbm_id);
if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_FW_LOAD))
dev_info(adev->dev, "socket: %d, aid: %d, firmware load failed at boot time\n",
socket_id, aid_id);
if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_WAFL_LINK_TRAINING))
dev_info(adev->dev, "socket: %d, aid: %d, wafl link training failed\n",
socket_id, aid_id);
if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_XGMI_LINK_TRAINING))
dev_info(adev->dev, "socket: %d, aid: %d, xgmi link training failed\n",
socket_id, aid_id);
if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_USR_CP_LINK_TRAINING))
dev_info(adev->dev, "socket: %d, aid: %d, usr cp link training failed\n",
socket_id, aid_id);
if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_USR_DP_LINK_TRAINING))
dev_info(adev->dev, "socket: %d, aid: %d, usr dp link training failed\n",
socket_id, aid_id);
if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_HBM_MEM_TEST))
dev_info(adev->dev, "socket: %d, aid: %d, hbm: %d, hbm memory test failed\n",
socket_id, aid_id, hbm_id);
if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_HBM_BIST_TEST))
dev_info(adev->dev, "socket: %d, aid: %d, hbm: %d, hbm bist test failed\n",
socket_id, aid_id, hbm_id);
}
static int psp_v13_0_query_boot_status(struct psp_context *psp)
static bool psp_v13_0_get_ras_capability(struct psp_context *psp)
{
struct amdgpu_device *adev = psp->adev;
int inst_mask = adev->aid_mask;
uint32_t reg_data;
uint32_t i;
int ret = 0;
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
u32 reg_data;
if (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 6))
return 0;
/* query ras cap should be done from host side */
if (amdgpu_sriov_vf(adev))
return false;
if (RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_59) < 0x00a10109)
return 0;
if (!con)
return false;
for_each_inst(i, inst_mask) {
reg_data = RREG32_SOC15(MP0, i, regMP0_SMN_C2PMSG_126);
if (!REG_GET_FIELD(reg_data, MP0_SMN_C2PMSG_126, BOOT_STATUS)) {
psp_v13_0_boot_error_reporting(adev, i, reg_data);
ret = -EINVAL;
break;
}
if ((amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6)) &&
(!(adev->flags & AMD_IS_APU))) {
reg_data = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_127);
adev->ras_hw_enabled = (reg_data & GENMASK_ULL(23, 0));
con->poison_supported = ((reg_data & GENMASK_ULL(24, 24)) >> 24) ? true : false;
return true;
} else {
return false;
}
return ret;
}
static const struct psp_funcs psp_v13_0_funcs = {

@ -862,7 +817,7 @@ static const struct psp_funcs psp_v13_0_funcs = {
.update_spirom = psp_v13_0_update_spirom,
.vbflash_stat = psp_v13_0_vbflash_status,
.fatal_error_recovery_quirk = psp_v13_0_fatal_error_recovery_quirk,
.query_boot_status = psp_v13_0_query_boot_status,
.get_ras_capability = psp_v13_0_get_ras_capability,
};
void psp_v13_0_set_psp_funcs(struct psp_context *psp)
@ -57,22 +57,19 @@ static void sdma_v2_4_set_irq_funcs(struct amdgpu_device *adev);
MODULE_FIRMWARE("amdgpu/topaz_sdma.bin");
MODULE_FIRMWARE("amdgpu/topaz_sdma1.bin");
static const u32 sdma_offsets[SDMA_MAX_INSTANCE] =
{
static const u32 sdma_offsets[SDMA_MAX_INSTANCE] = {
SDMA0_REGISTER_OFFSET,
SDMA1_REGISTER_OFFSET
};
static const u32 golden_settings_iceland_a11[] =
{
static const u32 golden_settings_iceland_a11[] = {
mmSDMA0_CHICKEN_BITS, 0xfc910007, 0x00810007,
mmSDMA0_CLK_CTRL, 0xff000fff, 0x00000000,
mmSDMA1_CHICKEN_BITS, 0xfc910007, 0x00810007,
mmSDMA1_CLK_CTRL, 0xff000fff, 0x00000000,
};
static const u32 iceland_mgcg_cgcg_init[] =
{
static const u32 iceland_mgcg_cgcg_init[] = {
mmSDMA0_CLK_CTRL, 0xff000ff0, 0x00000100,
mmSDMA1_CLK_CTRL, 0xff000ff0, 0x00000100
};

@ -142,7 +139,8 @@ static int sdma_v2_4_init_microcode(struct amdgpu_device *adev)
case CHIP_TOPAZ:
chip_name = "topaz";
break;
default: BUG();
default:
BUG();
}
for (i = 0; i < adev->sdma.num_instances; i++) {

@ -1258,8 +1256,7 @@ static void sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev)
adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
}
const struct amdgpu_ip_block_version sdma_v2_4_ip_block =
{
const struct amdgpu_ip_block_version sdma_v2_4_ip_block = {
.type = AMD_IP_BLOCK_TYPE_SDMA,
.major = 2,
.minor = 4,
@ -45,6 +45,8 @@
MODULE_FIRMWARE("amdgpu/sdma_4_4_2.bin");
#define mmSMNAID_AID0_MCA_SMU 0x03b30400
#define WREG32_SDMA(instance, offset, value) \
WREG32(sdma_v4_4_2_get_reg_offset(adev, (instance), (offset)), value)
#define RREG32_SDMA(instance, offset) \

@ -2204,9 +2206,79 @@ static const struct amdgpu_ras_block_hw_ops sdma_v4_4_2_ras_hw_ops = {
.reset_ras_error_count = sdma_v4_4_2_reset_ras_error_count,
};
static int sdma_v4_4_2_aca_bank_generate_report(struct aca_handle *handle,
struct aca_bank *bank, enum aca_error_type type,
struct aca_bank_report *report, void *data)
{
u64 status, misc0;
int ret;
status = bank->regs[ACA_REG_IDX_STATUS];
if ((type == ACA_ERROR_TYPE_UE &&
ACA_REG__STATUS__ERRORCODEEXT(status) == ACA_EXTERROR_CODE_FAULT) ||
(type == ACA_ERROR_TYPE_CE &&
ACA_REG__STATUS__ERRORCODEEXT(status) == ACA_EXTERROR_CODE_CE)) {
ret = aca_bank_info_decode(bank, &report->info);
if (ret)
return ret;
misc0 = bank->regs[ACA_REG_IDX_MISC0];
report->count[type] = ACA_REG__MISC0__ERRCNT(misc0);
}
return 0;
}
/* CODE_SDMA0 - CODE_SDMA4, reference to smu driver if header file */
static int sdma_v4_4_2_err_codes[] = { 33, 34, 35, 36 };
static bool sdma_v4_4_2_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank,
enum aca_error_type type, void *data)
{
u32 instlo;
instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]);
instlo &= GENMASK(31, 1);
if (instlo != mmSMNAID_AID0_MCA_SMU)
return false;
if (aca_bank_check_error_codes(handle->adev, bank,
sdma_v4_4_2_err_codes,
ARRAY_SIZE(sdma_v4_4_2_err_codes)))
return false;
return true;
}
static const struct aca_bank_ops sdma_v4_4_2_aca_bank_ops = {
.aca_bank_generate_report = sdma_v4_4_2_aca_bank_generate_report,
.aca_bank_is_valid = sdma_v4_4_2_aca_bank_is_valid,
};
static const struct aca_info sdma_v4_4_2_aca_info = {
.hwip = ACA_HWIP_TYPE_SMU,
.mask = ACA_ERROR_UE_MASK,
.bank_ops = &sdma_v4_4_2_aca_bank_ops,
};
static int sdma_v4_4_2_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
{
int r;
r = amdgpu_sdma_ras_late_init(adev, ras_block);
if (r)
return r;
return amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__SDMA,
&sdma_v4_4_2_aca_info, NULL);
}
static struct amdgpu_sdma_ras sdma_v4_4_2_ras = {
.ras_block = {
.hw_ops = &sdma_v4_4_2_ras_hw_ops,
.ras_late_init = sdma_v4_4_2_ras_late_init,
},
};
@ -1298,10 +1298,32 @@ static int soc15_common_suspend(void *handle)
return soc15_common_hw_fini(adev);
}
static bool soc15_need_reset_on_resume(struct amdgpu_device *adev)
{
u32 sol_reg;
sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
/* Will reset for the following suspend abort cases.
* 1) Only reset limit on APU side, dGPU hasn't checked yet.
* 2) S3 suspend abort and TOS already launched.
*/
if (adev->flags & AMD_IS_APU && adev->in_s3 &&
!adev->suspend_complete &&
sol_reg)
return true;
return false;
}
static int soc15_common_resume(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if (soc15_need_reset_on_resume(adev)) {
dev_info(adev->dev, "S3 suspend abort case, let's reset ASIC.\n");
soc15_asic_reset(adev);
}
return soc15_common_hw_init(adev);
}
@ -36,6 +36,9 @@ enum ras_command {
TA_RAS_COMMAND__ENABLE_FEATURES = 0,
TA_RAS_COMMAND__DISABLE_FEATURES,
TA_RAS_COMMAND__TRIGGER_ERROR,
TA_RAS_COMMAND__QUERY_BLOCK_INFO,
TA_RAS_COMMAND__QUERY_SUB_BLOCK_INFO,
TA_RAS_COMMAND__QUERY_ADDRESS,
};
enum ta_ras_status {

@ -105,6 +108,11 @@ enum ta_ras_error_type {
TA_RAS_ERROR__POISON = 8,
};
enum ta_ras_address_type {
TA_RAS_MCA_TO_PA,
TA_RAS_PA_TO_MCA,
};
/* Input/output structures for RAS commands */
/**********************************************************/

@ -133,12 +141,38 @@ struct ta_ras_init_flags {
uint8_t channel_dis_num;
};
struct ta_ras_mca_addr {
uint64_t err_addr;
uint32_t ch_inst;
uint32_t umc_inst;
uint32_t node_inst;
};
struct ta_ras_phy_addr {
uint64_t pa;
uint32_t bank;
uint32_t channel_idx;
};
struct ta_ras_query_address_input {
enum ta_ras_address_type addr_type;
struct ta_ras_mca_addr ma;
struct ta_ras_phy_addr pa;
};
struct ta_ras_output_flags {
uint8_t ras_init_success_flag;
uint8_t err_inject_switch_disable_flag;
uint8_t reg_access_failure_flag;
};
struct ta_ras_query_address_output {
/* don't use the flags here */
struct ta_ras_output_flags flags;
struct ta_ras_mca_addr ma;
struct ta_ras_phy_addr pa;
};
/* Common input structure for RAS callbacks */
/**********************************************************/
union ta_ras_cmd_input {

@ -146,12 +180,14 @@ union ta_ras_cmd_input {
struct ta_ras_enable_features_input enable_features;
struct ta_ras_disable_features_input disable_features;
struct ta_ras_trigger_error_input trigger_error;
struct ta_ras_query_address_input address;
uint32_t reserve_pad[256];
};
union ta_ras_cmd_output {
struct ta_ras_output_flags flags;
struct ta_ras_query_address_output address;
uint32_t reserve_pad[256];
};
@ -89,12 +89,28 @@ static void umc_v12_0_reset_error_count(struct amdgpu_device *adev)
umc_v12_0_reset_error_count_per_channel, NULL);
}
bool umc_v12_0_is_deferred_error(struct amdgpu_device *adev, uint64_t mc_umc_status)
{
dev_info(adev->dev,
"MCA_UMC_STATUS(0x%llx): Val:%llu, Poison:%llu, Deferred:%llu, PCC:%llu, UC:%llu, TCC:%llu\n",
mc_umc_status,
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val),
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Poison),
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred),
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC),
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC),
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC)
);
return (amdgpu_ras_is_poison_mode_supported(adev) &&
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1));
}
bool umc_v12_0_is_uncorrectable_error(struct amdgpu_device *adev, uint64_t mc_umc_status)
{
if (amdgpu_ras_is_poison_mode_supported(adev) &&
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1))
return true;
if (umc_v12_0_is_deferred_error(adev, mc_umc_status))
return false;
return ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||

@ -104,9 +120,7 @@ bool umc_v12_0_is_uncorrectable_error(struct amdgpu_device *adev, uint64_t mc_um
bool umc_v12_0_is_correctable_error(struct amdgpu_device *adev, uint64_t mc_umc_status)
{
if (amdgpu_ras_is_poison_mode_supported(adev) &&
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1))
if (umc_v12_0_is_deferred_error(adev, mc_umc_status))
return false;
return (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&

@ -119,9 +133,10 @@ bool umc_v12_0_is_correctable_error(struct amdgpu_device *adev, uint64_t mc_umc_
!(umc_v12_0_is_uncorrectable_error(adev, mc_umc_status)))));
}
static void umc_v12_0_query_correctable_error_count(struct amdgpu_device *adev,
static void umc_v12_0_query_error_count_per_type(struct amdgpu_device *adev,
uint64_t umc_reg_offset,
unsigned long *error_count)
unsigned long *error_count,
check_error_type_func error_type_func)
{
uint64_t mc_umc_status;
uint64_t mc_umc_status_addr;

@ -129,31 +144,11 @@ static void umc_v12_0_query_correctable_error_count(struct amdgpu_device *adev,
mc_umc_status_addr =
SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
/* Rely on MCUMC_STATUS for correctable error counter
* MCUMC_STATUS is a 64 bit register
*/
/* Check MCUMC_STATUS */
mc_umc_status =
RREG64_PCIE_EXT((mc_umc_status_addr + umc_reg_offset) * 4);
if (umc_v12_0_is_correctable_error(adev, mc_umc_status))
*error_count += 1;
}
static void umc_v12_0_query_uncorrectable_error_count(struct amdgpu_device *adev,
uint64_t umc_reg_offset,
unsigned long *error_count)
{
uint64_t mc_umc_status;
uint64_t mc_umc_status_addr;
mc_umc_status_addr =
SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
/* Check the MCUMC_STATUS. */
mc_umc_status =
RREG64_PCIE_EXT((mc_umc_status_addr + umc_reg_offset) * 4);
if (umc_v12_0_is_uncorrectable_error(adev, mc_umc_status))
if (error_type_func(adev, mc_umc_status))
*error_count += 1;
}

@ -162,7 +157,7 @@ static int umc_v12_0_query_error_count(struct amdgpu_device *adev,
uint32_t ch_inst, void *data)
{
struct ras_err_data *err_data = (struct ras_err_data *)data;
unsigned long ue_count = 0, ce_count = 0;
unsigned long ue_count = 0, ce_count = 0, de_count = 0;
/* NOTE: node_inst is converted by adev->umc.active_mask and the range is [0-3],
* which can be used as die ID directly */

@ -174,11 +169,16 @@ static int umc_v12_0_query_error_count(struct amdgpu_device *adev,
uint64_t umc_reg_offset =
get_umc_v12_0_reg_offset(adev, node_inst, umc_inst, ch_inst);
umc_v12_0_query_correctable_error_count(adev, umc_reg_offset, &ce_count);
umc_v12_0_query_uncorrectable_error_count(adev, umc_reg_offset, &ue_count);
umc_v12_0_query_error_count_per_type(adev, umc_reg_offset,
&ce_count, umc_v12_0_is_correctable_error);
umc_v12_0_query_error_count_per_type(adev, umc_reg_offset,
&ue_count, umc_v12_0_is_uncorrectable_error);
umc_v12_0_query_error_count_per_type(adev, umc_reg_offset,
&de_count, umc_v12_0_is_deferred_error);
amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, NULL, ue_count);
amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, NULL, ce_count);
amdgpu_ras_error_statistic_de_count(err_data, &mcm_info, NULL, de_count);
return 0;
}

@ -203,14 +203,14 @@ static bool umc_v12_0_bit_wise_xor(uint32_t val)
return result;
}
static void umc_v12_0_convert_error_address(struct amdgpu_device *adev,
struct ras_err_data *err_data, uint64_t err_addr,
uint32_t ch_inst, uint32_t umc_inst,
uint32_t node_inst)
static void umc_v12_0_mca_addr_to_pa(struct amdgpu_device *adev,
uint64_t err_addr, uint32_t ch_inst, uint32_t umc_inst,
uint32_t node_inst,
struct ta_ras_query_address_output *addr_out)
{
uint32_t channel_index, i;
uint64_t soc_pa, na, retired_page, column;
uint32_t bank_hash0, bank_hash1, bank_hash2, bank_hash3, col, row, row_xor;
uint64_t na, soc_pa;
uint32_t bank_hash0, bank_hash1, bank_hash2, bank_hash3, col, row;
uint32_t bank0, bank1, bank2, bank3, bank;
bank_hash0 = (err_addr >> UMC_V12_0_MCA_B0_BIT) & 0x1ULL;

@ -260,12 +260,44 @@ static void umc_v12_0_convert_error_address(struct amdgpu_device *adev,
/* the umc channel bits are not original values, they are hashed */
UMC_V12_0_SET_CHANNEL_HASH(channel_index, soc_pa);
addr_out->pa.pa = soc_pa;
addr_out->pa.bank = bank;
addr_out->pa.channel_idx = channel_index;
}
static void umc_v12_0_convert_error_address(struct amdgpu_device *adev,
struct ras_err_data *err_data, uint64_t err_addr,
uint32_t ch_inst, uint32_t umc_inst,
uint32_t node_inst)
{
uint32_t col, row, row_xor, bank, channel_index;
uint64_t soc_pa, retired_page, column;
struct ta_ras_query_address_input addr_in;
struct ta_ras_query_address_output addr_out;
addr_in.addr_type = TA_RAS_MCA_TO_PA;
addr_in.ma.err_addr = err_addr;
addr_in.ma.ch_inst = ch_inst;
addr_in.ma.umc_inst = umc_inst;
addr_in.ma.node_inst = node_inst;
if (psp_ras_query_address(&adev->psp, &addr_in, &addr_out))
/* fallback to old path if fail to get pa from psp */
umc_v12_0_mca_addr_to_pa(adev, err_addr, ch_inst, umc_inst,
node_inst, &addr_out);
soc_pa = addr_out.pa.pa;
bank = addr_out.pa.bank;
channel_index = addr_out.pa.channel_idx;
col = (err_addr >> 1) & 0x1fULL;
row = (err_addr >> 10) & 0x3fffULL;
row_xor = row ^ (0x1ULL << 13);
/* clear [C3 C2] in soc physical address */
soc_pa &= ~(0x3ULL << UMC_V12_0_PA_C2_BIT);
/* clear [C4] in soc physical address */
soc_pa &= ~(0x1ULL << UMC_V12_0_PA_C4_BIT);
row_xor = row ^ (0x1ULL << 13);
/* loop for all possibilities of [C4 C3 C2] */
for (column = 0; column < UMC_V12_0_NA_MAP_PA_NUM; column++) {
retired_page = soc_pa | ((column & 0x3) << UMC_V12_0_PA_C2_BIT);

@ -316,10 +348,7 @@ static int umc_v12_0_query_error_address(struct amdgpu_device *adev,
}
/* calculate error address if ue error is detected */
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, AddrV) == 1 &&
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1) {
if (umc_v12_0_is_uncorrectable_error(adev, mc_umc_status)) {
mc_umc_addrt0 =
SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_ADDRT0);

@ -385,45 +414,69 @@ static void umc_v12_0_ecc_info_query_ras_error_address(struct amdgpu_device *ade
{
struct ras_err_node *err_node;
uint64_t mc_umc_status;
struct ras_err_info *err_info;
struct ras_err_addr *mca_err_addr, *tmp;
struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
for_each_ras_error(err_node, err_data) {
mc_umc_status = err_node->err_info.err_addr.err_status;
if (!mc_umc_status)
err_info = &err_node->err_info;
if (list_empty(&err_info->err_addr_list))
continue;
if (umc_v12_0_is_uncorrectable_error(adev, mc_umc_status)) {
uint64_t mca_addr, err_addr, mca_ipid;
uint32_t InstanceIdLo;
struct amdgpu_smuio_mcm_config_info *mcm_info;
list_for_each_entry_safe(mca_err_addr, tmp, &err_info->err_addr_list, node) {
mc_umc_status = mca_err_addr->err_status;
if (mc_umc_status &&
(umc_v12_0_is_uncorrectable_error(adev, mc_umc_status) ||
umc_v12_0_is_deferred_error(adev, mc_umc_status))) {
uint64_t mca_addr, err_addr, mca_ipid;
uint32_t InstanceIdLo;
mcm_info = &err_node->err_info.mcm_info;
mca_addr = err_node->err_info.err_addr.err_addr;
mca_ipid = err_node->err_info.err_addr.err_ipid;
mca_addr = mca_err_addr->err_addr;
mca_ipid = mca_err_addr->err_ipid;
err_addr = REG_GET_FIELD(mca_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
InstanceIdLo = REG_GET_FIELD(mca_ipid, MCMP1_IPIDT0, InstanceIdLo);
err_addr = REG_GET_FIELD(mca_addr,
MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
InstanceIdLo = REG_GET_FIELD(mca_ipid, MCMP1_IPIDT0, InstanceIdLo);
dev_info(adev->dev, "UMC:IPID:0x%llx, aid:%d, inst:%d, ch:%d, err_addr:0x%llx\n",
mca_ipid,
mcm_info->die_id,
MCA_IPID_LO_2_UMC_INST(InstanceIdLo),
MCA_IPID_LO_2_UMC_CH(InstanceIdLo),
err_addr);
dev_info(adev->dev, "UMC:IPID:0x%llx, aid:%d, inst:%d, ch:%d, err_addr:0x%llx\n",
mca_ipid,
err_info->mcm_info.die_id,
MCA_IPID_LO_2_UMC_INST(InstanceIdLo),
MCA_IPID_LO_2_UMC_CH(InstanceIdLo),
err_addr);
umc_v12_0_convert_error_address(adev,
err_data, err_addr,
MCA_IPID_LO_2_UMC_CH(InstanceIdLo),
MCA_IPID_LO_2_UMC_INST(InstanceIdLo),
mcm_info->die_id);
umc_v12_0_convert_error_address(adev,
err_data, err_addr,
MCA_IPID_LO_2_UMC_CH(InstanceIdLo),
MCA_IPID_LO_2_UMC_INST(InstanceIdLo),
err_info->mcm_info.die_id);
}
/* Clear umc error address content */
memset(&err_node->err_info.err_addr,
0, sizeof(err_node->err_info.err_addr));
/* Delete error address node from list and free memory */
amdgpu_ras_del_mca_err_addr(err_info, mca_err_addr);
}
}
}
static bool umc_v12_0_check_ecc_err_status(struct amdgpu_device *adev,
enum amdgpu_mca_error_type type, void *ras_error_status)
{
uint64_t mc_umc_status = *(uint64_t *)ras_error_status;
switch (type) {
case AMDGPU_MCA_ERROR_TYPE_UE:
return umc_v12_0_is_uncorrectable_error(adev, mc_umc_status);
case AMDGPU_MCA_ERROR_TYPE_CE:
return umc_v12_0_is_correctable_error(adev, mc_umc_status);
case AMDGPU_MCA_ERROR_TYPE_DE:
return umc_v12_0_is_deferred_error(adev, mc_umc_status);
default:
return false;
}
return false;
}
static void umc_v12_0_err_cnt_init(struct amdgpu_device *adev)
{
amdgpu_umc_loop_channels(adev,

@ -444,12 +497,71 @@ const struct amdgpu_ras_block_hw_ops umc_v12_0_ras_hw_ops = {
.query_ras_error_address = umc_v12_0_query_ras_error_address,
};
static int umc_v12_0_aca_bank_generate_report(struct aca_handle *handle, struct aca_bank *bank, enum aca_error_type type,
struct aca_bank_report *report, void *data)
{
struct amdgpu_device *adev = handle->adev;
u64 status;
int ret;
ret = aca_bank_info_decode(bank, &report->info);
if (ret)
return ret;
status = bank->regs[ACA_REG_IDX_STATUS];
switch (type) {
case ACA_ERROR_TYPE_UE:
if (umc_v12_0_is_uncorrectable_error(adev, status)) {
report->count[type] = 1;
}
break;
case ACA_ERROR_TYPE_CE:
if (umc_v12_0_is_correctable_error(adev, status)) {
report->count[type] = 1;
}
break;
default:
return -EINVAL;
}
return 0;
}
static const struct aca_bank_ops umc_v12_0_aca_bank_ops = {
.aca_bank_generate_report = umc_v12_0_aca_bank_generate_report,
};
const struct aca_info umc_v12_0_aca_info = {
.hwip = ACA_HWIP_TYPE_UMC,
.mask = ACA_ERROR_UE_MASK | ACA_ERROR_CE_MASK,
.bank_ops = &umc_v12_0_aca_bank_ops,
};
static int umc_v12_0_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
{
int ret;
ret = amdgpu_umc_ras_late_init(adev, ras_block);
if (ret)
return ret;
ret = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__UMC,
&umc_v12_0_aca_info, NULL);
if (ret)
return ret;
return 0;
}
struct amdgpu_umc_ras umc_v12_0_ras = {
.ras_block = {
.hw_ops = &umc_v12_0_ras_hw_ops,
.ras_late_init = umc_v12_0_ras_late_init,
},
.err_cnt_init = umc_v12_0_err_cnt_init,
.query_ras_poison_mode = umc_v12_0_query_ras_poison_mode,
.ecc_info_query_ras_error_count = umc_v12_0_ecc_info_query_ras_error_count,
.ecc_info_query_ras_error_address = umc_v12_0_ecc_info_query_ras_error_address,
.check_ecc_err_status = umc_v12_0_check_ecc_err_status,
};
@ -121,9 +121,12 @@
(((_ipid_lo) >> 12) & 0xF))
#define MCA_IPID_LO_2_UMC_INST(_ipid_lo) (((_ipid_lo) >> 21) & 0x7)
bool umc_v12_0_is_deferred_error(struct amdgpu_device *adev, uint64_t mc_umc_status);
bool umc_v12_0_is_uncorrectable_error(struct amdgpu_device *adev, uint64_t mc_umc_status);
bool umc_v12_0_is_correctable_error(struct amdgpu_device *adev, uint64_t mc_umc_status);
typedef bool (*check_error_type_func)(struct amdgpu_device *adev, uint64_t mc_umc_status);
extern const uint32_t
umc_v12_0_channel_idx_tbl[]
[UMC_V12_0_UMC_INSTANCE_NUM]
@ -25,7 +25,7 @@
static void umc_v6_0_init_registers(struct amdgpu_device *adev)
{
unsigned i,j;
unsigned i, j;
for (i = 0; i < 4; i++)
for (j = 0; j < 4; j++)
@ -55,6 +55,7 @@ static struct kfd_gpu_cache_info kaveri_cache_info[] = {
|
||||
/* TCP L1 Cache per CU */
|
||||
.cache_size = 16,
|
||||
.cache_level = 1,
|
||||
.cache_line_size = 64,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
@ -64,6 +65,7 @@ static struct kfd_gpu_cache_info kaveri_cache_info[] = {
|
||||
/* Scalar L1 Instruction Cache (in SQC module) per bank */
|
||||
.cache_size = 16,
|
||||
.cache_level = 1,
|
||||
.cache_line_size = 64,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_INST_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
@ -73,6 +75,7 @@ static struct kfd_gpu_cache_info kaveri_cache_info[] = {
|
||||
/* Scalar L1 Data Cache (in SQC module) per bank */
|
||||
.cache_size = 8,
|
||||
.cache_level = 1,
|
||||
.cache_line_size = 64,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
@ -88,6 +91,7 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = {
|
||||
/* TCP L1 Cache per CU */
|
||||
.cache_size = 16,
|
||||
.cache_level = 1,
|
||||
.cache_line_size = 64,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
@ -95,8 +99,9 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = {
|
||||
},
|
||||
{
|
||||
/* Scalar L1 Instruction Cache (in SQC module) per bank */
|
||||
.cache_size = 8,
|
||||
.cache_size = 32,
|
||||
.cache_level = 1,
|
||||
.cache_line_size = 64,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_INST_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
@ -104,8 +109,9 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = {
|
||||
},
|
||||
{
|
||||
/* Scalar L1 Data Cache (in SQC module) per bank. */
|
||||
.cache_size = 4,
|
||||
.cache_size = 16,
|
||||
.cache_level = 1,
|
||||
.cache_line_size = 64,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
@ -135,6 +141,7 @@ static struct kfd_gpu_cache_info vega10_cache_info[] = {
|
||||
/* TCP L1 Cache per CU */
|
||||
.cache_size = 16,
|
||||
.cache_level = 1,
|
||||
.cache_line_size = 64,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
@ -144,6 +151,7 @@ static struct kfd_gpu_cache_info vega10_cache_info[] = {
|
||||
/* Scalar L1 Instruction Cache per SQC */
|
||||
.cache_size = 32,
|
||||
.cache_level = 1,
|
||||
.cache_line_size = 64,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_INST_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
@ -153,6 +161,7 @@ static struct kfd_gpu_cache_info vega10_cache_info[] = {
|
||||
/* Scalar L1 Data Cache per SQC */
|
||||
.cache_size = 16,
|
||||
.cache_level = 1,
|
||||
.cache_line_size = 64,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
@ -162,6 +171,7 @@ static struct kfd_gpu_cache_info vega10_cache_info[] = {
|
||||
/* L2 Data Cache per GPU (Total Tex Cache) */
|
||||
.cache_size = 4096,
|
||||
.cache_level = 2,
|
||||
.cache_line_size = 64,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
@ -174,6 +184,7 @@ static struct kfd_gpu_cache_info raven_cache_info[] = {
|
||||
/* TCP L1 Cache per CU */
|
||||
.cache_size = 16,
|
||||
.cache_level = 1,
|
||||
.cache_line_size = 64,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
@ -183,6 +194,7 @@ static struct kfd_gpu_cache_info raven_cache_info[] = {
|
||||
/* Scalar L1 Instruction Cache per SQC */
|
||||
.cache_size = 32,
|
||||
.cache_level = 1,
|
||||
.cache_line_size = 64,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_INST_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
@ -192,6 +204,7 @@ static struct kfd_gpu_cache_info raven_cache_info[] = {
|
||||
/* Scalar L1 Data Cache per SQC */
|
||||
.cache_size = 16,
|
||||
.cache_level = 1,
|
||||
.cache_line_size = 64,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
@ -201,6 +214,7 @@ static struct kfd_gpu_cache_info raven_cache_info[] = {
|
||||
/* L2 Data Cache per GPU (Total Tex Cache) */
|
||||
.cache_size = 1024,
|
||||
.cache_level = 2,
|
||||
.cache_line_size = 64,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
@ -213,6 +227,7 @@ static struct kfd_gpu_cache_info renoir_cache_info[] = {
|
||||
/* TCP L1 Cache per CU */
|
||||
.cache_size = 16,
|
||||
.cache_level = 1,
|
||||
.cache_line_size = 64,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
@ -222,6 +237,7 @@ static struct kfd_gpu_cache_info renoir_cache_info[] = {
|
||||
/* Scalar L1 Instruction Cache per SQC */
|
||||
.cache_size = 32,
|
||||
.cache_level = 1,
|
||||
.cache_line_size = 64,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_INST_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
@ -231,6 +247,7 @@ static struct kfd_gpu_cache_info renoir_cache_info[] = {
|
||||
/* Scalar L1 Data Cache per SQC */
|
||||
.cache_size = 16,
|
||||
.cache_level = 1,
|
||||
.cache_line_size = 64,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
@ -240,6 +257,7 @@ static struct kfd_gpu_cache_info renoir_cache_info[] = {
|
||||
/* L2 Data Cache per GPU (Total Tex Cache) */
|
||||
.cache_size = 1024,
|
||||
.cache_level = 2,
|
||||
.cache_line_size = 64,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
@ -252,6 +270,7 @@ static struct kfd_gpu_cache_info vega12_cache_info[] = {
|
||||
/* TCP L1 Cache per CU */
|
||||
.cache_size = 16,
|
||||
.cache_level = 1,
|
||||
.cache_line_size = 64,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
@ -261,6 +280,7 @@ static struct kfd_gpu_cache_info vega12_cache_info[] = {
|
||||
/* Scalar L1 Instruction Cache per SQC */
|
||||
.cache_size = 32,
|
||||
.cache_level = 1,
|
||||
.cache_line_size = 64,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_INST_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
@ -270,6 +290,7 @@ static struct kfd_gpu_cache_info vega12_cache_info[] = {
|
||||
/* Scalar L1 Data Cache per SQC */
|
||||
.cache_size = 16,
|
||||
.cache_level = 1,
|
||||
.cache_line_size = 64,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
@ -279,6 +300,7 @@ static struct kfd_gpu_cache_info vega12_cache_info[] = {
|
||||
/* L2 Data Cache per GPU (Total Tex Cache) */
|
||||
.cache_size = 2048,
|
||||
.cache_level = 2,
|
||||
.cache_line_size = 64,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
@ -291,6 +313,7 @@ static struct kfd_gpu_cache_info vega20_cache_info[] = {
|
||||
/* TCP L1 Cache per CU */
|
||||
.cache_size = 16,
|
||||
.cache_level = 1,
|
||||
.cache_line_size = 64,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
@ -300,6 +323,7 @@ static struct kfd_gpu_cache_info vega20_cache_info[] = {
|
||||
/* Scalar L1 Instruction Cache per SQC */
|
||||
.cache_size = 32,
|
||||
.cache_level = 1,
|
||||
.cache_line_size = 64,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_INST_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
@ -309,6 +333,7 @@ static struct kfd_gpu_cache_info vega20_cache_info[] = {
|
||||
/* Scalar L1 Data Cache per SQC */
|
||||
.cache_size = 16,
|
||||
.cache_level = 1,
|
||||
.cache_line_size = 64,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
@ -318,6 +343,7 @@ static struct kfd_gpu_cache_info vega20_cache_info[] = {
|
||||
/* L2 Data Cache per GPU (Total Tex Cache) */
|
||||
.cache_size = 8192,
|
||||
.cache_level = 2,
|
||||
.cache_line_size = 64,
|
||||
.flags = (CRAT_CACHE_FLAGS_ENABLED |
|
||||
CRAT_CACHE_FLAGS_DATA_CACHE |
|
||||
CRAT_CACHE_FLAGS_SIMD_CACHE),
|
||||
@@ -330,6 +356,7 @@ static struct kfd_gpu_cache_info aldebaran_cache_info[] = {
        /* TCP L1 Cache per CU */
        .cache_size = 16,
        .cache_level = 1,
        .cache_line_size = 64,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_DATA_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -339,6 +366,7 @@ static struct kfd_gpu_cache_info aldebaran_cache_info[] = {
        /* Scalar L1 Instruction Cache per SQC */
        .cache_size = 32,
        .cache_level = 1,
        .cache_line_size = 64,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_INST_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -348,6 +376,7 @@ static struct kfd_gpu_cache_info aldebaran_cache_info[] = {
        /* Scalar L1 Data Cache per SQC */
        .cache_size = 16,
        .cache_level = 1,
        .cache_line_size = 64,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_DATA_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -357,6 +386,7 @@ static struct kfd_gpu_cache_info aldebaran_cache_info[] = {
        /* L2 Data Cache per GPU (Total Tex Cache) */
        .cache_size = 8192,
        .cache_level = 2,
        .cache_line_size = 128,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_DATA_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -369,6 +399,7 @@ static struct kfd_gpu_cache_info navi10_cache_info[] = {
        /* TCP L1 Cache per CU */
        .cache_size = 16,
        .cache_level = 1,
        .cache_line_size = 128,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_DATA_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -378,6 +409,7 @@ static struct kfd_gpu_cache_info navi10_cache_info[] = {
        /* Scalar L1 Instruction Cache per SQC */
        .cache_size = 32,
        .cache_level = 1,
        .cache_line_size = 64,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_INST_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -387,6 +419,7 @@ static struct kfd_gpu_cache_info navi10_cache_info[] = {
        /* Scalar L1 Data Cache per SQC */
        .cache_size = 16,
        .cache_level = 1,
        .cache_line_size = 64,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_DATA_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -396,6 +429,7 @@ static struct kfd_gpu_cache_info navi10_cache_info[] = {
        /* GL1 Data Cache per SA */
        .cache_size = 128,
        .cache_level = 1,
        .cache_line_size = 128,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_DATA_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -405,6 +439,7 @@ static struct kfd_gpu_cache_info navi10_cache_info[] = {
        /* L2 Data Cache per GPU (Total Tex Cache) */
        .cache_size = 4096,
        .cache_level = 2,
        .cache_line_size = 128,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_DATA_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -417,6 +452,7 @@ static struct kfd_gpu_cache_info vangogh_cache_info[] = {
        /* TCP L1 Cache per CU */
        .cache_size = 16,
        .cache_level = 1,
        .cache_line_size = 128,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_DATA_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -426,6 +462,7 @@ static struct kfd_gpu_cache_info vangogh_cache_info[] = {
        /* Scalar L1 Instruction Cache per SQC */
        .cache_size = 32,
        .cache_level = 1,
        .cache_line_size = 64,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_INST_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -435,6 +472,7 @@ static struct kfd_gpu_cache_info vangogh_cache_info[] = {
        /* Scalar L1 Data Cache per SQC */
        .cache_size = 16,
        .cache_level = 1,
        .cache_line_size = 64,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_DATA_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -444,6 +482,7 @@ static struct kfd_gpu_cache_info vangogh_cache_info[] = {
        /* GL1 Data Cache per SA */
        .cache_size = 128,
        .cache_level = 1,
        .cache_line_size = 128,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_DATA_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -453,6 +492,7 @@ static struct kfd_gpu_cache_info vangogh_cache_info[] = {
        /* L2 Data Cache per GPU (Total Tex Cache) */
        .cache_size = 1024,
        .cache_level = 2,
        .cache_line_size = 128,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_DATA_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -465,6 +505,7 @@ static struct kfd_gpu_cache_info navi14_cache_info[] = {
        /* TCP L1 Cache per CU */
        .cache_size = 16,
        .cache_level = 1,
        .cache_line_size = 128,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_DATA_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -474,6 +515,7 @@ static struct kfd_gpu_cache_info navi14_cache_info[] = {
        /* Scalar L1 Instruction Cache per SQC */
        .cache_size = 32,
        .cache_level = 1,
        .cache_line_size = 64,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_INST_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -483,6 +525,7 @@ static struct kfd_gpu_cache_info navi14_cache_info[] = {
        /* Scalar L1 Data Cache per SQC */
        .cache_size = 16,
        .cache_level = 1,
        .cache_line_size = 64,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_DATA_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -492,6 +535,7 @@ static struct kfd_gpu_cache_info navi14_cache_info[] = {
        /* GL1 Data Cache per SA */
        .cache_size = 128,
        .cache_level = 1,
        .cache_line_size = 128,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_DATA_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -501,6 +545,7 @@ static struct kfd_gpu_cache_info navi14_cache_info[] = {
        /* L2 Data Cache per GPU (Total Tex Cache) */
        .cache_size = 2048,
        .cache_level = 2,
        .cache_line_size = 128,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_DATA_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -513,6 +558,7 @@ static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = {
        /* TCP L1 Cache per CU */
        .cache_size = 16,
        .cache_level = 1,
        .cache_line_size = 128,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_DATA_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -522,6 +568,7 @@ static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = {
        /* Scalar L1 Instruction Cache per SQC */
        .cache_size = 32,
        .cache_level = 1,
        .cache_line_size = 64,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_INST_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -531,6 +578,7 @@ static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = {
        /* Scalar L1 Data Cache per SQC */
        .cache_size = 16,
        .cache_level = 1,
        .cache_line_size = 64,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_DATA_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -540,6 +588,7 @@ static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = {
        /* GL1 Data Cache per SA */
        .cache_size = 128,
        .cache_level = 1,
        .cache_line_size = 128,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_DATA_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -549,6 +598,7 @@ static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = {
        /* L2 Data Cache per GPU (Total Tex Cache) */
        .cache_size = 4096,
        .cache_level = 2,
        .cache_line_size = 128,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_DATA_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -558,6 +608,7 @@ static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = {
        /* L3 Data Cache per GPU */
        .cache_size = 128*1024,
        .cache_level = 3,
        .cache_line_size = 64,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_DATA_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -570,6 +621,7 @@ static struct kfd_gpu_cache_info navy_flounder_cache_info[] = {
        /* TCP L1 Cache per CU */
        .cache_size = 16,
        .cache_level = 1,
        .cache_line_size = 128,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_DATA_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -579,6 +631,7 @@ static struct kfd_gpu_cache_info navy_flounder_cache_info[] = {
        /* Scalar L1 Instruction Cache per SQC */
        .cache_size = 32,
        .cache_level = 1,
        .cache_line_size = 64,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_INST_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -588,6 +641,7 @@ static struct kfd_gpu_cache_info navy_flounder_cache_info[] = {
        /* Scalar L1 Data Cache per SQC */
        .cache_size = 16,
        .cache_level = 1,
        .cache_line_size = 64,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_DATA_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -597,6 +651,7 @@ static struct kfd_gpu_cache_info navy_flounder_cache_info[] = {
        /* GL1 Data Cache per SA */
        .cache_size = 128,
        .cache_level = 1,
        .cache_line_size = 128,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_DATA_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -606,6 +661,7 @@ static struct kfd_gpu_cache_info navy_flounder_cache_info[] = {
        /* L2 Data Cache per GPU (Total Tex Cache) */
        .cache_size = 3072,
        .cache_level = 2,
        .cache_line_size = 128,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_DATA_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -615,6 +671,7 @@ static struct kfd_gpu_cache_info navy_flounder_cache_info[] = {
        /* L3 Data Cache per GPU */
        .cache_size = 96*1024,
        .cache_level = 3,
        .cache_line_size = 64,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_DATA_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -627,6 +684,7 @@ static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = {
        /* TCP L1 Cache per CU */
        .cache_size = 16,
        .cache_level = 1,
        .cache_line_size = 128,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_DATA_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -636,6 +694,7 @@ static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = {
        /* Scalar L1 Instruction Cache per SQC */
        .cache_size = 32,
        .cache_level = 1,
        .cache_line_size = 64,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_INST_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -645,6 +704,7 @@ static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = {
        /* Scalar L1 Data Cache per SQC */
        .cache_size = 16,
        .cache_level = 1,
        .cache_line_size = 64,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_DATA_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -654,6 +714,7 @@ static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = {
        /* GL1 Data Cache per SA */
        .cache_size = 128,
        .cache_level = 1,
        .cache_line_size = 128,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_DATA_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -663,6 +724,7 @@ static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = {
        /* L2 Data Cache per GPU (Total Tex Cache) */
        .cache_size = 2048,
        .cache_level = 2,
        .cache_line_size = 128,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_DATA_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -672,6 +734,7 @@ static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = {
        /* L3 Data Cache per GPU */
        .cache_size = 32*1024,
        .cache_level = 3,
        .cache_line_size = 64,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_DATA_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -684,6 +747,7 @@ static struct kfd_gpu_cache_info beige_goby_cache_info[] = {
        /* TCP L1 Cache per CU */
        .cache_size = 16,
        .cache_level = 1,
        .cache_line_size = 128,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_DATA_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -693,6 +757,7 @@ static struct kfd_gpu_cache_info beige_goby_cache_info[] = {
        /* Scalar L1 Instruction Cache per SQC */
        .cache_size = 32,
        .cache_level = 1,
        .cache_line_size = 64,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_INST_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -702,6 +767,7 @@ static struct kfd_gpu_cache_info beige_goby_cache_info[] = {
        /* Scalar L1 Data Cache per SQC */
        .cache_size = 16,
        .cache_level = 1,
        .cache_line_size = 64,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_DATA_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -711,6 +777,7 @@ static struct kfd_gpu_cache_info beige_goby_cache_info[] = {
        /* GL1 Data Cache per SA */
        .cache_size = 128,
        .cache_level = 1,
        .cache_line_size = 128,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_DATA_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -720,6 +787,7 @@ static struct kfd_gpu_cache_info beige_goby_cache_info[] = {
        /* L2 Data Cache per GPU (Total Tex Cache) */
        .cache_size = 1024,
        .cache_level = 2,
        .cache_line_size = 128,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_DATA_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -729,6 +797,7 @@ static struct kfd_gpu_cache_info beige_goby_cache_info[] = {
        /* L3 Data Cache per GPU */
        .cache_size = 16*1024,
        .cache_level = 3,
        .cache_line_size = 64,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_DATA_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -741,6 +810,7 @@ static struct kfd_gpu_cache_info yellow_carp_cache_info[] = {
        /* TCP L1 Cache per CU */
        .cache_size = 16,
        .cache_level = 1,
        .cache_line_size = 128,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_DATA_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -750,6 +820,7 @@ static struct kfd_gpu_cache_info yellow_carp_cache_info[] = {
        /* Scalar L1 Instruction Cache per SQC */
        .cache_size = 32,
        .cache_level = 1,
        .cache_line_size = 64,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_INST_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -759,6 +830,7 @@ static struct kfd_gpu_cache_info yellow_carp_cache_info[] = {
        /* Scalar L1 Data Cache per SQC */
        .cache_size = 16,
        .cache_level = 1,
        .cache_line_size = 64,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_DATA_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -768,6 +840,7 @@ static struct kfd_gpu_cache_info yellow_carp_cache_info[] = {
        /* GL1 Data Cache per SA */
        .cache_size = 128,
        .cache_level = 1,
        .cache_line_size = 128,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_DATA_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -777,6 +850,7 @@ static struct kfd_gpu_cache_info yellow_carp_cache_info[] = {
        /* L2 Data Cache per GPU (Total Tex Cache) */
        .cache_size = 2048,
        .cache_level = 2,
        .cache_line_size = 128,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_DATA_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -789,6 +863,7 @@ static struct kfd_gpu_cache_info gfx1037_cache_info[] = {
        /* TCP L1 Cache per CU */
        .cache_size = 16,
        .cache_level = 1,
        .cache_line_size = 128,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_DATA_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -798,6 +873,7 @@ static struct kfd_gpu_cache_info gfx1037_cache_info[] = {
        /* Scalar L1 Instruction Cache per SQC */
        .cache_size = 32,
        .cache_level = 1,
        .cache_line_size = 64,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_INST_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -807,6 +883,7 @@ static struct kfd_gpu_cache_info gfx1037_cache_info[] = {
        /* Scalar L1 Data Cache per SQC */
        .cache_size = 16,
        .cache_level = 1,
        .cache_line_size = 64,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_DATA_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -816,6 +893,7 @@ static struct kfd_gpu_cache_info gfx1037_cache_info[] = {
        /* GL1 Data Cache per SA */
        .cache_size = 128,
        .cache_level = 1,
        .cache_line_size = 128,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_DATA_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -825,6 +903,7 @@ static struct kfd_gpu_cache_info gfx1037_cache_info[] = {
        /* L2 Data Cache per GPU (Total Tex Cache) */
        .cache_size = 256,
        .cache_level = 2,
        .cache_line_size = 128,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_DATA_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -837,6 +916,7 @@ static struct kfd_gpu_cache_info gc_10_3_6_cache_info[] = {
        /* TCP L1 Cache per CU */
        .cache_size = 16,
        .cache_level = 1,
        .cache_line_size = 128,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_DATA_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -846,6 +926,7 @@ static struct kfd_gpu_cache_info gc_10_3_6_cache_info[] = {
        /* Scalar L1 Instruction Cache per SQC */
        .cache_size = 32,
        .cache_level = 1,
        .cache_line_size = 64,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_INST_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -855,6 +936,7 @@ static struct kfd_gpu_cache_info gc_10_3_6_cache_info[] = {
        /* Scalar L1 Data Cache per SQC */
        .cache_size = 16,
        .cache_level = 1,
        .cache_line_size = 64,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_DATA_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -864,6 +946,7 @@ static struct kfd_gpu_cache_info gc_10_3_6_cache_info[] = {
        /* GL1 Data Cache per SA */
        .cache_size = 128,
        .cache_level = 1,
        .cache_line_size = 128,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_DATA_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -873,6 +956,7 @@ static struct kfd_gpu_cache_info gc_10_3_6_cache_info[] = {
        /* L2 Data Cache per GPU (Total Tex Cache) */
        .cache_size = 256,
        .cache_level = 2,
        .cache_line_size = 128,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_DATA_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -885,6 +969,7 @@ static struct kfd_gpu_cache_info dummy_cache_info[] = {
        /* TCP L1 Cache per CU */
        .cache_size = 16,
        .cache_level = 1,
        .cache_line_size = 64,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_DATA_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -894,6 +979,7 @@ static struct kfd_gpu_cache_info dummy_cache_info[] = {
        /* Scalar L1 Instruction Cache per SQC */
        .cache_size = 32,
        .cache_level = 1,
        .cache_line_size = 64,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_INST_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -903,6 +989,7 @@ static struct kfd_gpu_cache_info dummy_cache_info[] = {
        /* Scalar L1 Data Cache per SQC */
        .cache_size = 16,
        .cache_level = 1,
        .cache_line_size = 64,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_DATA_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -912,6 +999,7 @@ static struct kfd_gpu_cache_info dummy_cache_info[] = {
        /* GL1 Data Cache per SA */
        .cache_size = 128,
        .cache_level = 1,
        .cache_line_size = 64,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_DATA_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -921,6 +1009,7 @@ static struct kfd_gpu_cache_info dummy_cache_info[] = {
        /* L2 Data Cache per GPU (Total Tex Cache) */
        .cache_size = 2048,
        .cache_level = 2,
        .cache_line_size = 64,
        .flags = (CRAT_CACHE_FLAGS_ENABLED |
                  CRAT_CACHE_FLAGS_DATA_CACHE |
                  CRAT_CACHE_FLAGS_SIMD_CACHE),

@@ -303,6 +303,7 @@ struct kfd_node;
struct kfd_gpu_cache_info {
        uint32_t cache_size;
        uint32_t cache_level;
        uint32_t cache_line_size;
        uint32_t flags;
        /* Indicates how many Compute Units share this cache
         * within a SA. Value = 1 indicates the cache is not shared

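The hunk above adds the cache_line_size field to struct kfd_gpu_cache_info, which the per-ASIC tables earlier in this diff then populate. Below is a hedged, standalone sketch that mirrors that layout so the new field can be seen in one self-contained entry; the flag bits and the CU-sharing field name are illustrative placeholders (the real flag macros live elsewhere in kfd_crat.h, and the sharing field's name is cut off in the hunk above), not the kernel's definitions.

/* Standalone sketch mirroring the table layout above; placeholder macros,
 * not the kernel's CRAT_CACHE_FLAGS_* definitions. */
#include <stdint.h>
#include <stdio.h>

#define EX_CACHE_ENABLED (1u << 0)  /* stands in for CRAT_CACHE_FLAGS_ENABLED */
#define EX_CACHE_DATA    (1u << 1)  /* stands in for CRAT_CACHE_FLAGS_DATA_CACHE */
#define EX_CACHE_SIMD    (1u << 2)  /* stands in for CRAT_CACHE_FLAGS_SIMD_CACHE */

struct ex_gpu_cache_info {
        uint32_t cache_size;        /* appears to be KiB, judging by the values above */
        uint32_t cache_level;
        uint32_t cache_line_size;   /* the field this series adds */
        uint32_t flags;
        uint32_t num_cu_shared;     /* assumed name; CUs sharing the cache per SA */
};

/* One entry shaped like the "TCP L1 Cache per CU" rows above. */
static const struct ex_gpu_cache_info example[] = {
        {
                .cache_size = 16,
                .cache_level = 1,
                .cache_line_size = 128,
                .flags = EX_CACHE_ENABLED | EX_CACHE_DATA | EX_CACHE_SIMD,
                .num_cu_shared = 1,  /* illustrative only */
        },
};

int main(void)
{
        printf("L%u cache: %u KiB, %u-byte lines\n",
               (unsigned)example[0].cache_level,
               (unsigned)example[0].cache_size,
               (unsigned)example[0].cache_line_size);
        return 0;
}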
@@ -1018,12 +1018,14 @@ int kfd_dbg_trap_device_snapshot(struct kfd_process *target,
                uint32_t *entry_size)
{
        struct kfd_dbg_device_info_entry device_info;
        uint32_t tmp_entry_size = *entry_size, tmp_num_devices;
        uint32_t tmp_entry_size, tmp_num_devices;
        int i, r = 0;

        if (!(target && user_info && number_of_device_infos && entry_size))
                return -EINVAL;

        tmp_entry_size = *entry_size;

        tmp_num_devices = min_t(size_t, *number_of_device_infos, target->n_pdds);
        *number_of_device_infos = target->n_pdds;
        *entry_size = min_t(size_t, *entry_size, sizeof(device_info));

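The kfd_dbg_trap_device_snapshot() hunk above moves the read of *entry_size until after the NULL checks on the user-supplied pointers. A minimal standalone sketch of that validate-before-dereference ordering, using illustrative names rather than the kernel symbols:

/* Sketch of the ordering the hunk applies: check pointers first, only then
 * dereference them.  Not the kernel function, just the pattern. */
#include <errno.h>
#include <stdint.h>

int snapshot_sketch(const uint32_t *entry_size, uint32_t *number_of_entries)
{
        uint32_t tmp_entry_size;

        /* Reject NULL arguments before any dereference ... */
        if (!entry_size || !number_of_entries)
                return -EINVAL;

        /* ... and only afterwards read the caller-provided size. */
        tmp_entry_size = *entry_size;
        (void)tmp_entry_size;

        *number_of_entries = 0;
        return 0;
}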
@@ -1285,8 +1285,10 @@ void kfd_signal_poison_consumed_event(struct kfd_node *dev, u32 pasid)
        uint32_t id = KFD_FIRST_NONSIGNAL_EVENT_ID;
        int user_gpu_id;

        if (!p)
        if (!p) {
                dev_warn(dev->adev->dev, "Not find process with pasid:%d\n", pasid);
                return; /* Presumably process exited. */
        }

        user_gpu_id = kfd_process_get_user_gpu_id(p, dev->id);
        if (unlikely(user_gpu_id == -EINVAL)) {
@@ -1322,6 +1324,8 @@ void kfd_signal_poison_consumed_event(struct kfd_node *dev, u32 pasid)
                }
        }

        dev_warn(dev->adev->dev, "Send SIGBUS to process %s(pasid:%d)\n",
                 p->lead_thread->comm, pasid);
        rcu_read_unlock();

        /* user application will handle SIGBUS signal */

@@ -132,6 +132,7 @@ enum SQ_INTERRUPT_ERROR_TYPE {
static void event_interrupt_poison_consumption(struct kfd_node *dev,
                uint16_t pasid, uint16_t client_id)
{
        enum amdgpu_ras_block block = 0;
        int old_poison, ret = -EINVAL;
        struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
@@ -151,12 +152,14 @@ static void event_interrupt_poison_consumption(struct kfd_node *dev,
        case SOC15_IH_CLIENTID_SE3SH:
        case SOC15_IH_CLIENTID_UTCL2:
                ret = kfd_dqm_evict_pasid(dev->dqm, pasid);
                block = AMDGPU_RAS_BLOCK__GFX;
                break;
        case SOC15_IH_CLIENTID_SDMA0:
        case SOC15_IH_CLIENTID_SDMA1:
        case SOC15_IH_CLIENTID_SDMA2:
        case SOC15_IH_CLIENTID_SDMA3:
        case SOC15_IH_CLIENTID_SDMA4:
                block = AMDGPU_RAS_BLOCK__SDMA;
                break;
        default:
                break;
@@ -171,12 +174,12 @@ static void event_interrupt_poison_consumption(struct kfd_node *dev,
                dev_warn(dev->adev->dev,
                        "RAS poison consumption, unmap queue flow succeeded: client id %d\n",
                        client_id);
                amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, false);
                amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, block, false);
        } else {
                dev_warn(dev->adev->dev,
                        "RAS poison consumption, fall back to gpu reset flow: client id %d\n",
                        client_id);
                amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, true);
                amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, block, true);
        }
}

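Taken together, the v9/v10/v11 interrupt hunks in this diff thread an amdgpu_ras_block value, derived from the interrupt client, into amdgpu_amdkfd_ras_poison_consumption_handler(). A reduced, standalone sketch of that control flow follows; the enum values and handler below are illustrative placeholders, not the kernel definitions.

/* Standalone sketch of the flow the hunks introduce: pick a RAS block from
 * the interrupt client and report it along with the reset decision. */
#include <stdbool.h>
#include <stdio.h>

enum ex_ras_block { EX_RAS_BLOCK_GFX, EX_RAS_BLOCK_SDMA };
enum ex_client   { EX_CLIENT_GFX_SH, EX_CLIENT_UTCL2, EX_CLIENT_SDMA0 };

static void ex_poison_consumption_handler(enum ex_ras_block block, bool need_reset)
{
        printf("poison consumed: block=%d, need_gpu_reset=%d\n", block, need_reset);
}

static void ex_poison_consumption(enum ex_client client, bool unmap_failed)
{
        enum ex_ras_block block = EX_RAS_BLOCK_GFX;

        switch (client) {
        case EX_CLIENT_GFX_SH:
        case EX_CLIENT_UTCL2:
                block = EX_RAS_BLOCK_GFX;   /* graphics-side consumers */
                break;
        case EX_CLIENT_SDMA0:
                block = EX_RAS_BLOCK_SDMA;  /* SDMA engines */
                break;
        }

        /* As in the hunks above: a successful queue unmap/evict avoids a full
         * GPU reset, but the RAS block is reported either way. */
        ex_poison_consumption_handler(block, unmap_failed);
}

int main(void)
{
        ex_poison_consumption(EX_CLIENT_SDMA0, false);
        return 0;
}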
@@ -191,6 +191,7 @@ static void print_sq_intr_info_error(uint32_t context_id0, uint32_t context_id1)
static void event_interrupt_poison_consumption_v11(struct kfd_node *dev,
                uint16_t pasid, uint16_t source_id)
{
        enum amdgpu_ras_block block = 0;
        int ret = -EINVAL;
        struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
@@ -210,9 +211,11 @@ static void event_interrupt_poison_consumption_v11(struct kfd_node *dev,
        case SOC15_INTSRC_SQ_INTERRUPT_MSG:
                if (dev->dqm->ops.reset_queues)
                        ret = dev->dqm->ops.reset_queues(dev->dqm, pasid);
                block = AMDGPU_RAS_BLOCK__GFX;
                break;
        case SOC21_INTSRC_SDMA_ECC:
        default:
                block = AMDGPU_RAS_BLOCK__GFX;
                break;
        }
@@ -221,9 +224,9 @@ static void event_interrupt_poison_consumption_v11(struct kfd_node *dev,
        /* resetting queue passes, do page retirement without gpu reset
           resetting queue fails, fallback to gpu reset solution */
        if (!ret)
                amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, false);
                amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, block, false);
        else
                amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, true);
                amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, block, true);
}

static bool event_interrupt_isr_v11(struct kfd_node *dev,

@@ -143,6 +143,7 @@ enum SQ_INTERRUPT_ERROR_TYPE {
static void event_interrupt_poison_consumption_v9(struct kfd_node *dev,
                uint16_t pasid, uint16_t client_id)
{
        enum amdgpu_ras_block block = 0;
        int old_poison, ret = -EINVAL;
        struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
@@ -162,12 +163,14 @@ static void event_interrupt_poison_consumption_v9(struct kfd_node *dev,
        case SOC15_IH_CLIENTID_SE3SH:
        case SOC15_IH_CLIENTID_UTCL2:
                ret = kfd_dqm_evict_pasid(dev->dqm, pasid);
                block = AMDGPU_RAS_BLOCK__GFX;
                break;
        case SOC15_IH_CLIENTID_SDMA0:
        case SOC15_IH_CLIENTID_SDMA1:
        case SOC15_IH_CLIENTID_SDMA2:
        case SOC15_IH_CLIENTID_SDMA3:
        case SOC15_IH_CLIENTID_SDMA4:
                block = AMDGPU_RAS_BLOCK__SDMA;
                break;
        default:
                break;
@@ -182,12 +185,12 @@ static void event_interrupt_poison_consumption_v9(struct kfd_node *dev,
                dev_warn(dev->adev->dev,
                        "RAS poison consumption, unmap queue flow succeeded: client id %d\n",
                        client_id);
                amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, false);
                amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, block, false);
        } else {
                dev_warn(dev->adev->dev,
                        "RAS poison consumption, fall back to gpu reset flow: client id %d\n",
                        client_id);
                amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, true);
                amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, block, true);
        }
}