drm for 6.9:

core:
 - EDID cleanups
 - scheduler error handling fixes
 - managed: add drmm_release_action() with tests
 - add ratelimited drm debug print
 - DPCD PSR early transport macro
 - DP tunneling and bandwidth allocation helpers
 - remove built-in edids
 - dp: Avoid AUX transfers on powered-down displays
 - dp: Add VSC SDP helpers
 
 cross drivers:
 - use new drm print helpers
 - switch to ->read_edid callback
 - gem: add stats for shared buffers plus updates to amdgpu, i915, xe
 
 syncobj:
 - fixes to waiting and sleeping
 
 ttm:
 - add tests
 - fix errno codes
 - simply busy-placement handling
 - fix page decryption
 
 media:
 - tc358743: fix v4l device registration
 
 video:
 - move all kernel parameters for video behind CONFIG_VIDEO
 
 sound:
 - remove <drm/drm_edid.h> include from header
 
 ci:
 - add tests for msm
 - fix apq8016 runner
 
 efifb:
 - use copy of global screen_info state
 
 vesafb:
 - use copy of global screen_info state
 
 simplefb:
 - fix logging
 
 bridge:
 - ite-6505: fix DP link-training bug
 - samsung-dsim: fix error checking in probe
 - samsung-dsim: add bsh-smm-s2/pro boards
 - tc358767: fix regmap usage
 - imx: add i.MX8MP HDMI PVI plus DT bindings
 - imx: add i.MX8MP HDMI TX plus DT bindings
 - sii902x: fix probing and unregistration
 - tc358767: limit pixel PLL input range
 - switch to new drm_bridge_read_edid() interface
 
 panel:
 - ltk050h3146w: error-handling fixes
 - panel-edp: support delay between power-on and enable; use put_sync in
   unprepare; support Mediatek MT8173 Chromebooks, BOE NV116WHM-N49 V8.0,
   BOE NV122WUM-N41, CSO MNC207QS1-1 plus DT bindings
 - panel-lvds: support EDT ETML0700Z9NDHA plus DT bindings
 - panel-novatek: FRIDA FRD400B25025-A-CTK plus DT bindings
 - add BOE TH101MB31IG002-28A plus DT bindings
 - add EDT ETML1010G3DRA plus DT bindings
 - add Novatek NT36672E LCD DSI plus DT bindings
 - nt36523: support 120Hz timings, fix includes
 - simple: fix display timings on RK32FN48H
 - visionox-vtdr6130: fix initialization
 - add Powkiddy RGB10MAX3 plus DT bindings
 - st7703: support panel rotation plus DT bindings
 - add Himax HX83112A plus DT bindings
 - ltk500hd1829: add support for ltk101b4029w and admatec 9904370
 - simple: add BOE BP082WX1-100 8.2" panel plus DT bindungs
 
 panel-orientation-quirks:
 - GPD Win Mini
 
 amdgpu:
 - Validate DMABuf imports in compute VMs
 - Add RAS ACA framework
 - PSP 13 fixes
 - Misc code cleanups
 - Replay fixes
 - Atom interpretor PS, WS bounds checking
 - DML2 fixes
 - Audio fixes
 - DCN 3.5 Z state fixes
 - Remove deprecated ida_simple usage
 - UBSAN fixes
 - RAS fixes
 - Enable seq64 infrastructure
 - DC color block enablement
 - Documentation updates
 - DC documentation updates
 - DMCUB updates
 - ATHUB 4.1 support
 - LSDMA 7.0 support
 - JPEG DPG support
 - IH 7.0 support
 - HDP 7.0 support
 - VCN 5.0 support
 - SMU 13.0.6 updates
 - NBIO 7.11 updates
 - SDMA 6.1 updates
 - MMHUB 3.3 updates
 - DCN 3.5.1 support
 - NBIF 6.3.1 support
 - VPE 6.1.1 support
 
 amdkfd:
 - Validate DMABuf imports in compute VMs
 - SVM fixes
 - Trap handler updates and enhancements
 - Fix cache size reporting
 - Relocate the trap handler
 
 radeon:
 - Atom interpretor PS, WS bounds checking
 - Misc code cleanups
 
 xe:
 - new query for GuC submission version
 - Remove unused persistent exec_queues
 - Add vram frequency sysfs attributes
 - Add the flag XE_VM_BIND_FLAG_DUMPABLE
 - Drop pre-production workarounds
 - Drop kunit tests for unsupported platforms
 - Start pumbling SR-IOV support with memory based interrupts for VF
 - Allow to map BO in GGTT with PAT index corresponding to
   XE_CACHE_UC to work with memory based interrupts
 - Add GuC Doorbells Manager as prep work SR-IOV
 - Implement additional workarounds for xe2 and MTL
 - Program a few registers according to perfomance guide spec for Xe2
 - Fix remaining 32b build issues and enable it back
 - Fix build with CONFIG_DEBUG_FS=n
 - Fix warnings from GuC ABI headers
 - Introduce Relay Communication for SR-IOV for VF <-> GuC <-> PF
 - Release mmap mappings on rpm suspend
 - Disable mid-thread preemption when not properly supported by hardware
 - Fix xe_exec by reserving extra fence slot for CPU bind
 - Fix xe_exec with full long running exec queue
 - Canonicalize addresses where needed for Xe2 and add to devcoredum
 - Toggle USM support for Xe2
 - Only allow 1 ufence per exec / bind IOCTL
 - Add GuC firmware loading for Lunar Lake
 - Add XE_VMA_PTE_64K VMA flag
 
 i915:
 - Add more ADL-N PCI IDs
 - Enable fastboot also on older platforms
 - Early transport for panel replay and PSR
 - New ARL PCI IDs
 - DP TPS4 PHY test pattern support
 - Unify and improve VSC SDP for PSR and non-PSR cases
 - Refactor memory regions and improve debug logging
 - Rework global state serialization
 - Remove unused CDCLK divider fields
 - Unify HDCP connector logging format
 - Use display instead of graphics version in display code
 - Move VBT and opregion debugfs next to the implementation
 - Abstract opregion interface, use opaque type
 - MTL fixes
 - HPD handling fixes
 - Add GuC submission interface version query
 - Atomically invalidate userptr on mmu-notifier
 - Update handling of MMIO triggered reports
 - Don't make assumptions about intel_wakeref_t type
 - Extend driver code of Xe_LPG to Xe_LPG+
 - Add flex arrays to struct i915_syncmap
 - Allow for very slow HuC loading
 - DP tunneling and bandwidth allocation support
 
 msm:
 - Correct bindings for MSM8976 and SM8650 platforms
 - Start migration of MDP5 platforms to DPU driver
 - X1E80100 MDSS support
 - DPU:
 - Improve DSC allocation, fixing several important corner cases
 - Add support for SDM630/SDM660 platforms
 - Simplify dpu_encoder_phys_ops
 - Apply fixes targeting DSC support with a single DSC encoder
 - Apply fixes for HCTL_EN timing configuration
 - X1E80100 support
 - Add support for YUV420 over DP
 - GPU:
 - fix sc7180 UBWC config
 - fix a7xx LLC config
 - new gpu support: a305B, a750, a702
 - machine support: SM7150 (different power levels than other a618)
 - a7xx devcoredump support
 
 habanalabs:
 - configure IRQ affinity according to NUMA node
 - move HBM MMU page tables inside the HBM
 - improve device reset
 - check extended PCIe errors
 
 ivpu:
 - updates to firmware API
 - refactor BO allocation
 
 imx:
 - use devm_ functions during init
 
 hisilicon:
 - fix EDID includes
 
 mgag200:
 - improve ioremap usage
 - convert to struct drm_edid
 - Work around PCI write bursts
 
 nouveau:
 - disp: use kmemdup()
 - fix EDID includes
 - documentation fixes
 
 qaic:
 - fixes to BO handling
 - make use of DRM managed release
 - fix order of remove operations
 
 rockchip:
 - analogix_dp: get encoder port from DT
 - inno_hdmi: support HDMI for RK3128
 - lvds: error-handling fixes
 
 ssd130x:
 - support SSD133x plus DT bindings
 
 tegra:
 - fix error handling
 
 tilcdc:
 - make use of DRM managed release
 
 v3d:
 - show memory stats in debugfs
 - Support display MMU page size
 
 vc4:
 - fix error handling in plane prepare_fb
 - fix framebuffer test in plane helpers
 
 virtio:
 - add venus capset defines
 
 vkms:
 - fix OOB access when programming the LUT
 - Kconfig improvements
 
 vmwgfx:
 - unmap surface before changing plane state
 - fix memory leak in error handling
 - documentation fixes
 - list command SVGA_3D_CMD_DEFINE_GB_SURFACE_V4 as invalid
 - fix null-pointer deref in execbuf
 - refactor display-mode probing
 - fix fencing for creating cursor MOBs
 - fix cursor-memory lifetime
 
 xlnx:
 - fix live video input for ZynqMP DPSUB
 
 lima:
 - fix memory leak
 
 loongson:
 - fail if no VRAM present
 
 meson:
 - switch to new drm_bridge_read_edid() interface
 
 renesas:
 - add RZ/G2L DU support plus DT bindings
 
 mxsfb:
 - Use managed mode config
 
 sun4i:
 - HDMI: updates to atomic mode setting
 
 mediatek:
 - Add display driver for MT8188 VDOSYS1
 - DSI driver cleanups
 - Filter modes according to hardware capability
 - Fix a null pointer crash in mtk_drm_crtc_finish_page_flip
 
 etnaviv:
 - enhancements for NPU and MRT support
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEEEKbZHaGwW9KfbeusDHTzWXnEhr4FAmXxI+AACgkQDHTzWXnE
 hr5isxAApZ+DxesDbV8bd91KXL03vTfJtM5xVQuZoDzrr20KdTvu2EfQcCFnAUjl
 YtY05U9arDT4Txq5nX70Xc6I5M9HN6lqSUfsWhI6xUcR9TUollPbYwEu8IdoMaCG
 TRnspkiheye+DLFY6omLNH2aG1/k1IIefVWKaClFpbNPaaSHREDiY7/rkmErMBIS
 hrN13+6IVzX7+6fmNgHugUfdyawDJ8J9Nsc8T3Zlioljq3p+VbtStLsGeABTHSEJ
 MX18FwbGllI+QcXvaXM8gIg8NYKvSx/ZtlvKTpyPpTjZT3i3BpY+7yJqWDRQhiGM
 VTX7di1f90yWgzlYE5T33MW7Imvw3q04N7qYJ+Z1LHD/A8VyjwPUKLeul8P9ousT
 0qQLSQsnuXH5AMLDh8IeLG/i0hAMWJ2UbProFSAFhd/UQHP7QOm2mmCsf79me9It
 qKFn6QZKvAKGZk/myTbQIVAmQWrDCpKq4i1aoKXEvcEuQUtM1lPvmMVsStVEfG+y
 ACaI24zSJACViH6rfhVzr74giwZX/ay0NSXqwRXfD5kX8fXb050LxLGW93iYZoHv
 FpdT2C8oTS1A5nsZpoxwVP35euUsp7D4J5YYbrZder2m0s0DDCVLMqdFrSVNdWDM
 4ZQRiY3wCiJjSS8dpwppW0uaVGjtnGQnjQ5sQrIw0vKkwxee0TQ=
 =WLj9
 -----END PGP SIGNATURE-----

Merge tag 'drm-next-2024-03-13' of https://gitlab.freedesktop.org/drm/kernel

Pull drm updates from Dave Airlie:
 "Highlights are usual, more AMD IP blocks for future hw, i915/xe
  changes, Displayport tunnelling support for i915, msm YUV over DP
  changes, new tests for ttm, but its mostly a lot of stuff all over the
  place from lots of people.

  core:
   - EDID cleanups
   - scheduler error handling fixes
   - managed: add drmm_release_action() with tests
   - add ratelimited drm debug print
   - DPCD PSR early transport macro
   - DP tunneling and bandwidth allocation helpers
   - remove built-in edids
   - dp: Avoid AUX transfers on powered-down displays
   - dp: Add VSC SDP helpers

  cross drivers:
   - use new drm print helpers
   - switch to ->read_edid callback
   - gem: add stats for shared buffers plus updates to amdgpu, i915, xe

  syncobj:
   - fixes to waiting and sleeping

  ttm:
   - add tests
   - fix errno codes
   - simply busy-placement handling
   - fix page decryption

  media:
   - tc358743: fix v4l device registration

  video:
   - move all kernel parameters for video behind CONFIG_VIDEO

  sound:
   - remove <drm/drm_edid.h> include from header

  ci:
   - add tests for msm
   - fix apq8016 runner

  efifb:
   - use copy of global screen_info state

  vesafb:
   - use copy of global screen_info state

  simplefb:
   - fix logging

  bridge:
   - ite-6505: fix DP link-training bug
   - samsung-dsim: fix error checking in probe
   - samsung-dsim: add bsh-smm-s2/pro boards
   - tc358767: fix regmap usage
   - imx: add i.MX8MP HDMI PVI plus DT bindings
   - imx: add i.MX8MP HDMI TX plus DT bindings
   - sii902x: fix probing and unregistration
   - tc358767: limit pixel PLL input range
   - switch to new drm_bridge_read_edid() interface

  panel:
   - ltk050h3146w: error-handling fixes
   - panel-edp: support delay between power-on and enable; use put_sync
     in unprepare; support Mediatek MT8173 Chromebooks, BOE NV116WHM-N49
     V8.0, BOE NV122WUM-N41, CSO MNC207QS1-1 plus DT bindings
   - panel-lvds: support EDT ETML0700Z9NDHA plus DT bindings
   - panel-novatek: FRIDA FRD400B25025-A-CTK plus DT bindings
   - add BOE TH101MB31IG002-28A plus DT bindings
   - add EDT ETML1010G3DRA plus DT bindings
   - add Novatek NT36672E LCD DSI plus DT bindings
   - nt36523: support 120Hz timings, fix includes
   - simple: fix display timings on RK32FN48H
   - visionox-vtdr6130: fix initialization
   - add Powkiddy RGB10MAX3 plus DT bindings
   - st7703: support panel rotation plus DT bindings
   - add Himax HX83112A plus DT bindings
   - ltk500hd1829: add support for ltk101b4029w and admatec 9904370
   - simple: add BOE BP082WX1-100 8.2" panel plus DT bindungs

  panel-orientation-quirks:
   - GPD Win Mini

  amdgpu:
   - Validate DMABuf imports in compute VMs
   - Add RAS ACA framework
   - PSP 13 fixes
   - Misc code cleanups
   - Replay fixes
   - Atom interpretor PS, WS bounds checking
   - DML2 fixes
   - Audio fixes
   - DCN 3.5 Z state fixes
   - Remove deprecated ida_simple usage
   - UBSAN fixes
   - RAS fixes
   - Enable seq64 infrastructure
   - DC color block enablement
   - Documentation updates
   - DC documentation updates
   - DMCUB updates
   - ATHUB 4.1 support
   - LSDMA 7.0 support
   - JPEG DPG support
   - IH 7.0 support
   - HDP 7.0 support
   - VCN 5.0 support
   - SMU 13.0.6 updates
   - NBIO 7.11 updates
   - SDMA 6.1 updates
   - MMHUB 3.3 updates
   - DCN 3.5.1 support
   - NBIF 6.3.1 support
   - VPE 6.1.1 support

  amdkfd:
   - Validate DMABuf imports in compute VMs
   - SVM fixes
   - Trap handler updates and enhancements
   - Fix cache size reporting
   - Relocate the trap handler

  radeon:
   - Atom interpretor PS, WS bounds checking
   - Misc code cleanups

  xe:
   - new query for GuC submission version
   - Remove unused persistent exec_queues
   - Add vram frequency sysfs attributes
   - Add the flag XE_VM_BIND_FLAG_DUMPABLE
   - Drop pre-production workarounds
   - Drop kunit tests for unsupported platforms
   - Start pumbling SR-IOV support with memory based interrupts for VF
   - Allow to map BO in GGTT with PAT index corresponding to XE_CACHE_UC
     to work with memory based interrupts
   - Add GuC Doorbells Manager as prep work SR-IOV
   - Implement additional workarounds for xe2 and MTL
   - Program a few registers according to perfomance guide spec for Xe2
   - Fix remaining 32b build issues and enable it back
   - Fix build with CONFIG_DEBUG_FS=n
   - Fix warnings from GuC ABI headers
   - Introduce Relay Communication for SR-IOV for VF <-> GuC <-> PF
   - Release mmap mappings on rpm suspend
   - Disable mid-thread preemption when not properly supported by
     hardware
   - Fix xe_exec by reserving extra fence slot for CPU bind
   - Fix xe_exec with full long running exec queue
   - Canonicalize addresses where needed for Xe2 and add to devcoredum
   - Toggle USM support for Xe2
   - Only allow 1 ufence per exec / bind IOCTL
   - Add GuC firmware loading for Lunar Lake
   - Add XE_VMA_PTE_64K VMA flag

  i915:
   - Add more ADL-N PCI IDs
   - Enable fastboot also on older platforms
   - Early transport for panel replay and PSR
   - New ARL PCI IDs
   - DP TPS4 PHY test pattern support
   - Unify and improve VSC SDP for PSR and non-PSR cases
   - Refactor memory regions and improve debug logging
   - Rework global state serialization
   - Remove unused CDCLK divider fields
   - Unify HDCP connector logging format
   - Use display instead of graphics version in display code
   - Move VBT and opregion debugfs next to the implementation
   - Abstract opregion interface, use opaque type
   - MTL fixes
   - HPD handling fixes
   - Add GuC submission interface version query
   - Atomically invalidate userptr on mmu-notifier
   - Update handling of MMIO triggered reports
   - Don't make assumptions about intel_wakeref_t type
   - Extend driver code of Xe_LPG to Xe_LPG+
   - Add flex arrays to struct i915_syncmap
   - Allow for very slow HuC loading
   - DP tunneling and bandwidth allocation support

  msm:
   - Correct bindings for MSM8976 and SM8650 platforms
   - Start migration of MDP5 platforms to DPU driver
   - X1E80100 MDSS support
   - DPU:
      - Improve DSC allocation, fixing several important corner cases
      - Add support for SDM630/SDM660 platforms
      - Simplify dpu_encoder_phys_ops
      - Apply fixes targeting DSC support with a single DSC encoder
      - Apply fixes for HCTL_EN timing configuration
      - X1E80100 support
      - Add support for YUV420 over DP
   - GPU:
      - fix sc7180 UBWC config
      - fix a7xx LLC config
      - new gpu support: a305B, a750, a702
      - machine support: SM7150 (different power levels than other a618)
      - a7xx devcoredump support

  habanalabs:
   - configure IRQ affinity according to NUMA node
   - move HBM MMU page tables inside the HBM
   - improve device reset
   - check extended PCIe errors

  ivpu:
   - updates to firmware API
   - refactor BO allocation

  imx:
   - use devm_ functions during init

  hisilicon:
   - fix EDID includes

  mgag200:
   - improve ioremap usage
   - convert to struct drm_edid
   - Work around PCI write bursts

  nouveau:
   - disp: use kmemdup()
   - fix EDID includes
   - documentation fixes

  qaic:
   - fixes to BO handling
   - make use of DRM managed release
   - fix order of remove operations

  rockchip:
   - analogix_dp: get encoder port from DT
   - inno_hdmi: support HDMI for RK3128
   - lvds: error-handling fixes

  ssd130x:
   - support SSD133x plus DT bindings

  tegra:
   - fix error handling

  tilcdc:
   - make use of DRM managed release

  v3d:
   - show memory stats in debugfs
   - Support display MMU page size

  vc4:
   - fix error handling in plane prepare_fb
   - fix framebuffer test in plane helpers

  virtio:
   - add venus capset defines

  vkms:
   - fix OOB access when programming the LUT
   - Kconfig improvements

  vmwgfx:
   - unmap surface before changing plane state
   - fix memory leak in error handling
   - documentation fixes
   - list command SVGA_3D_CMD_DEFINE_GB_SURFACE_V4 as invalid
   - fix null-pointer deref in execbuf
   - refactor display-mode probing
   - fix fencing for creating cursor MOBs
   - fix cursor-memory lifetime

  xlnx:
   - fix live video input for ZynqMP DPSUB

  lima:
   - fix memory leak

  loongson:
   - fail if no VRAM present

  meson:
   - switch to new drm_bridge_read_edid() interface

  renesas:
   - add RZ/G2L DU support plus DT bindings

  mxsfb:
   - Use managed mode config

  sun4i:
   - HDMI: updates to atomic mode setting

  mediatek:
   - Add display driver for MT8188 VDOSYS1
   - DSI driver cleanups
   - Filter modes according to hardware capability
   - Fix a null pointer crash in mtk_drm_crtc_finish_page_flip

  etnaviv:
   - enhancements for NPU and MRT support"

* tag 'drm-next-2024-03-13' of https://gitlab.freedesktop.org/drm/kernel: (1420 commits)
  drm/amd/display: Removed redundant @ symbol to fix kernel-doc warnings in -next repo
  drm/amd/pm: wait for completion of the EnableGfxImu message
  drm/amdgpu/soc21: add mode2 asic reset for SMU IP v14.0.1
  drm/amdgpu: add smu 14.0.1 support
  drm/amdgpu: add VPE 6.1.1 discovery support
  drm/amdgpu/vpe: add VPE 6.1.1 support
  drm/amdgpu/vpe: don't emit cond exec command under collaborate mode
  drm/amdgpu/vpe: add collaborate mode support for VPE
  drm/amdgpu/vpe: add PRED_EXE and COLLAB_SYNC OPCODE
  drm/amdgpu/vpe: add multi instance VPE support
  drm/amdgpu/discovery: add nbif v6_3_1 ip block
  drm/amdgpu: Add nbif v6_3_1 ip block support
  drm/amdgpu: Add pcie v6_1_0 ip headers (v5)
  drm/amdgpu: Add nbif v6_3_1 ip headers (v5)
  arch/powerpc: Remove <linux/fb.h> from backlight code
  macintosh/via-pmu-backlight: Include <linux/backlight.h>
  fbdev/chipsfb: Include <linux/backlight.h>
  drm/etnaviv: Restore some id values
  drm/amdkfd: make kfd_class constant
  drm/amdgpu: add ring timeout information in devcoredump
  ...
This commit is contained in:
Linus Torvalds 2024-03-13 18:34:05 -07:00
commit 480e035fc4
1189 changed files with 189380 additions and 16411 deletions

View File

@ -24,37 +24,4 @@ restrictions later on.
As a remedy for such situations, the kernel configuration item
CONFIG_DRM_LOAD_EDID_FIRMWARE was introduced. It allows to provide an
individually prepared or corrected EDID data set in the /lib/firmware
directory from where it is loaded via the firmware interface. The code
(see drivers/gpu/drm/drm_edid_load.c) contains built-in data sets for
commonly used screen resolutions (800x600, 1024x768, 1280x1024, 1600x1200,
1680x1050, 1920x1080) as binary blobs, but the kernel source tree does
not contain code to create these data. In order to elucidate the origin
of the built-in binary EDID blobs and to facilitate the creation of
individual data for a specific misbehaving monitor, commented sources
and a Makefile environment are given here.
To create binary EDID and C source code files from the existing data
material, simply type "make" in tools/edid/.
If you want to create your own EDID file, copy the file 1024x768.S,
replace the settings with your own data and add a new target to the
Makefile. Please note that the EDID data structure expects the timing
values in a different way as compared to the standard X11 format.
X11:
HTimings:
hdisp hsyncstart hsyncend htotal
VTimings:
vdisp vsyncstart vsyncend vtotal
EDID::
#define XPIX hdisp
#define XBLANK htotal-hdisp
#define XOFFSET hsyncstart-hdisp
#define XPULSE hsyncend-hsyncstart
#define YPIX vdisp
#define YBLANK vtotal-vdisp
#define YOFFSET vsyncstart-vdisp
#define YPULSE vsyncend-vsyncstart
directory from where it is loaded via the firmware interface.

View File

@ -1173,16 +1173,10 @@
panels may send no or incorrect EDID data sets.
This parameter allows to specify an EDID data sets
in the /lib/firmware directory that are used instead.
Generic built-in EDID data sets are used, if one of
edid/1024x768.bin, edid/1280x1024.bin,
edid/1680x1050.bin, or edid/1920x1080.bin is given
and no file with the same name exists. Details and
instructions how to build your own EDID data are
available in Documentation/admin-guide/edid.rst. An EDID
data set will only be used for a particular connector,
if its name and a colon are prepended to the EDID
name. Each connector may use a unique EDID data
set by separating the files with a comma. An EDID
An EDID data set will only be used for a particular
connector, if its name and a colon are prepended to
the EDID name. Each connector may use a unique EDID
data set by separating the files with a comma. An EDID
data set with no connector name will be used for
any connectors not explicitly specified.

View File

@ -0,0 +1,102 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
$id: http://devicetree.org/schemas/display/bridge/fsl,imx8mp-hdmi-tx.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Freescale i.MX8MP DWC HDMI TX Encoder
maintainers:
- Lucas Stach <l.stach@pengutronix.de>
description:
The i.MX8MP HDMI transmitter is a Synopsys DesignWare
HDMI 2.0a TX controller IP.
allOf:
- $ref: /schemas/display/bridge/synopsys,dw-hdmi.yaml#
properties:
compatible:
enum:
- fsl,imx8mp-hdmi-tx
reg-io-width:
const: 1
clocks:
maxItems: 4
clock-names:
items:
- const: iahb
- const: isfr
- const: cec
- const: pix
power-domains:
maxItems: 1
ports:
$ref: /schemas/graph.yaml#/properties/ports
properties:
port@0:
$ref: /schemas/graph.yaml#/properties/port
description: Parallel RGB input port
port@1:
$ref: /schemas/graph.yaml#/properties/port
description: HDMI output port
required:
- port@0
- port@1
required:
- compatible
- reg
- clocks
- clock-names
- interrupts
- power-domains
- ports
unevaluatedProperties: false
examples:
- |
#include <dt-bindings/clock/imx8mp-clock.h>
#include <dt-bindings/interrupt-controller/irq.h>
#include <dt-bindings/power/imx8mp-power.h>
hdmi@32fd8000 {
compatible = "fsl,imx8mp-hdmi-tx";
reg = <0x32fd8000 0x7eff>;
interrupts = <0 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clk IMX8MP_CLK_HDMI_APB>,
<&clk IMX8MP_CLK_HDMI_REF_266M>,
<&clk IMX8MP_CLK_32K>,
<&hdmi_tx_phy>;
clock-names = "iahb", "isfr", "cec", "pix";
power-domains = <&hdmi_blk_ctrl IMX8MP_HDMIBLK_PD_HDMI_TX>;
reg-io-width = <1>;
ports {
#address-cells = <1>;
#size-cells = <0>;
port@0 {
reg = <0>;
hdmi_tx_from_pvi: endpoint {
remote-endpoint = <&pvi_to_hdmi_tx>;
};
};
port@1 {
reg = <1>;
hdmi_tx_out: endpoint {
remote-endpoint = <&hdmi0_con>;
};
};
};
};

View File

@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: SN65DSI86 DSI to eDP bridge chip
maintainers:
- Sandeep Panda <spanda@codeaurora.org>
- Douglas Anderson <dianders@chromium.org>
description: |
The Texas Instruments SN65DSI86 bridge takes MIPI DSI in and outputs eDP.

View File

@ -0,0 +1,84 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
$id: http://devicetree.org/schemas/display/imx/fsl,imx8mp-hdmi-pvi.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Freescale i.MX8MP HDMI Parallel Video Interface
maintainers:
- Lucas Stach <l.stach@pengutronix.de>
description:
The HDMI parallel video interface is a timing and sync generator block in the
i.MX8MP SoC, that sits between the video source and the HDMI TX controller.
properties:
compatible:
const: fsl,imx8mp-hdmi-pvi
reg:
maxItems: 1
interrupts:
maxItems: 1
power-domains:
maxItems: 1
ports:
$ref: /schemas/graph.yaml#/properties/ports
properties:
port@0:
$ref: /schemas/graph.yaml#/properties/port
description: Input from the LCDIF controller.
port@1:
$ref: /schemas/graph.yaml#/properties/port
description: Output to the HDMI TX controller.
required:
- port@0
- port@1
required:
- compatible
- reg
- interrupts
- power-domains
- ports
additionalProperties: false
examples:
- |
#include <dt-bindings/interrupt-controller/irq.h>
#include <dt-bindings/power/imx8mp-power.h>
display-bridge@32fc4000 {
compatible = "fsl,imx8mp-hdmi-pvi";
reg = <0x32fc4000 0x44>;
interrupt-parent = <&irqsteer_hdmi>;
interrupts = <12 IRQ_TYPE_LEVEL_HIGH>;
power-domains = <&hdmi_blk_ctrl IMX8MP_HDMIBLK_PD_PVI>;
ports {
#address-cells = <1>;
#size-cells = <0>;
port@0 {
reg = <0>;
pvi_from_lcdif3: endpoint {
remote-endpoint = <&lcdif3_to_pvi>;
};
};
port@1 {
reg = <1>;
pvi_to_hdmi_tx: endpoint {
remote-endpoint = <&hdmi_tx_from_pvi>;
};
};
};
};

View File

@ -19,6 +19,7 @@ properties:
- qcom,msm8916-dsi-ctrl
- qcom,msm8953-dsi-ctrl
- qcom,msm8974-dsi-ctrl
- qcom,msm8976-dsi-ctrl
- qcom,msm8996-dsi-ctrl
- qcom,msm8998-dsi-ctrl
- qcom,qcm2290-dsi-ctrl
@ -248,6 +249,7 @@ allOf:
contains:
enum:
- qcom,msm8953-dsi-ctrl
- qcom,msm8976-dsi-ctrl
then:
properties:
clocks:

View File

@ -224,6 +224,7 @@ allOf:
enum:
- qcom,adreno-gmu-730.1
- qcom,adreno-gmu-740.1
- qcom,adreno-gmu-750.1
then:
properties:
reg:

View File

@ -23,7 +23,7 @@ properties:
The driver is parsing the compat string for Adreno to
figure out the gpu-id and patch level.
items:
- pattern: '^qcom,adreno-[3-7][0-9][0-9]\.[0-9]$'
- pattern: '^qcom,adreno-[3-7][0-9][0-9]\.[0-9]+$'
- const: qcom,adreno
- description: |
The driver is parsing the compat string for Imageon to
@ -127,7 +127,7 @@ allOf:
properties:
compatible:
contains:
pattern: '^qcom,adreno-[3-5][0-9][0-9]\.[0-9]$'
pattern: '^qcom,adreno-[3-5][0-9][0-9]\.[0-9]+$'
then:
properties:
@ -203,7 +203,7 @@ allOf:
properties:
compatible:
contains:
pattern: '^qcom,adreno-[67][0-9][0-9]\.[0-9]$'
pattern: '^qcom,adreno-[67][0-9][0-9]\.[0-9]+$'
then: # Starting with A6xx, the clocks are usually defined in the GMU node
properties:

View File

@ -127,6 +127,7 @@ patternProperties:
- qcom,dsi-phy-20nm
- qcom,dsi-phy-28nm-8226
- qcom,dsi-phy-28nm-hpm
- qcom,dsi-phy-28nm-hpm-fam-b
- qcom,dsi-phy-28nm-lp
- qcom,hdmi-phy-8084
- qcom,hdmi-phy-8660

View File

@ -13,7 +13,9 @@ $ref: /schemas/display/msm/dpu-common.yaml#
properties:
compatible:
const: qcom,sm8650-dpu
enum:
- qcom,sm8650-dpu
- qcom,x1e80100-dpu
reg:
items:

View File

@ -37,18 +37,21 @@ properties:
patternProperties:
"^display-controller@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,sm8650-dpu
"^displayport-controller@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,sm8650-dp
"^dsi@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
items:
@ -57,6 +60,7 @@ patternProperties:
"^phy@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,sm8650-dsi-phy-4nm

View File

@ -0,0 +1,251 @@
# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
%YAML 1.2
---
$id: http://devicetree.org/schemas/display/msm/qcom,x1e80100-mdss.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Qualcomm X1E80100 Display MDSS
maintainers:
- Abel Vesa <abel.vesa@linaro.org>
description:
X1E80100 MSM Mobile Display Subsystem(MDSS), which encapsulates sub-blocks like
DPU display controller, DP interfaces, etc.
$ref: /schemas/display/msm/mdss-common.yaml#
properties:
compatible:
const: qcom,x1e80100-mdss
clocks:
items:
- description: Display AHB
- description: Display hf AXI
- description: Display core
iommus:
maxItems: 1
interconnects:
maxItems: 3
interconnect-names:
maxItems: 3
patternProperties:
"^display-controller@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,x1e80100-dpu
"^displayport-controller@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,x1e80100-dp
"^phy@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,x1e80100-dp-phy
required:
- compatible
unevaluatedProperties: false
examples:
- |
#include <dt-bindings/clock/qcom,rpmh.h>
#include <dt-bindings/interrupt-controller/arm-gic.h>
#include <dt-bindings/interconnect/qcom,x1e80100-rpmh.h>
#include <dt-bindings/phy/phy-qcom-qmp.h>
#include <dt-bindings/power/qcom,rpmhpd.h>
display-subsystem@ae00000 {
compatible = "qcom,x1e80100-mdss";
reg = <0x0ae00000 0x1000>;
reg-names = "mdss";
interconnects = <&mmss_noc MASTER_MDP 0 &gem_noc SLAVE_LLCC 0>,
<&mc_virt MASTER_LLCC 0 &mc_virt SLAVE_EBI1 0>,
<&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_DISPLAY_CFG 0>;
interconnect-names = "mdp0-mem", "mdp1-mem", "cpu-cfg";
resets = <&dispcc_core_bcr>;
power-domains = <&dispcc_gdsc>;
clocks = <&dispcc_ahb_clk>,
<&gcc_disp_hf_axi_clk>,
<&dispcc_mdp_clk>;
clock-names = "bus", "nrt_bus", "core";
interrupts = <GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH>;
interrupt-controller;
#interrupt-cells = <1>;
iommus = <&apps_smmu 0x1c00 0x2>;
#address-cells = <1>;
#size-cells = <1>;
ranges;
display-controller@ae01000 {
compatible = "qcom,x1e80100-dpu";
reg = <0x0ae01000 0x8f000>,
<0x0aeb0000 0x2008>;
reg-names = "mdp", "vbif";
clocks = <&gcc_axi_clk>,
<&dispcc_ahb_clk>,
<&dispcc_mdp_lut_clk>,
<&dispcc_mdp_clk>,
<&dispcc_mdp_vsync_clk>;
clock-names = "nrt_bus",
"iface",
"lut",
"core",
"vsync";
assigned-clocks = <&dispcc_mdp_vsync_clk>;
assigned-clock-rates = <19200000>;
operating-points-v2 = <&mdp_opp_table>;
power-domains = <&rpmhpd RPMHPD_MMCX>;
interrupt-parent = <&mdss>;
interrupts = <0>;
ports {
#address-cells = <1>;
#size-cells = <0>;
port@0 {
reg = <0>;
dpu_intf1_out: endpoint {
remote-endpoint = <&dsi0_in>;
};
};
port@1 {
reg = <1>;
dpu_intf2_out: endpoint {
remote-endpoint = <&dsi1_in>;
};
};
};
mdp_opp_table: opp-table {
compatible = "operating-points-v2";
opp-200000000 {
opp-hz = /bits/ 64 <200000000>;
required-opps = <&rpmhpd_opp_low_svs>;
};
opp-325000000 {
opp-hz = /bits/ 64 <325000000>;
required-opps = <&rpmhpd_opp_svs>;
};
opp-375000000 {
opp-hz = /bits/ 64 <375000000>;
required-opps = <&rpmhpd_opp_svs_l1>;
};
opp-514000000 {
opp-hz = /bits/ 64 <514000000>;
required-opps = <&rpmhpd_opp_nom>;
};
};
};
displayport-controller@ae90000 {
compatible = "qcom,x1e80100-dp";
reg = <0 0xae90000 0 0x200>,
<0 0xae90200 0 0x200>,
<0 0xae90400 0 0x600>,
<0 0xae91000 0 0x400>,
<0 0xae91400 0 0x400>;
interrupt-parent = <&mdss>;
interrupts = <12>;
clocks = <&dispcc_mdss_ahb_clk>,
<&dispcc_dptx0_aux_clk>,
<&dispcc_dptx0_link_clk>,
<&dispcc_dptx0_link_intf_clk>,
<&dispcc_dptx0_pixel0_clk>;
clock-names = "core_iface", "core_aux",
"ctrl_link",
"ctrl_link_iface",
"stream_pixel";
assigned-clocks = <&dispcc_mdss_dptx0_link_clk_src>,
<&dispcc_mdss_dptx0_pixel0_clk_src>;
assigned-clock-parents = <&usb_1_ss0_qmpphy QMP_USB43DP_DP_LINK_CLK>,
<&usb_1_ss0_qmpphy QMP_USB43DP_DP_VCO_DIV_CLK>;
operating-points-v2 = <&mdss_dp0_opp_table>;
power-domains = <&rpmhpd RPMHPD_MMCX>;
phys = <&usb_1_ss0_qmpphy QMP_USB43DP_DP_PHY>;
phy-names = "dp";
#sound-dai-cells = <0>;
ports {
#address-cells = <1>;
#size-cells = <0>;
port@0 {
reg = <0>;
mdss_dp0_in: endpoint {
remote-endpoint = <&mdss_intf0_out>;
};
};
port@1 {
reg = <1>;
mdss_dp0_out: endpoint {
};
};
};
mdss_dp0_opp_table: opp-table {
compatible = "operating-points-v2";
opp-160000000 {
opp-hz = /bits/ 64 <160000000>;
required-opps = <&rpmhpd_opp_low_svs>;
};
opp-270000000 {
opp-hz = /bits/ 64 <270000000>;
required-opps = <&rpmhpd_opp_svs>;
};
opp-540000000 {
opp-hz = /bits/ 64 <540000000>;
required-opps = <&rpmhpd_opp_svs_l1>;
};
opp-810000000 {
opp-hz = /bits/ 64 <810000000>;
required-opps = <&rpmhpd_opp_nom>;
};
};
};
};
...

View File

@ -0,0 +1,58 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
$id: http://devicetree.org/schemas/display/panel/boe,th101mb31ig002-28a.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: BOE TH101MB31IG002-28A WXGA DSI Display Panel
maintainers:
- Manuel Traut <manut@mecka.net>
allOf:
- $ref: panel-common.yaml#
properties:
compatible:
enum:
# BOE TH101MB31IG002-28A 10.1" WXGA TFT LCD panel
- boe,th101mb31ig002-28a
reg: true
backlight: true
enable-gpios: true
power-supply: true
port: true
rotation: true
required:
- compatible
- reg
- enable-gpios
- power-supply
additionalProperties: false
examples:
- |
#include <dt-bindings/gpio/gpio.h>
dsi {
#address-cells = <1>;
#size-cells = <0>;
panel@0 {
compatible = "boe,th101mb31ig002-28a";
reg = <0>;
backlight = <&backlight_lcd0>;
enable-gpios = <&gpio 45 GPIO_ACTIVE_HIGH>;
rotation = <90>;
power-supply = <&vcc_3v3>;
port {
panel_in_dsi: endpoint {
remote-endpoint = <&dsi_out_con>;
};
};
};
};
...

View File

@ -0,0 +1,74 @@
# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
%YAML 1.2
---
$id: http://devicetree.org/schemas/display/panel/himax,hx83112a.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Himax HX83112A-based DSI display panels
maintainers:
- Luca Weiss <luca.weiss@fairphone.com>
description:
The Himax HX83112A is a generic DSI Panel IC used to control
LCD panels.
allOf:
- $ref: panel-common.yaml#
properties:
compatible:
contains:
const: djn,9a-3r063-1102b
vdd1-supply:
description: Digital voltage rail
vsn-supply:
description: Positive source voltage rail
vsp-supply:
description: Negative source voltage rail
reg: true
port: true
required:
- compatible
- reg
- reset-gpios
- vdd1-supply
- vsn-supply
- vsp-supply
- port
unevaluatedProperties: false
examples:
- |
#include <dt-bindings/gpio/gpio.h>
dsi {
#address-cells = <1>;
#size-cells = <0>;
panel@0 {
compatible = "djn,9a-3r063-1102b";
reg = <0>;
backlight = <&pm6150l_wled>;
reset-gpios = <&pm6150l_gpios 9 GPIO_ACTIVE_LOW>;
vdd1-supply = <&vreg_l1e>;
vsn-supply = <&pm6150l_lcdb_ncp>;
vsp-supply = <&pm6150l_lcdb_ldo>;
port {
panel_in_0: endpoint {
remote-endpoint = <&dsi0_out>;
};
};
};
};
...

View File

@ -14,7 +14,9 @@ allOf:
properties:
compatible:
const: leadtek,ltk500hd1829
enum:
- leadtek,ltk101b4029w
- leadtek,ltk500hd1829
reg: true
backlight: true
reset-gpios: true

View File

@ -15,7 +15,9 @@ allOf:
properties:
compatible:
items:
- const: hydis,hva40wv1
- enum:
- frida,frd400b25025
- hydis,hva40wv1
- const: novatek,nt35510
description: This indicates the panel manufacturer of the panel
that is in turn using the NT35510 panel driver. The compatible

View File

@ -0,0 +1,66 @@
# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
%YAML 1.2
---
$id: http://devicetree.org/schemas/display/panel/novatek,nt36672e.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Novatek NT36672E LCD DSI Panel
maintainers:
- Ritesh Kumar <quic_riteshk@quicinc.com>
allOf:
- $ref: panel-common.yaml#
properties:
compatible:
const: novatek,nt36672e
reg:
maxItems: 1
description: DSI virtual channel
vddi-supply: true
avdd-supply: true
avee-supply: true
port: true
reset-gpios: true
backlight: true
required:
- compatible
- reg
- vddi-supply
- avdd-supply
- avee-supply
- reset-gpios
- port
additionalProperties: false
examples:
- |
#include <dt-bindings/gpio/gpio.h>
dsi {
#address-cells = <1>;
#size-cells = <0>;
panel@0 {
compatible = "novatek,nt36672e";
reg = <0>;
reset-gpios = <&tlmm 44 GPIO_ACTIVE_HIGH>;
vddi-supply = <&vreg_l8c_1p8>;
avdd-supply = <&disp_avdd>;
avee-supply = <&disp_avee>;
backlight = <&pwm_backlight>;
port {
panel0_in: endpoint {
remote-endpoint = <&dsi0_out>;
};
};
};
};
...

View File

@ -39,9 +39,13 @@ properties:
compatible:
items:
- enum:
# Admatec 9904379 10.1" 1024x600 LVDS panel
- admatec,9904379
- auo,b101ew05
# Chunghwa Picture Tubes Ltd. 7" WXGA (800x1280) TFT LCD LVDS panel
- chunghwa,claa070wp03xg
# EDT ETML0700Z9NDHA 7.0" WSVGA (1024x600) color TFT LCD LVDS panel
- edt,etml0700z9ndha
# HannStar Display Corp. HSD101PWW2 10.1" WXGA (1280x800) LVDS panel
- hannstar,hsd101pww2
# Hydis Technologies 7" WXGA (800x1280) TFT LCD LVDS panel

View File

@ -73,6 +73,8 @@ properties:
- auo,t215hvn01
# Shanghai AVIC Optoelectronics 7" 1024x600 color TFT-LCD panel
- avic,tm070ddh03
# BOE BP082WX1-100 8.2" WXGA (1280x800) LVDS panel
- boe,bp082wx1-100
# BOE BP101WX1-100 10.1" WXGA (1280x800) LVDS panel
- boe,bp101wx1-100
# BOE EV121WXM-N10-1850 12.1" WXGA (1280x800) TFT LCD panel
@ -141,6 +143,8 @@ properties:
- edt,etm0700g0edh6
# Emerging Display Technology Corp. LVDS WSVGA TFT Display with capacitive touch
- edt,etml0700y5dha
# Emerging Display Technology Corp. 10.1" LVDS WXGA TFT Display with capacitive touch
- edt,etml1010g3dra
# Emerging Display Technology Corp. 5.7" VGA TFT LCD panel with
# capacitive touch
- edt,etmv570g2dhu

View File

@ -22,6 +22,8 @@ properties:
enum:
# Anberic RG353V-V2 5.0" 640x480 TFT LCD panel
- anbernic,rg353v-panel-v2
# Powkiddy RGB10MAX3 5.0" 720x1280 TFT LCD panel
- powkiddy,rgb10max3-panel
# Powkiddy RGB30 3.0" 720x720 TFT LCD panel
- powkiddy,rgb30-panel
# Rocktech JH057N00900 5.5" 720x1440 TFT LCD panel
@ -43,6 +45,7 @@ properties:
reset-gpios: true
backlight: true
rotation: true
required:
- compatible

View File

@ -7,7 +7,8 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: Visionox model RM69299 Panels
maintainers:
- Harigovindan P <harigovi@codeaurora.org>
- Abhinav Kumar <quic_abhinavk@quicinc.com>
- Jessica Zhang <quic_jesszhan@quicinc.com>
description: |
This binding is for display panels using a Visionox RM692999 panel.

View File

@ -0,0 +1,126 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
$id: http://devicetree.org/schemas/display/renesas,rzg2l-du.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Renesas RZ/G2L Display Unit (DU)
maintainers:
- Biju Das <biju.das.jz@bp.renesas.com>
- Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
description: |
These DT bindings describe the Display Unit embedded in the Renesas RZ/G2L
and RZ/V2L SoCs.
properties:
compatible:
oneOf:
- enum:
- renesas,r9a07g044-du # RZ/G2{L,LC}
- items:
- enum:
- renesas,r9a07g054-du # RZ/V2L
- const: renesas,r9a07g044-du # RZ/G2L fallback
reg:
maxItems: 1
interrupts:
maxItems: 1
clocks:
items:
- description: Main clock
- description: Register access clock
- description: Video clock
clock-names:
items:
- const: aclk
- const: pclk
- const: vclk
resets:
maxItems: 1
power-domains:
maxItems: 1
ports:
$ref: /schemas/graph.yaml#/properties/ports
description: |
The connections to the DU output video ports are modeled using the OF
graph bindings. The number of ports and their assignment are
model-dependent. Each port shall have a single endpoint.
patternProperties:
"^port@[0-1]$":
$ref: /schemas/graph.yaml#/properties/port
unevaluatedProperties: false
required:
- port@0
unevaluatedProperties: false
renesas,vsps:
$ref: /schemas/types.yaml#/definitions/phandle-array
items:
items:
- description: phandle to VSP instance that serves the DU channel
- description: Channel index identifying the LIF instance in that VSP
description:
A list of phandle and channel index tuples to the VSPs that handle the
memory interfaces for the DU channels.
required:
- compatible
- reg
- interrupts
- clocks
- clock-names
- resets
- power-domains
- ports
- renesas,vsps
additionalProperties: false
examples:
# RZ/G2L DU
- |
#include <dt-bindings/clock/r9a07g044-cpg.h>
#include <dt-bindings/interrupt-controller/arm-gic.h>
display@10890000 {
compatible = "renesas,r9a07g044-du";
reg = <0x10890000 0x10000>;
interrupts = <GIC_SPI 152 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&cpg CPG_MOD R9A07G044_LCDC_CLK_A>,
<&cpg CPG_MOD R9A07G044_LCDC_CLK_P>,
<&cpg CPG_MOD R9A07G044_LCDC_CLK_D>;
clock-names = "aclk", "pclk", "vclk";
resets = <&cpg R9A07G044_LCDC_RESET_N>;
power-domains = <&cpg>;
renesas,vsps = <&vspd0 0>;
ports {
#address-cells = <1>;
#size-cells = <0>;
port@0 {
reg = <0>;
endpoint {
remote-endpoint = <&dsi0_in>;
};
};
port@1 {
reg = <1>;
};
};
};
...

View File

@ -94,11 +94,14 @@ properties:
- const: default
- const: unwedge
power-domains:
maxItems: 1
ports:
$ref: /schemas/graph.yaml#/properties/ports
patternProperties:
"^port(@0)?$":
properties:
port@0:
$ref: /schemas/graph.yaml#/properties/port
description: Input of the DWC HDMI TX
properties:
@ -108,11 +111,14 @@ properties:
description: Connection to the VOPB
endpoint@1:
description: Connection to the VOPL
properties:
port@1:
$ref: /schemas/graph.yaml#/properties/port
description: Output of the DWC HDMI TX
required:
- port@0
- port@1
rockchip,grf:
$ref: /schemas/types.yaml#/definitions/phandle
description:
@ -135,19 +141,25 @@ examples:
#include <dt-bindings/clock/rk3288-cru.h>
#include <dt-bindings/interrupt-controller/arm-gic.h>
#include <dt-bindings/interrupt-controller/irq.h>
#include <dt-bindings/power/rk3288-power.h>
hdmi: hdmi@ff980000 {
compatible = "rockchip,rk3288-dw-hdmi";
reg = <0xff980000 0x20000>;
reg-io-width = <4>;
ddc-i2c-bus = <&i2c5>;
rockchip,grf = <&grf>;
interrupts = <GIC_SPI 103 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&cru PCLK_HDMI_CTRL>, <&cru SCLK_HDMI_HDCP>;
clock-names = "iahb", "isfr";
ddc-i2c-bus = <&i2c5>;
power-domains = <&power RK3288_PD_VIO>;
rockchip,grf = <&grf>;
ports {
port {
#address-cells = <1>;
#size-cells = <0>;
port@0 {
reg = <0>;
#address-cells = <1>;
#size-cells = <0>;
@ -155,11 +167,20 @@ examples:
reg = <0>;
remote-endpoint = <&vopb_out_hdmi>;
};
hdmi_in_vopl: endpoint@1 {
reg = <1>;
remote-endpoint = <&vopl_out_hdmi>;
};
};
port@1 {
reg = <1>;
hdmi_out_con: endpoint {
remote-endpoint = <&hdmi_con_in>;
};
};
};
};

View File

@ -131,9 +131,9 @@ allOf:
const: sinowealth,sh1106
then:
properties:
width:
solomon,width:
default: 132
height:
solomon,height:
default: 64
solomon,dclk-div:
default: 1
@ -149,9 +149,9 @@ allOf:
- solomon,ssd1305
then:
properties:
width:
solomon,width:
default: 132
height:
solomon,height:
default: 64
solomon,dclk-div:
default: 1
@ -167,9 +167,9 @@ allOf:
- solomon,ssd1306
then:
properties:
width:
solomon,width:
default: 128
height:
solomon,height:
default: 64
solomon,dclk-div:
default: 1
@ -185,9 +185,9 @@ allOf:
- solomon,ssd1307
then:
properties:
width:
solomon,width:
default: 128
height:
solomon,height:
default: 39
solomon,dclk-div:
default: 2
@ -205,9 +205,9 @@ allOf:
- solomon,ssd1309
then:
properties:
width:
solomon,width:
default: 128
height:
solomon,height:
default: 64
solomon,dclk-div:
default: 1

View File

@ -30,9 +30,9 @@ allOf:
const: solomon,ssd1322
then:
properties:
width:
solomon,width:
default: 480
height:
solomon,height:
default: 128
- if:
@ -42,9 +42,9 @@ allOf:
const: solomon,ssd1325
then:
properties:
width:
solomon,width:
default: 128
height:
solomon,height:
default: 80
- if:
@ -54,9 +54,9 @@ allOf:
const: solomon,ssd1327
then:
properties:
width:
solomon,width:
default: 128
height:
solomon,height:
default: 128
unevaluatedProperties: false

View File

@ -0,0 +1,45 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
$id: http://devicetree.org/schemas/display/solomon,ssd133x.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Solomon SSD133x OLED Display Controllers
maintainers:
- Javier Martinez Canillas <javierm@redhat.com>
allOf:
- $ref: solomon,ssd-common.yaml#
properties:
compatible:
enum:
- solomon,ssd1331
solomon,width:
default: 96
solomon,height:
default: 64
required:
- compatible
- reg
unevaluatedProperties: false
examples:
- |
spi {
#address-cells = <1>;
#size-cells = <0>;
oled@0 {
compatible = "solomon,ssd1331";
reg = <0x0>;
reset-gpios = <&gpio2 7>;
dc-gpios = <&gpio2 8>;
spi-max-frequency = <10000000>;
};
};

View File

@ -37,6 +37,7 @@ properties:
- description: OVR2 overlay manager for vp2
- description: VP1 video port 1
- description: VP2 video port 2
- description: common1 DSS register area
reg-names:
items:
@ -47,6 +48,7 @@ properties:
- const: ovr2
- const: vp1
- const: vp2
- const: common1
clocks:
items:
@ -147,9 +149,10 @@ examples:
<0x04a07000 0x1000>, /* ovr1 */
<0x04a08000 0x1000>, /* ovr2 */
<0x04a0a000 0x1000>, /* vp1 */
<0x04a0b000 0x1000>; /* vp2 */
<0x04a0b000 0x1000>, /* vp2 */
<0x04a01000 0x1000>; /* common1 */
reg-names = "common", "vidl1", "vid",
"ovr1", "ovr2", "vp1", "vp2";
"ovr1", "ovr2", "vp1", "vp2", "common1";
ti,am65x-oldi-io-ctrl = <&dss_oldi_io_ctrl>;
power-domains = <&k3_pds 67 TI_SCI_PD_EXCLUSIVE>;
clocks = <&k3_clks 67 1>,

View File

@ -63,6 +63,8 @@ patternProperties:
description: Analog Devices, Inc.
"^adieng,.*":
description: ADI Engineering, Inc.
"^admatec,.*":
description: admatec GmbH
"^advantech,.*":
description: Advantech Corporation
"^aeroflexgaisler,.*":

View File

@ -16,6 +16,7 @@ Radeon (RX|TM) (PRO|WX) Vega /MI25 /V320 /V340L /8200 /9100 /SSG MxGPU, VEGA10,
AMD Radeon (Pro) VII /MI50 /MI60, VEGA20, DCE 12, 9.4.0, VCE 4.1.0 / UVD 7.2.0, 4.2.0
MI100, ARCTURUS, *, 9.4.1, VCN 2.5.0, 4.2.2
MI200, ALDEBARAN, *, 9.4.2, VCN 2.6.0, 4.4.0
MI300, AQUA_VANGARAM, *, 9.4.3, VCN 4.0.3, 4.4.2
AMD Radeon (RX|Pro) 5600(M|XT) /5700 (M|XT|XTB) /W5700, NAVI10, DCN 2.0.0, 10.1.10, VCN 2.0.0, 5.0.0
AMD Radeon (Pro) 5300 /5500XTB/5500(XT|M) /W5500M /W5500, NAVI14, DCN 2.0.0, 10.1.1, VCN 2.0.2, 5.0.2
AMD Radeon RX 6800(XT) /6900(XT) /W6800, SIENNA_CICHLID, DCN 3.0.0, 10.3.0, VCN 3.0.0, 5.2.0
@ -23,4 +24,5 @@ AMD Radeon RX 6700 XT / 6800M / 6700M, NAVY_FLOUNDER, DCN 3.0.0, 10.3.2, VCN 3.0
AMD Radeon RX 6600(XT) /6600M /W6600 /W6600M, DIMGREY_CAVEFISH, DCN 3.0.2, 10.3.4, VCN 3.0.16, 5.2.4
AMD Radeon RX 6500M /6300M /W6500M /W6300M, BEIGE_GOBY, DCN 3.0.3, 10.3.5, VCN 3.0.33, 5.2.5
AMD Radeon RX 7900 XT /XTX, , DCN 3.2.0, 11.0.0, VCN 4.0.0, 6.0.0
AMD Radeon RX 7800 XT, , DCN 3.2.0, 11.0.3, VCN 4.0.0, 6.0.3
AMD Radeon RX 7600M (XT) /7700S /7600S, , DCN 3.2.1, 11.0.2, VCN 4.0.4, 6.0.2

1 Product Name Code Reference DCN/DCE version GC version VCN version SDMA version
16 AMD Radeon (Pro) VII /MI50 /MI60 VEGA20 DCE 12 9.4.0 VCE 4.1.0 / UVD 7.2.0 4.2.0
17 MI100 ARCTURUS * 9.4.1 VCN 2.5.0 4.2.2
18 MI200 ALDEBARAN * 9.4.2 VCN 2.6.0 4.4.0
19 MI300 AQUA_VANGARAM * 9.4.3 VCN 4.0.3 4.4.2
20 AMD Radeon (RX|Pro) 5600(M|XT) /5700 (M|XT|XTB) /W5700 NAVI10 DCN 2.0.0 10.1.10 VCN 2.0.0 5.0.0
21 AMD Radeon (Pro) 5300 /5500XTB/5500(XT|M) /W5500M /W5500 NAVI14 DCN 2.0.0 10.1.1 VCN 2.0.2 5.0.2
22 AMD Radeon RX 6800(XT) /6900(XT) /W6800 SIENNA_CICHLID DCN 3.0.0 10.3.0 VCN 3.0.0 5.2.0
24 AMD Radeon RX 6600(XT) /6600M /W6600 /W6600M DIMGREY_CAVEFISH DCN 3.0.2 10.3.4 VCN 3.0.16 5.2.4
25 AMD Radeon RX 6500M /6300M /W6500M /W6300M BEIGE_GOBY DCN 3.0.3 10.3.5 VCN 3.0.33 5.2.5
26 AMD Radeon RX 7900 XT /XTX DCN 3.2.0 11.0.0 VCN 4.0.0 6.0.0
27 AMD Radeon RX 7800 XT DCN 3.2.0 11.0.3 VCN 4.0.0 6.0.3
28 AMD Radeon RX 7600M (XT) /7700S /7600S DCN 3.2.1 11.0.2 VCN 4.0.4 6.0.2

View File

@ -0,0 +1,78 @@
==========
DCN Blocks
==========
In this section, you will find some extra details about some of the DCN blocks
and the code documentation when it is automatically generated.
DCHUBBUB
--------
.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h
:doc: overview
.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h
:export:
.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h
:internal:
HUBP
----
.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h
:doc: overview
.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h
:export:
.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h
:internal:
DPP
---
.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h
:doc: overview
.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h
:export:
.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h
:internal:
MPC
---
.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h
:doc: overview
.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h
:export:
.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h
:internal:
OPP
---
.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/opp.h
:doc: overview
.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/opp.h
:export:
.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/opp.h
:internal:
DIO
---
.. kernel-doc:: drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.h
:doc: overview
.. kernel-doc:: drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.h
:export:
.. kernel-doc:: drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.h
:internal:

View File

@ -0,0 +1,168 @@
.. _display_todos:
==============================
AMDGPU - Display Contributions
==============================
First of all, if you are here, you probably want to give some technical
contribution to the display code, and for that, we say thank you :)
This page summarizes some of the issues you can help with; keep in mind that
this is a static page, and it is always a good idea to try to reach developers
in the amdgfx or some of the maintainers. Finally, this page follows the DRM
way of creating a TODO list; for more information, check
'Documentation/gpu/todo.rst'.
Gitlab issues
=============
Users can report issues associated with AMD GPUs at:
- https://gitlab.freedesktop.org/drm/amd
Usually, we try to add a proper label to all new tickets to make it easy to
filter issues. If you can reproduce any problem, you could help by adding more
information or fixing the issue.
Level: diverse
IGT
===
`IGT`_ provides many integration tests that can be run on your GPU. We always
want to pass a large set of tests to increase the test coverage in our CI. If
you wish to contribute to the display code but are unsure where a good place
is, we recommend you run all IGT tests and try to fix any failure you see in
your hardware. Keep in mind that this failure can be an IGT problem or a kernel
issue; it is necessary to analyze case-by-case.
Level: diverse
.. _IGT: https://gitlab.freedesktop.org/drm/igt-gpu-tools
Compilation
===========
Fix compilation warnings
------------------------
Enable the W1 or W2 warning level in the kernel compilation and try to fix the
issues on the display side.
Level: Starter
Fix compilation issues when using um architecture
-------------------------------------------------
Linux has a User-mode Linux (UML) feature, and the kernel can be compiled to
the **um** architecture. Compiling for **um** can bring multiple advantages
from the test perspective. We currently have some compilation issues in this
area that we need to fix.
Level: Intermediate
Code Refactor
=============
Add prefix to DC functions to improve the debug with ftrace
-----------------------------------------------------------
The Ftrace debug feature (check 'Documentation/trace/ftrace.rst') is a
fantastic way to check the code path when developers try to make sense of a
bug. Ftrace provides a filter mechanism that can be useful when the developer
has some hunch of which part of the code can cause the issue; for this reason,
if a set of functions has a proper prefix, it becomes easy to create a good
filter. Additionally, prefixes can improve stack trace readability.
The DC code does not follow some prefix rules, which makes the Ftrace filter
more complicated and reduces the readability of the stack trace. If you want
something simple to start contributing to the display, you can make patches for
adding prefixes to DC functions. To create those prefixes, use part of the file
name as a prefix for all functions in the target file. Check the
'amdgpu_dm_crtc.c` and `amdgpu_dm_plane.c` for some references. However, we
strongly advise not to send huge patches changing these prefixes; otherwise, it
will be hard to review and test, which can generate second thoughts from
maintainers. Try small steps; in case of double, you can ask before you put in
effort. We recommend first looking at folders like dceXYZ, dcnXYZ, basics,
bios, core, clk_mgr, hwss, resource, and irq.
Level: Starter
Reduce code duplication
-----------------------
AMD has an extensive portfolio with various dGPUs and APUs that amdgpu
supports. To maintain the new hardware release cadence, DCE/DCN was designed in
a modular design, making the bring-up for new hardware fast. Over the years,
amdgpu accumulated some technical debt in the code duplication area. For this
task, it would be a good idea to find a tool that can discover code duplication
(including patterns) and use it as guidance to reduce duplications.
Level: Intermediate
Make atomic_commit_[check|tail] more readable
---------------------------------------------
The functions responsible for atomic commit and tail are intricate and
extensive. In particular `amdgpu_dm_atomic_commit_tail` is a long function and
could benefit from being split into smaller helpers. Improvements in this area
are more than welcome, but keep in mind that changes in this area will affect
all ASICs, meaning that refactoring requires a comprehensive verification; in
other words, this effort can take some time for validation.
Level: Advanced
Documentation
=============
Expand kernel-doc
-----------------
Many DC functions do not have a proper kernel-doc; understanding a function and
adding documentation is a great way to learn more about the amdgpu driver and
also leave an outstanding contribution to the entire community.
Level: Starter
Beyond AMDGPU
=============
AMDGPU provides features that are not yet enabled in the userspace. This
section highlights some of the coolest display features, which could be enabled
with the userspace developer helper.
Enable underlay
---------------
AMD display has this feature called underlay (which you can read more about at
'Documentation/GPU/amdgpu/display/mpo-overview.rst') which is intended to
save power when playing a video. The basic idea is to put a video in the
underlay plane at the bottom and the desktop in the plane above it with a hole
in the video area. This feature is enabled in ChromeOS, and from our data
measurement, it can save power.
Level: Unknown
Adaptive Backlight Modulation (ABM)
-----------------------------------
ABM is a feature that adjusts the display panel's backlight level and pixel
values depending on the displayed image. This power-saving feature can be very
useful when the system starts to run off battery; since this will impact the
display output fidelity, it would be good if this option was something that
users could turn on or off.
Level: Unknown
HDR & Color management & VRR
----------------------------
HDR, Color Management, and VRR are huge topics and it's hard to put these into
concise ToDos. If you are interested in this topic, we recommend checking some
blog posts from the community developers to better understand some of the
specific challenges and people working on the subject. If anyone wants to work
on some particular part, we can try to help with some basic guidance. Finally,
keep in mind that we already have some kernel-doc in place for those areas.
Level: Unknown

View File

@ -131,9 +131,6 @@ The DRM blend mode and its elements are then mapped by AMDGPU display manager
(DM) to program the blending configuration of the Multiple Pipe/Plane Combined
(MPC), as follows:
.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h
:doc: mpc-overview
.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h
:functions: mpcc_blnd_cfg

View File

@ -7,18 +7,80 @@ drm/amd/display - Display Core (DC)
AMD display engine is partially shared with other operating systems; for this
reason, our Display Core Driver is divided into two pieces:
1. **Display Core (DC)** contains the OS-agnostic components. Things like
#. **Display Core (DC)** contains the OS-agnostic components. Things like
hardware programming and resource management are handled here.
2. **Display Manager (DM)** contains the OS-dependent components. Hooks to the
amdgpu base driver and DRM are implemented here.
#. **Display Manager (DM)** contains the OS-dependent components. Hooks to the
amdgpu base driver and DRM are implemented here. For example, you can check
display/amdgpu_dm/ folder.
------------------
DC Code validation
------------------
Maintaining the same code base across multiple OSes requires a lot of
synchronization effort between repositories and exhaustive validation. In the
DC case, we maintain a tree to centralize code from different parts. The shared
repository has integration tests with our Internal Linux CI farm, and we run a
comprehensive set of IGT tests in various AMD GPUs/APUs (mostly recent dGPUs
and APUs). Our CI also checks ARM64/32, PPC64/32, and x86_64/32 compilation
with DCN enabled and disabled.
When we upstream a new feature or some patches, we pack them in a patchset with
the prefix **DC Patches for <DATE>**, which is created based on the latest
`amd-staging-drm-next <https://gitlab.freedesktop.org/agd5f/linux>`_. All of
those patches are under a DC version tested as follows:
* Ensure that every patch compiles and the entire series pass our set of IGT
test in different hardware.
* Prepare a branch with those patches for our validation team. If there is an
error, a developer will debug as fast as possible; usually, a simple bisect
in the series is enough to point to a bad change, and two possible actions
emerge: fix the issue or drop the patch. If it is not an easy fix, the bad
patch is dropped.
* Finally, developers wait a few days for community feedback before we merge
the series.
It is good to stress that the test phase is something that we take extremely
seriously, and we never merge anything that fails our validation. Follows an
overview of our test set:
#. Manual test
* Multiple Hotplugs with DP and HDMI.
* Stress test with multiple display configuration changes via the user interface.
* Validate VRR behaviour.
* Check PSR.
* Validate MPO when playing video.
* Test more than two displays connected at the same time.
* Check suspend/resume.
* Validate FPO.
* Check MST.
#. Automated test
* IGT tests in a farm with GPUs and APUs that support DCN and DCE.
* Compilation validation with the latest GCC and Clang from LTS distro.
* Cross-compilation for PowerPC 64/32, ARM 64/32, and x86 32.
In terms of test setup for CI and manual tests, we usually use:
#. The latest Ubuntu LTS.
#. In terms of userspace, we only use fully updated open-source components
provided by the distribution official package manager.
#. Regarding IGT, we use the latest code from the upstream.
#. Most of the manual tests are conducted in the GNome but we also use KDE.
Notice that someone from our test team will always reply to the cover letter
with the test report.
--------------
DC Information
--------------
The display pipe is responsible for "scanning out" a rendered frame from the
GPU memory (also called VRAM, FrameBuffer, etc.) to a display. In other words,
it would:
1. Read frame information from memory;
2. Perform required transformation;
3. Send pixel data to sink devices.
#. Read frame information from memory;
#. Perform required transformation;
#. Send pixel data to sink devices.
If you want to learn more about our driver details, take a look at the below
table of content:
@ -26,7 +88,9 @@ table of content:
.. toctree::
display-manager.rst
dc-debug.rst
dcn-overview.rst
dcn-blocks.rst
mpo-overview.rst
dc-debug.rst
display-contributing.rst
dc-glossary.rst

View File

@ -153,18 +153,6 @@ Managed Resources
.. kernel-doc:: include/drm/drm_managed.h
:internal:
Bus-specific Device Registration and PCI Support
------------------------------------------------
A number of functions are provided to help with device registration. The
functions deal with PCI and platform devices respectively and are only
provided for historical reasons. These are all deprecated and shouldn't
be used in new drivers. Besides that there's a few helpers for pci
drivers.
.. kernel-doc:: drivers/gpu/drm/drm_pci.c
:export:
Open/Close, File Operations and IOCTLs
======================================

View File

@ -138,7 +138,7 @@ indicating kibi- or mebi-bytes.
- drm-shared-<region>: <uint> [KiB|MiB]
The total size of buffers that are shared with another file (ie. have more
The total size of buffers that are shared with another file (e.g., have more
than a single handle).
- drm-total-<region>: <uint> [KiB|MiB]

View File

@ -164,6 +164,8 @@ Conference talks
Slides and articles
-------------------
* `The Linux graphics stack in a nutshell, part 1 <https://lwn.net/Articles/955376/>`_ - Thomas Zimmermann (2023)
* `The Linux graphics stack in a nutshell, part 2 <https://lwn.net/Articles/955708/>`_ - Thomas Zimmermann (2023)
* `Understanding the Linux Graphics Stack <https://bootlin.com/doc/training/graphics/graphics-slides.pdf>`_ - Bootlin (2022)
* `DRM KMS overview <https://wiki.st.com/stm32mpu/wiki/DRM_KMS_overview>`_ - STMicroelectronics (2021)
* `Linux graphic stack <https://studiopixl.com/2017-05-13/linux-graphic-stack-an-overview>`_ - Nathan Gauër (2017)

View File

@ -31,7 +31,3 @@ host such documentation:
.. toctree::
i915_vm_bind.rst
.. toctree::
xe.rst

View File

@ -1,234 +0,0 @@
==========================
Xe Merge Acceptance Plan
==========================
Xe is a new driver for Intel GPUs that supports both integrated and
discrete platforms starting with Tiger Lake (first Intel Xe Architecture).
This document aims to establish a merge plan for the Xe, by writing down clear
pre-merge goals, in order to avoid unnecessary delays.
Xe Overview
=============
The main motivation of Xe is to have a fresh base to work from that is
unencumbered by older platforms, whilst also taking the opportunity to
rearchitect our driver to increase sharing across the drm subsystem, both
leveraging and allowing us to contribute more towards other shared components
like TTM and drm/scheduler.
This is also an opportunity to start from the beginning with a clean uAPI that is
extensible by design and already aligned with the modern userspace needs. For
this reason, the memory model is solely based on GPU Virtual Address space
bind/unbind (VM_BIND) of GEM buffer objects (BOs) and execution only supporting
explicit synchronization. With persistent mapping across the execution, the
userspace does not need to provide a list of all required mappings during each
submission.
The new driver leverages a lot from i915. As for display, the intent is to share
the display code with the i915 driver so that there is maximum reuse there.
As for the power management area, the goal is to have a much-simplified support
for the system suspend states (S-states), PCI device suspend states (D-states),
GPU/Render suspend states (R-states) and frequency management. It should leverage
as much as possible all the existent PCI-subsystem infrastructure (pm and
runtime_pm) and underlying firmware components such PCODE and GuC for the power
states and frequency decisions.
Repository:
https://gitlab.freedesktop.org/drm/xe/kernel (branch drm-xe-next)
Xe Platforms
==============
Currently, Xe is already functional and has experimental support for multiple
platforms starting from Tiger Lake, with initial support in userspace implemented
in Mesa (for Iris and Anv, our OpenGL and Vulkan drivers), as well as in NEO
(for OpenCL and Level0).
During a transition period, platforms will be supported by both Xe and i915.
However, the force_probe mechanism existent in both drivers will allow only one
official and by-default probe at a given time.
For instance, in order to probe a DG2 which PCI ID is 0x5690 by Xe instead of
i915, the following set of parameters need to be used:
```
i915.force_probe=!5690 xe.force_probe=5690
```
In both drivers, the .require_force_probe protection forces the user to use the
force_probe parameter while the driver is under development. This protection is
only removed when the support for the platform and the uAPI are stable. Stability
which needs to be demonstrated by CI results.
In order to avoid user space regressions, i915 will continue to support all the
current platforms that are already out of this protection. Xe support will be
forever experimental and dependent on the usage of force_probe for these
platforms.
When the time comes for Xe, the protection will be lifted on Xe and kept in i915.
Xe Pre-Merge Goals - Work-in-Progress
=======================================
Display integration with i915
-----------------------------
In order to share the display code with the i915 driver so that there is maximum
reuse, the i915/display/ code is built twice, once for i915.ko and then for
xe.ko. Currently, the i915/display code in Xe tree is polluted with many 'ifdefs'
depending on the build target. The goal is to refactor both Xe and i915/display
code simultaneously in order to get a clean result before they land upstream, so
that display can already be part of the initial pull request towards drm-next.
However, display code should not gate the acceptance of Xe in upstream. Xe
patches will be refactored in a way that display code can be removed, if needed,
from the first pull request of Xe towards drm-next. The expectation is that when
both drivers are part of the drm-tip, the introduction of cleaner patches will be
easier and speed up.
Xe uAPI high level overview
=============================
...Warning: To be done in follow up patches after/when/where the main consensus in various items are individually reached.
Xe Pre-Merge Goals - Completed
================================
Drm_exec
--------
Helper to make dma_resv locking for a big number of buffers is getting removed in
the drm_exec series proposed in https://patchwork.freedesktop.org/patch/524376/
If that happens, Xe needs to change and incorporate the changes in the driver.
The goal is to engage with the Community to understand if the best approach is to
move that to the drivers that are using it or if we should keep the helpers in
place waiting for Xe to get merged.
This item ties into the GPUVA, VM_BIND, and even long-running compute support.
As a key measurable result, we need to have a community consensus documented in
this document and the Xe driver prepared for the changes, if necessary.
Userptr integration and vm_bind
-------------------------------
Different drivers implement different ways of dealing with execution of userptr.
With multiple drivers currently introducing support to VM_BIND, the goal is to
aim for a DRM consensus on whats the best way to have that support. To some
extent this is already getting addressed itself with the GPUVA where likely the
userptr will be a GPUVA with a NULL GEM call VM bind directly on the userptr.
However, there are more aspects around the rules for that and the usage of
mmu_notifiers, locking and other aspects.
This task here has the goal of introducing a documentation of the basic rules.
The documentation *needs* to first live in this document (API session below) and
then moved to another more specific document or at Xe level or at DRM level.
Documentation should include:
* The userptr part of the VM_BIND api.
* Locking, including the page-faulting case.
* O(1) complexity under VM_BIND.
The document is now included in the drm documentation :doc:`here </gpu/drm-vm-bind-async>`.
Some parts of userptr like mmu_notifiers should become GPUVA or DRM helpers when
the second driver supporting VM_BIND+userptr appears. Details to be defined when
the time comes.
The DRM GPUVM helpers do not yet include the userptr parts, but discussions
about implementing them are ongoing.
ASYNC VM_BIND
-------------
Although having a common DRM level IOCTL for VM_BIND is not a requirement to get
Xe merged, it is mandatory to have a consensus with other drivers and Mesa.
It needs to be clear how to handle async VM_BIND and interactions with userspace
memory fences. Ideally with helper support so people don't get it wrong in all
possible ways.
As a key measurable result, the benefits of ASYNC VM_BIND and a discussion of
various flavors, error handling and sample API suggestions are documented in
:doc:`The ASYNC VM_BIND document </gpu/drm-vm-bind-async>`.
Drm_scheduler
-------------
Xe primarily uses Firmware based scheduling (GuC FW). However, it will use
drm_scheduler as the scheduler frontend for userspace submission in order to
resolve syncobj and dma-buf implicit sync dependencies. However, drm_scheduler is
not yet prepared to handle the 1-to-1 relationship between drm_gpu_scheduler and
drm_sched_entity.
Deeper changes to drm_scheduler should *not* be required to get Xe accepted, but
some consensus needs to be reached between Xe and other community drivers that
could also benefit from this work, for coupling FW based/assisted submission such
as the ARMs new Mali GPU driver, and others.
As a key measurable result, the patch series introducing Xe itself shall not
depend on any other patch touching drm_scheduler itself that was not yet merged
through drm-misc. This, by itself, already includes the reach of an agreement for
uniform 1 to 1 relationship implementation / usage across drivers.
Long running compute: minimal data structure/scaffolding
--------------------------------------------------------
The generic scheduler code needs to include the handling of endless compute
contexts, with the minimal scaffolding for preempt-ctx fences (probably on the
drm_sched_entity) and making sure drm_scheduler can cope with the lack of job
completion fence.
The goal is to achieve a consensus ahead of Xe initial pull-request, ideally with
this minimal drm/scheduler work, if needed, merged to drm-misc in a way that any
drm driver, including Xe, could re-use and add their own individual needs on top
in a next stage. However, this should not block the initial merge.
Dev_coredump
------------
Xe needs to align with other drivers on the way that the error states are
dumped, avoiding a Xe only error_state solution. The goal is to use devcoredump
infrastructure to report error states, since it produces a standardized way
by exposing a virtual and temporary /sys/class/devcoredump device.
As the key measurable result, Xe driver needs to provide GPU snapshots captured
at hang time through devcoredump, but without depending on any core modification
of devcoredump infrastructure itself.
Later, when we are in-tree, the goal is to collaborate with devcoredump
infrastructure with overall possible improvements, like multiple file support
for better organization of the dumps, snapshot support, dmesg extra print,
and whatever may make sense and help the overall infrastructure.
DRM_VM_BIND
-----------
Nouveau, and Xe are all implementing VM_BIND and new Exec uAPIs in order to
fulfill the needs of the modern uAPI. Xe merge should *not* be blocked on the
development of a common new drm_infrastructure. However, the Xe team needs to
engage with the community to explore the options of a common API.
As a key measurable result, the DRM_VM_BIND needs to be documented in this file
below, or this entire block deleted if the consensus is for independent drivers
vm_bind ioctls.
Although having a common DRM level IOCTL for VM_BIND is not a requirement to get
Xe merged, it is mandatory to enforce the overall locking scheme for all major
structs and list (so vm and vma). So, a consensus is needed, and possibly some
common helpers. If helpers are needed, they should be also documented in this
document.
GPU VA
------
Two main goals of Xe are meeting together here:
1) Have an uAPI that aligns with modern UMD needs.
2) Early upstream engagement.
RedHat engineers working on Nouveau proposed a new DRM feature to handle keeping
track of GPU virtual address mappings. This is still not merged upstream, but
this aligns very well with our goals and with our VM_BIND. The engagement with
upstream and the port of Xe towards GPUVA is already ongoing.
As a key measurable result, Xe needs to be aligned with the GPU VA and working in
our tree. Missing Nouveau patches should *not* block Xe and any needed GPUVA
related patch should be independent and present on dri-devel or acked by
maintainers to go along with the first Xe pull request towards drm-next.

View File

@ -120,6 +120,29 @@ Contact: Daniel Vetter, respective driver maintainers
Level: Advanced
Rename drm_atomic_state
-----------------------
The KMS framework uses two slightly different definitions for the ``state``
concept. For a given object (plane, CRTC, encoder, etc., so
``drm_$OBJECT_state``), the state is the entire state of that object. However,
at the device level, ``drm_atomic_state`` refers to a state update for a
limited number of objects.
The state isn't the entire device state, but only the full state of some
objects in that device. This is confusing to newcomers, and
``drm_atomic_state`` should be renamed to something clearer like
``drm_atomic_commit``.
In addition to renaming the structure itself, it would also imply renaming some
related functions (``drm_atomic_state_alloc``, ``drm_atomic_state_get``,
``drm_atomic_state_put``, ``drm_atomic_state_init``,
``__drm_atomic_state_free``, etc.).
Contact: Maxime Ripard <mripard@kernel.org>
Level: Advanced
Fallout from atomic KMS
-----------------------

View File

@ -614,7 +614,7 @@ AGPGART DRIVER
M: David Airlie <airlied@redhat.com>
L: dri-devel@lists.freedesktop.org
S: Maintained
T: git git://anongit.freedesktop.org/drm/drm
T: git https://gitlab.freedesktop.org/drm/kernel.git
F: drivers/char/agp/
F: include/linux/agp*
F: include/uapi/linux/agp*
@ -7022,7 +7022,7 @@ L: dri-devel@lists.freedesktop.org
S: Maintained
B: https://gitlab.freedesktop.org/drm
C: irc://irc.oftc.net/dri-devel
T: git git://anongit.freedesktop.org/drm/drm
T: git https://gitlab.freedesktop.org/drm/kernel.git
F: Documentation/devicetree/bindings/display/
F: Documentation/devicetree/bindings/gpu/
F: Documentation/gpu/
@ -7056,7 +7056,7 @@ X: drivers/gpu/drm/mediatek/
X: drivers/gpu/drm/msm/
X: drivers/gpu/drm/nouveau/
X: drivers/gpu/drm/radeon/
X: drivers/gpu/drm/renesas/
X: drivers/gpu/drm/renesas/rcar-du/
X: drivers/gpu/drm/tegra/
DRM DRIVERS FOR ALLWINNER A10
@ -7224,12 +7224,22 @@ F: Documentation/devicetree/bindings/display/bridge/renesas,lvds.yaml
F: Documentation/devicetree/bindings/display/renesas,du.yaml
F: drivers/gpu/drm/renesas/rcar-du/
DRM DRIVERS FOR RENESAS RZ
M: Biju Das <biju.das.jz@bp.renesas.com>
L: dri-devel@lists.freedesktop.org
L: linux-renesas-soc@vger.kernel.org
S: Maintained
T: git git://anongit.freedesktop.org/drm/drm-misc
F: Documentation/devicetree/bindings/display/renesas,rzg2l-du.yaml
F: drivers/gpu/drm/renesas/rz-du/
DRM DRIVERS FOR RENESAS SHMOBILE
M: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
M: Geert Uytterhoeven <geert+renesas@glider.be>
L: dri-devel@lists.freedesktop.org
L: linux-renesas-soc@vger.kernel.org
S: Supported
T: git git://anongit.freedesktop.org/drm/drm-misc
F: Documentation/devicetree/bindings/display/renesas,shmobile-lcdc.yaml
F: drivers/gpu/drm/renesas/shmobile/
F: include/linux/platform_data/shmob_drm.h
@ -7339,6 +7349,7 @@ F: drivers/gpu/drm/xlnx/
DRM GPU SCHEDULER
M: Luben Tuikov <ltuikov89@gmail.com>
M: Matthew Brost <matthew.brost@intel.com>
L: dri-devel@lists.freedesktop.org
S: Maintained
T: git git://anongit.freedesktop.org/drm/drm-misc
@ -10516,7 +10527,6 @@ F: drivers/media/rc/img-ir/
IMGTEC POWERVR DRM DRIVER
M: Frank Binns <frank.binns@imgtec.com>
M: Donald Robson <donald.robson@imgtec.com>
M: Matt Coster <matt.coster@imgtec.com>
S: Supported
T: git git://anongit.freedesktop.org/drm/drm-misc

View File

@ -10,15 +10,14 @@
#define __ASM_POWERPC_BACKLIGHT_H
#ifdef __KERNEL__
#include <linux/fb.h>
#include <linux/mutex.h>
struct backlight_device;
/* For locking instructions, see the implementation file */
extern struct backlight_device *pmac_backlight;
extern struct mutex pmac_backlight_mutex;
extern int pmac_backlight_curve_lookup(struct fb_info *info, int value);
extern int pmac_has_backlight_type(const char *type);
extern void pmac_backlight_key(int direction);

View File

@ -9,7 +9,6 @@
*/
#include <linux/kernel.h>
#include <linux/fb.h>
#include <linux/backlight.h>
#include <linux/adb.h>
#include <linux/pmu.h>
@ -72,31 +71,6 @@ int pmac_has_backlight_type(const char *type)
return 0;
}
int pmac_backlight_curve_lookup(struct fb_info *info, int value)
{
int level = (FB_BACKLIGHT_LEVELS - 1);
if (info && info->bl_dev) {
int i, max = 0;
/* Look for biggest value */
for (i = 0; i < FB_BACKLIGHT_LEVELS; i++)
max = max((int)info->bl_curve[i], max);
/* Look for nearest value */
for (i = 0; i < FB_BACKLIGHT_LEVELS; i++) {
int diff = abs(info->bl_curve[i] - value);
if (diff < max) {
max = diff;
level = i;
}
}
}
return level;
}
static void pmac_backlight_key_worker(struct work_struct *work)
{
if (atomic_read(&kernel_backlight_disabled))

View File

@ -67,6 +67,7 @@ config PS3_VUART
config PS3_PS3AV
depends on PPC_PS3
tristate "PS3 AV settings driver" if PS3_ADVANCED
select VIDEO
select PS3_VUART
default y
help

View File

@ -23,7 +23,7 @@ static struct idr accel_minors_idr;
static struct dentry *accel_debugfs_root;
static struct device_type accel_sysfs_device_minor = {
static const struct device_type accel_sysfs_device_minor = {
.name = "accel_minor"
};

View File

@ -1360,9 +1360,8 @@ static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
return -EINVAL;
}
if (!hl_device_operational(hdev, &status)) {
if (!hl_device_operational(hdev, &status))
return -EBUSY;
}
if ((args->in.cs_flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
!hdev->supports_staged_submission) {

View File

@ -484,7 +484,7 @@ static ssize_t mmu_asid_va_write(struct file *file, const char __user *buf,
struct hl_debugfs_entry *entry = s->private;
struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
struct hl_device *hdev = dev_entry->hdev;
char kbuf[MMU_KBUF_SIZE];
char kbuf[MMU_KBUF_SIZE] = {0};
char *c;
ssize_t rc;
@ -546,7 +546,7 @@ static ssize_t mmu_ack_error_value_write(struct file *file,
struct hl_debugfs_entry *entry = s->private;
struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
struct hl_device *hdev = dev_entry->hdev;
char kbuf[MMU_KBUF_SIZE];
char kbuf[MMU_KBUF_SIZE] = {0};
ssize_t rc;
if (count > sizeof(kbuf) - 1)
@ -1643,19 +1643,19 @@ static void add_files_to_device(struct hl_device *hdev, struct hl_dbg_device_ent
&hl_data64b_fops);
debugfs_create_file("set_power_state",
0200,
0644,
root,
dev_entry,
&hl_power_fops);
debugfs_create_file("device",
0200,
0644,
root,
dev_entry,
&hl_device_fops);
debugfs_create_file("clk_gate",
0200,
0644,
root,
dev_entry,
&hl_clk_gate_fops);
@ -1667,13 +1667,13 @@ static void add_files_to_device(struct hl_device *hdev, struct hl_dbg_device_ent
&hl_stop_on_err_fops);
debugfs_create_file("dump_security_violations",
0644,
0400,
root,
dev_entry,
&hl_security_violations_fops);
debugfs_create_file("dump_razwi_events",
0644,
0400,
root,
dev_entry,
&hl_razwi_check_fops);
@ -1706,7 +1706,7 @@ static void add_files_to_device(struct hl_device *hdev, struct hl_dbg_device_ent
&hdev->reset_info.skip_reset_on_timeout);
debugfs_create_file("state_dump",
0600,
0644,
root,
dev_entry,
&hl_state_dump_fops);
@ -1724,7 +1724,7 @@ static void add_files_to_device(struct hl_device *hdev, struct hl_dbg_device_ent
for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) {
debugfs_create_file(hl_debugfs_list[i].name,
0444,
0644,
root,
entry,
&hl_debugfs_fops);

View File

@ -55,7 +55,8 @@ static u64 hl_set_dram_bar(struct hl_device *hdev, u64 addr, struct pci_mem_regi
if (is_power_of_2(prop->dram_pci_bar_size))
bar_base_addr = addr & ~(prop->dram_pci_bar_size - 0x1ull);
else
bar_base_addr = DIV_ROUND_DOWN_ULL(addr, prop->dram_pci_bar_size) *
bar_base_addr = region->region_base +
div64_u64((addr - region->region_base), prop->dram_pci_bar_size) *
prop->dram_pci_bar_size;
old_base = hdev->asic_funcs->set_dram_bar_base(hdev, bar_base_addr);
@ -1034,14 +1035,14 @@ static void device_early_fini(struct hl_device *hdev)
static bool is_pci_link_healthy(struct hl_device *hdev)
{
u16 vendor_id;
u16 device_id;
if (!hdev->pdev)
return false;
pci_read_config_word(hdev->pdev, PCI_VENDOR_ID, &vendor_id);
pci_read_config_word(hdev->pdev, PCI_DEVICE_ID, &device_id);
return (vendor_id == PCI_VENDOR_ID_HABANALABS);
return (device_id == hdev->pdev->device);
}
static int hl_device_eq_heartbeat_check(struct hl_device *hdev)
@ -1768,14 +1769,16 @@ kill_processes:
hdev->device_cpu_disabled = false;
hdev->reset_info.hard_reset_pending = false;
if (hdev->reset_info.reset_trigger_repeated &&
(hdev->reset_info.prev_reset_trigger ==
HL_DRV_RESET_FW_FATAL_ERR)) {
/* if there 2 back to back resets from FW,
* ensure driver puts the driver in a unusable state
/*
* Put the device in an unusable state if there are 2 back to back resets due to
* fatal errors.
*/
if (hdev->reset_info.reset_trigger_repeated &&
(hdev->reset_info.prev_reset_trigger == HL_DRV_RESET_FW_FATAL_ERR ||
hdev->reset_info.prev_reset_trigger ==
HL_DRV_RESET_HEARTBEAT)) {
dev_crit(hdev->dev,
"%s Consecutive FW fatal errors received, stopping hard reset\n",
"%s Consecutive fatal errors, stopping hard reset\n",
dev_name(&(hdev)->pdev->dev));
rc = -EIO;
goto out_err;
@ -2801,3 +2804,35 @@ void hl_enable_err_info_capture(struct hl_error_info *captured_err_info)
atomic_set(&captured_err_info->cs_timeout.write_enable, 1);
captured_err_info->undef_opcode.write_enable = true;
}
void hl_init_cpu_for_irq(struct hl_device *hdev)
{
#ifdef CONFIG_NUMA
struct cpumask *available_mask = &hdev->irq_affinity_mask;
int numa_node = hdev->pdev->dev.numa_node, i;
static struct cpumask cpu_mask;
if (numa_node < 0)
return;
if (!cpumask_and(&cpu_mask, cpumask_of_node(numa_node), cpu_online_mask)) {
dev_err(hdev->dev, "No available affinities in current numa node\n");
return;
}
/* Remove HT siblings */
for_each_cpu(i, &cpu_mask)
cpumask_set_cpu(cpumask_first(topology_sibling_cpumask(i)), available_mask);
#endif
}
void hl_set_irq_affinity(struct hl_device *hdev, int irq)
{
if (cpumask_empty(&hdev->irq_affinity_mask)) {
dev_dbg(hdev->dev, "affinity mask is empty\n");
return;
}
if (irq_set_affinity_and_hint(irq, &hdev->irq_affinity_mask))
dev_err(hdev->dev, "Failed setting irq %d affinity\n", irq);
}

View File

@ -501,7 +501,7 @@ int hl_fw_unmask_irq(struct hl_device *hdev, u16 event_type)
0, &result);
if (rc)
dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d", event_type);
dev_err(hdev->dev, "failed to unmask event %d", event_type);
return rc;
}
@ -540,7 +540,7 @@ int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr,
total_pkt_size, 0, &result);
if (rc)
dev_err(hdev->dev, "failed to unmask IRQ array\n");
dev_err(hdev->dev, "failed to unmask event array\n");
kfree(pkt);
@ -2718,18 +2718,20 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
}
if (!(hdev->fw_components & FW_TYPE_BOOT_CPU)) {
struct lkd_fw_binning_info *binning_info;
rc = hl_fw_dynamic_request_descriptor(hdev, fw_loader,
sizeof(struct lkd_msg_comms));
rc = hl_fw_dynamic_request_descriptor(hdev, fw_loader, sizeof(struct lkd_msg_comms));
if (rc)
goto protocol_err;
if (hdev->asic_prop.support_dynamic_resereved_fw_size)
hdev->asic_prop.reserved_fw_mem_size =
le32_to_cpu(fw_loader->dynamic_loader.comm_desc.rsvd_mem_size_mb) * SZ_1M;
if (!(hdev->fw_components & FW_TYPE_BOOT_CPU)) {
struct lkd_fw_binning_info *binning_info;
/* read preboot version */
rc = hl_fw_dynamic_read_device_fw_version(hdev, FW_COMP_PREBOOT,
fw_loader->dynamic_loader.comm_desc.cur_fw_ver);
if (rc)
return rc;
@ -2756,11 +2758,6 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
hdev->decoder_binning, hdev->rotator_binning);
}
if (hdev->asic_prop.support_dynamic_resereved_fw_size) {
hdev->asic_prop.reserved_fw_mem_size =
le32_to_cpu(fw_loader->dynamic_loader.comm_desc.rsvd_mem_size_mb);
}
return 0;
}
@ -2795,7 +2792,7 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
hdev->asic_funcs->init_cpu_scrambler_dram(hdev);
if (!(hdev->fw_components & FW_TYPE_LINUX)) {
dev_info(hdev->dev, "Skip loading Linux F/W\n");
dev_dbg(hdev->dev, "Skip loading Linux F/W\n");
return 0;
}

View File

@ -443,18 +443,22 @@ enum hl_collective_mode {
* a CB handle can be provided for jobs on this queue.
* Otherwise, a CB address must be provided.
* @collective_mode: collective mode of current queue
* @q_dram_bd_address: PQ dram address, used when PQ need to reside in DRAM.
* @driver_only: true if only the driver is allowed to send a job to this queue,
* false otherwise.
* @binned: True if the queue is binned out and should not be used
* @supports_sync_stream: True if queue supports sync stream
* @dram_bd: True if the bd should be copied to dram, needed for PQ which has been allocated on dram
*/
struct hw_queue_properties {
enum hl_queue_type type;
enum queue_cb_alloc_flags cb_alloc_flags;
enum hl_collective_mode collective_mode;
u64 q_dram_bd_address;
u8 driver_only;
u8 binned;
u8 supports_sync_stream;
u8 dram_bd;
};
/**
@ -590,8 +594,6 @@ struct hl_hints_range {
* we display to the user
* @mmu_pgt_size: MMU page tables total size.
* @mmu_pte_size: PTE size in MMU page tables.
* @mmu_hop_table_size: MMU hop table size.
* @mmu_hop0_tables_total_size: total size of MMU hop0 tables.
* @dram_page_size: The DRAM physical page size.
* @cfg_size: configuration space size on SRAM.
* @sram_size: total size of SRAM.
@ -645,10 +647,10 @@ struct hl_hints_range {
* @num_engine_cores: number of engine cpu cores.
* @max_num_of_engines: maximum number of all engines in the ASIC.
* @num_of_special_blocks: special_blocks array size.
* @glbl_err_cause_num: global err cause number.
* @glbl_err_max_cause_num: global err max cause number.
* @hbw_flush_reg: register to read to generate HBW flush. value of 0 means HBW flush is
* not supported.
* @reserved_fw_mem_size: size in MB of dram memory reserved for FW.
* @reserved_fw_mem_size: size of dram memory reserved for FW.
* @collective_first_sob: first sync object available for collective use
* @collective_first_mon: first monitor available for collective use
* @sync_stream_first_sob: first sync object available for sync stream use
@ -743,8 +745,6 @@ struct asic_fixed_properties {
u32 clk_pll_index;
u32 mmu_pgt_size;
u32 mmu_pte_size;
u32 mmu_hop_table_size;
u32 mmu_hop0_tables_total_size;
u32 dram_page_size;
u32 cfg_size;
u32 sram_size;
@ -779,7 +779,7 @@ struct asic_fixed_properties {
u32 num_engine_cores;
u32 max_num_of_engines;
u32 num_of_special_blocks;
u32 glbl_err_cause_num;
u32 glbl_err_max_cause_num;
u32 hbw_flush_reg;
u32 reserved_fw_mem_size;
u16 collective_first_sob;
@ -1052,6 +1052,8 @@ struct hl_encaps_signals_mgr {
* @collective_mode: collective mode of current queue
* @kernel_address: holds the queue's kernel virtual address.
* @bus_address: holds the queue's DMA address.
* @pq_dram_address: hold the dram address when the PQ is allocated, used when dram_bd is true in
* queue properites.
* @pi: holds the queue's pi value.
* @ci: holds the queue's ci value, AS CALCULATED BY THE DRIVER (not real ci).
* @hw_queue_id: the id of the H/W queue.
@ -1061,6 +1063,7 @@ struct hl_encaps_signals_mgr {
* @valid: is the queue valid (we have array of 32 queues, not all of them
* exist).
* @supports_sync_stream: True if queue supports sync stream
* @dram_bd: True if the bd should be copied to dram, needed for PQ which has been allocated on dram
*/
struct hl_hw_queue {
struct hl_cs_job **shadow_queue;
@ -1069,6 +1072,7 @@ struct hl_hw_queue {
enum hl_collective_mode collective_mode;
void *kernel_address;
dma_addr_t bus_address;
u64 pq_dram_address;
u32 pi;
atomic_t ci;
u32 hw_queue_id;
@ -1077,6 +1081,7 @@ struct hl_hw_queue {
u16 int_queue_len;
u8 valid;
u8 supports_sync_stream;
u8 dram_bd;
};
/**
@ -2547,7 +2552,7 @@ struct hl_state_dump_specs {
* DEVICES
*/
#define HL_STR_MAX 32
#define HL_STR_MAX 64
#define HL_DEV_STS_MAX (HL_DEVICE_STATUS_LAST + 1)
@ -3257,6 +3262,7 @@ struct hl_reset_info {
* @clk_throttling: holds information about current/previous clock throttling events
* @captured_err_info: holds information about errors.
* @reset_info: holds current device reset information.
* @irq_affinity_mask: mask of available CPU cores for user and decoder interrupt handling.
* @stream_master_qid_arr: pointer to array with QIDs of master streams.
* @fw_inner_major_ver: the major of current loaded preboot inner version.
* @fw_inner_minor_ver: the minor of current loaded preboot inner version.
@ -3446,6 +3452,8 @@ struct hl_device {
struct hl_reset_info reset_info;
cpumask_t irq_affinity_mask;
u32 *stream_master_qid_arr;
u32 fw_inner_major_ver;
u32 fw_inner_minor_ver;
@ -3886,6 +3894,7 @@ int hl_mmu_hr_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, struct hl_mmu_hop_
struct hl_hr_mmu_funcs *hr_func);
int hl_mmu_if_set_funcs(struct hl_device *hdev);
void hl_mmu_v1_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu);
void hl_mmu_v2_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu);
void hl_mmu_v2_hr_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu);
int hl_mmu_va_to_pa(struct hl_ctx *ctx, u64 virt_addr, u64 *phys_addr);
int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
@ -3893,6 +3902,22 @@ int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
u64 hl_mmu_scramble_addr(struct hl_device *hdev, u64 addr);
u64 hl_mmu_descramble_addr(struct hl_device *hdev, u64 addr);
bool hl_is_dram_va(struct hl_device *hdev, u64 virt_addr);
struct pgt_info *hl_mmu_dr_get_pgt_info(struct hl_ctx *ctx, u64 hop_addr);
void hl_mmu_dr_free_hop(struct hl_ctx *ctx, u64 hop_addr);
void hl_mmu_dr_free_pgt_node(struct hl_ctx *ctx, struct pgt_info *pgt_info);
u64 hl_mmu_dr_get_phys_hop0_addr(struct hl_ctx *ctx);
u64 hl_mmu_dr_get_hop0_addr(struct hl_ctx *ctx);
void hl_mmu_dr_write_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val);
void hl_mmu_dr_write_final_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val);
void hl_mmu_dr_clear_pte(struct hl_ctx *ctx, u64 pte_addr);
u64 hl_mmu_dr_get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr);
void hl_mmu_dr_get_pte(struct hl_ctx *ctx, u64 hop_addr);
int hl_mmu_dr_put_pte(struct hl_ctx *ctx, u64 hop_addr);
u64 hl_mmu_dr_get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte, bool *is_new_hop);
u64 hl_mmu_dr_alloc_hop(struct hl_ctx *ctx);
void hl_mmu_dr_flush(struct hl_ctx *ctx);
int hl_mmu_dr_init(struct hl_device *hdev);
void hl_mmu_dr_fini(struct hl_device *hdev);
int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name,
void __iomem *dst, u32 src_offset, u32 size);
@ -4032,6 +4057,8 @@ void hl_handle_critical_hw_err(struct hl_device *hdev, u16 event_id, u64 *event_
void hl_handle_fw_err(struct hl_device *hdev, struct hl_info_fw_err_info *info);
void hl_capture_engine_err(struct hl_device *hdev, u16 engine_id, u16 error_count);
void hl_enable_err_info_capture(struct hl_error_info *captured_err_info);
void hl_init_cpu_for_irq(struct hl_device *hdev);
void hl_set_irq_affinity(struct hl_device *hdev, int irq);
#ifdef CONFIG_DEBUG_FS

View File

@ -84,6 +84,8 @@ void hl_hw_queue_submit_bd(struct hl_device *hdev, struct hl_hw_queue *q,
u32 ctl, u32 len, u64 ptr)
{
struct hl_bd *bd;
u64 addr;
int i;
bd = q->kernel_address;
bd += hl_pi_2_offset(q->pi);
@ -91,7 +93,16 @@ void hl_hw_queue_submit_bd(struct hl_device *hdev, struct hl_hw_queue *q,
bd->len = cpu_to_le32(len);
bd->ptr = cpu_to_le64(ptr);
if (q->dram_bd)
for (i = 0 ; i < 2 ; i++) {
addr = q->pq_dram_address +
((hl_pi_2_offset(q->pi) * sizeof(struct hl_bd)) + (i * sizeof(u64)));
hdev->asic_funcs->access_dev_mem(hdev, PCI_REGION_DRAM, addr,
(u64 *)(bd) + i, DEBUGFS_WRITE64);
}
q->pi = hl_queue_inc_ptr(q->pi);
hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi);
}
@ -1087,12 +1098,18 @@ int hl_hw_queues_create(struct hl_device *hdev)
q->supports_sync_stream =
asic->hw_queues_props[i].supports_sync_stream;
q->collective_mode = asic->hw_queues_props[i].collective_mode;
q->dram_bd = asic->hw_queues_props[i].dram_bd;
rc = queue_init(hdev, q, i);
if (rc) {
dev_err(hdev->dev,
"failed to initialize queue %d\n", i);
goto release_queues;
}
/* Set DRAM PQ address for the queue if it should be at DRAM */
if (q->dram_bd)
q->pq_dram_address = asic->hw_queues_props[i].q_dram_bd_address;
}
return 0;

View File

@ -46,7 +46,7 @@ static u32 fixup_flags_legacy_fw(struct hl_device *hdev, enum hwmon_sensor_types
break;
default:
dev_err(hdev->dev, "unsupported h/w sensor type %d\n", type);
dev_err_ratelimited(hdev->dev, "unsupported h/w sensor type %d\n", type);
flags = cpucp_flags;
break;
}
@ -134,7 +134,7 @@ static u32 adjust_hwmon_flags(struct hl_device *hdev, enum hwmon_sensor_types ty
break;
default:
dev_err(hdev->dev, "unsupported h/w sensor type %d\n", type);
dev_err_ratelimited(hdev->dev, "unsupported h/w sensor type %d\n", type);
flags = cpucp_flags;
break;
}
@ -162,7 +162,8 @@ int hl_build_hwmon_channel_info(struct hl_device *hdev, struct cpucp_sensor *sen
break;
if (type >= HWMON_NR_SENSOR_TYPES) {
dev_err(hdev->dev, "Got wrong sensor type %d from device\n", type);
dev_err_ratelimited(hdev->dev,
"Got wrong sensor type %d from device\n", type);
return -EINVAL;
}
@ -584,7 +585,7 @@ int hl_get_temperature(struct hl_device *hdev,
*value = (long) result;
if (rc) {
dev_err(hdev->dev,
dev_err_ratelimited(hdev->dev,
"Failed to get temperature from sensor %d, error %d\n",
sensor_index, rc);
*value = 0;
@ -611,7 +612,7 @@ int hl_set_temperature(struct hl_device *hdev,
0, NULL);
if (rc)
dev_err(hdev->dev,
dev_err_ratelimited(hdev->dev,
"Failed to set temperature of sensor %d, error %d\n",
sensor_index, rc);
@ -638,7 +639,7 @@ int hl_get_voltage(struct hl_device *hdev,
*value = (long) result;
if (rc) {
dev_err(hdev->dev,
dev_err_ratelimited(hdev->dev,
"Failed to get voltage from sensor %d, error %d\n",
sensor_index, rc);
*value = 0;
@ -667,7 +668,7 @@ int hl_get_current(struct hl_device *hdev,
*value = (long) result;
if (rc) {
dev_err(hdev->dev,
dev_err_ratelimited(hdev->dev,
"Failed to get current from sensor %d, error %d\n",
sensor_index, rc);
*value = 0;
@ -696,7 +697,7 @@ int hl_get_fan_speed(struct hl_device *hdev,
*value = (long) result;
if (rc) {
dev_err(hdev->dev,
dev_err_ratelimited(hdev->dev,
"Failed to get fan speed from sensor %d, error %d\n",
sensor_index, rc);
*value = 0;
@ -725,7 +726,7 @@ int hl_get_pwm_info(struct hl_device *hdev,
*value = (long) result;
if (rc) {
dev_err(hdev->dev,
dev_err_ratelimited(hdev->dev,
"Failed to get pwm info from sensor %d, error %d\n",
sensor_index, rc);
*value = 0;
@ -752,7 +753,7 @@ void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr,
0, NULL);
if (rc)
dev_err(hdev->dev,
dev_err_ratelimited(hdev->dev,
"Failed to set pwm info to sensor %d, error %d\n",
sensor_index, rc);
}
@ -775,7 +776,7 @@ int hl_set_voltage(struct hl_device *hdev,
0, NULL);
if (rc)
dev_err(hdev->dev,
dev_err_ratelimited(hdev->dev,
"Failed to set voltage of sensor %d, error %d\n",
sensor_index, rc);
@ -800,7 +801,7 @@ int hl_set_current(struct hl_device *hdev,
0, NULL);
if (rc)
dev_err(hdev->dev,
dev_err_ratelimited(hdev->dev,
"Failed to set current of sensor %d, error %d\n",
sensor_index, rc);
@ -831,7 +832,7 @@ int hl_set_power(struct hl_device *hdev,
0, NULL);
if (rc)
dev_err(hdev->dev,
dev_err_ratelimited(hdev->dev,
"Failed to set power of sensor %d, error %d\n",
sensor_index, rc);
@ -858,7 +859,7 @@ int hl_get_power(struct hl_device *hdev,
*value = (long) result;
if (rc) {
dev_err(hdev->dev,
dev_err_ratelimited(hdev->dev,
"Failed to get power of sensor %d, error %d\n",
sensor_index, rc);
*value = 0;

View File

@ -1,3 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
HL_COMMON_MMU_FILES := common/mmu/mmu.o common/mmu/mmu_v1.o \
common/mmu/mmu_v2_hr.o
common/mmu/mmu_v2.o common/mmu/mmu_v2_hr.o

View File

@ -585,6 +585,8 @@ int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
int hl_mmu_if_set_funcs(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
if (hdev->mmu_disable)
return 0;
@ -597,7 +599,8 @@ int hl_mmu_if_set_funcs(struct hl_device *hdev)
case ASIC_GAUDI2:
case ASIC_GAUDI2B:
case ASIC_GAUDI2C:
/* MMUs in Gaudi2 are always host resident */
hl_mmu_v2_set_funcs(hdev, &hdev->mmu_func[MMU_DR_PGT]);
if (prop->pmmu.host_resident)
hl_mmu_v2_hr_set_funcs(hdev, &hdev->mmu_func[MMU_HR_PGT]);
break;
default:
@ -1209,3 +1212,219 @@ int hl_mmu_hr_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, struct hl_mmu_hop_
return 0;
}
struct pgt_info *hl_mmu_dr_get_pgt_info(struct hl_ctx *ctx, u64 hop_addr)
{
struct pgt_info *pgt_info = NULL;
hash_for_each_possible(ctx->mmu_shadow_hash, pgt_info, node,
(unsigned long) hop_addr)
if (hop_addr == pgt_info->shadow_addr)
break;
return pgt_info;
}
void hl_mmu_dr_free_hop(struct hl_ctx *ctx, u64 hop_addr)
{
struct pgt_info *pgt_info = hl_mmu_dr_get_pgt_info(ctx, hop_addr);
hl_mmu_dr_free_pgt_node(ctx, pgt_info);
}
void hl_mmu_dr_free_pgt_node(struct hl_ctx *ctx, struct pgt_info *pgt_info)
{
struct hl_device *hdev = ctx->hdev;
gen_pool_free(hdev->mmu_priv.dr.mmu_pgt_pool, pgt_info->phys_addr,
hdev->asic_prop.dmmu.hop_table_size);
hash_del(&pgt_info->node);
kfree((u64 *) (uintptr_t) pgt_info->shadow_addr);
kfree(pgt_info);
}
u64 hl_mmu_dr_get_phys_hop0_addr(struct hl_ctx *ctx)
{
return ctx->hdev->asic_prop.mmu_pgt_addr +
(ctx->asid * ctx->hdev->asic_prop.dmmu.hop_table_size);
}
u64 hl_mmu_dr_get_hop0_addr(struct hl_ctx *ctx)
{
return (u64) (uintptr_t) ctx->hdev->mmu_priv.dr.mmu_shadow_hop0 +
(ctx->asid * ctx->hdev->asic_prop.dmmu.hop_table_size);
}
u64 hl_mmu_dr_get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr)
{
u64 page_mask = ctx->hdev->asic_prop.dmmu.hop_table_size - 1;
u64 shadow_hop_addr = shadow_addr & (~page_mask);
u64 pte_offset = shadow_addr & page_mask;
u64 phys_hop_addr;
if (shadow_hop_addr != hl_mmu_dr_get_hop0_addr(ctx))
phys_hop_addr = hl_mmu_dr_get_pgt_info(ctx, shadow_hop_addr)->phys_addr;
else
phys_hop_addr = hl_mmu_dr_get_phys_hop0_addr(ctx);
return phys_hop_addr + pte_offset;
}
void hl_mmu_dr_write_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val)
{
u64 phys_val = hl_mmu_dr_get_phys_addr(ctx, val);
ctx->hdev->asic_funcs->write_pte(ctx->hdev, hl_mmu_dr_get_phys_addr(ctx, shadow_pte_addr),
phys_val);
*(u64 *) (uintptr_t) shadow_pte_addr = val;
}
void hl_mmu_dr_write_final_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val)
{
ctx->hdev->asic_funcs->write_pte(ctx->hdev,
hl_mmu_dr_get_phys_addr(ctx, shadow_pte_addr), val);
*(u64 *) (uintptr_t) shadow_pte_addr = val;
}
void hl_mmu_dr_clear_pte(struct hl_ctx *ctx, u64 pte_addr)
{
hl_mmu_dr_write_final_pte(ctx, pte_addr, 0);
}
void hl_mmu_dr_get_pte(struct hl_ctx *ctx, u64 hop_addr)
{
hl_mmu_dr_get_pgt_info(ctx, hop_addr)->num_of_ptes++;
}
int hl_mmu_dr_put_pte(struct hl_ctx *ctx, u64 hop_addr)
{
struct pgt_info *pgt_info = hl_mmu_dr_get_pgt_info(ctx, hop_addr);
int num_of_ptes_left;
pgt_info->num_of_ptes--;
/*
* Need to save the number of ptes left because hl_mmu_free_hop might free
* the pgt_info
*/
num_of_ptes_left = pgt_info->num_of_ptes;
if (!num_of_ptes_left)
hl_mmu_dr_free_pgt_node(ctx, pgt_info);
return num_of_ptes_left;
}
u64 hl_mmu_dr_alloc_hop(struct hl_ctx *ctx)
{
struct hl_device *hdev = ctx->hdev;
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct pgt_info *pgt_info;
u64 phys_addr, shadow_addr;
pgt_info = kmalloc(sizeof(*pgt_info), GFP_KERNEL);
if (!pgt_info)
return ULLONG_MAX;
phys_addr = (u64) gen_pool_alloc(hdev->mmu_priv.dr.mmu_pgt_pool,
prop->dmmu.hop_table_size);
if (!phys_addr) {
dev_err(hdev->dev, "failed to allocate page\n");
goto pool_add_err;
}
shadow_addr = (u64) (uintptr_t) kzalloc(prop->dmmu.hop_table_size,
GFP_KERNEL);
if (!shadow_addr)
goto shadow_err;
pgt_info->phys_addr = phys_addr;
pgt_info->shadow_addr = shadow_addr;
pgt_info->ctx = ctx;
pgt_info->num_of_ptes = 0;
hash_add(ctx->mmu_shadow_hash, &pgt_info->node, shadow_addr);
return shadow_addr;
shadow_err:
gen_pool_free(hdev->mmu_priv.dr.mmu_pgt_pool,
phys_addr, prop->dmmu.hop_table_size);
pool_add_err:
kfree(pgt_info);
return ULLONG_MAX;
}
u64 hl_mmu_dr_get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte, bool *is_new_hop)
{
u64 hop_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte);
if (hop_addr == ULLONG_MAX) {
hop_addr = hl_mmu_dr_alloc_hop(ctx);
*is_new_hop = (hop_addr != ULLONG_MAX);
}
return hop_addr;
}
void hl_mmu_dr_flush(struct hl_ctx *ctx)
{
/* flush all writes from all cores to reach PCI */
mb();
ctx->hdev->asic_funcs->read_pte(ctx->hdev, hl_mmu_dr_get_phys_hop0_addr(ctx));
}
int hl_mmu_dr_init(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
int rc;
hdev->mmu_priv.dr.mmu_pgt_pool =
gen_pool_create(__ffs(prop->dmmu.hop_table_size), -1);
if (!hdev->mmu_priv.dr.mmu_pgt_pool) {
dev_err(hdev->dev, "Failed to create page gen pool\n");
return -ENOMEM;
}
rc = gen_pool_add(hdev->mmu_priv.dr.mmu_pgt_pool, prop->mmu_pgt_addr +
prop->dmmu.hop0_tables_total_size,
prop->dmmu.pgt_size - prop->dmmu.hop0_tables_total_size,
-1);
if (rc) {
dev_err(hdev->dev, "Failed to add memory to page gen pool\n");
goto err_pool_add;
}
hdev->mmu_priv.dr.mmu_shadow_hop0 = kvcalloc(prop->max_asid,
prop->dmmu.hop_table_size, GFP_KERNEL);
if (ZERO_OR_NULL_PTR(hdev->mmu_priv.dr.mmu_shadow_hop0)) {
rc = -ENOMEM;
goto err_pool_add;
}
/* MMU H/W init will be done in device hw_init() */
return 0;
err_pool_add:
gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool);
return rc;
}
void hl_mmu_dr_fini(struct hl_device *hdev)
{
/* MMU H/W fini was already done in device hw_fini() */
if (ZERO_OR_NULL_PTR(hdev->mmu_priv.dr.mmu_shadow_hop0))
return;
kvfree(hdev->mmu_priv.dr.mmu_shadow_hop0);
gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool);
/* Make sure that if we arrive here again without init was
* called we won't cause kernel panic. This can happen for
* example if we fail during hard reset code at certain points
*/
hdev->mmu_priv.dr.mmu_shadow_hop0 = NULL;
}

View File

@ -12,166 +12,6 @@
#define MMU_V1_MAX_HOPS (MMU_HOP4 + 1)
static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr);
static struct pgt_info *get_pgt_info(struct hl_ctx *ctx, u64 hop_addr)
{
struct pgt_info *pgt_info = NULL;
hash_for_each_possible(ctx->mmu_shadow_hash, pgt_info, node,
(unsigned long) hop_addr)
if (hop_addr == pgt_info->shadow_addr)
break;
return pgt_info;
}
static void _free_hop(struct hl_ctx *ctx, struct pgt_info *pgt_info)
{
struct hl_device *hdev = ctx->hdev;
gen_pool_free(hdev->mmu_priv.dr.mmu_pgt_pool, pgt_info->phys_addr,
hdev->asic_prop.mmu_hop_table_size);
hash_del(&pgt_info->node);
kfree((u64 *) (uintptr_t) pgt_info->shadow_addr);
kfree(pgt_info);
}
static void free_hop(struct hl_ctx *ctx, u64 hop_addr)
{
struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);
_free_hop(ctx, pgt_info);
}
static u64 alloc_hop(struct hl_ctx *ctx)
{
struct hl_device *hdev = ctx->hdev;
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct pgt_info *pgt_info;
u64 phys_addr, shadow_addr;
pgt_info = kmalloc(sizeof(*pgt_info), GFP_KERNEL);
if (!pgt_info)
return ULLONG_MAX;
phys_addr = (u64) gen_pool_alloc(hdev->mmu_priv.dr.mmu_pgt_pool,
prop->mmu_hop_table_size);
if (!phys_addr) {
dev_err(hdev->dev, "failed to allocate page\n");
goto pool_add_err;
}
shadow_addr = (u64) (uintptr_t) kzalloc(prop->mmu_hop_table_size,
GFP_KERNEL);
if (!shadow_addr)
goto shadow_err;
pgt_info->phys_addr = phys_addr;
pgt_info->shadow_addr = shadow_addr;
pgt_info->ctx = ctx;
pgt_info->num_of_ptes = 0;
hash_add(ctx->mmu_shadow_hash, &pgt_info->node, shadow_addr);
return shadow_addr;
shadow_err:
gen_pool_free(hdev->mmu_priv.dr.mmu_pgt_pool, phys_addr,
prop->mmu_hop_table_size);
pool_add_err:
kfree(pgt_info);
return ULLONG_MAX;
}
static inline u64 get_phys_hop0_addr(struct hl_ctx *ctx)
{
return ctx->hdev->asic_prop.mmu_pgt_addr +
(ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
}
static inline u64 get_hop0_addr(struct hl_ctx *ctx)
{
return (u64) (uintptr_t) ctx->hdev->mmu_priv.dr.mmu_shadow_hop0 +
(ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
}
static void flush(struct hl_ctx *ctx)
{
/* flush all writes from all cores to reach PCI */
mb();
ctx->hdev->asic_funcs->read_pte(ctx->hdev, get_phys_hop0_addr(ctx));
}
/* transform the value to physical address when writing to H/W */
static inline void write_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val)
{
/*
* The value to write is actually the address of the next shadow hop +
* flags at the 12 LSBs.
* Hence in order to get the value to write to the physical PTE, we
* clear the 12 LSBs and translate the shadow hop to its associated
* physical hop, and add back the original 12 LSBs.
*/
u64 phys_val = get_phys_addr(ctx, val & HOP_PHYS_ADDR_MASK) |
(val & FLAGS_MASK);
ctx->hdev->asic_funcs->write_pte(ctx->hdev,
get_phys_addr(ctx, shadow_pte_addr),
phys_val);
*(u64 *) (uintptr_t) shadow_pte_addr = val;
}
/* do not transform the value to physical address when writing to H/W */
static inline void write_final_pte(struct hl_ctx *ctx, u64 shadow_pte_addr,
u64 val)
{
ctx->hdev->asic_funcs->write_pte(ctx->hdev,
get_phys_addr(ctx, shadow_pte_addr),
val);
*(u64 *) (uintptr_t) shadow_pte_addr = val;
}
/* clear the last and present bits */
static inline void clear_pte(struct hl_ctx *ctx, u64 pte_addr)
{
/* no need to transform the value to physical address */
write_final_pte(ctx, pte_addr, 0);
}
static inline void get_pte(struct hl_ctx *ctx, u64 hop_addr)
{
get_pgt_info(ctx, hop_addr)->num_of_ptes++;
}
/*
* put_pte - decrement the num of ptes and free the hop if possible
*
* @ctx: pointer to the context structure
* @hop_addr: addr of the hop
*
* This function returns the number of ptes left on this hop. If the number is
* 0, it means the pte was freed.
*/
static inline int put_pte(struct hl_ctx *ctx, u64 hop_addr)
{
struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);
int num_of_ptes_left;
pgt_info->num_of_ptes--;
/*
* Need to save the number of ptes left because free_hop might free
* the pgt_info
*/
num_of_ptes_left = pgt_info->num_of_ptes;
if (!num_of_ptes_left)
_free_hop(ctx, pgt_info);
return num_of_ptes_left;
}
static inline u64 get_hop_pte_addr(struct hl_ctx *ctx, struct hl_mmu_properties *mmu_prop,
u64 *hop_addr_arr, u64 virt_addr, enum mmu_hop_num hop_idx)
{
@ -183,35 +23,6 @@ static inline u64 get_hop_pte_addr(struct hl_ctx *ctx, struct hl_mmu_properties
ctx->hdev->asic_prop.mmu_pte_size * ((virt_addr & mask) >> shift);
}
static inline u64 get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte,
bool *is_new_hop)
{
u64 hop_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte);
if (hop_addr == ULLONG_MAX) {
hop_addr = alloc_hop(ctx);
*is_new_hop = (hop_addr != ULLONG_MAX);
}
return hop_addr;
}
/* translates shadow address inside hop to a physical address */
static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr)
{
u64 page_mask = (ctx->hdev->asic_prop.mmu_hop_table_size - 1);
u64 shadow_hop_addr = shadow_addr & ~page_mask;
u64 pte_offset = shadow_addr & page_mask;
u64 phys_hop_addr;
if (shadow_hop_addr != get_hop0_addr(ctx))
phys_hop_addr = get_pgt_info(ctx, shadow_hop_addr)->phys_addr;
else
phys_hop_addr = get_phys_hop0_addr(ctx);
return phys_hop_addr + pte_offset;
}
static int dram_default_mapping_init(struct hl_ctx *ctx)
{
struct hl_device *hdev = ctx->hdev;
@ -232,13 +43,13 @@ static int dram_default_mapping_init(struct hl_ctx *ctx)
/* add hop1 and hop2 */
total_hops = num_of_hop3 + 2;
ctx->dram_default_hops = kzalloc(HL_PTE_SIZE * total_hops, GFP_KERNEL);
ctx->dram_default_hops = kcalloc(total_hops, HL_PTE_SIZE, GFP_KERNEL);
if (!ctx->dram_default_hops)
return -ENOMEM;
hop0_addr = get_hop0_addr(ctx);
hop0_addr = hl_mmu_dr_get_hop0_addr(ctx);
hop1_addr = alloc_hop(ctx);
hop1_addr = hl_mmu_dr_alloc_hop(ctx);
if (hop1_addr == ULLONG_MAX) {
dev_err(hdev->dev, "failed to alloc hop 1\n");
rc = -ENOMEM;
@ -247,7 +58,7 @@ static int dram_default_mapping_init(struct hl_ctx *ctx)
ctx->dram_default_hops[total_hops - 1] = hop1_addr;
hop2_addr = alloc_hop(ctx);
hop2_addr = hl_mmu_dr_alloc_hop(ctx);
if (hop2_addr == ULLONG_MAX) {
dev_err(hdev->dev, "failed to alloc hop 2\n");
rc = -ENOMEM;
@ -257,7 +68,7 @@ static int dram_default_mapping_init(struct hl_ctx *ctx)
ctx->dram_default_hops[total_hops - 2] = hop2_addr;
for (i = 0 ; i < num_of_hop3 ; i++) {
ctx->dram_default_hops[i] = alloc_hop(ctx);
ctx->dram_default_hops[i] = hl_mmu_dr_alloc_hop(ctx);
if (ctx->dram_default_hops[i] == ULLONG_MAX) {
dev_err(hdev->dev, "failed to alloc hop 3, i: %d\n", i);
rc = -ENOMEM;
@ -268,18 +79,18 @@ static int dram_default_mapping_init(struct hl_ctx *ctx)
/* need only pte 0 in hops 0 and 1 */
pte_val = (hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
write_pte(ctx, hop0_addr, pte_val);
hl_mmu_dr_write_pte(ctx, hop0_addr, pte_val);
pte_val = (hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
write_pte(ctx, hop1_addr, pte_val);
get_pte(ctx, hop1_addr);
hl_mmu_dr_write_pte(ctx, hop1_addr, pte_val);
hl_mmu_dr_get_pte(ctx, hop1_addr);
hop2_pte_addr = hop2_addr;
for (i = 0 ; i < num_of_hop3 ; i++) {
pte_val = (ctx->dram_default_hops[i] & HOP_PHYS_ADDR_MASK) |
PAGE_PRESENT_MASK;
write_pte(ctx, hop2_pte_addr, pte_val);
get_pte(ctx, hop2_addr);
hl_mmu_dr_write_pte(ctx, hop2_pte_addr, pte_val);
hl_mmu_dr_get_pte(ctx, hop2_addr);
hop2_pte_addr += HL_PTE_SIZE;
}
@ -289,23 +100,23 @@ static int dram_default_mapping_init(struct hl_ctx *ctx)
for (i = 0 ; i < num_of_hop3 ; i++) {
hop3_pte_addr = ctx->dram_default_hops[i];
for (j = 0 ; j < HOP_PTE_ENTRIES_512 ; j++) {
write_final_pte(ctx, hop3_pte_addr, pte_val);
get_pte(ctx, ctx->dram_default_hops[i]);
hl_mmu_dr_write_final_pte(ctx, hop3_pte_addr, pte_val);
hl_mmu_dr_get_pte(ctx, ctx->dram_default_hops[i]);
hop3_pte_addr += HL_PTE_SIZE;
}
}
flush(ctx);
hl_mmu_dr_flush(ctx);
return 0;
hop3_err:
for (i = 0 ; i < hop3_allocated ; i++)
free_hop(ctx, ctx->dram_default_hops[i]);
hl_mmu_dr_free_hop(ctx, ctx->dram_default_hops[i]);
free_hop(ctx, hop2_addr);
hl_mmu_dr_free_hop(ctx, hop2_addr);
hop2_err:
free_hop(ctx, hop1_addr);
hl_mmu_dr_free_hop(ctx, hop1_addr);
hop1_err:
kfree(ctx->dram_default_hops);
@ -329,7 +140,7 @@ static void dram_default_mapping_fini(struct hl_ctx *ctx)
do_div(num_of_hop3, prop->dram_page_size);
do_div(num_of_hop3, HOP_PTE_ENTRIES_512);
hop0_addr = get_hop0_addr(ctx);
hop0_addr = hl_mmu_dr_get_hop0_addr(ctx);
/* add hop1 and hop2 */
total_hops = num_of_hop3 + 2;
hop1_addr = ctx->dram_default_hops[total_hops - 1];
@ -338,101 +149,26 @@ static void dram_default_mapping_fini(struct hl_ctx *ctx)
for (i = 0 ; i < num_of_hop3 ; i++) {
hop3_pte_addr = ctx->dram_default_hops[i];
for (j = 0 ; j < HOP_PTE_ENTRIES_512 ; j++) {
clear_pte(ctx, hop3_pte_addr);
put_pte(ctx, ctx->dram_default_hops[i]);
hl_mmu_dr_clear_pte(ctx, hop3_pte_addr);
hl_mmu_dr_put_pte(ctx, ctx->dram_default_hops[i]);
hop3_pte_addr += HL_PTE_SIZE;
}
}
hop2_pte_addr = hop2_addr;
for (i = 0 ; i < num_of_hop3 ; i++) {
clear_pte(ctx, hop2_pte_addr);
put_pte(ctx, hop2_addr);
hl_mmu_dr_clear_pte(ctx, hop2_pte_addr);
hl_mmu_dr_put_pte(ctx, hop2_addr);
hop2_pte_addr += HL_PTE_SIZE;
}
clear_pte(ctx, hop1_addr);
put_pte(ctx, hop1_addr);
clear_pte(ctx, hop0_addr);
hl_mmu_dr_clear_pte(ctx, hop1_addr);
hl_mmu_dr_put_pte(ctx, hop1_addr);
hl_mmu_dr_clear_pte(ctx, hop0_addr);
kfree(ctx->dram_default_hops);
flush(ctx);
}
/**
* hl_mmu_v1_init() - initialize the MMU module.
* @hdev: habanalabs device structure.
*
* This function does the following:
* - Create a pool of pages for pgt_infos.
* - Create a shadow table for pgt
*
* Return: 0 for success, non-zero for failure.
*/
static int hl_mmu_v1_init(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
int rc;
hdev->mmu_priv.dr.mmu_pgt_pool =
gen_pool_create(__ffs(prop->mmu_hop_table_size), -1);
if (!hdev->mmu_priv.dr.mmu_pgt_pool) {
dev_err(hdev->dev, "Failed to create page gen pool\n");
return -ENOMEM;
}
rc = gen_pool_add(hdev->mmu_priv.dr.mmu_pgt_pool, prop->mmu_pgt_addr +
prop->mmu_hop0_tables_total_size,
prop->mmu_pgt_size - prop->mmu_hop0_tables_total_size,
-1);
if (rc) {
dev_err(hdev->dev, "Failed to add memory to page gen pool\n");
goto err_pool_add;
}
hdev->mmu_priv.dr.mmu_shadow_hop0 = kvcalloc(prop->max_asid, prop->mmu_hop_table_size,
GFP_KERNEL);
if (ZERO_OR_NULL_PTR(hdev->mmu_priv.dr.mmu_shadow_hop0)) {
rc = -ENOMEM;
goto err_pool_add;
}
/* MMU H/W init will be done in device hw_init() */
return 0;
err_pool_add:
gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool);
return rc;
}
/**
* hl_mmu_v1_fini() - release the MMU module.
* @hdev: habanalabs device structure.
*
* This function does the following:
* - Disable MMU in H/W.
* - Free the pgt_infos pool.
*
* All contexts should be freed before calling this function.
*/
static void hl_mmu_v1_fini(struct hl_device *hdev)
{
/* MMU H/W fini was already done in device hw_fini() */
if (!ZERO_OR_NULL_PTR(hdev->mmu_priv.dr.mmu_shadow_hop0)) {
kvfree(hdev->mmu_priv.dr.mmu_shadow_hop0);
gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool);
/* Make sure that if we arrive here again without init was
* called we won't cause kernel panic. This can happen for
* example if we fail during hard reset code at certain points
*/
hdev->mmu_priv.dr.mmu_shadow_hop0 = NULL;
}
hl_mmu_dr_flush(ctx);
}
/**
@ -476,7 +212,7 @@ static void hl_mmu_v1_ctx_fini(struct hl_ctx *ctx)
dev_err_ratelimited(hdev->dev,
"pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n",
pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes);
_free_hop(ctx, pgt_info);
hl_mmu_dr_free_pgt_node(ctx, pgt_info);
}
}
@ -495,7 +231,7 @@ static int hl_mmu_v1_unmap(struct hl_ctx *ctx,
for (hop_idx = MMU_HOP0; hop_idx < MMU_HOP4; hop_idx++) {
if (hop_idx == MMU_HOP0) {
hop_addr[hop_idx] = get_hop0_addr(ctx);
hop_addr[hop_idx] = hl_mmu_dr_get_hop0_addr(ctx);
} else {
hop_addr[hop_idx] = hl_mmu_get_next_hop_addr(ctx, curr_pte);
if (hop_addr[hop_idx] == ULLONG_MAX)
@ -546,30 +282,30 @@ static int hl_mmu_v1_unmap(struct hl_ctx *ctx,
}
hop_idx = MMU_HOP3;
write_final_pte(ctx, hop_pte_addr[hop_idx], default_pte);
put_pte(ctx, hop_addr[hop_idx]);
hl_mmu_dr_write_final_pte(ctx, hop_pte_addr[hop_idx], default_pte);
hl_mmu_dr_put_pte(ctx, hop_addr[hop_idx]);
} else {
if (!(curr_pte & PAGE_PRESENT_MASK))
goto not_mapped;
if (hop_addr[MMU_HOP4])
clear_pte(ctx, hop_pte_addr[MMU_HOP4]);
hl_mmu_dr_clear_pte(ctx, hop_pte_addr[MMU_HOP4]);
else
clear_pte(ctx, hop_pte_addr[MMU_HOP3]);
hl_mmu_dr_clear_pte(ctx, hop_pte_addr[MMU_HOP3]);
if (hop_addr[MMU_HOP4] && !put_pte(ctx, hop_addr[MMU_HOP4]))
if (hop_addr[MMU_HOP4] && !hl_mmu_dr_put_pte(ctx, hop_addr[MMU_HOP4]))
clear_hop3 = true;
if (!clear_hop3)
goto mapped;
for (hop_idx = MMU_HOP3; hop_idx >= 0; hop_idx--) {
clear_pte(ctx, hop_pte_addr[hop_idx]);
hl_mmu_dr_clear_pte(ctx, hop_pte_addr[hop_idx]);
if (hop_idx == MMU_HOP0)
break;
if (put_pte(ctx, hop_addr[hop_idx]))
if (hl_mmu_dr_put_pte(ctx, hop_addr[hop_idx]))
goto mapped;
}
}
@ -616,10 +352,10 @@ static int hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
for (hop_idx = MMU_HOP0; hop_idx < num_hops; hop_idx++) {
if (hop_idx == MMU_HOP0) {
hop_addr[hop_idx] = get_hop0_addr(ctx);
hop_addr[hop_idx] = hl_mmu_dr_get_hop0_addr(ctx);
} else {
hop_addr[hop_idx] =
get_alloc_next_hop_addr(ctx, curr_pte, &hop_new[hop_idx]);
hl_mmu_dr_get_alloc_next_hop_addr(ctx, curr_pte, &hop_new[hop_idx]);
if (hop_addr[hop_idx] == ULLONG_MAX)
goto err;
}
@ -666,27 +402,27 @@ static int hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
curr_pte = (phys_addr & HOP_PHYS_ADDR_MASK) | mmu_prop->last_mask
| PAGE_PRESENT_MASK;
write_final_pte(ctx, hop_pte_addr[num_hops - 1], curr_pte);
hl_mmu_dr_write_final_pte(ctx, hop_pte_addr[num_hops - 1], curr_pte);
for (hop_idx = MMU_HOP1; hop_idx < num_hops; hop_idx++) {
prev_hop = hop_idx - 1;
if (hop_new[hop_idx]) {
curr_pte = (hop_addr[hop_idx] & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
write_pte(ctx, hop_pte_addr[prev_hop], curr_pte);
hl_mmu_dr_write_pte(ctx, hop_pte_addr[prev_hop], curr_pte);
if (hop_idx != MMU_HOP1)
get_pte(ctx, hop_addr[prev_hop]);
hl_mmu_dr_get_pte(ctx, hop_addr[prev_hop]);
}
}
get_pte(ctx, hop_addr[num_hops - 1]);
hl_mmu_dr_get_pte(ctx, hop_addr[num_hops - 1]);
return 0;
err:
for (hop_idx = num_hops; hop_idx > MMU_HOP0; hop_idx--) {
if (hop_new[hop_idx])
free_hop(ctx, hop_addr[hop_idx]);
hl_mmu_dr_free_hop(ctx, hop_addr[hop_idx]);
}
return rc;
@ -752,7 +488,7 @@ static int hl_mmu_v1_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
if (is_huge)
used_hops--;
hops->hop_info[0].hop_addr = get_phys_hop0_addr(ctx);
hops->hop_info[0].hop_addr = hl_mmu_dr_get_phys_hop0_addr(ctx);
hops->hop_info[0].hop_pte_addr =
hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, 0,
hops->hop_info[0].hop_addr, virt_addr);
@ -801,13 +537,13 @@ static int hl_mmu_v1_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
*/
void hl_mmu_v1_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu)
{
mmu->init = hl_mmu_v1_init;
mmu->fini = hl_mmu_v1_fini;
mmu->init = hl_mmu_dr_init;
mmu->fini = hl_mmu_dr_fini;
mmu->ctx_init = hl_mmu_v1_ctx_init;
mmu->ctx_fini = hl_mmu_v1_ctx_fini;
mmu->map = hl_mmu_v1_map;
mmu->unmap = hl_mmu_v1_unmap;
mmu->flush = flush;
mmu->flush = hl_mmu_dr_flush;
mmu->swap_out = hl_mmu_v1_swap_out;
mmu->swap_in = hl_mmu_v1_swap_in;
mmu->get_tlb_info = hl_mmu_v1_get_tlb_info;

View File

@ -0,0 +1,338 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright 2016-2020 HabanaLabs, Ltd.
* All Rights Reserved.
*/
#include "../habanalabs.h"
#include "../../include/hw_ip/mmu/mmu_general.h"
#include "../../include/hw_ip/mmu/mmu_v2_0.h"
#include <linux/slab.h>
/**
* hl_mmu_v2_ctx_init() - initialize a context for using the MMU module.
* @ctx: pointer to the context structure to initialize.
*
* Initialize a mutex to protect the concurrent mapping flow, a hash to hold all
* page tables hops related to this context.
* Return: 0 on success, non-zero otherwise.
*/
static int hl_mmu_v2_ctx_init(struct hl_ctx *ctx)
{
hash_init(ctx->mmu_shadow_hash);
return 0;
}
/*
* hl_mmu_v2_ctx_fini - disable a ctx from using the mmu module
*
* @ctx: pointer to the context structure
*
* This function does the following:
* - Free any pgts which were not freed yet
* - Free the mutex
* - Free DRAM default page mapping hops
*/
static void hl_mmu_v2_ctx_fini(struct hl_ctx *ctx)
{
struct hl_device *hdev = ctx->hdev;
struct pgt_info *pgt_info;
struct hlist_node *tmp;
int i;
if (!hash_empty(ctx->mmu_shadow_hash))
dev_err(hdev->dev, "ctx %d is freed while it has pgts in use\n",
ctx->asid);
hash_for_each_safe(ctx->mmu_shadow_hash, i, tmp, pgt_info, node) {
dev_err_ratelimited(hdev->dev,
"pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n",
pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes);
hl_mmu_dr_free_pgt_node(ctx, pgt_info);
}
}
static int hl_mmu_v2_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr)
{
u64 hop_addr[MMU_ARCH_6_HOPS] = { 0 }, hop_pte_addr[MMU_ARCH_6_HOPS] = { 0 }, curr_pte,
scrambled_virt_addr;
struct asic_fixed_properties *prop = &ctx->hdev->asic_prop;
struct hl_device *hdev = ctx->hdev;
struct hl_mmu_properties *mmu_prop;
bool is_huge = false;
int i, hop_last;
/* device resident in V2 are allowed only for HMMU */
if (!is_dram_addr)
return -EINVAL;
mmu_prop = &prop->dmmu;
hop_last = mmu_prop->num_hops - 1;
scrambled_virt_addr = hdev->asic_funcs->scramble_addr(hdev, virt_addr);
hop_addr[0] = hl_mmu_dr_get_hop0_addr(ctx);
hop_pte_addr[0] = hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, 0,
hop_addr[0], scrambled_virt_addr);
if (hop_pte_addr[0] == U64_MAX)
return -EFAULT;
curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[0];
for (i = 1 ; i < mmu_prop->num_hops ; i++) {
hop_addr[i] = hl_mmu_get_next_hop_addr(ctx, curr_pte);
if (hop_addr[i] == ULLONG_MAX)
goto not_mapped;
hop_pte_addr[i] = hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, i,
hop_addr[i], scrambled_virt_addr);
if (hop_pte_addr[i] == U64_MAX)
return -EFAULT;
curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[i];
if ((i <= hop_last) && (curr_pte & mmu_prop->last_mask)) {
hop_last = i;
is_huge = true;
break;
}
}
if (is_dram_addr && !is_huge) {
dev_err(hdev->dev, "DRAM unmapping should use huge pages only\n");
return -EFAULT;
}
if (!(curr_pte & PAGE_PRESENT_MASK))
goto not_mapped;
for (i = hop_last ; i > 0 ; i--) {
hl_mmu_dr_clear_pte(ctx, hop_pte_addr[i]);
if (hl_mmu_dr_put_pte(ctx, hop_addr[i]))
goto mapped;
}
hl_mmu_dr_clear_pte(ctx, hop_pte_addr[0]);
mapped:
return 0;
not_mapped:
dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
virt_addr);
return -EINVAL;
}
static int hl_mmu_v2_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
u32 page_size, bool is_dram_addr)
{
u64 hop_addr[MMU_ARCH_6_HOPS] = { 0 }, hop_pte_addr[MMU_ARCH_6_HOPS] = { 0 },
curr_pte = 0, scrambled_virt_addr, scrambled_phys_addr;
struct asic_fixed_properties *prop = &ctx->hdev->asic_prop;
bool hop_new[MMU_ARCH_6_HOPS] = { false };
struct hl_device *hdev = ctx->hdev;
struct hl_mmu_properties *mmu_prop;
int rc, i, hop_last;
/* device resident in V2 are allowed only for HMMU */
if (!is_dram_addr)
return -EINVAL;
mmu_prop = &prop->dmmu;
hop_last = mmu_prop->num_hops - 1;
scrambled_virt_addr = hdev->asic_funcs->scramble_addr(hdev, virt_addr);
scrambled_phys_addr = hdev->asic_funcs->scramble_addr(hdev, phys_addr);
/* First hop is preallocated therefore it is treated differently */
hop_addr[0] = hl_mmu_dr_get_hop0_addr(ctx);
hop_pte_addr[0] = hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, 0,
hop_addr[0], scrambled_virt_addr);
curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[0];
/* Handle hop1 to hop_last */
for (i = 1 ; i <= hop_last ; i++) {
hop_addr[i] = hl_mmu_dr_get_alloc_next_hop_addr(ctx, curr_pte, &hop_new[i]);
if (hop_addr[i] == ULLONG_MAX) {
rc = -ENOMEM;
goto err;
}
hop_pte_addr[i] = hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, i,
hop_addr[i], scrambled_virt_addr);
if (hop_pte_addr[i] == U64_MAX) {
rc = -EINVAL;
goto err;
}
if (!hop_pte_addr[i]) {
rc = -EINVAL;
goto err;
}
curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[i];
}
if (curr_pte & PAGE_PRESENT_MASK) {
dev_err(hdev->dev,
"mapping already exists for virt_addr 0x%llx\n",
virt_addr);
for (i = 0 ; i <= hop_last ; i++)
dev_dbg(hdev->dev, "hop%d pte: 0x%llx (0x%llx)\n",
i, *(u64 *) (uintptr_t) hop_pte_addr[i],
hop_pte_addr[i]);
rc = -EINVAL;
goto err;
}
curr_pte = (scrambled_phys_addr & HOP_PHYS_ADDR_MASK)
| mmu_prop->last_mask | PAGE_PRESENT_MASK;
/* Write the PTEs */
hl_mmu_dr_write_final_pte(ctx, hop_pte_addr[hop_last], curr_pte);
/* for each new hop, add its address to the table of previous-hop */
for (i = 1 ; i <= hop_last ; i++) {
if (hop_new[i]) {
curr_pte = (hop_addr[i] & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
hl_mmu_dr_write_pte(ctx, hop_pte_addr[i - 1], curr_pte);
if (i - 1)
hl_mmu_dr_get_pte(ctx, hop_addr[i - 1]);
}
}
hl_mmu_dr_get_pte(ctx, hop_addr[hop_last]);
return 0;
err:
for (i = 1 ; i <= hop_last ; i++)
if (hop_new[i] && (hop_addr[i] != U64_MAX))
hl_mmu_dr_free_hop(ctx, hop_addr[i]);
return rc;
}
/*
* hl_mmu_v2_swap_out - marks all mapping of the given ctx as swapped out
*
* @ctx: pointer to the context structure
*
*/
static void hl_mmu_v2_swap_out(struct hl_ctx *ctx)
{
}
/*
* hl_mmu_v2_swap_in - marks all mapping of the given ctx as swapped in
*
* @ctx: pointer to the context structure
*
*/
static void hl_mmu_v2_swap_in(struct hl_ctx *ctx)
{
}
static int hl_mmu_v2_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, struct hl_mmu_hop_info *hops)
{
struct asic_fixed_properties *prop = &ctx->hdev->asic_prop;
struct hl_device *hdev = ctx->hdev;
struct hl_mmu_properties *mmu_prop;
bool is_dram_addr;
int i;
is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
prop->dmmu.start_addr,
prop->dmmu.end_addr);
/* device resident in V2 are allowed only for HMMU */
if (!is_dram_addr)
return -EINVAL;
mmu_prop = &prop->dmmu;
hops->range_type = HL_VA_RANGE_TYPE_DRAM;
hops->scrambled_vaddr = hdev->asic_funcs->scramble_addr(hdev, virt_addr);
hops->hop_info[0].hop_addr = hl_mmu_dr_get_phys_hop0_addr(ctx);
hops->hop_info[0].hop_pte_addr = hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, 0,
hops->hop_info[0].hop_addr,
hops->scrambled_vaddr);
if (hops->hop_info[0].hop_pte_addr == U64_MAX)
return -EFAULT;
hops->hop_info[0].hop_pte_val = hdev->asic_funcs->read_pte(hdev,
hops->hop_info[0].hop_pte_addr);
if (hops->hop_info[0].hop_pte_val == U64_MAX)
return -EFAULT;
for (i = 1 ; i < mmu_prop->num_hops ; i++) {
hops->hop_info[i].hop_addr =
hl_mmu_get_next_hop_addr(ctx, hops->hop_info[i - 1].hop_pte_val);
if (hops->hop_info[i].hop_addr == ULLONG_MAX)
return -EFAULT;
hops->hop_info[i].hop_pte_addr =
hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, i,
hops->hop_info[i].hop_addr,
hops->scrambled_vaddr);
if (hops->hop_info[i].hop_pte_addr == U64_MAX)
return -EFAULT;
hops->hop_info[i].hop_pte_val =
hdev->asic_funcs->read_pte(hdev,
hops->hop_info[i].hop_pte_addr);
if (!(hops->hop_info[i].hop_pte_val & PAGE_PRESENT_MASK))
return -EFAULT;
if (hops->hop_info[i].hop_pte_val & mmu_prop->last_mask)
break;
}
/* if passed over all hops then no last hop was found */
if (i == mmu_prop->num_hops)
return -EFAULT;
if (!(hops->hop_info[i].hop_pte_val & PAGE_PRESENT_MASK))
return -EFAULT;
if (hops->scrambled_vaddr != virt_addr)
hops->unscrambled_paddr = hdev->asic_funcs->descramble_addr
(hdev, hops->hop_info[i].hop_pte_val);
else
hops->unscrambled_paddr = hops->hop_info[i].hop_pte_val;
hops->used_hops = i + 1;
return 0;
}
/*
* hl_mmu_v2_prepare - prepare mmu_if for working with mmu v2
*
* @hdev: pointer to the device structure
* @mmu_if: pointer to the mmu interface structure
*/
void hl_mmu_v2_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu)
{
mmu->init = hl_mmu_dr_init;
mmu->fini = hl_mmu_dr_fini;
mmu->ctx_init = hl_mmu_v2_ctx_init;
mmu->ctx_fini = hl_mmu_v2_ctx_fini;
mmu->map = hl_mmu_v2_map;
mmu->unmap = hl_mmu_v2_unmap;
mmu->flush = hl_mmu_dr_flush;
mmu->swap_out = hl_mmu_v2_swap_out;
mmu->swap_in = hl_mmu_v2_swap_in;
mmu->get_tlb_info = hl_mmu_v2_get_tlb_info;
}

View File

@ -47,7 +47,7 @@ static inline int hl_mmu_v2_hr_init(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
return hl_mmu_hr_init(hdev, &hdev->mmu_priv.hr, prop->mmu_hop_table_size,
return hl_mmu_hr_init(hdev, &hdev->mmu_priv.hr, prop->pmmu.hop_table_size,
prop->mmu_pgt_size);
}
@ -65,7 +65,7 @@ static inline void hl_mmu_v2_hr_fini(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
hl_mmu_hr_fini(hdev, &hdev->mmu_priv.hr, prop->mmu_hop_table_size);
hl_mmu_hr_fini(hdev, &hdev->mmu_priv.hr, prop->pmmu.hop_table_size);
}
/**
@ -108,7 +108,7 @@ static void hl_mmu_v2_hr_ctx_fini(struct hl_ctx *ctx)
"pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n",
pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes);
hl_mmu_hr_free_hop_remove_pgt(pgt_info, &ctx->hdev->mmu_priv.hr,
ctx->hdev->asic_prop.mmu_hop_table_size);
ctx->hdev->asic_prop.pmmu.hop_table_size);
}
}
@ -150,7 +150,7 @@ static int _hl_mmu_v2_hr_unmap(struct hl_ctx *ctx,
curr_pte = *(u64 *) (uintptr_t) hl_mmu_hr_pte_phys_to_virt(ctx, hops_pgt_info[i],
hop_pte_phys_addr[i],
ctx->hdev->asic_prop.mmu_hop_table_size);
ctx->hdev->asic_prop.pmmu.hop_table_size);
if ((i < hop_last) && (curr_pte & mmu_prop->last_mask)) {
hop_last = i;
@ -169,14 +169,14 @@ static int _hl_mmu_v2_hr_unmap(struct hl_ctx *ctx,
for (i = hop_last ; i > 0 ; i--) {
hl_mmu_hr_clear_pte(ctx, hops_pgt_info[i], hop_pte_phys_addr[i],
ctx->hdev->asic_prop.mmu_hop_table_size);
ctx->hdev->asic_prop.pmmu.hop_table_size);
if (hl_mmu_hr_put_pte(ctx, hops_pgt_info[i], &ctx->hdev->mmu_priv.hr,
ctx->hdev->asic_prop.mmu_hop_table_size))
ctx->hdev->asic_prop.pmmu.hop_table_size))
goto mapped;
}
hl_mmu_hr_clear_pte(ctx, hops_pgt_info[0], hop_pte_phys_addr[0],
ctx->hdev->asic_prop.mmu_hop_table_size);
ctx->hdev->asic_prop.pmmu.hop_table_size);
mapped:
return 0;
@ -255,7 +255,7 @@ static int _hl_mmu_v2_hr_map(struct hl_ctx *ctx,
scrambled_virt_addr);
curr_pte = *(u64 *) (uintptr_t) hl_mmu_hr_pte_phys_to_virt(ctx, hops_pgt_info[i],
hop_pte_phys_addr[i],
ctx->hdev->asic_prop.mmu_hop_table_size);
ctx->hdev->asic_prop.pmmu.hop_table_size);
}
if (curr_pte & PAGE_PRESENT_MASK) {
@ -268,7 +268,7 @@ static int _hl_mmu_v2_hr_map(struct hl_ctx *ctx,
*(u64 *) (uintptr_t)
hl_mmu_hr_pte_phys_to_virt(ctx, hops_pgt_info[i],
hop_pte_phys_addr[i],
ctx->hdev->asic_prop.mmu_hop_table_size),
ctx->hdev->asic_prop.pmmu.hop_table_size),
hop_pte_phys_addr[i]);
rc = -EINVAL;
goto err;
@ -279,7 +279,7 @@ static int _hl_mmu_v2_hr_map(struct hl_ctx *ctx,
/* Write the PTEs */
hl_mmu_hr_write_pte(ctx, hops_pgt_info[hop_last], hop_pte_phys_addr[hop_last], curr_pte,
ctx->hdev->asic_prop.mmu_hop_table_size);
ctx->hdev->asic_prop.pmmu.hop_table_size);
/* for each new hop, add its address to the table of previous-hop */
for (i = 1 ; i <= hop_last ; i++) {
@ -287,7 +287,7 @@ static int _hl_mmu_v2_hr_map(struct hl_ctx *ctx,
curr_pte = (hops_pgt_info[i]->phys_addr & HOP_PHYS_ADDR_MASK) |
PAGE_PRESENT_MASK;
hl_mmu_hr_write_pte(ctx, hops_pgt_info[i - 1], hop_pte_phys_addr[i - 1],
curr_pte, ctx->hdev->asic_prop.mmu_hop_table_size);
curr_pte, ctx->hdev->asic_prop.pmmu.hop_table_size);
if (i - 1)
hl_mmu_hr_get_pte(ctx, &ctx->hdev->mmu_func[MMU_HR_PGT].hr_funcs,
hops_pgt_info[i - 1]->phys_addr);
@ -303,7 +303,7 @@ err:
for (i = 1 ; i <= hop_last ; i++)
if (hop_new[i] && hops_pgt_info[i])
hl_mmu_hr_free_hop_remove_pgt(hops_pgt_info[i], &ctx->hdev->mmu_priv.hr,
ctx->hdev->asic_prop.mmu_hop_table_size);
ctx->hdev->asic_prop.pmmu.hop_table_size);
return rc;
}

View File

@ -7,15 +7,31 @@
#include "habanalabs.h"
static const char * const hl_glbl_error_cause[HL_MAX_NUM_OF_GLBL_ERR_CAUSE] = {
static const char * const hl_glbl_error_cause[] = {
"Error due to un-priv read",
"Error due to un-secure read",
"Error due to read from unmapped reg",
"Error due to un-priv write",
"Error due to un-secure write",
"Error due to write to unmapped reg",
"N/A",
"N/A",
"N/A",
"N/A",
"N/A",
"N/A",
"N/A",
"N/A",
"N/A",
"N/A",
"External I/F write sec violation",
"External I/F write to un-mapped reg",
"N/A",
"N/A",
"N/A",
"N/A",
"N/A",
"N/A",
"Read to write only",
"Write to read only"
};
@ -671,10 +687,11 @@ static bool hl_check_block_range_exclusion(struct hl_device *hdev,
static int hl_read_glbl_errors(struct hl_device *hdev,
u32 blk_idx, u32 major, u32 minor, u32 sub_minor, void *data)
{
struct hl_special_block_info *special_blocks = hdev->asic_prop.special_blocks;
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct hl_special_block_info *special_blocks = prop->special_blocks;
struct hl_special_block_info *current_block = &special_blocks[blk_idx];
u32 glbl_err_addr, glbl_err_cause, addr_val, cause_val, block_base,
base = current_block->base_addr - lower_32_bits(hdev->asic_prop.cfg_base_address);
base = current_block->base_addr - lower_32_bits(prop->cfg_base_address);
int i;
block_base = base + major * current_block->major_offset +
@ -689,12 +706,12 @@ static int hl_read_glbl_errors(struct hl_device *hdev,
glbl_err_addr = block_base + HL_GLBL_ERR_ADDR_OFFSET;
addr_val = RREG32(glbl_err_addr);
for (i = 0 ; i < hdev->asic_prop.glbl_err_cause_num ; i++) {
for (i = 0 ; i <= prop->glbl_err_max_cause_num ; i++) {
if (cause_val & BIT(i))
dev_err_ratelimited(hdev->dev,
"%s, addr %#llx\n",
hl_glbl_error_cause[i],
hdev->asic_prop.cfg_base_address + block_base +
prop->cfg_base_address + block_base +
FIELD_GET(HL_GLBL_ERR_ADDRESS_MASK, addr_val));
}

View File

@ -13,7 +13,6 @@
struct hl_device;
/* special blocks */
#define HL_MAX_NUM_OF_GLBL_ERR_CAUSE 10
#define HL_GLBL_ERR_ADDRESS_MASK GENMASK(11, 0)
/* GLBL_ERR_ADDR register offset from the start of the block */
#define HL_GLBL_ERR_ADDR_OFFSET 0xF44

View File

@ -614,8 +614,6 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev)
else
prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
prop->mmu_pte_size = HL_PTE_SIZE;
prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
prop->dram_page_size = PAGE_SIZE_2MB;
prop->device_mem_alloc_default_page_size = prop->dram_page_size;
prop->dram_supports_virtual_memory = false;
@ -637,8 +635,8 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev)
prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
prop->pmmu.last_mask = LAST_MASK;
/* TODO: will be duplicated until implementing per-MMU props */
prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
prop->pmmu.hop_table_size = HOP_TABLE_SIZE_512_PTE;
prop->pmmu.hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
/* PMMU and HPMMU are the same except of page size */
memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
@ -649,6 +647,7 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev)
prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
prop->dmmu.end_addr = VA_HOST_SPACE_END;
prop->dmmu.page_size = PAGE_SIZE_2MB;
prop->dmmu.pgt_size = prop->mmu_pgt_size;
prop->cfg_size = CFG_SIZE;
prop->max_asid = MAX_ASID;
@ -3652,7 +3651,7 @@ static int gaudi_mmu_init(struct hl_device *hdev)
for (i = 0 ; i < prop->max_asid ; i++) {
hop0_addr = prop->mmu_pgt_addr +
(i * prop->mmu_hop_table_size);
(i * prop->dmmu.hop_table_size);
rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
if (rc) {

View File

@ -164,6 +164,8 @@
/* HW scrambles only bits 0-25 */
#define HW_UNSCRAMBLED_BITS_MASK GENMASK_ULL(63, 26)
#define GAUDI2_GLBL_ERR_MAX_CAUSE_NUM 17
struct gaudi2_razwi_info {
u32 axuser_xy;
u32 rtr_ctrl;
@ -2308,11 +2310,26 @@ static int set_number_of_functional_hbms(struct hl_device *hdev)
return 0;
}
static bool gaudi2_is_edma_queue_id(u32 queue_id)
{
switch (queue_id) {
case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3:
case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3:
case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3:
case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3:
return true;
default:
return false;
}
}
static int gaudi2_set_dram_properties(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
u32 basic_hbm_page_size;
int rc;
u64 hbm_drv_base_offset = 0, edma_pq_base_addr;
u32 basic_hbm_page_size, edma_idx = 0;
int rc, i;
rc = set_number_of_functional_hbms(hdev);
if (rc)
@ -2356,9 +2373,35 @@ static int gaudi2_set_dram_properties(struct hl_device *hdev)
prop->dmmu.start_addr = prop->dram_base_address +
(prop->dram_page_size *
DIV_ROUND_UP_SECTOR_T(prop->dram_size, prop->dram_page_size));
prop->dmmu.end_addr = prop->dmmu.start_addr + prop->dram_page_size *
div_u64((VA_HBM_SPACE_END - prop->dmmu.start_addr), prop->dmmu.page_size);
/*
* Driver can't share an (48MB) HBM page with the F/W in order to prevent FW to block
* the driver part by range register, so it must start at the next (48MB) page
*/
hbm_drv_base_offset = roundup(CPU_FW_IMAGE_SIZE, prop->num_functional_hbms * SZ_8M);
/*
* The NIC driver section size and the HMMU page tables section in the HBM needs
* to be the remaining size in the first dram page after taking into
* account the F/W image size
*/
/* Reserve region in HBM for HMMU page tables */
prop->mmu_pgt_addr = DRAM_PHYS_BASE + hbm_drv_base_offset +
((prop->dram_page_size - hbm_drv_base_offset) -
(HMMU_PAGE_TABLES_SIZE + EDMA_PQS_SIZE + EDMA_SCRATCHPAD_SIZE));
/* Set EDMA PQs HBM addresses */
edma_pq_base_addr = prop->mmu_pgt_addr + HMMU_PAGE_TABLES_SIZE;
for (i = 0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i++) {
if (gaudi2_is_edma_queue_id(i)) {
prop->hw_queues_props[i].q_dram_bd_address = edma_pq_base_addr +
(edma_idx * HL_QUEUE_SIZE_IN_BYTES);
edma_idx++;
}
}
return 0;
}
@ -2368,7 +2411,7 @@ static int gaudi2_set_fixed_properties(struct hl_device *hdev)
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct hw_queue_properties *q_props;
u32 num_sync_stream_queues = 0;
int i;
int i, rc;
prop->max_queues = GAUDI2_QUEUE_ID_SIZE;
prop->hw_queues_props = kcalloc(prop->max_queues, sizeof(struct hw_queue_properties),
@ -2391,6 +2434,9 @@ static int gaudi2_set_fixed_properties(struct hl_device *hdev)
}
q_props[i].cb_alloc_flags = CB_ALLOC_USER;
if (gaudi2_is_edma_queue_id(i))
q_props[i].dram_bd = 1;
}
q_props[GAUDI2_QUEUE_ID_CPU_PQ].type = QUEUE_TYPE_CPU;
@ -2419,46 +2465,43 @@ static int gaudi2_set_fixed_properties(struct hl_device *hdev)
prop->rotator_enabled_mask = BIT(NUM_OF_ROT) - 1;
if (hdev->pldm)
prop->mmu_pgt_size = 0x800000; /* 8MB */
else
prop->mmu_pgt_size = MMU_PAGE_TABLES_INITIAL_SIZE;
prop->max_asid = 2;
prop->dmmu.pgt_size = HMMU_PAGE_TABLES_SIZE;
prop->mmu_pte_size = HL_PTE_SIZE;
prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
prop->dmmu.hop_shifts[MMU_HOP0] = DHOP0_SHIFT;
prop->dmmu.hop_shifts[MMU_HOP1] = DHOP1_SHIFT;
prop->dmmu.hop_shifts[MMU_HOP2] = DHOP2_SHIFT;
prop->dmmu.hop_shifts[MMU_HOP3] = DHOP3_SHIFT;
prop->dmmu.hop_shifts[MMU_HOP4] = DHOP4_SHIFT;
prop->dmmu.hop_masks[MMU_HOP0] = DHOP0_MASK;
prop->dmmu.hop_masks[MMU_HOP1] = DHOP1_MASK;
prop->dmmu.hop_masks[MMU_HOP2] = DHOP2_MASK;
prop->dmmu.hop_masks[MMU_HOP3] = DHOP3_MASK;
prop->dmmu.hop_masks[MMU_HOP4] = DHOP4_MASK;
prop->dmmu.page_size = PAGE_SIZE_1GB;
prop->dmmu.num_hops = MMU_ARCH_6_HOPS;
prop->dmmu.num_hops = MMU_ARCH_4_HOPS;
prop->dmmu.last_mask = LAST_MASK;
prop->dmmu.host_resident = 1;
prop->dmmu.hop_table_size = prop->mmu_hop_table_size;
prop->dmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
prop->dmmu.host_resident = 0;
prop->dmmu.hop_table_size = HOP_TABLE_SIZE_512_PTE;
prop->dmmu.hop0_tables_total_size = HOP_TABLE_SIZE_512_PTE * prop->max_asid;
/*
* this is done in order to be able to validate FW descriptor (i.e. validating that
* the addresses and allocated space for FW image does not cross memory bounds).
* for this reason we set the DRAM size to the minimum possible and later it will
* be modified according to what reported in the cpucp info packet
/* As we need to set the pgt address in dram for HMMU init so we cannot
* wait to the fw cpucp info to set the dram props as mmu init comes before
* hw init
*/
prop->dram_size = (GAUDI2_HBM_NUM - 1) * SZ_16G;
rc = hdev->asic_funcs->set_dram_properties(hdev);
if (rc)
goto free_qprops;
prop->mmu_pgt_size = PMMU_PAGE_TABLES_SIZE;
prop->pmmu.pgt_size = prop->mmu_pgt_size;
hdev->pmmu_huge_range = true;
prop->pmmu.host_resident = 1;
prop->pmmu.num_hops = MMU_ARCH_6_HOPS;
prop->pmmu.last_mask = LAST_MASK;
prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
prop->pmmu.hop_table_size = HOP_TABLE_SIZE_512_PTE;
prop->pmmu.hop0_tables_total_size = HOP_TABLE_SIZE_512_PTE * prop->max_asid;
prop->hints_host_reserved_va_range.start_addr = RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START;
prop->hints_host_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HOST_END;
@ -2516,7 +2559,6 @@ static int gaudi2_set_fixed_properties(struct hl_device *hdev)
prop->max_num_of_engines = GAUDI2_ENGINE_ID_SIZE;
prop->num_engine_cores = CPU_ID_MAX;
prop->cfg_size = CFG_SIZE;
prop->max_asid = MAX_ASID;
prop->num_of_events = GAUDI2_EVENT_SIZE;
prop->supports_engine_modes = true;
@ -2560,6 +2602,10 @@ static int gaudi2_set_fixed_properties(struct hl_device *hdev)
prop->hbw_flush_reg = mmPCIE_WRAP_SPECIAL_GLBL_SPARE_0;
return 0;
free_qprops:
kfree(prop->hw_queues_props);
return rc;
}
static int gaudi2_pci_bars_map(struct hl_device *hdev)
@ -3033,6 +3079,25 @@ static int gaudi2_fetch_psoc_frequency(struct hl_device *hdev)
return 0;
}
static int gaudi2_mmu_clear_pgt_range(struct hl_device *hdev)
{
struct gaudi2_device *gaudi2 = hdev->asic_specific;
struct asic_fixed_properties *prop = &hdev->asic_prop;
int rc;
if (!(gaudi2->hw_cap_initialized & HW_CAP_MMU_MASK))
return 0;
if (prop->dmmu.host_resident)
return 0;
rc = gaudi2_memset_device_memory(hdev, prop->mmu_pgt_addr, prop->dmmu.pgt_size, 0);
if (rc)
dev_err(hdev->dev, "Failed to clear mmu pgt");
return rc;
}
static int gaudi2_early_init(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
@ -3258,6 +3323,12 @@ static int gaudi2_late_init(struct hl_device *hdev)
goto disable_pci_access;
}
rc = gaudi2_mmu_clear_pgt_range(hdev);
if (rc) {
dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
goto disable_pci_access;
}
gaudi2_init_arcs(hdev);
rc = gaudi2_scrub_arcs_dccm(hdev);
@ -3518,7 +3589,7 @@ static int gaudi2_special_blocks_config(struct hl_device *hdev)
int i, rc;
/* Configure Special blocks */
prop->glbl_err_cause_num = GAUDI2_NUM_OF_GLBL_ERR_CAUSE;
prop->glbl_err_max_cause_num = GAUDI2_GLBL_ERR_MAX_CAUSE_NUM;
prop->num_of_special_blocks = ARRAY_SIZE(gaudi2_special_blocks);
prop->special_blocks = kmalloc_array(prop->num_of_special_blocks,
sizeof(*prop->special_blocks), GFP_KERNEL);
@ -3697,13 +3768,7 @@ static int gaudi2_sw_init(struct hl_device *hdev)
spin_lock_init(&gaudi2->hw_queues_lock);
gaudi2->scratchpad_kernel_address = hl_asic_dma_alloc_coherent(hdev, PAGE_SIZE,
&gaudi2->scratchpad_bus_address,
GFP_KERNEL | __GFP_ZERO);
if (!gaudi2->scratchpad_kernel_address) {
rc = -ENOMEM;
goto free_virt_msix_db_mem;
}
gaudi2->scratchpad_bus_address = prop->mmu_pgt_addr + HMMU_PAGE_TABLES_SIZE + EDMA_PQS_SIZE;
gaudi2_user_mapped_blocks_init(hdev);
@ -3727,7 +3792,7 @@ static int gaudi2_sw_init(struct hl_device *hdev)
rc = gaudi2_special_blocks_iterator_config(hdev);
if (rc)
goto free_scratchpad_mem;
goto free_virt_msix_db_mem;
rc = gaudi2_test_queues_msgs_alloc(hdev);
if (rc)
@ -3737,9 +3802,6 @@ static int gaudi2_sw_init(struct hl_device *hdev)
special_blocks_free:
gaudi2_special_blocks_iterator_free(hdev);
free_scratchpad_mem:
hl_asic_dma_free_coherent(hdev, PAGE_SIZE, gaudi2->scratchpad_kernel_address,
gaudi2->scratchpad_bus_address);
free_virt_msix_db_mem:
hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr);
free_cpu_accessible_dma_pool:
@ -3770,9 +3832,6 @@ static int gaudi2_sw_fini(struct hl_device *hdev)
hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
hdev->cpu_accessible_dma_address);
hl_asic_dma_free_coherent(hdev, PAGE_SIZE, gaudi2->scratchpad_kernel_address,
gaudi2->scratchpad_bus_address);
dma_pool_destroy(hdev->dma_pool);
kfree(gaudi2);
@ -4254,6 +4313,8 @@ static int gaudi2_enable_msix(struct hl_device *hdev)
if (gaudi2->hw_cap_initialized & HW_CAP_MSIX)
return 0;
hl_init_cpu_for_irq(hdev);
rc = pci_alloc_irq_vectors(hdev->pdev, GAUDI2_MSIX_ENTRIES, GAUDI2_MSIX_ENTRIES,
PCI_IRQ_MSIX);
if (rc < 0) {
@ -4307,6 +4368,7 @@ static int gaudi2_enable_msix(struct hl_device *hdev)
i++, j++, user_irq_init_cnt++) {
irq = pci_irq_vector(hdev->pdev, i);
hl_set_irq_affinity(hdev, irq);
rc = request_irq(irq, hl_irq_user_interrupt_handler, 0, gaudi2_irq_name(i),
&hdev->user_interrupt[j]);
if (rc) {
@ -4333,6 +4395,7 @@ free_user_irq:
i < GAUDI2_IRQ_NUM_USER_FIRST + user_irq_init_cnt ; i++, j++) {
irq = pci_irq_vector(hdev->pdev, i);
irq_set_affinity_and_hint(irq, NULL);
free_irq(irq, &hdev->user_interrupt[j]);
}
irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR);
@ -4413,6 +4476,7 @@ static void gaudi2_disable_msix(struct hl_device *hdev)
k < hdev->asic_prop.user_interrupt_count ; i++, j++, k++) {
irq = pci_irq_vector(hdev->pdev, i);
irq_set_affinity_and_hint(irq, NULL);
free_irq(irq, &hdev->user_interrupt[j]);
}
@ -4957,10 +5021,17 @@ static void gaudi2_init_qman_pq(struct hl_device *hdev, u32 reg_base,
q = &hdev->kernel_queues[queue_id_base + pq_id];
pq_offset = pq_id * 4;
if (q->dram_bd) {
WREG32(reg_base + QM_PQ_BASE_LO_0_OFFSET + pq_offset,
lower_32_bits(q->pq_dram_address));
WREG32(reg_base + QM_PQ_BASE_HI_0_OFFSET + pq_offset,
upper_32_bits(q->pq_dram_address));
} else {
WREG32(reg_base + QM_PQ_BASE_LO_0_OFFSET + pq_offset,
lower_32_bits(q->bus_address));
WREG32(reg_base + QM_PQ_BASE_HI_0_OFFSET + pq_offset,
upper_32_bits(q->bus_address));
}
WREG32(reg_base + QM_PQ_SIZE_0_OFFSET + pq_offset, ilog2(HL_QUEUE_LENGTH));
WREG32(reg_base + QM_PQ_PI_0_OFFSET + pq_offset, 0);
WREG32(reg_base + QM_PQ_CI_0_OFFSET + pq_offset, 0);
@ -5847,7 +5918,8 @@ static int gaudi2_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_har
return rc;
}
static int gaudi2_mmu_update_hop0_addr(struct hl_device *hdev, u32 stlb_base)
static int gaudi2_mmu_update_hop0_addr(struct hl_device *hdev, u32 stlb_base,
bool host_resident_pgt)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
u64 hop0_addr;
@ -5859,7 +5931,11 @@ static int gaudi2_mmu_update_hop0_addr(struct hl_device *hdev, u32 stlb_base)
max_asid = min((u32) 8, max_asid);
for (asid = 0 ; asid < max_asid ; asid++) {
if (host_resident_pgt)
hop0_addr = hdev->mmu_priv.hr.mmu_asid_hop0[asid].phys_addr;
else
hop0_addr = prop->mmu_pgt_addr + (asid * prop->dmmu.hop_table_size);
rc = gaudi2_mmu_update_asid_hop0_addr(hdev, stlb_base, asid, hop0_addr);
if (rc) {
dev_err(hdev->dev, "failed to set hop0 addr for asid %d\n", asid);
@ -5870,7 +5946,8 @@ static int gaudi2_mmu_update_hop0_addr(struct hl_device *hdev, u32 stlb_base)
return 0;
}
static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, u32 stlb_base)
static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, u32 stlb_base,
bool host_resident_pgt)
{
u32 status, timeout_usec;
int rc;
@ -5893,7 +5970,7 @@ static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, u32 stlb
if (rc)
dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU SRAM init\n");
rc = gaudi2_mmu_update_hop0_addr(hdev, stlb_base);
rc = gaudi2_mmu_update_hop0_addr(hdev, stlb_base, host_resident_pgt);
if (rc)
return rc;
@ -5917,6 +5994,7 @@ static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, u32 stlb
static int gaudi2_pci_mmu_init(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct gaudi2_device *gaudi2 = hdev->asic_specific;
u32 mmu_base, stlb_base;
int rc;
@ -5956,7 +6034,7 @@ static int gaudi2_pci_mmu_init(struct hl_device *hdev)
WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_PMMU_SPI_SEI_ENABLE_MASK);
rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base);
rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base, prop->pmmu.host_resident);
if (rc)
return rc;
@ -6008,7 +6086,7 @@ static int gaudi2_dcore_hmmu_init(struct hl_device *hdev, int dcore_id,
WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_HMMU_SPI_SEI_ENABLE_MASK);
rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base);
rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base, prop->dmmu.host_resident);
if (rc)
return rc;
@ -7046,7 +7124,7 @@ static int gaudi2_test_queues(struct hl_device *hdev)
/* send test message on all enabled Qs */
for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ; i++) {
if (!gaudi2_is_queue_enabled(hdev, i))
if (!gaudi2_is_queue_enabled(hdev, i) || gaudi2_is_edma_queue_id(i))
continue;
msg_info = &gaudi2->queues_test_info[i - GAUDI2_QUEUE_ID_PDMA_0_0];
@ -7063,7 +7141,7 @@ static int gaudi2_test_queues(struct hl_device *hdev)
/* verify that all messages were processed */
for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ; i++) {
if (!gaudi2_is_queue_enabled(hdev, i))
if (!gaudi2_is_queue_enabled(hdev, i) || gaudi2_is_edma_queue_id(i))
continue;
rc = gaudi2_test_queue_wait_completion(hdev, i, sob_val);
@ -8907,9 +8985,6 @@ static int gaudi2_print_pcie_addr_dec_info(struct hl_device *hdev, u16 event_typ
u32 error_count = 0;
int i;
gaudi2_print_event(hdev, event_type, true,
"intr_cause_data: %#llx", intr_cause_data);
for (i = 0 ; i < GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE ; i++) {
if (!(intr_cause_data & BIT_ULL(i)))
continue;
@ -8918,14 +8993,15 @@ static int gaudi2_print_pcie_addr_dec_info(struct hl_device *hdev, u16 event_typ
"err cause: %s", gaudi2_pcie_addr_dec_error_cause[i]);
error_count++;
/*
* Always check for LBW and HBW additional info as the indication itself is
* sometimes missing
*/
}
switch (intr_cause_data & BIT_ULL(i)) {
case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK:
hl_check_for_glbl_errors(hdev);
break;
case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK:
gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(hdev, event_mask);
break;
}
}
return error_count;
}
@ -8983,7 +9059,6 @@ static void gaudi2_handle_page_error(struct hl_device *hdev, u64 mmu_base, bool
if (is_pmmu) {
dev_err_ratelimited(hdev->dev, "PMMU page fault on va 0x%llx\n", addr);
} else {
addr = gaudi2_mmu_descramble_addr(hdev, addr);
addr &= HW_UNSCRAMBLED_BITS_MASK;
dev_err_ratelimited(hdev->dev, "HMMU page fault on va range 0x%llx - 0x%llx\n",
@ -9514,25 +9589,17 @@ static int gaudi2_handle_pcie_p2p_msix(struct hl_device *hdev, u16 event_type)
static int gaudi2_handle_pcie_drain(struct hl_device *hdev,
struct hl_eq_pcie_drain_ind_data *drain_data)
{
u64 lbw_rd, lbw_wr, hbw_rd, hbw_wr, cause, error_count = 0;
u64 cause, error_count = 0;
cause = le64_to_cpu(drain_data->intr_cause.intr_cause_data);
lbw_rd = le64_to_cpu(drain_data->drain_rd_addr_lbw);
lbw_wr = le64_to_cpu(drain_data->drain_wr_addr_lbw);
hbw_rd = le64_to_cpu(drain_data->drain_rd_addr_hbw);
hbw_wr = le64_to_cpu(drain_data->drain_wr_addr_hbw);
if (cause & BIT_ULL(0)) {
dev_err_ratelimited(hdev->dev,
"PCIE AXI drain LBW completed, read_err %u, write_err %u\n",
!!lbw_rd, !!lbw_wr);
dev_err_ratelimited(hdev->dev, "PCIE AXI drain LBW completed\n");
error_count++;
}
if (cause & BIT_ULL(1)) {
dev_err_ratelimited(hdev->dev,
"PCIE AXI drain HBW completed, raddr %#llx, waddr %#llx\n",
hbw_rd, hbw_wr);
dev_err_ratelimited(hdev->dev, "PCIE AXI drain HBW completed\n");
error_count++;
}
@ -10250,11 +10317,11 @@ reset_device:
}
static int gaudi2_memset_memory_chunk_using_edma_qm(struct hl_device *hdev,
struct packet_lin_dma *lin_dma_pkt, dma_addr_t pkt_dma_addr,
u32 hw_queue_id, u32 size, u64 addr, u32 val)
struct packet_lin_dma *lin_dma_pkt,
u64 phys_addr, u32 hw_queue_id, u32 size, u64 addr, u32 val)
{
u32 ctl, pkt_size;
int rc = 0;
int rc = 0, i;
ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
@ -10268,9 +10335,20 @@ static int gaudi2_memset_memory_chunk_using_edma_qm(struct hl_device *hdev,
pkt_size = sizeof(struct packet_lin_dma);
rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, pkt_dma_addr);
for (i = 0; i < 3; i++) {
rc = hdev->asic_funcs->access_dev_mem(hdev, PCI_REGION_DRAM,
phys_addr + (i * sizeof(u64)),
((u64 *)(lin_dma_pkt)) + i, DEBUGFS_WRITE64);
if (rc) {
dev_err(hdev->dev, "Failed to copy lin_dma packet to HBM (%#llx)\n",
phys_addr);
return rc;
}
}
rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, phys_addr);
if (rc)
dev_err(hdev->dev, "Failed to send lin dma packet to H/W queue %d\n",
dev_err(hdev->dev, "Failed to send lin_dma packet to H/W queue %d\n",
hw_queue_id);
return rc;
@ -10283,12 +10361,11 @@ static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 siz
GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0,
GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0};
u32 chunk_size, dcore, edma_idx, sob_offset, sob_addr, comp_val,
old_mmubp, mmubp, num_of_pkts, busy, pkt_size;
old_mmubp, mmubp, num_of_pkts, busy, pkt_size, cb_len;
u64 comp_addr, cur_addr = addr, end_addr = addr + size;
struct asic_fixed_properties *prop = &hdev->asic_prop;
int rc = 0, dma_num = 0, i;
void *lin_dma_pkts_arr;
dma_addr_t pkt_dma_addr;
int rc = 0, dma_num = 0;
if (prop->edma_enabled_mask == 0) {
dev_info(hdev->dev, "non of the EDMA engines is enabled - skip dram scrubbing\n");
@ -10306,9 +10383,19 @@ static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 siz
/* Calculate how many lin dma pkts we'll need */
num_of_pkts = div64_u64(round_up(size, SZ_2G), SZ_2G);
pkt_size = sizeof(struct packet_lin_dma);
cb_len = pkt_size * num_of_pkts;
lin_dma_pkts_arr = hl_asic_dma_alloc_coherent(hdev, pkt_size * num_of_pkts,
&pkt_dma_addr, GFP_KERNEL);
/*
* if we're not scrubing HMMU or NIC reserved sections in hbm,
* then it the scrubing of the user section, as we use the start of the user section
* to store the CB of the EDMA QM, so shift the start address of the scrubbing accordingly
* and scrub the CB section before leaving this function.
*/
if ((addr >= prop->dram_user_base_address) &&
(addr < prop->dram_user_base_address + cb_len))
cur_addr += (prop->dram_user_base_address + cb_len) - addr;
lin_dma_pkts_arr = kvcalloc(num_of_pkts, pkt_size, GFP_KERNEL);
if (!lin_dma_pkts_arr)
return -ENOMEM;
@ -10354,7 +10441,7 @@ static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 siz
rc = gaudi2_memset_memory_chunk_using_edma_qm(hdev,
(struct packet_lin_dma *)lin_dma_pkts_arr + dma_num,
pkt_dma_addr + dma_num * pkt_size,
prop->dram_user_base_address + (dma_num * pkt_size),
edma_queues_id[dcore] + edma_idx * 4,
chunk_size, cur_addr, val);
if (rc)
@ -10363,14 +10450,16 @@ static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 siz
dma_num++;
cur_addr += chunk_size;
if (cur_addr == end_addr)
break;
goto edma_wait;
}
}
}
edma_wait:
rc = hl_poll_timeout(hdev, sob_addr, busy, (busy == dma_num), 1000, 1000000);
if (rc) {
dev_err(hdev->dev, "DMA Timeout during HBM scrubbing\n");
dev_err(hdev->dev, "DMA Timeout during HBM scrubbing(sob: 0x%x, dma_num: 0x%x)\n",
busy, dma_num);
goto end;
}
end:
@ -10391,8 +10480,16 @@ end:
}
}
memset(lin_dma_pkts_arr, 0, sizeof(u64));
/* Zero the HBM area where we copied the CB */
for (i = 0; i < cb_len / sizeof(u64); i += sizeof(u64))
rc = hdev->asic_funcs->access_dev_mem(hdev, PCI_REGION_DRAM,
prop->dram_user_base_address + i,
(u64 *)(lin_dma_pkts_arr), DEBUGFS_WRITE64);
WREG32(sob_addr, 0);
hl_asic_dma_free_coherent(hdev, pkt_size * num_of_pkts, lin_dma_pkts_arr, pkt_dma_addr);
kfree(lin_dma_pkts_arr);
return rc;
}
@ -11450,7 +11547,7 @@ static int gaudi2_mmu_get_real_page_size(struct hl_device *hdev, struct hl_mmu_p
return 0;
page_size_err:
dev_err(hdev->dev, "page size of %u is not %uKB aligned, can't map\n",
dev_err(hdev->dev, "page size of 0x%X is not 0x%X aligned, can't map\n",
page_size, mmu_prop->page_size >> 10);
return -EFAULT;
}
@ -11470,6 +11567,29 @@ int gaudi2_send_device_activity(struct hl_device *hdev, bool open)
return hl_fw_send_device_activity(hdev, open);
}
static u64 gaudi2_read_pte(struct hl_device *hdev, u64 addr)
{
struct gaudi2_device *gaudi2 = hdev->asic_specific;
u64 val;
if (hdev->reset_info.hard_reset_pending)
return U64_MAX;
val = readq(hdev->pcie_bar[DRAM_BAR_ID] + (addr - gaudi2->dram_bar_cur_addr));
return val;
}
static void gaudi2_write_pte(struct hl_device *hdev, u64 addr, u64 val)
{
struct gaudi2_device *gaudi2 = hdev->asic_specific;
if (hdev->reset_info.hard_reset_pending)
return;
writeq(val, hdev->pcie_bar[DRAM_BAR_ID] + (addr - gaudi2->dram_bar_cur_addr));
}
static const struct hl_asic_funcs gaudi2_funcs = {
.early_init = gaudi2_early_init,
.early_fini = gaudi2_early_fini,
@ -11506,8 +11626,8 @@ static const struct hl_asic_funcs gaudi2_funcs = {
.add_device_attr = gaudi2_add_device_attr,
.handle_eqe = gaudi2_handle_eqe,
.get_events_stat = gaudi2_get_events_stat,
.read_pte = NULL,
.write_pte = NULL,
.read_pte = gaudi2_read_pte,
.write_pte = gaudi2_write_pte,
.mmu_invalidate_cache = gaudi2_mmu_invalidate_cache,
.mmu_invalidate_cache_range = gaudi2_mmu_invalidate_cache_range,
.mmu_prefetch_cache_range = NULL,

View File

@ -19,8 +19,6 @@
#define GAUDI2_LINUX_FW_FILE "habanalabs/gaudi2/gaudi2-fit.itb"
#define GAUDI2_BOOT_FIT_FILE "habanalabs/gaudi2/gaudi2-boot-fit.itb"
#define MMU_PAGE_TABLES_INITIAL_SIZE 0x10000000 /* 256MB */
#define GAUDI2_CPU_TIMEOUT_USEC 30000000 /* 30s */
#define NUMBER_OF_PDMA_QUEUES 2
@ -109,13 +107,11 @@
/* DRAM Memory Map */
#define CPU_FW_IMAGE_SIZE 0x10000000 /* 256MB */
/* This define should be used only when working in a debug mode without dram.
* When working with dram, the driver size will be calculated dynamically.
*/
#define NIC_DEFAULT_DRV_SIZE 0x20000000 /* 512MB */
#define CPU_FW_IMAGE_ADDR DRAM_PHYS_BASE
#define PMMU_PAGE_TABLES_SIZE 0x10000000 /* 256MB */
#define EDMA_PQS_SIZE SZ_2M
#define EDMA_SCRATCHPAD_SIZE SZ_1M
#define HMMU_PAGE_TABLES_SIZE SZ_1M
#define NIC_NUMBER_OF_PORTS NIC_NUMBER_OF_ENGINES
@ -243,7 +239,6 @@
#define GAUDI2_NUM_TESTED_QS (GAUDI2_QUEUE_ID_CPU_PQ - GAUDI2_QUEUE_ID_PDMA_0_0)
#define GAUDI2_NUM_OF_GLBL_ERR_CAUSE 8
enum gaudi2_reserved_sob_id {
GAUDI2_RESERVED_SOB_CS_COMPLETION_FIRST,

View File

@ -413,8 +413,6 @@ int goya_set_fixed_properties(struct hl_device *hdev)
else
prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
prop->mmu_pte_size = HL_PTE_SIZE;
prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
prop->dram_page_size = PAGE_SIZE_2MB;
prop->device_mem_alloc_default_page_size = prop->dram_page_size;
prop->dram_supports_virtual_memory = true;
@ -435,8 +433,8 @@ int goya_set_fixed_properties(struct hl_device *hdev)
prop->dmmu.num_hops = MMU_ARCH_5_HOPS;
prop->dmmu.last_mask = LAST_MASK;
/* TODO: will be duplicated until implementing per-MMU props */
prop->dmmu.hop_table_size = prop->mmu_hop_table_size;
prop->dmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
prop->dmmu.hop_table_size = HOP_TABLE_SIZE_512_PTE;
prop->dmmu.hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
/* shifts and masks are the same in PMMU and DMMU */
memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu));
@ -446,8 +444,8 @@ int goya_set_fixed_properties(struct hl_device *hdev)
prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
prop->pmmu.last_mask = LAST_MASK;
/* TODO: will be duplicated until implementing per-MMU props */
prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
prop->pmmu.hop_table_size = HOP_TABLE_SIZE_512_PTE;
prop->pmmu.hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
/* PMMU and HPMMU are the same except of page size */
memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
@ -2678,7 +2676,7 @@ int goya_mmu_init(struct hl_device *hdev)
for (i = 0 ; i < prop->max_asid ; i++) {
hop0_addr = prop->mmu_pgt_addr +
(i * prop->mmu_hop_table_size);
(i * prop->dmmu.hop_table_size);
rc = goya_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
if (rc) {

View File

@ -576,7 +576,6 @@ static int goya_config_spmu(struct hl_device *hdev,
struct hl_debug_params *params)
{
u64 base_reg;
struct hl_debug_params_spmu *input = params->input;
u64 *output;
u32 output_arr_len;
u32 events_num;
@ -592,7 +591,7 @@ static int goya_config_spmu(struct hl_device *hdev,
base_reg = debug_spmu_regs[params->reg_idx] - CFG_BASE;
if (params->enable) {
input = params->input;
struct hl_debug_params_spmu *input = params->input;
if (!input)
return -EINVAL;

View File

@ -26,6 +26,8 @@
#define LAST_MASK 0x0000000000800ull
#define FLAGS_MASK 0x0000000000FFFull
#define MMU_ARCH_3_HOPS 3
#define MMU_ARCH_4_HOPS 4
#define MMU_ARCH_5_HOPS 5
#define MMU_ARCH_6_HOPS 6

View File

@ -286,22 +286,6 @@ static const struct file_operations fw_trace_level_fops = {
.write = fw_trace_level_fops_write,
};
static ssize_t
ivpu_reset_engine_fn(struct file *file, const char __user *user_buf, size_t size, loff_t *pos)
{
struct ivpu_device *vdev = file->private_data;
if (!size)
return -EINVAL;
if (ivpu_jsm_reset_engine(vdev, DRM_IVPU_ENGINE_COMPUTE))
return -ENODEV;
if (ivpu_jsm_reset_engine(vdev, DRM_IVPU_ENGINE_COPY))
return -ENODEV;
return size;
}
static ssize_t
ivpu_force_recovery_fn(struct file *file, const char __user *user_buf, size_t size, loff_t *pos)
{
@ -327,6 +311,22 @@ static const struct file_operations ivpu_force_recovery_fops = {
.write = ivpu_force_recovery_fn,
};
static ssize_t
ivpu_reset_engine_fn(struct file *file, const char __user *user_buf, size_t size, loff_t *pos)
{
struct ivpu_device *vdev = file->private_data;
if (!size)
return -EINVAL;
if (ivpu_jsm_reset_engine(vdev, DRM_IVPU_ENGINE_COMPUTE))
return -ENODEV;
if (ivpu_jsm_reset_engine(vdev, DRM_IVPU_ENGINE_COPY))
return -ENODEV;
return size;
}
static const struct file_operations ivpu_reset_engine_fops = {
.owner = THIS_MODULE,
.open = simple_open,

View File

@ -45,11 +45,11 @@ MODULE_PARM_DESC(test_mode, "Test mode mask. See IVPU_TEST_MODE_* macros.");
u8 ivpu_pll_min_ratio;
module_param_named(pll_min_ratio, ivpu_pll_min_ratio, byte, 0644);
MODULE_PARM_DESC(pll_min_ratio, "Minimum PLL ratio used to set VPU frequency");
MODULE_PARM_DESC(pll_min_ratio, "Minimum PLL ratio used to set NPU frequency");
u8 ivpu_pll_max_ratio = U8_MAX;
module_param_named(pll_max_ratio, ivpu_pll_max_ratio, byte, 0644);
MODULE_PARM_DESC(pll_max_ratio, "Maximum PLL ratio used to set VPU frequency");
MODULE_PARM_DESC(pll_max_ratio, "Maximum PLL ratio used to set NPU frequency");
bool ivpu_disable_mmu_cont_pages;
module_param_named(disable_mmu_cont_pages, ivpu_disable_mmu_cont_pages, bool, 0644);
@ -328,13 +328,13 @@ static int ivpu_wait_for_ready(struct ivpu_device *vdev)
ivpu_ipc_consumer_del(vdev, &cons);
if (!ret && ipc_hdr.data_addr != IVPU_IPC_BOOT_MSG_DATA_ADDR) {
ivpu_err(vdev, "Invalid VPU ready message: 0x%x\n",
ivpu_err(vdev, "Invalid NPU ready message: 0x%x\n",
ipc_hdr.data_addr);
return -EIO;
}
if (!ret)
ivpu_dbg(vdev, PM, "VPU ready message received successfully\n");
ivpu_dbg(vdev, PM, "NPU ready message received successfully\n");
return ret;
}
@ -532,6 +532,7 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
atomic64_set(&vdev->unique_id_counter, 0);
xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC);
xa_init_flags(&vdev->submitted_jobs_xa, XA_FLAGS_ALLOC1);
xa_init_flags(&vdev->db_xa, XA_FLAGS_ALLOC1);
lockdep_set_class(&vdev->submitted_jobs_xa.xa_lock, &submitted_jobs_xa_lock_class_key);
INIT_LIST_HEAD(&vdev->bo_list);
@ -605,6 +606,7 @@ err_power_down:
if (IVPU_WA(d3hot_after_power_off))
pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot);
err_xa_destroy:
xa_destroy(&vdev->db_xa);
xa_destroy(&vdev->submitted_jobs_xa);
xa_destroy(&vdev->context_xa);
return ret;
@ -640,6 +642,8 @@ static void ivpu_dev_fini(struct ivpu_device *vdev)
ivpu_mmu_reserved_context_fini(vdev);
ivpu_mmu_global_context_fini(vdev);
drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->db_xa));
xa_destroy(&vdev->db_xa);
drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->submitted_jobs_xa));
xa_destroy(&vdev->submitted_jobs_xa);
drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->context_xa));

View File

@ -36,6 +36,9 @@
#define IVPU_USER_CONTEXT_MIN_SSID 2
#define IVPU_USER_CONTEXT_MAX_SSID (IVPU_USER_CONTEXT_MIN_SSID + 63)
#define IVPU_MIN_DB 1
#define IVPU_MAX_DB 255
#define IVPU_NUM_ENGINES 2
#define IVPU_PLATFORM_SILICON 0
@ -119,6 +122,8 @@ struct ivpu_device {
struct xarray context_xa;
struct xa_limit context_xa_limit;
struct xarray db_xa;
struct mutex bo_list_lock; /* Protects bo_list */
struct list_head bo_list;
@ -189,7 +194,7 @@ static inline int ivpu_hw_gen(struct ivpu_device *vdev)
case PCI_DEVICE_ID_LNL:
return IVPU_HW_40XX;
default:
ivpu_err(vdev, "Unknown VPU device\n");
ivpu_err(vdev, "Unknown NPU device\n");
return 0;
}
}

View File

@ -46,15 +46,13 @@
static char *ivpu_firmware;
module_param_named_unsafe(firmware, ivpu_firmware, charp, 0644);
MODULE_PARM_DESC(firmware, "VPU firmware binary in /lib/firmware/..");
MODULE_PARM_DESC(firmware, "NPU firmware binary in /lib/firmware/..");
/* TODO: Remove mtl_vpu.bin from names after transition to generation based FW names */
static struct {
int gen;
const char *name;
} fw_names[] = {
{ IVPU_HW_37XX, "vpu_37xx.bin" },
{ IVPU_HW_37XX, "mtl_vpu.bin" },
{ IVPU_HW_37XX, "intel/vpu/vpu_37xx_v0.0.bin" },
{ IVPU_HW_40XX, "vpu_40xx.bin" },
{ IVPU_HW_40XX, "intel/vpu/vpu_40xx_v0.0.bin" },
@ -250,6 +248,7 @@ static int ivpu_fw_update_global_range(struct ivpu_device *vdev)
static int ivpu_fw_mem_init(struct ivpu_device *vdev)
{
struct ivpu_fw_info *fw = vdev->fw;
struct ivpu_addr_range fw_range;
int log_verb_size;
int ret;
@ -257,16 +256,19 @@ static int ivpu_fw_mem_init(struct ivpu_device *vdev)
if (ret)
return ret;
fw->mem = ivpu_bo_alloc_internal(vdev, fw->runtime_addr, fw->runtime_size, DRM_IVPU_BO_WC);
fw_range.start = fw->runtime_addr;
fw_range.end = fw->runtime_addr + fw->runtime_size;
fw->mem = ivpu_bo_create(vdev, &vdev->gctx, &fw_range, fw->runtime_size,
DRM_IVPU_BO_WC | DRM_IVPU_BO_MAPPABLE);
if (!fw->mem) {
ivpu_err(vdev, "Failed to allocate firmware runtime memory\n");
ivpu_err(vdev, "Failed to create firmware runtime memory buffer\n");
return -ENOMEM;
}
fw->mem_log_crit = ivpu_bo_alloc_internal(vdev, 0, IVPU_FW_CRITICAL_BUFFER_SIZE,
DRM_IVPU_BO_CACHED);
fw->mem_log_crit = ivpu_bo_create_global(vdev, IVPU_FW_CRITICAL_BUFFER_SIZE,
DRM_IVPU_BO_CACHED | DRM_IVPU_BO_MAPPABLE);
if (!fw->mem_log_crit) {
ivpu_err(vdev, "Failed to allocate critical log buffer\n");
ivpu_err(vdev, "Failed to create critical log buffer\n");
ret = -ENOMEM;
goto err_free_fw_mem;
}
@ -276,18 +278,19 @@ static int ivpu_fw_mem_init(struct ivpu_device *vdev)
else
log_verb_size = IVPU_FW_VERBOSE_BUFFER_SMALL_SIZE;
fw->mem_log_verb = ivpu_bo_alloc_internal(vdev, 0, log_verb_size, DRM_IVPU_BO_CACHED);
fw->mem_log_verb = ivpu_bo_create_global(vdev, log_verb_size,
DRM_IVPU_BO_CACHED | DRM_IVPU_BO_MAPPABLE);
if (!fw->mem_log_verb) {
ivpu_err(vdev, "Failed to allocate verbose log buffer\n");
ivpu_err(vdev, "Failed to create verbose log buffer\n");
ret = -ENOMEM;
goto err_free_log_crit;
}
if (fw->shave_nn_size) {
fw->mem_shave_nn = ivpu_bo_alloc_internal(vdev, vdev->hw->ranges.shave.start,
fw->mem_shave_nn = ivpu_bo_create(vdev, &vdev->gctx, &vdev->hw->ranges.shave,
fw->shave_nn_size, DRM_IVPU_BO_WC);
if (!fw->mem_shave_nn) {
ivpu_err(vdev, "Failed to allocate shavenn buffer\n");
ivpu_err(vdev, "Failed to create shavenn buffer\n");
ret = -ENOMEM;
goto err_free_log_verb;
}
@ -296,11 +299,11 @@ static int ivpu_fw_mem_init(struct ivpu_device *vdev)
return 0;
err_free_log_verb:
ivpu_bo_free_internal(fw->mem_log_verb);
ivpu_bo_free(fw->mem_log_verb);
err_free_log_crit:
ivpu_bo_free_internal(fw->mem_log_crit);
ivpu_bo_free(fw->mem_log_crit);
err_free_fw_mem:
ivpu_bo_free_internal(fw->mem);
ivpu_bo_free(fw->mem);
return ret;
}
@ -309,13 +312,13 @@ static void ivpu_fw_mem_fini(struct ivpu_device *vdev)
struct ivpu_fw_info *fw = vdev->fw;
if (fw->mem_shave_nn) {
ivpu_bo_free_internal(fw->mem_shave_nn);
ivpu_bo_free(fw->mem_shave_nn);
fw->mem_shave_nn = NULL;
}
ivpu_bo_free_internal(fw->mem_log_verb);
ivpu_bo_free_internal(fw->mem_log_crit);
ivpu_bo_free_internal(fw->mem);
ivpu_bo_free(fw->mem_log_verb);
ivpu_bo_free(fw->mem_log_crit);
ivpu_bo_free(fw->mem);
fw->mem_log_verb = NULL;
fw->mem_log_crit = NULL;
@ -469,6 +472,8 @@ static void ivpu_fw_boot_params_print(struct ivpu_device *vdev, struct vpu_boot_
boot_params->d0i3_residency_time_us);
ivpu_dbg(vdev, FW_BOOT, "boot_params.d0i3_entry_vpu_ts = %llu\n",
boot_params->d0i3_entry_vpu_ts);
ivpu_dbg(vdev, FW_BOOT, "boot_params.system_time_us = %llu\n",
boot_params->system_time_us);
}
void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params *boot_params)
@ -480,11 +485,14 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params
boot_params->d0i3_residency_time_us =
ktime_us_delta(ktime_get_boottime(), vdev->hw->d0i3_entry_host_ts);
boot_params->d0i3_entry_vpu_ts = vdev->hw->d0i3_entry_vpu_ts;
boot_params->system_time_us = ktime_to_us(ktime_get_real());
ivpu_dbg(vdev, FW_BOOT, "boot_params.d0i3_residency_time_us = %lld\n",
boot_params->d0i3_residency_time_us);
ivpu_dbg(vdev, FW_BOOT, "boot_params.d0i3_entry_vpu_ts = %llu\n",
boot_params->d0i3_entry_vpu_ts);
ivpu_dbg(vdev, FW_BOOT, "boot_params.system_time_us = %llu\n",
boot_params->system_time_us);
boot_params->save_restore_ret_address = 0;
vdev->pm->is_warmboot = true;
@ -562,6 +570,7 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params
boot_params->d0i3_residency_time_us = 0;
boot_params->d0i3_entry_vpu_ts = 0;
boot_params->system_time_us = ktime_to_us(ktime_get_real());
wmb(); /* Flush WC buffers after writing bootparams */
ivpu_fw_boot_params_print(vdev, boot_params);

View File

@ -20,7 +20,7 @@
unsigned int ivpu_log_level = IVPU_FW_LOG_ERROR;
module_param(ivpu_log_level, uint, 0444);
MODULE_PARM_DESC(ivpu_log_level,
"VPU firmware default trace level: debug=" __stringify(IVPU_FW_LOG_DEBUG)
"NPU firmware default trace level: debug=" __stringify(IVPU_FW_LOG_DEBUG)
" info=" __stringify(IVPU_FW_LOG_INFO)
" warn=" __stringify(IVPU_FW_LOG_WARN)
" error=" __stringify(IVPU_FW_LOG_ERROR)
@ -121,11 +121,11 @@ void ivpu_fw_log_print(struct ivpu_device *vdev, bool only_new_msgs, struct drm_
u32 next = 0;
while (fw_log_ptr(vdev, vdev->fw->mem_log_crit, &next, &log_header) == 0)
fw_log_print_buffer(vdev, log_header, "VPU critical", only_new_msgs, p);
fw_log_print_buffer(vdev, log_header, "NPU critical", only_new_msgs, p);
next = 0;
while (fw_log_ptr(vdev, vdev->fw->mem_log_verb, &next, &log_header) == 0)
fw_log_print_buffer(vdev, log_header, "VPU verbose", only_new_msgs, p);
fw_log_print_buffer(vdev, log_header, "NPU verbose", only_new_msgs, p);
}
void ivpu_fw_log_clear(struct ivpu_device *vdev)

View File

@ -172,8 +172,7 @@ struct drm_gem_object *ivpu_gem_create_object(struct drm_device *dev, size_t siz
return &bo->base.base;
}
static struct ivpu_bo *
ivpu_bo_create(struct ivpu_device *vdev, u64 size, u32 flags)
static struct ivpu_bo *ivpu_bo_alloc(struct ivpu_device *vdev, u64 size, u32 flags)
{
struct drm_gem_shmem_object *shmem;
struct ivpu_bo *bo;
@ -201,7 +200,7 @@ ivpu_bo_create(struct ivpu_device *vdev, u64 size, u32 flags)
return bo;
}
static int ivpu_bo_open(struct drm_gem_object *obj, struct drm_file *file)
static int ivpu_gem_bo_open(struct drm_gem_object *obj, struct drm_file *file)
{
struct ivpu_file_priv *file_priv = file->driver_priv;
struct ivpu_device *vdev = file_priv->vdev;
@ -224,7 +223,7 @@ static int ivpu_bo_open(struct drm_gem_object *obj, struct drm_file *file)
return ivpu_bo_alloc_vpu_addr(bo, &file_priv->ctx, range);
}
static void ivpu_bo_free(struct drm_gem_object *obj)
static void ivpu_gem_bo_free(struct drm_gem_object *obj)
{
struct ivpu_device *vdev = to_ivpu_device(obj->dev);
struct ivpu_bo *bo = to_ivpu_bo(obj);
@ -245,8 +244,8 @@ static void ivpu_bo_free(struct drm_gem_object *obj)
}
static const struct drm_gem_object_funcs ivpu_gem_funcs = {
.free = ivpu_bo_free,
.open = ivpu_bo_open,
.free = ivpu_gem_bo_free,
.open = ivpu_gem_bo_open,
.print_info = drm_gem_shmem_object_print_info,
.pin = drm_gem_shmem_object_pin,
.unpin = drm_gem_shmem_object_unpin,
@ -272,9 +271,9 @@ int ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *fi
if (size == 0)
return -EINVAL;
bo = ivpu_bo_create(vdev, size, args->flags);
bo = ivpu_bo_alloc(vdev, size, args->flags);
if (IS_ERR(bo)) {
ivpu_err(vdev, "Failed to create BO: %pe (ctx %u size %llu flags 0x%x)",
ivpu_err(vdev, "Failed to allocate BO: %pe (ctx %u size %llu flags 0x%x)",
bo, file_priv->ctx.id, args->size, args->flags);
return PTR_ERR(bo);
}
@ -289,33 +288,28 @@ int ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *fi
}
struct ivpu_bo *
ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 flags)
ivpu_bo_create(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
struct ivpu_addr_range *range, u64 size, u32 flags)
{
const struct ivpu_addr_range *range;
struct ivpu_addr_range fixed_range;
struct iosys_map map;
struct ivpu_bo *bo;
int ret;
drm_WARN_ON(&vdev->drm, !PAGE_ALIGNED(vpu_addr));
if (drm_WARN_ON(&vdev->drm, !range))
return NULL;
drm_WARN_ON(&vdev->drm, !PAGE_ALIGNED(range->start));
drm_WARN_ON(&vdev->drm, !PAGE_ALIGNED(range->end));
drm_WARN_ON(&vdev->drm, !PAGE_ALIGNED(size));
if (vpu_addr) {
fixed_range.start = vpu_addr;
fixed_range.end = vpu_addr + size;
range = &fixed_range;
} else {
range = &vdev->hw->ranges.global;
}
bo = ivpu_bo_create(vdev, size, flags);
bo = ivpu_bo_alloc(vdev, size, flags);
if (IS_ERR(bo)) {
ivpu_err(vdev, "Failed to create BO: %pe (vpu_addr 0x%llx size %llu flags 0x%x)",
bo, vpu_addr, size, flags);
ivpu_err(vdev, "Failed to allocate BO: %pe (vpu_addr 0x%llx size %llu flags 0x%x)",
bo, range->start, size, flags);
return NULL;
}
ret = ivpu_bo_alloc_vpu_addr(bo, &vdev->gctx, range);
ret = ivpu_bo_alloc_vpu_addr(bo, ctx, range);
if (ret)
goto err_put;
@ -323,11 +317,14 @@ ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 fla
if (ret)
goto err_put;
if (flags & DRM_IVPU_BO_MAPPABLE) {
dma_resv_lock(bo->base.base.resv, NULL);
ret = drm_gem_shmem_vmap(&bo->base, &map);
dma_resv_unlock(bo->base.base.resv);
if (ret)
goto err_put;
}
return bo;
@ -336,13 +333,20 @@ err_put:
return NULL;
}
void ivpu_bo_free_internal(struct ivpu_bo *bo)
struct ivpu_bo *ivpu_bo_create_global(struct ivpu_device *vdev, u64 size, u32 flags)
{
return ivpu_bo_create(vdev, &vdev->gctx, &vdev->hw->ranges.global, size, flags);
}
void ivpu_bo_free(struct ivpu_bo *bo)
{
struct iosys_map map = IOSYS_MAP_INIT_VADDR(bo->base.vaddr);
if (bo->flags & DRM_IVPU_BO_MAPPABLE) {
dma_resv_lock(bo->base.base.resv, NULL);
drm_gem_shmem_vunmap(&bo->base, &map);
dma_resv_unlock(bo->base.base.resv);
}
drm_gem_object_put(&bo->base.base);
}

View File

@ -28,8 +28,10 @@ int ivpu_bo_pin(struct ivpu_bo *bo);
void ivpu_bo_unbind_all_bos_from_context(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx);
struct drm_gem_object *ivpu_gem_create_object(struct drm_device *dev, size_t size);
struct ivpu_bo *ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 flags);
void ivpu_bo_free_internal(struct ivpu_bo *bo);
struct ivpu_bo *ivpu_bo_create(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
struct ivpu_addr_range *range, u64 size, u32 flags);
struct ivpu_bo *ivpu_bo_create_global(struct ivpu_device *vdev, u64 size, u32 flags);
void ivpu_bo_free(struct ivpu_bo *bo);
int ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
int ivpu_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file);

View File

@ -13,7 +13,7 @@
#include "ivpu_pm.h"
#define TILE_FUSE_ENABLE_BOTH 0x0
#define TILE_SKU_BOTH_MTL 0x3630
#define TILE_SKU_BOTH 0x3630
/* Work point configuration values */
#define CONFIG_1_TILE 0x01
@ -228,7 +228,7 @@ static int ivpu_pll_drive(struct ivpu_device *vdev, bool enable)
ret = ivpu_hw_37xx_wait_for_vpuip_bar(vdev);
if (ret) {
ivpu_err(vdev, "Timed out waiting for VPUIP bar\n");
ivpu_err(vdev, "Timed out waiting for NPU IP bar\n");
return ret;
}
}
@ -589,7 +589,7 @@ static int ivpu_hw_37xx_info_init(struct ivpu_device *vdev)
struct ivpu_hw_info *hw = vdev->hw;
hw->tile_fuse = TILE_FUSE_ENABLE_BOTH;
hw->sku = TILE_SKU_BOTH_MTL;
hw->sku = TILE_SKU_BOTH;
hw->config = WP_CONFIG_2_TILE_4_3_RATIO;
ivpu_pll_init_frequency_ratios(vdev);
@ -762,10 +762,10 @@ static int ivpu_hw_37xx_power_down(struct ivpu_device *vdev)
ivpu_hw_37xx_save_d0i3_entry_timestamp(vdev);
if (!ivpu_hw_37xx_is_idle(vdev))
ivpu_warn(vdev, "VPU not idle during power down\n");
ivpu_warn(vdev, "NPU not idle during power down\n");
if (ivpu_hw_37xx_reset(vdev)) {
ivpu_err(vdev, "Failed to reset VPU\n");
ivpu_err(vdev, "Failed to reset NPU\n");
ret = -EIO;
}

View File

@ -80,11 +80,11 @@ static char *ivpu_platform_to_str(u32 platform)
{
switch (platform) {
case IVPU_PLATFORM_SILICON:
return "IVPU_PLATFORM_SILICON";
return "SILICON";
case IVPU_PLATFORM_SIMICS:
return "IVPU_PLATFORM_SIMICS";
return "SIMICS";
case IVPU_PLATFORM_FPGA:
return "IVPU_PLATFORM_FPGA";
return "FPGA";
default:
return "Invalid platform";
}
@ -768,7 +768,7 @@ static int ivpu_hw_40xx_reset(struct ivpu_device *vdev)
int ret = 0;
if (ivpu_hw_40xx_ip_reset(vdev)) {
ivpu_err(vdev, "Failed to reset VPU IP\n");
ivpu_err(vdev, "Failed to reset NPU IP\n");
ret = -EIO;
}
@ -926,7 +926,7 @@ static int ivpu_hw_40xx_power_down(struct ivpu_device *vdev)
ivpu_hw_40xx_save_d0i3_entry_timestamp(vdev);
if (!ivpu_hw_40xx_is_idle(vdev) && ivpu_hw_40xx_ip_reset(vdev))
ivpu_warn(vdev, "Failed to reset the VPU\n");
ivpu_warn(vdev, "Failed to reset the NPU\n");
if (ivpu_pll_disable(vdev)) {
ivpu_err(vdev, "Failed to disable PLL\n");

View File

@ -58,8 +58,8 @@ static void ivpu_ipc_mem_fini(struct ivpu_device *vdev)
{
struct ivpu_ipc_info *ipc = vdev->ipc;
ivpu_bo_free_internal(ipc->mem_rx);
ivpu_bo_free_internal(ipc->mem_tx);
ivpu_bo_free(ipc->mem_rx);
ivpu_bo_free(ipc->mem_tx);
}
static int
@ -471,13 +471,13 @@ int ivpu_ipc_init(struct ivpu_device *vdev)
struct ivpu_ipc_info *ipc = vdev->ipc;
int ret;
ipc->mem_tx = ivpu_bo_alloc_internal(vdev, 0, SZ_16K, DRM_IVPU_BO_WC);
ipc->mem_tx = ivpu_bo_create_global(vdev, SZ_16K, DRM_IVPU_BO_WC | DRM_IVPU_BO_MAPPABLE);
if (!ipc->mem_tx) {
ivpu_err(vdev, "Failed to allocate mem_tx\n");
return -ENOMEM;
}
ipc->mem_rx = ivpu_bo_alloc_internal(vdev, 0, SZ_16K, DRM_IVPU_BO_WC);
ipc->mem_rx = ivpu_bo_create_global(vdev, SZ_16K, DRM_IVPU_BO_WC | DRM_IVPU_BO_MAPPABLE);
if (!ipc->mem_rx) {
ivpu_err(vdev, "Failed to allocate mem_rx\n");
ret = -ENOMEM;
@ -506,9 +506,9 @@ int ivpu_ipc_init(struct ivpu_device *vdev)
return 0;
err_free_rx:
ivpu_bo_free_internal(ipc->mem_rx);
ivpu_bo_free(ipc->mem_rx);
err_free_tx:
ivpu_bo_free_internal(ipc->mem_tx);
ivpu_bo_free(ipc->mem_tx);
return ret;
}

View File

@ -30,19 +30,26 @@ static void ivpu_cmdq_ring_db(struct ivpu_device *vdev, struct ivpu_cmdq *cmdq)
static struct ivpu_cmdq *ivpu_cmdq_alloc(struct ivpu_file_priv *file_priv, u16 engine)
{
struct xa_limit db_xa_limit = {.max = IVPU_MAX_DB, .min = IVPU_MIN_DB};
struct ivpu_device *vdev = file_priv->vdev;
struct vpu_job_queue_header *jobq_header;
struct ivpu_cmdq *cmdq;
int ret;
cmdq = kzalloc(sizeof(*cmdq), GFP_KERNEL);
if (!cmdq)
return NULL;
cmdq->mem = ivpu_bo_alloc_internal(vdev, 0, SZ_4K, DRM_IVPU_BO_WC);
if (!cmdq->mem)
goto cmdq_free;
ret = xa_alloc(&vdev->db_xa, &cmdq->db_id, NULL, db_xa_limit, GFP_KERNEL);
if (ret) {
ivpu_err(vdev, "Failed to allocate doorbell id: %d\n", ret);
goto err_free_cmdq;
}
cmdq->mem = ivpu_bo_create_global(vdev, SZ_4K, DRM_IVPU_BO_WC | DRM_IVPU_BO_MAPPABLE);
if (!cmdq->mem)
goto err_erase_xa;
cmdq->db_id = file_priv->ctx.id + engine * ivpu_get_context_count(vdev);
cmdq->entry_count = (u32)((ivpu_bo_size(cmdq->mem) - sizeof(struct vpu_job_queue_header)) /
sizeof(struct vpu_job_queue_entry));
@ -55,7 +62,9 @@ static struct ivpu_cmdq *ivpu_cmdq_alloc(struct ivpu_file_priv *file_priv, u16 e
return cmdq;
cmdq_free:
err_erase_xa:
xa_erase(&vdev->db_xa, cmdq->db_id);
err_free_cmdq:
kfree(cmdq);
return NULL;
}
@ -65,7 +74,8 @@ static void ivpu_cmdq_free(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *c
if (!cmdq)
return;
ivpu_bo_free_internal(cmdq->mem);
ivpu_bo_free(cmdq->mem);
xa_erase(&file_priv->vdev->db_xa, cmdq->db_id);
kfree(cmdq);
}

View File

@ -22,7 +22,7 @@
static bool ivpu_disable_recovery;
module_param_named_unsafe(disable_recovery, ivpu_disable_recovery, bool, 0644);
MODULE_PARM_DESC(disable_recovery, "Disables recovery when VPU hang is detected");
MODULE_PARM_DESC(disable_recovery, "Disables recovery when NPU hang is detected");
static unsigned long ivpu_tdr_timeout_ms;
module_param_named(tdr_timeout_ms, ivpu_tdr_timeout_ms, ulong, 0644);
@ -118,11 +118,11 @@ static void ivpu_pm_recovery_work(struct work_struct *work)
char *evt[2] = {"IVPU_PM_EVENT=IVPU_RECOVER", NULL};
int ret;
ivpu_err(vdev, "Recovering the VPU (reset #%d)\n", atomic_read(&vdev->pm->reset_counter));
ivpu_err(vdev, "Recovering the NPU (reset #%d)\n", atomic_read(&vdev->pm->reset_counter));
ret = pm_runtime_resume_and_get(vdev->drm.dev);
if (ret)
ivpu_err(vdev, "Failed to resume VPU: %d\n", ret);
ivpu_err(vdev, "Failed to resume NPU: %d\n", ret);
ivpu_fw_log_dump(vdev);
@ -260,10 +260,10 @@ int ivpu_pm_runtime_suspend_cb(struct device *dev)
ret = ivpu_suspend(vdev);
if (ret)
ivpu_err(vdev, "Failed to set suspend VPU: %d\n", ret);
ivpu_err(vdev, "Failed to suspend NPU: %d\n", ret);
if (!hw_is_idle) {
ivpu_err(vdev, "VPU failed to enter idle, force suspended.\n");
ivpu_err(vdev, "NPU failed to enter idle, force suspended.\n");
ivpu_fw_log_dump(vdev);
ivpu_pm_prepare_cold_boot(vdev);
} else {

View File

@ -1,6 +1,6 @@
/* SPDX-License-Identifier: MIT */
/*
* Copyright (C) 2020-2023 Intel Corporation
* Copyright (c) 2020-2023, Intel Corporation.
*/
#ifndef VPU_BOOT_API_H
@ -27,12 +27,12 @@
* Minor version changes when API backward compatibility is preserved.
* Resets to 0 if Major version is incremented.
*/
#define VPU_BOOT_API_VER_MINOR 20
#define VPU_BOOT_API_VER_MINOR 22
/*
* API header changed (field names, documentation, formatting) but API itself has not been changed
*/
#define VPU_BOOT_API_VER_PATCH 4
#define VPU_BOOT_API_VER_PATCH 0
/*
* Index in the API version table
@ -41,7 +41,7 @@
#define VPU_BOOT_API_VER_INDEX 0
/* ------------ FW API version information end ---------------------*/
#pragma pack(push, 1)
#pragma pack(push, 4)
/*
* Firmware image header format
@ -66,9 +66,17 @@ struct vpu_firmware_header {
/* Size of memory require for firmware execution */
u32 runtime_size;
u32 shave_nn_fw_size;
/* Size of primary preemption buffer. */
/*
* Size of primary preemption buffer, assuming a 2-job submission queue.
* NOTE: host driver is expected to adapt size accordingly to actual
* submission queue size and device capabilities.
*/
u32 preemption_buffer_1_size;
/* Size of secondary preemption buffer. */
/*
* Size of secondary preemption buffer, assuming a 2-job submission queue.
* NOTE: host driver is expected to adapt size accordingly to actual
* submission queue size and device capabilities.
*/
u32 preemption_buffer_2_size;
/* Space reserved for future preemption-related fields. */
u32 preemption_reserved[6];
@ -181,10 +189,10 @@ struct vpu_warm_boot_section {
#define VPU_PRESENT_CALL_PERIOD_MS_MAX 10000
/**
* Macros to enable various operation modes within the VPU.
* Macros to enable various power profiles within the NPU.
* To be defined as part of 32 bit mask.
*/
#define VPU_OP_MODE_SURVIVABILITY 0x1
#define POWER_PROFILE_SURVIVABILITY 0x1
struct vpu_boot_params {
u32 magic;
@ -317,7 +325,15 @@ struct vpu_boot_params {
u64 d0i3_residency_time_us;
/* Value of VPU perf counter at the time of entering D0i3 state . */
u64 d0i3_entry_vpu_ts;
u32 pad4[20];
/*
* The system time of the host operating system in microseconds.
* E.g the number of microseconds since 1st of January 1970, or whatever date the
* host operating system uses to maintain system time.
* This value will be used to track system time on the VPU.
* The KMD is required to update this value on every VPU reset.
*/
u64 system_time_us;
u32 pad4[18];
/* Warm boot information: 0x400 - 0x43F */
u32 warm_boot_sections_count;
u32 warm_boot_start_address_reference;
@ -344,10 +360,14 @@ struct vpu_boot_params {
u32 vpu_focus_present_timer_ms;
/* VPU ECC Signaling */
u32 vpu_uses_ecc_mca_signal;
/* Values defined by VPU_OP_MODE* macros */
u32 vpu_operation_mode;
/* Unused/reserved: 0x480 - 0xFFF */
u32 pad6[736];
/* Values defined by POWER_PROFILE* macros */
u32 power_profile;
/* Microsecond value for DCT active cycle */
u32 dct_active_us;
/* Microsecond value for DCT inactive cycle */
u32 dct_inactive_us;
/* Unused/reserved: 0x488 - 0xFFF */
u32 pad6[734];
};
/*

View File

@ -1,6 +1,6 @@
/* SPDX-License-Identifier: MIT */
/*
* Copyright (C) 2020-2023 Intel Corporation
* Copyright (c) 2020-2023, Intel Corporation.
*/
/**
@ -27,7 +27,7 @@
/*
* API header changed (field names, documentation, formatting) but API itself has not been changed
*/
#define VPU_JSM_API_VER_PATCH 0
#define VPU_JSM_API_VER_PATCH 6
/*
* Index in the API version table
@ -43,8 +43,11 @@
/* Max number of impacted contexts that can be dealt with the engine reset command */
#define VPU_MAX_ENGINE_RESET_IMPACTED_CONTEXTS 3
/** Pack the API structures for now, once alignment issues are fixed this can be removed */
#pragma pack(push, 1)
/*
* Pack the API structures to enforce binary compatibility
* Align to 8 bytes for optimal performance
*/
#pragma pack(push, 8)
/*
* Engine indexes.
@ -124,6 +127,19 @@
*/
#define VPU_HWS_MAX_REALTIME_PRIORITY_LEVEL 31U
/*
* vpu_jsm_engine_reset_context flag definitions
*/
#define VPU_ENGINE_RESET_CONTEXT_FLAG_COLLATERAL_DAMAGE_MASK BIT(0)
#define VPU_ENGINE_RESET_CONTEXT_HANG_PRIMARY_CAUSE 0
#define VPU_ENGINE_RESET_CONTEXT_COLLATERAL_DAMAGE 1
/*
* Invalid command queue handle identifier. Applies to cmdq_id and cmdq_group
* in this API.
*/
#define VPU_HWS_INVALID_CMDQ_HANDLE 0ULL
/*
* Job format.
*/
@ -613,7 +629,7 @@ struct vpu_jsm_engine_reset_context {
u32 reserved_0;
/* Command queue id */
u64 cmdq_id;
/* Flags: 0: cause of hang; 1: collateral damage of reset */
/* See VPU_ENGINE_RESET_CONTEXT_* defines */
u64 flags;
};
@ -730,11 +746,7 @@ struct vpu_ipc_msg_payload_hws_create_cmdq {
u32 host_ssid;
/* Engine for which queue is being created */
u32 engine_idx;
/*
* Cmdq group may be set to 0 or equal to
* cmdq_id while each priority band contains
* only single engine instances.
*/
/* Cmdq group: only used for HWS logging of state changes */
u64 cmdq_group;
/* Command queue id */
u64 cmdq_id;

View File

@ -20,7 +20,7 @@ static unsigned int mhi_timeout_ms = 2000; /* 2 sec default */
module_param(mhi_timeout_ms, uint, 0600);
MODULE_PARM_DESC(mhi_timeout_ms, "MHI controller timeout value");
static struct mhi_channel_config aic100_channels[] = {
static const struct mhi_channel_config aic100_channels[] = {
{
.name = "QAIC_LOOPBACK",
.num = 0,
@ -358,8 +358,8 @@ static struct mhi_channel_config aic100_channels[] = {
.wake_capable = false,
},
{
.num = 21,
.name = "QAIC_TIMESYNC",
.num = 21,
.num_elements = 32,
.local_elements = 0,
.event_ring = 0,
@ -390,8 +390,8 @@ static struct mhi_channel_config aic100_channels[] = {
.wake_capable = false,
},
{
.num = 23,
.name = "QAIC_TIMESYNC_PERIODIC",
.num = 23,
.num_elements = 32,
.local_elements = 0,
.event_ring = 0,

View File

@ -30,6 +30,7 @@
#define to_qaic_drm_device(dev) container_of(dev, struct qaic_drm_device, drm)
#define to_drm(qddev) (&(qddev)->drm)
#define to_accel_kdev(qddev) (to_drm(qddev)->accel->kdev) /* Return Linux device of accel node */
#define to_qaic_device(dev) (to_qaic_drm_device((dev))->qdev)
enum __packed dev_states {
/* Device is offline or will be very soon */
@ -191,8 +192,6 @@ struct qaic_bo {
u32 nr_slice;
/* Number of slice that have been transferred by DMA engine */
u32 nr_slice_xfer_done;
/* true = BO is queued for execution, true = BO is not queued */
bool queued;
/*
* If true then user has attached slicing information to this BO by
* calling DRM_IOCTL_QAIC_ATTACH_SLICE_BO ioctl.

View File

@ -141,6 +141,11 @@ struct dbc_rsp {
__le16 status;
} __packed;
static inline bool bo_queued(struct qaic_bo *bo)
{
return !list_empty(&bo->xfer_list);
}
inline int get_dbc_req_elem_size(void)
{
return sizeof(struct dbc_req);
@ -569,6 +574,9 @@ static void qaic_free_sgt(struct sg_table *sgt)
{
struct scatterlist *sg;
if (!sgt)
return;
for (sg = sgt->sgl; sg; sg = sg_next(sg))
if (sg_page(sg))
__free_pages(sg_page(sg), get_order(sg->length));
@ -648,6 +656,7 @@ static void qaic_init_bo(struct qaic_bo *bo, bool reinit)
}
complete_all(&bo->xfer_done);
INIT_LIST_HEAD(&bo->slices);
INIT_LIST_HEAD(&bo->xfer_list);
}
static struct qaic_bo *qaic_alloc_init_bo(void)
@ -709,9 +718,13 @@ int qaic_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *fi
if (ret)
goto free_bo;
ret = drm_gem_create_mmap_offset(obj);
if (ret)
goto free_bo;
ret = drm_gem_handle_create(file_priv, obj, &args->handle);
if (ret)
goto free_sgt;
goto free_bo;
bo->handle = args->handle;
drm_gem_object_put(obj);
@ -720,10 +733,8 @@ int qaic_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *fi
return 0;
free_sgt:
qaic_free_sgt(bo->sgt);
free_bo:
kfree(bo);
drm_gem_object_put(obj);
unlock_dev_srcu:
srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
unlock_usr_srcu:
@ -738,7 +749,7 @@ int qaic_mmap_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file
struct drm_gem_object *obj;
struct qaic_device *qdev;
struct qaic_user *usr;
int ret;
int ret = 0;
usr = file_priv->driver_priv;
usr_rcu_id = srcu_read_lock(&usr->qddev_lock);
@ -760,8 +771,6 @@ int qaic_mmap_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file
goto unlock_dev_srcu;
}
ret = drm_gem_create_mmap_offset(obj);
if (ret == 0)
args->offset = drm_vma_node_offset_addr(&obj->vma_node);
drm_gem_object_put(obj);
@ -828,9 +837,6 @@ static int qaic_prepare_import_bo(struct qaic_bo *bo, struct qaic_attach_slice_h
struct sg_table *sgt;
int ret;
if (obj->import_attach->dmabuf->size < hdr->size)
return -EINVAL;
sgt = dma_buf_map_attachment(obj->import_attach, hdr->dir);
if (IS_ERR(sgt)) {
ret = PTR_ERR(sgt);
@ -847,9 +853,6 @@ static int qaic_prepare_export_bo(struct qaic_device *qdev, struct qaic_bo *bo,
{
int ret;
if (bo->base.size < hdr->size)
return -EINVAL;
ret = dma_map_sgtable(&qdev->pdev->dev, bo->sgt, hdr->dir, 0);
if (ret)
return -EFAULT;
@ -950,9 +953,6 @@ int qaic_attach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_fi
if (arg_size / args->hdr.count != sizeof(*slice_ent))
return -EINVAL;
if (args->hdr.size == 0)
return -EINVAL;
if (!(args->hdr.dir == DMA_TO_DEVICE || args->hdr.dir == DMA_FROM_DEVICE))
return -EINVAL;
@ -992,16 +992,16 @@ int qaic_attach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_fi
goto free_slice_ent;
}
ret = qaic_validate_req(qdev, slice_ent, args->hdr.count, args->hdr.size);
if (ret)
goto free_slice_ent;
obj = drm_gem_object_lookup(file_priv, args->hdr.handle);
if (!obj) {
ret = -ENOENT;
goto free_slice_ent;
}
ret = qaic_validate_req(qdev, slice_ent, args->hdr.count, obj->size);
if (ret)
goto put_bo;
bo = to_qaic_bo(obj);
ret = mutex_lock_interruptible(&bo->lock);
if (ret)
@ -1173,7 +1173,6 @@ static int send_bo_list_to_device(struct qaic_device *qdev, struct drm_file *fil
struct bo_slice *slice;
unsigned long flags;
struct qaic_bo *bo;
bool queued;
int i, j;
int ret;
@ -1205,9 +1204,7 @@ static int send_bo_list_to_device(struct qaic_device *qdev, struct drm_file *fil
}
spin_lock_irqsave(&dbc->xfer_lock, flags);
queued = bo->queued;
bo->queued = true;
if (queued) {
if (bo_queued(bo)) {
spin_unlock_irqrestore(&dbc->xfer_lock, flags);
ret = -EINVAL;
goto unlock_bo;
@ -1230,7 +1227,6 @@ static int send_bo_list_to_device(struct qaic_device *qdev, struct drm_file *fil
else
ret = copy_exec_reqs(qdev, slice, dbc->id, head, tail);
if (ret) {
bo->queued = false;
spin_unlock_irqrestore(&dbc->xfer_lock, flags);
goto unlock_bo;
}
@ -1253,8 +1249,7 @@ failed_to_send_bo:
spin_lock_irqsave(&dbc->xfer_lock, flags);
bo = list_last_entry(&dbc->xfer_list, struct qaic_bo, xfer_list);
obj = &bo->base;
bo->queued = false;
list_del(&bo->xfer_list);
list_del_init(&bo->xfer_list);
spin_unlock_irqrestore(&dbc->xfer_lock, flags);
dma_sync_sgtable_for_cpu(&qdev->pdev->dev, bo->sgt, bo->dir);
drm_gem_object_put(obj);
@ -1615,8 +1610,7 @@ read_fifo:
*/
dma_sync_sgtable_for_cpu(&qdev->pdev->dev, bo->sgt, bo->dir);
bo->nr_slice_xfer_done = 0;
bo->queued = false;
list_del(&bo->xfer_list);
list_del_init(&bo->xfer_list);
bo->perf_stats.req_processed_ts = ktime_get_ns();
complete_all(&bo->xfer_done);
drm_gem_object_put(&bo->base);
@ -1875,7 +1869,7 @@ int qaic_detach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_fi
/* Check if BO is committed to H/W for DMA */
spin_lock_irqsave(&dbc->xfer_lock, flags);
if (bo->queued) {
if (bo_queued(bo)) {
spin_unlock_irqrestore(&dbc->xfer_lock, flags);
ret = -EBUSY;
goto unlock_ch_srcu;
@ -1905,8 +1899,7 @@ static void empty_xfer_list(struct qaic_device *qdev, struct dma_bridge_chan *db
spin_lock_irqsave(&dbc->xfer_lock, flags);
while (!list_empty(&dbc->xfer_list)) {
bo = list_first_entry(&dbc->xfer_list, typeof(*bo), xfer_list);
bo->queued = false;
list_del(&bo->xfer_list);
list_del_init(&bo->xfer_list);
spin_unlock_irqrestore(&dbc->xfer_lock, flags);
bo->nr_slice_xfer_done = 0;
bo->req_id = 0;

View File

@ -44,6 +44,53 @@ MODULE_PARM_DESC(datapath_polling, "Operate the datapath in polling mode");
static bool link_up;
static DEFINE_IDA(qaic_usrs);
static void qaicm_wq_release(struct drm_device *dev, void *res)
{
struct workqueue_struct *wq = res;
destroy_workqueue(wq);
}
static struct workqueue_struct *qaicm_wq_init(struct drm_device *dev, const char *fmt)
{
struct workqueue_struct *wq;
int ret;
wq = alloc_workqueue(fmt, WQ_UNBOUND, 0);
if (!wq)
return ERR_PTR(-ENOMEM);
ret = drmm_add_action_or_reset(dev, qaicm_wq_release, wq);
if (ret)
return ERR_PTR(ret);
return wq;
}
static void qaicm_srcu_release(struct drm_device *dev, void *res)
{
struct srcu_struct *lock = res;
cleanup_srcu_struct(lock);
}
static int qaicm_srcu_init(struct drm_device *dev, struct srcu_struct *lock)
{
int ret;
ret = init_srcu_struct(lock);
if (ret)
return ret;
return drmm_add_action_or_reset(dev, qaicm_srcu_release, lock);
}
static void qaicm_pci_release(struct drm_device *dev, void *res)
{
struct qaic_device *qdev = to_qaic_device(dev);
pci_set_drvdata(qdev->pdev, NULL);
}
static void free_usr(struct kref *kref)
{
struct qaic_user *usr = container_of(kref, struct qaic_user, ref_count);
@ -299,74 +346,73 @@ void qaic_dev_reset_clean_local_state(struct qaic_device *qdev)
release_dbc(qdev, i);
}
static void cleanup_qdev(struct qaic_device *qdev)
{
int i;
for (i = 0; i < qdev->num_dbc; ++i)
cleanup_srcu_struct(&qdev->dbc[i].ch_lock);
cleanup_srcu_struct(&qdev->dev_lock);
pci_set_drvdata(qdev->pdev, NULL);
destroy_workqueue(qdev->cntl_wq);
destroy_workqueue(qdev->qts_wq);
}
static struct qaic_device *create_qdev(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct device *dev = &pdev->dev;
struct qaic_drm_device *qddev;
struct qaic_device *qdev;
int i;
struct drm_device *drm;
int i, ret;
qdev = devm_kzalloc(&pdev->dev, sizeof(*qdev), GFP_KERNEL);
qdev = devm_kzalloc(dev, sizeof(*qdev), GFP_KERNEL);
if (!qdev)
return NULL;
qdev->dev_state = QAIC_OFFLINE;
if (id->device == PCI_DEV_AIC100) {
qdev->num_dbc = 16;
qdev->dbc = devm_kcalloc(&pdev->dev, qdev->num_dbc, sizeof(*qdev->dbc), GFP_KERNEL);
qdev->dbc = devm_kcalloc(dev, qdev->num_dbc, sizeof(*qdev->dbc), GFP_KERNEL);
if (!qdev->dbc)
return NULL;
}
qdev->cntl_wq = alloc_workqueue("qaic_cntl", WQ_UNBOUND, 0);
if (!qdev->cntl_wq)
qddev = devm_drm_dev_alloc(&pdev->dev, &qaic_accel_driver, struct qaic_drm_device, drm);
if (IS_ERR(qddev))
return NULL;
qdev->qts_wq = alloc_workqueue("qaic_ts", WQ_UNBOUND, 0);
if (!qdev->qts_wq) {
destroy_workqueue(qdev->cntl_wq);
return NULL;
}
drm = to_drm(qddev);
pci_set_drvdata(pdev, qdev);
qdev->pdev = pdev;
mutex_init(&qdev->cntl_mutex);
ret = drmm_mutex_init(drm, &qddev->users_mutex);
if (ret)
return NULL;
ret = drmm_add_action_or_reset(drm, qaicm_pci_release, NULL);
if (ret)
return NULL;
ret = drmm_mutex_init(drm, &qdev->cntl_mutex);
if (ret)
return NULL;
qdev->cntl_wq = qaicm_wq_init(drm, "qaic_cntl");
if (IS_ERR(qdev->cntl_wq))
return NULL;
qdev->qts_wq = qaicm_wq_init(drm, "qaic_ts");
if (IS_ERR(qdev->qts_wq))
return NULL;
ret = qaicm_srcu_init(drm, &qdev->dev_lock);
if (ret)
return NULL;
qdev->qddev = qddev;
qdev->pdev = pdev;
qddev->qdev = qdev;
INIT_LIST_HEAD(&qdev->cntl_xfer_list);
init_srcu_struct(&qdev->dev_lock);
INIT_LIST_HEAD(&qddev->users);
for (i = 0; i < qdev->num_dbc; ++i) {
spin_lock_init(&qdev->dbc[i].xfer_lock);
qdev->dbc[i].qdev = qdev;
qdev->dbc[i].id = i;
INIT_LIST_HEAD(&qdev->dbc[i].xfer_list);
init_srcu_struct(&qdev->dbc[i].ch_lock);
ret = qaicm_srcu_init(drm, &qdev->dbc[i].ch_lock);
if (ret)
return NULL;
init_waitqueue_head(&qdev->dbc[i].dbc_release);
INIT_LIST_HEAD(&qdev->dbc[i].bo_lists);
}
qddev = devm_drm_dev_alloc(&pdev->dev, &qaic_accel_driver, struct qaic_drm_device, drm);
if (IS_ERR(qddev)) {
cleanup_qdev(qdev);
return NULL;
}
drmm_mutex_init(to_drm(qddev), &qddev->users_mutex);
INIT_LIST_HEAD(&qddev->users);
qddev->qdev = qdev;
qdev->qddev = qddev;
return qdev;
}
@ -472,35 +518,28 @@ static int qaic_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
ret = init_pci(qdev, pdev);
if (ret)
goto cleanup_qdev;
return ret;
for (i = 0; i < qdev->num_dbc; ++i)
qdev->dbc[i].dbc_base = qdev->bar_2 + QAIC_DBC_OFF(i);
mhi_irq = init_msi(qdev, pdev);
if (mhi_irq < 0) {
ret = mhi_irq;
goto cleanup_qdev;
}
if (mhi_irq < 0)
return mhi_irq;
ret = qaic_create_drm_device(qdev, QAIC_NO_PARTITION);
if (ret)
goto cleanup_qdev;
return ret;
qdev->mhi_cntrl = qaic_mhi_register_controller(pdev, qdev->bar_0, mhi_irq,
qdev->single_msi);
if (IS_ERR(qdev->mhi_cntrl)) {
ret = PTR_ERR(qdev->mhi_cntrl);
goto cleanup_drm_dev;
qaic_destroy_drm_device(qdev, QAIC_NO_PARTITION);
return ret;
}
return 0;
cleanup_drm_dev:
qaic_destroy_drm_device(qdev, QAIC_NO_PARTITION);
cleanup_qdev:
cleanup_qdev(qdev);
return ret;
}
static void qaic_pci_remove(struct pci_dev *pdev)
@ -511,9 +550,8 @@ static void qaic_pci_remove(struct pci_dev *pdev)
return;
qaic_dev_reset_clean_local_state(qdev);
qaic_destroy_drm_device(qdev, QAIC_NO_PARTITION);
qaic_mhi_free_controller(qdev->mhi_cntrl, link_up);
cleanup_qdev(qdev);
qaic_destroy_drm_device(qdev, QAIC_NO_PARTITION);
}
static void qaic_pci_shutdown(struct pci_dev *pdev)

View File

@ -138,7 +138,6 @@ struct agp_bridge_data {
unsigned long gart_bus_addr;
unsigned long gatt_bus_addr;
u32 mode;
enum chipset_type type;
unsigned long *key_list;
atomic_t current_memory_agp;
atomic_t agp_in_use;

View File

@ -102,7 +102,7 @@ static atomic64_t dma_fence_context_counter = ATOMIC64_INIT(1);
*
* * Drivers are allowed to call dma_fence_wait() from their &mmu_notifier
* respectively &mmu_interval_notifier callbacks. This means any code required
* for fence completeion cannot allocate memory with GFP_NOFS or GFP_NOIO.
* for fence completion cannot allocate memory with GFP_NOFS or GFP_NOIO.
* Only GFP_ATOMIC is permissible, which might fail.
*
* Note that only GPU drivers have a reasonable excuse for both requiring
@ -522,7 +522,7 @@ dma_fence_wait_timeout(struct dma_fence *fence, bool intr, signed long timeout)
EXPORT_SYMBOL(dma_fence_wait_timeout);
/**
* dma_fence_release - default relese function for fences
* dma_fence_release - default release function for fences
* @kref: &dma_fence.recfount
*
* This is the default release functions for &dma_fence. Drivers shouldn't call
@ -974,8 +974,8 @@ void dma_fence_set_deadline(struct dma_fence *fence, ktime_t deadline)
EXPORT_SYMBOL(dma_fence_set_deadline);
/**
* dma_fence_describe - Dump fence describtion into seq_file
* @fence: the 6fence to describe
* dma_fence_describe - Dump fence description into seq_file
* @fence: the fence to describe
* @seq: the seq_file to put the textual description into
*
* Dump a textual description of the fence and it's state into the seq_file.

View File

@ -405,7 +405,7 @@ static void dma_resv_iter_walk_unlocked(struct dma_resv_iter *cursor)
*
* Beware that the iterator can be restarted. Code which accumulates statistics
* or similar needs to check for this with dma_resv_iter_is_restarted(). For
* this reason prefer the locked dma_resv_iter_first() whenver possible.
* this reason prefer the locked dma_resv_iter_first() whenever possible.
*
* Returns the first fence from an unlocked dma_resv obj.
*/
@ -428,7 +428,7 @@ EXPORT_SYMBOL(dma_resv_iter_first_unlocked);
*
* Beware that the iterator can be restarted. Code which accumulates statistics
* or similar needs to check for this with dma_resv_iter_is_restarted(). For
* this reason prefer the locked dma_resv_iter_next() whenver possible.
* this reason prefer the locked dma_resv_iter_next() whenever possible.
*
* Returns the next fence from an unlocked dma_resv obj.
*/

View File

@ -182,6 +182,7 @@ config MTK_ADSP_IPC
config SYSFB
bool
select BOOT_VESA_SUPPORT
select SCREEN_INFO
config SYSFB_SIMPLEFB
bool "Mark VGA/VBE/EFI FB as generic system framebuffer"

View File

@ -29,6 +29,7 @@
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/pci.h>
#include <linux/platform_data/simplefb.h>
#include <linux/platform_device.h>
#include <linux/screen_info.h>
@ -69,24 +70,72 @@ void sysfb_disable(void)
}
EXPORT_SYMBOL_GPL(sysfb_disable);
#if defined(CONFIG_PCI)
static __init bool sysfb_pci_dev_is_enabled(struct pci_dev *pdev)
{
/*
* TODO: Try to integrate this code into the PCI subsystem
*/
int ret;
u16 command;
ret = pci_read_config_word(pdev, PCI_COMMAND, &command);
if (ret != PCIBIOS_SUCCESSFUL)
return false;
if (!(command & PCI_COMMAND_MEMORY))
return false;
return true;
}
#else
static __init bool sysfb_pci_dev_is_enabled(struct pci_dev *pdev)
{
return false;
}
#endif
static __init struct device *sysfb_parent_dev(const struct screen_info *si)
{
struct pci_dev *pdev;
pdev = screen_info_pci_dev(si);
if (IS_ERR(pdev)) {
return ERR_CAST(pdev);
} else if (pdev) {
if (!sysfb_pci_dev_is_enabled(pdev))
return ERR_PTR(-ENODEV);
return &pdev->dev;
}
return NULL;
}
static __init int sysfb_init(void)
{
struct screen_info *si = &screen_info;
struct device *parent;
struct simplefb_platform_data mode;
const char *name;
bool compatible;
int ret = 0;
screen_info_apply_fixups();
mutex_lock(&disable_lock);
if (disabled)
goto unlock_mutex;
sysfb_apply_efi_quirks();
parent = sysfb_parent_dev(si);
if (IS_ERR(parent)) {
ret = PTR_ERR(parent);
goto unlock_mutex;
}
/* try to create a simple-framebuffer device */
compatible = sysfb_parse_mode(si, &mode);
if (compatible) {
pd = sysfb_create_simplefb(si, &mode);
pd = sysfb_create_simplefb(si, &mode, parent);
if (!IS_ERR(pd))
goto unlock_mutex;
}
@ -109,6 +158,8 @@ static __init int sysfb_init(void)
goto unlock_mutex;
}
pd->dev.parent = parent;
sysfb_set_efifb_fwnode(pd);
ret = platform_device_add_data(pd, si, sizeof(*si));

View File

@ -91,7 +91,8 @@ __init bool sysfb_parse_mode(const struct screen_info *si,
}
__init struct platform_device *sysfb_create_simplefb(const struct screen_info *si,
const struct simplefb_platform_data *mode)
const struct simplefb_platform_data *mode,
struct device *parent)
{
struct platform_device *pd;
struct resource res;
@ -143,6 +144,8 @@ __init struct platform_device *sysfb_create_simplefb(const struct screen_info *s
if (!pd)
return ERR_PTR(-ENOMEM);
pd->dev.parent = parent;
sysfb_set_efifb_fwnode(pd);
ret = platform_device_add_resources(pd, &res, 1);

View File

@ -19,8 +19,7 @@ menuconfig DRM
# gallium uses SYS_kcmp for os_same_file_description() to de-duplicate
# device and dmabuf fd. Let's make sure that is available for our userspace.
select KCMP
select VIDEO_CMDLINE
select VIDEO_NOMODESET
select VIDEO
help
Kernel-level support for the Direct Rendering Infrastructure (DRI)
introduced in XFree86 4.0. If you say Y here, you need to select
@ -42,7 +41,7 @@ config DRM_MIPI_DSI
config DRM_DEBUG_MM
bool "Insert extra checks and debug info into the DRM range managers"
default n
depends on DRM=y
depends on DRM
depends on STACKTRACE_SUPPORT
select STACKDEPOT
help
@ -290,19 +289,7 @@ config DRM_VGEM
as used by Mesa's software renderer for enhanced performance.
If M is selected the module will be called vgem.
config DRM_VKMS
tristate "Virtual KMS (EXPERIMENTAL)"
depends on DRM && MMU
select DRM_KMS_HELPER
select DRM_GEM_SHMEM_HELPER
select CRC32
default n
help
Virtual Kernel Mode-Setting (VKMS) is used for testing or for
running GPU in a headless machines. Choose this option to get
a VKMS.
If M is selected the module will be called vkms.
source "drivers/gpu/drm/vkms/Kconfig"
source "drivers/gpu/drm/exynos/Kconfig"

View File

@ -80,7 +80,7 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \
amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
amdgpu_fw_attestation.o amdgpu_securedisplay.o \
amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o
amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o
amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
@ -98,7 +98,7 @@ amdgpu-y += \
vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o arct_reg_init.o mxgpu_nv.o \
nbio_v7_2.o hdp_v4_0.o hdp_v5_0.o aldebaran_reg_init.o aldebaran.o soc21.o \
sienna_cichlid.o smu_v13_0_10.o nbio_v4_3.o hdp_v6_0.o nbio_v7_7.o hdp_v5_2.o lsdma_v6_0.o \
nbio_v7_9.o aqua_vanjaram.o nbio_v7_11.o
nbio_v7_9.o aqua_vanjaram.o nbio_v7_11.o lsdma_v7_0.o hdp_v7_0.o nbif_v6_3_1.o
# add DF block
amdgpu-y += \
@ -132,7 +132,8 @@ amdgpu-y += \
vega20_ih.o \
navi10_ih.o \
ih_v6_0.o \
ih_v6_1.o
ih_v6_1.o \
ih_v7_0.o
# add PSP block
amdgpu-y += \
@ -143,7 +144,8 @@ amdgpu-y += \
psp_v11_0_8.o \
psp_v12_0.o \
psp_v13_0.o \
psp_v13_0_4.o
psp_v13_0_4.o \
psp_v14_0.o
# add DCE block
amdgpu-y += \
@ -208,6 +210,7 @@ amdgpu-y += \
vcn_v4_0.o \
vcn_v4_0_3.o \
vcn_v4_0_5.o \
vcn_v5_0_0.o \
amdgpu_jpeg.o \
jpeg_v1_0.o \
jpeg_v2_0.o \
@ -215,7 +218,8 @@ amdgpu-y += \
jpeg_v3_0.o \
jpeg_v4_0.o \
jpeg_v4_0_3.o \
jpeg_v4_0_5.o
jpeg_v4_0_5.o \
jpeg_v5_0_0.o
# add VPE block
amdgpu-y += \
@ -233,7 +237,8 @@ amdgpu-y += \
athub_v1_0.o \
athub_v2_0.o \
athub_v2_1.o \
athub_v3_0.o
athub_v3_0.o \
athub_v4_1_0.o
# add SMUIO block
amdgpu-y += \

View File

@ -107,6 +107,7 @@
#include "amdgpu_smuio.h"
#include "amdgpu_fdinfo.h"
#include "amdgpu_mca.h"
#include "amdgpu_aca.h"
#include "amdgpu_ras.h"
#include "amdgpu_xcp.h"
#include "amdgpu_seq64.h"
@ -114,14 +115,12 @@
#define MAX_GPU_INSTANCE 64
struct amdgpu_gpu_instance
{
struct amdgpu_gpu_instance {
struct amdgpu_device *adev;
int mgpu_fan_enabled;
};
struct amdgpu_mgpu_info
{
struct amdgpu_mgpu_info {
struct amdgpu_gpu_instance gpu_ins[MAX_GPU_INSTANCE];
struct mutex mutex;
uint32_t num_gpu;
@ -140,8 +139,7 @@ enum amdgpu_ss {
AMDGPU_SS_DRV_UNLOAD
};
struct amdgpu_watchdog_timer
{
struct amdgpu_watchdog_timer {
bool timeout_fatal_disable;
uint32_t period; /* maxCycles = (1 << period), the number of cycles before a timeout */
};
@ -196,9 +194,10 @@ extern int amdgpu_emu_mode;
extern uint amdgpu_smu_memory_pool_size;
extern int amdgpu_smu_pptable_id;
extern uint amdgpu_dc_feature_mask;
extern uint amdgpu_freesync_vid_mode;
extern uint amdgpu_dc_debug_mask;
extern uint amdgpu_dc_visual_confirm;
extern uint amdgpu_dm_abm_level;
extern int amdgpu_dm_abm_level;
extern int amdgpu_backlight;
extern int amdgpu_damage_clips;
extern struct amdgpu_mgpu_info mgpu_info;
@ -1046,6 +1045,9 @@ struct amdgpu_device {
/* MCA */
struct amdgpu_mca mca;
/* ACA */
struct amdgpu_aca aca;
struct amdgpu_ip_block ip_blocks[AMDGPU_MAX_IP_NUM];
uint32_t harvest_ip_mask;
int num_ip_blocks;
@ -1095,6 +1097,7 @@ struct amdgpu_device {
long sdma_timeout;
long video_timeout;
long compute_timeout;
long psp_timeout;
uint64_t unique_id;
uint64_t df_perfmon_config_assign_mask[AMDGPU_MAX_DF_PERFMONS];
@ -1332,6 +1335,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
#define WREG32_FIELD_OFFSET(reg, offset, field, val) \
WREG32(mm##reg + offset, (RREG32(mm##reg + offset) & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field))
#define AMDGPU_GET_REG_FIELD(x, h, l) (((x) & GENMASK_ULL(h, l)) >> (l))
/*
* BIOS helpers.
*/

View File

@ -0,0 +1,879 @@
/*
* Copyright 2023 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include <linux/list.h>
#include "amdgpu.h"
#include "amdgpu_aca.h"
#include "amdgpu_ras.h"
#define ACA_BANK_HWID(type, hwid, mcatype) [ACA_HWIP_TYPE_##type] = {hwid, mcatype}
typedef int bank_handler_t(struct aca_handle *handle, struct aca_bank *bank, enum aca_error_type type, void *data);
struct aca_banks {
int nr_banks;
struct list_head list;
};
struct aca_hwip {
int hwid;
int mcatype;
};
static struct aca_hwip aca_hwid_mcatypes[ACA_HWIP_TYPE_COUNT] = {
ACA_BANK_HWID(SMU, 0x01, 0x01),
ACA_BANK_HWID(PCS_XGMI, 0x50, 0x00),
ACA_BANK_HWID(UMC, 0x96, 0x00),
};
static void aca_banks_init(struct aca_banks *banks)
{
if (!banks)
return;
memset(banks, 0, sizeof(*banks));
INIT_LIST_HEAD(&banks->list);
}
static int aca_banks_add_bank(struct aca_banks *banks, struct aca_bank *bank)
{
struct aca_bank_node *node;
if (!bank)
return -EINVAL;
node = kvzalloc(sizeof(*node), GFP_KERNEL);
if (!node)
return -ENOMEM;
memcpy(&node->bank, bank, sizeof(*bank));
INIT_LIST_HEAD(&node->node);
list_add_tail(&node->node, &banks->list);
banks->nr_banks++;
return 0;
}
static void aca_banks_release(struct aca_banks *banks)
{
struct aca_bank_node *node, *tmp;
list_for_each_entry_safe(node, tmp, &banks->list, node) {
list_del(&node->node);
kvfree(node);
}
}
static int aca_smu_get_valid_aca_count(struct amdgpu_device *adev, enum aca_error_type type, u32 *count)
{
struct amdgpu_aca *aca = &adev->aca;
const struct aca_smu_funcs *smu_funcs = aca->smu_funcs;
if (!count)
return -EINVAL;
if (!smu_funcs || !smu_funcs->get_valid_aca_count)
return -EOPNOTSUPP;
return smu_funcs->get_valid_aca_count(adev, type, count);
}
static struct aca_regs_dump {
const char *name;
int reg_idx;
} aca_regs[] = {
{"CONTROL", ACA_REG_IDX_CTL},
{"STATUS", ACA_REG_IDX_STATUS},
{"ADDR", ACA_REG_IDX_ADDR},
{"MISC", ACA_REG_IDX_MISC0},
{"CONFIG", ACA_REG_IDX_CONFG},
{"IPID", ACA_REG_IDX_IPID},
{"SYND", ACA_REG_IDX_SYND},
{"DESTAT", ACA_REG_IDX_DESTAT},
{"DEADDR", ACA_REG_IDX_DEADDR},
{"CONTROL_MASK", ACA_REG_IDX_CTL_MASK},
};
static void aca_smu_bank_dump(struct amdgpu_device *adev, int idx, int total, struct aca_bank *bank)
{
int i;
dev_info(adev->dev, HW_ERR "Accelerator Check Architecture events logged\n");
/* plus 1 for output format, e.g: ACA[08/08]: xxxx */
for (i = 0; i < ARRAY_SIZE(aca_regs); i++)
dev_info(adev->dev, HW_ERR "ACA[%02d/%02d].%s=0x%016llx\n",
idx + 1, total, aca_regs[i].name, bank->regs[aca_regs[i].reg_idx]);
}
static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_error_type type,
int start, int count,
struct aca_banks *banks)
{
struct amdgpu_aca *aca = &adev->aca;
const struct aca_smu_funcs *smu_funcs = aca->smu_funcs;
struct aca_bank bank;
int i, max_count, ret;
if (!count)
return 0;
if (!smu_funcs || !smu_funcs->get_valid_aca_bank)
return -EOPNOTSUPP;
switch (type) {
case ACA_ERROR_TYPE_UE:
max_count = smu_funcs->max_ue_bank_count;
break;
case ACA_ERROR_TYPE_CE:
max_count = smu_funcs->max_ce_bank_count;
break;
case ACA_ERROR_TYPE_DEFERRED:
default:
return -EINVAL;
}
if (start + count >= max_count)
return -EINVAL;
count = min_t(int, count, max_count);
for (i = 0; i < count; i++) {
memset(&bank, 0, sizeof(bank));
ret = smu_funcs->get_valid_aca_bank(adev, type, start + i, &bank);
if (ret)
return ret;
aca_smu_bank_dump(adev, i, count, &bank);
ret = aca_banks_add_bank(banks, &bank);
if (ret)
return ret;
}
return 0;
}
static bool aca_bank_hwip_is_matched(struct aca_bank *bank, enum aca_hwip_type type)
{
struct aca_hwip *hwip;
int hwid, mcatype;
u64 ipid;
if (!bank || type == ACA_HWIP_TYPE_UNKNOW)
return false;
hwip = &aca_hwid_mcatypes[type];
if (!hwip->hwid)
return false;
ipid = bank->regs[ACA_REG_IDX_IPID];
hwid = ACA_REG__IPID__HARDWAREID(ipid);
mcatype = ACA_REG__IPID__MCATYPE(ipid);
return hwip->hwid == hwid && hwip->mcatype == mcatype;
}
static bool aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, enum aca_error_type type)
{
const struct aca_bank_ops *bank_ops = handle->bank_ops;
if (!aca_bank_hwip_is_matched(bank, handle->hwip))
return false;
if (!bank_ops->aca_bank_is_valid)
return true;
return bank_ops->aca_bank_is_valid(handle, bank, type, handle->data);
}
static struct aca_bank_error *new_bank_error(struct aca_error *aerr, struct aca_bank_info *info)
{
struct aca_bank_error *bank_error;
bank_error = kvzalloc(sizeof(*bank_error), GFP_KERNEL);
if (!bank_error)
return NULL;
INIT_LIST_HEAD(&bank_error->node);
memcpy(&bank_error->info, info, sizeof(*info));
mutex_lock(&aerr->lock);
list_add_tail(&bank_error->node, &aerr->list);
mutex_unlock(&aerr->lock);
return bank_error;
}
static struct aca_bank_error *find_bank_error(struct aca_error *aerr, struct aca_bank_info *info)
{
struct aca_bank_error *bank_error = NULL;
struct aca_bank_info *tmp_info;
bool found = false;
mutex_lock(&aerr->lock);
list_for_each_entry(bank_error, &aerr->list, node) {
tmp_info = &bank_error->info;
if (tmp_info->socket_id == info->socket_id &&
tmp_info->die_id == info->die_id) {
found = true;
goto out_unlock;
}
}
out_unlock:
mutex_unlock(&aerr->lock);
return found ? bank_error : NULL;
}
static void aca_bank_error_remove(struct aca_error *aerr, struct aca_bank_error *bank_error)
{
if (!aerr || !bank_error)
return;
list_del(&bank_error->node);
aerr->nr_errors--;
kvfree(bank_error);
}
static struct aca_bank_error *get_bank_error(struct aca_error *aerr, struct aca_bank_info *info)
{
struct aca_bank_error *bank_error;
if (!aerr || !info)
return NULL;
bank_error = find_bank_error(aerr, info);
if (bank_error)
return bank_error;
return new_bank_error(aerr, info);
}
static int aca_log_errors(struct aca_handle *handle, enum aca_error_type type,
struct aca_bank_report *report)
{
struct aca_error_cache *error_cache = &handle->error_cache;
struct aca_bank_error *bank_error;
struct aca_error *aerr;
if (!handle || !report)
return -EINVAL;
if (!report->count[type])
return 0;
aerr = &error_cache->errors[type];
bank_error = get_bank_error(aerr, &report->info);
if (!bank_error)
return -ENOMEM;
bank_error->count[type] += report->count[type];
return 0;
}
static int aca_generate_bank_report(struct aca_handle *handle, struct aca_bank *bank,
enum aca_error_type type, struct aca_bank_report *report)
{
const struct aca_bank_ops *bank_ops = handle->bank_ops;
if (!bank || !report)
return -EINVAL;
if (!bank_ops->aca_bank_generate_report)
return -EOPNOTSUPP;
memset(report, 0, sizeof(*report));
return bank_ops->aca_bank_generate_report(handle, bank, type,
report, handle->data);
}
static int handler_aca_log_bank_error(struct aca_handle *handle, struct aca_bank *bank,
enum aca_error_type type, void *data)
{
struct aca_bank_report report;
int ret;
ret = aca_generate_bank_report(handle, bank, type, &report);
if (ret)
return ret;
if (!report.count[type])
return 0;
ret = aca_log_errors(handle, type, &report);
if (ret)
return ret;
return 0;
}
static int aca_dispatch_bank(struct aca_handle_manager *mgr, struct aca_bank *bank,
enum aca_error_type type, bank_handler_t handler, void *data)
{
struct aca_handle *handle;
int ret;
if (list_empty(&mgr->list))
return 0;
list_for_each_entry(handle, &mgr->list, node) {
if (!aca_bank_is_valid(handle, bank, type))
continue;
ret = handler(handle, bank, type, data);
if (ret)
return ret;
}
return 0;
}
static int aca_dispatch_banks(struct aca_handle_manager *mgr, struct aca_banks *banks,
enum aca_error_type type, bank_handler_t handler, void *data)
{
struct aca_bank_node *node;
struct aca_bank *bank;
int ret;
if (!mgr || !banks)
return -EINVAL;
/* pre check to avoid unnecessary operations */
if (list_empty(&mgr->list) || list_empty(&banks->list))
return 0;
list_for_each_entry(node, &banks->list, node) {
bank = &node->bank;
ret = aca_dispatch_bank(mgr, bank, type, handler, data);
if (ret)
return ret;
}
return 0;
}
static int aca_banks_update(struct amdgpu_device *adev, enum aca_error_type type,
bank_handler_t handler, void *data)
{
struct amdgpu_aca *aca = &adev->aca;
struct aca_banks banks;
u32 count = 0;
int ret;
if (list_empty(&aca->mgr.list))
return 0;
/* NOTE: pmfw is only support UE and CE */
if (type == ACA_ERROR_TYPE_DEFERRED)
type = ACA_ERROR_TYPE_CE;
ret = aca_smu_get_valid_aca_count(adev, type, &count);
if (ret)
return ret;
if (!count)
return 0;
aca_banks_init(&banks);
ret = aca_smu_get_valid_aca_banks(adev, type, 0, count, &banks);
if (ret)
goto err_release_banks;
if (list_empty(&banks.list)) {
ret = 0;
goto err_release_banks;
}
ret = aca_dispatch_banks(&aca->mgr, &banks, type,
handler, data);
if (ret)
goto err_release_banks;
err_release_banks:
aca_banks_release(&banks);
return ret;
}
static int aca_log_aca_error_data(struct aca_bank_error *bank_error, enum aca_error_type type, struct ras_err_data *err_data)
{
struct aca_bank_info *info;
struct amdgpu_smuio_mcm_config_info mcm_info;
u64 count;
if (type >= ACA_ERROR_TYPE_COUNT)
return -EINVAL;
count = bank_error->count[type];
if (!count)
return 0;
info = &bank_error->info;
mcm_info.die_id = info->die_id;
mcm_info.socket_id = info->socket_id;
switch (type) {
case ACA_ERROR_TYPE_UE:
amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, NULL, count);
break;
case ACA_ERROR_TYPE_CE:
amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, NULL, count);
break;
case ACA_ERROR_TYPE_DEFERRED:
default:
break;
}
return 0;
}
static int aca_log_aca_error(struct aca_handle *handle, enum aca_error_type type, struct ras_err_data *err_data)
{
struct aca_error_cache *error_cache = &handle->error_cache;
struct aca_error *aerr = &error_cache->errors[type];
struct aca_bank_error *bank_error, *tmp;
mutex_lock(&aerr->lock);
if (list_empty(&aerr->list))
goto out_unlock;
list_for_each_entry_safe(bank_error, tmp, &aerr->list, node) {
aca_log_aca_error_data(bank_error, type, err_data);
aca_bank_error_remove(aerr, bank_error);
}
out_unlock:
mutex_unlock(&aerr->lock);
return 0;
}
static int __aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle, enum aca_error_type type,
struct ras_err_data *err_data)
{
int ret;
/* udpate aca bank to aca source error_cache first */
ret = aca_banks_update(adev, type, handler_aca_log_bank_error, NULL);
if (ret)
return ret;
return aca_log_aca_error(handle, type, err_data);
}
static bool aca_handle_is_valid(struct aca_handle *handle)
{
if (!handle->mask || !list_empty(&handle->node))
return false;
return true;
}
int amdgpu_aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle,
enum aca_error_type type, void *data)
{
struct ras_err_data *err_data = (struct ras_err_data *)data;
if (!handle || !err_data)
return -EINVAL;
if (aca_handle_is_valid(handle))
return -EOPNOTSUPP;
if (!(BIT(type) & handle->mask))
return 0;
return __aca_get_error_data(adev, handle, type, err_data);
}
static void aca_error_init(struct aca_error *aerr, enum aca_error_type type)
{
mutex_init(&aerr->lock);
INIT_LIST_HEAD(&aerr->list);
aerr->type = type;
aerr->nr_errors = 0;
}
static void aca_init_error_cache(struct aca_handle *handle)
{
struct aca_error_cache *error_cache = &handle->error_cache;
int type;
for (type = ACA_ERROR_TYPE_UE; type < ACA_ERROR_TYPE_COUNT; type++)
aca_error_init(&error_cache->errors[type], type);
}
static void aca_error_fini(struct aca_error *aerr)
{
struct aca_bank_error *bank_error, *tmp;
mutex_lock(&aerr->lock);
list_for_each_entry_safe(bank_error, tmp, &aerr->list, node)
aca_bank_error_remove(aerr, bank_error);
mutex_destroy(&aerr->lock);
}
static void aca_fini_error_cache(struct aca_handle *handle)
{
struct aca_error_cache *error_cache = &handle->error_cache;
int type;
for (type = ACA_ERROR_TYPE_UE; type < ACA_ERROR_TYPE_COUNT; type++)
aca_error_fini(&error_cache->errors[type]);
}
static int add_aca_handle(struct amdgpu_device *adev, struct aca_handle_manager *mgr, struct aca_handle *handle,
const char *name, const struct aca_info *ras_info, void *data)
{
memset(handle, 0, sizeof(*handle));
handle->adev = adev;
handle->mgr = mgr;
handle->name = name;
handle->hwip = ras_info->hwip;
handle->mask = ras_info->mask;
handle->bank_ops = ras_info->bank_ops;
handle->data = data;
aca_init_error_cache(handle);
INIT_LIST_HEAD(&handle->node);
list_add_tail(&handle->node, &mgr->list);
mgr->nr_handles++;
return 0;
}
static ssize_t aca_sysfs_read(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct aca_handle *handle = container_of(attr, struct aca_handle, aca_attr);
/* NOTE: the aca cache will be auto cleared once read,
* So the driver should unify the query entry point, forward request to ras query interface directly */
return amdgpu_ras_aca_sysfs_read(dev, attr, handle, buf, handle->data);
}
static int add_aca_sysfs(struct amdgpu_device *adev, struct aca_handle *handle)
{
struct device_attribute *aca_attr = &handle->aca_attr;
snprintf(handle->attr_name, sizeof(handle->attr_name) - 1, "aca_%s", handle->name);
aca_attr->show = aca_sysfs_read;
aca_attr->attr.name = handle->attr_name;
aca_attr->attr.mode = S_IRUGO;
sysfs_attr_init(&aca_attr->attr);
return sysfs_add_file_to_group(&adev->dev->kobj,
&aca_attr->attr,
"ras");
}
int amdgpu_aca_add_handle(struct amdgpu_device *adev, struct aca_handle *handle,
const char *name, const struct aca_info *ras_info, void *data)
{
struct amdgpu_aca *aca = &adev->aca;
int ret;
if (!amdgpu_aca_is_enabled(adev))
return 0;
ret = add_aca_handle(adev, &aca->mgr, handle, name, ras_info, data);
if (ret)
return ret;
return add_aca_sysfs(adev, handle);
}
static void remove_aca_handle(struct aca_handle *handle)
{
struct aca_handle_manager *mgr = handle->mgr;
aca_fini_error_cache(handle);
list_del(&handle->node);
mgr->nr_handles--;
}
static void remove_aca_sysfs(struct aca_handle *handle)
{
struct amdgpu_device *adev = handle->adev;
struct device_attribute *aca_attr = &handle->aca_attr;
if (adev->dev->kobj.sd)
sysfs_remove_file_from_group(&adev->dev->kobj,
&aca_attr->attr,
"ras");
}
void amdgpu_aca_remove_handle(struct aca_handle *handle)
{
if (!handle || list_empty(&handle->node))
return;
remove_aca_sysfs(handle);
remove_aca_handle(handle);
}
static int aca_manager_init(struct aca_handle_manager *mgr)
{
INIT_LIST_HEAD(&mgr->list);
mgr->nr_handles = 0;
return 0;
}
static void aca_manager_fini(struct aca_handle_manager *mgr)
{
struct aca_handle *handle, *tmp;
list_for_each_entry_safe(handle, tmp, &mgr->list, node)
amdgpu_aca_remove_handle(handle);
}
bool amdgpu_aca_is_enabled(struct amdgpu_device *adev)
{
return adev->aca.is_enabled;
}
int amdgpu_aca_init(struct amdgpu_device *adev)
{
struct amdgpu_aca *aca = &adev->aca;
int ret;
ret = aca_manager_init(&aca->mgr);
if (ret)
return ret;
return 0;
}
void amdgpu_aca_fini(struct amdgpu_device *adev)
{
struct amdgpu_aca *aca = &adev->aca;
aca_manager_fini(&aca->mgr);
}
int amdgpu_aca_reset(struct amdgpu_device *adev)
{
amdgpu_aca_fini(adev);
return amdgpu_aca_init(adev);
}
void amdgpu_aca_set_smu_funcs(struct amdgpu_device *adev, const struct aca_smu_funcs *smu_funcs)
{
struct amdgpu_aca *aca = &adev->aca;
WARN_ON(aca->smu_funcs);
aca->smu_funcs = smu_funcs;
}
int aca_bank_info_decode(struct aca_bank *bank, struct aca_bank_info *info)
{
u64 ipid;
u32 instidhi, instidlo;
if (!bank || !info)
return -EINVAL;
ipid = bank->regs[ACA_REG_IDX_IPID];
info->hwid = ACA_REG__IPID__HARDWAREID(ipid);
info->mcatype = ACA_REG__IPID__MCATYPE(ipid);
/*
* Unfied DieID Format: SAASS. A:AID, S:Socket.
* Unfied DieID[4:4] = InstanceId[0:0]
* Unfied DieID[0:3] = InstanceIdHi[0:3]
*/
instidhi = ACA_REG__IPID__INSTANCEIDHI(ipid);
instidlo = ACA_REG__IPID__INSTANCEIDLO(ipid);
info->die_id = ((instidhi >> 2) & 0x03);
info->socket_id = ((instidlo & 0x1) << 2) | (instidhi & 0x03);
return 0;
}
static int aca_bank_get_error_code(struct amdgpu_device *adev, struct aca_bank *bank)
{
int error_code;
switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
case IP_VERSION(13, 0, 6):
if (!(adev->flags & AMD_IS_APU) && adev->pm.fw_version >= 0x00555600) {
error_code = ACA_REG__SYND__ERRORINFORMATION(bank->regs[ACA_REG_IDX_SYND]);
return error_code & 0xff;
}
break;
default:
break;
}
/* NOTE: the true error code is encoded in status.errorcode[0:7] */
error_code = ACA_REG__STATUS__ERRORCODE(bank->regs[ACA_REG_IDX_STATUS]);
return error_code & 0xff;
}
int aca_bank_check_error_codes(struct amdgpu_device *adev, struct aca_bank *bank, int *err_codes, int size)
{
int i, error_code;
if (!bank || !err_codes)
return -EINVAL;
error_code = aca_bank_get_error_code(adev, bank);
for (i = 0; i < size; i++) {
if (err_codes[i] == error_code)
return 0;
}
return -EINVAL;
}
int amdgpu_aca_smu_set_debug_mode(struct amdgpu_device *adev, bool en)
{
struct amdgpu_aca *aca = &adev->aca;
const struct aca_smu_funcs *smu_funcs = aca->smu_funcs;
if (!smu_funcs || !smu_funcs->set_debug_mode)
return -EOPNOTSUPP;
return smu_funcs->set_debug_mode(adev, en);
}
#if defined(CONFIG_DEBUG_FS)
static int amdgpu_aca_smu_debug_mode_set(void *data, u64 val)
{
struct amdgpu_device *adev = (struct amdgpu_device *)data;
int ret;
ret = amdgpu_ras_set_aca_debug_mode(adev, val ? true : false);
if (ret)
return ret;
dev_info(adev->dev, "amdgpu set smu aca debug mode %s success\n", val ? "on" : "off");
return 0;
}
static void aca_dump_entry(struct seq_file *m, struct aca_bank *bank, enum aca_error_type type, int idx)
{
struct aca_bank_info info;
int i, ret;
ret = aca_bank_info_decode(bank, &info);
if (ret)
return;
seq_printf(m, "aca entry[%d].type: %s\n", idx, type == ACA_ERROR_TYPE_UE ? "UE" : "CE");
seq_printf(m, "aca entry[%d].info: socketid:%d aid:%d hwid:0x%03x mcatype:0x%04x\n",
idx, info.socket_id, info.die_id, info.hwid, info.mcatype);
for (i = 0; i < ARRAY_SIZE(aca_regs); i++)
seq_printf(m, "aca entry[%d].regs[%d]: 0x%016llx\n", idx, aca_regs[i].reg_idx, bank->regs[aca_regs[i].reg_idx]);
}
struct aca_dump_context {
struct seq_file *m;
int idx;
};
static int handler_aca_bank_dump(struct aca_handle *handle, struct aca_bank *bank,
enum aca_error_type type, void *data)
{
struct aca_dump_context *ctx = (struct aca_dump_context *)data;
aca_dump_entry(ctx->m, bank, type, ctx->idx++);
return handler_aca_log_bank_error(handle, bank, type, NULL);
}
static int aca_dump_show(struct seq_file *m, enum aca_error_type type)
{
struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
struct aca_dump_context context = {
.m = m,
.idx = 0,
};
return aca_banks_update(adev, type, handler_aca_bank_dump, (void *)&context);
}
static int aca_dump_ce_show(struct seq_file *m, void *unused)
{
return aca_dump_show(m, ACA_ERROR_TYPE_CE);
}
static int aca_dump_ce_open(struct inode *inode, struct file *file)
{
return single_open(file, aca_dump_ce_show, inode->i_private);
}
static const struct file_operations aca_ce_dump_debug_fops = {
.owner = THIS_MODULE,
.open = aca_dump_ce_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static int aca_dump_ue_show(struct seq_file *m, void *unused)
{
return aca_dump_show(m, ACA_ERROR_TYPE_UE);
}
static int aca_dump_ue_open(struct inode *inode, struct file *file)
{
return single_open(file, aca_dump_ue_show, inode->i_private);
}
static const struct file_operations aca_ue_dump_debug_fops = {
.owner = THIS_MODULE,
.open = aca_dump_ue_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
DEFINE_DEBUGFS_ATTRIBUTE(aca_debug_mode_fops, NULL, amdgpu_aca_smu_debug_mode_set, "%llu\n");
#endif
void amdgpu_aca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root)
{
#if defined(CONFIG_DEBUG_FS)
if (!root || adev->ip_versions[MP1_HWIP][0] != IP_VERSION(13, 0, 6))
return;
debugfs_create_file("aca_debug_mode", 0200, root, adev, &aca_debug_mode_fops);
debugfs_create_file("aca_ue_dump", 0400, root, adev, &aca_ue_dump_debug_fops);
debugfs_create_file("aca_ce_dump", 0400, root, adev, &aca_ce_dump_debug_fops);
#endif
}

View File

@ -0,0 +1,202 @@
/*
* Copyright 2023 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __AMDGPU_ACA_H__
#define __AMDGPU_ACA_H__
#include <linux/list.h>
#define ACA_MAX_REGS_COUNT (16)
#define ACA_REG_FIELD(x, h, l) (((x) & GENMASK_ULL(h, l)) >> l)
#define ACA_REG__STATUS__VAL(x) ACA_REG_FIELD(x, 63, 63)
#define ACA_REG__STATUS__OVERFLOW(x) ACA_REG_FIELD(x, 62, 62)
#define ACA_REG__STATUS__UC(x) ACA_REG_FIELD(x, 61, 61)
#define ACA_REG__STATUS__EN(x) ACA_REG_FIELD(x, 60, 60)
#define ACA_REG__STATUS__MISCV(x) ACA_REG_FIELD(x, 59, 59)
#define ACA_REG__STATUS__ADDRV(x) ACA_REG_FIELD(x, 58, 58)
#define ACA_REG__STATUS__PCC(x) ACA_REG_FIELD(x, 57, 57)
#define ACA_REG__STATUS__ERRCOREIDVAL(x) ACA_REG_FIELD(x, 56, 56)
#define ACA_REG__STATUS__TCC(x) ACA_REG_FIELD(x, 55, 55)
#define ACA_REG__STATUS__SYNDV(x) ACA_REG_FIELD(x, 53, 53)
#define ACA_REG__STATUS__CECC(x) ACA_REG_FIELD(x, 46, 46)
#define ACA_REG__STATUS__UECC(x) ACA_REG_FIELD(x, 45, 45)
#define ACA_REG__STATUS__DEFERRED(x) ACA_REG_FIELD(x, 44, 44)
#define ACA_REG__STATUS__POISON(x) ACA_REG_FIELD(x, 43, 43)
#define ACA_REG__STATUS__SCRUB(x) ACA_REG_FIELD(x, 40, 40)
#define ACA_REG__STATUS__ERRCOREID(x) ACA_REG_FIELD(x, 37, 32)
#define ACA_REG__STATUS__ADDRLSB(x) ACA_REG_FIELD(x, 29, 24)
#define ACA_REG__STATUS__ERRORCODEEXT(x) ACA_REG_FIELD(x, 21, 16)
#define ACA_REG__STATUS__ERRORCODE(x) ACA_REG_FIELD(x, 15, 0)
#define ACA_REG__IPID__MCATYPE(x) ACA_REG_FIELD(x, 63, 48)
#define ACA_REG__IPID__INSTANCEIDHI(x) ACA_REG_FIELD(x, 47, 44)
#define ACA_REG__IPID__HARDWAREID(x) ACA_REG_FIELD(x, 43, 32)
#define ACA_REG__IPID__INSTANCEIDLO(x) ACA_REG_FIELD(x, 31, 0)
#define ACA_REG__MISC0__VALID(x) ACA_REG_FIELD(x, 63, 63)
#define ACA_REG__MISC0__OVRFLW(x) ACA_REG_FIELD(x, 48, 48)
#define ACA_REG__MISC0__ERRCNT(x) ACA_REG_FIELD(x, 43, 32)
#define ACA_REG__SYND__ERRORINFORMATION(x) ACA_REG_FIELD(x, 17, 0)
/* NOTE: The following codes refers to the smu header file */
#define ACA_EXTERROR_CODE_CE 0x3a
#define ACA_EXTERROR_CODE_FAULT 0x3b
#define ACA_ERROR_UE_MASK BIT_MASK(ACA_ERROR_TYPE_UE)
#define ACA_ERROR_CE_MASK BIT_MASK(ACA_ERROR_TYPE_CE)
#define ACA_ERROR_DEFERRED_MASK BIT_MASK(ACA_ERROR_TYPE_DEFERRED)
enum aca_reg_idx {
ACA_REG_IDX_CTL = 0,
ACA_REG_IDX_STATUS = 1,
ACA_REG_IDX_ADDR = 2,
ACA_REG_IDX_MISC0 = 3,
ACA_REG_IDX_CONFG = 4,
ACA_REG_IDX_IPID = 5,
ACA_REG_IDX_SYND = 6,
ACA_REG_IDX_DESTAT = 8,
ACA_REG_IDX_DEADDR = 9,
ACA_REG_IDX_CTL_MASK = 10,
ACA_REG_IDX_COUNT = 16,
};
enum aca_hwip_type {
ACA_HWIP_TYPE_UNKNOW = -1,
ACA_HWIP_TYPE_PSP = 0,
ACA_HWIP_TYPE_UMC,
ACA_HWIP_TYPE_SMU,
ACA_HWIP_TYPE_PCS_XGMI,
ACA_HWIP_TYPE_COUNT,
};
enum aca_error_type {
ACA_ERROR_TYPE_INVALID = -1,
ACA_ERROR_TYPE_UE = 0,
ACA_ERROR_TYPE_CE,
ACA_ERROR_TYPE_DEFERRED,
ACA_ERROR_TYPE_COUNT
};
struct aca_bank {
u64 regs[ACA_MAX_REGS_COUNT];
};
struct aca_bank_node {
struct aca_bank bank;
struct list_head node;
};
struct aca_bank_info {
int die_id;
int socket_id;
int hwid;
int mcatype;
};
struct aca_bank_report {
struct aca_bank_info info;
u64 count[ACA_ERROR_TYPE_COUNT];
};
struct aca_bank_error {
struct list_head node;
struct aca_bank_info info;
u64 count[ACA_ERROR_TYPE_COUNT];
};
struct aca_error {
struct list_head list;
struct mutex lock;
enum aca_error_type type;
int nr_errors;
};
struct aca_handle_manager {
struct list_head list;
int nr_handles;
};
struct aca_error_cache {
struct aca_error errors[ACA_ERROR_TYPE_COUNT];
};
struct aca_handle {
struct list_head node;
enum aca_hwip_type hwip;
struct amdgpu_device *adev;
struct aca_handle_manager *mgr;
struct aca_error_cache error_cache;
const struct aca_bank_ops *bank_ops;
struct device_attribute aca_attr;
char attr_name[64];
const char *name;
u32 mask;
void *data;
};
struct aca_bank_ops {
int (*aca_bank_generate_report)(struct aca_handle *handle, struct aca_bank *bank, enum aca_error_type type,
struct aca_bank_report *report, void *data);
bool (*aca_bank_is_valid)(struct aca_handle *handle, struct aca_bank *bank, enum aca_error_type type,
void *data);
};
struct aca_smu_funcs {
int max_ue_bank_count;
int max_ce_bank_count;
int (*set_debug_mode)(struct amdgpu_device *adev, bool enable);
int (*get_valid_aca_count)(struct amdgpu_device *adev, enum aca_error_type type, u32 *count);
int (*get_valid_aca_bank)(struct amdgpu_device *adev, enum aca_error_type type, int idx, struct aca_bank *bank);
};
struct amdgpu_aca {
struct aca_handle_manager mgr;
const struct aca_smu_funcs *smu_funcs;
bool is_enabled;
};
struct aca_info {
enum aca_hwip_type hwip;
const struct aca_bank_ops *bank_ops;
u32 mask;
};
int amdgpu_aca_init(struct amdgpu_device *adev);
void amdgpu_aca_fini(struct amdgpu_device *adev);
int amdgpu_aca_reset(struct amdgpu_device *adev);
void amdgpu_aca_set_smu_funcs(struct amdgpu_device *adev, const struct aca_smu_funcs *smu_funcs);
bool amdgpu_aca_is_enabled(struct amdgpu_device *adev);
int aca_bank_info_decode(struct aca_bank *bank, struct aca_bank_info *info);
int aca_bank_check_error_codes(struct amdgpu_device *adev, struct aca_bank *bank, int *err_codes, int size);
int amdgpu_aca_add_handle(struct amdgpu_device *adev, struct aca_handle *handle,
const char *name, const struct aca_info *aca_info, void *data);
void amdgpu_aca_remove_handle(struct aca_handle *handle);
int amdgpu_aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle,
enum aca_error_type type, void *data);
int amdgpu_aca_smu_set_debug_mode(struct amdgpu_device *adev, bool en);
void amdgpu_aca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root);
#endif

View File

@ -742,9 +742,15 @@ void amdgpu_amdkfd_debug_mem_fence(struct amdgpu_device *adev)
amdgpu_device_flush_hdp(adev, NULL);
}
void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev, bool reset)
bool amdgpu_amdkfd_is_fed(struct amdgpu_device *adev)
{
amdgpu_umc_poison_handler(adev, reset);
return amdgpu_ras_get_fed_status(adev);
}
void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev,
enum amdgpu_ras_block block, bool reset)
{
amdgpu_umc_poison_handler(adev, block, reset);
}
int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev,

View File

@ -193,6 +193,9 @@ struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo);
int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni,
unsigned long cur_seq, struct kgd_mem *mem);
int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo,
uint32_t domain,
struct dma_fence *fence);
#else
static inline
bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
@ -218,6 +221,13 @@ int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni,
{
return 0;
}
static inline
int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo,
uint32_t domain,
struct dma_fence *fence)
{
return 0;
}
#endif
/* Shared API */
int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size,
@ -310,7 +320,7 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem,
void **kptr, uint64_t *size);
void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem);
int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo);
int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_bo *bo);
int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
struct dma_fence __rcu **ef);
@ -326,7 +336,8 @@ void amdgpu_amdkfd_debug_mem_fence(struct amdgpu_device *adev);
int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev,
struct tile_config *config);
void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev,
bool reset);
enum amdgpu_ras_block block, bool reset);
bool amdgpu_amdkfd_is_fed(struct amdgpu_device *adev);
bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem *mem);
void amdgpu_amdkfd_block_mmu_notifications(void *p);
int amdgpu_amdkfd_criu_resume(void *p);

View File

@ -426,7 +426,7 @@ validate_fail:
return ret;
}
static int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo,
int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo,
uint32_t domain,
struct dma_fence *fence)
{
@ -464,13 +464,15 @@ static int amdgpu_amdkfd_validate_vm_bo(void *_unused, struct amdgpu_bo *bo)
* again. Page directories are only updated after updating page
* tables.
*/
static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm,
struct ww_acquire_ctx *ticket)
{
struct amdgpu_bo *pd = vm->root.bo;
struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
int ret;
ret = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_amdkfd_validate_vm_bo, NULL);
ret = amdgpu_vm_validate(adev, vm, ticket,
amdgpu_amdkfd_validate_vm_bo, NULL);
if (ret) {
pr_err("failed to validate PT BOs\n");
return ret;
@ -1310,14 +1312,15 @@ update_gpuvm_pte_failed:
return ret;
}
static int process_validate_vms(struct amdkfd_process_info *process_info)
static int process_validate_vms(struct amdkfd_process_info *process_info,
struct ww_acquire_ctx *ticket)
{
struct amdgpu_vm *peer_vm;
int ret;
list_for_each_entry(peer_vm, &process_info->vm_list_head,
vm_list_node) {
ret = vm_validate_pt_pd_bos(peer_vm);
ret = vm_validate_pt_pd_bos(peer_vm, ticket);
if (ret)
return ret;
}
@ -1402,7 +1405,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
ret = amdgpu_bo_reserve(vm->root.bo, true);
if (ret)
goto reserve_pd_fail;
ret = vm_validate_pt_pd_bos(vm);
ret = vm_validate_pt_pd_bos(vm, NULL);
if (ret) {
pr_err("validate_pt_pd_bos() failed\n");
goto validate_pd_fail;
@ -2043,7 +2046,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
bo->tbo.resource->mem_type == TTM_PL_SYSTEM)
is_invalid_userptr = true;
ret = vm_validate_pt_pd_bos(avm);
ret = vm_validate_pt_pd_bos(avm, NULL);
if (unlikely(ret))
goto out_unreserve;
@ -2136,7 +2139,7 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
goto unreserve_out;
}
ret = vm_validate_pt_pd_bos(avm);
ret = vm_validate_pt_pd_bos(avm, NULL);
if (unlikely(ret))
goto unreserve_out;
@ -2186,13 +2189,12 @@ int amdgpu_amdkfd_gpuvm_sync_memory(
/**
* amdgpu_amdkfd_map_gtt_bo_to_gart - Map BO to GART and increment reference count
* @adev: Device to which allocated BO belongs
* @bo: Buffer object to be mapped
*
* Before return, bo reference count is incremented. To release the reference and unpin/
* unmap the BO, call amdgpu_amdkfd_free_gtt_mem.
*/
int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo)
int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_bo *bo)
{
int ret;
@ -2634,7 +2636,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
}
}
ret = process_validate_vms(process_info);
ret = process_validate_vms(process_info, NULL);
if (ret)
goto unreserve_out;
@ -2894,11 +2896,6 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu *
amdgpu_sync_create(&sync_obj);
/* Validate PDs and PTs */
ret = process_validate_vms(process_info);
if (ret)
goto validate_map_fail;
/* Validate BOs and map them to GPUVM (update VM page tables). */
list_for_each_entry(mem, &process_info->kfd_bo_list,
validate_list) {
@ -2949,6 +2946,13 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu *
if (failed_size)
pr_debug("0x%lx/0x%lx in system\n", failed_size, total_size);
/* Validate PDs, PTs and evicted DMABuf imports last. Otherwise BO
* validations above would invalidate DMABuf imports again.
*/
ret = process_validate_vms(process_info, &exec.ticket);
if (ret)
goto validate_map_fail;
/* Update mappings not managed by KFD */
list_for_each_entry(peer_vm, &process_info->vm_list_head,
vm_list_node) {
@ -3020,7 +3024,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu *
&process_info->eviction_fence->base,
DMA_RESV_USAGE_BOOKKEEP);
}
/* Attach eviction fence to PD / PT BOs */
/* Attach eviction fence to PD / PT BOs and DMABuf imports */
list_for_each_entry(peer_vm, &process_info->vm_list_head,
vm_list_node) {
struct amdgpu_bo *bo = peer_vm->root.bo;

View File

@ -1018,7 +1018,8 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev,
if (clock_type == COMPUTE_ENGINE_PLL_PARAM) {
args.v3.ulClockParams = cpu_to_le32((clock_type << 24) | clock);
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args,
sizeof(args));
dividers->post_div = args.v3.ucPostDiv;
dividers->enable_post_div = (args.v3.ucCntlFlag &
@ -1038,7 +1039,8 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev,
if (strobe_mode)
args.v5.ucInputFlag = ATOM_PLL_INPUT_FLAG_PLL_STROBE_MODE_EN;
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args,
sizeof(args));
dividers->post_div = args.v5.ucPostDiv;
dividers->enable_post_div = (args.v5.ucCntlFlag &
@ -1056,7 +1058,8 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev,
/* fusion */
args.v4.ulClock = cpu_to_le32(clock); /* 10 khz */
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args,
sizeof(args));
dividers->post_divider = dividers->post_div = args.v4.ucPostDiv;
dividers->real_clock = le32_to_cpu(args.v4.ulClock);
@ -1067,7 +1070,8 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev,
args.v6_in.ulClock.ulComputeClockFlag = clock_type;
args.v6_in.ulClock.ulClockFreq = cpu_to_le32(clock); /* 10 khz */
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args,
sizeof(args));
dividers->whole_fb_div = le16_to_cpu(args.v6_out.ulFbDiv.usFbDiv);
dividers->frac_fb_div = le16_to_cpu(args.v6_out.ulFbDiv.usFbDivFrac);
@ -1109,7 +1113,8 @@ int amdgpu_atombios_get_memory_pll_dividers(struct amdgpu_device *adev,
if (strobe_mode)
args.ucInputFlag |= MPLL_INPUT_FLAG_STROBE_MODE_EN;
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args,
sizeof(args));
mpll_param->clkfrac = le16_to_cpu(args.ulFbDiv.usFbDivFrac);
mpll_param->clkf = le16_to_cpu(args.ulFbDiv.usFbDiv);
@ -1151,7 +1156,8 @@ void amdgpu_atombios_set_engine_dram_timings(struct amdgpu_device *adev,
if (mem_clock)
args.sReserved.ulClock = cpu_to_le32(mem_clock & SET_CLOCK_FREQ_MASK);
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args,
sizeof(args));
}
void amdgpu_atombios_get_default_voltages(struct amdgpu_device *adev,
@ -1205,7 +1211,8 @@ int amdgpu_atombios_get_max_vddc(struct amdgpu_device *adev, u8 voltage_type,
args.v2.ucVoltageMode = 0;
args.v2.usVoltageLevel = 0;
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args,
sizeof(args));
*voltage = le16_to_cpu(args.v2.usVoltageLevel);
break;
@ -1214,7 +1221,8 @@ int amdgpu_atombios_get_max_vddc(struct amdgpu_device *adev, u8 voltage_type,
args.v3.ucVoltageMode = ATOM_GET_VOLTAGE_LEVEL;
args.v3.usVoltageLevel = cpu_to_le16(voltage_id);
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args,
sizeof(args));
*voltage = le16_to_cpu(args.v3.usVoltageLevel);
break;

View File

@ -941,5 +941,6 @@ int amdgpu_atomfirmware_asic_init(struct amdgpu_device *adev, bool fb_reset)
return -EINVAL;
}
return amdgpu_atom_execute_table(ctx, ATOM_CMD_INIT, (uint32_t *)&asic_init_ps_v2_1);
return amdgpu_atom_execute_table(ctx, ATOM_CMD_INIT, (uint32_t *)&asic_init_ps_v2_1,
sizeof(asic_init_ps_v2_1));
}

View File

@ -952,10 +952,10 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
p->bytes_moved = 0;
p->bytes_moved_vis = 0;
r = amdgpu_vm_validate_pt_bos(p->adev, &fpriv->vm,
r = amdgpu_vm_validate(p->adev, &fpriv->vm, NULL,
amdgpu_cs_bo_validate, p);
if (r) {
DRM_ERROR("amdgpu_vm_validate_pt_bos() failed.\n");
DRM_ERROR("amdgpu_vm_validate() failed.\n");
goto out_free_user_pages;
}

View File

@ -28,9 +28,8 @@
uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev)
{
uint64_t addr = adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT;
uint64_t addr = AMDGPU_VA_RESERVED_CSA_START(adev);
addr -= AMDGPU_VA_RESERVED_SIZE;
addr = amdgpu_gmc_sign_extend(addr);
return addr;

Some files were not shown because too many files have changed in this diff Show More