diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl index 4b592ffbafee..03f1985a4bd1 100644 --- a/Documentation/DocBook/drm.tmpl +++ b/Documentation/DocBook/drm.tmpl @@ -239,6 +239,14 @@ Driver supports dedicated render nodes. + + DRIVER_ATOMIC + + Driver supports atomic properties. In this case the driver + must implement appropriate obj->atomic_get_property() vfuncs + for any modeset objects with driver specific properties. + + @@ -1377,7 +1385,7 @@ int max_width, max_height; DRM_PLANE_TYPE_PRIMARY represents a "main" plane for a CRTC. Primary - planes are the planes operated upon by by CRTC modesetting and flipping + planes are the planes operated upon by CRTC modesetting and flipping operations described in . @@ -2362,6 +2370,7 @@ void intel_crt_init(struct drm_device *dev) Modeset Helper Functions Reference +!Iinclude/drm/drm_crtc_helper.h !Edrivers/gpu/drm/drm_crtc_helper.c !Pdrivers/gpu/drm/drm_crtc_helper.c overview @@ -2564,8 +2573,8 @@ void intel_crt_init(struct drm_device *dev) Description/Restrictions - DRM - Generic + DRM + Connector “EDID” BLOB | IMMUTABLE 0 @@ -2594,7 +2603,14 @@ void intel_crt_init(struct drm_device *dev) Contains tiling information for a connector. - Plane + “CRTC_ID” + OBJECT + DRM_MODE_OBJECT_CRTC + Connector + CRTC that connector is attached to (atomic) + + + Plane “type” ENUM | IMMUTABLE { "Overlay", "Primary", "Cursor" } @@ -2602,6 +2618,76 @@ void intel_crt_init(struct drm_device *dev) Plane type + “SRC_X” + RANGE + Min=0, Max=UINT_MAX + Plane + Scanout source x coordinate in 16.16 fixed point (atomic) + + + “SRC_Y” + RANGE + Min=0, Max=UINT_MAX + Plane + Scanout source y coordinate in 16.16 fixed point (atomic) + + + “SRC_W” + RANGE + Min=0, Max=UINT_MAX + Plane + Scanout source width in 16.16 fixed point (atomic) + + + “SRC_H” + RANGE + Min=0, Max=UINT_MAX + Plane + Scanout source height in 16.16 fixed point (atomic) + + + “CRTC_X” + SIGNED_RANGE + Min=INT_MIN, Max=INT_MAX + Plane + Scanout CRTC (destination) x coordinate (atomic) + + + “CRTC_Y” + SIGNED_RANGE + Min=INT_MIN, Max=INT_MAX + Plane + Scanout CRTC (destination) y coordinate (atomic) + + + “CRTC_W” + RANGE + Min=0, Max=UINT_MAX + Plane + Scanout CRTC (destination) width (atomic) + + + “CRTC_H” + RANGE + Min=0, Max=UINT_MAX + Plane + Scanout CRTC (destination) height (atomic) + + + “FB_ID” + OBJECT + DRM_MODE_OBJECT_FB + Plane + Scanout framebuffer (atomic) + + + “CRTC_ID” + OBJECT + DRM_MODE_OBJECT_CRTC + Plane + CRTC that plane is attached to (atomic) + + DVI-I “subconnector” ENUM @@ -3883,6 +3969,7 @@ int num_ioctls; Runtime Power Management !Pdrivers/gpu/drm/i915/intel_runtime_pm.c runtime pm !Idrivers/gpu/drm/i915/intel_runtime_pm.c +!Idrivers/gpu/drm/i915/intel_uncore.c Interrupt Handling @@ -3931,6 +4018,11 @@ int num_ioctls; framebuffer compression and panel self refresh. + + Atomic Plane Helpers +!Pdrivers/gpu/drm/i915/intel_atomic_plane.c atomic plane helpers +!Idrivers/gpu/drm/i915/intel_atomic_plane.c + Output Probing @@ -3949,6 +4041,11 @@ int num_ioctls; Panel Self Refresh PSR (PSR/SRD) !Pdrivers/gpu/drm/i915/intel_psr.c Panel Self Refresh (PSR/SRD) !Idrivers/gpu/drm/i915/intel_psr.c + + + Frame Buffer Compression (FBC) +!Pdrivers/gpu/drm/i915/intel_fbc.c Frame Buffer Compression (FBC) +!Idrivers/gpu/drm/i915/intel_fbc.c DPIO @@ -4052,12 +4149,33 @@ int num_ioctls; Batchbuffer Parsing !Pdrivers/gpu/drm/i915/i915_cmd_parser.c batch buffer command parser !Idrivers/gpu/drm/i915/i915_cmd_parser.c + + + Batchbuffer Pools +!Pdrivers/gpu/drm/i915/i915_gem_batch_pool.c batch pool +!Idrivers/gpu/drm/i915/i915_gem_batch_pool.c Logical Rings, Logical Ring Contexts and Execlists !Pdrivers/gpu/drm/i915/intel_lrc.c Logical Rings, Logical Ring Contexts and Execlists !Idrivers/gpu/drm/i915/intel_lrc.c + + Global GTT views +!Pdrivers/gpu/drm/i915/i915_gem_gtt.c Global GTT views +!Idrivers/gpu/drm/i915/i915_gem_gtt.c + + + Buffer Object Eviction + + This section documents the interface function for evicting buffer + objects to make space available in the virtual gpu address spaces. + Note that this is mostly orthogonal to shrinking buffer objects + caches, which has the goal to make main memory (shared with the gpu + through the unified memory architecture) available. + +!Idrivers/gpu/drm/i915/i915_gem_evict.c + diff --git a/Documentation/devicetree/bindings/drm/atmel/hlcdc-dc.txt b/Documentation/devicetree/bindings/drm/atmel/hlcdc-dc.txt new file mode 100644 index 000000000000..ebc1a914bda3 --- /dev/null +++ b/Documentation/devicetree/bindings/drm/atmel/hlcdc-dc.txt @@ -0,0 +1,53 @@ +Device-Tree bindings for Atmel's HLCDC (High LCD Controller) DRM driver + +The Atmel HLCDC Display Controller is subdevice of the HLCDC MFD device. +See ../mfd/atmel-hlcdc.txt for more details. + +Required properties: + - compatible: value should be "atmel,hlcdc-display-controller" + - pinctrl-names: the pin control state names. Should contain "default". + - pinctrl-0: should contain the default pinctrl states. + - #address-cells: should be set to 1. + - #size-cells: should be set to 0. + +Required children nodes: + Children nodes are encoding available output ports and their connections + to external devices using the OF graph reprensentation (see ../graph.txt). + At least one port node is required. + +Example: + + hlcdc: hlcdc@f0030000 { + compatible = "atmel,sama5d3-hlcdc"; + reg = <0xf0030000 0x2000>; + interrupts = <36 IRQ_TYPE_LEVEL_HIGH 0>; + clocks = <&lcdc_clk>, <&lcdck>, <&clk32k>; + clock-names = "periph_clk","sys_clk", "slow_clk"; + status = "disabled"; + + hlcdc-display-controller { + compatible = "atmel,hlcdc-display-controller"; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_lcd_base &pinctrl_lcd_rgb888>; + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + #address-cells = <1>; + #size-cells = <0>; + reg = <0>; + + hlcdc_panel_output: endpoint@0 { + reg = <0>; + remote-endpoint = <&panel_input>; + }; + }; + }; + + hlcdc_pwm: hlcdc-pwm { + compatible = "atmel,hlcdc-pwm"; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_lcd_pwm>; + #pwm-cells = <3>; + }; + }; diff --git a/Documentation/devicetree/bindings/drm/bridge/dw_hdmi.txt b/Documentation/devicetree/bindings/drm/bridge/dw_hdmi.txt new file mode 100644 index 000000000000..a905c1413558 --- /dev/null +++ b/Documentation/devicetree/bindings/drm/bridge/dw_hdmi.txt @@ -0,0 +1,50 @@ +DesignWare HDMI bridge bindings + +Required properties: +- compatible: platform specific such as: + * "snps,dw-hdmi-tx" + * "fsl,imx6q-hdmi" + * "fsl,imx6dl-hdmi" + * "rockchip,rk3288-dw-hdmi" +- reg: Physical base address and length of the controller's registers. +- interrupts: The HDMI interrupt number +- clocks, clock-names : must have the phandles to the HDMI iahb and isfr clocks, + as described in Documentation/devicetree/bindings/clock/clock-bindings.txt, + the clocks are soc specific, the clock-names should be "iahb", "isfr" +-port@[X]: SoC specific port nodes with endpoint definitions as defined + in Documentation/devicetree/bindings/media/video-interfaces.txt, + please refer to the SoC specific binding document: + * Documentation/devicetree/bindings/drm/imx/hdmi.txt + * Documentation/devicetree/bindings/video/dw_hdmi-rockchip.txt + +Optional properties +- reg-io-width: the width of the reg:1,4, default set to 1 if not present +- ddc-i2c-bus: phandle of an I2C controller used for DDC EDID probing +- clocks, clock-names: phandle to the HDMI CEC clock, name should be "cec" + +Example: + hdmi: hdmi@0120000 { + compatible = "fsl,imx6q-hdmi"; + reg = <0x00120000 0x9000>; + interrupts = <0 115 0x04>; + gpr = <&gpr>; + clocks = <&clks 123>, <&clks 124>; + clock-names = "iahb", "isfr"; + ddc-i2c-bus = <&i2c2>; + + port@0 { + reg = <0>; + + hdmi_mux_0: endpoint { + remote-endpoint = <&ipu1_di0_hdmi>; + }; + }; + + port@1 { + reg = <1>; + + hdmi_mux_1: endpoint { + remote-endpoint = <&ipu1_di1_hdmi>; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/drm/msm/hdmi.txt b/Documentation/devicetree/bindings/drm/msm/hdmi.txt index aca917fe2ba7..a29a55f3d937 100644 --- a/Documentation/devicetree/bindings/drm/msm/hdmi.txt +++ b/Documentation/devicetree/bindings/drm/msm/hdmi.txt @@ -2,6 +2,8 @@ Qualcomm adreno/snapdragon hdmi output Required properties: - compatible: one of the following + * "qcom,hdmi-tx-8084" + * "qcom,hdmi-tx-8074" * "qcom,hdmi-tx-8660" * "qcom,hdmi-tx-8960" - reg: Physical base address and length of the controller's registers diff --git a/Documentation/devicetree/bindings/gpu/st,stih4xx.txt b/Documentation/devicetree/bindings/gpu/st,stih4xx.txt index c99eb34e640b..6b1d75f1a529 100644 --- a/Documentation/devicetree/bindings/gpu/st,stih4xx.txt +++ b/Documentation/devicetree/bindings/gpu/st,stih4xx.txt @@ -83,6 +83,22 @@ sti-hda: - clock-names: names of the clocks listed in clocks property in the same order. +sti-dvo: + Required properties: + must be a child of sti-tvout + - compatible: "st,stih-dvo" + - reg: Physical base address of the IP registers and length of memory mapped region. + - reg-names: names of the mapped memory regions listed in regs property in + the same order. + - clocks: from common clock binding: handle hardware IP needed clocks, the + number of clocks may depend of the SoC type. + See ../clocks/clock-bindings.txt for details. + - clock-names: names of the clocks listed in clocks property in the same + order. + - pinctrl-0: pin control handle + - pinctrl-name: names of the pin control to use + - sti,panel: phandle of the panel connected to the DVO output + sti-hqvdp: must be a child of sti-display-subsystem Required properties: @@ -198,6 +214,19 @@ Example: clock-names = "pix", "hddac"; clocks = <&clockgen_c_vcc CLK_S_PIX_HD>, <&clockgen_c_vcc CLK_S_HDDAC>; }; + + sti-dvo@8d00400 { + compatible = "st,stih407-dvo"; + reg = <0x8d00400 0x200>; + reg-names = "dvo-reg"; + clock-names = "dvo_pix", "dvo", + "main_parent", "aux_parent"; + clocks = <&clk_s_d2_flexgen CLK_PIX_DVO>, <&clk_s_d2_flexgen CLK_DVO>, + <&clk_s_d2_quadfs 0>, <&clk_s_d2_quadfs 1>; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_dvo>; + sti,panel = <&panel_dvo>; + }; }; sti-hqvdp@9c000000 { diff --git a/Documentation/devicetree/bindings/panel/avic,tm070ddh03.txt b/Documentation/devicetree/bindings/panel/avic,tm070ddh03.txt new file mode 100644 index 000000000000..b6f2f3e8f44e --- /dev/null +++ b/Documentation/devicetree/bindings/panel/avic,tm070ddh03.txt @@ -0,0 +1,7 @@ +Shanghai AVIC Optoelectronics 7" 1024x600 color TFT-LCD panel + +Required properties: +- compatible: should be "avic,tm070ddh03" + +This binding is compatible with the simple-panel binding, which is specified +in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/panel/giantplus,gpg482739qs5.txt b/Documentation/devicetree/bindings/panel/giantplus,gpg482739qs5.txt new file mode 100644 index 000000000000..24b0b624434b --- /dev/null +++ b/Documentation/devicetree/bindings/panel/giantplus,gpg482739qs5.txt @@ -0,0 +1,7 @@ +GiantPlus GPG48273QS5 4.3" (480x272) WQVGA TFT LCD panel + +Required properties: +- compatible: should be "giantplus,gpg48273qs5" + +This binding is compatible with the simple-panel binding, which is specified +in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt index cba613f8aa4f..0229b71dc74b 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.txt +++ b/Documentation/devicetree/bindings/vendor-prefixes.txt @@ -25,6 +25,7 @@ asahi-kasei Asahi Kasei Corp. atmel Atmel Corporation auo AU Optronics Corporation avago Avago Technologies +avic Shanghai AVIC Optoelectronics Co., Ltd. bosch Bosch Sensortec GmbH brcm Broadcom Corporation buffalo Buffalo, Inc. @@ -68,6 +69,7 @@ fsl Freescale Semiconductor GEFanuc GE Fanuc Intelligent Platforms Embedded Systems, Inc. gef GE Fanuc Intelligent Platforms Embedded Systems, Inc. geniatech Geniatech, Inc. +giantplus Giantplus Technology Co., Ltd. globalscale Globalscale Technologies, Inc. gmt Global Mixed-mode Technology, Inc. google Google, Inc. @@ -126,6 +128,7 @@ onnn ON Semiconductor Corp. opencores OpenCores.org ovti OmniVision Technologies panasonic Panasonic Corporation +parade Parade Technologies Inc. pericom Pericom Technology Inc. phytec PHYTEC Messtechnik GmbH picochip Picochip Ltd diff --git a/Documentation/devicetree/bindings/video/bridge/ps8622.txt b/Documentation/devicetree/bindings/video/bridge/ps8622.txt new file mode 100644 index 000000000000..c989c3807f2b --- /dev/null +++ b/Documentation/devicetree/bindings/video/bridge/ps8622.txt @@ -0,0 +1,31 @@ +ps8622-bridge bindings + +Required properties: + - compatible: "parade,ps8622" or "parade,ps8625" + - reg: first i2c address of the bridge + - sleep-gpios: OF device-tree gpio specification for PD_ pin. + - reset-gpios: OF device-tree gpio specification for RST_ pin. + +Optional properties: + - lane-count: number of DP lanes to use + - use-external-pwm: backlight will be controlled by an external PWM + - video interfaces: Device node can contain video interface port + nodes for panel according to [1]. + +[1]: Documentation/devicetree/bindings/media/video-interfaces.txt + +Example: + lvds-bridge@48 { + compatible = "parade,ps8622"; + reg = <0x48>; + sleep-gpios = <&gpc3 6 1 0 0>; + reset-gpios = <&gpc3 1 1 0 0>; + lane-count = <1>; + ports { + port@0 { + bridge_out: endpoint { + remote-endpoint = <&panel_in>; + }; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/drm/bridge/ptn3460.txt b/Documentation/devicetree/bindings/video/bridge/ptn3460.txt similarity index 65% rename from Documentation/devicetree/bindings/drm/bridge/ptn3460.txt rename to Documentation/devicetree/bindings/video/bridge/ptn3460.txt index 52b93b2c6748..361971ba104d 100644 --- a/Documentation/devicetree/bindings/drm/bridge/ptn3460.txt +++ b/Documentation/devicetree/bindings/video/bridge/ptn3460.txt @@ -3,8 +3,8 @@ ptn3460 bridge bindings Required properties: - compatible: "nxp,ptn3460" - reg: i2c address of the bridge - - powerdown-gpio: OF device-tree gpio specification - - reset-gpio: OF device-tree gpio specification + - powerdown-gpio: OF device-tree gpio specification for PD_N pin. + - reset-gpio: OF device-tree gpio specification for RST_N pin. - edid-emulation: The EDID emulation entry to use +-------+------------+------------------+ | Value | Resolution | Description | @@ -17,6 +17,11 @@ Required properties: | 6 | 1600x900 | ChiMei M215HGE | +-------+------------+------------------+ + - video interfaces: Device node can contain video interface port + nodes for panel according to [1]. + +[1]: Documentation/devicetree/bindings/media/video-interfaces.txt + Example: lvds-bridge@20 { compatible = "nxp,ptn3460"; @@ -24,4 +29,11 @@ Example: powerdown-gpio = <&gpy2 5 1 0 0>; reset-gpio = <&gpx1 5 1 0 0>; edid-emulation = <5>; + ports { + port@0 { + bridge_out: endpoint { + remote-endpoint = <&panel_in>; + }; + }; + }; }; diff --git a/Documentation/devicetree/bindings/video/dw_hdmi-rockchip.txt b/Documentation/devicetree/bindings/video/dw_hdmi-rockchip.txt new file mode 100644 index 000000000000..668091f27674 --- /dev/null +++ b/Documentation/devicetree/bindings/video/dw_hdmi-rockchip.txt @@ -0,0 +1,46 @@ +Rockchip specific extensions to the Synopsys Designware HDMI +================================ + +Required properties: +- compatible: "rockchip,rk3288-dw-hdmi"; +- reg: Physical base address and length of the controller's registers. +- clocks: phandle to hdmi iahb and isfr clocks. +- clock-names: should be "iahb" "isfr" +- rockchip,grf: this soc should set GRF regs to mux vopl/vopb. +- interrupts: HDMI interrupt number +- ports: contain a port node with endpoint definitions as defined in + Documentation/devicetree/bindings/media/video-interfaces.txt. For + vopb,set the reg = <0> and set the reg = <1> for vopl. +- reg-io-width: the width of the reg:1,4, the value should be 4 on + rk3288 platform + +Optional properties +- ddc-i2c-bus: phandle of an I2C controller used for DDC EDID probing +- clocks, clock-names: phandle to the HDMI CEC clock, name should be "cec" + +Example: +hdmi: hdmi@ff980000 { + compatible = "rockchip,rk3288-dw-hdmi"; + reg = <0xff980000 0x20000>; + reg-io-width = <4>; + ddc-i2c-bus = <&i2c5>; + rockchip,grf = <&grf>; + interrupts = ; + clocks = <&cru PCLK_HDMI_CTRL>, <&cru SCLK_HDMI_HDCP>; + clock-names = "iahb", "isfr"; + status = "disabled"; + ports { + hdmi_in: port { + #address-cells = <1>; + #size-cells = <0>; + hdmi_in_vopb: endpoint@0 { + reg = <0>; + remote-endpoint = <&vopb_out_hdmi>; + }; + hdmi_in_vopl: endpoint@1 { + reg = <1>; + remote-endpoint = <&vopl_out_hdmi>; + }; + }; + }; +}; diff --git a/Documentation/devicetree/bindings/video/exynos7-decon.txt b/Documentation/devicetree/bindings/video/exynos7-decon.txt new file mode 100644 index 000000000000..f5f9c8d4a55a --- /dev/null +++ b/Documentation/devicetree/bindings/video/exynos7-decon.txt @@ -0,0 +1,68 @@ +Device-Tree bindings for Samsung Exynos7 SoC display controller (DECON) + +DECON (Display and Enhancement Controller) is the Display Controller for the +Exynos7 series of SoCs which transfers the image data from a video memory +buffer to an external LCD interface. + +Required properties: +- compatible: value should be "samsung,exynos7-decon"; + +- reg: physical base address and length of the DECON registers set. + +- interrupt-parent: should be the phandle of the decon controller's + parent interrupt controller. + +- interrupts: should contain a list of all DECON IP block interrupts in the + order: FIFO Level, VSYNC, LCD_SYSTEM. The interrupt specifier + format depends on the interrupt controller used. + +- interrupt-names: should contain the interrupt names: "fifo", "vsync", + "lcd_sys", in the same order as they were listed in the interrupts + property. + +- pinctrl-0: pin control group to be used for this controller. + +- pinctrl-names: must contain a "default" entry. + +- clocks: must include clock specifiers corresponding to entries in the + clock-names property. + +- clock-names: list of clock names sorted in the same order as the clocks + property. Must contain "pclk_decon0", "aclk_decon0", + "decon0_eclk", "decon0_vclk". +- i80-if-timings: timing configuration for lcd i80 interface support. + +Optional Properties: +- samsung,power-domain: a phandle to DECON power domain node. +- display-timings: timing settings for DECON, as described in document [1]. + Can be used in case timings cannot be provided otherwise + or to override timings provided by the panel. + +[1]: Documentation/devicetree/bindings/video/display-timing.txt + +Example: + +SoC specific DT entry: + + decon@13930000 { + compatible = "samsung,exynos7-decon"; + interrupt-parent = <&combiner>; + reg = <0x13930000 0x1000>; + interrupt-names = "lcd_sys", "vsync", "fifo"; + interrupts = <0 188 0>, <0 189 0>, <0 190 0>; + clocks = <&clock_disp PCLK_DECON_INT>, + <&clock_disp ACLK_DECON_INT>, + <&clock_disp SCLK_DECON_INT_ECLK>, + <&clock_disp SCLK_DECON_INT_EXTCLKPLL>; + clock-names = "pclk_decon0", "aclk_decon0", "decon0_eclk", + "decon0_vclk"; + status = "disabled"; + }; + +Board specific DT entry: + + decon@13930000 { + pinctrl-0 = <&lcd_clk &pwm1_out>; + pinctrl-names = "default"; + status = "okay"; + }; diff --git a/Documentation/devicetree/bindings/video/exynos_dp.txt b/Documentation/devicetree/bindings/video/exynos_dp.txt index 53dbccfa80ca..7a3a9cdb86ab 100644 --- a/Documentation/devicetree/bindings/video/exynos_dp.txt +++ b/Documentation/devicetree/bindings/video/exynos_dp.txt @@ -66,6 +66,10 @@ Optional properties for dp-controller: Hotplug detect GPIO. Indicates which GPIO should be used for hotplug detection + -video interfaces: Device node can contain video interface port + nodes according to [1]. + +[1]: Documentation/devicetree/bindings/media/video-interfaces.txt Example: @@ -105,4 +109,12 @@ Board Specific portion: vsync-len = <6>; }; }; + + ports { + port@0 { + dp_out: endpoint { + remote-endpoint = <&bridge_in>; + }; + }; + }; }; diff --git a/Documentation/devicetree/bindings/video/exynos_mixer.txt b/Documentation/devicetree/bindings/video/exynos_mixer.txt index 08b394b1edbf..3e38128f866b 100644 --- a/Documentation/devicetree/bindings/video/exynos_mixer.txt +++ b/Documentation/devicetree/bindings/video/exynos_mixer.txt @@ -15,6 +15,7 @@ Required properties: a) mixer: Gate of Mixer IP bus clock. b) sclk_hdmi: HDMI Special clock, one of the two possible inputs of mixer mux. + c) hdmi: Gate of HDMI IP bus clock, needed together with sclk_hdmi. Example: diff --git a/Documentation/devicetree/bindings/video/renesas,du.txt b/Documentation/devicetree/bindings/video/renesas,du.txt index 5102830f2760..c902323928f7 100644 --- a/Documentation/devicetree/bindings/video/renesas,du.txt +++ b/Documentation/devicetree/bindings/video/renesas,du.txt @@ -26,6 +26,10 @@ Required Properties: per LVDS encoder. The functional clocks must be named "du.x" with "x" being the channel numerical index. The LVDS clocks must be named "lvds.x" with "x" being the LVDS encoder numerical index. + - In addition to the functional and encoder clocks, all DU versions also + support externally supplied pixel clocks. Those clocks are optional. + When supplied they must be named "dclkin.x" with "x" being the input + clock numerical index. Required nodes: diff --git a/MAINTAINERS b/MAINTAINERS index b663e3590bd4..0beaaac20a83 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -630,6 +630,8 @@ L: dri-devel@lists.freedesktop.org T: git git://people.freedesktop.org/~gabbayo/linux.git S: Supported F: drivers/gpu/drm/amd/amdkfd/ +F: drivers/gpu/drm/amd/include/cik_structs.h +F: drivers/gpu/drm/amd/include/kgd_kfd_interface.h F: drivers/gpu/drm/radeon/radeon_kfd.c F: drivers/gpu/drm/radeon/radeon_kfd.h F: include/uapi/linux/kfd_ioctl.h diff --git a/drivers/char/agp/agp.h b/drivers/char/agp/agp.h index b709749c8639..4eb1c772ded7 100644 --- a/drivers/char/agp/agp.h +++ b/drivers/char/agp/agp.h @@ -219,7 +219,10 @@ struct agp_bridge_data *agp_generic_find_bridge(struct pci_dev *pdev); /* generic functions for user-populated AGP memory types */ struct agp_memory *agp_generic_alloc_user(size_t page_count, int type); void agp_alloc_page_array(size_t size, struct agp_memory *mem); -void agp_free_page_array(struct agp_memory *mem); +static inline void agp_free_page_array(struct agp_memory *mem) +{ + kvfree(mem->pages); +} /* generic routines for agp>=3 */ diff --git a/drivers/char/agp/generic.c b/drivers/char/agp/generic.c index 0fbccce1cee9..f002fa5d1887 100644 --- a/drivers/char/agp/generic.c +++ b/drivers/char/agp/generic.c @@ -98,17 +98,6 @@ void agp_alloc_page_array(size_t size, struct agp_memory *mem) } EXPORT_SYMBOL(agp_alloc_page_array); -void agp_free_page_array(struct agp_memory *mem) -{ - if (is_vmalloc_addr(mem->pages)) { - vfree(mem->pages); - } else { - kfree(mem->pages); - } -} -EXPORT_SYMBOL(agp_free_page_array); - - static struct agp_memory *agp_create_user_memory(unsigned long num_agp_pages) { struct agp_memory *new; diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c index 92aa43fa8d70..0b4188b9af7c 100644 --- a/drivers/char/agp/intel-gtt.c +++ b/drivers/char/agp/intel-gtt.c @@ -225,7 +225,7 @@ static int i810_insert_dcache_entries(struct agp_memory *mem, off_t pg_start, intel_private.driver->write_entry(addr, i, type); } - readl(intel_private.gtt+i-1); + wmb(); return 0; } @@ -329,7 +329,7 @@ static void i810_write_entry(dma_addr_t addr, unsigned int entry, break; } - writel(addr | pte_flags, intel_private.gtt + entry); + writel_relaxed(addr | pte_flags, intel_private.gtt + entry); } static const struct aper_size_info_fixed intel_fake_agp_sizes[] = { @@ -735,7 +735,7 @@ static void i830_write_entry(dma_addr_t addr, unsigned int entry, if (flags == AGP_USER_CACHED_MEMORY) pte_flags |= I830_PTE_SYSTEM_CACHED; - writel(addr | pte_flags, intel_private.gtt + entry); + writel_relaxed(addr | pte_flags, intel_private.gtt + entry); } bool intel_enable_gtt(void) @@ -858,7 +858,7 @@ void intel_gtt_insert_sg_entries(struct sg_table *st, j++; } } - readl(intel_private.gtt+j-1); + wmb(); } EXPORT_SYMBOL(intel_gtt_insert_sg_entries); @@ -875,7 +875,7 @@ static void intel_gtt_insert_pages(unsigned int first_entry, intel_private.driver->write_entry(addr, j, flags); } - readl(intel_private.gtt+j-1); + wmb(); } static int intel_fake_agp_insert_entries(struct agp_memory *mem, @@ -938,7 +938,7 @@ void intel_gtt_clear_range(unsigned int first_entry, unsigned int num_entries) intel_private.driver->write_entry(intel_private.scratch_page_dma, i, 0); } - readl(intel_private.gtt+i-1); + wmb(); } EXPORT_SYMBOL(intel_gtt_clear_range); @@ -1106,7 +1106,7 @@ static void i965_write_entry(dma_addr_t addr, /* Shift high bits down */ addr |= (addr >> 28) & 0xf0; - writel(addr | pte_flags, intel_private.gtt + entry); + writel_relaxed(addr | pte_flags, intel_private.gtt + entry); } static int i9xx_setup(void) diff --git a/drivers/gpu/Makefile b/drivers/gpu/Makefile index 70da9eb52a42..e9ed439a5b65 100644 --- a/drivers/gpu/Makefile +++ b/drivers/gpu/Makefile @@ -1,3 +1,6 @@ -obj-y += drm/ vga/ +# drm/tegra depends on host1x, so if both drivers are built-in care must be +# taken to initialize them in the correct order. Link order is the only way +# to ensure this currently. obj-$(CONFIG_TEGRA_HOST1X) += host1x/ +obj-y += drm/ vga/ obj-$(CONFIG_IMX_IPUV3_CORE) += ipu-v3/ diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index c3413b6adb17..151a050129e7 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -62,12 +62,13 @@ config DRM_TTM config DRM_GEM_CMA_HELPER bool - depends on DRM + depends on DRM && HAVE_DMA_ATTRS help Choose this if you need the GEM CMA helper functions config DRM_KMS_CMA_HELPER bool + depends on DRM && HAVE_DMA_ATTRS select DRM_GEM_CMA_HELPER select DRM_KMS_FB_HELPER select FB_SYS_FILLRECT @@ -110,7 +111,6 @@ config DRM_RADEON select HWMON select BACKLIGHT_CLASS_DEVICE select INTERVAL_TREE - select MMU_NOTIFIER help Choose this option if you have an ATI Radeon graphics card. There are both PCI and AGP versions. You don't need to choose this to @@ -183,6 +183,8 @@ source "drivers/gpu/drm/cirrus/Kconfig" source "drivers/gpu/drm/armada/Kconfig" +source "drivers/gpu/drm/atmel-hlcdc/Kconfig" + source "drivers/gpu/drm/rcar-du/Kconfig" source "drivers/gpu/drm/shmobile/Kconfig" diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index e620807418ea..2c239b99de64 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -14,7 +14,7 @@ drm-y := drm_auth.o drm_bufs.o drm_cache.o \ drm_info.o drm_debugfs.o drm_encoder_slave.o \ drm_trace_points.o drm_global.o drm_prime.o \ drm_rect.o drm_vma_manager.o drm_flip_work.o \ - drm_modeset_lock.o drm_atomic.o + drm_modeset_lock.o drm_atomic.o drm_bridge.o drm-$(CONFIG_COMPAT) += drm_ioc32.o drm-$(CONFIG_DRM_GEM_CMA_HELPER) += drm_gem_cma_helper.o @@ -55,6 +55,7 @@ obj-$(CONFIG_DRM_GMA500) += gma500/ obj-$(CONFIG_DRM_UDL) += udl/ obj-$(CONFIG_DRM_AST) += ast/ obj-$(CONFIG_DRM_ARMADA) += armada/ +obj-$(CONFIG_DRM_ATMEL_HLCDC) += atmel-hlcdc/ obj-$(CONFIG_DRM_RCAR_DU) += rcar-du/ obj-$(CONFIG_DRM_SHMOBILE) +=shmobile/ obj-$(CONFIG_DRM_OMAP) += omapdrm/ diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile index 307a309110e6..0f4960148126 100644 --- a/drivers/gpu/drm/amd/amdkfd/Makefile +++ b/drivers/gpu/drm/amd/amdkfd/Makefile @@ -7,7 +7,10 @@ ccflags-y := -Iinclude/drm -Idrivers/gpu/drm/amd/include/ amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \ kfd_pasid.o kfd_doorbell.o kfd_flat_memory.o \ kfd_process.o kfd_queue.o kfd_mqd_manager.o \ - kfd_kernel_queue.o kfd_packet_manager.o \ - kfd_process_queue_manager.o kfd_device_queue_manager.o + kfd_mqd_manager_cik.o kfd_mqd_manager_vi.o \ + kfd_kernel_queue.o kfd_kernel_queue_cik.o \ + kfd_kernel_queue_vi.o kfd_packet_manager.o \ + kfd_process_queue_manager.o kfd_device_queue_manager.o \ + kfd_device_queue_manager_cik.o kfd_device_queue_manager_vi.o \ obj-$(CONFIG_HSA_AMD) += amdkfd.o diff --git a/drivers/gpu/drm/amd/amdkfd/cik_regs.h b/drivers/gpu/drm/amd/amdkfd/cik_regs.h index 607fc5ceadbe..01ff332fabd4 100644 --- a/drivers/gpu/drm/amd/amdkfd/cik_regs.h +++ b/drivers/gpu/drm/amd/amdkfd/cik_regs.h @@ -168,6 +168,8 @@ #define IB_ATC_EN (1U << 23) #define DEFAULT_MIN_IB_AVAIL_SIZE (3U << 20) +#define AQL_ENABLE 1 + #define CP_HQD_DEQUEUE_REQUEST 0xC974 #define DEQUEUE_REQUEST_DRAIN 1 #define DEQUEUE_REQUEST_RESET 2 @@ -188,6 +190,17 @@ #define MQD_VMID_MASK (0xf << 0) #define MQD_CONTROL_PRIV_STATE_EN (1U << 8) +#define SDMA_RB_VMID(x) (x << 24) +#define SDMA_RB_ENABLE (1 << 0) +#define SDMA_RB_SIZE(x) ((x) << 1) /* log2 */ +#define SDMA_RPTR_WRITEBACK_ENABLE (1 << 12) +#define SDMA_RPTR_WRITEBACK_TIMER(x) ((x) << 16) /* log2 */ +#define SDMA_OFFSET(x) (x << 0) +#define SDMA_DB_ENABLE (1 << 28) +#define SDMA_ATC (1 << 0) +#define SDMA_VA_PTR32 (1 << 4) +#define SDMA_VA_SHARED_BASE(x) (x << 8) + #define GRBM_GFX_INDEX 0x30800 #define INSTANCE_INDEX(x) ((x) << 0) #define SH_INDEX(x) ((x) << 8) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index fcfdf23e1913..5c50aa8a8908 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -178,6 +178,22 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, return -EFAULT; } + if (args->eop_buffer_address && + !access_ok(VERIFY_WRITE, + (const void __user *) args->eop_buffer_address, + sizeof(uint32_t))) { + pr_debug("kfd: can't access eop buffer"); + return -EFAULT; + } + + if (args->ctx_save_restore_address && + !access_ok(VERIFY_WRITE, + (const void __user *) args->ctx_save_restore_address, + sizeof(uint32_t))) { + pr_debug("kfd: can't access ctx save restore buffer"); + return -EFAULT; + } + q_properties->is_interop = false; q_properties->queue_percent = args->queue_percentage; q_properties->priority = args->queue_priority; @@ -185,9 +201,16 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, q_properties->queue_size = args->ring_size; q_properties->read_ptr = (uint32_t *) args->read_pointer_address; q_properties->write_ptr = (uint32_t *) args->write_pointer_address; + q_properties->eop_ring_buffer_address = args->eop_buffer_address; + q_properties->eop_ring_buffer_size = args->eop_buffer_size; + q_properties->ctx_save_restore_area_address = + args->ctx_save_restore_address; + q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size; if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE || args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL) q_properties->type = KFD_QUEUE_TYPE_COMPUTE; + else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA) + q_properties->type = KFD_QUEUE_TYPE_SDMA; else return -ENOTSUPP; @@ -214,6 +237,11 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, pr_debug("Queue Format (%d)\n", q_properties->format); + pr_debug("Queue EOP (0x%llX)\n", q_properties->eop_ring_buffer_address); + + pr_debug("Queue CTX save arex (0x%llX)\n", + q_properties->ctx_save_restore_area_address); + return 0; } @@ -235,9 +263,12 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, if (err) return err; + pr_debug("kfd: looking for gpu id 0x%x\n", args->gpu_id); dev = kfd_device_by_id(args->gpu_id); - if (dev == NULL) + if (dev == NULL) { + pr_debug("kfd: gpu id 0x%x was not found\n", args->gpu_id); return -EINVAL; + } mutex_lock(&p->mutex); @@ -251,8 +282,8 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, p->pasid, dev->id); - err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, 0, - KFD_QUEUE_TYPE_COMPUTE, &queue_id); + err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, + 0, q_properties.type, &queue_id); if (err != 0) goto err_create_queue; @@ -385,7 +416,7 @@ static int kfd_ioctl_set_memory_policy(struct file *filep, (args->alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT) ? cache_policy_coherent : cache_policy_noncoherent; - if (!dev->dqm->set_cache_memory_policy(dev->dqm, + if (!dev->dqm->ops.set_cache_memory_policy(dev->dqm, &pdd->qpd, default_policy, alternate_policy, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 25bc47f3c1cf..5bc32c26b989 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -31,11 +31,20 @@ #define MQD_SIZE_ALIGNED 768 static const struct kfd_device_info kaveri_device_info = { + .asic_family = CHIP_KAVERI, .max_pasid_bits = 16, .ih_ring_entry_size = 4 * sizeof(uint32_t), .mqd_size_aligned = MQD_SIZE_ALIGNED }; +static const struct kfd_device_info carrizo_device_info = { + .asic_family = CHIP_CARRIZO, + .max_pasid_bits = 16, + .ih_ring_entry_size = 4 * sizeof(uint32_t), + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED +}; + struct kfd_deviceid { unsigned short did; const struct kfd_device_info *device_info; @@ -64,9 +73,13 @@ static const struct kfd_deviceid supported_devices[] = { { 0x1318, &kaveri_device_info }, /* Kaveri */ { 0x131B, &kaveri_device_info }, /* Kaveri */ { 0x131C, &kaveri_device_info }, /* Kaveri */ - { 0x131D, &kaveri_device_info }, /* Kaveri */ + { 0x131D, &kaveri_device_info } /* Kaveri */ }; +static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, + unsigned int chunk_size); +static void kfd_gtt_sa_fini(struct kfd_dev *kfd); + static const struct kfd_device_info *lookup_device_info(unsigned short did) { size_t i; @@ -173,16 +186,39 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, size = max_num_of_queues_per_device * kfd->device_info->mqd_size_aligned; - /* add another 512KB for all other allocations on gart */ + /* + * calculate max size of runlist packet. + * There can be only 2 packets at once + */ + size += (KFD_MAX_NUM_OF_PROCESSES * sizeof(struct pm4_map_process) + + max_num_of_queues_per_device * + sizeof(struct pm4_map_queues) + sizeof(struct pm4_runlist)) * 2; + + /* Add size of HIQ & DIQ */ + size += KFD_KERNEL_QUEUE_SIZE * 2; + + /* add another 512KB for all other allocations on gart (HPD, fences) */ size += 512 * 1024; - if (kfd2kgd->init_sa_manager(kfd->kgd, size)) { + if (kfd2kgd->init_gtt_mem_allocation(kfd->kgd, size, &kfd->gtt_mem, + &kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr)) { dev_err(kfd_device, - "Error initializing sa manager for device (%x:%x)\n", - kfd->pdev->vendor, kfd->pdev->device); + "Could not allocate %d bytes for device (%x:%x)\n", + size, kfd->pdev->vendor, kfd->pdev->device); goto out; } + dev_info(kfd_device, + "Allocated %d bytes on gart for device(%x:%x)\n", + size, kfd->pdev->vendor, kfd->pdev->device); + + /* Initialize GTT sa with 512 byte chunk size */ + if (kfd_gtt_sa_init(kfd, size, 512) != 0) { + dev_err(kfd_device, + "Error initializing gtt sub-allocator\n"); + goto kfd_gtt_sa_init_error; + } + kfd_doorbell_init(kfd); if (kfd_topology_add_device(kfd) != 0) { @@ -209,7 +245,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, goto device_queue_manager_error; } - if (kfd->dqm->start(kfd->dqm) != 0) { + if (kfd->dqm->ops.start(kfd->dqm) != 0) { dev_err(kfd_device, "Error starting queuen manager for device (%x:%x)\n", kfd->pdev->vendor, kfd->pdev->device); @@ -232,7 +268,9 @@ device_queue_manager_error: device_iommu_pasid_error: kfd_topology_remove_device(kfd); kfd_topology_add_device_error: - kfd2kgd->fini_sa_manager(kfd->kgd); + kfd_gtt_sa_fini(kfd); +kfd_gtt_sa_init_error: + kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem); dev_err(kfd_device, "device (%x:%x) NOT added due to errors\n", kfd->pdev->vendor, kfd->pdev->device); @@ -246,6 +284,8 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd) device_queue_manager_uninit(kfd->dqm); amd_iommu_free_device(kfd->pdev); kfd_topology_remove_device(kfd); + kfd_gtt_sa_fini(kfd); + kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem); } kfree(kfd); @@ -256,7 +296,7 @@ void kgd2kfd_suspend(struct kfd_dev *kfd) BUG_ON(kfd == NULL); if (kfd->init_complete) { - kfd->dqm->stop(kfd->dqm); + kfd->dqm->ops.stop(kfd->dqm); amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL); amd_iommu_free_device(kfd->pdev); } @@ -277,7 +317,7 @@ int kgd2kfd_resume(struct kfd_dev *kfd) return -ENXIO; amd_iommu_set_invalidate_ctx_cb(kfd->pdev, iommu_pasid_shutdown_callback); - kfd->dqm->start(kfd->dqm); + kfd->dqm->ops.start(kfd->dqm); } return 0; @@ -288,3 +328,188 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) { /* Process interrupts / schedule work as necessary */ } + +static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, + unsigned int chunk_size) +{ + unsigned int num_of_bits; + + BUG_ON(!kfd); + BUG_ON(!kfd->gtt_mem); + BUG_ON(buf_size < chunk_size); + BUG_ON(buf_size == 0); + BUG_ON(chunk_size == 0); + + kfd->gtt_sa_chunk_size = chunk_size; + kfd->gtt_sa_num_of_chunks = buf_size / chunk_size; + + num_of_bits = kfd->gtt_sa_num_of_chunks / BITS_PER_BYTE; + BUG_ON(num_of_bits == 0); + + kfd->gtt_sa_bitmap = kzalloc(num_of_bits, GFP_KERNEL); + + if (!kfd->gtt_sa_bitmap) + return -ENOMEM; + + pr_debug("kfd: gtt_sa_num_of_chunks = %d, gtt_sa_bitmap = %p\n", + kfd->gtt_sa_num_of_chunks, kfd->gtt_sa_bitmap); + + mutex_init(&kfd->gtt_sa_lock); + + return 0; + +} + +static void kfd_gtt_sa_fini(struct kfd_dev *kfd) +{ + mutex_destroy(&kfd->gtt_sa_lock); + kfree(kfd->gtt_sa_bitmap); +} + +static inline uint64_t kfd_gtt_sa_calc_gpu_addr(uint64_t start_addr, + unsigned int bit_num, + unsigned int chunk_size) +{ + return start_addr + bit_num * chunk_size; +} + +static inline uint32_t *kfd_gtt_sa_calc_cpu_addr(void *start_addr, + unsigned int bit_num, + unsigned int chunk_size) +{ + return (uint32_t *) ((uint64_t) start_addr + bit_num * chunk_size); +} + +int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size, + struct kfd_mem_obj **mem_obj) +{ + unsigned int found, start_search, cur_size; + + BUG_ON(!kfd); + + if (size == 0) + return -EINVAL; + + if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size) + return -ENOMEM; + + *mem_obj = kmalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL); + if ((*mem_obj) == NULL) + return -ENOMEM; + + pr_debug("kfd: allocated mem_obj = %p for size = %d\n", *mem_obj, size); + + start_search = 0; + + mutex_lock(&kfd->gtt_sa_lock); + +kfd_gtt_restart_search: + /* Find the first chunk that is free */ + found = find_next_zero_bit(kfd->gtt_sa_bitmap, + kfd->gtt_sa_num_of_chunks, + start_search); + + pr_debug("kfd: found = %d\n", found); + + /* If there wasn't any free chunk, bail out */ + if (found == kfd->gtt_sa_num_of_chunks) + goto kfd_gtt_no_free_chunk; + + /* Update fields of mem_obj */ + (*mem_obj)->range_start = found; + (*mem_obj)->range_end = found; + (*mem_obj)->gpu_addr = kfd_gtt_sa_calc_gpu_addr( + kfd->gtt_start_gpu_addr, + found, + kfd->gtt_sa_chunk_size); + (*mem_obj)->cpu_ptr = kfd_gtt_sa_calc_cpu_addr( + kfd->gtt_start_cpu_ptr, + found, + kfd->gtt_sa_chunk_size); + + pr_debug("kfd: gpu_addr = %p, cpu_addr = %p\n", + (uint64_t *) (*mem_obj)->gpu_addr, (*mem_obj)->cpu_ptr); + + /* If we need only one chunk, mark it as allocated and get out */ + if (size <= kfd->gtt_sa_chunk_size) { + pr_debug("kfd: single bit\n"); + set_bit(found, kfd->gtt_sa_bitmap); + goto kfd_gtt_out; + } + + /* Otherwise, try to see if we have enough contiguous chunks */ + cur_size = size - kfd->gtt_sa_chunk_size; + do { + (*mem_obj)->range_end = + find_next_zero_bit(kfd->gtt_sa_bitmap, + kfd->gtt_sa_num_of_chunks, ++found); + /* + * If next free chunk is not contiguous than we need to + * restart our search from the last free chunk we found (which + * wasn't contiguous to the previous ones + */ + if ((*mem_obj)->range_end != found) { + start_search = found; + goto kfd_gtt_restart_search; + } + + /* + * If we reached end of buffer, bail out with error + */ + if (found == kfd->gtt_sa_num_of_chunks) + goto kfd_gtt_no_free_chunk; + + /* Check if we don't need another chunk */ + if (cur_size <= kfd->gtt_sa_chunk_size) + cur_size = 0; + else + cur_size -= kfd->gtt_sa_chunk_size; + + } while (cur_size > 0); + + pr_debug("kfd: range_start = %d, range_end = %d\n", + (*mem_obj)->range_start, (*mem_obj)->range_end); + + /* Mark the chunks as allocated */ + for (found = (*mem_obj)->range_start; + found <= (*mem_obj)->range_end; + found++) + set_bit(found, kfd->gtt_sa_bitmap); + +kfd_gtt_out: + mutex_unlock(&kfd->gtt_sa_lock); + return 0; + +kfd_gtt_no_free_chunk: + pr_debug("kfd: allocation failed with mem_obj = %p\n", mem_obj); + mutex_unlock(&kfd->gtt_sa_lock); + kfree(mem_obj); + return -ENOMEM; +} + +int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj) +{ + unsigned int bit; + + BUG_ON(!kfd); + + /* Act like kfree when trying to free a NULL object */ + if (!mem_obj) + return 0; + + pr_debug("kfd: free mem_obj = %p, range_start = %d, range_end = %d\n", + mem_obj, mem_obj->range_start, mem_obj->range_end); + + mutex_lock(&kfd->gtt_sa_lock); + + /* Mark the chunks as free */ + for (bit = mem_obj->range_start; + bit <= mem_obj->range_end; + bit++) + clear_bit(bit, kfd->gtt_sa_bitmap); + + mutex_unlock(&kfd->gtt_sa_lock); + + kfree(mem_obj); + return 0; +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 0fd592799d58..b3589d0e39b9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -26,34 +26,40 @@ #include #include #include +#include #include "kfd_priv.h" #include "kfd_device_queue_manager.h" #include "kfd_mqd_manager.h" #include "cik_regs.h" #include "kfd_kernel_queue.h" -#include "../../radeon/cik_reg.h" /* Size of the per-pipe EOP queue */ #define CIK_HPD_EOP_BYTES_LOG2 11 #define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2) -static bool is_mem_initialized; - -static int init_memory(struct device_queue_manager *dqm); static int set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid, unsigned int vmid); static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, struct queue *q, struct qcm_process_device *qpd); + static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock); static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock); +static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, + struct queue *q, + struct qcm_process_device *qpd); -static inline unsigned int get_pipes_num(struct device_queue_manager *dqm) +static void deallocate_sdma_queue(struct device_queue_manager *dqm, + unsigned int sdma_queue_id); + +static inline +enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type) { - BUG_ON(!dqm || !dqm->dev); - return dqm->dev->shared_resources.compute_pipe_count; + if (type == KFD_QUEUE_TYPE_SDMA) + return KFD_MQD_TYPE_SDMA; + return KFD_MQD_TYPE_CP; } static inline unsigned int get_first_pipe(struct device_queue_manager *dqm) @@ -67,61 +73,7 @@ static inline unsigned int get_pipes_num_cpsch(void) return PIPE_PER_ME_CP_SCHEDULING; } -static inline unsigned int -get_sh_mem_bases_nybble_64(struct kfd_process_device *pdd) -{ - uint32_t nybble; - - nybble = (pdd->lds_base >> 60) & 0x0E; - - return nybble; - -} - -static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd) -{ - unsigned int shared_base; - - shared_base = (pdd->lds_base >> 16) & 0xFF; - - return shared_base; -} - -static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble); -static void init_process_memory(struct device_queue_manager *dqm, - struct qcm_process_device *qpd) -{ - struct kfd_process_device *pdd; - unsigned int temp; - - BUG_ON(!dqm || !qpd); - - pdd = qpd_to_pdd(qpd); - - /* check if sh_mem_config register already configured */ - if (qpd->sh_mem_config == 0) { - qpd->sh_mem_config = - ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) | - DEFAULT_MTYPE(MTYPE_NONCACHED) | - APE1_MTYPE(MTYPE_NONCACHED); - qpd->sh_mem_ape1_limit = 0; - qpd->sh_mem_ape1_base = 0; - } - - if (qpd->pqm->process->is_32bit_user_mode) { - temp = get_sh_mem_bases_32(pdd); - qpd->sh_mem_bases = SHARED_BASE(temp); - qpd->sh_mem_config |= PTR32; - } else { - temp = get_sh_mem_bases_nybble_64(pdd); - qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp); - } - - pr_debug("kfd: is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n", - qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases); -} - -static void program_sh_mem_settings(struct device_queue_manager *dqm, +void program_sh_mem_settings(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { return kfd2kgd->program_sh_mem_settings(dqm->dev->kgd, qpd->vmid, @@ -200,7 +152,10 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, *allocated_vmid = qpd->vmid; q->properties.vmid = qpd->vmid; - retval = create_compute_queue_nocpsch(dqm, q, qpd); + if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) + retval = create_compute_queue_nocpsch(dqm, q, qpd); + if (q->properties.type == KFD_QUEUE_TYPE_SDMA) + retval = create_sdma_queue_nocpsch(dqm, q, qpd); if (retval != 0) { if (list_empty(&qpd->queues_list)) { @@ -212,7 +167,11 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, } list_add(&q->list, &qpd->queues_list); - dqm->queue_count++; + if (q->properties.is_active) + dqm->queue_count++; + + if (q->properties.type == KFD_QUEUE_TYPE_SDMA) + dqm->sdma_queue_count++; /* * Unconditionally increment this counter, regardless of the queue's @@ -229,12 +188,12 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q) { bool set; - int pipe, bit; + int pipe, bit, i; set = false; - for (pipe = dqm->next_pipe_to_allocate; pipe < get_pipes_num(dqm); - pipe = (pipe + 1) % get_pipes_num(dqm)) { + for (pipe = dqm->next_pipe_to_allocate, i = 0; i < get_pipes_num(dqm); + pipe = ((pipe + 1) % get_pipes_num(dqm)), ++i) { if (dqm->allocated_queues[pipe] != 0) { bit = find_first_bit( (unsigned long *)&dqm->allocated_queues[pipe], @@ -275,7 +234,7 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, BUG_ON(!dqm || !q || !qpd); - mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE); + mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE); if (mqd == NULL) return -ENOMEM; @@ -319,28 +278,44 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm, pr_debug("kfd: In Func %s\n", __func__); mutex_lock(&dqm->lock); - mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE); - if (mqd == NULL) { - retval = -ENOMEM; + + if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) { + mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE); + if (mqd == NULL) { + retval = -ENOMEM; + goto out; + } + deallocate_hqd(dqm, q); + } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { + mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA); + if (mqd == NULL) { + retval = -ENOMEM; + goto out; + } + dqm->sdma_queue_count--; + deallocate_sdma_queue(dqm, q->sdma_id); + } else { + pr_debug("q->properties.type is invalid (%d)\n", + q->properties.type); + retval = -EINVAL; goto out; } retval = mqd->destroy_mqd(mqd, q->mqd, - KFD_PREEMPT_TYPE_WAVEFRONT, + KFD_PREEMPT_TYPE_WAVEFRONT_RESET, QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS, q->pipe, q->queue); if (retval != 0) goto out; - deallocate_hqd(dqm, q); - mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); list_del(&q->list); if (list_empty(&qpd->queues_list)) deallocate_vmid(dqm, qpd, q); - dqm->queue_count--; + if (q->properties.is_active) + dqm->queue_count--; /* * Unconditionally decrement this counter, regardless of the queue's @@ -364,7 +339,8 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) BUG_ON(!dqm || !q || !q->mqd); mutex_lock(&dqm->lock); - mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE); + mqd = dqm->ops.get_mqd_manager(dqm, + get_mqd_type_from_queue_type(q->properties.type)); if (mqd == NULL) { mutex_unlock(&dqm->lock); return -ENOMEM; @@ -415,6 +391,7 @@ static int register_process_nocpsch(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { struct device_process_node *n; + int retval; BUG_ON(!dqm || !qpd); @@ -429,12 +406,13 @@ static int register_process_nocpsch(struct device_queue_manager *dqm, mutex_lock(&dqm->lock); list_add(&n->list, &dqm->queues); - init_process_memory(dqm, qpd); + retval = dqm->ops_asic_specific.register_process(dqm, qpd); + dqm->processes_count++; mutex_unlock(&dqm->lock); - return 0; + return retval; } static int unregister_process_nocpsch(struct device_queue_manager *dqm, @@ -479,48 +457,7 @@ set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid, vmid); } -static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) -{ - /* In 64-bit mode, we can only control the top 3 bits of the LDS, - * scratch and GPUVM apertures. - * The hardware fills in the remaining 59 bits according to the - * following pattern: - * LDS: X0000000'00000000 - X0000001'00000000 (4GB) - * Scratch: X0000001'00000000 - X0000002'00000000 (4GB) - * GPUVM: Y0010000'00000000 - Y0020000'00000000 (1TB) - * - * (where X/Y is the configurable nybble with the low-bit 0) - * - * LDS and scratch will have the same top nybble programmed in the - * top 3 bits of SH_MEM_BASES.PRIVATE_BASE. - * GPUVM can have a different top nybble programmed in the - * top 3 bits of SH_MEM_BASES.SHARED_BASE. - * We don't bother to support different top nybbles - * for LDS/Scratch and GPUVM. - */ - - BUG_ON((top_address_nybble & 1) || top_address_nybble > 0xE || - top_address_nybble == 0); - - return PRIVATE_BASE(top_address_nybble << 12) | - SHARED_BASE(top_address_nybble << 12); -} - -static int init_memory(struct device_queue_manager *dqm) -{ - int i, retval; - - for (i = 8; i < 16; i++) - set_pasid_vmid_mapping(dqm, 0, i); - - retval = kfd2kgd->init_memory(dqm->dev->kgd); - if (retval == 0) - is_mem_initialized = true; - return retval; -} - - -static int init_pipelines(struct device_queue_manager *dqm, +int init_pipelines(struct device_queue_manager *dqm, unsigned int pipes_num, unsigned int first_pipe) { void *hpdptr; @@ -539,11 +476,8 @@ static int init_pipelines(struct device_queue_manager *dqm, * because it contains no data when there are no active queues. */ - err = kfd2kgd->allocate_mem(dqm->dev->kgd, - CIK_HPD_EOP_BYTES * pipes_num, - PAGE_SIZE, - KFD_MEMPOOL_SYSTEM_WRITECOMBINE, - (struct kgd_mem **) &dqm->pipeline_mem); + err = kfd_gtt_sa_allocate(dqm->dev, CIK_HPD_EOP_BYTES * pipes_num, + &dqm->pipeline_mem); if (err) { pr_err("kfd: error allocate vidmem num pipes: %d\n", @@ -556,10 +490,9 @@ static int init_pipelines(struct device_queue_manager *dqm, memset(hpdptr, 0, CIK_HPD_EOP_BYTES * pipes_num); - mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE); + mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE); if (mqd == NULL) { - kfd2kgd->free_mem(dqm->dev->kgd, - (struct kgd_mem *) dqm->pipeline_mem); + kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem); return -ENOMEM; } @@ -579,7 +512,6 @@ static int init_pipelines(struct device_queue_manager *dqm, return 0; } - static int init_scheduler(struct device_queue_manager *dqm) { int retval; @@ -589,11 +521,6 @@ static int init_scheduler(struct device_queue_manager *dqm) pr_debug("kfd: In %s\n", __func__); retval = init_pipelines(dqm, get_pipes_num(dqm), get_first_pipe(dqm)); - if (retval != 0) - return retval; - - retval = init_memory(dqm); - return retval; } @@ -609,6 +536,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm) mutex_init(&dqm->lock); INIT_LIST_HEAD(&dqm->queues); dqm->queue_count = dqm->next_pipe_to_allocate = 0; + dqm->sdma_queue_count = 0; dqm->allocated_queues = kcalloc(get_pipes_num(dqm), sizeof(unsigned int), GFP_KERNEL); if (!dqm->allocated_queues) { @@ -620,6 +548,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm) dqm->allocated_queues[i] = (1 << QUEUES_PER_PIPE) - 1; dqm->vmid_bitmap = (1 << VMID_PER_DEVICE) - 1; + dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1; init_scheduler(dqm); return 0; @@ -637,8 +566,7 @@ static void uninitialize_nocpsch(struct device_queue_manager *dqm) for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++) kfree(dqm->mqds[i]); mutex_destroy(&dqm->lock); - kfd2kgd->free_mem(dqm->dev->kgd, - (struct kgd_mem *) dqm->pipeline_mem); + kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem); } static int start_nocpsch(struct device_queue_manager *dqm) @@ -651,6 +579,77 @@ static int stop_nocpsch(struct device_queue_manager *dqm) return 0; } +static int allocate_sdma_queue(struct device_queue_manager *dqm, + unsigned int *sdma_queue_id) +{ + int bit; + + if (dqm->sdma_bitmap == 0) + return -ENOMEM; + + bit = find_first_bit((unsigned long *)&dqm->sdma_bitmap, + CIK_SDMA_QUEUES); + + clear_bit(bit, (unsigned long *)&dqm->sdma_bitmap); + *sdma_queue_id = bit; + + return 0; +} + +static void deallocate_sdma_queue(struct device_queue_manager *dqm, + unsigned int sdma_queue_id) +{ + if (sdma_queue_id >= CIK_SDMA_QUEUES) + return; + set_bit(sdma_queue_id, (unsigned long *)&dqm->sdma_bitmap); +} + +static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, + struct qcm_process_device *qpd) +{ + uint32_t value = SDMA_ATC; + + if (q->process->is_32bit_user_mode) + value |= SDMA_VA_PTR32 | get_sh_mem_bases_32(qpd_to_pdd(qpd)); + else + value |= SDMA_VA_SHARED_BASE(get_sh_mem_bases_nybble_64( + qpd_to_pdd(qpd))); + q->properties.sdma_vm_addr = value; +} + +static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, + struct queue *q, + struct qcm_process_device *qpd) +{ + struct mqd_manager *mqd; + int retval; + + mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA); + if (!mqd) + return -ENOMEM; + + retval = allocate_sdma_queue(dqm, &q->sdma_id); + if (retval != 0) + return retval; + + q->properties.sdma_queue_id = q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE; + q->properties.sdma_engine_id = q->sdma_id / CIK_SDMA_ENGINE_NUM; + + pr_debug("kfd: sdma id is: %d\n", q->sdma_id); + pr_debug(" sdma queue id: %d\n", q->properties.sdma_queue_id); + pr_debug(" sdma engine id: %d\n", q->properties.sdma_engine_id); + + retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, + &q->gart_mqd_addr, &q->properties); + if (retval != 0) { + deallocate_sdma_queue(dqm, q->sdma_id); + return retval; + } + + init_sdma_vm(dqm, q, qpd); + return 0; +} + /* * Device Queue Manager implementation for cp scheduler */ @@ -692,8 +691,9 @@ static int initialize_cpsch(struct device_queue_manager *dqm) mutex_init(&dqm->lock); INIT_LIST_HEAD(&dqm->queues); dqm->queue_count = dqm->processes_count = 0; + dqm->sdma_queue_count = 0; dqm->active_runlist = false; - retval = init_pipelines(dqm, get_pipes_num(dqm), 0); + retval = dqm->ops_asic_specific.initialize(dqm); if (retval != 0) goto fail_init_pipelines; @@ -724,18 +724,14 @@ static int start_cpsch(struct device_queue_manager *dqm) pr_debug("kfd: allocating fence memory\n"); /* allocate fence memory on the gart */ - retval = kfd2kgd->allocate_mem(dqm->dev->kgd, - sizeof(*dqm->fence_addr), - 32, - KFD_MEMPOOL_SYSTEM_WRITECOMBINE, - (struct kgd_mem **) &dqm->fence_mem); + retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr), + &dqm->fence_mem); if (retval != 0) goto fail_allocate_vidmem; dqm->fence_addr = dqm->fence_mem->cpu_ptr; dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr; - list_for_each_entry(node, &dqm->queues, list) if (node->qpd->pqm->process && dqm->dev) kfd_bind_process_to_device(dqm->dev, @@ -764,8 +760,7 @@ static int stop_cpsch(struct device_queue_manager *dqm) pdd = qpd_to_pdd(node->qpd); pdd->bound = false; } - kfd2kgd->free_mem(dqm->dev->kgd, - (struct kgd_mem *) dqm->fence_mem); + kfd_gtt_sa_free(dqm->dev, dqm->fence_mem); pm_uninit(&dqm->packets); return 0; @@ -828,6 +823,14 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm, mutex_unlock(&dqm->lock); } +static void select_sdma_engine_id(struct queue *q) +{ + static int sdma_id; + + q->sdma_id = sdma_id; + sdma_id = (sdma_id + 1) % 2; +} + static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, struct qcm_process_device *qpd, int *allocate_vmid) { @@ -850,7 +853,12 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, goto out; } - mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_CP); + if (q->properties.type == KFD_QUEUE_TYPE_SDMA) + select_sdma_engine_id(q); + + mqd = dqm->ops.get_mqd_manager(dqm, + get_mqd_type_from_queue_type(q->properties.type)); + if (mqd == NULL) { mutex_unlock(&dqm->lock); return -ENOMEM; @@ -867,6 +875,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, retval = execute_queues_cpsch(dqm, false); } + if (q->properties.type == KFD_QUEUE_TYPE_SDMA) + dqm->sdma_queue_count++; /* * Unconditionally increment this counter, regardless of the queue's * type or whether the queue is active. @@ -893,12 +903,20 @@ static int fence_wait_timeout(unsigned int *fence_addr, pr_err("kfd: qcm fence wait loop timeout expired\n"); return -ETIME; } - cpu_relax(); + schedule(); } return 0; } +static int destroy_sdma_queues(struct device_queue_manager *dqm, + unsigned int sdma_engine) +{ + return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA, + KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 0, false, + sdma_engine); +} + static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock) { int retval; @@ -911,6 +929,15 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock) mutex_lock(&dqm->lock); if (dqm->active_runlist == false) goto out; + + pr_debug("kfd: Before destroying queues, sdma queue count is : %u\n", + dqm->sdma_queue_count); + + if (dqm->sdma_queue_count > 0) { + destroy_sdma_queues(dqm, 0); + destroy_sdma_queues(dqm, 1); + } + retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE, KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 0, false, 0); if (retval != 0) @@ -982,15 +1009,19 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, /* remove queue from list to prevent rescheduling after preemption */ mutex_lock(&dqm->lock); - - mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_CP); + mqd = dqm->ops.get_mqd_manager(dqm, + get_mqd_type_from_queue_type(q->properties.type)); if (!mqd) { retval = -ENOMEM; goto failed; } + if (q->properties.type == KFD_QUEUE_TYPE_SDMA) + dqm->sdma_queue_count--; + list_del(&q->list); - dqm->queue_count--; + if (q->properties.is_active) + dqm->queue_count--; execute_queues_cpsch(dqm, false); @@ -1028,8 +1059,7 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm, void __user *alternate_aperture_base, uint64_t alternate_aperture_size) { - uint32_t default_mtype; - uint32_t ape1_mtype; + bool retval; pr_debug("kfd: In func %s\n", __func__); @@ -1066,18 +1096,13 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm, qpd->sh_mem_ape1_limit = limit >> 16; } - default_mtype = (default_policy == cache_policy_coherent) ? - MTYPE_NONCACHED : - MTYPE_CACHED; - - ape1_mtype = (alternate_policy == cache_policy_coherent) ? - MTYPE_NONCACHED : - MTYPE_CACHED; - - qpd->sh_mem_config = (qpd->sh_mem_config & PTR32) - | ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) - | DEFAULT_MTYPE(default_mtype) - | APE1_MTYPE(ape1_mtype); + retval = dqm->ops_asic_specific.set_cache_memory_policy( + dqm, + qpd, + default_policy, + alternate_policy, + alternate_aperture_base, + alternate_aperture_size); if ((sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0)) program_sh_mem_settings(dqm, qpd); @@ -1087,7 +1112,7 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm, qpd->sh_mem_ape1_limit); mutex_unlock(&dqm->lock); - return true; + return retval; out: mutex_unlock(&dqm->lock); @@ -1100,6 +1125,8 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) BUG_ON(!dev); + pr_debug("kfd: loading device queue manager\n"); + dqm = kzalloc(sizeof(struct device_queue_manager), GFP_KERNEL); if (!dqm) return NULL; @@ -1109,40 +1136,50 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) case KFD_SCHED_POLICY_HWS: case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION: /* initialize dqm for cp scheduling */ - dqm->create_queue = create_queue_cpsch; - dqm->initialize = initialize_cpsch; - dqm->start = start_cpsch; - dqm->stop = stop_cpsch; - dqm->destroy_queue = destroy_queue_cpsch; - dqm->update_queue = update_queue; - dqm->get_mqd_manager = get_mqd_manager_nocpsch; - dqm->register_process = register_process_nocpsch; - dqm->unregister_process = unregister_process_nocpsch; - dqm->uninitialize = uninitialize_nocpsch; - dqm->create_kernel_queue = create_kernel_queue_cpsch; - dqm->destroy_kernel_queue = destroy_kernel_queue_cpsch; - dqm->set_cache_memory_policy = set_cache_memory_policy; + dqm->ops.create_queue = create_queue_cpsch; + dqm->ops.initialize = initialize_cpsch; + dqm->ops.start = start_cpsch; + dqm->ops.stop = stop_cpsch; + dqm->ops.destroy_queue = destroy_queue_cpsch; + dqm->ops.update_queue = update_queue; + dqm->ops.get_mqd_manager = get_mqd_manager_nocpsch; + dqm->ops.register_process = register_process_nocpsch; + dqm->ops.unregister_process = unregister_process_nocpsch; + dqm->ops.uninitialize = uninitialize_nocpsch; + dqm->ops.create_kernel_queue = create_kernel_queue_cpsch; + dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch; + dqm->ops.set_cache_memory_policy = set_cache_memory_policy; break; case KFD_SCHED_POLICY_NO_HWS: /* initialize dqm for no cp scheduling */ - dqm->start = start_nocpsch; - dqm->stop = stop_nocpsch; - dqm->create_queue = create_queue_nocpsch; - dqm->destroy_queue = destroy_queue_nocpsch; - dqm->update_queue = update_queue; - dqm->get_mqd_manager = get_mqd_manager_nocpsch; - dqm->register_process = register_process_nocpsch; - dqm->unregister_process = unregister_process_nocpsch; - dqm->initialize = initialize_nocpsch; - dqm->uninitialize = uninitialize_nocpsch; - dqm->set_cache_memory_policy = set_cache_memory_policy; + dqm->ops.start = start_nocpsch; + dqm->ops.stop = stop_nocpsch; + dqm->ops.create_queue = create_queue_nocpsch; + dqm->ops.destroy_queue = destroy_queue_nocpsch; + dqm->ops.update_queue = update_queue; + dqm->ops.get_mqd_manager = get_mqd_manager_nocpsch; + dqm->ops.register_process = register_process_nocpsch; + dqm->ops.unregister_process = unregister_process_nocpsch; + dqm->ops.initialize = initialize_nocpsch; + dqm->ops.uninitialize = uninitialize_nocpsch; + dqm->ops.set_cache_memory_policy = set_cache_memory_policy; break; default: BUG(); break; } - if (dqm->initialize(dqm) != 0) { + switch (dev->device_info->asic_family) { + case CHIP_CARRIZO: + device_queue_manager_init_vi(&dqm->ops_asic_specific); + break; + + case CHIP_KAVERI: + device_queue_manager_init_cik(&dqm->ops_asic_specific); + break; + } + + if (dqm->ops.initialize(dqm) != 0) { kfree(dqm); return NULL; } @@ -1154,7 +1191,6 @@ void device_queue_manager_uninit(struct device_queue_manager *dqm) { BUG_ON(!dqm); - dqm->uninitialize(dqm); + dqm->ops.uninitialize(dqm); kfree(dqm); } - diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 52035bf0c1cb..d64f86cda34f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -36,6 +36,9 @@ #define KFD_VMID_START_OFFSET (8) #define VMID_PER_DEVICE CIK_VMID_NUM #define KFD_DQM_FIRST_PIPE (0) +#define CIK_SDMA_QUEUES (4) +#define CIK_SDMA_QUEUES_PER_ENGINE (2) +#define CIK_SDMA_ENGINE_NUM (2) struct device_process_node { struct qcm_process_device *qpd; @@ -43,7 +46,7 @@ struct device_process_node { }; /** - * struct device_queue_manager + * struct device_queue_manager_ops * * @create_queue: Queue creation routine. * @@ -78,15 +81,9 @@ struct device_process_node { * @set_cache_memory_policy: Sets memory policy (cached/ non cached) for the * memory apertures. * - * This struct is a base class for the kfd queues scheduler in the - * device level. The device base class should expose the basic operations - * for queue creation and queue destruction. This base class hides the - * scheduling mode of the driver and the specific implementation of the - * concrete device. This class is the only class in the queues scheduler - * that configures the H/W. */ -struct device_queue_manager { +struct device_queue_manager_ops { int (*create_queue)(struct device_queue_manager *dqm, struct queue *q, struct qcm_process_device *qpd, @@ -121,7 +118,23 @@ struct device_queue_manager { enum cache_policy alternate_policy, void __user *alternate_aperture_base, uint64_t alternate_aperture_size); +}; +/** + * struct device_queue_manager + * + * This struct is a base class for the kfd queues scheduler in the + * device level. The device base class should expose the basic operations + * for queue creation and queue destruction. This base class hides the + * scheduling mode of the driver and the specific implementation of the + * concrete device. This class is the only class in the queues scheduler + * that configures the H/W. + * + */ + +struct device_queue_manager { + struct device_queue_manager_ops ops; + struct device_queue_manager_ops ops_asic_specific; struct mqd_manager *mqds[KFD_MQD_TYPE_MAX]; struct packet_manager packets; @@ -130,9 +143,11 @@ struct device_queue_manager { struct list_head queues; unsigned int processes_count; unsigned int queue_count; + unsigned int sdma_queue_count; unsigned int total_queue_count; unsigned int next_pipe_to_allocate; unsigned int *allocated_queues; + unsigned int sdma_bitmap; unsigned int vmid_bitmap; uint64_t pipelines_addr; struct kfd_mem_obj *pipeline_mem; @@ -142,6 +157,28 @@ struct device_queue_manager { bool active_runlist; }; +void device_queue_manager_init_cik(struct device_queue_manager_ops *ops); +void device_queue_manager_init_vi(struct device_queue_manager_ops *ops); +void program_sh_mem_settings(struct device_queue_manager *dqm, + struct qcm_process_device *qpd); +int init_pipelines(struct device_queue_manager *dqm, + unsigned int pipes_num, unsigned int first_pipe); +extern inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd) +{ + return (pdd->lds_base >> 16) & 0xFF; +} + +extern inline unsigned int +get_sh_mem_bases_nybble_64(struct kfd_process_device *pdd) +{ + return (pdd->lds_base >> 60) & 0x0E; +} + +extern inline unsigned int get_pipes_num(struct device_queue_manager *dqm) +{ + BUG_ON(!dqm || !dqm->dev); + return dqm->dev->shared_resources.compute_pipe_count; +} #endif /* KFD_DEVICE_QUEUE_MANAGER_H_ */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c new file mode 100644 index 000000000000..6b072466e2a6 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c @@ -0,0 +1,135 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "kfd_device_queue_manager.h" +#include "cik_regs.h" + +static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + enum cache_policy default_policy, + enum cache_policy alternate_policy, + void __user *alternate_aperture_base, + uint64_t alternate_aperture_size); +static int register_process_cik(struct device_queue_manager *dqm, + struct qcm_process_device *qpd); +static int initialize_cpsch_cik(struct device_queue_manager *dqm); + +void device_queue_manager_init_cik(struct device_queue_manager_ops *ops) +{ + ops->set_cache_memory_policy = set_cache_memory_policy_cik; + ops->register_process = register_process_cik; + ops->initialize = initialize_cpsch_cik; +} + +static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) +{ + /* In 64-bit mode, we can only control the top 3 bits of the LDS, + * scratch and GPUVM apertures. + * The hardware fills in the remaining 59 bits according to the + * following pattern: + * LDS: X0000000'00000000 - X0000001'00000000 (4GB) + * Scratch: X0000001'00000000 - X0000002'00000000 (4GB) + * GPUVM: Y0010000'00000000 - Y0020000'00000000 (1TB) + * + * (where X/Y is the configurable nybble with the low-bit 0) + * + * LDS and scratch will have the same top nybble programmed in the + * top 3 bits of SH_MEM_BASES.PRIVATE_BASE. + * GPUVM can have a different top nybble programmed in the + * top 3 bits of SH_MEM_BASES.SHARED_BASE. + * We don't bother to support different top nybbles + * for LDS/Scratch and GPUVM. + */ + + BUG_ON((top_address_nybble & 1) || top_address_nybble > 0xE || + top_address_nybble == 0); + + return PRIVATE_BASE(top_address_nybble << 12) | + SHARED_BASE(top_address_nybble << 12); +} + +static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + enum cache_policy default_policy, + enum cache_policy alternate_policy, + void __user *alternate_aperture_base, + uint64_t alternate_aperture_size) +{ + uint32_t default_mtype; + uint32_t ape1_mtype; + + default_mtype = (default_policy == cache_policy_coherent) ? + MTYPE_NONCACHED : + MTYPE_CACHED; + + ape1_mtype = (alternate_policy == cache_policy_coherent) ? + MTYPE_NONCACHED : + MTYPE_CACHED; + + qpd->sh_mem_config = (qpd->sh_mem_config & PTR32) + | ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) + | DEFAULT_MTYPE(default_mtype) + | APE1_MTYPE(ape1_mtype); + + return true; +} + +static int register_process_cik(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) +{ + struct kfd_process_device *pdd; + unsigned int temp; + + BUG_ON(!dqm || !qpd); + + pdd = qpd_to_pdd(qpd); + + /* check if sh_mem_config register already configured */ + if (qpd->sh_mem_config == 0) { + qpd->sh_mem_config = + ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) | + DEFAULT_MTYPE(MTYPE_NONCACHED) | + APE1_MTYPE(MTYPE_NONCACHED); + qpd->sh_mem_ape1_limit = 0; + qpd->sh_mem_ape1_base = 0; + } + + if (qpd->pqm->process->is_32bit_user_mode) { + temp = get_sh_mem_bases_32(pdd); + qpd->sh_mem_bases = SHARED_BASE(temp); + qpd->sh_mem_config |= PTR32; + } else { + temp = get_sh_mem_bases_nybble_64(pdd); + qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp); + } + + pr_debug("kfd: is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n", + qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases); + + return 0; +} + +static int initialize_cpsch_cik(struct device_queue_manager *dqm) +{ + return init_pipelines(dqm, get_pipes_num(dqm), 0); +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c new file mode 100644 index 000000000000..20553dcd257d --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c @@ -0,0 +1,64 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "kfd_device_queue_manager.h" + +static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + enum cache_policy default_policy, + enum cache_policy alternate_policy, + void __user *alternate_aperture_base, + uint64_t alternate_aperture_size); +static int register_process_vi(struct device_queue_manager *dqm, + struct qcm_process_device *qpd); +static int initialize_cpsch_vi(struct device_queue_manager *dqm); + +void device_queue_manager_init_vi(struct device_queue_manager_ops *ops) +{ + pr_warn("amdkfd: VI DQM is not currently supported\n"); + + ops->set_cache_memory_policy = set_cache_memory_policy_vi; + ops->register_process = register_process_vi; + ops->initialize = initialize_cpsch_vi; +} + +static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + enum cache_policy default_policy, + enum cache_policy alternate_policy, + void __user *alternate_aperture_base, + uint64_t alternate_aperture_size) +{ + return false; +} + +static int register_process_vi(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) +{ + return -1; +} + +static int initialize_cpsch_vi(struct device_queue_manager *dqm) +{ + return 0; +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c index b5791a5c7c06..1a9b355dd114 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c @@ -137,10 +137,6 @@ int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma) if (dev == NULL) return -EINVAL; - /* Find if pdd exists for combination of process and gpu id */ - if (!kfd_get_process_device_data(dev, process, 0)) - return -EINVAL; - /* Calculate physical address of doorbell */ address = kfd_get_process_doorbells(dev, process); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c index e64aa99e5e41..35b987574633 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c @@ -303,10 +303,11 @@ int kfd_init_apertures(struct kfd_process *process) while ((dev = kfd_topology_enum_kfd_devices(id)) != NULL && id < NUM_OF_SUPPORTED_GPUS) { - pdd = kfd_get_process_device_data(dev, process, 1); - if (!pdd) + pdd = kfd_create_process_device_data(dev, process); + if (pdd == NULL) { + pr_err("Failed to create process device data\n"); return -1; - + } /* * For 64 bit process aperture will be statically reserved in * the x86_64 non canonical process address space diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index 935071410724..e415a2a9207e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -56,8 +56,8 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, switch (type) { case KFD_QUEUE_TYPE_DIQ: case KFD_QUEUE_TYPE_HIQ: - kq->mqd = dev->dqm->get_mqd_manager(dev->dqm, - KFD_MQD_TYPE_CIK_HIQ); + kq->mqd = dev->dqm->ops.get_mqd_manager(dev->dqm, + KFD_MQD_TYPE_HIQ); break; default: BUG(); @@ -72,23 +72,19 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, if (prop.doorbell_ptr == NULL) goto err_get_kernel_doorbell; - retval = kfd2kgd->allocate_mem(dev->kgd, - queue_size, - PAGE_SIZE, - KFD_MEMPOOL_SYSTEM_WRITECOMBINE, - (struct kgd_mem **) &kq->pq); - + retval = kfd_gtt_sa_allocate(dev, queue_size, &kq->pq); if (retval != 0) goto err_pq_allocate_vidmem; kq->pq_kernel_addr = kq->pq->cpu_ptr; kq->pq_gpu_addr = kq->pq->gpu_addr; - retval = kfd2kgd->allocate_mem(dev->kgd, - sizeof(*kq->rptr_kernel), - 32, - KFD_MEMPOOL_SYSTEM_WRITECOMBINE, - (struct kgd_mem **) &kq->rptr_mem); + retval = kq->ops_asic_specific.initialize(kq, dev, type, queue_size); + if (retval == false) + goto err_eop_allocate_vidmem; + + retval = kfd_gtt_sa_allocate(dev, sizeof(*kq->rptr_kernel), + &kq->rptr_mem); if (retval != 0) goto err_rptr_allocate_vidmem; @@ -96,11 +92,8 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, kq->rptr_kernel = kq->rptr_mem->cpu_ptr; kq->rptr_gpu_addr = kq->rptr_mem->gpu_addr; - retval = kfd2kgd->allocate_mem(dev->kgd, - sizeof(*kq->wptr_kernel), - 32, - KFD_MEMPOOL_SYSTEM_WRITECOMBINE, - (struct kgd_mem **) &kq->wptr_mem); + retval = kfd_gtt_sa_allocate(dev, sizeof(*kq->wptr_kernel), + &kq->wptr_mem); if (retval != 0) goto err_wptr_allocate_vidmem; @@ -121,6 +114,8 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, prop.queue_address = kq->pq_gpu_addr; prop.read_ptr = (uint32_t *) kq->rptr_gpu_addr; prop.write_ptr = (uint32_t *) kq->wptr_gpu_addr; + prop.eop_ring_buffer_address = kq->eop_gpu_addr; + prop.eop_ring_buffer_size = PAGE_SIZE; if (init_queue(&kq->queue, prop) != 0) goto err_init_queue; @@ -145,11 +140,8 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, } else { /* allocate fence for DIQ */ - retval = kfd2kgd->allocate_mem(dev->kgd, - sizeof(uint32_t), - 32, - KFD_MEMPOOL_SYSTEM_WRITECOMBINE, - (struct kgd_mem **) &kq->fence_mem_obj); + retval = kfd_gtt_sa_allocate(dev, sizeof(uint32_t), + &kq->fence_mem_obj); if (retval != 0) goto err_alloc_fence; @@ -165,11 +157,13 @@ err_alloc_fence: err_init_mqd: uninit_queue(kq->queue); err_init_queue: - kfd2kgd->free_mem(dev->kgd, (struct kgd_mem *) kq->wptr_mem); + kfd_gtt_sa_free(dev, kq->wptr_mem); err_wptr_allocate_vidmem: - kfd2kgd->free_mem(dev->kgd, (struct kgd_mem *) kq->rptr_mem); + kfd_gtt_sa_free(dev, kq->rptr_mem); err_rptr_allocate_vidmem: - kfd2kgd->free_mem(dev->kgd, (struct kgd_mem *) kq->pq); + kfd_gtt_sa_free(dev, kq->eop_mem); +err_eop_allocate_vidmem: + kfd_gtt_sa_free(dev, kq->pq); err_pq_allocate_vidmem: pr_err("kfd: error init pq\n"); kfd_release_kernel_doorbell(dev, prop.doorbell_ptr); @@ -190,10 +184,13 @@ static void uninitialize(struct kernel_queue *kq) QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS, kq->queue->pipe, kq->queue->queue); + else if (kq->queue->properties.type == KFD_QUEUE_TYPE_DIQ) + kfd_gtt_sa_free(kq->dev, kq->fence_mem_obj); - kfd2kgd->free_mem(kq->dev->kgd, (struct kgd_mem *) kq->rptr_mem); - kfd2kgd->free_mem(kq->dev->kgd, (struct kgd_mem *) kq->wptr_mem); - kfd2kgd->free_mem(kq->dev->kgd, (struct kgd_mem *) kq->pq); + kfd_gtt_sa_free(kq->dev, kq->rptr_mem); + kfd_gtt_sa_free(kq->dev, kq->wptr_mem); + kq->ops_asic_specific.uninitialize(kq); + kfd_gtt_sa_free(kq->dev, kq->pq); kfd_release_kernel_doorbell(kq->dev, kq->queue->properties.doorbell_ptr); uninit_queue(kq->queue); @@ -265,28 +262,6 @@ static void submit_packet(struct kernel_queue *kq) kq->pending_wptr); } -static int sync_with_hw(struct kernel_queue *kq, unsigned long timeout_ms) -{ - unsigned long org_timeout_ms; - - BUG_ON(!kq); - - org_timeout_ms = timeout_ms; - timeout_ms += jiffies * 1000 / HZ; - while (*kq->wptr_kernel != *kq->rptr_kernel) { - if (time_after(jiffies * 1000 / HZ, timeout_ms)) { - pr_err("kfd: kernel_queue %s timeout expired %lu\n", - __func__, org_timeout_ms); - pr_err("kfd: wptr: %d rptr: %d\n", - *kq->wptr_kernel, *kq->rptr_kernel); - return -ETIME; - } - schedule(); - } - - return 0; -} - static void rollback_packet(struct kernel_queue *kq) { BUG_ON(!kq); @@ -304,14 +279,23 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, if (!kq) return NULL; - kq->initialize = initialize; - kq->uninitialize = uninitialize; - kq->acquire_packet_buffer = acquire_packet_buffer; - kq->submit_packet = submit_packet; - kq->sync_with_hw = sync_with_hw; - kq->rollback_packet = rollback_packet; + kq->ops.initialize = initialize; + kq->ops.uninitialize = uninitialize; + kq->ops.acquire_packet_buffer = acquire_packet_buffer; + kq->ops.submit_packet = submit_packet; + kq->ops.rollback_packet = rollback_packet; - if (kq->initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE) == false) { + switch (dev->device_info->asic_family) { + case CHIP_CARRIZO: + kernel_queue_init_vi(&kq->ops_asic_specific); + break; + + case CHIP_KAVERI: + kernel_queue_init_cik(&kq->ops_asic_specific); + break; + } + + if (kq->ops.initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE) == false) { pr_err("kfd: failed to init kernel queue\n"); kfree(kq); return NULL; @@ -323,7 +307,7 @@ void kernel_queue_uninit(struct kernel_queue *kq) { BUG_ON(!kq); - kq->uninitialize(kq); + kq->ops.uninitialize(kq); kfree(kq); } @@ -335,19 +319,18 @@ static __attribute__((unused)) void test_kq(struct kfd_dev *dev) BUG_ON(!dev); - pr_debug("kfd: starting kernel queue test\n"); + pr_err("kfd: starting kernel queue test\n"); kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_HIQ); BUG_ON(!kq); - retval = kq->acquire_packet_buffer(kq, 5, &buffer); + retval = kq->ops.acquire_packet_buffer(kq, 5, &buffer); BUG_ON(retval != 0); for (i = 0; i < 5; i++) buffer[i] = kq->nop_packet; - kq->submit_packet(kq); - kq->sync_with_hw(kq, 1000); + kq->ops.submit_packet(kq); - pr_debug("kfd: ending kernel queue test\n"); + pr_err("kfd: ending kernel queue test\n"); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h index dcd2bdb68d44..594053136ee4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h @@ -28,8 +28,31 @@ #include #include "kfd_priv.h" -struct kernel_queue { - /* interface */ +/** + * struct kernel_queue_ops + * + * @initialize: Initialize a kernel queue, including allocations of GART memory + * needed for the queue. + * + * @uninitialize: Uninitialize a kernel queue and free all its memory usages. + * + * @acquire_packet_buffer: Returns a pointer to the location in the kernel + * queue ring buffer where the calling function can write its packet. It is + * Guaranteed that there is enough space for that packet. It also updates the + * pending write pointer to that location so subsequent calls to + * acquire_packet_buffer will get a correct write pointer + * + * @submit_packet: Update the write pointer and doorbell of a kernel queue. + * + * @sync_with_hw: Wait until the write pointer and the read pointer of a kernel + * queue are equal, which means the CP has read all the submitted packets. + * + * @rollback_packet: This routine is called if we failed to build an acquired + * packet for some reason. It just overwrites the pending wptr with the current + * one + * + */ +struct kernel_queue_ops { bool (*initialize)(struct kernel_queue *kq, struct kfd_dev *dev, enum kfd_queue_type type, unsigned int queue_size); void (*uninitialize)(struct kernel_queue *kq); @@ -38,9 +61,12 @@ struct kernel_queue { unsigned int **buffer_ptr); void (*submit_packet)(struct kernel_queue *kq); - int (*sync_with_hw)(struct kernel_queue *kq, - unsigned long timeout_ms); void (*rollback_packet)(struct kernel_queue *kq); +}; + +struct kernel_queue { + struct kernel_queue_ops ops; + struct kernel_queue_ops ops_asic_specific; /* data */ struct kfd_dev *dev; @@ -58,6 +84,9 @@ struct kernel_queue { struct kfd_mem_obj *pq; uint64_t pq_gpu_addr; uint32_t *pq_kernel_addr; + struct kfd_mem_obj *eop_mem; + uint64_t eop_gpu_addr; + uint32_t *eop_kernel_addr; struct kfd_mem_obj *fence_mem_obj; uint64_t fence_gpu_addr; @@ -66,4 +95,7 @@ struct kernel_queue { struct list_head list; }; +void kernel_queue_init_cik(struct kernel_queue_ops *ops); +void kernel_queue_init_vi(struct kernel_queue_ops *ops); + #endif /* KFD_KERNEL_QUEUE_H_ */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c new file mode 100644 index 000000000000..a90eb440b1fb --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c @@ -0,0 +1,44 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "kfd_kernel_queue.h" + +static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev, + enum kfd_queue_type type, unsigned int queue_size); +static void uninitialize_cik(struct kernel_queue *kq); + +void kernel_queue_init_cik(struct kernel_queue_ops *ops) +{ + ops->initialize = initialize_cik; + ops->uninitialize = uninitialize_cik; +} + +static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev, + enum kfd_queue_type type, unsigned int queue_size) +{ + return true; +} + +static void uninitialize_cik(struct kernel_queue *kq) +{ +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c new file mode 100644 index 000000000000..f1d48281e322 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c @@ -0,0 +1,56 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "kfd_kernel_queue.h" + +static bool initialize_vi(struct kernel_queue *kq, struct kfd_dev *dev, + enum kfd_queue_type type, unsigned int queue_size); +static void uninitialize_vi(struct kernel_queue *kq); + +void kernel_queue_init_vi(struct kernel_queue_ops *ops) +{ + ops->initialize = initialize_vi; + ops->uninitialize = uninitialize_vi; +} + +static bool initialize_vi(struct kernel_queue *kq, struct kfd_dev *dev, + enum kfd_queue_type type, unsigned int queue_size) +{ + int retval; + + retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, &kq->eop_mem); + if (retval != 0) + return false; + + kq->eop_gpu_addr = kq->eop_mem->gpu_addr; + kq->eop_kernel_addr = kq->eop_mem->cpu_ptr; + + memset(kq->eop_kernel_addr, 0, PAGE_SIZE); + + return true; +} + +static void uninitialize_vi(struct kernel_queue *kq) +{ + kfd_gtt_sa_free(kq->dev, kq->eop_mem); +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c index 1c385c23dd0b..3f34ae16f075 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c @@ -29,10 +29,10 @@ #define KFD_DRIVER_AUTHOR "AMD Inc. and others" #define KFD_DRIVER_DESC "Standalone HSA driver for AMD's GPUs" -#define KFD_DRIVER_DATE "20141113" +#define KFD_DRIVER_DATE "20150122" #define KFD_DRIVER_MAJOR 0 #define KFD_DRIVER_MINOR 7 -#define KFD_DRIVER_PATCHLEVEL 0 +#define KFD_DRIVER_PATCHLEVEL 1 const struct kfd2kgd_calls *kfd2kgd; static const struct kgd2kfd_calls kgd2kfd = { @@ -48,7 +48,7 @@ static const struct kgd2kfd_calls kgd2kfd = { int sched_policy = KFD_SCHED_POLICY_HWS; module_param(sched_policy, int, 0444); MODULE_PARM_DESC(sched_policy, - "Kernel cmdline parameter that defines the amdkfd scheduling policy"); + "Scheduling policy (0 = HWS (Default), 1 = HWS without over-subscription, 2 = Non-HWS (Used for debugging only)"); int max_num_of_queues_per_device = KFD_MAX_NUM_OF_QUEUES_PER_DEVICE_DEFAULT; module_param(max_num_of_queues_per_device, int, 0444); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c index 4c3828cf45bf..b1ef1368c3bb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c @@ -21,326 +21,17 @@ * */ -#include -#include #include "kfd_priv.h" -#include "kfd_mqd_manager.h" -#include "cik_regs.h" -#include "../../radeon/cik_reg.h" - -inline void busy_wait(unsigned long ms) -{ - while (time_before(jiffies, ms)) - cpu_relax(); -} - -static inline struct cik_mqd *get_mqd(void *mqd) -{ - return (struct cik_mqd *)mqd; -} - -static int init_mqd(struct mqd_manager *mm, void **mqd, - struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, - struct queue_properties *q) -{ - uint64_t addr; - struct cik_mqd *m; - int retval; - - BUG_ON(!mm || !q || !mqd); - - pr_debug("kfd: In func %s\n", __func__); - - retval = kfd2kgd->allocate_mem(mm->dev->kgd, - sizeof(struct cik_mqd), - 256, - KFD_MEMPOOL_SYSTEM_WRITECOMBINE, - (struct kgd_mem **) mqd_mem_obj); - - if (retval != 0) - return -ENOMEM; - - m = (struct cik_mqd *) (*mqd_mem_obj)->cpu_ptr; - addr = (*mqd_mem_obj)->gpu_addr; - - memset(m, 0, ALIGN(sizeof(struct cik_mqd), 256)); - - m->header = 0xC0310800; - m->compute_pipelinestat_enable = 1; - m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF; - m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF; - m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF; - m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF; - - /* - * Make sure to use the last queue state saved on mqd when the cp - * reassigns the queue, so when queue is switched on/off (e.g over - * subscription or quantum timeout) the context will be consistent - */ - m->cp_hqd_persistent_state = - DEFAULT_CP_HQD_PERSISTENT_STATE | PRELOAD_REQ; - - m->cp_mqd_control = MQD_CONTROL_PRIV_STATE_EN; - m->cp_mqd_base_addr_lo = lower_32_bits(addr); - m->cp_mqd_base_addr_hi = upper_32_bits(addr); - - m->cp_hqd_ib_control = DEFAULT_MIN_IB_AVAIL_SIZE | IB_ATC_EN; - /* Although WinKFD writes this, I suspect it should not be necessary */ - m->cp_hqd_ib_control = IB_ATC_EN | DEFAULT_MIN_IB_AVAIL_SIZE; - - m->cp_hqd_quantum = QUANTUM_EN | QUANTUM_SCALE_1MS | - QUANTUM_DURATION(10); - - /* - * Pipe Priority - * Identifies the pipe relative priority when this queue is connected - * to the pipeline. The pipe priority is against the GFX pipe and HP3D. - * In KFD we are using a fixed pipe priority set to CS_MEDIUM. - * 0 = CS_LOW (typically below GFX) - * 1 = CS_MEDIUM (typically between HP3D and GFX - * 2 = CS_HIGH (typically above HP3D) - */ - m->cp_hqd_pipe_priority = 1; - m->cp_hqd_queue_priority = 15; - - *mqd = m; - if (gart_addr != NULL) - *gart_addr = addr; - retval = mm->update_mqd(mm, m, q); - - return retval; -} - -static void uninit_mqd(struct mqd_manager *mm, void *mqd, - struct kfd_mem_obj *mqd_mem_obj) -{ - BUG_ON(!mm || !mqd); - kfd2kgd->free_mem(mm->dev->kgd, (struct kgd_mem *) mqd_mem_obj); -} - -static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id, - uint32_t queue_id, uint32_t __user *wptr) -{ - return kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id, wptr); - -} - -static int update_mqd(struct mqd_manager *mm, void *mqd, - struct queue_properties *q) -{ - struct cik_mqd *m; - - BUG_ON(!mm || !q || !mqd); - - pr_debug("kfd: In func %s\n", __func__); - - m = get_mqd(mqd); - m->cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE | - DEFAULT_MIN_AVAIL_SIZE | PQ_ATC_EN; - - /* - * Calculating queue size which is log base 2 of actual queue size -1 - * dwords and another -1 for ffs - */ - m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int)) - - 1 - 1; - m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); - m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8); - m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr); - m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr); - m->cp_hqd_pq_doorbell_control = DOORBELL_EN | - DOORBELL_OFFSET(q->doorbell_off); - - m->cp_hqd_vmid = q->vmid; - - if (q->format == KFD_QUEUE_FORMAT_AQL) { - m->cp_hqd_iq_rptr = AQL_ENABLE; - m->cp_hqd_pq_control |= NO_UPDATE_RPTR; - } - - m->cp_hqd_active = 0; - q->is_active = false; - if (q->queue_size > 0 && - q->queue_address != 0 && - q->queue_percent > 0) { - m->cp_hqd_active = 1; - q->is_active = true; - } - - return 0; -} - -static int destroy_mqd(struct mqd_manager *mm, void *mqd, - enum kfd_preempt_type type, - unsigned int timeout, uint32_t pipe_id, - uint32_t queue_id) -{ - return kfd2kgd->hqd_destroy(mm->dev->kgd, type, timeout, - pipe_id, queue_id); -} - -static bool is_occupied(struct mqd_manager *mm, void *mqd, - uint64_t queue_address, uint32_t pipe_id, - uint32_t queue_id) -{ - - return kfd2kgd->hqd_is_occupied(mm->dev->kgd, queue_address, - pipe_id, queue_id); - -} - -/* - * HIQ MQD Implementation, concrete implementation for HIQ MQD implementation. - * The HIQ queue in Kaveri is using the same MQD structure as all the user mode - * queues but with different initial values. - */ - -static int init_mqd_hiq(struct mqd_manager *mm, void **mqd, - struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, - struct queue_properties *q) -{ - uint64_t addr; - struct cik_mqd *m; - int retval; - - BUG_ON(!mm || !q || !mqd || !mqd_mem_obj); - - pr_debug("kfd: In func %s\n", __func__); - - retval = kfd2kgd->allocate_mem(mm->dev->kgd, - sizeof(struct cik_mqd), - 256, - KFD_MEMPOOL_SYSTEM_WRITECOMBINE, - (struct kgd_mem **) mqd_mem_obj); - - if (retval != 0) - return -ENOMEM; - - m = (struct cik_mqd *) (*mqd_mem_obj)->cpu_ptr; - addr = (*mqd_mem_obj)->gpu_addr; - - memset(m, 0, ALIGN(sizeof(struct cik_mqd), 256)); - - m->header = 0xC0310800; - m->compute_pipelinestat_enable = 1; - m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF; - m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF; - m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF; - m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF; - - m->cp_hqd_persistent_state = DEFAULT_CP_HQD_PERSISTENT_STATE | - PRELOAD_REQ; - m->cp_hqd_quantum = QUANTUM_EN | QUANTUM_SCALE_1MS | - QUANTUM_DURATION(10); - - m->cp_mqd_control = MQD_CONTROL_PRIV_STATE_EN; - m->cp_mqd_base_addr_lo = lower_32_bits(addr); - m->cp_mqd_base_addr_hi = upper_32_bits(addr); - - m->cp_hqd_ib_control = DEFAULT_MIN_IB_AVAIL_SIZE; - - /* - * Pipe Priority - * Identifies the pipe relative priority when this queue is connected - * to the pipeline. The pipe priority is against the GFX pipe and HP3D. - * In KFD we are using a fixed pipe priority set to CS_MEDIUM. - * 0 = CS_LOW (typically below GFX) - * 1 = CS_MEDIUM (typically between HP3D and GFX - * 2 = CS_HIGH (typically above HP3D) - */ - m->cp_hqd_pipe_priority = 1; - m->cp_hqd_queue_priority = 15; - - *mqd = m; - if (gart_addr) - *gart_addr = addr; - retval = mm->update_mqd(mm, m, q); - - return retval; -} - -static int update_mqd_hiq(struct mqd_manager *mm, void *mqd, - struct queue_properties *q) -{ - struct cik_mqd *m; - - BUG_ON(!mm || !q || !mqd); - - pr_debug("kfd: In func %s\n", __func__); - - m = get_mqd(mqd); - m->cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE | - DEFAULT_MIN_AVAIL_SIZE | - PRIV_STATE | - KMD_QUEUE; - - /* - * Calculating queue size which is log base 2 of actual queue - * size -1 dwords - */ - m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int)) - - 1 - 1; - m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); - m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8); - m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr); - m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr); - m->cp_hqd_pq_doorbell_control = DOORBELL_EN | - DOORBELL_OFFSET(q->doorbell_off); - - m->cp_hqd_vmid = q->vmid; - - m->cp_hqd_active = 0; - q->is_active = false; - if (q->queue_size > 0 && - q->queue_address != 0 && - q->queue_percent > 0) { - m->cp_hqd_active = 1; - q->is_active = true; - } - - return 0; -} struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, struct kfd_dev *dev) { - struct mqd_manager *mqd; - - BUG_ON(!dev); - BUG_ON(type >= KFD_MQD_TYPE_MAX); - - pr_debug("kfd: In func %s\n", __func__); - - mqd = kzalloc(sizeof(struct mqd_manager), GFP_KERNEL); - if (!mqd) - return NULL; - - mqd->dev = dev; - - switch (type) { - case KFD_MQD_TYPE_CIK_CP: - case KFD_MQD_TYPE_CIK_COMPUTE: - mqd->init_mqd = init_mqd; - mqd->uninit_mqd = uninit_mqd; - mqd->load_mqd = load_mqd; - mqd->update_mqd = update_mqd; - mqd->destroy_mqd = destroy_mqd; - mqd->is_occupied = is_occupied; - break; - case KFD_MQD_TYPE_CIK_HIQ: - mqd->init_mqd = init_mqd_hiq; - mqd->uninit_mqd = uninit_mqd; - mqd->load_mqd = load_mqd; - mqd->update_mqd = update_mqd_hiq; - mqd->destroy_mqd = destroy_mqd; - mqd->is_occupied = is_occupied; - break; - default: - kfree(mqd); - return NULL; + switch (dev->device_info->asic_family) { + case CHIP_KAVERI: + return mqd_manager_init_cik(type, dev); + case CHIP_CARRIZO: + return mqd_manager_init_vi(type, dev); } - return mqd; + return NULL; } - -/* SDMA queues should be implemented here when the cp will supports them */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c new file mode 100644 index 000000000000..a09e18a339f3 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c @@ -0,0 +1,450 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include +#include +#include "kfd_priv.h" +#include "kfd_mqd_manager.h" +#include "cik_regs.h" +#include "cik_structs.h" + +static inline struct cik_mqd *get_mqd(void *mqd) +{ + return (struct cik_mqd *)mqd; +} + +static int init_mqd(struct mqd_manager *mm, void **mqd, + struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, + struct queue_properties *q) +{ + uint64_t addr; + struct cik_mqd *m; + int retval; + + BUG_ON(!mm || !q || !mqd); + + pr_debug("kfd: In func %s\n", __func__); + + retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct cik_mqd), + mqd_mem_obj); + + if (retval != 0) + return -ENOMEM; + + m = (struct cik_mqd *) (*mqd_mem_obj)->cpu_ptr; + addr = (*mqd_mem_obj)->gpu_addr; + + memset(m, 0, ALIGN(sizeof(struct cik_mqd), 256)); + + m->header = 0xC0310800; + m->compute_pipelinestat_enable = 1; + m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF; + m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF; + m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF; + m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF; + + /* + * Make sure to use the last queue state saved on mqd when the cp + * reassigns the queue, so when queue is switched on/off (e.g over + * subscription or quantum timeout) the context will be consistent + */ + m->cp_hqd_persistent_state = + DEFAULT_CP_HQD_PERSISTENT_STATE | PRELOAD_REQ; + + m->cp_mqd_control = MQD_CONTROL_PRIV_STATE_EN; + m->cp_mqd_base_addr_lo = lower_32_bits(addr); + m->cp_mqd_base_addr_hi = upper_32_bits(addr); + + m->cp_hqd_ib_control = DEFAULT_MIN_IB_AVAIL_SIZE | IB_ATC_EN; + /* Although WinKFD writes this, I suspect it should not be necessary */ + m->cp_hqd_ib_control = IB_ATC_EN | DEFAULT_MIN_IB_AVAIL_SIZE; + + m->cp_hqd_quantum = QUANTUM_EN | QUANTUM_SCALE_1MS | + QUANTUM_DURATION(10); + + /* + * Pipe Priority + * Identifies the pipe relative priority when this queue is connected + * to the pipeline. The pipe priority is against the GFX pipe and HP3D. + * In KFD we are using a fixed pipe priority set to CS_MEDIUM. + * 0 = CS_LOW (typically below GFX) + * 1 = CS_MEDIUM (typically between HP3D and GFX + * 2 = CS_HIGH (typically above HP3D) + */ + m->cp_hqd_pipe_priority = 1; + m->cp_hqd_queue_priority = 15; + + if (q->format == KFD_QUEUE_FORMAT_AQL) + m->cp_hqd_iq_rptr = AQL_ENABLE; + + *mqd = m; + if (gart_addr != NULL) + *gart_addr = addr; + retval = mm->update_mqd(mm, m, q); + + return retval; +} + +static int init_mqd_sdma(struct mqd_manager *mm, void **mqd, + struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, + struct queue_properties *q) +{ + int retval; + struct cik_sdma_rlc_registers *m; + + BUG_ON(!mm || !mqd || !mqd_mem_obj); + + retval = kfd_gtt_sa_allocate(mm->dev, + sizeof(struct cik_sdma_rlc_registers), + mqd_mem_obj); + + if (retval != 0) + return -ENOMEM; + + m = (struct cik_sdma_rlc_registers *) (*mqd_mem_obj)->cpu_ptr; + + memset(m, 0, sizeof(struct cik_sdma_rlc_registers)); + + *mqd = m; + if (gart_addr != NULL) + *gart_addr = (*mqd_mem_obj)->gpu_addr; + + retval = mm->update_mqd(mm, m, q); + + return retval; +} + +static void uninit_mqd(struct mqd_manager *mm, void *mqd, + struct kfd_mem_obj *mqd_mem_obj) +{ + BUG_ON(!mm || !mqd); + kfd_gtt_sa_free(mm->dev, mqd_mem_obj); +} + +static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd, + struct kfd_mem_obj *mqd_mem_obj) +{ + BUG_ON(!mm || !mqd); + kfd_gtt_sa_free(mm->dev, mqd_mem_obj); +} + +static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id, + uint32_t queue_id, uint32_t __user *wptr) +{ + return kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id, wptr); +} + +static int load_mqd_sdma(struct mqd_manager *mm, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t __user *wptr) +{ + return kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd); +} + +static int update_mqd(struct mqd_manager *mm, void *mqd, + struct queue_properties *q) +{ + struct cik_mqd *m; + + BUG_ON(!mm || !q || !mqd); + + pr_debug("kfd: In func %s\n", __func__); + + m = get_mqd(mqd); + m->cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE | + DEFAULT_MIN_AVAIL_SIZE | PQ_ATC_EN; + + /* + * Calculating queue size which is log base 2 of actual queue size -1 + * dwords and another -1 for ffs + */ + m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int)) + - 1 - 1; + m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); + m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8); + m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr); + m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr); + m->cp_hqd_pq_doorbell_control = DOORBELL_EN | + DOORBELL_OFFSET(q->doorbell_off); + + m->cp_hqd_vmid = q->vmid; + + if (q->format == KFD_QUEUE_FORMAT_AQL) { + m->cp_hqd_pq_control |= NO_UPDATE_RPTR; + } + + m->cp_hqd_active = 0; + q->is_active = false; + if (q->queue_size > 0 && + q->queue_address != 0 && + q->queue_percent > 0) { + m->cp_hqd_active = 1; + q->is_active = true; + } + + return 0; +} + +static int update_mqd_sdma(struct mqd_manager *mm, void *mqd, + struct queue_properties *q) +{ + struct cik_sdma_rlc_registers *m; + + BUG_ON(!mm || !mqd || !q); + + m = get_sdma_mqd(mqd); + m->sdma_rlc_rb_cntl = + SDMA_RB_SIZE((ffs(q->queue_size / sizeof(unsigned int)))) | + SDMA_RB_VMID(q->vmid) | + SDMA_RPTR_WRITEBACK_ENABLE | + SDMA_RPTR_WRITEBACK_TIMER(6); + + m->sdma_rlc_rb_base = lower_32_bits(q->queue_address >> 8); + m->sdma_rlc_rb_base_hi = upper_32_bits(q->queue_address >> 8); + m->sdma_rlc_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr); + m->sdma_rlc_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr); + m->sdma_rlc_doorbell = SDMA_OFFSET(q->doorbell_off) | SDMA_DB_ENABLE; + m->sdma_rlc_virtual_addr = q->sdma_vm_addr; + + m->sdma_engine_id = q->sdma_engine_id; + m->sdma_queue_id = q->sdma_queue_id; + + q->is_active = false; + if (q->queue_size > 0 && + q->queue_address != 0 && + q->queue_percent > 0) { + m->sdma_rlc_rb_cntl |= SDMA_RB_ENABLE; + q->is_active = true; + } + + return 0; +} + +static int destroy_mqd(struct mqd_manager *mm, void *mqd, + enum kfd_preempt_type type, + unsigned int timeout, uint32_t pipe_id, + uint32_t queue_id) +{ + return kfd2kgd->hqd_destroy(mm->dev->kgd, type, timeout, + pipe_id, queue_id); +} + +/* + * preempt type here is ignored because there is only one way + * to preempt sdma queue + */ +static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd, + enum kfd_preempt_type type, + unsigned int timeout, uint32_t pipe_id, + uint32_t queue_id) +{ + return kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout); +} + +static bool is_occupied(struct mqd_manager *mm, void *mqd, + uint64_t queue_address, uint32_t pipe_id, + uint32_t queue_id) +{ + + return kfd2kgd->hqd_is_occupied(mm->dev->kgd, queue_address, + pipe_id, queue_id); + +} + +static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd, + uint64_t queue_address, uint32_t pipe_id, + uint32_t queue_id) +{ + return kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd); +} + +/* + * HIQ MQD Implementation, concrete implementation for HIQ MQD implementation. + * The HIQ queue in Kaveri is using the same MQD structure as all the user mode + * queues but with different initial values. + */ + +static int init_mqd_hiq(struct mqd_manager *mm, void **mqd, + struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, + struct queue_properties *q) +{ + uint64_t addr; + struct cik_mqd *m; + int retval; + + BUG_ON(!mm || !q || !mqd || !mqd_mem_obj); + + pr_debug("kfd: In func %s\n", __func__); + + retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct cik_mqd), + mqd_mem_obj); + + if (retval != 0) + return -ENOMEM; + + m = (struct cik_mqd *) (*mqd_mem_obj)->cpu_ptr; + addr = (*mqd_mem_obj)->gpu_addr; + + memset(m, 0, ALIGN(sizeof(struct cik_mqd), 256)); + + m->header = 0xC0310800; + m->compute_pipelinestat_enable = 1; + m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF; + m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF; + m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF; + m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF; + + m->cp_hqd_persistent_state = DEFAULT_CP_HQD_PERSISTENT_STATE | + PRELOAD_REQ; + m->cp_hqd_quantum = QUANTUM_EN | QUANTUM_SCALE_1MS | + QUANTUM_DURATION(10); + + m->cp_mqd_control = MQD_CONTROL_PRIV_STATE_EN; + m->cp_mqd_base_addr_lo = lower_32_bits(addr); + m->cp_mqd_base_addr_hi = upper_32_bits(addr); + + m->cp_hqd_ib_control = DEFAULT_MIN_IB_AVAIL_SIZE; + + /* + * Pipe Priority + * Identifies the pipe relative priority when this queue is connected + * to the pipeline. The pipe priority is against the GFX pipe and HP3D. + * In KFD we are using a fixed pipe priority set to CS_MEDIUM. + * 0 = CS_LOW (typically below GFX) + * 1 = CS_MEDIUM (typically between HP3D and GFX + * 2 = CS_HIGH (typically above HP3D) + */ + m->cp_hqd_pipe_priority = 1; + m->cp_hqd_queue_priority = 15; + + *mqd = m; + if (gart_addr) + *gart_addr = addr; + retval = mm->update_mqd(mm, m, q); + + return retval; +} + +static int update_mqd_hiq(struct mqd_manager *mm, void *mqd, + struct queue_properties *q) +{ + struct cik_mqd *m; + + BUG_ON(!mm || !q || !mqd); + + pr_debug("kfd: In func %s\n", __func__); + + m = get_mqd(mqd); + m->cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE | + DEFAULT_MIN_AVAIL_SIZE | + PRIV_STATE | + KMD_QUEUE; + + /* + * Calculating queue size which is log base 2 of actual queue + * size -1 dwords + */ + m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int)) + - 1 - 1; + m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); + m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8); + m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr); + m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr); + m->cp_hqd_pq_doorbell_control = DOORBELL_EN | + DOORBELL_OFFSET(q->doorbell_off); + + m->cp_hqd_vmid = q->vmid; + + m->cp_hqd_active = 0; + q->is_active = false; + if (q->queue_size > 0 && + q->queue_address != 0 && + q->queue_percent > 0) { + m->cp_hqd_active = 1; + q->is_active = true; + } + + return 0; +} + +struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd) +{ + struct cik_sdma_rlc_registers *m; + + BUG_ON(!mqd); + + m = (struct cik_sdma_rlc_registers *)mqd; + + return m; +} + +struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, + struct kfd_dev *dev) +{ + struct mqd_manager *mqd; + + BUG_ON(!dev); + BUG_ON(type >= KFD_MQD_TYPE_MAX); + + pr_debug("kfd: In func %s\n", __func__); + + mqd = kzalloc(sizeof(struct mqd_manager), GFP_KERNEL); + if (!mqd) + return NULL; + + mqd->dev = dev; + + switch (type) { + case KFD_MQD_TYPE_CP: + case KFD_MQD_TYPE_COMPUTE: + mqd->init_mqd = init_mqd; + mqd->uninit_mqd = uninit_mqd; + mqd->load_mqd = load_mqd; + mqd->update_mqd = update_mqd; + mqd->destroy_mqd = destroy_mqd; + mqd->is_occupied = is_occupied; + break; + case KFD_MQD_TYPE_HIQ: + mqd->init_mqd = init_mqd_hiq; + mqd->uninit_mqd = uninit_mqd; + mqd->load_mqd = load_mqd; + mqd->update_mqd = update_mqd_hiq; + mqd->destroy_mqd = destroy_mqd; + mqd->is_occupied = is_occupied; + break; + case KFD_MQD_TYPE_SDMA: + mqd->init_mqd = init_mqd_sdma; + mqd->uninit_mqd = uninit_mqd_sdma; + mqd->load_mqd = load_mqd_sdma; + mqd->update_mqd = update_mqd_sdma; + mqd->destroy_mqd = destroy_mqd_sdma; + mqd->is_occupied = is_occupied_sdma; + break; + default: + kfree(mqd); + return NULL; + } + + return mqd; +} + diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c new file mode 100644 index 000000000000..b3a7e3ba1e38 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -0,0 +1,33 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include +#include "kfd_priv.h" +#include "kfd_mqd_manager.h" + +struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, + struct kfd_dev *dev) +{ + pr_warn("amdkfd: VI MQD is not currently supported\n"); + return NULL; +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index 5ce9233d2004..e2533d875f43 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -97,11 +97,8 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm, pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription); - retval = kfd2kgd->allocate_mem(pm->dqm->dev->kgd, - *rl_buffer_size, - PAGE_SIZE, - KFD_MEMPOOL_SYSTEM_WRITECOMBINE, - (struct kgd_mem **) &pm->ib_buffer_obj); + retval = kfd_gtt_sa_allocate(pm->dqm->dev, *rl_buffer_size, + &pm->ib_buffer_obj); if (retval != 0) { pr_err("kfd: failed to allocate runlist IB\n"); @@ -351,7 +348,7 @@ int pm_send_set_resources(struct packet_manager *pm, pr_debug("kfd: In func %s\n", __func__); mutex_lock(&pm->lock); - pm->priv_queue->acquire_packet_buffer(pm->priv_queue, + pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue, sizeof(*packet) / sizeof(uint32_t), (unsigned int **)&packet); if (packet == NULL) { @@ -378,8 +375,7 @@ int pm_send_set_resources(struct packet_manager *pm, packet->queue_mask_lo = lower_32_bits(res->queue_mask); packet->queue_mask_hi = upper_32_bits(res->queue_mask); - pm->priv_queue->submit_packet(pm->priv_queue); - pm->priv_queue->sync_with_hw(pm->priv_queue, KFD_HIQ_TIMEOUT); + pm->priv_queue->ops.submit_packet(pm->priv_queue); mutex_unlock(&pm->lock); @@ -405,7 +401,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues) packet_size_dwords = sizeof(struct pm4_runlist) / sizeof(uint32_t); mutex_lock(&pm->lock); - retval = pm->priv_queue->acquire_packet_buffer(pm->priv_queue, + retval = pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue, packet_size_dwords, &rl_buffer); if (retval != 0) goto fail_acquire_packet_buffer; @@ -415,15 +411,14 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues) if (retval != 0) goto fail_create_runlist; - pm->priv_queue->submit_packet(pm->priv_queue); - pm->priv_queue->sync_with_hw(pm->priv_queue, KFD_HIQ_TIMEOUT); + pm->priv_queue->ops.submit_packet(pm->priv_queue); mutex_unlock(&pm->lock); return retval; fail_create_runlist: - pm->priv_queue->rollback_packet(pm->priv_queue); + pm->priv_queue->ops.rollback_packet(pm->priv_queue); fail_acquire_packet_buffer: mutex_unlock(&pm->lock); fail_create_runlist_ib: @@ -441,7 +436,7 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address, BUG_ON(!pm || !fence_address); mutex_lock(&pm->lock); - retval = pm->priv_queue->acquire_packet_buffer( + retval = pm->priv_queue->ops.acquire_packet_buffer( pm->priv_queue, sizeof(struct pm4_query_status) / sizeof(uint32_t), (unsigned int **)&packet); @@ -462,8 +457,7 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address, packet->data_hi = upper_32_bits((uint64_t)fence_value); packet->data_lo = lower_32_bits((uint64_t)fence_value); - pm->priv_queue->submit_packet(pm->priv_queue); - pm->priv_queue->sync_with_hw(pm->priv_queue, KFD_HIQ_TIMEOUT); + pm->priv_queue->ops.submit_packet(pm->priv_queue); mutex_unlock(&pm->lock); return 0; @@ -485,7 +479,7 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, BUG_ON(!pm); mutex_lock(&pm->lock); - retval = pm->priv_queue->acquire_packet_buffer( + retval = pm->priv_queue->ops.acquire_packet_buffer( pm->priv_queue, sizeof(struct pm4_unmap_queues) / sizeof(uint32_t), &buffer); @@ -540,8 +534,7 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, break; }; - pm->priv_queue->submit_packet(pm->priv_queue); - pm->priv_queue->sync_with_hw(pm->priv_queue, KFD_HIQ_TIMEOUT); + pm->priv_queue->ops.submit_packet(pm->priv_queue); mutex_unlock(&pm->lock); return 0; @@ -557,8 +550,7 @@ void pm_release_ib(struct packet_manager *pm) mutex_lock(&pm->lock); if (pm->allocated) { - kfd2kgd->free_mem(pm->dqm->dev->kgd, - (struct kgd_mem *) pm->ib_buffer_obj); + kfd_gtt_sa_free(pm->dqm->dev, pm->ib_buffer_obj); pm->allocated = false; } mutex_unlock(&pm->lock); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 96dc10e8904a..5a44f2fecf38 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -103,12 +103,26 @@ enum cache_policy { cache_policy_noncoherent }; +enum asic_family_type { + CHIP_KAVERI = 0, + CHIP_CARRIZO +}; + struct kfd_device_info { + unsigned int asic_family; unsigned int max_pasid_bits; size_t ih_ring_entry_size; + uint8_t num_of_watch_points; uint16_t mqd_size_aligned; }; +struct kfd_mem_obj { + uint32_t range_start; + uint32_t range_end; + uint64_t gpu_addr; + uint32_t *cpu_ptr; +}; + struct kfd_dev { struct kgd_dev *kgd; @@ -134,6 +148,14 @@ struct kfd_dev { struct kgd2kfd_shared_resources shared_resources; + void *gtt_mem; + uint64_t gtt_start_gpu_addr; + void *gtt_start_cpu_ptr; + void *gtt_sa_bitmap; + struct mutex gtt_sa_lock; + unsigned int gtt_sa_chunk_size; + unsigned int gtt_sa_num_of_chunks; + /* QCM Device instance */ struct device_queue_manager *dqm; @@ -149,12 +171,6 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd); extern const struct kfd2kgd_calls *kfd2kgd; -struct kfd_mem_obj { - void *bo; - uint64_t gpu_addr; - uint32_t *cpu_ptr; -}; - enum kfd_mempool { KFD_MEMPOOL_SYSTEM_CACHEABLE = 1, KFD_MEMPOOL_SYSTEM_WRITECOMBINE = 2, @@ -272,6 +288,15 @@ struct queue_properties { bool is_active; /* Not relevant for user mode queues in cp scheduling */ unsigned int vmid; + /* Relevant only for sdma queues*/ + uint32_t sdma_engine_id; + uint32_t sdma_queue_id; + uint32_t sdma_vm_addr; + /* Relevant only for VI */ + uint64_t eop_ring_buffer_address; + uint32_t eop_ring_buffer_size; + uint64_t ctx_save_restore_area_address; + uint32_t ctx_save_restore_area_size; }; /** @@ -314,6 +339,8 @@ struct queue { uint32_t pipe; uint32_t queue; + unsigned int sdma_id; + struct kfd_process *process; struct kfd_dev *device; }; @@ -322,10 +349,10 @@ struct queue { * Please read the kfd_mqd_manager.h description. */ enum KFD_MQD_TYPE { - KFD_MQD_TYPE_CIK_COMPUTE = 0, /* for no cp scheduling */ - KFD_MQD_TYPE_CIK_HIQ, /* for hiq */ - KFD_MQD_TYPE_CIK_CP, /* for cp queues and diq */ - KFD_MQD_TYPE_CIK_SDMA, /* for sdma queues */ + KFD_MQD_TYPE_COMPUTE = 0, /* for no cp scheduling */ + KFD_MQD_TYPE_HIQ, /* for hiq */ + KFD_MQD_TYPE_CP, /* for cp queues and diq */ + KFD_MQD_TYPE_SDMA, /* for sdma queues */ KFD_MQD_TYPE_MAX }; @@ -477,8 +504,9 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, struct kfd_process *p); void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid); struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, - struct kfd_process *p, - int create_pdd); + struct kfd_process *p); +struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, + struct kfd_process *p); /* Process device data iterator */ struct kfd_process_device *kfd_get_first_process_device_data(struct kfd_process *p); @@ -506,6 +534,13 @@ unsigned int kfd_queue_id_to_doorbell(struct kfd_dev *kfd, struct kfd_process *process, unsigned int queue_id); +/* GTT Sub-Allocator */ + +int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size, + struct kfd_mem_obj **mem_obj); + +int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj); + extern struct device *kfd_device; /* Topology */ @@ -530,6 +565,8 @@ int kfd_init_apertures(struct kfd_process *process); /* Queue Context Management */ inline uint32_t lower_32(uint64_t x); inline uint32_t upper_32(uint64_t x); +struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd); +inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m); int init_queue(struct queue **q, struct queue_properties properties); void uninit_queue(struct queue *q); @@ -538,6 +575,10 @@ void print_queue(struct queue *q); struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, struct kfd_dev *dev); +struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, + struct kfd_dev *dev); +struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, + struct kfd_dev *dev); struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev); void device_queue_manager_uninit(struct device_queue_manager *dqm); struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 3c76ef05cbcf..a369c149d172 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -311,24 +311,29 @@ err_alloc_process: } struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, - struct kfd_process *p, - int create_pdd) + struct kfd_process *p) { struct kfd_process_device *pdd = NULL; list_for_each_entry(pdd, &p->per_device_data, per_device_list) if (pdd->dev == dev) - return pdd; + break; - if (create_pdd) { - pdd = kzalloc(sizeof(*pdd), GFP_KERNEL); - if (pdd != NULL) { - pdd->dev = dev; - INIT_LIST_HEAD(&pdd->qpd.queues_list); - INIT_LIST_HEAD(&pdd->qpd.priv_queue_list); - pdd->qpd.dqm = dev->dqm; - list_add(&pdd->per_device_list, &p->per_device_data); - } + return pdd; +} + +struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, + struct kfd_process *p) +{ + struct kfd_process_device *pdd = NULL; + + pdd = kzalloc(sizeof(*pdd), GFP_KERNEL); + if (pdd != NULL) { + pdd->dev = dev; + INIT_LIST_HEAD(&pdd->qpd.queues_list); + INIT_LIST_HEAD(&pdd->qpd.priv_queue_list); + pdd->qpd.dqm = dev->dqm; + list_add(&pdd->per_device_list, &p->per_device_data); } return pdd; @@ -344,11 +349,14 @@ struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, struct kfd_process *p) { - struct kfd_process_device *pdd = kfd_get_process_device_data(dev, p, 1); + struct kfd_process_device *pdd; int err; - if (pdd == NULL) + pdd = kfd_get_process_device_data(dev, p); + if (!pdd) { + pr_err("Process device data doesn't exist\n"); return ERR_PTR(-ENOMEM); + } if (pdd->bound) return pdd; @@ -384,7 +392,7 @@ void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid) pqm_uninit(&p->pqm); - pdd = kfd_get_process_device_data(dev, p, 0); + pdd = kfd_get_process_device_data(dev, p); /* * Just mark pdd as unbound, because we still need it to call diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 2fda1927bff7..530b82c4e78b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -128,7 +128,6 @@ static int create_cp_queue(struct process_queue_manager *pqm, /* let DQM handle it*/ q_properties->vmid = 0; q_properties->queue_id = qid; - q_properties->type = KFD_QUEUE_TYPE_COMPUTE; retval = init_queue(q, *q_properties); if (retval != 0) @@ -167,8 +166,11 @@ int pqm_create_queue(struct process_queue_manager *pqm, q = NULL; kq = NULL; - pdd = kfd_get_process_device_data(dev, pqm->process, 1); - BUG_ON(!pdd); + pdd = kfd_get_process_device_data(dev, pqm->process); + if (!pdd) { + pr_err("Process device data doesn't exist\n"); + return -1; + } retval = find_available_queue_slot(pqm, qid); if (retval != 0) @@ -176,7 +178,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, if (list_empty(&pqm->queues)) { pdd->qpd.pqm = pqm; - dev->dqm->register_process(dev->dqm, &pdd->qpd); + dev->dqm->ops.register_process(dev->dqm, &pdd->qpd); } pqn = kzalloc(sizeof(struct process_queue_node), GFP_KERNEL); @@ -186,6 +188,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, } switch (type) { + case KFD_QUEUE_TYPE_SDMA: case KFD_QUEUE_TYPE_COMPUTE: /* check if there is over subscription */ if ((sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) && @@ -201,7 +204,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, goto err_create_queue; pqn->q = q; pqn->kq = NULL; - retval = dev->dqm->create_queue(dev->dqm, q, &pdd->qpd, + retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd, &q->properties.vmid); pr_debug("DQM returned %d for create_queue\n", retval); print_queue(q); @@ -215,7 +218,8 @@ int pqm_create_queue(struct process_queue_manager *pqm, kq->queue->properties.queue_id = *qid; pqn->kq = kq; pqn->q = NULL; - retval = dev->dqm->create_kernel_queue(dev->dqm, kq, &pdd->qpd); + retval = dev->dqm->ops.create_kernel_queue(dev->dqm, + kq, &pdd->qpd); break; default: BUG(); @@ -245,7 +249,7 @@ err_allocate_pqn: /* check if queues list is empty unregister process from device */ clear_bit(*qid, pqm->queue_slot_bitmap); if (list_empty(&pqm->queues)) - dev->dqm->unregister_process(dev->dqm, &pdd->qpd); + dev->dqm->ops.unregister_process(dev->dqm, &pdd->qpd); return retval; } @@ -277,19 +281,22 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) dev = pqn->q->device; BUG_ON(!dev); - pdd = kfd_get_process_device_data(dev, pqm->process, 1); - BUG_ON(!pdd); + pdd = kfd_get_process_device_data(dev, pqm->process); + if (!pdd) { + pr_err("Process device data doesn't exist\n"); + return -1; + } if (pqn->kq) { /* destroy kernel queue (DIQ) */ dqm = pqn->kq->dev->dqm; - dqm->destroy_kernel_queue(dqm, pqn->kq, &pdd->qpd); + dqm->ops.destroy_kernel_queue(dqm, pqn->kq, &pdd->qpd); kernel_queue_uninit(pqn->kq); } if (pqn->q) { dqm = pqn->q->device->dqm; - retval = dqm->destroy_queue(dqm, &pdd->qpd, pqn->q); + retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q); if (retval != 0) return retval; @@ -301,7 +308,7 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) clear_bit(qid, pqm->queue_slot_bitmap); if (list_empty(&pqm->queues)) - dqm->unregister_process(dqm, &pdd->qpd); + dqm->ops.unregister_process(dqm, &pdd->qpd); return retval; } @@ -326,7 +333,8 @@ int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid, pqn->q->properties.queue_percent = p->queue_percent; pqn->q->properties.priority = p->priority; - retval = pqn->q->device->dqm->update_queue(pqn->q->device->dqm, pqn->q); + retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm, + pqn->q); if (retval != 0) return retval; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index cca1708fd811..498399323a8c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -27,6 +27,7 @@ #include #include #include +#include #include "kfd_priv.h" #include "kfd_crat.h" @@ -630,10 +631,10 @@ static struct kobj_type cache_type = { static ssize_t node_show(struct kobject *kobj, struct attribute *attr, char *buffer) { - ssize_t ret; struct kfd_topology_device *dev; char public_name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE]; uint32_t i; + uint32_t log_max_watch_addr; /* Making sure that the buffer is an empty string */ buffer[0] = 0; @@ -641,8 +642,10 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, if (strcmp(attr->name, "gpu_id") == 0) { dev = container_of(attr, struct kfd_topology_device, attr_gpuid); - ret = sysfs_show_32bit_val(buffer, dev->gpu_id); - } else if (strcmp(attr->name, "name") == 0) { + return sysfs_show_32bit_val(buffer, dev->gpu_id); + } + + if (strcmp(attr->name, "name") == 0) { dev = container_of(attr, struct kfd_topology_device, attr_name); for (i = 0; i < KFD_TOPOLOGY_PUBLIC_NAME_SIZE; i++) { @@ -652,80 +655,90 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, break; } public_name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE-1] = 0x0; - ret = sysfs_show_str_val(buffer, public_name); - } else { - dev = container_of(attr, struct kfd_topology_device, - attr_props); - sysfs_show_32bit_prop(buffer, "cpu_cores_count", - dev->node_props.cpu_cores_count); - sysfs_show_32bit_prop(buffer, "simd_count", - dev->node_props.simd_count); - - if (dev->mem_bank_count < dev->node_props.mem_banks_count) { - pr_warn("kfd: mem_banks_count truncated from %d to %d\n", - dev->node_props.mem_banks_count, - dev->mem_bank_count); - sysfs_show_32bit_prop(buffer, "mem_banks_count", - dev->mem_bank_count); - } else { - sysfs_show_32bit_prop(buffer, "mem_banks_count", - dev->node_props.mem_banks_count); - } - - sysfs_show_32bit_prop(buffer, "caches_count", - dev->node_props.caches_count); - sysfs_show_32bit_prop(buffer, "io_links_count", - dev->node_props.io_links_count); - sysfs_show_32bit_prop(buffer, "cpu_core_id_base", - dev->node_props.cpu_core_id_base); - sysfs_show_32bit_prop(buffer, "simd_id_base", - dev->node_props.simd_id_base); - sysfs_show_32bit_prop(buffer, "capability", - dev->node_props.capability); - sysfs_show_32bit_prop(buffer, "max_waves_per_simd", - dev->node_props.max_waves_per_simd); - sysfs_show_32bit_prop(buffer, "lds_size_in_kb", - dev->node_props.lds_size_in_kb); - sysfs_show_32bit_prop(buffer, "gds_size_in_kb", - dev->node_props.gds_size_in_kb); - sysfs_show_32bit_prop(buffer, "wave_front_size", - dev->node_props.wave_front_size); - sysfs_show_32bit_prop(buffer, "array_count", - dev->node_props.array_count); - sysfs_show_32bit_prop(buffer, "simd_arrays_per_engine", - dev->node_props.simd_arrays_per_engine); - sysfs_show_32bit_prop(buffer, "cu_per_simd_array", - dev->node_props.cu_per_simd_array); - sysfs_show_32bit_prop(buffer, "simd_per_cu", - dev->node_props.simd_per_cu); - sysfs_show_32bit_prop(buffer, "max_slots_scratch_cu", - dev->node_props.max_slots_scratch_cu); - sysfs_show_32bit_prop(buffer, "vendor_id", - dev->node_props.vendor_id); - sysfs_show_32bit_prop(buffer, "device_id", - dev->node_props.device_id); - sysfs_show_32bit_prop(buffer, "location_id", - dev->node_props.location_id); - - if (dev->gpu) { - sysfs_show_32bit_prop(buffer, "max_engine_clk_fcompute", - kfd2kgd->get_max_engine_clock_in_mhz( - dev->gpu->kgd)); - sysfs_show_64bit_prop(buffer, "local_mem_size", - kfd2kgd->get_vmem_size(dev->gpu->kgd)); - - sysfs_show_32bit_prop(buffer, "fw_version", - kfd2kgd->get_fw_version( - dev->gpu->kgd, - KGD_ENGINE_MEC1)); - - } - - ret = sysfs_show_32bit_prop(buffer, "max_engine_clk_ccompute", - cpufreq_quick_get_max(0)/1000); + return sysfs_show_str_val(buffer, public_name); } - return ret; + dev = container_of(attr, struct kfd_topology_device, + attr_props); + sysfs_show_32bit_prop(buffer, "cpu_cores_count", + dev->node_props.cpu_cores_count); + sysfs_show_32bit_prop(buffer, "simd_count", + dev->node_props.simd_count); + + if (dev->mem_bank_count < dev->node_props.mem_banks_count) { + pr_warn("kfd: mem_banks_count truncated from %d to %d\n", + dev->node_props.mem_banks_count, + dev->mem_bank_count); + sysfs_show_32bit_prop(buffer, "mem_banks_count", + dev->mem_bank_count); + } else { + sysfs_show_32bit_prop(buffer, "mem_banks_count", + dev->node_props.mem_banks_count); + } + + sysfs_show_32bit_prop(buffer, "caches_count", + dev->node_props.caches_count); + sysfs_show_32bit_prop(buffer, "io_links_count", + dev->node_props.io_links_count); + sysfs_show_32bit_prop(buffer, "cpu_core_id_base", + dev->node_props.cpu_core_id_base); + sysfs_show_32bit_prop(buffer, "simd_id_base", + dev->node_props.simd_id_base); + sysfs_show_32bit_prop(buffer, "capability", + dev->node_props.capability); + sysfs_show_32bit_prop(buffer, "max_waves_per_simd", + dev->node_props.max_waves_per_simd); + sysfs_show_32bit_prop(buffer, "lds_size_in_kb", + dev->node_props.lds_size_in_kb); + sysfs_show_32bit_prop(buffer, "gds_size_in_kb", + dev->node_props.gds_size_in_kb); + sysfs_show_32bit_prop(buffer, "wave_front_size", + dev->node_props.wave_front_size); + sysfs_show_32bit_prop(buffer, "array_count", + dev->node_props.array_count); + sysfs_show_32bit_prop(buffer, "simd_arrays_per_engine", + dev->node_props.simd_arrays_per_engine); + sysfs_show_32bit_prop(buffer, "cu_per_simd_array", + dev->node_props.cu_per_simd_array); + sysfs_show_32bit_prop(buffer, "simd_per_cu", + dev->node_props.simd_per_cu); + sysfs_show_32bit_prop(buffer, "max_slots_scratch_cu", + dev->node_props.max_slots_scratch_cu); + sysfs_show_32bit_prop(buffer, "vendor_id", + dev->node_props.vendor_id); + sysfs_show_32bit_prop(buffer, "device_id", + dev->node_props.device_id); + sysfs_show_32bit_prop(buffer, "location_id", + dev->node_props.location_id); + + if (dev->gpu) { + log_max_watch_addr = + __ilog2_u32(dev->gpu->device_info->num_of_watch_points); + + if (log_max_watch_addr) { + dev->node_props.capability |= + HSA_CAP_WATCH_POINTS_SUPPORTED; + + dev->node_props.capability |= + ((log_max_watch_addr << + HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT) & + HSA_CAP_WATCH_POINTS_TOTALBITS_MASK); + } + + sysfs_show_32bit_prop(buffer, "max_engine_clk_fcompute", + kfd2kgd->get_max_engine_clock_in_mhz( + dev->gpu->kgd)); + sysfs_show_64bit_prop(buffer, "local_mem_size", + kfd2kgd->get_vmem_size(dev->gpu->kgd)); + + sysfs_show_32bit_prop(buffer, "fw_version", + kfd2kgd->get_fw_version( + dev->gpu->kgd, + KGD_ENGINE_MEC1)); + } + + return sysfs_show_32bit_prop(buffer, "max_engine_clk_ccompute", + cpufreq_quick_get_max(0)/1000); } static const struct sysfs_ops node_ops = { diff --git a/drivers/gpu/drm/amd/include/cik_structs.h b/drivers/gpu/drm/amd/include/cik_structs.h new file mode 100644 index 000000000000..749eab94e335 --- /dev/null +++ b/drivers/gpu/drm/amd/include/cik_structs.h @@ -0,0 +1,293 @@ +/* + * Copyright 2012 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef CIK_STRUCTS_H_ +#define CIK_STRUCTS_H_ + +struct cik_mqd { + uint32_t header; + uint32_t compute_dispatch_initiator; + uint32_t compute_dim_x; + uint32_t compute_dim_y; + uint32_t compute_dim_z; + uint32_t compute_start_x; + uint32_t compute_start_y; + uint32_t compute_start_z; + uint32_t compute_num_thread_x; + uint32_t compute_num_thread_y; + uint32_t compute_num_thread_z; + uint32_t compute_pipelinestat_enable; + uint32_t compute_perfcount_enable; + uint32_t compute_pgm_lo; + uint32_t compute_pgm_hi; + uint32_t compute_tba_lo; + uint32_t compute_tba_hi; + uint32_t compute_tma_lo; + uint32_t compute_tma_hi; + uint32_t compute_pgm_rsrc1; + uint32_t compute_pgm_rsrc2; + uint32_t compute_vmid; + uint32_t compute_resource_limits; + uint32_t compute_static_thread_mgmt_se0; + uint32_t compute_static_thread_mgmt_se1; + uint32_t compute_tmpring_size; + uint32_t compute_static_thread_mgmt_se2; + uint32_t compute_static_thread_mgmt_se3; + uint32_t compute_restart_x; + uint32_t compute_restart_y; + uint32_t compute_restart_z; + uint32_t compute_thread_trace_enable; + uint32_t compute_misc_reserved; + uint32_t compute_user_data_0; + uint32_t compute_user_data_1; + uint32_t compute_user_data_2; + uint32_t compute_user_data_3; + uint32_t compute_user_data_4; + uint32_t compute_user_data_5; + uint32_t compute_user_data_6; + uint32_t compute_user_data_7; + uint32_t compute_user_data_8; + uint32_t compute_user_data_9; + uint32_t compute_user_data_10; + uint32_t compute_user_data_11; + uint32_t compute_user_data_12; + uint32_t compute_user_data_13; + uint32_t compute_user_data_14; + uint32_t compute_user_data_15; + uint32_t cp_compute_csinvoc_count_lo; + uint32_t cp_compute_csinvoc_count_hi; + uint32_t cp_mqd_base_addr_lo; + uint32_t cp_mqd_base_addr_hi; + uint32_t cp_hqd_active; + uint32_t cp_hqd_vmid; + uint32_t cp_hqd_persistent_state; + uint32_t cp_hqd_pipe_priority; + uint32_t cp_hqd_queue_priority; + uint32_t cp_hqd_quantum; + uint32_t cp_hqd_pq_base_lo; + uint32_t cp_hqd_pq_base_hi; + uint32_t cp_hqd_pq_rptr; + uint32_t cp_hqd_pq_rptr_report_addr_lo; + uint32_t cp_hqd_pq_rptr_report_addr_hi; + uint32_t cp_hqd_pq_wptr_poll_addr_lo; + uint32_t cp_hqd_pq_wptr_poll_addr_hi; + uint32_t cp_hqd_pq_doorbell_control; + uint32_t cp_hqd_pq_wptr; + uint32_t cp_hqd_pq_control; + uint32_t cp_hqd_ib_base_addr_lo; + uint32_t cp_hqd_ib_base_addr_hi; + uint32_t cp_hqd_ib_rptr; + uint32_t cp_hqd_ib_control; + uint32_t cp_hqd_iq_timer; + uint32_t cp_hqd_iq_rptr; + uint32_t cp_hqd_dequeue_request; + uint32_t cp_hqd_dma_offload; + uint32_t cp_hqd_sema_cmd; + uint32_t cp_hqd_msg_type; + uint32_t cp_hqd_atomic0_preop_lo; + uint32_t cp_hqd_atomic0_preop_hi; + uint32_t cp_hqd_atomic1_preop_lo; + uint32_t cp_hqd_atomic1_preop_hi; + uint32_t cp_hqd_hq_status0; + uint32_t cp_hqd_hq_control0; + uint32_t cp_mqd_control; + uint32_t cp_mqd_query_time_lo; + uint32_t cp_mqd_query_time_hi; + uint32_t cp_mqd_connect_start_time_lo; + uint32_t cp_mqd_connect_start_time_hi; + uint32_t cp_mqd_connect_end_time_lo; + uint32_t cp_mqd_connect_end_time_hi; + uint32_t cp_mqd_connect_end_wf_count; + uint32_t cp_mqd_connect_end_pq_rptr; + uint32_t cp_mqd_connect_end_pq_wptr; + uint32_t cp_mqd_connect_end_ib_rptr; + uint32_t reserved_96; + uint32_t reserved_97; + uint32_t reserved_98; + uint32_t reserved_99; + uint32_t iqtimer_pkt_header; + uint32_t iqtimer_pkt_dw0; + uint32_t iqtimer_pkt_dw1; + uint32_t iqtimer_pkt_dw2; + uint32_t iqtimer_pkt_dw3; + uint32_t iqtimer_pkt_dw4; + uint32_t iqtimer_pkt_dw5; + uint32_t iqtimer_pkt_dw6; + uint32_t reserved_108; + uint32_t reserved_109; + uint32_t reserved_110; + uint32_t reserved_111; + uint32_t queue_doorbell_id0; + uint32_t queue_doorbell_id1; + uint32_t queue_doorbell_id2; + uint32_t queue_doorbell_id3; + uint32_t queue_doorbell_id4; + uint32_t queue_doorbell_id5; + uint32_t queue_doorbell_id6; + uint32_t queue_doorbell_id7; + uint32_t queue_doorbell_id8; + uint32_t queue_doorbell_id9; + uint32_t queue_doorbell_id10; + uint32_t queue_doorbell_id11; + uint32_t queue_doorbell_id12; + uint32_t queue_doorbell_id13; + uint32_t queue_doorbell_id14; + uint32_t queue_doorbell_id15; +}; + +struct cik_sdma_rlc_registers { + uint32_t sdma_rlc_rb_cntl; + uint32_t sdma_rlc_rb_base; + uint32_t sdma_rlc_rb_base_hi; + uint32_t sdma_rlc_rb_rptr; + uint32_t sdma_rlc_rb_wptr; + uint32_t sdma_rlc_rb_wptr_poll_cntl; + uint32_t sdma_rlc_rb_wptr_poll_addr_hi; + uint32_t sdma_rlc_rb_wptr_poll_addr_lo; + uint32_t sdma_rlc_rb_rptr_addr_hi; + uint32_t sdma_rlc_rb_rptr_addr_lo; + uint32_t sdma_rlc_ib_cntl; + uint32_t sdma_rlc_ib_rptr; + uint32_t sdma_rlc_ib_offset; + uint32_t sdma_rlc_ib_base_lo; + uint32_t sdma_rlc_ib_base_hi; + uint32_t sdma_rlc_ib_size; + uint32_t sdma_rlc_skip_cntl; + uint32_t sdma_rlc_context_status; + uint32_t sdma_rlc_doorbell; + uint32_t sdma_rlc_virtual_addr; + uint32_t sdma_rlc_ape1_cntl; + uint32_t sdma_rlc_doorbell_log; + uint32_t reserved_22; + uint32_t reserved_23; + uint32_t reserved_24; + uint32_t reserved_25; + uint32_t reserved_26; + uint32_t reserved_27; + uint32_t reserved_28; + uint32_t reserved_29; + uint32_t reserved_30; + uint32_t reserved_31; + uint32_t reserved_32; + uint32_t reserved_33; + uint32_t reserved_34; + uint32_t reserved_35; + uint32_t reserved_36; + uint32_t reserved_37; + uint32_t reserved_38; + uint32_t reserved_39; + uint32_t reserved_40; + uint32_t reserved_41; + uint32_t reserved_42; + uint32_t reserved_43; + uint32_t reserved_44; + uint32_t reserved_45; + uint32_t reserved_46; + uint32_t reserved_47; + uint32_t reserved_48; + uint32_t reserved_49; + uint32_t reserved_50; + uint32_t reserved_51; + uint32_t reserved_52; + uint32_t reserved_53; + uint32_t reserved_54; + uint32_t reserved_55; + uint32_t reserved_56; + uint32_t reserved_57; + uint32_t reserved_58; + uint32_t reserved_59; + uint32_t reserved_60; + uint32_t reserved_61; + uint32_t reserved_62; + uint32_t reserved_63; + uint32_t reserved_64; + uint32_t reserved_65; + uint32_t reserved_66; + uint32_t reserved_67; + uint32_t reserved_68; + uint32_t reserved_69; + uint32_t reserved_70; + uint32_t reserved_71; + uint32_t reserved_72; + uint32_t reserved_73; + uint32_t reserved_74; + uint32_t reserved_75; + uint32_t reserved_76; + uint32_t reserved_77; + uint32_t reserved_78; + uint32_t reserved_79; + uint32_t reserved_80; + uint32_t reserved_81; + uint32_t reserved_82; + uint32_t reserved_83; + uint32_t reserved_84; + uint32_t reserved_85; + uint32_t reserved_86; + uint32_t reserved_87; + uint32_t reserved_88; + uint32_t reserved_89; + uint32_t reserved_90; + uint32_t reserved_91; + uint32_t reserved_92; + uint32_t reserved_93; + uint32_t reserved_94; + uint32_t reserved_95; + uint32_t reserved_96; + uint32_t reserved_97; + uint32_t reserved_98; + uint32_t reserved_99; + uint32_t reserved_100; + uint32_t reserved_101; + uint32_t reserved_102; + uint32_t reserved_103; + uint32_t reserved_104; + uint32_t reserved_105; + uint32_t reserved_106; + uint32_t reserved_107; + uint32_t reserved_108; + uint32_t reserved_109; + uint32_t reserved_110; + uint32_t reserved_111; + uint32_t reserved_112; + uint32_t reserved_113; + uint32_t reserved_114; + uint32_t reserved_115; + uint32_t reserved_116; + uint32_t reserved_117; + uint32_t reserved_118; + uint32_t reserved_119; + uint32_t reserved_120; + uint32_t reserved_121; + uint32_t reserved_122; + uint32_t reserved_123; + uint32_t reserved_124; + uint32_t reserved_125; + uint32_t reserved_126; + uint32_t reserved_127; + uint32_t sdma_engine_id; + uint32_t sdma_queue_id; +}; + + + +#endif /* CIK_STRUCTS_H_ */ diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 96a512208fad..239bc16a1ddd 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -110,17 +110,10 @@ struct kgd2kfd_calls { /** * struct kfd2kgd_calls * - * @init_sa_manager: Initialize an instance of the sa manager, used by - * amdkfd for all system memory allocations that are mapped to the GART - * address space + * @init_gtt_mem_allocation: Allocate a buffer on the gart aperture. + * The buffer can be used for mqds, hpds, kernel queue, fence and runlists * - * @fini_sa_manager: Releases all memory allocations for amdkfd that are - * handled by kgd sa manager - * - * @allocate_mem: Allocate a buffer from amdkfd's sa manager. The buffer can - * be used for mqds, hpds, kernel queue, fence and runlists - * - * @free_mem: Frees a buffer that was allocated by amdkfd's sa manager + * @free_gtt_mem: Frees a buffer that was allocated on the gart aperture * * @get_vmem_size: Retrieves (physical) size of VRAM * @@ -136,18 +129,23 @@ struct kgd2kfd_calls { * @set_pasid_vmid_mapping: Exposes pasid/vmid pair to the H/W for no cp * scheduling mode. Only used for no cp scheduling mode. * - * @init_memory: Initializes memory apertures to fixed base/limit address - * and non cached memory types. - * * @init_pipeline: Initialized the compute pipelines. * * @hqd_load: Loads the mqd structure to a H/W hqd slot. used only for no cp * sceduling mode. * + * @hqd_sdma_load: Loads the SDMA mqd structure to a H/W SDMA hqd slot. + * used only for no HWS mode. + * * @hqd_is_occupies: Checks if a hqd slot is occupied. * * @hqd_destroy: Destructs and preempts the queue assigned to that hqd slot. * + * @hqd_sdma_is_occupied: Checks if an SDMA hqd slot is occupied. + * + * @hqd_sdma_destroy: Destructs and preempts the SDMA queue assigned to that + * SDMA hqd slot. + * * @get_fw_version: Returns FW versions from the header * * This structure contains function pointers to services that the kgd driver @@ -155,13 +153,11 @@ struct kgd2kfd_calls { * */ struct kfd2kgd_calls { - /* Memory management. */ - int (*init_sa_manager)(struct kgd_dev *kgd, unsigned int size); - void (*fini_sa_manager)(struct kgd_dev *kgd); - int (*allocate_mem)(struct kgd_dev *kgd, size_t size, size_t alignment, - enum kgd_memory_pool pool, struct kgd_mem **mem); + int (*init_gtt_mem_allocation)(struct kgd_dev *kgd, size_t size, + void **mem_obj, uint64_t *gpu_addr, + void **cpu_ptr); - void (*free_mem)(struct kgd_dev *kgd, struct kgd_mem *mem); + void (*free_gtt_mem)(struct kgd_dev *kgd, void *mem_obj); uint64_t (*get_vmem_size)(struct kgd_dev *kgd); uint64_t (*get_gpu_clock_counter)(struct kgd_dev *kgd); @@ -176,25 +172,32 @@ struct kfd2kgd_calls { int (*set_pasid_vmid_mapping)(struct kgd_dev *kgd, unsigned int pasid, unsigned int vmid); - int (*init_memory)(struct kgd_dev *kgd); int (*init_pipeline)(struct kgd_dev *kgd, uint32_t pipe_id, uint32_t hpd_size, uint64_t hpd_gpu_addr); int (*hqd_load)(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr); + int (*hqd_sdma_load)(struct kgd_dev *kgd, void *mqd); + bool (*hqd_is_occupied)(struct kgd_dev *kgd, uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id); int (*hqd_destroy)(struct kgd_dev *kgd, uint32_t reset_type, unsigned int timeout, uint32_t pipe_id, uint32_t queue_id); + + bool (*hqd_sdma_is_occupied)(struct kgd_dev *kgd, void *mqd); + + int (*hqd_sdma_destroy)(struct kgd_dev *kgd, void *mqd, + unsigned int timeout); + uint16_t (*get_fw_version)(struct kgd_dev *kgd, enum kgd_engine_type type); }; bool kgd2kfd_init(unsigned interface_version, - const struct kfd2kgd_calls *f2g, - const struct kgd2kfd_calls **g2f); + const struct kfd2kgd_calls *f2g, + const struct kgd2kfd_calls **g2f); -#endif /* KGD_KFD_INTERFACE_H_INCLUDED */ +#endif /* KGD_KFD_INTERFACE_H_INCLUDED */ diff --git a/drivers/gpu/drm/armada/armada_crtc.c b/drivers/gpu/drm/armada/armada_crtc.c index e3a7a5078e5c..42d2ffa08716 100644 --- a/drivers/gpu/drm/armada/armada_crtc.c +++ b/drivers/gpu/drm/armada/armada_crtc.c @@ -653,10 +653,6 @@ static int armada_drm_crtc_mode_set_base(struct drm_crtc *crtc, int x, int y, return 0; } -static void armada_drm_crtc_load_lut(struct drm_crtc *crtc) -{ -} - /* The mode_config.mutex will be held for this call */ static void armada_drm_crtc_disable(struct drm_crtc *crtc) { @@ -678,7 +674,6 @@ static const struct drm_crtc_helper_funcs armada_crtc_helper_funcs = { .mode_fixup = armada_drm_crtc_mode_fixup, .mode_set = armada_drm_crtc_mode_set, .mode_set_base = armada_drm_crtc_mode_set_base, - .load_lut = armada_drm_crtc_load_lut, .disable = armada_drm_crtc_disable, }; diff --git a/drivers/gpu/drm/ast/ast_fb.c b/drivers/gpu/drm/ast/ast_fb.c index 5c60ae524c45..ff68eefae273 100644 --- a/drivers/gpu/drm/ast/ast_fb.c +++ b/drivers/gpu/drm/ast/ast_fb.c @@ -335,18 +335,27 @@ int ast_fbdev_init(struct drm_device *dev) ret = drm_fb_helper_init(dev, &afbdev->helper, 1, 1); - if (ret) { - kfree(afbdev); - return ret; - } + if (ret) + goto free; - drm_fb_helper_single_add_all_connectors(&afbdev->helper); + ret = drm_fb_helper_single_add_all_connectors(&afbdev->helper); + if (ret) + goto fini; /* disable all the possible outputs/crtcs before entering KMS mode */ drm_helper_disable_unused_functions(dev); - drm_fb_helper_initial_config(&afbdev->helper, 32); + ret = drm_fb_helper_initial_config(&afbdev->helper, 32); + if (ret) + goto fini; + return 0; + +fini: + drm_fb_helper_fini(&afbdev->helper); +free: + kfree(afbdev); + return ret; } void ast_fbdev_fini(struct drm_device *dev) diff --git a/drivers/gpu/drm/atmel-hlcdc/Kconfig b/drivers/gpu/drm/atmel-hlcdc/Kconfig new file mode 100644 index 000000000000..99b4f0698a30 --- /dev/null +++ b/drivers/gpu/drm/atmel-hlcdc/Kconfig @@ -0,0 +1,11 @@ +config DRM_ATMEL_HLCDC + tristate "DRM Support for ATMEL HLCDC Display Controller" + depends on DRM && OF && COMMON_CLK && MFD_ATMEL_HLCDC && ARM + select DRM_GEM_CMA_HELPER + select DRM_KMS_HELPER + select DRM_KMS_FB_HELPER + select DRM_KMS_CMA_HELPER + select DRM_PANEL + help + Choose this option if you have an ATMEL SoC with an HLCDC display + controller (i.e. at91sam9n12, at91sam9x5 family or sama5d3 family). diff --git a/drivers/gpu/drm/atmel-hlcdc/Makefile b/drivers/gpu/drm/atmel-hlcdc/Makefile new file mode 100644 index 000000000000..10ae426e60bd --- /dev/null +++ b/drivers/gpu/drm/atmel-hlcdc/Makefile @@ -0,0 +1,7 @@ +atmel-hlcdc-dc-y := atmel_hlcdc_crtc.o \ + atmel_hlcdc_dc.o \ + atmel_hlcdc_layer.o \ + atmel_hlcdc_output.o \ + atmel_hlcdc_plane.o + +obj-$(CONFIG_DRM_ATMEL_HLCDC) += atmel-hlcdc-dc.o diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c new file mode 100644 index 000000000000..0409b907de5d --- /dev/null +++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c @@ -0,0 +1,406 @@ +/* + * Copyright (C) 2014 Traphandler + * Copyright (C) 2014 Free Electrons + * + * Author: Jean-Jacques Hiblot + * Author: Boris BREZILLON + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#include +#include +#include + +#include +#include +#include + +#include