drm for v4.18-rc1

-----BEGIN PGP SIGNATURE-----
 
 iQIcBAABAgAGBQJbFzauAAoJEAx081l5xIa+jzwP/AwfTreH3XBmlLeMO8kwkAMW
 AiH1u3KHrlitN1U85x90dC8hVuuV2Kv9pEXQo1me/TAL/QCb9VZYCSf2dHHkkMwD
 1UdwxAQW7soBbTRo9k0zuVJpRGSYIiYfqDh3L6KmTC3UF0tUJm53VOWCLG/DNrtn
 XhEnGnlCUNbABZMMpavEvlwxPtvaYFlp6M+MmwRPIx32SrPJ3vSaBzxwWxOaFmjU
 xiRdK+GpAbCV8yGeCCSkDgEe/TdWGUhZxoC9dLb0H9ex7ip8uZ0W4D+VTHPFrhQX
 6nCpqUbp7BQTsbVSd1pAVsAv45scmSgWbKcqfC0NKSVcsHcJZBR0tQOF9OvnGZcf
 o1Hv/beTqJ++IcG2rEIwyJTGxAGfZ0YSb0evTC9VcszaYo+b3+G283bdztIjzDeS
 0QCTeLHYbZRHPITWVULNpMWy3TkJv32IdFhQfYSnD8/OGQIxLNhh4FFOtHnOmxSF
 N8dnzOLKXhXMo/NgOL+UMNnbgLqIyOtEXCPDLuOQJNv/SOp8662m/A0yRjQNR6M2
 gsPmR7dxQIwwJMyqrkLDOF411ABZohulquYgwLgG938MRPmTpPWOR72PtpGF4hAW
 HLg+3HHBd1N/A1mlJUMAbUn2eMUACZBUIycE9u+U/geRgve/OQnzJH/FKGP2EJ4R
 pf6CruEva+6GRR5GVzuM
 =twst
 -----END PGP SIGNATURE-----

Merge tag 'drm-next-2018-06-06-1' of git://anongit.freedesktop.org/drm/drm

Pull drm updates from Dave Airlie:
 "This starts to support NVIDIA volta hardware with nouveau, and adds
  amdgpu support for the GPU in the Kabylake-G (the intel + radeon
  single package chip), along with some initial Intel icelake enabling.

  Summary:

  New Drivers:
   - v3d - driver for broadcom V3D V3.x+ hardware
   - xen-front - XEN PV display frontend

  core:
   - handle zpos normalization in the core
   - stop looking at legacy pointers in atomic paths
   - improved scheduler documentation
   - improved aspect ratio validation
   - aspect ratio support for 64:27 and 256:135
   - drop unused control node code.

  i915:
   - Icelake (ICL) enabling
   - GuC/HuC refactoring
   - PSR/PSR2 enabling and fixes
   - DPLL management refactoring
   - DP MST fixes
   - NV12 enabling
   - HDCP improvements
   - GEM/Execlist/reset improvements
   - GVT improvements
   - stolen memory first 4k fix

  amdgpu:
   - Vega 20 support
   - VEGAM support (Kabylake-G)
   - preOS scanout buffer reservation
   - power management gfxoff support for raven
   - SR-IOV fixes
   - Vega10 power profiles and clock voltage control
   - scatter/gather display support on CZ/ST

  amdkfd:
   - GFX9 dGPU support
   - userptr memory mapping

  nouveau:
   - major refactoring for Volta GV100 support

  tda998x:
   - HDMI i2c CEC support

  etnaviv:
   - removed unused logging code
   - license text cleanups
   - MMU handling improvements
   - timeout fence fix for 50 days uptime

  tegra:
   - IOMMU support in gr2d/gr3d drivers
   - zpos support

  vc4:
   - syncobj support
   - CTM, plane alpha and async cursor support

  analogix_dp:
   - HPD and aux chan fixes

  sun4i:
   - MIPI DSI support

  tilcdc:
   - clock divider fixes for OMAP-l138 LCDK board

  rcar-du:
   - R8A77965 support
   - dma-buf fences fixes
   - hardware indexed crtc/du group handling
   - generic zplane property support

  atmel-hclcdc:
   - generic zplane property support

  mediatek:
   - use generic video mode function

  exynos:
   - S5PV210 FIMD variant support
   - IPP v2 framework
   - more HW overlays support"

* tag 'drm-next-2018-06-06-1' of git://anongit.freedesktop.org/drm/drm: (1286 commits)
  drm/amdgpu: fix 32-bit build warning
  drm/exynos: fimc: signedness bug in fimc_setup_clocks()
  drm/exynos: scaler: fix static checker warning
  drm/amdgpu: Use dev_info() to report amdkfd is not supported for this ASIC
  drm/amd/display: Remove use of division operator for long longs
  drm/amdgpu: Update GFX info structure to match what vega20 used
  drm/amdgpu/pp: remove duplicate assignment
  drm/sched: add rcu_barrier after entity fini
  drm/amdgpu: move VM BOs on LRU again
  drm/amdgpu: consistenly use VM moved flag
  drm/amdgpu: kmap PDs/PTs in amdgpu_vm_update_directories
  drm/amdgpu: further optimize amdgpu_vm_handle_moved
  drm/amdgpu: cleanup amdgpu_vm_validate_pt_bos v2
  drm/amdgpu: rework VM state machine lock handling v2
  drm/amdgpu: Add runtime VCN PG support
  drm/amdgpu: Enable VCN static PG by default on RV
  drm/amdgpu: Add VCN static PG support on RV
  drm/amdgpu: Enable VCN CG by default on RV
  drm/amdgpu: Add static CG control for VCN on RV
  drm/exynos: Fix default value for zpos plane property
  ...
This commit is contained in:
Linus Torvalds 2018-06-06 08:16:33 -07:00
commit 135c5504a6
1110 changed files with 72661 additions and 26358 deletions

View File

@ -14,7 +14,13 @@ Required properties:
"adi,adv7513"
"adi,adv7533"
- reg: I2C slave address
- reg: I2C slave addresses
The ADV7511 internal registers are split into four pages exposed through
different I2C addresses, creating four register maps. Each map has it own
I2C address and acts as a standard slave device on the I2C bus. The main
address is mandatory, others are optional and revert to defaults if not
specified.
The ADV7511 supports a large number of input data formats that differ by their
color depth, color format, clock mode, bit justification and random
@ -70,6 +76,9 @@ Optional properties:
rather than generate its own timings for HDMI output.
- clocks: from common clock binding: reference to the CEC clock.
- clock-names: from common clock binding: must be "cec".
- reg-names : Names of maps with programmable addresses.
It can contain any map needing a non-default address.
Possible maps names are : "main", "edid", "cec", "packet"
Required nodes:
@ -88,7 +97,12 @@ Example
adv7511w: hdmi@39 {
compatible = "adi,adv7511w";
reg = <39>;
/*
* The EDID page will be accessible on address 0x66 on the I2C
* bus. All other maps continue to use their default addresses.
*/
reg = <0x39>, <0x66>;
reg-names = "main", "edid";
interrupt-parent = <&gpio3>;
interrupts = <29 IRQ_TYPE_EDGE_FALLING>;
clocks = <&cec_clock>;

View File

@ -0,0 +1,133 @@
Cadence DSI bridge
==================
The Cadence DSI bridge is a DPI to DSI bridge supporting up to 4 DSI lanes.
Required properties:
- compatible: should be set to "cdns,dsi".
- reg: physical base address and length of the controller's registers.
- interrupts: interrupt line connected to the DSI bridge.
- clocks: DSI bridge clocks.
- clock-names: must contain "dsi_p_clk" and "dsi_sys_clk".
- phys: phandle link to the MIPI D-PHY controller.
- phy-names: must contain "dphy".
- #address-cells: must be set to 1.
- #size-cells: must be set to 0.
Optional properties:
- resets: DSI reset lines.
- reset-names: can contain "dsi_p_rst".
Required subnodes:
- ports: Ports as described in Documentation/devicetree/bindings/graph.txt.
2 ports are available:
* port 0: this port is only needed if some of your DSI devices are
controlled through an external bus like I2C or SPI. Can have at
most 4 endpoints. The endpoint number is directly encoding the
DSI virtual channel used by this device.
* port 1: represents the DPI input.
Other ports will be added later to support the new kind of inputs.
- one subnode per DSI device connected on the DSI bus. Each DSI device should
contain a reg property encoding its virtual channel.
Cadence DPHY
============
Cadence DPHY block.
Required properties:
- compatible: should be set to "cdns,dphy".
- reg: physical base address and length of the DPHY registers.
- clocks: DPHY reference clocks.
- clock-names: must contain "psm" and "pll_ref".
- #phy-cells: must be set to 0.
Example:
dphy0: dphy@fd0e0000{
compatible = "cdns,dphy";
reg = <0x0 0xfd0e0000 0x0 0x1000>;
clocks = <&psm_clk>, <&pll_ref_clk>;
clock-names = "psm", "pll_ref";
#phy-cells = <0>;
};
dsi0: dsi@fd0c0000 {
compatible = "cdns,dsi";
reg = <0x0 0xfd0c0000 0x0 0x1000>;
clocks = <&pclk>, <&sysclk>;
clock-names = "dsi_p_clk", "dsi_sys_clk";
interrupts = <1>;
phys = <&dphy0>;
phy-names = "dphy";
#address-cells = <1>;
#size-cells = <0>;
ports {
#address-cells = <1>;
#size-cells = <0>;
port@1 {
reg = <1>;
dsi0_dpi_input: endpoint {
remote-endpoint = <&xxx_dpi_output>;
};
};
};
panel: dsi-dev@0 {
compatible = "<vendor,panel>";
reg = <0>;
};
};
or
dsi0: dsi@fd0c0000 {
compatible = "cdns,dsi";
reg = <0x0 0xfd0c0000 0x0 0x1000>;
clocks = <&pclk>, <&sysclk>;
clock-names = "dsi_p_clk", "dsi_sys_clk";
interrupts = <1>;
phys = <&dphy1>;
phy-names = "dphy";
#address-cells = <1>;
#size-cells = <0>;
ports {
#address-cells = <1>;
#size-cells = <0>;
port@0 {
reg = <0>;
#address-cells = <1>;
#size-cells = <0>;
dsi0_output: endpoint@0 {
reg = <0>;
remote-endpoint = <&dsi_panel_input>;
};
};
port@1 {
reg = <1>;
dsi0_dpi_input: endpoint {
remote-endpoint = <&xxx_dpi_output>;
};
};
};
};
i2c@xxx {
panel: panel@59 {
compatible = "<vendor,panel>";
reg = <0x59>;
port {
dsi_panel_input: endpoint {
remote-endpoint = <&dsi0_output>;
};
};
};
};

View File

@ -14,6 +14,7 @@ Required properties:
- compatible : Shall contain one or more of
- "renesas,r8a7795-hdmi" for R8A7795 (R-Car H3) compatible HDMI TX
- "renesas,r8a7796-hdmi" for R8A7796 (R-Car M3-W) compatible HDMI TX
- "renesas,r8a77965-hdmi" for R8A77965 (R-Car M3-N) compatible HDMI TX
- "renesas,rcar-gen3-hdmi" for the generic R-Car Gen3 compatible HDMI TX
When compatible with generic versions, nodes must list the SoC-specific

View File

@ -27,6 +27,9 @@ Optional properties:
in question is used. The implementation allows one or two DAIs. If two
DAIs are defined, they must be of different type.
- nxp,calib-gpios: calibration GPIO, which must correspond with the
gpio used for the TDA998x interrupt pin.
[1] Documentation/sound/alsa/soc/DAI.txt
[2] include/dt-bindings/display/tda998x.h

View File

@ -0,0 +1,60 @@
Thine Electronics THC63LVD1024 LVDS decoder
-------------------------------------------
The THC63LVD1024 is a dual link LVDS receiver designed to convert LVDS streams
to parallel data outputs. The chip supports single/dual input/output modes,
handling up to two LVDS input streams and up to two digital CMOS/TTL outputs.
Single or dual operation mode, output data mapping and DDR output modes are
configured through input signals and the chip does not expose any control bus.
Required properties:
- compatible: Shall be "thine,thc63lvd1024"
- vcc-supply: Power supply for TTL output, TTL CLOCKOUT signal, LVDS input,
PPL and digital circuitry
Optional properties:
- powerdown-gpios: Power down GPIO signal, pin name "/PDWN". Active low
- oe-gpios: Output enable GPIO signal, pin name "OE". Active high
The THC63LVD1024 video port connections are modeled according
to OF graph bindings specified by Documentation/devicetree/bindings/graph.txt
Required video port nodes:
- port@0: First LVDS input port
- port@2: First digital CMOS/TTL parallel output
Optional video port nodes:
- port@1: Second LVDS input port
- port@3: Second digital CMOS/TTL parallel output
Example:
--------
thc63lvd1024: lvds-decoder {
compatible = "thine,thc63lvd1024";
vcc-supply = <&reg_lvds_vcc>;
powerdown-gpios = <&gpio4 15 GPIO_ACTIVE_LOW>;
ports {
#address-cells = <1>;
#size-cells = <0>;
port@0 {
reg = <0>;
lvds_dec_in_0: endpoint {
remote-endpoint = <&lvds_out>;
};
};
port@2{
reg = <2>;
lvds_dec_out_2: endpoint {
remote-endpoint = <&adv7511_in>;
};
};
};
};

View File

@ -19,7 +19,8 @@ Required properties:
clock-names property.
- clock-names: list of clock names sorted in the same order as the clocks
property. Must contain "pclk", "aclk_decon", "aclk_smmu_decon0x",
"aclk_xiu_decon0x", "pclk_smmu_decon0x", clk_decon_vclk",
"aclk_xiu_decon0x", "pclk_smmu_decon0x", "aclk_smmu_decon1x",
"aclk_xiu_decon1x", "pclk_smmu_decon1x", clk_decon_vclk",
"sclk_decon_eclk"
- ports: contains a port which is connected to mic node. address-cells and
size-cells must 1 and 0, respectively.
@ -34,10 +35,14 @@ decon: decon@13800000 {
clocks = <&cmu_disp CLK_ACLK_DECON>, <&cmu_disp CLK_ACLK_SMMU_DECON0X>,
<&cmu_disp CLK_ACLK_XIU_DECON0X>,
<&cmu_disp CLK_PCLK_SMMU_DECON0X>,
<&cmu_disp CLK_ACLK_SMMU_DECON1X>,
<&cmu_disp CLK_ACLK_XIU_DECON1X>,
<&cmu_disp CLK_PCLK_SMMU_DECON1X>,
<&cmu_disp CLK_SCLK_DECON_VCLK>,
<&cmu_disp CLK_SCLK_DECON_ECLK>;
clock-names = "aclk_decon", "aclk_smmu_decon0x", "aclk_xiu_decon0x",
"pclk_smmu_decon0x", "sclk_decon_vclk", "sclk_decon_eclk";
"pclk_smmu_decon0x", "aclk_smmu_decon1x", "aclk_xiu_decon1x",
"pclk_smmu_decon1x", "sclk_decon_vclk", "sclk_decon_eclk";
interrupt-names = "vsync", "lcd_sys";
interrupts = <0 202 0>, <0 203 0>;

View File

@ -13,6 +13,7 @@ Required Properties:
- "renesas,du-r8a7794" for R8A7794 (R-Car E2) compatible DU
- "renesas,du-r8a7795" for R8A7795 (R-Car H3) compatible DU
- "renesas,du-r8a7796" for R8A7796 (R-Car M3-W) compatible DU
- "renesas,du-r8a77965" for R8A77965 (R-Car M3-N) compatible DU
- "renesas,du-r8a77970" for R8A77970 (R-Car V3M) compatible DU
- "renesas,du-r8a77995" for R8A77995 (R-Car D3) compatible DU
@ -47,20 +48,21 @@ bindings specified in Documentation/devicetree/bindings/graph.txt.
The following table lists for each supported model the port number
corresponding to each DU output.
Port0 Port1 Port2 Port3
Port0 Port1 Port2 Port3
-----------------------------------------------------------------------------
R8A7743 (RZ/G1M) DPAD 0 LVDS 0 - -
R8A7745 (RZ/G1E) DPAD 0 DPAD 1 - -
R8A7779 (R-Car H1) DPAD 0 DPAD 1 - -
R8A7790 (R-Car H2) DPAD 0 LVDS 0 LVDS 1 -
R8A7791 (R-Car M2-W) DPAD 0 LVDS 0 - -
R8A7792 (R-Car V2H) DPAD 0 DPAD 1 - -
R8A7793 (R-Car M2-N) DPAD 0 LVDS 0 - -
R8A7794 (R-Car E2) DPAD 0 DPAD 1 - -
R8A7795 (R-Car H3) DPAD 0 HDMI 0 HDMI 1 LVDS 0
R8A7796 (R-Car M3-W) DPAD 0 HDMI 0 LVDS 0 -
R8A77970 (R-Car V3M) DPAD 0 LVDS 0 - -
R8A77995 (R-Car D3) DPAD 0 LVDS 0 LVDS 1 -
R8A7743 (RZ/G1M) DPAD 0 LVDS 0 - -
R8A7745 (RZ/G1E) DPAD 0 DPAD 1 - -
R8A7779 (R-Car H1) DPAD 0 DPAD 1 - -
R8A7790 (R-Car H2) DPAD 0 LVDS 0 LVDS 1 -
R8A7791 (R-Car M2-W) DPAD 0 LVDS 0 - -
R8A7792 (R-Car V2H) DPAD 0 DPAD 1 - -
R8A7793 (R-Car M2-N) DPAD 0 LVDS 0 - -
R8A7794 (R-Car E2) DPAD 0 DPAD 1 - -
R8A7795 (R-Car H3) DPAD 0 HDMI 0 HDMI 1 LVDS 0
R8A7796 (R-Car M3-W) DPAD 0 HDMI 0 LVDS 0 -
R8A77965 (R-Car M3-N) DPAD 0 HDMI 0 LVDS 0 -
R8A77970 (R-Car V3M) DPAD 0 LVDS 0 - -
R8A77995 (R-Car D3) DPAD 0 LVDS 0 LVDS 1 -
Example: R8A7795 (R-Car H3) ES2.0 DU

View File

@ -0,0 +1,93 @@
Allwinner A31 DSI Encoder
=========================
The DSI pipeline consists of two separate blocks: the DSI controller
itself, and its associated D-PHY.
DSI Encoder
-----------
The DSI Encoder generates the DSI signal from the TCON's.
Required properties:
- compatible: value must be one of:
* allwinner,sun6i-a31-mipi-dsi
- reg: base address and size of memory-mapped region
- interrupts: interrupt associated to this IP
- clocks: phandles to the clocks feeding the DSI encoder
* bus: the DSI interface clock
* mod: the DSI module clock
- clock-names: the clock names mentioned above
- phys: phandle to the D-PHY
- phy-names: must be "dphy"
- resets: phandle to the reset controller driving the encoder
- ports: A ports node with endpoint definitions as defined in
Documentation/devicetree/bindings/media/video-interfaces.txt. The
first port should be the input endpoint, usually coming from the
associated TCON.
Any MIPI-DSI device attached to this should be described according to
the bindings defined in ../mipi-dsi-bus.txt
D-PHY
-----
Required properties:
- compatible: value must be one of:
* allwinner,sun6i-a31-mipi-dphy
- reg: base address and size of memory-mapped region
- clocks: phandles to the clocks feeding the DSI encoder
* bus: the DSI interface clock
* mod: the DSI module clock
- clock-names: the clock names mentioned above
- resets: phandle to the reset controller driving the encoder
Example:
dsi0: dsi@1ca0000 {
compatible = "allwinner,sun6i-a31-mipi-dsi";
reg = <0x01ca0000 0x1000>;
interrupts = <GIC_SPI 89 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&ccu CLK_BUS_MIPI_DSI>,
<&ccu CLK_DSI_SCLK>;
clock-names = "bus", "mod";
resets = <&ccu RST_BUS_MIPI_DSI>;
phys = <&dphy0>;
phy-names = "dphy";
#address-cells = <1>;
#size-cells = <0>;
panel@0 {
compatible = "bananapi,lhr050h41", "ilitek,ili9881c";
reg = <0>;
power-gpios = <&pio 1 7 GPIO_ACTIVE_HIGH>; /* PB07 */
reset-gpios = <&r_pio 0 5 GPIO_ACTIVE_LOW>; /* PL05 */
backlight = <&pwm_bl>;
};
ports {
#address-cells = <1>;
#size-cells = <0>;
port@0 {
#address-cells = <1>;
#size-cells = <0>;
reg = <0>;
dsi0_in_tcon0: endpoint {
remote-endpoint = <&tcon0_out_dsi0>;
};
};
};
};
dphy0: d-phy@1ca1000 {
compatible = "allwinner,sun6i-a31-mipi-dphy";
reg = <0x01ca1000 0x1000>;
clocks = <&ccu CLK_BUS_MIPI_DSI>,
<&ccu CLK_DSI_DPHY>;
clock-names = "bus", "mod";
resets = <&ccu RST_BUS_MIPI_DSI>;
#phy-cells = <0>;
};

View File

@ -0,0 +1,28 @@
Broadcom V3D GPU
Only the Broadcom V3D 3.x and newer GPUs are covered by this binding.
For V3D 2.x, see brcm,bcm-vc4.txt.
Required properties:
- compatible: Should be "brcm,7268-v3d" or "brcm,7278-v3d"
- reg: Physical base addresses and lengths of the register areas
- reg-names: Names for the register areas. The "hub", "bridge", and "core0"
register areas are always required. The "gca" register area
is required if the GCA cache controller is present.
- interrupts: The interrupt numbers. The first interrupt is for the hub,
while the following interrupts are for the cores.
See bindings/interrupt-controller/interrupts.txt
Optional properties:
- clocks: The core clock the unit runs on
v3d {
compatible = "brcm,7268-v3d";
reg = <0xf1204000 0x100>,
<0xf1200000 0x4000>,
<0xf1208000 0x4000>,
<0xf1204100 0x100>;
reg-names = "bridge", "hub", "core0", "gca";
interrupts = <0 78 4>,
<0 77 4>;
};

View File

@ -0,0 +1,27 @@
* Samsung Exynos Image Scaler
Required properties:
- compatible : value should be one of the following:
(a) "samsung,exynos5420-scaler" for Scaler IP in Exynos5420
(b) "samsung,exynos5433-scaler" for Scaler IP in Exynos5433
- reg : Physical base address of the IP registers and length of memory
mapped region.
- interrupts : Interrupt specifier for scaler interrupt, according to format
specific to interrupt parent.
- clocks : Clock specifier for scaler clock, according to generic clock
bindings. (See Documentation/devicetree/bindings/clock/exynos*.txt)
- clock-names : Names of clocks. For exynos scaler, it should be "mscl"
on 5420 and "pclk", "aclk" and "aclk_xiu" on 5433.
Example:
scaler@12800000 {
compatible = "samsung,exynos5420-scaler";
reg = <0x12800000 0x1294>;
interrupts = <0 220 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clock CLK_MSCL0>;
clock-names = "mscl";
};

View File

@ -10,8 +10,10 @@ GPU Driver Documentation
tegra
tinydrm
tve200
v3d
vc4
bridge/dw-hdmi
xen-front
.. only:: subproject and html

View File

@ -58,6 +58,12 @@ Intel GVT-g Host Support(vGPU device model)
.. kernel-doc:: drivers/gpu/drm/i915/intel_gvt.c
:internal:
Workarounds
-----------
.. kernel-doc:: drivers/gpu/drm/i915/intel_workarounds.c
:doc: Hardware workarounds
Display Hardware Handling
=========================
@ -249,6 +255,103 @@ Memory Management and Command Submission
This sections covers all things related to the GEM implementation in the
i915 driver.
Intel GPU Basics
----------------
An Intel GPU has multiple engines. There are several engine types.
- RCS engine is for rendering 3D and performing compute, this is named
`I915_EXEC_RENDER` in user space.
- BCS is a blitting (copy) engine, this is named `I915_EXEC_BLT` in user
space.
- VCS is a video encode and decode engine, this is named `I915_EXEC_BSD`
in user space
- VECS is video enhancement engine, this is named `I915_EXEC_VEBOX` in user
space.
- The enumeration `I915_EXEC_DEFAULT` does not refer to specific engine;
instead it is to be used by user space to specify a default rendering
engine (for 3D) that may or may not be the same as RCS.
The Intel GPU family is a family of integrated GPU's using Unified
Memory Access. For having the GPU "do work", user space will feed the
GPU batch buffers via one of the ioctls `DRM_IOCTL_I915_GEM_EXECBUFFER2`
or `DRM_IOCTL_I915_GEM_EXECBUFFER2_WR`. Most such batchbuffers will
instruct the GPU to perform work (for example rendering) and that work
needs memory from which to read and memory to which to write. All memory
is encapsulated within GEM buffer objects (usually created with the ioctl
`DRM_IOCTL_I915_GEM_CREATE`). An ioctl providing a batchbuffer for the GPU
to create will also list all GEM buffer objects that the batchbuffer reads
and/or writes. For implementation details of memory management see
`GEM BO Management Implementation Details`_.
The i915 driver allows user space to create a context via the ioctl
`DRM_IOCTL_I915_GEM_CONTEXT_CREATE` which is identified by a 32-bit
integer. Such a context should be viewed by user-space as -loosely-
analogous to the idea of a CPU process of an operating system. The i915
driver guarantees that commands issued to a fixed context are to be
executed so that writes of a previously issued command are seen by
reads of following commands. Actions issued between different contexts
(even if from the same file descriptor) are NOT given that guarantee
and the only way to synchronize across contexts (even from the same
file descriptor) is through the use of fences. At least as far back as
Gen4, also have that a context carries with it a GPU HW context;
the HW context is essentially (most of atleast) the state of a GPU.
In addition to the ordering guarantees, the kernel will restore GPU
state via HW context when commands are issued to a context, this saves
user space the need to restore (most of atleast) the GPU state at the
start of each batchbuffer. The non-deprecated ioctls to submit batchbuffer
work can pass that ID (in the lower bits of drm_i915_gem_execbuffer2::rsvd1)
to identify what context to use with the command.
The GPU has its own memory management and address space. The kernel
driver maintains the memory translation table for the GPU. For older
GPUs (i.e. those before Gen8), there is a single global such translation
table, a global Graphics Translation Table (GTT). For newer generation
GPUs each context has its own translation table, called Per-Process
Graphics Translation Table (PPGTT). Of important note, is that although
PPGTT is named per-process it is actually per context. When user space
submits a batchbuffer, the kernel walks the list of GEM buffer objects
used by the batchbuffer and guarantees that not only is the memory of
each such GEM buffer object resident but it is also present in the
(PP)GTT. If the GEM buffer object is not yet placed in the (PP)GTT,
then it is given an address. Two consequences of this are: the kernel
needs to edit the batchbuffer submitted to write the correct value of
the GPU address when a GEM BO is assigned a GPU address and the kernel
might evict a different GEM BO from the (PP)GTT to make address room
for another GEM BO. Consequently, the ioctls submitting a batchbuffer
for execution also include a list of all locations within buffers that
refer to GPU-addresses so that the kernel can edit the buffer correctly.
This process is dubbed relocation.
GEM BO Management Implementation Details
----------------------------------------
.. kernel-doc:: drivers/gpu/drm/i915/i915_vma.h
:doc: Virtual Memory Address
Buffer Object Eviction
----------------------
This section documents the interface functions for evicting buffer
objects to make space available in the virtual gpu address spaces. Note
that this is mostly orthogonal to shrinking buffer objects caches, which
has the goal to make main memory (shared with the gpu through the
unified memory architecture) available.
.. kernel-doc:: drivers/gpu/drm/i915/i915_gem_evict.c
:internal:
Buffer Object Memory Shrinking
------------------------------
This section documents the interface function for shrinking memory usage
of buffer object caches. Shrinking is used to make main memory
available. Note that this is mostly orthogonal to evicting buffer
objects, which has the goal to make space in gpu virtual address spaces.
.. kernel-doc:: drivers/gpu/drm/i915/i915_gem_shrinker.c
:internal:
Batchbuffer Parsing
-------------------
@ -267,6 +370,12 @@ Batchbuffer Pools
.. kernel-doc:: drivers/gpu/drm/i915/i915_gem_batch_pool.c
:internal:
User Batchbuffer Execution
--------------------------
.. kernel-doc:: drivers/gpu/drm/i915/i915_gem_execbuffer.c
:doc: User command execution
Logical Rings, Logical Ring Contexts and Execlists
--------------------------------------------------
@ -312,28 +421,14 @@ Object Tiling IOCTLs
.. kernel-doc:: drivers/gpu/drm/i915/i915_gem_tiling.c
:doc: buffer object tiling
Buffer Object Eviction
----------------------
WOPCM
=====
This section documents the interface functions for evicting buffer
objects to make space available in the virtual gpu address spaces. Note
that this is mostly orthogonal to shrinking buffer objects caches, which
has the goal to make main memory (shared with the gpu through the
unified memory architecture) available.
WOPCM Layout
------------
.. kernel-doc:: drivers/gpu/drm/i915/i915_gem_evict.c
:internal:
Buffer Object Memory Shrinking
------------------------------
This section documents the interface function for shrinking memory usage
of buffer object caches. Shrinking is used to make main memory
available. Note that this is mostly orthogonal to evicting buffer
objects, which has the goal to make space in gpu virtual address spaces.
.. kernel-doc:: drivers/gpu/drm/i915/i915_gem_shrinker.c
:internal:
.. kernel-doc:: drivers/gpu/drm/i915/intel_wopcm.c
:doc: WOPCM Layout
GuC
===
@ -359,6 +454,12 @@ GuC Firmware Layout
.. kernel-doc:: drivers/gpu/drm/i915/intel_guc_fwif.h
:doc: GuC Firmware Layout
GuC Address Space
-----------------
.. kernel-doc:: drivers/gpu/drm/i915/intel_guc.c
:doc: GuC Address Space
Tracing
=======

View File

@ -98,5 +98,4 @@ radeon,DVI-I,“coherent”,RANGE,"Min=0, Max=1",Connector,TBD
,,"""underscan vborder""",RANGE,"Min=0, Max=128",Connector,TBD
,Audio,“audio”,ENUM,"{ ""off"", ""on"", ""auto"" }",Connector,TBD
,FMT Dithering,“dither”,ENUM,"{ ""off"", ""on"" }",Connector,TBD
rcar-du,Generic,"""alpha""",RANGE,"Min=0, Max=255",Plane,TBD
,,"""colorkey""",RANGE,"Min=0, Max=0x01ffffff",Plane,TBD

1 Owner Module/Drivers Group Property Name Type Property Values Object attached Description/Restrictions
98 "underscan vborder" RANGE Min=0, Max=128 Connector TBD
99 Audio “audio” ENUM { "off", "on", "auto" } Connector TBD
100 FMT Dithering “dither” ENUM { "off", "on" } Connector TBD
rcar-du Generic "alpha" RANGE Min=0, Max=255 Plane TBD
101 "colorkey" RANGE Min=0, Max=0x01ffffff Plane TBD

View File

@ -212,6 +212,24 @@ probably use drm_fb_helper_fbdev_teardown().
Contact: Maintainer of the driver you plan to convert
Clean up mmap forwarding
------------------------
A lot of drivers forward gem mmap calls to dma-buf mmap for imported buffers.
And also a lot of them forward dma-buf mmap to the gem mmap implementations.
Would be great to refactor this all into a set of small common helpers.
Contact: Daniel Vetter
Put a reservation_object into drm_gem_object
--------------------------------------------
This would remove the need for the ->gem_prime_res_obj callback. It would also
allow us to implement generic helpers for waiting for a bo, allowing for quite a
bit of refactoring in the various wait ioctl implementations.
Contact: Daniel Vetter
idr_init_base()
---------------

View File

@ -0,0 +1,31 @@
====================================================
drm/xen-front Xen para-virtualized frontend driver
====================================================
This frontend driver implements Xen para-virtualized display
according to the display protocol described at
include/xen/interface/io/displif.h
Driver modes of operation in terms of display buffers used
==========================================================
.. kernel-doc:: drivers/gpu/drm/xen/xen_drm_front.h
:doc: Driver modes of operation in terms of display buffers used
Buffers allocated by the frontend driver
----------------------------------------
.. kernel-doc:: drivers/gpu/drm/xen/xen_drm_front.h
:doc: Buffers allocated by the frontend driver
Buffers allocated by the backend
--------------------------------
.. kernel-doc:: drivers/gpu/drm/xen/xen_drm_front.h
:doc: Buffers allocated by the backend
Driver limitations
==================
.. kernel-doc:: drivers/gpu/drm/xen/xen_drm_front.h
:doc: Driver limitations

View File

@ -767,12 +767,14 @@ F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
F: drivers/gpu/drm/amd/amdkfd/
F: drivers/gpu/drm/amd/include/cik_structs.h
F: drivers/gpu/drm/amd/include/kgd_kfd_interface.h
F: drivers/gpu/drm/amd/include/vi_structs.h
F: drivers/gpu/drm/amd/include/v9_structs.h
F: include/uapi/linux/kfd_ioctl.h
AMD SEATTLE DEVICE TREE SUPPORT
@ -4685,7 +4687,7 @@ F: Documentation/devicetree/bindings/display/exynos/
DRM DRIVERS FOR FREESCALE DCU
M: Stefan Agner <stefan@agner.ch>
M: Alison Wang <alison.wang@freescale.com>
M: Alison Wang <alison.wang@nxp.com>
L: dri-devel@lists.freedesktop.org
S: Supported
F: drivers/gpu/drm/fsl-dcu/
@ -4796,6 +4798,14 @@ S: Maintained
F: drivers/gpu/drm/omapdrm/
F: Documentation/devicetree/bindings/display/ti/
DRM DRIVERS FOR V3D
M: Eric Anholt <eric@anholt.net>
S: Supported
F: drivers/gpu/drm/v3d/
F: include/uapi/drm/v3d_drm.h
F: Documentation/devicetree/bindings/display/brcm,bcm-v3d.txt
T: git git://anongit.freedesktop.org/drm/drm-misc
DRM DRIVERS FOR VC4
M: Eric Anholt <eric@anholt.net>
T: git git://github.com/anholt/linux
@ -4842,6 +4852,15 @@ S: Maintained
F: drivers/gpu/drm/tinydrm/
F: include/drm/tinydrm/
DRM DRIVERS FOR XEN
M: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
T: git git://anongit.freedesktop.org/drm/drm-misc
L: dri-devel@lists.freedesktop.org
L: xen-devel@lists.xen.org
S: Supported
F: drivers/gpu/drm/xen/
F: Documentation/gpu/xen-front.rst
DRM TTM SUBSYSTEM
M: Christian Koenig <christian.koenig@amd.com>
M: Roger He <Hongbo.He@amd.com>

View File

@ -62,8 +62,6 @@ struct sync_pt {
struct rb_node node;
};
#ifdef CONFIG_SW_SYNC
extern const struct file_operations sw_sync_debugfs_fops;
void sync_timeline_debug_add(struct sync_timeline *obj);
@ -72,12 +70,4 @@ void sync_file_debug_add(struct sync_file *fence);
void sync_file_debug_remove(struct sync_file *fence);
void sync_dump(void);
#else
# define sync_timeline_debug_add(obj)
# define sync_timeline_debug_remove(obj)
# define sync_file_debug_add(fence)
# define sync_file_debug_remove(fence)
# define sync_dump()
#endif
#endif /* _LINUX_SYNC_H */

View File

@ -49,16 +49,17 @@ config DRM_DEBUG_MM
If in doubt, say "N".
config DRM_DEBUG_MM_SELFTEST
tristate "kselftests for DRM range manager (struct drm_mm)"
config DRM_DEBUG_SELFTEST
tristate "kselftests for DRM"
depends on DRM
depends on DEBUG_KERNEL
select PRIME_NUMBERS
select DRM_LIB_RANDOM
select DRM_KMS_HELPER
default n
help
This option provides a kernel module that can be used to test
the DRM range manager (drm_mm) and its API. This option is not
This option provides kernel modules that can be used to run
various selftests on parts of the DRM api. This option is not
useful for distributions or general kernels, but only for kernel
developers working on DRM and associated drivers.
@ -267,6 +268,8 @@ source "drivers/gpu/drm/amd/amdkfd/Kconfig"
source "drivers/gpu/drm/imx/Kconfig"
source "drivers/gpu/drm/v3d/Kconfig"
source "drivers/gpu/drm/vc4/Kconfig"
source "drivers/gpu/drm/etnaviv/Kconfig"
@ -289,6 +292,8 @@ source "drivers/gpu/drm/pl111/Kconfig"
source "drivers/gpu/drm/tve200/Kconfig"
source "drivers/gpu/drm/xen/Kconfig"
# Keep legacy drivers last
menuconfig DRM_LEGACY

View File

@ -43,7 +43,7 @@ drm_kms_helper-$(CONFIG_DRM_KMS_CMA_HELPER) += drm_fb_cma_helper.o
drm_kms_helper-$(CONFIG_DRM_DP_AUX_CHARDEV) += drm_dp_aux_dev.o
obj-$(CONFIG_DRM_KMS_HELPER) += drm_kms_helper.o
obj-$(CONFIG_DRM_DEBUG_MM_SELFTEST) += selftests/
obj-$(CONFIG_DRM_DEBUG_SELFTEST) += selftests/
obj-$(CONFIG_DRM) += drm.o
obj-$(CONFIG_DRM_MIPI_DSI) += drm_mipi_dsi.o
@ -61,6 +61,7 @@ obj-$(CONFIG_DRM_MGA) += mga/
obj-$(CONFIG_DRM_I810) += i810/
obj-$(CONFIG_DRM_I915) += i915/
obj-$(CONFIG_DRM_MGAG200) += mgag200/
obj-$(CONFIG_DRM_V3D) += v3d/
obj-$(CONFIG_DRM_VC4) += vc4/
obj-$(CONFIG_DRM_CIRRUS_QEMU) += cirrus/
obj-$(CONFIG_DRM_SIS) += sis/
@ -103,3 +104,4 @@ obj-$(CONFIG_DRM_MXSFB) += mxsfb/
obj-$(CONFIG_DRM_TINYDRM) += tinydrm/
obj-$(CONFIG_DRM_PL111) += pl111/
obj-$(CONFIG_DRM_TVE200) += tve200/
obj-$(CONFIG_DRM_XEN) += xen/

View File

@ -56,13 +56,18 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
# add asic specific block
amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
ci_smc.o ci_dpm.o dce_v8_0.o gfx_v7_0.o cik_sdma.o uvd_v4_2.o vce_v2_0.o \
amdgpu_amdkfd_gfx_v7.o
ci_smc.o ci_dpm.o dce_v8_0.o gfx_v7_0.o cik_sdma.o uvd_v4_2.o vce_v2_0.o
amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce_v6_0.o si_dpm.o si_smc.o
amdgpu-y += \
vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o
vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \
vega20_reg_init.o
# add DF block
amdgpu-y += \
df_v1_7.o \
df_v3_6.o
# add GMC block
amdgpu-y += \
@ -126,11 +131,20 @@ amdgpu-y += \
vcn_v1_0.o
# add amdkfd interfaces
amdgpu-y += amdgpu_amdkfd.o
ifneq ($(CONFIG_HSA_AMD),)
amdgpu-y += \
amdgpu_amdkfd.o \
amdgpu_amdkfd_fence.o \
amdgpu_amdkfd_gpuvm.o \
amdgpu_amdkfd_gfx_v8.o
amdgpu_amdkfd_gfx_v8.o \
amdgpu_amdkfd_gfx_v9.o
ifneq ($(CONFIG_DRM_AMDGPU_CIK),)
amdgpu-y += amdgpu_amdkfd_gfx_v7.o
endif
endif
# add cgs
amdgpu-y += amdgpu_cgs.o

View File

@ -129,6 +129,7 @@ extern int amdgpu_lbpw;
extern int amdgpu_compute_multipipe;
extern int amdgpu_gpu_recovery;
extern int amdgpu_emu_mode;
extern uint amdgpu_smu_memory_pool_size;
#ifdef CONFIG_DRM_AMDGPU_SI
extern int amdgpu_si_support;
@ -137,6 +138,7 @@ extern int amdgpu_si_support;
extern int amdgpu_cik_support;
#endif
#define AMDGPU_SG_THRESHOLD (256*1024*1024)
#define AMDGPU_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */
#define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000
#define AMDGPU_MAX_USEC_TIMEOUT 100000 /* 100 ms */
@ -222,10 +224,10 @@ enum amdgpu_kiq_irq {
AMDGPU_CP_KIQ_IRQ_LAST
};
int amdgpu_device_ip_set_clockgating_state(struct amdgpu_device *adev,
int amdgpu_device_ip_set_clockgating_state(void *dev,
enum amd_ip_block_type block_type,
enum amd_clockgating_state state);
int amdgpu_device_ip_set_powergating_state(struct amdgpu_device *adev,
int amdgpu_device_ip_set_powergating_state(void *dev,
enum amd_ip_block_type block_type,
enum amd_powergating_state state);
void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
@ -681,6 +683,8 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id);
void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr);
void amdgpu_ctx_mgr_entity_cleanup(struct amdgpu_ctx_mgr *mgr);
void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr);
void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
@ -771,9 +775,18 @@ struct amdgpu_rlc {
u32 starting_offsets_start;
u32 reg_list_format_size_bytes;
u32 reg_list_size_bytes;
u32 reg_list_format_direct_reg_list_length;
u32 save_restore_list_cntl_size_bytes;
u32 save_restore_list_gpm_size_bytes;
u32 save_restore_list_srm_size_bytes;
u32 *register_list_format;
u32 *register_restore;
u8 *save_restore_list_cntl;
u8 *save_restore_list_gpm;
u8 *save_restore_list_srm;
bool is_rlc_v2_1;
};
#define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES
@ -867,6 +880,8 @@ struct amdgpu_gfx_config {
/* gfx configure feature */
uint32_t double_offchip_lds_buf;
/* cached value of DB_DEBUG2 */
uint32_t db_debug2;
};
struct amdgpu_cu_info {
@ -938,6 +953,12 @@ struct amdgpu_gfx {
uint32_t ce_feature_version;
uint32_t pfp_feature_version;
uint32_t rlc_feature_version;
uint32_t rlc_srlc_fw_version;
uint32_t rlc_srlc_feature_version;
uint32_t rlc_srlg_fw_version;
uint32_t rlc_srlg_feature_version;
uint32_t rlc_srls_fw_version;
uint32_t rlc_srls_feature_version;
uint32_t mec_feature_version;
uint32_t mec2_feature_version;
struct amdgpu_ring gfx_ring[AMDGPU_MAX_GFX_RINGS];
@ -1204,6 +1225,8 @@ struct amdgpu_asic_funcs {
/* invalidate hdp read cache */
void (*invalidate_hdp)(struct amdgpu_device *adev,
struct amdgpu_ring *ring);
/* check if the asic needs a full reset of if soft reset will work */
bool (*need_full_reset)(struct amdgpu_device *adev);
};
/*
@ -1368,7 +1391,19 @@ struct amdgpu_nbio_funcs {
void (*detect_hw_virt)(struct amdgpu_device *adev);
};
struct amdgpu_df_funcs {
void (*init)(struct amdgpu_device *adev);
void (*enable_broadcast_mode)(struct amdgpu_device *adev,
bool enable);
u32 (*get_fb_channel_number)(struct amdgpu_device *adev);
u32 (*get_hbm_channel_number)(struct amdgpu_device *adev);
void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev,
bool enable);
void (*get_clockgating_state)(struct amdgpu_device *adev,
u32 *flags);
void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev,
bool enable);
};
/* Define the HW IP blocks will be used in driver , add more if necessary */
enum amd_hw_ip_block_type {
GC_HWIP = 1,
@ -1398,6 +1433,7 @@ enum amd_hw_ip_block_type {
struct amd_powerplay {
void *pp_handle;
const struct amd_pm_funcs *pp_funcs;
uint32_t pp_feature;
};
#define AMDGPU_RESET_MAGIC_NUM 64
@ -1590,6 +1626,7 @@ struct amdgpu_device {
uint32_t *reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE];
const struct amdgpu_nbio_funcs *nbio_funcs;
const struct amdgpu_df_funcs *df_funcs;
/* delayed work_func for deferring clockgating during resume */
struct delayed_work late_init_work;
@ -1764,6 +1801,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
#define amdgpu_asic_get_config_memsize(adev) (adev)->asic_funcs->get_config_memsize((adev))
#define amdgpu_asic_flush_hdp(adev, r) (adev)->asic_funcs->flush_hdp((adev), (r))
#define amdgpu_asic_invalidate_hdp(adev, r) (adev)->asic_funcs->invalidate_hdp((adev), (r))
#define amdgpu_asic_need_full_reset(adev) (adev)->asic_funcs->need_full_reset((adev))
#define amdgpu_gmc_flush_gpu_tlb(adev, vmid) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid))
#define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
#define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
@ -1790,6 +1828,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
#define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d))
#define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
#define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
#define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m))
#define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b))
#define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
#define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))

View File

@ -290,12 +290,11 @@ static int acp_hw_init(void *handle)
else if (r)
return r;
r = cgs_get_pci_resource(adev->acp.cgs_device, CGS_RESOURCE_TYPE_MMIO,
0x5289, 0, &acp_base);
if (r == -ENODEV)
return 0;
else if (r)
return r;
if (adev->rmmio_size == 0 || adev->rmmio_size < 0x5289)
return -EINVAL;
acp_base = adev->rmmio_base;
if (adev->asic_type != CHIP_STONEY) {
adev->acp.acp_genpd = kzalloc(sizeof(struct acp_pm_domain), GFP_KERNEL);
if (adev->acp.acp_genpd == NULL)

View File

@ -50,15 +50,21 @@ int amdgpu_amdkfd_init(void)
kgd2kfd = NULL;
}
#elif defined(CONFIG_HSA_AMD)
ret = kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd);
if (ret)
kgd2kfd = NULL;
#else
kgd2kfd = NULL;
ret = -ENOENT;
#endif
#if defined(CONFIG_HSA_AMD_MODULE) || defined(CONFIG_HSA_AMD)
amdgpu_amdkfd_gpuvm_init_mem_limits();
#endif
return ret;
}
@ -92,8 +98,12 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
case CHIP_POLARIS11:
kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions();
break;
case CHIP_VEGA10:
case CHIP_RAVEN:
kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions();
break;
default:
dev_dbg(adev->dev, "kfd not supported on this ASIC\n");
dev_info(adev->dev, "kfd not supported on this ASIC\n");
return;
}
@ -175,6 +185,28 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
&gpu_resources.doorbell_physical_address,
&gpu_resources.doorbell_aperture_size,
&gpu_resources.doorbell_start_offset);
if (adev->asic_type >= CHIP_VEGA10) {
/* On SOC15 the BIF is involved in routing
* doorbells using the low 12 bits of the
* address. Communicate the assignments to
* KFD. KFD uses two doorbell pages per
* process in case of 64-bit doorbells so we
* can use each doorbell assignment twice.
*/
gpu_resources.sdma_doorbell[0][0] =
AMDGPU_DOORBELL64_sDMA_ENGINE0;
gpu_resources.sdma_doorbell[0][1] =
AMDGPU_DOORBELL64_sDMA_ENGINE0 + 0x200;
gpu_resources.sdma_doorbell[1][0] =
AMDGPU_DOORBELL64_sDMA_ENGINE1;
gpu_resources.sdma_doorbell[1][1] =
AMDGPU_DOORBELL64_sDMA_ENGINE1 + 0x200;
/* Doorbells 0x0f0-0ff and 0x2f0-2ff are reserved for
* SDMA, IH and VCN. So don't use them for the CP.
*/
gpu_resources.reserved_doorbell_mask = 0x1f0;
gpu_resources.reserved_doorbell_val = 0x0f0;
}
kgd2kfd->device_init(adev->kfd, &gpu_resources);
}
@ -217,13 +249,19 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
{
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
struct amdgpu_bo *bo = NULL;
struct amdgpu_bo_param bp;
int r;
uint64_t gpu_addr_tmp = 0;
void *cpu_ptr_tmp = NULL;
r = amdgpu_bo_create(adev, size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
AMDGPU_GEM_CREATE_CPU_GTT_USWC, ttm_bo_type_kernel,
NULL, &bo);
memset(&bp, 0, sizeof(bp));
bp.size = size;
bp.byte_align = PAGE_SIZE;
bp.domain = AMDGPU_GEM_DOMAIN_GTT;
bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
bp.type = ttm_bo_type_kernel;
bp.resv = NULL;
r = amdgpu_bo_create(adev, &bp, &bo);
if (r) {
dev_err(adev->dev,
"failed to allocate BO for amdkfd (%d)\n", r);
@ -432,3 +470,44 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
return false;
}
#if !defined(CONFIG_HSA_AMD_MODULE) && !defined(CONFIG_HSA_AMD)
bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
{
return false;
}
void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo)
{
}
void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
struct amdgpu_vm *vm)
{
}
struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f)
{
return NULL;
}
int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm)
{
return 0;
}
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
{
return NULL;
}
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
{
return NULL;
}
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void)
{
return NULL;
}
#endif

View File

@ -28,6 +28,7 @@
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/mmu_context.h>
#include <linux/workqueue.h>
#include <kgd_kfd_interface.h>
#include <drm/ttm/ttm_execbuf_util.h>
#include "amdgpu_sync.h"
@ -59,7 +60,9 @@ struct kgd_mem {
uint32_t mapping_flags;
atomic_t invalid;
struct amdkfd_process_info *process_info;
struct page **user_pages;
struct amdgpu_sync sync;
@ -84,6 +87,9 @@ struct amdkfd_process_info {
struct list_head vm_list_head;
/* List head for all KFD BOs that belong to a KFD process. */
struct list_head kfd_bo_list;
/* List of userptr BOs that are valid or invalid */
struct list_head userptr_valid_list;
struct list_head userptr_inval_list;
/* Lock to protect kfd_bo_list */
struct mutex lock;
@ -91,6 +97,11 @@ struct amdkfd_process_info {
unsigned int n_vms;
/* Eviction Fence */
struct amdgpu_amdkfd_fence *eviction_fence;
/* MMU-notifier related fields */
atomic_t evicted_bos;
struct delayed_work restore_userptr_work;
struct pid *pid;
};
int amdgpu_amdkfd_init(void);
@ -104,12 +115,14 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev);
void amdgpu_amdkfd_device_init(struct amdgpu_device *adev);
void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev);
int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm);
int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
uint32_t vmid, uint64_t gpu_addr,
uint32_t *ib_cmd, uint32_t ib_len);
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void);
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void);
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void);
bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
@ -143,14 +156,14 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
/* GPUVM API */
int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
void **process_info,
struct dma_fence **ef);
void **process_info,
struct dma_fence **ef);
int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
struct file *filp,
void **vm, void **process_info,
struct dma_fence **ef);
struct file *filp,
void **vm, void **process_info,
struct dma_fence **ef);
void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
struct amdgpu_vm *vm);
struct amdgpu_vm *vm);
void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm);
uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm);
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(

View File

@ -98,8 +98,6 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
unsigned int vmid);
static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
uint32_t hpd_size, uint64_t hpd_gpu_addr);
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
uint32_t queue_id, uint32_t __user *wptr,
@ -183,7 +181,6 @@ static const struct kfd2kgd_calls kfd2kgd = {
.free_pasid = amdgpu_pasid_free,
.program_sh_mem_settings = kgd_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
.init_pipeline = kgd_init_pipeline,
.init_interrupts = kgd_init_interrupts,
.hqd_load = kgd_hqd_load,
.hqd_sdma_load = kgd_hqd_sdma_load,
@ -309,13 +306,6 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
return 0;
}
static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
uint32_t hpd_size, uint64_t hpd_gpu_addr)
{
/* amdgpu owns the per-pipe state */
return 0;
}
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);

View File

@ -57,8 +57,6 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
uint32_t sh_mem_bases);
static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
unsigned int vmid);
static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
uint32_t hpd_size, uint64_t hpd_gpu_addr);
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
uint32_t queue_id, uint32_t __user *wptr,
@ -141,7 +139,6 @@ static const struct kfd2kgd_calls kfd2kgd = {
.free_pasid = amdgpu_pasid_free,
.program_sh_mem_settings = kgd_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
.init_pipeline = kgd_init_pipeline,
.init_interrupts = kgd_init_interrupts,
.hqd_load = kgd_hqd_load,
.hqd_sdma_load = kgd_hqd_sdma_load,
@ -270,13 +267,6 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
return 0;
}
static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
uint32_t hpd_size, uint64_t hpd_gpu_addr)
{
/* amdgpu owns the per-pipe state */
return 0;
}
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);

File diff suppressed because it is too large Load Diff

View File

@ -23,6 +23,8 @@
#define pr_fmt(fmt) "kfd2kgd: " fmt
#include <linux/list.h>
#include <linux/pagemap.h>
#include <linux/sched/mm.h>
#include <drm/drmP.h>
#include "amdgpu_object.h"
#include "amdgpu_vm.h"
@ -33,10 +35,20 @@
*/
#define VI_BO_SIZE_ALIGN (0x8000)
/* BO flag to indicate a KFD userptr BO */
#define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63)
/* Userptr restore delay, just long enough to allow consecutive VM
* changes to accumulate
*/
#define AMDGPU_USERPTR_RESTORE_DELAY_MS 1
/* Impose limit on how much memory KFD can use */
static struct {
uint64_t max_system_mem_limit;
uint64_t max_userptr_mem_limit;
int64_t system_mem_used;
int64_t userptr_mem_used;
spinlock_t mem_limit_lock;
} kfd_mem_limit;
@ -57,6 +69,7 @@ static const char * const domain_bit_to_string[] = {
#define domain_string(domain) domain_bit_to_string[ffs(domain)-1]
static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work);
static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
@ -78,6 +91,7 @@ static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm,
/* Set memory usage limits. Current, limits are
* System (kernel) memory - 3/8th System RAM
* Userptr memory - 3/4th System RAM
*/
void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
{
@ -90,8 +104,10 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
spin_lock_init(&kfd_mem_limit.mem_limit_lock);
kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3);
pr_debug("Kernel memory limit %lluM\n",
(kfd_mem_limit.max_system_mem_limit >> 20));
kfd_mem_limit.max_userptr_mem_limit = mem - (mem >> 2);
pr_debug("Kernel memory limit %lluM, userptr limit %lluM\n",
(kfd_mem_limit.max_system_mem_limit >> 20),
(kfd_mem_limit.max_userptr_mem_limit >> 20));
}
static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev,
@ -111,6 +127,16 @@ static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev,
goto err_no_mem;
}
kfd_mem_limit.system_mem_used += (acc_size + size);
} else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
if ((kfd_mem_limit.system_mem_used + acc_size >
kfd_mem_limit.max_system_mem_limit) ||
(kfd_mem_limit.userptr_mem_used + (size + acc_size) >
kfd_mem_limit.max_userptr_mem_limit)) {
ret = -ENOMEM;
goto err_no_mem;
}
kfd_mem_limit.system_mem_used += acc_size;
kfd_mem_limit.userptr_mem_used += size;
}
err_no_mem:
spin_unlock(&kfd_mem_limit.mem_limit_lock);
@ -126,10 +152,16 @@ static void unreserve_system_mem_limit(struct amdgpu_device *adev,
sizeof(struct amdgpu_bo));
spin_lock(&kfd_mem_limit.mem_limit_lock);
if (domain == AMDGPU_GEM_DOMAIN_GTT)
if (domain == AMDGPU_GEM_DOMAIN_GTT) {
kfd_mem_limit.system_mem_used -= (acc_size + size);
} else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
kfd_mem_limit.system_mem_used -= acc_size;
kfd_mem_limit.userptr_mem_used -= size;
}
WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
"kfd system memory accounting unbalanced");
WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
"kfd userptr memory accounting unbalanced");
spin_unlock(&kfd_mem_limit.mem_limit_lock);
}
@ -138,12 +170,17 @@ void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo)
{
spin_lock(&kfd_mem_limit.mem_limit_lock);
if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) {
if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) {
kfd_mem_limit.system_mem_used -= bo->tbo.acc_size;
kfd_mem_limit.userptr_mem_used -= amdgpu_bo_size(bo);
} else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) {
kfd_mem_limit.system_mem_used -=
(bo->tbo.acc_size + amdgpu_bo_size(bo));
}
WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
"kfd system memory accounting unbalanced");
WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
"kfd userptr memory accounting unbalanced");
spin_unlock(&kfd_mem_limit.mem_limit_lock);
}
@ -506,7 +543,8 @@ static void remove_bo_from_vm(struct amdgpu_device *adev,
}
static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
struct amdkfd_process_info *process_info)
struct amdkfd_process_info *process_info,
bool userptr)
{
struct ttm_validate_buffer *entry = &mem->validate_list;
struct amdgpu_bo *bo = mem->bo;
@ -515,10 +553,95 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
entry->shared = true;
entry->bo = &bo->tbo;
mutex_lock(&process_info->lock);
list_add_tail(&entry->head, &process_info->kfd_bo_list);
if (userptr)
list_add_tail(&entry->head, &process_info->userptr_valid_list);
else
list_add_tail(&entry->head, &process_info->kfd_bo_list);
mutex_unlock(&process_info->lock);
}
/* Initializes user pages. It registers the MMU notifier and validates
* the userptr BO in the GTT domain.
*
* The BO must already be on the userptr_valid_list. Otherwise an
* eviction and restore may happen that leaves the new BO unmapped
* with the user mode queues running.
*
* Takes the process_info->lock to protect against concurrent restore
* workers.
*
* Returns 0 for success, negative errno for errors.
*/
static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
uint64_t user_addr)
{
struct amdkfd_process_info *process_info = mem->process_info;
struct amdgpu_bo *bo = mem->bo;
struct ttm_operation_ctx ctx = { true, false };
int ret = 0;
mutex_lock(&process_info->lock);
ret = amdgpu_ttm_tt_set_userptr(bo->tbo.ttm, user_addr, 0);
if (ret) {
pr_err("%s: Failed to set userptr: %d\n", __func__, ret);
goto out;
}
ret = amdgpu_mn_register(bo, user_addr);
if (ret) {
pr_err("%s: Failed to register MMU notifier: %d\n",
__func__, ret);
goto out;
}
/* If no restore worker is running concurrently, user_pages
* should not be allocated
*/
WARN(mem->user_pages, "Leaking user_pages array");
mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
sizeof(struct page *),
GFP_KERNEL | __GFP_ZERO);
if (!mem->user_pages) {
pr_err("%s: Failed to allocate pages array\n", __func__);
ret = -ENOMEM;
goto unregister_out;
}
ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages);
if (ret) {
pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
goto free_out;
}
amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages);
ret = amdgpu_bo_reserve(bo, true);
if (ret) {
pr_err("%s: Failed to reserve BO\n", __func__);
goto release_out;
}
amdgpu_ttm_placement_from_domain(bo, mem->domain);
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (ret)
pr_err("%s: failed to validate BO\n", __func__);
amdgpu_bo_unreserve(bo);
release_out:
if (ret)
release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
free_out:
kvfree(mem->user_pages);
mem->user_pages = NULL;
unregister_out:
if (ret)
amdgpu_mn_unregister(bo);
out:
mutex_unlock(&process_info->lock);
return ret;
}
/* Reserving a BO and its page table BOs must happen atomically to
* avoid deadlocks. Some operations update multiple VMs at once. Track
* all the reservation info in a context structure. Optionally a sync
@ -748,7 +871,8 @@ static int update_gpuvm_pte(struct amdgpu_device *adev,
}
static int map_bo_to_gpuvm(struct amdgpu_device *adev,
struct kfd_bo_va_list *entry, struct amdgpu_sync *sync)
struct kfd_bo_va_list *entry, struct amdgpu_sync *sync,
bool no_update_pte)
{
int ret;
@ -762,6 +886,9 @@ static int map_bo_to_gpuvm(struct amdgpu_device *adev,
return ret;
}
if (no_update_pte)
return 0;
ret = update_gpuvm_pte(adev, entry, sync);
if (ret) {
pr_err("update_gpuvm_pte() failed\n");
@ -820,6 +947,8 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
mutex_init(&info->lock);
INIT_LIST_HEAD(&info->vm_list_head);
INIT_LIST_HEAD(&info->kfd_bo_list);
INIT_LIST_HEAD(&info->userptr_valid_list);
INIT_LIST_HEAD(&info->userptr_inval_list);
info->eviction_fence =
amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
@ -830,6 +959,11 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
goto create_evict_fence_fail;
}
info->pid = get_task_pid(current->group_leader, PIDTYPE_PID);
atomic_set(&info->evicted_bos, 0);
INIT_DELAYED_WORK(&info->restore_userptr_work,
amdgpu_amdkfd_restore_userptr_worker);
*process_info = info;
*ef = dma_fence_get(&info->eviction_fence->base);
}
@ -872,6 +1006,7 @@ reserve_pd_fail:
dma_fence_put(*ef);
*ef = NULL;
*process_info = NULL;
put_pid(info->pid);
create_evict_fence_fail:
mutex_destroy(&info->lock);
kfree(info);
@ -967,8 +1102,12 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
/* Release per-process resources when last compute VM is destroyed */
if (!process_info->n_vms) {
WARN_ON(!list_empty(&process_info->kfd_bo_list));
WARN_ON(!list_empty(&process_info->userptr_valid_list));
WARN_ON(!list_empty(&process_info->userptr_inval_list));
dma_fence_put(&process_info->eviction_fence->base);
cancel_delayed_work_sync(&process_info->restore_userptr_work);
put_pid(process_info->pid);
mutex_destroy(&process_info->lock);
kfree(process_info);
}
@ -1003,9 +1142,11 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
uint64_t user_addr = 0;
struct amdgpu_bo *bo;
struct amdgpu_bo_param bp;
int byte_align;
u32 alloc_domain;
u32 domain, alloc_domain;
u64 alloc_flags;
uint32_t mapping_flags;
int ret;
@ -1014,14 +1155,21 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
* Check on which domain to allocate BO
*/
if (flags & ALLOC_MEM_FLAGS_VRAM) {
alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
alloc_flags = AMDGPU_GEM_CREATE_VRAM_CLEARED;
alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ?
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED :
AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
} else if (flags & ALLOC_MEM_FLAGS_GTT) {
alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
alloc_flags = 0;
} else if (flags & ALLOC_MEM_FLAGS_USERPTR) {
domain = AMDGPU_GEM_DOMAIN_GTT;
alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
alloc_flags = 0;
if (!offset || !*offset)
return -EINVAL;
user_addr = *offset;
} else {
return -EINVAL;
}
@ -1069,8 +1217,14 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n",
va, size, domain_string(alloc_domain));
ret = amdgpu_bo_create(adev, size, byte_align,
alloc_domain, alloc_flags, ttm_bo_type_device, NULL, &bo);
memset(&bp, 0, sizeof(bp));
bp.size = size;
bp.byte_align = byte_align;
bp.domain = alloc_domain;
bp.flags = alloc_flags;
bp.type = ttm_bo_type_device;
bp.resv = NULL;
ret = amdgpu_bo_create(adev, &bp, &bo);
if (ret) {
pr_debug("Failed to create BO on domain %s. ret %d\n",
domain_string(alloc_domain), ret);
@ -1078,18 +1232,34 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
}
bo->kfd_bo = *mem;
(*mem)->bo = bo;
if (user_addr)
bo->flags |= AMDGPU_AMDKFD_USERPTR_BO;
(*mem)->va = va;
(*mem)->domain = alloc_domain;
(*mem)->domain = domain;
(*mem)->mapped_to_gpu_memory = 0;
(*mem)->process_info = avm->process_info;
add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info);
add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr);
if (user_addr) {
ret = init_user_pages(*mem, current->mm, user_addr);
if (ret) {
mutex_lock(&avm->process_info->lock);
list_del(&(*mem)->validate_list.head);
mutex_unlock(&avm->process_info->lock);
goto allocate_init_user_pages_failed;
}
}
if (offset)
*offset = amdgpu_bo_mmap_offset(bo);
return 0;
allocate_init_user_pages_failed:
amdgpu_bo_unref(&bo);
/* Don't unreserve system mem limit twice */
goto err_reserve_system_mem;
err_bo_create:
unreserve_system_mem_limit(adev, size, alloc_domain);
err_reserve_system_mem:
@ -1122,12 +1292,24 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
* be freed anyway
*/
/* No more MMU notifiers */
amdgpu_mn_unregister(mem->bo);
/* Make sure restore workers don't access the BO any more */
bo_list_entry = &mem->validate_list;
mutex_lock(&process_info->lock);
list_del(&bo_list_entry->head);
mutex_unlock(&process_info->lock);
/* Free user pages if necessary */
if (mem->user_pages) {
pr_debug("%s: Freeing user_pages array\n", __func__);
if (mem->user_pages[0])
release_pages(mem->user_pages,
mem->bo->tbo.ttm->num_pages);
kvfree(mem->user_pages);
}
ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
if (unlikely(ret))
return ret;
@ -1173,21 +1355,32 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
struct kfd_bo_va_list *bo_va_entry = NULL;
struct kfd_bo_va_list *bo_va_entry_aql = NULL;
unsigned long bo_size;
bool is_invalid_userptr = false;
/* Make sure restore is not running concurrently.
bo = mem->bo;
if (!bo) {
pr_err("Invalid BO when mapping memory to GPU\n");
return -EINVAL;
}
/* Make sure restore is not running concurrently. Since we
* don't map invalid userptr BOs, we rely on the next restore
* worker to do the mapping
*/
mutex_lock(&mem->process_info->lock);
mutex_lock(&mem->lock);
bo = mem->bo;
if (!bo) {
pr_err("Invalid BO when mapping memory to GPU\n");
ret = -EINVAL;
goto out;
/* Lock mmap-sem. If we find an invalid userptr BO, we can be
* sure that the MMU notifier is no longer running
* concurrently and the queues are actually stopped
*/
if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
down_write(&current->mm->mmap_sem);
is_invalid_userptr = atomic_read(&mem->invalid);
up_write(&current->mm->mmap_sem);
}
mutex_lock(&mem->lock);
domain = mem->domain;
bo_size = bo->tbo.mem.size;
@ -1200,6 +1393,14 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
if (unlikely(ret))
goto out;
/* Userptr can be marked as "not invalid", but not actually be
* validated yet (still in the system domain). In that case
* the queues are still stopped and we can leave mapping for
* the next restore worker
*/
if (bo->tbo.mem.mem_type == TTM_PL_SYSTEM)
is_invalid_userptr = true;
if (check_if_add_bo_to_vm(avm, mem)) {
ret = add_bo_to_vm(adev, mem, avm, false,
&bo_va_entry);
@ -1217,7 +1418,8 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
goto add_bo_to_vm_failed;
}
if (mem->mapped_to_gpu_memory == 0) {
if (mem->mapped_to_gpu_memory == 0 &&
!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
/* Validate BO only once. The eviction fence gets added to BO
* the first time it is mapped. Validate will wait for all
* background evictions to complete.
@ -1235,7 +1437,8 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
entry->va, entry->va + bo_size,
entry);
ret = map_bo_to_gpuvm(adev, entry, ctx.sync);
ret = map_bo_to_gpuvm(adev, entry, ctx.sync,
is_invalid_userptr);
if (ret) {
pr_err("Failed to map radeon bo to gpuvm\n");
goto map_bo_to_gpuvm_failed;
@ -1418,6 +1621,337 @@ bo_reserve_failed:
return ret;
}
/* Evict a userptr BO by stopping the queues if necessary
*
* Runs in MMU notifier, may be in RECLAIM_FS context. This means it
* cannot do any memory allocations, and cannot take any locks that
* are held elsewhere while allocating memory. Therefore this is as
* simple as possible, using atomic counters.
*
* It doesn't do anything to the BO itself. The real work happens in
* restore, where we get updated page addresses. This function only
* ensures that GPU access to the BO is stopped.
*/
int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem,
struct mm_struct *mm)
{
struct amdkfd_process_info *process_info = mem->process_info;
int invalid, evicted_bos;
int r = 0;
invalid = atomic_inc_return(&mem->invalid);
evicted_bos = atomic_inc_return(&process_info->evicted_bos);
if (evicted_bos == 1) {
/* First eviction, stop the queues */
r = kgd2kfd->quiesce_mm(mm);
if (r)
pr_err("Failed to quiesce KFD\n");
schedule_delayed_work(&process_info->restore_userptr_work,
msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
}
return r;
}
/* Update invalid userptr BOs
*
* Moves invalidated (evicted) userptr BOs from userptr_valid_list to
* userptr_inval_list and updates user pages for all BOs that have
* been invalidated since their last update.
*/
static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
struct mm_struct *mm)
{
struct kgd_mem *mem, *tmp_mem;
struct amdgpu_bo *bo;
struct ttm_operation_ctx ctx = { false, false };
int invalid, ret;
/* Move all invalidated BOs to the userptr_inval_list and
* release their user pages by migration to the CPU domain
*/
list_for_each_entry_safe(mem, tmp_mem,
&process_info->userptr_valid_list,
validate_list.head) {
if (!atomic_read(&mem->invalid))
continue; /* BO is still valid */
bo = mem->bo;
if (amdgpu_bo_reserve(bo, true))
return -EAGAIN;
amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
amdgpu_bo_unreserve(bo);
if (ret) {
pr_err("%s: Failed to invalidate userptr BO\n",
__func__);
return -EAGAIN;
}
list_move_tail(&mem->validate_list.head,
&process_info->userptr_inval_list);
}
if (list_empty(&process_info->userptr_inval_list))
return 0; /* All evicted userptr BOs were freed */
/* Go through userptr_inval_list and update any invalid user_pages */
list_for_each_entry(mem, &process_info->userptr_inval_list,
validate_list.head) {
invalid = atomic_read(&mem->invalid);
if (!invalid)
/* BO hasn't been invalidated since the last
* revalidation attempt. Keep its BO list.
*/
continue;
bo = mem->bo;
if (!mem->user_pages) {
mem->user_pages =
kvmalloc_array(bo->tbo.ttm->num_pages,
sizeof(struct page *),
GFP_KERNEL | __GFP_ZERO);
if (!mem->user_pages) {
pr_err("%s: Failed to allocate pages array\n",
__func__);
return -ENOMEM;
}
} else if (mem->user_pages[0]) {
release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
}
/* Get updated user pages */
ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm,
mem->user_pages);
if (ret) {
mem->user_pages[0] = NULL;
pr_info("%s: Failed to get user pages: %d\n",
__func__, ret);
/* Pretend it succeeded. It will fail later
* with a VM fault if the GPU tries to access
* it. Better than hanging indefinitely with
* stalled user mode queues.
*/
}
/* Mark the BO as valid unless it was invalidated
* again concurrently
*/
if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid)
return -EAGAIN;
}
return 0;
}
/* Validate invalid userptr BOs
*
* Validates BOs on the userptr_inval_list, and moves them back to the
* userptr_valid_list. Also updates GPUVM page tables with new page
* addresses and waits for the page table updates to complete.
*/
static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
{
struct amdgpu_bo_list_entry *pd_bo_list_entries;
struct list_head resv_list, duplicates;
struct ww_acquire_ctx ticket;
struct amdgpu_sync sync;
struct amdgpu_vm *peer_vm;
struct kgd_mem *mem, *tmp_mem;
struct amdgpu_bo *bo;
struct ttm_operation_ctx ctx = { false, false };
int i, ret;
pd_bo_list_entries = kcalloc(process_info->n_vms,
sizeof(struct amdgpu_bo_list_entry),
GFP_KERNEL);
if (!pd_bo_list_entries) {
pr_err("%s: Failed to allocate PD BO list entries\n", __func__);
return -ENOMEM;
}
INIT_LIST_HEAD(&resv_list);
INIT_LIST_HEAD(&duplicates);
/* Get all the page directory BOs that need to be reserved */
i = 0;
list_for_each_entry(peer_vm, &process_info->vm_list_head,
vm_list_node)
amdgpu_vm_get_pd_bo(peer_vm, &resv_list,
&pd_bo_list_entries[i++]);
/* Add the userptr_inval_list entries to resv_list */
list_for_each_entry(mem, &process_info->userptr_inval_list,
validate_list.head) {
list_add_tail(&mem->resv_list.head, &resv_list);
mem->resv_list.bo = mem->validate_list.bo;
mem->resv_list.shared = mem->validate_list.shared;
}
/* Reserve all BOs and page tables for validation */
ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates);
WARN(!list_empty(&duplicates), "Duplicates should be empty");
if (ret)
goto out;
amdgpu_sync_create(&sync);
/* Avoid triggering eviction fences when unmapping invalid
* userptr BOs (waits for all fences, doesn't use
* FENCE_OWNER_VM)
*/
list_for_each_entry(peer_vm, &process_info->vm_list_head,
vm_list_node)
amdgpu_amdkfd_remove_eviction_fence(peer_vm->root.base.bo,
process_info->eviction_fence,
NULL, NULL);
ret = process_validate_vms(process_info);
if (ret)
goto unreserve_out;
/* Validate BOs and update GPUVM page tables */
list_for_each_entry_safe(mem, tmp_mem,
&process_info->userptr_inval_list,
validate_list.head) {
struct kfd_bo_va_list *bo_va_entry;
bo = mem->bo;
/* Copy pages array and validate the BO if we got user pages */
if (mem->user_pages[0]) {
amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
mem->user_pages);
amdgpu_ttm_placement_from_domain(bo, mem->domain);
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (ret) {
pr_err("%s: failed to validate BO\n", __func__);
goto unreserve_out;
}
}
/* Validate succeeded, now the BO owns the pages, free
* our copy of the pointer array. Put this BO back on
* the userptr_valid_list. If we need to revalidate
* it, we need to start from scratch.
*/
kvfree(mem->user_pages);
mem->user_pages = NULL;
list_move_tail(&mem->validate_list.head,
&process_info->userptr_valid_list);
/* Update mapping. If the BO was not validated
* (because we couldn't get user pages), this will
* clear the page table entries, which will result in
* VM faults if the GPU tries to access the invalid
* memory.
*/
list_for_each_entry(bo_va_entry, &mem->bo_va_list, bo_list) {
if (!bo_va_entry->is_mapped)
continue;
ret = update_gpuvm_pte((struct amdgpu_device *)
bo_va_entry->kgd_dev,
bo_va_entry, &sync);
if (ret) {
pr_err("%s: update PTE failed\n", __func__);
/* make sure this gets validated again */
atomic_inc(&mem->invalid);
goto unreserve_out;
}
}
}
/* Update page directories */
ret = process_update_pds(process_info, &sync);
unreserve_out:
list_for_each_entry(peer_vm, &process_info->vm_list_head,
vm_list_node)
amdgpu_bo_fence(peer_vm->root.base.bo,
&process_info->eviction_fence->base, true);
ttm_eu_backoff_reservation(&ticket, &resv_list);
amdgpu_sync_wait(&sync, false);
amdgpu_sync_free(&sync);
out:
kfree(pd_bo_list_entries);
return ret;
}
/* Worker callback to restore evicted userptr BOs
*
* Tries to update and validate all userptr BOs. If successful and no
* concurrent evictions happened, the queues are restarted. Otherwise,
* reschedule for another attempt later.
*/
static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
{
struct delayed_work *dwork = to_delayed_work(work);
struct amdkfd_process_info *process_info =
container_of(dwork, struct amdkfd_process_info,
restore_userptr_work);
struct task_struct *usertask;
struct mm_struct *mm;
int evicted_bos;
evicted_bos = atomic_read(&process_info->evicted_bos);
if (!evicted_bos)
return;
/* Reference task and mm in case of concurrent process termination */
usertask = get_pid_task(process_info->pid, PIDTYPE_PID);
if (!usertask)
return;
mm = get_task_mm(usertask);
if (!mm) {
put_task_struct(usertask);
return;
}
mutex_lock(&process_info->lock);
if (update_invalid_user_pages(process_info, mm))
goto unlock_out;
/* userptr_inval_list can be empty if all evicted userptr BOs
* have been freed. In that case there is nothing to validate
* and we can just restart the queues.
*/
if (!list_empty(&process_info->userptr_inval_list)) {
if (atomic_read(&process_info->evicted_bos) != evicted_bos)
goto unlock_out; /* Concurrent eviction, try again */
if (validate_invalid_user_pages(process_info))
goto unlock_out;
}
/* Final check for concurrent evicton and atomic update. If
* another eviction happens after successful update, it will
* be a first eviction that calls quiesce_mm. The eviction
* reference counting inside KFD will handle this case.
*/
if (atomic_cmpxchg(&process_info->evicted_bos, evicted_bos, 0) !=
evicted_bos)
goto unlock_out;
evicted_bos = 0;
if (kgd2kfd->resume_mm(mm)) {
pr_err("%s: Failed to resume KFD\n", __func__);
/* No recovery from this failure. Probably the CP is
* hanging. No point trying again.
*/
}
unlock_out:
mutex_unlock(&process_info->lock);
mmput(mm);
put_task_struct(usertask);
/* If validation failed, reschedule another attempt */
if (evicted_bos)
schedule_delayed_work(&process_info->restore_userptr_work,
msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
}
/** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given
* KFD process identified by process_info
*

View File

@ -322,3 +322,47 @@ int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev)
return ret;
}
union gfx_info {
struct atom_gfx_info_v2_4 v24;
};
int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev)
{
struct amdgpu_mode_info *mode_info = &adev->mode_info;
int index;
uint8_t frev, crev;
uint16_t data_offset;
index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
gfx_info);
if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, NULL,
&frev, &crev, &data_offset)) {
union gfx_info *gfx_info = (union gfx_info *)
(mode_info->atom_context->bios + data_offset);
switch (crev) {
case 4:
adev->gfx.config.max_shader_engines = gfx_info->v24.gc_num_se;
adev->gfx.config.max_cu_per_sh = gfx_info->v24.gc_num_cu_per_sh;
adev->gfx.config.max_sh_per_se = gfx_info->v24.gc_num_sh_per_se;
adev->gfx.config.max_backends_per_se = gfx_info->v24.gc_num_rb_per_se;
adev->gfx.config.max_texture_channel_caches = gfx_info->v24.gc_num_tccs;
adev->gfx.config.max_gprs = le16_to_cpu(gfx_info->v24.gc_num_gprs);
adev->gfx.config.max_gs_threads = gfx_info->v24.gc_num_max_gs_thds;
adev->gfx.config.gs_vgt_table_depth = gfx_info->v24.gc_gs_table_depth;
adev->gfx.config.gs_prim_buffer_depth =
le16_to_cpu(gfx_info->v24.gc_gsprim_buff_depth);
adev->gfx.config.double_offchip_lds_buf =
gfx_info->v24.gc_double_offchip_lds_buffer;
adev->gfx.cu_info.wave_front_size = le16_to_cpu(gfx_info->v24.gc_wave_size);
adev->gfx.cu_info.max_waves_per_simd = le16_to_cpu(gfx_info->v24.gc_max_waves_per_simd);
adev->gfx.cu_info.max_scratch_slots_per_cu = gfx_info->v24.gc_max_scratch_slots_per_cu;
adev->gfx.cu_info.lds_size = le16_to_cpu(gfx_info->v24.gc_lds_size);
return 0;
default:
return -EINVAL;
}
}
return -EINVAL;
}

View File

@ -30,5 +30,6 @@ int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev);
int amdgpu_atomfirmware_get_vram_width(struct amdgpu_device *adev);
int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev);
int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev);
int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev);
#endif

View File

@ -550,7 +550,7 @@ static int amdgpu_atpx_init(void)
* look up whether we are the integrated or discrete GPU (all asics).
* Returns the client id.
*/
static int amdgpu_atpx_get_client_id(struct pci_dev *pdev)
static enum vga_switcheroo_client_id amdgpu_atpx_get_client_id(struct pci_dev *pdev)
{
if (amdgpu_atpx_priv.dhandle == ACPI_HANDLE(&pdev->dev))
return VGA_SWITCHEROO_IGD;

View File

@ -75,13 +75,20 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size,
{
struct amdgpu_bo *dobj = NULL;
struct amdgpu_bo *sobj = NULL;
struct amdgpu_bo_param bp;
uint64_t saddr, daddr;
int r, n;
int time;
memset(&bp, 0, sizeof(bp));
bp.size = size;
bp.byte_align = PAGE_SIZE;
bp.domain = sdomain;
bp.flags = 0;
bp.type = ttm_bo_type_kernel;
bp.resv = NULL;
n = AMDGPU_BENCHMARK_ITERATIONS;
r = amdgpu_bo_create(adev, size, PAGE_SIZE,sdomain, 0,
ttm_bo_type_kernel, NULL, &sobj);
r = amdgpu_bo_create(adev, &bp, &sobj);
if (r) {
goto out_cleanup;
}
@ -93,8 +100,8 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size,
if (r) {
goto out_cleanup;
}
r = amdgpu_bo_create(adev, size, PAGE_SIZE, ddomain, 0,
ttm_bo_type_kernel, NULL, &dobj);
bp.domain = ddomain;
r = amdgpu_bo_create(adev, &bp, &dobj);
if (r) {
goto out_cleanup;
}

View File

@ -23,7 +23,6 @@
*/
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/pci.h>
#include <drm/drmP.h>
#include <linux/firmware.h>
#include <drm/amdgpu_drm.h>
@ -109,121 +108,6 @@ static void amdgpu_cgs_write_ind_register(struct cgs_device *cgs_device,
WARN(1, "Invalid indirect register space");
}
static int amdgpu_cgs_get_pci_resource(struct cgs_device *cgs_device,
enum cgs_resource_type resource_type,
uint64_t size,
uint64_t offset,
uint64_t *resource_base)
{
CGS_FUNC_ADEV;
if (resource_base == NULL)
return -EINVAL;
switch (resource_type) {
case CGS_RESOURCE_TYPE_MMIO:
if (adev->rmmio_size == 0)
return -ENOENT;
if ((offset + size) > adev->rmmio_size)
return -EINVAL;
*resource_base = adev->rmmio_base;
return 0;
case CGS_RESOURCE_TYPE_DOORBELL:
if (adev->doorbell.size == 0)
return -ENOENT;
if ((offset + size) > adev->doorbell.size)
return -EINVAL;
*resource_base = adev->doorbell.base;
return 0;
case CGS_RESOURCE_TYPE_FB:
case CGS_RESOURCE_TYPE_IO:
case CGS_RESOURCE_TYPE_ROM:
default:
return -EINVAL;
}
}
static const void *amdgpu_cgs_atom_get_data_table(struct cgs_device *cgs_device,
unsigned table, uint16_t *size,
uint8_t *frev, uint8_t *crev)
{
CGS_FUNC_ADEV;
uint16_t data_start;
if (amdgpu_atom_parse_data_header(
adev->mode_info.atom_context, table, size,
frev, crev, &data_start))
return (uint8_t*)adev->mode_info.atom_context->bios +
data_start;
return NULL;
}
static int amdgpu_cgs_atom_get_cmd_table_revs(struct cgs_device *cgs_device, unsigned table,
uint8_t *frev, uint8_t *crev)
{
CGS_FUNC_ADEV;
if (amdgpu_atom_parse_cmd_header(
adev->mode_info.atom_context, table,
frev, crev))
return 0;
return -EINVAL;
}
static int amdgpu_cgs_atom_exec_cmd_table(struct cgs_device *cgs_device, unsigned table,
void *args)
{
CGS_FUNC_ADEV;
return amdgpu_atom_execute_table(
adev->mode_info.atom_context, table, args);
}
static int amdgpu_cgs_set_clockgating_state(struct cgs_device *cgs_device,
enum amd_ip_block_type block_type,
enum amd_clockgating_state state)
{
CGS_FUNC_ADEV;
int i, r = -1;
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.valid)
continue;
if (adev->ip_blocks[i].version->type == block_type) {
r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
(void *)adev,
state);
break;
}
}
return r;
}
static int amdgpu_cgs_set_powergating_state(struct cgs_device *cgs_device,
enum amd_ip_block_type block_type,
enum amd_powergating_state state)
{
CGS_FUNC_ADEV;
int i, r = -1;
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.valid)
continue;
if (adev->ip_blocks[i].version->type == block_type) {
r = adev->ip_blocks[i].version->funcs->set_powergating_state(
(void *)adev,
state);
break;
}
}
return r;
}
static uint32_t fw_type_convert(struct cgs_device *cgs_device, uint32_t fw_type)
{
CGS_FUNC_ADEV;
@ -271,18 +155,6 @@ static uint32_t fw_type_convert(struct cgs_device *cgs_device, uint32_t fw_type)
return result;
}
static int amdgpu_cgs_rel_firmware(struct cgs_device *cgs_device, enum cgs_ucode_id type)
{
CGS_FUNC_ADEV;
if ((CGS_UCODE_ID_SMU == type) || (CGS_UCODE_ID_SMU_SK == type)) {
release_firmware(adev->pm.fw);
adev->pm.fw = NULL;
return 0;
}
/* cannot release other firmware because they are not created by cgs */
return -EINVAL;
}
static uint16_t amdgpu_get_firmware_version(struct cgs_device *cgs_device,
enum cgs_ucode_id type)
{
@ -326,34 +198,6 @@ static uint16_t amdgpu_get_firmware_version(struct cgs_device *cgs_device,
return fw_version;
}
static int amdgpu_cgs_enter_safe_mode(struct cgs_device *cgs_device,
bool en)
{
CGS_FUNC_ADEV;
if (adev->gfx.rlc.funcs->enter_safe_mode == NULL ||
adev->gfx.rlc.funcs->exit_safe_mode == NULL)
return 0;
if (en)
adev->gfx.rlc.funcs->enter_safe_mode(adev);
else
adev->gfx.rlc.funcs->exit_safe_mode(adev);
return 0;
}
static void amdgpu_cgs_lock_grbm_idx(struct cgs_device *cgs_device,
bool lock)
{
CGS_FUNC_ADEV;
if (lock)
mutex_lock(&adev->grbm_idx_mutex);
else
mutex_unlock(&adev->grbm_idx_mutex);
}
static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
enum cgs_ucode_id type,
struct cgs_firmware_info *info)
@ -541,6 +385,9 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
case CHIP_POLARIS12:
strcpy(fw_name, "amdgpu/polaris12_smc.bin");
break;
case CHIP_VEGAM:
strcpy(fw_name, "amdgpu/vegam_smc.bin");
break;
case CHIP_VEGA10:
if ((adev->pdev->device == 0x687f) &&
((adev->pdev->revision == 0xc0) ||
@ -553,6 +400,9 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
case CHIP_VEGA12:
strcpy(fw_name, "amdgpu/vega12_smc.bin");
break;
case CHIP_VEGA20:
strcpy(fw_name, "amdgpu/vega20_smc.bin");
break;
default:
DRM_ERROR("SMC firmware not supported\n");
return -EINVAL;
@ -598,97 +448,12 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
return 0;
}
static int amdgpu_cgs_is_virtualization_enabled(void *cgs_device)
{
CGS_FUNC_ADEV;
return amdgpu_sriov_vf(adev);
}
static int amdgpu_cgs_get_active_displays_info(struct cgs_device *cgs_device,
struct cgs_display_info *info)
{
CGS_FUNC_ADEV;
struct cgs_mode_info *mode_info;
if (info == NULL)
return -EINVAL;
mode_info = info->mode_info;
if (mode_info)
/* if the displays are off, vblank time is max */
mode_info->vblank_time_us = 0xffffffff;
if (!amdgpu_device_has_dc_support(adev)) {
struct amdgpu_crtc *amdgpu_crtc;
struct drm_device *ddev = adev->ddev;
struct drm_crtc *crtc;
uint32_t line_time_us, vblank_lines;
if (adev->mode_info.num_crtc && adev->mode_info.mode_config_initialized) {
list_for_each_entry(crtc,
&ddev->mode_config.crtc_list, head) {
amdgpu_crtc = to_amdgpu_crtc(crtc);
if (crtc->enabled) {
info->active_display_mask |= (1 << amdgpu_crtc->crtc_id);
info->display_count++;
}
if (mode_info != NULL &&
crtc->enabled && amdgpu_crtc->enabled &&
amdgpu_crtc->hw_mode.clock) {
line_time_us = (amdgpu_crtc->hw_mode.crtc_htotal * 1000) /
amdgpu_crtc->hw_mode.clock;
vblank_lines = amdgpu_crtc->hw_mode.crtc_vblank_end -
amdgpu_crtc->hw_mode.crtc_vdisplay +
(amdgpu_crtc->v_border * 2);
mode_info->vblank_time_us = vblank_lines * line_time_us;
mode_info->refresh_rate = drm_mode_vrefresh(&amdgpu_crtc->hw_mode);
/* we have issues with mclk switching with refresh rates
* over 120 hz on the non-DC code.
*/
if (mode_info->refresh_rate > 120)
mode_info->vblank_time_us = 0;
mode_info = NULL;
}
}
}
} else {
info->display_count = adev->pm.pm_display_cfg.num_display;
if (mode_info != NULL) {
mode_info->vblank_time_us = adev->pm.pm_display_cfg.min_vblank_time;
mode_info->refresh_rate = adev->pm.pm_display_cfg.vrefresh;
}
}
return 0;
}
static int amdgpu_cgs_notify_dpm_enabled(struct cgs_device *cgs_device, bool enabled)
{
CGS_FUNC_ADEV;
adev->pm.dpm_enabled = enabled;
return 0;
}
static const struct cgs_ops amdgpu_cgs_ops = {
.read_register = amdgpu_cgs_read_register,
.write_register = amdgpu_cgs_write_register,
.read_ind_register = amdgpu_cgs_read_ind_register,
.write_ind_register = amdgpu_cgs_write_ind_register,
.get_pci_resource = amdgpu_cgs_get_pci_resource,
.atom_get_data_table = amdgpu_cgs_atom_get_data_table,
.atom_get_cmd_table_revs = amdgpu_cgs_atom_get_cmd_table_revs,
.atom_exec_cmd_table = amdgpu_cgs_atom_exec_cmd_table,
.get_firmware_info = amdgpu_cgs_get_firmware_info,
.rel_firmware = amdgpu_cgs_rel_firmware,
.set_powergating_state = amdgpu_cgs_set_powergating_state,
.set_clockgating_state = amdgpu_cgs_set_clockgating_state,
.get_active_displays_info = amdgpu_cgs_get_active_displays_info,
.notify_dpm_enabled = amdgpu_cgs_notify_dpm_enabled,
.is_virtualization_enabled = amdgpu_cgs_is_virtualization_enabled,
.enter_safe_mode = amdgpu_cgs_enter_safe_mode,
.lock_grbm_idx = amdgpu_cgs_lock_grbm_idx,
};
struct cgs_device *amdgpu_cgs_create_device(struct amdgpu_device *adev)

View File

@ -691,7 +691,7 @@ static int amdgpu_connector_lvds_get_modes(struct drm_connector *connector)
return ret;
}
static int amdgpu_connector_lvds_mode_valid(struct drm_connector *connector,
static enum drm_mode_status amdgpu_connector_lvds_mode_valid(struct drm_connector *connector,
struct drm_display_mode *mode)
{
struct drm_encoder *encoder = amdgpu_connector_best_single_encoder(connector);
@ -843,7 +843,7 @@ static int amdgpu_connector_vga_get_modes(struct drm_connector *connector)
return ret;
}
static int amdgpu_connector_vga_mode_valid(struct drm_connector *connector,
static enum drm_mode_status amdgpu_connector_vga_mode_valid(struct drm_connector *connector,
struct drm_display_mode *mode)
{
struct drm_device *dev = connector->dev;
@ -1172,7 +1172,7 @@ static void amdgpu_connector_dvi_force(struct drm_connector *connector)
amdgpu_connector->use_digital = true;
}
static int amdgpu_connector_dvi_mode_valid(struct drm_connector *connector,
static enum drm_mode_status amdgpu_connector_dvi_mode_valid(struct drm_connector *connector,
struct drm_display_mode *mode)
{
struct drm_device *dev = connector->dev;
@ -1448,7 +1448,7 @@ out:
return ret;
}
static int amdgpu_connector_dp_mode_valid(struct drm_connector *connector,
static enum drm_mode_status amdgpu_connector_dp_mode_valid(struct drm_connector *connector,
struct drm_display_mode *mode)
{
struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);

View File

@ -382,8 +382,7 @@ retry:
p->bytes_moved += ctx.bytes_moved;
if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
bo->tbo.mem.mem_type == TTM_PL_VRAM &&
bo->tbo.mem.start < adev->gmc.visible_vram_size >> PAGE_SHIFT)
amdgpu_bo_in_cpu_visible_vram(bo))
p->bytes_moved_vis += ctx.bytes_moved;
if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
@ -411,7 +410,6 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
struct amdgpu_bo_list_entry *candidate = p->evictable;
struct amdgpu_bo *bo = candidate->robj;
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
u64 initial_bytes_moved, bytes_moved;
bool update_bytes_moved_vis;
uint32_t other;
@ -435,18 +433,14 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
continue;
/* Good we can try to move this BO somewhere else */
amdgpu_ttm_placement_from_domain(bo, other);
update_bytes_moved_vis =
adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
bo->tbo.mem.mem_type == TTM_PL_VRAM &&
bo->tbo.mem.start < adev->gmc.visible_vram_size >> PAGE_SHIFT;
initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
amdgpu_bo_in_cpu_visible_vram(bo);
amdgpu_ttm_placement_from_domain(bo, other);
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
bytes_moved = atomic64_read(&adev->num_bytes_moved) -
initial_bytes_moved;
p->bytes_moved += bytes_moved;
p->bytes_moved += ctx.bytes_moved;
if (update_bytes_moved_vis)
p->bytes_moved_vis += bytes_moved;
p->bytes_moved_vis += ctx.bytes_moved;
if (unlikely(r))
break;
@ -536,7 +530,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
if (p->bo_list) {
amdgpu_bo_list_get_list(p->bo_list, &p->validated);
if (p->bo_list->first_userptr != p->bo_list->num_entries)
p->mn = amdgpu_mn_get(p->adev);
p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX);
}
INIT_LIST_HEAD(&duplicates);

View File

@ -91,7 +91,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
continue;
r = drm_sched_entity_init(&ring->sched, &ctx->rings[i].entity,
rq, amdgpu_sched_jobs, &ctx->guilty);
rq, &ctx->guilty);
if (r)
goto failed;
}
@ -111,8 +111,9 @@ failed:
return r;
}
static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
static void amdgpu_ctx_fini(struct kref *ref)
{
struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
struct amdgpu_device *adev = ctx->adev;
unsigned i, j;
@ -125,13 +126,11 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
kfree(ctx->fences);
ctx->fences = NULL;
for (i = 0; i < adev->num_rings; i++)
drm_sched_entity_fini(&adev->rings[i]->sched,
&ctx->rings[i].entity);
amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr);
mutex_destroy(&ctx->lock);
kfree(ctx);
}
static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
@ -170,12 +169,20 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
static void amdgpu_ctx_do_release(struct kref *ref)
{
struct amdgpu_ctx *ctx;
u32 i;
ctx = container_of(ref, struct amdgpu_ctx, refcount);
amdgpu_ctx_fini(ctx);
for (i = 0; i < ctx->adev->num_rings; i++) {
kfree(ctx);
if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
continue;
drm_sched_entity_fini(&ctx->adev->rings[i]->sched,
&ctx->rings[i].entity);
}
amdgpu_ctx_fini(ref);
}
static int amdgpu_ctx_free(struct amdgpu_fpriv *fpriv, uint32_t id)
@ -437,16 +444,72 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
idr_init(&mgr->ctx_handles);
}
void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
{
struct amdgpu_ctx *ctx;
struct idr *idp;
uint32_t id, i;
idp = &mgr->ctx_handles;
idr_for_each_entry(idp, ctx, id) {
if (!ctx->adev)
return;
for (i = 0; i < ctx->adev->num_rings; i++) {
if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
continue;
if (kref_read(&ctx->refcount) == 1)
drm_sched_entity_do_release(&ctx->adev->rings[i]->sched,
&ctx->rings[i].entity);
else
DRM_ERROR("ctx %p is still alive\n", ctx);
}
}
}
void amdgpu_ctx_mgr_entity_cleanup(struct amdgpu_ctx_mgr *mgr)
{
struct amdgpu_ctx *ctx;
struct idr *idp;
uint32_t id, i;
idp = &mgr->ctx_handles;
idr_for_each_entry(idp, ctx, id) {
if (!ctx->adev)
return;
for (i = 0; i < ctx->adev->num_rings; i++) {
if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
continue;
if (kref_read(&ctx->refcount) == 1)
drm_sched_entity_cleanup(&ctx->adev->rings[i]->sched,
&ctx->rings[i].entity);
else
DRM_ERROR("ctx %p is still alive\n", ctx);
}
}
}
void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
{
struct amdgpu_ctx *ctx;
struct idr *idp;
uint32_t id;
amdgpu_ctx_mgr_entity_cleanup(mgr);
idp = &mgr->ctx_handles;
idr_for_each_entry(idp, ctx, id) {
if (kref_put(&ctx->refcount, amdgpu_ctx_do_release) != 1)
if (kref_put(&ctx->refcount, amdgpu_ctx_fini) != 1)
DRM_ERROR("ctx %p is still alive\n", ctx);
}

View File

@ -28,8 +28,13 @@
#include <linux/debugfs.h>
#include "amdgpu.h"
/*
* Debugfs
/**
* amdgpu_debugfs_add_files - Add simple debugfs entries
*
* @adev: Device to attach debugfs entries to
* @files: Array of function callbacks that respond to reads
* @nfiles: Number of callbacks to register
*
*/
int amdgpu_debugfs_add_files(struct amdgpu_device *adev,
const struct drm_info_list *files,
@ -64,7 +69,33 @@ int amdgpu_debugfs_add_files(struct amdgpu_device *adev,
#if defined(CONFIG_DEBUG_FS)
/**
* amdgpu_debugfs_process_reg_op - Handle MMIO register reads/writes
*
* @read: True if reading
* @f: open file handle
* @buf: User buffer to write/read to
* @size: Number of bytes to write/read
* @pos: Offset to seek to
*
* This debugfs entry has special meaning on the offset being sought.
* Various bits have different meanings:
*
* Bit 62: Indicates a GRBM bank switch is needed
* Bit 61: Indicates a SRBM bank switch is needed (implies bit 62 is
* zero)
* Bits 24..33: The SE or ME selector if needed
* Bits 34..43: The SH (or SA) or PIPE selector if needed
* Bits 44..53: The INSTANCE (or CU/WGP) or QUEUE selector if needed
*
* Bit 23: Indicates that the PM power gating lock should be held
* This is necessary to read registers that might be
* unreliable during a power gating transistion.
*
* The lower bits are the BYTE offset of the register to read. This
* allows reading multiple registers in a single call and having
* the returned size reflect that.
*/
static int amdgpu_debugfs_process_reg_op(bool read, struct file *f,
char __user *buf, size_t size, loff_t *pos)
{
@ -164,19 +195,37 @@ end:
return result;
}
/**
* amdgpu_debugfs_regs_read - Callback for reading MMIO registers
*/
static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
return amdgpu_debugfs_process_reg_op(true, f, buf, size, pos);
}
/**
* amdgpu_debugfs_regs_write - Callback for writing MMIO registers
*/
static ssize_t amdgpu_debugfs_regs_write(struct file *f, const char __user *buf,
size_t size, loff_t *pos)
{
return amdgpu_debugfs_process_reg_op(false, f, (char __user *)buf, size, pos);
}
/**
* amdgpu_debugfs_regs_pcie_read - Read from a PCIE register
*
* @f: open file handle
* @buf: User buffer to store read data in
* @size: Number of bytes to read
* @pos: Offset to seek to
*
* The lower bits are the BYTE offset of the register to read. This
* allows reading multiple registers in a single call and having
* the returned size reflect that.
*/
static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
@ -204,6 +253,18 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
return result;
}
/**
* amdgpu_debugfs_regs_pcie_write - Write to a PCIE register
*
* @f: open file handle
* @buf: User buffer to write data from
* @size: Number of bytes to write
* @pos: Offset to seek to
*
* The lower bits are the BYTE offset of the register to write. This
* allows writing multiple registers in a single call and having
* the returned size reflect that.
*/
static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user *buf,
size_t size, loff_t *pos)
{
@ -232,6 +293,18 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
return result;
}
/**
* amdgpu_debugfs_regs_didt_read - Read from a DIDT register
*
* @f: open file handle
* @buf: User buffer to store read data in
* @size: Number of bytes to read
* @pos: Offset to seek to
*
* The lower bits are the BYTE offset of the register to read. This
* allows reading multiple registers in a single call and having
* the returned size reflect that.
*/
static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
@ -259,6 +332,18 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
return result;
}
/**
* amdgpu_debugfs_regs_didt_write - Write to a DIDT register
*
* @f: open file handle
* @buf: User buffer to write data from
* @size: Number of bytes to write
* @pos: Offset to seek to
*
* The lower bits are the BYTE offset of the register to write. This
* allows writing multiple registers in a single call and having
* the returned size reflect that.
*/
static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user *buf,
size_t size, loff_t *pos)
{
@ -287,6 +372,18 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
return result;
}
/**
* amdgpu_debugfs_regs_smc_read - Read from a SMC register
*
* @f: open file handle
* @buf: User buffer to store read data in
* @size: Number of bytes to read
* @pos: Offset to seek to
*
* The lower bits are the BYTE offset of the register to read. This
* allows reading multiple registers in a single call and having
* the returned size reflect that.
*/
static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
@ -314,6 +411,18 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
return result;
}
/**
* amdgpu_debugfs_regs_smc_write - Write to a SMC register
*
* @f: open file handle
* @buf: User buffer to write data from
* @size: Number of bytes to write
* @pos: Offset to seek to
*
* The lower bits are the BYTE offset of the register to write. This
* allows writing multiple registers in a single call and having
* the returned size reflect that.
*/
static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *buf,
size_t size, loff_t *pos)
{
@ -342,6 +451,20 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
return result;
}
/**
* amdgpu_debugfs_gca_config_read - Read from gfx config data
*
* @f: open file handle
* @buf: User buffer to store read data in
* @size: Number of bytes to read
* @pos: Offset to seek to
*
* This file is used to access configuration data in a somewhat
* stable fashion. The format is a series of DWORDs with the first
* indicating which revision it is. New content is appended to the
* end so that older software can still read the data.
*/
static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
@ -418,6 +541,19 @@ static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf,
return result;
}
/**
* amdgpu_debugfs_sensor_read - Read from the powerplay sensors
*
* @f: open file handle
* @buf: User buffer to store read data in
* @size: Number of bytes to read
* @pos: Offset to seek to
*
* The offset is treated as the BYTE address of one of the sensors
* enumerated in amd/include/kgd_pp_interface.h under the
* 'amd_pp_sensors' enumeration. For instance to read the UVD VCLK
* you would use the offset 3 * 4 = 12.
*/
static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
@ -428,7 +564,7 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
if (size & 3 || *pos & 0x3)
return -EINVAL;
if (amdgpu_dpm == 0)
if (!adev->pm.dpm_enabled)
return -EINVAL;
/* convert offset to sensor number */
@ -457,6 +593,27 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
return !r ? outsize : r;
}
/** amdgpu_debugfs_wave_read - Read WAVE STATUS data
*
* @f: open file handle
* @buf: User buffer to store read data in
* @size: Number of bytes to read
* @pos: Offset to seek to
*
* The offset being sought changes which wave that the status data
* will be returned for. The bits are used as follows:
*
* Bits 0..6: Byte offset into data
* Bits 7..14: SE selector
* Bits 15..22: SH/SA selector
* Bits 23..30: CU/{WGP+SIMD} selector
* Bits 31..36: WAVE ID selector
* Bits 37..44: SIMD ID selector
*
* The returned data begins with one DWORD of version information
* Followed by WAVE STATUS registers relevant to the GFX IP version
* being used. See gfx_v8_0_read_wave_data() for an example output.
*/
static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
@ -507,6 +664,28 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
return result;
}
/** amdgpu_debugfs_gpr_read - Read wave gprs
*
* @f: open file handle
* @buf: User buffer to store read data in
* @size: Number of bytes to read
* @pos: Offset to seek to
*
* The offset being sought changes which wave that the status data
* will be returned for. The bits are used as follows:
*
* Bits 0..11: Byte offset into data
* Bits 12..19: SE selector
* Bits 20..27: SH/SA selector
* Bits 28..35: CU/{WGP+SIMD} selector
* Bits 36..43: WAVE ID selector
* Bits 37..44: SIMD ID selector
* Bits 52..59: Thread selector
* Bits 60..61: Bank selector (VGPR=0,SGPR=1)
*
* The return data comes from the SGPR or VGPR register bank for
* the selected operational unit.
*/
static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
@ -637,6 +816,12 @@ static const char *debugfs_regs_names[] = {
"amdgpu_gpr",
};
/**
* amdgpu_debugfs_regs_init - Initialize debugfs entries that provide
* register access.
*
* @adev: The device to attach the debugfs entries to
*/
int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
{
struct drm_minor *minor = adev->ddev->primary;

View File

@ -83,8 +83,10 @@ static const char *amdgpu_asic_name[] = {
"POLARIS10",
"POLARIS11",
"POLARIS12",
"VEGAM",
"VEGA10",
"VEGA12",
"VEGA20",
"RAVEN",
"LAST",
};
@ -690,6 +692,8 @@ void amdgpu_device_gart_location(struct amdgpu_device *adev,
{
u64 size_af, size_bf;
mc->gart_size += adev->pm.smu_prv_buffer_size;
size_af = adev->gmc.mc_mask - mc->vram_end;
size_bf = mc->vram_start;
if (size_bf > size_af) {
@ -907,6 +911,46 @@ static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
}
}
static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
{
struct sysinfo si;
bool is_os_64 = (sizeof(void *) == 8) ? true : false;
uint64_t total_memory;
uint64_t dram_size_seven_GB = 0x1B8000000;
uint64_t dram_size_three_GB = 0xB8000000;
if (amdgpu_smu_memory_pool_size == 0)
return;
if (!is_os_64) {
DRM_WARN("Not 64-bit OS, feature not supported\n");
goto def_value;
}
si_meminfo(&si);
total_memory = (uint64_t)si.totalram * si.mem_unit;
if ((amdgpu_smu_memory_pool_size == 1) ||
(amdgpu_smu_memory_pool_size == 2)) {
if (total_memory < dram_size_three_GB)
goto def_value1;
} else if ((amdgpu_smu_memory_pool_size == 4) ||
(amdgpu_smu_memory_pool_size == 8)) {
if (total_memory < dram_size_seven_GB)
goto def_value1;
} else {
DRM_WARN("Smu memory pool size not supported\n");
goto def_value;
}
adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
return;
def_value1:
DRM_WARN("No enough system memory\n");
def_value:
adev->pm.smu_prv_buffer_size = 0;
}
/**
* amdgpu_device_check_arguments - validate module params
*
@ -948,6 +992,8 @@ static void amdgpu_device_check_arguments(struct amdgpu_device *adev)
amdgpu_vm_fragment_size = -1;
}
amdgpu_device_check_smu_prv_buffer_size(adev);
amdgpu_device_check_vm_size(adev);
amdgpu_device_check_block_size(adev);
@ -1039,10 +1085,11 @@ static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
* the hardware IP specified.
* Returns the error code from the last instance.
*/
int amdgpu_device_ip_set_clockgating_state(struct amdgpu_device *adev,
int amdgpu_device_ip_set_clockgating_state(void *dev,
enum amd_ip_block_type block_type,
enum amd_clockgating_state state)
{
struct amdgpu_device *adev = dev;
int i, r = 0;
for (i = 0; i < adev->num_ip_blocks; i++) {
@ -1072,10 +1119,11 @@ int amdgpu_device_ip_set_clockgating_state(struct amdgpu_device *adev,
* the hardware IP specified.
* Returns the error code from the last instance.
*/
int amdgpu_device_ip_set_powergating_state(struct amdgpu_device *adev,
int amdgpu_device_ip_set_powergating_state(void *dev,
enum amd_ip_block_type block_type,
enum amd_powergating_state state)
{
struct amdgpu_device *adev = dev;
int i, r = 0;
for (i = 0; i < adev->num_ip_blocks; i++) {
@ -1320,9 +1368,10 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
case CHIP_TOPAZ:
case CHIP_TONGA:
case CHIP_FIJI:
case CHIP_POLARIS11:
case CHIP_POLARIS10:
case CHIP_POLARIS11:
case CHIP_POLARIS12:
case CHIP_VEGAM:
case CHIP_CARRIZO:
case CHIP_STONEY:
#ifdef CONFIG_DRM_AMDGPU_SI
@ -1339,6 +1388,7 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
case CHIP_KABINI:
case CHIP_MULLINS:
#endif
case CHIP_VEGA20:
default:
return 0;
case CHIP_VEGA10:
@ -1428,9 +1478,10 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
case CHIP_TOPAZ:
case CHIP_TONGA:
case CHIP_FIJI:
case CHIP_POLARIS11:
case CHIP_POLARIS10:
case CHIP_POLARIS11:
case CHIP_POLARIS12:
case CHIP_VEGAM:
case CHIP_CARRIZO:
case CHIP_STONEY:
if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY)
@ -1472,6 +1523,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
#endif
case CHIP_VEGA10:
case CHIP_VEGA12:
case CHIP_VEGA20:
case CHIP_RAVEN:
if (adev->asic_type == CHIP_RAVEN)
adev->family = AMDGPU_FAMILY_RV;
@ -1499,6 +1551,8 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
return -EAGAIN;
}
adev->powerplay.pp_feature = amdgpu_pp_feature_mask;
for (i = 0; i < adev->num_ip_blocks; i++) {
if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
DRM_ERROR("disabled ip block: %d <%s>\n",
@ -1654,12 +1708,17 @@ static int amdgpu_device_ip_late_set_cg_state(struct amdgpu_device *adev)
if (amdgpu_emu_mode == 1)
return 0;
r = amdgpu_ib_ring_tests(adev);
if (r)
DRM_ERROR("ib ring test failed (%d).\n", r);
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.valid)
continue;
/* skip CG for VCE/UVD, it's handled specially */
if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
adev->ip_blocks[i].version->funcs->set_clockgating_state) {
/* enable clockgating to save power */
r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
@ -1704,8 +1763,8 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
}
}
mod_delayed_work(system_wq, &adev->late_init_work,
msecs_to_jiffies(AMDGPU_RESUME_MS));
queue_delayed_work(system_wq, &adev->late_init_work,
msecs_to_jiffies(AMDGPU_RESUME_MS));
amdgpu_device_fill_reset_magic(adev);
@ -1759,6 +1818,7 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
adev->ip_blocks[i].version->funcs->set_clockgating_state) {
/* ungate blocks before hw fini so that we can shutdown the blocks safely */
r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
@ -1850,6 +1910,12 @@ int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
if (amdgpu_sriov_vf(adev))
amdgpu_virt_request_full_gpu(adev, false);
/* ungate SMC block powergating */
if (adev->powerplay.pp_feature & PP_GFXOFF_MASK)
amdgpu_device_ip_set_powergating_state(adev,
AMD_IP_BLOCK_TYPE_SMC,
AMD_CG_STATE_UNGATE);
/* ungate SMC block first */
r = amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_SMC,
AMD_CG_STATE_UNGATE);
@ -2086,16 +2152,15 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
case CHIP_MULLINS:
case CHIP_CARRIZO:
case CHIP_STONEY:
case CHIP_POLARIS11:
case CHIP_POLARIS10:
case CHIP_POLARIS11:
case CHIP_POLARIS12:
case CHIP_VEGAM:
case CHIP_TONGA:
case CHIP_FIJI:
#if defined(CONFIG_DRM_AMD_DC_PRE_VEGA)
return amdgpu_dc != 0;
#endif
case CHIP_VEGA10:
case CHIP_VEGA12:
case CHIP_VEGA20:
#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
case CHIP_RAVEN:
#endif
@ -2375,10 +2440,6 @@ fence_driver_init:
goto failed;
}
r = amdgpu_ib_ring_tests(adev);
if (r)
DRM_ERROR("ib ring test failed (%d).\n", r);
if (amdgpu_sriov_vf(adev))
amdgpu_virt_init_data_exchange(adev);
@ -2539,7 +2600,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
/* unpin the front buffers and cursors */
list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
struct amdgpu_framebuffer *rfb = to_amdgpu_framebuffer(crtc->primary->fb);
struct drm_framebuffer *fb = crtc->primary->fb;
struct amdgpu_bo *robj;
if (amdgpu_crtc->cursor_bo) {
@ -2551,10 +2612,10 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
}
}
if (rfb == NULL || rfb->obj == NULL) {
if (fb == NULL || fb->obj[0] == NULL) {
continue;
}
robj = gem_to_amdgpu_bo(rfb->obj);
robj = gem_to_amdgpu_bo(fb->obj[0]);
/* don't unpin kernel fb objects */
if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
r = amdgpu_bo_reserve(robj, true);
@ -2640,11 +2701,6 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
}
amdgpu_fence_driver_resume(adev);
if (resume) {
r = amdgpu_ib_ring_tests(adev);
if (r)
DRM_ERROR("ib ring test failed (%d).\n", r);
}
r = amdgpu_device_ip_late_init(adev);
if (r)
@ -2736,6 +2792,9 @@ static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
if (amdgpu_sriov_vf(adev))
return true;
if (amdgpu_asic_need_full_reset(adev))
return true;
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.valid)
continue;
@ -2792,6 +2851,9 @@ static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
{
int i;
if (amdgpu_asic_need_full_reset(adev))
return true;
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.valid)
continue;
@ -3087,20 +3149,19 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
/* now we are okay to resume SMC/CP/SDMA */
r = amdgpu_device_ip_reinit_late_sriov(adev);
amdgpu_virt_release_full_gpu(adev, true);
if (r)
goto error;
amdgpu_irq_gpu_reset_resume_helper(adev);
r = amdgpu_ib_ring_tests(adev);
error:
amdgpu_virt_release_full_gpu(adev, true);
if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
atomic_inc(&adev->vram_lost_counter);
r = amdgpu_device_handle_vram_lost(adev);
}
error:
return r;
}
@ -3117,7 +3178,6 @@ error:
int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
struct amdgpu_job *job, bool force)
{
struct drm_atomic_state *state = NULL;
int i, r, resched;
if (!force && !amdgpu_device_ip_check_soft_reset(adev)) {
@ -3140,10 +3200,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
/* block TTM */
resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
/* store modesetting */
if (amdgpu_device_has_dc_support(adev))
state = drm_atomic_helper_suspend(adev->ddev);
/* block all schedulers and reset given job's ring */
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
@ -3183,10 +3239,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
kthread_unpark(ring->sched.thread);
}
if (amdgpu_device_has_dc_support(adev)) {
if (drm_atomic_helper_resume(adev->ddev, state))
dev_info(adev->dev, "drm resume failed:%d\n", r);
} else {
if (!amdgpu_device_has_dc_support(adev)) {
drm_helper_resume_force_mode(adev->ddev);
}

View File

@ -35,6 +35,7 @@
#include <linux/pm_runtime.h>
#include <drm/drm_crtc_helper.h>
#include <drm/drm_edid.h>
#include <drm/drm_gem_framebuffer_helper.h>
#include <drm/drm_fb_helper.h>
static void amdgpu_display_flip_callback(struct dma_fence *f,
@ -151,8 +152,6 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
struct drm_device *dev = crtc->dev;
struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
struct amdgpu_framebuffer *old_amdgpu_fb;
struct amdgpu_framebuffer *new_amdgpu_fb;
struct drm_gem_object *obj;
struct amdgpu_flip_work *work;
struct amdgpu_bo *new_abo;
@ -174,15 +173,13 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
work->async = (page_flip_flags & DRM_MODE_PAGE_FLIP_ASYNC) != 0;
/* schedule unpin of the old buffer */
old_amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
obj = old_amdgpu_fb->obj;
obj = crtc->primary->fb->obj[0];
/* take a reference to the old object */
work->old_abo = gem_to_amdgpu_bo(obj);
amdgpu_bo_ref(work->old_abo);
new_amdgpu_fb = to_amdgpu_framebuffer(fb);
obj = new_amdgpu_fb->obj;
obj = fb->obj[0];
new_abo = gem_to_amdgpu_bo(obj);
/* pin the new buffer */
@ -192,7 +189,7 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
goto cleanup;
}
r = amdgpu_bo_pin(new_abo, amdgpu_display_framebuffer_domains(adev), &base);
r = amdgpu_bo_pin(new_abo, amdgpu_display_supported_domains(adev), &base);
if (unlikely(r != 0)) {
DRM_ERROR("failed to pin new abo buffer before flip\n");
goto unreserve;
@ -482,31 +479,12 @@ bool amdgpu_display_ddc_probe(struct amdgpu_connector *amdgpu_connector,
return true;
}
static void amdgpu_display_user_framebuffer_destroy(struct drm_framebuffer *fb)
{
struct amdgpu_framebuffer *amdgpu_fb = to_amdgpu_framebuffer(fb);
drm_gem_object_put_unlocked(amdgpu_fb->obj);
drm_framebuffer_cleanup(fb);
kfree(amdgpu_fb);
}
static int amdgpu_display_user_framebuffer_create_handle(
struct drm_framebuffer *fb,
struct drm_file *file_priv,
unsigned int *handle)
{
struct amdgpu_framebuffer *amdgpu_fb = to_amdgpu_framebuffer(fb);
return drm_gem_handle_create(file_priv, amdgpu_fb->obj, handle);
}
static const struct drm_framebuffer_funcs amdgpu_fb_funcs = {
.destroy = amdgpu_display_user_framebuffer_destroy,
.create_handle = amdgpu_display_user_framebuffer_create_handle,
.destroy = drm_gem_fb_destroy,
.create_handle = drm_gem_fb_create_handle,
};
uint32_t amdgpu_display_framebuffer_domains(struct amdgpu_device *adev)
uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev)
{
uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM;
@ -526,11 +504,11 @@ int amdgpu_display_framebuffer_init(struct drm_device *dev,
struct drm_gem_object *obj)
{
int ret;
rfb->obj = obj;
rfb->base.obj[0] = obj;
drm_helper_mode_fill_fb_struct(dev, &rfb->base, mode_cmd);
ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs);
if (ret) {
rfb->obj = NULL;
rfb->base.obj[0] = NULL;
return ret;
}
return 0;

View File

@ -23,7 +23,7 @@
#ifndef __AMDGPU_DISPLAY_H__
#define __AMDGPU_DISPLAY_H__
uint32_t amdgpu_display_framebuffer_domains(struct amdgpu_device *adev);
uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev);
struct drm_framebuffer *
amdgpu_display_user_framebuffer_create(struct drm_device *dev,
struct drm_file *file_priv,

View File

@ -115,6 +115,26 @@ void amdgpu_dpm_print_ps_status(struct amdgpu_device *adev,
pr_cont("\n");
}
void amdgpu_dpm_get_active_displays(struct amdgpu_device *adev)
{
struct drm_device *ddev = adev->ddev;
struct drm_crtc *crtc;
struct amdgpu_crtc *amdgpu_crtc;
adev->pm.dpm.new_active_crtcs = 0;
adev->pm.dpm.new_active_crtc_count = 0;
if (adev->mode_info.num_crtc && adev->mode_info.mode_config_initialized) {
list_for_each_entry(crtc,
&ddev->mode_config.crtc_list, head) {
amdgpu_crtc = to_amdgpu_crtc(crtc);
if (amdgpu_crtc->enabled) {
adev->pm.dpm.new_active_crtcs |= (1 << amdgpu_crtc->crtc_id);
adev->pm.dpm.new_active_crtc_count++;
}
}
}
}
u32 amdgpu_dpm_get_vblank_time(struct amdgpu_device *adev)
{

View File

@ -52,8 +52,6 @@ enum amdgpu_dpm_event_src {
AMDGPU_DPM_EVENT_SRC_DIGIAL_OR_EXTERNAL = 4
};
#define SCLK_DEEP_SLEEP_MASK 0x8
struct amdgpu_ps {
u32 caps; /* vbios flags */
u32 class; /* vbios flags */
@ -349,12 +347,6 @@ enum amdgpu_pcie_gen {
((adev)->powerplay.pp_funcs->set_clockgating_by_smu(\
(adev)->powerplay.pp_handle, msg_id))
#define amdgpu_dpm_notify_smu_memory_info(adev, virtual_addr_low, \
virtual_addr_hi, mc_addr_low, mc_addr_hi, size) \
((adev)->powerplay.pp_funcs->notify_smu_memory_info)( \
(adev)->powerplay.pp_handle, virtual_addr_low, \
virtual_addr_hi, mc_addr_low, mc_addr_hi, size)
#define amdgpu_dpm_get_power_profile_mode(adev, buf) \
((adev)->powerplay.pp_funcs->get_power_profile_mode(\
(adev)->powerplay.pp_handle, buf))
@ -445,6 +437,8 @@ struct amdgpu_pm {
uint32_t pcie_gen_mask;
uint32_t pcie_mlw_mask;
struct amd_pp_display_configuration pm_display_cfg;/* set by dc */
uint32_t smu_prv_buffer_size;
struct amdgpu_bo *smu_prv_buffer;
};
#define R600_SSTU_DFLT 0
@ -482,6 +476,7 @@ void amdgpu_dpm_print_ps_status(struct amdgpu_device *adev,
struct amdgpu_ps *rps);
u32 amdgpu_dpm_get_vblank_time(struct amdgpu_device *adev);
u32 amdgpu_dpm_get_vrefresh(struct amdgpu_device *adev);
void amdgpu_dpm_get_active_displays(struct amdgpu_device *adev);
bool amdgpu_is_uvd_state(u32 class, u32 class2);
void amdgpu_calculate_u_and_p(u32 i, u32 r_c, u32 p_b,
u32 *p, u32 *u);

View File

@ -75,9 +75,10 @@
* - 3.23.0 - Add query for VRAM lost counter
* - 3.24.0 - Add high priority compute support for gfx9
* - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk).
* - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE.
*/
#define KMS_DRIVER_MAJOR 3
#define KMS_DRIVER_MINOR 25
#define KMS_DRIVER_MINOR 26
#define KMS_DRIVER_PATCHLEVEL 0
int amdgpu_vram_limit = 0;
@ -121,7 +122,7 @@ uint amdgpu_pg_mask = 0xffffffff;
uint amdgpu_sdma_phase_quantum = 32;
char *amdgpu_disable_cu = NULL;
char *amdgpu_virtual_display = NULL;
uint amdgpu_pp_feature_mask = 0xffffbfff;
uint amdgpu_pp_feature_mask = 0xffff3fff; /* gfxoff (bit 15) disabled by default */
int amdgpu_ngg = 0;
int amdgpu_prim_buf_per_se = 0;
int amdgpu_pos_buf_per_se = 0;
@ -132,6 +133,7 @@ int amdgpu_lbpw = -1;
int amdgpu_compute_multipipe = -1;
int amdgpu_gpu_recovery = -1; /* auto */
int amdgpu_emu_mode = 0;
uint amdgpu_smu_memory_pool_size = 0;
MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");
module_param_named(vramlimit, amdgpu_vram_limit, int, 0600);
@ -316,6 +318,11 @@ MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled (default), 0 = disabled)
module_param_named(cik_support, amdgpu_cik_support, int, 0444);
#endif
MODULE_PARM_DESC(smu_memory_pool_size,
"reserve gtt for smu debug usage, 0 = disable,"
"0x1 = 256Mbyte, 0x2 = 512Mbyte, 0x4 = 1 Gbyte, 0x8 = 2GByte");
module_param_named(smu_memory_pool_size, amdgpu_smu_memory_pool_size, uint, 0444);
static const struct pci_device_id pciidlist[] = {
#ifdef CONFIG_DRM_AMDGPU_SI
{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
@ -534,6 +541,9 @@ static const struct pci_device_id pciidlist[] = {
{0x1002, 0x6995, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
{0x1002, 0x6997, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
{0x1002, 0x699F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
/* VEGAM */
{0x1002, 0x694C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGAM},
{0x1002, 0x694E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGAM},
/* Vega 10 */
{0x1002, 0x6860, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
{0x1002, 0x6861, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
@ -550,6 +560,13 @@ static const struct pci_device_id pciidlist[] = {
{0x1002, 0x69A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
{0x1002, 0x69A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
{0x1002, 0x69AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
/* Vega 20 */
{0x1002, 0x66A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
{0x1002, 0x66A1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
{0x1002, 0x66A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
{0x1002, 0x66A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
{0x1002, 0x66A7, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
{0x1002, 0x66AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
/* Raven */
{0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU},

View File

@ -137,7 +137,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
/* need to align pitch with crtc limits */
mode_cmd->pitches[0] = amdgpu_align_pitch(adev, mode_cmd->width, cpp,
fb_tiled);
domain = amdgpu_display_framebuffer_domains(adev);
domain = amdgpu_display_supported_domains(adev);
height = ALIGN(mode_cmd->height, 8);
size = mode_cmd->pitches[0] * height;
@ -292,9 +292,9 @@ static int amdgpu_fbdev_destroy(struct drm_device *dev, struct amdgpu_fbdev *rfb
drm_fb_helper_unregister_fbi(&rfbdev->helper);
if (rfb->obj) {
amdgpufb_destroy_pinned_object(rfb->obj);
rfb->obj = NULL;
if (rfb->base.obj[0]) {
amdgpufb_destroy_pinned_object(rfb->base.obj[0]);
rfb->base.obj[0] = NULL;
drm_framebuffer_unregister_private(&rfb->base);
drm_framebuffer_cleanup(&rfb->base);
}
@ -377,7 +377,7 @@ int amdgpu_fbdev_total_size(struct amdgpu_device *adev)
if (!adev->mode_info.rfbdev)
return 0;
robj = gem_to_amdgpu_bo(adev->mode_info.rfbdev->rfb.obj);
robj = gem_to_amdgpu_bo(adev->mode_info.rfbdev->rfb.base.obj[0]);
size += amdgpu_bo_size(robj);
return size;
}
@ -386,7 +386,7 @@ bool amdgpu_fbdev_robj_is_fb(struct amdgpu_device *adev, struct amdgpu_bo *robj)
{
if (!adev->mode_info.rfbdev)
return false;
if (robj == gem_to_amdgpu_bo(adev->mode_info.rfbdev->rfb.obj))
if (robj == gem_to_amdgpu_bo(adev->mode_info.rfbdev->rfb.base.obj[0]))
return true;
return false;
}

View File

@ -131,7 +131,8 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
* Emits a fence command on the requested ring (all asics).
* Returns 0 on success, -ENOMEM on failure.
*/
int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f)
int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f,
unsigned flags)
{
struct amdgpu_device *adev = ring->adev;
struct amdgpu_fence *fence;
@ -149,7 +150,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f)
adev->fence_context + ring->idx,
seq);
amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
seq, AMDGPU_FENCE_FLAG_INT);
seq, flags | AMDGPU_FENCE_FLAG_INT);
ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
/* This function can't be called concurrently anyway, otherwise
@ -375,14 +376,14 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
struct amdgpu_device *adev = ring->adev;
uint64_t index;
if (ring != &adev->uvd.ring) {
if (ring != &adev->uvd.inst[ring->me].ring) {
ring->fence_drv.cpu_addr = &adev->wb.wb[ring->fence_offs];
ring->fence_drv.gpu_addr = adev->wb.gpu_addr + (ring->fence_offs * 4);
} else {
/* put fence directly behind firmware */
index = ALIGN(adev->uvd.fw->size, 8);
ring->fence_drv.cpu_addr = adev->uvd.cpu_addr + index;
ring->fence_drv.gpu_addr = adev->uvd.gpu_addr + index;
ring->fence_drv.cpu_addr = adev->uvd.inst[ring->me].cpu_addr + index;
ring->fence_drv.gpu_addr = adev->uvd.inst[ring->me].gpu_addr + index;
}
amdgpu_fence_write(ring, atomic_read(&ring->fence_drv.last_seq));
amdgpu_irq_get(adev, irq_src, irq_type);

View File

@ -113,12 +113,17 @@ int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev)
int r;
if (adev->gart.robj == NULL) {
r = amdgpu_bo_create(adev, adev->gart.table_size, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM,
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
ttm_bo_type_kernel, NULL,
&adev->gart.robj);
struct amdgpu_bo_param bp;
memset(&bp, 0, sizeof(bp));
bp.size = adev->gart.table_size;
bp.byte_align = PAGE_SIZE;
bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
bp.type = ttm_bo_type_kernel;
bp.resv = NULL;
r = amdgpu_bo_create(adev, &bp, &adev->gart.robj);
if (r) {
return r;
}

View File

@ -48,17 +48,25 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
struct drm_gem_object **obj)
{
struct amdgpu_bo *bo;
struct amdgpu_bo_param bp;
int r;
memset(&bp, 0, sizeof(bp));
*obj = NULL;
/* At least align on page size */
if (alignment < PAGE_SIZE) {
alignment = PAGE_SIZE;
}
bp.size = size;
bp.byte_align = alignment;
bp.type = type;
bp.resv = resv;
bp.preferred_domain = initial_domain;
retry:
r = amdgpu_bo_create(adev, size, alignment, initial_domain,
flags, type, resv, &bo);
bp.flags = flags;
bp.domain = initial_domain;
r = amdgpu_bo_create(adev, &bp, &bo);
if (r) {
if (r != -ERESTARTSYS) {
if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) {
@ -221,12 +229,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
return -EINVAL;
/* reject invalid gem domains */
if (args->in.domains & ~(AMDGPU_GEM_DOMAIN_CPU |
AMDGPU_GEM_DOMAIN_GTT |
AMDGPU_GEM_DOMAIN_VRAM |
AMDGPU_GEM_DOMAIN_GDS |
AMDGPU_GEM_DOMAIN_GWS |
AMDGPU_GEM_DOMAIN_OA))
if (args->in.domains & ~AMDGPU_GEM_DOMAIN_MASK)
return -EINVAL;
/* create a gem object to contain this object in */
@ -771,16 +774,23 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,
}
#if defined(CONFIG_DEBUG_FS)
#define amdgpu_debugfs_gem_bo_print_flag(m, bo, flag) \
if (bo->flags & (AMDGPU_GEM_CREATE_ ## flag)) { \
seq_printf((m), " " #flag); \
}
static int amdgpu_debugfs_gem_bo_info(int id, void *ptr, void *data)
{
struct drm_gem_object *gobj = ptr;
struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
struct seq_file *m = data;
struct dma_buf_attachment *attachment;
struct dma_buf *dma_buf;
unsigned domain;
const char *placement;
unsigned pin_count;
uint64_t offset;
domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
switch (domain) {
@ -798,13 +808,27 @@ static int amdgpu_debugfs_gem_bo_info(int id, void *ptr, void *data)
seq_printf(m, "\t0x%08x: %12ld byte %s",
id, amdgpu_bo_size(bo), placement);
offset = READ_ONCE(bo->tbo.mem.start);
if (offset != AMDGPU_BO_INVALID_OFFSET)
seq_printf(m, " @ 0x%010Lx", offset);
pin_count = READ_ONCE(bo->pin_count);
if (pin_count)
seq_printf(m, " pin count %d", pin_count);
dma_buf = READ_ONCE(bo->gem_base.dma_buf);
attachment = READ_ONCE(bo->gem_base.import_attach);
if (attachment)
seq_printf(m, " imported from %p", dma_buf);
else if (dma_buf)
seq_printf(m, " exported as %p", dma_buf);
amdgpu_debugfs_gem_bo_print_flag(m, bo, CPU_ACCESS_REQUIRED);
amdgpu_debugfs_gem_bo_print_flag(m, bo, NO_CPU_ACCESS);
amdgpu_debugfs_gem_bo_print_flag(m, bo, CPU_GTT_USWC);
amdgpu_debugfs_gem_bo_print_flag(m, bo, VRAM_CLEARED);
amdgpu_debugfs_gem_bo_print_flag(m, bo, SHADOW);
amdgpu_debugfs_gem_bo_print_flag(m, bo, VRAM_CONTIGUOUS);
amdgpu_debugfs_gem_bo_print_flag(m, bo, VM_ALWAYS_VALID);
amdgpu_debugfs_gem_bo_print_flag(m, bo, EXPLICIT_SYNC);
seq_printf(m, "\n");
return 0;

View File

@ -127,6 +127,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
struct amdgpu_vm *vm;
uint64_t fence_ctx;
uint32_t status = 0, alloc_size;
unsigned fence_flags = 0;
unsigned i;
int r = 0;
@ -227,7 +228,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
#endif
amdgpu_asic_invalidate_hdp(adev, ring);
r = amdgpu_fence_emit(ring, f);
if (ib->flags & AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE)
fence_flags |= AMDGPU_FENCE_FLAG_TC_WB_ONLY;
r = amdgpu_fence_emit(ring, f, fence_flags);
if (r) {
dev_err(adev->dev, "failed to emit fence (%d)\n", r);
if (job && job->vmid)
@ -242,7 +246,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
/* wrap the last IB with fence */
if (job && job->uf_addr) {
amdgpu_ring_emit_fence(ring, job->uf_addr, job->uf_sequence,
AMDGPU_FENCE_FLAG_64BIT);
fence_flags | AMDGPU_FENCE_FLAG_64BIT);
}
if (patch_offset != ~0 && ring->funcs->patch_cond_exec)

View File

@ -31,6 +31,7 @@
#include "amdgpu_sched.h"
#include "amdgpu_uvd.h"
#include "amdgpu_vce.h"
#include "atom.h"
#include <linux/vga_switcheroo.h>
#include <linux/slab.h>
@ -214,6 +215,18 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
fw_info->ver = adev->gfx.rlc_fw_version;
fw_info->feature = adev->gfx.rlc_feature_version;
break;
case AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_CNTL:
fw_info->ver = adev->gfx.rlc_srlc_fw_version;
fw_info->feature = adev->gfx.rlc_srlc_feature_version;
break;
case AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_GPM_MEM:
fw_info->ver = adev->gfx.rlc_srlg_fw_version;
fw_info->feature = adev->gfx.rlc_srlg_feature_version;
break;
case AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_SRM_MEM:
fw_info->ver = adev->gfx.rlc_srls_fw_version;
fw_info->feature = adev->gfx.rlc_srls_feature_version;
break;
case AMDGPU_INFO_FW_GFX_MEC:
if (query_fw->index == 0) {
fw_info->ver = adev->gfx.mec_fw_version;
@ -273,12 +286,15 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
struct drm_crtc *crtc;
uint32_t ui32 = 0;
uint64_t ui64 = 0;
int i, found;
int i, j, found;
int ui32_size = sizeof(ui32);
if (!info->return_size || !info->return_pointer)
return -EINVAL;
/* Ensure IB tests are run on ring */
flush_delayed_work(&adev->late_init_work);
switch (info->query) {
case AMDGPU_INFO_ACCEL_WORKING:
ui32 = adev->accel_working;
@ -332,7 +348,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
break;
case AMDGPU_HW_IP_UVD:
type = AMD_IP_BLOCK_TYPE_UVD;
ring_mask = adev->uvd.ring.ready ? 1 : 0;
for (i = 0; i < adev->uvd.num_uvd_inst; i++)
ring_mask |= ((adev->uvd.inst[i].ring.ready ? 1 : 0) << i);
ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
ib_size_alignment = 16;
break;
@ -345,8 +362,11 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
break;
case AMDGPU_HW_IP_UVD_ENC:
type = AMD_IP_BLOCK_TYPE_UVD;
for (i = 0; i < adev->uvd.num_enc_rings; i++)
ring_mask |= ((adev->uvd.ring_enc[i].ready ? 1 : 0) << i);
for (i = 0; i < adev->uvd.num_uvd_inst; i++)
for (j = 0; j < adev->uvd.num_enc_rings; j++)
ring_mask |=
((adev->uvd.inst[i].ring_enc[j].ready ? 1 : 0) <<
(j + i * adev->uvd.num_enc_rings));
ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
ib_size_alignment = 1;
break;
@ -701,10 +721,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
}
}
case AMDGPU_INFO_SENSOR: {
struct pp_gpu_power query = {0};
int query_size = sizeof(query);
if (amdgpu_dpm == 0)
if (!adev->pm.dpm_enabled)
return -ENOENT;
switch (info->sensor_info.type) {
@ -746,10 +763,10 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
/* get average GPU power */
if (amdgpu_dpm_read_sensor(adev,
AMDGPU_PP_SENSOR_GPU_POWER,
(void *)&query, &query_size)) {
(void *)&ui32, &ui32_size)) {
return -EINVAL;
}
ui32 = query.average_gpu_power >> 8;
ui32 >>= 8;
break;
case AMDGPU_INFO_SENSOR_VDDNB:
/* get VDDNB in millivolts */
@ -913,8 +930,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
return;
pm_runtime_get_sync(dev->dev);
amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr);
amdgpu_ctx_mgr_entity_fini(&fpriv->ctx_mgr);
if (adev->asic_type != CHIP_RAVEN) {
amdgpu_uvd_free_handles(adev, file_priv);
@ -935,6 +951,8 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
pd = amdgpu_bo_ref(fpriv->vm.root.base.bo);
amdgpu_vm_fini(adev, &fpriv->vm);
amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr);
if (pasid)
amdgpu_pasid_free_delayed(pd->tbo.resv, pasid);
amdgpu_bo_unref(&pd);
@ -1088,6 +1106,7 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data)
struct amdgpu_device *adev = dev->dev_private;
struct drm_amdgpu_info_firmware fw_info;
struct drm_amdgpu_query_fw query_fw;
struct atom_context *ctx = adev->mode_info.atom_context;
int ret, i;
/* VCE */
@ -1146,6 +1165,30 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data)
seq_printf(m, "RLC feature version: %u, firmware version: 0x%08x\n",
fw_info.feature, fw_info.ver);
/* RLC SAVE RESTORE LIST CNTL */
query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_CNTL;
ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
if (ret)
return ret;
seq_printf(m, "RLC SRLC feature version: %u, firmware version: 0x%08x\n",
fw_info.feature, fw_info.ver);
/* RLC SAVE RESTORE LIST GPM MEM */
query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_GPM_MEM;
ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
if (ret)
return ret;
seq_printf(m, "RLC SRLG feature version: %u, firmware version: 0x%08x\n",
fw_info.feature, fw_info.ver);
/* RLC SAVE RESTORE LIST SRM MEM */
query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_SRM_MEM;
ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
if (ret)
return ret;
seq_printf(m, "RLC SRLS feature version: %u, firmware version: 0x%08x\n",
fw_info.feature, fw_info.ver);
/* MEC */
query_fw.fw_type = AMDGPU_INFO_FW_GFX_MEC;
query_fw.index = 0;
@ -1210,6 +1253,9 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data)
seq_printf(m, "VCN feature version: %u, firmware version: 0x%08x\n",
fw_info.feature, fw_info.ver);
seq_printf(m, "VBIOS version: %s\n", ctx->vbios_version);
return 0;
}

View File

@ -36,12 +36,14 @@
#include <drm/drm.h>
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
struct amdgpu_mn {
/* constant after initialisation */
struct amdgpu_device *adev;
struct mm_struct *mm;
struct mmu_notifier mn;
enum amdgpu_mn_type type;
/* only used on destruction */
struct work_struct work;
@ -185,7 +187,7 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
}
/**
* amdgpu_mn_invalidate_range_start - callback to notify about mm change
* amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change
*
* @mn: our notifier
* @mn: the mm this callback is about
@ -195,10 +197,10 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
* We block for all BOs between start and end to be idle and
* unmap them by move them into system domain again.
*/
static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start,
unsigned long end)
static void amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start,
unsigned long end)
{
struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn);
struct interval_tree_node *it;
@ -219,6 +221,49 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn,
}
}
/**
* amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change
*
* @mn: our notifier
* @mn: the mm this callback is about
* @start: start of updated range
* @end: end of updated range
*
* We temporarily evict all BOs between start and end. This
* necessitates evicting all user-mode queues of the process. The BOs
* are restorted in amdgpu_mn_invalidate_range_end_hsa.
*/
static void amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start,
unsigned long end)
{
struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn);
struct interval_tree_node *it;
/* notification is exclusive, but interval is inclusive */
end -= 1;
amdgpu_mn_read_lock(rmn);
it = interval_tree_iter_first(&rmn->objects, start, end);
while (it) {
struct amdgpu_mn_node *node;
struct amdgpu_bo *bo;
node = container_of(it, struct amdgpu_mn_node, it);
it = interval_tree_iter_next(it, start, end);
list_for_each_entry(bo, &node->bos, mn_list) {
struct kgd_mem *mem = bo->kfd_bo;
if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
start, end))
amdgpu_amdkfd_evict_userptr(mem, mm);
}
}
}
/**
* amdgpu_mn_invalidate_range_end - callback to notify about mm change
*
@ -239,23 +284,39 @@ static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
amdgpu_mn_read_unlock(rmn);
}
static const struct mmu_notifier_ops amdgpu_mn_ops = {
.release = amdgpu_mn_release,
.invalidate_range_start = amdgpu_mn_invalidate_range_start,
.invalidate_range_end = amdgpu_mn_invalidate_range_end,
static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
[AMDGPU_MN_TYPE_GFX] = {
.release = amdgpu_mn_release,
.invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx,
.invalidate_range_end = amdgpu_mn_invalidate_range_end,
},
[AMDGPU_MN_TYPE_HSA] = {
.release = amdgpu_mn_release,
.invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa,
.invalidate_range_end = amdgpu_mn_invalidate_range_end,
},
};
/* Low bits of any reasonable mm pointer will be unused due to struct
* alignment. Use these bits to make a unique key from the mm pointer
* and notifier type.
*/
#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
/**
* amdgpu_mn_get - create notifier context
*
* @adev: amdgpu device pointer
* @type: type of MMU notifier context
*
* Creates a notifier context for current->mm.
*/
struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
enum amdgpu_mn_type type)
{
struct mm_struct *mm = current->mm;
struct amdgpu_mn *rmn;
unsigned long key = AMDGPU_MN_KEY(mm, type);
int r;
mutex_lock(&adev->mn_lock);
@ -264,8 +325,8 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
return ERR_PTR(-EINTR);
}
hash_for_each_possible(adev->mn_hash, rmn, node, (unsigned long)mm)
if (rmn->mm == mm)
hash_for_each_possible(adev->mn_hash, rmn, node, key)
if (AMDGPU_MN_KEY(rmn->mm, rmn->type) == key)
goto release_locks;
rmn = kzalloc(sizeof(*rmn), GFP_KERNEL);
@ -276,8 +337,9 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
rmn->adev = adev;
rmn->mm = mm;
rmn->mn.ops = &amdgpu_mn_ops;
init_rwsem(&rmn->lock);
rmn->type = type;
rmn->mn.ops = &amdgpu_mn_ops[type];
rmn->objects = RB_ROOT_CACHED;
mutex_init(&rmn->read_lock);
atomic_set(&rmn->recursion, 0);
@ -286,7 +348,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
if (r)
goto free_rmn;
hash_add(adev->mn_hash, &rmn->node, (unsigned long)mm);
hash_add(adev->mn_hash, &rmn->node, AMDGPU_MN_KEY(mm, type));
release_locks:
up_write(&mm->mmap_sem);
@ -315,15 +377,21 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
{
unsigned long end = addr + amdgpu_bo_size(bo) - 1;
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
enum amdgpu_mn_type type =
bo->kfd_bo ? AMDGPU_MN_TYPE_HSA : AMDGPU_MN_TYPE_GFX;
struct amdgpu_mn *rmn;
struct amdgpu_mn_node *node = NULL;
struct amdgpu_mn_node *node = NULL, *new_node;
struct list_head bos;
struct interval_tree_node *it;
rmn = amdgpu_mn_get(adev);
rmn = amdgpu_mn_get(adev, type);
if (IS_ERR(rmn))
return PTR_ERR(rmn);
new_node = kmalloc(sizeof(*new_node), GFP_KERNEL);
if (!new_node)
return -ENOMEM;
INIT_LIST_HEAD(&bos);
down_write(&rmn->lock);
@ -337,13 +405,10 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
list_splice(&node->bos, &bos);
}
if (!node) {
node = kmalloc(sizeof(struct amdgpu_mn_node), GFP_KERNEL);
if (!node) {
up_write(&rmn->lock);
return -ENOMEM;
}
}
if (!node)
node = new_node;
else
kfree(new_node);
bo->mn = rmn;

View File

@ -29,16 +29,23 @@
*/
struct amdgpu_mn;
enum amdgpu_mn_type {
AMDGPU_MN_TYPE_GFX,
AMDGPU_MN_TYPE_HSA,
};
#if defined(CONFIG_MMU_NOTIFIER)
void amdgpu_mn_lock(struct amdgpu_mn *mn);
void amdgpu_mn_unlock(struct amdgpu_mn *mn);
struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev);
struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
enum amdgpu_mn_type type);
int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
void amdgpu_mn_unregister(struct amdgpu_bo *bo);
#else
static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {}
static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {}
static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
enum amdgpu_mn_type type)
{
return NULL;
}

View File

@ -308,7 +308,6 @@ struct amdgpu_display_funcs {
struct amdgpu_framebuffer {
struct drm_framebuffer base;
struct drm_gem_object *obj;
/* caching for later use */
uint64_t address;

View File

@ -191,14 +191,21 @@ int amdgpu_bo_create_reserved(struct amdgpu_device *adev,
u32 domain, struct amdgpu_bo **bo_ptr,
u64 *gpu_addr, void **cpu_addr)
{
struct amdgpu_bo_param bp;
bool free = false;
int r;
memset(&bp, 0, sizeof(bp));
bp.size = size;
bp.byte_align = align;
bp.domain = domain;
bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
bp.type = ttm_bo_type_kernel;
bp.resv = NULL;
if (!*bo_ptr) {
r = amdgpu_bo_create(adev, size, align, domain,
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
ttm_bo_type_kernel, NULL, bo_ptr);
r = amdgpu_bo_create(adev, &bp, bo_ptr);
if (r) {
dev_err(adev->dev, "(%d) failed to allocate kernel bo\n",
r);
@ -341,27 +348,25 @@ fail:
return false;
}
static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size,
int byte_align, u32 domain,
u64 flags, enum ttm_bo_type type,
struct reservation_object *resv,
static int amdgpu_bo_do_create(struct amdgpu_device *adev,
struct amdgpu_bo_param *bp,
struct amdgpu_bo **bo_ptr)
{
struct ttm_operation_ctx ctx = {
.interruptible = (type != ttm_bo_type_kernel),
.interruptible = (bp->type != ttm_bo_type_kernel),
.no_wait_gpu = false,
.resv = resv,
.resv = bp->resv,
.flags = TTM_OPT_FLAG_ALLOW_RES_EVICT
};
struct amdgpu_bo *bo;
unsigned long page_align;
unsigned long page_align, size = bp->size;
size_t acc_size;
int r;
page_align = roundup(byte_align, PAGE_SIZE) >> PAGE_SHIFT;
page_align = roundup(bp->byte_align, PAGE_SIZE) >> PAGE_SHIFT;
size = ALIGN(size, PAGE_SIZE);
if (!amdgpu_bo_validate_size(adev, size, domain))
if (!amdgpu_bo_validate_size(adev, size, bp->domain))
return -ENOMEM;
*bo_ptr = NULL;
@ -375,18 +380,14 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size,
drm_gem_private_object_init(adev->ddev, &bo->gem_base, size);
INIT_LIST_HEAD(&bo->shadow_list);
INIT_LIST_HEAD(&bo->va);
bo->preferred_domains = domain & (AMDGPU_GEM_DOMAIN_VRAM |
AMDGPU_GEM_DOMAIN_GTT |
AMDGPU_GEM_DOMAIN_CPU |
AMDGPU_GEM_DOMAIN_GDS |
AMDGPU_GEM_DOMAIN_GWS |
AMDGPU_GEM_DOMAIN_OA);
bo->preferred_domains = bp->preferred_domain ? bp->preferred_domain :
bp->domain;
bo->allowed_domains = bo->preferred_domains;
if (type != ttm_bo_type_kernel &&
if (bp->type != ttm_bo_type_kernel &&
bo->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
bo->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
bo->flags = flags;
bo->flags = bp->flags;
#ifdef CONFIG_X86_32
/* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit
@ -417,11 +418,13 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size,
#endif
bo->tbo.bdev = &adev->mman.bdev;
amdgpu_ttm_placement_from_domain(bo, domain);
amdgpu_ttm_placement_from_domain(bo, bp->domain);
if (bp->type == ttm_bo_type_kernel)
bo->tbo.priority = 1;
r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type,
r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, bp->type,
&bo->placement, page_align, &ctx, acc_size,
NULL, resv, &amdgpu_ttm_bo_destroy);
NULL, bp->resv, &amdgpu_ttm_bo_destroy);
if (unlikely(r != 0))
return r;
@ -433,10 +436,7 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size,
else
amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved, 0);
if (type == ttm_bo_type_kernel)
bo->tbo.priority = 1;
if (flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
bo->tbo.mem.placement & TTM_PL_FLAG_VRAM) {
struct dma_fence *fence;
@ -449,20 +449,20 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size,
bo->tbo.moving = dma_fence_get(fence);
dma_fence_put(fence);
}
if (!resv)
if (!bp->resv)
amdgpu_bo_unreserve(bo);
*bo_ptr = bo;
trace_amdgpu_bo_create(bo);
/* Treat CPU_ACCESS_REQUIRED only as a hint if given by UMD */
if (type == ttm_bo_type_device)
if (bp->type == ttm_bo_type_device)
bo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
return 0;
fail_unreserve:
if (!resv)
if (!bp->resv)
ww_mutex_unlock(&bo->tbo.resv->lock);
amdgpu_bo_unref(&bo);
return r;
@ -472,16 +472,22 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
unsigned long size, int byte_align,
struct amdgpu_bo *bo)
{
struct amdgpu_bo_param bp;
int r;
if (bo->shadow)
return 0;
r = amdgpu_bo_do_create(adev, size, byte_align, AMDGPU_GEM_DOMAIN_GTT,
AMDGPU_GEM_CREATE_CPU_GTT_USWC |
AMDGPU_GEM_CREATE_SHADOW,
ttm_bo_type_kernel,
bo->tbo.resv, &bo->shadow);
memset(&bp, 0, sizeof(bp));
bp.size = size;
bp.byte_align = byte_align;
bp.domain = AMDGPU_GEM_DOMAIN_GTT;
bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC |
AMDGPU_GEM_CREATE_SHADOW;
bp.type = ttm_bo_type_kernel;
bp.resv = bo->tbo.resv;
r = amdgpu_bo_do_create(adev, &bp, &bo->shadow);
if (!r) {
bo->shadow->parent = amdgpu_bo_ref(bo);
mutex_lock(&adev->shadow_list_lock);
@ -492,28 +498,26 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
return r;
}
int amdgpu_bo_create(struct amdgpu_device *adev, unsigned long size,
int byte_align, u32 domain,
u64 flags, enum ttm_bo_type type,
struct reservation_object *resv,
int amdgpu_bo_create(struct amdgpu_device *adev,
struct amdgpu_bo_param *bp,
struct amdgpu_bo **bo_ptr)
{
uint64_t parent_flags = flags & ~AMDGPU_GEM_CREATE_SHADOW;
u64 flags = bp->flags;
int r;
r = amdgpu_bo_do_create(adev, size, byte_align, domain,
parent_flags, type, resv, bo_ptr);
bp->flags = bp->flags & ~AMDGPU_GEM_CREATE_SHADOW;
r = amdgpu_bo_do_create(adev, bp, bo_ptr);
if (r)
return r;
if ((flags & AMDGPU_GEM_CREATE_SHADOW) && amdgpu_need_backup(adev)) {
if (!resv)
if (!bp->resv)
WARN_ON(reservation_object_lock((*bo_ptr)->tbo.resv,
NULL));
r = amdgpu_bo_create_shadow(adev, size, byte_align, (*bo_ptr));
r = amdgpu_bo_create_shadow(adev, bp->size, bp->byte_align, (*bo_ptr));
if (!resv)
if (!bp->resv)
reservation_object_unlock((*bo_ptr)->tbo.resv);
if (r)
@ -689,8 +693,21 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
return -EINVAL;
/* A shared bo cannot be migrated to VRAM */
if (bo->prime_shared_count && (domain == AMDGPU_GEM_DOMAIN_VRAM))
return -EINVAL;
if (bo->prime_shared_count) {
if (domain & AMDGPU_GEM_DOMAIN_GTT)
domain = AMDGPU_GEM_DOMAIN_GTT;
else
return -EINVAL;
}
/* This assumes only APU display buffers are pinned with (VRAM|GTT).
* See function amdgpu_display_supported_domains()
*/
if (domain == (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)) {
domain = AMDGPU_GEM_DOMAIN_VRAM;
if (adev->gmc.real_vram_size <= AMDGPU_SG_THRESHOLD)
domain = AMDGPU_GEM_DOMAIN_GTT;
}
if (bo->pin_count) {
uint32_t mem_type = bo->tbo.mem.mem_type;
@ -838,6 +855,13 @@ int amdgpu_bo_init(struct amdgpu_device *adev)
return amdgpu_ttm_init(adev);
}
int amdgpu_bo_late_init(struct amdgpu_device *adev)
{
amdgpu_ttm_late_init(adev);
return 0;
}
void amdgpu_bo_fini(struct amdgpu_device *adev)
{
amdgpu_ttm_fini(adev);

View File

@ -33,6 +33,16 @@
#define AMDGPU_BO_INVALID_OFFSET LONG_MAX
struct amdgpu_bo_param {
unsigned long size;
int byte_align;
u32 domain;
u32 preferred_domain;
u64 flags;
enum ttm_bo_type type;
struct reservation_object *resv;
};
/* bo virtual addresses in a vm */
struct amdgpu_bo_va_mapping {
struct amdgpu_bo_va *bo_va;
@ -195,6 +205,27 @@ static inline bool amdgpu_bo_gpu_accessible(struct amdgpu_bo *bo)
}
}
/**
* amdgpu_bo_in_cpu_visible_vram - check if BO is (partly) in visible VRAM
*/
static inline bool amdgpu_bo_in_cpu_visible_vram(struct amdgpu_bo *bo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
unsigned fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
struct drm_mm_node *node = bo->tbo.mem.mm_node;
unsigned long pages_left;
if (bo->tbo.mem.mem_type != TTM_PL_VRAM)
return false;
for (pages_left = bo->tbo.mem.num_pages; pages_left;
pages_left -= node->size, node++)
if (node->start < fpfn)
return true;
return false;
}
/**
* amdgpu_bo_explicit_sync - return whether the bo is explicitly synced
*/
@ -203,10 +234,8 @@ static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo)
return bo->flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC;
}
int amdgpu_bo_create(struct amdgpu_device *adev, unsigned long size,
int byte_align, u32 domain,
u64 flags, enum ttm_bo_type type,
struct reservation_object *resv,
int amdgpu_bo_create(struct amdgpu_device *adev,
struct amdgpu_bo_param *bp,
struct amdgpu_bo **bo_ptr);
int amdgpu_bo_create_reserved(struct amdgpu_device *adev,
unsigned long size, int align,
@ -230,6 +259,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
int amdgpu_bo_unpin(struct amdgpu_bo *bo);
int amdgpu_bo_evict_vram(struct amdgpu_device *adev);
int amdgpu_bo_init(struct amdgpu_device *adev);
int amdgpu_bo_late_init(struct amdgpu_device *adev);
void amdgpu_bo_fini(struct amdgpu_device *adev);
int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo,
struct vm_area_struct *vma);

View File

@ -77,6 +77,37 @@ void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev)
}
}
/**
* DOC: power_dpm_state
*
* This is a legacy interface and is only provided for backwards compatibility.
* The amdgpu driver provides a sysfs API for adjusting certain power
* related parameters. The file power_dpm_state is used for this.
* It accepts the following arguments:
* - battery
* - balanced
* - performance
*
* battery
*
* On older GPUs, the vbios provided a special power state for battery
* operation. Selecting battery switched to this state. This is no
* longer provided on newer GPUs so the option does nothing in that case.
*
* balanced
*
* On older GPUs, the vbios provided a special power state for balanced
* operation. Selecting balanced switched to this state. This is no
* longer provided on newer GPUs so the option does nothing in that case.
*
* performance
*
* On older GPUs, the vbios provided a special power state for performance
* operation. Selecting performance switched to this state. This is no
* longer provided on newer GPUs so the option does nothing in that case.
*
*/
static ssize_t amdgpu_get_dpm_state(struct device *dev,
struct device_attribute *attr,
char *buf)
@ -131,6 +162,59 @@ fail:
return count;
}
/**
* DOC: power_dpm_force_performance_level
*
* The amdgpu driver provides a sysfs API for adjusting certain power
* related parameters. The file power_dpm_force_performance_level is
* used for this. It accepts the following arguments:
* - auto
* - low
* - high
* - manual
* - GPU fan
* - profile_standard
* - profile_min_sclk
* - profile_min_mclk
* - profile_peak
*
* auto
*
* When auto is selected, the driver will attempt to dynamically select
* the optimal power profile for current conditions in the driver.
*
* low
*
* When low is selected, the clocks are forced to the lowest power state.
*
* high
*
* When high is selected, the clocks are forced to the highest power state.
*
* manual
*
* When manual is selected, the user can manually adjust which power states
* are enabled for each clock domain via the sysfs pp_dpm_mclk, pp_dpm_sclk,
* and pp_dpm_pcie files and adjust the power state transition heuristics
* via the pp_power_profile_mode sysfs file.
*
* profile_standard
* profile_min_sclk
* profile_min_mclk
* profile_peak
*
* When the profiling modes are selected, clock and power gating are
* disabled and the clocks are set for different profiling cases. This
* mode is recommended for profiling specific work loads where you do
* not want clock or power gating for clock fluctuation to interfere
* with your results. profile_standard sets the clocks to a fixed clock
* level which varies from asic to asic. profile_min_sclk forces the sclk
* to the lowest level. profile_min_mclk forces the mclk to the lowest level.
* profile_peak sets all clocks (mclk, sclk, pcie) to the highest levels.
*
*/
static ssize_t amdgpu_get_dpm_forced_performance_level(struct device *dev,
struct device_attribute *attr,
char *buf)
@ -324,6 +408,17 @@ fail:
return count;
}
/**
* DOC: pp_table
*
* The amdgpu driver provides a sysfs API for uploading new powerplay
* tables. The file pp_table is used for this. Reading the file
* will dump the current power play table. Writing to the file
* will attempt to upload a new powerplay table and re-initialize
* powerplay using that new table.
*
*/
static ssize_t amdgpu_get_pp_table(struct device *dev,
struct device_attribute *attr,
char *buf)
@ -360,6 +455,29 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
return count;
}
/**
* DOC: pp_od_clk_voltage
*
* The amdgpu driver provides a sysfs API for adjusting the clocks and voltages
* in each power level within a power state. The pp_od_clk_voltage is used for
* this.
*
* Reading the file will display:
* - a list of engine clock levels and voltages labeled OD_SCLK
* - a list of memory clock levels and voltages labeled OD_MCLK
* - a list of valid ranges for sclk, mclk, and voltage labeled OD_RANGE
*
* To manually adjust these settings, first select manual using
* power_dpm_force_performance_level. Enter a new value for each
* level by writing a string that contains "s/m level clock voltage" to
* the file. E.g., "s 1 500 820" will update sclk level 1 to be 500 MHz
* at 820 mV; "m 0 350 810" will update mclk level 0 to be 350 MHz at
* 810 mV. When you have edited all of the states as needed, write
* "c" (commit) to the file to commit your changes. If you want to reset to the
* default power levels, write "r" (reset) to the file to reset them.
*
*/
static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
struct device_attribute *attr,
const char *buf,
@ -437,6 +555,7 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,
if (adev->powerplay.pp_funcs->print_clock_levels) {
size = amdgpu_dpm_print_clock_levels(adev, OD_SCLK, buf);
size += amdgpu_dpm_print_clock_levels(adev, OD_MCLK, buf+size);
size += amdgpu_dpm_print_clock_levels(adev, OD_RANGE, buf+size);
return size;
} else {
return snprintf(buf, PAGE_SIZE, "\n");
@ -444,6 +563,23 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,
}
/**
* DOC: pp_dpm_sclk pp_dpm_mclk pp_dpm_pcie
*
* The amdgpu driver provides a sysfs API for adjusting what power levels
* are enabled for a given power state. The files pp_dpm_sclk, pp_dpm_mclk,
* and pp_dpm_pcie are used for this.
*
* Reading back the files will show you the available power levels within
* the power state and the clock information for those levels.
*
* To manually adjust these states, first select manual using
* power_dpm_force_performance_level.
* Secondly,Enter a new value for each level by inputing a string that
* contains " echo xx xx xx > pp_dpm_sclk/mclk/pcie"
* E.g., echo 4 5 6 to > pp_dpm_sclk will enable sclk levels 4, 5, and 6.
*/
static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev,
struct device_attribute *attr,
char *buf)
@ -466,23 +602,27 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev,
struct amdgpu_device *adev = ddev->dev_private;
int ret;
long level;
uint32_t i, mask = 0;
char sub_str[2];
uint32_t mask = 0;
char *sub_str = NULL;
char *tmp;
char buf_cpy[count];
const char delimiter[3] = {' ', '\n', '\0'};
for (i = 0; i < strlen(buf); i++) {
if (*(buf + i) == '\n')
continue;
sub_str[0] = *(buf + i);
sub_str[1] = '\0';
ret = kstrtol(sub_str, 0, &level);
memcpy(buf_cpy, buf, count+1);
tmp = buf_cpy;
while (tmp[0]) {
sub_str = strsep(&tmp, delimiter);
if (strlen(sub_str)) {
ret = kstrtol(sub_str, 0, &level);
if (ret) {
count = -EINVAL;
goto fail;
}
mask |= 1 << level;
if (ret) {
count = -EINVAL;
goto fail;
}
mask |= 1 << level;
} else
break;
}
if (adev->powerplay.pp_funcs->force_clock_level)
amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask);
@ -512,21 +652,26 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev,
struct amdgpu_device *adev = ddev->dev_private;
int ret;
long level;
uint32_t i, mask = 0;
char sub_str[2];
uint32_t mask = 0;
char *sub_str = NULL;
char *tmp;
char buf_cpy[count];
const char delimiter[3] = {' ', '\n', '\0'};
for (i = 0; i < strlen(buf); i++) {
if (*(buf + i) == '\n')
continue;
sub_str[0] = *(buf + i);
sub_str[1] = '\0';
ret = kstrtol(sub_str, 0, &level);
memcpy(buf_cpy, buf, count+1);
tmp = buf_cpy;
while (tmp[0]) {
sub_str = strsep(&tmp, delimiter);
if (strlen(sub_str)) {
ret = kstrtol(sub_str, 0, &level);
if (ret) {
count = -EINVAL;
goto fail;
}
mask |= 1 << level;
if (ret) {
count = -EINVAL;
goto fail;
}
mask |= 1 << level;
} else
break;
}
if (adev->powerplay.pp_funcs->force_clock_level)
amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask);
@ -557,21 +702,27 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev,
struct amdgpu_device *adev = ddev->dev_private;
int ret;
long level;
uint32_t i, mask = 0;
char sub_str[2];
uint32_t mask = 0;
char *sub_str = NULL;
char *tmp;
char buf_cpy[count];
const char delimiter[3] = {' ', '\n', '\0'};
for (i = 0; i < strlen(buf); i++) {
if (*(buf + i) == '\n')
continue;
sub_str[0] = *(buf + i);
sub_str[1] = '\0';
ret = kstrtol(sub_str, 0, &level);
memcpy(buf_cpy, buf, count+1);
tmp = buf_cpy;
if (ret) {
count = -EINVAL;
goto fail;
}
mask |= 1 << level;
while (tmp[0]) {
sub_str = strsep(&tmp, delimiter);
if (strlen(sub_str)) {
ret = kstrtol(sub_str, 0, &level);
if (ret) {
count = -EINVAL;
goto fail;
}
mask |= 1 << level;
} else
break;
}
if (adev->powerplay.pp_funcs->force_clock_level)
amdgpu_dpm_force_clock_level(adev, PP_PCIE, mask);
@ -668,6 +819,26 @@ fail:
return count;
}
/**
* DOC: pp_power_profile_mode
*
* The amdgpu driver provides a sysfs API for adjusting the heuristics
* related to switching between power levels in a power state. The file
* pp_power_profile_mode is used for this.
*
* Reading this file outputs a list of all of the predefined power profiles
* and the relevant heuristics settings for that profile.
*
* To select a profile or create a custom profile, first select manual using
* power_dpm_force_performance_level. Writing the number of a predefined
* profile to pp_power_profile_mode will enable those heuristics. To
* create a custom set of heuristics, write a string of numbers to the file
* starting with the number of the custom profile along with a setting
* for each heuristic parameter. Due to differences across asic families
* the heuristic parameters vary from family to family.
*
*/
static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev,
struct device_attribute *attr,
char *buf)
@ -1020,8 +1191,8 @@ static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev,
{
struct amdgpu_device *adev = dev_get_drvdata(dev);
struct drm_device *ddev = adev->ddev;
struct pp_gpu_power query = {0};
int r, size = sizeof(query);
u32 query = 0;
int r, size = sizeof(u32);
unsigned uw;
/* Can't get power when the card is off */
@ -1041,7 +1212,7 @@ static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev,
return r;
/* convert to microwatts */
uw = (query.average_gpu_power >> 8) * 1000000;
uw = (query >> 8) * 1000000 + (query & 0xff) * 1000;
return snprintf(buf, PAGE_SIZE, "%u\n", uw);
}
@ -1109,6 +1280,46 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev,
return count;
}
/**
* DOC: hwmon
*
* The amdgpu driver exposes the following sensor interfaces:
* - GPU temperature (via the on-die sensor)
* - GPU voltage
* - Northbridge voltage (APUs only)
* - GPU power
* - GPU fan
*
* hwmon interfaces for GPU temperature:
* - temp1_input: the on die GPU temperature in millidegrees Celsius
* - temp1_crit: temperature critical max value in millidegrees Celsius
* - temp1_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius
*
* hwmon interfaces for GPU voltage:
* - in0_input: the voltage on the GPU in millivolts
* - in1_input: the voltage on the Northbridge in millivolts
*
* hwmon interfaces for GPU power:
* - power1_average: average power used by the GPU in microWatts
* - power1_cap_min: minimum cap supported in microWatts
* - power1_cap_max: maximum cap supported in microWatts
* - power1_cap: selected power cap in microWatts
*
* hwmon interfaces for GPU fan:
* - pwm1: pulse width modulation fan level (0-255)
* - pwm1_enable: pulse width modulation fan control method
* 0: no fan speed control
* 1: manual fan speed control using pwm interface
* 2: automatic fan speed control
* - pwm1_min: pulse width modulation fan control minimum level (0)
* - pwm1_max: pulse width modulation fan control maximum level (255)
* - fan1_input: fan speed in RPM
*
* You can use hwmon tools like sensors to view this information on your system.
*
*/
static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0);
static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0);
static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1);
@ -1153,19 +1364,14 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
struct amdgpu_device *adev = dev_get_drvdata(dev);
umode_t effective_mode = attr->mode;
/* handle non-powerplay limitations */
if (!adev->powerplay.pp_handle) {
/* Skip fan attributes if fan is not present */
if (adev->pm.no_fan &&
(attr == &sensor_dev_attr_pwm1.dev_attr.attr ||
attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr ||
attr == &sensor_dev_attr_pwm1_max.dev_attr.attr ||
attr == &sensor_dev_attr_pwm1_min.dev_attr.attr))
return 0;
/* requires powerplay */
if (attr == &sensor_dev_attr_fan1_input.dev_attr.attr)
return 0;
}
/* Skip fan attributes if fan is not present */
if (adev->pm.no_fan && (attr == &sensor_dev_attr_pwm1.dev_attr.attr ||
attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr ||
attr == &sensor_dev_attr_pwm1_max.dev_attr.attr ||
attr == &sensor_dev_attr_pwm1_min.dev_attr.attr ||
attr == &sensor_dev_attr_fan1_input.dev_attr.attr))
return 0;
/* Skip limit attributes if DPM is not enabled */
if (!adev->pm.dpm_enabled &&
@ -1658,9 +1864,6 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev)
void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
{
struct drm_device *ddev = adev->ddev;
struct drm_crtc *crtc;
struct amdgpu_crtc *amdgpu_crtc;
int i = 0;
if (!adev->pm.dpm_enabled)
@ -1676,21 +1879,25 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
}
if (adev->powerplay.pp_funcs->dispatch_tasks) {
if (!amdgpu_device_has_dc_support(adev)) {
mutex_lock(&adev->pm.mutex);
amdgpu_dpm_get_active_displays(adev);
adev->pm.pm_display_cfg.num_display = adev->pm.dpm.new_active_crtcs;
adev->pm.pm_display_cfg.vrefresh = amdgpu_dpm_get_vrefresh(adev);
adev->pm.pm_display_cfg.min_vblank_time = amdgpu_dpm_get_vblank_time(adev);
/* we have issues with mclk switching with refresh rates over 120 hz on the non-DC code. */
if (adev->pm.pm_display_cfg.vrefresh > 120)
adev->pm.pm_display_cfg.min_vblank_time = 0;
if (adev->powerplay.pp_funcs->display_configuration_change)
adev->powerplay.pp_funcs->display_configuration_change(
adev->powerplay.pp_handle,
&adev->pm.pm_display_cfg);
mutex_unlock(&adev->pm.mutex);
}
amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_DISPLAY_CONFIG_CHANGE, NULL);
} else {
mutex_lock(&adev->pm.mutex);
adev->pm.dpm.new_active_crtcs = 0;
adev->pm.dpm.new_active_crtc_count = 0;
if (adev->mode_info.num_crtc && adev->mode_info.mode_config_initialized) {
list_for_each_entry(crtc,
&ddev->mode_config.crtc_list, head) {
amdgpu_crtc = to_amdgpu_crtc(crtc);
if (amdgpu_crtc->enabled) {
adev->pm.dpm.new_active_crtcs |= (1 << amdgpu_crtc->crtc_id);
adev->pm.dpm.new_active_crtc_count++;
}
}
}
amdgpu_dpm_get_active_displays(adev);
/* update battery/ac status */
if (power_supply_is_system_supplied() > 0)
adev->pm.dpm.ac_power = true;
@ -1711,7 +1918,7 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *adev)
{
uint32_t value;
struct pp_gpu_power query = {0};
uint32_t query = 0;
int size;
/* sanity check PP is enabled */
@ -1734,17 +1941,9 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a
seq_printf(m, "\t%u mV (VDDGFX)\n", value);
if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB, (void *)&value, &size))
seq_printf(m, "\t%u mV (VDDNB)\n", value);
size = sizeof(query);
if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_POWER, (void *)&query, &size)) {
seq_printf(m, "\t%u.%u W (VDDC)\n", query.vddc_power >> 8,
query.vddc_power & 0xff);
seq_printf(m, "\t%u.%u W (VDDCI)\n", query.vddci_power >> 8,
query.vddci_power & 0xff);
seq_printf(m, "\t%u.%u W (max GPU)\n", query.max_gpu_power >> 8,
query.max_gpu_power & 0xff);
seq_printf(m, "\t%u.%u W (average GPU)\n", query.average_gpu_power >> 8,
query.average_gpu_power & 0xff);
}
size = sizeof(uint32_t);
if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_POWER, (void *)&query, &size))
seq_printf(m, "\t%u.%u W (average GPU)\n", query >> 8, query & 0xff);
size = sizeof(value);
seq_printf(m, "\n");

View File

@ -102,12 +102,18 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
struct reservation_object *resv = attach->dmabuf->resv;
struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_bo *bo;
struct amdgpu_bo_param bp;
int ret;
memset(&bp, 0, sizeof(bp));
bp.size = attach->dmabuf->size;
bp.byte_align = PAGE_SIZE;
bp.domain = AMDGPU_GEM_DOMAIN_CPU;
bp.flags = 0;
bp.type = ttm_bo_type_sg;
bp.resv = resv;
ww_mutex_lock(&resv->lock, NULL);
ret = amdgpu_bo_create(adev, attach->dmabuf->size, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_CPU, 0, ttm_bo_type_sg,
resv, &bo);
ret = amdgpu_bo_create(adev, &bp, &bo);
if (ret)
goto error;
@ -209,7 +215,7 @@ static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf,
struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct ttm_operation_ctx ctx = { true, false };
u32 domain = amdgpu_display_framebuffer_domains(adev);
u32 domain = amdgpu_display_supported_domains(adev);
int ret;
bool reads = (direction == DMA_BIDIRECTIONAL ||
direction == DMA_FROM_DEVICE);

View File

@ -52,6 +52,7 @@ static int psp_sw_init(void *handle)
switch (adev->asic_type) {
case CHIP_VEGA10:
case CHIP_VEGA12:
case CHIP_VEGA20:
psp_v3_1_set_psp_funcs(psp);
break;
case CHIP_RAVEN:

View File

@ -66,6 +66,8 @@ static int amdgpu_identity_map(struct amdgpu_device *adev,
u32 ring,
struct amdgpu_ring **out_ring)
{
u32 instance;
switch (mapper->hw_ip) {
case AMDGPU_HW_IP_GFX:
*out_ring = &adev->gfx.gfx_ring[ring];
@ -77,13 +79,16 @@ static int amdgpu_identity_map(struct amdgpu_device *adev,
*out_ring = &adev->sdma.instance[ring].ring;
break;
case AMDGPU_HW_IP_UVD:
*out_ring = &adev->uvd.ring;
instance = ring;
*out_ring = &adev->uvd.inst[instance].ring;
break;
case AMDGPU_HW_IP_VCE:
*out_ring = &adev->vce.ring[ring];
break;
case AMDGPU_HW_IP_UVD_ENC:
*out_ring = &adev->uvd.ring_enc[ring];
instance = ring / adev->uvd.num_enc_rings;
*out_ring =
&adev->uvd.inst[instance].ring_enc[ring%adev->uvd.num_enc_rings];
break;
case AMDGPU_HW_IP_VCN_DEC:
*out_ring = &adev->vcn.ring_dec;
@ -240,13 +245,14 @@ int amdgpu_queue_mgr_map(struct amdgpu_device *adev,
ip_num_rings = adev->sdma.num_instances;
break;
case AMDGPU_HW_IP_UVD:
ip_num_rings = 1;
ip_num_rings = adev->uvd.num_uvd_inst;
break;
case AMDGPU_HW_IP_VCE:
ip_num_rings = adev->vce.num_rings;
break;
case AMDGPU_HW_IP_UVD_ENC:
ip_num_rings = adev->uvd.num_enc_rings;
ip_num_rings =
adev->uvd.num_enc_rings * adev->uvd.num_uvd_inst;
break;
case AMDGPU_HW_IP_VCN_DEC:
ip_num_rings = 1;

View File

@ -362,6 +362,7 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
dma_fence_put(ring->vmid_wait);
ring->vmid_wait = NULL;
ring->me = 0;
ring->adev->rings[ring->idx] = NULL;
}
@ -459,6 +460,26 @@ void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring)
spin_unlock(&adev->ring_lru_list_lock);
}
/**
* amdgpu_ring_emit_reg_write_reg_wait_helper - ring helper
*
* @adev: amdgpu_device pointer
* @reg0: register to write
* @reg1: register to wait on
* @ref: reference value to write/wait on
* @mask: mask to wait on
*
* Helper for rings that don't support write and wait in a
* single oneshot packet.
*/
void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring,
uint32_t reg0, uint32_t reg1,
uint32_t ref, uint32_t mask)
{
amdgpu_ring_emit_wreg(ring, reg0, ref);
amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
}
/*
* Debugfs info
*/

View File

@ -29,7 +29,7 @@
#include <drm/drm_print.h>
/* max number of rings */
#define AMDGPU_MAX_RINGS 18
#define AMDGPU_MAX_RINGS 21
#define AMDGPU_MAX_GFX_RINGS 1
#define AMDGPU_MAX_COMPUTE_RINGS 8
#define AMDGPU_MAX_VCE_RINGS 3
@ -42,6 +42,7 @@
#define AMDGPU_FENCE_FLAG_64BIT (1 << 0)
#define AMDGPU_FENCE_FLAG_INT (1 << 1)
#define AMDGPU_FENCE_FLAG_TC_WB_ONLY (1 << 2)
enum amdgpu_ring_type {
AMDGPU_RING_TYPE_GFX,
@ -90,7 +91,8 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
unsigned irq_type);
void amdgpu_fence_driver_suspend(struct amdgpu_device *adev);
void amdgpu_fence_driver_resume(struct amdgpu_device *adev);
int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence);
int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence,
unsigned flags);
int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s);
void amdgpu_fence_process(struct amdgpu_ring *ring);
int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
@ -154,6 +156,9 @@ struct amdgpu_ring_funcs {
void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
void (*emit_reg_wait)(struct amdgpu_ring *ring, uint32_t reg,
uint32_t val, uint32_t mask);
void (*emit_reg_write_reg_wait)(struct amdgpu_ring *ring,
uint32_t reg0, uint32_t reg1,
uint32_t ref, uint32_t mask);
void (*emit_tmz)(struct amdgpu_ring *ring, bool start);
/* priority functions */
void (*set_priority) (struct amdgpu_ring *ring,
@ -228,6 +233,10 @@ int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type,
int *blacklist, int num_blacklist,
bool lru_pipe_order, struct amdgpu_ring **ring);
void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring);
void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring,
uint32_t reg0, uint32_t val0,
uint32_t reg1, uint32_t val1);
static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring)
{
int i = 0;

View File

@ -33,6 +33,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
struct amdgpu_bo *vram_obj = NULL;
struct amdgpu_bo **gtt_obj = NULL;
struct amdgpu_bo_param bp;
uint64_t gart_addr, vram_addr;
unsigned n, size;
int i, r;
@ -58,9 +59,15 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
r = 1;
goto out_cleanup;
}
memset(&bp, 0, sizeof(bp));
bp.size = size;
bp.byte_align = PAGE_SIZE;
bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
bp.flags = 0;
bp.type = ttm_bo_type_kernel;
bp.resv = NULL;
r = amdgpu_bo_create(adev, size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, 0,
ttm_bo_type_kernel, NULL, &vram_obj);
r = amdgpu_bo_create(adev, &bp, &vram_obj);
if (r) {
DRM_ERROR("Failed to create VRAM object\n");
goto out_cleanup;
@ -79,9 +86,8 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
void **vram_start, **vram_end;
struct dma_fence *fence = NULL;
r = amdgpu_bo_create(adev, size, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT, 0,
ttm_bo_type_kernel, NULL, gtt_obj + i);
bp.domain = AMDGPU_GEM_DOMAIN_GTT;
r = amdgpu_bo_create(adev, &bp, gtt_obj + i);
if (r) {
DRM_ERROR("Failed to create GTT object %d\n", i);
goto out_lclean;

View File

@ -275,7 +275,7 @@ TRACE_EVENT(amdgpu_vm_bo_unmap,
),
TP_fast_assign(
__entry->bo = bo_va->base.bo;
__entry->bo = bo_va ? bo_va->base.bo : NULL;
__entry->start = mapping->start;
__entry->last = mapping->last;
__entry->offset = mapping->offset;

File diff suppressed because it is too large Load Diff

View File

@ -77,6 +77,7 @@ uint64_t amdgpu_vram_mgr_usage(struct ttm_mem_type_manager *man);
uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_mem_type_manager *man);
int amdgpu_ttm_init(struct amdgpu_device *adev);
void amdgpu_ttm_late_init(struct amdgpu_device *adev);
void amdgpu_ttm_fini(struct amdgpu_device *adev);
void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev,
bool enable);

View File

@ -161,8 +161,38 @@ void amdgpu_ucode_print_rlc_hdr(const struct common_firmware_header *hdr)
le32_to_cpu(rlc_hdr->reg_list_format_separate_array_offset_bytes));
DRM_DEBUG("reg_list_separate_size_bytes: %u\n",
le32_to_cpu(rlc_hdr->reg_list_separate_size_bytes));
DRM_DEBUG("reg_list_separate_size_bytes: %u\n",
le32_to_cpu(rlc_hdr->reg_list_separate_size_bytes));
DRM_DEBUG("reg_list_separate_array_offset_bytes: %u\n",
le32_to_cpu(rlc_hdr->reg_list_separate_array_offset_bytes));
if (version_minor == 1) {
const struct rlc_firmware_header_v2_1 *v2_1 =
container_of(rlc_hdr, struct rlc_firmware_header_v2_1, v2_0);
DRM_DEBUG("reg_list_format_direct_reg_list_length: %u\n",
le32_to_cpu(v2_1->reg_list_format_direct_reg_list_length));
DRM_DEBUG("save_restore_list_cntl_ucode_ver: %u\n",
le32_to_cpu(v2_1->save_restore_list_cntl_ucode_ver));
DRM_DEBUG("save_restore_list_cntl_feature_ver: %u\n",
le32_to_cpu(v2_1->save_restore_list_cntl_feature_ver));
DRM_DEBUG("save_restore_list_cntl_size_bytes %u\n",
le32_to_cpu(v2_1->save_restore_list_cntl_size_bytes));
DRM_DEBUG("save_restore_list_cntl_offset_bytes: %u\n",
le32_to_cpu(v2_1->save_restore_list_cntl_offset_bytes));
DRM_DEBUG("save_restore_list_gpm_ucode_ver: %u\n",
le32_to_cpu(v2_1->save_restore_list_gpm_ucode_ver));
DRM_DEBUG("save_restore_list_gpm_feature_ver: %u\n",
le32_to_cpu(v2_1->save_restore_list_gpm_feature_ver));
DRM_DEBUG("save_restore_list_gpm_size_bytes %u\n",
le32_to_cpu(v2_1->save_restore_list_gpm_size_bytes));
DRM_DEBUG("save_restore_list_gpm_offset_bytes: %u\n",
le32_to_cpu(v2_1->save_restore_list_gpm_offset_bytes));
DRM_DEBUG("save_restore_list_srm_ucode_ver: %u\n",
le32_to_cpu(v2_1->save_restore_list_srm_ucode_ver));
DRM_DEBUG("save_restore_list_srm_feature_ver: %u\n",
le32_to_cpu(v2_1->save_restore_list_srm_feature_ver));
DRM_DEBUG("save_restore_list_srm_size_bytes %u\n",
le32_to_cpu(v2_1->save_restore_list_srm_size_bytes));
DRM_DEBUG("save_restore_list_srm_offset_bytes: %u\n",
le32_to_cpu(v2_1->save_restore_list_srm_offset_bytes));
}
} else {
DRM_ERROR("Unknown RLC ucode version: %u.%u\n", version_major, version_minor);
}
@ -265,6 +295,7 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
case CHIP_POLARIS10:
case CHIP_POLARIS11:
case CHIP_POLARIS12:
case CHIP_VEGAM:
if (!load_type)
return AMDGPU_FW_LOAD_DIRECT;
else
@ -276,6 +307,8 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
return AMDGPU_FW_LOAD_DIRECT;
else
return AMDGPU_FW_LOAD_PSP;
case CHIP_VEGA20:
return AMDGPU_FW_LOAD_DIRECT;
default:
DRM_ERROR("Unknown firmware load type\n");
}
@ -307,7 +340,10 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
(ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1 &&
ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2 &&
ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1_JT &&
ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2_JT)) {
ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2_JT &&
ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL &&
ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM &&
ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM)) {
ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes);
memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data +
@ -329,6 +365,18 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
le32_to_cpu(header->ucode_array_offset_bytes) +
le32_to_cpu(cp_hdr->jt_offset) * 4),
ucode->ucode_size);
} else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL) {
ucode->ucode_size = adev->gfx.rlc.save_restore_list_cntl_size_bytes;
memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_cntl,
ucode->ucode_size);
} else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM) {
ucode->ucode_size = adev->gfx.rlc.save_restore_list_gpm_size_bytes;
memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_gpm,
ucode->ucode_size);
} else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM) {
ucode->ucode_size = adev->gfx.rlc.save_restore_list_srm_size_bytes;
memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_srm,
ucode->ucode_size);
}
return 0;

View File

@ -98,6 +98,24 @@ struct rlc_firmware_header_v2_0 {
uint32_t reg_list_separate_array_offset_bytes; /* payload offset from the start of the header */
};
/* version_major=2, version_minor=1 */
struct rlc_firmware_header_v2_1 {
struct rlc_firmware_header_v2_0 v2_0;
uint32_t reg_list_format_direct_reg_list_length; /* length of direct reg list format array */
uint32_t save_restore_list_cntl_ucode_ver;
uint32_t save_restore_list_cntl_feature_ver;
uint32_t save_restore_list_cntl_size_bytes;
uint32_t save_restore_list_cntl_offset_bytes;
uint32_t save_restore_list_gpm_ucode_ver;
uint32_t save_restore_list_gpm_feature_ver;
uint32_t save_restore_list_gpm_size_bytes;
uint32_t save_restore_list_gpm_offset_bytes;
uint32_t save_restore_list_srm_ucode_ver;
uint32_t save_restore_list_srm_feature_ver;
uint32_t save_restore_list_srm_size_bytes;
uint32_t save_restore_list_srm_offset_bytes;
};
/* version_major=1, version_minor=0 */
struct sdma_firmware_header_v1_0 {
struct common_firmware_header header;
@ -148,6 +166,7 @@ union amdgpu_firmware_header {
struct gfx_firmware_header_v1_0 gfx;
struct rlc_firmware_header_v1_0 rlc;
struct rlc_firmware_header_v2_0 rlc_v2_0;
struct rlc_firmware_header_v2_1 rlc_v2_1;
struct sdma_firmware_header_v1_0 sdma;
struct sdma_firmware_header_v1_1 sdma_v1_1;
struct gpu_info_firmware_header_v1_0 gpu_info;
@ -168,6 +187,9 @@ enum AMDGPU_UCODE_ID {
AMDGPU_UCODE_ID_CP_MEC2,
AMDGPU_UCODE_ID_CP_MEC2_JT,
AMDGPU_UCODE_ID_RLC_G,
AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL,
AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM,
AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM,
AMDGPU_UCODE_ID_STORAGE,
AMDGPU_UCODE_ID_SMC,
AMDGPU_UCODE_ID_UVD,

View File

@ -66,15 +66,18 @@
#define FIRMWARE_POLARIS10 "amdgpu/polaris10_uvd.bin"
#define FIRMWARE_POLARIS11 "amdgpu/polaris11_uvd.bin"
#define FIRMWARE_POLARIS12 "amdgpu/polaris12_uvd.bin"
#define FIRMWARE_VEGAM "amdgpu/vegam_uvd.bin"
#define FIRMWARE_VEGA10 "amdgpu/vega10_uvd.bin"
#define FIRMWARE_VEGA12 "amdgpu/vega12_uvd.bin"
#define FIRMWARE_VEGA20 "amdgpu/vega20_uvd.bin"
#define mmUVD_GPCOM_VCPU_DATA0_VEGA10 (0x03c4 + 0x7e00)
#define mmUVD_GPCOM_VCPU_DATA1_VEGA10 (0x03c5 + 0x7e00)
#define mmUVD_GPCOM_VCPU_CMD_VEGA10 (0x03c3 + 0x7e00)
#define mmUVD_NO_OP_VEGA10 (0x03ff + 0x7e00)
#define mmUVD_ENGINE_CNTL_VEGA10 (0x03c6 + 0x7e00)
/* These are common relative offsets for all asics, from uvd_7_0_offset.h, */
#define UVD_GPCOM_VCPU_CMD 0x03c3
#define UVD_GPCOM_VCPU_DATA0 0x03c4
#define UVD_GPCOM_VCPU_DATA1 0x03c5
#define UVD_NO_OP 0x03ff
#define UVD_BASE_SI 0x3800
/**
* amdgpu_uvd_cs_ctx - Command submission parser context
@ -109,9 +112,11 @@ MODULE_FIRMWARE(FIRMWARE_STONEY);
MODULE_FIRMWARE(FIRMWARE_POLARIS10);
MODULE_FIRMWARE(FIRMWARE_POLARIS11);
MODULE_FIRMWARE(FIRMWARE_POLARIS12);
MODULE_FIRMWARE(FIRMWARE_VEGAM);
MODULE_FIRMWARE(FIRMWARE_VEGA10);
MODULE_FIRMWARE(FIRMWARE_VEGA12);
MODULE_FIRMWARE(FIRMWARE_VEGA20);
static void amdgpu_uvd_idle_work_handler(struct work_struct *work);
@ -123,9 +128,9 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
const char *fw_name;
const struct common_firmware_header *hdr;
unsigned version_major, version_minor, family_id;
int i, r;
int i, j, r;
INIT_DELAYED_WORK(&adev->uvd.idle_work, amdgpu_uvd_idle_work_handler);
INIT_DELAYED_WORK(&adev->uvd.inst->idle_work, amdgpu_uvd_idle_work_handler);
switch (adev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_CIK
@ -172,6 +177,12 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
case CHIP_VEGA12:
fw_name = FIRMWARE_VEGA12;
break;
case CHIP_VEGAM:
fw_name = FIRMWARE_VEGAM;
break;
case CHIP_VEGA20:
fw_name = FIRMWARE_VEGA20;
break;
default:
return -EINVAL;
}
@ -226,28 +237,30 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM, &adev->uvd.vcpu_bo,
&adev->uvd.gpu_addr, &adev->uvd.cpu_addr);
if (r) {
dev_err(adev->dev, "(%d) failed to allocate UVD bo\n", r);
return r;
}
for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
ring = &adev->uvd.ring;
rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
r = drm_sched_entity_init(&ring->sched, &adev->uvd.entity,
rq, amdgpu_sched_jobs, NULL);
if (r != 0) {
DRM_ERROR("Failed setting up UVD run queue.\n");
return r;
}
r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM, &adev->uvd.inst[j].vcpu_bo,
&adev->uvd.inst[j].gpu_addr, &adev->uvd.inst[j].cpu_addr);
if (r) {
dev_err(adev->dev, "(%d) failed to allocate UVD bo\n", r);
return r;
}
for (i = 0; i < adev->uvd.max_handles; ++i) {
atomic_set(&adev->uvd.handles[i], 0);
adev->uvd.filp[i] = NULL;
}
ring = &adev->uvd.inst[j].ring;
rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
r = drm_sched_entity_init(&ring->sched, &adev->uvd.inst[j].entity,
rq, NULL);
if (r != 0) {
DRM_ERROR("Failed setting up UVD(%d) run queue.\n", j);
return r;
}
for (i = 0; i < adev->uvd.max_handles; ++i) {
atomic_set(&adev->uvd.inst[j].handles[i], 0);
adev->uvd.inst[j].filp[i] = NULL;
}
}
/* from uvd v5.0 HW addressing capacity increased to 64 bits */
if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0))
adev->uvd.address_64_bit = true;
@ -274,20 +287,22 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
{
int i;
kfree(adev->uvd.saved_bo);
int i, j;
drm_sched_entity_fini(&adev->uvd.ring.sched, &adev->uvd.entity);
for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
kfree(adev->uvd.inst[j].saved_bo);
amdgpu_bo_free_kernel(&adev->uvd.vcpu_bo,
&adev->uvd.gpu_addr,
(void **)&adev->uvd.cpu_addr);
drm_sched_entity_fini(&adev->uvd.inst[j].ring.sched, &adev->uvd.inst[j].entity);
amdgpu_ring_fini(&adev->uvd.ring);
amdgpu_bo_free_kernel(&adev->uvd.inst[j].vcpu_bo,
&adev->uvd.inst[j].gpu_addr,
(void **)&adev->uvd.inst[j].cpu_addr);
for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i)
amdgpu_ring_fini(&adev->uvd.ring_enc[i]);
amdgpu_ring_fini(&adev->uvd.inst[j].ring);
for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i)
amdgpu_ring_fini(&adev->uvd.inst[j].ring_enc[i]);
}
release_firmware(adev->uvd.fw);
return 0;
@ -297,32 +312,33 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev)
{
unsigned size;
void *ptr;
int i;
int i, j;
if (adev->uvd.vcpu_bo == NULL)
return 0;
for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
if (adev->uvd.inst[j].vcpu_bo == NULL)
continue;
cancel_delayed_work_sync(&adev->uvd.idle_work);
cancel_delayed_work_sync(&adev->uvd.inst[j].idle_work);
/* only valid for physical mode */
if (adev->asic_type < CHIP_POLARIS10) {
for (i = 0; i < adev->uvd.max_handles; ++i)
if (atomic_read(&adev->uvd.handles[i]))
break;
/* only valid for physical mode */
if (adev->asic_type < CHIP_POLARIS10) {
for (i = 0; i < adev->uvd.max_handles; ++i)
if (atomic_read(&adev->uvd.inst[j].handles[i]))
break;
if (i == adev->uvd.max_handles)
return 0;
if (i == adev->uvd.max_handles)
continue;
}
size = amdgpu_bo_size(adev->uvd.inst[j].vcpu_bo);
ptr = adev->uvd.inst[j].cpu_addr;
adev->uvd.inst[j].saved_bo = kmalloc(size, GFP_KERNEL);
if (!adev->uvd.inst[j].saved_bo)
return -ENOMEM;
memcpy_fromio(adev->uvd.inst[j].saved_bo, ptr, size);
}
size = amdgpu_bo_size(adev->uvd.vcpu_bo);
ptr = adev->uvd.cpu_addr;
adev->uvd.saved_bo = kmalloc(size, GFP_KERNEL);
if (!adev->uvd.saved_bo)
return -ENOMEM;
memcpy_fromio(adev->uvd.saved_bo, ptr, size);
return 0;
}
@ -330,59 +346,65 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev)
{
unsigned size;
void *ptr;
int i;
if (adev->uvd.vcpu_bo == NULL)
return -EINVAL;
for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
if (adev->uvd.inst[i].vcpu_bo == NULL)
return -EINVAL;
size = amdgpu_bo_size(adev->uvd.vcpu_bo);
ptr = adev->uvd.cpu_addr;
size = amdgpu_bo_size(adev->uvd.inst[i].vcpu_bo);
ptr = adev->uvd.inst[i].cpu_addr;
if (adev->uvd.saved_bo != NULL) {
memcpy_toio(ptr, adev->uvd.saved_bo, size);
kfree(adev->uvd.saved_bo);
adev->uvd.saved_bo = NULL;
} else {
const struct common_firmware_header *hdr;
unsigned offset;
if (adev->uvd.inst[i].saved_bo != NULL) {
memcpy_toio(ptr, adev->uvd.inst[i].saved_bo, size);
kfree(adev->uvd.inst[i].saved_bo);
adev->uvd.inst[i].saved_bo = NULL;
} else {
const struct common_firmware_header *hdr;
unsigned offset;
hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
memcpy_toio(adev->uvd.cpu_addr, adev->uvd.fw->data + offset,
le32_to_cpu(hdr->ucode_size_bytes));
size -= le32_to_cpu(hdr->ucode_size_bytes);
ptr += le32_to_cpu(hdr->ucode_size_bytes);
hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
memcpy_toio(adev->uvd.inst[i].cpu_addr, adev->uvd.fw->data + offset,
le32_to_cpu(hdr->ucode_size_bytes));
size -= le32_to_cpu(hdr->ucode_size_bytes);
ptr += le32_to_cpu(hdr->ucode_size_bytes);
}
memset_io(ptr, 0, size);
/* to restore uvd fence seq */
amdgpu_fence_driver_force_completion(&adev->uvd.inst[i].ring);
}
memset_io(ptr, 0, size);
/* to restore uvd fence seq */
amdgpu_fence_driver_force_completion(&adev->uvd.ring);
}
return 0;
}
void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
{
struct amdgpu_ring *ring = &adev->uvd.ring;
int i, r;
struct amdgpu_ring *ring;
int i, j, r;
for (i = 0; i < adev->uvd.max_handles; ++i) {
uint32_t handle = atomic_read(&adev->uvd.handles[i]);
if (handle != 0 && adev->uvd.filp[i] == filp) {
struct dma_fence *fence;
for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
ring = &adev->uvd.inst[j].ring;
r = amdgpu_uvd_get_destroy_msg(ring, handle,
false, &fence);
if (r) {
DRM_ERROR("Error destroying UVD (%d)!\n", r);
continue;
for (i = 0; i < adev->uvd.max_handles; ++i) {
uint32_t handle = atomic_read(&adev->uvd.inst[j].handles[i]);
if (handle != 0 && adev->uvd.inst[j].filp[i] == filp) {
struct dma_fence *fence;
r = amdgpu_uvd_get_destroy_msg(ring, handle,
false, &fence);
if (r) {
DRM_ERROR("Error destroying UVD(%d) %d!\n", j, r);
continue;
}
dma_fence_wait(fence, false);
dma_fence_put(fence);
adev->uvd.inst[j].filp[i] = NULL;
atomic_set(&adev->uvd.inst[j].handles[i], 0);
}
dma_fence_wait(fence, false);
dma_fence_put(fence);
adev->uvd.filp[i] = NULL;
atomic_set(&adev->uvd.handles[i], 0);
}
}
}
@ -657,15 +679,16 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
void *ptr;
long r;
int i;
uint32_t ip_instance = ctx->parser->job->ring->me;
if (offset & 0x3F) {
DRM_ERROR("UVD messages must be 64 byte aligned!\n");
DRM_ERROR("UVD(%d) messages must be 64 byte aligned!\n", ip_instance);
return -EINVAL;
}
r = amdgpu_bo_kmap(bo, &ptr);
if (r) {
DRM_ERROR("Failed mapping the UVD message (%ld)!\n", r);
DRM_ERROR("Failed mapping the UVD(%d) message (%ld)!\n", ip_instance, r);
return r;
}
@ -675,7 +698,7 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
handle = msg[2];
if (handle == 0) {
DRM_ERROR("Invalid UVD handle!\n");
DRM_ERROR("Invalid UVD(%d) handle!\n", ip_instance);
return -EINVAL;
}
@ -686,18 +709,18 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
/* try to alloc a new handle */
for (i = 0; i < adev->uvd.max_handles; ++i) {
if (atomic_read(&adev->uvd.handles[i]) == handle) {
DRM_ERROR("Handle 0x%x already in use!\n", handle);
if (atomic_read(&adev->uvd.inst[ip_instance].handles[i]) == handle) {
DRM_ERROR("(%d)Handle 0x%x already in use!\n", ip_instance, handle);
return -EINVAL;
}
if (!atomic_cmpxchg(&adev->uvd.handles[i], 0, handle)) {
adev->uvd.filp[i] = ctx->parser->filp;
if (!atomic_cmpxchg(&adev->uvd.inst[ip_instance].handles[i], 0, handle)) {
adev->uvd.inst[ip_instance].filp[i] = ctx->parser->filp;
return 0;
}
}
DRM_ERROR("No more free UVD handles!\n");
DRM_ERROR("No more free UVD(%d) handles!\n", ip_instance);
return -ENOSPC;
case 1:
@ -709,27 +732,27 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
/* validate the handle */
for (i = 0; i < adev->uvd.max_handles; ++i) {
if (atomic_read(&adev->uvd.handles[i]) == handle) {
if (adev->uvd.filp[i] != ctx->parser->filp) {
DRM_ERROR("UVD handle collision detected!\n");
if (atomic_read(&adev->uvd.inst[ip_instance].handles[i]) == handle) {
if (adev->uvd.inst[ip_instance].filp[i] != ctx->parser->filp) {
DRM_ERROR("UVD(%d) handle collision detected!\n", ip_instance);
return -EINVAL;
}
return 0;
}
}
DRM_ERROR("Invalid UVD handle 0x%x!\n", handle);
DRM_ERROR("Invalid UVD(%d) handle 0x%x!\n", ip_instance, handle);
return -ENOENT;
case 2:
/* it's a destroy msg, free the handle */
for (i = 0; i < adev->uvd.max_handles; ++i)
atomic_cmpxchg(&adev->uvd.handles[i], handle, 0);
atomic_cmpxchg(&adev->uvd.inst[ip_instance].handles[i], handle, 0);
amdgpu_bo_kunmap(bo);
return 0;
default:
DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type);
DRM_ERROR("Illegal UVD(%d) message type (%d)!\n", ip_instance, msg_type);
return -EINVAL;
}
BUG();
@ -800,7 +823,7 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
}
if ((cmd == 0 || cmd == 0x3) &&
(start >> 28) != (ctx->parser->adev->uvd.gpu_addr >> 28)) {
(start >> 28) != (ctx->parser->adev->uvd.inst->gpu_addr >> 28)) {
DRM_ERROR("msg/fb buffer %LX-%LX out of 256MB segment!\n",
start, end);
return -EINVAL;
@ -968,6 +991,8 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
uint64_t addr;
long r;
int i;
unsigned offset_idx = 0;
unsigned offset[3] = { UVD_BASE_SI, 0, 0 };
amdgpu_bo_kunmap(bo);
amdgpu_bo_unpin(bo);
@ -987,17 +1012,16 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
goto err;
if (adev->asic_type >= CHIP_VEGA10) {
data[0] = PACKET0(mmUVD_GPCOM_VCPU_DATA0_VEGA10, 0);
data[1] = PACKET0(mmUVD_GPCOM_VCPU_DATA1_VEGA10, 0);
data[2] = PACKET0(mmUVD_GPCOM_VCPU_CMD_VEGA10, 0);
data[3] = PACKET0(mmUVD_NO_OP_VEGA10, 0);
} else {
data[0] = PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0);
data[1] = PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0);
data[2] = PACKET0(mmUVD_GPCOM_VCPU_CMD, 0);
data[3] = PACKET0(mmUVD_NO_OP, 0);
offset_idx = 1 + ring->me;
offset[1] = adev->reg_offset[UVD_HWIP][0][1];
offset[2] = adev->reg_offset[UVD_HWIP][1][1];
}
data[0] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_DATA0, 0);
data[1] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_DATA1, 0);
data[2] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_CMD, 0);
data[3] = PACKET0(offset[offset_idx] + UVD_NO_OP, 0);
ib = &job->ibs[0];
addr = amdgpu_bo_gpu_offset(bo);
ib->ptr[0] = data[0];
@ -1033,7 +1057,7 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
if (r)
goto err_free;
r = amdgpu_job_submit(job, ring, &adev->uvd.entity,
r = amdgpu_job_submit(job, ring, &adev->uvd.inst[ring->me].entity,
AMDGPU_FENCE_OWNER_UNDEFINED, &f);
if (r)
goto err_free;
@ -1121,8 +1145,15 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
{
struct amdgpu_device *adev =
container_of(work, struct amdgpu_device, uvd.idle_work.work);
unsigned fences = amdgpu_fence_count_emitted(&adev->uvd.ring);
container_of(work, struct amdgpu_device, uvd.inst->idle_work.work);
unsigned fences = 0, i, j;
for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring);
for (j = 0; j < adev->uvd.num_enc_rings; ++j) {
fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring_enc[j]);
}
}
if (fences == 0) {
if (adev->pm.dpm_enabled) {
@ -1136,7 +1167,7 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
AMD_CG_STATE_GATE);
}
} else {
schedule_delayed_work(&adev->uvd.idle_work, UVD_IDLE_TIMEOUT);
schedule_delayed_work(&adev->uvd.inst->idle_work, UVD_IDLE_TIMEOUT);
}
}
@ -1148,7 +1179,7 @@ void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring)
if (amdgpu_sriov_vf(adev))
return;
set_clocks = !cancel_delayed_work_sync(&adev->uvd.idle_work);
set_clocks = !cancel_delayed_work_sync(&adev->uvd.inst->idle_work);
if (set_clocks) {
if (adev->pm.dpm_enabled) {
amdgpu_dpm_enable_uvd(adev, true);
@ -1165,7 +1196,7 @@ void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring)
void amdgpu_uvd_ring_end_use(struct amdgpu_ring *ring)
{
if (!amdgpu_sriov_vf(ring->adev))
schedule_delayed_work(&ring->adev->uvd.idle_work, UVD_IDLE_TIMEOUT);
schedule_delayed_work(&ring->adev->uvd.inst->idle_work, UVD_IDLE_TIMEOUT);
}
/**
@ -1179,27 +1210,28 @@ int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
struct dma_fence *fence;
long r;
uint32_t ip_instance = ring->me;
r = amdgpu_uvd_get_create_msg(ring, 1, NULL);
if (r) {
DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
DRM_ERROR("amdgpu: (%d)failed to get create msg (%ld).\n", ip_instance, r);
goto error;
}
r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence);
if (r) {
DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
DRM_ERROR("amdgpu: (%d)failed to get destroy ib (%ld).\n", ip_instance, r);
goto error;
}
r = dma_fence_wait_timeout(fence, false, timeout);
if (r == 0) {
DRM_ERROR("amdgpu: IB test timed out.\n");
DRM_ERROR("amdgpu: (%d)IB test timed out.\n", ip_instance);
r = -ETIMEDOUT;
} else if (r < 0) {
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
DRM_ERROR("amdgpu: (%d)fence wait failed (%ld).\n", ip_instance, r);
} else {
DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
DRM_DEBUG("ib test on (%d)ring %d succeeded\n", ip_instance, ring->idx);
r = 0;
}
@ -1227,7 +1259,7 @@ uint32_t amdgpu_uvd_used_handles(struct amdgpu_device *adev)
* necessarily linear. So we need to count
* all non-zero handles.
*/
if (atomic_read(&adev->uvd.handles[i]))
if (atomic_read(&adev->uvd.inst->handles[i]))
used_handles++;
}

View File

@ -31,30 +31,37 @@
#define AMDGPU_UVD_SESSION_SIZE (50*1024)
#define AMDGPU_UVD_FIRMWARE_OFFSET 256
#define AMDGPU_MAX_UVD_INSTANCES 2
#define AMDGPU_UVD_FIRMWARE_SIZE(adev) \
(AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(((const struct common_firmware_header *)(adev)->uvd.fw->data)->ucode_size_bytes) + \
8) - AMDGPU_UVD_FIRMWARE_OFFSET)
struct amdgpu_uvd {
struct amdgpu_uvd_inst {
struct amdgpu_bo *vcpu_bo;
void *cpu_addr;
uint64_t gpu_addr;
unsigned fw_version;
void *saved_bo;
unsigned max_handles;
atomic_t handles[AMDGPU_MAX_UVD_HANDLES];
struct drm_file *filp[AMDGPU_MAX_UVD_HANDLES];
struct delayed_work idle_work;
const struct firmware *fw; /* UVD firmware */
struct amdgpu_ring ring;
struct amdgpu_ring ring_enc[AMDGPU_MAX_UVD_ENC_RINGS];
struct amdgpu_irq_src irq;
bool address_64_bit;
bool use_ctx_buf;
struct drm_sched_entity entity;
struct drm_sched_entity entity_enc;
uint32_t srbm_soft_reset;
};
struct amdgpu_uvd {
const struct firmware *fw; /* UVD firmware */
unsigned fw_version;
unsigned max_handles;
unsigned num_enc_rings;
uint8_t num_uvd_inst;
bool address_64_bit;
bool use_ctx_buf;
struct amdgpu_uvd_inst inst[AMDGPU_MAX_UVD_INSTANCES];
};
int amdgpu_uvd_sw_init(struct amdgpu_device *adev);

View File

@ -51,11 +51,13 @@
#define FIRMWARE_FIJI "amdgpu/fiji_vce.bin"
#define FIRMWARE_STONEY "amdgpu/stoney_vce.bin"
#define FIRMWARE_POLARIS10 "amdgpu/polaris10_vce.bin"
#define FIRMWARE_POLARIS11 "amdgpu/polaris11_vce.bin"
#define FIRMWARE_POLARIS12 "amdgpu/polaris12_vce.bin"
#define FIRMWARE_POLARIS11 "amdgpu/polaris11_vce.bin"
#define FIRMWARE_POLARIS12 "amdgpu/polaris12_vce.bin"
#define FIRMWARE_VEGAM "amdgpu/vegam_vce.bin"
#define FIRMWARE_VEGA10 "amdgpu/vega10_vce.bin"
#define FIRMWARE_VEGA12 "amdgpu/vega12_vce.bin"
#define FIRMWARE_VEGA20 "amdgpu/vega20_vce.bin"
#ifdef CONFIG_DRM_AMDGPU_CIK
MODULE_FIRMWARE(FIRMWARE_BONAIRE);
@ -71,9 +73,11 @@ MODULE_FIRMWARE(FIRMWARE_STONEY);
MODULE_FIRMWARE(FIRMWARE_POLARIS10);
MODULE_FIRMWARE(FIRMWARE_POLARIS11);
MODULE_FIRMWARE(FIRMWARE_POLARIS12);
MODULE_FIRMWARE(FIRMWARE_VEGAM);
MODULE_FIRMWARE(FIRMWARE_VEGA10);
MODULE_FIRMWARE(FIRMWARE_VEGA12);
MODULE_FIRMWARE(FIRMWARE_VEGA20);
static void amdgpu_vce_idle_work_handler(struct work_struct *work);
@ -132,12 +136,18 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
case CHIP_POLARIS12:
fw_name = FIRMWARE_POLARIS12;
break;
case CHIP_VEGAM:
fw_name = FIRMWARE_VEGAM;
break;
case CHIP_VEGA10:
fw_name = FIRMWARE_VEGA10;
break;
case CHIP_VEGA12:
fw_name = FIRMWARE_VEGA12;
break;
case CHIP_VEGA20:
fw_name = FIRMWARE_VEGA20;
break;
default:
return -EINVAL;
@ -181,7 +191,7 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
ring = &adev->vce.ring[0];
rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
r = drm_sched_entity_init(&ring->sched, &adev->vce.entity,
rq, amdgpu_sched_jobs, NULL);
rq, NULL);
if (r != 0) {
DRM_ERROR("Failed setting up VCE run queue.\n");
return r;
@ -755,6 +765,18 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
if (r)
goto out;
break;
case 0x0500000d: /* MV buffer */
r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3,
idx + 2, 0, 0);
if (r)
goto out;
r = amdgpu_vce_validate_bo(p, ib_idx, idx + 8,
idx + 7, 0, 0);
if (r)
goto out;
break;
}
idx += len / 4;
@ -860,6 +882,18 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
goto out;
break;
case 0x0500000d: /* MV buffer */
r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3,
idx + 2, *size, 0);
if (r)
goto out;
r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 8,
idx + 7, *size / 12, 0);
if (r)
goto out;
break;
default:
DRM_ERROR("invalid VCE command (0x%x)!\n", cmd);
r = -EINVAL;

View File

@ -105,7 +105,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
ring = &adev->vcn.ring_dec;
rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
r = drm_sched_entity_init(&ring->sched, &adev->vcn.entity_dec,
rq, amdgpu_sched_jobs, NULL);
rq, NULL);
if (r != 0) {
DRM_ERROR("Failed setting up VCN dec run queue.\n");
return r;
@ -114,7 +114,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
ring = &adev->vcn.ring_enc[0];
rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
r = drm_sched_entity_init(&ring->sched, &adev->vcn.entity_enc,
rq, amdgpu_sched_jobs, NULL);
rq, NULL);
if (r != 0) {
DRM_ERROR("Failed setting up VCN enc run queue.\n");
return r;
@ -205,13 +205,18 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
struct amdgpu_device *adev =
container_of(work, struct amdgpu_device, vcn.idle_work.work);
unsigned fences = amdgpu_fence_count_emitted(&adev->vcn.ring_dec);
unsigned i;
for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
fences += amdgpu_fence_count_emitted(&adev->vcn.ring_enc[i]);
}
if (fences == 0) {
if (adev->pm.dpm_enabled) {
/* might be used when with pg/cg
if (adev->pm.dpm_enabled)
amdgpu_dpm_enable_uvd(adev, false);
*/
}
else
amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
AMD_PG_STATE_GATE);
} else {
schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
}
@ -223,9 +228,11 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work);
if (set_clocks && adev->pm.dpm_enabled) {
/* might be used when with pg/cg
amdgpu_dpm_enable_uvd(adev, true);
*/
if (adev->pm.dpm_enabled)
amdgpu_dpm_enable_uvd(adev, true);
else
amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
AMD_PG_STATE_UNGATE);
}
}

View File

@ -45,6 +45,17 @@
#define VCN_ENC_CMD_REG_WRITE 0x0000000b
#define VCN_ENC_CMD_REG_WAIT 0x0000000c
enum engine_status_constants {
UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON = 0x2AAAA0,
UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON = 0x00000002,
UVD_STATUS__UVD_BUSY = 0x00000004,
GB_ADDR_CONFIG_DEFAULT = 0x26010011,
UVD_STATUS__IDLE = 0x2,
UVD_STATUS__BUSY = 0x5,
UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF = 0x1,
UVD_STATUS__RBC_BUSY = 0x1,
};
struct amdgpu_vcn {
struct amdgpu_bo *vcpu_bo;
void *cpu_addr;

View File

@ -94,6 +94,34 @@ struct amdgpu_prt_cb {
struct dma_fence_cb cb;
};
static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
struct amdgpu_vm *vm,
struct amdgpu_bo *bo)
{
base->vm = vm;
base->bo = bo;
INIT_LIST_HEAD(&base->bo_list);
INIT_LIST_HEAD(&base->vm_status);
if (!bo)
return;
list_add_tail(&base->bo_list, &bo->va);
if (bo->tbo.resv != vm->root.base.bo->tbo.resv)
return;
if (bo->preferred_domains &
amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type))
return;
/*
* we checked all the prerequisites, but it looks like this per vm bo
* is currently evicted. add the bo to the evicted list to make sure it
* is validated on next vm use to avoid fault.
* */
list_move_tail(&base->vm_status, &vm->evicted);
}
/**
* amdgpu_vm_level_shift - return the addr shift for each level
*
@ -196,24 +224,16 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
void *param)
{
struct ttm_bo_global *glob = adev->mman.bdev.glob;
int r;
struct amdgpu_vm_bo_base *bo_base, *tmp;
int r = 0;
spin_lock(&vm->status_lock);
while (!list_empty(&vm->evicted)) {
struct amdgpu_vm_bo_base *bo_base;
struct amdgpu_bo *bo;
list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) {
struct amdgpu_bo *bo = bo_base->bo;
bo_base = list_first_entry(&vm->evicted,
struct amdgpu_vm_bo_base,
vm_status);
spin_unlock(&vm->status_lock);
bo = bo_base->bo;
BUG_ON(!bo);
if (bo->parent) {
r = validate(param, bo);
if (r)
return r;
break;
spin_lock(&glob->lru_lock);
ttm_bo_move_to_lru_tail(&bo->tbo);
@ -222,22 +242,29 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
spin_unlock(&glob->lru_lock);
}
if (bo->tbo.type == ttm_bo_type_kernel &&
vm->use_cpu_for_update) {
r = amdgpu_bo_kmap(bo, NULL);
if (r)
return r;
}
spin_lock(&vm->status_lock);
if (bo->tbo.type != ttm_bo_type_kernel)
if (bo->tbo.type != ttm_bo_type_kernel) {
spin_lock(&vm->moved_lock);
list_move(&bo_base->vm_status, &vm->moved);
else
spin_unlock(&vm->moved_lock);
} else {
list_move(&bo_base->vm_status, &vm->relocated);
}
}
spin_unlock(&vm->status_lock);
return 0;
spin_lock(&glob->lru_lock);
list_for_each_entry(bo_base, &vm->idle, vm_status) {
struct amdgpu_bo *bo = bo_base->bo;
if (!bo->parent)
continue;
ttm_bo_move_to_lru_tail(&bo->tbo);
if (bo->shadow)
ttm_bo_move_to_lru_tail(&bo->shadow->tbo);
}
spin_unlock(&glob->lru_lock);
return r;
}
/**
@ -249,13 +276,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
*/
bool amdgpu_vm_ready(struct amdgpu_vm *vm)
{
bool ready;
spin_lock(&vm->status_lock);
ready = list_empty(&vm->evicted);
spin_unlock(&vm->status_lock);
return ready;
return list_empty(&vm->evicted);
}
/**
@ -412,11 +433,16 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
struct amdgpu_bo *pt;
if (!entry->base.bo) {
r = amdgpu_bo_create(adev,
amdgpu_vm_bo_size(adev, level),
AMDGPU_GPU_PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM, flags,
ttm_bo_type_kernel, resv, &pt);
struct amdgpu_bo_param bp;
memset(&bp, 0, sizeof(bp));
bp.size = amdgpu_vm_bo_size(adev, level);
bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
bp.flags = flags;
bp.type = ttm_bo_type_kernel;
bp.resv = resv;
r = amdgpu_bo_create(adev, &bp, &pt);
if (r)
return r;
@ -441,12 +467,8 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
*/
pt->parent = amdgpu_bo_ref(parent->base.bo);
entry->base.vm = vm;
entry->base.bo = pt;
list_add_tail(&entry->base.bo_list, &pt->va);
spin_lock(&vm->status_lock);
list_add(&entry->base.vm_status, &vm->relocated);
spin_unlock(&vm->status_lock);
amdgpu_vm_bo_base_init(&entry->base, vm, pt);
list_move(&entry->base.vm_status, &vm->relocated);
}
if (level < AMDGPU_VM_PTB) {
@ -628,7 +650,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid);
if (vm_flush_needed || pasid_mapping_needed) {
r = amdgpu_fence_emit(ring, &fence);
r = amdgpu_fence_emit(ring, &fence, 0);
if (r)
return r;
}
@ -893,10 +915,8 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_device *adev,
if (!entry->base.bo)
continue;
spin_lock(&vm->status_lock);
if (list_empty(&entry->base.vm_status))
list_add(&entry->base.vm_status, &vm->relocated);
spin_unlock(&vm->status_lock);
if (!entry->base.moved)
list_move(&entry->base.vm_status, &vm->relocated);
amdgpu_vm_invalidate_level(adev, vm, entry, level + 1);
}
}
@ -926,6 +946,14 @@ restart:
params.adev = adev;
if (vm->use_cpu_for_update) {
struct amdgpu_vm_bo_base *bo_base;
list_for_each_entry(bo_base, &vm->relocated, vm_status) {
r = amdgpu_bo_kmap(bo_base->bo, NULL);
if (unlikely(r))
return r;
}
r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM);
if (unlikely(r))
return r;
@ -941,7 +969,6 @@ restart:
params.func = amdgpu_vm_do_set_ptes;
}
spin_lock(&vm->status_lock);
while (!list_empty(&vm->relocated)) {
struct amdgpu_vm_bo_base *bo_base, *parent;
struct amdgpu_vm_pt *pt, *entry;
@ -950,14 +977,12 @@ restart:
bo_base = list_first_entry(&vm->relocated,
struct amdgpu_vm_bo_base,
vm_status);
list_del_init(&bo_base->vm_status);
spin_unlock(&vm->status_lock);
bo_base->moved = false;
list_move(&bo_base->vm_status, &vm->idle);
bo = bo_base->bo->parent;
if (!bo) {
spin_lock(&vm->status_lock);
if (!bo)
continue;
}
parent = list_first_entry(&bo->va, struct amdgpu_vm_bo_base,
bo_list);
@ -966,12 +991,10 @@ restart:
amdgpu_vm_update_pde(&params, vm, pt, entry);
spin_lock(&vm->status_lock);
if (!vm->use_cpu_for_update &&
(ndw - params.ib->length_dw) < 32)
break;
}
spin_unlock(&vm->status_lock);
if (vm->use_cpu_for_update) {
/* Flush HDP */
@ -1074,9 +1097,7 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
if (entry->huge) {
/* Add the entry to the relocated list to update it. */
entry->huge = false;
spin_lock(&p->vm->status_lock);
list_move(&entry->base.vm_status, &p->vm->relocated);
spin_unlock(&p->vm->status_lock);
}
return;
}
@ -1555,9 +1576,22 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
amdgpu_asic_flush_hdp(adev, NULL);
}
spin_lock(&vm->status_lock);
spin_lock(&vm->moved_lock);
list_del_init(&bo_va->base.vm_status);
spin_unlock(&vm->status_lock);
spin_unlock(&vm->moved_lock);
/* If the BO is not in its preferred location add it back to
* the evicted list so that it gets validated again on the
* next command submission.
*/
if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
uint32_t mem_type = bo->tbo.mem.mem_type;
if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(mem_type)))
list_add_tail(&bo_va->base.vm_status, &vm->evicted);
else
list_add(&bo_va->base.vm_status, &vm->idle);
}
list_splice_init(&bo_va->invalids, &bo_va->valids);
bo_va->cleared = clear;
@ -1766,19 +1800,18 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
struct amdgpu_vm *vm)
{
struct amdgpu_bo_va *bo_va, *tmp;
struct list_head moved;
bool clear;
int r = 0;
int r;
spin_lock(&vm->status_lock);
while (!list_empty(&vm->moved)) {
struct amdgpu_bo_va *bo_va;
struct reservation_object *resv;
INIT_LIST_HEAD(&moved);
spin_lock(&vm->moved_lock);
list_splice_init(&vm->moved, &moved);
spin_unlock(&vm->moved_lock);
bo_va = list_first_entry(&vm->moved,
struct amdgpu_bo_va, base.vm_status);
spin_unlock(&vm->status_lock);
resv = bo_va->base.bo->tbo.resv;
list_for_each_entry_safe(bo_va, tmp, &moved, base.vm_status) {
struct reservation_object *resv = bo_va->base.bo->tbo.resv;
/* Per VM BOs never need to bo cleared in the page tables */
if (resv == vm->root.base.bo->tbo.resv)
@ -1791,17 +1824,19 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
clear = true;
r = amdgpu_vm_bo_update(adev, bo_va, clear);
if (r)
if (r) {
spin_lock(&vm->moved_lock);
list_splice(&moved, &vm->moved);
spin_unlock(&vm->moved_lock);
return r;
}
if (!clear && resv != vm->root.base.bo->tbo.resv)
reservation_object_unlock(resv);
spin_lock(&vm->status_lock);
}
spin_unlock(&vm->status_lock);
return r;
return 0;
}
/**
@ -1827,36 +1862,12 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
if (bo_va == NULL) {
return NULL;
}
bo_va->base.vm = vm;
bo_va->base.bo = bo;
INIT_LIST_HEAD(&bo_va->base.bo_list);
INIT_LIST_HEAD(&bo_va->base.vm_status);
amdgpu_vm_bo_base_init(&bo_va->base, vm, bo);
bo_va->ref_count = 1;
INIT_LIST_HEAD(&bo_va->valids);
INIT_LIST_HEAD(&bo_va->invalids);
if (!bo)
return bo_va;
list_add_tail(&bo_va->base.bo_list, &bo->va);
if (bo->tbo.resv != vm->root.base.bo->tbo.resv)
return bo_va;
if (bo->preferred_domains &
amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type))
return bo_va;
/*
* We checked all the prerequisites, but it looks like this per VM BO
* is currently evicted. add the BO to the evicted list to make sure it
* is validated on next VM use to avoid fault.
* */
spin_lock(&vm->status_lock);
list_move_tail(&bo_va->base.vm_status, &vm->evicted);
spin_unlock(&vm->status_lock);
return bo_va;
}
@ -1884,11 +1895,11 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
if (mapping->flags & AMDGPU_PTE_PRT)
amdgpu_vm_prt_get(adev);
if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
spin_lock(&vm->status_lock);
if (list_empty(&bo_va->base.vm_status))
list_add(&bo_va->base.vm_status, &vm->moved);
spin_unlock(&vm->status_lock);
if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv &&
!bo_va->base.moved) {
spin_lock(&vm->moved_lock);
list_move(&bo_va->base.vm_status, &vm->moved);
spin_unlock(&vm->moved_lock);
}
trace_amdgpu_vm_bo_map(bo_va, mapping);
}
@ -2198,9 +2209,9 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
list_del(&bo_va->base.bo_list);
spin_lock(&vm->status_lock);
spin_lock(&vm->moved_lock);
list_del(&bo_va->base.vm_status);
spin_unlock(&vm->status_lock);
spin_unlock(&vm->moved_lock);
list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
list_del(&mapping->list);
@ -2234,33 +2245,34 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
{
struct amdgpu_vm_bo_base *bo_base;
/* shadow bo doesn't have bo base, its validation needs its parent */
if (bo->parent && bo->parent->shadow == bo)
bo = bo->parent;
list_for_each_entry(bo_base, &bo->va, bo_list) {
struct amdgpu_vm *vm = bo_base->vm;
bool was_moved = bo_base->moved;
bo_base->moved = true;
if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
spin_lock(&bo_base->vm->status_lock);
if (bo->tbo.type == ttm_bo_type_kernel)
list_move(&bo_base->vm_status, &vm->evicted);
else
list_move_tail(&bo_base->vm_status,
&vm->evicted);
spin_unlock(&bo_base->vm->status_lock);
continue;
}
if (was_moved)
continue;
if (bo->tbo.type == ttm_bo_type_kernel) {
spin_lock(&bo_base->vm->status_lock);
if (list_empty(&bo_base->vm_status))
list_add(&bo_base->vm_status, &vm->relocated);
spin_unlock(&bo_base->vm->status_lock);
continue;
list_move(&bo_base->vm_status, &vm->relocated);
} else {
spin_lock(&bo_base->vm->moved_lock);
list_move(&bo_base->vm_status, &vm->moved);
spin_unlock(&bo_base->vm->moved_lock);
}
spin_lock(&bo_base->vm->status_lock);
if (list_empty(&bo_base->vm_status))
list_add(&bo_base->vm_status, &vm->moved);
spin_unlock(&bo_base->vm->status_lock);
}
}
@ -2355,6 +2367,8 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t vm_size,
int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
int vm_context, unsigned int pasid)
{
struct amdgpu_bo_param bp;
struct amdgpu_bo *root;
const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
AMDGPU_VM_PTE_COUNT(adev) * 8);
unsigned ring_instance;
@ -2367,10 +2381,11 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
vm->va = RB_ROOT_CACHED;
for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
vm->reserved_vmid[i] = NULL;
spin_lock_init(&vm->status_lock);
INIT_LIST_HEAD(&vm->evicted);
INIT_LIST_HEAD(&vm->relocated);
spin_lock_init(&vm->moved_lock);
INIT_LIST_HEAD(&vm->moved);
INIT_LIST_HEAD(&vm->idle);
INIT_LIST_HEAD(&vm->freed);
/* create scheduler entity for page table updates */
@ -2380,7 +2395,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
ring = adev->vm_manager.vm_pte_rings[ring_instance];
rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL];
r = drm_sched_entity_init(&ring->sched, &vm->entity,
rq, amdgpu_sched_jobs, NULL);
rq, NULL);
if (r)
return r;
@ -2409,24 +2424,28 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
flags |= AMDGPU_GEM_CREATE_SHADOW;
size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level);
r = amdgpu_bo_create(adev, size, align, AMDGPU_GEM_DOMAIN_VRAM, flags,
ttm_bo_type_kernel, NULL, &vm->root.base.bo);
memset(&bp, 0, sizeof(bp));
bp.size = size;
bp.byte_align = align;
bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
bp.flags = flags;
bp.type = ttm_bo_type_kernel;
bp.resv = NULL;
r = amdgpu_bo_create(adev, &bp, &root);
if (r)
goto error_free_sched_entity;
r = amdgpu_bo_reserve(vm->root.base.bo, true);
r = amdgpu_bo_reserve(root, true);
if (r)
goto error_free_root;
r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo,
r = amdgpu_vm_clear_bo(adev, vm, root,
adev->vm_manager.root_level,
vm->pte_support_ats);
if (r)
goto error_unreserve;
vm->root.base.vm = vm;
list_add_tail(&vm->root.base.bo_list, &vm->root.base.bo->va);
list_add_tail(&vm->root.base.vm_status, &vm->evicted);
amdgpu_vm_bo_base_init(&vm->root.base, vm, root);
amdgpu_bo_unreserve(vm->root.base.bo);
if (pasid) {

View File

@ -75,11 +75,12 @@ struct amdgpu_bo_list_entry;
/* PDE Block Fragment Size for VEGA10 */
#define AMDGPU_PDE_BFS(a) ((uint64_t)a << 59)
/* VEGA10 only */
/* For GFX9 */
#define AMDGPU_PTE_MTYPE(a) ((uint64_t)a << 57)
#define AMDGPU_PTE_MTYPE_MASK AMDGPU_PTE_MTYPE(3ULL)
/* For Raven */
#define AMDGPU_MTYPE_NC 0
#define AMDGPU_MTYPE_CC 2
#define AMDGPU_PTE_DEFAULT_ATC (AMDGPU_PTE_SYSTEM \
@ -167,9 +168,6 @@ struct amdgpu_vm {
/* tree of virtual addresses mapped */
struct rb_root_cached va;
/* protecting invalidated */
spinlock_t status_lock;
/* BOs who needs a validation */
struct list_head evicted;
@ -178,6 +176,10 @@ struct amdgpu_vm {
/* BOs moved, but not yet updated in the PT */
struct list_head moved;
spinlock_t moved_lock;
/* All BOs of this VM not currently in the state machine */
struct list_head idle;
/* BO mappings freed, but not yet updated in the PT */
struct list_head freed;
@ -186,9 +188,6 @@ struct amdgpu_vm {
struct amdgpu_vm_pt root;
struct dma_fence *last_update;
/* protecting freed */
spinlock_t freed_lock;
/* Scheduler entity for page table updates */
struct drm_sched_entity entity;

View File

@ -5903,7 +5903,7 @@ static int ci_dpm_init(struct amdgpu_device *adev)
pi->pcie_dpm_key_disabled = 0;
pi->thermal_sclk_dpm_enabled = 0;
if (amdgpu_pp_feature_mask & SCLK_DEEP_SLEEP_MASK)
if (adev->powerplay.pp_feature & PP_SCLK_DEEP_SLEEP_MASK)
pi->caps_sclk_ds = true;
else
pi->caps_sclk_ds = false;
@ -6255,7 +6255,7 @@ static int ci_dpm_late_init(void *handle)
int ret;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if (!amdgpu_dpm)
if (!adev->pm.dpm_enabled)
return 0;
/* init the sysfs and debugfs files late */

View File

@ -1735,6 +1735,12 @@ static void cik_invalidate_hdp(struct amdgpu_device *adev,
}
}
static bool cik_need_full_reset(struct amdgpu_device *adev)
{
/* change this when we support soft reset */
return true;
}
static const struct amdgpu_asic_funcs cik_asic_funcs =
{
.read_disabled_bios = &cik_read_disabled_bios,
@ -1748,6 +1754,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs =
.get_config_memsize = &cik_get_config_memsize,
.flush_hdp = &cik_flush_hdp,
.invalidate_hdp = &cik_invalidate_hdp,
.need_full_reset = &cik_need_full_reset,
};
static int cik_common_early_init(void *handle)

View File

@ -1823,7 +1823,6 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc,
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
struct drm_device *dev = crtc->dev;
struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_framebuffer *amdgpu_fb;
struct drm_framebuffer *target_fb;
struct drm_gem_object *obj;
struct amdgpu_bo *abo;
@ -1842,18 +1841,15 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc,
return 0;
}
if (atomic) {
amdgpu_fb = to_amdgpu_framebuffer(fb);
if (atomic)
target_fb = fb;
} else {
amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
else
target_fb = crtc->primary->fb;
}
/* If atomic, assume fb object is pinned & idle & fenced and
* just update base pointers
*/
obj = amdgpu_fb->obj;
obj = target_fb->obj[0];
abo = gem_to_amdgpu_bo(obj);
r = amdgpu_bo_reserve(abo, false);
if (unlikely(r != 0))
@ -2043,8 +2039,7 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc,
WREG32(mmMASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0);
if (!atomic && fb && fb != crtc->primary->fb) {
amdgpu_fb = to_amdgpu_framebuffer(fb);
abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
abo = gem_to_amdgpu_bo(fb->obj[0]);
r = amdgpu_bo_reserve(abo, true);
if (unlikely(r != 0))
return r;
@ -2526,11 +2521,9 @@ static void dce_v10_0_crtc_disable(struct drm_crtc *crtc)
dce_v10_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
if (crtc->primary->fb) {
int r;
struct amdgpu_framebuffer *amdgpu_fb;
struct amdgpu_bo *abo;
amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]);
r = amdgpu_bo_reserve(abo, true);
if (unlikely(r))
DRM_ERROR("failed to reserve abo before unpin\n");

View File

@ -173,6 +173,7 @@ static void dce_v11_0_init_golden_registers(struct amdgpu_device *adev)
ARRAY_SIZE(polaris11_golden_settings_a11));
break;
case CHIP_POLARIS10:
case CHIP_VEGAM:
amdgpu_device_program_register_sequence(adev,
polaris10_golden_settings_a11,
ARRAY_SIZE(polaris10_golden_settings_a11));
@ -473,6 +474,7 @@ static int dce_v11_0_get_num_crtc (struct amdgpu_device *adev)
num_crtc = 2;
break;
case CHIP_POLARIS10:
case CHIP_VEGAM:
num_crtc = 6;
break;
case CHIP_POLARIS11:
@ -1445,6 +1447,7 @@ static int dce_v11_0_audio_init(struct amdgpu_device *adev)
adev->mode_info.audio.num_pins = 7;
break;
case CHIP_POLARIS10:
case CHIP_VEGAM:
adev->mode_info.audio.num_pins = 8;
break;
case CHIP_POLARIS11:
@ -1862,7 +1865,6 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc,
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
struct drm_device *dev = crtc->dev;
struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_framebuffer *amdgpu_fb;
struct drm_framebuffer *target_fb;
struct drm_gem_object *obj;
struct amdgpu_bo *abo;
@ -1881,18 +1883,15 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc,
return 0;
}
if (atomic) {
amdgpu_fb = to_amdgpu_framebuffer(fb);
if (atomic)
target_fb = fb;
} else {
amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
else
target_fb = crtc->primary->fb;
}
/* If atomic, assume fb object is pinned & idle & fenced and
* just update base pointers
*/
obj = amdgpu_fb->obj;
obj = target_fb->obj[0];
abo = gem_to_amdgpu_bo(obj);
r = amdgpu_bo_reserve(abo, false);
if (unlikely(r != 0))
@ -2082,8 +2081,7 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc,
WREG32(mmCRTC_MASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0);
if (!atomic && fb && fb != crtc->primary->fb) {
amdgpu_fb = to_amdgpu_framebuffer(fb);
abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
abo = gem_to_amdgpu_bo(fb->obj[0]);
r = amdgpu_bo_reserve(abo, true);
if (unlikely(r != 0))
return r;
@ -2253,7 +2251,8 @@ static u32 dce_v11_0_pick_pll(struct drm_crtc *crtc)
if ((adev->asic_type == CHIP_POLARIS10) ||
(adev->asic_type == CHIP_POLARIS11) ||
(adev->asic_type == CHIP_POLARIS12)) {
(adev->asic_type == CHIP_POLARIS12) ||
(adev->asic_type == CHIP_VEGAM)) {
struct amdgpu_encoder *amdgpu_encoder =
to_amdgpu_encoder(amdgpu_crtc->encoder);
struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
@ -2601,11 +2600,9 @@ static void dce_v11_0_crtc_disable(struct drm_crtc *crtc)
dce_v11_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
if (crtc->primary->fb) {
int r;
struct amdgpu_framebuffer *amdgpu_fb;
struct amdgpu_bo *abo;
amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]);
r = amdgpu_bo_reserve(abo, true);
if (unlikely(r))
DRM_ERROR("failed to reserve abo before unpin\n");
@ -2673,7 +2670,8 @@ static int dce_v11_0_crtc_mode_set(struct drm_crtc *crtc,
if ((adev->asic_type == CHIP_POLARIS10) ||
(adev->asic_type == CHIP_POLARIS11) ||
(adev->asic_type == CHIP_POLARIS12)) {
(adev->asic_type == CHIP_POLARIS12) ||
(adev->asic_type == CHIP_VEGAM)) {
struct amdgpu_encoder *amdgpu_encoder =
to_amdgpu_encoder(amdgpu_crtc->encoder);
int encoder_mode =
@ -2830,6 +2828,7 @@ static int dce_v11_0_early_init(void *handle)
adev->mode_info.num_dig = 9;
break;
case CHIP_POLARIS10:
case CHIP_VEGAM:
adev->mode_info.num_hpd = 6;
adev->mode_info.num_dig = 6;
break;
@ -2949,7 +2948,8 @@ static int dce_v11_0_hw_init(void *handle)
amdgpu_atombios_encoder_init_dig(adev);
if ((adev->asic_type == CHIP_POLARIS10) ||
(adev->asic_type == CHIP_POLARIS11) ||
(adev->asic_type == CHIP_POLARIS12)) {
(adev->asic_type == CHIP_POLARIS12) ||
(adev->asic_type == CHIP_VEGAM)) {
amdgpu_atombios_crtc_set_dce_clock(adev, adev->clock.default_dispclk,
DCE_CLOCK_TYPE_DISPCLK, ATOM_GCK_DFS);
amdgpu_atombios_crtc_set_dce_clock(adev, 0,

View File

@ -1780,7 +1780,6 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc,
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
struct drm_device *dev = crtc->dev;
struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_framebuffer *amdgpu_fb;
struct drm_framebuffer *target_fb;
struct drm_gem_object *obj;
struct amdgpu_bo *abo;
@ -1798,18 +1797,15 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc,
return 0;
}
if (atomic) {
amdgpu_fb = to_amdgpu_framebuffer(fb);
if (atomic)
target_fb = fb;
} else {
amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
else
target_fb = crtc->primary->fb;
}
/* If atomic, assume fb object is pinned & idle & fenced and
* just update base pointers
*/
obj = amdgpu_fb->obj;
obj = target_fb->obj[0];
abo = gem_to_amdgpu_bo(obj);
r = amdgpu_bo_reserve(abo, false);
if (unlikely(r != 0))
@ -1978,8 +1974,7 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc,
WREG32(mmMASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0);
if (!atomic && fb && fb != crtc->primary->fb) {
amdgpu_fb = to_amdgpu_framebuffer(fb);
abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
abo = gem_to_amdgpu_bo(fb->obj[0]);
r = amdgpu_bo_reserve(abo, true);
if (unlikely(r != 0))
return r;
@ -2414,11 +2409,9 @@ static void dce_v6_0_crtc_disable(struct drm_crtc *crtc)
dce_v6_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
if (crtc->primary->fb) {
int r;
struct amdgpu_framebuffer *amdgpu_fb;
struct amdgpu_bo *abo;
amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]);
r = amdgpu_bo_reserve(abo, true);
if (unlikely(r))
DRM_ERROR("failed to reserve abo before unpin\n");

View File

@ -1754,7 +1754,6 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc,
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
struct drm_device *dev = crtc->dev;
struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_framebuffer *amdgpu_fb;
struct drm_framebuffer *target_fb;
struct drm_gem_object *obj;
struct amdgpu_bo *abo;
@ -1773,18 +1772,15 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc,
return 0;
}
if (atomic) {
amdgpu_fb = to_amdgpu_framebuffer(fb);
if (atomic)
target_fb = fb;
} else {
amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
else
target_fb = crtc->primary->fb;
}
/* If atomic, assume fb object is pinned & idle & fenced and
* just update base pointers
*/
obj = amdgpu_fb->obj;
obj = target_fb->obj[0];
abo = gem_to_amdgpu_bo(obj);
r = amdgpu_bo_reserve(abo, false);
if (unlikely(r != 0))
@ -1955,8 +1951,7 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc,
WREG32(mmMASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0);
if (!atomic && fb && fb != crtc->primary->fb) {
amdgpu_fb = to_amdgpu_framebuffer(fb);
abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
abo = gem_to_amdgpu_bo(fb->obj[0]);
r = amdgpu_bo_reserve(abo, true);
if (unlikely(r != 0))
return r;
@ -2430,11 +2425,9 @@ static void dce_v8_0_crtc_disable(struct drm_crtc *crtc)
dce_v8_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
if (crtc->primary->fb) {
int r;
struct amdgpu_framebuffer *amdgpu_fb;
struct amdgpu_bo *abo;
amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]);
r = amdgpu_bo_reserve(abo, true);
if (unlikely(r))
DRM_ERROR("failed to reserve abo before unpin\n");

View File

@ -168,11 +168,9 @@ static void dce_virtual_crtc_disable(struct drm_crtc *crtc)
dce_virtual_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
if (crtc->primary->fb) {
int r;
struct amdgpu_framebuffer *amdgpu_fb;
struct amdgpu_bo *abo;
amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]);
r = amdgpu_bo_reserve(abo, true);
if (unlikely(r))
DRM_ERROR("failed to reserve abo before unpin\n");
@ -329,7 +327,7 @@ static int dce_virtual_get_modes(struct drm_connector *connector)
return 0;
}
static int dce_virtual_mode_valid(struct drm_connector *connector,
static enum drm_mode_status dce_virtual_mode_valid(struct drm_connector *connector,
struct drm_display_mode *mode)
{
return MODE_OK;
@ -462,8 +460,9 @@ static int dce_virtual_hw_init(void *handle)
break;
case CHIP_CARRIZO:
case CHIP_STONEY:
case CHIP_POLARIS11:
case CHIP_POLARIS10:
case CHIP_POLARIS11:
case CHIP_VEGAM:
dce_v11_0_disable_dce(adev);
break;
case CHIP_TOPAZ:
@ -474,6 +473,7 @@ static int dce_virtual_hw_init(void *handle)
break;
case CHIP_VEGA10:
case CHIP_VEGA12:
case CHIP_VEGA20:
break;
default:
DRM_ERROR("Virtual display unsupported ASIC type: 0x%X\n", adev->asic_type);

View File

@ -0,0 +1,120 @@
/*
* Copyright 2018 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "amdgpu.h"
#include "df_v1_7.h"
#include "df/df_1_7_default.h"
#include "df/df_1_7_offset.h"
#include "df/df_1_7_sh_mask.h"
static u32 df_v1_7_channel_number[] = {1, 2, 0, 4, 0, 8, 0, 16, 2};
static void df_v1_7_init (struct amdgpu_device *adev)
{
}
static void df_v1_7_enable_broadcast_mode(struct amdgpu_device *adev,
bool enable)
{
u32 tmp;
if (enable) {
tmp = RREG32_SOC15(DF, 0, mmFabricConfigAccessControl);
tmp &= ~FabricConfigAccessControl__CfgRegInstAccEn_MASK;
WREG32_SOC15(DF, 0, mmFabricConfigAccessControl, tmp);
} else
WREG32_SOC15(DF, 0, mmFabricConfigAccessControl,
mmFabricConfigAccessControl_DEFAULT);
}
static u32 df_v1_7_get_fb_channel_number(struct amdgpu_device *adev)
{
u32 tmp;
tmp = RREG32_SOC15(DF, 0, mmDF_CS_AON0_DramBaseAddress0);
tmp &= DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK;
tmp >>= DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT;
return tmp;
}
static u32 df_v1_7_get_hbm_channel_number(struct amdgpu_device *adev)
{
int fb_channel_number;
fb_channel_number = adev->df_funcs->get_fb_channel_number(adev);
return df_v1_7_channel_number[fb_channel_number];
}
static void df_v1_7_update_medium_grain_clock_gating(struct amdgpu_device *adev,
bool enable)
{
u32 tmp;
/* Put DF on broadcast mode */
adev->df_funcs->enable_broadcast_mode(adev, true);
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG)) {
tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
tmp |= DF_V1_7_MGCG_ENABLE_15_CYCLE_DELAY;
WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp);
} else {
tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
tmp |= DF_V1_7_MGCG_DISABLE;
WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp);
}
/* Exit boradcast mode */
adev->df_funcs->enable_broadcast_mode(adev, false);
}
static void df_v1_7_get_clockgating_state(struct amdgpu_device *adev,
u32 *flags)
{
u32 tmp;
/* AMD_CG_SUPPORT_DF_MGCG */
tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
if (tmp & DF_V1_7_MGCG_ENABLE_15_CYCLE_DELAY)
*flags |= AMD_CG_SUPPORT_DF_MGCG;
}
static void df_v1_7_enable_ecc_force_par_wr_rmw(struct amdgpu_device *adev,
bool enable)
{
WREG32_FIELD15(DF, 0, DF_CS_AON0_CoherentSlaveModeCtrlA0,
ForceParWrRMW, enable);
}
const struct amdgpu_df_funcs df_v1_7_funcs = {
.init = df_v1_7_init,
.enable_broadcast_mode = df_v1_7_enable_broadcast_mode,
.get_fb_channel_number = df_v1_7_get_fb_channel_number,
.get_hbm_channel_number = df_v1_7_get_hbm_channel_number,
.update_medium_grain_clock_gating = df_v1_7_update_medium_grain_clock_gating,
.get_clockgating_state = df_v1_7_get_clockgating_state,
.enable_ecc_force_par_wr_rmw = df_v1_7_enable_ecc_force_par_wr_rmw,
};

View File

@ -1,5 +1,5 @@
/*
* Copyright 2015 Red Hat Inc.
* Copyright 2018 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@ -19,20 +19,22 @@
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* Authors: Ben Skeggs <bskeggs@redhat.com>
*/
#include "dmacnv50.h"
#include "rootnv50.h"
#include <nvif/class.h>
#ifndef __DF_V1_7_H__
#define __DF_V1_7_H__
const struct nv50_disp_dmac_oclass
gk104_disp_base_oclass = {
.base.oclass = GK104_DISP_BASE_CHANNEL_DMA,
.base.minver = 0,
.base.maxver = 0,
.ctor = nv50_disp_base_new,
.func = &gf119_disp_dmac_func,
.mthd = &gf119_disp_base_chan_mthd,
.chid = 1,
#include "soc15_common.h"
enum DF_V1_7_MGCG
{
DF_V1_7_MGCG_DISABLE = 0,
DF_V1_7_MGCG_ENABLE_00_CYCLE_DELAY =1,
DF_V1_7_MGCG_ENABLE_01_CYCLE_DELAY =2,
DF_V1_7_MGCG_ENABLE_15_CYCLE_DELAY =13,
DF_V1_7_MGCG_ENABLE_31_CYCLE_DELAY =14,
DF_V1_7_MGCG_ENABLE_63_CYCLE_DELAY =15
};
extern const struct amdgpu_df_funcs df_v1_7_funcs;
#endif

View File

@ -0,0 +1,116 @@
/*
* Copyright 2018 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "amdgpu.h"
#include "df_v3_6.h"
#include "df/df_3_6_default.h"
#include "df/df_3_6_offset.h"
#include "df/df_3_6_sh_mask.h"
static u32 df_v3_6_channel_number[] = {1, 2, 0, 4, 0, 8, 0,
16, 32, 0, 0, 0, 2, 4, 8};
static void df_v3_6_init(struct amdgpu_device *adev)
{
}
static void df_v3_6_enable_broadcast_mode(struct amdgpu_device *adev,
bool enable)
{
u32 tmp;
if (enable) {
tmp = RREG32_SOC15(DF, 0, mmFabricConfigAccessControl);
tmp &= ~FabricConfigAccessControl__CfgRegInstAccEn_MASK;
WREG32_SOC15(DF, 0, mmFabricConfigAccessControl, tmp);
} else
WREG32_SOC15(DF, 0, mmFabricConfigAccessControl,
mmFabricConfigAccessControl_DEFAULT);
}
static u32 df_v3_6_get_fb_channel_number(struct amdgpu_device *adev)
{
u32 tmp;
tmp = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DramBaseAddress0);
tmp &= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK;
tmp >>= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan__SHIFT;
return tmp;
}
static u32 df_v3_6_get_hbm_channel_number(struct amdgpu_device *adev)
{
int fb_channel_number;
fb_channel_number = adev->df_funcs->get_fb_channel_number(adev);
if (fb_channel_number > ARRAY_SIZE(df_v3_6_channel_number))
fb_channel_number = 0;
return df_v3_6_channel_number[fb_channel_number];
}
static void df_v3_6_update_medium_grain_clock_gating(struct amdgpu_device *adev,
bool enable)
{
u32 tmp;
/* Put DF on broadcast mode */
adev->df_funcs->enable_broadcast_mode(adev, true);
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG)) {
tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
tmp |= DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY;
WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp);
} else {
tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
tmp |= DF_V3_6_MGCG_DISABLE;
WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp);
}
/* Exit broadcast mode */
adev->df_funcs->enable_broadcast_mode(adev, false);
}
static void df_v3_6_get_clockgating_state(struct amdgpu_device *adev,
u32 *flags)
{
u32 tmp;
/* AMD_CG_SUPPORT_DF_MGCG */
tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
if (tmp & DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY)
*flags |= AMD_CG_SUPPORT_DF_MGCG;
}
const struct amdgpu_df_funcs df_v3_6_funcs = {
.init = df_v3_6_init,
.enable_broadcast_mode = df_v3_6_enable_broadcast_mode,
.get_fb_channel_number = df_v3_6_get_fb_channel_number,
.get_hbm_channel_number = df_v3_6_get_hbm_channel_number,
.update_medium_grain_clock_gating =
df_v3_6_update_medium_grain_clock_gating,
.get_clockgating_state = df_v3_6_get_clockgating_state,
};

View File

@ -1,5 +1,5 @@
/*
* Copyright 2015 Red Hat Inc.
* Copyright 2018 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@ -19,20 +19,22 @@
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* Authors: Ben Skeggs <bskeggs@redhat.com>
*/
#include "dmacnv50.h"
#include "rootnv50.h"
#include <nvif/class.h>
#ifndef __DF_V3_6_H__
#define __DF_V3_6_H__
const struct nv50_disp_dmac_oclass
gk110_disp_base_oclass = {
.base.oclass = GK110_DISP_BASE_CHANNEL_DMA,
.base.minver = 0,
.base.maxver = 0,
.ctor = nv50_disp_base_new,
.func = &gf119_disp_dmac_func,
.mthd = &gf119_disp_base_chan_mthd,
.chid = 1,
#include "soc15_common.h"
enum DF_V3_6_MGCG {
DF_V3_6_MGCG_DISABLE = 0,
DF_V3_6_MGCG_ENABLE_00_CYCLE_DELAY = 1,
DF_V3_6_MGCG_ENABLE_01_CYCLE_DELAY = 2,
DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY = 13,
DF_V3_6_MGCG_ENABLE_31_CYCLE_DELAY = 14,
DF_V3_6_MGCG_ENABLE_63_CYCLE_DELAY = 15
};
extern const struct amdgpu_df_funcs df_v3_6_funcs;
#endif

View File

@ -125,18 +125,6 @@ MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
@ -149,6 +137,18 @@ MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
@ -161,6 +161,13 @@ MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
MODULE_FIRMWARE("amdgpu/vegam_me.bin");
MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
@ -292,6 +299,37 @@ static const u32 tonga_mgcg_cgcg_init[] =
mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
static const u32 golden_settings_vegam_a11[] =
{
mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
mmDB_DEBUG2, 0xf00fffff, 0x00000400,
mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
mmSQ_CONFIG, 0x07f80000, 0x01180000,
mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
mmTCC_CTRL, 0x00100000, 0xf31fff7f,
mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
static const u32 vegam_golden_common_all[] =
{
mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
static const u32 golden_settings_polaris11_a11[] =
{
mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
@ -712,6 +750,14 @@ static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
tonga_golden_common_all,
ARRAY_SIZE(tonga_golden_common_all));
break;
case CHIP_VEGAM:
amdgpu_device_program_register_sequence(adev,
golden_settings_vegam_a11,
ARRAY_SIZE(golden_settings_vegam_a11));
amdgpu_device_program_register_sequence(adev,
vegam_golden_common_all,
ARRAY_SIZE(vegam_golden_common_all));
break;
case CHIP_POLARIS11:
case CHIP_POLARIS12:
amdgpu_device_program_register_sequence(adev,
@ -918,17 +964,20 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
case CHIP_FIJI:
chip_name = "fiji";
break;
case CHIP_POLARIS11:
chip_name = "polaris11";
case CHIP_STONEY:
chip_name = "stoney";
break;
case CHIP_POLARIS10:
chip_name = "polaris10";
break;
case CHIP_POLARIS11:
chip_name = "polaris11";
break;
case CHIP_POLARIS12:
chip_name = "polaris12";
break;
case CHIP_STONEY:
chip_name = "stoney";
case CHIP_VEGAM:
chip_name = "vegam";
break;
default:
BUG();
@ -1770,6 +1819,7 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
break;
case CHIP_POLARIS10:
case CHIP_VEGAM:
ret = amdgpu_atombios_get_gfx_info(adev);
if (ret)
return ret;
@ -1957,12 +2007,13 @@ static int gfx_v8_0_sw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
switch (adev->asic_type) {
case CHIP_FIJI:
case CHIP_TONGA:
case CHIP_CARRIZO:
case CHIP_FIJI:
case CHIP_POLARIS10:
case CHIP_POLARIS11:
case CHIP_POLARIS12:
case CHIP_POLARIS10:
case CHIP_CARRIZO:
case CHIP_VEGAM:
adev->gfx.mec.num_mec = 2;
break;
case CHIP_TOPAZ:
@ -2323,6 +2374,7 @@ static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
break;
case CHIP_FIJI:
case CHIP_VEGAM:
modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
@ -3504,6 +3556,7 @@ gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
{
switch (adev->asic_type) {
case CHIP_FIJI:
case CHIP_VEGAM:
*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
RB_XSEL2(1) | PKR_MAP(2) |
PKR_XSEL(1) | PKR_YSEL(1) |
@ -4071,7 +4124,8 @@ static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
gfx_v8_0_init_power_gating(adev);
WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
} else if ((adev->asic_type == CHIP_POLARIS11) ||
(adev->asic_type == CHIP_POLARIS12)) {
(adev->asic_type == CHIP_POLARIS12) ||
(adev->asic_type == CHIP_VEGAM)) {
gfx_v8_0_init_csb(adev);
gfx_v8_0_init_save_restore_list(adev);
gfx_v8_0_enable_save_restore_machine(adev);
@ -4146,7 +4200,8 @@ static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
if (adev->asic_type == CHIP_POLARIS11 ||
adev->asic_type == CHIP_POLARIS10 ||
adev->asic_type == CHIP_POLARIS12) {
adev->asic_type == CHIP_POLARIS12 ||
adev->asic_type == CHIP_VEGAM) {
tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
tmp &= ~0x3;
WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
@ -5498,7 +5553,8 @@ static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *ade
bool enable)
{
if ((adev->asic_type == CHIP_POLARIS11) ||
(adev->asic_type == CHIP_POLARIS12))
(adev->asic_type == CHIP_POLARIS12) ||
(adev->asic_type == CHIP_VEGAM))
/* Send msg to SMU via Powerplay */
amdgpu_device_ip_set_powergating_state(adev,
AMD_IP_BLOCK_TYPE_SMC,
@ -5588,6 +5644,7 @@ static int gfx_v8_0_set_powergating_state(void *handle,
break;
case CHIP_POLARIS11:
case CHIP_POLARIS12:
case CHIP_VEGAM:
if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
else
@ -6154,6 +6211,7 @@ static int gfx_v8_0_set_clockgating_state(void *handle,
case CHIP_POLARIS10:
case CHIP_POLARIS11:
case CHIP_POLARIS12:
case CHIP_VEGAM:
gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
break;
default:

View File

@ -27,6 +27,7 @@
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_atomfirmware.h"
#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"
@ -41,7 +42,6 @@
#define GFX9_MEC_HPD_SIZE 2048
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
#define GFX9_RLC_FORMAT_DIRECT_REG_LIST_LENGTH 34
#define mmPWR_MISC_CNTL_STATUS 0x0183
#define mmPWR_MISC_CNTL_STATUS_BASE_IDX 0
@ -64,6 +64,13 @@ MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
@ -73,29 +80,22 @@ MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};
static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
@ -109,6 +109,20 @@ static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800)
};
static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
{
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
};
static const struct soc15_reg_golden golden_settings_gc_9_1[] =
{
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
@ -185,6 +199,30 @@ static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000)
};
static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};
static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};
#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
@ -218,6 +256,14 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
golden_settings_gc_9_2_1_vg12,
ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
break;
case CHIP_VEGA20:
soc15_program_register_sequence(adev,
golden_settings_gc_9_0,
ARRAY_SIZE(golden_settings_gc_9_0));
soc15_program_register_sequence(adev,
golden_settings_gc_9_0_vg20,
ARRAY_SIZE(golden_settings_gc_9_0_vg20));
break;
case CHIP_RAVEN:
soc15_program_register_sequence(adev,
golden_settings_gc_9_1,
@ -401,6 +447,27 @@ static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
kfree(adev->gfx.rlc.register_list_format);
}
static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
{
const struct rlc_firmware_header_v2_1 *rlc_hdr;
rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
adev->gfx.rlc.reg_list_format_direct_reg_list_length =
le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
}
static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
{
const char *chip_name;
@ -412,6 +479,8 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
const struct rlc_firmware_header_v2_0 *rlc_hdr;
unsigned int *tmp = NULL;
unsigned int i = 0;
uint16_t version_major;
uint16_t version_minor;
DRM_DEBUG("\n");
@ -422,6 +491,9 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
case CHIP_VEGA12:
chip_name = "vega12";
break;
case CHIP_VEGA20:
chip_name = "vega20";
break;
case CHIP_RAVEN:
chip_name = "raven";
break;
@ -468,6 +540,12 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
goto out;
err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
if (version_major == 2 && version_minor == 1)
adev->gfx.rlc.is_rlc_v2_1 = true;
adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
adev->gfx.rlc.save_and_restore_offset =
@ -508,6 +586,9 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
if (adev->gfx.rlc.is_rlc_v2_1)
gfx_v9_0_init_rlc_ext_microcode(adev);
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
if (err)
@ -566,6 +647,26 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
adev->firmware.fw_size +=
ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
if (adev->gfx.rlc.is_rlc_v2_1) {
info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
info->fw = adev->gfx.rlc_fw;
adev->firmware.fw_size +=
ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
info->fw = adev->gfx.rlc_fw;
adev->firmware.fw_size +=
ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
info->fw = adev->gfx.rlc_fw;
adev->firmware.fw_size +=
ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
}
info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
info->fw = adev->gfx.mec_fw;
@ -1013,9 +1114,10 @@ static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q
};
static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
{
u32 gb_addr_config;
int err;
adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
@ -1037,6 +1139,20 @@ static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
DRM_INFO("fix gfx.config for vega12\n");
break;
case CHIP_VEGA20:
adev->gfx.config.max_hw_contexts = 8;
adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
gb_addr_config &= ~0xf3e777ff;
gb_addr_config |= 0x22014042;
/* check vbios table if gpu info is not available */
err = amdgpu_atomfirmware_get_gfx_info(adev);
if (err)
return err;
break;
case CHIP_RAVEN:
adev->gfx.config.max_hw_contexts = 8;
adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
@ -1086,6 +1202,8 @@ static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
adev->gfx.config.gb_addr_config,
GB_ADDR_CONFIG,
PIPE_INTERLEAVE_SIZE));
return 0;
}
static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
@ -1319,6 +1437,7 @@ static int gfx_v9_0_sw_init(void *handle)
switch (adev->asic_type) {
case CHIP_VEGA10:
case CHIP_VEGA12:
case CHIP_VEGA20:
case CHIP_RAVEN:
adev->gfx.mec.num_mec = 2;
break;
@ -1446,7 +1565,9 @@ static int gfx_v9_0_sw_init(void *handle)
adev->gfx.ce_ram_size = 0x8000;
gfx_v9_0_gpu_early_init(adev);
r = gfx_v9_0_gpu_early_init(adev);
if (r)
return r;
r = gfx_v9_0_ngg_init(adev);
if (r)
@ -1600,6 +1721,7 @@ static void gfx_v9_0_gpu_init(struct amdgpu_device *adev)
gfx_v9_0_setup_rb(adev);
gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
/* XXX SH_MEM regs */
/* where to put LDS, scratch, GPUVM in FSA64 space */
@ -1616,7 +1738,10 @@ static void gfx_v9_0_gpu_init(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
SH_MEM_ALIGNMENT_MODE_UNALIGNED);
WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp);
tmp = adev->gmc.shared_aperture_start >> 48;
tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
(adev->gmc.private_aperture_start >> 48));
tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
(adev->gmc.shared_aperture_start >> 48));
WREG32_SOC15(GC, 0, mmSH_MEM_BASES, tmp);
}
}
@ -1708,55 +1833,42 @@ static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
adev->gfx.rlc.clear_state_size);
}
static void gfx_v9_0_parse_ind_reg_list(int *register_list_format,
static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
int indirect_offset,
int list_size,
int *unique_indirect_regs,
int *unique_indirect_reg_count,
int max_indirect_reg_count,
int *indirect_start_offsets,
int *indirect_start_offsets_count,
int max_indirect_start_offsets_count)
int *indirect_start_offsets_count)
{
int idx;
bool new_entry = true;
for (; indirect_offset < list_size; indirect_offset++) {
indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
*indirect_start_offsets_count = *indirect_start_offsets_count + 1;
if (new_entry) {
new_entry = false;
indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
*indirect_start_offsets_count = *indirect_start_offsets_count + 1;
BUG_ON(*indirect_start_offsets_count >= max_indirect_start_offsets_count);
while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
indirect_offset += 2;
/* look for the matching indice */
for (idx = 0; idx < *unique_indirect_reg_count; idx++) {
if (unique_indirect_regs[idx] ==
register_list_format[indirect_offset] ||
!unique_indirect_regs[idx])
break;
}
BUG_ON(idx >= *unique_indirect_reg_count);
if (!unique_indirect_regs[idx])
unique_indirect_regs[idx] = register_list_format[indirect_offset];
indirect_offset++;
}
if (register_list_format[indirect_offset] == 0xFFFFFFFF) {
new_entry = true;
continue;
}
indirect_offset += 2;
/* look for the matching indice */
for (idx = 0; idx < *unique_indirect_reg_count; idx++) {
if (unique_indirect_regs[idx] ==
register_list_format[indirect_offset])
break;
}
if (idx >= *unique_indirect_reg_count) {
unique_indirect_regs[*unique_indirect_reg_count] =
register_list_format[indirect_offset];
idx = *unique_indirect_reg_count;
*unique_indirect_reg_count = *unique_indirect_reg_count + 1;
BUG_ON(*unique_indirect_reg_count >= max_indirect_reg_count);
}
register_list_format[indirect_offset] = idx;
}
}
static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev)
static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
{
int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
int unique_indirect_reg_count = 0;
@ -1765,7 +1877,7 @@ static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev)
int indirect_start_offsets_count = 0;
int list_size = 0;
int i = 0;
int i = 0, j = 0;
u32 tmp = 0;
u32 *register_list_format =
@ -1776,15 +1888,14 @@ static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev)
adev->gfx.rlc.reg_list_format_size_bytes);
/* setup unique_indirect_regs array and indirect_start_offsets array */
gfx_v9_0_parse_ind_reg_list(register_list_format,
GFX9_RLC_FORMAT_DIRECT_REG_LIST_LENGTH,
adev->gfx.rlc.reg_list_format_size_bytes >> 2,
unique_indirect_regs,
&unique_indirect_reg_count,
ARRAY_SIZE(unique_indirect_regs),
indirect_start_offsets,
&indirect_start_offsets_count,
ARRAY_SIZE(indirect_start_offsets));
unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
gfx_v9_1_parse_ind_reg_list(register_list_format,
adev->gfx.rlc.reg_list_format_direct_reg_list_length,
adev->gfx.rlc.reg_list_format_size_bytes >> 2,
unique_indirect_regs,
&unique_indirect_reg_count,
indirect_start_offsets,
&indirect_start_offsets_count);
/* enable auto inc in case it is disabled */
tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
@ -1798,19 +1909,37 @@ static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev)
WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
adev->gfx.rlc.register_restore[i]);
/* load direct register */
WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR), 0);
for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
adev->gfx.rlc.register_restore[i]);
/* load indirect register */
WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
adev->gfx.rlc.reg_list_format_start);
for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
/* direct register portion */
for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
register_list_format[i]);
/* indirect register portion */
while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
if (register_list_format[i] == 0xFFFFFFFF) {
WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
continue;
}
WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
for (j = 0; j < unique_indirect_reg_count; j++) {
if (register_list_format[i] == unique_indirect_regs[j]) {
WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
break;
}
}
BUG_ON(j >= unique_indirect_reg_count);
i++;
}
/* set save/restore list size */
list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
list_size = list_size >> 1;
@ -1823,14 +1952,19 @@ static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev)
adev->gfx.rlc.starting_offsets_start);
for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
indirect_start_offsets[i]);
indirect_start_offsets[i]);
/* load unique indirect regs*/
for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) + i,
unique_indirect_regs[i] & 0x3FFFF);
WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) + i,
unique_indirect_regs[i] >> 20);
if (unique_indirect_regs[i] != 0) {
WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
+ GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
unique_indirect_regs[i] & 0x3FFFF);
WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
+ GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
unique_indirect_regs[i] >> 20);
}
}
kfree(register_list_format);
@ -2010,6 +2144,9 @@ static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *ad
static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
{
if (!adev->gfx.rlc.is_rlc_v2_1)
return;
if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
AMD_PG_SUPPORT_GFX_SMG |
AMD_PG_SUPPORT_GFX_DMG |
@ -2017,27 +2154,12 @@ static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
AMD_PG_SUPPORT_GDS |
AMD_PG_SUPPORT_RLC_SMU_HS)) {
gfx_v9_0_init_csb(adev);
gfx_v9_0_init_rlc_save_restore_list(adev);
gfx_v9_1_init_rlc_save_restore_list(adev);
gfx_v9_0_enable_save_restore_machine(adev);
if (adev->asic_type == CHIP_RAVEN) {
WREG32(mmRLC_JUMP_TABLE_RESTORE,
adev->gfx.rlc.cp_table_gpu_addr >> 8);
gfx_v9_0_init_gfx_power_gating(adev);
if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
} else {
gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
}
if (adev->pg_flags & AMD_PG_SUPPORT_CP)
gfx_v9_0_enable_cp_power_gating(adev, true);
else
gfx_v9_0_enable_cp_power_gating(adev, false);
}
WREG32(mmRLC_JUMP_TABLE_RESTORE,
adev->gfx.rlc.cp_table_gpu_addr >> 8);
gfx_v9_0_init_gfx_power_gating(adev);
}
}
@ -3061,6 +3183,9 @@ static int gfx_v9_0_hw_fini(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int i;
amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
AMD_PG_STATE_UNGATE);
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
@ -3279,6 +3404,11 @@ static int gfx_v9_0_late_init(void *handle)
if (r)
return r;
r = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
AMD_PG_STATE_GATE);
if (r)
return r;
return 0;
}
@ -3339,8 +3469,7 @@ static void gfx_v9_0_exit_rlc_safe_mode(struct amdgpu_device *adev)
static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
bool enable)
{
/* TODO: double check if we need to perform under safe mdoe */
/* gfx_v9_0_enter_rlc_safe_mode(adev); */
gfx_v9_0_enter_rlc_safe_mode(adev);
if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
@ -3351,7 +3480,7 @@ static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
}
/* gfx_v9_0_exit_rlc_safe_mode(adev); */
gfx_v9_0_exit_rlc_safe_mode(adev);
}
static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
@ -3605,6 +3734,7 @@ static int gfx_v9_0_set_clockgating_state(void *handle,
switch (adev->asic_type) {
case CHIP_VEGA10:
case CHIP_VEGA12:
case CHIP_VEGA20:
case CHIP_RAVEN:
gfx_v9_0_update_gfx_clock_gating(adev,
state == AMD_CG_STATE_GATE ? true : false);
@ -3742,7 +3872,7 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
}
amdgpu_ring_write(ring, header);
BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
(2 << 0) |
@ -3774,13 +3904,16 @@ static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
{
bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
/* RELEASE_MEM - flush caches, send int */
amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
EOP_TC_ACTION_EN |
EOP_TC_WB_ACTION_EN |
EOP_TC_MD_ACTION_EN |
amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
EOP_TC_NC_ACTION_EN) :
(EOP_TCL1_ACTION_EN |
EOP_TC_ACTION_EN |
EOP_TC_WB_ACTION_EN |
EOP_TC_MD_ACTION_EN)) |
EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
EVENT_INDEX(5)));
amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
@ -4137,6 +4270,20 @@ static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
}
static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
uint32_t reg0, uint32_t reg1,
uint32_t ref, uint32_t mask)
{
int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
if (amdgpu_sriov_vf(ring->adev))
gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
ref, mask, 0x20);
else
amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
ref, mask);
}
static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
enum amdgpu_interrupt_state state)
{
@ -4458,6 +4605,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
.emit_tmz = gfx_v9_0_ring_emit_tmz,
.emit_wreg = gfx_v9_0_ring_emit_wreg,
.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
};
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
@ -4492,6 +4640,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
.set_priority = gfx_v9_0_ring_set_priority_compute,
.emit_wreg = gfx_v9_0_ring_emit_wreg,
.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
};
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
@ -4522,6 +4671,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
.emit_rreg = gfx_v9_0_ring_emit_rreg,
.emit_wreg = gfx_v9_0_ring_emit_wreg,
.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
};
static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
@ -4577,6 +4727,7 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
switch (adev->asic_type) {
case CHIP_VEGA10:
case CHIP_VEGA12:
case CHIP_VEGA20:
case CHIP_RAVEN:
adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
break;
@ -4686,6 +4837,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
cu_info->number = active_cu_number;
cu_info->ao_cu_mask = ao_cu_mask;
cu_info->simd_per_cu = NUM_SIMD_PER_CU;
return 0;
}

View File

@ -819,12 +819,33 @@ static int gmc_v6_0_late_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
amdgpu_bo_late_init(adev);
if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS)
return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
else
return 0;
}
static unsigned gmc_v6_0_get_vbios_fb_size(struct amdgpu_device *adev)
{
u32 d1vga_control = RREG32(mmD1VGA_CONTROL);
unsigned size;
if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */
} else {
u32 viewport = RREG32(mmVIEWPORT_SIZE);
size = (REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_HEIGHT) *
REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_WIDTH) *
4);
}
/* return 0 if the pre-OS buffer uses up most of vram */
if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024))
return 0;
return size;
}
static int gmc_v6_0_sw_init(void *handle)
{
int r;
@ -851,8 +872,6 @@ static int gmc_v6_0_sw_init(void *handle)
adev->gmc.mc_mask = 0xffffffffffULL;
adev->gmc.stolen_size = 256 * 1024;
adev->need_dma32 = false;
dma_bits = adev->need_dma32 ? 32 : 40;
r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits));
@ -878,6 +897,8 @@ static int gmc_v6_0_sw_init(void *handle)
if (r)
return r;
adev->gmc.stolen_size = gmc_v6_0_get_vbios_fb_size(adev);
r = amdgpu_bo_init(adev);
if (r)
return r;

View File

@ -958,12 +958,33 @@ static int gmc_v7_0_late_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
amdgpu_bo_late_init(adev);
if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS)
return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
else
return 0;
}
static unsigned gmc_v7_0_get_vbios_fb_size(struct amdgpu_device *adev)
{
u32 d1vga_control = RREG32(mmD1VGA_CONTROL);
unsigned size;
if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */
} else {
u32 viewport = RREG32(mmVIEWPORT_SIZE);
size = (REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_HEIGHT) *
REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_WIDTH) *
4);
}
/* return 0 if the pre-OS buffer uses up most of vram */
if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024))
return 0;
return size;
}
static int gmc_v7_0_sw_init(void *handle)
{
int r;
@ -998,8 +1019,6 @@ static int gmc_v7_0_sw_init(void *handle)
*/
adev->gmc.mc_mask = 0xffffffffffULL; /* 40 bit MC */
adev->gmc.stolen_size = 256 * 1024;
/* set DMA mask + need_dma32 flags.
* PCIE - can handle 40-bits.
* IGP - can handle 40-bits
@ -1030,6 +1049,8 @@ static int gmc_v7_0_sw_init(void *handle)
if (r)
return r;
adev->gmc.stolen_size = gmc_v7_0_get_vbios_fb_size(adev);
/* Memory manager */
r = amdgpu_bo_init(adev);
if (r)

View File

@ -138,6 +138,7 @@ static void gmc_v8_0_init_golden_registers(struct amdgpu_device *adev)
break;
case CHIP_POLARIS11:
case CHIP_POLARIS12:
case CHIP_VEGAM:
amdgpu_device_program_register_sequence(adev,
golden_settings_polaris11_a11,
ARRAY_SIZE(golden_settings_polaris11_a11));
@ -231,6 +232,7 @@ static int gmc_v8_0_init_microcode(struct amdgpu_device *adev)
case CHIP_FIJI:
case CHIP_CARRIZO:
case CHIP_STONEY:
case CHIP_VEGAM:
return 0;
default: BUG();
}
@ -567,9 +569,10 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
/* set the gart size */
if (amdgpu_gart_size == -1) {
switch (adev->asic_type) {
case CHIP_POLARIS11: /* all engines support GPUVM */
case CHIP_POLARIS10: /* all engines support GPUVM */
case CHIP_POLARIS11: /* all engines support GPUVM */
case CHIP_POLARIS12: /* all engines support GPUVM */
case CHIP_VEGAM: /* all engines support GPUVM */
default:
adev->gmc.gart_size = 256ULL << 20;
break;
@ -1049,12 +1052,33 @@ static int gmc_v8_0_late_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
amdgpu_bo_late_init(adev);
if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS)
return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
else
return 0;
}
static unsigned gmc_v8_0_get_vbios_fb_size(struct amdgpu_device *adev)
{
u32 d1vga_control = RREG32(mmD1VGA_CONTROL);
unsigned size;
if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */
} else {
u32 viewport = RREG32(mmVIEWPORT_SIZE);
size = (REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_HEIGHT) *
REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_WIDTH) *
4);
}
/* return 0 if the pre-OS buffer uses up most of vram */
if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024))
return 0;
return size;
}
#define mmMC_SEQ_MISC0_FIJI 0xA71
static int gmc_v8_0_sw_init(void *handle)
@ -1068,7 +1092,8 @@ static int gmc_v8_0_sw_init(void *handle)
} else {
u32 tmp;
if (adev->asic_type == CHIP_FIJI)
if ((adev->asic_type == CHIP_FIJI) ||
(adev->asic_type == CHIP_VEGAM))
tmp = RREG32(mmMC_SEQ_MISC0_FIJI);
else
tmp = RREG32(mmMC_SEQ_MISC0);
@ -1096,8 +1121,6 @@ static int gmc_v8_0_sw_init(void *handle)
*/
adev->gmc.mc_mask = 0xffffffffffULL; /* 40 bit MC */
adev->gmc.stolen_size = 256 * 1024;
/* set DMA mask + need_dma32 flags.
* PCIE - can handle 40-bits.
* IGP - can handle 40-bits
@ -1128,6 +1151,8 @@ static int gmc_v8_0_sw_init(void *handle)
if (r)
return r;
adev->gmc.stolen_size = gmc_v8_0_get_vbios_fb_size(adev);
/* Memory manager */
r = amdgpu_bo_init(adev);
if (r)

View File

@ -43,19 +43,13 @@
#include "gfxhub_v1_0.h"
#include "mmhub_v1_0.h"
#define mmDF_CS_AON0_DramBaseAddress0 0x0044
#define mmDF_CS_AON0_DramBaseAddress0_BASE_IDX 0
//DF_CS_AON0_DramBaseAddress0
#define DF_CS_AON0_DramBaseAddress0__AddrRngVal__SHIFT 0x0
#define DF_CS_AON0_DramBaseAddress0__LgcyMmioHoleEn__SHIFT 0x1
#define DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT 0x4
#define DF_CS_AON0_DramBaseAddress0__IntLvAddrSel__SHIFT 0x8
#define DF_CS_AON0_DramBaseAddress0__DramBaseAddr__SHIFT 0xc
#define DF_CS_AON0_DramBaseAddress0__AddrRngVal_MASK 0x00000001L
#define DF_CS_AON0_DramBaseAddress0__LgcyMmioHoleEn_MASK 0x00000002L
#define DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK 0x000000F0L
#define DF_CS_AON0_DramBaseAddress0__IntLvAddrSel_MASK 0x00000700L
#define DF_CS_AON0_DramBaseAddress0__DramBaseAddr_MASK 0xFFFFF000L
/* add these here since we already include dce12 headers and these are for DCN */
#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION 0x055d
#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_BASE_IDX 2
#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH__SHIFT 0x0
#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT__SHIFT 0x10
#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH_MASK 0x00003FFFL
#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT_MASK 0x3FFF0000L
/* XXX Move this macro to VEGA10 header file, which is like vid.h for VI.*/
#define AMDGPU_NUM_OF_VMIDS 8
@ -385,11 +379,9 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
upper_32_bits(pd_addr));
amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
/* wait for the invalidate to complete */
amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
1 << vmid, 1 << vmid);
amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng,
hub->vm_inv_eng0_ack + eng,
req, 1 << vmid);
return pd_addr;
}
@ -556,8 +548,7 @@ static int gmc_v9_0_early_init(void *handle)
adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
adev->gmc.shared_aperture_end =
adev->gmc.shared_aperture_start + (4ULL << 30) - 1;
adev->gmc.private_aperture_start =
adev->gmc.shared_aperture_end + 1;
adev->gmc.private_aperture_start = 0x1000000000000000ULL;
adev->gmc.private_aperture_end =
adev->gmc.private_aperture_start + (4ULL << 30) - 1;
@ -659,6 +650,11 @@ static int gmc_v9_0_late_init(void *handle)
unsigned i;
int r;
/*
* TODO - Uncomment once GART corruption issue is fixed.
*/
/* amdgpu_bo_late_init(adev); */
for(i = 0; i < adev->num_rings; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
unsigned vmhub = ring->funcs->vmhub;
@ -679,6 +675,7 @@ static int gmc_v9_0_late_init(void *handle)
DRM_INFO("ECC is active.\n");
} else if (r == 0) {
DRM_INFO("ECC is not present.\n");
adev->df_funcs->enable_ecc_force_par_wr_rmw(adev, false);
} else {
DRM_ERROR("gmc_v9_0_ecc_available() failed. r: %d\n", r);
return r;
@ -697,10 +694,7 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
amdgpu_device_vram_location(adev, &adev->gmc, base);
amdgpu_device_gart_location(adev, mc);
/* base offset of vram pages */
if (adev->flags & AMD_IS_APU)
adev->vm_manager.vram_base_offset = gfxhub_v1_0_get_mc_fb_offset(adev);
else
adev->vm_manager.vram_base_offset = 0;
adev->vm_manager.vram_base_offset = gfxhub_v1_0_get_mc_fb_offset(adev);
}
/**
@ -714,7 +708,6 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
*/
static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
{
u32 tmp;
int chansize, numchan;
int r;
@ -727,39 +720,7 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
else
chansize = 128;
tmp = RREG32_SOC15(DF, 0, mmDF_CS_AON0_DramBaseAddress0);
tmp &= DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK;
tmp >>= DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT;
switch (tmp) {
case 0:
default:
numchan = 1;
break;
case 1:
numchan = 2;
break;
case 2:
numchan = 0;
break;
case 3:
numchan = 4;
break;
case 4:
numchan = 0;
break;
case 5:
numchan = 8;
break;
case 6:
numchan = 0;
break;
case 7:
numchan = 16;
break;
case 8:
numchan = 2;
break;
}
numchan = adev->df_funcs->get_hbm_channel_number(adev);
adev->gmc.vram_width = numchan * chansize;
}
@ -792,6 +753,7 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
switch (adev->asic_type) {
case CHIP_VEGA10: /* all engines support GPUVM */
case CHIP_VEGA12: /* all engines support GPUVM */
case CHIP_VEGA20:
default:
adev->gmc.gart_size = 512ULL << 20;
break;
@ -826,6 +788,52 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
return amdgpu_gart_table_vram_alloc(adev);
}
static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
{
#if 0
u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL);
#endif
unsigned size;
/*
* TODO Remove once GART corruption is resolved
* Check related code in gmc_v9_0_sw_fini
* */
size = 9 * 1024 * 1024;
#if 0
if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */
} else {
u32 viewport;
switch (adev->asic_type) {
case CHIP_RAVEN:
viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION);
size = (REG_GET_FIELD(viewport,
HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) *
REG_GET_FIELD(viewport,
HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_WIDTH) *
4);
break;
case CHIP_VEGA10:
case CHIP_VEGA12:
default:
viewport = RREG32_SOC15(DCE, 0, mmSCL0_VIEWPORT_SIZE);
size = (REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_HEIGHT) *
REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_WIDTH) *
4);
break;
}
}
/* return 0 if the pre-OS buffer uses up most of vram */
if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024))
return 0;
#endif
return size;
}
static int gmc_v9_0_sw_init(void *handle)
{
int r;
@ -851,6 +859,7 @@ static int gmc_v9_0_sw_init(void *handle)
break;
case CHIP_VEGA10:
case CHIP_VEGA12:
case CHIP_VEGA20:
/*
* To fulfill 4-level page support,
* vm size is 256TB (48bit), maximum size of Vega10,
@ -877,12 +886,6 @@ static int gmc_v9_0_sw_init(void *handle)
*/
adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */
/*
* It needs to reserve 8M stolen memory for vega10
* TODO: Figure out how to avoid that...
*/
adev->gmc.stolen_size = 8 * 1024 * 1024;
/* set DMA mask + need_dma32 flags.
* PCIE - can handle 44-bits.
* IGP - can handle 44-bits
@ -907,6 +910,8 @@ static int gmc_v9_0_sw_init(void *handle)
if (r)
return r;
adev->gmc.stolen_size = gmc_v9_0_get_vbios_fb_size(adev);
/* Memory manager */
r = amdgpu_bo_init(adev);
if (r)
@ -950,6 +955,18 @@ static int gmc_v9_0_sw_fini(void *handle)
amdgpu_gem_force_release(adev);
amdgpu_vm_manager_fini(adev);
gmc_v9_0_gart_fini(adev);
/*
* TODO:
* Currently there is a bug where some memory client outside
* of the driver writes to first 8M of VRAM on S3 resume,
* this overrides GART which by default gets placed in first 8M and
* causes VM_FAULTS once GTT is accessed.
* Keep the stolen memory reservation until the while this is not solved.
* Also check code in gmc_v9_0_get_vbios_fb_size and gmc_v9_0_late_init
*/
amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL);
amdgpu_bo_fini(adev);
return 0;
@ -960,6 +977,7 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
switch (adev->asic_type) {
case CHIP_VEGA10:
case CHIP_VEGA20:
soc15_program_register_sequence(adev,
golden_settings_mmhub_1_0_0,
ARRAY_SIZE(golden_settings_mmhub_1_0_0));

View File

@ -2817,7 +2817,7 @@ static int kv_dpm_init(struct amdgpu_device *adev)
pi->caps_tcp_ramping = true;
}
if (amdgpu_pp_feature_mask & SCLK_DEEP_SLEEP_MASK)
if (adev->powerplay.pp_feature & PP_SCLK_DEEP_SLEEP_MASK)
pi->caps_sclk_ds = true;
else
pi->caps_sclk_ds = false;
@ -2974,7 +2974,7 @@ static int kv_dpm_late_init(void *handle)
/* powerdown unused blocks for now */
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if (!amdgpu_dpm)
if (!adev->pm.dpm_enabled)
return 0;
kv_dpm_powergate_acp(adev, true);

View File

@ -734,6 +734,7 @@ int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev,
switch (adev->asic_type) {
case CHIP_VEGA10:
case CHIP_VEGA12:
case CHIP_VEGA20:
case CHIP_RAVEN:
mmhub_v1_0_update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE ? true : false);

View File

@ -260,8 +260,10 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
} while (timeout > 1);
flr_done:
if (locked)
if (locked) {
adev->in_gpu_reset = 0;
mutex_unlock(&adev->lock_reset);
}
/* Trigger recovery for world switch failure if no TDR */
if (amdgpu_lockup_timeout == 0)

View File

@ -34,10 +34,19 @@
#define smnCPM_CONTROL 0x11180460
#define smnPCIE_CNTL2 0x11180070
/* vega20 */
#define mmRCC_DEV0_EPF0_STRAP0_VG20 0x0011
#define mmRCC_DEV0_EPF0_STRAP0_VG20_BASE_IDX 2
static u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev)
{
u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
if (adev->asic_type == CHIP_VEGA20)
tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0_VG20);
else
tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
tmp &= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK;
tmp >>= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT;
@ -75,10 +84,14 @@ static void nbio_v7_0_sdma_doorbell_range(struct amdgpu_device *adev, int instan
SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE);
u32 doorbell_range = RREG32(reg);
u32 range = 2;
if (adev->asic_type == CHIP_VEGA20)
range = 8;
if (use_doorbell) {
doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index);
doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 2);
doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, range);
} else
doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 0);
@ -133,6 +146,9 @@ static void nbio_v7_0_update_medium_grain_clock_gating(struct amdgpu_device *ade
{
uint32_t def, data;
if (adev->asic_type == CHIP_VEGA20)
return;
/* NBIF_MGCG_CTRL_LCLK */
def = data = RREG32_PCIE(smnNBIF_MGCG_CTRL_LCLK);

View File

@ -40,11 +40,20 @@ enum psp_gfx_crtl_cmd_id
GFX_CTRL_CMD_ID_INIT_GPCOM_RING = 0x00020000, /* initialize GPCOM ring */
GFX_CTRL_CMD_ID_DESTROY_RINGS = 0x00030000, /* destroy rings */
GFX_CTRL_CMD_ID_CAN_INIT_RINGS = 0x00040000, /* is it allowed to initialized the rings */
GFX_CTRL_CMD_ID_ENABLE_INT = 0x00050000, /* enable PSP-to-Gfx interrupt */
GFX_CTRL_CMD_ID_DISABLE_INT = 0x00060000, /* disable PSP-to-Gfx interrupt */
GFX_CTRL_CMD_ID_MODE1_RST = 0x00070000, /* trigger the Mode 1 reset */
GFX_CTRL_CMD_ID_MAX = 0x000F0000, /* max command ID */
};
/*-----------------------------------------------------------------------------
NOTE: All physical addresses used in this interface are actually
GPU Virtual Addresses.
*/
/* Control registers of the TEE Gfx interface. These are located in
* SRBM-to-PSP mailbox registers (total 8 registers).
*/
@ -55,8 +64,8 @@ struct psp_gfx_ctrl
volatile uint32_t rbi_rptr; /* +8 Read pointer (index) of RBI ring */
volatile uint32_t gpcom_wptr; /* +12 Write pointer (index) of GPCOM ring */
volatile uint32_t gpcom_rptr; /* +16 Read pointer (index) of GPCOM ring */
volatile uint32_t ring_addr_lo; /* +20 bits [31:0] of physical address of ring buffer */
volatile uint32_t ring_addr_hi; /* +24 bits [63:32] of physical address of ring buffer */
volatile uint32_t ring_addr_lo; /* +20 bits [31:0] of GPU Virtual of ring buffer (VMID=0)*/
volatile uint32_t ring_addr_hi; /* +24 bits [63:32] of GPU Virtual of ring buffer (VMID=0) */
volatile uint32_t ring_buf_size; /* +28 Ring buffer size (in bytes) */
};
@ -78,6 +87,8 @@ enum psp_gfx_cmd_id
GFX_CMD_ID_LOAD_ASD = 0x00000004, /* load ASD Driver */
GFX_CMD_ID_SETUP_TMR = 0x00000005, /* setup TMR region */
GFX_CMD_ID_LOAD_IP_FW = 0x00000006, /* load HW IP FW */
GFX_CMD_ID_DESTROY_TMR = 0x00000007, /* destroy TMR region */
GFX_CMD_ID_SAVE_RESTORE = 0x00000008, /* save/restore HW IP FW */
};
@ -85,11 +96,11 @@ enum psp_gfx_cmd_id
/* Command to load Trusted Application binary into PSP OS. */
struct psp_gfx_cmd_load_ta
{
uint32_t app_phy_addr_lo; /* bits [31:0] of the physical address of the TA binary (must be 4 KB aligned) */
uint32_t app_phy_addr_hi; /* bits [63:32] of the physical address of the TA binary */
uint32_t app_phy_addr_lo; /* bits [31:0] of the GPU Virtual address of the TA binary (must be 4 KB aligned) */
uint32_t app_phy_addr_hi; /* bits [63:32] of the GPU Virtual address of the TA binary */
uint32_t app_len; /* length of the TA binary in bytes */
uint32_t cmd_buf_phy_addr_lo; /* bits [31:0] of the physical address of CMD buffer (must be 4 KB aligned) */
uint32_t cmd_buf_phy_addr_hi; /* bits [63:32] of the physical address of CMD buffer */
uint32_t cmd_buf_phy_addr_lo; /* bits [31:0] of the GPU Virtual address of CMD buffer (must be 4 KB aligned) */
uint32_t cmd_buf_phy_addr_hi; /* bits [63:32] of the GPU Virtual address of CMD buffer */
uint32_t cmd_buf_len; /* length of the CMD buffer in bytes; must be multiple of 4 KB */
/* Note: CmdBufLen can be set to 0. In this case no persistent CMD buffer is provided
@ -111,8 +122,8 @@ struct psp_gfx_cmd_unload_ta
*/
struct psp_gfx_buf_desc
{
uint32_t buf_phy_addr_lo; /* bits [31:0] of physical address of the buffer (must be 4 KB aligned) */
uint32_t buf_phy_addr_hi; /* bits [63:32] of physical address of the buffer */
uint32_t buf_phy_addr_lo; /* bits [31:0] of GPU Virtual address of the buffer (must be 4 KB aligned) */
uint32_t buf_phy_addr_hi; /* bits [63:32] of GPU Virtual address of the buffer */
uint32_t buf_size; /* buffer size in bytes (must be multiple of 4 KB and no bigger than 64 MB) */
};
@ -145,8 +156,8 @@ struct psp_gfx_cmd_invoke_cmd
/* Command to setup TMR region. */
struct psp_gfx_cmd_setup_tmr
{
uint32_t buf_phy_addr_lo; /* bits [31:0] of physical address of TMR buffer (must be 4 KB aligned) */
uint32_t buf_phy_addr_hi; /* bits [63:32] of physical address of TMR buffer */
uint32_t buf_phy_addr_lo; /* bits [31:0] of GPU Virtual address of TMR buffer (must be 4 KB aligned) */
uint32_t buf_phy_addr_hi; /* bits [63:32] of GPU Virtual address of TMR buffer */
uint32_t buf_size; /* buffer size in bytes (must be multiple of 4 KB) */
};
@ -174,18 +185,32 @@ enum psp_gfx_fw_type
GFX_FW_TYPE_ISP = 16,
GFX_FW_TYPE_ACP = 17,
GFX_FW_TYPE_SMU = 18,
GFX_FW_TYPE_MMSCH = 19,
GFX_FW_TYPE_RLC_RESTORE_LIST_GPM_MEM = 20,
GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_MEM = 21,
GFX_FW_TYPE_RLC_RESTORE_LIST_CNTL = 22,
GFX_FW_TYPE_MAX = 23
};
/* Command to load HW IP FW. */
struct psp_gfx_cmd_load_ip_fw
{
uint32_t fw_phy_addr_lo; /* bits [31:0] of physical address of FW location (must be 4 KB aligned) */
uint32_t fw_phy_addr_hi; /* bits [63:32] of physical address of FW location */
uint32_t fw_phy_addr_lo; /* bits [31:0] of GPU Virtual address of FW location (must be 4 KB aligned) */
uint32_t fw_phy_addr_hi; /* bits [63:32] of GPU Virtual address of FW location */
uint32_t fw_size; /* FW buffer size in bytes */
enum psp_gfx_fw_type fw_type; /* FW type */
};
/* Command to save/restore HW IP FW. */
struct psp_gfx_cmd_save_restore_ip_fw
{
uint32_t save_fw; /* if set, command is used for saving fw otherwise for resetoring*/
uint32_t save_restore_addr_lo; /* bits [31:0] of FB address of GART memory used as save/restore buffer (must be 4 KB aligned) */
uint32_t save_restore_addr_hi; /* bits [63:32] of FB address of GART memory used as save/restore buffer */
uint32_t buf_size; /* Size of the save/restore buffer in bytes */
enum psp_gfx_fw_type fw_type; /* FW type */
};
/* All GFX ring buffer commands. */
union psp_gfx_commands
@ -195,7 +220,7 @@ union psp_gfx_commands
struct psp_gfx_cmd_invoke_cmd cmd_invoke_cmd;
struct psp_gfx_cmd_setup_tmr cmd_setup_tmr;
struct psp_gfx_cmd_load_ip_fw cmd_load_ip_fw;
struct psp_gfx_cmd_save_restore_ip_fw cmd_save_restore_ip_fw;
};
@ -226,8 +251,8 @@ struct psp_gfx_cmd_resp
/* These fields are used for RBI only. They are all 0 in GPCOM commands
*/
uint32_t resp_buf_addr_lo; /* +12 bits [31:0] of physical address of response buffer (must be 4 KB aligned) */
uint32_t resp_buf_addr_hi; /* +16 bits [63:32] of physical address of response buffer */
uint32_t resp_buf_addr_lo; /* +12 bits [31:0] of GPU Virtual address of response buffer (must be 4 KB aligned) */
uint32_t resp_buf_addr_hi; /* +16 bits [63:32] of GPU Virtual address of response buffer */
uint32_t resp_offset; /* +20 offset within response buffer */
uint32_t resp_buf_size; /* +24 total size of the response buffer in bytes */
@ -251,19 +276,19 @@ struct psp_gfx_cmd_resp
/* Structure of the Ring Buffer Frame */
struct psp_gfx_rb_frame
{
uint32_t cmd_buf_addr_lo; /* +0 bits [31:0] of physical address of command buffer (must be 4 KB aligned) */
uint32_t cmd_buf_addr_hi; /* +4 bits [63:32] of physical address of command buffer */
uint32_t cmd_buf_addr_lo; /* +0 bits [31:0] of GPU Virtual address of command buffer (must be 4 KB aligned) */
uint32_t cmd_buf_addr_hi; /* +4 bits [63:32] of GPU Virtual address of command buffer */
uint32_t cmd_buf_size; /* +8 command buffer size in bytes */
uint32_t fence_addr_lo; /* +12 bits [31:0] of physical address of Fence for this frame */
uint32_t fence_addr_hi; /* +16 bits [63:32] of physical address of Fence for this frame */
uint32_t fence_addr_lo; /* +12 bits [31:0] of GPU Virtual address of Fence for this frame */
uint32_t fence_addr_hi; /* +16 bits [63:32] of GPU Virtual address of Fence for this frame */
uint32_t fence_value; /* +20 Fence value */
uint32_t sid_lo; /* +24 bits [31:0] of SID value (used only for RBI frames) */
uint32_t sid_hi; /* +28 bits [63:32] of SID value (used only for RBI frames) */
uint8_t vmid; /* +32 VMID value used for mapping of all addresses for this frame */
uint8_t frame_type; /* +33 1: destory context frame, 0: all other frames; used only for RBI frames */
uint8_t reserved1[2]; /* +34 reserved, must be 0 */
uint32_t reserved2[7]; /* +40 reserved, must be 0 */
/* total 64 bytes */
uint32_t reserved2[7]; /* +36 reserved, must be 0 */
/* total 64 bytes */
};
#endif /* _PSP_TEE_GFX_IF_H_ */

View File

@ -70,6 +70,15 @@ psp_v10_0_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type *
case AMDGPU_UCODE_ID_RLC_G:
*type = GFX_FW_TYPE_RLC_G;
break;
case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL:
*type = GFX_FW_TYPE_RLC_RESTORE_LIST_CNTL;
break;
case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM:
*type = GFX_FW_TYPE_RLC_RESTORE_LIST_GPM_MEM;
break;
case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM:
*type = GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_MEM;
break;
case AMDGPU_UCODE_ID_SMC:
*type = GFX_FW_TYPE_SMU;
break;

View File

@ -41,6 +41,9 @@ MODULE_FIRMWARE("amdgpu/vega10_sos.bin");
MODULE_FIRMWARE("amdgpu/vega10_asd.bin");
MODULE_FIRMWARE("amdgpu/vega12_sos.bin");
MODULE_FIRMWARE("amdgpu/vega12_asd.bin");
MODULE_FIRMWARE("amdgpu/vega20_sos.bin");
MODULE_FIRMWARE("amdgpu/vega20_asd.bin");
#define smnMP1_FIRMWARE_FLAGS 0x3010028

View File

@ -62,6 +62,8 @@ MODULE_FIRMWARE("amdgpu/polaris11_sdma.bin");
MODULE_FIRMWARE("amdgpu/polaris11_sdma1.bin");
MODULE_FIRMWARE("amdgpu/polaris12_sdma.bin");
MODULE_FIRMWARE("amdgpu/polaris12_sdma1.bin");
MODULE_FIRMWARE("amdgpu/vegam_sdma.bin");
MODULE_FIRMWARE("amdgpu/vegam_sdma1.bin");
static const u32 sdma_offsets[SDMA_MAX_INSTANCE] =
@ -209,6 +211,7 @@ static void sdma_v3_0_init_golden_registers(struct amdgpu_device *adev)
break;
case CHIP_POLARIS11:
case CHIP_POLARIS12:
case CHIP_VEGAM:
amdgpu_device_program_register_sequence(adev,
golden_settings_polaris11_a11,
ARRAY_SIZE(golden_settings_polaris11_a11));
@ -275,15 +278,18 @@ static int sdma_v3_0_init_microcode(struct amdgpu_device *adev)
case CHIP_FIJI:
chip_name = "fiji";
break;
case CHIP_POLARIS11:
chip_name = "polaris11";
break;
case CHIP_POLARIS10:
chip_name = "polaris10";
break;
case CHIP_POLARIS11:
chip_name = "polaris11";
break;
case CHIP_POLARIS12:
chip_name = "polaris12";
break;
case CHIP_VEGAM:
chip_name = "vegam";
break;
case CHIP_CARRIZO:
chip_name = "carrizo";
break;

View File

@ -42,6 +42,8 @@ MODULE_FIRMWARE("amdgpu/vega10_sdma.bin");
MODULE_FIRMWARE("amdgpu/vega10_sdma1.bin");
MODULE_FIRMWARE("amdgpu/vega12_sdma.bin");
MODULE_FIRMWARE("amdgpu/vega12_sdma1.bin");
MODULE_FIRMWARE("amdgpu/vega20_sdma.bin");
MODULE_FIRMWARE("amdgpu/vega20_sdma1.bin");
MODULE_FIRMWARE("amdgpu/raven_sdma.bin");
#define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK 0x000000F8L
@ -107,6 +109,28 @@ static const struct soc15_reg_golden golden_settings_sdma_4_1[] =
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0)
};
static const struct soc15_reg_golden golden_settings_sdma_4_2[] =
{
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xffffffff, 0x3f000100),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff0, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CLK_CTRL, 0xffffffff, 0x3f000100),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_PAGE, 0x000003ff, 0x000003c0)
};
static const struct soc15_reg_golden golden_settings_sdma_rv1[] =
{
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00000002),
@ -139,6 +163,11 @@ static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev)
golden_settings_sdma_vg12,
ARRAY_SIZE(golden_settings_sdma_vg12));
break;
case CHIP_VEGA20:
soc15_program_register_sequence(adev,
golden_settings_sdma_4_2,
ARRAY_SIZE(golden_settings_sdma_4_2));
break;
case CHIP_RAVEN:
soc15_program_register_sequence(adev,
golden_settings_sdma_4_1,
@ -182,6 +211,9 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev)
case CHIP_VEGA12:
chip_name = "vega12";
break;
case CHIP_VEGA20:
chip_name = "vega20";
break;
case CHIP_RAVEN:
chip_name = "raven";
break;
@ -360,6 +392,31 @@ static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
}
static void sdma_v4_0_wait_reg_mem(struct amdgpu_ring *ring,
int mem_space, int hdp,
uint32_t addr0, uint32_t addr1,
uint32_t ref, uint32_t mask,
uint32_t inv)
{
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(hdp) |
SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(mem_space) |
SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
if (mem_space) {
/* memory */
amdgpu_ring_write(ring, addr0);
amdgpu_ring_write(ring, addr1);
} else {
/* registers */
amdgpu_ring_write(ring, addr0 << 2);
amdgpu_ring_write(ring, addr1 << 2);
}
amdgpu_ring_write(ring, ref); /* reference */
amdgpu_ring_write(ring, mask); /* mask */
amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(inv)); /* retry count, poll interval */
}
/**
* sdma_v4_0_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
*
@ -378,15 +435,10 @@ static void sdma_v4_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
else
ref_and_mask = nbio_hf_reg->ref_and_mask_sdma1;
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) |
SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
amdgpu_ring_write(ring, (adev->nbio_funcs->get_hdp_flush_done_offset(adev)) << 2);
amdgpu_ring_write(ring, (adev->nbio_funcs->get_hdp_flush_req_offset(adev)) << 2);
amdgpu_ring_write(ring, ref_and_mask); /* reference */
amdgpu_ring_write(ring, ref_and_mask); /* mask */
amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
sdma_v4_0_wait_reg_mem(ring, 0, 1,
adev->nbio_funcs->get_hdp_flush_done_offset(adev),
adev->nbio_funcs->get_hdp_flush_req_offset(adev),
ref_and_mask, ref_and_mask, 10);
}
/**
@ -1114,16 +1166,10 @@ static void sdma_v4_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
uint64_t addr = ring->fence_drv.gpu_addr;
/* wait for idle */
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1));
amdgpu_ring_write(ring, addr & 0xfffffffc);
amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
amdgpu_ring_write(ring, seq); /* reference */
amdgpu_ring_write(ring, 0xffffffff); /* mask */
amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
sdma_v4_0_wait_reg_mem(ring, 1, 0,
addr & 0xfffffffc,
upper_32_bits(addr) & 0xffffffff,
seq, 0xffffffff, 4);
}
@ -1154,15 +1200,7 @@ static void sdma_v4_0_ring_emit_wreg(struct amdgpu_ring *ring,
static void sdma_v4_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
uint32_t val, uint32_t mask)
{
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */
amdgpu_ring_write(ring, reg << 2);
amdgpu_ring_write(ring, 0);
amdgpu_ring_write(ring, val); /* reference */
amdgpu_ring_write(ring, mask); /* mask */
amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
sdma_v4_0_wait_reg_mem(ring, 0, 0, reg, 0, val, mask, 10);
}
static int sdma_v4_0_early_init(void *handle)
@ -1510,6 +1548,7 @@ static int sdma_v4_0_set_clockgating_state(void *handle,
switch (adev->asic_type) {
case CHIP_VEGA10:
case CHIP_VEGA12:
case CHIP_VEGA20:
case CHIP_RAVEN:
sdma_v4_0_update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE ? true : false);
@ -1605,6 +1644,7 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = {
.pad_ib = sdma_v4_0_ring_pad_ib,
.emit_wreg = sdma_v4_0_ring_emit_wreg,
.emit_reg_wait = sdma_v4_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};
static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev)

View File

@ -1252,6 +1252,12 @@ static void si_invalidate_hdp(struct amdgpu_device *adev,
}
}
static bool si_need_full_reset(struct amdgpu_device *adev)
{
/* change this when we support soft reset */
return true;
}
static int si_get_pcie_lanes(struct amdgpu_device *adev)
{
u32 link_width_cntl;
@ -1332,6 +1338,7 @@ static const struct amdgpu_asic_funcs si_asic_funcs =
.get_config_memsize = &si_get_config_memsize,
.flush_hdp = &si_flush_hdp,
.invalidate_hdp = &si_invalidate_hdp,
.need_full_reset = &si_need_full_reset,
};
static uint32_t si_get_rev_id(struct amdgpu_device *adev)

Some files were not shown because too many files have changed in this diff Show More