Merge drm/drm-next into drm-intel-next

Backmerge to get the EDID handling changes.

Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Commit 902ecddc95 by Jani Nikula, 2023-01-25 11:41:16 +02:00
1003 changed files with 36178 additions and 36416 deletions

.gitignore

@ -39,6 +39,7 @@
*.o.*
*.patch
*.rmeta
*.rpm
*.rsi
*.s
*.so

@ -1,112 +0,0 @@
Cadence DSI bridge
==================
The Cadence DSI bridge is a DPI to DSI bridge supporting up to 4 DSI lanes.
Required properties:
- compatible: should be set to "cdns,dsi".
- reg: physical base address and length of the controller's registers.
- interrupts: interrupt line connected to the DSI bridge.
- clocks: DSI bridge clocks.
- clock-names: must contain "dsi_p_clk" and "dsi_sys_clk".
- phys: phandle link to the MIPI D-PHY controller.
- phy-names: must contain "dphy".
- #address-cells: must be set to 1.
- #size-cells: must be set to 0.
Optional properties:
- resets: DSI reset lines.
- reset-names: can contain "dsi_p_rst".
Required subnodes:
- ports: Ports as described in Documentation/devicetree/bindings/graph.txt.
2 ports are available:
* port 0: this port is only needed if some of your DSI devices are
controlled through an external bus like I2C or SPI. Can have at
most 4 endpoints. The endpoint number is directly encoding the
DSI virtual channel used by this device.
* port 1: represents the DPI input.
Other ports will be added later to support the new kind of inputs.
- one subnode per DSI device connected on the DSI bus. Each DSI device should
contain a reg property encoding its virtual channel.
Example:
dsi0: dsi@fd0c0000 {
compatible = "cdns,dsi";
reg = <0x0 0xfd0c0000 0x0 0x1000>;
clocks = <&pclk>, <&sysclk>;
clock-names = "dsi_p_clk", "dsi_sys_clk";
interrupts = <1>;
phys = <&dphy0>;
phy-names = "dphy";
#address-cells = <1>;
#size-cells = <0>;
ports {
#address-cells = <1>;
#size-cells = <0>;
port@1 {
reg = <1>;
dsi0_dpi_input: endpoint {
remote-endpoint = <&xxx_dpi_output>;
};
};
};
panel: dsi-dev@0 {
compatible = "<vendor,panel>";
reg = <0>;
};
};
or
dsi0: dsi@fd0c0000 {
compatible = "cdns,dsi";
reg = <0x0 0xfd0c0000 0x0 0x1000>;
clocks = <&pclk>, <&sysclk>;
clock-names = "dsi_p_clk", "dsi_sys_clk";
interrupts = <1>;
phys = <&dphy1>;
phy-names = "dphy";
#address-cells = <1>;
#size-cells = <0>;
ports {
#address-cells = <1>;
#size-cells = <0>;
port@0 {
reg = <0>;
#address-cells = <1>;
#size-cells = <0>;
dsi0_output: endpoint@0 {
reg = <0>;
remote-endpoint = <&dsi_panel_input>;
};
};
port@1 {
reg = <1>;
dsi0_dpi_input: endpoint {
remote-endpoint = <&xxx_dpi_output>;
};
};
};
};
i2c@xxx {
panel: panel@59 {
compatible = "<vendor,panel>";
reg = <0x59>;
port {
dsi_panel_input: endpoint {
remote-endpoint = <&dsi0_output>;
};
};
};
};

@ -0,0 +1,180 @@
# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
%YAML 1.2
---
$id: http://devicetree.org/schemas/display/bridge/cdns,dsi.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Cadence DSI bridge
maintainers:
- Boris Brezillon <boris.brezillon@bootlin.com>
description: |
CDNS DSI is a bridge device which converts DPI to DSI
properties:
compatible:
enum:
- cdns,dsi
- ti,j721e-dsi
reg:
minItems: 1
items:
- description:
Register block for controller's registers.
- description:
Register block for wrapper settings registers in case of TI J7 SoCs.
clocks:
items:
- description: PSM clock, used by the IP
- description: sys clock, used by the IP
clock-names:
items:
- const: dsi_p_clk
- const: dsi_sys_clk
phys:
maxItems: 1
phy-names:
const: dphy
interrupts:
maxItems: 1
resets:
maxItems: 1
reset-names:
const: dsi_p_rst
ports:
$ref: /schemas/graph.yaml#/properties/ports
properties:
port@0:
$ref: /schemas/graph.yaml#/properties/port
description:
Output port representing the DSI output. It can have
at most 4 endpoints. The endpoint number is directly encoding
the DSI virtual channel used by this device.
port@1:
$ref: /schemas/graph.yaml#/properties/port
description:
Input port representing the DPI input.
required:
- port@1
allOf:
- $ref: ../dsi-controller.yaml#
- if:
properties:
compatible:
contains:
const: ti,j721e-dsi
then:
properties:
reg:
minItems: 2
maxItems: 2
power-domains:
maxItems: 1
else:
properties:
reg:
maxItems: 1
required:
- compatible
- reg
- interrupts
- clocks
- clock-names
- phys
- phy-names
- ports
unevaluatedProperties: false
examples:
- |
bus {
#address-cells = <2>;
#size-cells = <2>;
dsi@fd0c0000 {
compatible = "cdns,dsi";
reg = <0x0 0xfd0c0000 0x0 0x1000>;
clocks = <&pclk>, <&sysclk>;
clock-names = "dsi_p_clk", "dsi_sys_clk";
interrupts = <1>;
phys = <&dphy0>;
phy-names = "dphy";
#address-cells = <1>;
#size-cells = <0>;
ports {
#address-cells = <1>;
#size-cells = <0>;
port@1 {
reg = <1>;
endpoint {
remote-endpoint = <&xxx_dpi_output>;
};
};
};
panel@0 {
compatible = "panasonic,vvx10f034n00";
reg = <0>;
power-supply = <&vcc_lcd_reg>;
};
};
};
- |
bus {
#address-cells = <2>;
#size-cells = <2>;
dsi@fd0c0000 {
compatible = "cdns,dsi";
reg = <0x0 0xfd0c0000 0x0 0x1000>;
clocks = <&pclk>, <&sysclk>;
clock-names = "dsi_p_clk", "dsi_sys_clk";
interrupts = <1>;
phys = <&dphy1>;
phy-names = "dphy";
ports {
#address-cells = <1>;
#size-cells = <0>;
port@0 {
reg = <0>;
#address-cells = <1>;
#size-cells = <0>;
endpoint@0 {
reg = <0>;
remote-endpoint = <&dsi_panel_input>;
};
};
port@1 {
reg = <1>;
endpoint {
remote-endpoint = <&xxx_dpi_output>;
};
};
};
};
};

@ -16,7 +16,9 @@ description: |
properties:
compatible:
const: fsl,imx8mp-ldb
enum:
- fsl,imx8mp-ldb
- fsl,imx93-ldb
clocks:
maxItems: 1
@ -57,6 +59,18 @@ required:
- clocks
- ports
allOf:
- if:
properties:
compatible:
contains:
const: fsl,imx93-ldb
then:
properties:
ports:
properties:
port@2: false
additionalProperties: false
examples:

@ -52,9 +52,49 @@ properties:
maxItems: 1
description: extcon specifier for the Power Delivery
port:
$ref: /schemas/graph.yaml#/properties/port
description: A port node pointing to DPI host port node
ports:
$ref: /schemas/graph.yaml#/properties/ports
properties:
port@0:
$ref: /schemas/graph.yaml#/$defs/port-base
unevaluatedProperties: false
description: A port node pointing to DPI host port node
properties:
endpoint:
$ref: /schemas/graph.yaml#/$defs/endpoint-base
unevaluatedProperties: false
properties:
link-frequencies:
minItems: 1
maxItems: 1
description: Allowed max link frequencies in Hz
port@1:
$ref: /schemas/graph.yaml#/$defs/port-base
unevaluatedProperties: false
description: Video port for DP output
properties:
endpoint:
$ref: /schemas/graph.yaml#/$defs/endpoint-base
unevaluatedProperties: false
properties:
data-lanes:
minItems: 1
uniqueItems: true
items:
- enum: [ 0, 1 ]
- const: 1
- const: 2
- const: 3
required:
- port@0
- port@1
required:
- compatible
@ -63,6 +103,7 @@ required:
- interrupts
- reset-gpios
- extcon
- ports
additionalProperties: false
@ -85,9 +126,24 @@ examples:
reset-gpios = <&pio 179 1>;
extcon = <&usbc_extcon>;
port {
it6505_in: endpoint {
remote-endpoint = <&dpi_out>;
ports {
#address-cells = <1>;
#size-cells = <0>;
port@0 {
reg = <0>;
it6505_in: endpoint {
remote-endpoint = <&dpi_out>;
link-frequencies = /bits/ 64 <150000000>;
};
};
port@1 {
reg = <1>;
it6505_out: endpoint {
remote-endpoint = <&dp_in>;
data-lanes = <0 1>;
};
};
};
};

@ -17,7 +17,9 @@ description: |
properties:
compatible:
const: ite,it66121
enum:
- ite,it66121
- ite,it6610
reg:
maxItems: 1

@ -22,6 +22,7 @@ properties:
items:
- enum:
- renesas,r9a07g044-mipi-dsi # RZ/G2{L,LC}
- renesas,r9a07g054-mipi-dsi # RZ/V2L
- const: renesas,rzg2l-mipi-dsi
reg:

@ -0,0 +1,60 @@
# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
%YAML 1.2
---
$id: http://devicetree.org/schemas/display/panel/auo,a030jtn01.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: AUO A030JTN01 3.0" (320x480 pixels) 24-bit TFT LCD panel
description: |
Delta RGB 8-bit panel found in some Retrogame handhelds
maintainers:
- Paul Cercueil <paul@crapouillou.net>
- Christophe Branchereau <cbranchereau@gmail.com>
allOf:
- $ref: panel-common.yaml#
- $ref: /schemas/spi/spi-peripheral-props.yaml#
properties:
compatible:
const: auo,a030jtn01
reg:
maxItems: 1
required:
- compatible
- reg
- power-supply
- reset-gpios
unevaluatedProperties: false
examples:
- |
#include <dt-bindings/gpio/gpio.h>
spi {
#address-cells = <1>;
#size-cells = <0>;
panel@0 {
compatible = "auo,a030jtn01";
reg = <0>;
spi-max-frequency = <10000000>;
reset-gpios = <&gpe 4 GPIO_ACTIVE_LOW>;
power-supply = <&lcd_power>;
backlight = <&backlight>;
port {
panel_input: endpoint {
remote-endpoint = <&panel_output>;
};
};
};
};

@ -0,0 +1,56 @@
# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
%YAML 1.2
---
$id: http://devicetree.org/schemas/display/panel/focaltech,gpt3.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Focaltech GPT3 3.0" (640x480 pixels) IPS LCD panel
maintainers:
- Christophe Branchereau <cbranchereau@gmail.com>
allOf:
- $ref: panel-common.yaml#
- $ref: /schemas/spi/spi-peripheral-props.yaml#
properties:
compatible:
const: focaltech,gpt3
reg:
maxItems: 1
required:
- compatible
- reg
- power-supply
- reset-gpios
unevaluatedProperties: false
examples:
- |
#include <dt-bindings/gpio/gpio.h>
spi {
#address-cells = <1>;
#size-cells = <0>;
panel@0 {
compatible = "focaltech,gpt3";
reg = <0>;
spi-max-frequency = <3125000>;
reset-gpios = <&gpe 2 GPIO_ACTIVE_LOW>;
backlight = <&backlight>;
power-supply = <&vcc>;
port {
panel_input: endpoint {
remote-endpoint = <&panel_output>;
};
};
};
};

@ -0,0 +1,76 @@
# SPDX-License-Identifier: (GPL-2.0-only or BSD-2-Clause)
%YAML 1.2
---
$id: http://devicetree.org/schemas/display/panel/himax,hx8394.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Himax HX8394 MIPI-DSI LCD panel controller
maintainers:
- Ondrej Jirman <megi@xff.cz>
- Javier Martinez Canillas <javierm@redhat.com>
description:
Device tree bindings for panels based on the Himax HX8394 controller,
such as the HannStar HSD060BHW4 720x1440 TFT LCD panel connected with
a MIPI-DSI video interface.
allOf:
- $ref: panel-common.yaml#
properties:
compatible:
items:
- enum:
- hannstar,hsd060bhw4
- const: himax,hx8394
reg: true
reset-gpios: true
backlight: true
port: true
vcc-supply:
description: Panel power supply
iovcc-supply:
description: I/O voltage supply
required:
- compatible
- reg
- reset-gpios
- backlight
- port
- vcc-supply
- iovcc-supply
additionalProperties: false
examples:
- |
#include <dt-bindings/gpio/gpio.h>
dsi {
#address-cells = <1>;
#size-cells = <0>;
panel@0 {
compatible = "hannstar,hsd060bhw4", "himax,hx8394";
reg = <0>;
vcc-supply = <&reg_2v8_p>;
iovcc-supply = <&reg_1v8_p>;
reset-gpios = <&gpio3 13 GPIO_ACTIVE_LOW>;
backlight = <&backlight>;
port {
mipi_in_panel: endpoint {
remote-endpoint = <&mipi_out_panel>;
};
};
};
};
...

@ -22,8 +22,9 @@ description: |
The standard defines the following interface signals for type C:
- Power:
- Vdd: Power supply for display module
Called power-supply in this binding.
- Vddi: Logic level supply for interface signals
Combined into one in this binding called: power-supply
Called io-supply in this binding.
- Interface:
- CSx: Chip select
- SCL: Serial clock
@ -80,6 +81,11 @@ properties:
Controller data/command selection (D/CX) in 4-line SPI mode.
If not set, the controller is in 3-line SPI mode.
io-supply:
description: |
Logic level supply for interface signals (Vddi).
No need to set if this is the same as power-supply.
required:
- compatible
- reg

@ -0,0 +1,53 @@
# SPDX-License-Identifier: GPL-2.0-only or BSD-2-Clause
%YAML 1.2
---
$id: http://devicetree.org/schemas/display/panel/visionox,vtdr6130.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Visionox VTDR6130 AMOLED DSI Panel
maintainers:
- Neil Armstrong <neil.armstrong@linaro.org>
allOf:
- $ref: panel-common.yaml#
properties:
compatible:
const: visionox,vtdr6130
vddio-supply: true
vci-supply: true
vdd-supply: true
port: true
reset-gpios: true
additionalProperties: false
required:
- compatible
- vddio-supply
- vci-supply
- vdd-supply
- reset-gpios
- port
examples:
- |
#include <dt-bindings/gpio/gpio.h>
panel {
compatible = "visionox,vtdr6130";
vddio-supply = <&vreg_l12b_1p8>;
vci-supply = <&vreg_l13b_3p0>;
vdd-supply = <&vreg_l11b_1p2>;
reset-gpios = <&tlmm 133 GPIO_ACTIVE_LOW>;
port {
panel0_in: endpoint {
remote-endpoint = <&dsi0_out>;
};
};
};
...

@ -63,6 +63,11 @@ properties:
reg:
description: Location and size of the framebuffer memory
memory-region:
maxItems: 1
description: Phandle to a node describing the memory to be used for the
framebuffer. If present, overrides the "reg" property (if one exists).
clocks:
description: List of clocks used by the framebuffer.
@ -94,6 +99,7 @@ properties:
* `x1r5g5b5` - 16-bit pixels, d[14:10]=r, d[9:5]=g, d[4:0]=b
* `x2r10g10b10` - 32-bit pixels, d[29:20]=r, d[19:10]=g, d[9:0]=b
* `x8r8g8b8` - 32-bit pixels, d[23:16]=r, d[15:8]=g, d[7:0]=b
* `x8b8g8r8` - 32-bit pixels, d[23:16]=b, d[15:8]=g, d[7:0]=r
enum:
- a1r5g5b5
- a2r10g10b10
@ -105,6 +111,7 @@ properties:
- x1r5g5b5
- x2r10g10b10
- x8r8g8b8
- x8b8g8r8
display:
$ref: /schemas/types.yaml#/definitions/phandle

@ -0,0 +1,52 @@
# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
%YAML 1.2
---
$id: http://devicetree.org/schemas/reserved-memory/framebuffer.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: /reserved-memory framebuffer node bindings
maintainers:
- devicetree-spec@vger.kernel.org
allOf:
- $ref: reserved-memory.yaml
properties:
compatible:
const: framebuffer
description: >
This indicates a region of memory meant to be used as a framebuffer for
a set of display devices. It can be used by an operating system to keep
the framebuffer from being overwritten and use it as the backing memory
for a display device (such as simple-framebuffer).
unevaluatedProperties: false
examples:
- |
/ {
compatible = "foo";
model = "foo";
#address-cells = <1>;
#size-cells = <1>;
chosen {
framebuffer {
compatible = "simple-framebuffer";
memory-region = <&fb>;
};
};
reserved-memory {
#address-cells = <1>;
#size-cells = <1>;
ranges;
fb: framebuffer@80000000 {
compatible = "framebuffer";
reg = <0x80000000 0x007e9000>;
};
};
};
...

@ -29,7 +29,10 @@ Things between square brackets are optional.
Valid names are::
- NTSC: 480i output, with the CCIR System-M TV mode and NTSC color encoding
- NTSC-J: 480i output, with the CCIR System-M TV mode, the NTSC color
encoding, and a black level equal to the blanking level.
- PAL: 576i output, with the CCIR System-B TV mode and PAL color encoding
- PAL-M: 480i output, with the CCIR System-M TV mode and PAL color encoding
If 'M' is specified in the mode_option argument (after <yres> and before
<bpp> and <refresh>, if specified) the timings will be calculated using
@ -70,6 +73,8 @@ Valid options are::
- reflect_y (boolean): Perform an axial symmetry on the Y axis
- rotate (integer): Rotate the initial framebuffer by x
degrees. Valid values are 0, 90, 180 and 270.
- tv_mode: Analog TV mode. One of "NTSC", "NTSC-443", "NTSC-J", "PAL",
"PAL-M", "PAL-N", or "SECAM".
- panel_orientation, one of "normal", "upside_down", "left_side_up", or
"right_side_up". For KMS drivers only, this sets the "panel orientation"
property on the kms connector as hint for kms users.

@ -1,8 +1,10 @@
Product Name, Code Reference, DCN/DCE version, GC version, VCE/UVD/VCN version, SDMA version
Radeon R* Graphics, CARRIZO/STONEY, DCE 11, 8, VCE 3 / UVD 6, 3
Ryzen 3000 series / AMD Ryzen Embedded V1*/R1* with Radeon Vega Gfx, RAVEN/PICASSO, DCN 1.0, 9.1.0, VCN 1.0, 4.1.0
Ryzen 4000 series, RENOIR, DCN 2.1, 9.3, VCN 2.2, 4.1.2
Ryzen 3000 series / AMD Ryzen Embedded V1*/R1* with Radeon Vega Gfx, RAVEN2, DCN 1.0, 9.2.2, VCN 1.0.1, 4.1.1
SteamDeck, VANGOGH, DCN 3.0.1, 10.3.1, VCN 3.1.0, 5.2.1
Ryzen 5000 series, GREEN SARDINE, DCN 2.1, 9.3, VCN 2.2, 4.1.1
Ryzen 6000 Zen, YELLOW CARP, 3.1.2, 10.3.3, VCN 3.1.1, 5.2.3
Product Name, Code Reference, DCN/DCE version, GC version, VCE/UVD/VCN version, SDMA version, MP0 version
Radeon R* Graphics, CARRIZO/STONEY, DCE 11, 8, VCE 3 / UVD 6, 3, n/a
Ryzen 3000 series / AMD Ryzen Embedded V1*/R1* with Radeon Vega Gfx, RAVEN/PICASSO, DCN 1.0, 9.1.0, VCN 1.0, 4.1.0, 10.0.0
Ryzen 4000 series, RENOIR, DCN 2.1, 9.3, VCN 2.2, 4.1.2, 11.0.3
Ryzen 3000 series / AMD Ryzen Embedded V1*/R1* with Radeon Vega Gfx, RAVEN2, DCN 1.0, 9.2.2, VCN 1.0.1, 4.1.1, 10.0.1
SteamDeck, VANGOGH, DCN 3.0.1, 10.3.1, VCN 3.1.0, 5.2.1, 11.5.0
Ryzen 5000 series / Ryzen 7x30 series, GREEN SARDINE / Cezanne / Barcelo / Barcelo-R, DCN 2.1, 9.3, VCN 2.2, 4.1.1, 12.0.1
Ryzen 6000 series / Ryzen 7x35 series, YELLOW CARP / Rembrandt / Rembrandt+, 3.1.2, 10.3.3, VCN 3.1.1, 5.2.3, 13.0.3
Ryzen 7000 series (AM5), Raphael, 3.1.5, 10.3.6, 3.1.2, 5.2.6, 13.0.5
Ryzen 7x20 series, Mendocino, 3.1.6, 10.3.7, 3.1.1, 5.2.7, 13.0.8


@ -22,3 +22,5 @@ AMD Radeon RX 6800(XT) /6900(XT) /W6800, SIENNA_CICHLID, DCN 3.0.0, 10.3.0, VCN
AMD Radeon RX 6700 XT / 6800M / 6700M, NAVY_FLOUNDER, DCN 3.0.0, 10.3.2, VCN 3.0.0, 5.2.2
AMD Radeon RX 6600(XT) /6600M /W6600 /W6600M, DIMGREY_CAVEFISH, DCN 3.0.2, 10.3.4, VCN 3.0.16, 5.2.4
AMD Radeon RX 6500M /6300M /W6500M /W6300M, BEIGE_GOBY, DCN 3.0.3, 10.3.5, VCN 3.0.33, 5.2.5
AMD Radeon RX 7900 XT /XTX, , DCN 3.2.0, 11.0.0, VCN 4.0.0, 6.0.0
AMD Radeon RX 7600M (XT) /7700S /7600S, , DCN 3.2.1, 11.0.2, VCN 4.0.4, 6.0.2


@ -37,7 +37,7 @@ Accelerated Processing Units (APU) Info
.. csv-table::
:header-rows: 1
:widths: 3, 2, 2, 1, 1, 1
:widths: 3, 2, 2, 1, 1, 1, 1
:file: ./apu-asic-info-table.csv
Discrete GPU Info

@ -188,6 +188,13 @@ Bridge Helper Reference
.. kernel-doc:: drivers/gpu/drm/drm_bridge.c
:export:
MIPI-DSI bridge operation
-------------------------
.. kernel-doc:: drivers/gpu/drm/drm_bridge.c
:doc: dsi bridge operations
Bridge Connector Helper Reference
---------------------------------

@ -520,6 +520,12 @@ HDMI Specific Connector Properties
.. kernel-doc:: drivers/gpu/drm/drm_connector.c
:doc: HDMI connector properties
Analog TV Specific Connector Properties
---------------------------------------
.. kernel-doc:: drivers/gpu/drm/drm_connector.c
:doc: Analog TV Connector Properties
Standard CRTC Properties
------------------------

@ -402,19 +402,19 @@ It's possible to run the IGT-tests in a VM in two ways:
1. Use IGT inside a VM
2. Use IGT from the host machine and write the results in a shared directory.
As follow, there is an example of using a VM with a shared directory with
the host machine to run igt-tests. As an example it's used virtme::
Following is an example of using a VM with a shared directory with
the host machine to run igt-tests. This example uses virtme::
$ virtme-run --rwdir /path/for/shared_dir --kdir=path/for/kernel/directory --mods=auto
Run the igt-tests in the guest machine, as example it's ran the 'kms_flip'
Run the igt-tests in the guest machine. This example runs the 'kms_flip'
tests::
$ /path/for/igt-gpu-tools/scripts/run-tests.sh -p -s -t "kms_flip.*" -v
In this example, instead of build the igt_runner, Piglit is used
(-p option); it's created html summary of the tests results and it's saved
in the folder "igt-gpu-tools/results"; it's executed only the igt-tests
In this example, instead of building the igt_runner, Piglit is used
(-p option). It creates an HTML summary of the test results and saves
them in the folder "igt-gpu-tools/results". It executes only the igt-tests
matching the -t option.
Display CRC Support

@ -508,17 +508,18 @@ Clean up the debugfs support
There's a bunch of issues with it:
- The drm_info_list ->show() function doesn't even bother to cast to the drm
structure for you. This is lazy.
- Convert drivers to support the drm_debugfs_add_files() function instead of
the drm_debugfs_create_files() function.
- Improve late-register debugfs by rolling out the same debugfs pre-register
infrastructure for connector and crtc too. That way, the drivers won't need to
split their setup code into init and register anymore.
- We probably want to have some support for debugfs files on crtc/connectors and
maybe other kms objects directly in core. There's even drm_print support in
the funcs for these objects to dump kms state, so it's all there. And then the
->show() functions should obviously give you a pointer to the right object.
- The drm_info_list stuff is centered on drm_minor instead of drm_device. For
anything we want to print drm_device (or maybe drm_file) is the right thing.
- The drm_driver->debugfs_init hooks we have is just an artifact of the old
midlayered load sequence. DRM debugfs should work more like sysfs, where you
can create properties/files for an object anytime you want, and the core
@ -527,8 +528,6 @@ There's a bunch of issues with it:
this (together with the drm_minor->drm_device move) would allow us to remove
debugfs_init.
Previous RFC that hasn't landed yet: https://lore.kernel.org/dri-devel/20200513114130.28641-2-wambui.karugax@gmail.com/
Contact: Daniel Vetter
Level: Intermediate
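The conversion item above points drivers at the newer drm_debugfs_add_files()
interface. As a hedged sketch only (the foo_* names are made up, and the exact
drm_debugfs_info/drm_debugfs_entry layouts should be checked against
include/drm/drm_debugfs.h in this tree), a converted driver could register a
device-centered debugfs file like this::

    #include <linux/kernel.h>
    #include <linux/seq_file.h>
    #include <drm/drm_debugfs.h>
    #include <drm/drm_drv.h>

    /* Hypothetical driver "foo": the show() callback gets the drm_device
     * through the drm_debugfs_entry, no manual cast needed. */
    static int foo_info_show(struct seq_file *m, void *unused)
    {
            struct drm_debugfs_entry *entry = m->private;
            struct drm_device *dev = entry->dev;

            seq_printf(m, "driver: %s\n", dev->driver->name);
            return 0;
    }

    static const struct drm_debugfs_info foo_debugfs_files[] = {
            { "foo_info", foo_info_show, 0 },
    };

    static void foo_debugfs_register(struct drm_device *dev)
    {
            /* Queues the files; the DRM core creates them when the minor
             * is registered, so no init/register split is needed. */
            drm_debugfs_add_files(dev, foo_debugfs_files,
                                  ARRAY_SIZE(foo_debugfs_files));
    }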

@ -54,6 +54,25 @@ VEC (Composite TV out) encoder
.. kernel-doc:: drivers/gpu/drm/vc4/vc4_vec.c
:doc: VC4 SDTV module
KUnit Tests
===========
The VC4 Driver uses KUnit to perform driver-specific unit and
integration tests.
These tests use a mock driver and can be run using the
command below, on either arm or arm64 architectures:
.. code-block:: bash
$ ./tools/testing/kunit/kunit.py run \
--kunitconfig=drivers/gpu/drm/vc4/tests/.kunitconfig \
--cross_compile aarch64-linux-gnu- --arch arm64
Parts of the driver that are currently covered by tests are:
* The HVS to PixelValve dynamic FIFO assignment, for the BCM2835-7
and BCM2711.
Memory Management and 3D Command Submission
===========================================

@ -104,3 +104,4 @@ to do something different in the near future.
../riscv/patch-acceptance
../driver-api/media/maintainer-entry-profile
../driver-api/vfio-pci-device-specific-driver-acceptance
../nvme/feature-and-quirk-policy

@ -0,0 +1,77 @@
.. SPDX-License-Identifier: GPL-2.0
===================================
Linux NVMe feature and quirk policy
===================================
This file explains the policy used to decide what is supported by the
Linux NVMe driver and what is not.
Introduction
============
NVM Express is an open collection of standards and information.
The Linux NVMe host driver in drivers/nvme/host/ supports devices
implementing the NVM Express (NVMe) family of specifications, which
currently consists of a number of documents:
- the NVMe Base specification
- various Command Set specifications (e.g. NVM Command Set)
- various Transport specifications (e.g. PCIe, Fibre Channel, RDMA, TCP)
- the NVMe Management Interface specification
See https://nvmexpress.org/developers/ for the NVMe specifications.
Supported features
==================
NVMe is a large suite of specifications, and contains features that are only
useful or suitable for specific use-cases. It is important to note that Linux
does not aim to implement every feature in the specification. Every additional
feature implemented introduces more code, more maintenance and potentially more
bugs. Hence there is an inherent tradeoff between functionality and
maintainability of the NVMe host driver.
Any feature implemented in the Linux NVMe host driver must support the
following requirements:
1. The feature is specified in a release version of an official NVMe
specification, or in a ratified Technical Proposal (TP) that is
available on the NVMe website. Or, if it is not directly related to the
on-wire protocol, it does not contradict any of the NVMe specifications.
2. Does not conflict with the Linux architecture, nor the design of the
NVMe host driver.
3. Has a clear, indisputable value-proposition and a wide consensus across
the community.
Vendor specific extensions are generally not supported in the NVMe host
driver.
It is strongly recommended to work with the Linux NVMe and block layer
maintainers and get feedback on specification changes that are intended
to be used by the Linux NVMe host driver in order to avoid conflict at a
later stage.
Quirks
======
Sometimes implementations of open standards fail to correctly implement parts
of the standards. Linux uses identifier-based quirks to work around such
implementation bugs. The intent of quirks is to deal with widely available
hardware, usually consumer, which Linux users can't use without these quirks.
Typically these implementations are not or only superficially tested with Linux
by the hardware manufacturer.
The Linux NVMe maintainers decide ad hoc whether to quirk implementations
based on the impact of the problem to Linux users and how it impacts
maintainability of the driver. In general quirks are a last resort, if no
firmware updates or other workarounds are available from the vendor.
Quirks will not be added to the Linux kernel for hardware that isn't available
on the mass market. Hardware that fails qualification for enterprise Linux
distributions, ChromeOS, Android or other consumers of the Linux kernel
should be fixed before it is shipped instead of relying on Linux quirks.

@ -222,6 +222,7 @@ Code Seq# Include File Comments
'a' 00-0F drivers/crypto/qat/qat_common/adf_cfg_common.h conflict! qat driver
'b' 00-FF conflict! bit3 vme host bridge
<mailto:natalia@nikhefk.nikhef.nl>
'b' 00-0F linux/dma-buf.h conflict!
'c' all linux/cm4000_cs.h conflict!
'c' 00-7F linux/comstats.h conflict!
'c' 00-7F linux/coda.h conflict!

@ -949,6 +949,43 @@ The following tables list existing packed RGB formats.
- b\ :sub:`2`
- b\ :sub:`1`
- b\ :sub:`0`
* .. _MEDIA-BUS-FMT-BGR666-1X18:
- MEDIA_BUS_FMT_BGR666_1X18
- 0x1023
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- b\ :sub:`5`
- b\ :sub:`4`
- b\ :sub:`3`
- b\ :sub:`2`
- b\ :sub:`1`
- b\ :sub:`0`
- g\ :sub:`5`
- g\ :sub:`4`
- g\ :sub:`3`
- g\ :sub:`2`
- g\ :sub:`1`
- g\ :sub:`0`
- r\ :sub:`5`
- r\ :sub:`4`
- r\ :sub:`3`
- r\ :sub:`2`
- r\ :sub:`1`
- r\ :sub:`0`
* .. _MEDIA-BUS-FMT-RBG888-1X24:
- MEDIA_BUS_FMT_RBG888_1X24
@ -1023,6 +1060,80 @@ The following tables list existing packed RGB formats.
- b\ :sub:`2`
- b\ :sub:`1`
- b\ :sub:`0`
* .. _MEDIA-BUS-FMT-BGR666-1X24_CPADHI:
- MEDIA_BUS_FMT_BGR666_1X24_CPADHI
- 0x1024
-
-
-
-
-
-
-
-
-
- 0
- 0
- b\ :sub:`5`
- b\ :sub:`4`
- b\ :sub:`3`
- b\ :sub:`2`
- b\ :sub:`1`
- b\ :sub:`0`
- 0
- 0
- g\ :sub:`5`
- g\ :sub:`4`
- g\ :sub:`3`
- g\ :sub:`2`
- g\ :sub:`1`
- g\ :sub:`0`
- 0
- 0
- r\ :sub:`5`
- r\ :sub:`4`
- r\ :sub:`3`
- r\ :sub:`2`
- r\ :sub:`1`
- r\ :sub:`0`
* .. _MEDIA-BUS-FMT-RGB565-1X24_CPADHI:
- MEDIA_BUS_FMT_RGB565_1X24_CPADHI
- 0x1022
-
-
-
-
-
-
-
-
-
- 0
- 0
- 0
- r\ :sub:`4`
- r\ :sub:`3`
- r\ :sub:`2`
- r\ :sub:`1`
- r\ :sub:`0`
- 0
- 0
- g\ :sub:`5`
- g\ :sub:`4`
- g\ :sub:`3`
- g\ :sub:`2`
- g\ :sub:`1`
- g\ :sub:`0`
- 0
- 0
- 0
- b\ :sub:`4`
- b\ :sub:`3`
- b\ :sub:`2`
- b\ :sub:`1`
- b\ :sub:`0`
* .. _MEDIA-BUS-FMT-BGR888-1X24:
- MEDIA_BUS_FMT_BGR888_1X24

@ -5343,9 +5343,9 @@ KVM_XEN_ATTR_TYPE_SHARED_INFO
32 vCPUs in the shared_info page, KVM does not automatically do so
and instead requires that KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO be used
explicitly even when the vcpu_info for a given vCPU resides at the
"default" location in the shared_info page. This is because KVM is
not aware of the Xen CPU id which is used as the index into the
vcpu_info[] array, so cannot know the correct default location.
"default" location in the shared_info page. This is because KVM may
not be aware of the Xen CPU id which is used as the index into the
vcpu_info[] array, so may know the correct default location.
Note that the shared info page may be constantly written to by KVM;
it contains the event channel bitmap used to deliver interrupts to
@ -5356,23 +5356,29 @@ KVM_XEN_ATTR_TYPE_SHARED_INFO
any vCPU has been running or any event channel interrupts can be
routed to the guest.
Setting the gfn to KVM_XEN_INVALID_GFN will disable the shared info
page.
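As an illustration of the ioctl plumbing described here, a minimal userspace
sketch (hedged: it assumes vm_fd is a VM file descriptor obtained via
KVM_CREATE_VM on a kernel and headers new enough to define
KVM_XEN_INVALID_GFN, and error handling is reduced to perror) that disables
the shared info page could look like::

    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Sketch only: vm_fd is assumed to be a KVM VM fd with Xen HVM enabled. */
    static int xen_disable_shared_info(int vm_fd)
    {
            struct kvm_xen_hvm_attr attr = {
                    .type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
                    .u.shared_info.gfn = KVM_XEN_INVALID_GFN,
            };

            if (ioctl(vm_fd, KVM_XEN_HVM_SET_ATTR, &attr) < 0) {
                    perror("KVM_XEN_HVM_SET_ATTR");
                    return -1;
            }
            return 0;
    }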
KVM_XEN_ATTR_TYPE_UPCALL_VECTOR
Sets the exception vector used to deliver Xen event channel upcalls.
This is the HVM-wide vector injected directly by the hypervisor
(not through the local APIC), typically configured by a guest via
HVM_PARAM_CALLBACK_IRQ.
HVM_PARAM_CALLBACK_IRQ. This can be disabled again (e.g. for guest
SHUTDOWN_soft_reset) by setting it to zero.
KVM_XEN_ATTR_TYPE_EVTCHN
This attribute is available when the KVM_CAP_XEN_HVM ioctl indicates
support for KVM_XEN_HVM_CONFIG_EVTCHN_SEND features. It configures
an outbound port number for interception of EVTCHNOP_send requests
from the guest. A given sending port number may be directed back
to a specified vCPU (by APIC ID) / port / priority on the guest,
or to trigger events on an eventfd. The vCPU and priority can be
changed by setting KVM_XEN_EVTCHN_UPDATE in a subsequent call,
but other fields cannot change for a given sending port. A port
mapping is removed by using KVM_XEN_EVTCHN_DEASSIGN in the flags
field.
from the guest. A given sending port number may be directed back to
a specified vCPU (by APIC ID) / port / priority on the guest, or to
trigger events on an eventfd. The vCPU and priority can be changed
by setting KVM_XEN_EVTCHN_UPDATE in a subsequent call, but other
fields cannot change for a given sending port. A port mapping is
removed by using KVM_XEN_EVTCHN_DEASSIGN in the flags field. Passing
KVM_XEN_EVTCHN_RESET in the flags field removes all interception of
outbound event channels. The values of the flags field are mutually
exclusive and cannot be combined as a bitmask.
KVM_XEN_ATTR_TYPE_XEN_VERSION
This attribute is available when the KVM_CAP_XEN_HVM ioctl indicates
@ -5388,7 +5394,7 @@ KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG
support for KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG. It enables the
XEN_RUNSTATE_UPDATE flag which allows guest vCPUs to safely read
other vCPUs' vcpu_runstate_info. Xen guests enable this feature via
the VM_ASST_TYPE_runstate_update_flag of the HYPERVISOR_vm_assist
the VMASST_TYPE_runstate_update_flag of the HYPERVISOR_vm_assist
hypercall.
4.127 KVM_XEN_HVM_GET_ATTR
@ -5446,15 +5452,18 @@ KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO
As with the shared_info page for the VM, the corresponding page may be
dirtied at any time if event channel interrupt delivery is enabled, so
userspace should always assume that the page is dirty without relying
on dirty logging.
on dirty logging. Setting the gpa to KVM_XEN_INVALID_GPA will disable
the vcpu_info.
KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO
Sets the guest physical address of an additional pvclock structure
for a given vCPU. This is typically used for guest vsyscall support.
Setting the gpa to KVM_XEN_INVALID_GPA will disable the structure.
KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR
Sets the guest physical address of the vcpu_runstate_info for a given
vCPU. This is how a Xen guest tracks CPU state such as steal time.
Setting the gpa to KVM_XEN_INVALID_GPA will disable the runstate area.
KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT
Sets the runstate (RUNSTATE_running/_runnable/_blocked/_offline) of
@ -5487,7 +5496,8 @@ KVM_XEN_VCPU_ATTR_TYPE_TIMER
This attribute is available when the KVM_CAP_XEN_HVM ioctl indicates
support for KVM_XEN_HVM_CONFIG_EVTCHN_SEND features. It sets the
event channel port/priority for the VIRQ_TIMER of the vCPU, as well
as allowing a pending timer to be saved/restored.
as allowing a pending timer to be saved/restored. Setting the timer
port to zero disables kernel handling of the singleshot timer.
KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR
This attribute is available when the KVM_CAP_XEN_HVM ioctl indicates
@ -5495,7 +5505,8 @@ KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR
per-vCPU local APIC upcall vector, configured by a Xen guest with
the HVMOP_set_evtchn_upcall_vector hypercall. This is typically
used by Windows guests, and is distinct from the HVM-wide upcall
vector configured with HVM_PARAM_CALLBACK_IRQ.
vector configured with HVM_PARAM_CALLBACK_IRQ. It is disabled by
setting the vector to zero.
4.129 KVM_XEN_VCPU_GET_ATTR
@ -6577,11 +6588,6 @@ Please note that the kernel is allowed to use the kvm_run structure as the
primary storage for certain register types. Therefore, the kernel may use the
values in kvm_run even if the corresponding bit in kvm_dirty_regs is not set.
::
};
6. Capabilities that can be enabled on vCPUs
============================================

@ -16,17 +16,26 @@ The acquisition orders for mutexes are as follows:
- kvm->slots_lock is taken outside kvm->irq_lock, though acquiring
them together is quite rare.
- Unlike kvm->slots_lock, kvm->slots_arch_lock is released before
synchronize_srcu(&kvm->srcu). Therefore kvm->slots_arch_lock
can be taken inside a kvm->srcu read-side critical section,
while kvm->slots_lock cannot.
- kvm->mn_active_invalidate_count ensures that pairs of
invalidate_range_start() and invalidate_range_end() callbacks
use the same memslots array. kvm->slots_lock and kvm->slots_arch_lock
are taken on the waiting side in install_new_memslots, so MMU notifiers
must not take either kvm->slots_lock or kvm->slots_arch_lock.
For SRCU:
- ``synchronize_srcu(&kvm->srcu)`` is called _inside_
the kvm->slots_lock critical section, therefore kvm->slots_lock
cannot be taken inside a kvm->srcu read-side critical section.
Instead, kvm->slots_arch_lock is released before the call
to ``synchronize_srcu()`` and _can_ be taken inside a
kvm->srcu read-side critical section.
- kvm->lock is taken inside kvm->srcu, therefore
``synchronize_srcu(&kvm->srcu)`` cannot be called inside
a kvm->lock critical section. If you cannot delay the
call until after kvm->lock is released, use ``call_srcu``.
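As a hedged illustration of the SRCU rules above (the function name is made
up; kvm is a struct kvm pointer), the usual read-side pattern looks like::

    #include <linux/kvm_host.h>

    /* Illustrative only: a kvm->srcu read-side critical section. */
    static void example_srcu_reader(struct kvm *kvm)
    {
            struct kvm_memslots *slots;
            int idx;

            idx = srcu_read_lock(&kvm->srcu);
            /*
             * kvm->slots_arch_lock may be taken in here, but kvm->slots_lock
             * must not be, because slots_lock is held around
             * synchronize_srcu(&kvm->srcu).
             */
            slots = kvm_memslots(kvm);
            (void)slots;
            srcu_read_unlock(&kvm->srcu, idx);
    }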
On x86:
- vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock

@ -6550,6 +6550,14 @@ S: Maintained
T: git git://anongit.freedesktop.org/drm/drm-misc
F: drivers/gpu/drm/tiny/gm12u320.c
DRM DRIVER FOR HIMAX HX8394 MIPI-DSI LCD panels
M: Ondrej Jirman <megi@xff.cz>
M: Javier Martinez Canillas <javierm@redhat.com>
S: Maintained
T: git git://anongit.freedesktop.org/drm/drm-misc
F: Documentation/devicetree/bindings/display/panel/himax,hx8394.yaml
F: drivers/gpu/drm/panel/panel-himax-hx8394.c
DRM DRIVER FOR HX8357D PANELS
M: Emma Anholt <emma@anholt.net>
S: Maintained
@ -6571,11 +6579,6 @@ T: git git://anongit.freedesktop.org/drm/drm-misc
F: Documentation/devicetree/bindings/display/ilitek,ili9486.yaml
F: drivers/gpu/drm/tiny/ili9486.c
DRM DRIVER FOR INTEL I810 VIDEO CARDS
S: Orphan / Obsolete
F: drivers/gpu/drm/i810/
F: include/uapi/drm/i810_drm.h
DRM DRIVER FOR JADARD JD9365DA-H3 MIPI-DSI LCD PANELS
M: Jagan Teki <jagan@edgeble.ai>
S: Maintained
@ -6604,11 +6607,6 @@ S: Maintained
F: Documentation/devicetree/bindings/display/panel/mantix,mlaf057we51-x.yaml
F: drivers/gpu/drm/panel/panel-mantix-mlaf057we51.c
DRM DRIVER FOR MATROX G200/G400 GRAPHICS CARDS
S: Orphan / Obsolete
F: drivers/gpu/drm/mga/
F: include/uapi/drm/mga_drm.h
DRM DRIVER FOR MGA G200 GRAPHICS CHIPS
M: Dave Airlie <airlied@redhat.com>
R: Thomas Zimmermann <tzimmermann@suse.de>
@ -6727,11 +6725,6 @@ T: git git://anongit.freedesktop.org/drm/drm-misc
F: drivers/gpu/drm/qxl/
F: include/uapi/drm/qxl_drm.h
DRM DRIVER FOR RAGE 128 VIDEO CARDS
S: Orphan / Obsolete
F: drivers/gpu/drm/r128/
F: include/uapi/drm/r128_drm.h
DRM DRIVER FOR RAYDIUM RM67191 PANELS
M: Robert Chiras <robert.chiras@nxp.com>
S: Maintained
@ -6759,11 +6752,6 @@ S: Maintained
F: Documentation/devicetree/bindings/display/panel/rocktech,jh057n00900.yaml
F: drivers/gpu/drm/panel/panel-sitronix-st7703.c
DRM DRIVER FOR SAVAGE VIDEO CARDS
S: Orphan / Obsolete
F: drivers/gpu/drm/savage/
F: include/uapi/drm/savage_drm.h
DRM DRIVER FOR FIRMWARE FRAMEBUFFERS
M: Thomas Zimmermann <tzimmermann@suse.de>
M: Javier Martinez Canillas <javierm@redhat.com>
@ -6779,11 +6767,6 @@ F: include/drm/drm_aperture.h
F: include/linux/aperture.h
F: include/video/nomodeset.h
DRM DRIVER FOR SIS VIDEO CARDS
S: Orphan / Obsolete
F: drivers/gpu/drm/sis/
F: include/uapi/drm/sis_drm.h
DRM DRIVER FOR SITRONIX ST7586 PANELS
M: David Lechner <david@lechnology.com>
S: Maintained
@ -6811,10 +6794,6 @@ T: git git://anongit.freedesktop.org/drm/drm-misc
F: Documentation/devicetree/bindings/display/ste,mcde.yaml
F: drivers/gpu/drm/mcde/
DRM DRIVER FOR TDFX VIDEO CARDS
S: Orphan / Obsolete
F: drivers/gpu/drm/tdfx/
DRM DRIVER FOR TI DLPC3433 MIPI DSI TO DMD BRIDGE
M: Jagan Teki <jagan@amarulasolutions.com>
S: Maintained
@ -6915,6 +6894,15 @@ T: git https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/accel.git
F: Documentation/accel/
F: drivers/accel/
DRM ACCEL DRIVERS FOR INTEL VPU
M: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
M: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
L: dri-devel@lists.freedesktop.org
S: Supported
T: git git://anongit.freedesktop.org/drm/drm-misc
F: drivers/accel/ivpu/
F: include/uapi/drm/ivpu_accel.h
DRM DRIVERS FOR ALLWINNER A10
M: Maxime Ripard <mripard@kernel.org>
M: Chen-Yu Tsai <wens@csie.org>
@ -6984,7 +6972,7 @@ M: Philipp Zabel <p.zabel@pengutronix.de>
L: dri-devel@lists.freedesktop.org
S: Maintained
F: Documentation/devicetree/bindings/display/imx/
F: drivers/gpu/drm/imx/
F: drivers/gpu/drm/imx/ipuv3/
F: drivers/gpu/ipu-v3/
DRM DRIVERS FOR FREESCALE IMX BRIDGE
@ -7007,9 +6995,10 @@ F: drivers/gpu/drm/gma500/
DRM DRIVERS FOR HISILICON
M: Xinliang Liu <xinliang.liu@linaro.org>
M: Tian Tao <tiantao6@hisilicon.com>
R: John Stultz <jstultz@google.com>
R: Xinwei Kong <kong.kongxinwei@hisilicon.com>
R: Chen Feng <puck.chen@hisilicon.com>
R: Sumit Semwal <sumit.semwal@linaro.org>
R: Yongqin Liu <yongqin.liu@linaro.org>
R: John Stultz <jstultz@google.com>
L: dri-devel@lists.freedesktop.org
S: Maintained
T: git git://anongit.freedesktop.org/drm/drm-misc
@ -11468,7 +11457,7 @@ F: arch/x86/kvm/hyperv.*
F: arch/x86/kvm/kvm_onhyperv.*
F: arch/x86/kvm/svm/hyperv.*
F: arch/x86/kvm/svm/svm_onhyperv.*
F: arch/x86/kvm/vmx/evmcs.*
F: arch/x86/kvm/vmx/hyperv.*
KVM X86 Xen (KVM/Xen)
M: David Woodhouse <dwmw2@infradead.org>
@ -14916,6 +14905,7 @@ L: linux-nvme@lists.infradead.org
S: Supported
W: http://git.infradead.org/nvme.git
T: git://git.infradead.org/nvme.git
F: Documentation/nvme/
F: drivers/nvme/host/
F: drivers/nvme/common/
F: include/linux/nvme*

@ -2,7 +2,7 @@
VERSION = 6
PATCHLEVEL = 2
SUBLEVEL = 0
EXTRAVERSION = -rc1
EXTRAVERSION = -rc2
NAME = Hurr durr I'ma ninja sloth
# *DOCUMENTATION*
@ -297,7 +297,7 @@ no-compiler-targets := $(no-dot-config-targets) install dtbs_install \
headers_install modules_install kernelrelease image_name
no-sync-config-targets := $(no-dot-config-targets) %install kernelrelease \
image_name
single-targets := %.a %.i %.rsi %.ko %.lds %.ll %.lst %.mod %.o %.s %.symtypes %/
single-targets := %.a %.i %.ko %.lds %.ll %.lst %.mod %.o %.rsi %.s %.symtypes %/
config-build :=
mixed-build :=

@ -1387,7 +1387,7 @@ static int __init amd_core_pmu_init(void)
* numbered counter following it.
*/
for (i = 0; i < x86_pmu.num_counters - 1; i += 2)
even_ctr_mask |= 1 << i;
even_ctr_mask |= BIT_ULL(i);
pair_constraint = (struct event_constraint)
__EVENT_CONSTRAINT(0, even_ctr_mask, 0,

@ -119,7 +119,7 @@ static bool is_coretext(const struct core_text *ct, void *addr)
return within_module_coretext(addr);
}
static __init_or_module bool skip_addr(void *dest)
static bool skip_addr(void *dest)
{
if (dest == error_entry)
return true;
@ -181,7 +181,7 @@ static const u8 nops[] = {
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
};
static __init_or_module void *patch_dest(void *dest, bool direct)
static void *patch_dest(void *dest, bool direct)
{
unsigned int tsize = SKL_TMPL_SIZE;
u8 *pad = dest - tsize;

@ -37,6 +37,7 @@
#include <linux/extable.h>
#include <linux/kdebug.h>
#include <linux/kallsyms.h>
#include <linux/kgdb.h>
#include <linux/ftrace.h>
#include <linux/kasan.h>
#include <linux/moduleloader.h>
@ -281,12 +282,15 @@ static int can_probe(unsigned long paddr)
if (ret < 0)
return 0;
#ifdef CONFIG_KGDB
/*
* Another debugging subsystem might insert this breakpoint.
* In that case, we can't recover it.
* If there is a dynamically installed kgdb sw breakpoint,
* this function should not be probed.
*/
if (insn.opcode.bytes[0] == INT3_INSN_OPCODE)
if (insn.opcode.bytes[0] == INT3_INSN_OPCODE &&
kgdb_has_hit_break(addr))
return 0;
#endif
addr += insn.length;
}

@ -15,6 +15,7 @@
#include <linux/extable.h>
#include <linux/kdebug.h>
#include <linux/kallsyms.h>
#include <linux/kgdb.h>
#include <linux/ftrace.h>
#include <linux/objtool.h>
#include <linux/pgtable.h>
@ -279,19 +280,6 @@ static int insn_is_indirect_jump(struct insn *insn)
return ret;
}
static bool is_padding_int3(unsigned long addr, unsigned long eaddr)
{
unsigned char ops;
for (; addr < eaddr; addr++) {
if (get_kernel_nofault(ops, (void *)addr) < 0 ||
ops != INT3_INSN_OPCODE)
return false;
}
return true;
}
/* Decode whole function to ensure any instructions don't jump into target */
static int can_optimize(unsigned long paddr)
{
@ -334,15 +322,15 @@ static int can_optimize(unsigned long paddr)
ret = insn_decode_kernel(&insn, (void *)recovered_insn);
if (ret < 0)
return 0;
#ifdef CONFIG_KGDB
/*
* In the case of detecting unknown breakpoint, this could be
* a padding INT3 between functions. Let's check that all the
* rest of the bytes are also INT3.
* If there is a dynamically installed kgdb sw breakpoint,
* this function should not be probed.
*/
if (insn.opcode.bytes[0] == INT3_INSN_OPCODE)
return is_padding_int3(addr, paddr - offset + size) ? 1 : 0;
if (insn.opcode.bytes[0] == INT3_INSN_OPCODE &&
kgdb_has_hit_break(addr))
return 0;
#endif
/* Recover address */
insn.kaddr = (void *)addr;
insn.next_byte = (void *)(addr + insn.length);

@ -1769,6 +1769,7 @@ static bool hv_is_vp_in_sparse_set(u32 vp_id, u64 valid_bank_mask, u64 sparse_ba
}
struct kvm_hv_hcall {
/* Hypercall input data */
u64 param;
u64 ingpa;
u64 outgpa;
@ -1779,12 +1780,21 @@ struct kvm_hv_hcall {
bool fast;
bool rep;
sse128_t xmm[HV_HYPERCALL_MAX_XMM_REGISTERS];
/*
* Current read offset when KVM reads hypercall input data gradually,
* either offset in bytes from 'ingpa' for regular hypercalls or the
* number of already consumed 'XMM halves' for 'fast' hypercalls.
*/
union {
gpa_t data_offset;
int consumed_xmm_halves;
};
};
static int kvm_hv_get_hc_data(struct kvm *kvm, struct kvm_hv_hcall *hc,
u16 orig_cnt, u16 cnt_cap, u64 *data,
int consumed_xmm_halves, gpa_t offset)
u16 orig_cnt, u16 cnt_cap, u64 *data)
{
/*
* Preserve the original count when ignoring entries via a "cap", KVM
@ -1799,11 +1809,11 @@ static int kvm_hv_get_hc_data(struct kvm *kvm, struct kvm_hv_hcall *hc,
* Each XMM holds two sparse banks, but do not count halves that
* have already been consumed for hypercall parameters.
*/
if (orig_cnt > 2 * HV_HYPERCALL_MAX_XMM_REGISTERS - consumed_xmm_halves)
if (orig_cnt > 2 * HV_HYPERCALL_MAX_XMM_REGISTERS - hc->consumed_xmm_halves)
return HV_STATUS_INVALID_HYPERCALL_INPUT;
for (i = 0; i < cnt; i++) {
j = i + consumed_xmm_halves;
j = i + hc->consumed_xmm_halves;
if (j % 2)
data[i] = sse128_hi(hc->xmm[j / 2]);
else
@ -1812,27 +1822,24 @@ static int kvm_hv_get_hc_data(struct kvm *kvm, struct kvm_hv_hcall *hc,
return 0;
}
return kvm_read_guest(kvm, hc->ingpa + offset, data,
return kvm_read_guest(kvm, hc->ingpa + hc->data_offset, data,
cnt * sizeof(*data));
}
static u64 kvm_get_sparse_vp_set(struct kvm *kvm, struct kvm_hv_hcall *hc,
u64 *sparse_banks, int consumed_xmm_halves,
gpa_t offset)
u64 *sparse_banks)
{
if (hc->var_cnt > HV_MAX_SPARSE_VCPU_BANKS)
return -EINVAL;
/* Cap var_cnt to ignore banks that cannot contain a legal VP index. */
return kvm_hv_get_hc_data(kvm, hc, hc->var_cnt, KVM_HV_MAX_SPARSE_VCPU_SET_BITS,
sparse_banks, consumed_xmm_halves, offset);
sparse_banks);
}
static int kvm_hv_get_tlb_flush_entries(struct kvm *kvm, struct kvm_hv_hcall *hc, u64 entries[],
int consumed_xmm_halves, gpa_t offset)
static int kvm_hv_get_tlb_flush_entries(struct kvm *kvm, struct kvm_hv_hcall *hc, u64 entries[])
{
return kvm_hv_get_hc_data(kvm, hc, hc->rep_cnt, hc->rep_cnt,
entries, consumed_xmm_halves, offset);
return kvm_hv_get_hc_data(kvm, hc, hc->rep_cnt, hc->rep_cnt, entries);
}
static void hv_tlb_flush_enqueue(struct kvm_vcpu *vcpu,
@ -1926,8 +1933,6 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
struct kvm_vcpu *v;
unsigned long i;
bool all_cpus;
int consumed_xmm_halves = 0;
gpa_t data_offset;
/*
* The Hyper-V TLFS doesn't allow more than HV_MAX_SPARSE_VCPU_BANKS
@ -1955,12 +1960,12 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
flush.address_space = hc->ingpa;
flush.flags = hc->outgpa;
flush.processor_mask = sse128_lo(hc->xmm[0]);
consumed_xmm_halves = 1;
hc->consumed_xmm_halves = 1;
} else {
if (unlikely(kvm_read_guest(kvm, hc->ingpa,
&flush, sizeof(flush))))
return HV_STATUS_INVALID_HYPERCALL_INPUT;
data_offset = sizeof(flush);
hc->data_offset = sizeof(flush);
}
trace_kvm_hv_flush_tlb(flush.processor_mask,
@ -1985,12 +1990,12 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
flush_ex.flags = hc->outgpa;
memcpy(&flush_ex.hv_vp_set,
&hc->xmm[0], sizeof(hc->xmm[0]));
consumed_xmm_halves = 2;
hc->consumed_xmm_halves = 2;
} else {
if (unlikely(kvm_read_guest(kvm, hc->ingpa, &flush_ex,
sizeof(flush_ex))))
return HV_STATUS_INVALID_HYPERCALL_INPUT;
data_offset = sizeof(flush_ex);
hc->data_offset = sizeof(flush_ex);
}
trace_kvm_hv_flush_tlb_ex(flush_ex.hv_vp_set.valid_bank_mask,
@ -2009,8 +2014,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
if (!hc->var_cnt)
goto ret_success;
if (kvm_get_sparse_vp_set(kvm, hc, sparse_banks,
consumed_xmm_halves, data_offset))
if (kvm_get_sparse_vp_set(kvm, hc, sparse_banks))
return HV_STATUS_INVALID_HYPERCALL_INPUT;
}
@ -2021,8 +2025,10 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
* consumed_xmm_halves to make sure TLB flush entries are read
* from the correct offset.
*/
data_offset += hc->var_cnt * sizeof(sparse_banks[0]);
consumed_xmm_halves += hc->var_cnt;
if (hc->fast)
hc->consumed_xmm_halves += hc->var_cnt;
else
hc->data_offset += hc->var_cnt * sizeof(sparse_banks[0]);
}
if (hc->code == HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE ||
@ -2030,8 +2036,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
hc->rep_cnt > ARRAY_SIZE(__tlb_flush_entries)) {
tlb_flush_entries = NULL;
} else {
if (kvm_hv_get_tlb_flush_entries(kvm, hc, __tlb_flush_entries,
consumed_xmm_halves, data_offset))
if (kvm_hv_get_tlb_flush_entries(kvm, hc, __tlb_flush_entries))
return HV_STATUS_INVALID_HYPERCALL_INPUT;
tlb_flush_entries = __tlb_flush_entries;
}
@ -2180,9 +2185,13 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
if (!hc->var_cnt)
goto ret_success;
if (kvm_get_sparse_vp_set(kvm, hc, sparse_banks, 1,
offsetof(struct hv_send_ipi_ex,
vp_set.bank_contents)))
if (!hc->fast)
hc->data_offset = offsetof(struct hv_send_ipi_ex,
vp_set.bank_contents);
else
hc->consumed_xmm_halves = 1;
if (kvm_get_sparse_vp_set(kvm, hc, sparse_banks))
return HV_STATUS_INVALID_HYPERCALL_INPUT;
}

@ -426,8 +426,9 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
kvm_set_msi_irq(vcpu->kvm, entry, &irq);
if (irq.trig_mode &&
kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT,
irq.dest_id, irq.dest_mode))
(kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT,
irq.dest_id, irq.dest_mode) ||
kvm_apic_pending_eoi(vcpu, irq.vector)))
__set_bit(irq.vector, ioapic_handled_vectors);
}
}

@ -188,11 +188,11 @@ static inline bool lapic_in_kernel(struct kvm_vcpu *vcpu)
extern struct static_key_false_deferred apic_hw_disabled;
static inline int kvm_apic_hw_enabled(struct kvm_lapic *apic)
static inline bool kvm_apic_hw_enabled(struct kvm_lapic *apic)
{
if (static_branch_unlikely(&apic_hw_disabled.key))
return apic->vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE;
return MSR_IA32_APICBASE_ENABLE;
return true;
}
extern struct static_key_false_deferred apic_sw_disabled;

@ -363,7 +363,7 @@ static __always_inline bool is_rsvd_spte(struct rsvd_bits_validate *rsvd_check,
* A shadow-present leaf SPTE may be non-writable for 4 possible reasons:
*
* 1. To intercept writes for dirty logging. KVM write-protects huge pages
* so that they can be split be split down into the dirty logging
* so that they can be split down into the dirty logging
* granularity (4KiB) whenever the guest writes to them. KVM also
* write-protects 4KiB pages so that writes can be recorded in the dirty log
* (e.g. if not using PML). SPTEs are write-protected for dirty logging

@ -1074,7 +1074,9 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu,
int ret = RET_PF_FIXED;
bool wrprot = false;
WARN_ON(sp->role.level != fault->goal_level);
if (WARN_ON_ONCE(sp->role.level != fault->goal_level))
return RET_PF_RETRY;
if (unlikely(!fault->slot))
new_spte = make_mmio_spte(vcpu, iter->gfn, ACC_ALL);
else
@ -1173,9 +1175,6 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
if (fault->nx_huge_page_workaround_enabled)
disallowed_hugepage_adjust(fault, iter.old_spte, iter.level);
if (iter.level == fault->goal_level)
break;
/*
* If SPTE has been frozen by another thread, just give up and
* retry, avoiding unnecessary page table allocation and free.
@ -1183,6 +1182,9 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
if (is_removed_spte(iter.old_spte))
goto retry;
if (iter.level == fault->goal_level)
goto map_target_level;
/* Step down into the lower level page table if it exists. */
if (is_shadow_present_pte(iter.old_spte) &&
!is_large_pte(iter.old_spte))
@ -1203,8 +1205,8 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
r = tdp_mmu_link_sp(kvm, &iter, sp, true);
/*
* Also force the guest to retry the access if the upper level SPTEs
* aren't in place.
* Force the guest to retry if installing an upper level SPTE
* failed, e.g. because a different task modified the SPTE.
*/
if (r) {
tdp_mmu_free_sp(sp);
@ -1214,11 +1216,20 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
if (fault->huge_page_disallowed &&
fault->req_level >= iter.level) {
spin_lock(&kvm->arch.tdp_mmu_pages_lock);
track_possible_nx_huge_page(kvm, sp);
if (sp->nx_huge_page_disallowed)
track_possible_nx_huge_page(kvm, sp);
spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
}
}
/*
* The walk aborted before reaching the target level, e.g. because the
* iterator detected an upper level SPTE was frozen during traversal.
*/
WARN_ON_ONCE(iter.level == fault->goal_level);
goto retry;
map_target_level:
ret = tdp_mmu_map_handle_target_level(vcpu, fault, &iter);
retry:

@ -238,7 +238,8 @@ static bool pmc_resume_counter(struct kvm_pmc *pmc)
return false;
/* recalibrate sample period and check if it's accepted by perf core */
if (perf_event_period(pmc->perf_event,
if (is_sampling_event(pmc->perf_event) &&
perf_event_period(pmc->perf_event,
get_sample_period(pmc, pmc->counter)))
return false;

@ -140,7 +140,8 @@ static inline u64 get_sample_period(struct kvm_pmc *pmc, u64 counter_value)
static inline void pmc_update_sample_period(struct kvm_pmc *pmc)
{
if (!pmc->perf_event || pmc->is_paused)
if (!pmc->perf_event || pmc->is_paused ||
!is_sampling_event(pmc->perf_event))
return;
perf_event_period(pmc->perf_event,

@ -5296,10 +5296,19 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
if (vmptr == vmx->nested.current_vmptr)
nested_release_vmcs12(vcpu);
kvm_vcpu_write_guest(vcpu,
vmptr + offsetof(struct vmcs12,
launch_state),
&zero, sizeof(zero));
/*
* Silently ignore memory errors on VMCLEAR, Intel's pseudocode
* for VMCLEAR includes a "ensure that data for VMCS referenced
* by the operand is in memory" clause that guards writes to
* memory, i.e. doing nothing for I/O is architecturally valid.
*
* FIXME: Suppress failures if and only if no memslot is found,
* i.e. exit to userspace if __copy_to_user() fails.
*/
(void)kvm_vcpu_write_guest(vcpu,
vmptr + offsetof(struct vmcs12,
launch_state),
&zero, sizeof(zero));
} else if (vmx->nested.hv_evmcs && vmptr == vmx->nested.hv_evmcs_vmptr) {
nested_release_evmcs(vcpu);
}
@ -6873,7 +6882,8 @@ void nested_vmx_setup_ctls_msrs(struct vmcs_config *vmcs_conf, u32 ept_caps)
SECONDARY_EXEC_ENABLE_INVPCID |
SECONDARY_EXEC_RDSEED_EXITING |
SECONDARY_EXEC_XSAVES |
SECONDARY_EXEC_TSC_SCALING;
SECONDARY_EXEC_TSC_SCALING |
SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
/*
* We can emulate "VMCS shadowing," even if the hardware

@ -4459,6 +4459,13 @@ vmx_adjust_secondary_exec_control(struct vcpu_vmx *vmx, u32 *exec_control,
* controls for features that are/aren't exposed to the guest.
*/
if (nested) {
/*
* All features that can be added or removed to VMX MSRs must
* be supported in the first place for nested virtualization.
*/
if (WARN_ON_ONCE(!(vmcs_config.nested.secondary_ctls_high & control)))
enabled = false;
if (enabled)
vmx->nested.msrs.secondary_ctls_high |= control;
else

@ -13132,6 +13132,9 @@ int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r,
struct x86_exception *e)
{
if (r == X86EMUL_PROPAGATE_FAULT) {
if (KVM_BUG_ON(!e, vcpu->kvm))
return -EIO;
kvm_inject_emulated_page_fault(vcpu, e);
return 1;
}

@ -41,7 +41,7 @@ static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn)
int ret = 0;
int idx = srcu_read_lock(&kvm->srcu);
if (gfn == GPA_INVALID) {
if (gfn == KVM_XEN_INVALID_GFN) {
kvm_gpc_deactivate(gpc);
goto out;
}
@ -659,7 +659,7 @@ int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
if (kvm->arch.xen.shinfo_cache.active)
data->u.shared_info.gfn = gpa_to_gfn(kvm->arch.xen.shinfo_cache.gpa);
else
data->u.shared_info.gfn = GPA_INVALID;
data->u.shared_info.gfn = KVM_XEN_INVALID_GFN;
r = 0;
break;
@ -705,7 +705,7 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
BUILD_BUG_ON(offsetof(struct vcpu_info, time) !=
offsetof(struct compat_vcpu_info, time));
if (data->u.gpa == GPA_INVALID) {
if (data->u.gpa == KVM_XEN_INVALID_GPA) {
kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_info_cache);
r = 0;
break;
@ -719,7 +719,7 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
break;
case KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO:
if (data->u.gpa == GPA_INVALID) {
if (data->u.gpa == KVM_XEN_INVALID_GPA) {
kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_time_info_cache);
r = 0;
break;
@ -739,7 +739,7 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
r = -EOPNOTSUPP;
break;
}
if (data->u.gpa == GPA_INVALID) {
if (data->u.gpa == KVM_XEN_INVALID_GPA) {
r = 0;
deactivate_out:
kvm_gpc_deactivate(&vcpu->arch.xen.runstate_cache);
@ -937,7 +937,7 @@ int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
if (vcpu->arch.xen.vcpu_info_cache.active)
data->u.gpa = vcpu->arch.xen.vcpu_info_cache.gpa;
else
data->u.gpa = GPA_INVALID;
data->u.gpa = KVM_XEN_INVALID_GPA;
r = 0;
break;
@ -945,7 +945,7 @@ int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
if (vcpu->arch.xen.vcpu_time_info_cache.active)
data->u.gpa = vcpu->arch.xen.vcpu_time_info_cache.gpa;
else
data->u.gpa = GPA_INVALID;
data->u.gpa = KVM_XEN_INVALID_GPA;
r = 0;
break;
@ -1069,6 +1069,7 @@ int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data)
u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64
: kvm->arch.xen_hvm_config.blob_size_32;
u8 *page;
int ret;
if (page_num >= blob_size)
return 1;
@ -1079,10 +1080,10 @@ int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data)
if (IS_ERR(page))
return PTR_ERR(page);
if (kvm_vcpu_write_guest(vcpu, page_addr, page, PAGE_SIZE)) {
kfree(page);
ret = kvm_vcpu_write_guest(vcpu, page_addr, page, PAGE_SIZE);
kfree(page);
if (ret)
return 1;
}
}
return 0;
}
@ -1183,30 +1184,22 @@ static bool wait_pending_event(struct kvm_vcpu *vcpu, int nr_ports,
static bool kvm_xen_schedop_poll(struct kvm_vcpu *vcpu, bool longmode,
u64 param, u64 *r)
{
int idx, i;
struct sched_poll sched_poll;
evtchn_port_t port, *ports;
gpa_t gpa;
struct x86_exception e;
int i;
if (!lapic_in_kernel(vcpu) ||
!(vcpu->kvm->arch.xen_hvm_config.flags & KVM_XEN_HVM_CONFIG_EVTCHN_SEND))
return false;
idx = srcu_read_lock(&vcpu->kvm->srcu);
gpa = kvm_mmu_gva_to_gpa_system(vcpu, param, NULL);
srcu_read_unlock(&vcpu->kvm->srcu, idx);
if (!gpa) {
*r = -EFAULT;
return true;
}
if (IS_ENABLED(CONFIG_64BIT) && !longmode) {
struct compat_sched_poll sp32;
/* Sanity check that the compat struct definition is correct */
BUILD_BUG_ON(sizeof(sp32) != 16);
if (kvm_vcpu_read_guest(vcpu, gpa, &sp32, sizeof(sp32))) {
if (kvm_read_guest_virt(vcpu, param, &sp32, sizeof(sp32), &e)) {
*r = -EFAULT;
return true;
}
@ -1220,8 +1213,8 @@ static bool kvm_xen_schedop_poll(struct kvm_vcpu *vcpu, bool longmode,
sched_poll.nr_ports = sp32.nr_ports;
sched_poll.timeout = sp32.timeout;
} else {
if (kvm_vcpu_read_guest(vcpu, gpa, &sched_poll,
sizeof(sched_poll))) {
if (kvm_read_guest_virt(vcpu, param, &sched_poll,
sizeof(sched_poll), &e)) {
*r = -EFAULT;
return true;
}
@ -1243,18 +1236,13 @@ static bool kvm_xen_schedop_poll(struct kvm_vcpu *vcpu, bool longmode,
} else
ports = &port;
for (i = 0; i < sched_poll.nr_ports; i++) {
idx = srcu_read_lock(&vcpu->kvm->srcu);
gpa = kvm_mmu_gva_to_gpa_system(vcpu,
(gva_t)(sched_poll.ports + i),
NULL);
srcu_read_unlock(&vcpu->kvm->srcu, idx);
if (kvm_read_guest_virt(vcpu, (gva_t)sched_poll.ports, ports,
sched_poll.nr_ports * sizeof(*ports), &e)) {
*r = -EFAULT;
return true;
}
if (!gpa || kvm_vcpu_read_guest(vcpu, gpa,
&ports[i], sizeof(port))) {
*r = -EFAULT;
goto out;
}
for (i = 0; i < sched_poll.nr_ports; i++) {
if (ports[i] >= max_evtchn_port(vcpu->kvm)) {
*r = -EINVAL;
goto out;
@ -1330,9 +1318,8 @@ static bool kvm_xen_hcall_vcpu_op(struct kvm_vcpu *vcpu, bool longmode, int cmd,
int vcpu_id, u64 param, u64 *r)
{
struct vcpu_set_singleshot_timer oneshot;
struct x86_exception e;
s64 delta;
gpa_t gpa;
int idx;
if (!kvm_xen_timer_enabled(vcpu))
return false;
@ -1343,9 +1330,6 @@ static bool kvm_xen_hcall_vcpu_op(struct kvm_vcpu *vcpu, bool longmode, int cmd,
*r = -EINVAL;
return true;
}
idx = srcu_read_lock(&vcpu->kvm->srcu);
gpa = kvm_mmu_gva_to_gpa_system(vcpu, param, NULL);
srcu_read_unlock(&vcpu->kvm->srcu, idx);
/*
* The only difference for 32-bit compat is the 4 bytes of
@ -1363,9 +1347,8 @@ static bool kvm_xen_hcall_vcpu_op(struct kvm_vcpu *vcpu, bool longmode, int cmd,
BUILD_BUG_ON(sizeof_field(struct compat_vcpu_set_singleshot_timer, flags) !=
sizeof_field(struct vcpu_set_singleshot_timer, flags));
if (!gpa ||
kvm_vcpu_read_guest(vcpu, gpa, &oneshot, longmode ? sizeof(oneshot) :
sizeof(struct compat_vcpu_set_singleshot_timer))) {
if (kvm_read_guest_virt(vcpu, param, &oneshot, longmode ? sizeof(oneshot) :
sizeof(struct compat_vcpu_set_singleshot_timer), &e)) {
*r = -EFAULT;
return true;
}
@ -1825,20 +1808,20 @@ static int kvm_xen_eventfd_update(struct kvm *kvm,
{
u32 port = data->u.evtchn.send_port;
struct evtchnfd *evtchnfd;
int ret;
if (!port || port >= max_evtchn_port(kvm))
return -EINVAL;
/* Protect writes to evtchnfd as well as the idr lookup. */
mutex_lock(&kvm->lock);
evtchnfd = idr_find(&kvm->arch.xen.evtchn_ports, port);
mutex_unlock(&kvm->lock);
ret = -ENOENT;
if (!evtchnfd)
return -ENOENT;
goto out_unlock;
/* For an UPDATE, nothing may change except the priority/vcpu */
ret = -EINVAL;
if (evtchnfd->type != data->u.evtchn.type)
return -EINVAL;
goto out_unlock;
/*
* Port cannot change, and if it's zero that was an eventfd
@ -1846,20 +1829,21 @@ static int kvm_xen_eventfd_update(struct kvm *kvm,
*/
if (!evtchnfd->deliver.port.port ||
evtchnfd->deliver.port.port != data->u.evtchn.deliver.port.port)
return -EINVAL;
goto out_unlock;
/* We only support 2 level event channels for now */
if (data->u.evtchn.deliver.port.priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL)
return -EINVAL;
goto out_unlock;
mutex_lock(&kvm->lock);
evtchnfd->deliver.port.priority = data->u.evtchn.deliver.port.priority;
if (evtchnfd->deliver.port.vcpu_id != data->u.evtchn.deliver.port.vcpu) {
evtchnfd->deliver.port.vcpu_id = data->u.evtchn.deliver.port.vcpu;
evtchnfd->deliver.port.vcpu_idx = -1;
}
ret = 0;
out_unlock:
mutex_unlock(&kvm->lock);
return 0;
return ret;
}
/*
@ -1871,12 +1855,9 @@ static int kvm_xen_eventfd_assign(struct kvm *kvm,
{
u32 port = data->u.evtchn.send_port;
struct eventfd_ctx *eventfd = NULL;
struct evtchnfd *evtchnfd = NULL;
struct evtchnfd *evtchnfd;
int ret = -EINVAL;
if (!port || port >= max_evtchn_port(kvm))
return -EINVAL;
evtchnfd = kzalloc(sizeof(struct evtchnfd), GFP_KERNEL);
if (!evtchnfd)
return -ENOMEM;
@ -1952,8 +1933,7 @@ static int kvm_xen_eventfd_deassign(struct kvm *kvm, u32 port)
if (!evtchnfd)
return -ENOENT;
if (kvm)
synchronize_srcu(&kvm->srcu);
synchronize_srcu(&kvm->srcu);
if (!evtchnfd->deliver.port.port)
eventfd_ctx_put(evtchnfd->deliver.eventfd.ctx);
kfree(evtchnfd);
@ -1962,18 +1942,42 @@ static int kvm_xen_eventfd_deassign(struct kvm *kvm, u32 port)
static int kvm_xen_eventfd_reset(struct kvm *kvm)
{
struct evtchnfd *evtchnfd;
struct evtchnfd *evtchnfd, **all_evtchnfds;
int i;
int n = 0;
mutex_lock(&kvm->lock);
/*
* Because synchronize_srcu() cannot be called inside the
* critical section, first collect all the evtchnfd objects
* in an array as they are removed from evtchn_ports.
*/
idr_for_each_entry(&kvm->arch.xen.evtchn_ports, evtchnfd, i)
n++;
all_evtchnfds = kmalloc_array(n, sizeof(struct evtchnfd *), GFP_KERNEL);
if (!all_evtchnfds) {
mutex_unlock(&kvm->lock);
return -ENOMEM;
}
n = 0;
idr_for_each_entry(&kvm->arch.xen.evtchn_ports, evtchnfd, i) {
all_evtchnfds[n++] = evtchnfd;
idr_remove(&kvm->arch.xen.evtchn_ports, evtchnfd->send_port);
synchronize_srcu(&kvm->srcu);
}
mutex_unlock(&kvm->lock);
synchronize_srcu(&kvm->srcu);
while (n--) {
evtchnfd = all_evtchnfds[n];
if (!evtchnfd->deliver.port.port)
eventfd_ctx_put(evtchnfd->deliver.eventfd.ctx);
kfree(evtchnfd);
}
mutex_unlock(&kvm->lock);
kfree(all_evtchnfds);
return 0;
}
@ -2002,20 +2006,22 @@ static bool kvm_xen_hcall_evtchn_send(struct kvm_vcpu *vcpu, u64 param, u64 *r)
{
struct evtchnfd *evtchnfd;
struct evtchn_send send;
gpa_t gpa;
int idx;
struct x86_exception e;
idx = srcu_read_lock(&vcpu->kvm->srcu);
gpa = kvm_mmu_gva_to_gpa_system(vcpu, param, NULL);
srcu_read_unlock(&vcpu->kvm->srcu, idx);
if (!gpa || kvm_vcpu_read_guest(vcpu, gpa, &send, sizeof(send))) {
/* Sanity check: this structure is the same for 32-bit and 64-bit */
BUILD_BUG_ON(sizeof(send) != 4);
if (kvm_read_guest_virt(vcpu, param, &send, sizeof(send), &e)) {
*r = -EFAULT;
return true;
}
/* The evtchn_ports idr is protected by vcpu->kvm->srcu */
/*
* evtchnfd is protected by kvm->srcu; the idr lookup instead
* is protected by RCU.
*/
rcu_read_lock();
evtchnfd = idr_find(&vcpu->kvm->arch.xen.evtchn_ports, send.port);
rcu_read_unlock();
if (!evtchnfd)
return false;

@ -5317,8 +5317,8 @@ static void bfq_exit_icq_bfqq(struct bfq_io_cq *bic, bool is_sync)
unsigned long flags;
spin_lock_irqsave(&bfqd->lock, flags);
bfq_exit_bfqq(bfqd, bfqq);
bic_set_bfqq(bic, NULL, is_sync);
bfq_exit_bfqq(bfqd, bfqq);
spin_unlock_irqrestore(&bfqd->lock, flags);
}
}

@ -189,3 +189,4 @@ obj-$(CONFIG_COUNTER) += counter/
obj-$(CONFIG_MOST) += most/
obj-$(CONFIG_PECI) += peci/
obj-$(CONFIG_HTE) += hte/
obj-$(CONFIG_DRM_ACCEL) += accel/

@ -22,3 +22,5 @@ menuconfig DRM_ACCEL
major number than GPUs, and will be exposed to user-space using
different device files, called accel/accel* (in /dev, sysfs
and debugfs).
source "drivers/accel/ivpu/Kconfig"

3
drivers/accel/Makefile Normal file

@ -0,0 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-y += ivpu/

@ -0,0 +1,15 @@
# SPDX-License-Identifier: GPL-2.0-only
config DRM_ACCEL_IVPU
tristate "Intel VPU for Meteor Lake and newer"
depends on DRM_ACCEL
depends on X86_64 && !UML
depends on PCI && PCI_MSI
select FW_LOADER
select SHMEM
help
Choose this option if you have a system that has a 14th generation Intel CPU
or newer. VPU stands for Versatile Processing Unit and it's a CPU-integrated
inference accelerator for Computer Vision and Deep Learning applications.
If "M" is selected, the module will be called intel_vpu.

@ -0,0 +1,16 @@
# SPDX-License-Identifier: GPL-2.0-only
# Copyright (C) 2023 Intel Corporation
intel_vpu-y := \
ivpu_drv.o \
ivpu_fw.o \
ivpu_gem.o \
ivpu_hw_mtl.o \
ivpu_ipc.o \
ivpu_job.o \
ivpu_jsm_msg.o \
ivpu_mmu.o \
ivpu_mmu_context.o \
ivpu_pm.o
obj-$(CONFIG_DRM_ACCEL_IVPU) += intel_vpu.o

11
drivers/accel/ivpu/TODO Normal file

@ -0,0 +1,11 @@
- Move to threaded_irqs to mitigate potential infinite loop in ivpu_ipc_irq_handler()
- Implement support for BLOB IDs
- Add debugfs support to improve debugging and testing
- Add tracing events for performance debugging
- Implement HW based scheduling support
- Use syncobjs for submit/sync
- Refactor IPC protocol to improve message latency
- Implement BO cache and MADVISE IOCTL
- Add support for user allocated buffers using prime import and dma-buf heaps
- Refactor struct ivpu_bo to use struct drm_gem_shmem_object
- Add driver/device documentation

@ -0,0 +1,654 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2020-2023 Intel Corporation
*/
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <drm/drm_accel.h>
#include <drm/drm_drv.h>
#include <drm/drm_file.h>
#include <drm/drm_gem.h>
#include <drm/drm_ioctl.h>
#include <drm/drm_prime.h>
#include "vpu_boot_api.h"
#include "ivpu_drv.h"
#include "ivpu_fw.h"
#include "ivpu_gem.h"
#include "ivpu_hw.h"
#include "ivpu_ipc.h"
#include "ivpu_job.h"
#include "ivpu_jsm_msg.h"
#include "ivpu_mmu.h"
#include "ivpu_mmu_context.h"
#include "ivpu_pm.h"
#ifndef DRIVER_VERSION_STR
#define DRIVER_VERSION_STR __stringify(DRM_IVPU_DRIVER_MAJOR) "." \
__stringify(DRM_IVPU_DRIVER_MINOR) "."
#endif
static const struct drm_driver driver;
static struct lock_class_key submitted_jobs_xa_lock_class_key;
int ivpu_dbg_mask;
module_param_named(dbg_mask, ivpu_dbg_mask, int, 0644);
MODULE_PARM_DESC(dbg_mask, "Driver debug mask. See IVPU_DBG_* macros.");
int ivpu_test_mode;
module_param_named_unsafe(test_mode, ivpu_test_mode, int, 0644);
MODULE_PARM_DESC(test_mode, "Test mode: 0 - normal operation, 1 - fw unit test, 2 - null hw");
u8 ivpu_pll_min_ratio;
module_param_named(pll_min_ratio, ivpu_pll_min_ratio, byte, 0644);
MODULE_PARM_DESC(pll_min_ratio, "Minimum PLL ratio used to set VPU frequency");
u8 ivpu_pll_max_ratio = U8_MAX;
module_param_named(pll_max_ratio, ivpu_pll_max_ratio, byte, 0644);
MODULE_PARM_DESC(pll_max_ratio, "Maximum PLL ratio used to set VPU frequency");
struct ivpu_file_priv *ivpu_file_priv_get(struct ivpu_file_priv *file_priv)
{
struct ivpu_device *vdev = file_priv->vdev;
kref_get(&file_priv->ref);
ivpu_dbg(vdev, KREF, "file_priv get: ctx %u refcount %u\n",
file_priv->ctx.id, kref_read(&file_priv->ref));
return file_priv;
}
struct ivpu_file_priv *ivpu_file_priv_get_by_ctx_id(struct ivpu_device *vdev, unsigned long id)
{
struct ivpu_file_priv *file_priv;
xa_lock_irq(&vdev->context_xa);
file_priv = xa_load(&vdev->context_xa, id);
/* file_priv may still be in context_xa during file_priv_release() */
if (file_priv && !kref_get_unless_zero(&file_priv->ref))
file_priv = NULL;
xa_unlock_irq(&vdev->context_xa);
if (file_priv)
ivpu_dbg(vdev, KREF, "file_priv get by id: ctx %u refcount %u\n",
file_priv->ctx.id, kref_read(&file_priv->ref));
return file_priv;
}
static void file_priv_release(struct kref *ref)
{
struct ivpu_file_priv *file_priv = container_of(ref, struct ivpu_file_priv, ref);
struct ivpu_device *vdev = file_priv->vdev;
ivpu_dbg(vdev, FILE, "file_priv release: ctx %u\n", file_priv->ctx.id);
ivpu_cmdq_release_all(file_priv);
ivpu_bo_remove_all_bos_from_context(&file_priv->ctx);
ivpu_mmu_user_context_fini(vdev, &file_priv->ctx);
drm_WARN_ON(&vdev->drm, xa_erase_irq(&vdev->context_xa, file_priv->ctx.id) != file_priv);
mutex_destroy(&file_priv->lock);
kfree(file_priv);
}
void ivpu_file_priv_put(struct ivpu_file_priv **link)
{
struct ivpu_file_priv *file_priv = *link;
struct ivpu_device *vdev = file_priv->vdev;
drm_WARN_ON(&vdev->drm, !file_priv);
ivpu_dbg(vdev, KREF, "file_priv put: ctx %u refcount %u\n",
file_priv->ctx.id, kref_read(&file_priv->ref));
*link = NULL;
kref_put(&file_priv->ref, file_priv_release);
}
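/*
 * DRM_IVPU_GET_PARAM ioctl: report device identification, clock, context
 * and firmware information to user space, one parameter per call.
 */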
static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
struct ivpu_file_priv *file_priv = file->driver_priv;
struct ivpu_device *vdev = file_priv->vdev;
struct pci_dev *pdev = to_pci_dev(vdev->drm.dev);
struct drm_ivpu_param *args = data;
int ret = 0;
switch (args->param) {
case DRM_IVPU_PARAM_DEVICE_ID:
args->value = pdev->device;
break;
case DRM_IVPU_PARAM_DEVICE_REVISION:
args->value = pdev->revision;
break;
case DRM_IVPU_PARAM_PLATFORM_TYPE:
args->value = vdev->platform;
break;
case DRM_IVPU_PARAM_CORE_CLOCK_RATE:
args->value = ivpu_hw_reg_pll_freq_get(vdev);
break;
case DRM_IVPU_PARAM_NUM_CONTEXTS:
args->value = ivpu_get_context_count(vdev);
break;
case DRM_IVPU_PARAM_CONTEXT_BASE_ADDRESS:
args->value = vdev->hw->ranges.user_low.start;
break;
case DRM_IVPU_PARAM_CONTEXT_PRIORITY:
args->value = file_priv->priority;
break;
case DRM_IVPU_PARAM_CONTEXT_ID:
args->value = file_priv->ctx.id;
break;
case DRM_IVPU_PARAM_FW_API_VERSION:
if (args->index < VPU_FW_API_VER_NUM) {
struct vpu_firmware_header *fw_hdr;
fw_hdr = (struct vpu_firmware_header *)vdev->fw->file->data;
args->value = fw_hdr->api_version[args->index];
} else {
ret = -EINVAL;
}
break;
case DRM_IVPU_PARAM_ENGINE_HEARTBEAT:
ret = ivpu_jsm_get_heartbeat(vdev, args->index, &args->value);
break;
case DRM_IVPU_PARAM_UNIQUE_INFERENCE_ID:
args->value = (u64)atomic64_inc_return(&vdev->unique_id_counter);
break;
case DRM_IVPU_PARAM_TILE_CONFIG:
args->value = vdev->hw->tile_fuse;
break;
case DRM_IVPU_PARAM_SKU:
args->value = vdev->hw->sku;
break;
default:
ret = -EINVAL;
break;
}
return ret;
}
static int ivpu_set_param_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
struct ivpu_file_priv *file_priv = file->driver_priv;
struct drm_ivpu_param *args = data;
int ret = 0;
switch (args->param) {
case DRM_IVPU_PARAM_CONTEXT_PRIORITY:
if (args->value <= DRM_IVPU_CONTEXT_PRIORITY_REALTIME)
file_priv->priority = args->value;
else
ret = -EINVAL;
break;
default:
ret = -EINVAL;
}
return ret;
}
static int ivpu_open(struct drm_device *dev, struct drm_file *file)
{
struct ivpu_device *vdev = to_ivpu_device(dev);
struct ivpu_file_priv *file_priv;
u32 ctx_id;
void *old;
int ret;
ret = xa_alloc_irq(&vdev->context_xa, &ctx_id, NULL, vdev->context_xa_limit, GFP_KERNEL);
if (ret) {
ivpu_err(vdev, "Failed to allocate context id: %d\n", ret);
return ret;
}
file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
if (!file_priv) {
ret = -ENOMEM;
goto err_xa_erase;
}
file_priv->vdev = vdev;
file_priv->priority = DRM_IVPU_CONTEXT_PRIORITY_NORMAL;
kref_init(&file_priv->ref);
mutex_init(&file_priv->lock);
ret = ivpu_mmu_user_context_init(vdev, &file_priv->ctx, ctx_id);
if (ret)
goto err_mutex_destroy;
old = xa_store_irq(&vdev->context_xa, ctx_id, file_priv, GFP_KERNEL);
if (xa_is_err(old)) {
ret = xa_err(old);
ivpu_err(vdev, "Failed to store context %u: %d\n", ctx_id, ret);
goto err_ctx_fini;
}
ivpu_dbg(vdev, FILE, "file_priv create: ctx %u process %s pid %d\n",
ctx_id, current->comm, task_pid_nr(current));
file->driver_priv = file_priv;
return 0;
err_ctx_fini:
ivpu_mmu_user_context_fini(vdev, &file_priv->ctx);
err_mutex_destroy:
mutex_destroy(&file_priv->lock);
kfree(file_priv);
err_xa_erase:
xa_erase_irq(&vdev->context_xa, ctx_id);
return ret;
}
static void ivpu_postclose(struct drm_device *dev, struct drm_file *file)
{
struct ivpu_file_priv *file_priv = file->driver_priv;
struct ivpu_device *vdev = to_ivpu_device(dev);
ivpu_dbg(vdev, FILE, "file_priv close: ctx %u process %s pid %d\n",
file_priv->ctx.id, current->comm, task_pid_nr(current));
ivpu_file_priv_put(&file_priv);
}
static const struct drm_ioctl_desc ivpu_drm_ioctls[] = {
DRM_IOCTL_DEF_DRV(IVPU_GET_PARAM, ivpu_get_param_ioctl, 0),
DRM_IOCTL_DEF_DRV(IVPU_SET_PARAM, ivpu_set_param_ioctl, 0),
DRM_IOCTL_DEF_DRV(IVPU_BO_CREATE, ivpu_bo_create_ioctl, 0),
DRM_IOCTL_DEF_DRV(IVPU_BO_INFO, ivpu_bo_info_ioctl, 0),
DRM_IOCTL_DEF_DRV(IVPU_SUBMIT, ivpu_submit_ioctl, 0),
DRM_IOCTL_DEF_DRV(IVPU_BO_WAIT, ivpu_bo_wait_ioctl, 0),
};
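/*
 * Wait for the firmware to signal readiness after boot: poll the IPC boot
 * channel until a message carrying IVPU_IPC_BOOT_MSG_DATA_ADDR arrives or
 * the boot timeout expires. Skipped entirely in firmware unit-test mode.
 */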
static int ivpu_wait_for_ready(struct ivpu_device *vdev)
{
struct ivpu_ipc_consumer cons;
struct ivpu_ipc_hdr ipc_hdr;
unsigned long timeout;
int ret;
if (ivpu_test_mode == IVPU_TEST_MODE_FW_TEST)
return 0;
ivpu_ipc_consumer_add(vdev, &cons, IVPU_IPC_CHAN_BOOT_MSG);
timeout = jiffies + msecs_to_jiffies(vdev->timeout.boot);
while (1) {
ret = ivpu_ipc_irq_handler(vdev);
if (ret)
break;
ret = ivpu_ipc_receive(vdev, &cons, &ipc_hdr, NULL, 0);
if (ret != -ETIMEDOUT || time_after_eq(jiffies, timeout))
break;
cond_resched();
}
ivpu_ipc_consumer_del(vdev, &cons);
if (!ret && ipc_hdr.data_addr != IVPU_IPC_BOOT_MSG_DATA_ADDR) {
ivpu_err(vdev, "Invalid VPU ready message: 0x%x\n",
ipc_hdr.data_addr);
return -EIO;
}
if (!ret)
ivpu_info(vdev, "VPU ready message received successfully\n");
else
ivpu_hw_diagnose_failure(vdev);
return ret;
}
/**
* ivpu_boot() - Start VPU firmware
* @vdev: VPU device
*
* This function is paired with ivpu_shutdown() but it doesn't power up the
* VPU because power up has to be called very early in ivpu_probe().
*/
int ivpu_boot(struct ivpu_device *vdev)
{
int ret;
/* Update boot params located at first 4KB of FW memory */
ivpu_fw_boot_params_setup(vdev, vdev->fw->mem->kvaddr);
ret = ivpu_hw_boot_fw(vdev);
if (ret) {
ivpu_err(vdev, "Failed to start the firmware: %d\n", ret);
return ret;
}
ret = ivpu_wait_for_ready(vdev);
if (ret) {
ivpu_err(vdev, "Failed to boot the firmware: %d\n", ret);
return ret;
}
ivpu_hw_irq_clear(vdev);
enable_irq(vdev->irq);
ivpu_hw_irq_enable(vdev);
ivpu_ipc_enable(vdev);
return 0;
}
int ivpu_shutdown(struct ivpu_device *vdev)
{
int ret;
ivpu_hw_irq_disable(vdev);
disable_irq(vdev->irq);
ivpu_ipc_disable(vdev);
ivpu_mmu_disable(vdev);
ret = ivpu_hw_power_down(vdev);
if (ret)
ivpu_warn(vdev, "Failed to power down HW: %d\n", ret);
return ret;
}
static const struct file_operations ivpu_fops = {
.owner = THIS_MODULE,
.mmap = drm_gem_mmap,
DRM_ACCEL_FOPS,
};
static const struct drm_driver driver = {
.driver_features = DRIVER_GEM | DRIVER_COMPUTE_ACCEL,
.open = ivpu_open,
.postclose = ivpu_postclose,
.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
.gem_prime_import = ivpu_gem_prime_import,
.gem_prime_mmap = drm_gem_prime_mmap,
.ioctls = ivpu_drm_ioctls,
.num_ioctls = ARRAY_SIZE(ivpu_drm_ioctls),
.fops = &ivpu_fops,
.name = DRIVER_NAME,
.desc = DRIVER_DESC,
.date = DRIVER_DATE,
.major = DRM_IVPU_DRIVER_MAJOR,
.minor = DRM_IVPU_DRIVER_MINOR,
};
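/*
 * Request a single MSI/MSI-X vector and hook up the HW-specific interrupt
 * handler. IRQF_NO_AUTOEN keeps the line disabled until ivpu_boot() calls
 * enable_irq() once the firmware is ready to raise interrupts.
 */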
static int ivpu_irq_init(struct ivpu_device *vdev)
{
struct pci_dev *pdev = to_pci_dev(vdev->drm.dev);
int ret;
ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI | PCI_IRQ_MSIX);
if (ret < 0) {
ivpu_err(vdev, "Failed to allocate a MSI IRQ: %d\n", ret);
return ret;
}
vdev->irq = pci_irq_vector(pdev, 0);
ret = devm_request_irq(vdev->drm.dev, vdev->irq, vdev->hw->ops->irq_handler,
IRQF_NO_AUTOEN, DRIVER_NAME, vdev);
if (ret)
ivpu_err(vdev, "Failed to request an IRQ %d\n", ret);
return ret;
}
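/*
 * Map BAR0 (VPU registers) and BAR4 (buttress registers), set the 38-bit
 * DMA mask, clear stale PCIe device-status errors and enable bus mastering.
 */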
static int ivpu_pci_init(struct ivpu_device *vdev)
{
struct pci_dev *pdev = to_pci_dev(vdev->drm.dev);
struct resource *bar0 = &pdev->resource[0];
struct resource *bar4 = &pdev->resource[4];
int ret;
ivpu_dbg(vdev, MISC, "Mapping BAR0 (RegV) %pR\n", bar0);
vdev->regv = devm_ioremap_resource(vdev->drm.dev, bar0);
if (IS_ERR(vdev->regv)) {
ivpu_err(vdev, "Failed to map bar 0: %pe\n", vdev->regv);
return PTR_ERR(vdev->regv);
}
ivpu_dbg(vdev, MISC, "Mapping BAR4 (RegB) %pR\n", bar4);
vdev->regb = devm_ioremap_resource(vdev->drm.dev, bar4);
if (IS_ERR(vdev->regb)) {
ivpu_err(vdev, "Failed to map bar 4: %pe\n", vdev->regb);
return PTR_ERR(vdev->regb);
}
ret = dma_set_mask_and_coherent(vdev->drm.dev, DMA_BIT_MASK(38));
if (ret) {
ivpu_err(vdev, "Failed to set DMA mask: %d\n", ret);
return ret;
}
/* Clear any pending errors */
pcie_capability_clear_word(pdev, PCI_EXP_DEVSTA, 0x3f);
ret = pcim_enable_device(pdev);
if (ret) {
ivpu_err(vdev, "Failed to enable PCI device: %d\n", ret);
return ret;
}
pci_set_master(pdev);
return 0;
}
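/*
 * One-time device bring-up: allocate sub-structures, initialize PCI and
 * IRQs, power up the HW early (so the rest of init can touch VPU
 * registers), then set up MMU, firmware, IPC, PM and the job-done thread
 * before loading the firmware image and booting it.
 */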
static int ivpu_dev_init(struct ivpu_device *vdev)
{
int ret;
vdev->hw = drmm_kzalloc(&vdev->drm, sizeof(*vdev->hw), GFP_KERNEL);
if (!vdev->hw)
return -ENOMEM;
vdev->mmu = drmm_kzalloc(&vdev->drm, sizeof(*vdev->mmu), GFP_KERNEL);
if (!vdev->mmu)
return -ENOMEM;
vdev->fw = drmm_kzalloc(&vdev->drm, sizeof(*vdev->fw), GFP_KERNEL);
if (!vdev->fw)
return -ENOMEM;
vdev->ipc = drmm_kzalloc(&vdev->drm, sizeof(*vdev->ipc), GFP_KERNEL);
if (!vdev->ipc)
return -ENOMEM;
vdev->pm = drmm_kzalloc(&vdev->drm, sizeof(*vdev->pm), GFP_KERNEL);
if (!vdev->pm)
return -ENOMEM;
vdev->hw->ops = &ivpu_hw_mtl_ops;
vdev->platform = IVPU_PLATFORM_INVALID;
vdev->context_xa_limit.min = IVPU_GLOBAL_CONTEXT_MMU_SSID + 1;
vdev->context_xa_limit.max = IVPU_CONTEXT_LIMIT;
atomic64_set(&vdev->unique_id_counter, 0);
xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC);
xa_init_flags(&vdev->submitted_jobs_xa, XA_FLAGS_ALLOC1);
lockdep_set_class(&vdev->submitted_jobs_xa.xa_lock, &submitted_jobs_xa_lock_class_key);
ret = ivpu_pci_init(vdev);
if (ret) {
ivpu_err(vdev, "Failed to initialize PCI device: %d\n", ret);
goto err_xa_destroy;
}
ret = ivpu_irq_init(vdev);
if (ret) {
ivpu_err(vdev, "Failed to initialize IRQs: %d\n", ret);
goto err_xa_destroy;
}
/* Init basic HW info based on buttress registers which are accessible before power up */
ret = ivpu_hw_info_init(vdev);
if (ret) {
ivpu_err(vdev, "Failed to initialize HW info: %d\n", ret);
goto err_xa_destroy;
}
/* Power up early so the rest of init code can access VPU registers */
ret = ivpu_hw_power_up(vdev);
if (ret) {
ivpu_err(vdev, "Failed to power up HW: %d\n", ret);
goto err_xa_destroy;
}
ret = ivpu_mmu_global_context_init(vdev);
if (ret) {
ivpu_err(vdev, "Failed to initialize global MMU context: %d\n", ret);
goto err_power_down;
}
ret = ivpu_mmu_init(vdev);
if (ret) {
ivpu_err(vdev, "Failed to initialize MMU device: %d\n", ret);
goto err_mmu_gctx_fini;
}
ret = ivpu_fw_init(vdev);
if (ret) {
ivpu_err(vdev, "Failed to initialize firmware: %d\n", ret);
goto err_mmu_gctx_fini;
}
ret = ivpu_ipc_init(vdev);
if (ret) {
ivpu_err(vdev, "Failed to initialize IPC: %d\n", ret);
goto err_fw_fini;
}
ret = ivpu_pm_init(vdev);
if (ret) {
ivpu_err(vdev, "Failed to initialize PM: %d\n", ret);
goto err_ipc_fini;
}
ret = ivpu_job_done_thread_init(vdev);
if (ret) {
ivpu_err(vdev, "Failed to initialize job done thread: %d\n", ret);
goto err_ipc_fini;
}
ret = ivpu_fw_load(vdev);
if (ret) {
ivpu_err(vdev, "Failed to load firmware: %d\n", ret);
goto err_job_done_thread_fini;
}
ret = ivpu_boot(vdev);
if (ret) {
ivpu_err(vdev, "Failed to boot: %d\n", ret);
goto err_job_done_thread_fini;
}
ivpu_pm_enable(vdev);
return 0;
err_job_done_thread_fini:
ivpu_job_done_thread_fini(vdev);
err_ipc_fini:
ivpu_ipc_fini(vdev);
err_fw_fini:
ivpu_fw_fini(vdev);
err_mmu_gctx_fini:
ivpu_mmu_global_context_fini(vdev);
err_power_down:
ivpu_hw_power_down(vdev);
err_xa_destroy:
xa_destroy(&vdev->submitted_jobs_xa);
xa_destroy(&vdev->context_xa);
return ret;
}
static void ivpu_dev_fini(struct ivpu_device *vdev)
{
ivpu_pm_disable(vdev);
ivpu_shutdown(vdev);
ivpu_job_done_thread_fini(vdev);
ivpu_ipc_fini(vdev);
ivpu_fw_fini(vdev);
ivpu_mmu_global_context_fini(vdev);
drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->submitted_jobs_xa));
xa_destroy(&vdev->submitted_jobs_xa);
drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->context_xa));
xa_destroy(&vdev->context_xa);
}
static struct pci_device_id ivpu_pci_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_MTL) },
{ }
};
MODULE_DEVICE_TABLE(pci, ivpu_pci_ids);
static int ivpu_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct ivpu_device *vdev;
int ret;
vdev = devm_drm_dev_alloc(&pdev->dev, &driver, struct ivpu_device, drm);
if (IS_ERR(vdev))
return PTR_ERR(vdev);
pci_set_drvdata(pdev, vdev);
ret = ivpu_dev_init(vdev);
if (ret) {
dev_err(&pdev->dev, "Failed to initialize VPU device: %d\n", ret);
return ret;
}
ret = drm_dev_register(&vdev->drm, 0);
if (ret) {
dev_err(&pdev->dev, "Failed to register DRM device: %d\n", ret);
ivpu_dev_fini(vdev);
}
return ret;
}
static void ivpu_remove(struct pci_dev *pdev)
{
struct ivpu_device *vdev = pci_get_drvdata(pdev);
drm_dev_unregister(&vdev->drm);
ivpu_dev_fini(vdev);
}
static const struct dev_pm_ops ivpu_drv_pci_pm = {
SET_SYSTEM_SLEEP_PM_OPS(ivpu_pm_suspend_cb, ivpu_pm_resume_cb)
SET_RUNTIME_PM_OPS(ivpu_pm_runtime_suspend_cb, ivpu_pm_runtime_resume_cb, NULL)
};
static const struct pci_error_handlers ivpu_drv_pci_err = {
.reset_prepare = ivpu_pm_reset_prepare_cb,
.reset_done = ivpu_pm_reset_done_cb,
};
static struct pci_driver ivpu_pci_driver = {
.name = KBUILD_MODNAME,
.id_table = ivpu_pci_ids,
.probe = ivpu_probe,
.remove = ivpu_remove,
.driver = {
.pm = &ivpu_drv_pci_pm,
},
.err_handler = &ivpu_drv_pci_err,
};
module_pci_driver(ivpu_pci_driver);
MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION(DRIVER_DESC);
MODULE_LICENSE("GPL and additional rights");
MODULE_VERSION(DRIVER_VERSION_STR);

@ -0,0 +1,190 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2020-2023 Intel Corporation
*/
#ifndef __IVPU_DRV_H__
#define __IVPU_DRV_H__
#include <drm/drm_device.h>
#include <drm/drm_managed.h>
#include <drm/drm_mm.h>
#include <drm/drm_print.h>
#include <linux/pci.h>
#include <linux/xarray.h>
#include <uapi/drm/ivpu_accel.h>
#include "ivpu_mmu_context.h"
#define DRIVER_NAME "intel_vpu"
#define DRIVER_DESC "Driver for Intel Versatile Processing Unit (VPU)"
#define DRIVER_DATE "20230117"
#define PCI_DEVICE_ID_MTL 0x7d1d
#define IVPU_GLOBAL_CONTEXT_MMU_SSID 0
#define IVPU_CONTEXT_LIMIT 64
#define IVPU_NUM_ENGINES 2
#define IVPU_PLATFORM_SILICON 0
#define IVPU_PLATFORM_SIMICS 2
#define IVPU_PLATFORM_FPGA 3
#define IVPU_PLATFORM_INVALID 8
#define IVPU_DBG_REG BIT(0)
#define IVPU_DBG_IRQ BIT(1)
#define IVPU_DBG_MMU BIT(2)
#define IVPU_DBG_FILE BIT(3)
#define IVPU_DBG_MISC BIT(4)
#define IVPU_DBG_FW_BOOT BIT(5)
#define IVPU_DBG_PM BIT(6)
#define IVPU_DBG_IPC BIT(7)
#define IVPU_DBG_BO BIT(8)
#define IVPU_DBG_JOB BIT(9)
#define IVPU_DBG_JSM BIT(10)
#define IVPU_DBG_KREF BIT(11)
#define IVPU_DBG_RPM BIT(12)
#define ivpu_err(vdev, fmt, ...) \
drm_err(&(vdev)->drm, "%s(): " fmt, __func__, ##__VA_ARGS__)
#define ivpu_err_ratelimited(vdev, fmt, ...) \
drm_err_ratelimited(&(vdev)->drm, "%s(): " fmt, __func__, ##__VA_ARGS__)
#define ivpu_warn(vdev, fmt, ...) \
drm_warn(&(vdev)->drm, "%s(): " fmt, __func__, ##__VA_ARGS__)
#define ivpu_warn_ratelimited(vdev, fmt, ...) \
drm_err_ratelimited(&(vdev)->drm, "%s(): " fmt, __func__, ##__VA_ARGS__)
#define ivpu_info(vdev, fmt, ...) drm_info(&(vdev)->drm, fmt, ##__VA_ARGS__)
#define ivpu_dbg(vdev, type, fmt, args...) do { \
if (unlikely(IVPU_DBG_##type & ivpu_dbg_mask)) \
dev_dbg((vdev)->drm.dev, "[%s] " fmt, #type, ##args); \
} while (0)
#define IVPU_WA(wa_name) (vdev->wa.wa_name)
struct ivpu_wa_table {
bool punit_disabled;
bool clear_runtime_mem;
};
struct ivpu_hw_info;
struct ivpu_mmu_info;
struct ivpu_fw_info;
struct ivpu_ipc_info;
struct ivpu_pm_info;
struct ivpu_device {
struct drm_device drm;
void __iomem *regb;
void __iomem *regv;
u32 platform;
u32 irq;
struct ivpu_wa_table wa;
struct ivpu_hw_info *hw;
struct ivpu_mmu_info *mmu;
struct ivpu_fw_info *fw;
struct ivpu_ipc_info *ipc;
struct ivpu_pm_info *pm;
struct ivpu_mmu_context gctx;
struct xarray context_xa;
struct xa_limit context_xa_limit;
struct xarray submitted_jobs_xa;
struct task_struct *job_done_thread;
atomic64_t unique_id_counter;
struct {
int boot;
int jsm;
int tdr;
int reschedule_suspend;
} timeout;
};
/*
* file_priv has its own refcount (ref) that allows user space to close the fd
* without blocking even if VPU is still processing some jobs.
*/
struct ivpu_file_priv {
struct kref ref;
struct ivpu_device *vdev;
struct mutex lock; /* Protects cmdq */
struct ivpu_cmdq *cmdq[IVPU_NUM_ENGINES];
struct ivpu_mmu_context ctx;
u32 priority;
bool has_mmu_faults;
};
extern int ivpu_dbg_mask;
extern u8 ivpu_pll_min_ratio;
extern u8 ivpu_pll_max_ratio;
#define IVPU_TEST_MODE_DISABLED 0
#define IVPU_TEST_MODE_FW_TEST 1
#define IVPU_TEST_MODE_NULL_HW 2
extern int ivpu_test_mode;
struct ivpu_file_priv *ivpu_file_priv_get(struct ivpu_file_priv *file_priv);
struct ivpu_file_priv *ivpu_file_priv_get_by_ctx_id(struct ivpu_device *vdev, unsigned long id);
void ivpu_file_priv_put(struct ivpu_file_priv **link);
int ivpu_boot(struct ivpu_device *vdev);
int ivpu_shutdown(struct ivpu_device *vdev);
static inline bool ivpu_is_mtl(struct ivpu_device *vdev)
{
return to_pci_dev(vdev->drm.dev)->device == PCI_DEVICE_ID_MTL;
}
static inline u8 ivpu_revision(struct ivpu_device *vdev)
{
return to_pci_dev(vdev->drm.dev)->revision;
}
static inline u16 ivpu_device_id(struct ivpu_device *vdev)
{
return to_pci_dev(vdev->drm.dev)->device;
}
static inline struct ivpu_device *to_ivpu_device(struct drm_device *dev)
{
return container_of(dev, struct ivpu_device, drm);
}
static inline u32 ivpu_get_context_count(struct ivpu_device *vdev)
{
struct xa_limit ctx_limit = vdev->context_xa_limit;
return (ctx_limit.max - ctx_limit.min + 1);
}
static inline u32 ivpu_get_platform(struct ivpu_device *vdev)
{
WARN_ON_ONCE(vdev->platform == IVPU_PLATFORM_INVALID);
return vdev->platform;
}
static inline bool ivpu_is_silicon(struct ivpu_device *vdev)
{
return ivpu_get_platform(vdev) == IVPU_PLATFORM_SILICON;
}
static inline bool ivpu_is_simics(struct ivpu_device *vdev)
{
return ivpu_get_platform(vdev) == IVPU_PLATFORM_SIMICS;
}
static inline bool ivpu_is_fpga(struct ivpu_device *vdev)
{
return ivpu_get_platform(vdev) == IVPU_PLATFORM_FPGA;
}
#endif /* __IVPU_DRV_H__ */

@ -0,0 +1,423 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2020-2023 Intel Corporation
*/
#include <linux/firmware.h>
#include <linux/highmem.h>
#include <linux/moduleparam.h>
#include <linux/pci.h>
#include "vpu_boot_api.h"
#include "ivpu_drv.h"
#include "ivpu_fw.h"
#include "ivpu_gem.h"
#include "ivpu_hw.h"
#include "ivpu_ipc.h"
#include "ivpu_pm.h"
#define FW_GLOBAL_MEM_START (2ull * SZ_1G)
#define FW_GLOBAL_MEM_END (3ull * SZ_1G)
#define FW_SHARED_MEM_SIZE SZ_256M /* Must be aligned to FW_SHARED_MEM_ALIGNMENT */
#define FW_SHARED_MEM_ALIGNMENT SZ_128K /* VPU MTRR limitation */
#define FW_RUNTIME_MAX_SIZE SZ_512M
#define FW_SHAVE_NN_MAX_SIZE SZ_2M
#define FW_RUNTIME_MIN_ADDR (FW_GLOBAL_MEM_START)
#define FW_RUNTIME_MAX_ADDR (FW_GLOBAL_MEM_END - FW_SHARED_MEM_SIZE)
#define FW_VERSION_HEADER_SIZE SZ_4K
#define FW_FILE_IMAGE_OFFSET (VPU_FW_HEADER_SIZE + FW_VERSION_HEADER_SIZE)
#define WATCHDOG_MSS_REDIRECT 32
#define WATCHDOG_NCE_REDIRECT 33
#define ADDR_TO_L2_CACHE_CFG(addr) ((addr) >> 31)
#define IVPU_FW_CHECK_API(vdev, fw_hdr, name) ivpu_fw_check_api(vdev, fw_hdr, #name, \
VPU_##name##_API_VER_INDEX, \
VPU_##name##_API_VER_MAJOR, \
VPU_##name##_API_VER_MINOR)
static char *ivpu_firmware;
module_param_named_unsafe(firmware, ivpu_firmware, charp, 0644);
MODULE_PARM_DESC(firmware, "VPU firmware binary in /lib/firmware/..");
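/*
 * Request the firmware image: honor the "firmware" module parameter if set,
 * otherwise try the known firmware file names in order until one loads.
 */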
static int ivpu_fw_request(struct ivpu_device *vdev)
{
static const char * const fw_names[] = {
"mtl_vpu.bin",
"intel/vpu/mtl_vpu_v0.0.bin"
};
int ret = -ENOENT;
int i;
if (ivpu_firmware)
return request_firmware(&vdev->fw->file, ivpu_firmware, vdev->drm.dev);
for (i = 0; i < ARRAY_SIZE(fw_names); i++) {
ret = firmware_request_nowarn(&vdev->fw->file, fw_names[i], vdev->drm.dev);
if (!ret)
return 0;
}
ivpu_err(vdev, "Failed to request firmware: %d\n", ret);
return ret;
}
static void
ivpu_fw_check_api(struct ivpu_device *vdev, const struct vpu_firmware_header *fw_hdr,
const char *str, int index, u16 expected_major, u16 expected_minor)
{
u16 major = (u16)(fw_hdr->api_version[index] >> 16);
u16 minor = (u16)(fw_hdr->api_version[index]);
if (major != expected_major) {
ivpu_warn(vdev, "Incompatible FW %s API version: %d.%d (expected %d.%d)\n",
str, major, minor, expected_major, expected_minor);
}
ivpu_dbg(vdev, FW_BOOT, "FW %s API version: %d.%d (expected %d.%d)\n",
str, major, minor, expected_major, expected_minor);
}
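/*
 * Validate the firmware header (version, runtime/image addresses and sizes,
 * entry point) and cache the parsed layout in vdev->fw for later use.
 */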
static int ivpu_fw_parse(struct ivpu_device *vdev)
{
struct ivpu_fw_info *fw = vdev->fw;
const struct vpu_firmware_header *fw_hdr = (const void *)fw->file->data;
u64 runtime_addr, image_load_addr, runtime_size, image_size;
if (fw->file->size <= FW_FILE_IMAGE_OFFSET) {
ivpu_err(vdev, "Firmware file is too small: %zu\n", fw->file->size);
return -EINVAL;
}
if (fw_hdr->header_version != VPU_FW_HEADER_VERSION) {
ivpu_err(vdev, "Invalid firmware header version: %u\n", fw_hdr->header_version);
return -EINVAL;
}
runtime_addr = fw_hdr->boot_params_load_address;
runtime_size = fw_hdr->runtime_size;
image_load_addr = fw_hdr->image_load_address;
image_size = fw_hdr->image_size;
if (runtime_addr < FW_RUNTIME_MIN_ADDR || runtime_addr > FW_RUNTIME_MAX_ADDR) {
ivpu_err(vdev, "Invalid firmware runtime address: 0x%llx\n", runtime_addr);
return -EINVAL;
}
if (runtime_size < fw->file->size || runtime_size > FW_RUNTIME_MAX_SIZE) {
ivpu_err(vdev, "Invalid firmware runtime size: %llu\n", runtime_size);
return -EINVAL;
}
if (FW_FILE_IMAGE_OFFSET + image_size > fw->file->size) {
ivpu_err(vdev, "Invalid image size: %llu\n", image_size);
return -EINVAL;
}
if (image_load_addr < runtime_addr ||
image_load_addr + image_size > runtime_addr + runtime_size) {
ivpu_err(vdev, "Invalid firmware load address size: 0x%llx and size %llu\n",
image_load_addr, image_size);
return -EINVAL;
}
if (fw_hdr->shave_nn_fw_size > FW_SHAVE_NN_MAX_SIZE) {
ivpu_err(vdev, "SHAVE NN firmware is too big: %u\n", fw_hdr->shave_nn_fw_size);
return -EINVAL;
}
if (fw_hdr->entry_point < image_load_addr ||
fw_hdr->entry_point >= image_load_addr + image_size) {
ivpu_err(vdev, "Invalid entry point: 0x%llx\n", fw_hdr->entry_point);
return -EINVAL;
}
fw->runtime_addr = runtime_addr;
fw->runtime_size = runtime_size;
fw->image_load_offset = image_load_addr - runtime_addr;
fw->image_size = image_size;
fw->shave_nn_size = PAGE_ALIGN(fw_hdr->shave_nn_fw_size);
fw->cold_boot_entry_point = fw_hdr->entry_point;
fw->entry_point = fw->cold_boot_entry_point;
ivpu_dbg(vdev, FW_BOOT, "Header version: 0x%x, format 0x%x\n",
fw_hdr->header_version, fw_hdr->image_format);
ivpu_dbg(vdev, FW_BOOT, "Size: file %lu image %u runtime %u shavenn %u\n",
fw->file->size, fw->image_size, fw->runtime_size, fw->shave_nn_size);
ivpu_dbg(vdev, FW_BOOT, "Address: runtime 0x%llx, load 0x%llx, entry point 0x%llx\n",
fw->runtime_addr, image_load_addr, fw->entry_point);
ivpu_dbg(vdev, FW_BOOT, "FW version: %s\n", (char *)fw_hdr + VPU_FW_HEADER_SIZE);
IVPU_FW_CHECK_API(vdev, fw_hdr, BOOT);
IVPU_FW_CHECK_API(vdev, fw_hdr, JSM);
return 0;
}
static void ivpu_fw_release(struct ivpu_device *vdev)
{
release_firmware(vdev->fw->file);
}
static int ivpu_fw_update_global_range(struct ivpu_device *vdev)
{
struct ivpu_fw_info *fw = vdev->fw;
u64 start = ALIGN(fw->runtime_addr + fw->runtime_size, FW_SHARED_MEM_ALIGNMENT);
u64 size = FW_SHARED_MEM_SIZE;
if (start + size > FW_GLOBAL_MEM_END) {
ivpu_err(vdev, "No space for shared region, start %lld, size %lld\n", start, size);
return -EINVAL;
}
ivpu_hw_init_range(&vdev->hw->ranges.global_low, start, size);
return 0;
}
static int ivpu_fw_mem_init(struct ivpu_device *vdev)
{
struct ivpu_fw_info *fw = vdev->fw;
int ret;
ret = ivpu_fw_update_global_range(vdev);
if (ret)
return ret;
fw->mem = ivpu_bo_alloc_internal(vdev, fw->runtime_addr, fw->runtime_size, DRM_IVPU_BO_WC);
if (!fw->mem) {
ivpu_err(vdev, "Failed to allocate firmware runtime memory\n");
return -ENOMEM;
}
if (fw->shave_nn_size) {
fw->mem_shave_nn = ivpu_bo_alloc_internal(vdev, vdev->hw->ranges.global_high.start,
fw->shave_nn_size, DRM_IVPU_BO_UNCACHED);
if (!fw->mem_shave_nn) {
ivpu_err(vdev, "Failed to allocate shavenn buffer\n");
ivpu_bo_free_internal(fw->mem);
return -ENOMEM;
}
}
return 0;
}
static void ivpu_fw_mem_fini(struct ivpu_device *vdev)
{
struct ivpu_fw_info *fw = vdev->fw;
if (fw->mem_shave_nn) {
ivpu_bo_free_internal(fw->mem_shave_nn);
fw->mem_shave_nn = NULL;
}
ivpu_bo_free_internal(fw->mem);
fw->mem = NULL;
}
int ivpu_fw_init(struct ivpu_device *vdev)
{
int ret;
ret = ivpu_fw_request(vdev);
if (ret)
return ret;
ret = ivpu_fw_parse(vdev);
if (ret)
goto err_fw_release;
ret = ivpu_fw_mem_init(vdev);
if (ret)
goto err_fw_release;
return 0;
err_fw_release:
ivpu_fw_release(vdev);
return ret;
}
void ivpu_fw_fini(struct ivpu_device *vdev)
{
ivpu_fw_mem_fini(vdev);
ivpu_fw_release(vdev);
}
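/*
 * Copy the firmware image into the runtime buffer at image_load_offset and
 * zero the surrounding memory (always below the image, and also above it
 * when the clear_runtime_mem workaround is active), then flush WC buffers.
 */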
int ivpu_fw_load(struct ivpu_device *vdev)
{
struct ivpu_fw_info *fw = vdev->fw;
u64 image_end_offset = fw->image_load_offset + fw->image_size;
memset(fw->mem->kvaddr, 0, fw->image_load_offset);
memcpy(fw->mem->kvaddr + fw->image_load_offset,
fw->file->data + FW_FILE_IMAGE_OFFSET, fw->image_size);
if (IVPU_WA(clear_runtime_mem)) {
u8 *start = fw->mem->kvaddr + image_end_offset;
u64 size = fw->mem->base.size - image_end_offset;
memset(start, 0, size);
}
wmb(); /* Flush WC buffers after writing fw->mem */
return 0;
}
static void ivpu_fw_boot_params_print(struct ivpu_device *vdev, struct vpu_boot_params *boot_params)
{
ivpu_dbg(vdev, FW_BOOT, "boot_params.magic = 0x%x\n",
boot_params->magic);
ivpu_dbg(vdev, FW_BOOT, "boot_params.vpu_id = 0x%x\n",
boot_params->vpu_id);
ivpu_dbg(vdev, FW_BOOT, "boot_params.vpu_count = 0x%x\n",
boot_params->vpu_count);
ivpu_dbg(vdev, FW_BOOT, "boot_params.frequency = %u\n",
boot_params->frequency);
ivpu_dbg(vdev, FW_BOOT, "boot_params.perf_clk_frequency = %u\n",
boot_params->perf_clk_frequency);
ivpu_dbg(vdev, FW_BOOT, "boot_params.ipc_header_area_start = 0x%llx\n",
boot_params->ipc_header_area_start);
ivpu_dbg(vdev, FW_BOOT, "boot_params.ipc_header_area_size = 0x%x\n",
boot_params->ipc_header_area_size);
ivpu_dbg(vdev, FW_BOOT, "boot_params.shared_region_base = 0x%llx\n",
boot_params->shared_region_base);
ivpu_dbg(vdev, FW_BOOT, "boot_params.shared_region_size = 0x%x\n",
boot_params->shared_region_size);
ivpu_dbg(vdev, FW_BOOT, "boot_params.ipc_payload_area_start = 0x%llx\n",
boot_params->ipc_payload_area_start);
ivpu_dbg(vdev, FW_BOOT, "boot_params.ipc_payload_area_size = 0x%x\n",
boot_params->ipc_payload_area_size);
ivpu_dbg(vdev, FW_BOOT, "boot_params.global_aliased_pio_base = 0x%llx\n",
boot_params->global_aliased_pio_base);
ivpu_dbg(vdev, FW_BOOT, "boot_params.global_aliased_pio_size = 0x%x\n",
boot_params->global_aliased_pio_size);
ivpu_dbg(vdev, FW_BOOT, "boot_params.autoconfig = 0x%x\n",
boot_params->autoconfig);
ivpu_dbg(vdev, FW_BOOT, "boot_params.cache_defaults[VPU_BOOT_L2_CACHE_CFG_NN].use = 0x%x\n",
boot_params->cache_defaults[VPU_BOOT_L2_CACHE_CFG_NN].use);
ivpu_dbg(vdev, FW_BOOT, "boot_params.cache_defaults[VPU_BOOT_L2_CACHE_CFG_NN].cfg = 0x%x\n",
boot_params->cache_defaults[VPU_BOOT_L2_CACHE_CFG_NN].cfg);
ivpu_dbg(vdev, FW_BOOT, "boot_params.global_memory_allocator_base = 0x%llx\n",
boot_params->global_memory_allocator_base);
ivpu_dbg(vdev, FW_BOOT, "boot_params.global_memory_allocator_size = 0x%x\n",
boot_params->global_memory_allocator_size);
ivpu_dbg(vdev, FW_BOOT, "boot_params.shave_nn_fw_base = 0x%llx\n",
boot_params->shave_nn_fw_base);
ivpu_dbg(vdev, FW_BOOT, "boot_params.watchdog_irq_mss = 0x%x\n",
boot_params->watchdog_irq_mss);
ivpu_dbg(vdev, FW_BOOT, "boot_params.watchdog_irq_nce = 0x%x\n",
boot_params->watchdog_irq_nce);
ivpu_dbg(vdev, FW_BOOT, "boot_params.host_to_vpu_irq = 0x%x\n",
boot_params->host_to_vpu_irq);
ivpu_dbg(vdev, FW_BOOT, "boot_params.job_done_irq = 0x%x\n",
boot_params->job_done_irq);
ivpu_dbg(vdev, FW_BOOT, "boot_params.host_version_id = 0x%x\n",
boot_params->host_version_id);
ivpu_dbg(vdev, FW_BOOT, "boot_params.si_stepping = 0x%x\n",
boot_params->si_stepping);
ivpu_dbg(vdev, FW_BOOT, "boot_params.device_id = 0x%llx\n",
boot_params->device_id);
ivpu_dbg(vdev, FW_BOOT, "boot_params.feature_exclusion = 0x%llx\n",
boot_params->feature_exclusion);
ivpu_dbg(vdev, FW_BOOT, "boot_params.sku = 0x%llx\n",
boot_params->sku);
ivpu_dbg(vdev, FW_BOOT, "boot_params.min_freq_pll_ratio = 0x%x\n",
boot_params->min_freq_pll_ratio);
ivpu_dbg(vdev, FW_BOOT, "boot_params.pn_freq_pll_ratio = 0x%x\n",
boot_params->pn_freq_pll_ratio);
ivpu_dbg(vdev, FW_BOOT, "boot_params.max_freq_pll_ratio = 0x%x\n",
boot_params->max_freq_pll_ratio);
ivpu_dbg(vdev, FW_BOOT, "boot_params.default_trace_level = 0x%x\n",
boot_params->default_trace_level);
ivpu_dbg(vdev, FW_BOOT, "boot_params.tracing_buff_message_format_mask = 0x%llx\n",
boot_params->tracing_buff_message_format_mask);
ivpu_dbg(vdev, FW_BOOT, "boot_params.trace_destination_mask = 0x%x\n",
boot_params->trace_destination_mask);
ivpu_dbg(vdev, FW_BOOT, "boot_params.trace_hw_component_mask = 0x%llx\n",
boot_params->trace_hw_component_mask);
ivpu_dbg(vdev, FW_BOOT, "boot_params.boot_type = 0x%x\n",
boot_params->boot_type);
ivpu_dbg(vdev, FW_BOOT, "boot_params.punit_telemetry_sram_base = 0x%llx\n",
boot_params->punit_telemetry_sram_base);
ivpu_dbg(vdev, FW_BOOT, "boot_params.punit_telemetry_sram_size = 0x%llx\n",
boot_params->punit_telemetry_sram_size);
ivpu_dbg(vdev, FW_BOOT, "boot_params.vpu_telemetry_enable = 0x%x\n",
boot_params->vpu_telemetry_enable);
}
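/*
 * Fill in the boot parameters at the start of firmware memory. On warm boot
 * only the save/restore return address is cleared; a cold boot programs the
 * shared region, IPC areas, watchdog redirects, PLL ratios and telemetry.
 */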
void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params *boot_params)
{
struct ivpu_bo *ipc_mem_rx = vdev->ipc->mem_rx;
/* In case of warm boot we only have to reset the entrypoint addr */
if (!ivpu_fw_is_cold_boot(vdev)) {
boot_params->save_restore_ret_address = 0;
vdev->pm->is_warmboot = true;
return;
}
vdev->pm->is_warmboot = false;
boot_params->magic = VPU_BOOT_PARAMS_MAGIC;
boot_params->vpu_id = to_pci_dev(vdev->drm.dev)->bus->number;
boot_params->frequency = ivpu_hw_reg_pll_freq_get(vdev);
/*
* Uncached region of VPU address space, covers IPC buffers, job queues
* and log buffers, programmable to L2$ Uncached by VPU MTRR
*/
boot_params->shared_region_base = vdev->hw->ranges.global_low.start;
boot_params->shared_region_size = vdev->hw->ranges.global_low.end -
vdev->hw->ranges.global_low.start;
boot_params->ipc_header_area_start = ipc_mem_rx->vpu_addr;
boot_params->ipc_header_area_size = ipc_mem_rx->base.size / 2;
boot_params->ipc_payload_area_start = ipc_mem_rx->vpu_addr + ipc_mem_rx->base.size / 2;
boot_params->ipc_payload_area_size = ipc_mem_rx->base.size / 2;
boot_params->global_aliased_pio_base =
vdev->hw->ranges.global_aliased_pio.start;
boot_params->global_aliased_pio_size =
ivpu_hw_range_size(&vdev->hw->ranges.global_aliased_pio);
/* Allow configuration for L2C_PAGE_TABLE with boot param value */
boot_params->autoconfig = 1;
/* Enable L2 cache for first 2GB of high memory */
boot_params->cache_defaults[VPU_BOOT_L2_CACHE_CFG_NN].use = 1;
boot_params->cache_defaults[VPU_BOOT_L2_CACHE_CFG_NN].cfg =
ADDR_TO_L2_CACHE_CFG(vdev->hw->ranges.global_high.start);
if (vdev->fw->mem_shave_nn)
boot_params->shave_nn_fw_base = vdev->fw->mem_shave_nn->vpu_addr;
boot_params->watchdog_irq_mss = WATCHDOG_MSS_REDIRECT;
boot_params->watchdog_irq_nce = WATCHDOG_NCE_REDIRECT;
boot_params->si_stepping = ivpu_revision(vdev);
boot_params->device_id = ivpu_device_id(vdev);
boot_params->feature_exclusion = vdev->hw->tile_fuse;
boot_params->sku = vdev->hw->sku;
boot_params->min_freq_pll_ratio = vdev->hw->pll.min_ratio;
boot_params->pn_freq_pll_ratio = vdev->hw->pll.pn_ratio;
boot_params->max_freq_pll_ratio = vdev->hw->pll.max_ratio;
boot_params->punit_telemetry_sram_base = ivpu_hw_reg_telemetry_offset_get(vdev);
boot_params->punit_telemetry_sram_size = ivpu_hw_reg_telemetry_size_get(vdev);
boot_params->vpu_telemetry_enable = ivpu_hw_reg_telemetry_enable_get(vdev);
wmb(); /* Flush WC buffers after writing bootparams */
ivpu_fw_boot_params_print(vdev, boot_params);
}

@ -0,0 +1,38 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2020-2023 Intel Corporation
*/
#ifndef __IVPU_FW_H__
#define __IVPU_FW_H__
struct ivpu_device;
struct ivpu_bo;
struct vpu_boot_params;
struct ivpu_fw_info {
const struct firmware *file;
struct ivpu_bo *mem;
struct ivpu_bo *mem_shave_nn;
struct ivpu_bo *mem_log_crit;
struct ivpu_bo *mem_log_verb;
u64 runtime_addr;
u32 runtime_size;
u64 image_load_offset;
u32 image_size;
u32 shave_nn_size;
u64 entry_point; /* Cold or warm boot entry point for next boot */
u64 cold_boot_entry_point;
};
int ivpu_fw_init(struct ivpu_device *vdev);
void ivpu_fw_fini(struct ivpu_device *vdev);
int ivpu_fw_load(struct ivpu_device *vdev);
void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params *bp);
static inline bool ivpu_fw_is_cold_boot(struct ivpu_device *vdev)
{
return vdev->fw->entry_point == vdev->fw->cold_boot_entry_point;
}
#endif /* __IVPU_FW_H__ */

@ -0,0 +1,753 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2020-2023 Intel Corporation
*/
#include <linux/dma-buf.h>
#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/set_memory.h>
#include <linux/xarray.h>
#include <drm/drm_cache.h>
#include <drm/drm_debugfs.h>
#include <drm/drm_file.h>
#include <drm/drm_utils.h>
#include "ivpu_drv.h"
#include "ivpu_gem.h"
#include "ivpu_hw.h"
#include "ivpu_mmu.h"
#include "ivpu_mmu_context.h"
MODULE_IMPORT_NS(DMA_BUF);
static const struct drm_gem_object_funcs ivpu_gem_funcs;
static struct lock_class_key prime_bo_lock_class_key;
static int __must_check prime_alloc_pages_locked(struct ivpu_bo *bo)
{
/* Pages are managed by the underlying dma-buf */
return 0;
}
static void prime_free_pages_locked(struct ivpu_bo *bo)
{
/* Pages are managed by the underlying dma-buf */
}
static int prime_map_pages_locked(struct ivpu_bo *bo)
{
struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
struct sg_table *sgt;
WARN_ON(!bo->base.import_attach);
sgt = dma_buf_map_attachment(bo->base.import_attach, DMA_BIDIRECTIONAL);
if (IS_ERR(sgt)) {
ivpu_err(vdev, "Failed to map attachment: %ld\n", PTR_ERR(sgt));
return PTR_ERR(sgt);
}
bo->sgt = sgt;
return 0;
}
static void prime_unmap_pages_locked(struct ivpu_bo *bo)
{
WARN_ON(!bo->base.import_attach);
dma_buf_unmap_attachment(bo->base.import_attach, bo->sgt, DMA_BIDIRECTIONAL);
bo->sgt = NULL;
}
static const struct ivpu_bo_ops prime_ops = {
.type = IVPU_BO_TYPE_PRIME,
.name = "prime",
.alloc_pages = prime_alloc_pages_locked,
.free_pages = prime_free_pages_locked,
.map_pages = prime_map_pages_locked,
.unmap_pages = prime_unmap_pages_locked,
};
static int __must_check shmem_alloc_pages_locked(struct ivpu_bo *bo)
{
int npages = bo->base.size >> PAGE_SHIFT;
struct page **pages;
pages = drm_gem_get_pages(&bo->base);
if (IS_ERR(pages))
return PTR_ERR(pages);
if (bo->flags & DRM_IVPU_BO_WC)
set_pages_array_wc(pages, npages);
else if (bo->flags & DRM_IVPU_BO_UNCACHED)
set_pages_array_uc(pages, npages);
bo->pages = pages;
return 0;
}
static void shmem_free_pages_locked(struct ivpu_bo *bo)
{
if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED)
set_pages_array_wb(bo->pages, bo->base.size >> PAGE_SHIFT);
drm_gem_put_pages(&bo->base, bo->pages, true, false);
bo->pages = NULL;
}
static int ivpu_bo_map_pages_locked(struct ivpu_bo *bo)
{
int npages = bo->base.size >> PAGE_SHIFT;
struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
struct sg_table *sgt;
int ret;
sgt = drm_prime_pages_to_sg(&vdev->drm, bo->pages, npages);
if (IS_ERR(sgt)) {
ivpu_err(vdev, "Failed to allocate sgtable\n");
return PTR_ERR(sgt);
}
ret = dma_map_sgtable(vdev->drm.dev, sgt, DMA_BIDIRECTIONAL, 0);
if (ret) {
ivpu_err(vdev, "Failed to map BO in IOMMU: %d\n", ret);
goto err_free_sgt;
}
bo->sgt = sgt;
return 0;
err_free_sgt:
kfree(sgt);
return ret;
}
static void ivpu_bo_unmap_pages_locked(struct ivpu_bo *bo)
{
struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
dma_unmap_sgtable(vdev->drm.dev, bo->sgt, DMA_BIDIRECTIONAL, 0);
sg_free_table(bo->sgt);
kfree(bo->sgt);
bo->sgt = NULL;
}
static const struct ivpu_bo_ops shmem_ops = {
.type = IVPU_BO_TYPE_SHMEM,
.name = "shmem",
.alloc_pages = shmem_alloc_pages_locked,
.free_pages = shmem_free_pages_locked,
.map_pages = ivpu_bo_map_pages_locked,
.unmap_pages = ivpu_bo_unmap_pages_locked,
};
static int __must_check internal_alloc_pages_locked(struct ivpu_bo *bo)
{
unsigned int i, npages = bo->base.size >> PAGE_SHIFT;
struct page **pages;
int ret;
pages = kvmalloc_array(npages, sizeof(*bo->pages), GFP_KERNEL);
if (!pages)
return -ENOMEM;
for (i = 0; i < npages; i++) {
pages[i] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
if (!pages[i]) {
ret = -ENOMEM;
goto err_free_pages;
}
cond_resched();
}
bo->pages = pages;
return 0;
err_free_pages:
while (i--)
put_page(pages[i]);
kvfree(pages);
return ret;
}
static void internal_free_pages_locked(struct ivpu_bo *bo)
{
unsigned int i, npages = bo->base.size >> PAGE_SHIFT;
for (i = 0; i < npages; i++)
put_page(bo->pages[i]);
kvfree(bo->pages);
bo->pages = NULL;
}
static const struct ivpu_bo_ops internal_ops = {
.type = IVPU_BO_TYPE_INTERNAL,
.name = "internal",
.alloc_pages = internal_alloc_pages_locked,
.free_pages = internal_free_pages_locked,
.map_pages = ivpu_bo_map_pages_locked,
.unmap_pages = ivpu_bo_unmap_pages_locked,
};
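/*
 * Three buffer backends share the ivpu_bo_ops interface: "prime" for
 * dma-buf imports, "shmem" for user-visible GEM objects and "internal"
 * for driver-internal allocations such as the firmware runtime buffer.
 * The helpers below allocate, map and pin their backing pages.
 */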
static int __must_check ivpu_bo_alloc_and_map_pages_locked(struct ivpu_bo *bo)
{
struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
int ret;
lockdep_assert_held(&bo->lock);
drm_WARN_ON(&vdev->drm, bo->sgt);
ret = bo->ops->alloc_pages(bo);
if (ret) {
ivpu_err(vdev, "Failed to allocate pages for BO: %d", ret);
return ret;
}
ret = bo->ops->map_pages(bo);
if (ret) {
ivpu_err(vdev, "Failed to map pages for BO: %d", ret);
goto err_free_pages;
}
return ret;
err_free_pages:
bo->ops->free_pages(bo);
return ret;
}
static void ivpu_bo_unmap_and_free_pages(struct ivpu_bo *bo)
{
mutex_lock(&bo->lock);
WARN_ON(!bo->sgt);
bo->ops->unmap_pages(bo);
WARN_ON(bo->sgt);
bo->ops->free_pages(bo);
WARN_ON(bo->pages);
mutex_unlock(&bo->lock);
}
/*
* ivpu_bo_pin() - pin the backing physical pages and map them to VPU.
*
* This function pins physical memory pages, then maps the physical pages
* to IOMMU address space and finally updates the VPU MMU page tables
* to allow the VPU to translate VPU address to IOMMU address.
*/
int __must_check ivpu_bo_pin(struct ivpu_bo *bo)
{
struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
int ret = 0;
mutex_lock(&bo->lock);
if (!bo->vpu_addr) {
ivpu_err(vdev, "vpu_addr not set for BO ctx_id: %d handle: %d\n",
bo->ctx->id, bo->handle);
ret = -EINVAL;
goto unlock;
}
if (!bo->sgt) {
ret = ivpu_bo_alloc_and_map_pages_locked(bo);
if (ret)
goto unlock;
}
if (!bo->mmu_mapped) {
ret = ivpu_mmu_context_map_sgt(vdev, bo->ctx, bo->vpu_addr, bo->sgt,
ivpu_bo_is_snooped(bo));
if (ret) {
ivpu_err(vdev, "Failed to map BO in MMU: %d\n", ret);
goto unlock;
}
bo->mmu_mapped = true;
}
unlock:
mutex_unlock(&bo->lock);
return ret;
}
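/*
 * Reserve a VPU virtual address range for the BO from the given MMU context
 * (user high or low range by default, depending on DRM_IVPU_BO_HIGH_MEM)
 * and link the BO into the context's BO list.
 */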
static int
ivpu_bo_alloc_vpu_addr(struct ivpu_bo *bo, struct ivpu_mmu_context *ctx,
const struct ivpu_addr_range *range)
{
struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
int ret;
if (!range) {
if (bo->flags & DRM_IVPU_BO_HIGH_MEM)
range = &vdev->hw->ranges.user_high;
else
range = &vdev->hw->ranges.user_low;
}
mutex_lock(&ctx->lock);
ret = ivpu_mmu_context_insert_node_locked(ctx, range, bo->base.size, &bo->mm_node);
if (!ret) {
bo->ctx = ctx;
bo->vpu_addr = bo->mm_node.start;
list_add_tail(&bo->ctx_node, &ctx->bo_list);
}
mutex_unlock(&ctx->lock);
return ret;
}
static void ivpu_bo_free_vpu_addr(struct ivpu_bo *bo)
{
struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
struct ivpu_mmu_context *ctx = bo->ctx;
ivpu_dbg(vdev, BO, "remove from ctx: ctx %d vpu_addr 0x%llx allocated %d mmu_mapped %d\n",
ctx->id, bo->vpu_addr, (bool)bo->sgt, bo->mmu_mapped);
mutex_lock(&bo->lock);
if (bo->mmu_mapped) {
drm_WARN_ON(&vdev->drm, !bo->sgt);
ivpu_mmu_context_unmap_sgt(vdev, ctx, bo->vpu_addr, bo->sgt);
bo->mmu_mapped = false;
}
mutex_lock(&ctx->lock);
list_del(&bo->ctx_node);
bo->vpu_addr = 0;
bo->ctx = NULL;
ivpu_mmu_context_remove_node_locked(ctx, &bo->mm_node);
mutex_unlock(&ctx->lock);
mutex_unlock(&bo->lock);
}
void ivpu_bo_remove_all_bos_from_context(struct ivpu_mmu_context *ctx)
{
struct ivpu_bo *bo, *tmp;
list_for_each_entry_safe(bo, tmp, &ctx->bo_list, ctx_node)
ivpu_bo_free_vpu_addr(bo);
}
static struct ivpu_bo *
ivpu_bo_alloc(struct ivpu_device *vdev, struct ivpu_mmu_context *mmu_context,
u64 size, u32 flags, const struct ivpu_bo_ops *ops,
const struct ivpu_addr_range *range, u64 user_ptr)
{
struct ivpu_bo *bo;
int ret = 0;
if (drm_WARN_ON(&vdev->drm, size == 0 || !PAGE_ALIGNED(size)))
return ERR_PTR(-EINVAL);
switch (flags & DRM_IVPU_BO_CACHE_MASK) {
case DRM_IVPU_BO_CACHED:
case DRM_IVPU_BO_UNCACHED:
case DRM_IVPU_BO_WC:
break;
default:
return ERR_PTR(-EINVAL);
}
bo = kzalloc(sizeof(*bo), GFP_KERNEL);
if (!bo)
return ERR_PTR(-ENOMEM);
mutex_init(&bo->lock);
bo->base.funcs = &ivpu_gem_funcs;
bo->flags = flags;
bo->ops = ops;
bo->user_ptr = user_ptr;
if (ops->type == IVPU_BO_TYPE_SHMEM)
ret = drm_gem_object_init(&vdev->drm, &bo->base, size);
else
drm_gem_private_object_init(&vdev->drm, &bo->base, size);
if (ret) {
ivpu_err(vdev, "Failed to initialize drm object\n");
goto err_free;
}
if (flags & DRM_IVPU_BO_MAPPABLE) {
ret = drm_gem_create_mmap_offset(&bo->base);
if (ret) {
ivpu_err(vdev, "Failed to allocate mmap offset\n");
goto err_release;
}
}
if (mmu_context) {
ret = ivpu_bo_alloc_vpu_addr(bo, mmu_context, range);
if (ret) {
ivpu_err(vdev, "Failed to add BO to context: %d\n", ret);
goto err_release;
}
}
return bo;
err_release:
drm_gem_object_release(&bo->base);
err_free:
kfree(bo);
return ERR_PTR(ret);
}
static void ivpu_bo_free(struct drm_gem_object *obj)
{
struct ivpu_bo *bo = to_ivpu_bo(obj);
struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
if (bo->ctx)
ivpu_dbg(vdev, BO, "free: ctx %d vpu_addr 0x%llx allocated %d mmu_mapped %d\n",
bo->ctx->id, bo->vpu_addr, (bool)bo->sgt, bo->mmu_mapped);
else
ivpu_dbg(vdev, BO, "free: ctx (released) allocated %d mmu_mapped %d\n",
(bool)bo->sgt, bo->mmu_mapped);
drm_WARN_ON(&vdev->drm, !dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_READ));
vunmap(bo->kvaddr);
if (bo->ctx)
ivpu_bo_free_vpu_addr(bo);
if (bo->sgt)
ivpu_bo_unmap_and_free_pages(bo);
if (bo->base.import_attach)
drm_prime_gem_destroy(&bo->base, bo->sgt);
drm_gem_object_release(&bo->base);
mutex_destroy(&bo->lock);
kfree(bo);
}
static int ivpu_bo_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
{
struct ivpu_bo *bo = to_ivpu_bo(obj);
struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
ivpu_dbg(vdev, BO, "mmap: ctx %u handle %u vpu_addr 0x%llx size %zu type %s",
bo->ctx->id, bo->handle, bo->vpu_addr, bo->base.size, bo->ops->name);
if (obj->import_attach) {
/* Drop the reference drm_gem_mmap_obj() acquired. */
drm_gem_object_put(obj);
vma->vm_private_data = NULL;
return dma_buf_mmap(obj->dma_buf, vma, 0);
}
vma->vm_flags |= VM_PFNMAP | VM_DONTEXPAND;
vma->vm_page_prot = ivpu_bo_pgprot(bo, vm_get_page_prot(vma->vm_flags));
return 0;
}
static struct sg_table *ivpu_bo_get_sg_table(struct drm_gem_object *obj)
{
struct ivpu_bo *bo = to_ivpu_bo(obj);
loff_t npages = obj->size >> PAGE_SHIFT;
int ret = 0;
mutex_lock(&bo->lock);
if (!bo->sgt)
ret = ivpu_bo_alloc_and_map_pages_locked(bo);
mutex_unlock(&bo->lock);
if (ret)
return ERR_PTR(ret);
return drm_prime_pages_to_sg(obj->dev, bo->pages, npages);
}
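/*
 * Page-fault handler for mmapped BOs: allocate and map the backing pages on
 * first touch, then insert the faulting page's PFN into the user mapping.
 */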
static vm_fault_t ivpu_vm_fault(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
struct drm_gem_object *obj = vma->vm_private_data;
struct ivpu_bo *bo = to_ivpu_bo(obj);
loff_t npages = obj->size >> PAGE_SHIFT;
pgoff_t page_offset;
struct page *page;
vm_fault_t ret;
int err;
mutex_lock(&bo->lock);
if (!bo->sgt) {
err = ivpu_bo_alloc_and_map_pages_locked(bo);
if (err) {
ret = vmf_error(err);
goto unlock;
}
}
/* We don't use vmf->pgoff since that has the fake offset */
page_offset = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
if (page_offset >= npages) {
ret = VM_FAULT_SIGBUS;
} else {
page = bo->pages[page_offset];
ret = vmf_insert_pfn(vma, vmf->address, page_to_pfn(page));
}
unlock:
mutex_unlock(&bo->lock);
return ret;
}
static const struct vm_operations_struct ivpu_vm_ops = {
.fault = ivpu_vm_fault,
.open = drm_gem_vm_open,
.close = drm_gem_vm_close,
};
static const struct drm_gem_object_funcs ivpu_gem_funcs = {
.free = ivpu_bo_free,
.mmap = ivpu_bo_mmap,
.vm_ops = &ivpu_vm_ops,
.get_sg_table = ivpu_bo_get_sg_table,
};
int
ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
struct ivpu_file_priv *file_priv = file->driver_priv;
struct ivpu_device *vdev = file_priv->vdev;
struct drm_ivpu_bo_create *args = data;
u64 size = PAGE_ALIGN(args->size);
struct ivpu_bo *bo;
int ret;
if (args->flags & ~DRM_IVPU_BO_FLAGS)
return -EINVAL;
if (size == 0)
return -EINVAL;
bo = ivpu_bo_alloc(vdev, &file_priv->ctx, size, args->flags, &shmem_ops, NULL, 0);
if (IS_ERR(bo)) {
ivpu_err(vdev, "Failed to create BO: %pe (ctx %u size %llu flags 0x%x)",
bo, file_priv->ctx.id, args->size, args->flags);
return PTR_ERR(bo);
}
ret = drm_gem_handle_create(file, &bo->base, &bo->handle);
if (!ret) {
args->vpu_addr = bo->vpu_addr;
args->handle = bo->handle;
}
drm_gem_object_put(&bo->base);
ivpu_dbg(vdev, BO, "alloc shmem: ctx %u vpu_addr 0x%llx size %zu flags 0x%x\n",
file_priv->ctx.id, bo->vpu_addr, bo->base.size, bo->flags);
return ret;
}
struct ivpu_bo *
ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 flags)
{
const struct ivpu_addr_range *range;
struct ivpu_addr_range fixed_range;
struct ivpu_bo *bo;
pgprot_t prot;
int ret;
drm_WARN_ON(&vdev->drm, !PAGE_ALIGNED(vpu_addr));
drm_WARN_ON(&vdev->drm, !PAGE_ALIGNED(size));
if (vpu_addr) {
fixed_range.start = vpu_addr;
fixed_range.end = vpu_addr + size;
range = &fixed_range;
} else {
range = &vdev->hw->ranges.global_low;
}
bo = ivpu_bo_alloc(vdev, &vdev->gctx, size, flags, &internal_ops, range, 0);
if (IS_ERR(bo)) {
ivpu_err(vdev, "Failed to create BO: %pe (vpu_addr 0x%llx size %llu flags 0x%x)",
bo, vpu_addr, size, flags);
return NULL;
}
ret = ivpu_bo_pin(bo);
if (ret)
goto err_put;
if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED)
drm_clflush_pages(bo->pages, bo->base.size >> PAGE_SHIFT);
prot = ivpu_bo_pgprot(bo, PAGE_KERNEL);
bo->kvaddr = vmap(bo->pages, bo->base.size >> PAGE_SHIFT, VM_MAP, prot);
if (!bo->kvaddr) {
ivpu_err(vdev, "Failed to map BO into kernel virtual memory\n");
goto err_put;
}
ivpu_dbg(vdev, BO, "alloc internal: ctx 0 vpu_addr 0x%llx size %zu flags 0x%x\n",
bo->vpu_addr, bo->base.size, flags);
return bo;
err_put:
drm_gem_object_put(&bo->base);
return NULL;
}
void ivpu_bo_free_internal(struct ivpu_bo *bo)
{
drm_gem_object_put(&bo->base);
}
struct drm_gem_object *ivpu_gem_prime_import(struct drm_device *dev, struct dma_buf *buf)
{
struct ivpu_device *vdev = to_ivpu_device(dev);
struct dma_buf_attachment *attach;
struct ivpu_bo *bo;
attach = dma_buf_attach(buf, dev->dev);
if (IS_ERR(attach))
return ERR_CAST(attach);
get_dma_buf(buf);
bo = ivpu_bo_alloc(vdev, NULL, buf->size, DRM_IVPU_BO_MAPPABLE, &prime_ops, NULL, 0);
if (IS_ERR(bo)) {
ivpu_err(vdev, "Failed to import BO: %pe (size %lu)", bo, buf->size);
goto err_detach;
}
lockdep_set_class(&bo->lock, &prime_bo_lock_class_key);
bo->base.import_attach = attach;
return &bo->base;
err_detach:
dma_buf_detach(buf, attach);
dma_buf_put(buf);
return ERR_CAST(bo);
}
int ivpu_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
struct ivpu_file_priv *file_priv = file->driver_priv;
struct ivpu_device *vdev = to_ivpu_device(dev);
struct drm_ivpu_bo_info *args = data;
struct drm_gem_object *obj;
struct ivpu_bo *bo;
int ret = 0;
obj = drm_gem_object_lookup(file, args->handle);
if (!obj)
return -ENOENT;
bo = to_ivpu_bo(obj);
mutex_lock(&bo->lock);
if (!bo->ctx) {
ret = ivpu_bo_alloc_vpu_addr(bo, &file_priv->ctx, NULL);
if (ret) {
ivpu_err(vdev, "Failed to allocate vpu_addr: %d\n", ret);
goto unlock;
}
}
args->flags = bo->flags;
args->mmap_offset = drm_vma_node_offset_addr(&obj->vma_node);
args->vpu_addr = bo->vpu_addr;
args->size = obj->size;
unlock:
mutex_unlock(&bo->lock);
drm_gem_object_put(obj);
return ret;
}
int ivpu_bo_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
struct drm_ivpu_bo_wait *args = data;
struct drm_gem_object *obj;
unsigned long timeout;
long ret;
timeout = drm_timeout_abs_to_jiffies(args->timeout_ns);
obj = drm_gem_object_lookup(file, args->handle);
if (!obj)
return -EINVAL;
ret = dma_resv_wait_timeout(obj->resv, DMA_RESV_USAGE_READ, true, timeout);
if (ret == 0) {
ret = -ETIMEDOUT;
} else if (ret > 0) {
ret = 0;
args->job_status = to_ivpu_bo(obj)->job_status;
}
drm_gem_object_put(obj);
return ret;
}
static void ivpu_bo_print_info(struct ivpu_bo *bo, struct drm_printer *p)
{
unsigned long dma_refcount = 0;
if (bo->base.dma_buf && bo->base.dma_buf->file)
dma_refcount = atomic_long_read(&bo->base.dma_buf->file->f_count);
drm_printf(p, "%5u %6d %16llx %10lu %10u %12lu %14s\n",
bo->ctx->id, bo->handle, bo->vpu_addr, bo->base.size,
kref_read(&bo->base.refcount), dma_refcount, bo->ops->name);
}
void ivpu_bo_list(struct drm_device *dev, struct drm_printer *p)
{
struct ivpu_device *vdev = to_ivpu_device(dev);
struct ivpu_file_priv *file_priv;
unsigned long ctx_id;
struct ivpu_bo *bo;
drm_printf(p, "%5s %6s %16s %10s %10s %12s %14s\n",
"ctx", "handle", "vpu_addr", "size", "refcount", "dma_refcount", "type");
mutex_lock(&vdev->gctx.lock);
list_for_each_entry(bo, &vdev->gctx.bo_list, ctx_node)
ivpu_bo_print_info(bo, p);
mutex_unlock(&vdev->gctx.lock);
xa_for_each(&vdev->context_xa, ctx_id, file_priv) {
file_priv = ivpu_file_priv_get_by_ctx_id(vdev, ctx_id);
if (!file_priv)
continue;
mutex_lock(&file_priv->ctx.lock);
list_for_each_entry(bo, &file_priv->ctx.bo_list, ctx_node)
ivpu_bo_print_info(bo, p);
mutex_unlock(&file_priv->ctx.lock);
ivpu_file_priv_put(&file_priv);
}
}
void ivpu_bo_list_print(struct drm_device *dev)
{
struct drm_printer p = drm_info_printer(dev->dev);
ivpu_bo_list(dev, &p);
}

@ -0,0 +1,127 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2020-2023 Intel Corporation
*/
#ifndef __IVPU_GEM_H__
#define __IVPU_GEM_H__
#include <drm/drm_gem.h>
#include <drm/drm_mm.h>
struct dma_buf;
struct ivpu_bo_ops;
struct ivpu_file_priv;
struct ivpu_bo {
struct drm_gem_object base;
const struct ivpu_bo_ops *ops;
struct ivpu_mmu_context *ctx;
struct list_head ctx_node;
struct drm_mm_node mm_node;
struct mutex lock; /* Protects: pages, sgt, mmu_mapped */
struct sg_table *sgt;
struct page **pages;
bool mmu_mapped;
void *kvaddr;
u64 vpu_addr;
u32 handle;
u32 flags;
uintptr_t user_ptr;
u32 job_status;
};
enum ivpu_bo_type {
IVPU_BO_TYPE_SHMEM = 1,
IVPU_BO_TYPE_INTERNAL,
IVPU_BO_TYPE_PRIME,
};
struct ivpu_bo_ops {
enum ivpu_bo_type type;
const char *name;
int (*alloc_pages)(struct ivpu_bo *bo);
void (*free_pages)(struct ivpu_bo *bo);
int (*map_pages)(struct ivpu_bo *bo);
void (*unmap_pages)(struct ivpu_bo *bo);
};
int ivpu_bo_pin(struct ivpu_bo *bo);
void ivpu_bo_remove_all_bos_from_context(struct ivpu_mmu_context *ctx);
void ivpu_bo_list(struct drm_device *dev, struct drm_printer *p);
void ivpu_bo_list_print(struct drm_device *dev);
struct ivpu_bo *
ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 flags);
void ivpu_bo_free_internal(struct ivpu_bo *bo);
struct drm_gem_object *ivpu_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf);
void ivpu_bo_unmap_sgt_and_remove_from_context(struct ivpu_bo *bo);
int ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
int ivpu_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
int ivpu_bo_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
static inline struct ivpu_bo *to_ivpu_bo(struct drm_gem_object *obj)
{
return container_of(obj, struct ivpu_bo, base);
}
static inline struct page *ivpu_bo_get_page(struct ivpu_bo *bo, u64 offset)
{
if (offset > bo->base.size || !bo->pages)
return NULL;
return bo->pages[offset / PAGE_SIZE];
}
static inline u32 ivpu_bo_cache_mode(struct ivpu_bo *bo)
{
return bo->flags & DRM_IVPU_BO_CACHE_MASK;
}
static inline bool ivpu_bo_is_snooped(struct ivpu_bo *bo)
{
return ivpu_bo_cache_mode(bo) == DRM_IVPU_BO_CACHED;
}
static inline pgprot_t ivpu_bo_pgprot(struct ivpu_bo *bo, pgprot_t prot)
{
if (bo->flags & DRM_IVPU_BO_WC)
return pgprot_writecombine(prot);
if (bo->flags & DRM_IVPU_BO_UNCACHED)
return pgprot_noncached(prot);
return prot;
}
static inline struct ivpu_device *ivpu_bo_to_vdev(struct ivpu_bo *bo)
{
return to_ivpu_device(bo->base.dev);
}
static inline void *ivpu_to_cpu_addr(struct ivpu_bo *bo, u32 vpu_addr)
{
if (vpu_addr < bo->vpu_addr)
return NULL;
if (vpu_addr >= (bo->vpu_addr + bo->base.size))
return NULL;
return bo->kvaddr + (vpu_addr - bo->vpu_addr);
}
static inline u32 cpu_to_vpu_addr(struct ivpu_bo *bo, void *cpu_addr)
{
if (cpu_addr < bo->kvaddr)
return 0;
if (cpu_addr >= (bo->kvaddr + bo->base.size))
return 0;
return bo->vpu_addr + (cpu_addr - bo->kvaddr);
}
#endif /* __IVPU_GEM_H__ */

@ -0,0 +1,170 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2020-2023 Intel Corporation
*/
#ifndef __IVPU_HW_H__
#define __IVPU_HW_H__
#include "ivpu_drv.h"
struct ivpu_hw_ops {
int (*info_init)(struct ivpu_device *vdev);
int (*power_up)(struct ivpu_device *vdev);
int (*boot_fw)(struct ivpu_device *vdev);
int (*power_down)(struct ivpu_device *vdev);
bool (*is_idle)(struct ivpu_device *vdev);
void (*wdt_disable)(struct ivpu_device *vdev);
void (*diagnose_failure)(struct ivpu_device *vdev);
u32 (*reg_pll_freq_get)(struct ivpu_device *vdev);
u32 (*reg_telemetry_offset_get)(struct ivpu_device *vdev);
u32 (*reg_telemetry_size_get)(struct ivpu_device *vdev);
u32 (*reg_telemetry_enable_get)(struct ivpu_device *vdev);
void (*reg_db_set)(struct ivpu_device *vdev, u32 db_id);
u32 (*reg_ipc_rx_addr_get)(struct ivpu_device *vdev);
u32 (*reg_ipc_rx_count_get)(struct ivpu_device *vdev);
void (*reg_ipc_tx_set)(struct ivpu_device *vdev, u32 vpu_addr);
void (*irq_clear)(struct ivpu_device *vdev);
void (*irq_enable)(struct ivpu_device *vdev);
void (*irq_disable)(struct ivpu_device *vdev);
irqreturn_t (*irq_handler)(int irq, void *ptr);
};
struct ivpu_addr_range {
resource_size_t start;
resource_size_t end;
};
struct ivpu_hw_info {
const struct ivpu_hw_ops *ops;
struct {
struct ivpu_addr_range global_low;
struct ivpu_addr_range global_high;
struct ivpu_addr_range user_low;
struct ivpu_addr_range user_high;
struct ivpu_addr_range global_aliased_pio;
} ranges;
struct {
u8 min_ratio;
u8 max_ratio;
/*
* PLL ratio for the efficiency frequency. The VPU has the optimum
* performance-to-power ratio at this frequency.
*/
u8 pn_ratio;
u32 profiling_freq;
} pll;
u32 tile_fuse;
u32 sku;
u16 config;
};
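/*
 * Illustrative sketch (not part of this patch): a HW backend is expected to
 * fill the address ranges from its info_init() callback, e.g. using
 * ivpu_hw_init_range() defined below. The base addresses and sizes here are
 * made-up placeholders, not the real MTL layout:
 *
 *	ivpu_hw_init_range(&vdev->hw->ranges.global_low, 0x80000000, SZ_512M);
 *	ivpu_hw_init_range(&vdev->hw->ranges.user_low, 0xc0000000, SZ_256M);
 */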
extern const struct ivpu_hw_ops ivpu_hw_mtl_ops;
static inline int ivpu_hw_info_init(struct ivpu_device *vdev)
{
return vdev->hw->ops->info_init(vdev);
};
static inline int ivpu_hw_power_up(struct ivpu_device *vdev)
{
ivpu_dbg(vdev, PM, "HW power up\n");
return vdev->hw->ops->power_up(vdev);
};
static inline int ivpu_hw_boot_fw(struct ivpu_device *vdev)
{
return vdev->hw->ops->boot_fw(vdev);
};
static inline bool ivpu_hw_is_idle(struct ivpu_device *vdev)
{
return vdev->hw->ops->is_idle(vdev);
};
static inline int ivpu_hw_power_down(struct ivpu_device *vdev)
{
ivpu_dbg(vdev, PM, "HW power down\n");
return vdev->hw->ops->power_down(vdev);
};
static inline void ivpu_hw_wdt_disable(struct ivpu_device *vdev)
{
vdev->hw->ops->wdt_disable(vdev);
};
/* Register indirect accesses */
static inline u32 ivpu_hw_reg_pll_freq_get(struct ivpu_device *vdev)
{
return vdev->hw->ops->reg_pll_freq_get(vdev);
};
static inline u32 ivpu_hw_reg_telemetry_offset_get(struct ivpu_device *vdev)
{
return vdev->hw->ops->reg_telemetry_offset_get(vdev);
};
static inline u32 ivpu_hw_reg_telemetry_size_get(struct ivpu_device *vdev)
{
return vdev->hw->ops->reg_telemetry_size_get(vdev);
};
static inline u32 ivpu_hw_reg_telemetry_enable_get(struct ivpu_device *vdev)
{
return vdev->hw->ops->reg_telemetry_enable_get(vdev);
};
static inline void ivpu_hw_reg_db_set(struct ivpu_device *vdev, u32 db_id)
{
vdev->hw->ops->reg_db_set(vdev, db_id);
};
static inline u32 ivpu_hw_reg_ipc_rx_addr_get(struct ivpu_device *vdev)
{
return vdev->hw->ops->reg_ipc_rx_addr_get(vdev);
};
static inline u32 ivpu_hw_reg_ipc_rx_count_get(struct ivpu_device *vdev)
{
return vdev->hw->ops->reg_ipc_rx_count_get(vdev);
};
static inline void ivpu_hw_reg_ipc_tx_set(struct ivpu_device *vdev, u32 vpu_addr)
{
vdev->hw->ops->reg_ipc_tx_set(vdev, vpu_addr);
};
static inline void ivpu_hw_irq_clear(struct ivpu_device *vdev)
{
vdev->hw->ops->irq_clear(vdev);
};
static inline void ivpu_hw_irq_enable(struct ivpu_device *vdev)
{
vdev->hw->ops->irq_enable(vdev);
};
static inline void ivpu_hw_irq_disable(struct ivpu_device *vdev)
{
vdev->hw->ops->irq_disable(vdev);
};
static inline void ivpu_hw_init_range(struct ivpu_addr_range *range, u64 start, u64 size)
{
range->start = start;
range->end = start + size;
}
static inline u64 ivpu_hw_range_size(const struct ivpu_addr_range *range)
{
return range->end - range->start;
}
static inline void ivpu_hw_diagnose_failure(struct ivpu_device *vdev)
{
vdev->hw->ops->diagnose_failure(vdev);
}
#endif /* __IVPU_HW_H__ */

File diff suppressed because it is too large

@ -0,0 +1,280 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2020-2023 Intel Corporation
*/
#ifndef __IVPU_HW_MTL_REG_H__
#define __IVPU_HW_MTL_REG_H__
#include <linux/bits.h>
#define MTL_BUTTRESS_INTERRUPT_TYPE 0x00000000u
#define MTL_BUTTRESS_INTERRUPT_STAT 0x00000004u
#define MTL_BUTTRESS_INTERRUPT_STAT_FREQ_CHANGE_MASK BIT_MASK(0)
#define MTL_BUTTRESS_INTERRUPT_STAT_ATS_ERR_MASK BIT_MASK(1)
#define MTL_BUTTRESS_INTERRUPT_STAT_UFI_ERR_MASK BIT_MASK(2)
#define MTL_BUTTRESS_WP_REQ_PAYLOAD0 0x00000008u
#define MTL_BUTTRESS_WP_REQ_PAYLOAD0_MIN_RATIO_MASK GENMASK(15, 0)
#define MTL_BUTTRESS_WP_REQ_PAYLOAD0_MAX_RATIO_MASK GENMASK(31, 16)
#define MTL_BUTTRESS_WP_REQ_PAYLOAD1 0x0000000cu
#define MTL_BUTTRESS_WP_REQ_PAYLOAD1_TARGET_RATIO_MASK GENMASK(15, 0)
#define MTL_BUTTRESS_WP_REQ_PAYLOAD1_EPP_MASK GENMASK(31, 16)
#define MTL_BUTTRESS_WP_REQ_PAYLOAD2 0x00000010u
#define MTL_BUTTRESS_WP_REQ_PAYLOAD2_CONFIG_MASK GENMASK(15, 0)
#define MTL_BUTTRESS_WP_REQ_CMD 0x00000014u
#define MTL_BUTTRESS_WP_REQ_CMD_SEND_MASK BIT_MASK(0)
#define MTL_BUTTRESS_WP_DOWNLOAD 0x00000018u
#define MTL_BUTTRESS_WP_DOWNLOAD_TARGET_RATIO_MASK GENMASK(15, 0)
#define MTL_BUTTRESS_CURRENT_PLL 0x0000001cu
#define MTL_BUTTRESS_CURRENT_PLL_RATIO_MASK GENMASK(15, 0)
#define MTL_BUTTRESS_PLL_ENABLE 0x00000020u
#define MTL_BUTTRESS_FMIN_FUSE 0x00000024u
#define MTL_BUTTRESS_FMIN_FUSE_MIN_RATIO_MASK GENMASK(7, 0)
#define MTL_BUTTRESS_FMIN_FUSE_PN_RATIO_MASK GENMASK(15, 8)
#define MTL_BUTTRESS_FMAX_FUSE 0x00000028u
#define MTL_BUTTRESS_FMAX_FUSE_MAX_RATIO_MASK GENMASK(7, 0)
#define MTL_BUTTRESS_TILE_FUSE 0x0000002cu
#define MTL_BUTTRESS_TILE_FUSE_VALID_MASK BIT_MASK(0)
#define MTL_BUTTRESS_TILE_FUSE_SKU_MASK GENMASK(3, 2)
#define MTL_BUTTRESS_LOCAL_INT_MASK 0x00000030u
#define MTL_BUTTRESS_GLOBAL_INT_MASK 0x00000034u
#define MTL_BUTTRESS_PLL_STATUS 0x00000040u
#define MTL_BUTTRESS_PLL_STATUS_LOCK_MASK BIT_MASK(1)
#define MTL_BUTTRESS_VPU_STATUS 0x00000044u
#define MTL_BUTTRESS_VPU_STATUS_READY_MASK BIT_MASK(0)
#define MTL_BUTTRESS_VPU_STATUS_IDLE_MASK BIT_MASK(1)
#define MTL_BUTTRESS_VPU_D0I3_CONTROL 0x00000060u
#define MTL_BUTTRESS_VPU_D0I3_CONTROL_INPROGRESS_MASK BIT_MASK(0)
#define MTL_BUTTRESS_VPU_D0I3_CONTROL_I3_MASK BIT_MASK(2)
#define MTL_BUTTRESS_VPU_IP_RESET 0x00000050u
#define MTL_BUTTRESS_VPU_IP_RESET_TRIGGER_MASK BIT_MASK(0)
#define MTL_BUTTRESS_VPU_TELEMETRY_OFFSET 0x00000080u
#define MTL_BUTTRESS_VPU_TELEMETRY_SIZE 0x00000084u
#define MTL_BUTTRESS_VPU_TELEMETRY_ENABLE 0x00000088u
#define MTL_BUTTRESS_ATS_ERR_LOG_0 0x000000a0u
#define MTL_BUTTRESS_ATS_ERR_LOG_1 0x000000a4u
#define MTL_BUTTRESS_ATS_ERR_CLEAR 0x000000a8u
#define MTL_BUTTRESS_UFI_ERR_LOG 0x000000b0u
#define MTL_BUTTRESS_UFI_ERR_LOG_CQ_ID_MASK GENMASK(11, 0)
#define MTL_BUTTRESS_UFI_ERR_LOG_AXI_ID_MASK GENMASK(19, 12)
#define MTL_BUTTRESS_UFI_ERR_LOG_OPCODE_MASK GENMASK(24, 20)
#define MTL_BUTTRESS_UFI_ERR_CLEAR 0x000000b4u
#define MTL_VPU_HOST_SS_CPR_CLK_SET 0x00000084u
#define MTL_VPU_HOST_SS_CPR_CLK_SET_TOP_NOC_MASK BIT_MASK(1)
#define MTL_VPU_HOST_SS_CPR_CLK_SET_DSS_MAS_MASK BIT_MASK(10)
#define MTL_VPU_HOST_SS_CPR_CLK_SET_MSS_MAS_MASK BIT_MASK(11)
#define MTL_VPU_HOST_SS_CPR_RST_SET 0x00000094u
#define MTL_VPU_HOST_SS_CPR_RST_SET_TOP_NOC_MASK BIT_MASK(1)
#define MTL_VPU_HOST_SS_CPR_RST_SET_DSS_MAS_MASK BIT_MASK(10)
#define MTL_VPU_HOST_SS_CPR_RST_SET_MSS_MAS_MASK BIT_MASK(11)
#define MTL_VPU_HOST_SS_CPR_RST_CLR 0x00000098u
#define MTL_VPU_HOST_SS_CPR_RST_CLR_TOP_NOC_MASK BIT_MASK(1)
#define MTL_VPU_HOST_SS_CPR_RST_CLR_DSS_MAS_MASK BIT_MASK(10)
#define MTL_VPU_HOST_SS_CPR_RST_CLR_MSS_MAS_MASK BIT_MASK(11)
#define MTL_VPU_HOST_SS_HW_VERSION 0x00000108u
#define MTL_VPU_HOST_SS_HW_VERSION_SOC_REVISION_MASK GENMASK(7, 0)
#define MTL_VPU_HOST_SS_HW_VERSION_SOC_NUMBER_MASK GENMASK(15, 8)
#define MTL_VPU_HOST_SS_HW_VERSION_VPU_GENERATION_MASK GENMASK(23, 16)
#define MTL_VPU_HOST_SS_GEN_CTRL 0x00000118u
#define MTL_VPU_HOST_SS_GEN_CTRL_PS_MASK GENMASK(31, 29)
#define MTL_VPU_HOST_SS_NOC_QREQN 0x00000154u
#define MTL_VPU_HOST_SS_NOC_QREQN_TOP_SOCMMIO_MASK BIT_MASK(0)
#define MTL_VPU_HOST_SS_NOC_QACCEPTN 0x00000158u
#define MTL_VPU_HOST_SS_NOC_QACCEPTN_TOP_SOCMMIO_MASK BIT_MASK(0)
#define MTL_VPU_HOST_SS_NOC_QDENY 0x0000015cu
#define MTL_VPU_HOST_SS_NOC_QDENY_TOP_SOCMMIO_MASK BIT_MASK(0)
#define MTL_VPU_TOP_NOC_QREQN 0x00000160u
#define MTL_VPU_TOP_NOC_QREQN_CPU_CTRL_MASK BIT_MASK(0)
#define MTL_VPU_TOP_NOC_QREQN_HOSTIF_L2CACHE_MASK BIT_MASK(1)
#define MTL_VPU_TOP_NOC_QACCEPTN 0x00000164u
#define MTL_VPU_TOP_NOC_QACCEPTN_CPU_CTRL_MASK BIT_MASK(0)
#define MTL_VPU_TOP_NOC_QACCEPTN_HOSTIF_L2CACHE_MASK BIT_MASK(1)
#define MTL_VPU_TOP_NOC_QDENY 0x00000168u
#define MTL_VPU_TOP_NOC_QDENY_CPU_CTRL_MASK BIT_MASK(0)
#define MTL_VPU_TOP_NOC_QDENY_HOSTIF_L2CACHE_MASK BIT_MASK(1)
#define MTL_VPU_HOST_SS_FW_SOC_IRQ_EN 0x00000170u
#define MTL_VPU_HOST_SS_FW_SOC_IRQ_EN_CSS_ROM_CMX_MASK BIT_MASK(0)
#define MTL_VPU_HOST_SS_FW_SOC_IRQ_EN_CSS_DBG_MASK BIT_MASK(1)
#define MTL_VPU_HOST_SS_FW_SOC_IRQ_EN_CSS_CTRL_MASK BIT_MASK(2)
#define MTL_VPU_HOST_SS_FW_SOC_IRQ_EN_DEC400_MASK BIT_MASK(3)
#define MTL_VPU_HOST_SS_FW_SOC_IRQ_EN_MSS_NCE_MASK BIT_MASK(4)
#define MTL_VPU_HOST_SS_FW_SOC_IRQ_EN_MSS_MBI_MASK BIT_MASK(5)
#define MTL_VPU_HOST_SS_FW_SOC_IRQ_EN_MSS_MBI_CMX_MASK BIT_MASK(6)
#define MTL_VPU_HOST_SS_ICB_STATUS_0 0x00010210u
#define MTL_VPU_HOST_SS_ICB_STATUS_0_TIMER_0_INT_MASK BIT_MASK(0)
#define MTL_VPU_HOST_SS_ICB_STATUS_0_TIMER_1_INT_MASK BIT_MASK(1)
#define MTL_VPU_HOST_SS_ICB_STATUS_0_TIMER_2_INT_MASK BIT_MASK(2)
#define MTL_VPU_HOST_SS_ICB_STATUS_0_TIMER_3_INT_MASK BIT_MASK(3)
#define MTL_VPU_HOST_SS_ICB_STATUS_0_HOST_IPC_FIFO_INT_MASK BIT_MASK(4)
#define MTL_VPU_HOST_SS_ICB_STATUS_0_MMU_IRQ_0_INT_MASK BIT_MASK(5)
#define MTL_VPU_HOST_SS_ICB_STATUS_0_MMU_IRQ_1_INT_MASK BIT_MASK(6)
#define MTL_VPU_HOST_SS_ICB_STATUS_0_MMU_IRQ_2_INT_MASK BIT_MASK(7)
#define MTL_VPU_HOST_SS_ICB_STATUS_0_NOC_FIREWALL_INT_MASK BIT_MASK(8)
#define MTL_VPU_HOST_SS_ICB_STATUS_0_CPU_INT_REDIRECT_0_INT_MASK BIT_MASK(30)
#define MTL_VPU_HOST_SS_ICB_STATUS_0_CPU_INT_REDIRECT_1_INT_MASK BIT_MASK(31)
#define MTL_VPU_HOST_SS_ICB_STATUS_1 0x00010214u
#define MTL_VPU_HOST_SS_ICB_STATUS_1_CPU_INT_REDIRECT_2_INT_MASK BIT_MASK(0)
#define MTL_VPU_HOST_SS_ICB_STATUS_1_CPU_INT_REDIRECT_3_INT_MASK BIT_MASK(1)
#define MTL_VPU_HOST_SS_ICB_STATUS_1_CPU_INT_REDIRECT_4_INT_MASK BIT_MASK(2)
#define MTL_VPU_HOST_SS_ICB_CLEAR_0 0x00010220u
#define MTL_VPU_HOST_SS_ICB_CLEAR_1 0x00010224u
#define MTL_VPU_HOST_SS_ICB_ENABLE_0 0x00010240u
#define MTL_VPU_HOST_SS_TIM_IPC_FIFO_ATM 0x000200f4u
#define MTL_VPU_HOST_SS_TIM_IPC_FIFO_STAT 0x000200fcu
#define MTL_VPU_HOST_SS_TIM_IPC_FIFO_STAT_READ_POINTER_MASK GENMASK(7, 0)
#define MTL_VPU_HOST_SS_TIM_IPC_FIFO_STAT_WRITE_POINTER_MASK GENMASK(15, 8)
#define MTL_VPU_HOST_SS_TIM_IPC_FIFO_STAT_FILL_LEVEL_MASK GENMASK(23, 16)
#define MTL_VPU_HOST_SS_TIM_IPC_FIFO_STAT_RSVD0_MASK GENMASK(31, 24)
#define MTL_VPU_HOST_SS_AON_PWR_ISO_EN0 0x00030020u
#define MTL_VPU_HOST_SS_AON_PWR_ISO_EN0_MSS_CPU_MASK BIT_MASK(3)
#define MTL_VPU_HOST_SS_AON_PWR_ISLAND_EN0 0x00030024u
#define MTL_VPU_HOST_SS_AON_PWR_ISLAND_EN0_MSS_CPU_MASK BIT_MASK(3)
#define MTL_VPU_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0 0x00030028u
#define MTL_VPU_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0_MSS_CPU_MASK BIT_MASK(3)
#define MTL_VPU_HOST_SS_AON_PWR_ISLAND_STATUS0 0x0003002cu
#define MTL_VPU_HOST_SS_AON_PWR_ISLAND_STATUS0_MSS_CPU_MASK BIT_MASK(3)
#define MTL_VPU_HOST_SS_AON_VPU_IDLE_GEN 0x00030200u
#define MTL_VPU_HOST_SS_AON_VPU_IDLE_GEN_EN_MASK BIT_MASK(0)
#define MTL_VPU_HOST_SS_AON_DPU_ACTIVE 0x00030204u
#define MTL_VPU_HOST_SS_AON_DPU_ACTIVE_DPU_ACTIVE_MASK BIT_MASK(0)
#define MTL_VPU_HOST_SS_LOADING_ADDRESS_LO 0x00041040u
#define MTL_VPU_HOST_SS_LOADING_ADDRESS_LO_DONE_MASK BIT_MASK(0)
#define MTL_VPU_HOST_SS_LOADING_ADDRESS_LO_IOSF_RS_ID_MASK GENMASK(2, 1)
#define MTL_VPU_HOST_SS_LOADING_ADDRESS_LO_IMAGE_LOCATION_MASK GENMASK(31, 3)
#define MTL_VPU_HOST_SS_WORKPOINT_CONFIG_MIRROR 0x00082020u
#define MTL_VPU_HOST_SS_WORKPOINT_CONFIG_MIRROR_FINAL_PLL_FREQ_MASK GENMASK(15, 0)
#define MTL_VPU_HOST_SS_WORKPOINT_CONFIG_MIRROR_CONFIG_ID_MASK GENMASK(31, 16)
#define MTL_VPU_HOST_MMU_IDR0 0x00200000u
#define MTL_VPU_HOST_MMU_IDR1 0x00200004u
#define MTL_VPU_HOST_MMU_IDR3 0x0020000cu
#define MTL_VPU_HOST_MMU_IDR5 0x00200014u
#define MTL_VPU_HOST_MMU_CR0 0x00200020u
#define MTL_VPU_HOST_MMU_CR0ACK 0x00200024u
#define MTL_VPU_HOST_MMU_CR1 0x00200028u
#define MTL_VPU_HOST_MMU_CR2 0x0020002cu
#define MTL_VPU_HOST_MMU_IRQ_CTRL 0x00200050u
#define MTL_VPU_HOST_MMU_IRQ_CTRLACK 0x00200054u
#define MTL_VPU_HOST_MMU_GERROR 0x00200060u
#define MTL_VPU_HOST_MMU_GERROR_CMDQ_MASK BIT_MASK(0)
#define MTL_VPU_HOST_MMU_GERROR_EVTQ_ABT_MASK BIT_MASK(2)
#define MTL_VPU_HOST_MMU_GERROR_PRIQ_ABT_MASK BIT_MASK(3)
#define MTL_VPU_HOST_MMU_GERROR_MSI_CMDQ_ABT_MASK BIT_MASK(4)
#define MTL_VPU_HOST_MMU_GERROR_MSI_EVTQ_ABT_MASK BIT_MASK(5)
#define MTL_VPU_HOST_MMU_GERROR_MSI_PRIQ_ABT_MASK BIT_MASK(6)
#define MTL_VPU_HOST_MMU_GERROR_MSI_ABT_MASK BIT_MASK(7)
#define MTL_VPU_HOST_MMU_GERRORN 0x00200064u
#define MTL_VPU_HOST_MMU_STRTAB_BASE 0x00200080u
#define MTL_VPU_HOST_MMU_STRTAB_BASE_CFG 0x00200088u
#define MTL_VPU_HOST_MMU_CMDQ_BASE 0x00200090u
#define MTL_VPU_HOST_MMU_CMDQ_PROD 0x00200098u
#define MTL_VPU_HOST_MMU_CMDQ_CONS 0x0020009cu
#define MTL_VPU_HOST_MMU_EVTQ_BASE 0x002000a0u
#define MTL_VPU_HOST_MMU_EVTQ_PROD 0x002000a8u
#define MTL_VPU_HOST_MMU_EVTQ_CONS 0x002000acu
#define MTL_VPU_HOST_MMU_EVTQ_PROD_SEC (0x002000a8u + SZ_64K)
#define MTL_VPU_HOST_MMU_EVTQ_CONS_SEC (0x002000acu + SZ_64K)
#define MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES 0x00360000u
#define MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES_CACHE_OVERRIDE_EN_MASK BIT_MASK(0)
#define MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES_AWCACHE_OVERRIDE_MASK BIT_MASK(1)
#define MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES_ARCACHE_OVERRIDE_MASK BIT_MASK(2)
#define MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES_NOSNOOP_OVERRIDE_EN_MASK BIT_MASK(3)
#define MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES_AW_NOSNOOP_OVERRIDE_MASK BIT_MASK(4)
#define MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES_AR_NOSNOOP_OVERRIDE_MASK BIT_MASK(5)
#define MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES_PTW_AW_CONTEXT_FLAG_MASK GENMASK(10, 6)
#define MTL_VPU_HOST_IF_TCU_PTW_OVERRIDES_PTW_AR_CONTEXT_FLAG_MASK GENMASK(15, 11)
#define MTL_VPU_HOST_IF_TBU_MMUSSIDV 0x00360004u
#define MTL_VPU_HOST_IF_TBU_MMUSSIDV_TBU0_AWMMUSSIDV_MASK BIT_MASK(0)
#define MTL_VPU_HOST_IF_TBU_MMUSSIDV_TBU0_ARMMUSSIDV_MASK BIT_MASK(1)
#define MTL_VPU_HOST_IF_TBU_MMUSSIDV_TBU1_AWMMUSSIDV_MASK BIT_MASK(2)
#define MTL_VPU_HOST_IF_TBU_MMUSSIDV_TBU1_ARMMUSSIDV_MASK BIT_MASK(3)
#define MTL_VPU_HOST_IF_TBU_MMUSSIDV_TBU2_AWMMUSSIDV_MASK BIT_MASK(4)
#define MTL_VPU_HOST_IF_TBU_MMUSSIDV_TBU2_ARMMUSSIDV_MASK BIT_MASK(5)
#define MTL_VPU_HOST_IF_TBU_MMUSSIDV_TBU3_AWMMUSSIDV_MASK BIT_MASK(6)
#define MTL_VPU_HOST_IF_TBU_MMUSSIDV_TBU3_ARMMUSSIDV_MASK BIT_MASK(7)
#define MTL_VPU_HOST_IF_TBU_MMUSSIDV_TBU4_AWMMUSSIDV_MASK BIT_MASK(8)
#define MTL_VPU_HOST_IF_TBU_MMUSSIDV_TBU4_ARMMUSSIDV_MASK BIT_MASK(9)
#define MTL_VPU_CPU_SS_DSU_LEON_RT_BASE 0x04000000u
#define MTL_VPU_CPU_SS_DSU_LEON_RT_DSU_CTRL 0x04000000u
#define MTL_VPU_CPU_SS_DSU_LEON_RT_PC_REG 0x04400010u
#define MTL_VPU_CPU_SS_DSU_LEON_RT_NPC_REG 0x04400014u
#define MTL_VPU_CPU_SS_DSU_LEON_RT_DSU_TRAP_REG 0x04400020u
#define MTL_VPU_CPU_SS_MSSCPU_CPR_CLK_SET 0x06010004u
#define MTL_VPU_CPU_SS_MSSCPU_CPR_CLK_SET_CPU_DSU_MASK BIT_MASK(1)
#define MTL_VPU_CPU_SS_MSSCPU_CPR_RST_CLR 0x06010018u
#define MTL_VPU_CPU_SS_MSSCPU_CPR_RST_CLR_CPU_DSU_MASK BIT_MASK(1)
#define MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC 0x06010040u
#define MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RSTRUN0_MASK BIT_MASK(0)
#define MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RESUME0_MASK BIT_MASK(1)
#define MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RSTRUN1_MASK BIT_MASK(2)
#define MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RESUME1_MASK BIT_MASK(3)
#define MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RSTVEC_MASK GENMASK(31, 4)
#define MTL_VPU_CPU_SS_TIM_WATCHDOG 0x0602009cu
#define MTL_VPU_CPU_SS_TIM_WDOG_EN 0x060200a4u
#define MTL_VPU_CPU_SS_TIM_SAFE 0x060200a8u
#define MTL_VPU_CPU_SS_TIM_IPC_FIFO 0x060200f0u
#define MTL_VPU_CPU_SS_TIM_GEN_CONFIG 0x06021008u
#define MTL_VPU_CPU_SS_TIM_GEN_CONFIG_WDOG_TO_INT_CLR_MASK BIT_MASK(9)
#define MTL_VPU_CPU_SS_DOORBELL_0 0x06300000u
#define MTL_VPU_CPU_SS_DOORBELL_0_SET_MASK BIT_MASK(0)
#define MTL_VPU_CPU_SS_DOORBELL_1 0x06301000u
#endif /* __IVPU_HW_MTL_REG_H__ */

@ -0,0 +1,115 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2020-2023 Intel Corporation
*/
#ifndef __IVPU_HW_REG_IO_H__
#define __IVPU_HW_REG_IO_H__
#include <linux/bitfield.h>
#include <linux/io.h>
#include <linux/iopoll.h>
#include "ivpu_drv.h"
#define REG_POLL_SLEEP_US 50
#define REG_IO_ERROR 0xffffffff
#define REGB_RD32(reg) ivpu_hw_reg_rd32(vdev, vdev->regb, (reg), #reg, __func__)
#define REGB_RD32_SILENT(reg) readl(vdev->regb + (reg))
#define REGB_RD64(reg) ivpu_hw_reg_rd64(vdev, vdev->regb, (reg), #reg, __func__)
#define REGB_WR32(reg, val) ivpu_hw_reg_wr32(vdev, vdev->regb, (reg), (val), #reg, __func__)
#define REGB_WR64(reg, val) ivpu_hw_reg_wr64(vdev, vdev->regb, (reg), (val), #reg, __func__)
#define REGV_RD32(reg) ivpu_hw_reg_rd32(vdev, vdev->regv, (reg), #reg, __func__)
#define REGV_RD32_SILENT(reg) readl(vdev->regv + (reg))
#define REGV_RD64(reg) ivpu_hw_reg_rd64(vdev, vdev->regv, (reg), #reg, __func__)
#define REGV_WR32(reg, val) ivpu_hw_reg_wr32(vdev, vdev->regv, (reg), (val), #reg, __func__)
#define REGV_WR64(reg, val) ivpu_hw_reg_wr64(vdev, vdev->regv, (reg), (val), #reg, __func__)
#define REGV_WR32I(reg, stride, index, val) \
ivpu_hw_reg_wr32_index(vdev, vdev->regv, (reg), (stride), (index), (val), #reg, __func__)
#define REG_FLD(REG, FLD) \
(REG##_##FLD##_MASK)
#define REG_FLD_NUM(REG, FLD, num) \
FIELD_PREP(REG##_##FLD##_MASK, num)
#define REG_GET_FLD(REG, FLD, val) \
FIELD_GET(REG##_##FLD##_MASK, val)
#define REG_CLR_FLD(REG, FLD, val) \
((val) & ~(REG##_##FLD##_MASK))
#define REG_SET_FLD(REG, FLD, val) \
((val) | (REG##_##FLD##_MASK))
#define REG_SET_FLD_NUM(REG, FLD, num, val) \
(((val) & ~(REG##_##FLD##_MASK)) | FIELD_PREP(REG##_##FLD##_MASK, num))
#define REG_TEST_FLD(REG, FLD, val) \
((REG##_##FLD##_MASK) == ((val) & (REG##_##FLD##_MASK)))
#define REG_TEST_FLD_NUM(REG, FLD, num, val) \
((num) == FIELD_GET(REG##_##FLD##_MASK, val))
#define REGB_POLL(reg, var, cond, timeout_us) \
read_poll_timeout(REGB_RD32_SILENT, var, cond, REG_POLL_SLEEP_US, timeout_us, false, reg)
#define REGV_POLL(reg, var, cond, timeout_us) \
read_poll_timeout(REGV_RD32_SILENT, var, cond, REG_POLL_SLEEP_US, timeout_us, false, reg)
#define REGB_POLL_FLD(reg, fld, val, timeout_us) \
({ \
u32 var; \
REGB_POLL(reg, var, (FIELD_GET(reg##_##fld##_MASK, var) == (val)), timeout_us); \
})
#define REGV_POLL_FLD(reg, fld, val, timeout_us) \
({ \
u32 var; \
REGV_POLL(reg, var, (FIELD_GET(reg##_##fld##_MASK, var) == (val)), timeout_us); \
})
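/*
 * Illustrative usage sketch (not part of this interface): the REG_*_FLD()
 * helpers expect <REG>_<FLD>_MASK definitions such as those in
 * ivpu_hw_mtl_reg.h, and the REGB_*()/REGV_*() wrappers assume a 'vdev'
 * variable in scope. With hypothetical locals 'ratio' and 'timeout_us':
 *
 *	u32 val = REGB_RD32(MTL_BUTTRESS_WP_REQ_PAYLOAD0);
 *
 *	val = REG_SET_FLD_NUM(MTL_BUTTRESS_WP_REQ_PAYLOAD0, MIN_RATIO, ratio, val);
 *	REGB_WR32(MTL_BUTTRESS_WP_REQ_PAYLOAD0, val);
 *
 * Polling a single field returns 0 on success or -ETIMEDOUT:
 *
 *	ret = REGB_POLL_FLD(MTL_BUTTRESS_VPU_STATUS, READY, 1, timeout_us);
 */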
static inline u32
ivpu_hw_reg_rd32(struct ivpu_device *vdev, void __iomem *base, u32 reg,
const char *name, const char *func)
{
u32 val = readl(base + reg);
ivpu_dbg(vdev, REG, "%s RD: %s (0x%08x) => 0x%08x\n", func, name, reg, val);
return val;
}
static inline u64
ivpu_hw_reg_rd64(struct ivpu_device *vdev, void __iomem *base, u32 reg,
const char *name, const char *func)
{
u64 val = readq(base + reg);
ivpu_dbg(vdev, REG, "%s RD: %s (0x%08x) => 0x%016llx\n", func, name, reg, val);
return val;
}
static inline void
ivpu_hw_reg_wr32(struct ivpu_device *vdev, void __iomem *base, u32 reg, u32 val,
const char *name, const char *func)
{
ivpu_dbg(vdev, REG, "%s WR: %s (0x%08x) <= 0x%08x\n", func, name, reg, val);
writel(val, base + reg);
}
static inline void
ivpu_hw_reg_wr64(struct ivpu_device *vdev, void __iomem *base, u32 reg, u64 val,
const char *name, const char *func)
{
ivpu_dbg(vdev, REG, "%s WR: %s (0x%08x) <= 0x%016llx\n", func, name, reg, val);
writeq(val, base + reg);
}
static inline void
ivpu_hw_reg_wr32_index(struct ivpu_device *vdev, void __iomem *base, u32 reg,
u32 stride, u32 index, u32 val, const char *name,
const char *func)
{
reg += index * stride;
ivpu_dbg(vdev, REG, "%s WR: %s_%d (0x%08x) <= 0x%08x\n", func, name, index, reg, val);
writel(val, base + reg);
}
#endif /* __IVPU_HW_REG_IO_H__ */

@ -0,0 +1,510 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2020-2023 Intel Corporation
*/
#include <linux/genalloc.h>
#include <linux/highmem.h>
#include <linux/kthread.h>
#include <linux/wait.h>
#include "ivpu_drv.h"
#include "ivpu_gem.h"
#include "ivpu_hw.h"
#include "ivpu_hw_reg_io.h"
#include "ivpu_ipc.h"
#include "ivpu_jsm_msg.h"
#include "ivpu_pm.h"
#define IPC_MAX_RX_MSG 128
#define IS_KTHREAD() (get_current()->flags & PF_KTHREAD)
struct ivpu_ipc_tx_buf {
struct ivpu_ipc_hdr ipc;
struct vpu_jsm_msg jsm;
};
struct ivpu_ipc_rx_msg {
struct list_head link;
struct ivpu_ipc_hdr *ipc_hdr;
struct vpu_jsm_msg *jsm_msg;
};
static void ivpu_ipc_msg_dump(struct ivpu_device *vdev, char *c,
struct ivpu_ipc_hdr *ipc_hdr, u32 vpu_addr)
{
ivpu_dbg(vdev, IPC,
"%s: vpu:0x%x (data_addr:0x%08x, data_size:0x%x, channel:0x%x, src_node:0x%x, dst_node:0x%x, status:0x%x)",
c, vpu_addr, ipc_hdr->data_addr, ipc_hdr->data_size, ipc_hdr->channel,
ipc_hdr->src_node, ipc_hdr->dst_node, ipc_hdr->status);
}
static void ivpu_jsm_msg_dump(struct ivpu_device *vdev, char *c,
struct vpu_jsm_msg *jsm_msg, u32 vpu_addr)
{
u32 *payload = (u32 *)&jsm_msg->payload;
ivpu_dbg(vdev, JSM,
"%s: vpu:0x%08x (type:0x%x, status:0x%x, id: 0x%x, result: 0x%x, payload:0x%x 0x%x 0x%x 0x%x 0x%x)\n",
c, vpu_addr, jsm_msg->type, jsm_msg->status, jsm_msg->request_id, jsm_msg->result,
payload[0], payload[1], payload[2], payload[3], payload[4]);
}
static void
ivpu_ipc_rx_mark_free(struct ivpu_device *vdev, struct ivpu_ipc_hdr *ipc_hdr,
struct vpu_jsm_msg *jsm_msg)
{
ipc_hdr->status = IVPU_IPC_HDR_FREE;
if (jsm_msg)
jsm_msg->status = VPU_JSM_MSG_FREE;
wmb(); /* Flush WC buffers for message statuses */
}
static void ivpu_ipc_mem_fini(struct ivpu_device *vdev)
{
struct ivpu_ipc_info *ipc = vdev->ipc;
ivpu_bo_free_internal(ipc->mem_rx);
ivpu_bo_free_internal(ipc->mem_tx);
}
static int
ivpu_ipc_tx_prepare(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
struct vpu_jsm_msg *req)
{
struct ivpu_ipc_info *ipc = vdev->ipc;
struct ivpu_ipc_tx_buf *tx_buf;
u32 tx_buf_vpu_addr;
u32 jsm_vpu_addr;
tx_buf_vpu_addr = gen_pool_alloc(ipc->mm_tx, sizeof(*tx_buf));
if (!tx_buf_vpu_addr) {
ivpu_err(vdev, "Failed to reserve IPC buffer, size %ld\n",
sizeof(*tx_buf));
return -ENOMEM;
}
tx_buf = ivpu_to_cpu_addr(ipc->mem_tx, tx_buf_vpu_addr);
if (drm_WARN_ON(&vdev->drm, !tx_buf)) {
gen_pool_free(ipc->mm_tx, tx_buf_vpu_addr, sizeof(*tx_buf));
return -EIO;
}
jsm_vpu_addr = tx_buf_vpu_addr + offsetof(struct ivpu_ipc_tx_buf, jsm);
if (tx_buf->ipc.status != IVPU_IPC_HDR_FREE)
ivpu_warn(vdev, "IPC message vpu:0x%x not released by firmware\n",
tx_buf_vpu_addr);
if (tx_buf->jsm.status != VPU_JSM_MSG_FREE)
ivpu_warn(vdev, "JSM message vpu:0x%x not released by firmware\n",
jsm_vpu_addr);
memset(tx_buf, 0, sizeof(*tx_buf));
tx_buf->ipc.data_addr = jsm_vpu_addr;
/* TODO: Set data_size to actual JSM message size, not union of all messages */
tx_buf->ipc.data_size = sizeof(*req);
tx_buf->ipc.channel = cons->channel;
tx_buf->ipc.src_node = 0;
tx_buf->ipc.dst_node = 1;
tx_buf->ipc.status = IVPU_IPC_HDR_ALLOCATED;
tx_buf->jsm.type = req->type;
tx_buf->jsm.status = VPU_JSM_MSG_ALLOCATED;
tx_buf->jsm.payload = req->payload;
req->request_id = atomic_inc_return(&ipc->request_id);
tx_buf->jsm.request_id = req->request_id;
cons->request_id = req->request_id;
wmb(); /* Flush WC buffers for IPC, JSM msgs */
cons->tx_vpu_addr = tx_buf_vpu_addr;
ivpu_jsm_msg_dump(vdev, "TX", &tx_buf->jsm, jsm_vpu_addr);
ivpu_ipc_msg_dump(vdev, "TX", &tx_buf->ipc, tx_buf_vpu_addr);
return 0;
}
static void ivpu_ipc_tx_release(struct ivpu_device *vdev, u32 vpu_addr)
{
struct ivpu_ipc_info *ipc = vdev->ipc;
if (vpu_addr)
gen_pool_free(ipc->mm_tx, vpu_addr, sizeof(struct ivpu_ipc_tx_buf));
}
static void ivpu_ipc_tx(struct ivpu_device *vdev, u32 vpu_addr)
{
ivpu_hw_reg_ipc_tx_set(vdev, vpu_addr);
}
void
ivpu_ipc_consumer_add(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, u32 channel)
{
struct ivpu_ipc_info *ipc = vdev->ipc;
INIT_LIST_HEAD(&cons->link);
cons->channel = channel;
cons->tx_vpu_addr = 0;
cons->request_id = 0;
spin_lock_init(&cons->rx_msg_lock);
INIT_LIST_HEAD(&cons->rx_msg_list);
init_waitqueue_head(&cons->rx_msg_wq);
spin_lock_irq(&ipc->cons_list_lock);
list_add_tail(&cons->link, &ipc->cons_list);
spin_unlock_irq(&ipc->cons_list_lock);
}
void ivpu_ipc_consumer_del(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons)
{
struct ivpu_ipc_info *ipc = vdev->ipc;
struct ivpu_ipc_rx_msg *rx_msg, *r;
spin_lock_irq(&ipc->cons_list_lock);
list_del(&cons->link);
spin_unlock_irq(&ipc->cons_list_lock);
spin_lock_irq(&cons->rx_msg_lock);
list_for_each_entry_safe(rx_msg, r, &cons->rx_msg_list, link) {
list_del(&rx_msg->link);
ivpu_ipc_rx_mark_free(vdev, rx_msg->ipc_hdr, rx_msg->jsm_msg);
atomic_dec(&ipc->rx_msg_count);
kfree(rx_msg);
}
spin_unlock_irq(&cons->rx_msg_lock);
ivpu_ipc_tx_release(vdev, cons->tx_vpu_addr);
}
static int
ivpu_ipc_send(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, struct vpu_jsm_msg *req)
{
struct ivpu_ipc_info *ipc = vdev->ipc;
int ret;
ret = mutex_lock_interruptible(&ipc->lock);
if (ret)
return ret;
if (!ipc->on) {
ret = -EAGAIN;
goto unlock;
}
ret = ivpu_ipc_tx_prepare(vdev, cons, req);
if (ret)
goto unlock;
ivpu_ipc_tx(vdev, cons->tx_vpu_addr);
unlock:
mutex_unlock(&ipc->lock);
return ret;
}
int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
struct ivpu_ipc_hdr *ipc_buf,
struct vpu_jsm_msg *ipc_payload, unsigned long timeout_ms)
{
struct ivpu_ipc_info *ipc = vdev->ipc;
struct ivpu_ipc_rx_msg *rx_msg;
int wait_ret, ret = 0;
wait_ret = wait_event_interruptible_timeout(cons->rx_msg_wq,
(IS_KTHREAD() && kthread_should_stop()) ||
!list_empty(&cons->rx_msg_list),
msecs_to_jiffies(timeout_ms));
if (IS_KTHREAD() && kthread_should_stop())
return -EINTR;
if (wait_ret == 0)
return -ETIMEDOUT;
if (wait_ret < 0)
return -ERESTARTSYS;
spin_lock_irq(&cons->rx_msg_lock);
rx_msg = list_first_entry_or_null(&cons->rx_msg_list, struct ivpu_ipc_rx_msg, link);
if (!rx_msg) {
spin_unlock_irq(&cons->rx_msg_lock);
return -EAGAIN;
}
list_del(&rx_msg->link);
spin_unlock_irq(&cons->rx_msg_lock);
if (ipc_buf)
memcpy(ipc_buf, rx_msg->ipc_hdr, sizeof(*ipc_buf));
if (rx_msg->jsm_msg) {
u32 size = min_t(int, rx_msg->ipc_hdr->data_size, sizeof(*ipc_payload));
if (rx_msg->jsm_msg->result != VPU_JSM_STATUS_SUCCESS) {
ivpu_dbg(vdev, IPC, "IPC resp result error: %d\n", rx_msg->jsm_msg->result);
ret = -EBADMSG;
}
if (ipc_payload)
memcpy(ipc_payload, rx_msg->jsm_msg, size);
}
ivpu_ipc_rx_mark_free(vdev, rx_msg->ipc_hdr, rx_msg->jsm_msg);
atomic_dec(&ipc->rx_msg_count);
kfree(rx_msg);
return ret;
}
static int
ivpu_ipc_send_receive_internal(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
enum vpu_ipc_msg_type expected_resp_type,
struct vpu_jsm_msg *resp, u32 channel,
unsigned long timeout_ms)
{
struct ivpu_ipc_consumer cons;
int ret;
ivpu_ipc_consumer_add(vdev, &cons, channel);
ret = ivpu_ipc_send(vdev, &cons, req);
if (ret) {
ivpu_warn(vdev, "IPC send failed: %d\n", ret);
goto consumer_del;
}
ret = ivpu_ipc_receive(vdev, &cons, NULL, resp, timeout_ms);
if (ret) {
ivpu_warn(vdev, "IPC receive failed: type 0x%x, ret %d\n", req->type, ret);
goto consumer_del;
}
if (resp->type != expected_resp_type) {
ivpu_warn(vdev, "Invalid JSM response type: 0x%x\n", resp->type);
ret = -EBADE;
}
consumer_del:
ivpu_ipc_consumer_del(vdev, &cons);
return ret;
}
int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
enum vpu_ipc_msg_type expected_resp_type,
struct vpu_jsm_msg *resp, u32 channel,
unsigned long timeout_ms)
{
struct vpu_jsm_msg hb_req = { .type = VPU_JSM_MSG_QUERY_ENGINE_HB };
struct vpu_jsm_msg hb_resp;
int ret, hb_ret;
ret = ivpu_rpm_get(vdev);
if (ret < 0)
return ret;
ret = ivpu_ipc_send_receive_internal(vdev, req, expected_resp_type, resp,
channel, timeout_ms);
if (ret != -ETIMEDOUT)
goto rpm_put;
hb_ret = ivpu_ipc_send_receive_internal(vdev, &hb_req, VPU_JSM_MSG_QUERY_ENGINE_HB_DONE,
&hb_resp, VPU_IPC_CHAN_ASYNC_CMD,
vdev->timeout.jsm);
if (hb_ret == -ETIMEDOUT) {
ivpu_hw_diagnose_failure(vdev);
ivpu_pm_schedule_recovery(vdev);
}
rpm_put:
ivpu_rpm_put(vdev);
return ret;
}
static bool
ivpu_ipc_match_consumer(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
struct ivpu_ipc_hdr *ipc_hdr, struct vpu_jsm_msg *jsm_msg)
{
if (cons->channel != ipc_hdr->channel)
return false;
if (!jsm_msg || jsm_msg->request_id == cons->request_id)
return true;
return false;
}
static void
ivpu_ipc_dispatch(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
struct ivpu_ipc_hdr *ipc_hdr, struct vpu_jsm_msg *jsm_msg)
{
struct ivpu_ipc_info *ipc = vdev->ipc;
struct ivpu_ipc_rx_msg *rx_msg;
unsigned long flags;
lockdep_assert_held(&ipc->cons_list_lock);
rx_msg = kzalloc(sizeof(*rx_msg), GFP_ATOMIC);
if (!rx_msg) {
ivpu_ipc_rx_mark_free(vdev, ipc_hdr, jsm_msg);
return;
}
atomic_inc(&ipc->rx_msg_count);
rx_msg->ipc_hdr = ipc_hdr;
rx_msg->jsm_msg = jsm_msg;
spin_lock_irqsave(&cons->rx_msg_lock, flags);
list_add_tail(&rx_msg->link, &cons->rx_msg_list);
spin_unlock_irqrestore(&cons->rx_msg_lock, flags);
wake_up(&cons->rx_msg_wq);
}
int ivpu_ipc_irq_handler(struct ivpu_device *vdev)
{
struct ivpu_ipc_info *ipc = vdev->ipc;
struct ivpu_ipc_consumer *cons;
struct ivpu_ipc_hdr *ipc_hdr;
struct vpu_jsm_msg *jsm_msg;
unsigned long flags;
bool dispatched;
u32 vpu_addr;
/*
* The driver needs to purge all messages from the IPC FIFO to clear the IPC interrupt.
* Unless the FIFO is drained down to 0, subsequent IPC interrupts won't be generated.
*/
while (ivpu_hw_reg_ipc_rx_count_get(vdev)) {
vpu_addr = ivpu_hw_reg_ipc_rx_addr_get(vdev);
if (vpu_addr == REG_IO_ERROR) {
ivpu_err(vdev, "Failed to read IPC rx addr register\n");
return -EIO;
}
ipc_hdr = ivpu_to_cpu_addr(ipc->mem_rx, vpu_addr);
if (!ipc_hdr) {
ivpu_warn(vdev, "IPC msg 0x%x out of range\n", vpu_addr);
continue;
}
ivpu_ipc_msg_dump(vdev, "RX", ipc_hdr, vpu_addr);
jsm_msg = NULL;
if (ipc_hdr->channel != IVPU_IPC_CHAN_BOOT_MSG) {
jsm_msg = ivpu_to_cpu_addr(ipc->mem_rx, ipc_hdr->data_addr);
if (!jsm_msg) {
ivpu_warn(vdev, "JSM msg 0x%x out of range\n", ipc_hdr->data_addr);
ivpu_ipc_rx_mark_free(vdev, ipc_hdr, NULL);
continue;
}
ivpu_jsm_msg_dump(vdev, "RX", jsm_msg, ipc_hdr->data_addr);
}
if (atomic_read(&ipc->rx_msg_count) > IPC_MAX_RX_MSG) {
ivpu_warn(vdev, "IPC RX msg dropped, msg count %d\n", IPC_MAX_RX_MSG);
ivpu_ipc_rx_mark_free(vdev, ipc_hdr, jsm_msg);
continue;
}
dispatched = false;
spin_lock_irqsave(&ipc->cons_list_lock, flags);
list_for_each_entry(cons, &ipc->cons_list, link) {
if (ivpu_ipc_match_consumer(vdev, cons, ipc_hdr, jsm_msg)) {
ivpu_ipc_dispatch(vdev, cons, ipc_hdr, jsm_msg);
dispatched = true;
break;
}
}
spin_unlock_irqrestore(&ipc->cons_list_lock, flags);
if (!dispatched) {
ivpu_dbg(vdev, IPC, "IPC RX msg 0x%x dropped (no consumer)\n", vpu_addr);
ivpu_ipc_rx_mark_free(vdev, ipc_hdr, jsm_msg);
}
}
return 0;
}
int ivpu_ipc_init(struct ivpu_device *vdev)
{
struct ivpu_ipc_info *ipc = vdev->ipc;
int ret = -ENOMEM;
ipc->mem_tx = ivpu_bo_alloc_internal(vdev, 0, SZ_16K, DRM_IVPU_BO_WC);
if (!ipc->mem_tx)
return ret;
ipc->mem_rx = ivpu_bo_alloc_internal(vdev, 0, SZ_16K, DRM_IVPU_BO_WC);
if (!ipc->mem_rx)
goto err_free_tx;
ipc->mm_tx = devm_gen_pool_create(vdev->drm.dev, __ffs(IVPU_IPC_ALIGNMENT),
-1, "TX_IPC_JSM");
if (IS_ERR(ipc->mm_tx)) {
ret = PTR_ERR(ipc->mm_tx);
ivpu_err(vdev, "Failed to create gen pool, %pe\n", ipc->mm_tx);
goto err_free_rx;
}
ret = gen_pool_add(ipc->mm_tx, ipc->mem_tx->vpu_addr, ipc->mem_tx->base.size, -1);
if (ret) {
ivpu_err(vdev, "gen_pool_add failed, ret %d\n", ret);
goto err_free_rx;
}
INIT_LIST_HEAD(&ipc->cons_list);
spin_lock_init(&ipc->cons_list_lock);
drmm_mutex_init(&vdev->drm, &ipc->lock);
ivpu_ipc_reset(vdev);
return 0;
err_free_rx:
ivpu_bo_free_internal(ipc->mem_rx);
err_free_tx:
ivpu_bo_free_internal(ipc->mem_tx);
return ret;
}
void ivpu_ipc_fini(struct ivpu_device *vdev)
{
ivpu_ipc_mem_fini(vdev);
}
void ivpu_ipc_enable(struct ivpu_device *vdev)
{
struct ivpu_ipc_info *ipc = vdev->ipc;
mutex_lock(&ipc->lock);
ipc->on = true;
mutex_unlock(&ipc->lock);
}
void ivpu_ipc_disable(struct ivpu_device *vdev)
{
struct ivpu_ipc_info *ipc = vdev->ipc;
struct ivpu_ipc_consumer *cons, *c;
unsigned long flags;
mutex_lock(&ipc->lock);
ipc->on = false;
mutex_unlock(&ipc->lock);
spin_lock_irqsave(&ipc->cons_list_lock, flags);
list_for_each_entry_safe(cons, c, &ipc->cons_list, link)
wake_up(&cons->rx_msg_wq);
spin_unlock_irqrestore(&ipc->cons_list_lock, flags);
}
void ivpu_ipc_reset(struct ivpu_device *vdev)
{
struct ivpu_ipc_info *ipc = vdev->ipc;
mutex_lock(&ipc->lock);
memset(ipc->mem_tx->kvaddr, 0, ipc->mem_tx->base.size);
memset(ipc->mem_rx->kvaddr, 0, ipc->mem_rx->base.size);
wmb(); /* Flush WC buffers for TX and RX rings */
mutex_unlock(&ipc->lock);
}

@ -0,0 +1,93 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2020-2023 Intel Corporation
*/
#ifndef __IVPU_IPC_H__
#define __IVPU_IPC_H__
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include "vpu_jsm_api.h"
struct ivpu_bo;
/* VPU FW boot notification */
#define IVPU_IPC_CHAN_BOOT_MSG 0x3ff
#define IVPU_IPC_BOOT_MSG_DATA_ADDR 0x424f4f54
/* The alignment to be used for IPC Buffers and IPC Data. */
#define IVPU_IPC_ALIGNMENT 64
#define IVPU_IPC_HDR_FREE 0
#define IVPU_IPC_HDR_ALLOCATED 1
/**
* struct ivpu_ipc_hdr - The IPC message header structure, exchanged
* with the VPU device firmware.
* @data_addr: The VPU address of the payload (JSM message)
* @data_size: The size of the payload.
* @channel: The channel used.
* @src_node: The Node ID of the sender.
* @dst_node: The Node ID of the intended receiver.
* @status: IPC buffer usage status
*/
struct ivpu_ipc_hdr {
u32 data_addr;
u32 data_size;
u16 channel;
u8 src_node;
u8 dst_node;
u8 status;
} __packed __aligned(IVPU_IPC_ALIGNMENT);
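/*
 * For reference (sketch of what ivpu_ipc.c does): a TX buffer packs one
 * ivpu_ipc_hdr followed by one vpu_jsm_msg, with data_addr pointing at the
 * embedded JSM message:
 *
 *	struct ivpu_ipc_tx_buf {
 *		struct ivpu_ipc_hdr ipc;
 *		struct vpu_jsm_msg jsm;
 *	};
 *
 *	tx_buf->ipc.data_addr = tx_buf_vpu_addr + offsetof(struct ivpu_ipc_tx_buf, jsm);
 *	tx_buf->ipc.data_size = sizeof(tx_buf->jsm);
 */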
struct ivpu_ipc_consumer {
struct list_head link;
u32 channel;
u32 tx_vpu_addr;
u32 request_id;
spinlock_t rx_msg_lock; /* Protects rx_msg_list */
struct list_head rx_msg_list;
wait_queue_head_t rx_msg_wq;
};
struct ivpu_ipc_info {
struct gen_pool *mm_tx;
struct ivpu_bo *mem_tx;
struct ivpu_bo *mem_rx;
atomic_t rx_msg_count;
spinlock_t cons_list_lock; /* Protects cons_list */
struct list_head cons_list;
atomic_t request_id;
struct mutex lock; /* Lock on status */
bool on;
};
int ivpu_ipc_init(struct ivpu_device *vdev);
void ivpu_ipc_fini(struct ivpu_device *vdev);
void ivpu_ipc_enable(struct ivpu_device *vdev);
void ivpu_ipc_disable(struct ivpu_device *vdev);
void ivpu_ipc_reset(struct ivpu_device *vdev);
int ivpu_ipc_irq_handler(struct ivpu_device *vdev);
void ivpu_ipc_consumer_add(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
u32 channel);
void ivpu_ipc_consumer_del(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons);
int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
struct ivpu_ipc_hdr *ipc_buf, struct vpu_jsm_msg *ipc_payload,
unsigned long timeout_ms);
int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
enum vpu_ipc_msg_type expected_resp_type,
struct vpu_jsm_msg *resp, u32 channel,
unsigned long timeout_ms);
#endif /* __IVPU_IPC_H__ */

@ -0,0 +1,614 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2020-2023 Intel Corporation
*/
#include <drm/drm_file.h>
#include <linux/bitfield.h>
#include <linux/highmem.h>
#include <linux/kthread.h>
#include <linux/pci.h>
#include <linux/module.h>
#include <uapi/drm/ivpu_accel.h>
#include "ivpu_drv.h"
#include "ivpu_hw.h"
#include "ivpu_ipc.h"
#include "ivpu_job.h"
#include "ivpu_jsm_msg.h"
#include "ivpu_pm.h"
#define CMD_BUF_IDX 0
#define JOB_ID_JOB_MASK GENMASK(7, 0)
#define JOB_ID_CONTEXT_MASK GENMASK(31, 8)
#define JOB_MAX_BUFFER_COUNT 65535
static unsigned int ivpu_tdr_timeout_ms;
module_param_named(tdr_timeout_ms, ivpu_tdr_timeout_ms, uint, 0644);
MODULE_PARM_DESC(tdr_timeout_ms, "Timeout for device hang detection, in milliseconds, 0 - default");
static void ivpu_cmdq_ring_db(struct ivpu_device *vdev, struct ivpu_cmdq *cmdq)
{
ivpu_hw_reg_db_set(vdev, cmdq->db_id);
}
static struct ivpu_cmdq *ivpu_cmdq_alloc(struct ivpu_file_priv *file_priv, u16 engine)
{
struct ivpu_device *vdev = file_priv->vdev;
struct vpu_job_queue_header *jobq_header;
struct ivpu_cmdq *cmdq;
cmdq = kzalloc(sizeof(*cmdq), GFP_KERNEL);
if (!cmdq)
return NULL;
cmdq->mem = ivpu_bo_alloc_internal(vdev, 0, SZ_4K, DRM_IVPU_BO_WC);
if (!cmdq->mem)
goto cmdq_free;
cmdq->db_id = file_priv->ctx.id + engine * ivpu_get_context_count(vdev);
cmdq->entry_count = (u32)((cmdq->mem->base.size - sizeof(struct vpu_job_queue_header)) /
sizeof(struct vpu_job_queue_entry));
cmdq->jobq = (struct vpu_job_queue *)cmdq->mem->kvaddr;
jobq_header = &cmdq->jobq->header;
jobq_header->engine_idx = engine;
jobq_header->head = 0;
jobq_header->tail = 0;
wmb(); /* Flush WC buffer for jobq->header */
return cmdq;
cmdq_free:
kfree(cmdq);
return NULL;
}
static void ivpu_cmdq_free(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq)
{
if (!cmdq)
return;
ivpu_bo_free_internal(cmdq->mem);
kfree(cmdq);
}
static struct ivpu_cmdq *ivpu_cmdq_acquire(struct ivpu_file_priv *file_priv, u16 engine)
{
struct ivpu_device *vdev = file_priv->vdev;
struct ivpu_cmdq *cmdq = file_priv->cmdq[engine];
int ret;
lockdep_assert_held(&file_priv->lock);
if (!cmdq) {
cmdq = ivpu_cmdq_alloc(file_priv, engine);
if (!cmdq)
return NULL;
file_priv->cmdq[engine] = cmdq;
}
if (cmdq->db_registered)
return cmdq;
ret = ivpu_jsm_register_db(vdev, file_priv->ctx.id, cmdq->db_id,
cmdq->mem->vpu_addr, cmdq->mem->base.size);
if (ret)
return NULL;
cmdq->db_registered = true;
return cmdq;
}
static void ivpu_cmdq_release_locked(struct ivpu_file_priv *file_priv, u16 engine)
{
struct ivpu_cmdq *cmdq = file_priv->cmdq[engine];
lockdep_assert_held(&file_priv->lock);
if (cmdq) {
file_priv->cmdq[engine] = NULL;
if (cmdq->db_registered)
ivpu_jsm_unregister_db(file_priv->vdev, cmdq->db_id);
ivpu_cmdq_free(file_priv, cmdq);
}
}
void ivpu_cmdq_release_all(struct ivpu_file_priv *file_priv)
{
int i;
mutex_lock(&file_priv->lock);
for (i = 0; i < IVPU_NUM_ENGINES; i++)
ivpu_cmdq_release_locked(file_priv, i);
mutex_unlock(&file_priv->lock);
}
/*
* Mark the doorbell as unregistered and reset job queue pointers.
* This function needs to be called when the VPU hardware is restarted
* and the FW loses the job queue state. The next time the job queue is used it
* will be registered again.
*/
static void ivpu_cmdq_reset_locked(struct ivpu_file_priv *file_priv, u16 engine)
{
struct ivpu_cmdq *cmdq = file_priv->cmdq[engine];
lockdep_assert_held(&file_priv->lock);
if (cmdq) {
cmdq->db_registered = false;
cmdq->jobq->header.head = 0;
cmdq->jobq->header.tail = 0;
wmb(); /* Flush WC buffer for jobq header */
}
}
static void ivpu_cmdq_reset_all(struct ivpu_file_priv *file_priv)
{
int i;
mutex_lock(&file_priv->lock);
for (i = 0; i < IVPU_NUM_ENGINES; i++)
ivpu_cmdq_reset_locked(file_priv, i);
mutex_unlock(&file_priv->lock);
}
void ivpu_cmdq_reset_all_contexts(struct ivpu_device *vdev)
{
struct ivpu_file_priv *file_priv;
unsigned long ctx_id;
xa_for_each(&vdev->context_xa, ctx_id, file_priv) {
file_priv = ivpu_file_priv_get_by_ctx_id(vdev, ctx_id);
if (!file_priv)
continue;
ivpu_cmdq_reset_all(file_priv);
ivpu_file_priv_put(&file_priv);
}
}
static int ivpu_cmdq_push_job(struct ivpu_cmdq *cmdq, struct ivpu_job *job)
{
struct ivpu_device *vdev = job->vdev;
struct vpu_job_queue_header *header = &cmdq->jobq->header;
struct vpu_job_queue_entry *entry;
u32 tail = READ_ONCE(header->tail);
u32 next_entry = (tail + 1) % cmdq->entry_count;
/* Check if there is space left in job queue */
if (next_entry == header->head) {
ivpu_dbg(vdev, JOB, "Job queue full: ctx %d engine %d db %d head %d tail %d\n",
job->file_priv->ctx.id, job->engine_idx, cmdq->db_id, header->head, tail);
return -EBUSY;
}
entry = &cmdq->jobq->job[tail];
entry->batch_buf_addr = job->cmd_buf_vpu_addr;
entry->job_id = job->job_id;
entry->flags = 0;
wmb(); /* Ensure that tail is updated after filling entry */
header->tail = next_entry;
wmb(); /* Flush WC buffer for jobq header */
return 0;
}
struct ivpu_fence {
struct dma_fence base;
spinlock_t lock; /* protects base */
struct ivpu_device *vdev;
};
static inline struct ivpu_fence *to_vpu_fence(struct dma_fence *fence)
{
return container_of(fence, struct ivpu_fence, base);
}
static const char *ivpu_fence_get_driver_name(struct dma_fence *fence)
{
return DRIVER_NAME;
}
static const char *ivpu_fence_get_timeline_name(struct dma_fence *fence)
{
struct ivpu_fence *ivpu_fence = to_vpu_fence(fence);
return dev_name(ivpu_fence->vdev->drm.dev);
}
static const struct dma_fence_ops ivpu_fence_ops = {
.get_driver_name = ivpu_fence_get_driver_name,
.get_timeline_name = ivpu_fence_get_timeline_name,
};
static struct dma_fence *ivpu_fence_create(struct ivpu_device *vdev)
{
struct ivpu_fence *fence;
fence = kzalloc(sizeof(*fence), GFP_KERNEL);
if (!fence)
return NULL;
fence->vdev = vdev;
spin_lock_init(&fence->lock);
dma_fence_init(&fence->base, &ivpu_fence_ops, &fence->lock, dma_fence_context_alloc(1), 1);
return &fence->base;
}
static void job_get(struct ivpu_job *job, struct ivpu_job **link)
{
struct ivpu_device *vdev = job->vdev;
kref_get(&job->ref);
*link = job;
ivpu_dbg(vdev, KREF, "Job get: id %u refcount %u\n", job->job_id, kref_read(&job->ref));
}
static void job_release(struct kref *ref)
{
struct ivpu_job *job = container_of(ref, struct ivpu_job, ref);
struct ivpu_device *vdev = job->vdev;
u32 i;
for (i = 0; i < job->bo_count; i++)
if (job->bos[i])
drm_gem_object_put(&job->bos[i]->base);
dma_fence_put(job->done_fence);
ivpu_file_priv_put(&job->file_priv);
ivpu_dbg(vdev, KREF, "Job released: id %u\n", job->job_id);
kfree(job);
/* Allow the VPU to get suspended, must be called after ivpu_file_priv_put() */
ivpu_rpm_put(vdev);
}
static void job_put(struct ivpu_job *job)
{
struct ivpu_device *vdev = job->vdev;
ivpu_dbg(vdev, KREF, "Job put: id %u refcount %u\n", job->job_id, kref_read(&job->ref));
kref_put(&job->ref, job_release);
}
static struct ivpu_job *
ivpu_create_job(struct ivpu_file_priv *file_priv, u32 engine_idx, u32 bo_count)
{
struct ivpu_device *vdev = file_priv->vdev;
struct ivpu_job *job;
size_t buf_size;
int ret;
ret = ivpu_rpm_get(vdev);
if (ret < 0)
return NULL;
buf_size = sizeof(*job) + bo_count * sizeof(struct ivpu_bo *);
job = kzalloc(buf_size, GFP_KERNEL);
if (!job)
goto err_rpm_put;
kref_init(&job->ref);
job->vdev = vdev;
job->engine_idx = engine_idx;
job->bo_count = bo_count;
job->done_fence = ivpu_fence_create(vdev);
if (!job->done_fence) {
ivpu_warn_ratelimited(vdev, "Failed to create a fence\n");
goto err_free_job;
}
job->file_priv = ivpu_file_priv_get(file_priv);
ivpu_dbg(vdev, JOB, "Job created: ctx %2d engine %d", file_priv->ctx.id, job->engine_idx);
return job;
err_free_job:
kfree(job);
err_rpm_put:
ivpu_rpm_put(vdev);
return NULL;
}
static int ivpu_job_done(struct ivpu_device *vdev, u32 job_id, u32 job_status)
{
struct ivpu_job *job;
job = xa_erase(&vdev->submitted_jobs_xa, job_id);
if (!job)
return -ENOENT;
if (job->file_priv->has_mmu_faults)
job_status = VPU_JSM_STATUS_ABORTED;
job->bos[CMD_BUF_IDX]->job_status = job_status;
dma_fence_signal(job->done_fence);
ivpu_dbg(vdev, JOB, "Job complete: id %3u ctx %2d engine %d status 0x%x\n",
job->job_id, job->file_priv->ctx.id, job->engine_idx, job_status);
job_put(job);
return 0;
}
static void ivpu_job_done_message(struct ivpu_device *vdev, void *msg)
{
struct vpu_ipc_msg_payload_job_done *payload;
struct vpu_jsm_msg *job_ret_msg = msg;
int ret;
payload = (struct vpu_ipc_msg_payload_job_done *)&job_ret_msg->payload;
ret = ivpu_job_done(vdev, payload->job_id, payload->job_status);
if (ret)
ivpu_err(vdev, "Failed to finish job %d: %d\n", payload->job_id, ret);
}
void ivpu_jobs_abort_all(struct ivpu_device *vdev)
{
struct ivpu_job *job;
unsigned long id;
xa_for_each(&vdev->submitted_jobs_xa, id, job)
ivpu_job_done(vdev, id, VPU_JSM_STATUS_ABORTED);
}
static int ivpu_direct_job_submission(struct ivpu_job *job)
{
struct ivpu_file_priv *file_priv = job->file_priv;
struct ivpu_device *vdev = job->vdev;
struct xa_limit job_id_range;
struct ivpu_cmdq *cmdq;
int ret;
mutex_lock(&file_priv->lock);
cmdq = ivpu_cmdq_acquire(job->file_priv, job->engine_idx);
if (!cmdq) {
ivpu_warn(vdev, "Failed get job queue, ctx %d engine %d\n",
file_priv->ctx.id, job->engine_idx);
ret = -EINVAL;
goto err_unlock;
}
job_id_range.min = FIELD_PREP(JOB_ID_CONTEXT_MASK, (file_priv->ctx.id - 1));
job_id_range.max = job_id_range.min | JOB_ID_JOB_MASK;
job_get(job, &job);
ret = xa_alloc(&vdev->submitted_jobs_xa, &job->job_id, job, job_id_range, GFP_KERNEL);
if (ret) {
ivpu_warn_ratelimited(vdev, "Failed to allocate job id: %d\n", ret);
goto err_job_put;
}
ret = ivpu_cmdq_push_job(cmdq, job);
if (ret)
goto err_xa_erase;
ivpu_dbg(vdev, JOB, "Job submitted: id %3u ctx %2d engine %d next %d\n",
job->job_id, file_priv->ctx.id, job->engine_idx, cmdq->jobq->header.tail);
if (ivpu_test_mode == IVPU_TEST_MODE_NULL_HW) {
ivpu_job_done(vdev, job->job_id, VPU_JSM_STATUS_SUCCESS);
cmdq->jobq->header.head = cmdq->jobq->header.tail;
wmb(); /* Flush WC buffer for jobq header */
} else {
ivpu_cmdq_ring_db(vdev, cmdq);
}
mutex_unlock(&file_priv->lock);
return 0;
err_xa_erase:
xa_erase(&vdev->submitted_jobs_xa, job->job_id);
err_job_put:
job_put(job);
err_unlock:
mutex_unlock(&file_priv->lock);
return ret;
}
static int
ivpu_job_prepare_bos_for_submit(struct drm_file *file, struct ivpu_job *job, u32 *buf_handles,
u32 buf_count, u32 commands_offset)
{
struct ivpu_file_priv *file_priv = file->driver_priv;
struct ivpu_device *vdev = file_priv->vdev;
struct ww_acquire_ctx acquire_ctx;
struct ivpu_bo *bo;
int ret;
u32 i;
for (i = 0; i < buf_count; i++) {
struct drm_gem_object *obj = drm_gem_object_lookup(file, buf_handles[i]);
if (!obj)
return -ENOENT;
job->bos[i] = to_ivpu_bo(obj);
ret = ivpu_bo_pin(job->bos[i]);
if (ret)
return ret;
}
bo = job->bos[CMD_BUF_IDX];
if (!dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_READ)) {
ivpu_warn(vdev, "Buffer is already in use\n");
return -EBUSY;
}
if (commands_offset >= bo->base.size) {
ivpu_warn(vdev, "Invalid command buffer offset %u\n", commands_offset);
return -EINVAL;
}
job->cmd_buf_vpu_addr = bo->vpu_addr + commands_offset;
ret = drm_gem_lock_reservations((struct drm_gem_object **)job->bos, buf_count,
&acquire_ctx);
if (ret) {
ivpu_warn(vdev, "Failed to lock reservations: %d\n", ret);
return ret;
}
for (i = 0; i < buf_count; i++) {
ret = dma_resv_reserve_fences(job->bos[i]->base.resv, 1);
if (ret) {
ivpu_warn(vdev, "Failed to reserve fences: %d\n", ret);
goto unlock_reservations;
}
}
for (i = 0; i < buf_count; i++)
dma_resv_add_fence(job->bos[i]->base.resv, job->done_fence, DMA_RESV_USAGE_WRITE);
unlock_reservations:
drm_gem_unlock_reservations((struct drm_gem_object **)job->bos, buf_count, &acquire_ctx);
wmb(); /* Flush write combining buffers */
return ret;
}
int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
int ret = 0;
struct ivpu_file_priv *file_priv = file->driver_priv;
struct ivpu_device *vdev = file_priv->vdev;
struct drm_ivpu_submit *params = data;
struct ivpu_job *job;
u32 *buf_handles;
if (params->engine > DRM_IVPU_ENGINE_COPY)
return -EINVAL;
if (params->buffer_count == 0 || params->buffer_count > JOB_MAX_BUFFER_COUNT)
return -EINVAL;
if (!IS_ALIGNED(params->commands_offset, 8))
return -EINVAL;
if (!file_priv->ctx.id)
return -EINVAL;
if (file_priv->has_mmu_faults)
return -EBADFD;
buf_handles = kcalloc(params->buffer_count, sizeof(u32), GFP_KERNEL);
if (!buf_handles)
return -ENOMEM;
ret = copy_from_user(buf_handles,
(void __user *)params->buffers_ptr,
params->buffer_count * sizeof(u32));
if (ret) {
ret = -EFAULT;
goto free_handles;
}
ivpu_dbg(vdev, JOB, "Submit ioctl: ctx %u buf_count %u\n",
file_priv->ctx.id, params->buffer_count);
job = ivpu_create_job(file_priv, params->engine, params->buffer_count);
if (!job) {
ivpu_err(vdev, "Failed to create job\n");
ret = -ENOMEM;
goto free_handles;
}
ret = ivpu_job_prepare_bos_for_submit(file, job, buf_handles, params->buffer_count,
params->commands_offset);
if (ret) {
ivpu_err(vdev, "Failed to prepare job, ret %d\n", ret);
goto job_put;
}
ret = ivpu_direct_job_submission(job);
if (ret) {
dma_fence_signal(job->done_fence);
ivpu_err(vdev, "Failed to submit job to the HW, ret %d\n", ret);
}
job_put:
job_put(job);
free_handles:
kfree(buf_handles);
return ret;
}
static int ivpu_job_done_thread(void *arg)
{
struct ivpu_device *vdev = (struct ivpu_device *)arg;
struct ivpu_ipc_consumer cons;
struct vpu_jsm_msg jsm_msg;
bool jobs_submitted;
unsigned int timeout;
int ret;
ivpu_dbg(vdev, JOB, "Started %s\n", __func__);
ivpu_ipc_consumer_add(vdev, &cons, VPU_IPC_CHAN_JOB_RET);
while (!kthread_should_stop()) {
timeout = ivpu_tdr_timeout_ms ? ivpu_tdr_timeout_ms : vdev->timeout.tdr;
jobs_submitted = !xa_empty(&vdev->submitted_jobs_xa);
ret = ivpu_ipc_receive(vdev, &cons, NULL, &jsm_msg, timeout);
if (!ret) {
ivpu_job_done_message(vdev, &jsm_msg);
} else if (ret == -ETIMEDOUT) {
if (jobs_submitted && !xa_empty(&vdev->submitted_jobs_xa)) {
ivpu_err(vdev, "TDR detected, timeout %d ms", timeout);
ivpu_hw_diagnose_failure(vdev);
ivpu_pm_schedule_recovery(vdev);
}
}
}
ivpu_ipc_consumer_del(vdev, &cons);
ivpu_jobs_abort_all(vdev);
ivpu_dbg(vdev, JOB, "Stopped %s\n", __func__);
return 0;
}
int ivpu_job_done_thread_init(struct ivpu_device *vdev)
{
struct task_struct *thread;
thread = kthread_run(&ivpu_job_done_thread, (void *)vdev, "ivpu_job_done_thread");
if (IS_ERR(thread)) {
ivpu_err(vdev, "Failed to start job completion thread\n");
return -EIO;
}
get_task_struct(thread);
wake_up_process(thread);
vdev->job_done_thread = thread;
return 0;
}
void ivpu_job_done_thread_fini(struct ivpu_device *vdev)
{
kthread_stop(vdev->job_done_thread);
put_task_struct(vdev->job_done_thread);
}

@ -0,0 +1,67 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2020-2023 Intel Corporation
*/
#ifndef __IVPU_JOB_H__
#define __IVPU_JOB_H__
#include <linux/kref.h>
#include <linux/idr.h>
#include "ivpu_gem.h"
struct ivpu_device;
struct ivpu_file_priv;
/**
* struct ivpu_cmdq - Object representing device queue used to send jobs.
* @jobq: Pointer to job queue memory shared with the device
* @mem: Memory allocated for the job queue, shared with device
* @entry_count: Number of job entries in the queue
* @db_id: Doorbell assigned to this job queue
* @db_registered: True if doorbell is registered in device
*/
struct ivpu_cmdq {
struct vpu_job_queue *jobq;
struct ivpu_bo *mem;
u32 entry_count;
u32 db_id;
bool db_registered;
};
/**
* struct ivpu_job - KMD object that represents batchbuffer / DMA buffer.
* Each batch / DMA buffer is a job to be submitted and executed by the VPU FW.
* This is a unit of execution, and is tracked by the job_id for
* any status reporting from VPU FW through IPC JOB RET/DONE message.
* @file_priv: The client that submitted this job
* @job_id: Job ID for KMD tracking and job status reporting from VPU FW
* @status: Status of the Job from IPC JOB RET/DONE message
* @batch_buffer: CPU vaddr pointing to the batch buffer memory allocated for the job
* @submit_status_offset: Offset within the batch buffer where the job completion
* handler will update the job status
*/
struct ivpu_job {
struct kref ref;
struct ivpu_device *vdev;
struct ivpu_file_priv *file_priv;
struct dma_fence *done_fence;
u64 cmd_buf_vpu_addr;
u32 job_id;
u32 engine_idx;
size_t bo_count;
struct ivpu_bo *bos[];
};
int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
void ivpu_cmdq_release_all(struct ivpu_file_priv *file_priv);
void ivpu_cmdq_reset_all_contexts(struct ivpu_device *vdev);
int ivpu_job_done_thread_init(struct ivpu_device *vdev);
void ivpu_job_done_thread_fini(struct ivpu_device *vdev);
void ivpu_jobs_abort_all(struct ivpu_device *vdev);
#endif /* __IVPU_JOB_H__ */

@ -0,0 +1,169 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2020-2023 Intel Corporation
*/
#include "ivpu_drv.h"
#include "ivpu_ipc.h"
#include "ivpu_jsm_msg.h"
int ivpu_jsm_register_db(struct ivpu_device *vdev, u32 ctx_id, u32 db_id,
u64 jobq_base, u32 jobq_size)
{
struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_REGISTER_DB };
struct vpu_jsm_msg resp;
int ret = 0;
req.payload.register_db.db_idx = db_id;
req.payload.register_db.jobq_base = jobq_base;
req.payload.register_db.jobq_size = jobq_size;
req.payload.register_db.host_ssid = ctx_id;
ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_REGISTER_DB_DONE, &resp,
VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
if (ret) {
ivpu_err(vdev, "Failed to register doorbell %d: %d\n", db_id, ret);
return ret;
}
ivpu_dbg(vdev, JSM, "Doorbell %d registered to context %d\n", db_id, ctx_id);
return 0;
}
int ivpu_jsm_unregister_db(struct ivpu_device *vdev, u32 db_id)
{
struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_UNREGISTER_DB };
struct vpu_jsm_msg resp;
int ret = 0;
req.payload.unregister_db.db_idx = db_id;
ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_UNREGISTER_DB_DONE, &resp,
VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
if (ret) {
ivpu_warn(vdev, "Failed to unregister doorbell %d: %d\n", db_id, ret);
return ret;
}
ivpu_dbg(vdev, JSM, "Doorbell %d unregistered\n", db_id);
return 0;
}
int ivpu_jsm_get_heartbeat(struct ivpu_device *vdev, u32 engine, u64 *heartbeat)
{
struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_QUERY_ENGINE_HB };
struct vpu_jsm_msg resp;
int ret;
if (engine > VPU_ENGINE_COPY)
return -EINVAL;
req.payload.query_engine_hb.engine_idx = engine;
ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_QUERY_ENGINE_HB_DONE, &resp,
VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
if (ret) {
ivpu_err(vdev, "Failed to get heartbeat from engine %d: %d\n", engine, ret);
return ret;
}
*heartbeat = resp.payload.query_engine_hb_done.heartbeat;
return ret;
}
int ivpu_jsm_reset_engine(struct ivpu_device *vdev, u32 engine)
{
struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_ENGINE_RESET };
struct vpu_jsm_msg resp;
int ret;
if (engine > VPU_ENGINE_COPY)
return -EINVAL;
req.payload.engine_reset.engine_idx = engine;
ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_ENGINE_RESET_DONE, &resp,
VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
if (ret)
ivpu_err(vdev, "Failed to reset engine %d: %d\n", engine, ret);
return ret;
}
int ivpu_jsm_preempt_engine(struct ivpu_device *vdev, u32 engine, u32 preempt_id)
{
struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_ENGINE_PREEMPT };
struct vpu_jsm_msg resp;
int ret;
if (engine > VPU_ENGINE_COPY)
return -EINVAL;
req.payload.engine_preempt.engine_idx = engine;
req.payload.engine_preempt.preempt_id = preempt_id;
ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_ENGINE_PREEMPT_DONE, &resp,
VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
if (ret)
ivpu_err(vdev, "Failed to preempt engine %d: %d\n", engine, ret);
return ret;
}
int ivpu_jsm_dyndbg_control(struct ivpu_device *vdev, char *command, size_t size)
{
struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_DYNDBG_CONTROL };
struct vpu_jsm_msg resp;
int ret;
if (!strncpy(req.payload.dyndbg_control.dyndbg_cmd, command, VPU_DYNDBG_CMD_MAX_LEN - 1))
return -ENOMEM;
ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_DYNDBG_CONTROL_RSP, &resp,
VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
if (ret)
ivpu_warn(vdev, "Failed to send command \"%s\": ret %d\n", command, ret);
return ret;
}
int ivpu_jsm_trace_get_capability(struct ivpu_device *vdev, u32 *trace_destination_mask,
u64 *trace_hw_component_mask)
{
struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_TRACE_GET_CAPABILITY };
struct vpu_jsm_msg resp;
int ret;
ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_TRACE_GET_CAPABILITY_RSP, &resp,
VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
if (ret) {
ivpu_warn(vdev, "Failed to get trace capability: %d\n", ret);
return ret;
}
*trace_destination_mask = resp.payload.trace_capability.trace_destination_mask;
*trace_hw_component_mask = resp.payload.trace_capability.trace_hw_component_mask;
return ret;
}
int ivpu_jsm_trace_set_config(struct ivpu_device *vdev, u32 trace_level, u32 trace_destination_mask,
u64 trace_hw_component_mask)
{
struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_TRACE_SET_CONFIG };
struct vpu_jsm_msg resp;
int ret;
req.payload.trace_config.trace_level = trace_level;
req.payload.trace_config.trace_destination_mask = trace_destination_mask;
req.payload.trace_config.trace_hw_component_mask = trace_hw_component_mask;
ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_TRACE_SET_CONFIG_RSP, &resp,
VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
if (ret)
ivpu_warn(vdev, "Failed to set config: %d\n", ret);
return ret;
}

@ -0,0 +1,23 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2020-2023 Intel Corporation
*/
#ifndef __IVPU_JSM_MSG_H__
#define __IVPU_JSM_MSG_H__
#include "vpu_jsm_api.h"
int ivpu_jsm_register_db(struct ivpu_device *vdev, u32 ctx_id, u32 db_id,
u64 jobq_base, u32 jobq_size);
int ivpu_jsm_unregister_db(struct ivpu_device *vdev, u32 db_id);
int ivpu_jsm_get_heartbeat(struct ivpu_device *vdev, u32 engine, u64 *heartbeat);
int ivpu_jsm_reset_engine(struct ivpu_device *vdev, u32 engine);
int ivpu_jsm_preempt_engine(struct ivpu_device *vdev, u32 engine, u32 preempt_id);
int ivpu_jsm_dyndbg_control(struct ivpu_device *vdev, char *command, size_t size);
int ivpu_jsm_trace_get_capability(struct ivpu_device *vdev, u32 *trace_destination_mask,
u64 *trace_hw_component_mask);
int ivpu_jsm_trace_set_config(struct ivpu_device *vdev, u32 trace_level, u32 trace_destination_mask,
u64 trace_hw_component_mask);
#endif

@ -0,0 +1,883 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2020-2023 Intel Corporation
*/
#include <linux/circ_buf.h>
#include <linux/highmem.h>
#include "ivpu_drv.h"
#include "ivpu_hw_mtl_reg.h"
#include "ivpu_hw_reg_io.h"
#include "ivpu_mmu.h"
#include "ivpu_mmu_context.h"
#include "ivpu_pm.h"
#define IVPU_MMU_IDR0_REF 0x080f3e0f
#define IVPU_MMU_IDR0_REF_SIMICS 0x080f3e1f
#define IVPU_MMU_IDR1_REF 0x0e739d18
#define IVPU_MMU_IDR3_REF 0x0000003c
#define IVPU_MMU_IDR5_REF 0x00040070
#define IVPU_MMU_IDR5_REF_SIMICS 0x00000075
#define IVPU_MMU_IDR5_REF_FPGA 0x00800075
#define IVPU_MMU_CDTAB_ENT_SIZE 64
#define IVPU_MMU_CDTAB_ENT_COUNT_LOG2 8 /* 256 entries */
#define IVPU_MMU_CDTAB_ENT_COUNT ((u32)1 << IVPU_MMU_CDTAB_ENT_COUNT_LOG2)
#define IVPU_MMU_STREAM_ID0 0
#define IVPU_MMU_STREAM_ID3 3
#define IVPU_MMU_STRTAB_ENT_SIZE 64
#define IVPU_MMU_STRTAB_ENT_COUNT 4
#define IVPU_MMU_STRTAB_CFG_LOG2SIZE 2
#define IVPU_MMU_STRTAB_CFG IVPU_MMU_STRTAB_CFG_LOG2SIZE
#define IVPU_MMU_Q_COUNT_LOG2 4 /* 16 entries */
#define IVPU_MMU_Q_COUNT ((u32)1 << IVPU_MMU_Q_COUNT_LOG2)
#define IVPU_MMU_Q_WRAP_BIT (IVPU_MMU_Q_COUNT << 1)
#define IVPU_MMU_Q_WRAP_MASK (IVPU_MMU_Q_WRAP_BIT - 1)
#define IVPU_MMU_Q_IDX_MASK (IVPU_MMU_Q_COUNT - 1)
#define IVPU_MMU_Q_IDX(val) ((val) & IVPU_MMU_Q_IDX_MASK)
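/*
* Worked example (illustrative only): with IVPU_MMU_Q_COUNT_LOG2 == 4 the
* queue holds 16 entries, the prod/cons counters advance modulo
* IVPU_MMU_Q_WRAP_BIT == 32 (i.e. masked with IVPU_MMU_Q_WRAP_MASK), and the
* slot actually addressed is the low four bits, e.g. IVPU_MMU_Q_IDX(17) == 1.
*/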
#define IVPU_MMU_CMDQ_CMD_SIZE 16
#define IVPU_MMU_CMDQ_SIZE (IVPU_MMU_Q_COUNT * IVPU_MMU_CMDQ_CMD_SIZE)
#define IVPU_MMU_EVTQ_CMD_SIZE 32
#define IVPU_MMU_EVTQ_SIZE (IVPU_MMU_Q_COUNT * IVPU_MMU_EVTQ_CMD_SIZE)
#define IVPU_MMU_CMD_OPCODE GENMASK(7, 0)
#define IVPU_MMU_CMD_SYNC_0_CS GENMASK(13, 12)
#define IVPU_MMU_CMD_SYNC_0_MSH GENMASK(23, 22)
#define IVPU_MMU_CMD_SYNC_0_MSI_ATTR GENMASK(27, 24)
#define IVPU_MMU_CMD_SYNC_0_MSI_DATA GENMASK(63, 32)
#define IVPU_MMU_CMD_CFGI_0_SSEC BIT(10)
#define IVPU_MMU_CMD_CFGI_0_SSV BIT(11)
#define IVPU_MMU_CMD_CFGI_0_SSID GENMASK(31, 12)
#define IVPU_MMU_CMD_CFGI_0_SID GENMASK(63, 32)
#define IVPU_MMU_CMD_CFGI_1_RANGE GENMASK(4, 0)
#define IVPU_MMU_CMD_TLBI_0_ASID GENMASK(63, 48)
#define IVPU_MMU_CMD_TLBI_0_VMID GENMASK(47, 32)
#define CMD_PREFETCH_CFG 0x1
#define CMD_CFGI_STE 0x3
#define CMD_CFGI_ALL 0x4
#define CMD_CFGI_CD 0x5
#define CMD_CFGI_CD_ALL 0x6
#define CMD_TLBI_NH_ASID 0x11
#define CMD_TLBI_EL2_ALL 0x20
#define CMD_TLBI_NSNH_ALL 0x30
#define CMD_SYNC 0x46
#define IVPU_MMU_EVT_F_UUT 0x01
#define IVPU_MMU_EVT_C_BAD_STREAMID 0x02
#define IVPU_MMU_EVT_F_STE_FETCH 0x03
#define IVPU_MMU_EVT_C_BAD_STE 0x04
#define IVPU_MMU_EVT_F_BAD_ATS_TREQ 0x05
#define IVPU_MMU_EVT_F_STREAM_DISABLED 0x06
#define IVPU_MMU_EVT_F_TRANSL_FORBIDDEN 0x07
#define IVPU_MMU_EVT_C_BAD_SUBSTREAMID 0x08
#define IVPU_MMU_EVT_F_CD_FETCH 0x09
#define IVPU_MMU_EVT_C_BAD_CD 0x0a
#define IVPU_MMU_EVT_F_WALK_EABT 0x0b
#define IVPU_MMU_EVT_F_TRANSLATION 0x10
#define IVPU_MMU_EVT_F_ADDR_SIZE 0x11
#define IVPU_MMU_EVT_F_ACCESS 0x12
#define IVPU_MMU_EVT_F_PERMISSION 0x13
#define IVPU_MMU_EVT_F_TLB_CONFLICT 0x20
#define IVPU_MMU_EVT_F_CFG_CONFLICT 0x21
#define IVPU_MMU_EVT_E_PAGE_REQUEST 0x24
#define IVPU_MMU_EVT_F_VMS_FETCH 0x25
#define IVPU_MMU_EVT_OP_MASK GENMASK_ULL(7, 0)
#define IVPU_MMU_EVT_SSID_MASK GENMASK_ULL(31, 12)
#define IVPU_MMU_Q_BASE_RWA BIT(62)
#define IVPU_MMU_Q_BASE_ADDR_MASK GENMASK_ULL(51, 5)
#define IVPU_MMU_STRTAB_BASE_RA BIT(62)
#define IVPU_MMU_STRTAB_BASE_ADDR_MASK GENMASK_ULL(51, 6)
#define IVPU_MMU_IRQ_EVTQ_EN BIT(2)
#define IVPU_MMU_IRQ_GERROR_EN BIT(0)
#define IVPU_MMU_CR0_ATSCHK BIT(4)
#define IVPU_MMU_CR0_CMDQEN BIT(3)
#define IVPU_MMU_CR0_EVTQEN BIT(2)
#define IVPU_MMU_CR0_PRIQEN BIT(1)
#define IVPU_MMU_CR0_SMMUEN BIT(0)
#define IVPU_MMU_CR1_TABLE_SH GENMASK(11, 10)
#define IVPU_MMU_CR1_TABLE_OC GENMASK(9, 8)
#define IVPU_MMU_CR1_TABLE_IC GENMASK(7, 6)
#define IVPU_MMU_CR1_QUEUE_SH GENMASK(5, 4)
#define IVPU_MMU_CR1_QUEUE_OC GENMASK(3, 2)
#define IVPU_MMU_CR1_QUEUE_IC GENMASK(1, 0)
#define IVPU_MMU_CACHE_NC 0
#define IVPU_MMU_CACHE_WB 1
#define IVPU_MMU_CACHE_WT 2
#define IVPU_MMU_SH_NSH 0
#define IVPU_MMU_SH_OSH 2
#define IVPU_MMU_SH_ISH 3
#define IVPU_MMU_CMDQ_OP GENMASK_ULL(7, 0)
#define IVPU_MMU_CD_0_TCR_T0SZ GENMASK_ULL(5, 0)
#define IVPU_MMU_CD_0_TCR_TG0 GENMASK_ULL(7, 6)
#define IVPU_MMU_CD_0_TCR_IRGN0 GENMASK_ULL(9, 8)
#define IVPU_MMU_CD_0_TCR_ORGN0 GENMASK_ULL(11, 10)
#define IVPU_MMU_CD_0_TCR_SH0 GENMASK_ULL(13, 12)
#define IVPU_MMU_CD_0_TCR_EPD0 BIT_ULL(14)
#define IVPU_MMU_CD_0_TCR_EPD1 BIT_ULL(30)
#define IVPU_MMU_CD_0_ENDI BIT(15)
#define IVPU_MMU_CD_0_V BIT(31)
#define IVPU_MMU_CD_0_TCR_IPS GENMASK_ULL(34, 32)
#define IVPU_MMU_CD_0_TCR_TBI0 BIT_ULL(38)
#define IVPU_MMU_CD_0_AA64 BIT(41)
#define IVPU_MMU_CD_0_S BIT(44)
#define IVPU_MMU_CD_0_R BIT(45)
#define IVPU_MMU_CD_0_A BIT(46)
#define IVPU_MMU_CD_0_ASET BIT(47)
#define IVPU_MMU_CD_0_ASID GENMASK_ULL(63, 48)
#define IVPU_MMU_CD_1_TTB0_MASK GENMASK_ULL(51, 4)
#define IVPU_MMU_STE_0_S1CDMAX GENMASK_ULL(63, 59)
#define IVPU_MMU_STE_0_S1FMT GENMASK_ULL(5, 4)
#define IVPU_MMU_STE_0_S1FMT_LINEAR 0
#define IVPU_MMU_STE_DWORDS 8
#define IVPU_MMU_STE_0_CFG_S1_TRANS 5
#define IVPU_MMU_STE_0_CFG GENMASK_ULL(3, 1)
#define IVPU_MMU_STE_0_S1CTXPTR_MASK GENMASK_ULL(51, 6)
#define IVPU_MMU_STE_0_V BIT(0)
#define IVPU_MMU_STE_1_STRW_NSEL1 0ul
#define IVPU_MMU_STE_1_CONT GENMASK_ULL(16, 13)
#define IVPU_MMU_STE_1_STRW GENMASK_ULL(31, 30)
#define IVPU_MMU_STE_1_PRIVCFG GENMASK_ULL(49, 48)
#define IVPU_MMU_STE_1_PRIVCFG_UNPRIV 2ul
#define IVPU_MMU_STE_1_INSTCFG GENMASK_ULL(51, 50)
#define IVPU_MMU_STE_1_INSTCFG_DATA 2ul
#define IVPU_MMU_STE_1_MEV BIT(19)
#define IVPU_MMU_STE_1_S1STALLD BIT(27)
#define IVPU_MMU_STE_1_S1C_CACHE_NC 0ul
#define IVPU_MMU_STE_1_S1C_CACHE_WBRA 1ul
#define IVPU_MMU_STE_1_S1C_CACHE_WT 2ul
#define IVPU_MMU_STE_1_S1C_CACHE_WB 3ul
#define IVPU_MMU_STE_1_S1CIR GENMASK_ULL(3, 2)
#define IVPU_MMU_STE_1_S1COR GENMASK_ULL(5, 4)
#define IVPU_MMU_STE_1_S1CSH GENMASK_ULL(7, 6)
#define IVPU_MMU_STE_1_S1DSS GENMASK_ULL(1, 0)
#define IVPU_MMU_STE_1_S1DSS_TERMINATE 0x0
#define IVPU_MMU_REG_TIMEOUT_US (10 * USEC_PER_MSEC)
#define IVPU_MMU_QUEUE_TIMEOUT_US (100 * USEC_PER_MSEC)
#define IVPU_MMU_GERROR_ERR_MASK ((REG_FLD(MTL_VPU_HOST_MMU_GERROR, CMDQ)) | \
(REG_FLD(MTL_VPU_HOST_MMU_GERROR, EVTQ_ABT)) | \
(REG_FLD(MTL_VPU_HOST_MMU_GERROR, PRIQ_ABT)) | \
(REG_FLD(MTL_VPU_HOST_MMU_GERROR, MSI_CMDQ_ABT)) | \
(REG_FLD(MTL_VPU_HOST_MMU_GERROR, MSI_EVTQ_ABT)) | \
(REG_FLD(MTL_VPU_HOST_MMU_GERROR, MSI_PRIQ_ABT)) | \
(REG_FLD(MTL_VPU_HOST_MMU_GERROR, MSI_ABT)))
static char *ivpu_mmu_event_to_str(u32 cmd)
{
switch (cmd) {
case IVPU_MMU_EVT_F_UUT:
return "Unsupported Upstream Transaction";
case IVPU_MMU_EVT_C_BAD_STREAMID:
return "Transaction StreamID out of range";
case IVPU_MMU_EVT_F_STE_FETCH:
return "Fetch of STE caused external abort";
case IVPU_MMU_EVT_C_BAD_STE:
return "Used STE invalid";
case IVPU_MMU_EVT_F_BAD_ATS_TREQ:
return "Address Request disallowed for a StreamID";
case IVPU_MMU_EVT_F_STREAM_DISABLED:
return "Transaction marks non-substream disabled";
case IVPU_MMU_EVT_F_TRANSL_FORBIDDEN:
return "MMU bypass is disallowed for this StreamID";
case IVPU_MMU_EVT_C_BAD_SUBSTREAMID:
return "Invalid StreamID";
case IVPU_MMU_EVT_F_CD_FETCH:
return "Fetch of CD caused external abort";
case IVPU_MMU_EVT_C_BAD_CD:
return "Fetched CD invalid";
case IVPU_MMU_EVT_F_WALK_EABT:
return " An external abort occurred fetching a TLB";
case IVPU_MMU_EVT_F_TRANSLATION:
return "Translation fault";
case IVPU_MMU_EVT_F_ADDR_SIZE:
return " Output address caused address size fault";
case IVPU_MMU_EVT_F_ACCESS:
return "Access flag fault";
case IVPU_MMU_EVT_F_PERMISSION:
return "Permission fault occurred on page access";
case IVPU_MMU_EVT_F_TLB_CONFLICT:
return "A TLB conflict";
case IVPU_MMU_EVT_F_CFG_CONFLICT:
return "A configuration cache conflict";
case IVPU_MMU_EVT_E_PAGE_REQUEST:
return "Page request hint from a client device";
case IVPU_MMU_EVT_F_VMS_FETCH:
return "Fetch of VMS caused external abort";
default:
return "Unknown CMDQ command";
}
}
static void ivpu_mmu_config_check(struct ivpu_device *vdev)
{
u32 val_ref;
u32 val;
if (ivpu_is_simics(vdev))
val_ref = IVPU_MMU_IDR0_REF_SIMICS;
else
val_ref = IVPU_MMU_IDR0_REF;
val = REGV_RD32(MTL_VPU_HOST_MMU_IDR0);
if (val != val_ref)
ivpu_dbg(vdev, MMU, "IDR0 0x%x != IDR0_REF 0x%x\n", val, val_ref);
val = REGV_RD32(MTL_VPU_HOST_MMU_IDR1);
if (val != IVPU_MMU_IDR1_REF)
ivpu_dbg(vdev, MMU, "IDR1 0x%x != IDR1_REF 0x%x\n", val, IVPU_MMU_IDR1_REF);
val = REGV_RD32(MTL_VPU_HOST_MMU_IDR3);
if (val != IVPU_MMU_IDR3_REF)
ivpu_dbg(vdev, MMU, "IDR3 0x%x != IDR3_REF 0x%x\n", val, IVPU_MMU_IDR3_REF);
if (ivpu_is_simics(vdev))
val_ref = IVPU_MMU_IDR5_REF_SIMICS;
else if (ivpu_is_fpga(vdev))
val_ref = IVPU_MMU_IDR5_REF_FPGA;
else
val_ref = IVPU_MMU_IDR5_REF;
val = REGV_RD32(MTL_VPU_HOST_MMU_IDR5);
if (val != val_ref)
ivpu_dbg(vdev, MMU, "IDR5 0x%x != IDR5_REF 0x%x\n", val, val_ref);
}
static int ivpu_mmu_cdtab_alloc(struct ivpu_device *vdev)
{
struct ivpu_mmu_info *mmu = vdev->mmu;
struct ivpu_mmu_cdtab *cdtab = &mmu->cdtab;
size_t size = IVPU_MMU_CDTAB_ENT_COUNT * IVPU_MMU_CDTAB_ENT_SIZE;
cdtab->base = dmam_alloc_coherent(vdev->drm.dev, size, &cdtab->dma, GFP_KERNEL);
if (!cdtab->base)
return -ENOMEM;
ivpu_dbg(vdev, MMU, "CDTAB alloc: dma=%pad size=%zu\n", &cdtab->dma, size);
return 0;
}
static int ivpu_mmu_strtab_alloc(struct ivpu_device *vdev)
{
struct ivpu_mmu_info *mmu = vdev->mmu;
struct ivpu_mmu_strtab *strtab = &mmu->strtab;
size_t size = IVPU_MMU_STRTAB_ENT_COUNT * IVPU_MMU_STRTAB_ENT_SIZE;
strtab->base = dmam_alloc_coherent(vdev->drm.dev, size, &strtab->dma, GFP_KERNEL);
if (!strtab->base)
return -ENOMEM;
strtab->base_cfg = IVPU_MMU_STRTAB_CFG;
strtab->dma_q = IVPU_MMU_STRTAB_BASE_RA;
strtab->dma_q |= strtab->dma & IVPU_MMU_STRTAB_BASE_ADDR_MASK;
ivpu_dbg(vdev, MMU, "STRTAB alloc: dma=%pad dma_q=%pad size=%zu\n",
&strtab->dma, &strtab->dma_q, size);
return 0;
}
static int ivpu_mmu_cmdq_alloc(struct ivpu_device *vdev)
{
struct ivpu_mmu_info *mmu = vdev->mmu;
struct ivpu_mmu_queue *q = &mmu->cmdq;
q->base = dmam_alloc_coherent(vdev->drm.dev, IVPU_MMU_CMDQ_SIZE, &q->dma, GFP_KERNEL);
if (!q->base)
return -ENOMEM;
q->dma_q = IVPU_MMU_Q_BASE_RWA;
q->dma_q |= q->dma & IVPU_MMU_Q_BASE_ADDR_MASK;
q->dma_q |= IVPU_MMU_Q_COUNT_LOG2;
ivpu_dbg(vdev, MMU, "CMDQ alloc: dma=%pad dma_q=%pad size=%u\n",
&q->dma, &q->dma_q, IVPU_MMU_CMDQ_SIZE);
return 0;
}
static int ivpu_mmu_evtq_alloc(struct ivpu_device *vdev)
{
struct ivpu_mmu_info *mmu = vdev->mmu;
struct ivpu_mmu_queue *q = &mmu->evtq;
q->base = dmam_alloc_coherent(vdev->drm.dev, IVPU_MMU_EVTQ_SIZE, &q->dma, GFP_KERNEL);
if (!q->base)
return -ENOMEM;
q->dma_q = IVPU_MMU_Q_BASE_RWA;
q->dma_q |= q->dma & IVPU_MMU_Q_BASE_ADDR_MASK;
q->dma_q |= IVPU_MMU_Q_COUNT_LOG2;
ivpu_dbg(vdev, MMU, "EVTQ alloc: dma=%pad dma_q=%pad size=%u\n",
&q->dma, &q->dma_q, IVPU_MMU_EVTQ_SIZE);
return 0;
}
static int ivpu_mmu_structs_alloc(struct ivpu_device *vdev)
{
int ret;
ret = ivpu_mmu_cdtab_alloc(vdev);
if (ret) {
ivpu_err(vdev, "Failed to allocate cdtab: %d\n", ret);
return ret;
}
ret = ivpu_mmu_strtab_alloc(vdev);
if (ret) {
ivpu_err(vdev, "Failed to allocate strtab: %d\n", ret);
return ret;
}
ret = ivpu_mmu_cmdq_alloc(vdev);
if (ret) {
ivpu_err(vdev, "Failed to allocate cmdq: %d\n", ret);
return ret;
}
ret = ivpu_mmu_evtq_alloc(vdev);
if (ret)
ivpu_err(vdev, "Failed to allocate evtq: %d\n", ret);
return ret;
}
static int ivpu_mmu_reg_write(struct ivpu_device *vdev, u32 reg, u32 val)
{
u32 reg_ack = reg + 4; /* ACK register is 4B after base register */
u32 val_ack;
int ret;
REGV_WR32(reg, val);
ret = REGV_POLL(reg_ack, val_ack, (val == val_ack), IVPU_MMU_REG_TIMEOUT_US);
if (ret)
ivpu_err(vdev, "Failed to write register 0x%x\n", reg);
return ret;
}
static int ivpu_mmu_irqs_setup(struct ivpu_device *vdev)
{
u32 irq_ctrl = IVPU_MMU_IRQ_EVTQ_EN | IVPU_MMU_IRQ_GERROR_EN;
int ret;
ret = ivpu_mmu_reg_write(vdev, MTL_VPU_HOST_MMU_IRQ_CTRL, 0);
if (ret)
return ret;
return ivpu_mmu_reg_write(vdev, MTL_VPU_HOST_MMU_IRQ_CTRL, irq_ctrl);
}
static int ivpu_mmu_cmdq_wait_for_cons(struct ivpu_device *vdev)
{
struct ivpu_mmu_queue *cmdq = &vdev->mmu->cmdq;
return REGV_POLL(MTL_VPU_HOST_MMU_CMDQ_CONS, cmdq->cons, (cmdq->prod == cmdq->cons),
IVPU_MMU_QUEUE_TIMEOUT_US);
}
static int ivpu_mmu_cmdq_cmd_write(struct ivpu_device *vdev, const char *name, u64 data0, u64 data1)
{
struct ivpu_mmu_queue *q = &vdev->mmu->cmdq;
u64 *queue_buffer = q->base;
int idx = IVPU_MMU_Q_IDX(q->prod) * (IVPU_MMU_CMDQ_CMD_SIZE / sizeof(*queue_buffer));
if (!CIRC_SPACE(IVPU_MMU_Q_IDX(q->prod), IVPU_MMU_Q_IDX(q->cons), IVPU_MMU_Q_COUNT)) {
ivpu_err(vdev, "Failed to write MMU CMD %s\n", name);
return -EBUSY;
}
queue_buffer[idx] = data0;
queue_buffer[idx + 1] = data1;
q->prod = (q->prod + 1) & IVPU_MMU_Q_WRAP_MASK;
ivpu_dbg(vdev, MMU, "CMD write: %s data: 0x%llx 0x%llx\n", name, data0, data1);
return 0;
}
static int ivpu_mmu_cmdq_sync(struct ivpu_device *vdev)
{
struct ivpu_mmu_queue *q = &vdev->mmu->cmdq;
u64 val;
int ret;
val = FIELD_PREP(IVPU_MMU_CMD_OPCODE, CMD_SYNC) |
FIELD_PREP(IVPU_MMU_CMD_SYNC_0_CS, 0x2) |
FIELD_PREP(IVPU_MMU_CMD_SYNC_0_MSH, 0x3) |
FIELD_PREP(IVPU_MMU_CMD_SYNC_0_MSI_ATTR, 0xf);
ret = ivpu_mmu_cmdq_cmd_write(vdev, "SYNC", val, 0);
if (ret)
return ret;
clflush_cache_range(q->base, IVPU_MMU_CMDQ_SIZE);
REGV_WR32(MTL_VPU_HOST_MMU_CMDQ_PROD, q->prod);
ret = ivpu_mmu_cmdq_wait_for_cons(vdev);
if (ret)
ivpu_err(vdev, "Timed out waiting for consumer: %d\n", ret);
return ret;
}
static int ivpu_mmu_cmdq_write_cfgi_all(struct ivpu_device *vdev)
{
u64 data0 = FIELD_PREP(IVPU_MMU_CMD_OPCODE, CMD_CFGI_ALL);
u64 data1 = FIELD_PREP(IVPU_MMU_CMD_CFGI_1_RANGE, 0x1f);
return ivpu_mmu_cmdq_cmd_write(vdev, "CFGI_ALL", data0, data1);
}
static int ivpu_mmu_cmdq_write_tlbi_nh_asid(struct ivpu_device *vdev, u16 ssid)
{
u64 val = FIELD_PREP(IVPU_MMU_CMD_OPCODE, CMD_TLBI_NH_ASID) |
FIELD_PREP(IVPU_MMU_CMD_TLBI_0_ASID, ssid);
return ivpu_mmu_cmdq_cmd_write(vdev, "TLBI_NH_ASID", val, 0);
}
static int ivpu_mmu_cmdq_write_tlbi_nsnh_all(struct ivpu_device *vdev)
{
u64 val = FIELD_PREP(IVPU_MMU_CMD_OPCODE, CMD_TLBI_NSNH_ALL);
return ivpu_mmu_cmdq_cmd_write(vdev, "TLBI_NSNH_ALL", val, 0);
}
static int ivpu_mmu_reset(struct ivpu_device *vdev)
{
struct ivpu_mmu_info *mmu = vdev->mmu;
u32 val;
int ret;
memset(mmu->cmdq.base, 0, IVPU_MMU_CMDQ_SIZE);
clflush_cache_range(mmu->cmdq.base, IVPU_MMU_CMDQ_SIZE);
mmu->cmdq.prod = 0;
mmu->cmdq.cons = 0;
memset(mmu->evtq.base, 0, IVPU_MMU_EVTQ_SIZE);
clflush_cache_range(mmu->evtq.base, IVPU_MMU_EVTQ_SIZE);
mmu->evtq.prod = 0;
mmu->evtq.cons = 0;
ret = ivpu_mmu_reg_write(vdev, MTL_VPU_HOST_MMU_CR0, 0);
if (ret)
return ret;
val = FIELD_PREP(IVPU_MMU_CR1_TABLE_SH, IVPU_MMU_SH_ISH) |
FIELD_PREP(IVPU_MMU_CR1_TABLE_OC, IVPU_MMU_CACHE_WB) |
FIELD_PREP(IVPU_MMU_CR1_TABLE_IC, IVPU_MMU_CACHE_WB) |
FIELD_PREP(IVPU_MMU_CR1_QUEUE_SH, IVPU_MMU_SH_ISH) |
FIELD_PREP(IVPU_MMU_CR1_QUEUE_OC, IVPU_MMU_CACHE_WB) |
FIELD_PREP(IVPU_MMU_CR1_QUEUE_IC, IVPU_MMU_CACHE_WB);
REGV_WR32(MTL_VPU_HOST_MMU_CR1, val);
REGV_WR64(MTL_VPU_HOST_MMU_STRTAB_BASE, mmu->strtab.dma_q);
REGV_WR32(MTL_VPU_HOST_MMU_STRTAB_BASE_CFG, mmu->strtab.base_cfg);
REGV_WR64(MTL_VPU_HOST_MMU_CMDQ_BASE, mmu->cmdq.dma_q);
REGV_WR32(MTL_VPU_HOST_MMU_CMDQ_PROD, 0);
REGV_WR32(MTL_VPU_HOST_MMU_CMDQ_CONS, 0);
val = IVPU_MMU_CR0_CMDQEN;
ret = ivpu_mmu_reg_write(vdev, MTL_VPU_HOST_MMU_CR0, val);
if (ret)
return ret;
ret = ivpu_mmu_cmdq_write_cfgi_all(vdev);
if (ret)
return ret;
ret = ivpu_mmu_cmdq_write_tlbi_nsnh_all(vdev);
if (ret)
return ret;
ret = ivpu_mmu_cmdq_sync(vdev);
if (ret)
return ret;
REGV_WR64(MTL_VPU_HOST_MMU_EVTQ_BASE, mmu->evtq.dma_q);
REGV_WR32(MTL_VPU_HOST_MMU_EVTQ_PROD_SEC, 0);
REGV_WR32(MTL_VPU_HOST_MMU_EVTQ_CONS_SEC, 0);
val |= IVPU_MMU_CR0_EVTQEN;
ret = ivpu_mmu_reg_write(vdev, MTL_VPU_HOST_MMU_CR0, val);
if (ret)
return ret;
val |= IVPU_MMU_CR0_ATSCHK;
ret = ivpu_mmu_reg_write(vdev, MTL_VPU_HOST_MMU_CR0, val);
if (ret)
return ret;
ret = ivpu_mmu_irqs_setup(vdev);
if (ret)
return ret;
val |= IVPU_MMU_CR0_SMMUEN;
return ivpu_mmu_reg_write(vdev, MTL_VPU_HOST_MMU_CR0, val);
}
static void ivpu_mmu_strtab_link_cd(struct ivpu_device *vdev, u32 sid)
{
struct ivpu_mmu_info *mmu = vdev->mmu;
struct ivpu_mmu_strtab *strtab = &mmu->strtab;
struct ivpu_mmu_cdtab *cdtab = &mmu->cdtab;
u64 *entry = strtab->base + (sid * IVPU_MMU_STRTAB_ENT_SIZE);
u64 str[2];
str[0] = FIELD_PREP(IVPU_MMU_STE_0_CFG, IVPU_MMU_STE_0_CFG_S1_TRANS) |
FIELD_PREP(IVPU_MMU_STE_0_S1CDMAX, IVPU_MMU_CDTAB_ENT_COUNT_LOG2) |
FIELD_PREP(IVPU_MMU_STE_0_S1FMT, IVPU_MMU_STE_0_S1FMT_LINEAR) |
IVPU_MMU_STE_0_V |
(cdtab->dma & IVPU_MMU_STE_0_S1CTXPTR_MASK);
str[1] = FIELD_PREP(IVPU_MMU_STE_1_S1DSS, IVPU_MMU_STE_1_S1DSS_TERMINATE) |
FIELD_PREP(IVPU_MMU_STE_1_S1CIR, IVPU_MMU_STE_1_S1C_CACHE_NC) |
FIELD_PREP(IVPU_MMU_STE_1_S1COR, IVPU_MMU_STE_1_S1C_CACHE_NC) |
FIELD_PREP(IVPU_MMU_STE_1_S1CSH, IVPU_MMU_SH_NSH) |
FIELD_PREP(IVPU_MMU_STE_1_PRIVCFG, IVPU_MMU_STE_1_PRIVCFG_UNPRIV) |
FIELD_PREP(IVPU_MMU_STE_1_INSTCFG, IVPU_MMU_STE_1_INSTCFG_DATA) |
FIELD_PREP(IVPU_MMU_STE_1_STRW, IVPU_MMU_STE_1_STRW_NSEL1) |
FIELD_PREP(IVPU_MMU_STE_1_CONT, IVPU_MMU_STRTAB_CFG_LOG2SIZE) |
IVPU_MMU_STE_1_MEV |
IVPU_MMU_STE_1_S1STALLD;
WRITE_ONCE(entry[1], str[1]);
WRITE_ONCE(entry[0], str[0]);
clflush_cache_range(entry, IVPU_MMU_STRTAB_ENT_SIZE);
ivpu_dbg(vdev, MMU, "STRTAB write entry (SSID=%u): 0x%llx, 0x%llx\n", sid, str[0], str[1]);
}
static int ivpu_mmu_strtab_init(struct ivpu_device *vdev)
{
ivpu_mmu_strtab_link_cd(vdev, IVPU_MMU_STREAM_ID0);
ivpu_mmu_strtab_link_cd(vdev, IVPU_MMU_STREAM_ID3);
return 0;
}
int ivpu_mmu_invalidate_tlb(struct ivpu_device *vdev, u16 ssid)
{
struct ivpu_mmu_info *mmu = vdev->mmu;
int ret;
ret = mutex_lock_interruptible(&mmu->lock);
if (ret)
return ret;
if (!mmu->on) {
ret = 0;
goto unlock;
}
ret = ivpu_mmu_cmdq_write_tlbi_nh_asid(vdev, ssid);
if (ret)
goto unlock;
ret = ivpu_mmu_cmdq_sync(vdev);
unlock:
mutex_unlock(&mmu->lock);
return ret;
}
static int ivpu_mmu_cd_add(struct ivpu_device *vdev, u32 ssid, u64 cd_dma)
{
struct ivpu_mmu_info *mmu = vdev->mmu;
struct ivpu_mmu_cdtab *cdtab = &mmu->cdtab;
u64 *entry;
u64 cd[4];
int ret;
if (ssid > IVPU_MMU_CDTAB_ENT_COUNT)
return -EINVAL;
entry = cdtab->base + (ssid * IVPU_MMU_CDTAB_ENT_SIZE);
if (cd_dma != 0) {
cd[0] = FIELD_PREP(IVPU_MMU_CD_0_TCR_T0SZ, 26) |
FIELD_PREP(IVPU_MMU_CD_0_TCR_TG0, 0) |
FIELD_PREP(IVPU_MMU_CD_0_TCR_IRGN0, 0) |
FIELD_PREP(IVPU_MMU_CD_0_TCR_ORGN0, 0) |
FIELD_PREP(IVPU_MMU_CD_0_TCR_SH0, 0) |
FIELD_PREP(IVPU_MMU_CD_0_TCR_IPS, 3) |
FIELD_PREP(IVPU_MMU_CD_0_ASID, ssid) |
IVPU_MMU_CD_0_TCR_EPD1 |
IVPU_MMU_CD_0_AA64 |
IVPU_MMU_CD_0_R |
IVPU_MMU_CD_0_ASET |
IVPU_MMU_CD_0_V;
cd[1] = cd_dma & IVPU_MMU_CD_1_TTB0_MASK;
cd[2] = 0;
cd[3] = 0x0000000000007444;
/* For global context generate memory fault on VPU */
if (ssid == IVPU_GLOBAL_CONTEXT_MMU_SSID)
cd[0] |= IVPU_MMU_CD_0_A;
} else {
memset(cd, 0, sizeof(cd));
}
WRITE_ONCE(entry[1], cd[1]);
WRITE_ONCE(entry[2], cd[2]);
WRITE_ONCE(entry[3], cd[3]);
WRITE_ONCE(entry[0], cd[0]);
clflush_cache_range(entry, IVPU_MMU_CDTAB_ENT_SIZE);
ivpu_dbg(vdev, MMU, "CDTAB %s entry (SSID=%u, dma=%pad): 0x%llx, 0x%llx, 0x%llx, 0x%llx\n",
cd_dma ? "write" : "clear", ssid, &cd_dma, cd[0], cd[1], cd[2], cd[3]);
ret = mutex_lock_interruptible(&mmu->lock);
if (ret)
return ret;
if (!mmu->on) {
ret = 0;
goto unlock;
}
ret = ivpu_mmu_cmdq_write_cfgi_all(vdev);
if (ret)
goto unlock;
ret = ivpu_mmu_cmdq_sync(vdev);
unlock:
mutex_unlock(&mmu->lock);
return ret;
}
static int ivpu_mmu_cd_add_gbl(struct ivpu_device *vdev)
{
int ret;
ret = ivpu_mmu_cd_add(vdev, 0, vdev->gctx.pgtable.pgd_dma);
if (ret)
ivpu_err(vdev, "Failed to add global CD entry: %d\n", ret);
return ret;
}
static int ivpu_mmu_cd_add_user(struct ivpu_device *vdev, u32 ssid, dma_addr_t cd_dma)
{
int ret;
if (ssid == 0) {
ivpu_err(vdev, "Invalid SSID: %u\n", ssid);
return -EINVAL;
}
ret = ivpu_mmu_cd_add(vdev, ssid, cd_dma);
if (ret)
ivpu_err(vdev, "Failed to add CD entry SSID=%u: %d\n", ssid, ret);
return ret;
}
int ivpu_mmu_init(struct ivpu_device *vdev)
{
struct ivpu_mmu_info *mmu = vdev->mmu;
int ret;
ivpu_dbg(vdev, MMU, "Init..\n");
drmm_mutex_init(&vdev->drm, &mmu->lock);
ivpu_mmu_config_check(vdev);
ret = ivpu_mmu_structs_alloc(vdev);
if (ret)
return ret;
ret = ivpu_mmu_strtab_init(vdev);
if (ret) {
ivpu_err(vdev, "Failed to initialize strtab: %d\n", ret);
return ret;
}
ret = ivpu_mmu_cd_add_gbl(vdev);
if (ret) {
ivpu_err(vdev, "Failed to initialize strtab: %d\n", ret);
return ret;
}
ret = ivpu_mmu_enable(vdev);
if (ret) {
ivpu_err(vdev, "Failed to resume MMU: %d\n", ret);
return ret;
}
ivpu_dbg(vdev, MMU, "Init done\n");
return 0;
}
int ivpu_mmu_enable(struct ivpu_device *vdev)
{
struct ivpu_mmu_info *mmu = vdev->mmu;
int ret;
mutex_lock(&mmu->lock);
mmu->on = true;
ret = ivpu_mmu_reset(vdev);
if (ret) {
ivpu_err(vdev, "Failed to reset MMU: %d\n", ret);
goto err;
}
ret = ivpu_mmu_cmdq_write_cfgi_all(vdev);
if (ret)
goto err;
ret = ivpu_mmu_cmdq_write_tlbi_nsnh_all(vdev);
if (ret)
goto err;
ret = ivpu_mmu_cmdq_sync(vdev);
if (ret)
goto err;
mutex_unlock(&mmu->lock);
return 0;
err:
mmu->on = false;
mutex_unlock(&mmu->lock);
return ret;
}
void ivpu_mmu_disable(struct ivpu_device *vdev)
{
struct ivpu_mmu_info *mmu = vdev->mmu;
mutex_lock(&mmu->lock);
mmu->on = false;
mutex_unlock(&mmu->lock);
}
static void ivpu_mmu_dump_event(struct ivpu_device *vdev, u32 *event)
{
u32 ssid = FIELD_GET(IVPU_MMU_EVT_SSID_MASK, event[0]);
u32 op = FIELD_GET(IVPU_MMU_EVT_OP_MASK, event[0]);
u64 fetch_addr = ((u64)event[7]) << 32 | event[6];
u64 in_addr = ((u64)event[5]) << 32 | event[4];
u32 sid = event[1];
ivpu_err(vdev, "MMU EVTQ: 0x%x (%s) SSID: %d SID: %d, e[2] %08x, e[3] %08x, in addr: 0x%llx, fetch addr: 0x%llx\n",
op, ivpu_mmu_event_to_str(op), ssid, sid, event[2], event[3], in_addr, fetch_addr);
}
static u32 *ivpu_mmu_get_event(struct ivpu_device *vdev)
{
struct ivpu_mmu_queue *evtq = &vdev->mmu->evtq;
u32 idx = IVPU_MMU_Q_IDX(evtq->cons);
u32 *evt = evtq->base + (idx * IVPU_MMU_EVTQ_CMD_SIZE);
evtq->prod = REGV_RD32(MTL_VPU_HOST_MMU_EVTQ_PROD_SEC);
if (!CIRC_CNT(IVPU_MMU_Q_IDX(evtq->prod), IVPU_MMU_Q_IDX(evtq->cons), IVPU_MMU_Q_COUNT))
return NULL;
clflush_cache_range(evt, IVPU_MMU_EVTQ_CMD_SIZE);
evtq->cons = (evtq->cons + 1) & IVPU_MMU_Q_WRAP_MASK;
REGV_WR32(MTL_VPU_HOST_MMU_EVTQ_CONS_SEC, evtq->cons);
return evt;
}
void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev)
{
bool schedule_recovery = false;
u32 *event;
u32 ssid;
ivpu_dbg(vdev, IRQ, "MMU event queue\n");
while ((event = ivpu_mmu_get_event(vdev)) != NULL) {
ivpu_mmu_dump_event(vdev, event);
ssid = FIELD_GET(IVPU_MMU_EVT_SSID_MASK, event[0]);
if (ssid == IVPU_GLOBAL_CONTEXT_MMU_SSID)
schedule_recovery = true;
else
ivpu_mmu_user_context_mark_invalid(vdev, ssid);
}
if (schedule_recovery)
ivpu_pm_schedule_recovery(vdev);
}
void ivpu_mmu_irq_gerr_handler(struct ivpu_device *vdev)
{
u32 gerror_val, gerrorn_val, active;
ivpu_dbg(vdev, IRQ, "MMU error\n");
gerror_val = REGV_RD32(MTL_VPU_HOST_MMU_GERROR);
gerrorn_val = REGV_RD32(MTL_VPU_HOST_MMU_GERRORN);
active = gerror_val ^ gerrorn_val;
if (!(active & IVPU_MMU_GERROR_ERR_MASK))
return;
if (REG_TEST_FLD(MTL_VPU_HOST_MMU_GERROR, MSI_ABT, active))
ivpu_warn_ratelimited(vdev, "MMU MSI ABT write aborted\n");
if (REG_TEST_FLD(MTL_VPU_HOST_MMU_GERROR, MSI_PRIQ_ABT, active))
ivpu_warn_ratelimited(vdev, "MMU PRIQ MSI ABT write aborted\n");
if (REG_TEST_FLD(MTL_VPU_HOST_MMU_GERROR, MSI_EVTQ_ABT, active))
ivpu_warn_ratelimited(vdev, "MMU EVTQ MSI ABT write aborted\n");
if (REG_TEST_FLD(MTL_VPU_HOST_MMU_GERROR, MSI_CMDQ_ABT, active))
ivpu_warn_ratelimited(vdev, "MMU CMDQ MSI ABT write aborted\n");
if (REG_TEST_FLD(MTL_VPU_HOST_MMU_GERROR, PRIQ_ABT, active))
ivpu_err_ratelimited(vdev, "MMU PRIQ write aborted\n");
if (REG_TEST_FLD(MTL_VPU_HOST_MMU_GERROR, EVTQ_ABT, active))
ivpu_err_ratelimited(vdev, "MMU EVTQ write aborted\n");
if (REG_TEST_FLD(MTL_VPU_HOST_MMU_GERROR, CMDQ, active))
ivpu_err_ratelimited(vdev, "MMU CMDQ write aborted\n");
REGV_WR32(MTL_VPU_HOST_MMU_GERRORN, gerror_val);
}
int ivpu_mmu_set_pgtable(struct ivpu_device *vdev, int ssid, struct ivpu_mmu_pgtable *pgtable)
{
return ivpu_mmu_cd_add_user(vdev, ssid, pgtable->pgd_dma);
}
void ivpu_mmu_clear_pgtable(struct ivpu_device *vdev, int ssid)
{
ivpu_mmu_cd_add_user(vdev, ssid, 0); /* 0 will clear CD entry */
}

@ -0,0 +1,50 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2020-2023 Intel Corporation
*/
#ifndef __IVPU_MMU_H__
#define __IVPU_MMU_H__
struct ivpu_device;
struct ivpu_mmu_cdtab {
void *base;
dma_addr_t dma;
};
struct ivpu_mmu_strtab {
void *base;
dma_addr_t dma;
u64 dma_q;
u32 base_cfg;
};
struct ivpu_mmu_queue {
void *base;
dma_addr_t dma;
u64 dma_q;
u32 prod;
u32 cons;
};
struct ivpu_mmu_info {
struct mutex lock; /* Protects cdtab, strtab, cmdq, on */
struct ivpu_mmu_cdtab cdtab;
struct ivpu_mmu_strtab strtab;
struct ivpu_mmu_queue cmdq;
struct ivpu_mmu_queue evtq;
bool on;
};
int ivpu_mmu_init(struct ivpu_device *vdev);
void ivpu_mmu_disable(struct ivpu_device *vdev);
int ivpu_mmu_enable(struct ivpu_device *vdev);
int ivpu_mmu_set_pgtable(struct ivpu_device *vdev, int ssid, struct ivpu_mmu_pgtable *pgtable);
void ivpu_mmu_clear_pgtable(struct ivpu_device *vdev, int ssid);
int ivpu_mmu_invalidate_tlb(struct ivpu_device *vdev, u16 ssid);
void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev);
void ivpu_mmu_irq_gerr_handler(struct ivpu_device *vdev);
#endif /* __IVPU_MMU_H__ */

@ -0,0 +1,398 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2020-2023 Intel Corporation
*/
#include <linux/bitfield.h>
#include <linux/highmem.h>
#include "ivpu_drv.h"
#include "ivpu_hw.h"
#include "ivpu_mmu.h"
#include "ivpu_mmu_context.h"
#define IVPU_MMU_PGD_INDEX_MASK GENMASK(38, 30)
#define IVPU_MMU_PMD_INDEX_MASK GENMASK(29, 21)
#define IVPU_MMU_PTE_INDEX_MASK GENMASK(20, 12)
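/*
* Worked example (illustrative only): for vpu_addr == 0x80045000 the three
* level walk uses PGD index FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, addr) == 2,
* PMD index FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, addr) == 0 and PTE index
* FIELD_GET(IVPU_MMU_PTE_INDEX_MASK, addr) == 0x45, each selecting one of
* 512 entries in its table.
*/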
#define IVPU_MMU_ENTRY_FLAGS_MASK GENMASK(11, 0)
#define IVPU_MMU_ENTRY_FLAG_NG BIT(11)
#define IVPU_MMU_ENTRY_FLAG_AF BIT(10)
#define IVPU_MMU_ENTRY_FLAG_USER BIT(6)
#define IVPU_MMU_ENTRY_FLAG_LLC_COHERENT BIT(2)
#define IVPU_MMU_ENTRY_FLAG_TYPE_PAGE BIT(1)
#define IVPU_MMU_ENTRY_FLAG_VALID BIT(0)
#define IVPU_MMU_PAGE_SIZE SZ_4K
#define IVPU_MMU_PTE_MAP_SIZE (IVPU_MMU_PGTABLE_ENTRIES * IVPU_MMU_PAGE_SIZE)
#define IVPU_MMU_PMD_MAP_SIZE (IVPU_MMU_PGTABLE_ENTRIES * IVPU_MMU_PTE_MAP_SIZE)
#define IVPU_MMU_PGTABLE_SIZE (IVPU_MMU_PGTABLE_ENTRIES * sizeof(u64))
#define IVPU_MMU_DUMMY_ADDRESS 0xdeadb000
#define IVPU_MMU_ENTRY_VALID (IVPU_MMU_ENTRY_FLAG_TYPE_PAGE | IVPU_MMU_ENTRY_FLAG_VALID)
#define IVPU_MMU_ENTRY_INVALID (IVPU_MMU_DUMMY_ADDRESS & ~IVPU_MMU_ENTRY_FLAGS_MASK)
#define IVPU_MMU_ENTRY_MAPPED (IVPU_MMU_ENTRY_FLAG_AF | IVPU_MMU_ENTRY_FLAG_USER | \
IVPU_MMU_ENTRY_FLAG_NG | IVPU_MMU_ENTRY_VALID)
static int ivpu_mmu_pgtable_init(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable)
{
dma_addr_t pgd_dma;
u64 *pgd;
pgd = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pgd_dma, GFP_KERNEL);
if (!pgd)
return -ENOMEM;
pgtable->pgd = pgd;
pgtable->pgd_dma = pgd_dma;
return 0;
}
static void ivpu_mmu_pgtable_free(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable)
{
int pgd_index, pmd_index;
for (pgd_index = 0; pgd_index < IVPU_MMU_PGTABLE_ENTRIES; ++pgd_index) {
u64 **pmd_entries = pgtable->pgd_cpu_entries[pgd_index];
u64 *pmd = pgtable->pgd_entries[pgd_index];
if (!pmd_entries)
continue;
for (pmd_index = 0; pmd_index < IVPU_MMU_PGTABLE_ENTRIES; ++pmd_index) {
if (pmd_entries[pmd_index])
dma_free_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE,
pmd_entries[pmd_index],
pmd[pmd_index] & ~IVPU_MMU_ENTRY_FLAGS_MASK);
}
kfree(pmd_entries);
dma_free_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, pgtable->pgd_entries[pgd_index],
pgtable->pgd[pgd_index] & ~IVPU_MMU_ENTRY_FLAGS_MASK);
}
dma_free_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, pgtable->pgd,
pgtable->pgd_dma & ~IVPU_MMU_ENTRY_FLAGS_MASK);
}
static u64*
ivpu_mmu_ensure_pmd(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable, u64 pgd_index)
{
u64 **pmd_entries;
dma_addr_t pmd_dma;
u64 *pmd;
if (pgtable->pgd_entries[pgd_index])
return pgtable->pgd_entries[pgd_index];
pmd = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pmd_dma, GFP_KERNEL);
if (!pmd)
return NULL;
pmd_entries = kzalloc(IVPU_MMU_PGTABLE_SIZE, GFP_KERNEL);
if (!pmd_entries)
goto err_free_pmd;
pgtable->pgd_entries[pgd_index] = pmd;
pgtable->pgd_cpu_entries[pgd_index] = pmd_entries;
pgtable->pgd[pgd_index] = pmd_dma | IVPU_MMU_ENTRY_VALID;
return pmd;
err_free_pmd:
dma_free_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, pmd, pmd_dma);
return NULL;
}
static u64*
ivpu_mmu_ensure_pte(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable,
int pgd_index, int pmd_index)
{
dma_addr_t pte_dma;
u64 *pte;
if (pgtable->pgd_cpu_entries[pgd_index][pmd_index])
return pgtable->pgd_cpu_entries[pgd_index][pmd_index];
pte = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pte_dma, GFP_KERNEL);
if (!pte)
return NULL;
pgtable->pgd_cpu_entries[pgd_index][pmd_index] = pte;
pgtable->pgd_entries[pgd_index][pmd_index] = pte_dma | IVPU_MMU_ENTRY_VALID;
return pte;
}
static int
ivpu_mmu_context_map_page(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
u64 vpu_addr, dma_addr_t dma_addr, int prot)
{
u64 *pte;
int pgd_index = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr);
int pmd_index = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr);
int pte_index = FIELD_GET(IVPU_MMU_PTE_INDEX_MASK, vpu_addr);
/* Allocate PMD - second level page table if needed */
if (!ivpu_mmu_ensure_pmd(vdev, &ctx->pgtable, pgd_index))
return -ENOMEM;
/* Allocate PTE - third level page table if needed */
pte = ivpu_mmu_ensure_pte(vdev, &ctx->pgtable, pgd_index, pmd_index);
if (!pte)
return -ENOMEM;
/* Update PTE - third level page table with DMA address */
pte[pte_index] = dma_addr | prot;
return 0;
}
static void ivpu_mmu_context_unmap_page(struct ivpu_mmu_context *ctx, u64 vpu_addr)
{
int pgd_index = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr);
int pmd_index = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr);
int pte_index = FIELD_GET(IVPU_MMU_PTE_INDEX_MASK, vpu_addr);
/* Update PTE with dummy physical address and clear flags */
ctx->pgtable.pgd_cpu_entries[pgd_index][pmd_index][pte_index] = IVPU_MMU_ENTRY_INVALID;
}
static void
ivpu_mmu_context_flush_page_tables(struct ivpu_mmu_context *ctx, u64 vpu_addr, size_t size)
{
u64 end_addr = vpu_addr + size;
u64 *pgd = ctx->pgtable.pgd;
/* Align to PMD entry (2 MB) */
vpu_addr &= ~(IVPU_MMU_PTE_MAP_SIZE - 1);
while (vpu_addr < end_addr) {
int pgd_index = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr);
u64 pmd_end = (pgd_index + 1) * (u64)IVPU_MMU_PMD_MAP_SIZE;
u64 *pmd = ctx->pgtable.pgd_entries[pgd_index];
while (vpu_addr < end_addr && vpu_addr < pmd_end) {
int pmd_index = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr);
u64 *pte = ctx->pgtable.pgd_cpu_entries[pgd_index][pmd_index];
clflush_cache_range(pte, IVPU_MMU_PGTABLE_SIZE);
vpu_addr += IVPU_MMU_PTE_MAP_SIZE;
}
clflush_cache_range(pmd, IVPU_MMU_PGTABLE_SIZE);
}
clflush_cache_range(pgd, IVPU_MMU_PGTABLE_SIZE);
}
static int
ivpu_mmu_context_map_pages(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
u64 vpu_addr, dma_addr_t dma_addr, size_t size, int prot)
{
while (size) {
int ret = ivpu_mmu_context_map_page(vdev, ctx, vpu_addr, dma_addr, prot);
if (ret)
return ret;
vpu_addr += IVPU_MMU_PAGE_SIZE;
dma_addr += IVPU_MMU_PAGE_SIZE;
size -= IVPU_MMU_PAGE_SIZE;
}
return 0;
}
static void ivpu_mmu_context_unmap_pages(struct ivpu_mmu_context *ctx, u64 vpu_addr, size_t size)
{
while (size) {
ivpu_mmu_context_unmap_page(ctx, vpu_addr);
vpu_addr += IVPU_MMU_PAGE_SIZE;
size -= IVPU_MMU_PAGE_SIZE;
}
}
int
ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
u64 vpu_addr, struct sg_table *sgt, bool llc_coherent)
{
struct scatterlist *sg;
int prot;
int ret;
u64 i;
if (!IS_ALIGNED(vpu_addr, IVPU_MMU_PAGE_SIZE))
return -EINVAL;
/*
* VPU is only 32 bit, but DMA engine is 38 bit
* Ranges < 2 GB are reserved for VPU internal registers
* Limit range to 8 GB
*/
if (vpu_addr < SZ_2G || vpu_addr > SZ_8G)
return -EINVAL;
prot = IVPU_MMU_ENTRY_MAPPED;
if (llc_coherent)
prot |= IVPU_MMU_ENTRY_FLAG_LLC_COHERENT;
mutex_lock(&ctx->lock);
for_each_sgtable_dma_sg(sgt, sg, i) {
u64 dma_addr = sg_dma_address(sg) - sg->offset;
size_t size = sg_dma_len(sg) + sg->offset;
ret = ivpu_mmu_context_map_pages(vdev, ctx, vpu_addr, dma_addr, size, prot);
if (ret) {
ivpu_err(vdev, "Failed to map context pages\n");
mutex_unlock(&ctx->lock);
return ret;
}
ivpu_mmu_context_flush_page_tables(ctx, vpu_addr, size);
vpu_addr += size;
}
mutex_unlock(&ctx->lock);
ret = ivpu_mmu_invalidate_tlb(vdev, ctx->id);
if (ret)
ivpu_err(vdev, "Failed to invalidate TLB for ctx %u: %d\n", ctx->id, ret);
return ret;
}
void
ivpu_mmu_context_unmap_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
u64 vpu_addr, struct sg_table *sgt)
{
struct scatterlist *sg;
int ret;
u64 i;
if (!IS_ALIGNED(vpu_addr, IVPU_MMU_PAGE_SIZE))
ivpu_warn(vdev, "Unaligned vpu_addr: 0x%llx\n", vpu_addr);
mutex_lock(&ctx->lock);
for_each_sgtable_dma_sg(sgt, sg, i) {
size_t size = sg_dma_len(sg) + sg->offset;
ivpu_mmu_context_unmap_pages(ctx, vpu_addr, size);
ivpu_mmu_context_flush_page_tables(ctx, vpu_addr, size);
vpu_addr += size;
}
mutex_unlock(&ctx->lock);
ret = ivpu_mmu_invalidate_tlb(vdev, ctx->id);
if (ret)
ivpu_warn(vdev, "Failed to invalidate TLB for ctx %u: %d\n", ctx->id, ret);
}
int
ivpu_mmu_context_insert_node_locked(struct ivpu_mmu_context *ctx,
const struct ivpu_addr_range *range,
u64 size, struct drm_mm_node *node)
{
lockdep_assert_held(&ctx->lock);
return drm_mm_insert_node_in_range(&ctx->mm, node, size, IVPU_MMU_PAGE_SIZE,
0, range->start, range->end, DRM_MM_INSERT_BEST);
}
void
ivpu_mmu_context_remove_node_locked(struct ivpu_mmu_context *ctx, struct drm_mm_node *node)
{
lockdep_assert_held(&ctx->lock);
drm_mm_remove_node(node);
}
static int
ivpu_mmu_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 context_id)
{
u64 start, end;
int ret;
mutex_init(&ctx->lock);
INIT_LIST_HEAD(&ctx->bo_list);
ret = ivpu_mmu_pgtable_init(vdev, &ctx->pgtable);
if (ret)
return ret;
if (!context_id) {
start = vdev->hw->ranges.global_low.start;
end = vdev->hw->ranges.global_high.end;
} else {
start = vdev->hw->ranges.user_low.start;
end = vdev->hw->ranges.user_high.end;
}
drm_mm_init(&ctx->mm, start, end - start);
ctx->id = context_id;
return 0;
}
static void ivpu_mmu_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx)
{
drm_WARN_ON(&vdev->drm, !ctx->pgtable.pgd);
mutex_destroy(&ctx->lock);
ivpu_mmu_pgtable_free(vdev, &ctx->pgtable);
drm_mm_takedown(&ctx->mm);
}
int ivpu_mmu_global_context_init(struct ivpu_device *vdev)
{
return ivpu_mmu_context_init(vdev, &vdev->gctx, IVPU_GLOBAL_CONTEXT_MMU_SSID);
}
void ivpu_mmu_global_context_fini(struct ivpu_device *vdev)
{
return ivpu_mmu_context_fini(vdev, &vdev->gctx);
}
void ivpu_mmu_user_context_mark_invalid(struct ivpu_device *vdev, u32 ssid)
{
struct ivpu_file_priv *file_priv;
xa_lock(&vdev->context_xa);
file_priv = xa_load(&vdev->context_xa, ssid);
if (file_priv)
file_priv->has_mmu_faults = true;
xa_unlock(&vdev->context_xa);
}
int ivpu_mmu_user_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 ctx_id)
{
int ret;
drm_WARN_ON(&vdev->drm, !ctx_id);
ret = ivpu_mmu_context_init(vdev, ctx, ctx_id);
if (ret) {
ivpu_err(vdev, "Failed to initialize context: %d\n", ret);
return ret;
}
ret = ivpu_mmu_set_pgtable(vdev, ctx_id, &ctx->pgtable);
if (ret) {
ivpu_err(vdev, "Failed to set page table: %d\n", ret);
goto err_context_fini;
}
return 0;
err_context_fini:
ivpu_mmu_context_fini(vdev, ctx);
return ret;
}
void ivpu_mmu_user_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx)
{
drm_WARN_ON(&vdev->drm, !ctx->id);
ivpu_mmu_clear_pgtable(vdev, ctx->id);
ivpu_mmu_context_fini(vdev, ctx);
}

@ -0,0 +1,50 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2020-2023 Intel Corporation
*/
#ifndef __IVPU_MMU_CONTEXT_H__
#define __IVPU_MMU_CONTEXT_H__
#include <drm/drm_mm.h>
struct ivpu_device;
struct ivpu_file_priv;
struct ivpu_addr_range;
#define IVPU_MMU_PGTABLE_ENTRIES 512
struct ivpu_mmu_pgtable {
u64 **pgd_cpu_entries[IVPU_MMU_PGTABLE_ENTRIES];
u64 *pgd_entries[IVPU_MMU_PGTABLE_ENTRIES];
u64 *pgd;
dma_addr_t pgd_dma;
};
struct ivpu_mmu_context {
struct mutex lock; /* protects: mm, pgtable, bo_list */
struct drm_mm mm;
struct ivpu_mmu_pgtable pgtable;
struct list_head bo_list;
u32 id;
};
int ivpu_mmu_global_context_init(struct ivpu_device *vdev);
void ivpu_mmu_global_context_fini(struct ivpu_device *vdev);
int ivpu_mmu_user_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 ctx_id);
void ivpu_mmu_user_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx);
void ivpu_mmu_user_context_mark_invalid(struct ivpu_device *vdev, u32 ssid);
int ivpu_mmu_context_insert_node_locked(struct ivpu_mmu_context *ctx,
const struct ivpu_addr_range *range,
u64 size, struct drm_mm_node *node);
void ivpu_mmu_context_remove_node_locked(struct ivpu_mmu_context *ctx,
struct drm_mm_node *node);
int ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
u64 vpu_addr, struct sg_table *sgt, bool llc_coherent);
void ivpu_mmu_context_unmap_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
u64 vpu_addr, struct sg_table *sgt);
#endif /* __IVPU_MMU_CONTEXT_H__ */

@ -0,0 +1,329 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2020-2023 Intel Corporation
*/
#include <linux/highmem.h>
#include <linux/moduleparam.h>
#include <linux/pci.h>
#include <linux/pm_runtime.h>
#include <linux/reboot.h>
#include "vpu_boot_api.h"
#include "ivpu_drv.h"
#include "ivpu_hw.h"
#include "ivpu_fw.h"
#include "ivpu_ipc.h"
#include "ivpu_job.h"
#include "ivpu_mmu.h"
#include "ivpu_pm.h"
static bool ivpu_disable_recovery;
module_param_named_unsafe(disable_recovery, ivpu_disable_recovery, bool, 0644);
MODULE_PARM_DESC(disable_recovery, "Disables recovery when VPU hang is detected");
#define PM_RESCHEDULE_LIMIT 5
static void ivpu_pm_prepare_cold_boot(struct ivpu_device *vdev)
{
struct ivpu_fw_info *fw = vdev->fw;
ivpu_cmdq_reset_all_contexts(vdev);
ivpu_ipc_reset(vdev);
ivpu_fw_load(vdev);
fw->entry_point = fw->cold_boot_entry_point;
}
static void ivpu_pm_prepare_warm_boot(struct ivpu_device *vdev)
{
struct ivpu_fw_info *fw = vdev->fw;
struct vpu_boot_params *bp = fw->mem->kvaddr;
if (!bp->save_restore_ret_address) {
ivpu_pm_prepare_cold_boot(vdev);
return;
}
ivpu_dbg(vdev, FW_BOOT, "Save/restore entry point %llx", bp->save_restore_ret_address);
fw->entry_point = bp->save_restore_ret_address;
}
static int ivpu_suspend(struct ivpu_device *vdev)
{
int ret;
ret = ivpu_shutdown(vdev);
if (ret) {
ivpu_err(vdev, "Failed to shutdown VPU: %d\n", ret);
return ret;
}
return ret;
}
static int ivpu_resume(struct ivpu_device *vdev)
{
int ret;
retry:
ret = ivpu_hw_power_up(vdev);
if (ret) {
ivpu_err(vdev, "Failed to power up HW: %d\n", ret);
return ret;
}
ret = ivpu_mmu_enable(vdev);
if (ret) {
ivpu_err(vdev, "Failed to resume MMU: %d\n", ret);
ivpu_hw_power_down(vdev);
return ret;
}
ret = ivpu_boot(vdev);
if (ret) {
ivpu_mmu_disable(vdev);
ivpu_hw_power_down(vdev);
if (!ivpu_fw_is_cold_boot(vdev)) {
ivpu_warn(vdev, "Failed to resume the FW: %d. Retrying cold boot..\n", ret);
ivpu_pm_prepare_cold_boot(vdev);
goto retry;
} else {
ivpu_err(vdev, "Failed to resume the FW: %d\n", ret);
}
}
return ret;
}
static void ivpu_pm_recovery_work(struct work_struct *work)
{
struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, recovery_work);
struct ivpu_device *vdev = pm->vdev;
char *evt[2] = {"IVPU_PM_EVENT=IVPU_RECOVER", NULL};
int ret;
ret = pci_reset_function(to_pci_dev(vdev->drm.dev));
if (ret)
ivpu_err(vdev, "Failed to reset VPU: %d\n", ret);
kobject_uevent_env(&vdev->drm.dev->kobj, KOBJ_CHANGE, evt);
}
void ivpu_pm_schedule_recovery(struct ivpu_device *vdev)
{
struct ivpu_pm_info *pm = vdev->pm;
if (ivpu_disable_recovery) {
ivpu_err(vdev, "Recovery not available when disable_recovery param is set\n");
return;
}
if (ivpu_is_fpga(vdev)) {
ivpu_err(vdev, "Recovery not available on FPGA\n");
return;
}
/* Schedule recovery if it's not in progress */
if (atomic_cmpxchg(&pm->in_reset, 0, 1) == 0) {
ivpu_hw_irq_disable(vdev);
queue_work(system_long_wq, &pm->recovery_work);
}
}
int ivpu_pm_suspend_cb(struct device *dev)
{
struct drm_device *drm = dev_get_drvdata(dev);
struct ivpu_device *vdev = to_ivpu_device(drm);
int ret;
ivpu_dbg(vdev, PM, "Suspend..\n");
ret = ivpu_suspend(vdev);
if (ret && vdev->pm->suspend_reschedule_counter) {
ivpu_dbg(vdev, PM, "Failed to enter idle, rescheduling suspend, retries left %d\n",
vdev->pm->suspend_reschedule_counter);
pm_schedule_suspend(dev, vdev->timeout.reschedule_suspend);
vdev->pm->suspend_reschedule_counter--;
return -EBUSY;
} else if (!vdev->pm->suspend_reschedule_counter) {
ivpu_warn(vdev, "Failed to enter idle, force suspend\n");
ivpu_pm_prepare_cold_boot(vdev);
} else {
ivpu_pm_prepare_warm_boot(vdev);
}
vdev->pm->suspend_reschedule_counter = PM_RESCHEDULE_LIMIT;
pci_save_state(to_pci_dev(dev));
pci_set_power_state(to_pci_dev(dev), PCI_D3hot);
ivpu_dbg(vdev, PM, "Suspend done.\n");
return ret;
}
int ivpu_pm_resume_cb(struct device *dev)
{
struct drm_device *drm = dev_get_drvdata(dev);
struct ivpu_device *vdev = to_ivpu_device(drm);
int ret;
ivpu_dbg(vdev, PM, "Resume..\n");
pci_set_power_state(to_pci_dev(dev), PCI_D0);
pci_restore_state(to_pci_dev(dev));
ret = ivpu_resume(vdev);
if (ret)
ivpu_err(vdev, "Failed to resume: %d\n", ret);
ivpu_dbg(vdev, PM, "Resume done.\n");
return ret;
}
int ivpu_pm_runtime_suspend_cb(struct device *dev)
{
struct drm_device *drm = dev_get_drvdata(dev);
struct ivpu_device *vdev = to_ivpu_device(drm);
int ret;
ivpu_dbg(vdev, PM, "Runtime suspend..\n");
if (!ivpu_hw_is_idle(vdev) && vdev->pm->suspend_reschedule_counter) {
ivpu_dbg(vdev, PM, "Failed to enter idle, rescheduling suspend, retries left %d\n",
vdev->pm->suspend_reschedule_counter);
pm_schedule_suspend(dev, vdev->timeout.reschedule_suspend);
vdev->pm->suspend_reschedule_counter--;
return -EAGAIN;
}
ret = ivpu_suspend(vdev);
if (ret)
ivpu_err(vdev, "Failed to set suspend VPU: %d\n", ret);
if (!vdev->pm->suspend_reschedule_counter) {
ivpu_warn(vdev, "VPU failed to enter idle, force suspended.\n");
ivpu_pm_prepare_cold_boot(vdev);
} else {
ivpu_pm_prepare_warm_boot(vdev);
}
vdev->pm->suspend_reschedule_counter = PM_RESCHEDULE_LIMIT;
ivpu_dbg(vdev, PM, "Runtime suspend done.\n");
return 0;
}
int ivpu_pm_runtime_resume_cb(struct device *dev)
{
struct drm_device *drm = dev_get_drvdata(dev);
struct ivpu_device *vdev = to_ivpu_device(drm);
int ret;
ivpu_dbg(vdev, PM, "Runtime resume..\n");
ret = ivpu_resume(vdev);
if (ret)
ivpu_err(vdev, "Failed to set RESUME state: %d\n", ret);
ivpu_dbg(vdev, PM, "Runtime resume done.\n");
return ret;
}
int ivpu_rpm_get(struct ivpu_device *vdev)
{
int ret;
ivpu_dbg(vdev, RPM, "rpm_get count %d\n", atomic_read(&vdev->drm.dev->power.usage_count));
ret = pm_runtime_resume_and_get(vdev->drm.dev);
if (!drm_WARN_ON(&vdev->drm, ret < 0))
vdev->pm->suspend_reschedule_counter = PM_RESCHEDULE_LIMIT;
return ret;
}
void ivpu_rpm_put(struct ivpu_device *vdev)
{
ivpu_dbg(vdev, RPM, "rpm_put count %d\n", atomic_read(&vdev->drm.dev->power.usage_count));
pm_runtime_mark_last_busy(vdev->drm.dev);
pm_runtime_put_autosuspend(vdev->drm.dev);
}
void ivpu_pm_reset_prepare_cb(struct pci_dev *pdev)
{
struct ivpu_device *vdev = pci_get_drvdata(pdev);
pm_runtime_get_sync(vdev->drm.dev);
ivpu_dbg(vdev, PM, "Pre-reset..\n");
atomic_set(&vdev->pm->in_reset, 1);
ivpu_shutdown(vdev);
ivpu_pm_prepare_cold_boot(vdev);
ivpu_jobs_abort_all(vdev);
ivpu_dbg(vdev, PM, "Pre-reset done.\n");
}
void ivpu_pm_reset_done_cb(struct pci_dev *pdev)
{
struct ivpu_device *vdev = pci_get_drvdata(pdev);
int ret;
ivpu_dbg(vdev, PM, "Post-reset..\n");
ret = ivpu_resume(vdev);
if (ret)
ivpu_err(vdev, "Failed to set RESUME state: %d\n", ret);
atomic_set(&vdev->pm->in_reset, 0);
ivpu_dbg(vdev, PM, "Post-reset done.\n");
pm_runtime_put_autosuspend(vdev->drm.dev);
}
int ivpu_pm_init(struct ivpu_device *vdev)
{
struct device *dev = vdev->drm.dev;
struct ivpu_pm_info *pm = vdev->pm;
pm->vdev = vdev;
pm->suspend_reschedule_counter = PM_RESCHEDULE_LIMIT;
atomic_set(&pm->in_reset, 0);
INIT_WORK(&pm->recovery_work, ivpu_pm_recovery_work);
pm_runtime_use_autosuspend(dev);
if (ivpu_disable_recovery)
pm_runtime_set_autosuspend_delay(dev, -1);
else if (ivpu_is_silicon(vdev))
pm_runtime_set_autosuspend_delay(dev, 100);
else
pm_runtime_set_autosuspend_delay(dev, 60000);
return 0;
}
void ivpu_pm_enable(struct ivpu_device *vdev)
{
struct device *dev = vdev->drm.dev;
pm_runtime_set_active(dev);
pm_runtime_allow(dev);
pm_runtime_mark_last_busy(dev);
pm_runtime_put_autosuspend(dev);
ivpu_dbg(vdev, RPM, "Enable RPM count %d\n", atomic_read(&dev->power.usage_count));
}
void ivpu_pm_disable(struct ivpu_device *vdev)
{
struct device *dev = vdev->drm.dev;
ivpu_dbg(vdev, RPM, "Disable RPM count %d\n", atomic_read(&dev->power.usage_count));
pm_runtime_get_noresume(vdev->drm.dev);
pm_runtime_forbid(vdev->drm.dev);
}

@ -0,0 +1,38 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2020-2023 Intel Corporation
*/
#ifndef __IVPU_PM_H__
#define __IVPU_PM_H__
#include <linux/types.h>
struct ivpu_device;
struct ivpu_pm_info {
struct ivpu_device *vdev;
struct work_struct recovery_work;
atomic_t in_reset;
bool is_warmboot;
u32 suspend_reschedule_counter;
};
int ivpu_pm_init(struct ivpu_device *vdev);
void ivpu_pm_enable(struct ivpu_device *vdev);
void ivpu_pm_disable(struct ivpu_device *vdev);
int ivpu_pm_suspend_cb(struct device *dev);
int ivpu_pm_resume_cb(struct device *dev);
int ivpu_pm_runtime_suspend_cb(struct device *dev);
int ivpu_pm_runtime_resume_cb(struct device *dev);
void ivpu_pm_reset_prepare_cb(struct pci_dev *pdev);
void ivpu_pm_reset_done_cb(struct pci_dev *pdev);
int __must_check ivpu_rpm_get(struct ivpu_device *vdev);
void ivpu_rpm_put(struct ivpu_device *vdev);
void ivpu_pm_schedule_recovery(struct ivpu_device *vdev);
#endif /* __IVPU_PM_H__ */

@ -0,0 +1,349 @@
/* SPDX-License-Identifier: MIT */
/*
* Copyright (C) 2020-2023 Intel Corporation
*/
#ifndef VPU_BOOT_API_H
#define VPU_BOOT_API_H
/*
* =========== FW API version information beginning ================
* The bellow values will be used to construct the version info this way:
* fw_bin_header->api_version[VPU_BOOT_API_VER_ID] = (VPU_BOOT_API_VER_MAJOR << 16) |
* VPU_BOOT_API_VER_MINOR;
* VPU_BOOT_API_VER_PATCH will be ignored. KMD and compatibility is not affected if this changes.
*/
/*
* Major version changes that break backward compatibility.
* Major version must start from 1 and can only be incremented.
*/
#define VPU_BOOT_API_VER_MAJOR 3
/*
* Minor version changes when API backward compatibility is preserved.
* Resets to 0 if Major version is incremented.
*/
#define VPU_BOOT_API_VER_MINOR 12
/*
* API header changed (field names, documentation, formatting) but API itself has not been changed
*/
#define VPU_BOOT_API_VER_PATCH 2
/*
* Index in the API version table
* Must be unique for each API
*/
#define VPU_BOOT_API_VER_INDEX 0
/* ------------ FW API version information end ---------------------*/
#pragma pack(push, 1)
/*
* Firmware image header format
*/
#define VPU_FW_HEADER_SIZE 4096
#define VPU_FW_HEADER_VERSION 0x1
#define VPU_FW_VERSION_SIZE 32
#define VPU_FW_API_VER_NUM 16
struct vpu_firmware_header {
u32 header_version;
u32 image_format;
u64 image_load_address;
u32 image_size;
u64 entry_point;
u8 vpu_version[VPU_FW_VERSION_SIZE];
u32 compression_type;
u64 firmware_version_load_address;
u32 firmware_version_size;
u64 boot_params_load_address;
u32 api_version[VPU_FW_API_VER_NUM];
	/* Size of memory required for firmware execution */
u32 runtime_size;
u32 shave_nn_fw_size;
};
/*
* Firmware boot parameters format
*/
#define VPU_BOOT_PLL_COUNT 3
#define VPU_BOOT_PLL_OUT_COUNT 4
/** Values for boot_type field */
#define VPU_BOOT_TYPE_COLDBOOT 0
#define VPU_BOOT_TYPE_WARMBOOT 1
/** Value for the magic field */
#define VPU_BOOT_PARAMS_MAGIC 0x10000
/** VPU scheduling mode. By default, OS scheduling is used. */
#define VPU_SCHEDULING_MODE_OS 0
#define VPU_SCHEDULING_MODE_HW 1
enum VPU_BOOT_L2_CACHE_CFG_TYPE {
VPU_BOOT_L2_CACHE_CFG_UPA = 0,
VPU_BOOT_L2_CACHE_CFG_NN = 1,
VPU_BOOT_L2_CACHE_CFG_NUM = 2
};
/**
* Logging destinations.
*
* Logging output can be directed to different logging destinations. This enum
* defines the list of logging destinations supported by the VPU firmware (NOTE:
* a specific VPU FW binary may support only a subset of such output
* destinations, depending on the target platform and compile options).
*/
enum vpu_trace_destination {
VPU_TRACE_DESTINATION_PIPEPRINT = 0x1,
VPU_TRACE_DESTINATION_VERBOSE_TRACING = 0x2,
VPU_TRACE_DESTINATION_NORTH_PEAK = 0x4,
};
/*
* Processor bit shifts (for loggable HW components).
*/
#define VPU_TRACE_PROC_BIT_ARM 0
#define VPU_TRACE_PROC_BIT_LRT 1
#define VPU_TRACE_PROC_BIT_LNN 2
#define VPU_TRACE_PROC_BIT_SHV_0 3
#define VPU_TRACE_PROC_BIT_SHV_1 4
#define VPU_TRACE_PROC_BIT_SHV_2 5
#define VPU_TRACE_PROC_BIT_SHV_3 6
#define VPU_TRACE_PROC_BIT_SHV_4 7
#define VPU_TRACE_PROC_BIT_SHV_5 8
#define VPU_TRACE_PROC_BIT_SHV_6 9
#define VPU_TRACE_PROC_BIT_SHV_7 10
#define VPU_TRACE_PROC_BIT_SHV_8 11
#define VPU_TRACE_PROC_BIT_SHV_9 12
#define VPU_TRACE_PROC_BIT_SHV_10 13
#define VPU_TRACE_PROC_BIT_SHV_11 14
#define VPU_TRACE_PROC_BIT_SHV_12 15
#define VPU_TRACE_PROC_BIT_SHV_13 16
#define VPU_TRACE_PROC_BIT_SHV_14 17
#define VPU_TRACE_PROC_BIT_SHV_15 18
#define VPU_TRACE_PROC_BIT_ACT_SHV_0 19
#define VPU_TRACE_PROC_BIT_ACT_SHV_1 20
#define VPU_TRACE_PROC_BIT_ACT_SHV_2 21
#define VPU_TRACE_PROC_BIT_ACT_SHV_3 22
#define VPU_TRACE_PROC_NO_OF_HW_DEVS 23
/* KMB HW component IDs are sequential, so define first and last IDs. */
#define VPU_TRACE_PROC_BIT_KMB_FIRST VPU_TRACE_PROC_BIT_LRT
#define VPU_TRACE_PROC_BIT_KMB_LAST VPU_TRACE_PROC_BIT_SHV_15
struct vpu_boot_l2_cache_config {
u8 use;
u8 cfg;
};
struct vpu_warm_boot_section {
u32 src;
u32 dst;
u32 size;
u32 core_id;
u32 is_clear_op;
};
struct vpu_boot_params {
u32 magic;
u32 vpu_id;
u32 vpu_count;
u32 pad0[5];
/* Clock frequencies: 0x20 - 0xFF */
u32 frequency;
u32 pll[VPU_BOOT_PLL_COUNT][VPU_BOOT_PLL_OUT_COUNT];
u32 perf_clk_frequency;
u32 pad1[42];
/* Memory regions: 0x100 - 0x1FF */
u64 ipc_header_area_start;
u32 ipc_header_area_size;
u64 shared_region_base;
u32 shared_region_size;
u64 ipc_payload_area_start;
u32 ipc_payload_area_size;
u64 global_aliased_pio_base;
u32 global_aliased_pio_size;
u32 autoconfig;
struct vpu_boot_l2_cache_config cache_defaults[VPU_BOOT_L2_CACHE_CFG_NUM];
u64 global_memory_allocator_base;
u32 global_memory_allocator_size;
/**
* ShaveNN FW section VPU base address
* On VPU2.7 HW this address must be within 2GB range starting from L2C_PAGE_TABLE base
*/
u64 shave_nn_fw_base;
u64 save_restore_ret_address; /* stores the address of FW's restore entry point */
u32 pad2[43];
/* IRQ re-direct numbers: 0x200 - 0x2FF */
s32 watchdog_irq_mss;
s32 watchdog_irq_nce;
/* ARM -> VPU doorbell interrupt. ARM is notifying VPU of async command or compute job. */
u32 host_to_vpu_irq;
/* VPU -> ARM job done interrupt. VPU is notifying ARM of compute job completion. */
u32 job_done_irq;
/* VPU -> ARM IRQ line to use to request MMU update. */
u32 mmu_update_request_irq;
/* ARM -> VPU IRQ line to use to notify of MMU update completion. */
u32 mmu_update_done_irq;
/* ARM -> VPU IRQ line to use to request power level change. */
u32 set_power_level_irq;
/* VPU -> ARM IRQ line to use to notify of power level change completion. */
u32 set_power_level_done_irq;
/* VPU -> ARM IRQ line to use to notify of VPU idle state change */
u32 set_vpu_idle_update_irq;
/* VPU -> ARM IRQ line to use to request counter reset. */
u32 metric_query_event_irq;
/* ARM -> VPU IRQ line to use to notify of counter reset completion. */
u32 metric_query_event_done_irq;
/* VPU -> ARM IRQ line to use to notify of preemption completion. */
u32 preemption_done_irq;
/* Padding. */
u32 pad3[52];
/* Silicon information: 0x300 - 0x3FF */
u32 host_version_id;
u32 si_stepping;
u64 device_id;
u64 feature_exclusion;
u64 sku;
/** PLL ratio for minimum clock frequency */
u32 min_freq_pll_ratio;
/** PLL ratio for maximum clock frequency */
u32 max_freq_pll_ratio;
/**
* Initial log level threshold (messages with log level severity less than
* the threshold will not be logged); applies to every enabled logging
* destination and loggable HW component. See 'mvLog_t' enum for acceptable
* values.
*/
u32 default_trace_level;
u32 boot_type;
u64 punit_telemetry_sram_base;
u64 punit_telemetry_sram_size;
u32 vpu_telemetry_enable;
u64 crit_tracing_buff_addr;
u32 crit_tracing_buff_size;
u64 verbose_tracing_buff_addr;
u32 verbose_tracing_buff_size;
u64 verbose_tracing_sw_component_mask; /* TO BE REMOVED */
/**
* Mask of destinations to which logging messages are delivered; bitwise OR
* of values defined in vpu_trace_destination enum.
*/
u32 trace_destination_mask;
/**
* Mask of hardware components for which logging is enabled; bitwise OR of
* bits defined by the VPU_TRACE_PROC_BIT_* macros.
*/
u64 trace_hw_component_mask;
/** Mask of trace message formats supported by the driver */
u64 tracing_buff_message_format_mask;
u64 trace_reserved_1[2];
/**
* Period at which the VPU reads the temp sensor values into MMIO, on
* platforms where that is necessary (in ms). 0 to disable reads.
*/
u32 temp_sensor_period_ms;
/** PLL ratio for efficient clock frequency */
u32 pn_freq_pll_ratio;
u32 pad4[28];
/* Warm boot information: 0x400 - 0x43F */
u32 warm_boot_sections_count;
u32 warm_boot_start_address_reference;
u32 warm_boot_section_info_address_offset;
u32 pad5[13];
/* Power States transitions timestamps: 0x440 - 0x46F*/
struct {
/* VPU_IDLE -> VPU_ACTIVE transition initiated timestamp */
u64 vpu_active_state_requested;
/* VPU_IDLE -> VPU_ACTIVE transition completed timestamp */
u64 vpu_active_state_achieved;
/* VPU_ACTIVE -> VPU_IDLE transition initiated timestamp */
u64 vpu_idle_state_requested;
/* VPU_ACTIVE -> VPU_IDLE transition completed timestamp */
u64 vpu_idle_state_achieved;
/* VPU_IDLE -> VPU_STANDBY transition initiated timestamp */
u64 vpu_standby_state_requested;
/* VPU_IDLE -> VPU_STANDBY transition completed timestamp */
u64 vpu_standby_state_achieved;
} power_states_timestamps;
/* VPU scheduling mode. Values defined by VPU_SCHEDULING_MODE_* macros. */
u32 vpu_scheduling_mode;
/* Present call period in milliseconds. */
u32 vpu_focus_present_timer_ms;
/* Unused/reserved: 0x478 - 0xFFF */
u32 pad6[738];
};
/*
 * Magic numbers set between host and vpu to detect corruption of tracing init
*/
#define VPU_TRACING_BUFFER_CANARY (0xCAFECAFE)
/* Tracing buffer message format definitions */
#define VPU_TRACING_FORMAT_STRING 0
#define VPU_TRACING_FORMAT_MIPI 2
/*
* Header of the tracing buffer.
* The below defined header will be stored at the beginning of
 * each allocated tracing buffer, followed by a series of 256-byte
 * ASCII trace message entries.
*/
struct vpu_tracing_buffer_header {
/**
* Magic number set by host to detect corruption
* @see VPU_TRACING_BUFFER_CANARY
*/
u32 host_canary_start;
/* offset from start of buffer for trace entries */
u32 read_index;
u32 pad_to_cache_line_size_0[14];
/* End of first cache line */
/**
* Magic number set by host to detect corruption
* @see VPU_TRACING_BUFFER_CANARY
*/
u32 vpu_canary_start;
	/* offset from start of buffer to the current write position */
u32 write_index;
/* counter for buffer wrapping */
u32 wrap_count;
/* legacy field - do not use */
u32 reserved_0;
/**
	 * Size of the log buffer, including this header (@header_size) and the
	 * space reserved for all messages. If @alignment is greater than 0,
	 * @size must be a multiple of @alignment.
*/
u32 size;
/* Header version */
u16 header_version;
/* Header size */
u16 header_size;
/*
* Format of the messages in the trace buffer
* 0 - null terminated string
* 1 - size + null terminated string
* 2 - MIPI-SysT encoding
*/
u32 format;
/*
* Message alignment
	 * 0 - messages are placed one after another
	 * n - every message starts at an offset that is a multiple of n
*/
u32 alignment; /* 64, 128, 256 */
	/* Name of the logging entity, e.g. "LRT", "LNN", "SHV0", etc. */
char name[16];
u32 pad_to_cache_line_size_1[4];
/* End of second cache line */
};
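/*
 * Minimal parsing sketch (hypothetical helper, not part of the format):
 * advance a byte offset past a message of @msg_size bytes while honouring
 * the @alignment rule above (0 means messages are packed back to back).
 */
static inline u32 vpu_tracing_next_offset(u32 offset, u32 msg_size, u32 alignment)
{
	offset += msg_size;
	if (alignment)
		offset = ((offset + alignment - 1) / alignment) * alignment;
	return offset;
}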
#pragma pack(pop)
#endif

@ -0,0 +1,999 @@
/* SPDX-License-Identifier: MIT */
/*
* Copyright (C) 2020-2023 Intel Corporation
*/
/**
* @file
* @brief JSM shared definitions
*
* @ingroup Jsm
* @brief JSM shared definitions
* @{
*/
#ifndef VPU_JSM_API_H
#define VPU_JSM_API_H
/*
* Major version changes that break backward compatibility
*/
#define VPU_JSM_API_VER_MAJOR 2
/*
* Minor version changes when API backward compatibility is preserved.
*/
#define VPU_JSM_API_VER_MINOR 10
/*
* API header changed (field names, documentation, formatting) but API itself has not been changed
*/
#define VPU_JSM_API_VER_PATCH 1
/*
* Index in the API version table
*/
#define VPU_JSM_API_VER_INDEX 4
/*
* Number of Priority Bands for Hardware Scheduling
* Bands: RealTime, Focus, Normal, Idle
*/
#define VPU_HWS_NUM_PRIORITY_BANDS 4
/* Max number of impacted contexts that can be dealt with the engine reset command */
#define VPU_MAX_ENGINE_RESET_IMPACTED_CONTEXTS 3
/** Pack the API structures for now, once alignment issues are fixed this can be removed */
#pragma pack(push, 1)
/*
* Engine indexes.
*/
#define VPU_ENGINE_COMPUTE 0
#define VPU_ENGINE_COPY 1
#define VPU_ENGINE_NB 2
/*
* VPU status values.
*/
#define VPU_JSM_STATUS_SUCCESS 0x0U
#define VPU_JSM_STATUS_PARSING_ERR 0x1U
#define VPU_JSM_STATUS_PROCESSING_ERR 0x2U
#define VPU_JSM_STATUS_PREEMPTED 0x3U
#define VPU_JSM_STATUS_ABORTED 0x4U
#define VPU_JSM_STATUS_USER_CTX_VIOL_ERR 0x5U
#define VPU_JSM_STATUS_GLOBAL_CTX_VIOL_ERR 0x6U
#define VPU_JSM_STATUS_MVNCI_WRONG_INPUT_FORMAT 0x7U
#define VPU_JSM_STATUS_MVNCI_UNSUPPORTED_NETWORK_ELEMENT 0x8U
#define VPU_JSM_STATUS_MVNCI_INVALID_HANDLE 0x9U
#define VPU_JSM_STATUS_MVNCI_OUT_OF_RESOURCES 0xAU
#define VPU_JSM_STATUS_MVNCI_NOT_IMPLEMENTED 0xBU
#define VPU_JSM_STATUS_MVNCI_INTERNAL_ERROR 0xCU
/* Job status returned when the job was preempted mid-inference */
#define VPU_JSM_STATUS_PREEMPTED_MID_INFERENCE 0xDU
/*
* Host <-> VPU IPC channels.
* ASYNC commands use a high priority channel, other messages use low-priority ones.
*/
#define VPU_IPC_CHAN_ASYNC_CMD 0
#define VPU_IPC_CHAN_GEN_CMD 10
#define VPU_IPC_CHAN_JOB_RET 11
/*
* Job flags bit masks.
*/
#define VPU_JOB_FLAGS_NULL_SUBMISSION_MASK 0x00000001
/*
* Sizes of the reserved areas in jobs, in bytes.
*/
#define VPU_JOB_RESERVED_BYTES 16
/*
* Sizes of the reserved areas in job queues, in bytes.
*/
#define VPU_JOB_QUEUE_RESERVED_BYTES 52
/*
* Max length (including trailing NULL char) of trace entity name (e.g., the
* name of a logging destination or a loggable HW component).
*/
#define VPU_TRACE_ENTITY_NAME_MAX_LEN 32
/*
* Max length (including trailing NULL char) of a dyndbg command.
*
* NOTE: 112 is used so that the size of 'struct vpu_ipc_msg' in the JSM API is
* 128 bytes (multiple of 64 bytes, the cache line size).
*/
#define VPU_DYNDBG_CMD_MAX_LEN 112
/*
* Job format.
*/
struct vpu_job_queue_entry {
u64 batch_buf_addr; /**< Address of VPU commands batch buffer */
u32 job_id; /**< Job ID */
u32 flags; /**< Flags bit field, see VPU_JOB_FLAGS_* above */
u64 root_page_table_addr; /**< Address of root page table to use for this job */
u64 root_page_table_update_counter; /**< Page tables update events counter */
u64 preemption_buffer_address; /**< Address of the preemption buffer to use for this job */
u64 preemption_buffer_size; /**< Size of the preemption buffer to use for this job */
u8 reserved[VPU_JOB_RESERVED_BYTES];
};
/*
* Job queue control registers.
*/
struct vpu_job_queue_header {
u32 engine_idx;
u32 head;
u32 tail;
u8 reserved[VPU_JOB_QUEUE_RESERVED_BYTES];
};
/*
* Job queue format.
*/
struct vpu_job_queue {
struct vpu_job_queue_header header;
struct vpu_job_queue_entry job[];
};
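/*
 * Illustrative submission sketch, not part of the API: assuming @head and
 * @tail are entry indices into a ring of @capacity entries, the host writes
 * the new entry at the tail and then publishes the advanced tail (a real
 * implementation adds a write barrier and rings the doorbell afterwards).
 * The helper name and the @capacity parameter are hypothetical.
 */
static inline int vpu_job_queue_example_push(struct vpu_job_queue *q, u32 capacity,
					     const struct vpu_job_queue_entry *e)
{
	u32 tail = q->header.tail;
	u32 next = (tail + 1) % capacity;

	if (next == q->header.head)
		return -1; /* queue full */

	q->job[tail] = *e;
	q->header.tail = next;
	return 0;
}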
/**
* Logging entity types.
*
* This enum defines the different types of entities involved in logging.
*/
enum vpu_trace_entity_type {
/** Logging destination (entity where logs can be stored / printed). */
VPU_TRACE_ENTITY_TYPE_DESTINATION = 1,
/** Loggable HW component (HW entity that can be logged). */
VPU_TRACE_ENTITY_TYPE_HW_COMPONENT = 2,
};
/*
* Host <-> VPU IPC messages types.
*/
enum vpu_ipc_msg_type {
VPU_JSM_MSG_UNKNOWN = 0xFFFFFFFF,
/* IPC Host -> Device, Async commands */
VPU_JSM_MSG_ASYNC_CMD = 0x1100,
VPU_JSM_MSG_ENGINE_RESET = VPU_JSM_MSG_ASYNC_CMD,
VPU_JSM_MSG_ENGINE_PREEMPT = 0x1101,
VPU_JSM_MSG_REGISTER_DB = 0x1102,
VPU_JSM_MSG_UNREGISTER_DB = 0x1103,
VPU_JSM_MSG_QUERY_ENGINE_HB = 0x1104,
VPU_JSM_MSG_GET_POWER_LEVEL_COUNT = 0x1105,
VPU_JSM_MSG_GET_POWER_LEVEL = 0x1106,
VPU_JSM_MSG_SET_POWER_LEVEL = 0x1107,
/* @deprecated */
VPU_JSM_MSG_METRIC_STREAMER_OPEN = 0x1108,
/* @deprecated */
VPU_JSM_MSG_METRIC_STREAMER_CLOSE = 0x1109,
/** Configure logging (used to modify configuration passed in boot params). */
VPU_JSM_MSG_TRACE_SET_CONFIG = 0x110a,
/** Return current logging configuration. */
VPU_JSM_MSG_TRACE_GET_CONFIG = 0x110b,
/**
* Get masks of destinations and HW components supported by the firmware
* (may vary between HW generations and FW compile
* time configurations)
*/
VPU_JSM_MSG_TRACE_GET_CAPABILITY = 0x110c,
/** Get the name of a destination or HW component. */
VPU_JSM_MSG_TRACE_GET_NAME = 0x110d,
/**
	 * Release resources associated with a host_ssid. All jobs that belong to the host_ssid
	 * are aborted and removed from internal scheduling queues. All doorbells assigned
* to the host_ssid are unregistered and any internal FW resources belonging to
* the host_ssid are released.
*/
VPU_JSM_MSG_SSID_RELEASE = 0x110e,
/**
* Start collecting metric data.
* @see vpu_jsm_metric_streamer_start
*/
VPU_JSM_MSG_METRIC_STREAMER_START = 0x110f,
/**
* Stop collecting metric data. This command will return success if it is called
* for a metric stream that has already been stopped or was never started.
* @see vpu_jsm_metric_streamer_stop
*/
VPU_JSM_MSG_METRIC_STREAMER_STOP = 0x1110,
/**
* Update current and next buffer for metric data collection. This command can
* also be used to request information about the number of collected samples
* and the amount of data written to the buffer.
* @see vpu_jsm_metric_streamer_update
*/
VPU_JSM_MSG_METRIC_STREAMER_UPDATE = 0x1111,
/**
* Request description of selected metric groups and metric counters within
* each group. The VPU will write the description of groups and counters to
* the buffer specified in the command structure.
* @see vpu_jsm_metric_streamer_start
*/
VPU_JSM_MSG_METRIC_STREAMER_INFO = 0x1112,
/** Control command: Priority band setup */
VPU_JSM_MSG_SET_PRIORITY_BAND_SETUP = 0x1113,
/** Control command: Create command queue */
VPU_JSM_MSG_CREATE_CMD_QUEUE = 0x1114,
/** Control command: Destroy command queue */
VPU_JSM_MSG_DESTROY_CMD_QUEUE = 0x1115,
/** Control command: Set context scheduling properties */
VPU_JSM_MSG_SET_CONTEXT_SCHED_PROPERTIES = 0x1116,
/*
* Register a doorbell to notify VPU of new work. The doorbell may later be
* deallocated or reassigned to another context.
*/
VPU_JSM_MSG_HWS_REGISTER_DB = 0x1117,
/* IPC Host -> Device, General commands */
VPU_JSM_MSG_GENERAL_CMD = 0x1200,
VPU_JSM_MSG_BLOB_DEINIT = VPU_JSM_MSG_GENERAL_CMD,
/**
* Control dyndbg behavior by executing a dyndbg command; equivalent to
* Linux command: `echo '<dyndbg_cmd>' > <debugfs>/dynamic_debug/control`.
*/
VPU_JSM_MSG_DYNDBG_CONTROL = 0x1201,
/* IPC Device -> Host, Job completion */
VPU_JSM_MSG_JOB_DONE = 0x2100,
/* IPC Device -> Host, Async command completion */
VPU_JSM_MSG_ASYNC_CMD_DONE = 0x2200,
VPU_JSM_MSG_ENGINE_RESET_DONE = VPU_JSM_MSG_ASYNC_CMD_DONE,
VPU_JSM_MSG_ENGINE_PREEMPT_DONE = 0x2201,
VPU_JSM_MSG_REGISTER_DB_DONE = 0x2202,
VPU_JSM_MSG_UNREGISTER_DB_DONE = 0x2203,
VPU_JSM_MSG_QUERY_ENGINE_HB_DONE = 0x2204,
VPU_JSM_MSG_GET_POWER_LEVEL_COUNT_DONE = 0x2205,
VPU_JSM_MSG_GET_POWER_LEVEL_DONE = 0x2206,
VPU_JSM_MSG_SET_POWER_LEVEL_DONE = 0x2207,
/* @deprecated */
VPU_JSM_MSG_METRIC_STREAMER_OPEN_DONE = 0x2208,
/* @deprecated */
VPU_JSM_MSG_METRIC_STREAMER_CLOSE_DONE = 0x2209,
/** Response to VPU_JSM_MSG_TRACE_SET_CONFIG. */
VPU_JSM_MSG_TRACE_SET_CONFIG_RSP = 0x220a,
/** Response to VPU_JSM_MSG_TRACE_GET_CONFIG. */
VPU_JSM_MSG_TRACE_GET_CONFIG_RSP = 0x220b,
/** Response to VPU_JSM_MSG_TRACE_GET_CAPABILITY. */
VPU_JSM_MSG_TRACE_GET_CAPABILITY_RSP = 0x220c,
/** Response to VPU_JSM_MSG_TRACE_GET_NAME. */
VPU_JSM_MSG_TRACE_GET_NAME_RSP = 0x220d,
/** Response to VPU_JSM_MSG_SSID_RELEASE. */
VPU_JSM_MSG_SSID_RELEASE_DONE = 0x220e,
/**
* Response to VPU_JSM_MSG_METRIC_STREAMER_START.
* VPU will return an error result if metric collection cannot be started,
* e.g. when the specified metric mask is invalid.
* @see vpu_jsm_metric_streamer_done
*/
VPU_JSM_MSG_METRIC_STREAMER_START_DONE = 0x220f,
/**
* Response to VPU_JSM_MSG_METRIC_STREAMER_STOP.
* Returns information about collected metric data.
* @see vpu_jsm_metric_streamer_done
*/
VPU_JSM_MSG_METRIC_STREAMER_STOP_DONE = 0x2210,
/**
* Response to VPU_JSM_MSG_METRIC_STREAMER_UPDATE.
* Returns information about collected metric data.
* @see vpu_jsm_metric_streamer_done
*/
VPU_JSM_MSG_METRIC_STREAMER_UPDATE_DONE = 0x2211,
/**
* Response to VPU_JSM_MSG_METRIC_STREAMER_INFO.
* Returns a description of the metric groups and metric counters.
* @see vpu_jsm_metric_streamer_done
*/
VPU_JSM_MSG_METRIC_STREAMER_INFO_DONE = 0x2212,
/**
* Asynchronous event sent from the VPU to the host either when the current
* metric buffer is full or when the VPU has collected a multiple of
* @notify_sample_count samples as indicated through the start command
* (VPU_JSM_MSG_METRIC_STREAMER_START). Returns information about collected
* metric data.
* @see vpu_jsm_metric_streamer_done
*/
VPU_JSM_MSG_METRIC_STREAMER_NOTIFICATION = 0x2213,
/** Response to control command: Priority band setup */
VPU_JSM_MSG_SET_PRIORITY_BAND_SETUP_RSP = 0x2214,
/** Response to control command: Create command queue */
VPU_JSM_MSG_CREATE_CMD_QUEUE_RSP = 0x2215,
/** Response to control command: Destroy command queue */
VPU_JSM_MSG_DESTROY_CMD_QUEUE_RSP = 0x2216,
/** Response to control command: Set context scheduling properties */
VPU_JSM_MSG_SET_CONTEXT_SCHED_PROPERTIES_RSP = 0x2217,
/* IPC Device -> Host, General command completion */
VPU_JSM_MSG_GENERAL_CMD_DONE = 0x2300,
VPU_JSM_MSG_BLOB_DEINIT_DONE = VPU_JSM_MSG_GENERAL_CMD_DONE,
/** Response to VPU_JSM_MSG_DYNDBG_CONTROL. */
VPU_JSM_MSG_DYNDBG_CONTROL_RSP = 0x2301,
};
enum vpu_ipc_msg_status { VPU_JSM_MSG_FREE, VPU_JSM_MSG_ALLOCATED };
/*
* Host <-> LRT IPC message payload definitions
*/
struct vpu_ipc_msg_payload_engine_reset {
/* Engine to be reset. */
u32 engine_idx;
};
struct vpu_ipc_msg_payload_engine_preempt {
/* Engine to be preempted. */
u32 engine_idx;
/* ID of the preemption request. */
u32 preempt_id;
};
/*
* @brief Register doorbell command structure.
* This structure supports doorbell registration for only OS scheduling.
* @see VPU_JSM_MSG_REGISTER_DB
*/
struct vpu_ipc_msg_payload_register_db {
/* Index of the doorbell to register. */
u32 db_idx;
/* Virtual address in Global GTT pointing to the start of job queue. */
u64 jobq_base;
/* Size of the job queue in bytes. */
u32 jobq_size;
/* Host sub-stream ID for the context assigned to the doorbell. */
u32 host_ssid;
};
/**
* @brief Unregister doorbell command structure.
* Request structure to unregister a doorbell for both HW and OS scheduling.
* @see VPU_JSM_MSG_UNREGISTER_DB
*/
struct vpu_ipc_msg_payload_unregister_db {
/* Index of the doorbell to unregister. */
u32 db_idx;
};
struct vpu_ipc_msg_payload_query_engine_hb {
/* Engine to return heartbeat value. */
u32 engine_idx;
};
struct vpu_ipc_msg_payload_power_level {
/**
* Requested power level. The power level value is in the
* range [0, power_level_count-1] where power_level_count
* is the number of available power levels as returned by
* the get power level count command. A power level of 0
* corresponds to the maximum possible power level, while
* power_level_count-1 corresponds to the minimum possible
* power level. Values outside of this range are not
* considered to be valid.
*/
u32 power_level;
};
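/*
 * Minimal sketch (hypothetical helper): clamp a requested level to the valid
 * range [0, power_level_count - 1] described above, where level 0 is the
 * maximum possible power level.
 */
static inline u32 vpu_power_level_example_clamp(u32 requested, u32 power_level_count)
{
	if (!power_level_count)
		return 0;
	return requested < power_level_count ? requested : power_level_count - 1;
}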
struct vpu_ipc_msg_payload_ssid_release {
/* Host sub-stream ID for the context to be released. */
u32 host_ssid;
};
/**
* @brief Metric streamer start command structure.
* This structure is also used with VPU_JSM_MSG_METRIC_STREAMER_INFO to request metric
* groups and metric counters description from the firmware.
* @see VPU_JSM_MSG_METRIC_STREAMER_START
* @see VPU_JSM_MSG_METRIC_STREAMER_INFO
*/
struct vpu_jsm_metric_streamer_start {
/**
* Bitmask to select the desired metric groups.
* A metric group can belong only to one metric streamer instance at a time.
* Since each metric streamer instance has a unique set of metric groups, it
* can also identify a metric streamer instance if more than one instance was
* started. If the VPU device does not support multiple metric streamer instances,
* then VPU_JSM_MSG_METRIC_STREAMER_START will return an error even if the second
	 * instance has different groups from the first.
*/
u64 metric_group_mask;
/** Sampling rate in nanoseconds. */
u64 sampling_rate;
/**
* If > 0 the VPU will send a VPU_JSM_MSG_METRIC_STREAMER_NOTIFICATION message
	 * after every @notify_sample_count samples have been collected or dropped by the VPU.
* If set to UINT_MAX the VPU will only generate a notification when the metric
* buffer is full. If set to 0 the VPU will never generate a notification.
*/
u32 notify_sample_count;
u32 reserved_0;
/**
* Address and size of the buffer where the VPU will write metric data. The
* VPU writes all counters from enabled metric groups one after another. If
* there is no space left to write data at the next sample period the VPU
* will switch to the next buffer (@see next_buffer_addr) and will optionally
* send a notification to the host driver if @notify_sample_count is non-zero.
* If @next_buffer_addr is NULL the VPU will stop collecting metric data.
*/
u64 buffer_addr;
u64 buffer_size;
/**
* Address and size of the next buffer to write metric data to after the initial
* buffer is full. If the address is NULL the VPU will stop collecting metric
* data.
*/
u64 next_buffer_addr;
u64 next_buffer_size;
};
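/*
 * Example request fill, with purely hypothetical addresses, sizes and group
 * mask, showing the double-buffering scheme described above: the VPU switches
 * to @next_buffer_addr once the first buffer is full.
 */
static inline void
vpu_jsm_metric_streamer_start_example(struct vpu_jsm_metric_streamer_start *req)
{
	req->metric_group_mask = 0x1;       /* first metric group only */
	req->sampling_rate = 1000000;       /* one sample per millisecond, in ns */
	req->notify_sample_count = 64;      /* notification every 64 samples */
	req->reserved_0 = 0;
	req->buffer_addr = 0x80000000;      /* hypothetical VPU address */
	req->buffer_size = 0x10000;
	req->next_buffer_addr = 0x80010000; /* hypothetical VPU address */
	req->next_buffer_size = 0x10000;
}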
static_assert(sizeof(struct vpu_jsm_metric_streamer_start) % 8 == 0,
"vpu_jsm_metric_streamer_start is misaligned");
/**
* @brief Metric streamer stop command structure.
* @see VPU_JSM_MSG_METRIC_STREAMER_STOP
*/
struct vpu_jsm_metric_streamer_stop {
/** Bitmask to select the desired metric groups. */
u64 metric_group_mask;
};
static_assert(sizeof(struct vpu_jsm_metric_streamer_stop) % 8 == 0,
"vpu_jsm_metric_streamer_stop is misaligned");
/**
* Provide VPU FW with buffers to write metric data.
* @see VPU_JSM_MSG_METRIC_STREAMER_UPDATE
*/
struct vpu_jsm_metric_streamer_update {
/** Metric group mask that identifies metric streamer instance. */
u64 metric_group_mask;
/**
* Address and size of the buffer where the VPU will write metric data. If
	 * the buffer address is 0 or the same as the currently used buffer, the VPU will
* continue writing metric data to the current buffer. In this case the
* buffer size is ignored and the size of the current buffer is unchanged.
* If the address is non-zero and differs from the current buffer address the
* VPU will immediately switch data collection to the new buffer.
*/
u64 buffer_addr;
u64 buffer_size;
/**
* Address and size of the next buffer to write metric data after the initial
* buffer is full. If the address is NULL the VPU will stop collecting metric
* data but will continue to record dropped samples.
*
	 * Note that a hazard is possible if both buffer_addr and next_buffer_addr
	 * are non-zero in the same update request. It is the host's responsibility to ensure
* that both addresses make sense even if the VPU just switched to writing samples
* from the current to the next buffer.
*/
u64 next_buffer_addr;
u64 next_buffer_size;
};
static_assert(sizeof(struct vpu_jsm_metric_streamer_update) % 8 == 0,
"vpu_jsm_metric_streamer_update is misaligned");
struct vpu_ipc_msg_payload_blob_deinit {
/* 64-bit unique ID for the blob to be de-initialized. */
u64 blob_id;
};
struct vpu_ipc_msg_payload_job_done {
/* Engine to which the job was submitted. */
u32 engine_idx;
/* Index of the doorbell to which the job was submitted */
u32 db_idx;
/* ID of the completed job */
u32 job_id;
/* Status of the completed job */
u32 job_status;
/* Host SSID */
u32 host_ssid;
/* Zero Padding */
u32 reserved;
/* Command queue id */
u64 cmdq_id;
};
struct vpu_jsm_engine_reset_context {
/* Host SSID */
u32 host_ssid;
/* Zero Padding */
u32 reserved;
/* Command queue id */
u64 cmdq_id;
/* Flags: 0: cause of hang; 1: collateral damage of reset */
u64 flags;
};
struct vpu_ipc_msg_payload_engine_reset_done {
/* Engine ordinal */
u32 engine_idx;
/* Number of impacted contexts */
u32 num_impacted_contexts;
/* Array of impacted command queue ids and their flags */
struct vpu_jsm_engine_reset_context
impacted_contexts[VPU_MAX_ENGINE_RESET_IMPACTED_CONTEXTS];
};
struct vpu_ipc_msg_payload_engine_preempt_done {
/* Engine preempted. */
u32 engine_idx;
/* ID of the preemption request. */
u32 preempt_id;
};
/**
* Response structure for register doorbell command for both OS
* and HW scheduling.
* @see VPU_JSM_MSG_REGISTER_DB
* @see VPU_JSM_MSG_HWS_REGISTER_DB
*/
struct vpu_ipc_msg_payload_register_db_done {
/* Index of the registered doorbell. */
u32 db_idx;
};
/**
* Response structure for unregister doorbell command for both OS
* and HW scheduling.
* @see VPU_JSM_MSG_UNREGISTER_DB
*/
struct vpu_ipc_msg_payload_unregister_db_done {
/* Index of the unregistered doorbell. */
u32 db_idx;
};
struct vpu_ipc_msg_payload_query_engine_hb_done {
/* Engine returning heartbeat value. */
u32 engine_idx;
/* Heartbeat value. */
u64 heartbeat;
};
struct vpu_ipc_msg_payload_get_power_level_count_done {
/**
* Number of supported power levels. The maximum possible
* value of power_level_count is 16 but this may vary across
* implementations.
*/
u32 power_level_count;
/**
* Power consumption limit for each supported power level in
* [0-100%] range relative to power level 0.
*/
u8 power_limit[16];
};
struct vpu_ipc_msg_payload_blob_deinit_done {
/* 64-bit unique ID for the blob de-initialized. */
u64 blob_id;
};
/* HWS priority band setup request / response */
struct vpu_ipc_msg_payload_hws_priority_band_setup {
/*
* Grace period in 100ns units when preempting another priority band for
* this priority band
*/
u64 grace_period[VPU_HWS_NUM_PRIORITY_BANDS];
/*
* Default quantum in 100ns units for scheduling across processes
* within a priority band
*/
u64 process_quantum[VPU_HWS_NUM_PRIORITY_BANDS];
/*
* Default grace period in 100ns units for processes that preempt each
* other within a priority band
*/
u64 process_grace_period[VPU_HWS_NUM_PRIORITY_BANDS];
/*
* For normal priority band, specifies the target VPU percentage
* in situations when it's starved by the focus band.
*/
u32 normal_band_percentage;
};
/* HWS create command queue request */
struct vpu_ipc_msg_payload_hws_create_cmdq {
/* Process id */
u64 process_id;
/* Host SSID */
u32 host_ssid;
/* Zero Padding */
u32 reserved;
/* Command queue id */
u64 cmdq_id;
/* Command queue base */
u64 cmdq_base;
/* Command queue size */
u32 cmdq_size;
};
/* HWS create command queue response */
struct vpu_ipc_msg_payload_hws_create_cmdq_rsp {
/* Process id */
u64 process_id;
/* Host SSID */
u32 host_ssid;
/* Zero Padding */
u32 reserved;
/* Command queue id */
u64 cmdq_id;
};
/* HWS destroy command queue request / response */
struct vpu_ipc_msg_payload_hws_destroy_cmdq {
/* Host SSID */
u32 host_ssid;
/* Zero Padding */
u32 reserved;
/* Command queue id */
u64 cmdq_id;
};
/* HWS set context scheduling properties request / response */
struct vpu_ipc_msg_payload_hws_set_context_sched_properties {
/* Host SSID */
u32 host_ssid;
/* Zero Padding */
u32 reserved_0;
/* Command queue id */
u64 cmdq_id;
/* Priority band to assign to work of this context */
u32 priority_band;
/* Inside realtime band assigns a further priority */
u32 realtime_priority_level;
/* Priority relative to other contexts in the same process */
u32 in_process_priority;
/* Zero padding / Reserved */
u32 reserved_1;
/* Context quantum relative to other contexts of same priority in the same process */
u64 context_quantum;
/* Grace period when preempting context of the same priority within the same process */
u64 grace_period_same_priority;
/* Grace period when preempting context of a lower priority within the same process */
u64 grace_period_lower_priority;
};
/*
* @brief Register doorbell command structure.
* This structure supports doorbell registration for both HW and OS scheduling.
* Note: Queue base and size are added here so that the same structure can be used for
* OS scheduling and HW scheduling. For OS scheduling, cmdq_id will be ignored
* and cmdq_base and cmdq_size will be used. For HW scheduling, cmdq_base and cmdq_size will be
* ignored and cmdq_id is used.
* @see VPU_JSM_MSG_HWS_REGISTER_DB
*/
struct vpu_jsm_hws_register_db {
/* Index of the doorbell to register. */
u32 db_id;
/* Host sub-stream ID for the context assigned to the doorbell. */
u32 host_ssid;
/* ID of the command queue associated with the doorbell. */
u64 cmdq_id;
/* Virtual address pointing to the start of command queue. */
u64 cmdq_base;
/* Size of the command queue in bytes. */
u64 cmdq_size;
};
/**
* Payload for VPU_JSM_MSG_TRACE_SET_CONFIG[_RSP] and
* VPU_JSM_MSG_TRACE_GET_CONFIG_RSP messages.
*
* The payload is interpreted differently depending on the type of message:
*
* - For VPU_JSM_MSG_TRACE_SET_CONFIG, the payload specifies the desired
* logging configuration to be set.
*
* - For VPU_JSM_MSG_TRACE_SET_CONFIG_RSP, the payload reports the logging
* configuration that was set after a VPU_JSM_MSG_TRACE_SET_CONFIG request.
* The host can compare this payload with the one it sent in the
* VPU_JSM_MSG_TRACE_SET_CONFIG request to check whether or not the
* configuration was set as desired.
*
* - VPU_JSM_MSG_TRACE_GET_CONFIG_RSP, the payload reports the current logging
* configuration.
*/
struct vpu_ipc_msg_payload_trace_config {
/**
* Logging level (currently set or to be set); see 'mvLog_t' enum for
* acceptable values. The specified logging level applies to all
	 * destinations and HW components.
*/
u32 trace_level;
/**
* Bitmask of logging destinations (currently enabled or to be enabled);
* bitwise OR of values defined in logging_destination enum.
*/
u32 trace_destination_mask;
/**
* Bitmask of loggable HW components (currently enabled or to be enabled);
* bitwise OR of values defined in loggable_hw_component enum.
*/
u64 trace_hw_component_mask;
u64 reserved_0; /**< Reserved for future extensions. */
};
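/*
 * Example configuration fill; the level and masks are purely illustrative.
 * The named enum values come from the boot API header (vpu_boot_api.h) and
 * are assumed to be visible to the caller.
 */
static inline void
vpu_trace_config_example(struct vpu_ipc_msg_payload_trace_config *cfg)
{
	cfg->trace_level = 1;                     /* implementation-defined mvLog_t level */
	cfg->trace_destination_mask = 0x1;        /* e.g. VPU_TRACE_DESTINATION_PIPEPRINT */
	cfg->trace_hw_component_mask = 1ULL << 1; /* e.g. VPU_TRACE_PROC_BIT_LRT */
	cfg->reserved_0 = 0;
}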
/**
* Payload for VPU_JSM_MSG_TRACE_GET_CAPABILITY_RSP messages.
*/
struct vpu_ipc_msg_payload_trace_capability_rsp {
u32 trace_destination_mask; /**< Bitmask of supported logging destinations. */
u32 reserved_0;
u64 trace_hw_component_mask; /**< Bitmask of supported loggable HW components. */
u64 reserved_1; /**< Reserved for future extensions. */
};
/**
* Payload for VPU_JSM_MSG_TRACE_GET_NAME requests.
*/
struct vpu_ipc_msg_payload_trace_get_name {
/**
* The type of the entity to query name for; see logging_entity_type for
* possible values.
*/
u32 entity_type;
u32 reserved_0;
/**
	 * The ID of the entity to query name for; possible values depend on the
* entity type.
*/
u64 entity_id;
};
/**
* Payload for VPU_JSM_MSG_TRACE_GET_NAME_RSP responses.
*/
struct vpu_ipc_msg_payload_trace_get_name_rsp {
/**
* The type of the entity whose name was queried; see logging_entity_type
* for possible values.
*/
u32 entity_type;
u32 reserved_0;
/**
	 * The ID of the entity whose name was queried; possible values depend on
* the entity type.
*/
u64 entity_id;
/** Reserved for future extensions. */
u64 reserved_1;
/** The name of the entity. */
char entity_name[VPU_TRACE_ENTITY_NAME_MAX_LEN];
};
/**
* Data sent from the VPU to the host in all metric streamer response messages
* and in asynchronous notification.
* @see VPU_JSM_MSG_METRIC_STREAMER_START_DONE
* @see VPU_JSM_MSG_METRIC_STREAMER_STOP_DONE
* @see VPU_JSM_MSG_METRIC_STREAMER_UPDATE_DONE
* @see VPU_JSM_MSG_METRIC_STREAMER_INFO_DONE
* @see VPU_JSM_MSG_METRIC_STREAMER_NOTIFICATION
*/
struct vpu_jsm_metric_streamer_done {
/** Metric group mask that identifies metric streamer instance. */
u64 metric_group_mask;
/**
* Size in bytes of single sample - total size of all enabled counters.
* Some VPU implementations may align sample_size to more than 8 bytes.
*/
u32 sample_size;
u32 reserved_0;
/**
* Number of samples collected since the metric streamer was started.
* This will be 0 if the metric streamer was not started.
*/
u32 samples_collected;
/**
* Number of samples dropped since the metric streamer was started. This
* is incremented every time the metric streamer is not able to write
* collected samples because the current buffer is full and there is no
* next buffer to switch to.
*/
u32 samples_dropped;
/** Address of the buffer that contains the latest metric data. */
u64 buffer_addr;
/**
* Number of bytes written into the metric data buffer. In response to the
* VPU_JSM_MSG_METRIC_STREAMER_INFO request this field contains the size of
* all group and counter descriptors. The size is updated even if the buffer
	 * in the request was NULL or too small to hold descriptors of all counters.
*/
u64 bytes_written;
};
static_assert(sizeof(struct vpu_jsm_metric_streamer_done) % 8 == 0,
"vpu_jsm_metric_streamer_done is misaligned");
/**
* Metric group description placed in the metric buffer after successful completion
* of the VPU_JSM_MSG_METRIC_STREAMER_INFO command. This is followed by one or more
* @vpu_jsm_metric_counter_descriptor records.
* @see VPU_JSM_MSG_METRIC_STREAMER_INFO
*/
struct vpu_jsm_metric_group_descriptor {
/**
* Offset to the next metric group (8-byte aligned). If this offset is 0 this
* is the last descriptor. The value of metric_info_size must be greater than
* or equal to sizeof(struct vpu_jsm_metric_group_descriptor) + name_string_size
* + description_string_size and must be 8-byte aligned.
*/
u32 next_metric_group_info_offset;
/**
* Offset to the first metric counter description record (8-byte aligned).
* @see vpu_jsm_metric_counter_descriptor
*/
u32 next_metric_counter_info_offset;
/** Index of the group. This corresponds to bit index in metric_group_mask. */
u32 group_id;
/** Number of counters in the metric group. */
u32 num_counters;
/** Data size for all counters, must be a multiple of 8 bytes.*/
u32 metric_group_data_size;
/**
* Metric group domain number. Cannot use multiple, simultaneous metric groups
* from the same domain.
*/
u32 domain;
/**
* Counter name string size. The string must include a null termination character.
* The FW may use a fixed size name or send a different name for each counter.
* If the VPU uses fixed size strings, all characters from the end of the name
	 * to the end of the fixed size character array must be zeroed.
*/
u32 name_string_size;
/** Counter description string size, @see name_string_size */
u32 description_string_size;
u32 reserved_0[2];
/**
* Right after this structure, the VPU writes name and description of
* the metric group.
*/
};
static_assert(sizeof(struct vpu_jsm_metric_group_descriptor) % 8 == 0,
"vpu_jsm_metric_group_descriptor is misaligned");
/**
* Metric counter description, placed in the buffer after vpu_jsm_metric_group_descriptor.
* @see VPU_JSM_MSG_METRIC_STREAMER_INFO
*/
struct vpu_jsm_metric_counter_descriptor {
/**
* Offset to the next counter in a group (8-byte aligned). If this offset is
* 0 this is the last counter in the group.
*/
u32 next_metric_counter_info_offset;
/**
* Offset to the counter data from the start of samples in this metric group.
* Note that metric_data_offset % metric_data_size must be 0.
*/
u32 metric_data_offset;
/** Size of the metric counter data in bytes. */
u32 metric_data_size;
/** Metric type, see Level Zero API for definitions. */
u32 tier;
/** Metric type, see set_metric_type_t for definitions. */
u32 metric_type;
	/** Metric value type, see set_value_type_t for definitions. */
u32 metric_value_type;
/**
* Counter name string size. The string must include a null termination character.
* The FW may use a fixed size name or send a different name for each counter.
* If the VPU uses fixed size strings, all characters from the end of the name
	 * to the end of the fixed size character array must be zeroed.
*/
u32 name_string_size;
/** Counter description string size, @see name_string_size */
u32 description_string_size;
/** Counter component name string size, @see name_string_size */
u32 component_string_size;
	/** Counter units string size, @see name_string_size */
u32 units_string_size;
u32 reserved_0[2];
/**
	 * Right after this structure, the VPU writes the name, description,
	 * component and unit strings.
*/
};
static_assert(sizeof(struct vpu_jsm_metric_counter_descriptor) % 8 == 0,
"vpu_jsm_metric_counter_descriptor is misaligned");
/**
* Payload for VPU_JSM_MSG_DYNDBG_CONTROL requests.
*
* VPU_JSM_MSG_DYNDBG_CONTROL are used to control the VPU FW Dynamic Debug
* feature, which allows developers to selectively enable / disable MVLOG_DEBUG
* messages. This is equivalent to the Dynamic Debug functionality provided by
* Linux
* (https://www.kernel.org/doc/html/latest/admin-guide/dynamic-debug-howto.html)
* The host can control Dynamic Debug behavior by sending dyndbg commands, which
* have the same syntax as Linux
* dyndbg commands.
*
* NOTE: in order for MVLOG_DEBUG messages to be actually printed, the host
* still has to set the logging level to MVLOG_DEBUG, using the
* VPU_JSM_MSG_TRACE_SET_CONFIG command.
*
* The host can see the current dynamic debug configuration by executing a
* special 'show' command. The dyndbg configuration will be printed to the
* configured logging destination using MVLOG_INFO logging level.
*/
struct vpu_ipc_msg_payload_dyndbg_control {
/**
* Dyndbg command (same format as Linux dyndbg); must be a NULL-terminated
* string.
*/
char dyndbg_cmd[VPU_DYNDBG_CMD_MAX_LEN];
};
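/*
 * Example payload fill; the dyndbg command text is only illustrative. A real
 * driver would normally use strscpy() from <linux/string.h>; an open-coded
 * copy is used here to keep the sketch self-contained.
 */
static inline void
vpu_dyndbg_control_example(struct vpu_ipc_msg_payload_dyndbg_control *p)
{
	static const char cmd[] = "file ivpu_job.c +p"; /* hypothetical command */
	unsigned int i;

	for (i = 0; i < sizeof(cmd) && i < VPU_DYNDBG_CMD_MAX_LEN; i++)
		p->dyndbg_cmd[i] = cmd[i];
	p->dyndbg_cmd[VPU_DYNDBG_CMD_MAX_LEN - 1] = '\0';
}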
/*
* Payloads union, used to define complete message format.
*/
union vpu_ipc_msg_payload {
struct vpu_ipc_msg_payload_engine_reset engine_reset;
struct vpu_ipc_msg_payload_engine_preempt engine_preempt;
struct vpu_ipc_msg_payload_register_db register_db;
struct vpu_ipc_msg_payload_unregister_db unregister_db;
struct vpu_ipc_msg_payload_query_engine_hb query_engine_hb;
struct vpu_ipc_msg_payload_power_level power_level;
struct vpu_jsm_metric_streamer_start metric_streamer_start;
struct vpu_jsm_metric_streamer_stop metric_streamer_stop;
struct vpu_jsm_metric_streamer_update metric_streamer_update;
struct vpu_ipc_msg_payload_blob_deinit blob_deinit;
struct vpu_ipc_msg_payload_ssid_release ssid_release;
struct vpu_jsm_hws_register_db hws_register_db;
struct vpu_ipc_msg_payload_job_done job_done;
struct vpu_ipc_msg_payload_engine_reset_done engine_reset_done;
struct vpu_ipc_msg_payload_engine_preempt_done engine_preempt_done;
struct vpu_ipc_msg_payload_register_db_done register_db_done;
struct vpu_ipc_msg_payload_unregister_db_done unregister_db_done;
struct vpu_ipc_msg_payload_query_engine_hb_done query_engine_hb_done;
struct vpu_ipc_msg_payload_get_power_level_count_done get_power_level_count_done;
struct vpu_jsm_metric_streamer_done metric_streamer_done;
struct vpu_ipc_msg_payload_blob_deinit_done blob_deinit_done;
struct vpu_ipc_msg_payload_trace_config trace_config;
struct vpu_ipc_msg_payload_trace_capability_rsp trace_capability;
struct vpu_ipc_msg_payload_trace_get_name trace_get_name;
struct vpu_ipc_msg_payload_trace_get_name_rsp trace_get_name_rsp;
struct vpu_ipc_msg_payload_dyndbg_control dyndbg_control;
struct vpu_ipc_msg_payload_hws_priority_band_setup hws_priority_band_setup;
struct vpu_ipc_msg_payload_hws_create_cmdq hws_create_cmdq;
struct vpu_ipc_msg_payload_hws_create_cmdq_rsp hws_create_cmdq_rsp;
struct vpu_ipc_msg_payload_hws_destroy_cmdq hws_destroy_cmdq;
struct vpu_ipc_msg_payload_hws_set_context_sched_properties
hws_set_context_sched_properties;
};
/*
* Host <-> LRT IPC message base structure.
*
* NOTE: All instances of this object must be aligned on a 64B boundary
* to allow proper handling of VPU cache operations.
*/
struct vpu_jsm_msg {
/* Message type, see vpu_ipc_msg_type enum. */
u32 type;
/* Buffer status, see vpu_ipc_msg_status enum. */
u32 status;
/*
* Request ID, provided by the host in a request message and passed
* back by VPU in the response message.
*/
u32 request_id;
/* Request return code set by the VPU, see VPU_JSM_STATUS_* defines. */
u32 result;
/* Message payload depending on message type, see vpu_ipc_msg_payload union. */
union vpu_ipc_msg_payload payload;
};
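/*
 * Example request composition (helper name hypothetical): query the compute
 * engine heartbeat. The caller is responsible for the 64-byte alignment of
 * the message buffer noted above.
 */
static inline void
vpu_jsm_msg_example_query_hb(struct vpu_jsm_msg *msg, u32 request_id)
{
	msg->type = VPU_JSM_MSG_QUERY_ENGINE_HB;
	msg->status = VPU_JSM_MSG_ALLOCATED;
	msg->request_id = request_id;
	msg->result = 0;
	msg->payload.query_engine_hb.engine_idx = VPU_ENGINE_COMPUTE;
}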
#pragma pack(pop)
#endif
///@}

@ -70,11 +70,7 @@ module_param(device_id_scheme, bool, 0444);
static int only_lcd = -1;
module_param(only_lcd, int, 0444);
/*
* Display probing is known to take up to 5 seconds, so delay the fallback
* backlight registration by 5 seconds + 3 seconds for some extra margin.
*/
static int register_backlight_delay = 8;
static int register_backlight_delay;
module_param(register_backlight_delay, int, 0444);
MODULE_PARM_DESC(register_backlight_delay,
"Delay in seconds before doing fallback (non GPU driver triggered) "
@ -2176,6 +2172,17 @@ static bool should_check_lcd_flag(void)
return false;
}
/*
* At least one graphics driver has reported that no LCD is connected
 * via the native interface. Cancel the registration for the fallback acpi_video0.
* If another driver still deems this necessary, it can explicitly register it.
*/
void acpi_video_report_nolcd(void)
{
cancel_delayed_work(&video_bus_register_backlight_work);
}
EXPORT_SYMBOL(acpi_video_report_nolcd);
int acpi_video_register(void)
{
int ret = 0;

@ -432,10 +432,24 @@ static const struct dmi_system_id asus_laptop[] = {
DMI_MATCH(DMI_BOARD_NAME, "S5602ZA"),
},
},
{
.ident = "Asus ExpertBook B2502",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
DMI_MATCH(DMI_BOARD_NAME, "B2502CBA"),
},
},
{ }
};
static const struct dmi_system_id lenovo_82ra[] = {
static const struct dmi_system_id lenovo_laptop[] = {
{
.ident = "LENOVO IdeaPad Flex 5 14ALC7",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
DMI_MATCH(DMI_PRODUCT_NAME, "82R9"),
},
},
{
.ident = "LENOVO IdeaPad Flex 5 16ALC7",
.matches = {
@ -446,6 +460,17 @@ static const struct dmi_system_id lenovo_82ra[] = {
{ }
};
static const struct dmi_system_id schenker_gm_rg[] = {
{
.ident = "XMG CORE 15 (M22)",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "SchenkerTechnologiesGmbH"),
DMI_MATCH(DMI_BOARD_NAME, "GMxRGxx"),
},
},
{ }
};
struct irq_override_cmp {
const struct dmi_system_id *system;
unsigned char irq;
@ -458,8 +483,9 @@ struct irq_override_cmp {
static const struct irq_override_cmp override_table[] = {
{ medion_laptop, 1, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, false },
{ asus_laptop, 1, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, false },
{ lenovo_82ra, 6, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, true },
{ lenovo_82ra, 10, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, true },
{ lenovo_laptop, 6, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, true },
{ lenovo_laptop, 10, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, true },
{ schenker_gm_rg, 1, ACPI_EDGE_SENSITIVE, ACPI_ACTIVE_LOW, 1, true },
};
static bool acpi_dev_irq_override(u32 gsi, u8 triggering, u8 polarity,

@ -34,6 +34,7 @@
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/platform_data/x86/nvidia-wmi-ec-backlight.h>
#include <linux/pnp.h>
#include <linux/types.h>
#include <linux/workqueue.h>
#include <acpi/video.h>
@ -105,6 +106,26 @@ static bool nvidia_wmi_ec_supported(void)
}
#endif
static bool apple_gmux_backlight_present(void)
{
struct acpi_device *adev;
struct device *dev;
adev = acpi_dev_get_first_match_dev(GMUX_ACPI_HID, NULL, -1);
if (!adev)
return false;
dev = acpi_get_first_physical_node(adev);
if (!dev)
return false;
/*
* drivers/platform/x86/apple-gmux.c only supports old style
* Apple GMUX with an IO-resource.
*/
return pnp_get_resource(to_pnp_dev(dev), IORESOURCE_IO, 0) != NULL;
}
/* Force to use vendor driver when the ACPI device is known to be
* buggy */
static int video_detect_force_vendor(const struct dmi_system_id *d)
@ -767,7 +788,7 @@ static enum acpi_backlight_type __acpi_video_get_backlight_type(bool native)
if (nvidia_wmi_ec_present)
return acpi_backlight_nvidia_wmi_ec;
if (apple_gmux_present())
if (apple_gmux_backlight_present())
return acpi_backlight_apple_gmux;
/* Use ACPI video if available, except when native should be preferred. */

@ -28,10 +28,6 @@ static bool sleep_no_lps0 __read_mostly;
module_param(sleep_no_lps0, bool, 0644);
MODULE_PARM_DESC(sleep_no_lps0, "Do not use the special LPS0 device interface");
static bool prefer_microsoft_dsm_guid __read_mostly;
module_param(prefer_microsoft_dsm_guid, bool, 0644);
MODULE_PARM_DESC(prefer_microsoft_dsm_guid, "Prefer using Microsoft GUID in LPS0 device _DSM evaluation");
static const struct acpi_device_id lps0_device_ids[] = {
{"PNP0D80", },
{"", },
@ -369,27 +365,15 @@ static int validate_dsm(acpi_handle handle, const char *uuid, int rev, guid_t *d
}
struct amd_lps0_hid_device_data {
const unsigned int rev_id;
const bool check_off_by_one;
const bool prefer_amd_guid;
};
static const struct amd_lps0_hid_device_data amd_picasso = {
.rev_id = 0,
.check_off_by_one = true,
.prefer_amd_guid = false,
};
static const struct amd_lps0_hid_device_data amd_cezanne = {
.rev_id = 0,
.check_off_by_one = false,
.prefer_amd_guid = false,
};
static const struct amd_lps0_hid_device_data amd_rembrandt = {
.rev_id = 2,
.check_off_by_one = false,
.prefer_amd_guid = true,
};
static const struct acpi_device_id amd_hid_ids[] = {
@ -397,69 +381,27 @@ static const struct acpi_device_id amd_hid_ids[] = {
{"AMD0005", (kernel_ulong_t)&amd_picasso, },
{"AMDI0005", (kernel_ulong_t)&amd_picasso, },
{"AMDI0006", (kernel_ulong_t)&amd_cezanne, },
{"AMDI0007", (kernel_ulong_t)&amd_rembrandt, },
{}
};
static int lps0_prefer_microsoft(const struct dmi_system_id *id)
static int lps0_prefer_amd(const struct dmi_system_id *id)
{
pr_debug("Preferring Microsoft GUID.\n");
prefer_microsoft_dsm_guid = true;
pr_debug("Using AMD GUID w/ _REV 2.\n");
rev_id = 2;
return 0;
}
static const struct dmi_system_id s2idle_dmi_table[] __initconst = {
{
/*
* ASUS TUF Gaming A17 FA707RE
* https://bugzilla.kernel.org/show_bug.cgi?id=216101
* AMD Rembrandt based HP EliteBook 835/845/865 G9
* Contains specialized AML in AMD/_REV 2 path to avoid
* triggering a bug in Qualcomm WLAN firmware. This may be
* removed in the future if that firmware is fixed.
*/
.callback = lps0_prefer_microsoft,
.callback = lps0_prefer_amd,
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
DMI_MATCH(DMI_PRODUCT_NAME, "ASUS TUF Gaming A17"),
},
},
{
/* ASUS ROG Zephyrus G14 (2022) */
.callback = lps0_prefer_microsoft,
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
DMI_MATCH(DMI_PRODUCT_NAME, "ROG Zephyrus G14 GA402"),
},
},
{
/*
* Lenovo Yoga Slim 7 Pro X 14ARH7
* https://bugzilla.kernel.org/show_bug.cgi?id=216473 : 82V2
* https://bugzilla.kernel.org/show_bug.cgi?id=216438 : 82TL
*/
.callback = lps0_prefer_microsoft,
.matches = {
DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
DMI_MATCH(DMI_PRODUCT_NAME, "82"),
},
},
{
/*
* ASUSTeK COMPUTER INC. ROG Flow X13 GV301RE_GV301RE
* https://gitlab.freedesktop.org/drm/amd/-/issues/2148
*/
.callback = lps0_prefer_microsoft,
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
DMI_MATCH(DMI_PRODUCT_NAME, "ROG Flow X13 GV301"),
},
},
{
/*
* ASUSTeK COMPUTER INC. ROG Flow X16 GV601RW_GV601RW
* https://gitlab.freedesktop.org/drm/amd/-/issues/2148
*/
.callback = lps0_prefer_microsoft,
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
DMI_MATCH(DMI_PRODUCT_NAME, "ROG Flow X16 GV601"),
DMI_MATCH(DMI_BOARD_VENDOR, "HP"),
DMI_MATCH(DMI_BOARD_NAME, "8990"),
},
},
{}
@ -484,16 +426,14 @@ static int lps0_device_attach(struct acpi_device *adev,
if (dev_id->id[0])
data = (const struct amd_lps0_hid_device_data *) dev_id->driver_data;
else
data = &amd_rembrandt;
rev_id = data->rev_id;
data = &amd_cezanne;
lps0_dsm_func_mask = validate_dsm(adev->handle,
ACPI_LPS0_DSM_UUID_AMD, rev_id, &lps0_dsm_guid);
if (lps0_dsm_func_mask > 0x3 && data->check_off_by_one) {
lps0_dsm_func_mask = (lps0_dsm_func_mask << 1) | 0x1;
acpi_handle_debug(adev->handle, "_DSM UUID %s: Adjusted function mask: 0x%x\n",
ACPI_LPS0_DSM_UUID_AMD, lps0_dsm_func_mask);
} else if (lps0_dsm_func_mask_microsoft > 0 && data->prefer_amd_guid &&
!prefer_microsoft_dsm_guid) {
} else if (lps0_dsm_func_mask_microsoft > 0 && rev_id) {
lps0_dsm_func_mask_microsoft = -EINVAL;
acpi_handle_debug(adev->handle, "_DSM Using AMD method\n");
}
@ -501,8 +441,7 @@ static int lps0_device_attach(struct acpi_device *adev,
rev_id = 1;
lps0_dsm_func_mask = validate_dsm(adev->handle,
ACPI_LPS0_DSM_UUID, rev_id, &lps0_dsm_guid);
if (!prefer_microsoft_dsm_guid)
lps0_dsm_func_mask_microsoft = -EINVAL;
lps0_dsm_func_mask_microsoft = -EINVAL;
}
if (lps0_dsm_func_mask < 0 && lps0_dsm_func_mask_microsoft < 0)

@ -83,6 +83,7 @@ enum board_ids {
static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent);
static void ahci_remove_one(struct pci_dev *dev);
static void ahci_shutdown_one(struct pci_dev *dev);
static void ahci_intel_pcs_quirk(struct pci_dev *pdev, struct ahci_host_priv *hpriv);
static int ahci_vt8251_hardreset(struct ata_link *link, unsigned int *class,
unsigned long deadline);
static int ahci_avn_hardreset(struct ata_link *link, unsigned int *class,
@ -676,6 +677,25 @@ static void ahci_pci_save_initial_config(struct pci_dev *pdev,
ahci_save_initial_config(&pdev->dev, hpriv);
}
static int ahci_pci_reset_controller(struct ata_host *host)
{
struct pci_dev *pdev = to_pci_dev(host->dev);
struct ahci_host_priv *hpriv = host->private_data;
int rc;
rc = ahci_reset_controller(host);
if (rc)
return rc;
/*
* If platform firmware failed to enable ports, try to enable
* them here.
*/
ahci_intel_pcs_quirk(pdev, hpriv);
return 0;
}
static void ahci_pci_init_controller(struct ata_host *host)
{
struct ahci_host_priv *hpriv = host->private_data;
@ -870,7 +890,7 @@ static int ahci_pci_device_runtime_resume(struct device *dev)
struct ata_host *host = pci_get_drvdata(pdev);
int rc;
rc = ahci_reset_controller(host);
rc = ahci_pci_reset_controller(host);
if (rc)
return rc;
ahci_pci_init_controller(host);
@ -906,7 +926,7 @@ static int ahci_pci_device_resume(struct device *dev)
ahci_mcp89_apple_enable(pdev);
if (pdev->dev.power.power_state.event == PM_EVENT_SUSPEND) {
rc = ahci_reset_controller(host);
rc = ahci_pci_reset_controller(host);
if (rc)
return rc;
@ -1784,12 +1804,6 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
/* save initial config */
ahci_pci_save_initial_config(pdev, hpriv);
/*
* If platform firmware failed to enable ports, try to enable
* them here.
*/
ahci_intel_pcs_quirk(pdev, hpriv);
/* prepare host */
if (hpriv->cap & HOST_CAP_NCQ) {
pi.flags |= ATA_FLAG_NCQ;
@ -1899,7 +1913,7 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
if (rc)
return rc;
rc = ahci_reset_controller(host);
rc = ahci_pci_reset_controller(host);
if (rc)
return rc;

@ -1263,7 +1263,7 @@ EXPORT_SYMBOL_NS_GPL(dma_buf_unmap_attachment_unlocked, DMA_BUF);
*
* @dmabuf: [in] buffer which is moving
*
* Informs all attachmenst that they need to destroy and recreated all their
* Informs all attachments that they need to destroy and recreate all their
* mappings.
*/
void dma_buf_move_notify(struct dma_buf *dmabuf)
@ -1281,11 +1281,11 @@ EXPORT_SYMBOL_NS_GPL(dma_buf_move_notify, DMA_BUF);
/**
* DOC: cpu access
*
* There are mutliple reasons for supporting CPU access to a dma buffer object:
* There are multiple reasons for supporting CPU access to a dma buffer object:
*
* - Fallback operations in the kernel, for example when a device is connected
* over USB and the kernel needs to shuffle the data around first before
* sending it away. Cache coherency is handled by braketing any transactions
* sending it away. Cache coherency is handled by bracketing any transactions
* with calls to dma_buf_begin_cpu_access() and dma_buf_end_cpu_access()
* access.
*
@ -1312,7 +1312,7 @@ EXPORT_SYMBOL_NS_GPL(dma_buf_move_notify, DMA_BUF);
* replace ION buffers mmap support was needed.
*
* There is no special interfaces, userspace simply calls mmap on the dma-buf
* fd. But like for CPU access there's a need to braket the actual access,
* fd. But like for CPU access there's a need to bracket the actual access,
* which is handled by the ioctl (DMA_BUF_IOCTL_SYNC). Note that
* DMA_BUF_IOCTL_SYNC can fail with -EAGAIN or -EINTR, in which case it must
* be restarted.
@ -1386,10 +1386,10 @@ static int __dma_buf_begin_cpu_access(struct dma_buf *dmabuf,
* preparations. Coherency is only guaranteed in the specified range for the
* specified access direction.
* @dmabuf: [in] buffer to prepare cpu access for.
* @direction: [in] length of range for cpu access.
* @direction: [in] direction of access.
*
* After the cpu access is complete the caller should call
* dma_buf_end_cpu_access(). Only when cpu access is braketed by both calls is
* dma_buf_end_cpu_access(). Only when cpu access is bracketed by both calls is
* it guaranteed to be coherent with other DMA access.
*
* This function will also wait for any DMA transactions tracked through
@ -1429,7 +1429,7 @@ EXPORT_SYMBOL_NS_GPL(dma_buf_begin_cpu_access, DMA_BUF);
* actions. Coherency is only guaranteed in the specified range for the
* specified access direction.
* @dmabuf: [in] buffer to complete cpu access for.
* @direction: [in] length of range for cpu access.
* @direction: [in] direction of access.
*
* This terminates CPU access started with dma_buf_begin_cpu_access().
*

@ -13,6 +13,8 @@
#include <linux/slab.h>
#include <linux/udmabuf.h>
#include <linux/hugetlb.h>
#include <linux/vmalloc.h>
#include <linux/iosys-map.h>
static int list_limit = 1024;
module_param(list_limit, int, 0644);
@ -60,6 +62,30 @@ static int mmap_udmabuf(struct dma_buf *buf, struct vm_area_struct *vma)
return 0;
}
static int vmap_udmabuf(struct dma_buf *buf, struct iosys_map *map)
{
struct udmabuf *ubuf = buf->priv;
void *vaddr;
dma_resv_assert_held(buf->resv);
vaddr = vm_map_ram(ubuf->pages, ubuf->pagecount, -1);
if (!vaddr)
return -EINVAL;
iosys_map_set_vaddr(map, vaddr);
return 0;
}
static void vunmap_udmabuf(struct dma_buf *buf, struct iosys_map *map)
{
struct udmabuf *ubuf = buf->priv;
dma_resv_assert_held(buf->resv);
vm_unmap_ram(map->vaddr, ubuf->pagecount);
}
static struct sg_table *get_sg_table(struct device *dev, struct dma_buf *buf,
enum dma_data_direction direction)
{
@ -162,6 +188,8 @@ static const struct dma_buf_ops udmabuf_ops = {
.unmap_dma_buf = unmap_udmabuf,
.release = release_udmabuf,
.mmap = mmap_udmabuf,
.vmap = vmap_udmabuf,
.vunmap = vunmap_udmabuf,
.begin_cpu_access = begin_cpu_udmabuf,
.end_cpu_access = end_cpu_udmabuf,
};

@ -27,25 +27,56 @@ static const struct simplefb_format formats[] = SIMPLEFB_FORMATS;
__init bool sysfb_parse_mode(const struct screen_info *si,
struct simplefb_platform_data *mode)
{
const struct simplefb_format *f;
__u8 type;
u32 bits_per_pixel;
unsigned int i;
type = si->orig_video_isVGA;
if (type != VIDEO_TYPE_VLFB && type != VIDEO_TYPE_EFI)
return false;
/*
* The meaning of depth and bpp for direct-color formats is
* inconsistent:
*
* - DRM format info specifies depth as the number of color
* bits; including alpha, but not including filler bits.
* - Linux' EFI platform code computes lfb_depth from the
* individual color channels, including the reserved bits.
* - VBE 1.1 defines lfb_depth for XRGB1555 as 16, but later
* versions use 15.
* - On the kernel command line, 'bpp' of 32 is usually
* XRGB8888 including the filler bits, but 15 is XRGB1555
* not including the filler bit.
*
* It's not easily possible to fix this in struct screen_info,
* as this could break UAPI. The best solution is to compute
* bits_per_pixel here and ignore lfb_depth. In the loop below,
* ignore simplefb formats with alpha bits, as EFI and VESA
* don't specify alpha channels.
*/
if (si->lfb_depth > 8) {
bits_per_pixel = max(max3(si->red_size + si->red_pos,
si->green_size + si->green_pos,
si->blue_size + si->blue_pos),
si->rsvd_size + si->rsvd_pos);
} else {
bits_per_pixel = si->lfb_depth;
}
for (i = 0; i < ARRAY_SIZE(formats); ++i) {
f = &formats[i];
if (si->lfb_depth == f->bits_per_pixel &&
const struct simplefb_format *f = &formats[i];
if (f->transp.length)
continue; /* transparent formats are unsupported by VESA/EFI */
if (bits_per_pixel == f->bits_per_pixel &&
si->red_size == f->red.length &&
si->red_pos == f->red.offset &&
si->green_size == f->green.length &&
si->green_pos == f->green.offset &&
si->blue_size == f->blue.length &&
si->blue_pos == f->blue.offset &&
si->rsvd_size == f->transp.length &&
si->rsvd_pos == f->transp.offset) {
si->blue_pos == f->blue.offset) {
mode->format = f->name;
mode->width = si->lfb_width;
mode->height = si->lfb_height;

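As a worked example of the computation above, take an illustrative VBE XRGB1555 mode, where lfb_depth may be reported as either 15 or 16 depending on the VBE version:

    red:   size 5, pos 10  ->  5 + 10 = 15
    green: size 5, pos  5  ->  5 +  5 = 10
    blue:  size 5, pos  0  ->  5 +  0 =  5
    rsvd:  size 1, pos 15  ->  1 + 15 = 16

    bits_per_pixel = max(max3(15, 10, 5), 16) = 16

Either reported depth now matches the 16-bit XRGB1555 simplefb entry, and dropping the rsvd_size/rsvd_pos comparison keeps the filler bit from blocking the match.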
@ -12,7 +12,6 @@ menuconfig DRM
select HDMI
select FB_CMDLINE
select I2C
select I2C_ALGOBIT
select DMA_SHARED_BUFFER
select SYNC_FILE
# gallium uses SYS_kcmp for os_same_file_description() to de-duplicate
@ -63,6 +62,12 @@ config DRM_USE_DYNAMIC_DEBUG
bytes per callsite, the .data costs can be substantial, and
are therefore configurable.
config DRM_KUNIT_TEST_HELPERS
tristate
depends on DRM && KUNIT
help
KUnit Helpers for KMS drivers.
config DRM_KUNIT_TEST
tristate "KUnit tests for DRM" if !KUNIT_ALL_TESTS
depends on DRM && KUNIT
@ -73,6 +78,7 @@ config DRM_KUNIT_TEST
select DRM_KMS_HELPER
select DRM_BUDDY
select DRM_EXPORT_FOR_TESTS if m
select DRM_KUNIT_TEST_HELPERS
default KUNIT_ALL_TESTS
help
This builds unit tests for DRM. This option is not useful for
@ -391,64 +397,7 @@ menuconfig DRM_LEGACY
Unless you have strong reasons to go rogue, say "N".
if DRM_LEGACY
config DRM_TDFX
tristate "3dfx Banshee/Voodoo3+"
depends on DRM && PCI
help
Choose this option if you have a 3dfx Banshee or Voodoo3 (or later),
graphics card. If M is selected, the module will be called tdfx.
config DRM_R128
tristate "ATI Rage 128"
depends on DRM && PCI
select FW_LOADER
help
Choose this option if you have an ATI Rage 128 graphics card. If M
is selected, the module will be called r128. AGP support for
this card is strongly suggested (unless you have a PCI version).
config DRM_I810
tristate "Intel I810"
# !PREEMPTION because of missing ioctl locking
depends on DRM && AGP && AGP_INTEL && (!PREEMPTION || BROKEN)
help
Choose this option if you have an Intel I810 graphics card. If M is
selected, the module will be called i810. AGP support is required
for this driver to work.
config DRM_MGA
tristate "Matrox g200/g400"
depends on DRM && PCI
select FW_LOADER
help
Choose this option if you have a Matrox G200, G400 or G450 graphics
card. If M is selected, the module will be called mga. AGP
support is required for this driver to work.
config DRM_SIS
tristate "SiS video cards"
depends on DRM && AGP
depends on FB_SIS || FB_SIS=n
help
Choose this option if you have a SiS 630 or compatible video
chipset. If M is selected the module will be called sis. AGP
support is required for this driver to work.
config DRM_VIA
tristate "Via unichrome video cards"
depends on DRM && PCI
help
Choose this option if you have a Via unichrome or compatible video
chipset. If M is selected the module will be called via.
config DRM_SAVAGE
tristate "Savage video cards"
depends on DRM && PCI
help
Choose this option if you have a Savage3D/4/SuperSavage/Pro/Twister
chipset. If M is selected the module will be called savage.
# leave here to list legacy drivers
endif # DRM_LEGACY
config DRM_EXPORT_FOR_TESTS

@ -126,7 +126,7 @@ obj-$(CONFIG_DRM_KMS_HELPER) += drm_kms_helper.o
# Drivers and the rest
#
obj-$(CONFIG_DRM_KUNIT_TEST) += tests/
obj-y += tests/
obj-$(CONFIG_DRM_MIPI_DBI) += drm_mipi_dbi.o
obj-$(CONFIG_DRM_MIPI_DSI) += drm_mipi_dsi.o
@ -134,21 +134,14 @@ obj-y += arm/
obj-y += display/
obj-$(CONFIG_DRM_TTM) += ttm/
obj-$(CONFIG_DRM_SCHED) += scheduler/
obj-$(CONFIG_DRM_TDFX) += tdfx/
obj-$(CONFIG_DRM_R128) += r128/
obj-$(CONFIG_DRM_RADEON)+= radeon/
obj-$(CONFIG_DRM_AMDGPU)+= amd/amdgpu/
obj-$(CONFIG_DRM_MGA) += mga/
obj-$(CONFIG_DRM_I810) += i810/
obj-$(CONFIG_DRM_I915) += i915/
obj-$(CONFIG_DRM_KMB_DISPLAY) += kmb/
obj-$(CONFIG_DRM_MGAG200) += mgag200/
obj-$(CONFIG_DRM_V3D) += v3d/
obj-$(CONFIG_DRM_VC4) += vc4/
obj-$(CONFIG_DRM_SIS) += sis/
obj-$(CONFIG_DRM_SAVAGE)+= savage/
obj-$(CONFIG_DRM_VMWGFX)+= vmwgfx/
obj-$(CONFIG_DRM_VIA) +=via/
obj-$(CONFIG_DRM_VGEM) += vgem/
obj-$(CONFIG_DRM_VKMS) += vkms/
obj-$(CONFIG_DRM_NOUVEAU) +=nouveau/

@ -13,6 +13,8 @@ config DRM_AMDGPU
select DRM_TTM_HELPER
select POWER_SUPPLY
select HWMON
select I2C
select I2C_ALGOBIT
select BACKLIGHT_CLASS_DEVICE
select INTERVAL_TREE
select DRM_BUDDY

@ -81,7 +81,8 @@ amdgpu-y += \
# add DF block
amdgpu-y += \
df_v1_7.o \
df_v3_6.o
df_v3_6.o \
df_v4_3.o
# add GMC block
amdgpu-y += \
@ -136,6 +137,7 @@ amdgpu-y += \
gfx_v10_0.o \
imu_v11_0.o \
gfx_v11_0.o \
gfx_v11_0_3.o \
imu_v11_0_3.o
# add async DMA block

@ -52,8 +52,7 @@
#include <linux/pci.h>
#include <linux/aer.h>
#include <drm/ttm/ttm_bo_api.h>
#include <drm/ttm/ttm_bo_driver.h>
#include <drm/ttm/ttm_bo.h>
#include <drm/ttm/ttm_placement.h>
#include <drm/ttm/ttm_execbuf_util.h>
@ -150,7 +149,7 @@ struct amdgpu_watchdog_timer
* Modules parameters.
*/
extern int amdgpu_modeset;
extern int amdgpu_vram_limit;
extern unsigned int amdgpu_vram_limit;
extern int amdgpu_vis_vram_limit;
extern int amdgpu_gart_size;
extern int amdgpu_gtt_size;
@ -195,6 +194,7 @@ extern int amdgpu_emu_mode;
extern uint amdgpu_smu_memory_pool_size;
extern int amdgpu_smu_pptable_id;
extern uint amdgpu_dc_feature_mask;
extern uint amdgpu_freesync_vid_mode;
extern uint amdgpu_dc_debug_mask;
extern uint amdgpu_dc_visual_confirm;
extern uint amdgpu_dm_abm_level;
@ -608,7 +608,7 @@ int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
/* VRAM scratch page for HDP bug, default vram page */
struct amdgpu_vram_scratch {
struct amdgpu_mem_scratch {
struct amdgpu_bo *robj;
volatile uint32_t *ptr;
u64 gpu_addr;
@ -755,6 +755,11 @@ struct amdgpu_mqd {
#define AMDGPU_PRODUCT_NAME_LEN 64
struct amdgpu_reset_domain;
/*
* Non-zero (true) if the GPU has VRAM. Zero (false) otherwise.
*/
#define AMDGPU_HAS_VRAM(_adev) ((_adev)->gmc.real_vram_size)
struct amdgpu_device {
struct device *dev;
struct pci_dev *pdev;
@ -848,7 +853,7 @@ struct amdgpu_device {
/* memory management */
struct amdgpu_mman mman;
struct amdgpu_vram_scratch vram_scratch;
struct amdgpu_mem_scratch mem_scratch;
struct amdgpu_wb wb;
atomic64_t num_bytes_moved;
atomic64_t num_evictions;
@ -870,7 +875,7 @@ struct amdgpu_device {
struct amdgpu_vkms_output *amdgpu_vkms_output;
struct amdgpu_mode_info mode_info;
/* For pre-DCE11. DCE11 and later are in "struct amdgpu_device->dm" */
struct work_struct hotplug_work;
struct delayed_work hotplug_work;
struct amdgpu_irq_src crtc_irq;
struct amdgpu_irq_src vline0_irq;
struct amdgpu_irq_src vupdate_irq;

@ -24,6 +24,7 @@
#include <linux/pci.h>
#include <linux/acpi.h>
#include <linux/backlight.h>
#include <linux/slab.h>
#include <linux/power_supply.h>
#include <linux/pm_runtime.h>
@ -31,7 +32,6 @@
#include <acpi/video.h>
#include <acpi/actbl.h>
#include <drm/drm_crtc_helper.h>
#include "amdgpu.h"
#include "amdgpu_pm.h"
#include "amdgpu_display.h"

@ -97,7 +97,7 @@ struct amdgpu_amdkfd_fence {
struct amdgpu_kfd_dev {
struct kfd_dev *dev;
uint64_t vram_used;
int64_t vram_used;
uint64_t vram_used_aligned;
bool init_complete;
struct work_struct reset_work;
@ -271,9 +271,9 @@ int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool is_
((struct drm_file *)(drm_priv))->driver_priv)->vm)
int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct amdgpu_device *adev,
struct file *filp, u32 pasid);
struct amdgpu_vm *avm, u32 pasid);
int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
struct file *filp,
struct amdgpu_vm *avm,
void **process_info,
struct dma_fence **ef);
void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev,

@ -25,6 +25,7 @@
#include <linux/pagemap.h>
#include <linux/sched/mm.h>
#include <linux/sched/task.h>
#include <drm/ttm/ttm_tt.h>
#include "amdgpu_object.h"
#include "amdgpu_gem.h"
@ -1430,18 +1431,11 @@ static void amdgpu_amdkfd_gpuvm_unpin_bo(struct amdgpu_bo *bo)
}
int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct amdgpu_device *adev,
struct file *filp, u32 pasid)
struct amdgpu_vm *avm, u32 pasid)
{
struct amdgpu_fpriv *drv_priv;
struct amdgpu_vm *avm;
int ret;
ret = amdgpu_file_to_fpriv(filp, &drv_priv);
if (ret)
return ret;
avm = &drv_priv->vm;
/* Free the original amdgpu allocated pasid,
* will be replaced with kfd allocated pasid.
*/
@ -1458,19 +1452,12 @@ int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct amdgpu_device *adev,
}
int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
struct file *filp,
struct amdgpu_vm *avm,
void **process_info,
struct dma_fence **ef)
{
struct amdgpu_fpriv *drv_priv;
struct amdgpu_vm *avm;
int ret;
ret = amdgpu_file_to_fpriv(filp, &drv_priv);
if (ret)
return ret;
avm = &drv_priv->vm;
/* Already a compute VM? */
if (avm->process_info)
return -EINVAL;
@ -1612,6 +1599,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
struct amdgpu_bo *bo;
struct drm_gem_object *gobj = NULL;
u32 domain, alloc_domain;
uint64_t aligned_size;
u64 alloc_flags;
int ret;
@ -1667,22 +1655,23 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
* the memory.
*/
if ((*mem)->aql_queue)
size = size >> 1;
size >>= 1;
aligned_size = PAGE_ALIGN(size);
(*mem)->alloc_flags = flags;
amdgpu_sync_create(&(*mem)->sync);
ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, flags);
ret = amdgpu_amdkfd_reserve_mem_limit(adev, aligned_size, flags);
if (ret) {
pr_debug("Insufficient memory\n");
goto err_reserve_limit;
}
pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n",
va, size, domain_string(alloc_domain));
va, (*mem)->aql_queue ? size << 1 : size, domain_string(alloc_domain));
ret = amdgpu_gem_object_create(adev, size, 1, alloc_domain, alloc_flags,
ret = amdgpu_gem_object_create(adev, aligned_size, 1, alloc_domain, alloc_flags,
bo_type, NULL, &gobj);
if (ret) {
pr_debug("Failed to create BO on domain %s. ret %d\n",
@ -1739,7 +1728,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
/* Don't unreserve system mem limit twice */
goto err_reserve_limit;
err_bo_create:
amdgpu_amdkfd_unreserve_mem_limit(adev, size, flags);
amdgpu_amdkfd_unreserve_mem_limit(adev, aligned_size, flags);
err_reserve_limit:
mutex_destroy(&(*mem)->lock);
if (gobj)
@ -2099,7 +2088,7 @@ int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_b
}
amdgpu_amdkfd_remove_eviction_fence(
bo, bo->kfd_bo->process_info->eviction_fence);
bo, bo->vm_bo->vm->process_info->eviction_fence);
amdgpu_bo_unreserve(bo);

@ -28,6 +28,8 @@
struct hmm_range;
struct drm_file;
struct amdgpu_device;
struct amdgpu_bo;
struct amdgpu_bo_va;

@ -411,17 +411,10 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
return -EINVAL;
}
err = request_firmware(&adev->pm.fw, fw_name, adev->dev);
if (err) {
DRM_ERROR("Failed to request firmware\n");
return err;
}
err = amdgpu_ucode_validate(adev->pm.fw);
err = amdgpu_ucode_request(adev, &adev->pm.fw, fw_name);
if (err) {
DRM_ERROR("Failed to load firmware \"%s\"", fw_name);
release_firmware(adev->pm.fw);
adev->pm.fw = NULL;
amdgpu_ucode_release(&adev->pm.fw);
return err;
}

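This hunk (like the gpu_info one in amdgpu_device.c further down) replaces the open-coded request_firmware()/amdgpu_ucode_validate()/release_firmware() sequence with the paired helpers. A sketch of the resulting pattern; the function names are illustrative only.

#include "amdgpu.h"
#include "amdgpu_ucode.h"

/* Illustrative init path: request and validate the firmware in one call. */
static int example_fw_init(struct amdgpu_device *adev, const char *fw_name)
{
	return amdgpu_ucode_request(adev, &adev->pm.fw, fw_name);
}

/* Illustrative teardown: releases the firmware and clears the pointer. */
static void example_fw_fini(struct amdgpu_device *adev)
{
	amdgpu_ucode_release(&adev->pm.fw);
}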
@ -25,7 +25,9 @@
*/
#include <drm/display/drm_dp_helper.h>
#include <drm/drm_crtc_helper.h>
#include <drm/drm_edid.h>
#include <drm/drm_modeset_helper_vtables.h>
#include <drm/drm_probe_helper.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
@ -996,13 +998,33 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force)
}
}
if (amdgpu_connector->detected_hpd_without_ddc) {
force = true;
amdgpu_connector->detected_hpd_without_ddc = false;
}
if (!force && amdgpu_connector_check_hpd_status_unchanged(connector)) {
ret = connector->status;
goto exit;
}
if (amdgpu_connector->ddc_bus)
if (amdgpu_connector->ddc_bus) {
dret = amdgpu_display_ddc_probe(amdgpu_connector, false);
/* Sometimes the pins required for the DDC probe on DVI
* connectors don't make contact at the same time that the ones
* for HPD do. If the DDC probe fails even though we had an HPD
* signal, try again later
*/
if (!dret && !force &&
amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd)) {
DRM_DEBUG_KMS("hpd detected without ddc, retrying in 1 second\n");
amdgpu_connector->detected_hpd_without_ddc = true;
schedule_delayed_work(&adev->hotplug_work,
msecs_to_jiffies(1000));
goto exit;
}
}
if (dret) {
amdgpu_connector->detected_by_load = false;
amdgpu_connector_free_edid(connector);

@ -32,6 +32,8 @@
#include <drm/amdgpu_drm.h>
#include <drm/drm_syncobj.h>
#include <drm/ttm/ttm_tt.h>
#include "amdgpu_cs.h"
#include "amdgpu.h"
#include "amdgpu_trace.h"

@ -23,6 +23,8 @@
#ifndef __AMDGPU_CS_H__
#define __AMDGPU_CS_H__
#include <linux/ww_mutex.h>
#include "amdgpu_job.h"
#include "amdgpu_bo_list.h"
#include "amdgpu_ring.h"

@ -1717,7 +1717,7 @@ static void amdgpu_ib_preempt_mark_partial_job(struct amdgpu_ring *ring)
static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
{
int r, resched, length;
int r, length;
struct amdgpu_ring *ring;
struct dma_fence **fences = NULL;
struct amdgpu_device *adev = (struct amdgpu_device *)data;
@ -1747,8 +1747,6 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
/* stop the scheduler */
kthread_park(ring->sched.thread);
resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
/* preempt the IB */
r = amdgpu_ring_preempt_ib(ring);
if (r) {
@ -1785,8 +1783,6 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
up_read(&adev->reset_domain->sem);
ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched);
pro_end:
kfree(fences);

@ -36,7 +36,9 @@
#include <generated/utsrelease.h>
#include <linux/pci-p2pdma.h>
#include <drm/drm_aperture.h>
#include <drm/drm_atomic_helper.h>
#include <drm/drm_crtc_helper.h>
#include <drm/drm_fb_helper.h>
#include <drm/drm_probe_helper.h>
#include <drm/amdgpu_drm.h>
@ -90,6 +92,8 @@ MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
#define AMDGPU_MAX_RETRY_LIMIT 2
#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
static const struct drm_driver amdgpu_kms_driver;
const char *amdgpu_asic_name[] = {
"TAHITI",
"PITCAIRN",
@ -924,32 +928,33 @@ static int amdgpu_device_asic_init(struct amdgpu_device *adev)
}
/**
* amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
* amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
*
* @adev: amdgpu_device pointer
*
* Allocates a scratch page of VRAM for use by various things in the
* driver.
*/
static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
{
return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
&adev->vram_scratch.robj,
&adev->vram_scratch.gpu_addr,
(void **)&adev->vram_scratch.ptr);
return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM |
AMDGPU_GEM_DOMAIN_GTT,
&adev->mem_scratch.robj,
&adev->mem_scratch.gpu_addr,
(void **)&adev->mem_scratch.ptr);
}
/**
* amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
* amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
*
* @adev: amdgpu_device pointer
*
* Frees the VRAM scratch page.
*/
static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
{
amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
}
/**
@ -1981,17 +1986,10 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
}
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name);
if (err) {
dev_err(adev->dev,
"Failed to load gpu_info firmware \"%s\"\n",
fw_name);
goto out;
}
err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
if (err) {
dev_err(adev->dev,
"Failed to validate gpu_info firmware \"%s\"\n",
"Failed to get gpu_info firmware \"%s\"\n",
fw_name);
goto out;
}
@ -2078,6 +2076,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
struct drm_device *dev = adev_to_drm(adev);
struct pci_dev *parent;
int i, r;
bool total;
amdgpu_device_enable_virtual_display(adev);
@ -2161,6 +2160,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
total = true;
for (i = 0; i < adev->num_ip_blocks; i++) {
if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
DRM_ERROR("disabled ip block: %d <%s>\n",
@ -2174,7 +2174,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
} else if (r) {
DRM_ERROR("early_init of IP block <%s> failed %d\n",
adev->ip_blocks[i].version->funcs->name, r);
return r;
total = false;
} else {
adev->ip_blocks[i].status.valid = true;
}
@ -2205,6 +2205,8 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
}
}
if (!total)
return -ENODEV;
adev->cg_flags &= amdgpu_cg_mask;
adev->pg_flags &= amdgpu_pg_mask;
@ -2390,9 +2392,9 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
if (amdgpu_sriov_vf(adev))
amdgpu_virt_exchange_data(adev);
r = amdgpu_device_vram_scratch_init(adev);
r = amdgpu_device_mem_scratch_init(adev);
if (r) {
DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
goto init_failed;
}
r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
@ -2410,8 +2412,9 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
/* right after GMC hw init, we create CSA */
if (amdgpu_mcbp) {
r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
AMDGPU_GEM_DOMAIN_VRAM,
AMDGPU_CSA_SIZE);
AMDGPU_GEM_DOMAIN_VRAM |
AMDGPU_GEM_DOMAIN_GTT,
AMDGPU_CSA_SIZE);
if (r) {
DRM_ERROR("allocate CSA failed %d\n", r);
goto init_failed;
@ -2581,9 +2584,10 @@ int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
if (!adev->ip_blocks[i].status.late_initialized)
continue;
/* skip CG for GFX on S0ix */
/* skip CG for GFX, SDMA on S0ix */
if (adev->in_s0ix &&
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX)
(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
continue;
/* skip CG for VCE/UVD, it's handled specially */
if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
@ -2617,9 +2621,10 @@ int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
if (!adev->ip_blocks[i].status.late_initialized)
continue;
/* skip PG for GFX on S0ix */
/* skip PG for GFX, SDMA on S0ix */
if (adev->in_s0ix &&
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX)
(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
continue;
/* skip CG for VCE/UVD, it's handled specially */
if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
@ -2871,7 +2876,7 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
amdgpu_ucode_free_bo(adev);
amdgpu_free_static_csa(&adev->virt.csa_obj);
amdgpu_device_wb_fini(adev);
amdgpu_device_vram_scratch_fini(adev);
amdgpu_device_mem_scratch_fini(adev);
amdgpu_ib_pool_fini(adev);
}
@ -3027,6 +3032,12 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
continue;
/* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
if (adev->in_s0ix &&
(adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(5, 0, 0)) &&
(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
continue;
/* XXX handle errors */
r = adev->ip_blocks[i].version->funcs->suspend(adev);
/* XXX handle errors */
@ -3227,15 +3238,6 @@ static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
return r;
}
adev->ip_blocks[i].status.hw = true;
if (adev->in_s0ix && adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
/* disable gfxoff for IP resume. The gfxoff will be re-enabled in
* amdgpu_device_resume() after IP resume.
*/
amdgpu_gfx_off_ctrl(adev, false);
DRM_DEBUG("will disable gfxoff for re-initializing other blocks\n");
}
}
return 0;
@ -3687,6 +3689,11 @@ int amdgpu_device_init(struct amdgpu_device *adev,
if (r)
return r;
/* Get rid of things like offb */
r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
if (r)
return r;
/* Enable TMZ based on IP_VERSION */
amdgpu_gmc_tmz_set(adev);
@ -3989,10 +3996,8 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
}
amdgpu_fence_driver_hw_fini(adev);
if (adev->mman.initialized) {
flush_delayed_work(&adev->mman.bdev.wq);
ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
}
if (adev->mman.initialized)
drain_workqueue(adev->mman.bdev.wq);
if (adev->pm_sysfs_en)
amdgpu_pm_sysfs_fini(adev);
@ -4024,8 +4029,7 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)
amdgpu_fence_driver_sw_fini(adev);
amdgpu_device_ip_fini(adev);
release_firmware(adev->firmware.gpu_info_fw);
adev->firmware.gpu_info_fw = NULL;
amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
adev->accel_working = false;
dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
@ -4223,13 +4227,6 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
/* Make sure IB tests flushed */
flush_delayed_work(&adev->delayed_init_work);
if (adev->in_s0ix) {
/* re-enable gfxoff after IP resume. This re-enables gfxoff after
* it was disabled for IP resume in amdgpu_device_ip_resume_phase2().
*/
amdgpu_gfx_off_ctrl(adev, true);
DRM_DEBUG("will enable gfxoff for the mission mode\n");
}
if (fbcon)
drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
@ -4610,11 +4607,6 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
if (!amdgpu_ras_is_poison_mode_supported(adev))
return true;
if (!amdgpu_device_ip_check_soft_reset(adev)) {
dev_info(adev->dev,"Timeout, but no hardware hang detected.\n");
return false;
}
if (amdgpu_sriov_vf(adev))
return true;
@ -4739,7 +4731,8 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
if (!need_full_reset)
need_full_reset = amdgpu_device_ip_need_full_reset(adev);
if (!need_full_reset && amdgpu_gpu_recovery) {
if (!need_full_reset && amdgpu_gpu_recovery &&
amdgpu_device_ip_check_soft_reset(adev)) {
amdgpu_device_ip_pre_soft_reset(adev);
r = amdgpu_device_ip_soft_reset(adev);
amdgpu_device_ip_post_soft_reset(adev);
@ -5865,8 +5858,8 @@ void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
int amdgpu_in_reset(struct amdgpu_device *adev)
{
return atomic_read(&adev->reset_domain->in_gpu_reset);
}
}
/**
* amdgpu_device_halt() - bring hardware to some kind of halt state
*

Some files were not shown because too many files have changed in this diff Show More